1
- """ Collects metadata of assemblies from NCBI API """
1
+ """Collects metadata of assemblies from NCBI API"""
2
2
3
3
__author__ = "Berger, Phillip"
4
4
@@ -14,16 +14,14 @@ class NCBIAssemblyMetadata:
14
14
15
15
_all_metadata : dict
16
16
_count : int
17
- _ani_gcf : list
18
17
_parameters : dict
19
18
_accessions : list [str ]
20
19
_contig_n50 : int
21
20
_all_metadata_complete : dict
22
21
23
- def __init__ (self , all_metadata : dict , ani_gcf : list , count = 8 , contig_n50 = 10000 ):
22
+ def __init__ (self , all_metadata : dict , count = 8 , contig_n50 = 10000 ):
24
23
self ._all_metadata = all_metadata
25
24
self ._count = count
26
- self ._ani_gcf = ani_gcf
27
25
self ._contig_n50 = contig_n50
28
26
29
27
self ._set_parameters ()
@@ -72,25 +70,27 @@ def _set_parameters(self):
72
70
}
73
71
74
72
def _make_request (self , taxon : str ):
75
- api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v1 /genome/taxon/{ taxon } "
73
+ api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v2 /genome/taxon/{ taxon } /dataset_report "
76
74
accessions = []
77
75
count = 0
78
76
for request_type , parameters in self ._parameters .items ():
79
77
raw_response = requests .get (api_url , params = parameters , timeout = 5 )
80
78
response = raw_response .json ()
81
79
if response :
82
80
try :
83
- assemblies = response ["assemblies" ]
84
- for assembly in assemblies :
85
- curr_assembly = assembly ["assembly" ]
86
- curr_accession = curr_assembly ["assembly_accession" ]
87
- curr_contig_n50 = curr_assembly ["contig_n50" ]
81
+ reports = response ["reports" ]
82
+ for report in reports :
83
+ accession = report ["accession" ]
84
+ contig_n50 = report ["assembly_stats" ]["contig_n50" ]
85
+ taxonomy_check_status = report ["average_nucleotide_identity" ][
86
+ "taxonomy_check_status"
87
+ ]
88
88
if count < self ._count :
89
89
if (
90
- curr_accession in self . _ani_gcf
91
- and curr_contig_n50 > self ._contig_n50
90
+ taxonomy_check_status == "OK"
91
+ and contig_n50 > self ._contig_n50
92
92
):
93
- accessions .append (curr_accession )
93
+ accessions .append (accession )
94
94
count += 1
95
95
else :
96
96
break
0 commit comments