13
13
14
14
import json
15
15
16
- DIAMOND_FIELDS = fields = ['qseqid ' , 'sseqid ' , 'pident ' , 'length ' , 'mismatch' , 'gapopen' , 'qstart' , 'qend' , 'sstart' , 'send' , 'evalue' , 'bitscore' , 'taxids' , 'taxname' , 'assemblyID' , 'analysisID ' ]
16
+ DIAMOND_FIELDS = fields = ['assemblyID ' , 'analysisID ' , 'qseqid ' , 'start ' , 'stop ' ]
17
17
18
18
## ============================ IMPORT AND DELETE ============================ ##
19
19
# full import of analyses
@@ -547,38 +547,39 @@ def __importTaxaminer(assemblyID, analysisID, base_path):
547
547
cursor .execute ("INSERT INTO analysesTaxaminer (analysisID) VALUES (%s)" , (analysisID ,))
548
548
connection .commit ()
549
549
550
- """
551
- # parse diamond
550
+ # Load taxonomic hits
552
551
diamond_path = base_path + "taxonomic_hits.txt"
552
+ print (diamond_path )
553
553
if not os .path .isfile (diamond_path ):
554
554
return 0 , createNotification (message = f"taXaminerImportDBError: Diamond data is missing!" )
555
-
556
- FIELDS = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore', 'taxids', 'taxname']
557
- TYPES = {'qseqid': str, 'sseqid': str, 'pident': float, 'length': float, 'mismatch': float, 'gapopen': float, 'qstart': float,
558
- 'qend': float, 'sstart': float, 'send': float, 'evalue': float, 'bitscore': float, 'taxids': str, 'taxname': str}
559
- rows = []
555
+
556
+ # build data rows
557
+ # => save assemblyID, analysisID, qseqID together with the row number to index file
558
+ sql_rows = []
560
559
with open (diamond_path ) as file :
561
- my_reader = csv.DictReader(file, delimiter='\t ', fieldnames=FIELDS)
562
- for row in my_reader:
563
- # manually set types
564
- for field in FIELDS:
565
- if TYPES.get(field) != str:
566
- if TYPES.get(field) == int:
567
- row[field] = int(row[field])
568
- elif TYPES.get(field) == float:
569
- row[field] = float(row[field])
570
- # cleared for db insert
571
- rows.append((assemblyID, analysisID, row['qseqid'], json.dumps(row)))
572
-
573
- print("Database Inserts look like this:" + str(rows[0]))
574
-
575
- # .executemany() exceeds the 'max_allowed_packet'
576
- # if you encounter this error use 'SET SESSION max_allowed_packet=500*1024*1024' or 'SET GLOBAL max_allowed_packet=500*1024*1024'
577
- # TLDR: MOOOOOOOOOOREEEEEEE RAM
560
+ start_index = 0
561
+ curr_id = ""
562
+ outer_index = 0
563
+ for i , line in enumerate (file .readlines ()):
564
+ # primer
565
+ if i == 0 :
566
+ curr_id = line .split ("\t " )[0 ]
567
+
568
+ # determine new id
569
+ next_id = line .split ("\t " )[0 ]
570
+ if next_id != curr_id :
571
+ # start -> stop
572
+ sql_rows .append ((assemblyID , analysisID , curr_id , start_index , i - 1 ))
573
+ curr_id = next_id
574
+ start_index = i
575
+ outer_index = i
576
+
577
+ # final row
578
+ sql_rows .append ((assemblyID , analysisID , curr_id , start_index , outer_index ))
579
+
578
580
connection , cursor , error = connect ()
579
- cursor.executemany("INSERT INTO taxaminerDiamond (assemblyID, analysisID, qseqID, data ) VALUES (%s, %s, %s, %s)", rows )
581
+ cursor .executemany ("INSERT INTO taxaminerDiamond (assemblyID, analysisID, qseqID, start, stop ) VALUES (%s, %s, %s, %s, %s )" , sql_rows )
580
582
connection .commit ()
581
- """
582
583
583
584
return 1 , []
584
585
except Exception as err :
@@ -707,10 +708,10 @@ def deleteAnalysesByAnalysesID(analyses_id):
707
708
try :
708
709
connection , cursor , error = connect ()
709
710
cursor .execute (
710
- "SELECT assemblies.id, assemblies.name, analyses.path FROM assemblies, analyses WHERE analyses.id=%s AND analyses.assemblyID=assemblies.id" ,
711
+ "SELECT assemblies.id, assemblies.name, analyses.path, analyses.type FROM assemblies, analyses WHERE analyses.id=%s AND analyses.assemblyID=assemblies.id" ,
711
712
(analyses_id ,),
712
713
)
713
- assembly_id , assembly_name , analyses_path = cursor .fetchone ()
714
+ assembly_id , assembly_name , analyses_path , analysis_type = cursor .fetchone ()
714
715
715
716
cursor .execute (
716
717
"SELECT taxa.* FROM assemblies, taxa WHERE assemblies.id=%s AND assemblies.taxonID=taxa.id" ,
@@ -725,7 +726,7 @@ def deleteAnalysesByAnalysesID(analyses_id):
725
726
status , error = __deleteAnalysesEntryByAnalysesID (analyses_id )
726
727
727
728
if status and taxon and assembly_name and analyses_path :
728
- status , error = __deleteAnalysesFile (taxon , assembly_name , analyses_path )
729
+ status , error = __deleteAnalysesFile (taxon , assembly_name , analyses_path , type = analysis_type )
729
730
else :
730
731
return 0 , error
731
732
@@ -740,7 +741,7 @@ def deleteAnalysesByAnalysesID(analyses_id):
740
741
741
742
742
743
# deletes files for annotation
743
- def __deleteAnalysesFile (taxon , assembly_name , analyses_path ):
744
+ def __deleteAnalysesFile (taxon , assembly_name , analyses_path , type = "" ):
744
745
"""
745
746
Deletes data for specific annotation.
746
747
"""
@@ -749,6 +750,11 @@ def __deleteAnalysesFile(taxon, assembly_name, analyses_path):
749
750
path = f"{ BASE_PATH_TO_STORAGE } taxa/{ scientificName } "
750
751
751
752
run (args = ["rm" , "-r" , analyses_path ])
753
+ if type == "taxaminer" :
754
+ print ("Analysis is taXaminer, deleting parent directory as well" )
755
+ # go one folder up
756
+ taxaminer_folder = "/" .join (analyses_path .split ("/" )[0 :- 1 ])
757
+ run (args = ["rm" , "-r" , taxaminer_folder ])
752
758
753
759
return 1 , createNotification ("Success" , "Successfully deleted analyses" , "success" )
754
760
except Exception as err :
@@ -759,6 +765,7 @@ def __deleteAnalysesEntryByAnalysesID(id):
759
765
try :
760
766
connection , cursor , error = connect ()
761
767
cursor .execute ("DELETE FROM analyses WHERE id=%s" , (id ,))
768
+ cursor .execute ("DELETE FROM taxaminerDiamond WHERE analysisID=%s" , (id ,))
762
769
connection .commit ()
763
770
return 1 , []
764
771
except Exception as err :
@@ -1210,18 +1217,20 @@ def fetchRepeatmaskerAnalysesByAssemblyID(assemblyID):
1210
1217
def fetchTaxaminerDiamond(assemblyID, analysisID, qseqid):
    """
    Look up the index entry of one query sequence in a taXaminer analysis.

    Joins ``taxaminerDiamond`` with ``analysesTaxaminer`` and returns the
    first matching row as a dict keyed by the entries of ``DIAMOND_FIELDS``.

    Parameters:
        assemblyID: matched against ``taxaminerDiamond.assemblyID``.
        analysisID: matched against ``analysesTaxaminer.id`` (not against
            ``taxaminerDiamond.analysisID``, which is only the join key).
        qseqid: matched against ``taxaminerDiamond.qseqID``.

    Returns:
        - a dict for the first matching row,
        - an empty list if no row matches,
        - the tuple ``(0, notification)`` if any exception is raised.
    """
    try:
        connection, cursor, error = connect()
        # Name the columns explicitly instead of using "SELECT *": with a
        # two-table join the positional layout of "*" depends on the schema,
        # so mapping it onto DIAMOND_FIELDS by index is fragile. The column
        # names are the ones this module uses when inserting into
        # taxaminerDiamond.
        cursor.execute(
            "SELECT taxaminerDiamond.assemblyID, taxaminerDiamond.analysisID, "
            "taxaminerDiamond.qseqID, taxaminerDiamond.start, taxaminerDiamond.stop "
            "FROM taxaminerDiamond, analysesTaxaminer "
            "WHERE taxaminerDiamond.analysisID=analysesTaxaminer.analysisID "
            "AND taxaminerDiamond.assemblyID=%s "
            "AND analysesTaxaminer.id=%s "
            "AND taxaminerDiamond.qseqID=%s",
            (assemblyID, analysisID, qseqid),
        )
        row = cursor.fetchone()

        # catch no entries
        if not row:
            return []

        # Pair each selected column with its field name; zip stops at the
        # shorter of the two, so no hard-coded column count is needed.
        return dict(zip(DIAMOND_FIELDS, row))
    except Exception as err:
        return 0, createNotification(message=str(err))
1227
1236
0 commit comments