Skip to content

Commit 82a648a

Browse files
committed
Introducing the taXaminer-dashboard extension
1 parent 9ddf68e commit 82a648a

File tree

26 files changed

+1780
-479
lines changed

26 files changed

+1780
-479
lines changed

flask-backend/src/Routes/annotations.py

+26
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
fetchFeatures,
1414
import_annotation,
1515
updateAnnotationLabel,
16+
grepFeature
1617
)
1718

1819
# setup blueprint name
@@ -173,6 +174,31 @@ def annotations_bp_fetchFeatures():
173174
return REQUESTMETHODERROR
174175

175176

# GREP FEATURE COORDINATES
# (was mislabelled "FETCH ALL ASSEMBLIES" — copy-paste from a neighbouring route)
@annotations_bp.route("/grepFeatures", methods=["POST"])
def annotations_bp_grepFeatures():
    """Search one annotation for a feature and return its genomic coordinates.

    Expects a JSON body with userID/token (auth) plus "search" (the grep
    pattern) and "annotationID" (which annotation file to search).
    Returns {"coords": <"seqID:start..end" or "">}.
    """
    if request.method == "POST":
        req = request.get_json(force=True)
        userID = req.get("userID", None)
        token = req.get("token", None)

        # token still active
        valid_token, error = validateActiveToken(userID, token, ACCESS_LVL_1)
        if not valid_token:
            response = jsonify({"payload": {}, "notification": error})
            response.headers.add("Access-Control-Allow-Origin", "*")
            return response

        # grep input
        search = req.get("search", None)
        annotation_id = req.get("annotationID", None)

        coords = grepFeature(search, annotation_id)
        return jsonify({"coords": coords})
    else:
        return REQUESTMETHODERROR
176202
# FETCH ALL UNIQUE FEATURE TYPES
177203
@annotations_bp.route("/fetchFeatureSeqIDs", methods=["POST"])
178204
def annotations_bp_fetchFeatureSeqIDs():

flask-backend/src/Routes/taxaminer_data.py

+42-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# general imports
2+
import ast
23
from email.mime import base
34
import json
45
from pathlib import Path
6+
import sys
57
from urllib import response
68
from flask import Blueprint, jsonify, request, abort, Response
79
from . import file_io
@@ -117,7 +119,8 @@ def main_data():
117119

118120
path = get_basepath(assembly_id=assembly_id, analysis_id=analysis_id)
119121

120-
122+
if not path:
123+
return abort(500)
121124
json_data = file_io.indexed_data(f"{path}gene_table_taxon_assignment.csv")
122125

123126
# return as json
@@ -145,9 +148,36 @@ def diamond_data():
145148
return response
146149

147150
try:
148-
json_data = fetchTaxaminerDiamond(assembly_id, analysis_id, qseq_id)
149-
return jsonify(json_data)
150-
except Exception:
151+
DIAMOND_FIELDS = fields = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore', 'taxids', 'taxname', 'assemblyID', 'analysisID']
152+
index_data = fetchTaxaminerDiamond(assembly_id, analysis_id, qseq_id)
153+
if index_data == []:
154+
return jsonify([])
155+
total_count = int(index_data['stop']) - int(index_data['start'])
156+
157+
# get on disk
158+
path = get_basepath(assembly_id=assembly_id, analysis_id=analysis_id)
159+
print((index_data, path), file=sys.stderr)
160+
161+
final_lines = []
162+
with open(path + "taxonomic_hits.txt") as f:
163+
for i in range(int(index_data['start'])):
164+
next(f)
165+
counter = 0
166+
for line in f:
167+
# ensure we stop at the end of the block
168+
if counter == total_count:
169+
break
170+
171+
temp_dict = {}
172+
fields = line.split("\t")
173+
for i, field in enumerate(fields):
174+
temp_dict[DIAMOND_FIELDS[i]] = field
175+
final_lines.append(temp_dict)
176+
counter += 1
177+
178+
return jsonify(final_lines)
179+
except Exception as e:
180+
print(e, file=sys.stderr)
151181
return abort(404)
152182

153183

@@ -237,16 +267,18 @@ def get_config():
237267
fields = fetchTaxaminerSettings(userID, analysisID)
238268
# no previous settings
239269
if not fields:
240-
setTaxaminerSettings(userID, analysisID, "[]")
241-
return jsonify("[]")
270+
setTaxaminerSettings(userID, analysisID, "[]", "[]")
271+
return jsonify({"custom_fields": [], "selection": []})
242272
else:
243-
fields_json = json.loads(fields[0])
244-
return jsonify(fields_json)
273+
data_json = json.loads(fields[0])
274+
print(fields, file=sys.stderr)
275+
return jsonify({"custom_fields": ast.literal_eval(fields[0]), "selection": ast.literal_eval(fields[1])})
245276
# store settings in database
246277
elif request.method == "PUT":
247278
# TODO: add support for additional settings
248279
new_fields = request.json['fields']
249-
setTaxaminerSettings(userID, analysisID, json.dumps(new_fields))
280+
new_seletion = request.json['selection']
281+
setTaxaminerSettings(userID, analysisID, json.dumps(new_fields), json.dumps(new_seletion))
250282
return jsonify(new_fields)
251283

252284

@@ -272,7 +304,7 @@ def pca_contributions():
272304
basepath = get_basepath(assembly_id=assembly_id, analysis_id=analysis_id)
273305

274306
if basepath:
275-
with open(f"{basepath}pca_loadings.csv", 'r') as file:
307+
with open(f"{basepath}contribution_of_variables.csv", 'r') as file:
276308
lines = file.readlines()
277309

278310
final_lines = []

flask-backend/src/modules/analyses.py

+50-41
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import json
1515

16-
DIAMOND_FIELDS = fields = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore', 'taxids', 'taxname', 'assemblyID', 'analysisID']
16+
DIAMOND_FIELDS = fields = ['assemblyID', 'analysisID', 'qseqid', 'start', 'stop']
1717

1818
## ============================ IMPORT AND DELETE ============================ ##
1919
# full import of analyses
@@ -547,38 +547,39 @@ def __importTaxaminer(assemblyID, analysisID, base_path):
547547
cursor.execute("INSERT INTO analysesTaxaminer (analysisID) VALUES (%s)", (analysisID,))
548548
connection.commit()
549549

550-
"""
551-
# parse diamond
550+
# Load taxonomic hits
552551
diamond_path = base_path + "taxonomic_hits.txt"
552+
print(diamond_path)
553553
if not os.path.isfile(diamond_path):
554554
return 0, createNotification(message=f"taXaminerImportDBError: Diamond data is missing!")
555-
556-
FIELDS = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore', 'taxids', 'taxname']
557-
TYPES = {'qseqid': str, 'sseqid': str, 'pident': float, 'length': float, 'mismatch': float, 'gapopen': float, 'qstart': float,
558-
'qend': float, 'sstart': float, 'send': float, 'evalue': float, 'bitscore': float, 'taxids': str, 'taxname': str}
559-
rows = []
555+
556+
# build data rows
557+
# => save assemblyID, analysisID, qseqID together with the row number to index file
558+
sql_rows = []
560559
with open(diamond_path) as file:
561-
my_reader = csv.DictReader(file, delimiter='\t', fieldnames=FIELDS)
562-
for row in my_reader:
563-
# manually set types
564-
for field in FIELDS:
565-
if TYPES.get(field) != str:
566-
if TYPES.get(field) == int:
567-
row[field] = int(row[field])
568-
elif TYPES.get(field) == float:
569-
row[field] = float(row[field])
570-
# cleared for db insert
571-
rows.append((assemblyID, analysisID, row['qseqid'], json.dumps(row)))
572-
573-
print("Database Inserts look like this:" + str(rows[0]))
574-
575-
# .executemany() exceeds the 'max_allowed_packet'
576-
# if you encounter this error use 'SET SESSION max_allowed_packet=500*1024*1024' or 'SET GLOBAL max_allowed_packet=500*1024*1024'
577-
# TLDR: MOOOOOOOOOOREEEEEEE RAM
560+
start_index = 0
561+
curr_id = ""
562+
outer_index = 0
563+
for i, line in enumerate(file.readlines()):
564+
# primer
565+
if i == 0:
566+
curr_id = line.split("\t")[0]
567+
568+
# determine new id
569+
next_id = line.split("\t")[0]
570+
if next_id != curr_id:
571+
# start -> stop
572+
sql_rows.append((assemblyID, analysisID, curr_id, start_index, i-1))
573+
curr_id = next_id
574+
start_index = i
575+
outer_index = i
576+
577+
# final row
578+
sql_rows.append((assemblyID, analysisID, curr_id, start_index, outer_index))
579+
578580
connection, cursor, error = connect()
579-
cursor.executemany("INSERT INTO taxaminerDiamond (assemblyID, analysisID, qseqID, data) VALUES (%s, %s, %s, %s)", rows)
581+
cursor.executemany("INSERT INTO taxaminerDiamond (assemblyID, analysisID, qseqID, start, stop) VALUES (%s, %s, %s, %s, %s)", sql_rows)
580582
connection.commit()
581-
"""
582583

583584
return 1, []
584585
except Exception as err:
@@ -707,10 +708,10 @@ def deleteAnalysesByAnalysesID(analyses_id):
707708
try:
708709
connection, cursor, error = connect()
709710
cursor.execute(
710-
"SELECT assemblies.id, assemblies.name, analyses.path FROM assemblies, analyses WHERE analyses.id=%s AND analyses.assemblyID=assemblies.id",
711+
"SELECT assemblies.id, assemblies.name, analyses.path, analyses.type FROM assemblies, analyses WHERE analyses.id=%s AND analyses.assemblyID=assemblies.id",
711712
(analyses_id,),
712713
)
713-
assembly_id, assembly_name, analyses_path = cursor.fetchone()
714+
assembly_id, assembly_name, analyses_path, analysis_type = cursor.fetchone()
714715

715716
cursor.execute(
716717
"SELECT taxa.* FROM assemblies, taxa WHERE assemblies.id=%s AND assemblies.taxonID=taxa.id",
@@ -725,7 +726,7 @@ def deleteAnalysesByAnalysesID(analyses_id):
725726
status, error = __deleteAnalysesEntryByAnalysesID(analyses_id)
726727

727728
if status and taxon and assembly_name and analyses_path:
728-
status, error = __deleteAnalysesFile(taxon, assembly_name, analyses_path)
729+
status, error = __deleteAnalysesFile(taxon, assembly_name, analyses_path, type=analysis_type)
729730
else:
730731
return 0, error
731732

@@ -740,7 +741,7 @@ def deleteAnalysesByAnalysesID(analyses_id):
740741

741742

742743
# deletes files for annotation
743-
def __deleteAnalysesFile(taxon, assembly_name, analyses_path):
744+
def __deleteAnalysesFile(taxon, assembly_name, analyses_path, type=""):
744745
"""
745746
Deletes data for specific annotation.
746747
"""
@@ -749,6 +750,11 @@ def __deleteAnalysesFile(taxon, assembly_name, analyses_path):
749750
path = f"{BASE_PATH_TO_STORAGE}taxa/{scientificName}"
750751

751752
run(args=["rm", "-r", analyses_path])
753+
if type == "taxaminer":
754+
print("Analysis is taXaminer, deleting parent directory as well")
755+
# go one folder up
756+
taxaminer_folder = "/".join(analyses_path.split("/")[0:-1])
757+
run(args=["rm", "-r", taxaminer_folder])
752758

753759
return 1, createNotification("Success", "Successfully deleted analyses", "success")
754760
except Exception as err:
@@ -759,6 +765,7 @@ def __deleteAnalysesEntryByAnalysesID(id):
759765
try:
760766
connection, cursor, error = connect()
761767
cursor.execute("DELETE FROM analyses WHERE id=%s", (id,))
768+
cursor.execute("DELETE FROM taxaminerDiamond WHERE analysisID=%s", (id,))
762769
connection.commit()
763770
return 1, []
764771
except Exception as err:
@@ -1210,18 +1217,20 @@ def fetchRepeatmaskerAnalysesByAssemblyID(assemblyID):
12101217
def fetchTaxaminerDiamond(assemblyID, analysisID, qseqid):
    """Look up the diamond index entry for a single query sequence.

    The entry maps (assembly, analysis, qseqID) to a start/stop line range
    inside taxonomic_hits.txt. Returns a dict keyed by DIAMOND_FIELDS,
    [] when no matching row exists, or (0, notification) on a DB error.
    """
    try:
        connection, cursor, error = connect()
        cursor.execute("SELECT * FROM taxaminerDiamond, analysesTaxaminer WHERE taxaminerDiamond.analysisID=analysesTaxaminer.analysisID AND taxaminerDiamond.assemblyID=%s AND analysesTaxaminer.id=%s AND qseqID=%s",
            (assemblyID, analysisID, qseqid)
        )
        row = cursor.fetchone()

        # no matching index entry for this qseqID
        if not row:
            return []

        # pair the first five result columns with their field names
        return dict(zip(DIAMOND_FIELDS, row[:5]))
    except Exception as err:
        return 0, createNotification(message=str(err))
12271236

flask-backend/src/modules/annotations.py

+23
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from subprocess import run
77
from glob import glob
88
from operator import contains, is_, is_not, lt, le, eq, ne, ge, gt
9+
import subprocess
10+
import sys
911

1012
from .notifications import createNotification
1113
from .db_connection import connect, DB_NAME
@@ -871,6 +873,27 @@ def fetchFeatureSeqIDs(assemblyID=0, taxonIDs=[]):
871873
return [], createNotification(message=f"FeatureTypesFetchingError: {str(err)}")
872874

873875

def grepFeature(search, annotation_id):
    """Grep a gzipped annotation file for *search* and return the location
    of the first hit as "seqID:start..end" (GFF columns 1, 4 and 5).

    Returns "" when nothing matches, or (0, notification) on error.
    """
    try:
        connection, cursor, error = connect()
        # BUGFIX: the annotation ID was hard-coded to 35, silently ignoring
        # the annotation_id argument — use a parameterized query instead.
        cursor.execute(
            "SELECT path FROM genomicAnnotations WHERE id=%s", (annotation_id,)
        )
        annotation_path = cursor.fetchone()[0]

        zcat_annot = subprocess.Popen(["zcat", annotation_path], stdout=subprocess.PIPE)
        # "--" ends option parsing so a search term starting with "-"
        # cannot be interpreted as a grep flag (untrusted user input).
        grep_feature = subprocess.run(
            ["grep", "--", search],
            stdin=zcat_annot.stdout,
            capture_output=True,
            text=True,
        )
        # close our end of the pipe so zcat receives SIGPIPE and exits
        zcat_annot.stdout.close()

        my_result = grep_feature.stdout.split("\n")[0]
        if my_result != "":
            # GFF layout: seqid, source, type, start, end, ...
            cols = my_result.split("\t")
            pos_string = f"{cols[0]}:{cols[3]}..{cols[4]}"
            return pos_string
        else:
            return ""

    except Exception as err:
        return 0, createNotification(message=str(err))
896+
874897
# fetches all unique feature types from all features
875898
def fetchFeatureTypes(assemblyID=0, taxonIDs=[], seqIDs=[]):
876899
"""

flask-backend/src/modules/assemblies.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def import_assembly(taxon, dataset, userID, taskID=""):
4040
return 0, createNotification(message="Missing user ID!")
4141

4242
assembly_id, error = __get_new_assembly_ID()
43+
print(assembly_id, flush=True)
4344
if not assembly_id:
4445
return 0, error
4546

@@ -51,7 +52,7 @@ def import_assembly(taxon, dataset, userID, taskID=""):
5152
main_file_path, assembly_name, error = __store_assembly(dataset, taxon, assembly_id)
5253
if not main_file_path or not exists(main_file_path):
5354
deleteAssemblyByAssemblyID(assembly_id)
54-
print(error)
55+
print(error, flush=True)
5556
return 0, error
5657

5758
fasta_content, error = parseFasta(main_file_path, taskID)
@@ -129,7 +130,7 @@ def __get_new_assembly_ID():
129130
else:
130131
next_id = auto_increment_counter
131132

132-
cursor.execute("ALTER TABLE assemblies AUTO_INCREMENT = %s", (next_id + 1,))
133+
# cursor.execute("ALTER TABLE assemblies AUTO_INCREMENT = %s", (next_id + 1,))
133134
connection.commit()
134135
except Exception as err:
135136
return 0, createNotification(message=f"AssemblyCreationError: {str(err)}")

flask-backend/src/modules/combined_imports.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from re import compile
77
from sys import argv
88
from datetime import datetime
9+
from .db_connection import connect
910

1011
from modules.environment import BASE_PATH_TO_IMPORT
1112
from modules.assemblies import (
@@ -295,8 +296,9 @@ def importDataset(
295296
return summary, createNotification(message="Exact one assembly needs to be supplied!")
296297

297298
assembly = assembly[0]
298-
299299
assembly_id, notification = import_assembly(taxon, assembly, userID, taskID)
300+
301+
300302
if not assembly_id:
301303
return summary, notification
302304
summary["assemblyID"] = assembly_id

flask-backend/src/modules/users.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def removeBookmark(userID, assemblyID):
326326
def fetchTaxaminerSettings(userID, analysisID):
327327
try:
328328
connection, cursor, error = connect()
329-
cursor.execute("SELECT custom_fields FROM settingsTaxaminer WHERE analysisID=%s AND userID=%s",
329+
cursor.execute("SELECT custom_fields, selection FROM settingsTaxaminer WHERE analysisID=%s AND userID=%s",
330330
(analysisID, userID)
331331
)
332332
settings = cursor.fetchone()
@@ -336,11 +336,11 @@ def fetchTaxaminerSettings(userID, analysisID):
336336

337337

338338
""" set taXaminer settings"""
339-
def setTaxaminerSettings(userID, analysisID, json_string, setting="fields"):
339+
def setTaxaminerSettings(userID, analysisID, custom_fields_json, selection_json, setting="fields"):
340340
try:
341341
connection, cursor, error = connect()
342-
cursor.execute("INSERT INTO settingsTaxaminer (userID, analysisID, custom_fields) VALUES (%s,%s,%s) ON DUPLICATE KEY UPDATE custom_fields=%s;",
343-
(userID, analysisID, json_string, json_string)
342+
cursor.execute("INSERT INTO settingsTaxaminer (userID, analysisID, custom_fields, selection) VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE custom_fields=%s, selection=%s;",
343+
(userID, analysisID, custom_fields_json, selection_json, custom_fields_json, selection_json)
344344
)
345345
connection.commit()
346346
except Exception as err:

0 commit comments

Comments
 (0)