Skip to content

Commit 0f7c579

Browse files
committed
1.8.6
1 parent 9681a5b commit 0f7c579

23 files changed

+1131
-361
lines changed

bycon/definitions/datatable_mappings.yaml

+220-183
Large diffs are not rendered by default.

bycon/definitions/handover_definitions.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ h->o_types:
4848
id: 'pgx:HO.biosamplestable'
4949
label: "Biosamples Table"
5050
note: "retrieve data of the biosamples matched by the query as tab-delimited table"
51-
output: datatable
5251
paginated_entities:
5352
- biosample
5453
- analysis
@@ -106,7 +105,6 @@ h->o_types:
106105
id: 'pgx:HO.pgxseg'
107106
label: "Found Variants (.pgxseg)"
108107
note: "retrieve variants matched by the query as .pgxseg download"
109-
output: pgxseg
110108
paginated_entities:
111109
- all
112110
h->o_key: 'variants._id'
@@ -128,7 +126,6 @@ h->o_types:
128126
id: 'pgx:HO.bedfile2ucsc'
129127
label: "Show Found Variants in the UCSC browser"
130128
note: "map variants matched by the query to the UCSC browser"
131-
output: ucscbrowser
132129
paginated_entities:
133130
- all
134131
h->o_key: 'variants._id'

bycon/definitions/variant_type_definitions.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,16 @@ SO:0001413:
297297
cnv_dummy_value: Null
298298
child_terms:
299299
- SO:0001413
300+
- SO:0000806
301+
302+
SO:0000806:
303+
variant_state:
304+
id: SO:0000806
305+
label: fusion
306+
variant_type_id: SO:0000806
307+
variant_type: BND
308+
VRS_type: Null
309+
VCF_symbolic_allele: <BND>
310+
cnv_dummy_value: Null
311+
child_terms:
312+
- SO:0000806

bycon/lib/bycon_helpers.py

+30-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base36, humps, json, re, time
22

33
from isodate import parse_duration
4+
from datetime import datetime
45
from os import environ
56
from pymongo import MongoClient
67

@@ -284,9 +285,7 @@ def assign_nested_value(parent, dotted_key, v, parameter_definitions={}):
284285
################################################################################
285286

286287
def get_nested_value(parent, dotted_key, parameter_type="string"):
287-
288-
ps = dotted_key.split('.')
289-
288+
ps = str(dotted_key).split('.')
290289
v = ""
291290

292291
if len(ps) == 1:
@@ -342,11 +341,25 @@ def decamelize_words(j_d):
342341

343342
def prdbughead(this=""):
344343
BYC.update({"DEBUG_MODE": True})
344+
prtexthead()
345+
print(this)
346+
347+
################################################################################
348+
349+
def prtexthead():
345350
if not "local" in ENV:
346351
print('Content-Type: text/plain')
347352
print('status: 302')
348353
print()
349-
print(this)
354+
355+
################################################################################
356+
357+
def prdlhead(filename="download.txt"):
358+
if not "local" in ENV:
359+
print('Content-Type: text/tsv')
360+
print(f'Content-Disposition: attachment; filename={filename}')
361+
print('status: 200')
362+
print()
350363

351364
################################################################################
352365

@@ -366,5 +379,18 @@ def prjsonnice(this):
366379
def prjsoncam(this):
367380
prjsonnice(humps.camelize(this))
368381

382+
################################################################################
383+
384+
def isotoday():
385+
return str(datetime.today().strftime('%Y-%m-%d'))
386+
387+
################################################################################
388+
389+
def isonow():
390+
return str(datetime.datetime.now().isoformat())
391+
392+
393+
394+
369395

370396

bycon/lib/dataset_parsing.py

+18-27
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,22 @@
99
################################################################################
1010

1111
def select_dataset_ids():
12-
if ds_id_from_rest_path_value() is not False:
12+
if ds_id_from_rest_path_value():
1313
return
14-
if ds_id_from_accessid() is not False:
14+
if ds_id_from_accessid():
1515
return
16-
if ds_id_from_record_id() is not False:
16+
if ds_id_from_record_id():
1717
return
18-
if ds_ids_from_form() is not False:
18+
if ds_ids_from_form():
1919
return
20-
if ds_id_from_default() is not False:
20+
if ds_id_from_default():
2121
return
2222

2323

2424
################################################################################
2525

2626
def ds_id_from_rest_path_value():
27-
ds_p_id = rest_path_value("datasets")
28-
if not ds_p_id:
27+
if not (ds_p_id := rest_path_value("datasets")):
2928
return False
3029

3130
ds_ids = []
@@ -44,7 +43,7 @@ def ds_id_from_rest_path_value():
4443

4544
def ds_id_from_record_id():
4645
"""
47-
For data retrieval associated with a single record by its path id siuch as
46+
For data retrieval associated with a single record by its path id such as
4847
`biosamples/{id}` the default Beacon model does not provide any way to provide
4948
the associated dataset id with the request. The assumption is that any record
5049
id is unique across all datasets.
@@ -62,18 +61,15 @@ def ds_id_from_accessid():
6261
# TODO: This is very verbose. In principle there should be an earlier
6362
# test of existence...
6463

65-
accessid = BYC_PARS.get("accessid", False)
66-
if any(x is False for x in [accessid]):
64+
if not (accessid := BYC_PARS.get("accessid")):
6765
return False
6866

6967
ho_client = MongoClient(host=DB_MONGOHOST)
7068
h_o = ho_client[HOUSEKEEPING_DB][HOUSEKEEPING_HO_COLL].find_one({"id": accessid})
7169
if not h_o:
7270
return False
7371
ds_id = h_o.get("source_db", False)
74-
if ds_id is False:
75-
return False
76-
if ds_id not in BYC["DATABASE_NAMES"]:
72+
if (ds_id := str(h_o.get("source_db"))) not in BYC["DATABASE_NAMES"]:
7773
return False
7874
BYC.update({"BYC_DATASET_IDS": [ds_id]})
7975
return True
@@ -82,26 +78,21 @@ def ds_id_from_accessid():
8278
################################################################################
8379

8480
def ds_ids_from_form():
85-
f_ds_ids = BYC_PARS.get("dataset_ids", False)
86-
if f_ds_ids is False:
87-
return False
88-
ds_ids = []
89-
for ds_id in f_ds_ids:
90-
if ds_id in BYC["DATABASE_NAMES"]:
91-
ds_ids.append(ds_id)
92-
93-
if len(ds_ids) < 1:
81+
82+
if not (f_ds_ids := BYC_PARS.get("dataset_ids")):
9483
return False
95-
BYC.update({"BYC_DATASET_IDS": ds_ids})
96-
return True
84+
ds_ids = [ds for ds in f_ds_ids if ds in BYC.get("DATABASE_NAMES",[])]
85+
if len(ds_ids) > 0:
86+
BYC.update({"BYC_DATASET_IDS": ds_ids})
87+
return True
88+
return False
9789

9890

9991
################################################################################
10092

10193
def ds_id_from_default():
102-
defaults: object = BYC["beacon_defaults"].get("defaults", {})
103-
ds_id = defaults.get("default_dataset_id", "___undefined___")
104-
if ds_id not in BYC["DATABASE_NAMES"]:
94+
defaults: object = BYC["beacon_defaults"].get("defaults", {})
95+
if (ds_id := str(defaults.get("default_dataset_id"))) not in BYC["DATABASE_NAMES"]:
10596
return False
10697
BYC.update({"BYC_DATASET_IDS": [ ds_id ]})
10798
return True

bycon/lib/genome_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def refseqAliases(self):
4949
# -------------------------------------------------------------------------#
5050

5151
def chro(self, s_id="___none___"):
52-
return self.chro_aliases.get(s_id, "___none___")
52+
return self.chro_aliases.get(s_id)
5353

5454

5555
# -------------------------------------------------------------------------#

bycon/lib/response_remapping.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,9 @@ def remap_variants(r_s_res):
5757
}
5858
for d_v in d_vs:
5959
c_l_v = {}
60-
for c_k in ("id", "biosample_id", "info"):
61-
c_v = d_v.get(c_k)
62-
if c_v:
60+
for c_k in ("id", "biosample_id", "analysis_id", "individual_id", "info"):
61+
if (c_v := d_v.get(c_k)):
6362
c_l_v.update({c_k: c_v})
64-
a_id = d_v.get("analysis_id")
65-
if a_id:
66-
c_l_v.update({"analysis_id": a_id})
6763
v["case_level_data"].append(c_l_v)
6864

6965
# TODO: Keep legacy pars?
@@ -130,15 +126,16 @@ def remap_runs(r_s_res):
130126
return r_s_res
131127

132128
runs = []
133-
for cs_i, cs_r in enumerate(r_s_res):
129+
for ana in r_s_res:
134130
r = {
135-
"id": cs_r.get("id", ""),
136-
"analysis_id": cs_r.get("id", ""),
137-
"biosample_id": cs_r.get("biosample_id", ""),
138-
"individual_id": cs_r.get("individual_id", ""),
131+
"id": ana.get("id", ""),
132+
"individual_id": ana.get("individual_id", ""),
139133
"run_date": datetime.datetime.fromisoformat(
140-
cs_r.get("updated", datetime.datetime.now().isoformat())).isoformat()
134+
ana.get("updated", datetime.datetime.now().isoformat())).isoformat()
141135
}
136+
for p in ["biosample_id", "individual_id", "platform_model"]:
137+
if (v := ana.get(p)):
138+
r.update({p: v})
142139
runs.append(r)
143140

144141
return runs
@@ -203,7 +200,12 @@ def remap_individuals(r_s_res):
203200
if not "individual" in BYC["response_entity_id"]:
204201
return r_s_res
205202

206-
return r_s_res
203+
ind_s = []
204+
for ind_i, ind in enumerate(r_s_res):
205+
individual_remap_pgx_diseases(ind)
206+
ind_s.append(ind)
207+
208+
return ind_s
207209

208210

209211
################################################################################
@@ -264,7 +266,6 @@ def phenopack_individual(ind, data_db):
264266
ind.pop(k, None)
265267

266268
individual_remap_pgx_diseases(ind)
267-
268269
for d_i, d in enumerate(ind["diseases"]):
269270
for k in ["followup_state", "followup_time"]:
270271
ind["diseases"][d_i].pop(k, None)

bycon/lib/variant_mapping.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ def byconVariant(self, variant={}):
6464

6565
# -------------------------------------------------------------------------#
6666

67-
def pgxVariant(self, variant={}):
67+
def pgxVariant(self, variant=None):
68+
if not variant:
69+
return self.pgx_variant
6870
self.byc_variant = variant
6971
self.__create_canonical_variant()
7072
var_keys = self.pgx_variant.keys()
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
$schema: https://json-schema.org/draft/2020-12/schema
2-
$id: https://progenetix.org/services/schemas/analysis/v2021-11-01
2+
$id: https://progenetix.org/services/schemas/analysis/v2024-07-08
33
title: analysis
44
description: >-
5-
The `analysis` object represents a information about the data generation (e.g.
6-
experimental platform) and data analysis steps leading to (a set of) genomic variation
7-
call(s). This is in contrast to the Beacon v2 default model which has a separate
8-
`run` schema.
5+
The `analysis` object represents information about the data analysis steps
6+
leading to (a set of) genomic variation call(s).
97
type: object
8+
note: >-
9+
On 2024-07-08 aligned with current Beacon v2 main
10+
1011
properties:
1112
id:
1213
type: string
@@ -20,38 +21,45 @@ properties:
2021
type: string
2122
examples:
2223
- pgxbs-kftva59y
23-
description:
24+
runId:
25+
description: >-
26+
Run identifier (external accession or internal ID).
27+
NOTE: Not used in bycon
28+
type: string
29+
examples:
30+
- SRR10903401
31+
analysisDate:
32+
description: Date at which analysis was performed.
2433
type: string
34+
format: date
2535
examples:
26-
- CNV analysis from SNP6 array
27-
platformModel:
36+
- '2021-10-17'
37+
pipelineName:
2838
description: >-
29-
Ontology value for experimental platform or methodology used. For
30-
sequencing platforms the use of "OBI:0400103 - DNA sequencer" is suggested.
31-
$ref: ../common/ontologyTerm.yaml
39+
Analysis pipeline and version if a standardized pipeline was used
40+
type: string
3241
examples:
33-
- id: geo:GPL3381
34-
label: "Stanford Microarray Facility cDNA array [SHDV]"
35-
- id: OBI:0002750
36-
label: Oxford Nanopore MinION
37-
- id: EFO:0010938
38-
label: large-insert clone DNA microarray
39-
experimentAccession:
42+
- progenetix-labelseg-v1.2
43+
pipelineRef:
4044
description: >-
41-
Identifier for primary experimental data.
42-
Provenance: progenetix.org
43-
$ref: ../common/ontologyTerm.yaml
45+
Link to Analysis pipeline resource
46+
type: string
4447
examples:
45-
- id: geo:GSM93480
46-
seriesAccession:
48+
- https://github.com/baudisgroup/LabelSeg
49+
aligner:
4750
description: >-
48-
Identifier for primary experimental series.
49-
Provenance: progenetix.org
50-
$ref: ../common/ontologyTerm.yaml
51+
Reference to mapping/alignment software
52+
NOTE: Not used in bycon
53+
type: string
54+
examples:
55+
- bwa-0.7.8
56+
variantCaller:
57+
description: >-
58+
Reference to variant calling software / pipeline
59+
NOTE: Not used in bycon
60+
type: string
5161
examples:
52-
- id: geo:GSE4079
53-
provenance:
54-
"$ref": ../common/provenance.yaml
62+
- GATK4.0
5563
info:
5664
type: object
5765
updated:
@@ -62,4 +70,5 @@ properties:
6270
required:
6371
- id
6472
- biosampleId
73+
- analysisDate
6574
additionalProperties: true

0 commit comments

Comments
 (0)