Skip to content

Commit 53f02ac

Browse files
authored
feat: add va spec pydantic models (#6)
close #3
1 parent fe006b5 commit 53f02ac

File tree

10 files changed

+581
-2
lines changed

10 files changed

+581
-2
lines changed

.github/workflows/release.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
runs-on: ubuntu-latest
5151
environment:
5252
name: pypi
53-
url: https://pypi.org/p/ga4gh-va-spec
53+
url: https://pypi.org/p/ga4gh.va_spec
5454
permissions:
5555
id-token: write # IMPORTANT: mandatory for trusted publishing
5656
steps:

.gitmodules

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[submodule "submodules/va_spec"]
2+
path = submodules/va_spec
3+
url = https://github.com/ga4gh/va-spec
4+
branch = 1.x

pyproject.toml

+7-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ keywords = [
3030
]
3131
requires-python = ">=3.10"
3232
dynamic = ["version"]
33-
dependencies = []
33+
dependencies = [
34+
"ga4gh.vrs~=2.0.0a12",
35+
"ga4gh.cat_vrs~=0.1.0",
36+
"pydantic==2.*"
37+
]
3438

3539
[project.optional-dependencies]
3640
dev = [
@@ -132,7 +136,9 @@ ignore = [
132136
# ANN102 - missing-type-cls
133137
# S101 - assert
134138
# B011 - assert-false
139+
# N815 - mixed-case-variable-in-class-scope
135140
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"]
141+
"src/ga4gh/va_spec/profiles/*" = ["ANN102", "N815"]
136142

137143
[tool.setuptools.packages.find]
138144
where = ["src"]
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Package for VA-Spec Python implementation"""
2+
3+
from .assay_var_effect import (
4+
AssayVariantEffectClinicalClassificationStatement,
5+
AssayVariantEffectFunctionalClassificationStatement,
6+
AssayVariantEffectMeasurementStudyResult,
7+
AveClinicalClassification,
8+
AveFunctionalClassification,
9+
)
10+
from .caf_study_result import CohortAlleleFrequencyStudyResult
11+
from .var_path_stmt import PenetranceQualifier, VariantPathogenicityStatement
12+
from .var_study_stmt import (
13+
AlleleOriginQualifier,
14+
AllelePrevalenceQualifier,
15+
DiagnosticPredicate,
16+
OncogenicPredicate,
17+
PrognosticPredicate,
18+
TherapeuticResponsePredicate,
19+
VariantDiagnosticStudyStatement,
20+
VariantOncogenicityStudyStatement,
21+
VariantPrognosticStudyStatement,
22+
VariantTherapeuticResponseStudyStatement,
23+
)
24+
25+
# Public API of the package: every name re-exported from the profile modules
# imported at the top of this file. Order is kept exactly as originally declared.
__all__ = [
    "AveFunctionalClassification",
    "AveClinicalClassification",
    "AssayVariantEffectFunctionalClassificationStatement",
    "AssayVariantEffectClinicalClassificationStatement",
    "AssayVariantEffectMeasurementStudyResult",
    "CohortAlleleFrequencyStudyResult",
    "PenetranceQualifier",
    "VariantPathogenicityStatement",
    "AlleleOriginQualifier",
    "DiagnosticPredicate",
    "OncogenicPredicate",
    "PrognosticPredicate",
    "TherapeuticResponsePredicate",
    "AllelePrevalenceQualifier",
    "VariantDiagnosticStudyStatement",
    "VariantOncogenicityStudyStatement",
    "VariantPrognosticStudyStatement",
    "VariantTherapeuticResponseStudyStatement",
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""VA Spec Assay Variant Effect statement and study result Profiles"""
2+
3+
from __future__ import annotations
4+
5+
from enum import Enum
6+
from typing import Literal
7+
8+
from ga4gh.cat_vrs.core_models import CategoricalVariant
9+
from ga4gh.core.entity_models import (
10+
IRI,
11+
Coding,
12+
DataSet,
13+
Method,
14+
StatementBase,
15+
StudyGroup,
16+
StudyResult,
17+
StudyResultBase,
18+
)
19+
from ga4gh.vrs.models import MolecularVariation
20+
from pydantic import ConfigDict, Field
21+
22+
23+
class AveFunctionalClassification(str, Enum):
    """The functional classification of the variant effect in the assay."""

    # The ``str`` mix-in makes each member an actual string, so a member
    # compares equal to (and serializes as) its lowercase value below.
    NORMAL = "normal"
    INDETERMINATE = "indeterminate"
    ABNORMAL = "abnormal"
29+
30+
31+
class AveClinicalClassification(str, Enum):
    """The clinical strength of evidence of the variant effect in the assay."""

    # Values are mixed-case codes of the form "<criterion>_<Strength>".
    # NOTE(review): PS3/BS3 presumably refer to the ACMG/AMP functional-evidence
    # criteria (pathogenic / benign) - confirm against the VA-Spec schema.
    PS3_STRONG = "PS3_Strong"
    PS3_MODERATE = "PS3_Moderate"
    PS3_SUPPORTING = "PS3_Supporting"
    BS3_STRONG = "BS3_Strong"
    BS3_MODERATE = "BS3_Moderate"
    BS3_SUPPORTING = "BS3_Supporting"
40+
41+
42+
class AssayVariantEffectFunctionalClassificationStatement(StatementBase):
    """A statement that assigns a functional classification to a variant effect from a functional assay."""

    # With ``use_enum_values`` pydantic stores the enum member's value (the plain
    # string) on the model rather than the Enum member itself.
    model_config = ConfigDict(use_enum_values=True)

    # Discriminator: fixed to this class's name via Literal + matching default.
    type: Literal["AssayVariantEffectFunctionalClassificationStatement"] = Field(
        "AssayVariantEffectFunctionalClassificationStatement",
        description="MUST be 'AssayVariantEffectFunctionalClassificationStatement'.",
    )
    # Required (``...``): an inline variation, a categorical variant, or an IRI.
    subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field(
        ...,
        description="A protein or genomic contextual or canonical molecular variant.",
    )
    # Only one predicate value is permitted; it is also the default.
    predicate: Literal["hasAssayVariantEffectFor"] = Field(
        "hasAssayVariantEffectFor",
        description="The relationship declared to hold between the subject and the object of the Statement.",
    )
    # Required: the assay, given either as an IRI or as a Coding.
    objectAssay: IRI | Coding = Field(
        ...,
        description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)",
    )
    # Required: one of the AveFunctionalClassification values.
    classification: AveFunctionalClassification = Field(
        ...,
        description="The functional classification of the variant effect in the assay.",
    )
    # Optional; defaults to None when not supplied.
    specifiedBy: Method | IRI | None = Field(
        None,
        description="The method that specifies the functional classification of the variant effect in the assay.",
    )
71+
72+
73+
class AssayVariantEffectClinicalClassificationStatement(StatementBase):
    """A statement that assigns a clinical strength of evidence to a variant effect from a functional assay."""

    # With ``use_enum_values`` pydantic stores the enum member's value (the plain
    # string) on the model rather than the Enum member itself.
    model_config = ConfigDict(use_enum_values=True)

    # Discriminator: fixed to this class's name via Literal + matching default.
    type: Literal["AssayVariantEffectClinicalClassificationStatement"] = Field(
        "AssayVariantEffectClinicalClassificationStatement",
        description="MUST be 'AssayVariantEffectClinicalClassificationStatement'.",
    )
    # Required (``...``): an inline variation, a categorical variant, or an IRI.
    subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field(
        ...,
        description="A protein or genomic contextual or canonical molecular variant.",
    )
    # Only one predicate value is permitted; it is also the default.
    predicate: Literal["hasAssayVariantEffectFor"] = Field(
        "hasAssayVariantEffectFor",
        description="The relationship declared to hold between the subject and the object of the Statement.",
    )
    # Required: the assay, given either as an IRI or as a Coding.
    objectAssay: IRI | Coding = Field(
        ...,
        description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)",
    )
    # Required: one of the AveClinicalClassification values.
    classification: AveClinicalClassification = Field(
        ...,
        description="The clinical strength of evidence of the variant effect in the assay.",
    )
    # Optional; defaults to None when not supplied.
    specifiedBy: Method | IRI | None = Field(
        None,
        description="The method that specifies the clinical strength of evidence of the variant effect in the assay.",
    )
102+
103+
104+
class AssayVariantEffectMeasurementStudyResult(StudyResultBase):
    """A StudyResult that reports a variant effect score from a functional assay."""

    # NOTE(review): no enum-typed field is declared directly on this class; the
    # setting presumably mirrors the sibling profiles or serves inherited
    # fields - confirm against StudyResultBase.
    model_config = ConfigDict(use_enum_values=True)

    # Discriminator: fixed to this class's name via Literal + matching default.
    type: Literal["AssayVariantEffectMeasurementStudyResult"] = Field(
        "AssayVariantEffectMeasurementStudyResult",
        description="MUST be 'AssayVariantEffectMeasurementStudyResult'.",
    )
    # Every field below is optional and defaults to None.
    componentResult: list[StudyResult] | None = Field(
        None,
        description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .",
    )
    studyGroup: StudyGroup | None = Field(
        None,
        description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.",
    )
    # Either an inline molecular variation or an IRI.
    focusVariant: MolecularVariation | IRI | None = Field(
        None,
        description="The human mapped representation of the variant that is the subject of the Statement.",
    )
    score: float | None = Field(
        None, description="The score of the variant effect in the assay."
    )
    specifiedBy: Method | IRI | None = Field(
        None,
        description="The assay that was used to measure the variant effect with all the various properties",
    )
    sourceDataSet: list[DataSet] | None = Field(
        None, description="The full data set that this measurement is a part of"
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""VA Spec Cohort Allele Frequency (population frequency) Study Result Standard Profile"""
2+
3+
from __future__ import annotations
4+
5+
from typing import Literal
6+
7+
from ga4gh.core.entity_models import (
8+
DataSet,
9+
StudyResult,
10+
StudyResultBase,
11+
)
12+
from ga4gh.vrs.models import Allele
13+
from pydantic import ConfigDict, Field
14+
15+
16+
class CohortAlleleFrequencyStudyResult(StudyResultBase):
    """A StudyResult that reports measures related to the frequency of an Allele in a cohort"""

    # NOTE(review): no enum-typed field is declared directly on this class; the
    # setting presumably mirrors the sibling profiles or serves inherited
    # fields - confirm against StudyResultBase.
    model_config = ConfigDict(use_enum_values=True)

    # Discriminator: fixed to this class's name via Literal + matching default.
    type: Literal["CohortAlleleFrequencyStudyResult"] = Field(
        "CohortAlleleFrequencyStudyResult",
        description="MUST be 'CohortAlleleFrequencyStudyResult'.",
    )
    # Optional; defaults to None.
    sourceDataSet: list[DataSet] | None = Field(
        None,
        description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.",
    )
    # Required. NOTE(review): the other profiles reference objects with ``IRI``
    # while this field accepts a bare ``str`` - confirm which is intended.
    focusAllele: Allele | str = Field(
        ...,
        description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.",
    )
    # The three measures below are all required.
    focusAlleleCount: int = Field(
        ..., description="The number of occurrences of the focusAllele in the cohort."
    )
    locusAlleleCount: int = Field(
        ...,
        description="The number of occurrences of all alleles at the locus in the cohort (sometimes referred to as 'allele number')",
    )
    focusAlleleFrequency: float = Field(
        ..., description="The frequency of the focusAllele in the cohort."
    )
    # Required. NOTE(review): typed as a list of StudyResult although the
    # description speaks of a single cohort - verify against the VA-Spec schema
    # (a study-group type may be what the spec intends).
    cohort: list[StudyResult] = Field(
        ..., description="The cohort from which the frequency was derived."
    )
    # Self-referential: each entry is itself a CohortAlleleFrequencyStudyResult.
    # The forward reference works because annotations are lazily evaluated
    # (``from __future__ import annotations`` at the top of this module).
    subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field(
        None,
        description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. This creates a recursive relationship and subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.",
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""VA Spec Variant Pathogenicity Statement Standard Profile"""
2+
3+
from enum import Enum
4+
from typing import Literal
5+
6+
from ga4gh.cat_vrs.core_models import CategoricalVariant
7+
from ga4gh.core.domain_models import Condition, Gene
8+
from ga4gh.core.entity_models import IRI, Coding, StatementBase
9+
from ga4gh.vrs.models import Variation
10+
from pydantic import ConfigDict, Field
11+
12+
13+
class PenetranceQualifier(str, Enum):
    """Reports the penetrance of the pathogenic effect - i.e. the extent to which the
    variant impact is expressed by individuals carrying it as a measure of the
    proportion of carriers exhibiting the condition.
    """

    # The ``str`` mix-in makes each member an actual string equal to its value.
    # Note that RISK_ALLELE's value contains a space, unlike its member name.
    HIGH = "high"
    LOW = "low"
    RISK_ALLELE = "risk allele"
22+
23+
24+
class VariantPathogenicityStatement(StatementBase):
    """A Statement describing the role of a variant in causing an inherited condition."""

    # With ``use_enum_values`` pydantic stores the enum member's value (the plain
    # string) on the model rather than the Enum member itself.
    model_config = ConfigDict(use_enum_values=True)

    # Discriminator: fixed to this class's name via Literal + matching default.
    type: Literal["VariantPathogenicityStatement"] = Field(
        "VariantPathogenicityStatement",
        description="MUST be 'VariantPathogenicityStatement'.",
    )
    # Required (``...``): an inline variation, a categorical variant, or an IRI.
    subjectVariant: Variation | CategoricalVariant | IRI = Field(
        ..., description="A variant that is the subject of the Statement."
    )
    # Only one predicate value is permitted; it is also the default.
    predicate: Literal["isCausalFor"] = Field(
        "isCausalFor",
        description="The relationship declared to hold between the subject and the object of the Statement.",
    )
    # Required: the condition, given inline or as an IRI.
    objectCondition: Condition | IRI = Field(
        ..., description="The Condition for which the variant impact is stated."
    )
    # The three qualifiers below are optional and default to None.
    penetranceQualifier: PenetranceQualifier | None = Field(
        None,
        description="Reports the penetrance of the pathogenic effect - i.e. the extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition.",
    )
    modeOfInheritanceQualifier: list[Coding] | None = Field(
        None,
        description="Reports a pattern of inheritance expected for the pathogenic effect of the variant. Use HPO terms within the hierarchy of 'HP:0000005' (mode of inheritance) to specify.",
    )
    geneContextQualifier: Gene | IRI | None = Field(
        None,
        description="Reports the gene through which the pathogenic effect asserted for the variant is mediated (i.e. it is the variant's impact on this gene that is responsible for causing the condition).",
    )

0 commit comments

Comments
 (0)