Skip to content

Commit f33c0ff

Browse files
feat: implement amino acid mutation filters
issue: #287. This also fixed nucleotide mutations: they need to be passed as a string array instead of a comma-separated list.
1 parent d6747f9 commit f33c0ff

36 files changed

+1375
-524
lines changed

lapis2/src/main/kotlin/org/genspectrum/lapis/OpenApiDocs.kt

Lines changed: 99 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,59 +11,81 @@ import org.genspectrum.lapis.config.SequenceFilterFields
1111
import org.genspectrum.lapis.controller.AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION
1212
import org.genspectrum.lapis.controller.AGGREGATED_REQUEST_SCHEMA
1313
import org.genspectrum.lapis.controller.AGGREGATED_RESPONSE_SCHEMA
14+
import org.genspectrum.lapis.controller.AMINO_ACID_MUTATIONS_SCHEMA
1415
import org.genspectrum.lapis.controller.DETAILS_FIELDS_DESCRIPTION
1516
import org.genspectrum.lapis.controller.DETAILS_REQUEST_SCHEMA
1617
import org.genspectrum.lapis.controller.DETAILS_RESPONSE_SCHEMA
17-
import org.genspectrum.lapis.controller.FIELDS_PROPERTY
18-
import org.genspectrum.lapis.controller.MIN_PROPORTION_PROPERTY
18+
import org.genspectrum.lapis.controller.NUCLEOTIDE_MUTATIONS_SCHEMA
1919
import org.genspectrum.lapis.controller.REQUEST_SCHEMA_WITH_MIN_PROPORTION
2020
import org.genspectrum.lapis.controller.SEQUENCE_FILTERS_SCHEMA
21+
import org.genspectrum.lapis.request.AMINO_ACID_MUTATIONS_PROPERTY
22+
import org.genspectrum.lapis.request.AminoAcidMutation
23+
import org.genspectrum.lapis.request.FIELDS_PROPERTY
24+
import org.genspectrum.lapis.request.MIN_PROPORTION_PROPERTY
25+
import org.genspectrum.lapis.request.NUCLEOTIDE_MUTATIONS_PROPERTY
26+
import org.genspectrum.lapis.request.NucleotideMutation
2127
import org.genspectrum.lapis.response.COUNT_PROPERTY
2228

2329
fun buildOpenApiSchema(sequenceFilterFields: SequenceFilterFields, databaseConfig: DatabaseConfig): OpenAPI {
2430
val requestProperties = when (databaseConfig.schema.opennessLevel) {
25-
OpennessLevel.PROTECTED -> sequenceFilterFieldSchemas(sequenceFilterFields) + ("accessKey" to accessKeySchema())
26-
else -> sequenceFilterFieldSchemas(sequenceFilterFields)
31+
OpennessLevel.PROTECTED -> primitiveSequenceFilterFieldSchemas(sequenceFilterFields) +
32+
("accessKey" to accessKeySchema())
33+
34+
else -> primitiveSequenceFilterFieldSchemas(sequenceFilterFields)
2735
}
2836

37+
val sequenceFilters = requestProperties +
38+
Pair(NUCLEOTIDE_MUTATIONS_PROPERTY, nucleotideMutations()) +
39+
Pair(AMINO_ACID_MUTATIONS_PROPERTY, aminoAcidMutations())
40+
2941
return OpenAPI()
3042
.components(
31-
Components().addSchemas(
32-
SEQUENCE_FILTERS_SCHEMA,
33-
Schema<String>()
34-
.type("object")
35-
.description("valid filters for sequence data")
36-
.properties(requestProperties),
37-
).addSchemas(
38-
REQUEST_SCHEMA_WITH_MIN_PROPORTION,
39-
Schema<String>()
40-
.type("object")
41-
.description("valid filters for sequence data")
42-
.properties(requestProperties + Pair(MIN_PROPORTION_PROPERTY, Schema<String>().type("number"))),
43-
).addSchemas(
44-
AGGREGATED_REQUEST_SCHEMA,
45-
requestSchemaWithFields(requestProperties, AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION),
46-
).addSchemas(
47-
DETAILS_REQUEST_SCHEMA,
48-
requestSchemaWithFields(requestProperties, DETAILS_FIELDS_DESCRIPTION),
49-
).addSchemas(
50-
AGGREGATED_RESPONSE_SCHEMA,
51-
Schema<String>()
52-
.type("object")
53-
.description(
54-
"Aggregated sequence data. " +
55-
"If fields are specified, then these fields are also keys in the result. " +
56-
"The key 'count' is always present.",
57-
)
58-
.required(listOf(COUNT_PROPERTY))
59-
.properties(getAggregatedResponseProperties(metadataFieldSchemas(databaseConfig))),
60-
).addSchemas(
61-
DETAILS_RESPONSE_SCHEMA,
62-
Schema<String>()
63-
.type("object")
64-
.description("The response contains the metadata of every sequence matching the sequence filters.")
65-
.properties(metadataFieldSchemas(databaseConfig)),
66-
),
43+
Components()
44+
.addSchemas(
45+
SEQUENCE_FILTERS_SCHEMA,
46+
Schema<String>()
47+
.type("object")
48+
.description("valid filters for sequence data")
49+
.properties(requestProperties),
50+
)
51+
.addSchemas(
52+
REQUEST_SCHEMA_WITH_MIN_PROPORTION,
53+
Schema<String>()
54+
.type("object")
55+
.description("valid filters for sequence data")
56+
.properties(sequenceFilters + Pair(MIN_PROPORTION_PROPERTY, Schema<String>().type("number"))),
57+
)
58+
.addSchemas(
59+
AGGREGATED_REQUEST_SCHEMA,
60+
requestSchemaWithFields(sequenceFilters, AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION),
61+
)
62+
.addSchemas(
63+
DETAILS_REQUEST_SCHEMA,
64+
requestSchemaWithFields(sequenceFilters, DETAILS_FIELDS_DESCRIPTION),
65+
)
66+
.addSchemas(
67+
AGGREGATED_RESPONSE_SCHEMA,
68+
Schema<String>()
69+
.type("object")
70+
.description(
71+
"Aggregated sequence data. " +
72+
"If fields are specified, then these fields are also keys in the result. " +
73+
"The key 'count' is always present.",
74+
)
75+
.required(listOf(COUNT_PROPERTY))
76+
.properties(getAggregatedResponseProperties(metadataFieldSchemas(databaseConfig))),
77+
)
78+
.addSchemas(
79+
DETAILS_RESPONSE_SCHEMA,
80+
Schema<String>()
81+
.type("object")
82+
.description(
83+
"The response contains the metadata of every sequence matching the sequence filters.",
84+
)
85+
.properties(metadataFieldSchemas(databaseConfig)),
86+
)
87+
.addSchemas(NUCLEOTIDE_MUTATIONS_SCHEMA, nucleotideMutations())
88+
.addSchemas(AMINO_ACID_MUTATIONS_SCHEMA, aminoAcidMutations()),
6789
)
6890
}
6991

@@ -78,12 +100,13 @@ private fun mapToOpenApiType(type: MetadataType): String = when (type) {
78100
MetadataType.FLOAT -> "number"
79101
}
80102

81-
private fun sequenceFilterFieldSchemas(sequenceFilterFields: SequenceFilterFields) = sequenceFilterFields.fields
82-
.map { (fieldName, fieldType) -> fieldName to Schema<String>().type(fieldType.openApiType) }
83-
.toMap()
103+
private fun primitiveSequenceFilterFieldSchemas(sequenceFilterFields: SequenceFilterFields) =
104+
sequenceFilterFields.fields
105+
.map { (fieldName, fieldType) -> fieldName to Schema<String>().type(fieldType.openApiType) }
106+
.toMap()
84107

85108
private fun requestSchemaWithFields(
86-
requestProperties: Map<SequenceFilterFieldName, Schema<Any>>,
109+
requestProperties: Map<SequenceFilterFieldName, Schema<out Any>>,
87110
fieldsDescription: String,
88111
): Schema<*> =
89112
Schema<String>()
@@ -109,6 +132,38 @@ private fun accessKeySchema() = Schema<String>()
109132
"the other also grants access to detailed data.",
110133
)
111134

135+
private fun nucleotideMutations() =
136+
Schema<List<NucleotideMutation>>()
137+
.type("array")
138+
.items(
139+
Schema<String>()
140+
.type("string")
141+
.example("sequence1:A123T")
142+
.description(
143+
"""
144+
|A nucleotide mutation in the format "\<sequenceName\>?:\<fromSymbol\>?\<position\>\<toSymbol\>?".
145+
|If the sequenceName is not provided, LAPIS will use the default sequence name.
146+
|The fromSymbol is optional.
147+
|If the toSymbol is not provided, the statement means "has any mutation at the given position".
148+
""".trimMargin(),
149+
),
150+
)
151+
152+
private fun aminoAcidMutations() =
153+
Schema<List<AminoAcidMutation>>()
154+
.type("array")
155+
.items(
156+
Schema<String>()
157+
.type("string")
158+
.example("S:123T")
159+
.description(
160+
"""
161+
|An amino acid mutation in the format "\<gene\>:\<position\>\<toSymbol\>?".
162+
|If the toSymbol is not provided, the statement means "has any mutation at the given position".
163+
""".trimMargin(),
164+
),
165+
)
166+
112167
// This is a function so that the resulting schema can be reused in multiple places. The setters mutate the instance.
113168
private fun fieldsSchema() = Schema<String>()
114169
.type("array")

lapis2/src/main/kotlin/org/genspectrum/lapis/config/SequenceFilterFields.kt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@ typealias SequenceFilterFieldName = String
44

55
data class SequenceFilterFields(val fields: Map<SequenceFilterFieldName, SequenceFilterFieldType>) {
66
companion object {
7-
private val nucleotideMutationsField = Pair("nucleotideMutations", SequenceFilterFieldType.MutationsList)
8-
97
fun fromDatabaseConfig(databaseConfig: DatabaseConfig): SequenceFilterFields {
108
val metadataFields = databaseConfig.schema.metadata
119
.map(::mapToSequenceFilterFields)
1210
.flatten()
1311
.toMap()
14-
val staticFields = listOf(nucleotideMutationsField)
1512

1613
val featuresFields = if (databaseConfig.schema.features.isEmpty()) {
1714
emptyMap<SequenceFilterFieldName, SequenceFilterFieldType>()
1815
} else {
1916
databaseConfig.schema.features.associate(::mapToSequenceFilterFieldsFromFeatures)
2017
}
2118

22-
return SequenceFilterFields(fields = metadataFields + staticFields + featuresFields)
19+
return SequenceFilterFields(fields = metadataFields + featuresFields)
2320
}
2421
}
2522
}
@@ -55,7 +52,6 @@ sealed class SequenceFilterFieldType(val openApiType: kotlin.String) {
5552
object String : SequenceFilterFieldType("string")
5653
object PangoLineage : SequenceFilterFieldType("string")
5754
object Date : SequenceFilterFieldType("string")
58-
object MutationsList : SequenceFilterFieldType("string")
5955
object VariantQuery : SequenceFilterFieldType("string")
6056
data class DateFrom(val associatedField: SequenceFilterFieldName) : SequenceFilterFieldType("string")
6157
data class DateTo(val associatedField: SequenceFilterFieldName) : SequenceFilterFieldType("string")

0 commit comments

Comments
 (0)