Skip to content

Add validation for fasta input and tests #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4aca65a
Add validation for fasta files
JoseEspinosa May 8, 2024
be8b290
Fix format
JoseEspinosa May 8, 2024
0536976
Add test_fasta profile
JoseEspinosa May 8, 2024
fe1ca80
Avoid error if dict key not set
JoseEspinosa May 8, 2024
38bd3ee
Get rid of parameter from module
JoseEspinosa May 8, 2024
a47e239
Merge remote-tracking branch 'upstream/dev' into updates
JoseEspinosa May 8, 2024
7c48293
Make lint happy
JoseEspinosa May 8, 2024
cf6688b
Merge branch 'dev' into updates
JoseEspinosa May 9, 2024
ca19068
Update bin/fetch_oma_by_sequence.py
JoseEspinosa May 9, 2024
2f2eaf0
Update bin/fetch_oma_by_sequence.py
JoseEspinosa May 9, 2024
0f1ef99
Merge remote-tracking branch 'upstream/dev' into updates
JoseEspinosa May 9, 2024
36d3769
Merge branch 'updates' of https://github.com/JoseEspinosa/reportho in…
JoseEspinosa May 9, 2024
15aa8f6
Branch depending on whether uniprot_id or fasta provided
JoseEspinosa May 10, 2024
bb04c56
Update tests
JoseEspinosa May 10, 2024
a376ae3
Get rid of leftovers of the uniprot_query parameter
JoseEspinosa May 10, 2024
f1f25bd
Update docs
JoseEspinosa May 10, 2024
b899246
Do not use set for ch_versions
JoseEspinosa May 10, 2024
3f472ad
Add test_fasta to CI
JoseEspinosa May 10, 2024
72f80aa
Make nf-core lint happy
JoseEspinosa May 10, 2024
3490c4c
Make prettier happy
JoseEspinosa May 10, 2024
8fe4f82
Make nf-core lint happy (bug in tools until fixed)
JoseEspinosa May 10, 2024
b13725c
Revert changes in .nf-core.yml
JoseEspinosa May 10, 2024
320ec61
Merge remote-tracking branch 'upstream/dev' into updates
JoseEspinosa May 10, 2024
6d270ea
Assign ch_versions
JoseEspinosa May 10, 2024
b849f92
Fix tyop
JoseEspinosa May 13, 2024
195ab8d
Add samplesheet_fasta in assets
JoseEspinosa May 13, 2024
f276a8e
Rename samplesheet_fasta example
JoseEspinosa May 13, 2024
cc50269
Update docs/usage.md
itrujnara May 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,22 @@
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "A query must be provided"
},
"fasta": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.fa(sta)?$",
"errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'"
}
}
},
"anyOf": [
{
"required": ["id", "query"]
},
"required": ["id", "query"]
}
{
"required": ["id", "fasta"]
}
]
}
9 changes: 6 additions & 3 deletions bin/fetch_oma_by_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def main() -> None:
raise ValueError("Not enough arguments. Usage: fetch_oma_by_sequence.py <fasta> <id_out> <taxid_out> <exact_out>")

seqs = SeqIO.parse(sys.argv[1], "fasta")

seq = next(seqs).seq

# Only use the first sequence, ignore all others
Expand All @@ -30,11 +31,12 @@ def main() -> None:

# Find the main isoform
for it in json["targets"]:
if it["is_main_isoform"]:
entry = it
break
if it["is_main_isoform"]:
entry = it
break

# Write exact match status

if json["identified_by"] == "exact match":
print("true", file=open(sys.argv[4], 'w'))
else:
Expand All @@ -53,6 +55,7 @@ def main() -> None:
raise ValueError("Isoform not found")

print(entry["canonicalid"], file=open(sys.argv[2], "w"))

print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w"))


Expand Down
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ process {
// ----------------------

withName: 'FETCH_SEQUENCES_ONLINE' {
ext.args = { params.uniprot_query ? "" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" }
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
Expand Down
32 changes: 32 additions & 0 deletions conf/test_fasta.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/reportho -profile test_fasta,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

// Parameter overrides for the FASTA-input test run; nextflow.config includes
// this file under the `test_fasta` profile.
params {
// Human-readable name and description of this configuration
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data: samplesheet_fasta.csv, fetched from the nf-core/test-datasets
// repository (reportho branch)
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv'

// Other parameters
// uniprot_query = false selects the branch of GET_ORTHOLOGS that wraps the
// second samplesheet field with file(), i.e. the query is handled as a FASTA
// file rather than as an identifier.
uniprot_query = false
// NOTE(review): the remaining settings presumably switch off or shorten
// optional steps (EggNOG, IQ-TREE, FastME bootstrapping) and set the minimum
// ortholog score to keep the test fast — confirm against nextflow_schema.json.
skip_eggnog = true
min_score = 3
skip_iqtree = true
fastme_bootstrap = 0
}

7 changes: 4 additions & 3 deletions modules/local/fetch_sequences_online.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ process FETCH_SEQUENCES_ONLINE {
input:
tuple val(meta), path(ids), path(query_fasta)


output:
tuple val(meta), path("*_orthologs.fa") , emit: fasta
tuple val(meta), path("*_seq_hits.txt") , emit: hits
Expand All @@ -20,11 +21,11 @@ process FETCH_SEQUENCES_ONLINE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
add_query = params.uniprot_query ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa"
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: meta.id
"""
fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa
$add_query
$args

cat <<- END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 4 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@ profiles {
executor.cpus = 4
executor.memory = 8.GB
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test { includeConfig 'conf/test.config' }
test_fasta { includeConfig 'conf/test_fasta.config' }
test_full { includeConfig 'conf/test_full.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
Expand All @@ -199,7 +200,7 @@ singularity.registry = 'quay.io'

// Nextflow plugins
plugins {
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
Expand Down
7 changes: 4 additions & 3 deletions subworkflows/local/get_orthologs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ workflow GET_ORTHOLOGS {
ch_orthogroups = Channel.empty()

// Preprocessing - find the ID and taxid of the query sequences

if (!params.uniprot_query) {
ch_samplesheet
.map { it -> [it[0], file(it[1])] }
Expand All @@ -45,7 +44,8 @@ workflow GET_ORTHOLOGS {
ch_versions
.mix(IDENTIFY_SEQ_ONLINE.out.versions)
.set { ch_versions }
} else {
}
else {
WRITE_SEQINFO (
ch_samplesheet
)
Expand Down Expand Up @@ -78,7 +78,8 @@ workflow GET_ORTHOLOGS {
ch_versions
.mix(FETCH_OMA_GROUP_LOCAL.out.versions)
.set { ch_versions }
} else {
}
else {
FETCH_OMA_GROUP_ONLINE (
ch_query
)
Expand Down
8 changes: 6 additions & 2 deletions subworkflows/local/utils_nfcore_reportho_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,12 @@ workflow PIPELINE_INITIALISATION {
Channel
.fromSamplesheet("input")
.map {
id, query ->
[ id, query ]
id, query, fasta ->
if (query) {
[ id, query ]
} else {
[ id, fasta ]
}
}
.set { ch_samplesheet }

Expand Down
8 changes: 4 additions & 4 deletions workflows/reportho.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ workflow REPORTHO {
.mix(GET_ORTHOLOGS.out.versions)
.set { ch_versions }

ch_seqhits = ch_samplesheet.map { [it[0], []] }
ch_seqhits = ch_samplesheet.map { [it[0], []] }
ch_seqmisses = ch_samplesheet.map { [it[0], []] }
ch_strhits = ch_samplesheet.map { [it[0], []] }
ch_strhits = ch_samplesheet.map { [it[0], []] }
ch_strmisses = ch_samplesheet.map { [it[0], []] }
ch_alignment = ch_samplesheet.map { [it[0], []] }
ch_iqtree = ch_samplesheet.map { [it[0], []] }
ch_fastme = ch_samplesheet.map { [it[0], []] }
ch_iqtree = ch_samplesheet.map { [it[0], []] }
ch_fastme = ch_samplesheet.map { [it[0], []] }

if (!params.skip_downstream) {
FETCH_SEQUENCES (
Expand Down
Loading