nf-core · JoseEspinosa · May 13, 2024 · May 8, 2024 · May 8, 2024 · May 8, 2024
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -17,8 +17,22 @@
                 "type": "string",
                 "pattern": "^\\S+$",
                 "errorMessage": "A query must be provided"
+            },
+            "fasta": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.fa(sta)?$",
+                "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'"
             }
+        }
+    },
+    "anyOf": [
+        {
+            "required": ["id", "query"]
         },
-        "required": ["id", "query"]
-    }
+        {
+            "required": ["id", "fasta"]
+        }
+    ]
 }
diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py
@@ -15,6 +15,7 @@ def main() -> None:
         raise ValueError("Not enough arguments. Usage: fetch_oma_by_sequence.py <fasta> <id_out> <taxid_out> <exact_out>")
 
     seqs = SeqIO.parse(sys.argv[1], "fasta")
+
     seq = next(seqs).seq
 
     # Only use the first sequence, ignore all others
@@ -30,11 +31,12 @@ def main() -> None:
 
     # Find the main isoform
     for it in json["targets"]:
-            if it["is_main_isoform"]:
-                entry = it
-                break
+        if it["is_main_isoform"]:
+            entry = it
+            break
 
     # Write exact match status
+
     if json["identified_by"] == "exact match":
         print("true", file=open(sys.argv[4], 'w'))
     else:
@@ -53,6 +55,7 @@ def main() -> None:
                 raise ValueError("Isoform not found")
 
     print(entry["canonicalid"], file=open(sys.argv[2], "w"))
+
     print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w"))
 
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -126,6 +126,7 @@ process {
     // ----------------------
 
     withName: 'FETCH_SEQUENCES_ONLINE' {
+        ext.args   = { params.uniprot_query ? "" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" }
         publishDir = [
             path: { "${params.outdir}/sequences" },
             mode: params.publish_dir_mode,

diff --git a/conf/test_fasta.config b/conf/test_fasta.config
@@ -0,0 +1,32 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests with FASTA input
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/reportho -profile test_fasta,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv'
+
+    // Other parameters
+    uniprot_query    = false
+    skip_eggnog      = true
+    min_score        = 3
+    skip_iqtree      = true
+    fastme_bootstrap = 0
+}
+
diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf
@@ -10,6 +10,7 @@ process FETCH_SEQUENCES_ONLINE {
     input:
     tuple val(meta), path(ids), path(query_fasta)
 
+
     output:
     tuple val(meta), path("*_orthologs.fa")  , emit: fasta
     tuple val(meta), path("*_seq_hits.txt")  , emit: hits
@@ -20,11 +21,11 @@ process FETCH_SEQUENCES_ONLINE {
     task.ext.when == null || task.ext.when
 
     script:
-    prefix    = task.ext.prefix ?: meta.id
-    add_query = params.uniprot_query ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa"
+    def args = task.ext.args ?: ''
+    prefix   = task.ext.prefix ?: meta.id
     """
     fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa
-    $add_query
+    $args
 
     cat <<- END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/nextflow.config b/nextflow.config
@@ -185,8 +185,9 @@ profiles {
         executor.cpus          = 4
         executor.memory        = 8.GB
     }
-    test      { includeConfig 'conf/test.config'      }
-    test_full { includeConfig 'conf/test_full.config' }
+    test       { includeConfig 'conf/test.config'       }
+    test_fasta { includeConfig 'conf/test_fasta.config' }
+    test_full  { includeConfig 'conf/test_full.config'  }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -199,7 +200,7 @@ singularity.registry = 'quay.io'
 
 // Nextflow plugins
 plugins {
-    id 'nf-[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container

diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
@@ -28,7 +28,6 @@ workflow GET_ORTHOLOGS {
     ch_orthogroups = Channel.empty()
 
     // Preprocessing - find the ID and taxid of the query sequences
-
     if (!params.uniprot_query) {
         ch_samplesheet
             .map { it -> [it[0], file(it[1])] }
@@ -45,7 +44,8 @@ workflow GET_ORTHOLOGS {
         ch_versions
             .mix(IDENTIFY_SEQ_ONLINE.out.versions)
             .set { ch_versions }
-    } else {
+    }
+    else {
         WRITE_SEQINFO (
             ch_samplesheet
         )
@@ -78,7 +78,8 @@ workflow GET_ORTHOLOGS {
             ch_versions
                 .mix(FETCH_OMA_GROUP_LOCAL.out.versions)
                 .set { ch_versions }
-        } else {
+        }
+        else {
             FETCH_OMA_GROUP_ONLINE (
                 ch_query
             )

diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf
@@ -79,8 +79,12 @@ workflow PIPELINE_INITIALISATION {
     Channel
         .fromSamplesheet("input")
         .map {
-            id, query ->
-                [ id, query ]
+            id, query, fasta ->
+                if (query) {
+                    [ id, query ]
+                } else {
+                    [ id, fasta ]
+                }
         }
         .set { ch_samplesheet }
 

diff --git a/workflows/reportho.nf b/workflows/reportho.nf
@@ -44,13 +44,13 @@ workflow REPORTHO {
         .mix(GET_ORTHOLOGS.out.versions)
         .set { ch_versions }
 
-    ch_seqhits = ch_samplesheet.map { [it[0], []] }
+    ch_seqhits   = ch_samplesheet.map { [it[0], []] }
     ch_seqmisses = ch_samplesheet.map { [it[0], []] }
-    ch_strhits = ch_samplesheet.map { [it[0], []] }
+    ch_strhits   = ch_samplesheet.map { [it[0], []] }
     ch_strmisses = ch_samplesheet.map { [it[0], []] }
     ch_alignment = ch_samplesheet.map { [it[0], []] }
-    ch_iqtree = ch_samplesheet.map { [it[0], []] }
-    ch_fastme = ch_samplesheet.map { [it[0], []] }
+    ch_iqtree    = ch_samplesheet.map { [it[0], []] }
+    ch_fastme    = ch_samplesheet.map { [it[0], []] }
 
     if (!params.skip_downstream) {
         FETCH_SEQUENCES (