From 9d5dec7197d1321ad44aeb349335d1ca578ceb30 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 6 Jun 2024 13:52:12 +0200 Subject: [PATCH 01/15] Update deebgc/pipeline module --- modules.json | 2 +- modules/nf-core/deepbgc/pipeline/main.nf | 50 ++- .../deepbgc/pipeline/tests/main.nf.test | 116 ++++++ .../deepbgc/pipeline/tests/main.nf.test.snap | 331 ++++++++++++++++++ .../nf-core/deepbgc/pipeline/tests/tags.yml | 2 + 5 files changed, 489 insertions(+), 12 deletions(-) create mode 100644 modules/nf-core/deepbgc/pipeline/tests/main.nf.test create mode 100644 modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepbgc/pipeline/tests/tags.yml diff --git a/modules.json b/modules.json index 9518e146..d9a80d84 100644 --- a/modules.json +++ b/modules.json @@ -72,7 +72,7 @@ }, "deepbgc/pipeline": { "branch": "master", - "git_sha": "f315f85d9ac6c321f6e3596493fd61019340df2a", + "git_sha": "0ea330bfd93fbbe5b5cae9afab565c73cc508583", "installed_by": ["modules"] }, "fargene": { diff --git a/modules/nf-core/deepbgc/pipeline/main.nf b/modules/nf-core/deepbgc/pipeline/main.nf index 9cc22708..fc72d238 100644 --- a/modules/nf-core/deepbgc/pipeline/main.nf +++ b/modules/nf-core/deepbgc/pipeline/main.nf @@ -12,17 +12,17 @@ process DEEPBGC_PIPELINE { path(db) output: - tuple val(meta), path("${prefix}/README.txt") , optional: true, emit: readme - tuple val(meta), path("${prefix}/LOG.txt") , emit: log - tuple val(meta), path("${prefix}/${genome.baseName}.antismash.json") , optional: true, emit: json - tuple val(meta), path("${prefix}/${genome.baseName}.bgc.gbk") , optional: true, emit: bgc_gbk - tuple val(meta), path("${prefix}/${genome.baseName}.bgc.tsv") , optional: true, emit: bgc_tsv - tuple val(meta), path("${prefix}/${genome.baseName}.full.gbk") , optional: true, emit: full_gbk - tuple val(meta), path("${prefix}/${genome.baseName}.pfam.tsv") , optional: true, emit: pfam_tsv - tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.bgc.png") , optional: true, emit: bgc_png - tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.pr.png") , optional: true, emit: pr_png - tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.roc.png") , optional: true, emit: roc_png - tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.score.png") , optional: true, emit: score_png + tuple val(meta), path("${prefix}/README.txt") , optional: true, emit: readme + tuple val(meta), path("${prefix}/LOG.txt") , emit: log + tuple val(meta), path("${prefix}/${prefix}.antismash.json") , optional: true, emit: json + tuple val(meta), path("${prefix}/${prefix}.bgc.gbk") , optional: true, emit: bgc_gbk + tuple val(meta), path("${prefix}/${prefix}.bgc.tsv") , optional: true, emit: bgc_tsv + tuple val(meta), path("${prefix}/${prefix}.full.gbk") , optional: true, emit: full_gbk + tuple val(meta), path("${prefix}/${prefix}.pfam.tsv") , optional: true, emit: pfam_tsv + tuple val(meta), path("${prefix}/evaluation/${prefix}.bgc.png") , optional: true, emit: bgc_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.pr.png") , optional: true, emit: pr_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.roc.png") , optional: true, emit: roc_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.score.png"), optional: true, emit: score_png path "versions.yml" , emit: versions when: @@ -43,6 +43,34 @@ process DEEPBGC_PIPELINE { mv "${genome.baseName}/" "${prefix}/" fi + for i in \$(find -name '${genome.baseName}*' -type f); do + mv \$i \${i/${genome.baseName}/${prefix}}; + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) ) + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}/evaluation + touch ${prefix}/README.txt + touch ${prefix}/LOG.txt + touch ${prefix}/${prefix}.antismash.json + touch ${prefix}/${prefix}.bgc.gbk + touch ${prefix}/${prefix}.bgc.tsv + touch ${prefix}/${prefix}.full.gbk + touch ${prefix}/${prefix}.pfam.tsv + touch ${prefix}/evaluation/${prefix}.bgc.png + touch ${prefix}/evaluation/${prefix}.pr.png + touch ${prefix}/evaluation/${prefix}.roc.png + touch ${prefix}/evaluation/${prefix}.score.png + cat <<-END_VERSIONS > versions.yml "${task.process}": deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) ) diff --git a/modules/nf-core/deepbgc/pipeline/tests/main.nf.test b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test new file mode 100644 index 00000000..190b7e8f --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process DEEPBGC_PIPELINE" + script "../main.nf" + process "DEEPBGC_PIPELINE" + + tag "modules" + tag "modules_nfcore" + tag "deepbgc" + tag "deepbgc/pipeline" + tag "deepbgc/download" + tag "gunzip" + tag "prodigal" + + setup { + run("DEEPBGC_DOWNLOAD") { + script "../..//download/main.nf" + process { + """ + """ + } + } + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test_gbk', single_end:false ], // meta map + file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)) + ]) + """ + } + } + run("PRODIGAL") { + script "../../../prodigal/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'gbk' + """ + } + } + } + + test("deepbgc pipeline gbk - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = PRODIGAL.out.gene_annotations + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("gbk_versions") }, + { assert snapshot(process.out.json).match("gbk_json") }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_gbk.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("fa_versions") }, + { assert snapshot(process.out.bgc_gbk).match("fa_bgc_gbk") }, + { assert snapshot(process.out.bgc_png).match("fa_bgc_png") }, + { assert snapshot(process.out.score_png).match("fa_score_png") }, + { assert snapshot(process.out.pfam_tsv).match("fa_pfam_tsv") }, + { assert path(process.out.json.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_tsv.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub") { + options "-stub" + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} + diff --git a/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap new file mode 100644 index 00000000..ef64db97 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap @@ -0,0 +1,331 @@ +{ + "gbk_versions": { + "content": [ + [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:29:41.728695197" + }, + "fa_bgc_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,f4a0fc6cd260e2d7ad16f7a1fa103f96" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.389704368" + }, + "deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ], + "2": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "full_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pfam_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pr_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readme": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "roc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "score_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:32:11.354631831" + }, + "fa_score_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,572e8882031f667580d8c8e13c2cbb91" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.401051746" + }, + "fa_pfam_tsv": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,1179eb4e6df0c83aaeec18d7d34e7524" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.411632144" + }, + "gbk_json": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,889ac1efb6a9a7d7b8c65e4cd2233bba" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:25:25.861672633" + }, + "fa_versions": { + "content": [ + [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:44:16.352023677" + }, + "fa_bgc_gbk": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,7fc70dd034903622dae273bf71b402f2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.383560585" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepbgc/pipeline/tests/tags.yml b/modules/nf-core/deepbgc/pipeline/tests/tags.yml new file mode 100644 index 00000000..c6c4e11d --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +deepbgc/pipeline: + - "modules/nf-core/deepbgc/pipeline/**" From aa19d6053963baebbbb60992c7bc46899a9bdca6 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 6 Jun 2024 13:56:49 +0200 Subject: [PATCH 02/15] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3ec6b99..73d0cc3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#365](https://github.com/nf-core/funcscan/pull/365) Fixed AMRFinderPlus module and usage docs for manual database download. (by @jasmezz) - [#371](https://github.com/nf-core/funcscan/pull/371) Fixed AMRFinderPlus parameter `arg_amrfinderplus_name`. (by @m3hdad) - [#376](https://github.com/nf-core/funcscan/pull/376) Fixed an occasional RGI process failure when certain files not produced. (❤️ to @amizeranschi for reporting, fix by @amizeranschi & @jfy133) +- [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names. (by @jfy133, @jasmezz) ### `Dependencies` From fbc3061d2b6f2f01f8ebb3da69f8f29ca09bb37e Mon Sep 17 00:00:00 2001 From: jasmezz Date: Fri, 7 Jun 2024 14:05:24 +0200 Subject: [PATCH 03/15] Adding warning for missing deepbgc output, need only to add meta.id --- subworkflows/local/bgc.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 455e7719..31d900ee 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -99,7 +99,10 @@ workflow BGC { DEEPBGC_PIPELINE ( fastas, ch_deepbgc_database) ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) - ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) + + if ( params.bgc_skip_antismash && params.bgc_skip_gecco && !params.bgc_skip_deepbgc ) { + DEEPBGC_PIPELINE.out.bgc_tsv.collect{it[1]}.ifEmpty(log.warn("[nf-core/funcscan] No hits found by DeepBGC; comBGC summary tool will not be run for sample ${DEEPBGC_PIPELINE.out.bgc_tsv.collect{it[0]}}.")) // TODO: Trying to insert meta.id in the end of the warning string + } } // GECCO From e00a9680f326cb8fd0af32a365027f18ed7fc6db Mon Sep 17 00:00:00 2001 From: jasmezz Date: Mon, 10 Jun 2024 12:08:22 +0200 Subject: [PATCH 04/15] Print warning if no hits found by BGC tools --- subworkflows/local/bgc.nf | 81 +++++++++++++++++++++++++++++---------- workflows/funcscan.nf | 4 +- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 31d900ee..d2d3d9b6 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -99,10 +99,7 @@ workflow BGC { DEEPBGC_PIPELINE ( fastas, ch_deepbgc_database) ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) - - if ( params.bgc_skip_antismash && params.bgc_skip_gecco && !params.bgc_skip_deepbgc ) { - DEEPBGC_PIPELINE.out.bgc_tsv.collect{it[1]}.ifEmpty(log.warn("[nf-core/funcscan] No hits found by DeepBGC; comBGC summary tool will not be run for sample ${DEEPBGC_PIPELINE.out.bgc_tsv.collect{it[0]}}.")) // TODO: Trying to insert meta.id in the end of the warning string - } + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) } // GECCO @@ -151,28 +148,72 @@ workflow BGC { } // COMBGC - COMBGC ( ch_bgcresults_for_combgc ) - ch_versions = ch_versions.mix( COMBGC.out.versions ) - // COMBGC concatenation - if ( !params.run_taxa_classification ) { - ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) + // Print warning if BGC tools find no hits + if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc && !params.bgc_skip_gecco ) { // If all BGC tools are executed but find no hits + DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + GECCO_RUN.out.gbk.ifEmpty ( + DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there + .filter { + meta, gbk -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + ) + ) + } else if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc ) { + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( + DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there + .filter { + meta, gbk -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + ) + } else if ( !params.bgc_skip_antismash && !params.bgc_skip_gecco ) { + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + GECCO_RUN.out.gbk.ifEmpty ( + GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there + .filter { + meta, tsv -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + ) + } else if ( !params.bgc_skip_antismash ) { + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + ANTISMASH_ANTISMASHLITE.out.gbk_input + .filter { + meta, tsv -> // AntiSMASH gbk_input always exists, take meta from there + log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) } else { - ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) - } + COMBGC ( ch_bgcresults_for_combgc ) + ch_versions = ch_versions.mix( COMBGC.out.versions ) - // MERGE_TAXONOMY - if ( params.run_taxa_classification ) { + // COMBGC concatenation + if ( !params.run_taxa_classification ) { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) + } else { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) + } - ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() - MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) - ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { - ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) - .combine(MERGE_TAXONOMY_COMBGC.out.tsv) + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) - BGC_TABIX_BGZIP( ch_tabix_input ) - ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) + ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) + .combine(MERGE_TAXONOMY_COMBGC.out.tsv) + + BGC_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) + } } emit: diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index 7dd7d98f..e0a8cac5 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -122,7 +122,7 @@ workflow FUNCSCAN { */ // Some tools require annotated FASTAs - if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash ) ) ) { + if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) ) { ANNOTATION( ch_input_for_annotation ) ch_versions = ch_versions.mix( ANNOTATION.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files ) @@ -362,8 +362,6 @@ workflow FUNCSCAN { ) ) - ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]}.ifEmpty([]) ) - MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From 3fb1dae0b87a9e3c3fb7374063d4eb82f97ca03b Mon Sep 17 00:00:00 2001 From: jasmezz Date: Mon, 10 Jun 2024 13:32:21 +0200 Subject: [PATCH 05/15] Add missing if-else statements; add comments --- subworkflows/local/bgc.nf | 40 ++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index d2d3d9b6..672c872b 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -157,37 +157,63 @@ workflow BGC { DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there .filter { meta, gbk -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + log.warn("[nf-core/funcscan] BGC workflow: No hits found by DeepBGC, antiSMASH, and GECCO; comBGC summary tool will not be run for sample ${meta.id}.") } ) ) ) - } else if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc ) { + } else if ( !params.bgc_skip_antismash && !params.bgc_skip_gecco ) { // If only antiSMASH and GECCO are executed but find no hits + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + GECCO_RUN.out.gbk.ifEmpty ( + GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there + .filter { + meta, tsv -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH and GECCO; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + ) + } else if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc ) { // If only antiSMASH and DeepBGC are executed but find no hits ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there .filter { meta, gbk -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH and DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") } ) ) - } else if ( !params.bgc_skip_antismash && !params.bgc_skip_gecco ) { + } else if ( !params.bgc_skip_gecco && !params.bgc_skip_deepbgc ) { // If only GECCO and DeepBGC are executed but find no hits ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( GECCO_RUN.out.gbk.ifEmpty ( GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there .filter { meta, tsv -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + log.warn("[nf-core/funcscan] BGC workflow: No hits found by GECCO and DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") } ) ) - } else if ( !params.bgc_skip_antismash ) { + } else if ( !params.bgc_skip_deepbgc ) { // If only DeepBGC is executed but finds no hits + DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( + DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there + .filter { + meta, gbk -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + } else if ( !params.bgc_skip_gecco ) { // If only GECCO is executed but finds no hits + GECCO_RUN.out.gbk.ifEmpty ( + GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there + .filter { + meta, tsv -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by GECCO; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + } else if ( !params.bgc_skip_antismash ) { // If only antiSMASH is executed but finds no hits ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( ANTISMASH_ANTISMASHLITE.out.gbk_input .filter { meta, tsv -> // AntiSMASH gbk_input always exists, take meta from there - log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH; comBGC summary tool will not be run for sample ${meta.id}.") } ) } else { From f6b792e7ababdfa4d3d4320449c41ead75a0ef5a Mon Sep 17 00:00:00 2001 From: jasmezz Date: Wed, 12 Jun 2024 13:47:43 +0200 Subject: [PATCH 06/15] Alternative way of warning if no BGC hits are found --- subworkflows/local/bgc.nf | 86 +++++++-------------------------------- 1 file changed, 14 insertions(+), 72 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 672c872b..b4fcf3dc 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -80,6 +80,9 @@ workflow BGC { meta, files -> [meta, files.flatten()] } + ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( + ch_antismashresults_for_combgc = Channel.empty() + ) ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_antismashresults_for_combgc ) } @@ -148,77 +151,17 @@ workflow BGC { } // COMBGC - - // Print warning if BGC tools find no hits - if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc && !params.bgc_skip_gecco ) { // If all BGC tools are executed but find no hits - DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - GECCO_RUN.out.gbk.ifEmpty ( - DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there - .filter { - meta, gbk -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by DeepBGC, antiSMASH, and GECCO; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - ) - ) - } else if ( !params.bgc_skip_antismash && !params.bgc_skip_gecco ) { // If only antiSMASH and GECCO are executed but find no hits - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - GECCO_RUN.out.gbk.ifEmpty ( - GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there - .filter { - meta, tsv -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH and GECCO; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - ) - } else if ( !params.bgc_skip_antismash && !params.bgc_skip_deepbgc ) { // If only antiSMASH and DeepBGC are executed but find no hits - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( - DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there - .filter { - meta, gbk -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH and DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - ) - } else if ( !params.bgc_skip_gecco && !params.bgc_skip_deepbgc ) { // If only GECCO and DeepBGC are executed but find no hits - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - GECCO_RUN.out.gbk.ifEmpty ( - GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there - .filter { - meta, tsv -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by GECCO and DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - ) - } else if ( !params.bgc_skip_deepbgc ) { // If only DeepBGC is executed but finds no hits - DEEPBGC_PIPELINE.out.bgc_tsv.ifEmpty( - DEEPBGC_PIPELINE.out.bgc_gbk // DeepBGC GBK output always exists, take meta from there - .filter { - meta, gbk -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by DeepBGC; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - } else if ( !params.bgc_skip_gecco ) { // If only GECCO is executed but finds no hits - GECCO_RUN.out.gbk.ifEmpty ( - GECCO_RUN.out.genes // GECCO ".genes.tsv" output always exists, take meta from there - .filter { - meta, tsv -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by GECCO; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - } else if ( !params.bgc_skip_antismash ) { // If only antiSMASH is executed but finds no hits - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - ANTISMASH_ANTISMASHLITE.out.gbk_input - .filter { - meta, tsv -> // AntiSMASH gbk_input always exists, take meta from there - log.warn("[nf-core/funcscan] BGC workflow: No hits found by antiSMASH; comBGC summary tool will not be run for sample ${meta.id}.") - } - ) - } else { - COMBGC ( ch_bgcresults_for_combgc ) - ch_versions = ch_versions.mix( COMBGC.out.versions ) + COMBGC ( + ch_bgcresults_for_combgc + ) + + ch_bgcresults_for_combgc.ifEmpty( + fastas.filter { + meta, fna -> + log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + } + ) + ch_versions = ch_versions.mix( COMBGC.out.versions ) // COMBGC concatenation if ( !params.run_taxa_classification ) { @@ -240,7 +183,6 @@ workflow BGC { BGC_TABIX_BGZIP( ch_tabix_input ) ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) } - } emit: versions = ch_versions From 291b6aa8dd670d168e6aa1e5130b04ec7ce8b1a8 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Wed, 12 Jun 2024 13:49:24 +0200 Subject: [PATCH 07/15] Fix formatting --- subworkflows/local/bgc.nf | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index b4fcf3dc..ce059673 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -151,9 +151,7 @@ workflow BGC { } // COMBGC - COMBGC ( - ch_bgcresults_for_combgc - ) + COMBGC ( ch_bgcresults_for_combgc ) ch_bgcresults_for_combgc.ifEmpty( fastas.filter { @@ -163,26 +161,26 @@ workflow BGC { ) ch_versions = ch_versions.mix( COMBGC.out.versions ) - // COMBGC concatenation - if ( !params.run_taxa_classification ) { - ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) - } else { - ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) - } + // COMBGC concatenation + if ( !params.run_taxa_classification ) { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) + } else { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) + } - // MERGE_TAXONOMY - if ( params.run_taxa_classification ) { + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { - ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() - MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) - ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) - ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) - .combine(MERGE_TAXONOMY_COMBGC.out.tsv) + ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) + .combine(MERGE_TAXONOMY_COMBGC.out.tsv) - BGC_TABIX_BGZIP( ch_tabix_input ) - ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) - } + BGC_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) + } emit: versions = ch_versions From a325aaf82de6ab68e782512ee287a93574bf25ae Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 17 Jun 2024 08:15:57 +0200 Subject: [PATCH 08/15] Move ANNOTATION MQC mixing to right place --- workflows/funcscan.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index e0a8cac5..acfa9239 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -125,7 +125,6 @@ workflow FUNCSCAN { if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) ) { ANNOTATION( ch_input_for_annotation ) ch_versions = ch_versions.mix( ANNOTATION.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files ) ch_new_annotation = ch_input_for_annotation .join( ANNOTATION.out.faa ) @@ -362,6 +361,8 @@ workflow FUNCSCAN { ) ) + ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files ) + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From c9ce4dd4b050bd42dab660086a5a6e282ca65c34 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Mon, 17 Jun 2024 14:33:11 +0200 Subject: [PATCH 09/15] Fix multiqc annotation channel, fix prepped_input, have all BGC tools use GBK as input --- subworkflows/local/annotation.nf | 6 +++--- workflows/funcscan.nf | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf index 74bf9666..c593dcb8 100644 --- a/subworkflows/local/annotation.nf +++ b/subworkflows/local/annotation.nf @@ -22,10 +22,10 @@ workflow ANNOTATION { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && !params.bgc_skip_antismash ) ) { // Need to use pyrodigal for antiSMASH because prodigal GBK annotation format is incompatible with antiSMASH. + if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_gecco || !params.bgc_skip_antismash || !params.bgc_skip_deepbgc ) ) ) { // Need to use Pyrodigal for BGC tools because Prodigal GBK annotation format is incompatible with them. - if ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && !params.bgc_skip_antismash ) { - log.warn("[nf-core/funcscan] Switching annotation tool to: pyrodigal. This is because prodigal annotations (in GBK format) are incompatible with antiSMASH. If you specifically wish to run prodigal instead, please skip antiSMASH or provide a pre-annotated GBK file in the samplesheet.") + if ( params.annotation_tool == "prodigal" ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with antiSMASH, DeepBGC, and GECCO. If you specifically wish to run Prodigal instead, please skip BGC workflow or provide a pre-annotated GBK file in the samplesheet.") } PYRODIGAL ( fasta, "gbk" ) diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index acfa9239..374b160e 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -122,20 +122,24 @@ workflow FUNCSCAN { */ // Some tools require annotated FASTAs - if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) ) { + if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening ) ) { ANNOTATION( ch_input_for_annotation ) ch_versions = ch_versions.mix( ANNOTATION.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]} ) + ch_new_annotation = ch_input_for_annotation .join( ANNOTATION.out.faa ) .join( ANNOTATION.out.gbk ) } else { ch_new_annotation = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix( Channel.of([]) ) } // Mix back the preannotated samples with the newly annotated ones ch_prepped_input = ch_intermediate_input.preannotated + .mix( ch_intermediate_input.fastas ) .mix( ch_new_annotation ) .filter { meta, fasta, faa, gbk -> meta.category != 'long' } .multiMap { @@ -361,8 +365,6 @@ workflow FUNCSCAN { ) ) - ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files ) - MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From 9e80d2da8a452ad5147903801e6629c4a206efc4 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Mon, 17 Jun 2024 17:01:40 +0200 Subject: [PATCH 10/15] Fix BGC tools input channel --- subworkflows/local/bgc.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index ce059673..f8e53bf8 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -100,14 +100,14 @@ workflow BGC { ch_versions = ch_versions.mix( DEEPBGC_DOWNLOAD.out.versions ) } - DEEPBGC_PIPELINE ( fastas, ch_deepbgc_database) + DEEPBGC_PIPELINE ( gbks, ch_deepbgc_database) ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) } // GECCO if ( !params.bgc_skip_gecco ) { - ch_gecco_input = fastas.groupTuple() + ch_gecco_input = gbks.groupTuple() .multiMap { fastas: [ it[0], it[1], [] ] } From ddd4441c526e24e71af5d79551fe90199fecd4d7 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 19 Jun 2024 09:40:25 +0200 Subject: [PATCH 11/15] Remove ugly warning from nf-validation --- nextflow.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 8c368156..c676b9e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,6 +31,9 @@ params { version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + // To stop the random warning coming from nf-validation, remove on upgrade to nf-schema + monochromeLogs = null + // Taxonomy classification options run_taxa_classification = false taxa_classification_tool = 'mmseqs2' @@ -244,7 +247,7 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base,fasta' + validationSchemaIgnoreParams = 'genomes,igenomes_base,fasta,monochromeLogs' validationShowHiddenParams = false validate_params = true From bfb76e88b71e48d26687befc80d987e7e343e7b1 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 19 Jun 2024 09:40:50 +0200 Subject: [PATCH 12/15] Move multiQC file annotation mixing to the right place and fix duplicate process running --- workflows/funcscan.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index 98e2a120..dd5028f8 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -126,20 +126,16 @@ workflow FUNCSCAN { ANNOTATION( ch_input_for_annotation ) ch_versions = ch_versions.mix( ANNOTATION.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]} ) - ch_new_annotation = ch_input_for_annotation .join( ANNOTATION.out.faa ) .join( ANNOTATION.out.gbk ) } else { ch_new_annotation = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix( Channel.of([]) ) } // Mix back the preannotated samples with the newly annotated ones ch_prepped_input = ch_intermediate_input.preannotated - .mix( ch_intermediate_input.fastas ) .mix( ch_new_annotation ) .filter { meta, fasta, faa, gbk -> meta.category != 'long' } .multiMap { @@ -367,6 +363,10 @@ workflow FUNCSCAN { ) ) + if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening ) ) { + ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]} ) + } + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From 18afe1fcfd040a34e541cf5d9a16f5a740db4b53 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 20 Jun 2024 11:34:20 +0200 Subject: [PATCH 13/15] Fix comBGC warning --- subworkflows/local/bgc.nf | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index f8e53bf8..98e22f92 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -151,14 +151,17 @@ workflow BGC { } // COMBGC - COMBGC ( ch_bgcresults_for_combgc ) - - ch_bgcresults_for_combgc.ifEmpty( - fastas.filter { - meta, fna -> - log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample ${meta.id}.") + ch_bgc_warning = fastas + .map { + meta, fasta -> + "[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample: ${meta.id}" } - ) + .collect() + + ch_bgcresults_for_combgc + .ifEmpty { log.warn(ch_bgc_warning.val[0]) } + + COMBGC ( ch_bgcresults_for_combgc ) ch_versions = ch_versions.mix( COMBGC.out.versions ) // COMBGC concatenation From 29701018c65a1694a96c4196af6dae5801529d0e Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 20 Jun 2024 17:29:58 +0200 Subject: [PATCH 14/15] Fix warning if no BGC hits found --- subworkflows/local/bgc.nf | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 98e22f92..99d371e0 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -73,16 +73,22 @@ workflow BGC { ANTISMASH_ANTISMASHLITE ( gbks, ch_antismash_databases, ch_antismash_directory, [] ) ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITE.out.versions ) - ch_antismashresults_for_combgc = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir + ch_antismashresults = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir .mix( ANTISMASH_ANTISMASHLITE.out.gbk_input ) .groupTuple() .map{ meta, files -> - [meta, files.flatten()] + [ meta, files.flatten() ] } - ANTISMASH_ANTISMASHLITE.out.gbk_results.ifEmpty( - ch_antismashresults_for_combgc = Channel.empty() - ) + + // Filter out samples with no BGC hits + ch_antismashresults_for_combgc = ch_antismashresults + .join(fastas, remainder: false) + .join(ANTISMASH_ANTISMASHLITE.out.gbk_results, remainder: false) + .map { + meta, gbk_input, fasta, gbk_results -> + [ meta, gbk_input ] + } ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_antismashresults_for_combgc ) } @@ -151,15 +157,14 @@ workflow BGC { } // COMBGC - ch_bgc_warning = fastas - .map { - meta, fasta -> - "[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample: ${meta.id}" - } - .collect() ch_bgcresults_for_combgc - .ifEmpty { log.warn(ch_bgc_warning.val[0]) } + .join(fastas, remainder: true) + .filter { + meta, bgcfile, fasta -> + if ( !bgcfile ) { log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample: ${meta.id}") } + return [meta, bgcfile, fasta] + } COMBGC ( ch_bgcresults_for_combgc ) ch_versions = ch_versions.mix( COMBGC.out.versions ) From 7dafc169d318f1b01513a6aab8b0cb5c6f1d6c82 Mon Sep 17 00:00:00 2001 From: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com> Date: Thu, 20 Jun 2024 20:36:55 +0000 Subject: [PATCH 15/15] Update CHANGELOG.md [skip ci] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a146128a..92d76abf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#365](https://github.com/nf-core/funcscan/pull/365) Fixed AMRFinderPlus module and usage docs for manual database download. (by @jasmezz) - [#371](https://github.com/nf-core/funcscan/pull/371) Fixed AMRFinderPlus parameter `arg_amrfinderplus_name`. (by @m3hdad) - [#376](https://github.com/nf-core/funcscan/pull/376) Fixed an occasional RGI process failure when certain files not produced. (❤️ to @amizeranschi for reporting, fix by @amizeranschi & @jfy133) -- [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names. (by @jfy133, @jasmezz) +- [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names, separate annotation step for all BGC tools, add warning if no BGCs found, fix MultiQC reporting of annotation workflow. (by @jfy133, @jasmezz) ### `Dependencies`