diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5b8befd9..61895ff9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ concurrency: jobs: test: - name: Run pipeline with test data (AMP and ARG workflows) + name: Run pipeline with test data (AMP and ARG) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" runs-on: ubuntu-latest @@ -27,9 +27,9 @@ jobs: - "23.04.0" - "latest-everything" parameters: - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light --arg_skip_deeparg --arg_skip_amrfinderplus" # Skip deeparg and amrfinderplus due to otherwise running out of space on GitHub Actions + - "-profile docker,test_preannotated --annotation_tool prodigal" + - "-profile docker,test --annotation_tool prokka" + - "-profile docker,test --annotation_tool bakta --annotation_bakta_db_downloadtype light --arg_skip_deeparg --arg_skip_amrfinderplus" # Skip deeparg and amrfinderplus due to otherwise running out of space on GitHub Actions steps: - name: Check out pipeline code @@ -43,12 +43,12 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data (AMP and ARG workflows) + - name: Run pipeline with test data (AMP/ARG workflows) run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} ${{ matrix.parameters }} --outdir ./results test_bgc: - name: Run pipeline with test data (BGC workflow) + name: Run pipeline with test data (BGC) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" runs-on: ubuntu-latest @@ -58,9 +58,9 @@ jobs: - "23.04.0" - "latest-everything" parameters: - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" + - "-profile docker,test_preannotated_bgc --annotation_tool prodigal" + - "-profile docker,test_bgc --annotation_tool prokka" + - "-profile docker,test_bgc --annotation_tool bakta --annotation_bakta_db_downloadtype light" steps: - name: Check out pipeline code @@ -76,10 +76,10 @@ jobs: - name: Run pipeline with test data (BGC workflow) run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_bgc,docker --outdir ./results ${{ matrix.parameters }} --bgc_skip_deepbgc + nextflow run ${GITHUB_WORKSPACE} ${{ matrix.parameters }} --outdir ./results --bgc_skip_deepbgc test_taxonomy: - name: Run pipeline with test data (AMP, ARG and BGC taxonomy workflows) + name: Run pipeline with test data (AMP, ARG and BGC with taxonomy) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" runs-on: ubuntu-latest @@ -89,9 +89,9 @@ jobs: - "23.04.0" - "latest-everything" parameters: - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" + - "-profile docker,test_taxonomy --annotation_tool prodigal" + - "-profile docker,test_taxonomy --annotation_tool prokka" + - "-profile docker,test_taxonomy --annotation_tool bakta --annotation_bakta_db_downloadtype light" 
steps: - name: Check out pipeline code @@ -107,4 +107,4 @@ jobs: - name: Run pipeline with test data (AMP, ARG and BGC taxonomy workflows) run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_taxonomy,docker --outdir ./results ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} ${{ matrix.parameters }} --outdir ./results diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ffe23d8..5f3e8e60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#343](https://github.com/nf-core/funcscan/pull/343) Added contig taxonomic classification using [MMseqs2](https://github.com/soedinglab/MMseqs2/). (by @darcy220606) - [#358](https://github.com/nf-core/funcscan/pull/358) Improved RGI databases handling, users can supply their own CARD now. (by @jasmezz) - [#375](https://github.com/nf-core/funcscan/pull/375) Merged pipeline template of nf-core/tools version 2.14.1 (by @jfy133) +- [#381](https://github.com/nf-core/funcscan/pull/381) Added support for supplying pre-annotated sequences to the pipeline. (by @jfy133, @jasmezz) ### `Fixed` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index efe5277c..45ddd48c 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -10,6 +10,14 @@ report_section_order: "nf-core-funcscan-summary": order: -1002 +run_modules: + - prokka + - custom_content + +table_columns_visible: + Prokka: + organism: False + export_plots: true disable_version_detection: true diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 22583f22..791912cd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fasta -sample_1,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_1.fasta.gz -sample_2,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_2.fasta.gz +sample,fasta,protein,gbk +sample_1,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_1.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.faa,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.gbk +sample_2,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_2.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.faa.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.gbk.gz +sample_3,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index 757969c2..25efc523 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -18,9 +18,27 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.(fasta|fas|fa|fna)(\\.gz)?$", - "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension '.fasta', '.fas', '.fa' or '.fna' (any of these can be optionally compressed as '.gz')", + "pattern": "^\\S+\\.(fasta|fas|fna|fa)(\\.gz)?$", + "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension '.fasta', '.fas', '.fna' or '.fa' (any of these can be optionally compressed as '.gz')", "unique": true + }, + "protein": { + "type": "string", + "format": "file-path", + "exists": true, +
"pattern": "^\\S+\\.(faa)(\\.gz)?$", + "errorMessage": "Input file for peptide annotations has incorrect file format. File must end in '.faa' or '.faa.gz'", + "unique": true, + "dependentRequired": ["gbk"] + }, + "gbk": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(gbk|gbff)(\\.gz)?$", + "errorMessage": "Input file for feature annotations has incorrect file format. File must end in .gbk or .gbff", + "unique": true, + "dependentRequired": ["protein"] } }, "required": ["sample", "fasta"] diff --git a/conf/base.config b/conf/base.config index c3d2523f..32c67616 100644 --- a/conf/base.config +++ b/conf/base.config @@ -79,11 +79,6 @@ process { time = { check_max( 8.h * task.attempt, 'time' ) } } - withName: PRODIGAL_GFF { - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - cpus = 1 - } - withName: PRODIGAL_GBK { memory = { check_max( 2.GB * task.attempt, 'memory' ) } cpus = 1 diff --git a/conf/modules.config b/conf/modules.config index 2f63d961..3694f0c8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -33,32 +33,6 @@ process { ] } - withName: SEQKIT_SEQ_LONG { - ext.prefix = { "${meta.id}_long" } - publishDir = [ - path: { "${params.outdir}/qc/seqkit/" }, - mode: params.publish_dir_mode, - enabled: params.contig_qc_savesplitfastas, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = [ - "--min-len ${params.contig_qc_lengththreshold}" - ].join(' ').trim() - } - - withName: SEQKIT_SEQ_SHORT { - ext.prefix = { "${meta.id}_short" } - publishDir = [ - path: { "${params.outdir}/qc/seqkit/" }, - mode: params.publish_dir_mode, - enabled: params.contig_qc_savesplitfastas, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = [ - "--max-len ${params.contig_qc_lengththreshold - 1}" - ].join(' ').trim() - } - withName: MMSEQS_DATABASES { publishDir = [ path: { "${params.outdir}/databases/mmseqs/" }, @@ -110,6 +84,7 @@ process { } withName: PROKKA { + ext.prefix = { "${meta.id}_prokka" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ path: { "${params.outdir}/annotation/prokka/" }, mode: params.publish_dir_mode, @@ -128,7 +103,7 @@ process { params.annotation_prokka_rawproduct ? '--rawproduct' : '', params.annotation_prokka_rnammer ? '--rnammer' : '', params.annotation_prokka_compliant ? '--compliant' : '', - params.annotation_prokka_addgenes ? '--addgenes' : '' + params.annotation_prokka_addgenes ? '--addgenes' : '', ].join(' ').trim() } @@ -145,6 +120,7 @@ process { } withName: BAKTA_BAKTA { + ext.prefix = { "${meta.id}_bakta" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ path: { "${params.outdir}/annotation/bakta/${meta.id}" }, mode: params.publish_dir_mode, @@ -174,28 +150,12 @@ process { ].join(' ').trim() } - withName: PRODIGAL_GFF { - publishDir = [ - path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, - mode: params.publish_dir_mode, - enabled: params.save_annotations, - pattern: "*.{faa,fna,gff}.gz", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = [ - params.annotation_prodigal_singlemode ? "-p single" : "-p meta", - params.annotation_prodigal_closed ? "-c" : "", - params.annotation_prodigal_forcenonsd ?
"-n" : "", - "-g ${params.annotation_prodigal_transtable}" - ].join(' ').trim() - } - - withName: PRODIGAL_GBK { + withName: PRODIGAL { publishDir = [ path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.gbk.gz", + pattern: "*.{faa,fna,gbk,faa.gz,faa.gz,fna.gz,gbk.gz}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -207,11 +167,12 @@ process { } withName: PYRODIGAL { + ext.prefix = { "${meta.id}_pyrodigal" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ path: { "${params.outdir}/annotation/pyrodigal/${meta.id}" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.{faa,fna,gff,score}.gz", + pattern: "*.{faa,fna,gbk,score}.gz", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -287,6 +248,7 @@ process { } withName: FARGENE { + tag = {"${meta.id}|${hmm_model}"} publishDir = [ [ path: { "${params.outdir}/arg/fargene/${meta.id}" }, diff --git a/conf/test.config b/conf/test.config index 887f3528..f1345093 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' - annotation_tool = 'prodigal' + annotation_tool = 'pyrodigal' run_arg_screening = true arg_fargene_hmmmodel = 'class_a,class_b_1_2' diff --git a/conf/test_bgc.config b/conf/test_bgc.config index 89228579..d1419d86 100644 --- a/conf/test_bgc.config +++ b/conf/test_bgc.config @@ -23,7 +23,7 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet_reduced.csv' bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' - annotation_tool = 'prodigal' + annotation_tool = 'pyrodigal' run_arg_screening = false run_amp_screening = false diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 34fdd49a..9da474c7 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -7,7 +7,7 @@ Although in this case we turn everything off Use as follows: - nextflow run nf-core/funcscan -profile test, --outdir + nextflow run nf-core/funcscan -profile test_nothing, --outdir ---------------------------------------------------------------------------------------- */ @@ -24,10 +24,30 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet_reduced.csv' amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' - annotation_tool = 'prodigal' + annotation_tool = 'pyrodigal' run_arg_screening = false run_amp_screening = false run_bgc_screening = false + + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + amp_skip_amplify = true + amp_skip_macrel = true + amp_skip_ampir = true + amp_skip_hmmsearch = true + + arg_skip_deeparg = true + arg_skip_fargene = true + arg_skip_rgi = true + arg_skip_amrfinderplus = true + arg_skip_deeparg = true + arg_skip_abricate = true + + bgc_skip_antismash = true + bgc_skip_deepbgc = true + bgc_skip_gecco = true + 
bgc_skip_hmmsearch = true } diff --git a/conf/test_preannotated.config b/conf/test_preannotated.config new file mode 100644 index 00000000..27e67209 --- /dev/null +++ b/conf/test_preannotated.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/funcscan -profile test_preannotated, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile - preannotated input' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet_preannotated.csv' + amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' + + annotation_tool = 'pyrodigal' + + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + run_amp_screening = true +} diff --git a/conf/test_preannotated_bgc.config b/conf/test_preannotated_bgc.config new file mode 100644 index 00000000..47aa7504 --- /dev/null +++ b/conf/test_preannotated_bgc.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_preannotated_bgc, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'BGC test profile - preannotated input BGC' + config_profile_description = 'Minimal test dataset to check BGC workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet_preannotated.csv' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + + annotation_tool = 'pyrodigal' + + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = true +} diff --git a/conf/test_taxonomy.config b/conf/test_taxonomy.config index ad477b3c..33506878 100644 --- a/conf/test_taxonomy.config +++ b/conf/test_taxonomy.config @@ -16,7 +16,7 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '8.GB' + max_memory = '14.GB' max_time = '6.h' // Input data @@ -25,7 +25,7 @@ params { amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' run_taxa_classification = true - annotation_tool = 'prodigal' + annotation_tool = 'pyrodigal' run_arg_screening = true arg_skip_deeparg = true @@ -36,3 +36,9 @@ params { run_bgc_screening = true bgc_skip_deepbgc = true } + +process { + withName: MMSEQS_DATABASES { + memory = '14.GB' + } +} diff --git a/docs/output.md b/docs/output.md index 65c73a42..58de5553 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,12 +14,6 @@ Similarly, all downloaded databases are saved (i.e. from [MMseqs2](https://githu Furthermore, for reproducibility, versions of all software used in the run is presented in a [MultiQC](http://multiqc.info) report. -:::info -Note that (unannotated) input contigs will be split into two categories per sample: long and short. Each sample will thus get two sets of results for each ARG/AMP screening (suffixed with `_long` and `_short` respectively, assuming contigs remain above/below the threshold), whereas for BGC results only `_long` will exist. This is because BGCs can only be reliability screened with longer contigs. - -The threshold for the separation can be adjusted with `--contig_qc_lengththreshold `. -::: - The directories listed below will be created in the results directory (specified by the `--outdir` flag) after the pipeline has finished. All paths are relative to this top-level output directory. The default directory structure of nf-core/funcscan is: ```console @@ -111,18 +105,6 @@ Output Summaries: ## Tool details -### Input contig QC - -
-Output files - -- `qc/seqkit/` - - `_long.fasta`: FASTA file containing contigs equal or longer than the threshold set by `--contig_qc_lengththreshold` used in downstream AMP, ARG, BGC subworkflows - - `_short.fasta`: FASTA file containing contigs shorter than the threshold set by `--contig_qc_lengththreshold` used in downstream AMP, ARG subworkflows -
- -[SeqKit](https://bioinf.shenwei.me/seqkit/) is a cross-platform and ultrafast toolkit for FASTA/Q file manipulation. - ### Taxonomic classification tool
@@ -149,7 +131,6 @@ Output Summaries: - `prodigal/` - `/`: - - `*.gff`: annotation in GFF3 format, containing both sequences and annotations - `*.fna`: nucleotide FASTA file of the input contig sequences - `*.faa`: protein FASTA file of the translated CDS sequences - `*.gbk`: annotation in GBK format, containing both sequences and annotations @@ -167,8 +148,8 @@ Output Summaries: - `pyrodigal/` - `/`: - - `*.gff`: annotation in GFF3 format, containing both sequences and annotations - - `*.fna`: nucleotide FASTA file of the input contig sequences + - `*.gbk`: annotation in GBK format, containing both sequences and annotations + - `*.fna`: nucleotide FASTA file of the annotated CDS sequences - `*.faa`: protein FASTA file of the translated CDS sequences > Descriptions taken from the [Pyrodigal documentation](https://pyrodigal.readthedocs.io/) diff --git a/docs/usage.md b/docs/usage.md index f102cb1c..4640db88 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -52,25 +52,39 @@ nf-core/funcscan takes FASTA files as input, typically contigs or whole genome s --input '[path to samplesheet file]' ``` -The input samplesheet has to be a comma-separated file (`.csv`) with 2 columns (`sample`, and `fasta`), and a header row as shown in the examples below. +The input samplesheet has to be a comma-separated file (`.csv`) with either 2 columns (`sample`, `fasta`) or 4 columns (`sample`, `fasta`, `protein`, `gbk`), and a header row as shown in the examples below. -```bash +If you already have annotated contigs with peptide sequences and an annotation file in Genbank format (`.gbk` or `.gbff`), you can supply these to the pipeline using the optional `protein` and `gbk` columns. If these additional columns are supplied, pipeline annotation (i.e. with Bakta, Prodigal, Pyrodigal, or Prokka) will be skipped and the corresponding annotation files used instead. + +For two columns (without pre-annotated data): + +```csv title="samplesheet.csv" sample,fasta sample_1,///wastewater_metagenome_contigs_1.fasta.gz sample_2,///wastewater_metagenome_contigs_2.fasta.gz ``` -| Column | Description | -| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This will be used to name all output files from the pipeline. Spaces in sample names are automatically converted to underscores (`_`). | -| `fasta` | Path or URL to a gzipped or uncompressed FASTA file. Accepted file suffixes are: `.fasta`, `.fna`, or `.fa`, or any of these with `.gz`, e.g. `.fa.gz`. | +For four columns (with pre-annotated data): + +```csv title="samplesheet.csv" +sample,fasta,protein,gbk +sample_1,///wastewater_metagenome_contigs_1.fasta.gz,///wastewater_metagenome_contigs_1.faa,///wastewater_metagenome_contigs_1.fasta.gbk +sample_2,///wastewater_metagenome_contigs_2.fasta.gz,///wastewater_metagenome_contigs_2.faa,///wastewater_metagenome_contigs_2.fasta.gbk +``` + +| Column | Description | +| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This will be used to name all output files from the pipeline. Spaces in sample names are automatically converted to underscores (`_`). | +| `fasta` | Path or URL to a gzipped or uncompressed FASTA file.
Accepted file suffixes are: `.fasta`, `.fna`, or `.fa`, or any of these with `.gz`, e.g. `.fa.gz`. | +| `protein` | Optional path to a pre-generated amino acid FASTA file (`.faa`) containing protein annotations of `fasta`, optionally gzipped. Must be supplied if `gbk` is also given. | +| `gbk` | Optional path to a pre-generated annotation file in Genbank format (`.gbk` or `.gbff`) containing annotation information for `fasta`, optionally gzipped. Must be supplied if `protein` is also given. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -:::warning +:::danger We highly recommend performing quality control on input contigs before running the pipeline. You may not receive results for some tools if none of the contigs in a FASTA file reach certain thresholds. Check parameter documentation for relevant minimum contig parameters. -For example, by default BGC screening requires contigs of at least 3,000 bp (see `--contig_qc_lengththreshold`). +For example, BGC screening ideally requires contigs of at least 3,000 bp, otherwise downstream tools may crash. ::: ## Notes on screening tools and taxonomic classification @@ -97,13 +111,11 @@ MMseqs2 is currently the only taxonomic classification tool used in the pipeline antiSMASH has a minimum contig parameter, in which only contigs of a certain length (or longer) will be screened. In cases where no hits are found in these, the tool ends successfully without hits. However if no contigs in an input file reach that minimum threshold, the tool will end with a 'failure' code, and cause the pipeline to crash. -To prevent entire pipeline failures due to a single 'bad sample', nf-core/funcscan will filter out any input sample in which none of the contigs reach the minimum contig length in bp specified with `--bgc_antismash_sampleminlength` (default: 1000). - -> ⚠️ If a sample does not reach this contig length threshold, you will receive a warning in your console and in the `.nextflow.log` file, and no result files will exist for this sample in your results directory for this tool. - When the annotation is run with Prokka, the resulting `.gbk` file passed to antiSMASH may produce the error `translation longer than location allows` and end the pipeline run. This Prokka bug has been reported before (see [discussion on GitHub](https://github.com/antismash/antismash/discussions/450)) and is not likely to be fixed soon. -> ⚠️ If antiSMASH is run for BGC detection, we recommend to **not** run Prokka for annotation but instead use the default annotation tool (Pyrodigal) or switch to Prodigal, or (for bacteria only!) Bakta. +:::warning +If antiSMASH is run for BGC detection, we recommend **not** running Prokka for annotation but instead using the default annotation tool (Pyrodigal) or switching to Prodigal, or (for bacteria only!) Bakta. +::: ## Databases and reference files Various tools of nf-core/funcscan use databases and reference files to operate. nf-core/funcscan offers the functionality to auto-download databases for you, and as these databases can be very large, we suggest storing these files in a central place from where you can reuse them across pipeline runs. -We **highly recommend** allowing the pipeline to download these databases for you on a first run, saving these to your results directory with `--save_databases`, then moving these to a different location (in case you wish to delete the results directory of this first run).
An exception to this is HMM files where no auto-downloading functionality is possible. +If your infrastructure has internet access (particularly on compute nodes), we **highly recommend** allowing the pipeline to download these databases for you on a first run, saving these to your results directory with `--save_databases`, then moving these to a different location (in case you wish to delete the results directory of this first run). An exception to this is HMM files where no auto-downloading functionality is possible. -> ⚠️ We generally do not recommend downloading the databases yourself, as this can often be non-trivial to do! +:::warning +We generally do not recommend downloading the databases yourself, as this can often be non-trivial to do! +::: As a reference, we will describe below where and how you can obtain databases and reference files used for tools included in the pipeline. @@ -135,7 +149,9 @@ And then passed to the pipeline with: --annotation_bakta_db_localpath ///db/ ``` -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### hmmsearch @@ -190,7 +206,9 @@ To obtain a local version of the database:
-> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### DeepARG @@ -218,8 +236,9 @@ You can then supply the path to resulting database directory with: Note that if you supply your own database that is not downloaded by the pipeline, make sure to also supply `--arg_deeparg_data_version` along with the version number so hAMRonization will correctly display the database version in the summary report. - -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### RGI @@ -234,7 +253,9 @@ You can then supply the path to resulting database directory with: --arg_rgi_database '////' ``` -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### antiSMASH @@ -257,9 +278,13 @@ To supply the database directories to the pipeline: Note that the names of the supplied folders must differ from each other (e.g. `antismash_db` and `antismash_dir`). If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline. -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: -> ℹ️ If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory. +:::info +If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory. 
+::: ### DeepBGC diff --git a/nextflow.config b/nextflow.config index 4047b6be..6a0b16de 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,10 +31,6 @@ params { version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - // QC options - contig_qc_lengththreshold = 3000 - contig_qc_savesplitfastas = false - // Taxonomy classification options run_taxa_classification = false taxa_classification_tool = 'mmseqs2' @@ -350,12 +346,14 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_bgc { includeConfig 'conf/test_bgc.config' } - test_taxonomy { includeConfig 'conf/test_taxonomy.config' } - test_full { includeConfig 'conf/test_full.config' } - test_deeparg { includeConfig 'conf/test_deeparg.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } + test { includeConfig 'conf/test.config' } + test_bgc { includeConfig 'conf/test_bgc.config' } + test_taxonomy { includeConfig 'conf/test_taxonomy.config' } + test_full { includeConfig 'conf/test_full.config' } + test_deeparg { includeConfig 'conf/test_deeparg.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + test_preannotated { includeConfig 'conf/test_preannotated.config' } + test_preannotated_bgc { includeConfig 'conf/test_preannotated_bgc.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index 14256fb0..75b5830b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -68,27 +68,6 @@ }, "fa_icon": "fas fa-network-wired" }, - "input_qc": { - "title": "Input Quality Control", - "type": "object", - "description": "These parameters influence some simple contig quality control that occur prior to screening.", - "default": "", - "properties": { - "contig_qc_lengththreshold": { - "type": "number", - "default": 3000, - "description": "The sequence length threshold at which contigs are considered 'long' (above or same as threshold) vs. 'short' (below threshold).", - "help_text": "nf-core/funcscan will split unannotated input contigs into two categories based on this parameter: long, and short.\n\nFor both ARG and AMPs, both categories of contigs will be screened. For BGCs, only contigs in the 'long' category will be screened.\n\nThis is due to an (approximate) 'biological' minimal that nucleotide sequences need to be to synthesise a valid BGC, as well as to speeding up pipeline runs by screening only useful contigs.\n\nFile and sample ID names in results tables will be indicated by `_long` and `_short` suffixes.", - "fa_icon": "fas fa-check-circle" - }, - "contig_qc_savesplitfastas": { - "type": "boolean", - "description": "Specify to save the `_long` and `_short` FASTAs generated during QC", - "fa_icon": "fas fa-check-circle" - } - }, - "fa_icon": "fas fa-network-wired" - }, "taxonomic_classification": { "title": "Taxonomic classification", "type": "object", @@ -1461,9 +1440,6 @@ { "$ref": "#/definitions/screening_type_activation" }, - { - "$ref": "#/definitions/input_qc" - }, { "$ref": "#/definitions/taxonomic_classification" }, diff --git a/subworkflows/local/amp.nf b/subworkflows/local/amp.nf index 30f4a171..47dec041 100644 --- a/subworkflows/local/amp.nf +++ b/subworkflows/local/amp.nf @@ -14,9 +14,9 @@ include { MERGE_TAXONOMY_AMPCOMBI } from '.. 
workflow AMP { take: - contigs // tuple val(meta), path(contigs) - faa // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) - tsv // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + fastas // tuple val(meta), path(contigs) + faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = Channel.empty() @@ -27,10 +27,10 @@ workflow AMP { // When adding new tool that requires FAA, make sure to update conditions // in funcscan.nf around annotation and AMP subworkflow execution // to ensure annotation is executed! - ch_faa_for_amplify = faa - ch_faa_for_amp_hmmsearch = faa - ch_faa_for_ampir = faa - ch_faa_for_ampcombi = faa + ch_faa_for_amplify = faas + ch_faa_for_amp_hmmsearch = faas + ch_faa_for_ampir = faas + ch_faa_for_ampcombi = faas // AMPLIFY if ( !params.amp_skip_amplify ) { @@ -41,7 +41,7 @@ workflow AMP { // MACREL if ( !params.amp_skip_macrel ) { - MACREL_CONTIGS ( contigs ) + MACREL_CONTIGS ( fastas ) ch_versions = ch_versions.mix( MACREL_CONTIGS.out.versions ) GUNZIP_MACREL_PRED ( MACREL_CONTIGS.out.amp_prediction ) GUNZIP_MACREL_ORFS ( MACREL_CONTIGS.out.all_orfs ) @@ -71,14 +71,15 @@ workflow AMP { [ meta, file ] } - ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch.combine( ch_amp_hmm_models_meta ) - .map { - meta_faa, faa, meta_hmm, hmm -> - def meta_new = [:] - meta_new['id'] = meta_faa['id'] - meta_new['hmm_id'] = meta_hmm['id'] - [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] - } + ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch + .combine( ch_amp_hmm_models_meta ) + .map { + meta_faa, faa, meta_hmm, hmm -> + def meta_new = [:] + meta_new['id'] = meta_faa['id'] + meta_new['hmm_id'] = meta_hmm['id'] + [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] + } AMP_HMMER_HMMSEARCH ( ch_in_for_amp_hmmsearch ) ch_versions = ch_versions.mix( AMP_HMMER_HMMSEARCH.out.versions ) @@ -107,15 +108,15 @@ workflow AMP { //AMPCOMBI concatenation if ( !params.run_taxa_classification ) { - ch_ampcombi_summaries = AMPCOMBI.out.csv.map{ it[1] }.collectFile( name: 'ampcombi_complete_summary.tsv', storeDir: "${params.outdir}/reports/ampcombi",keepHeader:true ) + ch_ampcombi_summaries = AMPCOMBI.out.csv.map{ it[1] }.collectFile( name: 'ampcombi_complete_summary.csv', storeDir: "${params.outdir}/reports/ampcombi",keepHeader:true ) } else { - ch_ampcombi_summaries = AMPCOMBI.out.csv.map{ it[1] }.collectFile( name: 'ampcombi_complete_summary.tsv', keepHeader:true ) + ch_ampcombi_summaries = AMPCOMBI.out.csv.map{ it[1] }.collectFile( name: 'ampcombi_complete_summary.csv', keepHeader:true ) } // MERGE_TAXONOMY if ( params.run_taxa_classification ) { - ch_mmseqs_taxonomy_list = tsv.map{ it[1] }.collect() + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() MERGE_TAXONOMY_AMPCOMBI(ch_ampcombi_summaries, ch_mmseqs_taxonomy_list) ch_versions = ch_versions.mix(MERGE_TAXONOMY_AMPCOMBI.out.versions) diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf new file mode 100644 index 00000000..74bf9666 --- /dev/null +++ b/subworkflows/local/annotation.nf @@ -0,0 +1,93 @@ +/* + Run annotation tools +*/ + +include { PROKKA } from '../../modules/nf-core/prokka/main' +include { PRODIGAL } from '../../modules/nf-core/prodigal/main' +include { PYRODIGAL } from '../../modules/nf-core/pyrodigal/main' +include { BAKTA_BAKTADBDOWNLOAD } from 
'../../modules/nf-core/bakta/baktadbdownload/main' +include { BAKTA_BAKTA } from '../../modules/nf-core/bakta/bakta/main' +include { GUNZIP as GUNZIP_PRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' + +workflow ANNOTATION { + take: + fasta // tuple val(meta), path(contigs) + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && !params.bgc_skip_antismash ) ) { // Need to use pyrodigal for antiSMASH because prodigal GBK annotation format is incompatible with antiSMASH. + + if ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && !params.bgc_skip_antismash ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: pyrodigal. This is because prodigal annotations (in GBK format) are incompatible with antiSMASH. If you specifically wish to run prodigal instead, please skip antiSMASH or provide a pre-annotated GBK file in the samplesheet.") + } + + PYRODIGAL ( fasta, "gbk" ) + GUNZIP_PYRODIGAL_FAA ( PYRODIGAL.out.faa ) + GUNZIP_PYRODIGAL_FNA ( PYRODIGAL.out.fna) + GUNZIP_PYRODIGAL_GBK ( PYRODIGAL.out.annotations ) + ch_versions = ch_versions.mix(PYRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PYRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prodigal" ) { + + PRODIGAL ( fasta, "gbk" ) + GUNZIP_PRODIGAL_FAA ( PRODIGAL.out.amino_acid_fasta ) + GUNZIP_PRODIGAL_FNA ( PRODIGAL.out.nucleotide_fasta) + GUNZIP_PRODIGAL_GBK ( PRODIGAL.out.gene_annotations ) + ch_versions = ch_versions.mix(PRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prokka" ) { + + PROKKA ( fasta, [], [] ) + ch_versions = ch_versions.mix(PROKKA.out.versions) + ch_multiqc_files = PROKKA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = PROKKA.out.faa + ch_annotation_fna = PROKKA.out.fna + ch_annotation_gbk = PROKKA.out.gbk + + } else if ( params.annotation_tool == "bakta" ) { + + // BAKTA prepare download + if ( params.annotation_bakta_db_localpath ) { + ch_bakta_db = Channel + .fromPath( params.annotation_bakta_db_localpath ) + .first() + } else { + BAKTA_BAKTADBDOWNLOAD ( ) + ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions ) + ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db ) + } + + BAKTA_BAKTA ( fasta, ch_bakta_db, [], [] ) + ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions) + ch_multiqc_files = 
BAKTA_BAKTA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = BAKTA_BAKTA.out.faa + ch_annotation_fna = BAKTA_BAKTA.out.fna + ch_annotation_gbk = BAKTA_BAKTA.out.gbff + } + + emit: + versions = ch_versions + multiqc_files = ch_multiqc_files + faa = ch_annotation_faa // [ [meta], path(faa) ] + fna = ch_annotation_fna // [ [meta], path(fna) ] + gbk = ch_annotation_gbk // [ [meta], path(gbk) ] +} diff --git a/subworkflows/local/arg.nf b/subworkflows/local/arg.nf index f56a1d67..49d536d6 100644 --- a/subworkflows/local/arg.nf +++ b/subworkflows/local/arg.nf @@ -22,9 +22,9 @@ include { MERGE_TAXONOMY_HAMRONIZATION } from '../../modules/local/merge_t workflow ARG { take: - contigs // tuple val(meta), path(contigs) + fastas // tuple val(meta), path(contigs) annotations - tsv // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = Channel.empty() @@ -45,7 +45,7 @@ workflow ARG { } if ( !params.arg_skip_amrfinderplus ) { - AMRFINDERPLUS_RUN ( contigs, ch_amrfinderplus_db ) + AMRFINDERPLUS_RUN ( fastas, ch_amrfinderplus_db ) ch_versions = ch_versions.mix( AMRFINDERPLUS_RUN.out.versions ) // Reporting @@ -59,20 +59,20 @@ workflow ARG { ch_fargene_classes = Channel.fromList( params.arg_fargene_hmmmodel.tokenize(',') ) - ch_fargene_input = contigs + ch_fargene_input = fastas .combine( ch_fargene_classes ) .map { - meta, contigs, hmm_class -> + meta, fastas, hmm_class -> def meta_new = meta.clone() meta_new['hmm_class'] = hmm_class - [ meta_new, contigs, hmm_class ] + [ meta_new, fastas, hmm_class ] } .multiMap { - contigs: [ it[0], it[1] ] + fastas: [ it[0], it[1] ] hmmclass: it[2] } - FARGENE ( ch_fargene_input.contigs, ch_fargene_input.hmmclass ) + FARGENE ( ch_fargene_input.fastas, ch_fargene_input.hmmclass ) ch_versions = ch_versions.mix( FARGENE.out.versions ) // Reporting @@ -102,7 +102,7 @@ workflow ARG { RGI_CARDANNOTATION ( rgi_database ) ch_versions = ch_versions.mix( RGI_CARDANNOTATION.out.versions ) - RGI_MAIN ( contigs, RGI_CARDANNOTATION.out.db, [] ) + RGI_MAIN ( fastas, RGI_CARDANNOTATION.out.db, [] ) ch_versions = ch_versions.mix( RGI_MAIN.out.versions ) // Reporting @@ -149,7 +149,7 @@ workflow ARG { // ABRicate run if ( !params.arg_skip_abricate ) { - ABRICATE_RUN ( contigs ) + ABRICATE_RUN ( fastas ) ch_versions = ch_versions.mix( ABRICATE_RUN.out.versions ) HAMRONIZATION_ABRICATE ( ABRICATE_RUN.out.report, 'json', '1.0.1', '2021-Mar-27' ) @@ -170,7 +170,7 @@ workflow ARG { // MERGE_TAXONOMY if ( params.run_taxa_classification ) { - ch_mmseqs_taxonomy_list = tsv.map{ it[1] }.collect() + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() MERGE_TAXONOMY_HAMRONIZATION( HAMRONIZATION_SUMMARIZE.out.tsv, ch_mmseqs_taxonomy_list ) ch_versions = ch_versions.mix( MERGE_TAXONOMY_HAMRONIZATION.out.versions ) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 48266303..455e7719 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -18,11 +18,10 @@ include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/m workflow BGC { take: - fna // tuple val(meta), path(PREPPED_INPUT.out.fna) - gff // tuple val(meta), path(.out.gff) - faa // tuple val(meta), path(.out.faa) - gbk // tuple val(meta), path(.out.gbk) - tsv // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + fastas // tuple val(meta), path(PREPPED_INPUT.out.fna) + faas // tuple val(meta), path(.out.faa) + gbks // tuple val(meta), path(.out.gbk) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = 
Channel.empty() @@ -31,7 +30,7 @@ workflow BGC { // When adding new tool that requires FAA, make sure to update conditions // in funcscan.nf around annotation and AMP subworkflow execution // to ensure annotation is executed! - ch_faa_for_bgc_hmmsearch = faa + ch_faa_for_bgc_hmmsearch = faas // ANTISMASH if ( !params.bgc_skip_antismash ) { @@ -71,35 +70,17 @@ workflow BGC { } - if ( params.annotation_tool == 'prodigal' || params.annotation_tool == "pyrodigal" ) { - - ch_antismash_input = fna.join(gff, by: 0) - .multiMap { - meta, fna, gff -> - fna: [ meta, fna ] - gff: [ gff ] - } - - ANTISMASH_ANTISMASHLITE ( ch_antismash_input.fna, ch_antismash_databases, ch_antismash_directory, ch_antismash_input.gff ) - - } else if ( params.annotation_tool == 'prokka' ) { - - ANTISMASH_ANTISMASHLITE ( gbk, ch_antismash_databases, ch_antismash_directory, [] ) - - } else if ( params.annotation_tool == 'bakta' ) { - - ANTISMASH_ANTISMASHLITE ( gbk, ch_antismash_databases, ch_antismash_directory, [] ) - - } + ANTISMASH_ANTISMASHLITE ( gbks, ch_antismash_databases, ch_antismash_directory, [] ) ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITE.out.versions ) ch_antismashresults_for_combgc = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir - .mix( ANTISMASH_ANTISMASHLITE.out.gbk_input ) - .groupTuple() - .map{ - meta, files -> - [meta, files.flatten()] - } + .mix( ANTISMASH_ANTISMASHLITE.out.gbk_input ) + .groupTuple() + .map{ + meta, files -> + [meta, files.flatten()] + } + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_antismashresults_for_combgc ) } @@ -116,16 +97,16 @@ workflow BGC { ch_versions = ch_versions.mix( DEEPBGC_DOWNLOAD.out.versions ) } - DEEPBGC_PIPELINE ( fna, ch_deepbgc_database) + DEEPBGC_PIPELINE ( fastas, ch_deepbgc_database) ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) } // GECCO if ( !params.bgc_skip_gecco ) { - ch_gecco_input = fna.groupTuple() + ch_gecco_input = fastas.groupTuple() .multiMap { - fna: [ it[0], it[1], [] ] + fastas: [ it[0], it[1], [] ] } GECCO_RUN ( ch_gecco_input, [] ) @@ -180,7 +161,7 @@ workflow BGC { // MERGE_TAXONOMY if ( params.run_taxa_classification ) { - ch_mmseqs_taxonomy_list = tsv.map{ it[1] }.collect() + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) diff --git a/subworkflows/local/taxa_class.nf b/subworkflows/local/taxa_class.nf index ec9f273a..253ea704 100644 --- a/subworkflows/local/taxa_class.nf +++ b/subworkflows/local/taxa_class.nf @@ -36,15 +36,22 @@ workflow TAXA_CLASS { // MMSEQS_CREATEDB MMSEQS_CREATEDB ( contigs ) ch_versions = ch_versions.mix( MMSEQS_CREATEDB.out.versions ) - ch_taxonomy_querydb = MMSEQS_CREATEDB.out.db // MMSEQS_TAXONOMY - MMSEQS_TAXONOMY ( ch_taxonomy_querydb, ch_mmseqs_db ) + MMSEQS_TAXONOMY ( MMSEQS_CREATEDB.out.db, ch_mmseqs_db ) ch_versions = ch_versions.mix( MMSEQS_TAXONOMY.out.versions ) ch_taxonomy_querydb_taxdb = MMSEQS_TAXONOMY.out.db_taxonomy + // Join together to ensure in sync + ch_taxonomy_input_for_createtsv = MMSEQS_CREATEDB.out.db + .join(MMSEQS_TAXONOMY.out.db_taxonomy) + .multiMap { meta, db, db_taxonomy -> + db: [ meta,db ] + taxdb: [ meta, db_taxonomy ] + } + // MMSEQS_CREATETSV - MMSEQS_CREATETSV ( ch_taxonomy_querydb_taxdb, [[:],[]], ch_taxonomy_querydb ) + MMSEQS_CREATETSV ( ch_taxonomy_input_for_createtsv.taxdb, [[:],[]], 
ch_taxonomy_input_for_createtsv.db ) ch_versions = ch_versions.mix( MMSEQS_CREATETSV.out.versions ) ch_taxonomy_tsv = MMSEQS_CREATETSV.out.tsv } diff --git a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf index 5d74b7c7..e2c26d1a 100644 --- a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf @@ -152,7 +152,7 @@ def validateInputParameters() { // 3. Give warning if not using container system assuming conda - if ( params.run_bgc_screening && ( !params.bgc_antismash_databases || !params.bgc_antismash_installationdirectory ) && !params.bgc_skip_antismash && ( session.config.conda && session.config.conda.enabled ) ) { log.warn "[nf-core/funcscan] Running antiSMASH download database module, and detected conda has been enabled. Assuming using conda for pipeline run, check config if this is not expected!" } + if ( params.run_bgc_screening && ( !params.bgc_antismash_databases || !params.bgc_antismash_installationdirectory ) && !params.bgc_skip_antismash && ( session.config.conda && session.config.conda.enabled ) ) { log.warn "[nf-core/funcscan] Running antiSMASH download database module, and detected conda has been enabled. Assuming using conda for pipeline run. Check config if this is not expected!" } } diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index 1635c7c7..1847c015 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -29,9 +29,10 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { AMP } from '../subworkflows/local/amp' -include { ARG } from '../subworkflows/local/arg' -include { BGC } from '../subworkflows/local/bgc' +include { ANNOTATION } from '../subworkflows/local/annotation' +include { AMP } from '../subworkflows/local/amp' +include { ARG } from '../subworkflows/local/arg' +include { BGC } from '../subworkflows/local/bgc' include { TAXA_CLASS } from '../subworkflows/local/taxa_class' /* @@ -44,22 +45,7 @@ include { TAXA_CLASS } from '../subworkflows/local/taxa_class' // MODULE: Installed directly from nf-core/modules // include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { GUNZIP as GUNZIP_FASTA_PREP } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_FNA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_FAA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_GFF } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_FNA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_FAA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_GFF } from '../modules/nf-core/gunzip/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { PRODIGAL as PRODIGAL_GFF } from '../modules/nf-core/prodigal/main' -include { PRODIGAL as PRODIGAL_GBK } from '../modules/nf-core/prodigal/main' -include { PYRODIGAL as PYRODIGAL_GBK } from '../modules/nf-core/pyrodigal/main' -include { PYRODIGAL as PYRODIGAL_GFF } from '../modules/nf-core/pyrodigal/main' -include { BAKTA_BAKTADBDOWNLOAD } from '../modules/nf-core/bakta/baktadbdownload/main' -include { BAKTA_BAKTA } from '../modules/nf-core/bakta/bakta/main' -include { SEQKIT_SEQ as SEQKIT_SEQ_LONG } from '../modules/nf-core/seqkit/seq/main' -include { SEQKIT_SEQ as SEQKIT_SEQ_SHORT } from 
'../modules/nf-core/seqkit/seq/main' +include { GUNZIP as GUNZIP_INPUT_PREP } from '../modules/nf-core/gunzip/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -80,42 +66,69 @@ workflow FUNCSCAN { ch_input = Channel.fromSamplesheet("input") // Some tools require uncompressed input - fasta_prep = ch_input - .branch { - compressed: it[1].toString().endsWith('.gz') - uncompressed: it[1] - } + ch_input_prep = ch_input + .map{ meta, fasta, faa, gbk -> [meta, [fasta, faa, gbk]] } + .transpose() + .branch { + compressed: it[1].toString().endsWith('.gz') + uncompressed: it[1] + } - GUNZIP_FASTA_PREP ( fasta_prep.compressed ) - ch_versions = ch_versions.mix( GUNZIP_FASTA_PREP.out.versions ) + GUNZIP_INPUT_PREP ( ch_input_prep.compressed ) + ch_versions = ch_versions.mix( GUNZIP_INPUT_PREP.out.versions ) // Merge all the already uncompressed and newly compressed FASTAs here into // a single input channel for downstream - ch_unzipped_fastas = GUNZIP_FASTA_PREP.out.gunzip - .mix( fasta_prep.uncompressed ) - - // Split each FASTA into long and short contigs to - // speed up BGC workflow with BGC-compatible contig lengths only - SEQKIT_SEQ_LONG ( ch_unzipped_fastas ) - SEQKIT_SEQ_SHORT ( ch_unzipped_fastas ) - ch_versions = ch_versions.mix( SEQKIT_SEQ_LONG.out.versions ) - ch_versions = ch_versions.mix( SEQKIT_SEQ_SHORT.out.versions ) - - ch_prepped_input_long = SEQKIT_SEQ_LONG.out.fastx - .map{ meta, file -> [ meta + [id: meta.id + '_long', length: "long" ], file ] } - .filter{ - meta, fasta -> - !fasta.isEmpty() + ch_intermediate_input = GUNZIP_INPUT_PREP.out.gunzip + .mix( ch_input_prep.uncompressed ) + .groupTuple() + .map{ + meta, files -> + def fasta_found = files.find{it.toString().tokenize('.').last().matches('fasta|fas|fna|fa')} + def faa_found = files.find{it.toString().endsWith('.faa')} + def gbk_found = files.find{it.toString().tokenize('.').last().matches('gbk|gbff')} + def fasta = fasta_found != null ? fasta_found : [] + def faa = faa_found != null ? faa_found : [] + def gbk = gbk_found != null ? 
gbk_found : [] + + [meta, fasta, faa, gbk] } - - ch_prepped_input_short = SEQKIT_SEQ_SHORT.out.fastx - .map{ meta, file -> [ meta + [id: meta.id + '_short', length: "short" ], file ]} - .filter{ - meta, fasta -> - !fasta.isEmpty() + .branch { + meta, fasta, faa, gbk -> + preannotated: gbk != [] + fastas: true } - ch_prepped_input = ch_prepped_input_long.mix( ch_prepped_input_short ) + ch_input_for_annotation = ch_intermediate_input.fastas.map { meta, fasta, protein, gbk -> [ meta, fasta ] } + + /* + ANNOTATION + */ + + // Some tools require annotated FASTAs + if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash ) ) ) { + ANNOTATION( ch_input_for_annotation ) + ch_versions = ch_versions.mix( ANNOTATION.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files ) + + ch_new_annotation = ch_input_for_annotation + .join( ANNOTATION.out.faa ) + .join( ANNOTATION.out.gbk ) + + } else { + ch_new_annotation = Channel.empty() + } + + // Mix back the preannotated samples with the newly annotated ones + ch_prepped_input = ch_intermediate_input.preannotated + .mix( ch_new_annotation ) + .multiMap { + meta, fasta, faa, gbk -> + fastas: [meta, fasta] + faas: [meta, faa] + gbks: [meta, gbk] + } + /* TAXONOMIC CLASSIFICATION @@ -125,7 +138,7 @@ workflow FUNCSCAN { // This can be either on NT or AA level depending on annotation. // TODO: Only NT at the moment. AA tax. classification will be added only when its PR is merged. if ( params.run_taxa_classification ) { - TAXA_CLASS ( ch_prepped_input ) + TAXA_CLASS ( ch_prepped_input.fastas ) ch_versions = ch_versions.mix( TAXA_CLASS.out.versions ) ch_taxonomy_tsv = TAXA_CLASS.out.sample_taxonomy @@ -137,83 +150,6 @@ workflow FUNCSCAN { ch_taxonomy_tsv = Channel.empty() } - /* - ANNOTATION - */ - - // Some tools require annotated FASTAs - // For prodigal: run twice, once for gff and once for gbk generation, (for parity with PROKKA which produces both) - if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash ) ) ) { - - if ( params.annotation_tool == "prodigal" ) { - PRODIGAL_GFF ( ch_prepped_input, "gff" ) - GUNZIP_PRODIGAL_FAA ( PRODIGAL_GFF.out.amino_acid_fasta ) - GUNZIP_PRODIGAL_FNA ( PRODIGAL_GFF.out.nucleotide_fasta ) - GUNZIP_PRODIGAL_GFF ( PRODIGAL_GFF.out.gene_annotations ) - ch_versions = ch_versions.mix( PRODIGAL_GFF.out.versions ) - ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip - ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip - ch_annotation_gff = GUNZIP_PRODIGAL_GFF.out.gunzip - ch_annotation_gbk = Channel.empty() // Prodigal GBK and GFF output are mutually exclusive - - if ( params.save_annotations == true ) { - PRODIGAL_GBK ( ch_prepped_input, "gbk" ) - ch_versions = ch_versions.mix( PRODIGAL_GBK.out.versions ) - ch_annotation_gbk = PRODIGAL_GBK.out.gene_annotations // Prodigal GBK output stays zipped because it is currently not used by any downstream subworkflow. 
     /*
         TAXONOMIC CLASSIFICATION
@@ -125,7 +138,7 @@ workflow FUNCSCAN {
     // This can be either on NT or AA level depending on annotation.
     // TODO: Only NT at the moment. AA tax. classification will be added only when its PR is merged.
     if ( params.run_taxa_classification ) {
-        TAXA_CLASS ( ch_prepped_input )
+        TAXA_CLASS ( ch_prepped_input.fastas )
         ch_versions = ch_versions.mix( TAXA_CLASS.out.versions )
         ch_taxonomy_tsv = TAXA_CLASS.out.sample_taxonomy
@@ -137,83 +150,6 @@ workflow FUNCSCAN {
         ch_taxonomy_tsv = Channel.empty()
     }
-    /*
-        ANNOTATION
-    */
-
-    // Some tools require annotated FASTAs
-    // For prodigal: run twice, once for gff and once for gbk generation, (for parity with PROKKA which produces both)
-    if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash ) ) ) {
-
-        if ( params.annotation_tool == "prodigal" ) {
-            PRODIGAL_GFF ( ch_prepped_input, "gff" )
-            GUNZIP_PRODIGAL_FAA ( PRODIGAL_GFF.out.amino_acid_fasta )
-            GUNZIP_PRODIGAL_FNA ( PRODIGAL_GFF.out.nucleotide_fasta )
-            GUNZIP_PRODIGAL_GFF ( PRODIGAL_GFF.out.gene_annotations )
-            ch_versions = ch_versions.mix( PRODIGAL_GFF.out.versions )
-            ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip
-            ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip
-            ch_annotation_gff = GUNZIP_PRODIGAL_GFF.out.gunzip
-            ch_annotation_gbk = Channel.empty() // Prodigal GBK and GFF output are mutually exclusive
-
-            if ( params.save_annotations == true ) {
-                PRODIGAL_GBK ( ch_prepped_input, "gbk" )
-                ch_versions = ch_versions.mix( PRODIGAL_GBK.out.versions )
-                ch_annotation_gbk = PRODIGAL_GBK.out.gene_annotations // Prodigal GBK output stays zipped because it is currently not used by any downstream subworkflow.
-            }
-        } else if ( params.annotation_tool == "pyrodigal" ) {
-            PYRODIGAL_GFF ( ch_prepped_input, "gff" )
-            GUNZIP_PYRODIGAL_FAA ( PYRODIGAL_GFF.out.faa )
-            GUNZIP_PYRODIGAL_FNA ( PYRODIGAL_GFF.out.fna )
-            GUNZIP_PYRODIGAL_GFF ( PYRODIGAL_GFF.out.annotations )
-            ch_versions = ch_versions.mix( PYRODIGAL_GFF.out.versions )
-            ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip
-            ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip
-            ch_annotation_gff = GUNZIP_PYRODIGAL_GFF.out.gunzip
-            ch_annotation_gbk = Channel.empty() // Pyrodigal GBK and GFF output are mutually exclusive
-
-            if ( params.save_annotations == true ) {
-                PYRODIGAL_GBK ( ch_prepped_input, "gbk" )
-                ch_versions = ch_versions.mix( PYRODIGAL_GBK.out.versions )
-                ch_annotation_gbk = PYRODIGAL_GBK.out.annotations // Pyrodigal GBK output stays zipped because it is currently not used by any downstream subworkflow.
-            }
-        } else if ( params.annotation_tool == "prokka" ) {
-            PROKKA ( ch_prepped_input, [], [] )
-            ch_versions = ch_versions.mix( PROKKA.out.versions )
-            ch_annotation_faa = PROKKA.out.faa
-            ch_annotation_fna = PROKKA.out.fna
-            ch_annotation_gff = PROKKA.out.gff
-            ch_annotation_gbk = PROKKA.out.gbk
-        } else if ( params.annotation_tool == "bakta" ) {
-
-            // BAKTA prepare download
-            if ( params.annotation_bakta_db_localpath ) {
-                ch_bakta_db = Channel
-                    .fromPath( params.annotation_bakta_db_localpath )
-                    .first()
-            } else {
-                BAKTA_BAKTADBDOWNLOAD ( )
-                ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions )
-                ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db )
-            }
-
-            BAKTA_BAKTA ( ch_prepped_input, ch_bakta_db, [], [] )
-            ch_versions = ch_versions.mix( BAKTA_BAKTA.out.versions )
-            ch_annotation_faa = BAKTA_BAKTA.out.faa
-            ch_annotation_fna = BAKTA_BAKTA.out.fna
-            ch_annotation_gff = BAKTA_BAKTA.out.gff
-            ch_annotation_gbk = BAKTA_BAKTA.out.gbff
-        }
-
-    } else {
-
-        ch_annotation_faa = Channel.empty()
-        ch_annotation_fna = Channel.empty()
-        ch_annotation_gff = Channel.empty()
-        ch_annotation_gbk = Channel.empty()
-
-    }
-
     /*
         SCREENING
     */
@@ -223,11 +159,11 @@ workflow FUNCSCAN {
     */
     if ( params.run_amp_screening && !params.run_taxa_classification ) {
         AMP (
-            ch_prepped_input,
-            ch_annotation_faa
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
@@ -236,17 +172,17 @@ workflow FUNCSCAN {
         ch_versions = ch_versions.mix(AMP.out.versions)
     } else if ( params.run_amp_screening && params.run_taxa_classification ) {
         AMP (
-            ch_prepped_input,
-            ch_annotation_faa
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
             ch_taxonomy_tsv
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
                         !file.isEmpty()
                 }
         )
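Every screening input above reuses the same guard: warn once for any sample whose upstream file is empty, then drop that sample from the channel so the dependent tools are never invoked for it. A minimal sketch of the guard on its own (hypothetical paths; in Nextflow `isEmpty()` is also true for files that do not exist):

workflow {
    ch_faa = Channel.of(
        [ [id: 'sample_1'], file('sample_1.faa') ],   // kept only if the file exists and has content
        [ [id: 'sample_2'], file('empty.faa') ]       // empty or missing files are warned about and dropped
    )

    ch_faa
        .filter { meta, file ->
            if ( file != [] && file.isEmpty() )
                log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. Screening tools requiring this file will not be executed: ${meta.id}")
            !file.isEmpty()
        }
        .view { "kept for screening: ${it[0].id}" }
}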
@@ -259,17 +195,17 @@ workflow FUNCSCAN {
     if ( params.run_arg_screening && !params.run_taxa_classification ) {
         if ( params.arg_skip_deeparg ) {
             ARG (
-                ch_prepped_input,
+                ch_prepped_input.fastas,
                 [],
                 ch_taxonomy_tsv
             )
         } else {
             ARG (
-                ch_prepped_input,
-                ch_annotation_faa
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas
                     .filter {
                         meta, file ->
-                            if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
                             !file.isEmpty()
                     },
                 ch_taxonomy_tsv
@@ -279,28 +215,28 @@
     } else if ( params.run_arg_screening && params.run_taxa_classification ) {
         if ( params.arg_skip_deeparg ) {
             ARG (
-                ch_prepped_input,
+                ch_prepped_input.fastas,
                 [],
                 ch_taxonomy_tsv
                     .filter {
                         meta, file ->
-                            if ( file.isEmpty() ) log.warn("Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
                             !file.isEmpty()
                     }
             )
         } else {
             ARG (
-                ch_prepped_input,
-                ch_annotation_faa
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas
                     .filter {
                         meta, file ->
-                            if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
                             !file.isEmpty()
                     },
                 ch_taxonomy_tsv
                     .filter {
                         meta, file ->
-                            if ( file.isEmpty() ) log.warn("Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
                             !file.isEmpty()
                     }
             )
@@ -313,23 +249,17 @@ workflow FUNCSCAN {
     */
     if ( params.run_bgc_screening && !params.run_taxa_classification ) {
         BGC (
-            ch_prepped_input_long,
-            ch_annotation_gff
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GFF file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
-            ch_annotation_faa
+            ch_prepped_input.gbks
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
-                        !file.isEmpty()
-                },
-            ch_annotation_gbk
-                .filter {
-                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GBK file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
             ch_taxonomy_tsv
@@ -337,29 +267,23 @@
         ch_versions = ch_versions.mix( BGC.out.versions )
     } else if ( params.run_bgc_screening && params.run_taxa_classification ) {
         BGC (
-            ch_prepped_input,
-            ch_annotation_gff
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GFF file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
-            ch_annotation_faa
+            ch_prepped_input.gbks
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
-                        !file.isEmpty()
-                },
-            ch_annotation_gbk
-                .filter {
-                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GBK file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                         !file.isEmpty()
                 },
             ch_taxonomy_tsv
                 .filter {
                     meta, file ->
-                        if ( file.isEmpty() ) log.warn("Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
                         !file.isEmpty()
                 }
         )
@@ -409,9 +333,7 @@ workflow FUNCSCAN {
         )
     )
-    if( params.annotation_tool=='prokka' ) {
-        ch_multiqc_files = ch_multiqc_files.mix( PROKKA.out.txt.collect{it[1]}.ifEmpty([]) )
-    }
+    ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]}.ifEmpty([]) )
     MULTIQC (
         ch_multiqc_files.collect(),