Skip to content

Commit c6bfe7a

Browse files
authored
Merge pull request #1058 from drpatelh/updates
Use nf-validation plugin for parameter and samplesheet validation
2 parents fc88456 + 8d9ccac commit c6bfe7a

13 files changed

+115
-352
lines changed

.nf-core.yml

+1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ lint:
44
- assets/email_template.html
55
- assets/email_template.txt
66
- lib/NfcoreTemplate.groovy
7+
- pyproject.toml
78
multiqc_config: false

CHANGELOG.md

+12
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
55

66
## v3.13.0dev - [date]
77

8+
### Credits
9+
10+
Special thanks to the following for their contributions to the release:
11+
12+
- [Adam Talbot](https://github.com/adamrtalbot)
13+
- [Júlia Mir Pedrol](https://github.com/mirpedrol)
14+
- [Matthias Zepper](https://github.com/MatthiasZepper)
15+
- [Maxime Garcia](https://github.com/maxulysse)
16+
17+
Thank you to everyone else who has contributed by reporting bugs, suggesting enhancements or helping in any other way, shape or form.
18+
819
### Enhancements & fixes
920

1021
- [PR #1049](https://github.com/nf-core/rnaseq/pull/1049) - Display a warning when `--extra_star_align_args` are used with `--aligner star_rsem`
1122
- [PR #1051](https://github.com/nf-core/rnaseq/pull/1051) - Remove `public_aws_ecr` profile
1223
- [PR #1054](https://github.com/nf-core/rnaseq/pull/1054) - Template update to nf-core/tools v2.9
24+
- [PR #1058](https://github.com/nf-core/rnaseq/pull/1058) - Use `nf-validation` plugin for parameter and samplesheet validation
1325

1426
## [[3.12.0](https://github.com/nf-core/rnaseq/releases/tag/3.12.0)] - 2023-06-02
1527

assets/schema_input.json

+10-5
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,35 @@
1010
"sample": {
1111
"type": "string",
1212
"pattern": "^\\S+$",
13-
"errorMessage": "Sample name must be provided and cannot contain spaces"
13+
"errorMessage": "Sample name must be provided and cannot contain spaces",
14+
"meta": ["id"]
1415
},
1516
"fastq_1": {
1617
"type": "string",
18+
"format": "file-path",
19+
"exists": true,
1720
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
1821
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
1922
},
2023
"fastq_2": {
2124
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
25+
"type": "string",
26+
"format": "file-path",
27+
"exists": true,
2228
"anyOf": [
2329
{
24-
"type": "string",
2530
"pattern": "^\\S+\\.f(ast)?q\\.gz$"
2631
},
2732
{
28-
"type": "string",
2933
"maxLength": 0
3034
}
3135
]
3236
},
3337
"strandedness": {
3438
"type": "string",
35-
"errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'",
36-
"enum": ["forward", "reverse", "unstranded"]
39+
"errorMessage": "Strandedness must be provided and be one of 'auto', 'forward', 'reverse' or 'unstranded'",
40+
"enum": ["forward", "reverse", "unstranded", "auto"],
41+
"meta": ["strandedness"]
3742
}
3843
},
3944
"required": ["sample", "fastq_1", "strandedness"]

bin/check_samplesheet.py

-172
This file was deleted.

conf/modules.config

-8
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,6 @@ process {
2323
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
2424
]
2525

26-
withName: 'SAMPLESHEET_CHECK' {
27-
publishDir = [
28-
path: { "${params.outdir}/pipeline_info" },
29-
mode: params.publish_dir_mode,
30-
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
31-
]
32-
}
33-
3426
withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
3527
publishDir = [
3628
path: { "${params.outdir}/pipeline_info" },

docs/usage.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ The `--umitools_grouping_method` parameter affects [how similar, but non-identic
9191

9292
#### Examples:
9393

94-
| UMI type | Source | Pipeline parameters |
95-
| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
96-
| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` |
94+
| UMI type | Source | Pipeline parameters |
95+
| ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
96+
| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` |
9797
| In sequence | [Lexogen QuantSeq® 3’ mRNA-Seq V2 FWD](https://www.lexogen.com/quantseq-3mrna-sequencing) + [UMI Second Strand Synthesis Module](https://faqs.lexogen.com/faq/how-can-i-add-umis-to-my-quantseq-libraries) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P<umi_1>.{6})(?P<discard_1>.{4}).*"` |
9898
| In sequence | [Lexogen CORALL® Total RNA-Seq V1](https://www.lexogen.com/corall-total-rna-seq/)<br> > _mind [Appendix H](https://www.lexogen.com/wp-content/uploads/2020/04/095UG190V0130_CORALL-Total-RNA-Seq_2020-03-31.pdf) regarding optional trimming_ | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P<umi_1>.{12}).*"`<br>Optional: `--clip_r2 9 --three_prime_clip_r2 12` |
9999
| In sequence | [Takara Bio SMARTer® Stranded Total RNA-Seq Kit v3](https://www.takarabio.com/documents/User%20Manual/SMARTer%20Stranded%20Total%20RNA/SMARTer%20Stranded%20Total%20RNA-Seq%20Kit%20v3%20-%20Pico%20Input%20Mammalian%20User%20Manual-a_114949.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern2 "^(?P<umi_1>.{8})(?P<discard_1>.{6}).*"` |

lib/WorkflowMain.groovy

-5
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,6 @@ class WorkflowMain {
4444

4545
// Check AWS batch settings
4646
NfcoreTemplate.awsBatch(workflow, params)
47-
48-
// Check input has been provided
49-
if (!params.input) {
50-
Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
51-
}
5247
}
5348

5449
//

lib/WorkflowRnaseq.groovy

+28-25
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,9 @@ class WorkflowRnaseq {
1111
//
1212
// Check and validate parameters
1313
//
14-
public static void initialise(params, log, valid_params) {
14+
public static void initialise(params, log) {
1515
genomeExistsError(params, log)
1616

17-
18-
if (!params.fasta) {
19-
Nextflow.error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.")
20-
}
21-
2217
if (!params.gtf && !params.gff) {
2318
Nextflow.error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.")
2419
}
@@ -54,27 +49,13 @@ class WorkflowRnaseq {
5449
}
5550
}
5651

57-
if (!params.skip_trimming) {
58-
if (!valid_params['trimmers'].contains(params.trimmer)) {
59-
Nextflow.error("Invalid option: '${params.trimmer}'. Valid options for '--trimmer': ${valid_params['trimmers'].join(', ')}.")
60-
}
61-
}
62-
63-
if (!params.skip_alignment) {
64-
if (!valid_params['aligners'].contains(params.aligner)) {
65-
Nextflow.error("Invalid option: '${params.aligner}'. Valid options for '--aligner': ${valid_params['aligners'].join(', ')}.")
66-
}
67-
} else {
52+
if (params.skip_alignment) {
6853
skipAlignmentWarn(log)
6954
}
7055

7156
if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
72-
if (!valid_params['pseudoaligners'].contains(params.pseudo_aligner)) {
73-
Nextflow.error("Invalid option: '${params.pseudo_aligner}'. Valid options for '--pseudo_aligner': ${valid_params['pseudoaligners'].join(', ')}.")
74-
} else {
75-
if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) {
76-
Nextflow.error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.")
77-
}
57+
if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) {
58+
Nextflow.error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.")
7859
}
7960
}
8061

@@ -109,12 +90,34 @@ class WorkflowRnaseq {
10990
}
11091

11192
// Check which RSeQC modules we are running
93+
def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin']
11294
def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
113-
if ((valid_params['rseqc_modules'] + rseqc_modules).unique().size() != valid_params['rseqc_modules'].size()) {
114-
Nextflow.error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_params['rseqc_modules'].join(', ')}")
95+
if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) {
96+
Nextflow.error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}")
11597
}
11698
}
11799

100+
//
101+
// Function to validate channels from input samplesheet
102+
//
103+
public static ArrayList validateInput(input) {
104+
def (metas, fastqs) = input[1..2]
105+
106+
// Check that multiple runs of the same sample are of the same strandedness
107+
def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1
108+
if (!strandedness_ok) {
109+
Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}")
110+
}
111+
112+
// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
113+
def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
114+
if (!endedness_ok) {
115+
Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
116+
}
117+
118+
return [ metas[0], fastqs ]
119+
}
120+
118121
//
119122
// Function to check whether biotype field exists in GTF file
120123
//

modules/local/samplesheet_check.nf

-31
This file was deleted.

0 commit comments

Comments
 (0)