Skip to content

Commit 5bd04b4

Browse files
authored
Merge pull request #1342 from nf-core/factor_out_preprocessing
Factor out preprocessing
2 parents 66f3594 + 5507a6d commit 5bd04b4

File tree

16 files changed

+945
-419
lines changed

16 files changed

+945
-419
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
104104
- [PR #1336](https://github.com/nf-core/rnaseq/pull/1336) - Use nf-core/setup-nf-test to install nf-test from cache during CI/CD
105105
- [PR #1340](https://github.com/nf-core/rnaseq/pull/1340) - Remove out-of-date Azure specific guidance
106106
- [PR #1341](https://github.com/nf-core/rnaseq/pull/1341) - Add rename in the MultiQC report for samples without techreps
107+
- [PR #1342](https://github.com/nf-core/rnaseq/pull/1342) - Factor out preprocessing
107108

108109
### Parameters
109110

main.nf

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ workflow NFCORE_RNASEQ {
117117
PREPARE_GENOME.out.salmon_index,
118118
PREPARE_GENOME.out.kallisto_index,
119119
PREPARE_GENOME.out.bbsplit_index,
120+
PREPARE_GENOME.out.rrna_fastas,
120121
PREPARE_GENOME.out.sortmerna_index,
121122
PREPARE_GENOME.out.splicesites,
122123
!params.remove_ribo_rna && params.remove_ribo_rna

modules.json

+10-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"bbmap/bbsplit": {
99
"branch": "master",
1010
"git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f",
11-
"installed_by": ["modules"]
11+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
1212
},
1313
"bedtools/genomecov": {
1414
"branch": "master",
@@ -18,7 +18,7 @@
1818
"cat/fastq": {
1919
"branch": "master",
2020
"git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e",
21-
"installed_by": ["modules"]
21+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
2222
},
2323
"custom/catadditionalfasta": {
2424
"branch": "master",
@@ -202,7 +202,7 @@
202202
"sortmerna": {
203203
"branch": "master",
204204
"git_sha": "df05c8db5195867c0bc7b92c1788115b66f0d17d",
205-
"installed_by": ["modules"]
205+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
206206
},
207207
"star/align": {
208208
"branch": "master",
@@ -315,17 +315,22 @@
315315
"fastq_fastqc_umitools_fastp": {
316316
"branch": "master",
317317
"git_sha": "db35d26edeafacf9906a517827df621a29adc13d",
318-
"installed_by": ["subworkflows"]
318+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
319319
},
320320
"fastq_fastqc_umitools_trimgalore": {
321321
"branch": "master",
322322
"git_sha": "cb6defa0834eda9d6d3f967e981c819fc3e257bf",
323+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
324+
},
325+
"fastq_qc_trim_filter_setstrandedness": {
326+
"branch": "master",
327+
"git_sha": "b86de50ab60c19ab40e70a4501820f4cb307050b",
323328
"installed_by": ["subworkflows"]
324329
},
325330
"fastq_subsample_fq_salmon": {
326331
"branch": "master",
327332
"git_sha": "727232afb8294b53dd9d05bfe469b70cce1675bb",
328-
"installed_by": ["subworkflows"]
333+
"installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
329334
},
330335
"quantify_pseudo_alignment": {
331336
"branch": "master",

subworkflows/local/prepare_genome/main.nf

+7-4
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,11 @@ workflow PREPARE_GENOME {
228228
// Uncompress sortmerna index or generate from scratch if required
229229
//
230230
ch_sortmerna_index = Channel.empty()
231+
ch_rrna_fastas = Channel.empty()
232+
231233
if ('sortmerna' in prepare_tool_indices) {
234+
ribo_db = file(sortmerna_fasta_list)
235+
232236
if (sortmerna_index) {
233237
if (sortmerna_index.endsWith('.tar.gz')) {
234238
ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { it[1] }
@@ -237,14 +241,12 @@ workflow PREPARE_GENOME {
237241
ch_sortmerna_index = Channel.value(file(sortmerna_index))
238242
}
239243
} else {
240-
ch_sortmerna_fastas = Channel.from(file(sortmerna_fasta_list).readLines())
244+
ch_rrna_fastas = Channel.from(ribo_db.readLines())
241245
.map { row -> file(row, checkIfExists: true) }
242-
.collect()
243-
.map { [ 'rrna_refs', it ] }
244246

245247
SORTMERNA_INDEX (
246248
Channel.of([ [],[] ]),
247-
ch_sortmerna_fastas,
249+
ch_rrna_fastas.collect().map { [ 'rrna_refs', it ] },
248250
Channel.of([ [],[] ])
249251
)
250252
ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
@@ -370,6 +372,7 @@ workflow PREPARE_GENOME {
370372
chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
371373
splicesites = ch_splicesites // channel: path(genome.splicesites.txt)
372374
bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/)
375+
rrna_fastas = ch_rrna_fastas // channel: path(sortmerna_fasta_list)
373376
sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/)
374377
star_index = ch_star_index // channel: path(star/index/)
375378
rsem_index = ch_rsem_index // channel: path(rsem/index/)

subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf

+1-57
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ include { imNotification } from '../../nf-core/utils_nfcore_pipeline'
2020
include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
2121
include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
2222
include { logColours } from '../../nf-core/utils_nfcore_pipeline'
23+
include { calculateStrandedness } from '../../nf-core/fastq_qc_trim_filter_setstrandedness'
2324

2425
/*
2526
========================================================================================
@@ -548,63 +549,6 @@ def biotypeInGtf(gtf_file, biotype) {
548549
}
549550
}
550551

551-
//
552-
// Function to determine library type by comparing type counts. Consistent
553-
// between Salmon and RSeQC
554-
//
555-
def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) {
556-
def totalFragments = forwardFragments + reverseFragments + unstrandedFragments
557-
def totalStrandedFragments = forwardFragments + reverseFragments
558-
559-
def library_strandedness = 'undetermined'
560-
if (totalStrandedFragments > 0) {
561-
def forwardProportion = forwardFragments / (totalStrandedFragments as double)
562-
def reverseProportion = reverseFragments / (totalStrandedFragments as double)
563-
def proportionDifference = Math.abs(forwardProportion - reverseProportion)
564-
565-
if (forwardProportion >= stranded_threshold) {
566-
strandedness = 'forward'
567-
} else if (reverseProportion >= stranded_threshold) {
568-
strandedness = 'reverse'
569-
} else if (proportionDifference <= unstranded_threshold) {
570-
strandedness = 'unstranded'
571-
}
572-
}
573-
574-
return [
575-
inferred_strandedness: strandedness,
576-
forwardFragments: (forwardFragments / (totalFragments as double)) * 100,
577-
reverseFragments: (reverseFragments / (totalFragments as double)) * 100,
578-
unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100
579-
]
580-
}
581-
582-
//
583-
// Function that parses Salmon quant 'lib_format_counts.json' output file to get inferred strandedness
584-
//
585-
def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) {
586-
// Parse the JSON content of the file
587-
def libCounts = new JsonSlurper().parseText(json_file.text)
588-
589-
// Calculate the counts for forward and reverse strand fragments
590-
def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF']
591-
def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR']
592-
593-
// Calculate unstranded fragments (IU and U)
594-
// NOTE: this is here for completeness, but actually all fragments have a
595-
// strandedness (even if the overall library does not), so all these values
596-
// will be '0'. See
597-
// https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I
598-
def unstrandedKeys = ['IU', 'U', 'MU']
599-
600-
def forwardFragments = forwardKeys.collect { libCounts[it] ?: 0 }.sum()
601-
def reverseFragments = reverseKeys.collect { libCounts[it] ?: 0 }.sum()
602-
def unstrandedFragments = unstrandedKeys.collect { libCounts[it] ?: 0 }.sum()
603-
604-
// Use shared calculation function to determine strandedness
605-
return calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold, unstranded_threshold)
606-
}
607-
608552
//
609553
// Function that parses RSeQC infer_experiment output file to get inferred strandedness
610554
//

subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test

-109
Original file line numberDiff line numberDiff line change
@@ -390,115 +390,6 @@ nextflow_function {
390390

391391
}
392392

393-
test("Test Function getSalmonInferredStrandedness unstranded") {
394-
395-
function "getSalmonInferredStrandedness"
396-
397-
when {
398-
function {
399-
"""
400-
import groovy.json.JsonOutput
401-
402-
// Define the JSON contents for the test
403-
def json_contents = JsonOutput.toJson([
404-
"SF": 0,
405-
"SR": 0,
406-
"ISF": 100,
407-
"ISR": 100,
408-
"IU": 0,
409-
"U": 0
410-
])
411-
def jsonFile = file("${workDir}/salmonUnstranded.json")
412-
jsonFile.write(json_contents)
413-
414-
input[0] = jsonFile
415-
input[1] = 0.8
416-
input[2] = 0.1
417-
"""
418-
}
419-
}
420-
421-
then {
422-
assertAll(
423-
{ assert function.success },
424-
{ assert snapshot(function.result).match() }
425-
)
426-
}
427-
428-
}
429-
430-
test("Test Function getSalmonInferredStrandedness forward") {
431-
432-
function "getSalmonInferredStrandedness"
433-
434-
when {
435-
function {
436-
"""
437-
import groovy.json.JsonOutput
438-
439-
def json_contents = JsonOutput.toJson([
440-
"SF": 0,
441-
"SR": 0,
442-
"ISF": 100,
443-
"ISR": 0,
444-
"IU": 0,
445-
"U": 0
446-
])
447-
def jsonFile = file("${workDir}/salmonForward.json")
448-
jsonFile.write(json_contents)
449-
450-
input[0] = jsonFile
451-
input[1] = 0.8
452-
input[2] = 0.1
453-
"""
454-
}
455-
}
456-
457-
then {
458-
assertAll(
459-
{ assert function.success },
460-
{ assert snapshot(function.result).match() }
461-
)
462-
}
463-
464-
}
465-
466-
test("Test Function getSalmonInferredStrandedness reverse") {
467-
468-
function "getSalmonInferredStrandedness"
469-
470-
when {
471-
function {
472-
"""
473-
import groovy.json.JsonOutput
474-
475-
def json_contents = JsonOutput.toJson([
476-
"SF": 0,
477-
"SR": 0,
478-
"ISF": 0,
479-
"ISR": 100,
480-
"IU": 0,
481-
"U": 0
482-
])
483-
def jsonFile = file("${workDir}/salmonReverse.json")
484-
jsonFile.write(json_contents)
485-
486-
input[0] = jsonFile
487-
input[1] = 0.8
488-
input[2] = 0.1
489-
"""
490-
}
491-
}
492-
493-
then {
494-
assertAll(
495-
{ assert function.success },
496-
{ assert snapshot(function.result).match() }
497-
)
498-
}
499-
500-
}
501-
502393
test("Test Function getStarPercentMapped pass") {
503394

504395
function "getStarPercentMapped"

subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap

-45
Original file line numberDiff line numberDiff line change
@@ -45,36 +45,6 @@
4545
},
4646
"timestamp": "2024-03-06T14:33:26.903306"
4747
},
48-
"Test Function getSalmonInferredStrandedness unstranded": {
49-
"content": [
50-
{
51-
"inferred_strandedness": "unstranded",
52-
"forwardFragments": 50.0,
53-
"reverseFragments": 50.0,
54-
"unstrandedFragments": 0.0
55-
}
56-
],
57-
"meta": {
58-
"nf-test": "0.8.4",
59-
"nextflow": "23.10.1"
60-
},
61-
"timestamp": "2024-06-18T14:29:54.96715"
62-
},
63-
"Test Function getSalmonInferredStrandedness reverse": {
64-
"content": [
65-
{
66-
"inferred_strandedness": "reverse",
67-
"forwardFragments": 0.0,
68-
"reverseFragments": 100.0,
69-
"unstrandedFragments": 0.0
70-
}
71-
],
72-
"meta": {
73-
"nf-test": "0.8.4",
74-
"nextflow": "23.10.1"
75-
},
76-
"timestamp": "2024-06-18T14:30:11.417381"
77-
},
7848
"Test Function checkSamplesAfterGrouping invalid strandedness": {
7949
"content": null,
8050
"meta": {
@@ -215,21 +185,6 @@
215185
},
216186
"timestamp": "2024-03-06T14:32:49.565504"
217187
},
218-
"Test Function getSalmonInferredStrandedness forward": {
219-
"content": [
220-
{
221-
"inferred_strandedness": "forward",
222-
"forwardFragments": 100.0,
223-
"reverseFragments": 0.0,
224-
"unstrandedFragments": 0.0
225-
}
226-
],
227-
"meta": {
228-
"nf-test": "0.8.4",
229-
"nextflow": "23.10.1"
230-
},
231-
"timestamp": "2024-06-18T14:30:03.301262"
232-
},
233188
"Test Function rsemStarIndexWarn": {
234189
"content": null,
235190
"meta": {

0 commit comments

Comments
 (0)