1
+ #! /usr/bin/env nextflow
2
+
3
+ nextflow. enable. dsl = 2
4
+
5
+
6
+
7
+ // WORKFLOW SPECIFICATION
8
+ // --------------------------------------------------------------- //
9
workflow {

    // Fail fast when a required input is missing or points at a path that
    // does not exist. `exists()` must be called as a method.
    assert params.pb_fastq : "Please provide a PacBio HiFi CCS FASTQ.gz file with the --pb_fastq argument."
    assert file(params.pb_fastq).exists() : "Provided path to PacBio FASTQ does not exist."
    assert params.ont_fastq : "Please provide an Oxford Nanopore FASTQ.gz file with the --ont_fastq argument."
    assert file(params.ont_fastq).exists() : "Provided path to Nanopore FASTQ does not exist."
    assert params.ref_fasta : "Please provide a reference FASTA with the --ref_fasta argument."
    assert file(params.ref_fasta).exists() : "Provided path to reference FASTA does not exist."


    // input channels

    // Each read set is split into chunks of `params.split_max` records and
    // emitted as ( chunk file, chunk base name, platform label ).
    // `file: true` makes splitFastq emit chunk files rather than text, which
    // is what the `file(fastq)` calls in the closure below expect.
    ch_pb_reads = Channel
        .fromPath( params.pb_fastq )
        .splitFastq( by: params.split_max, file: true )
        .map { fastq -> tuple( file(fastq), file(fastq).getBaseName(), "pacbio" ) }

    ch_ont_reads = Channel
        .fromPath( params.ont_fastq )
        .splitFastq( by: params.split_max, file: true )
        .map { fastq -> tuple( file(fastq), file(fastq).getBaseName(), "ont" ) }

    ch_ref = Channel
        .fromPath( params.ref_fasta )

    // One tuple per row of the tab-separated regions table:
    // ( samtools_expression, file_label, description ).
    ch_desired_regions = Channel
        .fromPath( params.desired_regions )
        .splitCsv( header: true, sep: "\t" )
        .map { row -> tuple( row.samtools_expression, row.file_label, row.description ) }


    // Workflow steps

    // Align every PacBio and ONT chunk against the reference.
    MAP_TO_REF (
        ch_pb_reads
            .mix( ch_ont_reads ),
        ch_ref
    )

    // Pull the reads overlapping each desired region out of every BAM.
    EXTRACT_DESIRED_REGIONS (
        MAP_TO_REF.out,
        ch_desired_regions
    )

    // EXTRACT_DESIRED_REGIONS emits ( fastq, basename, platform, file_label ):
    // index 2 is the platform, and indices 1-3 form the grouping key.
    MERGE_PACBIO_FASTQS (
        EXTRACT_DESIRED_REGIONS.out
            .filter { it[2] == "pacbio" }
            .groupTuple( by: [ 1, 2, 3 ] )
    )

    MERGE_ONT_FASTQS (
        EXTRACT_DESIRED_REGIONS.out
            .filter { it[2] == "ont" }
            .groupTuple( by: [ 1, 2, 3 ] )
    )

    // NOTE(review): the two merged channels are handed over as-is, so PacBio
    // and ONT items are paired by emission order rather than by region label.
    // Confirm that is acceptable when more than one region is requested.
    RUN_HIFIASM (
        MERGE_PACBIO_FASTQS.out,
        MERGE_ONT_FASTQS.out
    )

    CONVERT_CONFIGS_TO_FASTA (
        RUN_HIFIASM.out
    )

}
75
+ // --------------------------------------------------------------- //
76
+
77
+
78
+
79
+ // DERIVATIVE PARAMETER SPECIFICATION
80
+ // --------------------------------------------------------------- //
81
+ // Additional parameters that are derived from parameters set in nextflow.config
82
+
83
// Output sub-directories beneath params.results; consumed by the publishDir
// directives of the merge and assembly processes.
params.extracted = params.results + "/01_extracted_regions"
params.assembly = params.results + "/02_hifiasm_assembly"
85
+
86
+ // --------------------------------------------------------------- //
87
+
88
+
89
+
90
+
91
+ // PROCESS SPECIFICATION
92
+ // --------------------------------------------------------------- //
93
+
94
process MAP_TO_REF {

    /*
    Align one FASTQ chunk to the reference with minimap2 and write a sorted
    BAM named `<basename>_<platform>.bam`.

    Inputs:
      - tuple of the FASTQ chunk, its base name, and the platform label
      - the reference FASTA (`each`, so it is reused for every chunk)
    Output:
      - the BAM file(s) produced in the task directory
    */

    tag "${basename}, ${platform}"
    label "map_and_extract"

    cpus 8

    input:
    tuple path(fastq), val(basename), val(platform)
    each path(ref_fasta)

    output:
    path "*.bam"

    script:
    // Any platform other than "pacbio" falls back to the ONT preset.
    minimap2_preset = platform == "pacbio" ? "map-hifi" : "map-ont"
    """
    minimap2 -t 6 -L --eqx -ax ${minimap2_preset} \
    ${ref_fasta} \
    ${fastq} \
    | samtools view -Sbt ${ref_fasta} \
    | samtools sort - -o ${basename}_${platform}.bam
    """

}
121
+
122
process EXTRACT_DESIRED_REGIONS {

    /*
    Index a BAM, select the alignments matching one region expression, and
    convert them to a gzipped FASTQ named
    `<basename>_<platform>_<file_label>.fastq.gz`.

    Inputs:
      - a BAM whose stem is `<basename>_<platform>` (`each`, so every BAM is
        combined with every region)
      - tuple of the region expression passed to `samtools view`, a label used
        in the output file name, and a free-text description
    Output:
      - tuple of the FASTQ, basename, platform and file_label
    */

    tag "${basename}, ${platform}, ${file_label}"
    label "map_and_extract"

    input:
    each path(bam)
    tuple val(expression), val(file_label), val(description)

    output:
    tuple path("${basename}_${platform}_${file_label}.fastq.gz"), val(basename), val(platform), val(file_label)

    script:
    // Recover basename and platform from the BAM stem by splitting on the
    // LAST underscore, so base names containing dots or underscores survive.
    // Declared without `def` because `tag` and `output:` reference them.
    bam_stem = file(bam).getBaseName()
    last_sep = bam_stem.lastIndexOf("_")
    assert last_sep > 0 && last_sep < bam_stem.length() - 1 : "BAM name '${bam_stem}' does not follow the <basename>_<platform> convention."
    basename = bam_stem.substring(0, last_sep)
    platform = bam_stem.substring(last_sep + 1)
    """
    samtools index ${bam}
    samtools view -b ${bam} ${expression} \
    | samtools fastq - \
    | reformat.sh qin=33 int=f in=stdin.fq \
    out=${basename}_${platform}_${file_label}.fastq.gz
    """

}
148
+
149
process MERGE_PACBIO_FASTQS {

    /*
    Concatenate the grouped per-chunk FASTQs staged under `to_merge/` into a
    single gzipped FASTQ and publish it to `params.extracted`.

    Input:
      - tuple of the FASTQ files to merge, basename, platform and file_label
    Output:
      - tuple of the merged FASTQ, basename, platform and file_label
    */

    tag "${basename}, ${platform}, ${file_label}"
    publishDir params.extracted, mode: 'copy', overwrite: true

    cpus 10

    input:
    tuple path("to_merge/*"), val(basename), val(platform), val(file_label)

    output:
    tuple path("${basename}_${platform}_${file_label}.fastq.gz"), val(basename), val(platform), val(file_label)

    script:
    // Every option line needs a trailing backslash so that the directory
    // argument and the gzip pipe stay part of the same seqkit command.
    """
    seqkit scat \
    --threads ${task.cpus} \
    --find-only \
    --out-format fastq \
    to_merge/ | gzip -c > ${basename}_${platform}_${file_label}.fastq.gz
    """

}
174
+
175
process MERGE_ONT_FASTQS {

    /*
    Concatenate the grouped per-chunk FASTQs staged under `to_merge/` into a
    single gzipped FASTQ and publish it to `params.extracted`.

    Input:
      - tuple of the FASTQ files to merge, basename, platform and file_label
    Output:
      - tuple of the merged FASTQ, basename, platform and file_label
    */

    tag "${basename}, ${platform}, ${file_label}"
    publishDir params.extracted, mode: 'copy', overwrite: true

    cpus 10

    input:
    tuple path("to_merge/*"), val(basename), val(platform), val(file_label)

    output:
    tuple path("${basename}_${platform}_${file_label}.fastq.gz"), val(basename), val(platform), val(file_label)

    script:
    // Every option line needs a trailing backslash so that the directory
    // argument and the gzip pipe stay part of the same seqkit command.
    """
    seqkit scat \
    --threads ${task.cpus} \
    --find-only \
    --out-format fastq \
    to_merge/ | gzip -c > ${basename}_${platform}_${file_label}.fastq.gz
    """

}
200
+
201
process RUN_HIFIASM {

    /*
    Run hifiasm on the merged PacBio reads, passing the merged ONT reads via
    `--ul`, and publish everything it writes to
    `<params.assembly>/<basename>_<file_label>`.

    Inputs:
      - tuple of the merged PacBio FASTQ, basename, platform and file_label
      - tuple of the merged ONT FASTQ, basename, platform and file_label
    Output:
      - tuple of all files created by the task, basename, platform, file_label

    NOTE(review): both input tuples bind the same names (basename, platform,
    file_label), so only one set of values is visible to the script and the
    output. Confirm which set is intended to name the assembly.
    */

    tag "${basename}, ${file_label}"
    publishDir "${params.assembly}/${basename}_${file_label}", mode: 'copy', overwrite: true

    cpus 8

    input:
    tuple path(pb_fastq), val(basename), val(platform), val(file_label)
    tuple path(ont_fastq), val(basename), val(platform), val(file_label)

    output:
    tuple path("*"), val(basename), val(platform), val(file_label)

    script:
    """
    hifiasm -o ${basename}_${file_label} -t6 --ul ${ont_fastq} ${pb_fastq}
    """

}
223
+
224
process CONVERT_CONFIGS_TO_FASTA {

    /*
    Convert the primary-contig GFA written by hifiasm into FASTA: every `S`
    (segment) line becomes a `>name` header followed by its sequence, wrapped
    by `fold`. The result is published to `params.assembly`.

    Input:
      - tuple of the hifiasm output files (staged under `hifiasm_files/`),
        basename, platform and file_label
    Output:
      - `<basename>_<file_label>.p_contigs.fasta`
    */

    tag "${basename}, ${platform}, ${file_label}"
    label "map_and_extract"
    publishDir params.assembly, mode: 'copy', overwrite: true

    cpus 8

    input:
    tuple path("hifiasm_files/*"), val(basename), val(platform), val(file_label)

    output:
    path "${basename}_${file_label}.p_contigs.fasta"

    shell:
    // In a `shell:` block Nextflow values are written !{...}; `$` is left for
    // the shell and awk. `\\n` keeps a literal backslash-n for awk instead of
    // letting Groovy turn it into a real newline inside the awk program.
    '''
    awk '/^S/{print ">"$2"\\n"$3}' hifiasm_files/!{basename}_!{file_label}.bp.p_ctg.gfa \
    | fold > !{basename}_!{file_label}.p_contigs.fasta
    '''

}
247
+
248
+ // --------------------------------------------------------------- //
0 commit comments