@@ -49,7 +49,7 @@ def checkDuplicatedLabels(data):
49
49
for sample in data :
50
50
num_bar += len (data [sample ])
51
51
for barcode in data [sample ]:
52
- labels .add (data [sample ][barcode ])
52
+ labels .add (data [sample ][barcode ][ 0 ] )
53
53
num_lab = len (labels )
54
54
55
55
if num_bar == num_lab :
@@ -71,13 +71,16 @@ def readSampleTable(sampleTable):
71
71
elem = line .rstrip ().split ("\t " )
72
72
if len (elem ) < 3 :
73
73
continue
74
-
75
- sample , barcode , label = elem
74
+ if len (elem ) == 3 :
75
+ sample , barcode , label = elem
76
+ bc_pos = ""
77
+ elif len (elem ) == 4 :
78
+ sample , bc_pos , barcode , label = elem
76
79
# sanitize label
77
80
label = label .replace (' ' , '_' )
78
81
if sample not in d :
79
82
d [sample ] = dict ()
80
- d [sample ][barcode ] = label
83
+ d [sample ][barcode ] = [ label , bc_pos ]
81
84
82
85
if barcode != 'default' and len (barcode ) > bcLen :
83
86
bcLen = len (barcode )
@@ -261,6 +264,7 @@ def processSingle(args, sDict, bcLen, read1):
261
264
262
265
def wrapper (foo ):
263
266
d , args , sDict , bcLen , bc_dict = foo
267
+
264
268
print (f"Pool runner: sample { d } with bcLen { bcLen } " )
265
269
# Make the output directories
266
270
try :
@@ -281,21 +285,21 @@ def wrapper(foo):
281
285
oDict = dict ()
282
286
if R2 is not None :
283
287
for k , v in sDict .items ():
284
- oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ,
285
- subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R2.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
288
+ oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ,
289
+ subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R2.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
286
290
if 'default' not in oDict :
287
291
k = 'default'
288
- v = 'unknown'
289
- oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ,
290
- subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R2.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
292
+ v = [ 'unknown' , '' ]
293
+ oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ,
294
+ subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R2.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
291
295
processPaired (args , oDict , bcLen , R1 , R2 , bc_dict , sDict )
292
296
else :
293
297
for k , v in sDict .items ():
294
- oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
298
+ oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
295
299
if 'default' not in oDict :
296
300
k = 'default'
297
301
# Use the same [label, bc_pos] list shape as the sDict values: the filename
# below is built from v[0], and indexing the bare string 'unknown' would give 'u'.
v = ['unknown', '']
298
- oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
302
+ oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v [ 0 ] ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
299
303
processSingle (args , oDict , bcLen , R1 )
300
304
return bc_dict
301
305
@@ -308,8 +312,11 @@ def plot_bc_occurance(R1, bc_dict, false_bc, output_path, sDict):
308
312
x_ticks = ["false_bc" ]
309
313
310
314
for k ,v in sorted (bc_dict .items ()):
311
- percentages .append (float (v / total_sum )* 100 )
312
- x_ticks .append (str (k ) + '-' + sDict [str (k )])
315
+ percentages .append (float (v / total_sum )* 100 )
316
+ if sDict [str (k )][1 ] == '' :
317
+ x_ticks .append (str (k ) + ' ' + sDict [str (k )][0 ])
318
+ else :
319
+ x_ticks .append (sDict [str (k )][1 ] + ' ' + sDict [str (k )][0 ])
313
320
314
321
percentages = np .asarray (percentages )
315
322
bc_mean = np .mean (percentages [1 :])
0 commit comments