8
8
import matplotlib .pyplot as plt
9
9
import numpy as np
10
10
import editdistance as ed
11
+ from rich import print
11
12
12
13
def parseArgs (args = None ):
13
14
parser = argparse .ArgumentParser (description = "Process RELACS reads by moving a barcode into the header and writing output to per-barcode files." )
@@ -212,7 +213,7 @@ def writePaired(read1, read2, of, bc, bcLen, args, doTrim=True):
212
213
return bc
213
214
214
215
215
- def processPaired (args , sDict , bcLen , read1 , read2 , bc_dict ):
216
+ def processPaired (args , sDict , bcLen , read1 , read2 , bc_dict , ori_rDict ):
216
217
f1_ = subprocess .Popen ("gunzip -c {}" .format (read1 ), shell = True , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
217
218
f2_ = subprocess .Popen ("gunzip -c {}" .format (read2 ), shell = True , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
218
219
f1 = f1_ .stdout
@@ -238,7 +239,7 @@ def processPaired(args, sDict, bcLen, read1, read2, bc_dict):
238
239
bc_dict [bc ] += 1
239
240
else :
240
241
false_bc += 1
241
- plot_bc_occurance (read1 , bc_dict , false_bc , args .output )
242
+ plot_bc_occurance (read1 , bc_dict , false_bc , args .output , ori_rDict )
242
243
f1 .close ()
243
244
f2 .close ()
244
245
@@ -259,9 +260,8 @@ def processSingle(args, sDict, bcLen, read1):
259
260
260
261
261
262
def wrapper (foo ):
262
- d , args , sDict , bcLen ,bc_dict = foo
263
- print ("Processing library {}" .format (d ))
264
-
263
+ d , args , sDict , bcLen , bc_dict = foo
264
+ print (f"Pool runner: sample { d } with bcLen { bcLen } " )
265
265
# Make the output directories
266
266
try :
267
267
os .makedirs ("{}/{}" .format (args .output , d ))
@@ -288,7 +288,7 @@ def wrapper(foo):
288
288
v = 'unknown'
289
289
oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ,
290
290
subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R2.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
291
- processPaired (args , oDict , bcLen , R1 , R2 , bc_dict )
291
+ processPaired (args , oDict , bcLen , R1 , R2 , bc_dict , sDict )
292
292
else :
293
293
for k , v in sDict .items ():
294
294
oDict [k ] = [subprocess .Popen (['gzip' , '-c' ], stdout = open ('{}/{}/{}_R1.fastq.gz' .format (args .output , d , v ), "wb" ), stdin = subprocess .PIPE , bufsize = 0 ).stdin ]
@@ -299,16 +299,17 @@ def wrapper(foo):
299
299
processSingle (args , oDict , bcLen , R1 )
300
300
return bc_dict
301
301
302
- def plot_bc_occurance (R1 , bc_dict , false_bc , output_path ):
302
+ def plot_bc_occurance (R1 , bc_dict , false_bc , output_path , sDict ):
303
303
total_sum = false_bc
304
304
for k ,v in bc_dict .items ():
305
305
total_sum += v
306
306
307
307
percentages = [float (false_bc / total_sum )* 100 ]
308
308
x_ticks = ["false_bc" ]
309
- for k ,v in bc_dict .items ():
309
+
310
+ for k ,v in sorted (bc_dict .items ()):
310
311
percentages .append (float (v / total_sum )* 100 )
311
- x_ticks .append (str (k ))
312
+ x_ticks .append (str (k ) + '-' + sDict [ str ( k )] )
312
313
313
314
percentages = np .asarray (percentages )
314
315
bc_mean = np .mean (percentages [1 :])
@@ -322,11 +323,11 @@ def plot_bc_occurance(R1, bc_dict, false_bc, output_path):
322
323
exp_value = 100 / (len (x )- 1 )
323
324
ax .axhline (y = exp_value , linestyle = "--" , linewidth = 0.5 , color = 'k' )
324
325
xx = [- 1 ]+ list (range (len (x )))+ [len (x )+ 1 ]
325
- print (xx )
326
326
ax .fill_between (xx , [bc_mean + bc_std ]* len (xx ), [bc_mean - bc_std ]* len (xx ), color = 'dimgrey' , alpha = 0.2 , zorder = 3 )
327
327
plt .ylabel ("% of total reads" )
328
328
sample_name = R1 .split ("_R1" )[0 ]
329
329
fig_path_name = os .path .join (output_path ,sample_name + "_fig.png" )
330
+ plt .tight_layout ()
330
331
plt .savefig (fig_path_name , pad_inches = 0.6 , bbox_inches = 'tight' )
331
332
332
333
def main (args = None ):
0 commit comments