Skip to content

Commit f466f61

Browse files
authored
Merge pull request #163 from mvdbeek/py3_lumpy
Make all lumpy python scripts python2.7 and python3 compatible
2 parents 6f679dd + 5eb03e9 commit f466f61

13 files changed

+130
-135
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
language: python
22
python:
33
- "2.7"
4-
script:
4+
- "3.5"
5+
script:
56
- source activate travis
67
- ./scripts/lumpyexpress -h
78
- python scripts/cnvanator_to_bedpes.py --cnvkit -b 100 --del_o delo2 --dup_o dupo2 -c data/example.cns

scripts/bedpe_sort.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,4 +54,4 @@
5454
for c in order:
5555
if c in B:
5656
for l in sorted(B[c], key=itemgetter(0)):
57-
print l[1]
57+
print(l[1])

scripts/check_sorting.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,15 @@
55
import numpy as np
66

77
if len(sys.argv) < 2:
8-
print 'usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>'
8+
print('usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>')
99
exit(1)
1010

1111
order = []
1212

1313

1414
for i in range(1,len(sys.argv)):
1515
bam_file = sys.argv[i]
16-
print bam_file
16+
print(bam_file)
1717

1818
p = subprocess.Popen(\
1919
['samtools', 'view', '-H', bam_file], \
@@ -48,19 +48,19 @@
4848
curr_chrom_index = order.index(chrom)
4949
curr_pos = -1
5050
elif order.index(chrom) < curr_chrom_index:
51-
print 'out of order:\t' + l + '\toccurred after\t' + \
52-
order[curr_chrom_index] + '\t' + str(curr_pos)
51+
print('out of order:\t' + l + '\toccurred after\t' + \
52+
order[curr_chrom_index] + '\t' + str(curr_pos))
5353
broke = True
5454
break
5555

5656
if pos > curr_pos:
5757
curr_pos = pos
5858
elif pos < curr_pos:
59-
print 'out of order:\t' + l + '\toccurred after\t' + \
60-
order[curr_chrom_index] + '\t' + str(curr_pos)
59+
print('out of order:\t' + l + '\toccurred after\t' + \
60+
order[curr_chrom_index] + '\t' + str(curr_pos))
6161
broke = True
6262
break
6363
if not broke:
64-
print "in order"
64+
print("in order")
6565

6666

scripts/extract-sites.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from __future__ import print_function
1+
22
import sys
33
import gzip
44
import collections

scripts/extractSplitReads_BwaMem

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
1414
for line in data:
1515
split = 0
1616
if line[0] == '@':
17-
print line.strip()
17+
print(line.strip())
1818
continue
1919
samList = line.strip().split('\t')
2020
sam = SAM(samList)
@@ -29,10 +29,10 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
2929
mateFlag = int(0)
3030
if mate[2]=="-": mateFlag = int(16)
3131
if split:
32-
read1 = sam.flag & 64
32+
read1 = sam.flag & 64
3333
if read1 == 64: tag = "_1"
3434
else: tag="_2"
35-
samList[0] = sam.query + tag
35+
samList[0] = sam.query + tag
3636
readCigar = sam.cigar
3737
readCigarOps = extractCigarOps(readCigar,sam.flag)
3838
readQueryPos = calcQueryPosFromCigar(readCigarOps)
@@ -43,7 +43,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
4343
nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap
4444
mno = min(nonOverlap1, nonOverlap2)
4545
if mno >= minNonOverlap:
46-
print "\t".join(samList)
46+
print("\t".join(samList))
4747

4848
#--------------------------------------------------------------------------------------------------
4949
# functions
@@ -82,7 +82,7 @@ class SAM (object):
8282
return int(tagParts[2],16);
8383
return tagParts[2];
8484
return None;
85-
85+
8686
#-----------------------------------------------
8787
cigarPattern = '([0-9]+[MIDNSHP])'
8888
cigarSearch = re.compile(cigarPattern)
@@ -121,9 +121,9 @@ def calcQueryPosFromCigar(cigarOps):
121121
qsPos = 0
122122
qePos = 0
123123
qLen = 0
124-
# if first op is a H, need to shift start position
125-
# the opPosition counter sees if the for loop is looking at the first index of the cigar object
126-
opPosition = 0
124+
# if first op is a H, need to shift start position
125+
# the opPosition counter sees if the for loop is looking at the first index of the cigar object
126+
opPosition = 0
127127
for cigar in cigarOps:
128128
if opPosition == 0 and (cigar.op == 'H' or cigar.op == 'S'):
129129
qsPos += cigar.length
@@ -164,40 +164,40 @@ def calcQueryOverlap(s1,e1,s2,e2):
164164

165165
class Usage(Exception):
166166
def __init__(self, msg):
167-
self.msg = msg
167+
self.msg = msg
168168

169169
def main():
170-
170+
171171
usage = """%prog -i <file>
172172
173173
extractSplitReads_BwaMem v0.1.0
174-
Author: Ira Hall
174+
Author: Ira Hall
175175
Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.
176-
Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
176+
Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
177177
"""
178178
parser = OptionParser(usage)
179-
180-
parser.add_option("-i", "--inFile", dest="inFile",
179+
180+
parser.add_option("-i", "--inFile", dest="inFile",
181181
help="A SAM file or standard input (-i stdin).",
182182
metavar="FILE")
183-
parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",
183+
parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",
184184
help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",
185185
metavar="INT")
186-
parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,
186+
parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,
187187
help="Include alignments marked as duplicates. Default=False")
188-
parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",
188+
parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",
189189
help="minimum non-overlap between split alignments on the query (default=20)",
190190
metavar="INT")
191191
(opts, args) = parser.parse_args()
192192
if opts.inFile is None:
193193
parser.print_help()
194-
print
194+
print()
195195
else:
196196
try:
197197
extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)
198198
except IOError as err:
199199
sys.stderr.write("IOError " + str(err) + "\n");
200200
return
201201
if __name__ == "__main__":
202-
sys.exit(main())
203-
202+
sys.exit(main())
203+

scripts/get_coverages.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66

77
if len(sys.argv) < 2:
8-
print 'usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>'
8+
print('usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>')
99
exit(1)
1010

1111
for i in range(1,len(sys.argv)):
@@ -54,14 +54,14 @@
5454
for l in f:
5555
a = l.rstrip().split('\t')
5656
if float(a[3]) > 0:
57-
C.append(float(a[3]))
57+
C.append(float(a[3]))
5858
W.append((float(a[2])-float(a[1]))/total_len)
5959
min_c = min(C)
6060
max_c = max(C)
6161
mean_c = np.average(C,weights=W)
6262
stdev_c = np.std(C)
63-
print coverage_file + \
63+
print(coverage_file + \
6464
'\tmin:' + str(min_c) + \
6565
'\tmax:' + str(max_c) + \
66-
'\tmean(non-zero):' + str(mean_c)
66+
'\tmean(non-zero):' + str(mean_c))
6767
f.close()

scripts/get_exclude_regions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66

77
if len(sys.argv) < 3:
8-
print 'usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>'
8+
print('usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>')
99
exit(1)
1010

1111
max_c = int(sys.argv[1])
@@ -24,7 +24,7 @@
2424
o.write(l)
2525
f.close()
2626
o.close()
27-
27+
2828
p = subprocess.Popen(\
2929
'cat .exclude.tmp | ' \
3030
'sort -S 20G -k1,1 -k2,2n | ' \

scripts/l_bp.py

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import sys
2-
from sets import Set
32
import re
43

54
def find_all(a_str, sub):
@@ -34,7 +33,7 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
3433
A[7] += ';' + 'SNAME=' + ','.join(samples)
3534
l = '\t'.join(A)
3635

37-
36+
3837
if 'SVTYPE=BND' in A[7]:
3938
m = re.search(r"(\[|\])(.*)(\[|\])",A[4])
4039
o_chr,o_pos = m.group(2).split(':')
@@ -44,13 +43,13 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
4443
pos_s = A[7].find('++:')
4544

4645
if neg_s > 0:
47-
neg_e = neg_s + A[7][neg_s:].find(';')
46+
neg_e = neg_s + A[7][neg_s:].find(';')
4847
pre=A[7][:neg_s]
4948
mid=A[7][neg_s:neg_e]
5049
post=A[7][neg_e:]
5150
A[7] = pre + '++:0,' + mid + post
5251
else:
53-
pos_e = pos_s + A[7][pos_s:].find(';')
52+
pos_e = pos_s + A[7][pos_s:].find(';')
5453
pre=A[7][:pos_s]
5554
mid=A[7][pos_s:pos_e]
5655
post=A[7][pos_e:]
@@ -91,7 +90,7 @@ def split_v(l):
9190

9291
start_r = pos_r + int(m['CIEND'].split(',')[0])
9392
end_r = pos_r + int(m['CIEND'].split(',')[1])
94-
93+
9594
strands = m['STRANDS']
9695

9796
return [m['SVTYPE'],chr_l,chr_r,strands,start_l,end_l,start_r,end_r,m]
@@ -131,7 +130,7 @@ def header_line_cmp(l1, l2):
131130
return -1
132131

133132
if l2[:12] == '##fileformat':
134-
return 1
133+
return 1
135134

136135
# make sure #CHROM ... is last
137136
if l1[1] != '#':
@@ -140,14 +139,14 @@ def header_line_cmp(l1, l2):
140139
return -1
141140

142141
if l1.find('=') == -1:
143-
return -1
142+
return -1
144143
if l2.find('=') == -1:
145144
return 1
146145

147146
h1 = l1[:l1.find('=')]
148147
h2 = l2[:l2.find('=')]
149148
if h1 not in order:
150-
return -1
149+
return -1
151150
if h2 not in order:
152151
return 1
153152
return cmp(order.index(h1),order.index(h2))
@@ -166,10 +165,10 @@ class breakpoint:
166165
sv_type = ''
167166

168167
strands = ''
169-
168+
170169
l = ''
171170

172-
def __init__(self,
171+
def __init__(self,
173172
l,
174173
percent_slop=0,
175174
fixed_slop=0):
@@ -182,7 +181,7 @@ def __init__(self,
182181
self.start_l,\
183182
self.end_l,\
184183
self.start_r, \
185-
self.end_r,
184+
self.end_r,
186185
m] = split_v(l)
187186

188187
self.p_l = [float(x) for x in m['PRPOS'].split(',')]
@@ -218,7 +217,7 @@ def __init__(self,
218217
self.p_r = [float(x)/sum_p_r for x in new_p_r]
219218

220219
# old_l = float(self.end_l - self.start_l + 1)
221-
220+
222221
# self.start_l = max(0,self.start_l-l_slop)
223222
# self.end_l = self.end_l+l_slop
224223

@@ -253,7 +252,7 @@ def __str__(self):
253252
self.end_l,\
254253
self.chr_r,\
255254
self.start_r, \
256-
self.end_r,
255+
self.end_r,
257256
self.sv_type,\
258257
self.strands,\
259258
self.p_l,
@@ -304,7 +303,7 @@ def trim(A):
304303
if A[i] == 0:
305304
clip_end += 1
306305
else:
307-
break
306+
break
308307
return [clip_start, clip_end]
309308

310309

@@ -338,11 +337,11 @@ def align_intervals(I):
338337
new_i = [0]*n + new_i
339338

340339
if i[END] < end:
341-
n = end - i[END]
340+
n = end - i[END]
342341
new_i = new_i + [0]*n
343-
342+
344343
new_I.append(new_i)
345-
344+
346345
return [start, end, new_I]
347346

348347

@@ -386,12 +385,12 @@ def bron_kerbosch(G, R, P, X):
386385
if (len(P) == 0) and (len(X) == 0):
387386
yield R
388387
for v in P:
389-
V = Set([v])
390-
N = Set([g[0] for g in G[v].edges])
391-
388+
V = set([v])
389+
N = set([g[0] for g in G[v].edges])
390+
392391
for r in bron_kerbosch(G, \
393392
R.union(V), \
394-
P.intersection(N),
393+
P.intersection(N),
395394
X.intersection(N)):
396395
yield r
397396

0 commit comments

Comments
 (0)