@@ -602,9 +602,9 @@ def sequencetyper(self):
602
602
sample [self .analysistype ].mismatchestosequencetype = 'NA'
603
603
dotter ()
604
604
else :
605
- sample [self .analysistype ].sequencetype = 'NA'
606
605
sample [self .analysistype ].matchestosequencetype = 'NA'
607
606
sample [self .analysistype ].mismatchestosequencetype = 'NA'
607
+ sample [self .analysistype ].sequencetype = 'NA'
608
608
609
609
def reprofiler (self , header , genome , sample ):
610
610
# reprofiler(numGenes, profileFile, geneList, genome)
@@ -620,54 +620,55 @@ def reprofiler(self, header, genome, sample):
620
620
# Find the last profile entry in the dictionary of profiles
621
621
# Uses the command line tool 'tail' to look at the last line of the file (-1). This last line
622
622
# is split on tabs, and only the first entry (the sequence type number) is captured
623
- if os .path .isfile (sample [self .analysistype ].supplementalprofile ):
624
- try :
625
- lastentry = int (subprocess .check_output (['tail' , '-1' , sample [self .analysistype ].supplementalprofile ])
626
- .split ("\t " )[0 ]) + 1
627
- except ValueError :
623
+ if sample [self .analysistype ].supplementalprofile != 'NA' :
624
+ if os .path .isfile (sample [self .analysistype ].supplementalprofile ):
625
+ try :
626
+ lastentry = int (
627
+ subprocess .check_output (['tail' , '-1' , sample [self .analysistype ].supplementalprofile ])
628
+ .split ("\t " )[0 ]) + 1
629
+ except ValueError :
630
+ lastentry = 1000000
631
+ else :
632
+ open (sample [self .analysistype ].supplementalprofile , 'wb' ).close ()
628
633
lastentry = 1000000
629
- else :
630
- open (sample [self .analysistype ].supplementalprofile , 'wb' ).close ()
631
- lastentry = 1000000
632
- # As there can be multiple profiles in MLSTSeqType, this loop only needs to be performed once.
633
- seqcount = 0
634
- # Go through the sequence types
635
- try :
636
- sequencetype = self .mlstseqtype [genome ].keys ()[0 ]
637
- except IndexError :
638
- sequencetype = ''
639
- seqcount = 1
640
- # Only do this once
641
- if seqcount == 0 :
642
- # Set the :newprofile string to start with the new profile name (e.g. 1000000_CFIA)
643
- newprofile = str (lastentry )
644
- # The number of matches to the reference profile
645
- nummatches = self .mlstseqtype [genome ][sequencetype ].keys ()[0 ]
646
- for sample in self .metadata :
647
- if sample .name == genome :
648
- # The genes in geneList - should be in the correct order
649
- for gene in sorted (sample [self .analysistype ].allelenames ):
650
- # The allele for each gene in the query genome
651
- allele = self .mlstseqtype [genome ][sequencetype ][nummatches ][gene ].keys ()[0 ]
652
- # Append the allele to newprofile
653
- newprofile += '\t {}' .format (allele )
654
- # Add the MLST results for the query genome as well as the new profile data
655
- # to resultProfile
656
- self .resultprofile [genome ]['{}(new)' .format (str (lastentry ))][header ][gene ][allele ] = \
657
- self .mlstseqtype [genome ][sequencetype ][nummatches ][gene ][allele ].values ()[0 ]
658
- seqcount += 1
659
- sample [self .analysistype ].mismatchestosequencetype = 'NA'
660
- # sample[self.analysistype].sequencetype = '{}_CFIA'.format(str(lastentry))
661
- sample [self .analysistype ].matchestosequencetype = header
662
- # Only perform the next loop if :newprofile exists
663
- if newprofile :
664
- # Open the profile file to append
665
- print sample [self .analysistype ].supplementalprofile
666
- with open (sample [self .analysistype ].supplementalprofile , 'ab' ) as appendfile :
667
- # Append the new profile to the end of the profile file
668
- appendfile .write ('{}\n ' .format (newprofile ))
669
- # Re-run profiler with the updated files
670
- self .profiler ()
634
+ # As there can be multiple profiles in MLSTSeqType, this loop only needs to be performed once.
635
+ seqcount = 0
636
+ # Go through the sequence types
637
+ try :
638
+ sequencetype = self .mlstseqtype [genome ].keys ()[0 ]
639
+ except IndexError :
640
+ sequencetype = ''
641
+ seqcount = 1
642
+ # Only do this once
643
+ if seqcount == 0 :
644
+ # Set the :newprofile string to start with the new profile name (e.g. 1000000_CFIA)
645
+ newprofile = str (lastentry )
646
+ # The number of matches to the reference profile
647
+ nummatches = self .mlstseqtype [genome ][sequencetype ].keys ()[0 ]
648
+ for sample in self .metadata :
649
+ if sample .name == genome :
650
+ # The genes in geneList - should be in the correct order
651
+ for gene in sorted (sample [self .analysistype ].allelenames ):
652
+ # The allele for each gene in the query genome
653
+ allele = self .mlstseqtype [genome ][sequencetype ][nummatches ][gene ].keys ()[0 ]
654
+ # Append the allele to newprofile
655
+ newprofile += '\t {}' .format (allele )
656
+ # Add the MLST results for the query genome as well as the new profile data
657
+ # to resultProfile
658
+ self .resultprofile [genome ]['{}(new)' .format (str (lastentry ))][header ][gene ][allele ] = \
659
+ self .mlstseqtype [genome ][sequencetype ][nummatches ][gene ][allele ].values ()[0 ]
660
+ seqcount += 1
661
+ sample [self .analysistype ].mismatchestosequencetype = 'NA'
662
+ # sample[self.analysistype].sequencetype = '{}_CFIA'.format(str(lastentry))
663
+ sample [self .analysistype ].matchestosequencetype = header
664
+ # Only perform the next loop if :newprofile exists
665
+ if newprofile :
666
+ # Open the profile file to append
667
+ with open (sample [self .analysistype ].supplementalprofile , 'ab' ) as appendfile :
668
+ # Append the new profile to the end of the profile file
669
+ appendfile .write ('{}\n ' .format (newprofile ))
670
+ # Re-run profiler with the updated files
671
+ self .profiler ()
671
672
672
673
def reporter (self ):
673
674
""" Parse the results into a report"""
@@ -1094,13 +1095,13 @@ def organismchooser(self):
1094
1095
if not self .allelepath :
1095
1096
# If the name of the organism to analyse was provided
1096
1097
assert self .organism , 'Need to provide either a path to the alleles or an organism name'
1098
+ # If the -g flag was included, download the appropriate MLST scheme for the organism
1099
+ if self .getmlst and self .organism :
1100
+ self .getmlsthelper ()
1097
1101
self .allelepath = '{}{}' .format (self .path , self .organism )
1098
1102
assert os .path .isdir (self .allelepath ), 'Cannot find {}. Please ensure that the folder exists, or ' \
1099
1103
'use the -g option to download the {} MLST scheme' \
1100
1104
.format (self .allelepath , self .organism )
1101
- # If the -g flag was included, download the appropriate MLST scheme for the organism
1102
- if self .getmlst and self .organism :
1103
- self .getmlsthelper ()
1104
1105
# Tries to get the organism name for the folder containing the alleles
1105
1106
self .organism = self .organism if self .organism else os .path .split (self .allelepath )[- 1 ]
1106
1107
if self .cleardatabases :
@@ -1134,8 +1135,6 @@ def organismchooser(self):
1134
1135
self .combinedalleles = glob ('{}/*.fasta' .format (self .allelepath ))
1135
1136
# Get the .txt profile file name and path into a variable
1136
1137
self .profile = glob ('{}/*.txt' .format (self .allelepath ))
1137
- # Add the appropriate variables to the metadata object for each sample
1138
- self .scheme = self .scheme if self .scheme else self .analysistype
1139
1138
for sample in self .samples :
1140
1139
sample [self .analysistype ].alleles = [os .path .split (x )[1 ].split ('.' )[0 ] for x in self .alleles ]
1141
1140
sample [self .analysistype ].allelenames = [os .path .split (x )[1 ].split ('.' )[0 ] for x in self .alleles ]
@@ -1155,7 +1154,8 @@ def getmlsthelper(self):
1155
1154
# As there are multiple profiles for certain organisms, this dictionary has the schemes I use as values
1156
1155
organismdictionary = {'Escherichia' : 'Escherichia coli#1' ,
1157
1156
'Vibrio' : 'Vibrio parahaemolyticus' ,
1158
- 'Campylobacter' : 'Campylobacter jejuni' }
1157
+ 'Campylobacter' : 'Campylobacter jejuni' ,
1158
+ 'Listeria' : 'Listeria monocytogenes' }
1159
1159
# rMLST alleles cannot be fetched in the same way
1160
1160
if self .scheme .lower () != 'rmlst' :
1161
1161
# Allow for a genus not in the dictionary being specified
@@ -1267,7 +1267,7 @@ def __init__(self):
1267
1267
assert os .path .isdir (self .referenceprofilepath ), 'Cannot find {}. Please double check that you ' \
1268
1268
'provided the proper path to the reference profile ' \
1269
1269
'folder' .format (self .referenceprofilepath )
1270
- self .scheme = args .scheme
1270
+ self .scheme = args .scheme if args . scheme else args . type
1271
1271
self .organism = args .organism
1272
1272
self .updateprofile = args .updateprofilefalse
1273
1273
self .updateallele = args .updateallelefalse
@@ -1322,8 +1322,8 @@ class PipelineInit(object):
1322
1322
def strainer (self ):
1323
1323
from accessoryFunctions import GenObject
1324
1324
for sample in self .runmetadata .samples :
1325
+ setattr (sample , self .analysistype , GenObject ())
1325
1326
if sample .general .bestassemblyfile != 'NA' :
1326
- setattr (sample , self .analysistype , GenObject ())
1327
1327
if self .analysistype .lower () == 'rmlst' :
1328
1328
# Run the allele updater method
1329
1329
updatecall , allelefolder = getrmlsthelper (self .referencefilepath , self .updatermlst , self .start )
@@ -1351,14 +1351,14 @@ def strainer(self):
1351
1351
sample [self .analysistype ].combinedalleles = self .combinedalleles
1352
1352
sample [self .analysistype ].supplementalprofile = self .supplementalprofile
1353
1353
else :
1354
- setattr (sample , self .analysistype , GenObject ())
1355
1354
# Set the metadata file appropriately
1356
1355
sample [self .analysistype ].alleles = 'NA'
1357
1356
sample [self .analysistype ].allelenames = 'NA'
1358
1357
sample [self .analysistype ].profile = 'NA'
1359
1358
sample [self .analysistype ].analysistype = 'NA'
1360
1359
sample [self .analysistype ].reportdir = 'NA'
1361
1360
sample [self .analysistype ].combinedalleles = 'NA'
1361
+ sample [self .analysistype ].supplementalprofile = 'NA'
1362
1362
1363
1363
def __init__ (self , inputobject , analysistype ):
1364
1364
self .runmetadata = inputobject .runmetadata
0 commit comments