Skip to content

Commit 3cc809c

Browse files
committed
script to produce msa and hmm in the format fDOG-Assembly requires from a fasta file
1 parent ef5acb7 commit 3cc809c

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed

fdog/makeCoreGroupFromFasta.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# -*- coding: utf-8 -*-
2+
3+
#######################################################################
4+
# Copyright (C) 2021 Hannah Muelbaier
5+
#
6+
# This script is used to prepare the core group used as input for fDOG-Assembly from a fasta file of an ortholog group.
7+
#
8+
# This script is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License <http://www.gnu.org/licenses/> for
12+
# more details
13+
#
14+
15+
#
16+
#######################################################################
17+
18+
############################ imports ##################################
19+
import os
20+
import os.path
21+
import sys
22+
import argparse
23+
import fdog.libs.alignment as align_fn
24+
import fdog.libs.zzz as general_fn
25+
26+
def check_fasta(file):
27+
nHeader = general_fn.count_line(file, '>', True)
28+
nSeq = general_fn.count_line(file, '>', False)
29+
if not nHeader == nSeq:
30+
return(1)
31+
return(0)
32+
33+
def make_single_line_fasta(input, gene, out_folder):
34+
print(out_folder)
35+
output = out_folder + gene + ".fa"
36+
print(output)
37+
with open(input, 'r') as f_input, open(output, 'w') as f_output:
38+
block = []
39+
for line in f_input:
40+
if line.startswith('>'):
41+
if block:
42+
f_output.write(''.join(block) + '\n')
43+
block = []
44+
f_output.write(line)
45+
else:
46+
block.append(line.strip())
47+
48+
if block:
49+
f_output.write(''.join(block) + '\n')
50+
return (output)
51+
52+
def makeMSA(out_folder, gene, fasta_file):
53+
aln_file = out_folder + gene + '.aln'
54+
if align_fn.get_muscle_version('muscle') == 'v3':
55+
os.system('muscle -quiet -in %s -out %s' % (fasta_file, aln_file))
56+
#print("muscle -quiet -in " + output_file + " -out " + aln_file)
57+
else:
58+
os.system('muscle -quiet -align %s -out %s' % (fasta_file, aln_file))
59+
return aln_file
60+
61+
def makeHMM(out_folder, gene, aln_file):
62+
hmm_dir = out_folder + 'hmm_dir'
63+
os.system('mkdir %s >/dev/null 2>&1' % (hmm_dir))
64+
out_file = '%s/%s.hmm' % (hmm_dir, gene)
65+
hmmbuild_cmd = 'hmmbuild --amino %s %s' % (out_file, aln_file)
66+
os.system(hmmbuild_cmd)
67+
return out_file
68+
69+
70+
def main():
71+
72+
#################### handle user input #####################################
73+
version = '0.0.1'
74+
################### initialize parser ######################################
75+
parser = argparse.ArgumentParser(description='You are running fdog.addCoreGroup version ' + str(version) + '.')
76+
################## required arguments ######################################
77+
required = parser.add_argument_group('Required arguments')
78+
required.add_argument('--fasta', help='Path to fasta file of ortholog group.', action='store', default='', required=True)
79+
required.add_argument('--out', help='Path to output folder.', action='store', default='', required=True)
80+
required.add_argument('--geneName', help='Path to output folder.', action='store', default='', required=True)
81+
args = parser.parse_args()
82+
83+
fasta_file_input = args.fasta
84+
out_folder = args.out
85+
gene = args.geneName
86+
87+
88+
out_folder = out_folder + '/' + gene + '/'
89+
os.system('mkdir %s >/dev/null 2>&1' % (out_folder))
90+
91+
if check_fasta(fasta_file_input) == 1:
92+
fasta_file = make_single_line_fasta(fasta_file_input, gene, out_folder)
93+
else:
94+
fasta_file = out_folder + gene + '.fa'
95+
os.system('cp ' + fasta_file_input + ' ' + fasta_file)
96+
97+
aln_file = makeMSA(out_folder, gene, fasta_file)
98+
hmm_file = makeHMM(out_folder, gene, aln_file)
99+
100+
print('Core group located at %s. Fasta file: %s; MSA: %s; HMM: %s' % (out_folder, fasta_file, aln_file, hmm_file))
101+
102+
main()

0 commit comments

Comments
 (0)