|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +####################################################################### |
| 4 | +# Copyright (C) 2021 Hannah Muelbaier |
| 5 | +# |
| 6 | +# This script is used to prepare the core group used as input for fDOG-Assembly from a fasta file of an ortholog group. |
| 7 | +# |
| 8 | +# This script is distributed in the hope that it will be useful, |
| 9 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | +# GNU General Public License <http://www.gnu.org/licenses/> for |
| 12 | +# more details |
| 13 | +# |
| 14 | + |
| 15 | +# |
| 16 | +####################################################################### |
| 17 | + |
| 18 | +############################ imports ################################## |
| 19 | +import os |
| 20 | +import os.path |
| 21 | +import sys |
| 22 | +import argparse |
| 23 | +import fdog.libs.alignment as align_fn |
| 24 | +import fdog.libs.zzz as general_fn |
| 25 | + |
| 26 | +def check_fasta(file): |
| 27 | + nHeader = general_fn.count_line(file, '>', True) |
| 28 | + nSeq = general_fn.count_line(file, '>', False) |
| 29 | + if not nHeader == nSeq: |
| 30 | + return(1) |
| 31 | + return(0) |
| 32 | + |
| 33 | +def make_single_line_fasta(input, gene, out_folder): |
| 34 | + print(out_folder) |
| 35 | + output = out_folder + gene + ".fa" |
| 36 | + print(output) |
| 37 | + with open(input, 'r') as f_input, open(output, 'w') as f_output: |
| 38 | + block = [] |
| 39 | + for line in f_input: |
| 40 | + if line.startswith('>'): |
| 41 | + if block: |
| 42 | + f_output.write(''.join(block) + '\n') |
| 43 | + block = [] |
| 44 | + f_output.write(line) |
| 45 | + else: |
| 46 | + block.append(line.strip()) |
| 47 | + |
| 48 | + if block: |
| 49 | + f_output.write(''.join(block) + '\n') |
| 50 | + return (output) |
| 51 | + |
| 52 | +def makeMSA(out_folder, gene, fasta_file): |
| 53 | + aln_file = out_folder + gene + '.aln' |
| 54 | + if align_fn.get_muscle_version('muscle') == 'v3': |
| 55 | + os.system('muscle -quiet -in %s -out %s' % (fasta_file, aln_file)) |
| 56 | + #print("muscle -quiet -in " + output_file + " -out " + aln_file) |
| 57 | + else: |
| 58 | + os.system('muscle -quiet -align %s -out %s' % (fasta_file, aln_file)) |
| 59 | + return aln_file |
| 60 | + |
| 61 | +def makeHMM(out_folder, gene, aln_file): |
| 62 | + hmm_dir = out_folder + 'hmm_dir' |
| 63 | + os.system('mkdir %s >/dev/null 2>&1' % (hmm_dir)) |
| 64 | + out_file = '%s/%s.hmm' % (hmm_dir, gene) |
| 65 | + hmmbuild_cmd = 'hmmbuild --amino %s %s' % (out_file, aln_file) |
| 66 | + os.system(hmmbuild_cmd) |
| 67 | + return out_file |
| 68 | + |
| 69 | + |
| 70 | +def main(): |
| 71 | + |
| 72 | + #################### handle user input ##################################### |
| 73 | + version = '0.0.1' |
| 74 | + ################### initialize parser ###################################### |
| 75 | + parser = argparse.ArgumentParser(description='You are running fdog.addCoreGroup version ' + str(version) + '.') |
| 76 | + ################## required arguments ###################################### |
| 77 | + required = parser.add_argument_group('Required arguments') |
| 78 | + required.add_argument('--fasta', help='Path to fasta file of ortholog group.', action='store', default='', required=True) |
| 79 | + required.add_argument('--out', help='Path to output folder.', action='store', default='', required=True) |
| 80 | + required.add_argument('--geneName', help='Path to output folder.', action='store', default='', required=True) |
| 81 | + args = parser.parse_args() |
| 82 | + |
| 83 | + fasta_file_input = args.fasta |
| 84 | + out_folder = args.out |
| 85 | + gene = args.geneName |
| 86 | + |
| 87 | + |
| 88 | + out_folder = out_folder + '/' + gene + '/' |
| 89 | + os.system('mkdir %s >/dev/null 2>&1' % (out_folder)) |
| 90 | + |
| 91 | + if check_fasta(fasta_file_input) == 1: |
| 92 | + fasta_file = make_single_line_fasta(fasta_file_input, gene, out_folder) |
| 93 | + else: |
| 94 | + fasta_file = out_folder + gene + '.fa' |
| 95 | + os.system('cp ' + fasta_file_input + ' ' + fasta_file) |
| 96 | + |
| 97 | + aln_file = makeMSA(out_folder, gene, fasta_file) |
| 98 | + hmm_file = makeHMM(out_folder, gene, aln_file) |
| 99 | + |
| 100 | + print('Core group located at %s. Fasta file: %s; MSA: %s; HMM: %s' % (out_folder, fasta_file, aln_file, hmm_file)) |
| 101 | + |
| 102 | +main() |
0 commit comments