12
12
import getpass
13
13
14
14
from gotoh2 import *
15
+ from autobot import get_driver , login , retrieve_genomes
15
16
16
17
# Date stamp in ISO format (yyyy-mm-dd), evaluated once when the module loads.
todaystr = datetime.now().strftime('%Y-%m-%d')

# Working directory of the process at the time the module loads.
cwd = os.getcwd()
18
19
19
def get_driver(download_folder, executable_path):
    """
    Instantiate remote control interface for Firefox web browser

    The browser profile is configured to write downloads into
    `download_folder` and to neither prompt for nor open files served as
    octet-stream; the browser itself is started headless.

    :param download_folder: path to write downloaded files
    :param executable_path: path to geckodriver executable
    :return: webdriver.Firefox object
    """
    preferences = (
        ('browser.download.folderList', 2),
        ('browser.download.manager.showWhenStarting', False),
        ("browser.download.dir", download_folder),
        ('browser.helperApps.alwaysAsk.force', False),
        ("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream,octet-stream"),
        ("browser.helperApps.neverAsk.openFile", "application/octet-stream,octet-stream"),
    )

    profile = webdriver.FirefoxProfile()
    for name, value in preferences:
        profile.set_preference(name, value)

    # No assertion on the flag: it is set on the line above, and asserts
    # are stripped when Python runs with -O anyway.
    opts = Options()
    opts.headless = True  # opts.set_headless()

    return webdriver.Firefox(firefox_profile=profile, options=opts, executable_path=executable_path)
40
-
41
-
42
def login(driver):
    """
    Use GISAID access credentials to login to database.

    Credentials are read from the environment variables `gisaid_u_variable`
    and `gisaid_pw_variable`.  If either is missing, both username and
    password are requested interactively (input is not echoed).

    :param driver: webdriver.Firefox object
    :return: the same driver object, after logging in and clicking 'Browse'
    """
    driver.get('https://www.epicov.org/epi3/cfrontend')
    time.sleep(15)  # seconds

    # read login credentials from Environment Variables
    try:
        user = os.environ['gisaid_u_variable']
        pw = os.environ['gisaid_pw_variable']
    except KeyError:
        # variables not set, get access credentials interactively
        user = getpass.getpass(prompt='GISAID username: ')
        pw = getpass.getpass(prompt='GISAID password: ')

    print('logging in')
    # Hand the credentials to the page as script arguments instead of
    # formatting them into the JavaScript source: a quote or backslash in
    # the username/password would otherwise break (or alter) the script.
    driver.execute_script('document.getElementById("elogin").value = arguments[0];', user)
    driver.execute_script('document.getElementById("epassword").value = arguments[0];', pw)
    time.sleep(5)

    # call javascript login function
    driver.execute_script('doLogin()')
    time.sleep(5)

    # navigate to corona virus page
    print('navigating to CoV db')
    element = driver.find_element_by_xpath("//*[contains(text(), 'Browse')]")
    element.click()
    time.sleep(5)

    return driver
76
-
77
def find_prefix(driver):
    """
    Find the prefix variable embedded in the id of the button container.

    :param driver: webdriver.Firefox object
    :return: second underscore-delimited field of the id attribute of the
             div whose class is 'buttons container-slot'
    """
    container = driver.find_element_by_xpath("//div[@class='buttons container-slot']")
    html_id = container.get_attribute('id')
    fields = html_id.split('_')
    return fields[1]
81
-
82
def retrieve_genomes(driver, start, end, download_folder):
    """
    Retrieve genomes with a specified deposition date range in the GISAID database.
    Adding several time delays to avoid spamming the database.

    :param driver: webdriver.Firefox object from login()
    :param start: date in ISO format (yyyy-mm-dd)
    :param end: date in ISO format (yyyy-mm-dd)
    :param download_folder: path that the browser writes downloaded files to;
                            files already present there are ignored
    :return: path to file download
    """

    # find prefix variable
    element = driver.find_element_by_xpath("//div[@class='buttons container-slot']")
    htmlid_as_list = element.get_attribute('id').split('_')
    variable = htmlid_as_list[1]

    # clear the search form before entering new criteria
    print('navigating to CoV db')
    element = driver.find_element_by_xpath("//button[contains(text(), 'Reset')]")
    element.click()
    time.sleep(5)

    # trigger selection change
    time_string = '[id^="ce_' + variable + '"][id$="_input"]'

    # matches 2 and 3 of the selector receive the start and end dates
    for index, date in ((2, start), (3, end)):
        driver.execute_script(
            "document.querySelectorAll('{}')[{}].value = '{}'".format(time_string, index, date)
        )
        driver.execute_script(
            "document.querySelectorAll('{}')[{}].onchange()".format(time_string, index)
        )

    # fire the start-date handler once more now that both dates are set
    driver.execute_script("document.querySelectorAll('{}')[2].onchange()".format(time_string))
    time.sleep(15)

    print('selecting all seqs')
    element = driver.find_element_by_xpath("//*[contains(text(), 'Total')]")
    count = element.get_attribute('innerHTML').split()[1].replace(',', '')
    if int(count) > 10000:
        # allow extra time for large result sets
        time.sleep(15)

    checkbox = driver.find_element_by_xpath("//span[@class='yui-dt-label']/input[@type='checkbox']")
    checkbox.click()
    time.sleep(5)

    # download seqs
    element = driver.find_element_by_xpath("//*[contains(text(), 'Download')]")
    driver.execute_script("arguments[0].click();", element)
    time.sleep(5)

    # switch to iframe to download
    driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))
    print("Download")
    time.sleep(5)

    button = driver.find_element_by_xpath(
        "//*[contains(text(), 'Download')]//ancestor::div[@style='float: right']"
    )

    # Remember what is already in the folder so that leftovers from earlier
    # runs are neither mistaken for a finished download nor returned.
    already_present = set(os.listdir(download_folder))
    button.click()
    time.sleep(5)

    # wait for download to complete
    while True:
        new_files = sorted(set(os.listdir(download_folder)) - already_present)
        if not new_files:
            # download has not started yet
            time.sleep(10)
            continue
        if any(f.endswith('.part') for f in new_files):
            # partial file still present, download in progress
            time.sleep(5)
            continue
        break

    print('Downloading complete')
    downloaded_file = new_files[0]
    driver.switch_to.default_content()

    # reset browser
    element = driver.find_element_by_xpath("//button[contains(text(), 'Reset')]")
    element.click()
    return os.path.join(download_folder, downloaded_file)
162
-
163
-
164
20
def compare_dicts (old_fasta , new_fasta ):
165
21
old_dict = dict (convert_fasta (open (old_fasta )))
166
22
new_dict = dict (convert_fasta (open (new_fasta )))
@@ -174,6 +30,8 @@ def compare_dicts(old_fasta, new_fasta):
174
30
block_diff [key ] = value
175
31
return block_diff
176
32
33
+
34
+
177
35
def parse_args ():
178
36
""" Command line interface """
179
37
parser = argparse .ArgumentParser (
0 commit comments