Skip to content

Commit b8cc2ce

Browse files
author
emmanuel
committed
importing functions from autobot instead of duplication
1 parent 38e0e6c commit b8cc2ce

File tree

3 files changed

+11
-153
lines changed

3 files changed

+11
-153
lines changed

.gitignore

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
data/
1+
data/*
22
.idea/
33
treetime/*
44
scripts/geckodriver.log

scripts/ChunkyBot.py

+3-145
Original file line number | Diff line number | Diff line change
@@ -12,155 +12,11 @@
1212
import getpass
1313

1414
from gotoh2 import *
15+
from autobot import get_driver, login, retrieve_genomes
1516

1617
todaystr = datetime.strftime(datetime.now(), '%Y-%m-%d')
1718
cwd = os.getcwd()
1819

19-
def get_driver(download_folder, executable_path):
20-
"""
21-
Instantiate remote control interface for Firefox web browser
22-
23-
:param download_folder: path to write downloaded files
24-
:param executable_path: path to geckodriver executable
25-
:return:
26-
"""
27-
profile = webdriver.FirefoxProfile()
28-
profile.set_preference('browser.download.folderList', 2)
29-
profile.set_preference('browser.download.manager.showWhenStarting', False)
30-
profile.set_preference("browser.download.dir", download_folder)
31-
profile.set_preference('browser.helperApps.alwaysAsk.force', False)
32-
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream,octet-stream")
33-
profile.set_preference("browser.helperApps.neverAsk.openFile", "application/octet-stream,octet-stream")
34-
35-
opts = Options()
36-
opts.headless = True # opts.set_headless()
37-
assert opts.headless
38-
39-
return webdriver.Firefox(firefox_profile=profile, options=opts, executable_path=executable_path)
40-
41-
42-
def login(driver):
43-
"""
44-
Use GISAID access credentials to login to database.
45-
:param driver: webdriver.Firefox object
46-
:return:
47-
"""
48-
driver.get('https://www.epicov.org/epi3/cfrontend')
49-
time.sleep(15) # seconds
50-
51-
# read login credentials from Environment Variables
52-
try:
53-
user = os.environ['gisaid_u_variable']
54-
pw = os.environ['gisaid_pw_variable']
55-
except KeyError:
56-
# variables not set, get access credentials interactively
57-
user = getpass.getpass(prompt='GISAID username: ')
58-
pw = getpass.getpass(prompt='GISAID password: ')
59-
60-
print('logging in')
61-
driver.execute_script('document.getElementById("elogin").value="{}"'.format(user))
62-
driver.execute_script('document.getElementById("epassword").value="{}"'.format(pw))
63-
time.sleep(5)
64-
65-
# call javascript login function
66-
driver.execute_script('doLogin()')
67-
time.sleep(5)
68-
69-
#navigate to corona virus page
70-
print('navigating to CoV db')
71-
element = driver.find_element_by_xpath("//*[contains(text(), 'Browse')]")
72-
element.click()
73-
time.sleep(5)
74-
75-
return driver
76-
77-
def find_prefix(driver):
78-
#find prefix variable
79-
element = driver.find_element_by_xpath("//div[@class='buttons container-slot']")
80-
return element.get_attribute('id').split('_')[1]
81-
82-
def retrieve_genomes(driver, start, end, download_folder):
83-
"""
84-
Retrieve genomes with a specified deposition date range in the GISAID database.
85-
Adding several time delays to avoid spamming the database.
86-
87-
:param driver: webdriver.Firefox object from login()
88-
:param start: date in ISO format (yyyy-mm-dd)
89-
:param end: date in ISO format (yyyy-mm-dd)
90-
:return: path to file download
91-
"""
92-
93-
# find prefix variable
94-
element = driver.find_element_by_xpath("//div[@class='buttons container-slot']")
95-
htmlid_as_list = element.get_attribute('id').split('_')
96-
variable = htmlid_as_list[1]
97-
98-
# navigate to corona virus page
99-
print('navigating to CoV db')
100-
element = driver.find_element_by_xpath("//button[contains(text(), 'Reset')]")
101-
element.click()
102-
time.sleep(5)
103-
104-
# trigger selection change
105-
time_string = '[id^="ce_' + variable + '"][id$="_input"]'
106-
107-
driver.execute_script("document.querySelectorAll('{}')[2].value = '{}'".format(time_string, start))
108-
driver.execute_script("document.querySelectorAll('{}')[2].onchange()".format(time_string))
109-
110-
driver.execute_script("document.querySelectorAll('{}')[3].value = '{}'".format(time_string, end))
111-
driver.execute_script("document.querySelectorAll('{}')[3].onchange()".format(time_string))
112-
113-
driver.execute_script("document.querySelectorAll('[id^=\"ce_{}\"][id$=_input]')[2].onchange()".format(variable))
114-
time.sleep(15)
115-
116-
print('selecting all seqs')
117-
element = driver.find_element_by_xpath("//*[contains(text(), 'Total')]")
118-
count = element.get_attribute('innerHTML').split()[1].replace(',', '')
119-
if int(count) > 10000:
120-
time.sleep(15)
121-
122-
checkbox = driver.find_element_by_xpath("//span[@class='yui-dt-label']/input[@type='checkbox']")
123-
checkbox.click()
124-
time.sleep(5)
125-
126-
# download seqs
127-
element = driver.find_element_by_xpath("//*[contains(text(), 'Download')]")
128-
driver.execute_script("arguments[0].click();", element)
129-
time.sleep(5)
130-
131-
# switch to iframe to download
132-
driver.switch_to_frame(driver.find_element_by_tag_name("iframe"))
133-
print("Download")
134-
time.sleep(5)
135-
136-
button = driver.find_element_by_xpath(
137-
"//*[contains(text(), 'Download')]//ancestor::div[@style='float: right']"
138-
)
139-
button.click()
140-
time.sleep(5)
141-
142-
# wait for download to complete
143-
while True:
144-
files = os.listdir(download_folder)
145-
if len(files) == 0:
146-
# download has not started yet
147-
time.sleep(10)
148-
continue
149-
if any([f.endswith('.part') for f in files]):
150-
time.sleep(5)
151-
continue
152-
break
153-
154-
print('Downloading complete')
155-
downloaded_file = os.listdir(download_folder)[0]
156-
driver.switch_to.default_content()
157-
158-
# reset browser
159-
element = driver.find_element_by_xpath("//button[contains(text(), 'Reset')]")
160-
element.click()
161-
return os.path.join(download_folder, downloaded_file)
162-
163-
16420
def compare_dicts(old_fasta, new_fasta):
16521
old_dict = dict(convert_fasta(open(old_fasta)))
16622
new_dict = dict(convert_fasta(open(new_fasta)))
@@ -174,6 +30,8 @@ def compare_dicts(old_fasta, new_fasta):
17430
block_diff[key] = value
17531
return block_diff
17632

33+
34+
17735
def parse_args():
17836
""" Command line interface """
17937
parser = argparse.ArgumentParser(

scripts/autobot.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,12 @@ def login(driver):
6262
driver.execute_script('doLogin()')
6363
time.sleep(5)
6464

65+
# navigate to corona virus page
66+
print('navigating to CoV db')
67+
element = driver.find_element_by_xpath("//*[contains(text(), 'Browse')]")
68+
element.click()
69+
time.sleep(5)
70+
6571
return driver
6672

6773

@@ -81,12 +87,6 @@ def retrieve_genomes(driver, start, end, download_folder):
8187
htmlid_as_list = element.get_attribute('id').split('_')
8288
variable = htmlid_as_list[1]
8389

84-
# navigate to corona virus page
85-
print('navigating to CoV db')
86-
element = driver.find_element_by_xpath("//*[contains(text(), 'Browse')]")
87-
element.click()
88-
time.sleep(5)
89-
9090
# trigger selection change
9191
time_string = '[id^="ce_' + variable + '"][id$="_input"]'
9292

@@ -133,7 +133,6 @@ def retrieve_genomes(driver, start, end, download_folder):
133133
time.sleep(10)
134134
continue
135135
if any([f.endswith('.part') for f in files]):
136-
# FIXME: is this platform specific?
137136
time.sleep(5)
138137
continue
139138
break
@@ -142,6 +141,7 @@ def retrieve_genomes(driver, start, end, download_folder):
142141
downloaded_file = os.listdir(download_folder)[0]
143142

144143
# reset browser
144+
time.sleep(30)
145145
driver.switch_to.default_content()
146146
element = driver.find_element_by_xpath("//button[@class='sys-event-hook sys-form-button']")
147147
element.click()

0 commit comments

Comments
 (0)