Skip to content

Commit 3a4c313

Browse files
Merge pull request #25 from shaikhsajid1111/version_2.0
Updates for version 2.0.0
2 parents 84ec015 + 9ba4586 commit 3a4c313

9 files changed

+693
-589
lines changed

README.md

+25
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,11 @@ Output:
227227
<td>String</td>
228228
<td>Only used when the output_format parameter is set to CSV. If not passed, the CSV file will be saved in the current working directory.</td>
229229
</tr>
230+
<tr>
231+
<td>headless</td>
232+
<td>Boolean</td>
233+
<td>Whether to run the crawler in headless mode. Default is <code>True</code>.</td>
234+
</tr>
230235
</tbody>
231236
</table>
232237

@@ -486,6 +491,26 @@ Output:
486491
<td>String</td>
487492
<td>Only used when the output parameter is set to CSV. If not passed, the CSV file will be saved in the current working directory.</td>
488493
</tr>
494+
<tr>
495+
<td>since_id</td>
496+
<td>Integer</td>
497+
<td>Only match tweets after (NOT inclusive) the specified Snowflake ID. Example <a href="https://twitter.com/search?q=since_id%3A1138872932887924737%20max_id%3A1144730280353247233%20%23nasamoontunes&src=typed_query&f=live">here</a>.</td>
498+
</tr>
499+
<tr>
500+
<td>max_id</td>
501+
<td>Integer</td>
502+
<td>Only match tweets at or before (inclusive) the specified Snowflake ID. Example <a href="https://twitter.com/search?q=since_id%3A1138872932887924737%20max_id%3A1144730280353247233%20%23nasamoontunes&src=typed_query&f=live">here</a>.</td>
503+
</tr>
504+
<tr>
505+
<td>within_time</td>
506+
<td>String</td>
507+
<td>Search within the last number of days, hours, minutes, or seconds. Example <code>2d, 3h, 5m, 30s</code>.</td>
508+
</tr>
509+
<tr>
510+
<td>headless</td>
511+
<td>Boolean</td>
512+
<td>Whether to run the crawler in headless mode. Default is <code>True</code>.</td>
513+
</tr>
489514
</tbody>
490515
</table>
491516
</div>

requirements.txt

-5
This file was deleted.

setup.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,10 @@
33
with open("README.md", "r", encoding="utf-8") as file:
44
long_description = file.read()
55

6-
requirements = []
7-
8-
for line in open("requirements.txt", 'r', encoding="utf-8").readlines():
9-
requirements.append(line.replace("\n", ""))
106

117
setuptools.setup(
128
name="twitter_scraper_selenium",
13-
version="0.1.7",
9+
version="2.0.0",
1410
author="Sajid Shaikh",
1511
author_email="[email protected]",
1612
description="Python package to scrap twitter's front-end easily with selenium",
@@ -41,5 +37,11 @@
4137

4238
],
4339
python_requires=">=3.6",
44-
install_requires=requirements
40+
install_requires=[
41+
'python-dateutil==2.8.2',
42+
'selenium==4.3.0',
43+
'selenium-wire==4.6.4',
44+
'webdriver-manager==3.2.2',
45+
'fake-headers==1.0.2'
46+
]
4547
)

twitter_scraper_selenium/driver_initialization.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,32 @@
22
try:
33
from seleniumwire import webdriver
44
# to add capabilities for chrome and firefox, import their Options with different aliases
5-
from selenium.webdriver.chrome.options import Options as ChromeOptions
6-
from selenium.webdriver.firefox.options import Options as FirefoxOptions
5+
from selenium.webdriver.chrome.options import Options as CustomChromeOptions
6+
from selenium.webdriver.firefox.options import Options as CustomFireFoxOptions
77
# import webdriver for downloading respective driver for the browser
88
from webdriver_manager.chrome import ChromeDriverManager
99
from webdriver_manager.firefox import GeckoDriverManager
1010
from fake_headers import Headers
11+
from selenium.webdriver.chrome.service import Service as ChromeService
12+
from selenium.webdriver.firefox.service import Service as FirefoxService
13+
1114
except Exception as ex:
1215
print(ex)
1316

1417

1518
class Initializer:
1619

17-
def __init__(self, browser_name, proxy=None):
20+
def __init__(self, browser_name, headless, proxy=None):
1821
self.browser_name = browser_name
1922
self.proxy = proxy
23+
self.headless = headless
2024

2125
def set_properties(self, browser_option):
2226
"""adds capabilities to the driver"""
2327
header = Headers().generate()['User-Agent']
24-
browser_option.add_argument(
25-
'--headless') # runs browser in headless mode
28+
if self.headless:
29+
browser_option.add_argument(
30+
'--headless') # runs browser in headless mode
2631
browser_option.add_argument('--no-sandbox')
2732
browser_option.add_argument("--disable-dev-shm-usage")
2833
browser_option.add_argument('--ignore-certificate-errors')
@@ -37,7 +42,7 @@ def set_driver_for_browser(self, browser_name):
3742
"""expects browser name and returns a driver instance"""
3843
# if browser is suppose to be chrome
3944
if browser_name.lower() == "chrome":
40-
browser_option = ChromeOptions()
45+
browser_option = CustomChromeOptions()
4146
# automatically installs chromedriver and initialize it and returns the instance
4247
if self.proxy is not None:
4348
options = {
@@ -46,24 +51,26 @@ def set_driver_for_browser(self, browser_name):
4651
'no_proxy': 'localhost, 127.0.0.1'
4752
}
4853
print("Using: {}".format(self.proxy))
49-
return webdriver.Chrome(executable_path=ChromeDriverManager().install(),
54+
55+
return webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()),
5056
options=self.set_properties(browser_option), seleniumwire_options=options)
5157

52-
return webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=self.set_properties(browser_option))
58+
return webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=self.set_properties(browser_option))
5359
elif browser_name.lower() == "firefox":
54-
browser_option = FirefoxOptions()
60+
browser_option = CustomFireFoxOptions()
5561
if self.proxy is not None:
5662
options = {
5763
'https': 'https://{}'.format(self.proxy.replace(" ", "")),
5864
'http': 'http://{}'.format(self.proxy.replace(" ", "")),
5965
'no_proxy': 'localhost, 127.0.0.1'
6066
}
6167
print("Using: {}".format(self.proxy))
62-
return webdriver.Firefox(executable_path=GeckoDriverManager().install(),
68+
69+
return webdriver.Firefox(service=FirefoxService(executable_path=GeckoDriverManager().install()),
6370
options=self.set_properties(browser_option), seleniumwire_options=options)
6471

6572
# automatically installs geckodriver and initialize it and returns the instance
66-
return webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=self.set_properties(browser_option))
73+
return webdriver.Firefox(service=FirefoxService(executable_path=GeckoDriverManager().install()), options=self.set_properties(browser_option))
6774
else:
6875
# if browser_name is not chrome neither firefox than raise an exception
6976
raise Exception("Browser not supported!")

twitter_scraper_selenium/driver_utils.py

+31-29
Original file line numberDiff line numberDiff line change
@@ -13,39 +13,41 @@
1313
from random import randint
1414
except Exception as ex:
1515
frameinfo = currentframe()
16-
print("Error on line no. {} : {}".format(frameinfo.f_lineno,ex))
16+
print("Error on line no. {} : {}".format(frameinfo.f_lineno, ex))
1717

1818
frameinfo = currentframe()
1919

20+
2021
class Utilities:
21-
"""this class contains all the method related to driver behaviour,
22-
like scrolling, waiting for element to appear, it contains all static
23-
method, which accepts driver instance as a argument"""
22+
"""this class contains all the method related to driver behaviour,
23+
like scrolling, waiting for element to appear, it contains all static
24+
method, which accepts driver instance as a argument"""
2425

25-
@staticmethod
26-
def __wait_until_tweets_appear(driver):
27-
try:
28-
WebDriverWait(driver, 10).until(EC.presence_of_element_located(
29-
(By.CSS_SELECTOR, '[data-testid="tweet"]')))
30-
except WebDriverException:
31-
print("Tweets did not appear!")
26+
@staticmethod
27+
def __wait_until_tweets_appear(driver):
28+
try:
29+
WebDriverWait(driver, 10).until(EC.presence_of_element_located(
30+
(By.CSS_SELECTOR, '[data-testid="tweet"]')))
31+
except WebDriverException:
32+
print(
33+
"Tweets did not appear!, Try setting headless=False to see what is happening")
3234

33-
@staticmethod
34-
def __scroll_down(driver):
35-
try:
36-
body = driver.find_element_by_css_selector('body')
37-
for _ in range(3):
38-
body.send_keys(Keys.PAGE_DOWN)
39-
except Exception as ex:
40-
print("Error on line no. {} : {}".format(frameinfo.f_lineno,ex))
35+
@staticmethod
36+
def __scroll_down(driver):
37+
try:
38+
body = driver.find_element(By.CSS_SELECTOR, 'body')
39+
for _ in range(randint(1,3)):
40+
body.send_keys(Keys.PAGE_DOWN)
41+
except Exception as ex:
42+
print("Error on line no. {} : {}".format(frameinfo.f_lineno, ex))
4143

42-
@staticmethod
43-
def __wait_until_completion(driver):
44-
"""waits until the page have completed loading"""
45-
try:
46-
state = ""
47-
while state != "complete":
48-
time.sleep(randint(3, 5))
49-
state = driver.execute_script("return document.readyState")
50-
except Exception as ex:
51-
print(ex)
44+
@staticmethod
45+
def __wait_until_completion(driver):
46+
"""waits until the page have completed loading"""
47+
try:
48+
state = ""
49+
while state != "complete":
50+
time.sleep(randint(3, 5))
51+
state = driver.execute_script("return document.readyState")
52+
except Exception as ex:
53+
print(ex)

0 commit comments

Comments
 (0)