Skip to content

Commit bf7326f

Browse files
authored
Merge pull request #862 from ScrapeGraphAI/pre/beta
Pre/beta
2 parents 96064f2 + a9569ac commit bf7326f

File tree

10 files changed

+580
-91
lines changed

10 files changed

+580
-91
lines changed

.github/workflows/release.yml

+33-11
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,55 @@ jobs:
1414
run: |
1515
sudo apt update
1616
sudo apt install -y git
17+
18+
- name: Set up Python
19+
uses: actions/setup-python@v5
20+
with:
21+
python-version: '3.10'
22+
1723
- name: Install uv
1824
uses: astral-sh/setup-uv@v3
25+
1926
- name: Install Node Env
2027
uses: actions/setup-node@v4
2128
with:
2229
node-version: 20
30+
2331
- name: Checkout
2432
uses: actions/checkout@<ref>  # ref lost in this capture: e-mail obfuscation rewrote it to "[email protected]"
2533
with:
2634
fetch-depth: 0
2735
persist-credentials: false
28-
- name: Build app
36+
37+
- name: Build and validate package
2938
run: |
39+
uv venv
40+
. .venv/bin/activate
41+
uv pip install --upgrade setuptools wheel hatchling
3042
uv sync --frozen
43+
uv pip install -e .
3144
uv build
32-
id: build_cache
33-
if: success()
45+
uv pip install --upgrade pkginfo==1.12.0 twine==6.0.1 # Upgrade pkginfo and install twine
46+
python -m twine check dist/*
47+
48+
- name: Debug Dist Directory
49+
run: ls -al dist
50+
3451
- name: Cache build
35-
uses: actions/cache@v2
52+
uses: actions/cache@v3
3653
with:
3754
path: ./dist
38-
key: ${{ runner.os }}-build-${{ hashFiles('dist/**') }}
39-
if: steps.build_cache.outputs.id != ''
55+
key: ${{ runner.os }}-build-${{ github.sha }}
4056

4157
release:
4258
name: Release
4359
runs-on: ubuntu-latest
4460
needs: build
4561
environment: development
46-
if: |
47-
github.event_name == 'push' && github.ref == 'refs/heads/main' ||
48-
github.event_name == 'push' && github.ref == 'refs/heads/pre/beta' ||
49-
github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'main' ||
50-
github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'pre/beta'
62+
if: >
63+
github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/pre/beta') ||
64+
(github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged &&
65+
(github.event.pull_request.base.ref == 'main' || github.event.pull_request.base.ref == 'pre/beta'))
5166
permissions:
5267
contents: write
5368
issues: write
@@ -59,6 +74,13 @@ jobs:
5974
with:
6075
fetch-depth: 0
6176
persist-credentials: false
77+
78+
- name: Restore build artifacts
79+
uses: actions/cache@v3
80+
with:
81+
path: ./dist
82+
key: ${{ runner.os }}-build-${{ github.sha }}
83+
6284
- name: Semantic Release
6385
uses: cycjimmy/semantic-release-action@<ref>  # ref lost in this capture: e-mail obfuscation rewrote it to "[email protected]"
6486
with:

CHANGELOG.md

+129-1
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,144 @@
1-
## [1.33.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.33.10...v1.33.11) (2025-01-02)
1+
## [1.34.0-beta.14](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.13...v1.34.0-beta.14) (2025-01-03)
22

33

44
### Bug Fixes
55

6+
* add model tokens ([9b16cb9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9b16cb987fd93132d814ebd933af1565eb166331))
67
* revert ([b312251](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b312251cc56ee4c82554ecf116b5e6edd1560726))
78
* revert ([bb5de58](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bb5de581c064a1d141f849081e52987500957d1c))
9+
* validate URL only if the input type is a URL ([e2caee6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e2caee695ecce2d13aa5a82306097b1a80ba0e18))
810

911

1012
### Docs
1113

14+
* added api reference 🔗 ([67038e1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/67038e195224e1a721fe123ad1d5604b3592df20))
1215
* added official cookbook reference ([98aa74f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/98aa74ff2d35041884130be14efdf47ca5e716df))
16+
* fixed missing import ([96064f2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/96064f20ee8a849a2548f293419cf9028386c47b))
1317
* updated documentation reference ([fe89ae2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fe89ae29e6dc5f4322c25c693e2c9f6ce958d6e2))
1418

19+
20+
### CI
21+
22+
* **release:** 1.33.10 [skip ci] ([a44b74a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a44b74aa6f7be7cdb4bdbebebc3b51a6d54a51e6))
23+
* **release:** 1.33.11 [skip ci] ([30f48b3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/30f48b394f6eb8c7c9a1fa113bffabd2ac1ac585))
24+
* **release:** 1.33.9 [skip ci] ([9b6d6c0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9b6d6c0efb2fd1af5bf87cf61a0ba3d79876d21d))
25+
26+
## [1.34.0-beta.13](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.12...v1.34.0-beta.13) (2025-01-03)
27+
28+
29+
30+
### Bug Fixes
31+
32+
* bump hatchling version to 1.26.3 ([159ed32](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/159ed329d2e8fa86015df1e59a7e2ebb439c6ec0))
33+
34+
## [1.34.0-beta.12](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.11...v1.34.0-beta.12) (2025-01-02)
35+
36+
37+
### Docs
38+
39+
### Bug Fixes
40+
41+
* removed license for license-files ([b5acfb4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b5acfb414321989c45f76fad82f0d720ec889274))
42+
43+
## [1.34.0-beta.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.10...v1.34.0-beta.11) (2025-01-02)
44+
45+
46+
### Bug Fixes
47+
48+
* added license-files = [ ([9150e4c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9150e4c95fa468afe9ddda3f1278b5037a2d0f38))
49+
50+
## [1.34.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.9...v1.34.0-beta.10) (2025-01-02)
51+
52+
53+
### Bug Fixes
54+
55+
* upgrade twine ([020e211](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/020e21123889c6483459e9db1c3c796cbc116140))
56+
57+
## [1.34.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.8...v1.34.0-beta.9) (2025-01-02)
58+
59+
60+
### Bug Fixes
61+
62+
* update pkginfo ([9203ab9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9203ab9a4ab4400105fd34433684f9ac2453f35c))
63+
64+
## [1.34.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.7...v1.34.0-beta.8) (2025-01-02)
65+
66+
67+
### Bug Fixes
68+
69+
* added twine ([df07da9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/df07da9bcc59cbccf1c45d69e3a3e904eaed565b))
70+
* twine ([eb36a2b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/eb36a2b630d62363f3c57e243f2b90cf530c0a3b))
71+
* uv virtual env ([fce9886](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fce988687b3dc6fc36ce9244a8c2744f4a25d561))
72+
* version ([95b8990](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/95b8990a3649646972e12d78b11c7e1b7e707bf6))
73+
* workflow ([abe2945](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/abe29457f2380932d070bfd607c8ab5f749627c3))
74+
75+
## [1.34.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.6...v1.34.0-beta.7) (2025-01-02)
76+
77+
78+
### Bug Fixes
79+
80+
* revert to d1b2104 ([a0c0a7f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a0c0a7ff5c5dc9a107e7be8d5b5e1854886d411c))
81+
82+
## [1.34.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.5...v1.34.0-beta.6) (2025-01-02)
83+
84+
85+
### Bug Fixes
86+
87+
* release workflow ([a00f128](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a00f128992e9fef88c870295c46b983b4286a3eb))
88+
89+
## [1.34.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.4...v1.34.0-beta.5) (2025-01-02)
90+
91+
92+
### Bug Fixes
93+
94+
* release workflow ([cb6d140](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cb6d140042685bd419444d75ae7cab706cbcee38))
95+
* uv build ([1be6ffe](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1be6ffe309124d55b8b3b66ded448f06dfd87b7e))
96+
* uv install workflow ([bcac20a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bcac20a7a8e65e2aa5760fb14e17b8054b4f4cf4))
97+
98+
## [1.34.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.3...v1.34.0-beta.4) (2024-12-18)
99+
100+
101+
### Bug Fixes
102+
103+
* build config ([b186a4f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b186a4f1c73fe29fa706158cc3c61812d6b16343))
104+
* build config ([46f5985](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/46f598546109067267d01ae7d8ea7609526ea4d4))
105+
* build config ([d2fc53f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d2fc53fc8414475c9bee7590144fe4251d56faf4))
106+
* last desperate attempt to restore automatic builds ([2538fe3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2538fe3db339014ef54e2c78269bce9259e284ea))
107+
* release config ([9cd0d31](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9cd0d31882c22f347ebd9c58d8dd66b47d178c64))
108+
* release config ([62ee294](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/62ee294a864993a9414644c1547bafb96a43df20))
109+
* release config ([89863ee](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/89863ee166e09ee18287bfcc1b5475d894c9e8c6))
110+
* release config ([38e477c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/38e477c540a3a50fc7ff6120da255d51798bfadd))
111+
112+
## [1.34.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.2...v1.34.0-beta.3) (2024-12-18)
113+
114+
115+
### Bug Fixes
116+
117+
* pyproject ([35a4907](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/35a490747cf6b8dad747a4af7f02d6f5aeb0d338))
118+
119+
## [1.34.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.34.0-beta.1...v1.34.0-beta.2) (2024-12-17)
120+
121+
122+
### Bug Fixes
123+
124+
* context window ([ffdadae](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ffdadaed6fe3f17da535e6eddb73851fce2f4bf2))
125+
* formatting ([d1b2104](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d1b2104f28d84c5129edb29a5efdaf5bf7d22bfb))
126+
* pyproject ([76ac0a2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/76ac0a2141d9d53af023a405e2c61849921e4f0e))
127+
* pyproject ([3dcfcd4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3dcfcd492e71297031a7df1dba9dd135f1fae60e))
128+
* pyproject ([bf6cb0a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bf6cb0a582004617724e11ed04ba617eb39abc0c))
129+
* uv.lock ([0a7fc39](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0a7fc392dea2b62122b977d62f4d85b117fc8351))
130+
131+
132+
### CI
133+
134+
* **release:** 1.33.3 [skip ci] ([488093a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/488093a63fcc1dc01eabdab301d752416a025139))
135+
* **release:** 1.33.4 [skip ci] ([a789179](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a78917997060edbd61df5279546587e4ef123ea1))
136+
* **release:** 1.33.5 [skip ci] ([7a6164f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7a6164f1dc6dbb8ff0b4f7fc653f3910445f0754))
137+
* **release:** 1.33.6 [skip ci] ([ca96c3d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ca96c3d4309bd2b92c87a2b0095578dda302ad92))
138+
* **release:** 1.33.7 [skip ci] ([7a5764e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7a5764e3fdbfea12b04ea0686a28025a9d89cb2f))
139+
* **release:** 1.33.8 [skip ci] ([bdd6a39](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bdd6a392e2c18de8c3e4e47e2f91a4a366365ff2))
140+
141+
15142
## [1.33.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.33.1...v1.33.2) (2024-12-06)
16143

17144

@@ -29,6 +156,7 @@
29156
## [1.33.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.32.0...v1.33.0) (2024-12-05)
30157

31158

159+
32160
### Features
33161

34162
* add api integration ([8aa9103](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/8aa9103f02af92d9e1a780450daa7bb303afc150))

examples/extras/chromium_selenium.py

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import asyncio
2+
import os
3+
import json
4+
from dotenv import load_dotenv
5+
from scrapegraphai.docloaders.chromium import ChromiumLoader # Import your ChromiumLoader class
6+
from scrapegraphai.graphs import SmartScraperGraph
7+
from scrapegraphai.utils import prettify_exec_info
8+
from aiohttp import ClientError
9+
10+
# Load environment variables for API keys
11+
load_dotenv()
12+
13+
# ************************************************
14+
# Define function to analyze content with ScrapegraphAI
15+
# ************************************************
16+
async def analyze_content_with_scrapegraph(content: str):
    """
    Analyze scraped content using ScrapegraphAI.

    ``SmartScraperGraph.run()`` is a plain synchronous call, so it is executed
    in a worker thread; calling it directly from this coroutine would block
    the event loop for as long as the graph runs.

    Args:
        content (str): The scraped HTML or text content.

    Returns:
        dict: The result from ScrapegraphAI analysis, or
        ``{"error": <message>}`` if building or running the graph raised.
    """
    try:
        # Initialize ScrapegraphAI SmartScraperGraph
        smart_scraper = SmartScraperGraph(
            prompt="Summarize the main content of this webpage and extract any contact information.",
            source=content,  # Pass the content directly
            config={
                "llm": {
                    "api_key": os.getenv("OPENAI_API_KEY"),
                    "model": "openai/gpt-4o",
                },
                "verbose": True,
            },
        )
        # Off-load the blocking run() so other tasks on the loop keep progressing.
        return await asyncio.to_thread(smart_scraper.run)
    except Exception as e:
        # Best-effort example: report the failure and hand back an error payload
        # instead of propagating, so the caller can continue with the next URL.
        print(f"❌ ScrapegraphAI analysis failed: {e}")
        return {"error": str(e)}
44+
45+
# ************************************************
46+
# Test scraper and ScrapegraphAI pipeline
47+
# ************************************************
48+
async def test_scraper_with_analysis(scraper: ChromiumLoader, urls: list):
    """
    Scrape each URL with the given loader, then analyze every successfully
    scraped page with ScrapegraphAI, printing progress and results.

    Errors are reported per URL and never propagate, so one bad URL does not
    stop the remaining ones from being processed.

    Args:
        scraper (ChromiumLoader): The ChromiumLoader instance.
        urls (list): A list of URLs to scrape.
    """
    for url in urls:
        try:
            print(f"\n🔎 Scraping: {url} using {scraper.backend}...")
            result = await scraper.scrape(url)

            # Failure is detected heuristically: blank content or any text
            # containing "Error".
            # NOTE(review): this also rejects genuine pages that merely contain
            # the word "Error" — confirm that is acceptable for the example.
            if "Error" in result or not result.strip():
                print(f"❌ Failed to scrape {url}: {result}")
                continue

            print(f"✅ Successfully scraped {url}. Content (first 200 chars): {result[:200]}")

            # Pass scraped content to ScrapegraphAI for analysis
            print("🤖 Analyzing content with ScrapegraphAI...")
            analysis_result = await analyze_content_with_scrapegraph(result)
            print("📝 Analysis Result:")
            # default=str keeps a non-JSON-serialisable value in the analysis
            # from raising TypeError here and being misreported below as an
            # "Unexpected error while scraping".
            print(json.dumps(analysis_result, indent=4, default=str))

        except ClientError as ce:
            print(f"❌ Network error while scraping {url}: {ce}")
        except Exception as e:
            print(f"❌ Unexpected error while scraping {url}: {e}")
76+
77+
# ************************************************
78+
# Main Execution
79+
# ************************************************
80+
async def main():
    """
    Run the scrape-and-analyze pipeline over a fixed list of URLs for every
    backend/browser combination (Playwright and Selenium, chromium and firefox).

    Each combination is initialised inside its own ``try`` so that a failure
    to set up one browser does not prevent the other browser of the same
    backend from being exercised.
    """
    urls_to_scrape = [
        "https://example.com",
        "https://www.python.org",
        "https://invalid-url.test"
    ]

    # (label used in messages, value passed as ChromiumLoader's `backend`)
    backends = (("Playwright", "playwright"), ("Selenium", "selenium"))
    browsers = ("chromium", "firefox")

    for label, backend in backends:
        print(f"\n--- Testing {label} Backend ---")
        for browser_name in browsers:
            try:
                scraper = ChromiumLoader(
                    urls=urls_to_scrape,
                    backend=backend,
                    headless=True,
                    browser_name=browser_name,
                )
                await test_scraper_with_analysis(scraper, urls_to_scrape)
            except ImportError as ie:
                print(f"❌ {label} ImportError: {ie}")
            except Exception as e:
                print(f"❌ Error initializing {label} ChromiumLoader: {e}")


if __name__ == "__main__":
    # Script entry point: drive the async pipeline and turn interrupts or
    # unexpected failures into a one-line message rather than a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("❌ Program interrupted by user.")
    except Exception as e:
        print(f"❌ Program crashed: {e}")

pyproject.toml

+3-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
[project]
22
name = "scrapegraphai"
3-
4-
5-
6-
version = "1.33.11"
7-
8-
9-
3+
version = "1.34.0b14"
104

115
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
126
authors = [
@@ -48,7 +42,6 @@ dependencies = [
4842
"scrapegraph-py>=1.7.0"
4943
]
5044

51-
license = "MIT"
5245
readme = "README.md"
5346
homepage = "https://scrapegraphai.com/"
5447
repository = "https://github.com/ScrapeGraphAI/Scrapegraph-ai"
@@ -115,7 +108,8 @@ screenshot_scraper = [
115108
]
116109

117110
[build-system]
118-
requires = ["hatchling"]
111+
requires = ["hatchling==1.26.3"]
112+
119113
build-backend = "hatchling.build"
120114

121115
[dependency-groups]

requirements-dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pytest==8.0.0
2+
pytest-asyncio==0.25.0
23
pytest-mock==3.14.0
34
burr[start]==0.22.1
45
sphinx==6.0

0 commit comments

Comments
 (0)