
Commit b312251

fix: revert
1 parent bb5de58 commit b312251

File tree

523 files changed: +27946 -75 lines changed


README.md

+10-31
@@ -24,21 +24,9 @@ Just say which information you want to extract and the library will do it for you
   <img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/sgai-hero.png" alt="ScrapeGraphAI Hero" style="width: 100%;">
 </p>

-## 🔗 ScrapeGraph API & SDKs
-If you are looking for a quick solution to integrate ScrapeGraph in your system, check out our powerful API [here!](https://dashboard.scrapegraphai.com/login)
+## News 📰

-<p align="center">
-  <img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/api-banner.png" alt="ScrapeGraph API Banner" style="width: 100%;">
-</p>
-
-We offer SDKs in both Python and Node.js, making it easy to integrate into your projects. Check them out below:
-
-| SDK | Language | GitHub Link |
-|-----------|----------|-----------------------------------------------------------------------------|
-| Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) |
-| Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) |
-
-The Official API Documentation can be found [here](https://docs.scrapegraphai.com/).
+- ScrapegraphAI now has its own APIs! Check them out [here](https://scrapegraphai.com)!

 ## 🚀 Quick install

@@ -99,8 +87,8 @@ graph_config = {

 # Create the SmartScraperGraph instance
 smart_scraper_graph = SmartScraperGraph(
-    prompt="Extract me all the news from the website",
-    source="https://www.wired.com",
+    prompt="Find some information about what the company does, the name and a contact email.",
+    source="https://scrapegraphai.com/",
     config=graph_config
 )

@@ -112,20 +100,10 @@ print(json.dumps(result, indent=4))
 The output will be a dictionary like the following:

 ```python
-"result": {
-    "news": [
-        {
-            "title": "The New Jersey Drone Mystery May Not Actually Be That Mysterious",
-            "link": "https://www.wired.com/story/new-jersey-drone-mystery-maybe-not-drones/",
-            "author": "Lily Hay Newman"
-        },
-        {
-            "title": "Former ByteDance Intern Accused of Sabotage Among Winners of Prestigious AI Award",
-            "link": "https://www.wired.com/story/bytedance-intern-best-paper-neurips/",
-            "author": "Louise Matsakis"
-        },
-        ...
-    ]
+{
+    "company": "ScrapeGraphAI",
+    "name": "ScrapeGraphAI Extracting content from websites and local documents using LLM",
+    "contact_email": "[email protected]"
 }
 ```
 There are other pipelines that can be used to extract information from multiple pages, generate Python scripts, or even generate audio files.
@@ -157,7 +135,8 @@ Try it directly on the web using Google Colab:
 ## 📖 Documentation

 The documentation for ScrapeGraphAI can be found [here](https://scrapegraph-ai.readthedocs.io/en/latest/).
-Check out also the Docusaurus [here](https://docs-oss.scrapegraphai.com/).
+
+Check out also the Docusaurus [here](https://scrapegraph-doc.onrender.com/).

 ## 🏆 Sponsors
 <div style="text-align: center;">
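
The README context line above notes that other pipelines exist for multi-page extraction, script generation, and audio output. As a rough illustration of the multi-page case, here is a minimal sketch using SmartScraperMultiGraph from scrapegraphai.graphs; the list-valued source and the reuse of the same config shape are assumptions extrapolated from the single-page SmartScraperGraph example in this diff, not part of the commit.

```python
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph

load_dotenv()

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# One prompt applied across several pages; the graph merges the per-page answers.
multi_graph = SmartScraperMultiGraph(
    prompt="Find some information about what the company does, the name and a contact email.",
    source=["https://scrapegraphai.com/", "https://perinim.github.io/projects/"],
    config=graph_config,
)

print(multi_graph.run())
```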

examples/anthropic/.env.example

+1
@@ -0,0 +1 @@
+ANTHROPIC_API_KEY="YOUR ANTHROPIC API KEY"
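
The example scripts in this commit read this key at runtime through python-dotenv rather than hard-coding it. A minimal sketch of that pattern, matching the load_dotenv()/os.getenv() calls used in the examples below (the fail-fast check is an added suggestion, not part of the commit):

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads ANTHROPIC_API_KEY from a local .env file

anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if not anthropic_key:
    # Fail early with a clear message instead of a cryptic API error later.
    raise RuntimeError("ANTHROPIC_API_KEY is not set; copy .env.example to .env and fill it in.")
```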
(new file; filename not shown in this view)

@@ -0,0 +1,59 @@

"""
Basic example of scraping pipeline using Code Generator with schema
"""
import os
from typing import List
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

anthropic_key = os.getenv("ANTHROPIC_API_KEY")

graph_config = {
    "llm": {
        "api_key": anthropic_key,
        "model": "anthropic/claude-3-haiku-20240307",
    },
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": {
        "overall": 10,
        "syntax": 3,
        "execution": 3,
        "validation": 3,
        "semantic": 3
    },
    "output_file_name": "extracted_data.py"
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config
)

result = code_generator_graph.run()
print(result)
(new file; filename not shown in this view)

@@ -0,0 +1,60 @@

"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# required environment variables in .env:
# ANTHROPIC_API_KEY
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
(new file; filename not shown in this view)

@@ -0,0 +1,54 @@

"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[str(text), str(text)],
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
(new file; filename not shown in this view)

@@ -0,0 +1,94 @@

"""
Example of custom graph using existing nodes
"""
import os
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

# unpack the config as keyword arguments (passing the dict positionally fails)
llm_model = ChatAnthropic(**graph_config["llm"])

# define the nodes for the graph
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)

parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)

generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",
    "url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)
(new file; filename not shown in this view)

@@ -0,0 +1,28 @@

"""
DepthSearchGraph example using Anthropic
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DepthSearchGraph

load_dotenv()

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
    "verbose": True,
    "headless": False,
    "depth": 2,
    "only_inside_links": False,
}

search_graph = DepthSearchGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io",
    config=graph_config
)

result = search_graph.run()
print(result)
