Skip to content

Commit 0b582be

Browse files
committed
refactoring examples
1 parent 6797947 commit 0b582be

File tree

80 files changed

+3374
-17
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+3374
-17
lines changed
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# OpenAI API Configuration
2+
OPENAI_API_KEY=your-openai-api-key-here
3+
4+
# Optional Configurations
5+
MAX_TOKENS=4000
6+
MODEL_NAME=gpt-4-1106-preview
7+
TEMPERATURE=0.7
8+
9+
# Code Generator Settings
10+
DEFAULT_LANGUAGE=python
11+
GENERATE_TESTS=true
12+
ADD_DOCUMENTATION=true
13+
CODE_STYLE=pep8
14+
TYPE_CHECKING=true
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Code Generator Graph Example
2+
3+
This example demonstrates how to use Scrapegraph-ai to generate code based on specifications and requirements.
4+
5+
## Features
6+
7+
- Code generation from specifications
8+
- Multiple programming languages support
9+
- Code documentation
10+
- Best practices implementation
11+
12+
## Setup
13+
14+
1. Install required dependencies
15+
2. Copy `.env.example` to `.env`
16+
3. Configure your API keys in the `.env` file
17+
18+
## Usage
19+
20+
```python
from scrapegraphai.graphs import CodeGeneratorGraph

code_generator_graph = CodeGeneratorGraph(
    prompt="code specification",
    source="https://example.com",
    config=graph_config
)
code = code_generator_graph.run()
```
26+
27+
## Environment Variables
28+
29+
Required environment variables:
30+
- `OPENAI_API_KEY`: Your OpenAI API key
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
"""
Basic example of scraping pipeline using Code Generator with schema.

Uses a local Ollama model to scrape the source page into the `Projects`
schema and generate a standalone extraction script, saved to the file
named by `output_file_name`.
"""

from typing import List

from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

# Load variables from a local .env file. Ollama itself needs no API key;
# kept for parity with the other examples.
load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

class Project(BaseModel):
    # One project entry scraped from the page.
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

class Projects(BaseModel):
    # Top-level container for all scraped projects.
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************


graph_config = {
    "llm": {
        "model": "ollama/llama3",
        "temperature": 0,
        "format": "json",  # Ollama needs the output format specified explicitly
        "base_url": "http://localhost:11434",
    },
    "verbose": True,
    "headless": False,
    "reduction": 2,
    # Per-phase retry budgets for the generate/validate loop.
    "max_iterations": {
        "overall": 10,
        "syntax": 3,
        "execution": 3,
        "validation": 3,
        "semantic": 3
    },
    "output_file_name": "extracted_data.py"
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config
)

result = code_generator_graph.run()
print(result)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
"""
Basic example of scraping pipeline using Code Generator with schema.

Uses an OpenAI model to scrape the source page into the `Projects`
schema and generate a standalone extraction script, saved to the file
named by `output_file_name`.
"""

import os
from typing import List

from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

class Project(BaseModel):
    # One project entry scraped from the page.
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

class Projects(BaseModel):
    # Top-level container for all scraped projects.
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# Fix: read the variable documented in .env.example; the previous name
# "OPENAI_APIKEY" never matched it, so the key was silently None.
openai_key = os.getenv("OPENAI_API_KEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "openai/gpt-4o-mini",
    },
    "verbose": True,
    "headless": False,
    "reduction": 2,
    # Per-phase retry budgets for the generate/validate loop.
    "max_iterations": {
        "overall": 10,
        "syntax": 3,
        "execution": 3,
        "validation": 3,
        "semantic": 3
    },
    "output_file_name": "extracted_data.py"
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config
)

result = code_generator_graph.run()
print(result)
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# OpenAI API Configuration
2+
OPENAI_API_KEY=your-openai-api-key-here
3+
4+
# Optional Configurations
5+
MAX_TOKENS=4000
6+
MODEL_NAME=gpt-4-1106-preview
7+
TEMPERATURE=0.7
8+
9+
# CSV Scraper Settings
10+
CSV_DELIMITER=,
11+
MAX_ROWS=1000

examples/csv_scraper_graph/README.md

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# CSV Scraper Graph Example
2+
3+
This example demonstrates how to use Scrapegraph-ai to extract data from web sources and save it in CSV format.
4+
5+
## Features
6+
7+
- Table data extraction
8+
- CSV formatting
9+
- Data cleaning
10+
- Structured output
11+
12+
## Setup
13+
14+
1. Install required dependencies
15+
2. Copy `.env.example` to `.env`
16+
3. Configure your API keys in the `.env` file
17+
18+
## Usage
19+
20+
```python
from scrapegraphai.graphs import CSVScraperGraph

csv_scraper_graph = CSVScraperGraph(
    prompt="your extraction prompt",
    source=csv_content,
    config=graph_config
)
result = csv_scraper_graph.run()
```
26+
27+
## Environment Variables
28+
29+
Required environment variables:
30+
- `OPENAI_API_KEY`: Your OpenAI API key
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""

import os
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

# Load the table; its string form is what gets handed to the graph below.
text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "model": "ollama/llama3",
        "temperature": 0,
        "format": "json",  # Ollama needs the format to be specified explicitly
        # "model_tokens": 2000, # set context length arbitrarily
        "base_url": "http://localhost:11434",
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        "base_url": "http://localhost:11434",
    },
    "verbose": True,
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[str(text), str(text)],  # same document twice, to demo multi-source
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Persist the extracted data in both formats
convert_to_csv(result, "result")
convert_to_json(result, "result")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

# Load the table; its string form is what gets handed to the graph below.
text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "model": "ollama/llama3",
        "temperature": 0,
        "format": "json",  # Ollama needs the format to be specified explicitly
        # "model_tokens": 2000, # set context length arbitrarily
        "base_url": "http://localhost:11434",
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        "base_url": "http://localhost:11434",
    },
    "verbose": True,
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Persist the extracted data in both formats
convert_to_csv(result, "result")
convert_to_json(result, "result")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Username; Identifier;First name;Last name
2+
booker12;9012;Rachel;Booker
3+
grey07;2070;Laura;Grey
4+
johnson81;4081;Craig;Johnson
5+
jenkins46;9346;Mary;Jenkins
6+
smith79;5079;Jamie;Smith
7+

examples/anthropic/csv_scraper_graph_multi_anthropic.py renamed to examples/csv_scraper_graph/openai/csv_scraper_graph_multi_openai.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
"""
44
import os
55
from dotenv import load_dotenv
6+
import pandas as pd
67
from scrapegraphai.graphs import CSVScraperMultiGraph
7-
from scrapegraphai.utils import prettify_exec_info
8+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
89

910
load_dotenv()
1011
# ************************************************
@@ -15,17 +16,17 @@
1516
curr_dir = os.path.dirname(os.path.realpath(__file__))
1617
file_path = os.path.join(curr_dir, FILE_NAME)
1718

18-
with open(file_path, 'r') as file:
19-
text = file.read()
19+
text = pd.read_csv(file_path)
2020

2121
# ************************************************
2222
# Define the configuration for the graph
2323
# ************************************************
24+
openai_key = os.getenv("OPENAI_API_KEY")
2425

2526
graph_config = {
26-
"llm": {
27-
"api_key": os.getenv("ANTHROPIC_API_KEY"),
28-
"model": "anthropic/claude-3-haiku-20240307",
27+
"llm": {
28+
"api_key": openai_key,
29+
"model": "openai/gpt-4o",
2930
},
3031
}
3132

@@ -48,3 +49,7 @@
4849

4950
graph_exec_info = csv_scraper_graph.get_execution_info()
5051
print(prettify_exec_info(graph_exec_info))
52+
53+
# Save to json or csv
54+
convert_to_csv(result, "result")
55+
convert_to_json(result, "result")

0 commit comments

Comments
 (0)