ScrapeGraphAI
diff --git a/‎examples/code_generator_graph/.env.example
Lines changed: 14 additions & 0 deletions b/‎examples/code_generator_graph/.env.example
Lines changed: 14 additions & 0 deletions
diff --git a/‎examples/code_generator_graph/README.md
Lines changed: 30 additions & 0 deletions b/‎examples/code_generator_graph/README.md
Lines changed: 30 additions & 0 deletions
diff --git a/‎examples/code_generator_graph/ollama/code_generator_graph_ollama.py
Lines changed: 61 additions & 0 deletions b/‎examples/code_generator_graph/ollama/code_generator_graph_ollama.py
Lines changed: 61 additions & 0 deletions
diff --git a/‎examples/code_generator_graph/openai/code_generator_graph_openai.py
Lines changed: 59 additions & 0 deletions b/‎examples/code_generator_graph/openai/code_generator_graph_openai.py
Lines changed: 59 additions & 0 deletions
diff --git a/‎examples/csv_scraper_graph/.env.example
Lines changed: 11 additions & 0 deletions b/‎examples/csv_scraper_graph/.env.example
Lines changed: 11 additions & 0 deletions
diff --git a/‎examples/csv_scraper_graph/README.md
Lines changed: 30 additions & 0 deletions b/‎examples/csv_scraper_graph/README.md
Lines changed: 30 additions & 0 deletions
diff --git a/‎examples/csv_scraper_graph/ollama/csv_scraper_graph_multi_ollama.py
Lines changed: 62 additions & 0 deletions b/‎examples/csv_scraper_graph/ollama/csv_scraper_graph_multi_ollama.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎examples/csv_scraper_graph/ollama/csv_scraper_ollama.py
Lines changed: 62 additions & 0 deletions b/‎examples/csv_scraper_graph/ollama/csv_scraper_ollama.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎examples/csv_scraper_graph/ollama/inputs/username.csv
Lines changed: 7 additions & 0 deletions b/‎examples/csv_scraper_graph/ollama/inputs/username.csv
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/anthropic/csv_scraper_graph_multi_anthropic.py renamed to ‎examples/csv_scraper_graph/openai/csv_scraper_graph_multi_openai.py
Lines changed: 11 additions & 6 deletions b/‎examples/anthropic/csv_scraper_graph_multi_anthropic.py renamed to ‎examples/csv_scraper_graph/openai/csv_scraper_graph_multi_openai.py
Lines changed: 11 additions & 6 deletions
@@ -0,0 +1,14 @@
+# OpenAI API Configuration
+OPENAI_API_KEY=your-openai-api-key-here
+
+# Optional Configurations
+MAX_TOKENS=4000
+MODEL_NAME=gpt-4-1106-preview
+TEMPERATURE=0.7
+
+# Code Generator Settings
+DEFAULT_LANGUAGE=python
+GENERATE_TESTS=true
+ADD_DOCUMENTATION=true
+CODE_STYLE=pep8
+TYPE_CHECKING=true 
@@ -0,0 +1,30 @@
+# Code Generator Graph Example
+
+This example demonstrates how to use Scrapegraph-ai to generate code based on specifications and requirements.
+
+## Features
+
+- Code generation from specifications
+- Multiple programming languages support
+- Code documentation
+- Best practices implementation
+
+## Setup
+
+1. Install required dependencies
+2. Copy `.env.example` to `.env`
+3. Configure your API keys in the `.env` file
+
+## Usage
+
+```python
+from scrapegraphai.graphs import CodeGeneratorGraph
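+# `Projects` (a pydantic schema) and `graph_config` are defined in the scripts under `ollama/` and `openai/`.
+graph = CodeGeneratorGraph(prompt="List me all the projects with their description", source="https://perinim.github.io/projects/", schema=Projects, config=graph_config)
+result = graph.run()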
+```
+
+## Environment Variables
+
+Required environment variables:
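+- `OPENAI_API_KEY`: Your OpenAI API key (used by the `openai/` example; the `ollama/` example connects to a local Ollama server at `http://localhost:11434` and does not configure a key)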
@@ -0,0 +1,61 @@
+""" 
+Basic example of scraping pipeline using Code Generator with schema
+"""
+
+import json
+from typing import List
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from scrapegraphai.graphs import CodeGeneratorGraph
+
+load_dotenv()
+
+# ************************************************
+# Define the output schema for the graph
+# ************************************************
+
+class Project(BaseModel):
+    title: str = Field(description="The title of the project")
+    description: str = Field(description="The description of the project")
+
+class Projects(BaseModel):
+    projects: List[Project]
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+
+graph_config = {
+    "llm": {
+        "model": "ollama/llama3",
+        "temperature": 0,
+        "format": "json",
+        "base_url": "http://localhost:11434",
+    },
+    "verbose": True,
+    "headless": False,
+    "reduction": 2,
+    "max_iterations": {
+        "overall": 10,
+        "syntax": 3,
+        "execution": 3,
+        "validation": 3,
+        "semantic": 3
+    },
+    "output_file_name": "extracted_data.py"
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+code_generator_graph = CodeGeneratorGraph(
+    prompt="List me all the projects with their description",
+    source="https://perinim.github.io/projects/",
+    schema=Projects,
+    config=graph_config
+)
+
+result = code_generator_graph.run()
+print(result)
@@ -0,0 +1,59 @@
+""" 
+Basic example of scraping pipeline using Code Generator with schema
+"""
+import os
+from typing import List
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from scrapegraphai.graphs import CodeGeneratorGraph
+
+load_dotenv()
+
+# ************************************************
+# Define the output schema for the graph
+# ************************************************
+
+class Project(BaseModel):
+    title: str = Field(description="The title of the project")
+    description: str = Field(description="The description of the project")
+
+class Projects(BaseModel):
+    projects: List[Project]
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key":openai_key,
+        "model": "openai/gpt-4o-mini",
+    },
+    "verbose": True,
+    "headless": False,
+    "reduction": 2,
+    "max_iterations": {
+        "overall": 10,
+        "syntax": 3,
+        "execution": 3,
+        "validation": 3,
+        "semantic": 3
+    },
+    "output_file_name": "extracted_data.py"
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+code_generator_graph = CodeGeneratorGraph(
+    prompt="List me all the projects with their description",
+    source="https://perinim.github.io/projects/",
+    schema=Projects,
+    config=graph_config
+)
+
+result = code_generator_graph.run()
+print(result)
@@ -0,0 +1,11 @@
+# OpenAI API Configuration
+OPENAI_API_KEY=your-openai-api-key-here
+
+# Optional Configurations
+MAX_TOKENS=4000
+MODEL_NAME=gpt-4-1106-preview
+TEMPERATURE=0.7
+
+# CSV Scraper Settings
+CSV_DELIMITER=,
+MAX_ROWS=1000 
@@ -0,0 +1,30 @@
+# CSV Scraper Graph Example
+
+This example demonstrates how to use Scrapegraph-ai to extract information from CSV documents and save the result in CSV or JSON format.
+
+## Features
+
+- Table data extraction
+- CSV formatting
+- Data cleaning
+- Structured output
+
+## Setup
+
+1. Install required dependencies
+2. Copy `.env.example` to `.env`
+3. Configure your API keys in the `.env` file
+
+## Usage
+
+```python
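+from scrapegraphai.graphs import CSVScraperGraph
+# `csv_text` is the CSV content as a string; `graph_config` is defined in the scripts under `ollama/` and `openai/`.
+graph = CSVScraperGraph(prompt="List me all the last names", source=csv_text, config=graph_config)
+result = graph.run()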
+```
+
+## Environment Variables
+
+Required environment variables:
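+- `OPENAI_API_KEY`: Your OpenAI API key (used by the `openai/` example; the `ollama/` examples connect to a local Ollama server at `http://localhost:11434` and do not configure a key)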
@@ -0,0 +1,62 @@
+"""
+Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
+"""
+
+import os
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperMultiGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/llama3",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily
+        "base_url": "http://localhost:11434",
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        "base_url": "http://localhost:11434",
+    },
+    "verbose": True,
+}
+
+# ************************************************
+# Create the CSVScraperMultiGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperMultiGraph(
+    prompt="List me all the last names",
+    source=[str(text), str(text)],
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,62 @@
+"""
+Basic example of scraping pipeline using CSVScraperGraph from CSV documents
+"""
+
+import os
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/llama3",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily
+        "base_url": "http://localhost:11434",
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        "base_url": "http://localhost:11434",
+    },
+    "verbose": True,
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the content of the file, not the file object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,7 @@
+Username; Identifier;First name;Last name
+booker12;9012;Rachel;Booker
+grey07;2070;Laura;Grey
+johnson81;4081;Craig;Johnson
+jenkins46;9346;Mary;Jenkins
+smith79;5079;Jamie;Smith
+
@@ -3,8 +3,9 @@
 """
 import os
 from dotenv import load_dotenv
+import pandas as pd
 from scrapegraphai.graphs import CSVScraperMultiGraph
-from scrapegraphai.utils import prettify_exec_info
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
 
 load_dotenv()
 # ************************************************
@@ -15,17 +16,17 @@
 curr_dir = os.path.dirname(os.path.realpath(__file__))
 file_path = os.path.join(curr_dir, FILE_NAME)
 
-with open(file_path, 'r') as file:
-    text = file.read()
+text = pd.read_csv(file_path)
 
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
+openai_key = os.getenv("OPENAI_APIKEY")
 
 graph_config = {
-    "llm": {
-        "api_key": os.getenv("ANTHROPIC_API_KEY"),
-        "model": "anthropic/claude-3-haiku-20240307",
+     "llm": {
+        "api_key": openai_key,
+        "model": "openai/gpt-4o",
     },
 }
 
@@ -48,3 +49,7 @@
 
 graph_exec_info = csv_scraper_graph.get_execution_info()
 print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")