
Commit b3aa03e

Merge pull request #884 from ScrapeGraphAI/829-languagecountry-selection
Merge pull request #883 from ScrapeGraphAI/main
2 parents: 3108793 + 37c07c8 · commit b3aa03e

145 files changed (+1006, -706 lines)


.github/FUNDING.yml (+1, -1)

@@ -12,4 +12,4 @@ lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cl
 polar: # Replace with a single Polar username
 buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
 thanks_dev: # Replace with a single thanks.dev username
-custom:
+custom:

.github/ISSUE_TEMPLATE/custom.md (-2)

@@ -6,5 +6,3 @@ labels: ''
 assignees: ''

 ---
-
-

.github/workflows/release.yml (+13, -13)

@@ -19,21 +19,21 @@ jobs:
 uses: actions/setup-python@v5
 with:
 python-version: '3.10'
-
+
 - name: Install uv
 uses: astral-sh/setup-uv@v3
-
+
 - name: Install Node Env
 uses: actions/setup-node@v4
 with:
 node-version: 20
-
+
 - name: Checkout
 uses: actions/[email protected]
 with:
 fetch-depth: 0
 persist-credentials: false
-
+
 - name: Build and validate package
 run: |
 uv venv
@@ -44,10 +44,10 @@ jobs:
 uv build
 uv pip install --upgrade pkginfo==1.12.0 twine==6.0.1 # Upgrade pkginfo and install twine
 python -m twine check dist/*
-
+
 - name: Debug Dist Directory
 run: ls -al dist
-
+
 - name: Cache build
 uses: actions/cache@v3
 with:
@@ -59,7 +59,7 @@ jobs:
 runs-on: ubuntu-latest
 needs: build
 environment: development
-if: >
+if: >
 github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/pre/beta') ||
 (github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged &&
 (github.event.pull_request.base.ref == 'main' || github.event.pull_request.base.ref == 'pre/beta'))
@@ -74,23 +74,23 @@ jobs:
 with:
 fetch-depth: 0
 persist-credentials: false
-
+
 - name: Restore build artifacts
 uses: actions/cache@v3
 with:
 path: ./dist
 key: ${{ runner.os }}-build-${{ github.sha }}
-
+
 - name: Semantic Release
 uses: cycjimmy/[email protected]
 with:
 semantic_version: 23
 extra_plugins: |
 semantic-release-pypi@3
-@semantic-release/git
-@semantic-release/commit-analyzer@12
-@semantic-release/release-notes-generator@13
-@semantic-release/github@10
+@semantic-release/git
+@semantic-release/commit-analyzer@12
+@semantic-release/release-notes-generator@13
+@semantic-release/github@10
 @semantic-release/changelog@6
 conventional-changelog-conventionalcommits@7
 env:

.readthedocs.yaml (+2, -1)

@@ -1,3 +1,4 @@
+
 # Read the Docs configuration file for Sphinx projects
 # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

@@ -32,4 +33,4 @@ sphinx:
 # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
 # python:
 # install:
-# - requirements: docs/requirements.txt
+# - requirements: docs/requirements.txt

.releaserc.yml (-1)

@@ -53,4 +53,3 @@ branches:
 channel: "dev"
 prerelease: "beta"
 debug: true
-

Dockerfile (+1, -1)

@@ -6,4 +6,4 @@ RUN pip install --no-cache-dir scrapegraphai
 RUN pip install --no-cache-dir scrapegraphai[burr]

 RUN python3 -m playwright install-deps
-RUN python3 -m playwright install
+RUN python3 -m playwright install

LICENSE (+1, -1)

@@ -4,4 +4,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of

 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md (+1, -1)

@@ -182,7 +182,7 @@ The Official API Documentation can be found [here](https://docs.scrapegraphai.co
 </a>
 </div>

-## 📈 Telemetry
+## 📈 Telemetry
 We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html).
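For reference, a minimal sketch of the opt-out described in the README excerpt above. Setting SCRAPEGRAPHAI_TELEMETRY_ENABLED=false in the shell is the documented route; the in-process variant below assumes the flag is read when the library is imported.

```python
import os

# Opt out of anonymous telemetry before importing the library
# (assumption: the flag is checked at import time; exporting it in the
# shell before launching Python works in any case).
os.environ["SCRAPEGRAPHAI_TELEMETRY_ENABLED"] = "false"

import scrapegraphai  # noqa: E402  (import placed after setting the flag)
```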

SECURITY.md (-1)

@@ -3,4 +3,3 @@
 ## Reporting a Vulnerability

 For reporting a vulnerability contact directly [email protected]
-

docs/README.md (+1, -1)

@@ -55,7 +55,7 @@ markmap:
 - Use Selenium or Playwright to take screenshots
 - Use LLM to asses if it is a block-like page, paragraph-like page, etc.
 - [Issue #88](https://github.com/VinciGit00/Scrapegraph-ai/issues/88)
-
+
 ## **Long-Term Goals**

 - Automatic generation of scraping pipelines from a given prompt

docs/requirements-dev.txt (+7)

@@ -0,0 +1,7 @@
+sphinx>=7.1.2
+sphinx-rtd-theme>=1.3.0
+myst-parser>=2.0.0
+sphinx-copybutton>=0.5.2
+sphinx-design>=0.5.0
+sphinx-autodoc-typehints>=1.25.2
+sphinx-autoapi>=3.0.0

docs/russian.md (+1, -1)

@@ -228,4 +228,4 @@ ScrapeGraphAI лицензирован под MIT License. Подробнее с
 ## Благодарности

 - Мы хотели бы поблагодарить всех участников проекта и сообщество с открытым исходным кодом за их поддержку.
-- ScrapeGraphAI предназначен только для исследования данных и научных целей. Мы не несем ответственности за неправильное использование библиотеки.
+- ScrapeGraphAI предназначен только для исследования данных и научных целей. Мы не несем ответственности за неправильное использование библиотеки.

docs/source/conf.py (+9, -10)

@@ -12,31 +12,30 @@
 import sys

 # import all the modules
-sys.path.insert(0, os.path.abspath('../../'))
+sys.path.insert(0, os.path.abspath("../../"))

-project = 'ScrapeGraphAI'
-copyright = '2024, ScrapeGraphAI'
-author = 'Marco Vinciguerra, Marco Perini, Lorenzo Padoan'
+project = "ScrapeGraphAI"
+copyright = "2024, ScrapeGraphAI"
+author = "Marco Vinciguerra, Marco Perini, Lorenzo Padoan"

 html_last_updated_fmt = "%b %d, %Y"

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]

-templates_path = ['_templates']
+templates_path = ["_templates"]
 exclude_patterns = []

 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

-html_theme = 'furo'
+html_theme = "furo"
 html_theme_options = {
 "source_repository": "https://github.com/VinciGit00/Scrapegraph-ai/",
 "source_branch": "main",
 "source_directory": "docs/source/",
-'navigation_with_keys': True,
-'sidebar_hide_name': False,
+"navigation_with_keys": True,
+"sidebar_hide_name": False,
 }
-

docs/source/getting_started/examples.rst (+1, -1)

@@ -84,4 +84,4 @@ After that, you can run the following code, using only your machine resources br
 result = smart_scraper_graph.run()
 print(result)

-To find out how you can customize the `graph_config` dictionary, by using different LLM and adding new parameters, check the `Scrapers` section!
+To find out how you can customize the `graph_config` dictionary, by using different LLM and adding new parameters, check the `Scrapers` section!
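For context, a minimal sketch of the `graph_config` customization this docs page points to. The config keys mirror the snippets quoted elsewhere in this commit; the model identifier and URL are illustrative placeholders, not values from the diff.

```python
from scrapegraphai.graphs import SmartScraperGraph

# Illustrative configuration; swap in the LLM and parameters you need.
graph_config = {
    "llm": {
        "model": "ollama/llama3",  # placeholder model identifier
        "temperature": 0,
    },
    "verbose": True,  # print detailed logs while the pipeline runs
}

smart_scraper_graph = SmartScraperGraph(
    prompt="List the projects on this page with their descriptions",
    source="https://example.com/projects",  # placeholder URL
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)
```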

docs/source/getting_started/installation.rst (+2, -4)

@@ -22,7 +22,7 @@ The library is available on PyPI, so it can be installed using the following com
 pip install scrapegraphai

 .. important::
-
+
 It is higly recommended to install the library in a virtual environment (conda, venv, etc.)

 If your clone the repository, it is recommended to use a package manager like `uv <https://github.com/astral-sh/uv>`_.
@@ -35,7 +35,7 @@ To install the library using uv, you can run the following command:
 uv build

 .. caution::
-
+
 **Rye** must be installed first by following the instructions on the `official website <https://github.com/astral-sh/uv>`_.

 Additionally on Windows when using WSL
@@ -46,5 +46,3 @@ If you are using Windows Subsystem for Linux (WSL) and you are facing issues wit
 .. code-block:: bash

 sudo apt-get -y install libnss3 libnspr4 libgbm1 libasound2
-
-

docs/source/index.rst (+1, -1)

@@ -43,4 +43,4 @@ Indices and tables

 * :ref:`genindex`
 * :ref:`modindex`
-* :ref:`search`
+* :ref:`search`

docs/source/introduction/overview.rst (+10, -28)

@@ -3,46 +3,23 @@
 :width: 50%
 :alt: ScrapegraphAI

-Overview
+Overview
 ========

 ScrapeGraphAI is an **open-source** Python library designed to revolutionize **scraping** tools.
-In today's data-intensive digital landscape, this library stands out by integrating **Large Language Models** (LLMs)
+In today's data-intensive digital landscape, this library stands out by integrating **Large Language Models** (LLMs)
 and modular **graph-based** pipelines to automate the scraping of data from various sources (e.g., websites, local files etc.).

 Simply specify the information you need to extract, and ScrapeGraphAI handles the rest, providing a more **flexible** and **low-maintenance** solution compared to traditional scraping tools.

 For comprehensive documentation and updates, visit our `website <https://scrapegraphai.com>`_.

-Key Features
------------
-
-* **Just One Prompt Away**: Transform any website into clean, organized data for AI agents and Data Analytics
-* **Save Time**: No more writing complex code or dealing with manual extraction
-* **Save Money**: High-quality data extraction at a fraction of the cost of traditional scraping services
-* **AI Powered**: State-of-the-art AI technologies for fast, accurate, and dependable results
-
-Community Impact
---------------
-
-Our open-source technology is continuously enhanced by a global community of developers:
-
-* **+17K** stars on Github
-* **7,000,000+** extracted webpages
-* **250k+** unique users
-
-Services
---------
-
-* **Markdownify**: Convert webpage to markdown format (2 credits/page)
-* **Smart Scraper**: Structured AI web scraping given a URL (5 credits/page)
-* **Local Scraper**: Structured AI scraping given your local HTML (10 credits/page)

 Why ScrapegraphAI?
 ==================

 Traditional web scraping tools often rely on fixed patterns or manual configuration to extract data from web pages.
-ScrapegraphAI, leveraging the power of LLMs, adapts to changes in website structures, reducing the need for constant developer intervention.
+ScrapegraphAI, leveraging the power of LLMs, adapts to changes in website structures, reducing the need for constant developer intervention.
 This flexibility ensures that scrapers remain functional even when website layouts change.

 We support many LLMs including **GPT, Gemini, Groq, Azure, Hugging Face** etc.
@@ -187,13 +164,13 @@ FAQ
 - Check your internet connection. Low speed or unstable connection can cause the HTML to not load properly.

 - Try using a proxy server to mask your IP address. Check out the :ref:`Proxy` section for more information on how to configure proxy settings.
-
+
 - Use a different LLM model. Some models might perform better on certain websites than others.

 - Set the `verbose` parameter to `True` in the graph_config to see more detailed logs.

 - Visualize the pipeline graphically using :ref:`Burr`.
-
+
 If the issue persists, please report it on the GitHub repository.

 6. **How does ScrapeGraphAI handle the context window limit of LLMs?**
@@ -226,3 +203,8 @@ Sponsors
 :width: 11%
 :alt: Scrapedo
 :target: https://scrape.do
+
+.. image:: ../../assets/scrapegraph_logo.png
+:width: 11%
+:alt: ScrapegraphAI
+:target: https://scrapegraphai.com

docs/source/modules/modules.rst (-1)

@@ -7,4 +7,3 @@ scrapegraphai
 scrapegraphai

 scrapegraphai.helpers.models_tokens
-

docs/source/modules/scrapegraphai.helpers.models_tokens.rst (+1, -1)

@@ -25,4 +25,4 @@ Example usage:
 else:
 print(f"{model_name} not found in the models list")

-This information is crucial for users to understand the capabilities and limitations of different AI models when designing their scraping pipelines.
+This information is crucial for users to understand the capabilities and limitations of different AI models when designing their scraping pipelines.
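A short, hedged sketch of the lookup this docs page describes; it assumes `models_tokens` is the nested provider-to-model dictionary that the quoted example usage iterates over.

```python
from scrapegraphai.helpers.models_tokens import models_tokens

model_name = "gpt-4o-mini"  # illustrative model name

# Search each provider's table for the requested model.
for provider, models in models_tokens.items():
    if model_name in models:
        print(f"{model_name} ({provider}): {models[model_name]} max tokens")
        break
else:
    print(f"{model_name} not found in the models list")
```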

docs/source/scrapers/llm.rst (+5, -6)

@@ -133,11 +133,11 @@ We can also pass a model instance for the chat model and the embedding model. Fo
 openai_api_version="AZURE_OPENAI_API_VERSION",
 )
 # Supposing model_tokens are 100K
-model_tokens_count = 100000
+model_tokens_count = 100000
 graph_config = {
 "llm": {
 "model_instance": llm_model_instance,
-"model_tokens": model_tokens_count,
+"model_tokens": model_tokens_count,
 },
 "embeddings": {
 "model_instance": embedder_model_instance
@@ -198,7 +198,7 @@ We can also pass a model instance for the chat model and the embedding model. Fo
 Other LLM models
 ^^^^^^^^^^^^^^^^

-We can also pass a model instance for the chat model and the embedding model through the **model_instance** parameter.
+We can also pass a model instance for the chat model and the embedding model through the **model_instance** parameter.
 This feature enables you to utilize a Langchain model instance.
 You will discover the model you require within the provided list:

@@ -208,7 +208,7 @@ You will discover the model you require within the provided list:
 For instance, consider **chat model** Moonshot. We can integrate it in the following manner:

 .. code-block:: python
-
+
 from langchain_community.chat_models.moonshot import MoonshotChat

 # The configuration parameters are contingent upon the specific model you select
@@ -221,8 +221,7 @@ For instance, consider **chat model** Moonshot. We can integrate it in the follo
 llm_model_instance = MoonshotChat(**llm_instance_config)
 graph_config = {
 "llm": {
-"model_instance": llm_model_instance,
+"model_instance": llm_model_instance,
 "model_tokens": 5000
 },
 }
-
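Pulling the Moonshot fragments above together, a hedged end-to-end sketch. The API key and source URL are placeholders, and the exact `MoonshotChat` parameters depend on your langchain-community version.

```python
from langchain_community.chat_models.moonshot import MoonshotChat
from scrapegraphai.graphs import SmartScraperGraph

# The configuration parameters are contingent upon the specific model you select.
llm_instance_config = {
    "model": "moonshot-v1-8k",
    "base_url": "https://api.moonshot.cn/v1",
    "moonshot_api_key": "YOUR_MOONSHOT_API_KEY",  # placeholder
}

llm_model_instance = MoonshotChat(**llm_instance_config)

graph_config = {
    "llm": {
        "model_instance": llm_model_instance,
        "model_tokens": 5000,
    },
}

graph = SmartScraperGraph(
    prompt="Extract the page title",
    source="https://example.com",  # placeholder URL
    config=graph_config,
)
print(graph.run())
```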

examples/ScrapegraphAI_cookbook.ipynb (+1, -1)

@@ -912,4 +912,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
+}

examples/code_generator_graph/.env.example (+1, -1)

@@ -11,4 +11,4 @@ DEFAULT_LANGUAGE=python
 GENERATE_TESTS=true
 ADD_DOCUMENTATION=true
 CODE_STYLE=pep8
-TYPE_CHECKING=true
+TYPE_CHECKING=true

examples/code_generator_graph/README.md (+1, -1)

@@ -27,4 +27,4 @@ code = graph.generate("code specification")
 ## Environment Variables

 Required environment variables:
-- `OPENAI_API_KEY`: Your OpenAI API key
+- `OPENAI_API_KEY`: Your OpenAI API key
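As a usage note for the variable listed above, a brief sketch of loading it from a local `.env` file; it assumes python-dotenv is installed, which is not stated anywhere in this commit.

```python
import os

from dotenv import load_dotenv  # pip install python-dotenv (assumed)

load_dotenv()  # reads a local .env file such as the .env.example above

openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set")
```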
