Commit ce25b6a

fix: search
1 parent c5fbb8b commit ce25b6a

4 files changed: +42 -34 lines changed

pyproject.toml (+2 -2)

@@ -27,10 +27,10 @@ dependencies = [
   "playwright>=1.43.0",
   "undetected-playwright>=0.3.0",
   "semchunk>=2.2.0",
-  "googlesearch-python>=1.2.5",
   "async-timeout>=4.0.3",
   "simpleeval>=1.0.0",
-  "jsonschema>=4.23.0"
+  "jsonschema>=4.23.0",
+  "duckduckgo-search>=7.2.1"
 ]
 
 readme = "README.md"
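
The new duckduckgo-search dependency is what backs the LangChain DuckDuckGoSearchResults tool used in research_web.py below. As a rough sketch (not part of this commit), the package can also be queried directly through its DDGS text API; the query string and result handling here are illustrative assumptions:

from duckduckgo_search import DDGS

# Illustrative sketch, assuming duckduckgo-search >= 7.x: fetch up to 10 text
# results and keep only their URLs (the "href" field of each hit).
ddgs = DDGS()
urls = [hit["href"] for hit in ddgs.text("web scraping with python", max_results=10)]
print(urls)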

scrapegraphai/nodes/search_internet_node.py (+1 -1)

@@ -48,7 +48,7 @@ def __init__(
         self.search_engine = (
             node_config["search_engine"]
             if node_config.get("search_engine")
-            else "google"
+            else "duckduckgo"
         )
 
         self.serper_api_key = (
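
A quick sketch of what this one-line change does: when the node is built without a "search_engine" entry in its node_config, the fallback now resolves to "duckduckgo" instead of "google". The node_config value below is a made-up example, not taken from this commit:

node_config = {"verbose": True}  # hypothetical config with no "search_engine" key

# Same fallback expression as in the node's __init__ above.
search_engine = (
    node_config["search_engine"]
    if node_config.get("search_engine")
    else "duckduckgo"
)
print(search_engine)  # prints "duckduckgo"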

scrapegraphai/utils/research_web.py (+7 -16)

@@ -7,13 +7,12 @@
 
 import requests
 from bs4 import BeautifulSoup
-from googlesearch import search as google_search
 from langchain_community.tools import DuckDuckGoSearchResults
 
 
 def search_on_web(
     query: str,
-    search_engine: str = "Google",
+    search_engine: str = "duckduckgo",
     max_results: int = 10,
     port: int = 8080,
     timeout: int = 10,
@@ -41,7 +40,7 @@ def search_on_web(
         raise ValueError("Query must be a non-empty string")
 
     search_engine = search_engine.lower()
-    valid_engines = {"google", "duckduckgo", "bing", "searxng", "serper"}
+    valid_engines = {"duckduckgo", "bing", "searxng", "serper"}
     if search_engine not in valid_engines:
         raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")
 
@@ -52,20 +51,12 @@ def search_on_web(
 
     try:
         results = []
-        if search_engine == "google":
-            kwargs = {
-                "num_results": max_results,
-                "proxy": formatted_proxy,
-                "lang": language,
-            }
-            if region:
-                kwargs["region"] = region
-
-            results = list(google_search(query, **kwargs))
-
-        elif search_engine == "duckduckgo":
+        if search_engine == "duckduckgo":
+            # Create a DuckDuckGo search object with max_results
             research = DuckDuckGoSearchResults(max_results=max_results)
+            # Run the search
             res = research.run(query)
+            # Extract URLs using regex
             results = re.findall(r"https?://[^\s,\]]+", res)
 
         elif search_engine == "bing":
@@ -74,7 +65,7 @@ def search_on_web(
         elif search_engine == "searxng":
             results = _search_searxng(query, max_results, port, timeout)
 
-        elif search_engine.lower() == "serper":
+        elif search_engine == "serper":
             results = _search_serper(query, max_results, serper_api_key, timeout)
 
         return filter_pdf_links(results)
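
For reference, a minimal standalone sketch of the DuckDuckGo path that search_on_web now takes by default (it assumes langchain-community and duckduckgo-search are installed; the helper name duckduckgo_urls is made up for illustration). DuckDuckGoSearchResults.run returns a single concatenated string of results, which is why the commit pulls the URLs out with a regex:

import re

from langchain_community.tools import DuckDuckGoSearchResults

def duckduckgo_urls(query: str, max_results: int = 10) -> list[str]:
    # Mirrors the duckduckgo branch above: run the search, then extract the
    # links from the single result string returned by the tool.
    research = DuckDuckGoSearchResults(max_results=max_results)
    res = research.run(query)
    return re.findall(r"https?://[^\s,\]]+", res)

print(duckduckgo_urls("scrapegraphai", max_results=5))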

uv.lock (+32 -15)

Some generated files are not rendered by default.
