7
7
8
8
import requests
9
9
from bs4 import BeautifulSoup
10
- from googlesearch import search as google_search
11
10
from langchain_community .tools import DuckDuckGoSearchResults
12
11
13
12
14
13
def search_on_web (
15
14
query : str ,
16
- search_engine : str = "Google " ,
15
+ search_engine : str = "duckduckgo " ,
17
16
max_results : int = 10 ,
18
17
port : int = 8080 ,
19
18
timeout : int = 10 ,
@@ -41,7 +40,7 @@ def search_on_web(
41
40
raise ValueError ("Query must be a non-empty string" )
42
41
43
42
search_engine = search_engine .lower ()
44
- valid_engines = {"google" , " duckduckgo" , "bing" , "searxng" , "serper" }
43
+ valid_engines = {"duckduckgo" , "bing" , "searxng" , "serper" }
45
44
if search_engine not in valid_engines :
46
45
raise ValueError (f"Search engine must be one of: { ', ' .join (valid_engines )} " )
47
46
@@ -52,20 +51,12 @@ def search_on_web(
52
51
53
52
try :
54
53
results = []
55
- if search_engine == "google" :
56
- kwargs = {
57
- "num_results" : max_results ,
58
- "proxy" : formatted_proxy ,
59
- "lang" : language ,
60
- }
61
- if region :
62
- kwargs ["region" ] = region
63
-
64
- results = list (google_search (query , ** kwargs ))
65
-
66
- elif search_engine == "duckduckgo" :
54
+ if search_engine == "duckduckgo" :
55
+ # Create a DuckDuckGo search object with max_results
67
56
research = DuckDuckGoSearchResults (max_results = max_results )
57
+ # Run the search
68
58
res = research .run (query )
59
+ # Extract URLs using regex
69
60
results = re .findall (r"https?://[^\s,\]]+" , res )
70
61
71
62
elif search_engine == "bing" :
@@ -74,7 +65,7 @@ def search_on_web(
74
65
elif search_engine == "searxng" :
75
66
results = _search_searxng (query , max_results , port , timeout )
76
67
77
- elif search_engine . lower () == "serper" :
68
+ elif search_engine == "serper" :
78
69
results = _search_serper (query , max_results , serper_api_key , timeout )
79
70
80
71
return filter_pdf_links (results )
0 commit comments