Skip to content

Commit af2461c

Browse files
authored
Add query_prefix + Return TED Transcript URL for Downstream Scraping Tasks (#11090)
1 parent 60c1549 commit af2461c

File tree

8 files changed, with 83 additions and 6 deletions.

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.py

+6
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,12 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInv
1818
"size": tool_parameters.get("size"),
1919
"max_results": tool_parameters.get("max_results"),
2020
}
21+
22+
# Add query_prefix handling
23+
query_prefix = tool_parameters.get("query_prefix", "").strip()
24+
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
25+
query_dict["keywords"] = final_query
26+
2127
response = DDGS().images(**query_dict)
2228
markdown_result = "\n\n"
2329
json_result = []

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.yaml

+11
Original file line number | Diff line number | Diff line change
@@ -86,3 +86,14 @@ parameters:
8686
en_US: The size of the image to be searched.
8787
zh_Hans: 要搜索的图片的大小
8888
form: form
89+
- name: query_prefix
90+
label:
91+
en_US: Query Prefix
92+
zh_Hans: 查询前缀
93+
type: string
94+
required: false
95+
default: ""
96+
form: form
97+
human_description:
98+
en_US: Specific Search e.g. "site:unsplash.com"
99+
zh_Hans: 定向搜索 e.g. "site:unsplash.com"

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.py

+7-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from core.tools.tool.builtin_tool import BuiltinTool
88

99
SUMMARY_PROMPT = """
10-
User's query:
10+
User's query:
1111
{query}
1212
1313
Here are the news results:
@@ -30,6 +30,12 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMe
3030
"safesearch": "moderate",
3131
"region": "wt-wt",
3232
}
33+
34+
# Add query_prefix handling
35+
query_prefix = tool_parameters.get("query_prefix", "").strip()
36+
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
37+
query_dict["keywords"] = final_query
38+
3339
try:
3440
response = list(DDGS().news(**query_dict))
3541
if not response:

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.yaml

+11
Original file line number | Diff line number | Diff line change
@@ -69,3 +69,14 @@ parameters:
6969
en_US: Whether to pass the news results to llm for summarization.
7070
zh_Hans: 是否需要将新闻结果传给大模型总结
7171
form: form
72+
- name: query_prefix
73+
label:
74+
en_US: Query Prefix
75+
zh_Hans: 查询前缀
76+
type: string
77+
required: false
78+
default: ""
79+
form: form
80+
human_description:
81+
en_US: Specific Search e.g. "site:msn.com"
82+
zh_Hans: 定向搜索 e.g. "site:msn.com"

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_search.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from core.tools.tool.builtin_tool import BuiltinTool
88

99
SUMMARY_PROMPT = """
10-
User's query:
10+
User's query:
1111
{query}
1212
1313
Here is the search engine result:
@@ -26,7 +26,12 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMe
2626
query = tool_parameters.get("query")
2727
max_results = tool_parameters.get("max_results", 5)
2828
require_summary = tool_parameters.get("require_summary", False)
29-
response = DDGS().text(query, max_results=max_results)
29+
30+
# Add query_prefix handling
31+
query_prefix = tool_parameters.get("query_prefix", "").strip()
32+
final_query = f"{query_prefix} {query}".strip()
33+
34+
response = DDGS().text(final_query, max_results=max_results)
3035
if require_summary:
3136
results = "\n".join([res.get("body") for res in response])
3237
results = self.summary_results(user_id=user_id, content=results, query=query)

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_search.yaml

+11
Original file line number | Diff line number | Diff line change
@@ -39,3 +39,14 @@ parameters:
3939
en_US: Whether to pass the search results to llm for summarization.
4040
zh_Hans: 是否需要将搜索结果传给大模型总结
4141
form: form
42+
- name: query_prefix
43+
label:
44+
en_US: Query Prefix
45+
zh_Hans: 查询前缀
46+
type: string
47+
required: false
48+
default: ""
49+
form: form
50+
human_description:
51+
en_US: Specific Search e.g. "site:wikipedia.org"
52+
zh_Hans: 定向搜索 e.g. "site:wikipedia.org"

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.py

+19-3
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class DuckDuckGoVideoSearchTool(BuiltinTool):
2424

2525
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
2626
query_dict = {
27-
"keywords": tool_parameters.get("query"),
27+
"keywords": tool_parameters.get("query"), # LLM's query
2828
"region": tool_parameters.get("region", "wt-wt"),
2929
"safesearch": tool_parameters.get("safesearch", "moderate"),
3030
"timelimit": tool_parameters.get("timelimit"),
@@ -40,6 +40,12 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInv
4040
# Get proxy URL from parameters
4141
proxy_url = tool_parameters.get("proxy_url", "").strip()
4242

43+
query_prefix = tool_parameters.get("query_prefix", "").strip()
44+
final_query = f"{query_prefix} {query_dict['keywords']}".strip()
45+
46+
# Update the keywords in query_dict with the final_query
47+
query_dict["keywords"] = final_query
48+
4349
response = DDGS().videos(**query_dict)
4450

4551
# Create HTML result with embedded iframes
@@ -51,9 +57,13 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInv
5157
embed_html = res.get("embed_html", "")
5258
description = res.get("description", "")
5359
content_url = res.get("content", "")
60+
transcript_url = None
5461

5562
# Handle TED.com videos
56-
if not embed_html and "ted.com/talks" in content_url:
63+
if "ted.com/talks" in content_url:
64+
# Create transcript URL
65+
transcript_url = f"{content_url}/transcript"
66+
# Create embed URL
5767
embed_url = content_url.replace("www.ted.com", "embed.ted.com")
5868
if proxy_url:
5969
embed_url = f"{proxy_url}{embed_url}"
@@ -68,8 +78,14 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInv
6878

6979
markdown_result += f"{title}\n\n"
7080
markdown_result += f"{embed_html}\n\n"
81+
if description:
82+
markdown_result += f"{description}\n\n"
7183
markdown_result += "---\n\n"
7284

73-
json_result.append(self.create_json_message(res))
85+
# Add transcript_url to the JSON result if available
86+
result_dict = res.copy()
87+
if transcript_url:
88+
result_dict["transcript_url"] = transcript_url
89+
json_result.append(self.create_json_message(result_dict))
7490

7591
return [self.create_text_message(markdown_result)] + json_result

api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.yaml

+11
Original file line number | Diff line number | Diff line change
@@ -95,3 +95,14 @@ parameters:
9595
en_US: Proxy URL
9696
zh_Hans: 视频代理地址
9797
form: form
98+
- name: query_prefix
99+
label:
100+
en_US: Query Prefix
101+
zh_Hans: 查询前缀
102+
type: string
103+
required: false
104+
default: ""
105+
form: form
106+
human_description:
107+
en_US: Specific Search e.g. "site:www.ted.com"
108+
zh_Hans: 定向搜索 e.g. "site:www.ted.com"

0 commit comments

Comments (0)