You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I confirm that I am using English to submit this report (我已阅读并同意 Language Policy).
[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)
Please do not modify this template :) and fill in all the required fields.
Dify version
0.11.0
Cloud or Self Hosted
Self Hosted (Docker)
Steps to reproduce
1. Create a knowledge base.
2. Add a new document that includes images.
logs here:
api module:
Traceback (most recent call last):
File "/app/api/controllers/console/datasets/datasets.py", line 448, in post
response = indexing_runner.indexing_estimate(
File "/app/api/core/indexing_runner.py", line 262, in indexing_estimate
text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
File "/app/api/core/rag/index_processor/processor/paragraph_index_processor.py", line 20, in extract
text_docs = ExtractProcessor.extract(
File "/app/api/core/rag/extractor/extract_processor.py", line 157, in extract
return extractor.extract()
File "/app/api/core/rag/extractor/word_extractor.py", line 65, in extract
content = self.parse_docx(self.file_path, "storage")
File "/app/api/core/rag/extractor/word_extractor.py", line 211, in parse_docx
image_map = self._extract_images_from_docx(doc, image_folder)
File "/app/api/core/rag/extractor/word_extractor.py", line 89, in _extract_images_from_docx
response = ssrf_proxy.get(url, stream=True)
File "/app/api/core/helper/ssrf_proxy.py", line 70, in get
return make_request("GET", url, max_retries=max_retries, **kwargs)
File "/app/api/core/helper/ssrf_proxy.py", line 49, in make_request
response = client.request(method=method, url=url, **kwargs)
TypeError: Client.request() got an unexpected keyword argument 'stream'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/app/api/.venv/lib/python3.10/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
File "/app/api/.venv/lib/python3.10/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
File "/app/api/.venv/lib/python3.10/site-packages/flask_restful/__init__.py", line 489, in wrapper
resp = resource(*args, **kwargs)
File "/app/api/.venv/lib/python3.10/site-packages/flask/views.py", line 110, in view
return current_app.ensure_sync(self.dispatch_request)(**kwargs) # type: ignore[no-any-return]
File "/app/api/.venv/lib/python3.10/site-packages/flask_restful/__init__.py", line 604, in dispatch_request
resp = meth(*args, **kwargs)
File "/app/api/controllers/console/wraps.py", line 142, in decorated
return view(*args, **kwargs)
File "/app/api/libs/login.py", line 92, in decorated_view
return current_app.ensure_sync(func)(*args, **kwargs)
File "/app/api/controllers/console/wraps.py", line 26, in decorated
return view(*args, **kwargs)
File "/app/api/controllers/console/datasets/datasets.py", line 464, in post
raise IndexingEstimateError(str(e))
controllers.console.datasets.error.IndexingEstimateError: 500 Internal Server Error: Client.request() got an unexpected keyword argument 'stream'
work module:
2024-11-18 09:24:29.077 ERROR [Dummy-3] [indexing_runner.py:92] - consume document failed
Traceback (most recent call last):
File "/app/api/core/indexing_runner.py", line 66, in run
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
File "/app/api/core/indexing_runner.py", line 313, in _extract
text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])
File "/app/api/core/rag/index_processor/processor/paragraph_index_processor.py", line 20, in extract
text_docs = ExtractProcessor.extract(
File "/app/api/core/rag/extractor/extract_processor.py", line 157, in extract
return extractor.extract()
File "/app/api/core/rag/extractor/word_extractor.py", line 65, in extract
content = self.parse_docx(self.file_path, "storage")
File "/app/api/core/rag/extractor/word_extractor.py", line 211, in parse_docx
image_map = self._extract_images_from_docx(doc, image_folder)
File "/app/api/core/rag/extractor/word_extractor.py", line 89, in _extract_images_from_docx
response = ssrf_proxy.get(url, stream=True)
File "/app/api/core/helper/ssrf_proxy.py", line 70, in get
return make_request("GET", url, max_retries=max_retries, **kwargs)
File "/app/api/core/helper/ssrf_proxy.py", line 49, in make_request
response = client.request(method=method, url=url, **kwargs)
TypeError: Client.request() got an unexpected keyword argument 'stream'
✔️ Expected Behavior
No response
❌ Actual Behavior
No response
The text was updated successfully, but these errors were encountered:
Hey @comddy, I'm here to help you with any bugs, questions, or contribution-related matters. Let's squash those bugs together! 🤖
To resolve the error "Client.request() got an unexpected keyword argument 'stream'", you need to remove the stream argument from the kwargs before passing it to the client.request method in the make_request function. Here's how you can modify the make_request function:
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    """Send an HTTP request through the SSRF proxy configuration, with retries.

    Args:
        method: HTTP method name passed to ``httpx.Client.request``.
        url: Target URL.
        max_retries: Number of retries after the first attempt; the request
            is attempted at most ``max_retries + 1`` times.
        **kwargs: Extra keyword arguments forwarded to ``Client.request``.
            ``stream`` is discarded, and ``allow_redirects`` is translated
            to ``follow_redirects`` unless the latter is already given.

    Returns:
        The first response whose status code is not in ``STATUS_FORCELIST``.

    Raises:
        Exception: If every attempt either raised ``httpx.RequestError`` or
            returned a status code in ``STATUS_FORCELIST``.
    """
    # httpx's Client.request() has no ``stream`` parameter; forwarding it
    # raises "TypeError: ... unexpected keyword argument 'stream'".
    kwargs.pop("stream", None)

    # Accept the requests-style ``allow_redirects`` name, but let an explicit
    # ``follow_redirects`` take precedence.
    if "allow_redirects" in kwargs:
        allow_redirects = kwargs.pop("allow_redirects")
        if "follow_redirects" not in kwargs:
            kwargs["follow_redirects"] = allow_redirects

    if "timeout" not in kwargs:
        kwargs["timeout"] = httpx.Timeout(
            timeout=dify_config.SSRF_DEFAULT_TIME_OUT,
            connect=dify_config.SSRF_DEFAULT_CONNECT_TIME_OUT,
            read=dify_config.SSRF_DEFAULT_READ_TIME_OUT,
            write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
        )

    retries = 0
    while retries <= max_retries:
        try:
            # Pick the client for this attempt: a single catch-all proxy,
            # per-scheme proxy mounts, or a direct connection.
            if dify_config.SSRF_PROXY_ALL_URL:
                client = httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL)
            elif proxy_mounts:
                client = httpx.Client(mounts=proxy_mounts)
            else:
                client = httpx.Client()

            with client:
                response = client.request(method=method, url=url, **kwargs)

            if response.status_code not in STATUS_FORCELIST:
                return response
            logging.warning(f"Received status code {response.status_code} for URL {url} which is in the force list")
        except httpx.RequestError as e:
            logging.warning(f"Request to URL {url} failed on attempt {retries + 1}: {e}")

        retries += 1
        # Exponential backoff between attempts; no sleep after the last one.
        if retries <= max_retries:
            time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))

    raise Exception(f"Reached maximum retries ({max_retries}) for URL {url}")
This change will prevent the stream argument from being passed to the client.request method, thus resolving the error [1].
Self Checks
Dify version
0.11.0
Cloud or Self Hosted
Self Hosted (Docker)
Steps to reproduce
1. Create a knowledge base.

2. Add a new document that includes images.
logs here:
api module:
work module:
✔️ Expected Behavior
No response
❌ Actual Behavior
No response
The text was updated successfully, but these errors were encountered: