feat: support LLM process document file #10966


Merged: 11 commits (Nov 22, 2024)
12 changes: 5 additions & 7 deletions api/core/memory/token_buffer_memory.py
@@ -3,7 +3,6 @@

 from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
 from core.file import file_manager
-from core.file.models import FileType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities import (
     AssistantPromptMessage,
@@ -103,12 +102,11 @@ def get_history_prompt_messages(
             prompt_message_contents: list[PromptMessageContent] = []
             prompt_message_contents.append(TextPromptMessageContent(data=message.query))
             for file in file_objs:
-                if file.type in {FileType.IMAGE, FileType.AUDIO}:
-                    prompt_message = file_manager.to_prompt_message_content(
-                        file,
-                        image_detail_config=detail,
-                    )
-                    prompt_message_contents.append(prompt_message)
+                prompt_message = file_manager.to_prompt_message_content(
+                    file,
+                    image_detail_config=detail,
+                )
+                prompt_message_contents.append(prompt_message)

             prompt_messages.append(UserPromptMessage(content=prompt_message_contents))

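With the `FileType` filter gone, every file attached to a history message is converted via `file_manager.to_prompt_message_content`, documents included, instead of only images and audio. A minimal sketch of what now flows through, assuming the `File` constructor arguments shown in the deleted tests at the bottom of this diff and a `FileType.DOCUMENT` member:

```python
from core.file import file_manager
from core.file.models import File, FileTransferMethod, FileType  # import path assumed

# Hypothetical: a PDF in conversation history is now converted, not skipped.
pdf = File(
    tenant_id="example",
    type=FileType.DOCUMENT,  # assumed member, mirroring the new document support
    filename="report.pdf",
    transfer_method=FileTransferMethod.REMOTE_URL,
    remote_url="https://example.com/report.pdf",
)
content = file_manager.to_prompt_message_content(pdf, image_detail_config=None)
```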
2 changes: 1 addition & 1 deletion api/core/model_runtime/entities/message_entities.py
@@ -49,7 +49,7 @@ class PromptMessageFunction(BaseModel):
     function: PromptMessageTool


-class PromptMessageContentType(Enum):
+class PromptMessageContentType(str, Enum):
     """
     Enum class for prompt message content type.
     """

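Why the `str` mixin matters: members now behave as plain strings, so content types survive JSON serialization and compare directly against string literals. A quick self-contained sketch (the member values are assumptions for illustration):

```python
import json
from enum import Enum

class PromptMessageContentType(str, Enum):
    TEXT = "text"
    DOCUMENT = "document"  # value assumed

# Equality with raw strings and transparent JSON encoding both work:
assert PromptMessageContentType.DOCUMENT == "document"
print(json.dumps({"type": PromptMessageContentType.DOCUMENT}))  # {"type": "document"}
```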
@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 1048576

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 2097152

@@ -7,6 +7,7 @@ features:
   - vision
   - tool-call
   - stream-tool-call
+  - document
 model_properties:
   mode: chat
   context_size: 32767

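Each hunk above adds the `document` feature flag to a different model YAML (the file names are not captured in this view). The flag is parsed into the model schema's feature list, which the LLM node checks before attaching a document to a prompt. A rough sketch of the idea, with the parsing assumed (requires PyYAML):

```python
import yaml  # assumes PyYAML is available

manifest = """
features:
  - vision
  - tool-call
  - stream-tool-call
  - document
"""

features = set(yaml.safe_load(manifest)["features"])
print("document" in features)  # True -> document files pass the LLM node's gate
```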
22 changes: 22 additions & 0 deletions api/core/model_runtime/model_providers/google/llm/llm.py
@@ -16,6 +16,7 @@
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
+    DocumentPromptMessageContent,
     ImagePromptMessageContent,
     PromptMessage,
     PromptMessageContentType,
@@ -35,6 +36,21 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

+GOOGLE_AVAILABLE_MIMETYPE = [
+    "application/pdf",
+    "application/x-javascript",
+    "text/javascript",
+    "application/x-python",
+    "text/x-python",
+    "text/plain",
+    "text/html",
+    "text/css",
+    "text/md",
+    "text/csv",
+    "text/xml",
+    "text/rtf",
+]
+

 class GoogleLargeLanguageModel(LargeLanguageModel):
     def _invoke(
@@ -370,6 +386,12 @@ def _format_message_to_glm_content(self, message: PromptMessage) -> ContentType:
                     raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
                 blob = {"inline_data": {"mime_type": mime_type, "data": base64_data}}
                 glm_content["parts"].append(blob)
+            elif c.type == PromptMessageContentType.DOCUMENT:
+                message_content = cast(DocumentPromptMessageContent, c)
+                if message_content.mime_type not in GOOGLE_AVAILABLE_MIMETYPE:
+                    raise ValueError(f"Unsupported mime type {message_content.mime_type}")
+                blob = {"inline_data": {"mime_type": message_content.mime_type, "data": message_content.data}}
+                glm_content["parts"].append(blob)

         return glm_content
     elif isinstance(message, AssistantPromptMessage):

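The new `DOCUMENT` branch mirrors the image path: it validates the MIME type against the allow-list and ships the payload as an `inline_data` part. A sketch of the blob it builds, assuming `DocumentPromptMessageContent.data` already holds base64 text (the branch passes it through unchanged):

```python
import base64

pdf_bytes = b"%PDF-1.4 placeholder"          # stand-in document bytes
data = base64.b64encode(pdf_bytes).decode()  # what .data is assumed to contain

# The dict appended to glm_content["parts"] for a PDF:
blob = {"inline_data": {"mime_type": "application/pdf", "data": data}}
print(blob["inline_data"]["mime_type"])  # application/pdf
```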
@@ -6,6 +6,7 @@ model_type: llm
 features:
   - vision
   - agent-thought
+  - video
 model_properties:
   mode: chat
   context_size: 32000

@@ -6,6 +6,7 @@ model_type: llm
 features:
   - vision
   - agent-thought
+  - video
 model_properties:
   mode: chat
   context_size: 32000

@@ -6,6 +6,7 @@ model_type: llm
 features:
   - vision
   - agent-thought
+  - video
 model_properties:
   mode: chat
   context_size: 32768

@@ -6,6 +6,7 @@ model_type: llm
 features:
   - vision
   - agent-thought
+  - video
 model_properties:
   mode: chat
   context_size: 8000

@@ -6,6 +6,7 @@ model_properties:
   mode: chat
 features:
   - vision
+  - video
 parameter_rules:
   - name: temperature
     use_template: temperature

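Same mechanism as the `document` flag earlier: declaring `video` lets video files pass the LLM node's feature gate instead of triggering the new `FileTypeNotSupportError`. A toy illustration:

```python
features = {"vision", "agent-thought", "video"}  # as declared in the YAML above

def gate(content_type: str) -> str:
    # Simplified stand-in for the node.py check shown below.
    if content_type == "video" and "video" not in features:
        raise ValueError("video type is not supported by this model")
    return "accepted"

print(gate("video"))  # accepted
```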
10 changes: 8 additions & 2 deletions api/core/workflow/nodes/llm/exc.py
@@ -26,9 +26,15 @@ class NoPromptFoundError(LLMNodeError):
     """Raised when no prompt is found in the LLM configuration."""


-class NotSupportedPromptTypeError(LLMNodeError):
-    """Raised when the prompt type is not supported."""
+class TemplateTypeNotSupportError(LLMNodeError):
+    def __init__(self, *, type_name: str):
+        super().__init__(f"Prompt type {type_name} is not supported.")


 class MemoryRolePrefixRequiredError(LLMNodeError):
     """Raised when memory role prefix is required for completion model."""


+class FileTypeNotSupportError(LLMNodeError):
+    def __init__(self, *, type_name: str):
+        super().__init__(f"{type_name} type is not supported by this model")
9 changes: 4 additions & 5 deletions api/core/workflow/nodes/llm/node.py
@@ -65,14 +65,15 @@
     ModelConfig,
 )
 from .exc import (
+    FileTypeNotSupportError,
     InvalidContextStructureError,
     InvalidVariableTypeError,
     LLMModeRequiredError,
     LLMNodeError,
     MemoryRolePrefixRequiredError,
     ModelNotExistError,
     NoPromptFoundError,
-    NotSupportedPromptTypeError,
+    TemplateTypeNotSupportError,
     VariableNotFoundError,
 )

@@ -621,9 +622,7 @@ def _fetch_prompt_messages(
             prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
             prompt_messages[0].content = prompt_content
         else:
-            errmsg = f"Prompt type {type(prompt_template)} is not supported"
-            logger.warning(errmsg)
-            raise NotSupportedPromptTypeError(errmsg)
+            raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))

         if vision_enabled and user_files:
             file_prompts = []
@@ -671,7 +670,7 @@
                     and ModelFeature.AUDIO not in model_config.model_schema.features
                 )
             ):
-                continue
+                raise FileTypeNotSupportError(type_name=content_item.type)
             prompt_message_content.append(content_item)
         if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
             prompt_message.content = prompt_message_content[0].data

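The last hunk is the user-visible behavior change: a file whose type the model has no feature flag for used to be silently dropped (`continue`); now the node fails fast. A condensed sketch of the gate, with the feature-to-type mapping assumed to follow the `AUDIO` pattern visible above:

```python
def check_file_allowed(content_type: str, model_features: set[str]) -> None:
    # Hypothetical simplification of the branch that now raises
    # FileTypeNotSupportError instead of executing `continue`.
    required = {"image": "vision", "audio": "audio", "video": "video", "document": "document"}
    needed = required.get(content_type)
    if needed is not None and needed not in model_features:
        raise ValueError(f"{content_type} type is not supported by this model")

check_file_allowed("document", {"vision", "document"})  # passes silently
```

This also explains the two deleted test scenarios below, which asserted the old silent-skip behavior.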
53 changes: 0 additions & 53 deletions api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
@@ -400,59 +400,6 @@ def test_fetch_prompt_messages__basic(faker, llm_node, model_config):
                 )
             },
         ),
-        LLMNodeTestScenario(
-            description="Prompt template with variable selector of File without vision feature",
-            user_query=fake_query,
-            user_files=[],
-            vision_enabled=True,
-            vision_detail=fake_vision_detail,
-            features=[],
-            window_size=fake_window_size,
-            prompt_template=[
-                LLMNodeChatModelMessage(
-                    text="{{#input.image#}}",
-                    role=PromptMessageRole.USER,
-                    edition_type="basic",
-                ),
-            ],
-            expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
-            file_variables={
-                "input.image": File(
-                    tenant_id="test",
-                    type=FileType.IMAGE,
-                    filename="test1.jpg",
-                    transfer_method=FileTransferMethod.REMOTE_URL,
-                    remote_url=fake_remote_url,
-                )
-            },
-        ),
-        LLMNodeTestScenario(
-            description="Prompt template with variable selector of File with video file and vision feature",
-            user_query=fake_query,
-            user_files=[],
-            vision_enabled=True,
-            vision_detail=fake_vision_detail,
-            features=[ModelFeature.VISION],
-            window_size=fake_window_size,
-            prompt_template=[
-                LLMNodeChatModelMessage(
-                    text="{{#input.image#}}",
-                    role=PromptMessageRole.USER,
-                    edition_type="basic",
-                ),
-            ],
-            expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
-            file_variables={
-                "input.image": File(
-                    tenant_id="test",
-                    type=FileType.VIDEO,
-                    filename="test1.mp4",
-                    transfer_method=FileTransferMethod.REMOTE_URL,
-                    remote_url=fake_remote_url,
-                    extension="mp4",
-                )
-            },
-        ),
     ]

     for scenario in test_scenarios: