@@ -107,7 +107,7 @@ async def llm_generate(prompt: str, llm_type: str) -> str:
   except Exception as e:
     raise InternalServerError(str(e)) from e

-async def llm_generate_multimodal(prompt: str, llm_type: str, user_file_type: str,
+async def llm_generate_multimodal(prompt: str, llm_type: str, user_file_types: List[str],
                                    user_file_bytes: bytes = None,
                                    user_file_urls: List[str] = None) -> str:
   """
@@ -116,6 +116,7 @@ async def llm_generate_multimodal(prompt: str, llm_type: str, user_file_type: st
     prompt: the text prompt to pass to the LLM
     user_file_bytes: bytes of the file provided by the user
     user_file_urls: list of URLs to include in context
+    user_file_types: list of mime types for files to include in context
     llm_type: the type of LLM to use (default to gemini)
   Returns:
     the text response: str
@@ -145,7 +146,7 @@ async def llm_generate_multimodal(prompt: str, llm_type: str, user_file_type: st
           f"Vertex model {llm_type} needs to be multimodal")
     response = await google_llm_predict(prompt, is_chat, is_multimodal,
                                         google_llm, None, user_file_bytes,
-                                        user_file_urls, user_file_type)
+                                        user_file_urls, user_file_types)
   else:
     raise ResourceNotFoundException(f"Cannot find llm type '{llm_type}'")

@@ -159,9 +160,9 @@ async def llm_generate_multimodal(prompt: str, llm_type: str, user_file_type: st
 async def llm_chat(prompt: str, llm_type: str,
                    user_chat: Optional[UserChat] = None,
                    user_query: Optional[UserQuery] = None,
-                   chat_file_type: str = None,
-                   chat_file_urls: List[str] = None,
-                   chat_file_bytes: bytes = None) -> str:
+                   chat_file_types: Optional[List[str]] = None,
+                   chat_file_urls: Optional[List[str]] = None,
+                   chat_file_bytes: Optional[bytes] = None) -> str:
   """
   Send a prompt to a chat model and return string response.
   Supports including a file in the chat context, either by URL or
@@ -174,7 +175,7 @@ async def llm_chat(prompt: str, llm_type: str,
     user_query (optional): a user query to use for context
     chat_file_bytes (bytes): bytes of file to include in chat context
     chat_file_urls (List[str]): urls of files to include in chat context
-    chat_file_type (str): mime type of file to include in chat context
+    chat_file_types (List[str]): mime types of files to include in chat context
   Returns:
     the text response: str
   """
@@ -185,7 +186,7 @@ async def llm_chat(prompt: str, llm_type: str,
               f" user_query=[{user_query}]"
               f" chat_file_bytes=[{chat_file_bytes_log}]"
               f" chat_file_urls=[{chat_file_urls}]"
-              f" chat_file_type=[{chat_file_type}]")
+              f" chat_file_type=[{chat_file_types}]")

   if llm_type not in get_model_config().get_chat_llm_types():
     raise ResourceNotFoundException(f"Cannot find chat llm type '{llm_type}'")
@@ -198,7 +199,7 @@ async def llm_chat(prompt: str, llm_type: str,
           "Must set only one of chat_file_bytes/chat_file_urls")
     if llm_type not in get_provider_models(PROVIDER_VERTEX):
       raise InternalServerError("Chat files only supported for Vertex")
-    if chat_file_type is None:
+    if chat_file_types is None:
       raise InternalServerError("Mime type must be passed for chat file")
     is_multimodal = True

@@ -209,6 +210,8 @@ async def llm_chat(prompt: str, llm_type: str,
   if user_chat is not None or user_query is not None:
     context_prompt = get_context_prompt(
         user_chat=user_chat, user_query=user_query)
+    # context_prompt includes only text (no images/video) from
+    # user_chat.history and user_query.history
     prompt = context_prompt + "\n" + prompt

   # check whether the context length exceeds the limit for the model
@@ -241,7 +244,7 @@ async def llm_chat(prompt: str, llm_type: str,
       response = await google_llm_predict(prompt, is_chat, is_multimodal,
                                           google_llm, user_chat,
                                           chat_file_bytes,
-                                          chat_file_urls, chat_file_type)
+                                          chat_file_urls, chat_file_types)
     elif llm_type in get_provider_models(PROVIDER_LANGCHAIN):
       response = await langchain_llm_generate(prompt, llm_type, user_chat)
   return response
@@ -271,6 +274,7 @@ def get_context_prompt(user_chat=None,
         prompt_list.append(f"Human input: {content}")
       elif UserChat.is_ai(entry):
         prompt_list.append(f"AI response: {content}")
+    # prompt_list includes only text from user_chat.history

   if user_query is not None:
     history = user_query.history
@@ -280,6 +284,7 @@ def get_context_prompt(user_chat=None,
280
284
prompt_list .append (f"Human input: { content } " )
281
285
elif UserQuery .is_ai (entry ):
282
286
prompt_list .append (f"AI response: { content } " )
287
+ # prompt_list includes only text from user_query.history
283
288
284
289
context_prompt = "\n \n " .join (prompt_list )
285
290
@@ -294,6 +299,8 @@ def check_context_length(prompt, llm_type):
   """
   # check if prompt exceeds context window length for model
   # assume a constant relationship between tokens and chars
+  # TODO: Recalculate max_context_length for text prompt,
+  # subtracting out tokens used by non-text context (image, video, etc)
   token_length = len(prompt) / CHARS_PER_TOKEN
   max_context_length = get_model_config_value(llm_type,
                                               KEY_MODEL_CONTEXT_LENGTH,
@@ -489,9 +496,9 @@ async def model_garden_predict(prompt: str,

 async def google_llm_predict(prompt: str, is_chat: bool, is_multimodal: bool,
                              google_llm: str, user_chat=None,
-                             user_file_bytes: bytes = None,
-                             user_file_urls: List[str] = None,
-                             user_file_type: str = None) -> str:
+                             user_file_bytes: Optional[bytes] = None,
+                             user_file_urls: Optional[List[str]] = None,
+                             user_file_types: Optional[List[str]] = None) -> str:
   """
   Generate text with a Google multimodal LLM given a prompt.
   Args:
@@ -502,7 +509,7 @@ async def google_llm_predict(prompt: str, is_chat: bool, is_multimodal: bool,
     user_chat: chat history
     user_file_bytes: the bytes of the file provided by the user
     user_file_urls: list of urls of files provided by the user
-    user_file_type: mime type of the file provided by the user
+    user_file_types: list of mime types of the files provided by the user
   Returns:
     the text response.
   """
@@ -513,7 +520,7 @@ async def google_llm_predict(prompt: str, is_chat: bool, is_multimodal: bool,
               f" is_multimodal=[{is_multimodal}], google_llm=[{google_llm}],"
               f" user_file_bytes=[{user_file_bytes_log}],"
               f" user_file_urls=[{user_file_urls}],"
-              f" user_file_type=[{user_file_type}].")
+              f" user_file_type=[{user_file_types}].")

   # TODO: Consider images in chat
   prompt_list = []
@@ -525,6 +532,8 @@ async def google_llm_predict(prompt: str, is_chat: bool, is_multimodal: bool,
         prompt_list.append(f"Human input: {content}")
       elif UserChat.is_ai(entry):
         prompt_list.append(f"AI response: {content}")
+    # prompt_list includes only text (no images/video)
+    # from user_chat.history
   prompt_list.append(prompt)
   context_prompt = "\n\n".join(prompt_list)

@@ -555,12 +564,16 @@ async def google_llm_predict(prompt: str, is_chat: bool, is_multimodal: bool,
   if is_multimodal:
     user_file_parts = []
     if user_file_bytes is not None:
+      # user_file_bytes refers to a single image and so we index into
+      # user_file_types (a list) to get a single mime type
       user_file_parts = [Part.from_data(user_file_bytes,
-                                        mime_type=user_file_type)]
+                                        mime_type=user_file_types[0])]
     elif user_file_urls is not None:
+      # user_file_urls and user_file_types are same-length lists
+      # referring to one or more images
       user_file_parts = [
         Part.from_uri(user_file_url, mime_type=user_file_type)
-        for user_file_url in user_file_urls
+        for user_file_url, user_file_type in zip(user_file_urls, user_file_types)
       ]
     else:
       raise RuntimeError(
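For illustration, here is a minimal usage sketch of the updated list-based interface. It is not part of the diff above: the import path, model key, and gs:// URLs are assumptions, and only the llm_generate_multimodal signature itself comes from this change. The key point is that user_file_types carries one mime type per entry of user_file_urls, in matching order, because google_llm_predict zips the two lists when building the Part objects.

# Hypothetical usage sketch; module path, model key, and URLs are assumed,
# not taken from this PR.
import asyncio

from services.llm_generate import llm_generate_multimodal  # assumed import path

async def main():
  # One mime type per URL, in matching order, since google_llm_predict
  # zips user_file_urls with user_file_types to build the Part list.
  response = await llm_generate_multimodal(
      prompt="Describe the differences between the two attached images.",
      llm_type="VertexAI-Gemini-Pro-Vision",  # assumed multimodal model key
      user_file_types=["image/png", "image/jpeg"],
      user_file_urls=["gs://my-bucket/chart.png", "gs://my-bucket/photo.jpg"])
  print(response)

if __name__ == "__main__":
  asyncio.run(main())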