Skip to content

Commit 734f5b6

Browse files
committed
Fixes up PDF summarizer example
The request wasn't wired through correctly on the server. When posting files, you have to use FastAPI's Form object if you're also passing in other information. Also changes the DAG to make it clearer what final content is passed to the LLM.
1 parent 2641aae commit 734f5b6

File tree

6 files changed

+25
-19
lines changed

6 files changed

+25
-19
lines changed

examples/LLM_Workflows/pdf_summarizer/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@ Here are a few ideas:
2222
3. Create a `.env` (next to `README.md` and `docker-compose.yaml`) and add your OpenAI API key in such that `OPENAI_API_KEY=YOUR_API_KEY`
2323
4. Build docker images `docker compose build`
2424
5. Create docker containers `docker compose up -d`
25-
6. Go to [http://localhost:8080/docs] to see if the FastAPI server is running
26-
7. Go to [http://localhost:8081/] to view the Streamlit app
25+
6. Go to [http://localhost:8080/docs](http://localhost:8080/docs) to see if the FastAPI server is running
26+
7. Go to [http://localhost:8081/](http://localhost:8081/) to view the Streamlit app

examples/LLM_Workflows/pdf_summarizer/backend/server.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ class SummarizeResponse(pydantic.BaseModel):
4040
@app.post("/summarize")
4141
async def summarize_pdf(
4242
pdf_file: fastapi.UploadFile,
43-
openai_gpt_model: str = "gpt-3.5-turbo-0613",
44-
content_type: str = "Scientific article",
45-
user_query: str = "Can you ELI5 the paper?",
43+
openai_gpt_model: str = fastapi.Form(...), # = "gpt-3.5-turbo-0613",
44+
content_type: str = fastapi.Form(...), # = "Scientific article",
45+
user_query: str = fastapi.Form(...), # = "Can you ELI5 the paper?",
4646
) -> SummarizeResponse:
4747
"""Request `summarized_text` from Hamilton driver with `pdf_file` and `user_query`"""
4848
results = await async_dr.execute(
@@ -61,9 +61,9 @@ async def summarize_pdf(
6161
@app.post("/summarize_sync")
6262
def summarize_pdf_sync(
6363
pdf_file: fastapi.UploadFile,
64-
openai_gpt_model: str = "gpt-3.5-turbo-0613",
65-
content_type: str = "Scientific article",
66-
user_query: str = "Can you ELI5 the paper?",
64+
openai_gpt_model: str = fastapi.Form(...), # = "gpt-3.5-turbo-0613",
65+
content_type: str = fastapi.Form(...), # = "Scientific article",
66+
user_query: str = fastapi.Form(...), # = "Can you ELI5 the paper?",
6767
) -> SummarizeResponse:
6868
"""Request `summarized_text` from Hamilton driver with `pdf_file` and `user_query`"""
6969
results = sync_dr.execute(
@@ -81,7 +81,7 @@ def summarize_pdf_sync(
8181

8282
# add to SwaggerUI the execution DAG png
8383
# see http://localhost:8080/docs#/default/summarize_pdf_summarize_post
84-
base64_viz = base64.b64encode(open("summarize_route.png", "rb").read()).decode("utf-8")
84+
base64_viz = base64.b64encode(open("summarization_module.png", "rb").read()).decode("utf-8")
8585
app.routes[
8686
-1
8787
].description = f"""<h1>Execution DAG</h1><img alt="" src="data:image/png;base64,{base64_viz}"/>"""

examples/LLM_Workflows/pdf_summarizer/backend/summarization.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,24 @@ def summarized_chunks(
124124
return _summarized_text
125125

126126

127+
def prompt_and_text_content(
128+
summarize_text_from_summaries_prompt: str, user_query: str, summarized_chunks: str
129+
) -> str:
130+
"""Creates the prompt for summarizing the text from the summarized chunks of the pdf.
131+
:param summarize_text_from_summaries_prompt: the template to use to summarize the chunks.
132+
:param user_query: the original user query.
133+
:param summarized_chunks: a long string of chunked summaries of a file.
134+
:return: the prompt to use to summarize the chunks.
135+
"""
136+
return summarize_text_from_summaries_prompt.format(query=user_query, results=summarized_chunks)
137+
138+
127139
def summarized_text(
128-
user_query: str,
129-
summarized_chunks: str,
130-
summarize_text_from_summaries_prompt: str,
140+
prompt_and_text_content: str,
131141
openai_gpt_model: str,
132142
) -> str:
133143
"""Summarizes the text from the summarized chunks of the pdf.
134-
:param user_query: the original user query.
135-
:param summarized_chunks: a long string of chunked summaries of a file.
136-
:param summarize_text_from_summaries_prompt: the template to use to summarize the chunks.
144+
:param prompt_and_text_content: the prompt and content to send over.
137145
:param openai_gpt_model: which openai gpt model to use.
138146
:return: the string response from the openai API.
139147
"""
@@ -142,9 +150,7 @@ def summarized_text(
142150
messages=[
143151
{
144152
"role": "user",
145-
"content": summarize_text_from_summaries_prompt.format(
146-
query=user_query, results=summarized_chunks
147-
),
153+
"content": prompt_and_text_content,
148154
}
149155
],
150156
temperature=0,
Loading
Binary file not shown.

examples/LLM_Workflows/pdf_summarizer/frontend/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def post_pdf(
2525
response = requests.post(
2626
server_url,
2727
files=files,
28-
json=dict(
28+
data=dict(
2929
openai_gpt_model=openai_gpt_model,
3030
content_type=content_type,
3131
user_query=user_query,

0 commit comments

Comments
 (0)