Skip to content

Commit 734f5b6

Browse files
committed
Fixes up PDF summarizer example
The request wasn't wired through correctly on the server. When posting files, you have to use FastAPI's Form object if you're also passing in other information. Also changes the DAG to make it clearer what final content is passed to the LLM.
1 parent 2641aae commit 734f5b6

File tree

6 files changed

+25
-19
lines changed

6 files changed

+25
-19
lines changed

examples/LLM_Workflows/pdf_summarizer/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@ Here are a few ideas:
2222
3. Create a `.env` (next to `README.md` and `docker-compose.yaml`) and add your OpenAI API key in such that `OPENAI_API_KEY=YOUR_API_KEY`
2323
4. Build docker images `docker compose build`
2424
5. Create docker containers `docker compose up -d`
25-
6. Go to [http://localhost:8080/docs] to see if the FastAPI server is running
26-
7. Go to [http://localhost:8081/] to view the Streamlit app
25+
6. Go to [http://localhost:8080/docs](http://localhost:8080/docs) to see if the FastAPI server is running
26+
7. Go to [http://localhost:8081/](http://localhost:8081/) to view the Streamlit app

examples/LLM_Workflows/pdf_summarizer/backend/server.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ class SummarizeResponse(pydantic.BaseModel):
4040
@app.post("/summarize")
4141
async def summarize_pdf(
4242
pdf_file: fastapi.UploadFile,
43-
openai_gpt_model: str = "gpt-3.5-turbo-0613",
44-
content_type: str = "Scientific article",
45-
user_query: str = "Can you ELI5 the paper?",
43+
openai_gpt_model: str = fastapi.Form(...), # = "gpt-3.5-turbo-0613",
44+
content_type: str = fastapi.Form(...), # = "Scientific article",
45+
user_query: str = fastapi.Form(...), # = "Can you ELI5 the paper?",
4646
) -> SummarizeResponse:
4747
"""Request `summarized_text` from Hamilton driver with `pdf_file` and `user_query`"""
4848
results = await async_dr.execute(
@@ -61,9 +61,9 @@ async def summarize_pdf(
6161
@app.post("/summarize_sync")
6262
def summarize_pdf_sync(
6363
pdf_file: fastapi.UploadFile,
64-
openai_gpt_model: str = "gpt-3.5-turbo-0613",
65-
content_type: str = "Scientific article",
66-
user_query: str = "Can you ELI5 the paper?",
64+
openai_gpt_model: str = fastapi.Form(...), # = "gpt-3.5-turbo-0613",
65+
content_type: str = fastapi.Form(...), # = "Scientific article",
66+
user_query: str = fastapi.Form(...), # = "Can you ELI5 the paper?",
6767
) -> SummarizeResponse:
6868
"""Request `summarized_text` from Hamilton driver with `pdf_file` and `user_query`"""
6969
results = sync_dr.execute(
@@ -81,7 +81,7 @@ def summarize_pdf_sync(
8181

8282
# add to SwaggerUI the execution DAG png
8383
# see http://localhost:8080/docs#/default/summarize_pdf_summarize_post
84-
base64_viz = base64.b64encode(open("summarize_route.png", "rb").read()).decode("utf-8")
84+
base64_viz = base64.b64encode(open("summarization_module.png", "rb").read()).decode("utf-8")
8585
app.routes[
8686
-1
8787
].description = f"""<h1>Execution DAG</h1><img alt="" src="data:image/png;base64,{base64_viz}"/>"""

examples/LLM_Workflows/pdf_summarizer/backend/summarization.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,24 @@ def summarized_chunks(
124124
return _summarized_text
125125

126126

127+
def prompt_and_text_content(
128+
summarize_text_from_summaries_prompt: str, user_query: str, summarized_chunks: str
129+
) -> str:
130+
"""Creates the prompt for summarizing the text from the summarized chunks of the pdf.
131+
:param summarize_text_from_summaries_prompt: the template to use to summarize the chunks.
132+
:param user_query: the original user query.
133+
:param summarized_chunks: a long string of chunked summaries of a file.
134+
:return: the prompt to use to summarize the chunks.
135+
"""
136+
return summarize_text_from_summaries_prompt.format(query=user_query, results=summarized_chunks)
137+
138+
127139
def summarized_text(
128-
user_query: str,
129-
summarized_chunks: str,
130-
summarize_text_from_summaries_prompt: str,
140+
prompt_and_text_content: str,
131141
openai_gpt_model: str,
132142
) -> str:
133143
"""Summarizes the text from the summarized chunks of the pdf.
134-
:param user_query: the original user query.
135-
:param summarized_chunks: a long string of chunked summaries of a file.
136-
:param summarize_text_from_summaries_prompt: the template to use to summarize the chunks.
144+
:param prompt_and_text_content: the prompt and content to send over.
137145
:param openai_gpt_model: which openai gpt model to use.
138146
:return: the string response from the openai API.
139147
"""
@@ -142,9 +150,7 @@ def summarized_text(
142150
messages=[
143151
{
144152
"role": "user",
145-
"content": summarize_text_from_summaries_prompt.format(
146-
query=user_query, results=summarized_chunks
147-
),
153+
"content": prompt_and_text_content,
148154
}
149155
],
150156
temperature=0,
Loading
Binary file not shown.

examples/LLM_Workflows/pdf_summarizer/frontend/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def post_pdf(
2525
response = requests.post(
2626
server_url,
2727
files=files,
28-
json=dict(
28+
data=dict(
2929
openai_gpt_model=openai_gpt_model,
3030
content_type=content_type,
3131
user_query=user_query,

0 commit comments

Comments
 (0)