-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[agent, browsing] Support viewing pdf and png/jpg via browser #7457
Changes from 26 commits
e026049
11f0cc5
18e3e42
1a4561a
70ce3c7
dd26952
7d67121
e9f4956
323233a
6b4b105
727dbf6
a54c855
0a57d98
678cc5e
120dae0
422248a
d74c79d
92c9022
93f6ad7
799779a
0f0fda8
88d2dea
7a4436f
3a50204
b5d7af5
09194bf
5d58c43
273437f
a682e83
55704f5
e52dd8f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
""" | ||
Utility module for generating file viewer HTML content. | ||
""" | ||
|
||
import base64 | ||
import mimetypes | ||
import os | ||
|
||
|
||
def _js_string_literal(value: str) -> str:
    """Return *value* as a JavaScript string literal safe inside ``<script>``."""
    import json

    # json.dumps produces a valid JS string literal (quotes, backslashes and
    # control characters are escaped). '<', '>' and '&' are additionally
    # written as \uXXXX escapes so the value can never terminate the
    # surrounding <script> element or start an HTML comment.
    return (
        json.dumps(value)
        .replace('<', '\\u003c')
        .replace('>', '\\u003e')
        .replace('&', '\\u0026')
    )


def generate_file_viewer_html(file_path: str) -> str:
    """
    Generate a self-contained HTML page for viewing a PDF or image file.

    The file is read from disk, base64-encoded and embedded in the page.
    PDFs are rendered page by page with pdf.js (loaded from a CDN); images
    are shown through a ``data:`` URL.

    Args:
        file_path: The absolute path to the file

    Returns:
        str: HTML content for viewing the file

    Raises:
        ValueError: If the file extension is not supported, or if
            ``file_path`` does not point to an existing regular file
    """
    import html

    file_extension = os.path.splitext(file_path)[1].lower()
    file_name = os.path.basename(file_path)

    # Supported file extensions, all of which are binary formats.
    # NOTE(review): files without an extension (reportedly the case for some
    # browser downloads) are rejected here; guessing the type from content
    # was discussed in review but is not handled in this function.
    supported_extensions = ('.pdf', '.png', '.jpg', '.jpeg', '.gif')

    if file_extension not in supported_extensions:
        raise ValueError(
            f"Unsupported file extension: {file_extension}. "
            f"Supported extensions are: {', '.join(supported_extensions)}"
        )

    # isfile (not exists) so that a directory named e.g. "scan.pdf" is reported
    # with the same ValueError instead of failing later inside open().
    if not os.path.isfile(file_path):
        raise ValueError(
            f'File not found locally: {file_path}. Please download the file to the local machine and try again.'
        )

    mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'

    # Every supported type is binary, so the content is always embedded as
    # base64. The base64 alphabet needs no escaping inside a quoted JS string.
    with open(file_path, 'rb') as file:
        file_base64 = base64.b64encode(file.read()).decode('ascii')

    # Escape everything derived from the (possibly hostile) path before it is
    # interpolated: HTML-escape for the <title>, JS literals for the script.
    title_name = html.escape(file_name, quote=True)
    js_file_path = _js_string_literal(file_path)
    js_file_extension = _js_string_literal(file_extension)
    js_mime_type = _js_string_literal(mime_type)

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>File Viewer - {title_name}</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
    <style>
        body, html {{ margin: 0; padding: 0; height: 100%; overflow: hidden; font-family: Arial, sans-serif; }}
        #viewer-container {{ width: 100%; height: 100vh; overflow: auto; }}
        .page {{ margin: 10px auto; box-shadow: 0 0 10px rgba(0,0,0,0.3); }}
        .text-content {{ margin: 20px; white-space: pre-wrap; font-family: monospace; line-height: 1.5; }}
        .error {{ color: red; margin: 20px; }}
        img {{ max-width: 100%; margin: 20px auto; display: block; }}
    </style>
</head>
<body>
    <div id="viewer-container"></div>
    <script>
        const filePath = {js_file_path};
        const fileExtension = {js_file_extension};
        const fileContent = "";
        const fileBase64 = "{file_base64}";
        const mimeType = {js_mime_type};
        const container = document.getElementById('viewer-container');

        async function loadContent() {{
            try {{
                if (fileExtension === '.pdf') {{
                    pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
                    const binaryString = atob(fileBase64);
                    const bytes = new Uint8Array(binaryString.length);
                    for (let i = 0; i < binaryString.length; i++) {{
                        bytes[i] = binaryString.charCodeAt(i);
                    }}

                    const loadingTask = pdfjsLib.getDocument({{data: bytes.buffer}});
                    const pdf = await loadingTask.promise;

                    // Get total number of pages
                    const numPages = pdf.numPages;

                    // Render each page
                    for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
                        const page = await pdf.getPage(pageNum);

                        // Set scale for rendering
                        const viewport = page.getViewport({{ scale: 1.5 }});

                        // Create canvas for rendering
                        const canvas = document.createElement('canvas');
                        canvas.className = 'page';
                        canvas.width = viewport.width;
                        canvas.height = viewport.height;
                        container.appendChild(canvas);

                        // Render PDF page into canvas context
                        const context = canvas.getContext('2d');
                        const renderContext = {{
                            canvasContext: context,
                            viewport: viewport
                        }};

                        await page.render(renderContext).promise;
                    }}
                }} else if (['.png', '.jpg', '.jpeg', '.gif', '.bmp'].includes(fileExtension)) {{
                    const img = document.createElement('img');
                    img.src = `data:${{mimeType}};base64,${{fileBase64}}`;
                    img.alt = filePath.split('/').pop();
                    container.appendChild(img);
                }} else {{
                    const pre = document.createElement('pre');
                    pre.className = 'text-content';
                    pre.textContent = fileContent;
                    container.appendChild(pre);
                }}
            }} catch (error) {{
                console.error('Error:', error);
                container.innerHTML = `<div class="error"><h2>Error loading file</h2><p>${{error.message}}</p></div>`;
            }}
        }}

        window.onload = loadContent;
    </script>
</body>
</html>"""
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a stupid question: isn't `server_url` always "127.0.0.1"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, but the port could be very different across different environment (saas, docker), and we need to tell the agent the port 😢
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yeah I forgot about that.
So in the long term, do we want to write prompts in a templating language, and then just fill in the port info at runtime?