Skip to content

Commit cc024e1

Browse files
authored
Merge pull request #925 from ScrapeGraphAI/pre/beta
Pre/beta
2 parents 5c3d62d + 7320c71 commit cc024e1

File tree

5 files changed

+38
-34
lines changed

5 files changed

+38
-34
lines changed

CHANGELOG.md

+3-12
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,10 @@
1-
## [1.38.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.0...v1.38.1) (2025-02-15)
1+
## [1.39.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.1-beta.1...v1.39.0-beta.1) (2025-02-17)
22

33

4-
### Bug Fixes
5-
6-
* filter links ([04b9197](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04b91972e88b69b722454d54c8635dfb49b38b44))
7-
8-
9-
### Test
10-
11-
* Add coverage improvement test for tests/test_scrape_do.py ([4ce6d1b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4ce6d1b94306d0ae94a74748726468a5132b7969))
12-
4+
### Features
135

14-
### CI
6+
* add the new handling exception ([5c0bc46](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c0bc46c6322ea07efa31d95819d7da47462f981))
157

16-
* **release:** 1.38.1-beta.1 [skip ci] ([83be82a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83be82a11e83eb2be60a945deac361c46526c785))
178

189
## [1.38.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.0...v1.38.1-beta.1) (2025-02-13)
1910

docs/assets/api-banner.png

-10.8 KB
Loading

pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "scrapegraphai"
33

4-
version = "1.38.1"
4+
version = "1.39.0b1"
55

66

77
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

scrapegraphai/helpers/models_tokens.py

+1
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@
8080
"llama3.2": 128000,
8181
"llama3.2:1b": 128000,
8282
"llama3.2:3b": 128000,
83+
"llama3.3": 128000,
8384
"llama3.3:70b": 128000,
8485
"scrapegraph": 8192,
8586
"mistral-small": 128000,

scrapegraphai/nodes/generate_answer_node.py

+33-21
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import time
6+
import json
67
from typing import List, Optional
78

89
from langchain.prompts import PromptTemplate
@@ -120,7 +121,11 @@ def execute(self, state: dict) -> dict:
120121
else:
121122
if not isinstance(self.llm_model, ChatBedrock):
122123
output_parser = JsonOutputParser()
123-
format_instructions = output_parser.get_format_instructions()
124+
format_instructions = (
125+
"You must respond with a JSON object. Your response should be formatted as a valid JSON "
126+
"with a 'content' field containing your analysis. For example:\n"
127+
'{"content": "your analysis here"}'
128+
)
124129
else:
125130
output_parser = None
126131
format_instructions = ""
@@ -131,13 +136,25 @@ def execute(self, state: dict) -> dict:
131136
and not self.script_creator
132137
or self.is_md_scraper
133138
):
134-
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS_MD
135-
template_chunks_prompt = TEMPLATE_CHUNKS_MD
136-
template_merge_prompt = TEMPLATE_MERGE_MD
139+
template_no_chunks_prompt = (
140+
TEMPLATE_NO_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
141+
)
142+
template_chunks_prompt = (
143+
TEMPLATE_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
144+
)
145+
template_merge_prompt = (
146+
TEMPLATE_MERGE_MD + "\n\nIMPORTANT: " + format_instructions
147+
)
137148
else:
138-
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS
139-
template_chunks_prompt = TEMPLATE_CHUNKS
140-
template_merge_prompt = TEMPLATE_MERGE
149+
template_no_chunks_prompt = (
150+
TEMPLATE_NO_CHUNKS + "\n\nIMPORTANT: " + format_instructions
151+
)
152+
template_chunks_prompt = (
153+
TEMPLATE_CHUNKS + "\n\nIMPORTANT: " + format_instructions
154+
)
155+
template_merge_prompt = (
156+
TEMPLATE_MERGE + "\n\nIMPORTANT: " + format_instructions
157+
)
141158

142159
if self.additional_info is not None:
143160
template_no_chunks_prompt = self.additional_info + template_no_chunks_prompt
@@ -161,8 +178,9 @@ def execute(self, state: dict) -> dict:
161178
answer = self.invoke_with_timeout(
162179
chain, {"question": user_prompt}, self.timeout
163180
)
164-
except Timeout:
165-
state.update({self.output[0]: {"error": "Response timeout exceeded"}})
181+
except (Timeout, json.JSONDecodeError) as e:
182+
error_msg = "Response timeout exceeded" if isinstance(e, Timeout) else "Invalid JSON response format"
183+
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
166184
return state
167185

168186
state.update({self.output[0]: answer})
@@ -191,14 +209,9 @@ def execute(self, state: dict) -> dict:
191209
batch_results = self.invoke_with_timeout(
192210
async_runner, {"question": user_prompt}, self.timeout
193211
)
194-
except Timeout:
195-
state.update(
196-
{
197-
self.output[0]: {
198-
"error": "Response timeout exceeded during chunk processing"
199-
}
200-
}
201-
)
212+
except (Timeout, json.JSONDecodeError) as e:
213+
error_msg = "Response timeout exceeded during chunk processing" if isinstance(e, Timeout) else "Invalid JSON response format in chunk processing"
214+
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
202215
return state
203216

204217
merge_prompt = PromptTemplate(
@@ -216,10 +229,9 @@ def execute(self, state: dict) -> dict:
216229
{"context": batch_results, "question": user_prompt},
217230
self.timeout,
218231
)
219-
except Timeout:
220-
state.update(
221-
{self.output[0]: {"error": "Response timeout exceeded during merge"}}
222-
)
232+
except (Timeout, json.JSONDecodeError) as e:
233+
error_msg = "Response timeout exceeded during merge" if isinstance(e, Timeout) else "Invalid JSON response format during merge"
234+
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
223235
return state
224236

225237
state.update({self.output[0]: answer})

0 commit comments

Comments
 (0)