Skip to content

Commit 4141164

Browse files
committed
Enhance retry mechanism in ChatGoogleGenerativeAI with customizable parameters
- Updated `_create_retry_decorator` to accept parameters for the maximum number of retries and the exponential backoff settings.
- Modified `_chat_with_retry` to read these parameters from `kwargs`.
- Added handling for `ResourceExhausted` exceptions to respect the `retry_after` delay.
- Introduced a new test to validate the retry decorator with custom parameters.
1 parent 0d40a4f commit 4141164

File tree

2 files changed

+46
-9
lines changed

2 files changed

+46
-9
lines changed

libs/genai/langchain_google_genai/chat_models.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import json
66
import logging
77
import mimetypes
8+
import time
89
import uuid
910
import warnings
1011
from difflib import get_close_matches
@@ -139,7 +140,12 @@ class ChatGoogleGenerativeAIError(GoogleGenerativeAIError):
139140
"""
140141

141142

142-
def _create_retry_decorator() -> Callable[[Any], Any]:
143+
def _create_retry_decorator(
144+
max_retries: int = 6,
145+
wait_exponential_multiplier: float = 2.0,
146+
wait_exponential_min: float = 1.0,
147+
wait_exponential_max: float = 60.0,
148+
) -> Callable[[Any], Any]:
143149
"""
144150
Creates and returns a preconfigured tenacity retry decorator.
145151
@@ -151,15 +157,14 @@ def _create_retry_decorator() -> Callable[[Any], Any]:
151157
Callable[[Any], Any]: A retry decorator configured for handling specific
152158
Google API exceptions.
153159
"""
154-
multiplier = 2
155-
min_seconds = 1
156-
max_seconds = 60
157-
max_retries = 2
158-
159160
return retry(
160161
reraise=True,
161162
stop=stop_after_attempt(max_retries),
162-
wait=wait_exponential(multiplier=multiplier, min=min_seconds, max=max_seconds),
163+
wait=wait_exponential(
164+
multiplier=wait_exponential_multiplier,
165+
min=wait_exponential_min,
166+
max=wait_exponential_max,
167+
),
163168
retry=(
164169
retry_if_exception_type(google.api_core.exceptions.ResourceExhausted)
165170
| retry_if_exception_type(google.api_core.exceptions.ServiceUnavailable)
@@ -184,13 +189,17 @@ def _chat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
184189
Returns:
185190
Any: The result from the chat generation method.
186191
"""
187-
retry_decorator = _create_retry_decorator()
192+
retry_decorator = _create_retry_decorator(
193+
max_retries=kwargs.get("max_retries", 6),
194+
wait_exponential_multiplier=kwargs.get("wait_exponential_multiplier", 2.0),
195+
wait_exponential_min=kwargs.get("wait_exponential_min", 1.0),
196+
wait_exponential_max=kwargs.get("wait_exponential_max", 60.0),
197+
)
188198

189199
@retry_decorator
190200
def _chat_with_retry(**kwargs: Any) -> Any:
191201
try:
192202
return generation_method(**kwargs)
193-
# Do not retry for these errors.
194203
except google.api_core.exceptions.FailedPrecondition as exc:
195204
if "location is not supported" in exc.message:
196205
error_msg = (
@@ -204,6 +213,13 @@ def _chat_with_retry(**kwargs: Any) -> Any:
204213
raise ChatGoogleGenerativeAIError(
205214
f"Invalid argument provided to Gemini: {e}"
206215
) from e
216+
except google.api_core.exceptions.ResourceExhausted as e:
217+
# Handle quota-exceeded error with recommended retry delay
218+
if hasattr(e, "retry_after") and e.retry_after < kwargs.get(
219+
"wait_exponential_max", 60.0
220+
):
221+
time.sleep(e.retry_after)
222+
raise e
207223
except Exception as e:
208224
raise e
209225

libs/genai/tests/unit_tests/test_chat_models.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
GenerateContentResponse,
1616
Part,
1717
)
18+
from google.api_core.exceptions import ResourceExhausted
1819
from langchain_core.load import dumps, loads
1920
from langchain_core.messages import (
2021
AIMessage,
@@ -30,6 +31,7 @@
3031

3132
from langchain_google_genai.chat_models import (
3233
ChatGoogleGenerativeAI,
34+
_chat_with_retry,
3335
_convert_tool_message_to_part,
3436
_parse_chat_history,
3537
_parse_response_candidate,
@@ -771,3 +773,22 @@ def test_model_kwargs() -> None:
771773
assert llm.model == "models/my-model"
772774
assert llm.convert_system_message_to_human is True
773775
assert llm.model_kwargs == {"foo": "bar"}
776+
777+
778+
def test_retry_decorator_with_custom_parameters() -> None:
    """Check that ``_chat_with_retry`` honours caller-supplied retry settings.

    The generation method always raises ``ResourceExhausted``, so the call is
    expected to be attempted exactly ``max_retries`` times before the error is
    re-raised. ``time.sleep`` is patched so the backoff waits are recorded
    instead of actually slept, which keeps the test fast and lets it check
    that every wait falls inside the custom min/max bounds.
    """
    # Imported locally so this test does not depend on a module-level
    # import of ``patch`` being present in the test file.
    from unittest.mock import patch

    # Mock the generation method so that every attempt fails with a
    # retryable quota error.
    mock_generation_method = Mock()
    mock_generation_method.side_effect = ResourceExhausted("Quota exceeded")

    # Call the function with custom retry parameters; sleeping is stubbed out.
    with patch("time.sleep") as mock_sleep:
        with pytest.raises(ResourceExhausted):
            _chat_with_retry(
                generation_method=mock_generation_method,
                max_retries=3,
                wait_exponential_multiplier=1.5,
                wait_exponential_min=2.0,
                wait_exponential_max=30.0,
            )

    # The custom attempt limit was used: three calls, then the error surfaced.
    assert mock_generation_method.call_count == 3

    # One backoff wait between consecutive attempts, each clamped to the
    # custom [wait_exponential_min, wait_exponential_max] range.
    waits = [recorded.args[0] for recorded in mock_sleep.call_args_list]
    assert len(waits) == 2
    assert all(2.0 <= wait <= 30.0 for wait in waits)

0 commit comments

Comments
 (0)