Skip to content

Commit faafa09

Browse files
andrewsgcojenco
authored andcommitted
feat: media operation retries can be configured using the same interface as with non-media operation (googleapis#447)
All media operation calls (downloads and uploads) can be configured with Retry objects and ConditionalRetryPolicy objects, nearly identically to non-media operations. This is accomplished by converting the Retry object to a google-resumable-media-python library RetryStrategy object at the point of entry to that library. Custom predicates of Retry objects (for instance set with Retry(predicate=...)) are not supported for media operations; they will be replaced with a media-operation-specific predicate. This change is backwards-compatible for users of public methods using num_retries arguments to configure uploads; num_retries continue to be supported but the deprecation warning remains in effect. They will be fully removed and replaced with Retry objects in the future. With this change, the default parameters for a media operations retry changes to be uniform with non-media operation retries. Specifically, the retry deadline for media operation retries becomes 120 seconds unless otherwise configured.
1 parent 9503b08 commit faafa09

File tree

9 files changed

+1127
-100
lines changed

9 files changed

+1127
-100
lines changed

google/cloud/storage/_helpers.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import os
2424

2525
from six.moves.urllib.parse import urlsplit
26+
from google import resumable_media
2627
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
2728
from google.cloud.storage.retry import DEFAULT_RETRY
2829
from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
@@ -45,6 +46,12 @@
4546
("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
4647
)
4748

49+
_NUM_RETRIES_MESSAGE = (
50+
"`num_retries` has been deprecated and will be removed in a future "
51+
"release. Use the `retry` argument with a Retry or ConditionalRetryPolicy "
52+
"object, or None, instead."
53+
)
54+
4855

4956
def _get_storage_host():
5057
return os.environ.get(STORAGE_EMULATOR_ENV_VAR, _DEFAULT_STORAGE_HOST)
@@ -524,3 +531,37 @@ def _bucket_bound_hostname_url(host, scheme=None):
524531
return host
525532

526533
return "{scheme}://{host}/".format(scheme=scheme, host=host)
534+
535+
536+
def _api_core_retry_to_resumable_media_retry(retry, num_retries=None):
537+
"""Convert google.api.core.Retry to google.resumable_media.RetryStrategy.
538+
539+
Custom predicates are not translated.
540+
541+
:type retry: google.api_core.Retry
542+
:param retry: (Optional) The google.api_core.Retry object to translate.
543+
544+
:type num_retries: int
545+
:param num_retries: (Optional) The number of retries desired. This is
546+
supported for backwards compatibility and is mutually exclusive with
547+
`retry`.
548+
549+
:rtype: google.resumable_media.RetryStrategy
550+
:returns: A RetryStrategy with all applicable attributes copied from input,
551+
or a RetryStrategy with max_retries set to 0 if None was input.
552+
"""
553+
554+
if retry is not None and num_retries is not None:
555+
raise ValueError("num_retries and retry arguments are mutually exclusive")
556+
557+
elif retry is not None:
558+
return resumable_media.RetryStrategy(
559+
max_sleep=retry._maximum,
560+
max_cumulative_retry=retry._deadline,
561+
initial_delay=retry._initial,
562+
multiplier=retry._multiplier,
563+
)
564+
elif num_retries is not None:
565+
return resumable_media.RetryStrategy(max_retries=num_retries)
566+
else:
567+
return resumable_media.RetryStrategy(max_retries=0)

google/cloud/storage/blob.py

Lines changed: 320 additions & 30 deletions
Large diffs are not rendered by default.

google/cloud/storage/client.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from google.cloud.storage.acl import DefaultObjectACL
5454
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
5555
from google.cloud.storage.retry import DEFAULT_RETRY
56+
from google.cloud.storage.retry import ConditionalRetryPolicy
5657

5758

5859
_marker = object()
@@ -972,6 +973,7 @@ def download_blob_to_file(
972973
if_metageneration_not_match=None,
973974
timeout=_DEFAULT_TIMEOUT,
974975
checksum="md5",
976+
retry=DEFAULT_RETRY,
975977
):
976978
"""Download the contents of a blob object or blob URI into a file-like object.
977979
@@ -1021,6 +1023,27 @@ def download_blob_to_file(
10211023
downloads where chunk_size is set) an INFO-level log will be
10221024
emitted. Supported values are "md5", "crc32c" and None. The default
10231025
is "md5".
1026+
retry (google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy)
1027+
(Optional) How to retry the RPC. A None value will disable
1028+
retries. A google.api_core.retry.Retry value will enable retries,
1029+
and the object will define retriable response codes and errors and
1030+
configure backoff and timeout options.
1031+
1032+
A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1033+
Retry object and activates it only if certain conditions are met.
1034+
This class exists to provide safe defaults for RPC calls that are
1035+
not technically safe to retry normally (due to potential data
1036+
duplication or other side-effects) but become safe to retry if a
1037+
condition such as if_metageneration_match is set.
1038+
1039+
See the retry.py source code and docstrings in this package
1040+
(google.cloud.storage.retry) for information on retry types and how
1041+
to configure them.
1042+
1043+
Media operations (downloads and uploads) do not support non-default
1044+
predicates in a Retry object. The default will always be used. Other
1045+
configuration changes for Retry objects such as delays and deadlines
1046+
are respected.
10241047
10251048
Examples:
10261049
Download a blob using a blob resource.
@@ -1046,6 +1069,19 @@ def download_blob_to_file(
10461069
10471070
10481071
"""
1072+
1073+
# Handle ConditionalRetryPolicy.
1074+
if isinstance(retry, ConditionalRetryPolicy):
1075+
# Conditional retries are designed for non-media calls, which change
1076+
# arguments into query_params dictionaries. Media operations work
1077+
# differently, so here we make a "fake" query_params to feed to the
1078+
# ConditionalRetryPolicy.
1079+
query_params = {
1080+
"ifGenerationMatch": if_generation_match,
1081+
"ifMetagenerationMatch": if_metageneration_match,
1082+
}
1083+
retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
1084+
10491085
if not isinstance(blob_or_uri, Blob):
10501086
blob_or_uri = Blob.from_string(blob_or_uri)
10511087
download_url = blob_or_uri._get_download_url(
@@ -1070,6 +1106,7 @@ def download_blob_to_file(
10701106
raw_download,
10711107
timeout=timeout,
10721108
checksum=checksum,
1109+
retry=retry,
10731110
)
10741111
except resumable_media.InvalidResponse as exc:
10751112
_raise_from_invalid_response(exc)
@@ -1222,6 +1259,8 @@ def list_blobs(
12221259
max_results=max_results,
12231260
extra_params=extra_params,
12241261
page_start=_blobs_page_start,
1262+
timeout=timeout,
1263+
retry=retry,
12251264
)
12261265
iterator.bucket = bucket
12271266
iterator.prefixes = set()

google/cloud/storage/fileio.py

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,14 @@
1313
# limitations under the License.
1414

1515
import io
16+
import warnings
1617

1718
from google.api_core.exceptions import RequestRangeNotSatisfiable
19+
from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE
20+
from google.cloud.storage.retry import DEFAULT_RETRY
21+
from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
22+
from google.cloud.storage.retry import ConditionalRetryPolicy
23+
1824

1925
# Resumable uploads require a chunk size of precisely a multiple of 256 KiB.
2026
CHUNK_SIZE_MULTIPLE = 256 * 1024 # 256 KiB
@@ -28,20 +34,22 @@
2834
"if_metageneration_match",
2935
"if_metageneration_not_match",
3036
"timeout",
37+
"retry",
3138
}
3239

3340
# Valid keyword arguments for upload methods.
3441
# Note: Changes here need to be reflected in the blob.open() docstring.
3542
VALID_UPLOAD_KWARGS = {
3643
"content_type",
37-
"num_retries",
3844
"predefined_acl",
45+
"num_retries",
3946
"if_generation_match",
4047
"if_generation_not_match",
4148
"if_metageneration_match",
4249
"if_metageneration_not_match",
4350
"timeout",
4451
"checksum",
52+
"retry",
4553
}
4654

4755

@@ -58,13 +66,35 @@ class BlobReader(io.BufferedIOBase):
5866
bytes than the chunk_size are requested, the remainder is buffered.
5967
The default is the chunk_size of the blob, or 40MiB.
6068
69+
:type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
70+
:param retry: (Optional) How to retry the RPC. A None value will disable
71+
retries. A google.api_core.retry.Retry value will enable retries,
72+
and the object will define retriable response codes and errors and
73+
configure backoff and timeout options.
74+
75+
A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
76+
Retry object and activates it only if certain conditions are met.
77+
This class exists to provide safe defaults for RPC calls that are
78+
not technically safe to retry normally (due to potential data
79+
duplication or other side-effects) but become safe to retry if a
80+
condition such as if_metageneration_match is set.
81+
82+
See the retry.py source code and docstrings in this package
83+
(google.cloud.storage.retry) for information on retry types and how
84+
to configure them.
85+
86+
Media operations (downloads and uploads) do not support non-default
87+
predicates in a Retry object. The default will always be used. Other
88+
configuration changes for Retry objects such as delays and deadlines
89+
are respected.
90+
6191
:param download_kwargs: Keyword arguments to pass to the underlying API
6292
calls. The following arguments are supported: "if_generation_match",
6393
"if_generation_not_match", "if_metageneration_match",
6494
"if_metageneration_not_match", "timeout".
6595
"""
6696

67-
def __init__(self, blob, chunk_size=None, **download_kwargs):
97+
def __init__(self, blob, chunk_size=None, retry=DEFAULT_RETRY, **download_kwargs):
6898
"""docstring note that download_kwargs also used for reload()"""
6999
for kwarg in download_kwargs:
70100
if kwarg not in VALID_DOWNLOAD_KWARGS:
@@ -76,6 +106,7 @@ def __init__(self, blob, chunk_size=None, **download_kwargs):
76106
self._pos = 0
77107
self._buffer = io.BytesIO()
78108
self._chunk_size = chunk_size or blob.chunk_size or DEFAULT_CHUNK_SIZE
109+
self._retry = retry
79110
self._download_kwargs = download_kwargs
80111

81112
def read(self, size=-1):
@@ -102,6 +133,7 @@ def read(self, size=-1):
102133
start=fetch_start,
103134
end=fetch_end,
104135
checksum=None,
136+
retry=self._retry,
105137
**self._download_kwargs
106138
)
107139
except RequestRangeNotSatisfiable:
@@ -197,14 +229,43 @@ class BlobWriter(io.BufferedIOBase):
197229
changes the behavior of flush() to conform to TextIOWrapper's
198230
expectations.
199231
232+
:type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
233+
:param retry: (Optional) How to retry the RPC. A None value will disable
234+
retries. A google.api_core.retry.Retry value will enable retries,
235+
and the object will define retriable response codes and errors and
236+
configure backoff and timeout options.
237+
238+
A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
239+
Retry object and activates it only if certain conditions are met.
240+
This class exists to provide safe defaults for RPC calls that are
241+
not technically safe to retry normally (due to potential data
242+
duplication or other side-effects) but become safe to retry if a
243+
condition such as if_metageneration_match is set.
244+
245+
See the retry.py source code and docstrings in this package
246+
(google.cloud.storage.retry) for information on retry types and how
247+
to configure them.
248+
249+
Media operations (downloads and uploads) do not support non-default
250+
predicates in a Retry object. The default will always be used. Other
251+
configuration changes for Retry objects such as delays and deadlines
252+
are respected.
253+
200254
:param upload_kwargs: Keyword arguments to pass to the underlying API
201255
calls. The following arguments are supported: "if_generation_match",
202256
"if_generation_not_match", "if_metageneration_match",
203257
"if_metageneration_not_match", "timeout", "content_type",
204258
"num_retries", "predefined_acl", "checksum".
205259
"""
206260

207-
def __init__(self, blob, chunk_size=None, text_mode=False, **upload_kwargs):
261+
def __init__(
262+
self,
263+
blob,
264+
chunk_size=None,
265+
text_mode=False,
266+
retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
267+
**upload_kwargs
268+
):
208269
for kwarg in upload_kwargs:
209270
if kwarg not in VALID_UPLOAD_KWARGS:
210271
raise ValueError(
@@ -219,6 +280,7 @@ def __init__(self, blob, chunk_size=None, text_mode=False, **upload_kwargs):
219280
# In text mode this class will be wrapped and TextIOWrapper requires a
220281
# different behavior of flush().
221282
self._text_mode = text_mode
283+
self._retry = retry
222284
self._upload_kwargs = upload_kwargs
223285

224286
@property
@@ -259,20 +321,32 @@ def write(self, b):
259321
return pos
260322

261323
def _initiate_upload(self):
324+
# num_retries is only supported for backwards-compatibility reasons.
262325
num_retries = self._upload_kwargs.pop("num_retries", None)
326+
retry = self._retry
263327
content_type = self._upload_kwargs.pop("content_type", None)
264328

265-
if (
266-
self._upload_kwargs.get("if_metageneration_match") is None
267-
and num_retries is None
268-
):
269-
# Uploads are only idempotent (safe to retry) if
270-
# if_metageneration_match is set. If it is not set, the default
271-
# num_retries should be 0. Note: Because retry logic for uploads is
272-
# provided by the google-resumable-media-python package, it doesn't
273-
# use the ConditionalRetryStrategy class used in other API calls in
274-
# this library to solve this problem.
275-
num_retries = 0
329+
if num_retries is not None:
330+
warnings.warn(_NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2)
331+
# num_retries and retry are mutually exclusive. If num_retries is
332+
# set and retry is exactly the default, then nullify retry for
333+
# backwards compatibility.
334+
if retry is DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED:
335+
retry = None
336+
337+
# Handle ConditionalRetryPolicy.
338+
if isinstance(retry, ConditionalRetryPolicy):
339+
# Conditional retries are designed for non-media calls, which change
340+
# arguments into query_params dictionaries. Media operations work
341+
# differently, so here we make a "fake" query_params to feed to the
342+
# ConditionalRetryPolicy.
343+
query_params = {
344+
"ifGenerationMatch": self._upload_kwargs.get("if_generation_match"),
345+
"ifMetagenerationMatch": self._upload_kwargs.get(
346+
"if_metageneration_match"
347+
),
348+
}
349+
retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
276350

277351
self._upload_and_transport = self._blob._initiate_resumable_upload(
278352
self._blob.bucket.client,
@@ -281,6 +355,7 @@ def _initiate_upload(self):
281355
None,
282356
num_retries,
283357
chunk_size=self._chunk_size,
358+
retry=retry,
284359
**self._upload_kwargs
285360
)
286361

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
dependencies = [
3131
"google-auth >= 1.11.0, < 2.0dev",
3232
"google-cloud-core >= 1.4.1, < 2.0dev",
33-
"google-resumable-media >= 1.2.0, < 2.0dev",
33+
"google-resumable-media >= 1.3.0, < 2.0dev",
3434
"requests >= 2.18.0, < 3.0.0dev",
3535
"googleapis-common-protos < 1.53.0; python_version<'3.0'",
3636
]

tests/unit/test__helpers.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,45 @@ def test_hostname_and_scheme(self):
593593
self.assertEqual(self._call_fut(host=HOST, scheme=SCHEME), EXPECTED_URL)
594594

595595

596+
class Test__api_core_retry_to_resumable_media_retry(unittest.TestCase):
597+
def test_conflict(self):
598+
from google.cloud.storage._helpers import (
599+
_api_core_retry_to_resumable_media_retry,
600+
)
601+
602+
with self.assertRaises(ValueError):
603+
_api_core_retry_to_resumable_media_retry(retry=DEFAULT_RETRY, num_retries=2)
604+
605+
def test_retry(self):
606+
from google.cloud.storage._helpers import (
607+
_api_core_retry_to_resumable_media_retry,
608+
)
609+
610+
retry_strategy = _api_core_retry_to_resumable_media_retry(retry=DEFAULT_RETRY)
611+
self.assertEqual(retry_strategy.max_sleep, DEFAULT_RETRY._maximum)
612+
self.assertEqual(retry_strategy.max_cumulative_retry, DEFAULT_RETRY._deadline)
613+
self.assertEqual(retry_strategy.initial_delay, DEFAULT_RETRY._initial)
614+
self.assertEqual(retry_strategy.multiplier, DEFAULT_RETRY._multiplier)
615+
616+
def test_num_retries(self):
617+
from google.cloud.storage._helpers import (
618+
_api_core_retry_to_resumable_media_retry,
619+
)
620+
621+
retry_strategy = _api_core_retry_to_resumable_media_retry(
622+
retry=None, num_retries=2
623+
)
624+
self.assertEqual(retry_strategy.max_retries, 2)
625+
626+
def test_none(self):
627+
from google.cloud.storage._helpers import (
628+
_api_core_retry_to_resumable_media_retry,
629+
)
630+
631+
retry_strategy = _api_core_retry_to_resumable_media_retry(retry=None)
632+
self.assertEqual(retry_strategy.max_retries, 0)
633+
634+
596635
class _MD5Hash(object):
597636
def __init__(self, digest_val):
598637
self.digest_val = digest_val

0 commit comments

Comments
 (0)