Skip to content

Commit aa6c5eb

Browse files
committed
feat(beetmover): allow not failing on unknown mimetypes
This is needed to support work from mozilla-releng#1141, where we're uploading all artifacts from upstream tasks and can't reasonably predict all possible file types and extensions. This also fixes a minor bug in the `upload_to_s3` tests: they implicitly relied on `setup_mimetypes()` having been called elsewhere in order to pass, which meant they failed mysteriously when run independently.
1 parent 2a1d96e commit aa6c5eb

File tree

4 files changed

+63
-14
lines changed

4 files changed

+63
-14
lines changed

beetmoverscript/src/beetmoverscript/gcloud.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,14 @@ def setup_gcs_credentials(raw_creds):
102102
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = fp.name
103103

104104

105-
async def upload_to_gcs(context, target_path, path, expiry=None):
105+
async def upload_to_gcs(context, target_path, path, expiry=None, fail_on_unknown_mimetype=True):
106106
product = get_product_name(context.task, context.config)
107107
mime_type = mimetypes.guess_type(path)[0]
108108
if not mime_type:
109-
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
109+
if fail_on_unknown_mimetype:
110+
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
111+
else:
112+
mime_type = "application/octet-stream"
110113
bucket_name = get_bucket_name(context, product, "gcloud")
111114

112115
bucket = Bucket(context.gcs_client, name=bucket_name)

beetmoverscript/src/beetmoverscript/script.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ def enrich_balrog_manifest(context, locale):
650650

651651

652652
# retry_upload {{{1
653-
async def retry_upload(context, destinations, path, expiry=None):
653+
async def retry_upload(context, destinations, path, expiry=None, fail_on_unknown_mimetype=True):
654654
"""Manage upload of `path` to `destinations`."""
655655
cloud_uploads = {key: [] for key in context.config["clouds"]}
656656

@@ -661,11 +661,17 @@ async def retry_upload(context, destinations, path, expiry=None):
661661
# S3 upload
662662
enabled = is_cloud_enabled(context.config, "aws", context.resource)
663663
if enabled is True or (enabled == "buildhub-only" and path.endswith("buildhub.json")):
664-
cloud_uploads["aws"].append(asyncio.ensure_future(upload_to_s3(context=context, s3_key=dest, path=path)))
664+
cloud_uploads["aws"].append(
665+
asyncio.ensure_future(upload_to_s3(context=context, s3_key=dest, path=path, fail_on_unknown_mimetype=fail_on_unknown_mimetype))
666+
)
665667

666668
# GCS upload
667669
if is_cloud_enabled(context.config, "gcloud", context.resource):
668-
cloud_uploads["gcloud"].append(asyncio.ensure_future(upload_to_gcs(context=context, target_path=dest, path=path, expiry=expiry)))
670+
cloud_uploads["gcloud"].append(
671+
asyncio.ensure_future(
672+
upload_to_gcs(context=context, target_path=dest, path=path, expiry=expiry, fail_on_unknown_mimetype=fail_on_unknown_mimetype)
673+
)
674+
)
669675

670676
await await_and_raise_uploads(cloud_uploads, context.config["clouds"], context.resource)
671677

@@ -685,11 +691,15 @@ async def put(context, url, headers, abs_filename, session=None):
685691

686692

687693
# upload_to_s3 {{{1
688-
async def upload_to_s3(context, s3_key, path):
694+
async def upload_to_s3(context, s3_key, path, fail_on_unknown_mimetype=True):
689695
product = get_product_name(context.task, context.config)
690696
mime_type = mimetypes.guess_type(path)[0]
691697
if not mime_type:
692-
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
698+
if fail_on_unknown_mimetype:
699+
raise ScriptWorkerTaskException("Unable to discover valid mime-type for path ({}), mimetypes.guess_type() returned {}".format(path, mime_type))
700+
else:
701+
mime_type = "application/octet-stream"
702+
693703
api_kwargs = {
694704
"Bucket": get_bucket_name(context, product, "aws"),
695705
"Key": s3_key,
@@ -699,6 +709,7 @@ async def upload_to_s3(context, s3_key, path):
699709
"Content-Type": mime_type,
700710
"Cache-Control": "public, max-age=%d" % CACHE_CONTROL_MAXAGE,
701711
}
712+
702713
creds = get_credentials(context, "aws")
703714
s3 = boto3.client("s3", aws_access_key_id=creds["id"], aws_secret_access_key=creds["key"])
704715
url = s3.generate_presigned_url("put_object", api_kwargs, ExpiresIn=1800, HttpMethod="PUT")

beetmoverscript/tests/test_gcloud.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -147,21 +147,23 @@ def close(self):
147147

148148

149149
@pytest.mark.parametrize(
150-
"path,expiry,exists,raise_class",
150+
"path,expiry,exists,expected_mimetype,raise_class,fail_on_unknown_mimetype",
151151
(
152152
# Raise when can't find a mimetype
153-
("foo/nomimetype", None, False, ScriptWorkerTaskException),
153+
("foo/nomimetype", None, False, None, ScriptWorkerTaskException, True),
154+
# Don't raise when can't find a mimetype
155+
("foo/nomimetype", None, False, "application/octet-stream", None, False),
154156
# No expiration given
155-
("foo/target.zip", None, False, None),
157+
("foo/target.zip", None, False, "application/zip", None, True),
156158
# With expiration
157-
("foo/target.zip", datetime.now().isoformat(), False, None),
159+
("foo/target.zip", datetime.now().isoformat(), False, "application/zip", None, True),
158160
# With existing file
159-
("foo/target.zip", None, True, None),
161+
("foo/target.zip", None, True, "application/zip", None, True),
160162
),
161-
ids=["no mimetype", "no expiry", "with expiry", "with existing file"],
163+
ids=["no mimetype", "no mimetype no fail", "no expiry", "with expiry", "with existing file"],
162164
)
163165
@pytest.mark.asyncio
164-
async def test_upload_to_gcs(context, monkeypatch, path, expiry, exists, raise_class):
166+
async def test_upload_to_gcs(context, monkeypatch, path, expiry, exists, expected_mimetype, raise_class, fail_on_unknown_mimetype):
165167
context.gcs_client = FakeClient()
166168
blob = FakeClient.FakeBlob()
167169
blob._exists = exists
@@ -181,13 +183,15 @@ async def test_upload_to_gcs(context, monkeypatch, path, expiry, exists, raise_c
181183
target_path="path/target",
182184
path=path,
183185
expiry=expiry,
186+
fail_on_unknown_mimetype=fail_on_unknown_mimetype,
184187
)
185188
else:
186189
await beetmoverscript.gcloud.upload_to_gcs(
187190
context=context,
188191
target_path="path/target",
189192
path=path,
190193
expiry=expiry,
194+
fail_on_unknown_mimetype=fail_on_unknown_mimetype,
191195
)
192196

193197
if expiry:
@@ -199,6 +203,8 @@ async def test_upload_to_gcs(context, monkeypatch, path, expiry, exists, raise_c
199203
else:
200204
log_warn.assert_not_called()
201205

206+
assert blob.upload_from_filename.call_args[1].get("content_type") == expected_mimetype
207+
202208

203209
@pytest.mark.parametrize(
204210
"candidate_blobs,release_blobs,partner_match,raises",

beetmoverscript/tests/test_script.py

+29
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ async def test_retry_upload(context, mocker):
208208
# upload_to_s3 {{{1
209209
@pytest.mark.asyncio
210210
async def test_upload_to_s3(context, mocker):
211+
setup_mimetypes()
211212
context.release_props["appName"] = "fake"
212213
mocker.patch.object(beetmoverscript.script, "retry_async", new=noop_async)
213214
mocker.patch.object(beetmoverscript.script, "boto3")
@@ -216,13 +217,41 @@ async def test_upload_to_s3(context, mocker):
216217

217218
@pytest.mark.asyncio
218219
async def test_upload_to_s3_raises(context, mocker):
220+
setup_mimetypes()
219221
context.release_props["appName"] = "fake"
220222
mocker.patch.object(beetmoverscript.script, "retry_async", new=noop_async)
221223
mocker.patch.object(beetmoverscript.script, "boto3")
222224
with pytest.raises(ScriptWorkerTaskException):
223225
await beetmoverscript.script.upload_to_s3(context, "foo", "mime.invalid")
224226

225227

228+
@pytest.mark.asyncio
229+
@pytest.mark.parametrize(
230+
"fail_on_unknown_mime_type",
231+
(
232+
pytest.param(
233+
True,
234+
id="fail_on_unknown_mime_type",
235+
),
236+
pytest.param(
237+
False,
238+
id="dont_fail_on_unknown_mime_type",
239+
),
240+
),
241+
)
242+
async def test_upload_to_s3_fail_on_missing_mime_type(context, mocker, fail_on_unknown_mime_type):
243+
setup_mimetypes()
244+
context.release_props["appName"] = "fake"
245+
mocked_retry_async = mocker.patch.object(beetmoverscript.script, "retry_async")
246+
mocker.patch.object(beetmoverscript.script, "boto3")
247+
if fail_on_unknown_mime_type:
248+
with pytest.raises(ScriptWorkerTaskException):
249+
await beetmoverscript.script.upload_to_s3(context, "foo", "mime.invalid", fail_on_unknown_mime_type)
250+
else:
251+
await beetmoverscript.script.upload_to_s3(context, "foo", "mime.invalid", fail_on_unknown_mime_type)
252+
assert mocked_retry_async.call_args[1]["args"][2].get("Content-Type") == "application/octet-stream"
253+
254+
226255
@pytest.fixture
227256
def restore_buildhub_file():
228257
original_location = "tests/test_work_dir/cot/eSzfNqMZT_mSiQQXu8hyqg/public/build/buildhub.json"

0 commit comments

Comments
 (0)