Skip to content

Commit 91889d1

Browse files
authored
Fix training data discovery (#5)
* Fix package data inclusion * Fix getting outputs of an erroneously deployed stack * Remove ResponseMetadata from configuration * Remove ResponseMetadata from API response * Update data-capture destination * Fix contents lookup * Fix typo * Remove stack outputs caching * Add CopyZips function * Add template to package distribution * Remove template URL * Convert traffic_shadowing to package * Update source bucket * Rename function parameters * Update IAM policy * Update distribution bucket * Check response status_code before searching through objects * Check for number of training files first * Update stub for lambda tests
1 parent 6e4068e commit 91889d1

File tree

4 files changed

+45
-25
lines changed

4 files changed

+45
-25
lines changed

README.md

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,31 +20,31 @@ The following rights are required for deploying traffic-shadowing stack.
2020
"Sid": "VisualEditor0",
2121
"Effect": "Allow",
2222
"Action": [
23-
"iam:GetRole",
24-
"iam:CreateRole",
25-
"iam:DeleteRole",
26-
"iam:GetRolePolicy",
27-
"iam:PutRolePolicy",
28-
"iam:DeleteRolePolicy",
29-
"iam:PassRole",
3023
"lambda:CreateFunction",
31-
"lambda:DeleteFunction",
24+
"iam:GetRole",
3225
"lambda:InvokeFunction",
3326
"lambda:ListVersionsByFunction",
27+
"iam:CreateRole",
28+
"iam:DeleteRole",
3429
"lambda:GetFunctionConfiguration",
35-
"lambda:PutFunctionConcurrency",
36-
"lambda:AddPermission",
37-
"lambda:RemovePermission"
38-
"lambda:PublishVersion",
30+
"iam:PutRolePolicy",
3931
"cloudformation:DescribeStacks",
32+
"lambda:PutFunctionConcurrency",
33+
"iam:PassRole",
4034
"cloudformation:DescribeStackEvents",
35+
"lambda:AddPermission",
4136
"cloudformation:CreateStack",
37+
"iam:DeleteRolePolicy",
4238
"cloudformation:DeleteStack",
39+
"lambda:DeleteFunction",
40+
"lambda:PublishVersion",
41+
"lambda:RemovePermission",
42+
"iam:GetRolePolicy"
4343
],
4444
"Resource": [
45-
"arn:aws:iam:::role/*",
46-
"arn:aws:cloudformation:::stack/*/*",
47-
"arn:aws:lambda:::function:*"
45+
"arn:aws:cloudformation:*:*:stack/*/*",
46+
"arn:aws:lambda:*:*:function:*",
47+
"arn:aws:iam::*:role/*"
4848
]
4949
},
5050
{
@@ -53,9 +53,10 @@ The following rights are required for deploying traffic-shadowing stack.
5353
"Action": [
5454
"s3:PutBucketNotification",
5555
"s3:CreateBucket",
56-
"s3:DeleteBucket"
57-
"s3:GetBucketLocation",
5856
"s3:GetBucketNotification",
57+
"s3:GetBucketLocation",
58+
"s3:DeleteBucket",
59+
"s3:GetObject"
5960
],
6061
"Resource": "*"
6162
}

aws/traffic_shadowing/src/utils.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,15 @@ def get_largest_csv(self, bucket: str, prefix: str, model_name: str) -> str:
110110
"""Parse largest csv file from bucket/prefix."""
111111
path = '/'.join([prefix, model_name])
112112
response = self._s3_client.list_objects_v2(Bucket=bucket, Prefix=path)
113-
candidates = list(filter(
114-
lambda x: os.path.splitext(x['Key'])[1].lower() == '.csv',
115-
response.get('Contents')
116-
))
113+
if response['ResponseMetadata']['HTTPStatusCode'] != 200:
114+
logger.error(response)
115+
raise ValueError(f"Could not list objects from s3://{bucket}/{path}")
116+
candidates = []
117+
if response['KeyCount'] > 0:
118+
candidates = list(filter(
119+
lambda x: os.path.splitext(x['Key'])[1].lower() == '.csv',
120+
response['Contents']
121+
))
117122
if not candidates:
118123
raise errors.DataNotFound(f'Didn\'t find .csv training data under "{path}" path')
119124
candidates.sort(key=lambda x: x['Size'], reverse=True)

aws/traffic_shadowing/tests/stubs/http/aws.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,21 @@ def expected_params(self) -> dict:
2424
@property
2525
def service_response(self) -> dict:
2626
return {
27+
'ResponseMetadata': {
28+
'RequestId': 'xxxxxxxxxxxxxxxx',
29+
'HostId': 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
30+
'HTTPStatusCode': 200,
31+
'HTTPHeaders': {
32+
'x-amz-id-2': 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
33+
'x-amz-request-id': 'xxxxxxxxxxxxxxxx',
34+
'date': 'Wed, 20 May 2020 11:38:02 GMT',
35+
'x-amz-bucket-region': 'eu-west-3',
36+
'content-type': 'application/xml',
37+
'transfer-encoding': 'chunked',
38+
'server': 'AmazonS3'
39+
},
40+
'RetryAttempts': 1,
41+
},
2742
'IsTruncated': False,
2843
'Contents': [
2944
{
@@ -41,7 +56,6 @@ def service_response(self) -> dict:
4156
'KeyCount': 1
4257
}
4358

44-
4559
class GetObjectStub(StubBase):
4660
method = 'get_object'
4761

hydro_integrations/aws/sagemaker/traffic_shadowing/template.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@ Resources:
5757
ServiceToken: !GetAtt CopyZipsFunction.Arn
5858
DestRegion: !Ref AWS::Region
5959
DestBucket: !Ref LambdaZipsBucket
60-
SourceBucket: hydrosphere-integrations-eu-west-3
60+
SourceBucket: hydrosphere-integrations
6161
Prefix: ""
6262
Objects:
63-
- lambda/traffic_shadowing/1a46812f177b62fc77b02818f04c0ff4
63+
- lambda/traffic_shadowing/d2ca18fd62456c06ceba108712f65373
6464
CopyZipsFunction:
6565
Type: AWS::Lambda::Function
6666
Properties:
@@ -128,7 +128,7 @@ Resources:
128128
production data for analysis.
129129
Code:
130130
S3Bucket: !Ref LambdaZipsBucket
131-
S3Key: lambda/traffic_shadowing/1a46812f177b62fc77b02818f04c0ff4
131+
S3Key: lambda/traffic_shadowing/d2ca18fd62456c06ceba108712f65373
132132
Timeout: 240
133133
MemorySize: 256
134134
Runtime: python3.7

0 commit comments

Comments
 (0)