Skip to content

Commit edb40dc

Browse files
update secops anonymization pipeline
1 parent 01c1565 commit edb40dc

File tree

5 files changed

+38
-511
lines changed

5 files changed

+38
-511
lines changed

fast/project-templates/secops-anonymization-pipeline/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ The following diagram illustrates the high-level design of the solution, which c
2222

2323
The use case is a SecOps deployment composed of 2 tenants (one for production and one for development/testing). There might be the need to export production data from the prod tenant and import it back into DEV (possibly anonymizing it) for rules and/or parser development, which is why this pipeline might be convenient for speeding up the data migration process.
2424

25+
The solution is based on a custom Python script responsible for implementing the aforementioned logic. The script leverages the new [SecOps API Wrapper](https://github.com/google/secops-wrapper), also available on [PyPI](https://pypi.org/project/secops/).
26+
2527
### Pipeline Steps
2628

2729
- **SecOps Export**: Triggered via the corresponding TRIGGER-EXPORT action. Call [SecOps Export API](https://cloud.google.com/chronicle/docs/reference/rest/v1alpha/projects.locations.instances.dataExports) to trigger raw logs export on a GCS bucket based on either all the log types or one or more of them for a specific time frame. By default, the export will be for the previous day, otherwise the following parameters can be specified to change the time frame:
@@ -92,6 +94,7 @@ terraform apply
9294
#### Step 5: Test solution
9395

9496
Test the solution by triggering an export from the Cloud Scheduler page; after a few hours (according to the size of the export) logs should be available in the secops-export bucket. Please check for any issues during export using the corresponding APIs and the export ID.
97+
9598
<!-- BEGIN TFDOC -->
9699
## Variables
97100

fast/project-templates/secops-anonymization-pipeline/source/main.py

+30-72
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,13 @@
1717
import os
1818
import click
1919
import logging
20-
import sys
2120
import google.cloud.logging
22-
from google.auth.transport.requests import AuthorizedSession
23-
from google.oauth2 import service_account
2421
from jinja2 import Template
2522
from shared import utils
2623
from google.cloud import dlp_v2
2724
from google.cloud import storage
28-
from datetime import date, timedelta
29-
from shared import secops
25+
from datetime import date, timedelta, datetime
26+
from secops import SecOpsClient
3027

3128
client = google.cloud.logging.Client()
3229
client.setup_logging()
@@ -37,11 +34,6 @@
3734
format='[%(levelname)-8s] - %(asctime)s - %(message)s')
3835
logging.root.setLevel(logging.DEBUG)
3936

40-
SCOPES = [
41-
"https://www.googleapis.com/auth/chronicle-backstory",
42-
"https://www.googleapis.com/auth/malachite-ingestion"
43-
]
44-
4537
SECOPS_REGION = os.environ.get("SECOPS_REGION")
4638
GCP_PROJECT_ID = os.environ.get("GCP_PROJECT")
4739
SECOPS_EXPORT_BUCKET = os.environ.get("SECOPS_EXPORT_BUCKET")
@@ -51,25 +43,15 @@
5143
SECOPS_SOURCE_CUSTOMER_ID = os.environ.get("SECOPS_SOURCE_CUSTOMER_ID")
5244
SECOPS_TARGET_CUSTOMER_ID = os.environ.get("SECOPS_TARGET_CUSTOMER_ID")
5345
SECOPS_TARGET_FORWARDER_ID = os.environ.get("SECOPS_TARGET_FORWARDER_ID")
54-
55-
SKIP_ANONYMIZATION = False if (os.environ.get(
56-
"SKIP_ANONYMIZATION", "false").lower() == "false") else True
46+
SKIP_ANONYMIZATION = False if (os.environ.get("SKIP_ANONYMIZATION", "false").lower() == "false") else True
5747
DLP_DEIDENTIFY_TEMPLATE_ID = os.environ.get("DLP_DEIDENTIFY_TEMPLATE_ID")
5848
DLP_INSPECT_TEMPLATE_ID = os.environ.get("DLP_INSPECT_TEMPLATE_ID")
5949
DLP_REGION = os.environ.get("DLP_REGION")
6050

6151

6252
def import_logs(export_date):
63-
# Initialize with default credentials - will automatically use the service account
64-
# assigned to your Google Cloud resource
65-
client = secops.SecOpsClient()
66-
67-
# Initialize Chronicle client
68-
chronicle = client.chronicle(
69-
customer_id=SECOPS_TARGET_CUSTOMER_ID, # Your Chronicle instance ID
70-
project_id=SECOPS_TARGET_PROJECT, # Your GCP project ID
71-
region=SECOPS_REGION # Chronicle API region
72-
)
53+
client = SecOpsClient()
54+
chronicle = client.chronicle(customer_id=SECOPS_TARGET_CUSTOMER_ID, project_id=SECOPS_TARGET_PROJECT, region=SECOPS_REGION)
7355

7456
storage_client = storage.Client()
7557
BUCKET = SECOPS_OUTPUT_BUCKET if not SKIP_ANONYMIZATION else SECOPS_EXPORT_BUCKET
@@ -87,13 +69,13 @@ def import_logs(export_date):
8769
for line in f:
8870
logs.append(line.rstrip('\n'))
8971
if len(logs) == 1000:
90-
response = chronicle.ingest_logs(logs=logs, log_type=log_type, forwarder_id=SECOPS_TARGET_FORWARDER_ID)
72+
response = chronicle.ingest_log(log_message=logs, log_type=log_type, forwarder_id=SECOPS_TARGET_FORWARDER_ID)
9173
LOGGER.debug(response)
9274
logs = []
9375

9476
# Send any remaining entries
9577
if len(logs) > 0:
96-
response = chronicle.ingest_logs(logs=logs, log_type=log_type, forwarder_id=SECOPS_TARGET_FORWARDER_ID)
78+
response = chronicle.ingest_log(log_message=logs, log_type=log_type, forwarder_id=SECOPS_TARGET_FORWARDER_ID)
9779
LOGGER.debug(response)
9880

9981
# delete both export and anonymized buckets after ingesting logs
@@ -119,35 +101,27 @@ def trigger_export(export_date: str, export_start_datetime: str,
119101
:return:
120102
"""
121103

104+
client = SecOpsClient()
105+
chronicle = client.chronicle(customer_id=SECOPS_SOURCE_CUSTOMER_ID, project_id=SECOPS_SOURCE_PROJECT, region=SECOPS_REGION)
122106

123-
# Initialize with default credentials - will automatically use the service account
124-
# assigned to your Google Cloud resource
125-
client = secops.SecOpsClient()
107+
export_ids = []
126108

127-
# Initialize Chronicle client
128-
chronicle = client.chronicle(
129-
customer_id=SECOPS_SOURCE_CUSTOMER_ID, # Your Chronicle instance ID
130-
project_id=SECOPS_SOURCE_PROJECT, # Your GCP project ID
131-
region=SECOPS_REGION # Chronicle API region
132-
)
109+
if export_start_datetime and export_end_datetime:
110+
start_time, end_time = datetime.strptime(export_start_datetime, "%Y-%m-%dT%H:%M:%SZ"), datetime.strptime(export_end_datetime, "%Y-%m-%dT%H:%M:%SZ")
111+
else:
112+
start_time, end_time = utils.format_date_time_range(date_input=export_date)
113+
gcs_bucket = f"projects/{GCP_PROJECT_ID}/buckets/{SECOPS_EXPORT_BUCKET}"
133114

134-
export_ids = []
135115
try:
136-
if log_types is None:
137-
export_response = chronicle.create_data_export(
138-
project=GCP_PROJECT_ID, export_date=export_date,
139-
export_start_datetime=export_start_datetime,
140-
export_end_datetime=export_end_datetime)
116+
if log_types is None or log_types == "":
117+
export_response = chronicle.create_data_export(start_time=start_time, end_time=end_time, gcs_bucket=gcs_bucket, export_all_logs=True)
141118
LOGGER.info(export_response)
142119
export_id = export_response["dataExportStatus"]["name"].split("/")[-1]
143120
export_ids.append(export_id)
144121
LOGGER.info(f"Triggered export with ID: {export_id}")
145122
else:
146123
for log_type in log_types.split(","):
147-
export_response = chronicle.create_data_export(
148-
project=GCP_PROJECT_ID, export_date=export_date,
149-
export_start_datetime=export_start_datetime,
150-
export_end_datetime=export_end_datetime, log_type=log_type)
124+
export_response = chronicle.create_data_export(start_time=start_time, end_time=end_time, gcs_bucket=gcs_bucket, log_type=log_type)
151125
export_id = export_response["dataExportStatus"]["name"].split("/")[-1]
152126
export_ids.append(export_id)
153127
LOGGER.info(f"Triggered export with ID: {export_id}")
@@ -164,22 +138,14 @@ def anonymize_data(export_date):
164138
:param export_date: date for which data should be anonymized
165139
:return:
166140
"""
167-
# Initialize with default credentials - will automatically use the service account
168-
# assigned to your Google Cloud resource
169-
client = secops.SecOpsClient()
170-
171-
# Initialize Chronicle client
172-
chronicle = client.chronicle(
173-
customer_id=SECOPS_SOURCE_CUSTOMER_ID, # Your Chronicle instance ID
174-
project_id=SECOPS_SOURCE_PROJECT, # Your GCP project ID
175-
region=SECOPS_REGION # Chronicle API region
176-
)
177-
export_ids = utils.get_secops_export_folders_for_date(SECOPS_EXPORT_BUCKET,
178-
export_date=export_date)
141+
142+
client = SecOpsClient()
143+
chronicle = client.chronicle(customer_id=SECOPS_SOURCE_CUSTOMER_ID, project_id=SECOPS_SOURCE_PROJECT, region=SECOPS_REGION)
144+
export_ids = utils.get_secops_export_folders_for_date(SECOPS_EXPORT_BUCKET, export_date=export_date)
179145

180146
export_finished = True
181147
for export_id in export_ids:
182-
export = chronicle.get_data_export(name=export_id)
148+
export = chronicle.get_data_export(data_export_id=export_id)
183149
LOGGER.info(f"Export response: {export}.")
184150
if "dataExportStatus"in export and export["dataExportStatus"]["stage"] == "FINISHED_SUCCESS":
185151
export_state = export["dataExportStatus"]["stage"]
@@ -261,21 +227,13 @@ def main(request):
261227

262228

263229
@click.command()
264-
@click.option('--export-date', '-d', required=False, type=str,
265-
help='Date for secops export and anonymization.')
266-
@click.option('--export-start-datetime', '-d', required=False, type=str,
267-
help='Start datetime for secops export and anonymization.')
268-
@click.option('--export-end-datetime', '-d', required=False, type=str,
269-
help='End datetime for secops export and anonymization.')
230+
@click.option('--export-date', '-d', required=False, type=str, help='Date for secops export and anonymization.')
231+
@click.option('--export-start-datetime', '-d', required=False, type=str, help='Start datetime for secops export and anonymization.')
232+
@click.option('--export-end-datetime', '-d', required=False, type=str, help='End datetime for secops export and anonymization.')
270233
@click.option('--log-type', type=str, multiple=True)
271-
@click.option(
272-
'--action',
273-
type=click.Choice(['TRIGGER-EXPORT', 'ANONYMIZE-DATA',
274-
'IMPORT-DATA']), required=True)
275-
@click.option('--debug', is_flag=True, default=False,
276-
help='Turn on debug logging.')
277-
def main_cli(export_date, export_start_datetime, export_end_datetime,
278-
log_type: list, action: str, debug=False):
234+
@click.option('--action', type=click.Choice(['TRIGGER-EXPORT', 'ANONYMIZE-DATA', 'IMPORT-DATA']), required=True)
235+
@click.option('--debug', is_flag=True, default=False, help='Turn on debug logging.')
236+
def main_cli(export_date, export_start_datetime, export_end_datetime, log_type: list, action: str, debug=False):
279237
"""
280238
CLI entry point.
281239
:param date: date for secops export and anonymization
@@ -288,7 +246,7 @@ def main_cli(export_date, export_start_datetime, export_end_datetime,
288246
trigger_export(export_date=export_date,
289247
export_start_datetime=export_start_datetime,
290248
export_end_datetime=export_end_datetime,
291-
log_types=log_type)
249+
log_types=','.join(log_type))
292250
case "ANONYMIZE-DATA":
293251
anonymize_data(export_date=export_date)
294252
case "IMPORT-DATA":

fast/project-templates/secops-anonymization-pipeline/source/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ google-cloud-storage
2323
click==8.1.3
2424
google-cloud-dlp
2525
google-cloud-logging
26+
secops

0 commit comments

Comments
 (0)