Skip to content

Commit c91eb11

Browse files
authored
feat(bigquery): add support for listing jobs by parent job (#9225)
* Add parent_job_id, num_child_jobs to *Job classes
* Add parent_job parameter to client.list_jobs()
* Add system test for listing scripting jobs
1 parent 2bb22ae commit c91eb11

File tree

5 files changed

+117
-0
lines changed

5 files changed

+117
-0
lines changed

bigquery/google/cloud/bigquery/client.py

+10
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,7 @@ def cancel_job(self, job_id, project=None, location=None, retry=DEFAULT_RETRY):
12161216
def list_jobs(
12171217
self,
12181218
project=None,
1219+
parent_job=None,
12191220
max_results=None,
12201221
page_token=None,
12211222
all_users=None,
@@ -1233,6 +1234,11 @@ def list_jobs(
12331234
project (str, optional):
12341235
Project ID to use for retrieving jobs. Defaults
12351236
to the client's project.
1237+
parent_job (Optional[Union[ \
1238+
:class:`~google.cloud.bigquery.job._AsyncJob`, \
1239+
str, \
1240+
]]):
1241+
If set, retrieve only child jobs of the specified parent.
12361242
max_results (int, optional):
12371243
Maximum number of jobs to return.
12381244
page_token (str, optional):
@@ -1265,6 +1271,9 @@ def list_jobs(
12651271
google.api_core.page_iterator.Iterator:
12661272
Iterable of job instances.
12671273
"""
1274+
if isinstance(parent_job, job._AsyncJob):
1275+
parent_job = parent_job.job_id
1276+
12681277
extra_params = {
12691278
"allUsers": all_users,
12701279
"stateFilter": state_filter,
@@ -1275,6 +1284,7 @@ def list_jobs(
12751284
google.cloud._helpers._millis_from_datetime(max_creation_time)
12761285
),
12771286
"projection": "full",
1287+
"parentJobId": parent_job,
12781288
}
12791289

12801290
extra_params = {

bigquery/google/cloud/bigquery/job.py

+25
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,31 @@ def job_id(self):
332332
"""str: ID of the job."""
333333
return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"])
334334

335+
@property
def parent_job_id(self):
    """Optional[str]: ID of the parent job, or ``None`` if it is not set.

    The value is read from ``statistics.parentJobId`` in the job's
    resource properties.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id
    """
    key_path = ["statistics", "parentJobId"]
    return _helpers._get_sub_prop(self._properties, key_path)
346+
347+
@property
def num_child_jobs(self):
    """int: Number of child jobs executed; ``0`` when none is reported.

    The value is read from ``statistics.numChildJobs`` in the job's
    resource properties and converted with ``int()``, so a string-encoded
    count is accepted as well.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs
    """
    key_path = ["statistics", "numChildJobs"]
    raw_count = _helpers._get_sub_prop(self._properties, key_path)
    if raw_count is None:
        return 0
    return int(raw_count)
359+
335360
@property
336361
def project(self):
337362
"""Project bound to the job.

bigquery/tests/system.py

+48
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,54 @@ def test_list_tables(self):
431431
)
432432
self.assertGreater(len(list(iterator)), 0)
433433

434+
def test_listing_scripting_jobs(self):
    """Run a multi-statement script and list its child jobs by parent.

    The script job and its children are first identified among the recently
    created jobs by their relation to the job started here (``job_id`` /
    ``parent_job_id``), then compared against the result of
    ``list_jobs(parent_job=...)``.
    """
    # run an SQL script
    sql_script = """
        -- Declare a variable to hold names as an array.
        DECLARE top_names ARRAY<STRING>;

        -- Build an array of the top 100 names from the year 2017.
        SET top_names = (
            SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
            FROM `bigquery-public-data.usa_names.usa_1910_current`
            WHERE year = 2017
        );

        -- Which names appear as words in Shakespeare's plays?
        SELECT
            name AS shakespeare_name
        FROM UNNEST(top_names) AS name
        WHERE name IN (
            SELECT word
            FROM `bigquery-public-data.samples.shakespeare`
        );
    """
    test_start = datetime.datetime.utcnow()
    query_job = Config.CLIENT.query(sql_script, project=Config.CLIENT.project)
    query_job.result()
    script_job_id = query_job.job_id

    # Fetch recently created jobs and keep only those that belong to the
    # script started above. Classifying every listed job purely by
    # ``num_child_jobs`` would also pick up unrelated jobs created in the
    # same time window (e.g. by other tests), making the counts unreliable.
    recent_jobs = Config.CLIENT.list_jobs(min_creation_time=test_start)

    parent_jobs = []
    child_jobs = []

    for listed_job in recent_jobs:
        if listed_job.job_id == script_job_id:
            parent_jobs.append(listed_job)
        elif listed_job.parent_job_id == script_job_id:
            child_jobs.append(listed_job)

    assert len(parent_jobs) == 1
    assert parent_jobs[0].num_child_jobs > 0
    assert len(child_jobs) == parent_jobs[0].num_child_jobs

    # fetch jobs using the parent job filter, verify that results are as expected
    fetched_jobs = list(Config.CLIENT.list_jobs(parent_job=parent_jobs[0]))
    assert sorted(fetched.job_id for fetched in fetched_jobs) == sorted(
        child.job_id for child in child_jobs
    )
481+
434482
def test_update_table(self):
435483
dataset = self.temp_dataset(_make_dataset_id("update_table"))
436484

bigquery/tests/unit/test_client.py

+18
Original file line numberDiff line numberDiff line change
@@ -2952,6 +2952,24 @@ def test_list_jobs_w_time_filter(self):
29522952
},
29532953
)
29542954

2955+
def test_list_jobs_w_parent_job_filter(self):
    """``list_jobs`` sends the same ``parentJobId`` for a job ID or a job object."""
    from google.cloud.bigquery import job

    creds = _make_credentials()
    client = self._make_one(self.PROJECT, creds)
    conn = make_connection({}, {})
    client._connection = conn

    # The filter may be given either as a plain job ID or as a job instance;
    # both forms must produce an identical API request.
    parent_job_args = ("parent-job-123", job._AsyncJob("parent-job-123", client))
    expected_request = dict(
        method="GET",
        path="/projects/%s/jobs" % self.PROJECT,
        query_params={"projection": "full", "parentJobId": "parent-job-123"},
    )

    for parent_job in parent_job_args:
        list(client.list_jobs(parent_job=parent_job))
        conn.api_request.assert_called_once_with(**expected_request)
        # Clear the recorded call so the next iteration is checked on its own.
        conn.api_request.reset_mock()
2972+
29552973
def test_load_table_from_uri(self):
29562974
from google.cloud.bigquery.job import LoadJob
29572975

bigquery/tests/unit/test_job.py

+16
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,22 @@ def test_job_type(self):
268268

269269
self.assertEqual(derived.job_type, "derived")
270270

271+
def test_parent_job_id(self):
    """``parent_job_id`` is ``None`` until the statistics carry ``parentJobId``."""
    job_under_test = self._make_one(self.JOB_ID, _make_client(project=self.PROJECT))

    # No statistics yet: the property reports no parent.
    self.assertIsNone(job_under_test.parent_job_id)

    job_under_test._properties["statistics"] = {"parentJobId": "parent-job-123"}
    self.assertEqual(job_under_test.parent_job_id, "parent-job-123")
278+
279+
def test_num_child_jobs(self):
    """``num_child_jobs`` defaults to 0 and converts a string count to ``int``."""
    job_under_test = self._make_one(self.JOB_ID, _make_client(project=self.PROJECT))

    # No statistics yet: the count falls back to zero.
    self.assertEqual(job_under_test.num_child_jobs, 0)

    # The count is supplied as a string here and must come back as an int.
    job_under_test._properties["statistics"] = {"numChildJobs": "17"}
    self.assertEqual(job_under_test.num_child_jobs, 17)
286+
271287
def test_labels_miss(self):
272288
client = _make_client(project=self.PROJECT)
273289
job = self._make_one(self.JOB_ID, client)

0 commit comments

Comments
 (0)