
Commit 39e88ef

fix(ingest/bigquery): support google-cloud-bigquery 3.15.0 (#9595)
1 parent: b0060ce

File tree

4 files changed (+14, -6 lines)

metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py

+1-1
@@ -92,7 +92,7 @@ class SqlParsingBuilder:
     def __post_init__(self) -> None:
         if self.usage_config:
             self._usage_aggregator = UsageAggregator(self.usage_config)
-        else:
+        elif self.generate_usage_statistics:
             logger.info("No usage config provided, not generating usage statistics")
             self.generate_usage_statistics = False
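The one-line change above means the "no usage config" message is only logged when usage statistics were actually requested. A minimal sketch of the resulting behaviour, using a simplified stand-in rather than the real SqlParsingBuilder (the UsageAggregator wiring is omitted):

# Sketch only: a simplified stand-in for SqlParsingBuilder.__post_init__.
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class BuilderSketch:  # hypothetical stand-in, not the real class
    usage_config: Optional[dict] = None
    generate_usage_statistics: bool = True

    def __post_init__(self) -> None:
        if self.usage_config:
            pass  # the real builder constructs a UsageAggregator here
        elif self.generate_usage_statistics:
            # Before the fix this branch was a plain `else`, so the message
            # appeared even when usage statistics were already disabled.
            logger.info("No usage config provided, not generating usage statistics")
            self.generate_usage_statistics = False


BuilderSketch(usage_config=None, generate_usage_statistics=False)  # stays silent after the fix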

metadata-ingestion/src/datahub/ingestion/run/pipeline.py

+4
@@ -137,6 +137,7 @@ class CliReport(Report):
 
     disk_info: Optional[dict] = None
     peak_disk_usage: Optional[str] = None
+    _initial_disk_usage: int = -1
     _peak_disk_usage: int = 0
 
     thread_count: Optional[int] = None
@@ -156,12 +157,15 @@ def compute_stats(self) -> None:
 
         try:
             disk_usage = shutil.disk_usage("/")
+            if self._initial_disk_usage < 0:
+                self._initial_disk_usage = disk_usage.used
             if self._peak_disk_usage < disk_usage.used:
                 self._peak_disk_usage = disk_usage.used
                 self.peak_disk_usage = humanfriendly.format_size(self._peak_disk_usage)
             self.disk_info = {
                 "total": humanfriendly.format_size(disk_usage.total),
                 "used": humanfriendly.format_size(disk_usage.used),
+                "used_initally": humanfriendly.format_size(self._initial_disk_usage),
                 "free": humanfriendly.format_size(disk_usage.free),
             }
         except Exception as e:
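The new _initial_disk_usage field records disk usage once, on the first compute_stats() call, so the report can show start-of-run usage alongside the running peak. A standalone sketch of that bookkeeping, with plain module-level state instead of the real CliReport fields:

# Sketch only: the disk bookkeeping in isolation, not the real CliReport.
import shutil

import humanfriendly

_initial_disk_usage = -1
_peak_disk_usage = 0


def disk_info_sketch() -> dict:
    global _initial_disk_usage, _peak_disk_usage
    disk_usage = shutil.disk_usage("/")
    if _initial_disk_usage < 0:
        # Captured exactly once, on the first call, so it reflects usage at start.
        _initial_disk_usage = disk_usage.used
    if _peak_disk_usage < disk_usage.used:
        _peak_disk_usage = disk_usage.used
    return {
        "total": humanfriendly.format_size(disk_usage.total),
        "used": humanfriendly.format_size(disk_usage.used),
        "used_initally": humanfriendly.format_size(_initial_disk_usage),  # key spelled as in the diff
        "free": humanfriendly.format_size(disk_usage.free),
        "peak": humanfriendly.format_size(_peak_disk_usage),  # stored in peak_disk_usage in the real class
    }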

metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py

+7-3
@@ -1283,9 +1283,13 @@ def create_bigquery_temp_table(
     # temporary table dance. However, that would require either a) upgrading to
     # use GE's batch v3 API or b) bypassing GE altogether.
 
-    query_job: Optional[
-        "google.cloud.bigquery.job.query.QueryJob"
-    ] = cursor._query_job
+    query_job: Optional["google.cloud.bigquery.job.query.QueryJob"] = (
+        # In google-cloud-bigquery 3.15.0, the _query_job attribute was
+        # made public and renamed to query_job.
+        cursor.query_job
+        if hasattr(cursor, "query_job")
+        else cursor._query_job  # type: ignore[attr-defined]
+    )
     assert query_job
     temp_destination_table = query_job.destination
     bigquery_temp_table = f"{temp_destination_table.project}.{temp_destination_table.dataset_id}.{temp_destination_table.table_id}"
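This is the core of the fix: per the comment in the diff, google-cloud-bigquery 3.15.0 made the DB-API cursor's private _query_job attribute public as query_job, and the conditional expression above keeps both versions working. The same pattern in isolation, as a hypothetical helper (_get_query_job is not part of the codebase):

# Sketch only: the version-compatibility pattern, via a hypothetical helper.
from typing import Any, Optional


def _get_query_job(cursor: Any) -> Optional[Any]:
    # google-cloud-bigquery >= 3.15.0 exposes the public query_job attribute;
    # older releases only have the private _query_job.
    if hasattr(cursor, "query_job"):
        return cursor.query_job
    return cursor._query_job  # pre-3.15.0 fallback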

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

+2-2
@@ -353,13 +353,13 @@ def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None:
 
         self._populate_external_lineage_from_copy_history(discovered_tables)
         logger.info(
-            "Done populating external lineage from copy history."
+            "Done populating external lineage from copy history. "
             f"Found {self.report.num_external_table_edges_scanned} external lineage edges so far."
         )
 
         self._populate_external_lineage_from_show_query(discovered_tables)
         logger.info(
-            "Done populating external lineage from show external tables."
+            "Done populating external lineage from show external tables. "
             f"Found {self.report.num_external_table_edges_scanned} external lineage edges so far."
         )
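The trailing space added to each literal matters because Python concatenates adjacent string literals verbatim, with no separator inserted between them; a small illustration:

# Sketch only: why the trailing space is needed with implicit concatenation.
edges = 3
without_space = (
    "Done populating external lineage from copy history."
    f"Found {edges} external lineage edges so far."
)
with_space = (
    "Done populating external lineage from copy history. "
    f"Found {edges} external lineage edges so far."
)
assert "history.Found" in without_space   # words run together
assert "history. Found" in with_space     # fixed by the trailing space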
