1
1
package ai .chronon .integrations .cloud_gcp
2
2
3
- import com .google .cloud .bigquery .{
3
+ import com .google .cloud .spark . bigquery . repackaged . com . google . cloud . bigquery .{
4
4
BigQuery ,
5
5
BigQueryOptions ,
6
6
ExternalTableDefinition ,
7
7
StandardTableDefinition ,
8
8
TableDefinition ,
9
- TableId
9
+ TableId ,
10
10
}
11
11
import com .google .cloud .spark .bigquery .BigQueryCatalog
12
12
import org .apache .iceberg .gcp .bigquery .BigQueryMetastoreCatalog
@@ -19,6 +19,7 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
19
19
import org .apache .spark .sql .execution .datasources .v2 .parquet .ParquetTable
20
20
import org .apache .spark .sql .types .StructType
21
21
import org .apache .spark .sql .util .CaseInsensitiveStringMap
22
+ import com .google .cloud .spark .bigquery .{SchemaConverters , SchemaConvertersConfiguration }
22
23
23
24
import java .util
24
25
import scala .jdk .CollectionConverters ._
@@ -122,22 +123,22 @@ class DelegatingBigQueryMetastoreCatalog extends TableCatalog with SupportsNames
122
123
fileBasedTable
123
124
}
124
125
case stTable : StandardTableDefinition => {
126
+ import com .google .cloud .spark .bigquery .repackaged .com .google .inject .Injector
127
+ import com .google .cloud .spark .bigquery .v2 .Spark35BigQueryTable
125
128
// todo(tchow): Support partitioning
126
129
127
130
// Hack because there's a bug in the BigQueryCatalog where they ignore the projectId.
128
131
// See: https://github.com/GoogleCloudDataproc/spark-bigquery-connector/pull/1340
129
132
// ideally it should be the below:
130
133
// val connectorTable = connectorCatalog.loadTable(ident)
134
+ // So instead, we read the bigqueryTable ourselves to get access to the schema and pass it through.
131
135
val nativeTable = connectorCatalog.loadTable(Identifier .of(Array (tId.getDataset), tId.getTable))
132
- logger.info(s " Table: ${nativeTable}" )
133
- logger.info(s " Table name: ${nativeTable.name()}" )
134
- logger.info(s " Table properties: ${nativeTable.properties()}" )
135
- logger.info(s " Table partitioning: ${nativeTable.partitioning()}" )
136
- logger.info(" Table schema:" )
137
- logger.info(f " ${nativeTable.schema()}" )
138
- logger.info(" Table columns:" )
139
- logger.info(f " ${nativeTable.columns()}" )
140
- nativeTable
136
+ val injector = nativeTable.getClass.getDeclaredField(" injector" )
137
+ injector.setAccessible(true )
138
+ val value = injector.get(nativeTable).asInstanceOf [Injector ]
139
+ val sc = SchemaConverters .from(SchemaConvertersConfiguration .createDefault())
140
+ val sparkSchema = sc.toSpark(stTable.getSchema)
141
+ new Spark35BigQueryTable (value, () => sparkSchema)
141
142
}
142
143
case _ => throw new IllegalStateException (s " Cannot support table of type: ${table.getDefinition}" )
143
144
}