|
1 | 1 | package ai.chronon.integrations.cloud_gcp
|
2 | 2 |
|
3 |
| -import ai.chronon.api.ScalaJavaConversions.ListOps |
4 | 3 | import ai.chronon.spark.TableUtils
|
5 | 4 | import ai.chronon.spark.format.Format
|
6 | 5 | import ai.chronon.spark.format.FormatProvider
|
7 | 6 | import ai.chronon.spark.format.Hive
|
8 |
| -import com.google.cloud.bigquery._ |
| 7 | +import com.google.cloud.bigquery.BigQuery |
| 8 | +import com.google.cloud.bigquery.BigQueryOptions |
| 9 | +import com.google.cloud.bigquery.ExternalTableDefinition |
| 10 | +import com.google.cloud.bigquery.FormatOptions |
| 11 | +import com.google.cloud.bigquery.StandardTableDefinition |
| 12 | +import com.google.cloud.bigquery.Table |
| 13 | +import com.google.cloud.bigquery.TableDefinition |
9 | 14 | import com.google.cloud.bigquery.connector.common.BigQueryUtil
|
10 | 15 | import com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.TableId
|
11 | 16 | import org.apache.spark.sql.SparkSession
|
@@ -51,19 +56,21 @@ case class GcpFormatProvider(sparkSession: SparkSession) extends FormatProvider
|
51 | 56 | BigQueryFormat(tableId.getProject, sparkOptions)
|
52 | 57 | }
|
53 | 58 |
|
54 |
| - private def getFormat(table: Table): Format = |
| 59 | + private[cloud_gcp] def getFormat(table: Table): Format = |
55 | 60 | table.getDefinition.asInstanceOf[TableDefinition] match {
|
56 | 61 |
|
57 | 62 | case definition: ExternalTableDefinition =>
|
58 |
| - val uris = definition.getSourceUris.toScala |
59 |
| - .map(uri => uri.stripSuffix("/*") + "/") |
60 |
| - |
61 |
| - assert(uris.length == 1, s"External table ${table.getFriendlyName} can be backed by only one URI.") |
62 |
| - |
63 | 63 | val formatOptions = definition.getFormatOptions
|
64 | 64 | .asInstanceOf[FormatOptions]
|
65 |
| - |
66 |
| - GCS(table.getTableId.getProject, uris.head, formatOptions.getType) |
| 65 | + val externalTable = table.getDefinition.asInstanceOf[ExternalTableDefinition] |
| 66 | + val uri = Option(externalTable.getHivePartitioningOptions) |
| 67 | + .map(_.getSourceUriPrefix) |
| 68 | + .getOrElse { |
| 69 | + val uris = externalTable.getSourceUris |
| 70 | + require(uris.size == 1, s"External table ${table} can be backed by only one URI.") |
| 71 | + uris.get(0).replaceAll("/\\*\\.parquet$", "") |
| 72 | + } |
| 73 | + GCS(table.getTableId.getProject, uri, formatOptions.getType) |
67 | 74 |
|
68 | 75 | case _: StandardTableDefinition =>
|
69 | 76 | BigQueryFormat(table.getTableId.getProject, Map.empty)
|
|
0 commit comments