188 | 188 | supportsDBT: false
189 | 189 | supported_destination_sync_modes:
190 | 190 | - "append"
191 |     | -- dockerImage: "airbyte/destination-bigquery:0.6.12"
    | 191 | +- dockerImage: "airbyte/destination-bigquery:1.0.0"
192 | 192 | spec:
193 | 193 | documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
194 | 194 | connectionSpecification:
...
201 | 201 | additionalProperties: true
202 | 202 | properties:
203 | 203 | big_query_client_buffer_size_mb:
204 |     | - title: "Google BigQuery client chunk size"
205 |     | - description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\
    | 204 | + title: "Google BigQuery Client Chunk Size (Optional)"
    | 205 | + description: "Google BigQuery client's chunk (buffer) size (MIN=1, MAX =\
206 | 206 | \ 15) for each table. The size that will be written by a single RPC. Written\
207 | 207 | \ data will be buffered and only flushed upon reaching this size or closing\
208 |     | - \ the channel. The default 15MiB value is used if not set explicitly.\
209 |     | - \ It's recommended to decrease value for big data sets migration for less\
210 |     | - \ HEAP memory consumption and avoiding crashes. For more details refer\
211 |     | - \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html"
    | 208 | + \ the channel. The default 15MB value is used if not set explicitly. Read\
    | 209 | + \ more <a href=\"https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html\"\
    | 210 | + >here</a>."
212 | 211 | type: "integer"
213 | 212 | minimum: 1
214 | 213 | maximum: 15
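For context, the field above maps to a single optional integer in the destination config. A minimal sketch with a placeholder value (the name, range, and default come from the spec; the trade-off note paraphrases the removed description):

  # Hypothetical excerpt of a BigQuery destination config (value is a placeholder)
  big_query_client_buffer_size_mb: 15   # integer between 1 and 15; 15 MB is used when omitted
                                        # smaller buffers reduce heap usage on large syncs at the cost of more RPCs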
...
218 | 217 | project_id:
219 | 218 | type: "string"
220 | 219 | description: "The GCP project ID for the project containing the target BigQuery\
221 |     | - \ dataset."
    | 220 | + \ dataset. Read more <a href=\"https://cloud.google.com/iam/docs/creating-managing-service-accounts#creating\"\
    | 221 | + >here</a>."
222 | 222 | title: "Project ID"
223 | 223 | dataset_id:
224 | 224 | type: "string"
225 |     | - description: "Default BigQuery Dataset ID tables are replicated to if the\
226 |     | - \ source does not specify a namespace."
    | 225 | + description: "The default BigQuery Dataset ID that tables are replicated\
    | 226 | + \ to if the source does not specify a namespace. Read more <a href=\"\
    | 227 | + https://cloud.google.com/bigquery/docs/datasets#create-dataset\">here</a>."
227 | 228 | title: "Default Dataset ID"
228 | 229 | dataset_location:
229 | 230 | type: "string"
230 | 231 | description: "The location of the dataset. Warning: Changes made after creation\
231 |     | - \ will not be applied."
232 |     | - title: "Dataset Location"
    | 232 | + \ will not be applied. The default \"US\" value is used if not set explicitly.\
    | 233 | + \ Read more <a href=\"https://cloud.google.com/bigquery/docs/locations\"\
    | 234 | + >here</a>."
    | 235 | + title: "Dataset Location (Optional)"
233 | 236 | default: "US"
234 | 237 | enum:
235 | 238 | - "US"
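As a sketch, the three fields above might appear in a connection config like this (field names and constraints come from the spec; the values are placeholders):

  project_id: "my-gcp-project"   # GCP project that contains the target BigQuery dataset
  dataset_id: "airbyte_raw"      # default dataset, used when the source does not set a namespace
  dataset_location: "US"         # optional; defaults to "US" and cannot be changed after the dataset is created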
...
266 | 269 | credentials_json:
267 | 270 | type: "string"
268 | 271 | description: "The contents of the JSON service account key. Check out the\
269 |     | - \ <a href=\"https://docs.airbyte.io/integrations/destinations/bigquery\"\
    | 272 | + \ <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#service-account-key\"\
270 | 273 | >docs</a> if you need help generating this key. Default credentials will\
271 | 274 | \ be used if this field is left empty."
272 |     | - title: "Credentials JSON"
    | 275 | + title: "Credentials JSON (Optional)"
273 | 276 | airbyte_secret: true
274 | 277 | transformation_priority:
275 | 278 | type: "string"
276 | 279 | description: "Interactive run type means that the query is executed as soon\
277 | 280 | \ as possible, and these queries count towards concurrent rate limit and\
278 |     | - \ daily limit. Batch queries are queued and started as soon as idle resources\
    | 281 | + \ daily limit. Read more about interactive run type <a href=\"https://cloud.google.com/bigquery/docs/running-queries#queries\"\
    | 282 | + >here</a>. Batch queries are queued and started as soon as idle resources\
279 | 283 | \ are available in the BigQuery shared resource pool, which usually occurs\
280 | 284 | \ within a few minutes. Batch queries don’t count towards your concurrent\
281 |     | - \ rate limit."
282 |     | - title: "Transformation Query Run Type"
    | 285 | + \ rate limit. Read more about batch queries <a href=\"https://cloud.google.com/bigquery/docs/running-queries#batch\"\
    | 286 | + >here</a>. The default \"interactive\" value is used if not set explicitly."
    | 287 | + title: "Transformation Query Run Type (Optional)"
283 | 288 | default: "interactive"
284 | 289 | enum:
285 | 290 | - "interactive"
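A hedged sketch of these two fields in a config; the credentials value is a placeholder, and the batch alternative is inferred from the description rather than shown in this hunk:

  credentials_json: "<contents of the service account key file>"   # optional; default credentials are used when empty
  transformation_priority: "interactive"                           # default; a batch run type would queue queries in the shared pool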
...
288 | 293 | type: "object"
289 | 294 | title: "Loading Method"
290 | 295 | description: "Loading method used to send select the way data will be uploaded\
291 |     | - \ to BigQuery."
    | 296 | + \ to BigQuery. <br><b>Standard Inserts</b> - Direct uploading using SQL\
    | 297 | + \ INSERT statements. This method is extremely inefficient and provided\
    | 298 | + \ only for quick testing. In almost all cases, you should use staging.\
    | 299 | + \ <br><b>GCS Staging</b> - Writes large batches of records to a file,\
    | 300 | + \ uploads the file to GCS, then uses <b>COPY INTO table</b> to upload\
    | 301 | + \ the file. Recommended for most workloads for better speed and scalability.\
    | 302 | + \ Read more about GCS Staging <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#gcs-staging\"\
    | 303 | + >here</a>."
292 | 304 | oneOf:
293 | 305 | - title: "Standard Inserts"
294 | 306 | additionalProperties: false
295 |     | - description: "Direct uploading using streams."
296 | 307 | required:
297 | 308 | - "method"
298 | 309 | properties:
...
301 | 312 | const: "Standard"
302 | 313 | - title: "GCS Staging"
303 | 314 | additionalProperties: false
304 |     | - description: "Writes large batches of records to a file, uploads the file\
305 |     | - \ to GCS, then uses <pre>COPY INTO table</pre> to upload the file. Recommended\
306 |     | - \ for large production workloads for better speed and scalability."
307 | 315 | required:
308 | 316 | - "method"
309 | 317 | - "gcs_bucket_name"
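Sketching the two loading-method options described above; the top-level key name and the GCS Staging const are assumptions, since only const: "Standard" appears in this diff:

  # Option A: Standard Inserts - SQL INSERT statements, intended only for quick testing
  loading_method:             # assumed key name for the "Loading Method" object
    method: "Standard"

  # Option B: GCS Staging - stage record batches as files in GCS, then load them into the table
  loading_method:
    method: "GCS Staging"     # assumed const; not shown in this diff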
...
316 | 324 | gcs_bucket_name:
317 | 325 | title: "GCS Bucket Name"
318 | 326 | type: "string"
319 |     | - description: "The name of the GCS bucket."
    | 327 | + description: "The name of the GCS bucket. Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\"\
    | 328 | + >here</a>."
320 | 329 | examples:
321 | 330 | - "airbyte_sync"
322 | 331 | gcs_bucket_path:
    | 332 | + title: "GCS Bucket Path"
323 | 333 | description: "Directory under the GCS bucket where data will be written."
324 | 334 | type: "string"
325 | 335 | examples:
326 | 336 | - "data_sync/test"
327 | 337 | part_size_mb:
328 |     | - title: "Block Size (MB) for GCS multipart upload"
    | 338 | + title: "Block Size (MB) for GCS Multipart Upload (Optional)"
329 | 339 | description: "This is the size of a \"Part\" being buffered in memory.\
330 | 340 | \ It limits the memory usage when writing. Larger values will allow\
331 | 341 | \ to upload a bigger files and improve the speed, but consumes more\
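Continuing the sketch with the GCS Staging fields above (the bucket values reuse the spec's own examples; the part size is a placeholder):

    gcs_bucket_name: "airbyte_sync"     # must follow GCS bucket naming rules
    gcs_bucket_path: "data_sync/test"   # directory inside the bucket where staged files are written
    part_size_mb: 10                    # optional per-part buffer for the multipart upload; bigger parts are faster but use more memory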
...
340 | 350 | type: "string"
341 | 351 | description: "This upload method is supposed to temporary store records\
342 | 352 | \ in GCS bucket. What do you want to do with data in GCS bucket\
343 |     | - \ when migration has finished?"
344 |     | - title: "GCS tmp files afterward processing"
    | 353 | + \ when migration has finished? The default \"Delete all tmp files\
    | 354 | + \ from GCS\" value is used if not set explicitly."
    | 355 | + title: "GCS Tmp Files Afterward Processing (Optional)"
345 | 356 | default: "Delete all tmp files from GCS"
346 | 357 | enum:
347 | 358 | - "Delete all tmp files from GCS"
348 | 359 | - "Keep all tmp files in GCS"
349 | 360 | credential:
350 | 361 | title: "Credential"
    | 362 | + description: "An HMAC key is a type of credential and can be associated\
    | 363 | + \ with a service account or a user account in Cloud Storage. Read\
    | 364 | + \ more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\"\
    | 365 | + >here</a>."
351 | 366 | type: "object"
352 | 367 | oneOf:
353 | 368 | - title: "HMAC key"
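Finally, a hedged sketch of the cleanup and credential settings; the tmp-file key and the credential sub-field names below do not appear in this diff and are assumptions illustrating the titles and enum values above:

    keep_files_in_gcs: "Delete all tmp files from GCS"   # hypothetical key for the tmp-file option; this value is the spec default
    credential:
      credential_type: "HMAC_KEY"                        # assumed const backing the "HMAC key" oneOf title
      hmac_key_access_id: "<access id>"                  # assumed field name
      hmac_key_secret: "<secret>"                        # assumed field name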