@@ -87,27 +87,199 @@ def delete(self):
         self._wrapped.close()


-def dataset_exists(client, dataset_reference):
-    """Return if a dataset exists.
+def test_create_client_default_credentials():
+    """Create a BigQuery client with Application Default Credentials"""

-    Args:
-        client (google.cloud.bigquery.client.Client):
-            A client to connect to the BigQuery API.
-        dataset_reference (google.cloud.bigquery.dataset.DatasetReference):
-            A reference to the dataset to look for.
+    # [START bigquery_client_default_credentials]
+    from google.cloud import bigquery

-    Returns:
-        bool: ``True`` if the dataset exists, ``False`` otherwise.
+    # If you don't specify credentials when constructing the client, the
+    # client library will look for credentials in the environment.
+    client = bigquery.Client()
+    # [END bigquery_client_default_credentials]
+
+    assert client is not None
+
+
+def test_create_table_nested_repeated_schema(client, to_delete):
+    dataset_id = "create_table_nested_repeated_{}".format(_millis())
+    dataset_ref = client.dataset(dataset_id)
+    dataset = bigquery.Dataset(dataset_ref)
+    client.create_dataset(dataset)
+    to_delete.append(dataset)
+
+    # [START bigquery_nested_repeated_schema]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # dataset_ref = client.dataset('my_dataset')
+
+    schema = [
+        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
+        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
+        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
+        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
+        bigquery.SchemaField(
+            "addresses",
+            "RECORD",
+            mode="REPEATED",
+            fields=[
+                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
+            ],
+        ),
+    ]
+    table_ref = dataset_ref.table("my_table")
+    table = bigquery.Table(table_ref, schema=schema)
+    table = client.create_table(table)  # API request
+
+    print("Created table {}".format(table.full_table_id))
+    # [END bigquery_nested_repeated_schema]
+
+
+def test_create_table_cmek(client, to_delete):
+    dataset_id = "create_table_cmek_{}".format(_millis())
+    dataset = bigquery.Dataset(client.dataset(dataset_id))
+    client.create_dataset(dataset)
+    to_delete.append(dataset)
+
+    # [START bigquery_create_table_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # dataset_id = 'my_dataset'
+
+    table_ref = client.dataset(dataset_id).table("my_table")
+    table = bigquery.Table(table_ref)
+
+    # Set the encryption key to use for the table.
+    # TODO: Replace this key with a key you have created in Cloud KMS.
+    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
+        "cloud-samples-tests", "us-central1", "test", "test"
+    )
+    table.encryption_configuration = bigquery.EncryptionConfiguration(
+        kms_key_name=kms_key_name
+    )
+
+    table = client.create_table(table)  # API request
+
+    assert table.encryption_configuration.kms_key_name == kms_key_name
+    # [END bigquery_create_table_cmek]
+
+
+def test_create_partitioned_table(client, to_delete):
+    dataset_id = "create_table_partitioned_{}".format(_millis())
+    dataset_ref = bigquery.Dataset(client.dataset(dataset_id))
+    dataset = client.create_dataset(dataset_ref)
+    to_delete.append(dataset)
+
+    # [START bigquery_create_table_partitioned]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # dataset_ref = client.dataset('my_dataset')
+
+    table_ref = dataset_ref.table("my_partitioned_table")
+    schema = [
+        bigquery.SchemaField("name", "STRING"),
+        bigquery.SchemaField("post_abbr", "STRING"),
+        bigquery.SchemaField("date", "DATE"),
+    ]
+    table = bigquery.Table(table_ref, schema=schema)
+    table.time_partitioning = bigquery.TimePartitioning(
+        type_=bigquery.TimePartitioningType.DAY,
+        field="date",  # name of column to use for partitioning
+        expiration_ms=7776000000,
+    )  # 90 days
+
+    table = client.create_table(table)
+
+    print(
+        "Created table {}, partitioned on column {}".format(
+            table.table_id, table.time_partitioning.field
+        )
+    )
+    # [END bigquery_create_table_partitioned]
+
+    assert table.time_partitioning.type_ == "DAY"
+    assert table.time_partitioning.field == "date"
+    assert table.time_partitioning.expiration_ms == 7776000000
+
+
+def test_load_and_query_partitioned_table(client, to_delete):
+    dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
+    dataset = bigquery.Dataset(client.dataset(dataset_id))
+    client.create_dataset(dataset)
+    to_delete.append(dataset)
+
+    # [START bigquery_load_table_partitioned]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # dataset_id = 'my_dataset'
+    table_id = "us_states_by_date"
+
+    dataset_ref = client.dataset(dataset_id)
+    job_config = bigquery.LoadJobConfig()
+    job_config.schema = [
+        bigquery.SchemaField("name", "STRING"),
+        bigquery.SchemaField("post_abbr", "STRING"),
+        bigquery.SchemaField("date", "DATE"),
+    ]
+    job_config.skip_leading_rows = 1
+    job_config.time_partitioning = bigquery.TimePartitioning(
+        type_=bigquery.TimePartitioningType.DAY,
+        field="date",  # name of column to use for partitioning
+        expiration_ms=7776000000,
+    )  # 90 days
+    uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"
+
+    load_job = client.load_table_from_uri(
+        uri, dataset_ref.table(table_id), job_config=job_config
+    )  # API request
+
+    assert load_job.job_type == "load"
+
+    load_job.result()  # Waits for table load to complete.
+
+    table = client.get_table(dataset_ref.table(table_id))
+    print("Loaded {} rows to table {}".format(table.num_rows, table_id))
+    # [END bigquery_load_table_partitioned]
+    assert table.num_rows == 50
+
+    project_id = client.project
+
+    # [START bigquery_query_partitioned_table]
+    import datetime
+
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # project_id = 'my-project'
+    # dataset_id = 'my_dataset'
+    table_id = "us_states_by_date"
+
+    sql_template = """
+        SELECT *
+        FROM `{}.{}.{}`
+        WHERE date BETWEEN @start_date AND @end_date
     """
-    from google.cloud.exceptions import NotFound
+    sql = sql_template.format(project_id, dataset_id, table_id)
+    job_config = bigquery.QueryJobConfig()
+    job_config.query_parameters = [
+        bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
+        bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
+    ]

-    try:
-        client.get_dataset(dataset_reference)
-        return True
-    except NotFound:
-        return False
+    # API request
+    query_job = client.query(sql, job_config=job_config)

+    rows = list(query_job)
+    print("{} states were admitted to the US in the 1800s".format(len(rows)))
+    # [END bigquery_query_partitioned_table]
+    assert len(rows) == 29

+
+# [START bigquery_table_exists]
 def table_exists(client, table_reference):
     """Return if a table exists.

@@ -289,43 +461,6 @@ def test_update_table_expiration(client, to_delete):
     # [END bigquery_update_table_expiration]


-@pytest.mark.skip(
-    reason=(
-        "update_table() is flaky "
-        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
-    )
-)
-def test_add_empty_column(client, to_delete):
-    """Adds an empty column to an existing table."""
-    dataset_id = "add_empty_column_dataset_{}".format(_millis())
-    table_id = "add_empty_column_table_{}".format(_millis())
-    dataset = bigquery.Dataset(client.dataset(dataset_id))
-    dataset = client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
-    table = client.create_table(table)
-
-    # [START bigquery_add_empty_column]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-    # dataset_id = 'my_dataset'
-    # table_id = 'my_table'
-
-    table_ref = client.dataset(dataset_id).table(table_id)
-    table = client.get_table(table_ref)  # API request
-
-    original_schema = table.schema
-    new_schema = original_schema[:]  # creates a copy of the schema
-    new_schema.append(bigquery.SchemaField("phone", "STRING"))
-
-    table.schema = new_schema
-    table = client.update_table(table, ["schema"])  # API request
-
-    assert len(table.schema) == len(original_schema) + 1 == len(new_schema)
-    # [END bigquery_add_empty_column]
-
-
 @pytest.mark.skip(
     reason=(
         "update_table() is flaky "
@@ -414,47 +549,6 @@ def test_update_table_cmek(client, to_delete):
     # [END bigquery_update_table_cmek]


-def test_browse_table_data(client, to_delete, capsys):
-    """Retreive selected row data from a table."""
-
-    # [START bigquery_browse_table]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    dataset_ref = client.dataset("samples", project="bigquery-public-data")
-    table_ref = dataset_ref.table("shakespeare")
-    table = client.get_table(table_ref)  # API call
-
-    # Load all rows from a table
-    rows = client.list_rows(table)
-    assert len(list(rows)) == table.num_rows
-
-    # Load the first 10 rows
-    rows = client.list_rows(table, max_results=10)
-    assert len(list(rows)) == 10
-
-    # Specify selected fields to limit the results to certain columns
-    fields = table.schema[:2]  # first two columns
-    rows = client.list_rows(table, selected_fields=fields, max_results=10)
-    assert len(rows.schema) == 2
-    assert len(list(rows)) == 10
-
-    # Use the start index to load an arbitrary portion of the table
-    rows = client.list_rows(table, start_index=10, max_results=10)
-
-    # Print row data in tabular format
-    format_string = "{!s:<16} " * len(rows.schema)
-    field_names = [field.name for field in rows.schema]
-    print(format_string.format(*field_names))  # prints column headers
-    for row in rows:
-        print(format_string.format(*row))  # prints row data
-    # [END bigquery_browse_table]
-
-    out, err = capsys.readouterr()
-    out = list(filter(bool, out.split("\n")))  # list of non-blank lines
-    assert len(out) == 11
-
-
 @pytest.mark.skip(
     reason=(
         "update_table() is flaky "