Skip to content

Commit 579aaca

Browse files
authored
Add support for PARQUET format in BigQuery load jobs. (#3357)
* Add support for PARQUET format in BigQuery load jobs. Also adds a code sample demonstrating / testing parquet loads, modelled after the Python sample at https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet
* s/remoteLoadJob/loadJob/g — all jobs are remote jobs.
1 parent 91d36ab commit 579aaca

File tree

4 files changed

+48
-6
lines changed

4 files changed

+48
-6
lines changed

google-cloud-clients/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public class FormatOptions implements Serializable {
3434
static final String DATASTORE_BACKUP = "DATASTORE_BACKUP";
3535
static final String AVRO = "AVRO";
3636
static final String GOOGLE_SHEETS = "GOOGLE_SHEETS";
37+
static final String PARQUET = "PARQUET";
3738
private static final long serialVersionUID = -443376052020423691L;
3839

3940
private final String type;
@@ -104,8 +105,15 @@ public static FormatOptions googleSheets() {
104105
}
105106

106107
/**
107-
* Default options for the provided format.
108+
* Default options for PARQUET format.
108109
*/
110+
public static FormatOptions parquet() {
111+
return new FormatOptions(PARQUET); // PARQUET is the "PARQUET" type-name constant declared alongside AVRO, GOOGLE_SHEETS, etc.
112+
}
113+
114+
/**
115+
* Default options for the provided format.
116+
*/
109117
public static FormatOptions of(String format) {
110118
if (checkNotNull(format).equals(CSV)) {
111119
return csv();

google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -438,10 +438,10 @@ public Long writeRemoteFileToTable(String datasetName, String tableName)
438438
.setSchema(schema)
439439
.build();
440440
// Load the table
441-
Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
442-
remoteLoadJob = remoteLoadJob.waitFor();
441+
Job loadJob = bigquery.create(JobInfo.of(configuration));
442+
loadJob = loadJob.waitFor();
443443
// Check the table
444-
System.out.println("State: " + remoteLoadJob.getStatus().getState());
444+
System.out.println("State: " + loadJob.getStatus().getState());
445445
return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
446446
// [END bigquery_load_table_gcs_json]
447447
}

google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/CloudSnippets.java

+27-1
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,18 @@
1414
* limitations under the License.
1515
*/
1616

17-
package com.google.cloud.examples.bigquery.cloudsnippets;
17+
package com.google.cloud.examples.bigquery.snippets;
1818

1919
import com.google.cloud.bigquery.BigQuery;
2020
import com.google.cloud.bigquery.FieldValue;
2121
import com.google.cloud.bigquery.FieldValueList;
22+
import com.google.cloud.bigquery.FormatOptions;
23+
import com.google.cloud.bigquery.Job;
24+
import com.google.cloud.bigquery.JobInfo;
25+
import com.google.cloud.bigquery.LoadJobConfiguration;
2226
import com.google.cloud.bigquery.QueryJobConfiguration;
2327
import com.google.cloud.bigquery.QueryParameterValue;
28+
import com.google.cloud.bigquery.StandardTableDefinition;
2429
import com.google.cloud.bigquery.TableId;
2530
import java.util.concurrent.TimeoutException;
2631
import org.joda.time.DateTime;
@@ -283,4 +288,25 @@ public void runQueryWithTimestampParameters() throws InterruptedException {
283288
}
284289
// [END bigquery_query_params_timestamps]
285290
}
291+
292+
/**
293+
* Example of loading a parquet file from GCS into the "us_states" table of the dataset named by {@code datasetName}; prints the final job state and the destination table's row count to standard output.
294+
*/
295+
public void loadTableGcsParquet(String datasetName) throws InterruptedException {
296+
// [START bigquery_load_table_gcs_parquet]
297+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
298+
TableId tableId = TableId.of(datasetName, "us_states"); // destination table, inside the caller-supplied dataset
299+
LoadJobConfiguration configuration =
300+
LoadJobConfiguration.builder(tableId, sourceUri)
301+
.setFormatOptions(FormatOptions.parquet()) // no explicit schema is set on this configuration
302+
.build();
303+
// Load the table: create the load job, then wait on it
304+
Job loadJob = bigquery.create(JobInfo.of(configuration));
305+
loadJob = loadJob.waitFor(); // NOTE(review): the result is dereferenced below without a null or error check - confirm waitFor() cannot return null here
306+
// Check the table: re-read the destination table's definition, then print job state and row count
307+
StandardTableDefinition destinationTable = bigquery.getTable(tableId).getDefinition();
308+
System.out.println("State: " + loadJob.getStatus().getState());
309+
System.out.printf("Loaded %d rows.\n", destinationTable.getNumRows());
310+
// [END bigquery_load_table_gcs_parquet]
311+
}
286312
}

google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITCloudSnippets.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17-
package com.google.cloud.examples.bigquery.cloudsnippets;
17+
package com.google.cloud.examples.bigquery.snippets;
1818

1919
import static org.junit.Assert.assertTrue;
2020

@@ -119,4 +119,12 @@ public void testRunQueryWithTimestampParameters() throws InterruptedException {
119119
String got = bout.toString();
120120
assertTrue(got.contains("2016-12-07T09:00:00Z"));
121121
}
122+
123+
@Test
124+
public void testLoadTableGcsParquet() throws InterruptedException {
125+
cloudSnippets.loadTableGcsParquet(DATASET); // runs the sample against the test dataset
126+
String got = bout.toString(); // presumably bout captures System.out; its setup is outside this hunk - verify
127+
assertTrue(got.contains("DONE")); // expected in the sample's "State: ..." line
128+
assertTrue(got.contains("Loaded 50 rows.")); // the sample's "Loaded %d rows." line must report 50 rows
129+
}
122130
}

0 commit comments

Comments
 (0)