Skip to content

Commit 5cfc619

Browse files
authored
BigQuery: Add ORC format support for load jobs, missing bigtable support. (#3391)
* BigQuery: Add ORC format support for load jobs. Additionally, plumb in the (missing) Bigtable format support for federated tables. * add overrides, unit testing * Wire bigtable up into formatoptions * add copyright headers. * Convert BigtableColumn and BigtableColumnFamily to autovalue generation. * excise unused imports, address codacy kvetching about declaration order. * Address reviewer comments: formatting/whitespace, serializable, asserts * unused imports (asserts)
1 parent a2a9bba commit 5cfc619

File tree

6 files changed

+597
-4
lines changed

6 files changed

+597
-4
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright 2018 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery;
17+
18+
import com.google.common.base.Function;
19+
import com.google.auto.value.AutoValue;
20+
import java.io.Serializable;
21+
import javax.annotation.Nullable;
22+
23+
@AutoValue
24+
public abstract class BigtableColumn implements Serializable {
25+
26+
private static final long serialVersionUID = 1L;
27+
28+
@Nullable
29+
public abstract String getQualifierEncoded();
30+
31+
@Nullable
32+
public abstract String getFieldName();
33+
34+
@Nullable
35+
public abstract Boolean getOnlyReadLatest();
36+
37+
@Nullable
38+
public abstract String getEncoding();
39+
40+
@Nullable
41+
public abstract String getType();
42+
43+
@AutoValue.Builder
44+
public abstract static class Builder {
45+
46+
/**
47+
* Qualifier of the column.
48+
*
49+
* Columns in the parent column family that has this exact qualifier are exposed as . field. If
50+
* the qualifier is valid UTF-8 string, it can be specified in the qualifier_string field.
51+
* Otherwise, a base-64 encoded value must be set to qualifier_encoded. The column field name is
52+
* the same as the column qualifier. However, if the qualifier is not a valid BigQuery field
53+
* identifier, a valid identifier must be provided as field_name.
54+
*/
55+
public abstract Builder setQualifierEncoded(String qualifierEncoded);
56+
57+
/**
58+
* If the qualifier is not a valid BigQuery field identifier, a valid identifier must be
59+
* provided as the column field name and is used as field name in queries.
60+
*/
61+
public abstract Builder setFieldName(String fieldName);
62+
63+
/**
64+
* If this is set, only the latest version of value in this column are exposed.
65+
*
66+
* 'onlyReadLatest' can also be set at the column family level. However, the setting at the
67+
* column level takes precedence if 'onlyReadLatest' is set at both levels.
68+
*/
69+
public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest);
70+
71+
/**
72+
* The encoding of the values when the type is not STRING. Acceptable encoding values are: TEXT
73+
* - indicates values are alphanumeric text strings. BINARY - indicates values are encoded using
74+
* HBase Bytes.toBytes family of functions.
75+
*
76+
* Encoding can also be set at the column family level. However, the setting at the column level
77+
* takes precedence if 'encoding' is set at both levels.
78+
*/
79+
public abstract Builder setEncoding(String encoding);
80+
81+
/**
82+
* The type to convert the value in cells of this column.
83+
*
84+
* The values are expected to be encoded using HBase Bytes.toBytes function when using the
85+
* BINARY encoding value. Following BigQuery types are allowed (case-sensitive): BYTES STRING
86+
* INTEGER FLOAT BOOLEAN Default type is BYTES.
87+
*
88+
* 'type' can also be set at the column family level. However, the setting at the column level
89+
* takes precedence if 'type' is set at both levels.
90+
*/
91+
public abstract Builder setType(String type);
92+
93+
public abstract BigtableColumn build();
94+
}
95+
96+
static Builder newBuilder() {
97+
return new AutoValue_BigtableColumn.Builder();
98+
}
99+
100+
static BigtableColumn fromPb(com.google.api.services.bigquery.model.BigtableColumn column) {
101+
Builder builder = newBuilder();
102+
builder.setQualifierEncoded(column.getQualifierEncoded());
103+
builder.setFieldName(column.getFieldName());
104+
builder.setOnlyReadLatest(column.getOnlyReadLatest());
105+
builder.setEncoding(column.getEncoding());
106+
builder.setType(column.getType());
107+
return builder.build();
108+
}
109+
110+
com.google.api.services.bigquery.model.BigtableColumn toPb() {
111+
com.google.api.services.bigquery.model.BigtableColumn column = new com.google.api.services.bigquery.model.BigtableColumn()
112+
.setQualifierEncoded(getQualifierEncoded())
113+
.setFieldName(getFieldName())
114+
.setOnlyReadLatest(getOnlyReadLatest())
115+
.setEncoding(getEncoding())
116+
.setType(getType());
117+
return column;
118+
}
119+
120+
static final Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn> FROM_PB_FUNCTION =
121+
new Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>() {
122+
@Override
123+
public BigtableColumn apply(
124+
com.google.api.services.bigquery.model.BigtableColumn pb) {
125+
return BigtableColumn.fromPb(pb);
126+
}
127+
};
128+
129+
static final Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn> TO_PB_FUNCTION =
130+
new Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>() {
131+
@Override
132+
public com.google.api.services.bigquery.model.BigtableColumn apply(
133+
BigtableColumn column) {
134+
return column.toPb();
135+
}
136+
};
137+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*
2+
* Copyright 2018 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery;
17+
18+
import com.google.auto.value.AutoValue;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
23+
24+
/**
25+
* List of column families to expose in the table schema along with their types. This list restricts
26+
* the column families that can be referenced in queries and specifies their value types.
27+
*
28+
* You can use this list to do type conversions - see the 'type' field for more details. If you
29+
* leave this list empty, all column families are present in the table schema and their values are
30+
* read as BYTES. During a query only the column families referenced in that query are read from
31+
* Bigtable.
32+
*/
33+
34+
@AutoValue
35+
public abstract class BigtableColumnFamily implements Serializable {
36+
37+
private static final long serialVersionUID = 1L;
38+
39+
public abstract String getFamilyID();
40+
41+
public abstract List<BigtableColumn> getColumns();
42+
43+
public abstract String getEncoding();
44+
45+
public abstract Boolean getOnlyReadLatest();
46+
47+
public abstract String getType();
48+
49+
@AutoValue.Builder
50+
public abstract static class Builder {
51+
52+
/**
53+
* Identifier of the column family.
54+
*/
55+
public abstract Builder setFamilyID(String familyID);
56+
57+
/**
58+
* Lists of columns that should be exposed as individual fields as opposed to a list of (column
59+
* name, value) pairs. All columns whose qualifier matches a qualifier in this list can be
60+
* accessed as .. Other columns can be accessed as a list through .Column field.
61+
*/
62+
public abstract Builder setColumns(List<BigtableColumn> columns);
63+
64+
/**
65+
* The encoding of the values when the type is not STRING.
66+
*
67+
* Acceptable encoding values are: TEXT - indicates values are alphanumeric text strings. BINARY
68+
* - indicates values are encoded using HBase Bytes.toBytes family of functions.
69+
*
70+
* This can be overridden for a specific column by listing that column in 'columns' and
71+
* specifying an encoding for it.
72+
*/
73+
public abstract Builder setEncoding(String encoding);
74+
75+
/**
76+
* If true, only the latest version of values are exposed for all columns in this column family.
77+
* This can be overridden for a specific column by listing that column in 'columns' and
78+
* specifying a different setting for that column.
79+
*/
80+
public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest);
81+
82+
/**
83+
* The type to convert the value in cells of this column family. The values are expected to be
84+
* encoded using HBase Bytes.toBytes function when using the BINARY encoding value.
85+
*
86+
* Following BigQuery types are allowed (case-sensitive): BYTES STRING INTEGER FLOAT BOOLEAN.
87+
*
88+
* The default type is BYTES. This can be overridden for a specific column by listing that
89+
* column in 'columns' and specifying a type for it.
90+
*/
91+
public abstract Builder setType(String type);
92+
93+
public abstract BigtableColumnFamily build();
94+
}
95+
96+
static Builder newBuilder() {
97+
return new AutoValue_BigtableColumnFamily.Builder();
98+
}
99+
100+
static BigtableColumnFamily fromPb(
101+
com.google.api.services.bigquery.model.BigtableColumnFamily columnFamily) {
102+
Builder builder = newBuilder();
103+
builder.setFamilyID(columnFamily.getFamilyId());
104+
builder.setColumns(Lists.transform(columnFamily.getColumns(), BigtableColumn.FROM_PB_FUNCTION));
105+
builder.setEncoding(columnFamily.getEncoding());
106+
builder.setOnlyReadLatest(columnFamily.getOnlyReadLatest());
107+
builder.setType(columnFamily.getType());
108+
return builder.build();
109+
110+
}
111+
112+
com.google.api.services.bigquery.model.BigtableColumnFamily toPb() {
113+
com.google.api.services.bigquery.model.BigtableColumnFamily colFamilyPb = new com.google.api.services.bigquery.model.BigtableColumnFamily()
114+
.setFamilyId(getFamilyID())
115+
.setEncoding(getEncoding())
116+
.setOnlyReadLatest(getOnlyReadLatest())
117+
.setType(getType());
118+
if (getColumns() != null) {
119+
colFamilyPb.setColumns(Lists.transform(getColumns(), BigtableColumn.TO_PB_FUNCTION));
120+
}
121+
return colFamilyPb;
122+
}
123+
124+
static final Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily> FROM_PB_FUNCTION =
125+
new Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily>() {
126+
@Override
127+
public BigtableColumnFamily apply(
128+
com.google.api.services.bigquery.model.BigtableColumnFamily pb) {
129+
return BigtableColumnFamily.fromPb(pb);
130+
}
131+
};
132+
133+
static final Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily> TO_PB_FUNCTION =
134+
new Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily>() {
135+
@Override
136+
public com.google.api.services.bigquery.model.BigtableColumnFamily apply(
137+
BigtableColumnFamily columnFamily) {
138+
return columnFamily.toPb();
139+
}
140+
};
141+
}
142+
143+
144+

0 commit comments

Comments
 (0)