Skip to content

Commit 0c1e946

Browse files
feat: Add support for dataproc BatchController service (#546)
PiperOrigin-RevId: 404333740 Source-Link: googleapis/googleapis@5088bd7 Source-Link: googleapis/googleapis-gen@44b8707 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNDRiODcwNzgzYzE5NDM3NzFiY2RlYzc0OGFlNGQ1MjA4YzNkN2YwZSJ9
1 parent e985832 commit 0c1e946

29 files changed

+18208
-8792
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,372 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.cloud.dataproc.v1;
18+
19+
import "google/api/annotations.proto";
20+
import "google/api/client.proto";
21+
import "google/api/field_behavior.proto";
22+
import "google/api/resource.proto";
23+
import "google/cloud/dataproc/v1/shared.proto";
24+
import "google/longrunning/operations.proto";
25+
import "google/protobuf/empty.proto";
26+
import "google/protobuf/timestamp.proto";
27+
28+
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
29+
option java_multiple_files = true;
30+
option java_outer_classname = "BatchesProto";
31+
option java_package = "com.google.cloud.dataproc.v1";
32+
33+
// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  //
  // Returns a long-running Operation whose `response` is a `Batch` and whose
  // `metadata` is a `BatchOperationMetadata` (see `operation_info` below).
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in terminal state,
  // the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}
76+
77+
// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  // Format: `projects/{project}/locations/{location}` (per the CreateBatch
  // HTTP binding).
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final component of
  // the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
110+
111+
// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The name of the batch to retrieve.
  // Format: `projects/{project}/locations/{location}/batches/{batch}`
  // (the `dataproc.googleapis.com/Batch` resource pattern).
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
121+
122+
// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  // Format: `projects/{project}/locations/{location}` (per the ListBatches
  // HTTP binding).
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}
141+
142+
// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}
151+
152+
// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The name of the batch resource to delete.
  // Format: `projects/{project}/locations/{location}/batches/{batch}`
  // (the `dataproc.googleapis.com/Batch` resource pattern).
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
162+
163+
// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch is created before running.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Output only. The resource name of the batch.
  // Format: `projects/{project}/locations/{location}/batches/{batch}`
  // (per the resource pattern above).
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Unique Universal Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  // Exactly one of the following configs may be set.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  // (`RuntimeInfo` is defined in google/cloud/dataproc/v1/shared.proto.)
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered a current state.
  google.protobuf.Timestamp state_time = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  // (`RuntimeConfig` is defined in google/cloud/dataproc/v1/shared.proto.)
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  // (`EnvironmentConfig` is defined in google/cloud/dataproc/v1/shared.proto.)
  EnvironmentConfig environment_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
}
268+
269+
// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
300+
301+
// A configuration for running an [Apache Spark](http://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains the class
    // must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
336+
337+
// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}
359+
360+
// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}

0 commit comments

Comments
 (0)