Skip to content

Commit 4165794

Browse files
bradmiro authored and bcoe committed
fix: overhauled quickstart (#280)
* Added create cluster sample for Cloud Dataproc * Modify test and slight change to Dataproc success message * Changed region tag to include 'dataproc' * changed dataproc imports to explicit v1 * Added create cluster sample for Cloud Dataproc Updated READMEs ignore createCluster until it lands Modify test and slight change to Dataproc success message changed dataproc imports to explicit v1 * Adding updated Dataproc quickstart * Added mocha commands imports to tests Co-authored-by: Benjamin E. Coe <[email protected]>
1 parent 14f36df commit 4165794

File tree

5 files changed

+199
-47
lines changed

5 files changed

+199
-47
lines changed

dataproc/createCluster.js

+2-2
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,9 @@ function main(projectId, region, clusterName) {
2525

2626
async function createCluster() {
2727
// TODO(developer): Uncomment and set the following variables
28-
// project_id = 'YOUR_PROJECT_ID'
28+
// projectId = 'YOUR_PROJECT_ID'
2929
// region = 'YOUR_CLUSTER_REGION'
30-
// cluster_name = 'YOUR_CLUSTER_NAME'
30+
// clusterName = 'YOUR_CLUSTER_NAME'
3131

3232
// Create the cluster config
3333
const request = {

dataproc/package.json

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@
1414
"test": "mocha system-test --timeout 600000"
1515
},
1616
"dependencies": {
17-
"@google-cloud/dataproc": "^1.4.4",
18-
"uuid": "^3.3.3",
19-
"yargs": "^15.0.0"
17+
"@google-cloud/dataproc": "^1.4.1",
18+
"@google-cloud/storage": "^4.1.3",
19+
"sleep": "^6.1.0"
2020
},
2121
"devDependencies": {
2222
"chai": "^4.2.0",

dataproc/quickstart.js

+127-34
Original file line number | Diff line number | Diff line change
@@ -14,42 +14,135 @@
1414

1515
'use strict';
1616

17-
// [START dataproc_quickstart]
18-
const dataproc = require('@google-cloud/dataproc');
19-
const client = new dataproc.v1.ClusterControllerClient();
20-
21-
async function quickstart() {
22-
const projectId = await client.getProjectId();
23-
const request = {
24-
region: 'global',
25-
projectId,
26-
};
27-
const [resources] = await client.listClusters(request);
28-
console.log('Total resources:', resources.length);
29-
for (const resource of resources) {
30-
console.log(resource);
31-
}
17+
function main(projectId, region, clusterName, jobFilePath) {
18+
// [START dataproc_quickstart]
19+
const dataproc = require('@google-cloud/dataproc').v1;
20+
const {Storage} = require('@google-cloud/storage');
3221

33-
let nextRequest = request;
34-
// Or obtain the paged response.
35-
const options = {autoPaginate: false};
36-
do {
37-
const responses = await client.listClusters(nextRequest, options);
38-
// The actual resources in a response.
39-
const resources = responses[0];
40-
// The next request if the response shows that there are more responses.
41-
nextRequest = responses[1];
42-
// The actual response object, if necessary.
43-
// const rawResponse = responses[2];
44-
for (const resource of resources) {
45-
console.log(resource);
46-
}
47-
} while (nextRequest);
22+
const sleep = require('sleep');
23+
24+
// Create a cluster client with the endpoint set to the desired cluster region
25+
const clusterClient = new dataproc.ClusterControllerClient({
26+
apiEndpoint: `${region}-dataproc.googleapis.com`,
27+
});
4828

49-
client.listClustersStream(request).on('data', element => {
50-
console.log(element);
29+
// Create a job client with the endpoint set to the desired cluster region
30+
const jobClient = new dataproc.v1.JobControllerClient({
31+
apiEndpoint: `${region}-dataproc.googleapis.com`,
5132
});
33+
34+
async function quickstart() {
35+
// TODO(developer): Uncomment and set the following variables
36+
// projectId = 'YOUR_PROJECT_ID'
37+
// region = 'YOUR_CLUSTER_REGION'
38+
// clusterName = 'YOUR_CLUSTER_NAME'
39+
// jobFilePath = 'YOUR_JOB_FILE_PATH'
40+
41+
// Create the cluster config
42+
const cluster = {
43+
projectId: projectId,
44+
region: region,
45+
cluster: {
46+
clusterName: clusterName,
47+
config: {
48+
masterConfig: {
49+
numInstances: 1,
50+
machineTypeUri: 'n1-standard-1',
51+
},
52+
workerConfig: {
53+
numInstances: 2,
54+
machineTypeUri: 'n1-standard-1',
55+
},
56+
},
57+
},
58+
};
59+
60+
// Create the cluster
61+
const [operation] = await clusterClient.createCluster(cluster);
62+
const [response] = await operation.promise();
63+
64+
// Output a success message
65+
console.log(`Cluster created successfully: ${response.clusterName}`);
66+
67+
const job = {
68+
projectId: projectId,
69+
region: region,
70+
job: {
71+
placement: {
72+
clusterName: clusterName,
73+
},
74+
pysparkJob: {
75+
mainPythonFileUri: jobFilePath,
76+
},
77+
},
78+
};
79+
80+
let [jobResp] = await jobClient.submitJob(job);
81+
const jobId = jobResp.reference.jobId;
82+
83+
console.log(`Submitted job "${jobId}".`);
84+
85+
// Terminal states for a job
86+
const terminalStates = new Set(['DONE', 'ERROR', 'CANCELLED']);
87+
88+
// Create a timeout such that the job gets cancelled if not
89+
// in a termimal state after a fixed period of time.
90+
const timeout = 600000;
91+
const start = new Date();
92+
93+
// Wait for the job to finish.
94+
const jobReq = {
95+
projectId: projectId,
96+
region: region,
97+
jobId: jobId,
98+
};
99+
100+
while (!terminalStates.has(jobResp.status.state)) {
101+
if (new Date() - timeout > start) {
102+
await jobClient.cancelJob(jobReq);
103+
console.log(
104+
`Job ${jobId} timed out after threshold of ` +
105+
`${timeout / 60000} minutes.`
106+
);
107+
break;
108+
}
109+
await sleep.sleep(1);
110+
[jobResp] = await jobClient.getJob(jobReq);
111+
}
112+
113+
const clusterReq = {
114+
projectId: projectId,
115+
region: region,
116+
clusterName: clusterName,
117+
};
118+
119+
const [clusterResp] = await clusterClient.getCluster(clusterReq);
120+
121+
const storage = new Storage();
122+
123+
const output = await storage
124+
.bucket(clusterResp.config.configBucket)
125+
.file(
126+
`google-cloud-dataproc-metainfo/${clusterResp.clusterUuid}/` +
127+
`jobs/${jobId}/driveroutput.000000000`
128+
)
129+
.download();
130+
131+
// Output a success message.
132+
console.log(
133+
`Job ${jobId} finished with state ${jobResp.status.state}:\n${output}`
134+
);
135+
136+
// Delete the cluster once the job has terminated.
137+
const [deleteOperation] = await clusterClient.deleteCluster(clusterReq);
138+
await deleteOperation.promise();
139+
140+
// Output a success message
141+
console.log(`Cluster ${clusterName} successfully deleted.`);
142+
}
143+
144+
quickstart();
145+
// [END dataproc_quickstart]
52146
}
53147

54-
quickstart();
55-
// [END dataproc_quickstart]
148+
main(...process.argv.slice(2));

dataproc/system-test/createCluster.test.js

+3-3
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ const cp = require('child_process');
2020
const uuid = require('uuid');
2121

2222
const region = 'us-central1';
23-
const clusterName = `test-${uuid()}`;
23+
const clusterName = `node-cc-test-${uuid()}`;
2424

2525
const dataproc = require('@google-cloud/dataproc').v1;
26-
const client = new dataproc.v1.ClusterControllerClient({
26+
const clusterClient = new dataproc.ClusterControllerClient({
2727
apiEndpoint: `${region}-dataproc.googleapis.com`,
2828
});
2929

@@ -40,7 +40,7 @@ describe('create a dataproc cluster', () => {
4040
});
4141

4242
after(async () => {
43-
await client.deleteCluster({
43+
await clusterClient.deleteCluster({
4444
projectId: projectId,
4545
region: region,
4646
clusterName: clusterName,

dataproc/system-test/quickstart.test.js

+64-5
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,74 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
'use strict';
16+
1517
const {assert} = require('chai');
16-
const {describe, it} = require('mocha');
18+
const {describe, it, before, after} = require('mocha');
1719
const cp = require('child_process');
20+
const uuid = require('uuid');
21+
22+
const dataproc = require('@google-cloud/dataproc').v1;
23+
const {Storage} = require('@google-cloud/storage');
24+
25+
const myUuid = uuid();
26+
const region = 'us-central1';
27+
const clusterName = `node-qs-test-${myUuid}`;
28+
const bucketName = `node-dataproc-qs-test-${myUuid}`;
29+
const projectId = process.env.GCLOUD_PROJECT;
30+
const jobFileName = 'sum.py';
31+
const jobFilePath = `gs://${bucketName}/${jobFileName}`;
32+
const sortCode =
33+
'import pyspark\n' +
34+
'sc = pyspark.SparkContext()\n' +
35+
'rdd = sc.parallelize((1,2,3,4,5))\n' +
36+
'sum = rdd.reduce(lambda x, y: x + y)\n';
37+
38+
const clusterClient = new dataproc.v1.ClusterControllerClient({
39+
apiEndpoint: `${region}-dataproc.googleapis.com`,
40+
});
41+
42+
const storage = new Storage();
1843

1944
const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});
2045

21-
describe('dataproc samples', () => {
22-
it('should run the quickstart', async () => {
23-
const stdout = execSync('node quickstart');
24-
assert.match(stdout, /Total resources:/);
46+
describe('execute the quickstart', () => {
47+
before(async () => {
48+
const [bucket] = await storage.createBucket(bucketName);
49+
await bucket.file(jobFileName).save(sortCode);
50+
});
51+
52+
it('should execute the quickstart', async () => {
53+
const stdout = execSync(
54+
`node quickstart.js "${projectId}" "${region}" "${clusterName}" "${jobFilePath}"`
55+
);
56+
assert.match(stdout, /Cluster created successfully/);
57+
assert.match(stdout, /Submitted job/);
58+
assert.match(stdout, /finished with state DONE:/);
59+
assert.match(stdout, /successfully deleted/);
60+
});
61+
62+
after(async () => {
63+
await storage
64+
.bucket(bucketName)
65+
.file(jobFileName)
66+
.delete();
67+
await storage.bucket(bucketName).delete();
68+
69+
const [clusters] = await clusterClient.listClusters({
70+
projectId: projectId,
71+
region: region,
72+
});
73+
74+
for (const cluster of clusters) {
75+
if (cluster.clusterName === clusterName) {
76+
await clusterClient.deleteCluster({
77+
projectId: projectId,
78+
region: region,
79+
clusterName: clusterName,
80+
});
81+
break;
82+
}
83+
}
2584
});
2685
});

0 commit comments

Comments (0)