Skip to content

Commit 3ef4622

Browse files
author
Joanna Grycz
committed
feat: tpu_queued_resources_startup_script
1 parent 7a89075 commit 3ef4622

8 files changed

+329
-0
lines changed

.github/workflows/tpu.yaml

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: tpu
16+
on:
17+
push:
18+
branches:
19+
- main
20+
paths:
21+
- 'tpu/**'
22+
- '.github/workflows/tpu.yaml'
23+
- '.github/workflows/test.yaml'
24+
pull_request:
25+
types:
26+
- opened
27+
- reopened
28+
- synchronize
29+
- labeled
30+
paths:
31+
- 'tpu/**'
32+
- '.github/workflows/tpu.yaml'
33+
- '.github/workflows/test.yaml'
34+
schedule:
35+
- cron: '0 0 * * 0'
36+
jobs:
37+
test:
38+
# Ref: https://github.com/google-github-actions/auth#usage
39+
permissions:
40+
contents: 'read'
41+
id-token: 'write'
42+
if: github.event.action != 'labeled' || github.event.label.name == 'actions:force-run'
43+
uses: ./.github/workflows/test.yaml
44+
with:
45+
name: 'tpu'
46+
path: 'tpu'
47+
flakybot:
48+
# Ref: https://github.com/google-github-actions/auth#usage
49+
permissions:
50+
contents: 'read'
51+
id-token: 'write'
52+
if: github.event_name == 'schedule' && always() # always() submits logs even if tests fail
53+
uses: ./.github/workflows/flakybot.yaml
54+
needs: [test]

.github/workflows/utils/workflows.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"speech",
9191
"talent",
9292
"texttospeech",
93+
"tpu",
9394
"translate",
9495
"video-intelligence",
9596
"vision/productSearch",

CODEOWNERS

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ compute @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-revie
2020
iam @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2121
kms @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2222
orgpolicy @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
23+
tpu @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2324
recaptcha_enterprise @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2425
recaptcha_enterprise/demosite @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/recaptcha-customer-obsession-reviewers @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2526
secret-manager @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/cloud-secrets-team

tpu/package.json

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"name": "nodejs-docs-samples-tpu",
3+
"license": "Apache-2.0",
4+
"author": "Google Inc.",
5+
"engines": {
6+
"node": ">=16.0.0"
7+
},
8+
"repository": "googleapis/nodejs-tpu",
9+
"private": true,
10+
"files": [
11+
"*.js"
12+
],
13+
"scripts": {
14+
"test": "c8 mocha -p -j 2 test --timeout 1200000"
15+
},
16+
"dependencies": {
17+
"@google-cloud/tpu": "^3.5.0"
18+
},
19+
"devDependencies": {
20+
"c8": "^10.0.0",
21+
"mocha": "^10.0.0"
22+
}
23+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
/**
 * Creates a TPU queued resource whose node runs a start-up script.
 *
 * @param {string} nodeName - Name (and node ID) of the TPU node to request.
 * @param {string} queuedResourceName - Name (and ID) of the queued resource.
 * @param {string} zone - Zone used to build the parent location path.
 * @param {string} tpuType - Accelerator type passed to the node.
 * @param {string} tpuSoftwareVersion - Runtime version passed to the node.
 */
async function main(
  nodeName,
  queuedResourceName,
  zone,
  tpuType,
  tpuSoftwareVersion
) {
  // [START tpu_queued_resources_startup_script]
  // Import the TPU library
  const tpuLibrary = require('@google-cloud/tpu');
  const {TpuClient} = tpuLibrary.v2alpha1;
  const {Node, NetworkConfig, QueuedResource} =
    tpuLibrary.protos.google.cloud.tpu.v2alpha1;

  // Instantiate a tpuClient
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project, where you want to create queued resource.
  const projectId = await tpuClient.getProjectId();

  // The name of the network you want the node to connect to. The network should be assigned to your project.
  const networkName = 'compute-tpu-network';

  // The region of the network, that you want the node to connect to.
  const region = 'europe-west4';

  // The name for your queued resource.
  // queuedResourceName = 'queued-resource-1';

  // The name for your node.
  // nodeName = 'node-name-1';

  // The zone in which to create the node.
  // For more information about supported TPU types for specific zones,
  // see https://cloud.google.com/tpu/docs/regions-zones
  // zone = 'europe-west4-a';

  // The accelerator type that specifies the version and size of the node you want to create.
  // For more information about supported accelerator types for each TPU version,
  // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  // tpuType = 'v2-8';

  // Software version that specifies the version of the node runtime to install. For more information,
  // see https://cloud.google.com/tpu/docs/runtimes
  // tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  async function callCreateQueuedResourceStartupScript() {
    // Location that owns both the node spec and the create request.
    const parent = `projects/${projectId}/locations/${zone}`;

    // The script writes a greeting to a log file, then upgrades numpy and
    // appends the installer output to the same file.
    const startupScript = [
      '#!/bin/bash',
      'echo "Hello World" > /var/log/hello.log',
      'sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1',
    ].join('\n');

    // Network and subnetwork share the same name in this sample.
    const networkConfig = new NetworkConfig({
      enableExternalIps: true,
      network: `projects/${projectId}/global/networks/${networkName}`,
      subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
    });

    // Describe the node that the queued resource should provision.
    const node = new Node({
      name: nodeName,
      zone,
      acceleratorType: tpuType,
      runtimeVersion: tpuSoftwareVersion,
      networkConfig,
      queuedResource: `${parent}/queuedResources/${queuedResourceName}`,
      metadata: {
        'startup-script': startupScript,
      },
    });

    // Wrap the node in a queued resource with a single node spec.
    const queuedResource = new QueuedResource({
      name: queuedResourceName,
      tpu: {
        nodeSpec: [{parent, node, nodeId: nodeName}],
      },
    });

    const [createOperation] = await tpuClient.createQueuedResource({
      parent,
      queuedResource,
      queuedResourceId: queuedResourceName,
    });

    // Wait for the create operation to complete.
    await createOperation.promise();

    // You can wait until TPU Node is READY,
    // and check its status using getTpuVm() from `tpu_vm_get` sample.
    console.log(
      `Queued resource ${queuedResourceName} with start-up script created.`
    );
  }
  await callCreateQueuedResourceStartupScript();
  // [END tpu_queued_resources_startup_script]
}

// Run the sample with the CLI arguments; report failures via the exit code.
main(...process.argv.slice(2)).catch(error => {
  console.error(error);
  process.exitCode = 1;
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
/**
 * Force-deletes a TPU queued resource.
 *
 * @param {string} queuedResourceName - Name of the queued resource to delete.
 * @param {string} zone - Zone used to build the queued resource path.
 */
async function main(queuedResourceName, zone) {
  // [START tpu_queued_resources_delete_force]
  // Import the TPU library
  const {TpuClient} = require('@google-cloud/tpu').v2alpha1;

  // Instantiate a tpuClient
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project that owns the queued resource to delete.
  const projectId = await tpuClient.getProjectId();

  // The name of queued resource.
  // queuedResourceName = 'queued-resource-1';

  // The zone of your queued resource.
  // zone = 'europe-west4-a';

  async function callForceDeleteQueuedResource() {
    // Fully qualified name of the queued resource.
    const name = `projects/${projectId}/locations/${zone}/queuedResources/${queuedResourceName}`;

    // The request sets `force: true` on the delete call.
    const [deleteOperation] = await tpuClient.deleteQueuedResource({
      name,
      force: true,
    });

    // Wait for the delete operation to complete.
    await deleteOperation.promise();

    console.log(`Queued resource ${queuedResourceName} deletion forced.`);
  }
  await callForceDeleteQueuedResource();
  // [END tpu_queued_resources_delete_force]
}

// Run the sample with the CLI arguments; report failures via the exit code.
main(...process.argv.slice(2)).catch(error => {
  console.error(error);
  process.exitCode = 1;
});

tpu/test/.eslintrc

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
env:
3+
mocha: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
const path = require('path');
20+
const assert = require('node:assert/strict');
21+
const {after, describe, it} = require('mocha');
22+
const cp = require('child_process');
23+
24+
/**
 * Runs a shell command synchronously and returns its stdout as a string.
 *
 * Caller-supplied options (for example `cwd`) are forwarded to
 * `child_process.execSync`; the previous one-argument wrapper dropped them.
 * Output is decoded as utf-8 unless the caller overrides `encoding`.
 *
 * @param {string} cmd - Command line to execute.
 * @param {object} [opts] - Extra `child_process.execSync` options.
 * @returns {string} Captured stdout of the command.
 */
const execSync = (cmd, opts = {}) =>
  cp.execSync(cmd, {encoding: 'utf-8', ...opts});
25+
// Parent of the test directory; passed as the `cwd` option when invoking samples.
const cwd = path.join(__dirname, '..');

// The suite callback is synchronous: Mocha does not support `describe`
// callbacks that return a Promise, so it must not be declared `async`.
describe('TPU queued resource with start-up script', () => {
  // Random numeric suffixes keep names distinct between test runs.
  const queuedResourceName = `queued-resource-startup-script-${Math.floor(Math.random() * 1000 + 1)}`;
  const nodeName = `node-startup-script-2a2b3c${Math.floor(Math.random() * 1000 + 1)}`;
  const zone = 'us-east1-d';
  const tpuType = 'v3-32';
  const tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  after(() => {
    // Delete queued resource
    execSync(
      `node ./queuedResources/forceDeleteQueuedResource.js ${queuedResourceName} ${zone}`,
      {
        cwd,
      }
    );
  });

  it('should create queued resource with start-up script', () => {
    const response = execSync(
      `node ./queuedResources/createQueuedResourceStartupScript.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
      {
        cwd,
      }
    );

    // The sample prints this exact message once the create operation resolves.
    assert(
      response.includes(
        `Queued resource ${queuedResourceName} with start-up script created.`
      )
    );
  });
});

0 commit comments

Comments
 (0)