Skip to content

feat(tpu): add tpu vm create spot sample. #9610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b951997
Changed package, added information to CODEOWNERS
TetyanaYahodska Oct 15, 2024
9ee20d7
Added information to CODEOWNERS
TetyanaYahodska Oct 15, 2024
f0b8314
Added timeout
TetyanaYahodska Oct 16, 2024
055d61e
Merge branch 'main' into tpu-vm-crud-operations
TetyanaYahodska Oct 16, 2024
d3e1dee
Fixed parameters for test
TetyanaYahodska Oct 16, 2024
2253b54
Fixed DeleteTpuVm and naming
TetyanaYahodska Oct 17, 2024
d29a6b5
Added comment, created Util class
TetyanaYahodska Oct 18, 2024
d832b31
Merge branch 'main' into tpu-vm-crud-operations
TetyanaYahodska Oct 23, 2024
6956852
Fixed naming
TetyanaYahodska Oct 23, 2024
478beaa
Fixed whitespace
TetyanaYahodska Oct 23, 2024
f6b76cc
Merge branch 'main' into tpu-vm-crud-operations
TetyanaYahodska Oct 29, 2024
ec13f4d
Split PR into smaller, deleted redundant code
TetyanaYahodska Oct 29, 2024
af6e29e
Implemented tpu_vm_create_spot sample, created test
TetyanaYahodska Oct 29, 2024
5c78d5f
changed zone
TetyanaYahodska Oct 29, 2024
857f87f
Merged changes from main
TetyanaYahodska Oct 30, 2024
803a5d9
Changed zone
TetyanaYahodska Oct 30, 2024
f5adfaf
Merge branch 'main' into tpu_vm_create_spot
TetyanaYahodska Oct 31, 2024
d55c212
Fixed empty lines and tests, deleted cleanup method
TetyanaYahodska Oct 31, 2024
51e17cd
Changed zone
TetyanaYahodska Nov 1, 2024
b734024
Merged changes from main
TetyanaYahodska Nov 7, 2024
e0dd3f6
Fixed tests
TetyanaYahodska Nov 18, 2024
623e97d
Deleted redundant test class
TetyanaYahodska Nov 18, 2024
a2981ef
Increased timeout
TetyanaYahodska Nov 18, 2024
996c3d9
Merged changes from main
TetyanaYahodska Nov 20, 2024
2d0fc31
Fixed test
TetyanaYahodska Nov 20, 2024
efecb9a
Merge branch 'main' into tpu_vm_create_spot
TetyanaYahodska Nov 27, 2024
4e8f388
Resolved conflict
TetyanaYahodska Dec 9, 2024
8f0f84f
Resolved conflict
TetyanaYahodska Dec 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions tpu/src/main/java/tpu/CreateSpotTpuVm.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package tpu;

//[START tpu_vm_create_spot]
import com.google.cloud.tpu.v2.CreateNodeRequest;
import com.google.cloud.tpu.v2.Node;
import com.google.cloud.tpu.v2.SchedulingConfig;
import com.google.cloud.tpu.v2.TpuClient;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

/**
 * Sample showing how to create a Cloud TPU VM whose scheduling configuration is marked
 * preemptible.
 */
public class CreateSpotTpuVm {
  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    // Project ID or project number of the Google Cloud project in which to create the node.
    String projectId = "YOUR_PROJECT_ID";
    // The zone in which to create the TPU.
    // For more information about supported TPU types for specific zones,
    // see https://cloud.google.com/tpu/docs/regions-zones
    String zone = "us-central1-f";
    // The name for your TPU.
    String nodeName = "YOUR_TPU_NAME";
    // The accelerator type that specifies the version and size of the Cloud TPU you want to create.
    // For more information about supported accelerator types for each TPU version,
    // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
    String tpuType = "v2-8";
    // Software version that specifies the version of the TPU runtime to install.
    // For more information see https://cloud.google.com/tpu/docs/runtimes
    String tpuSoftwareVersion = "tpu-vm-tf-2.14.1";

    createSpotTpuVm(projectId, zone, nodeName, tpuType, tpuSoftwareVersion);
  }

  /**
   * Creates a preemptible TPU VM with the specified name, zone, accelerator type, and version.
   *
   * <p>The call blocks on the future returned by {@code createNodeAsync} until it completes.
   *
   * @param projectId project ID or project number that will own the node
   * @param zone zone used to build the {@code projects/{project}/locations/{zone}} parent path
   * @param nodeName value used both as the node ID of the request and as the node's name
   * @param tpuType accelerator type set on the node, for example {@code v2-8}
   * @param tpuSoftwareVersion runtime version set on the node
   * @return the {@link Node} produced by the completed create operation
   * @throws IOException declared by {@code TpuClient.create()}
   * @throws ExecutionException if waiting on the create operation's future fails
   * @throws InterruptedException if the thread is interrupted while waiting on the future
   */
  public static Node createSpotTpuVm(
      String projectId, String zone, String nodeName, String tpuType, String tpuSoftwareVersion)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    try (TpuClient tpuClient = TpuClient.create()) {
      String parent = String.format("projects/%s/locations/%s", projectId, zone);
      // TODO: Wait for update of library to change preemptible to spot=True
      SchedulingConfig schedulingConfig = SchedulingConfig.newBuilder()
          .setPreemptible(true)
          .build();

      Node tpuVm = Node.newBuilder()
          .setName(nodeName)
          .setAcceleratorType(tpuType)
          .setRuntimeVersion(tpuSoftwareVersion)
          .setSchedulingConfig(schedulingConfig)
          .build();

      CreateNodeRequest request = CreateNodeRequest.newBuilder()
          .setParent(parent)
          .setNodeId(nodeName)
          .setNode(tpuVm)
          .build();

      // get() waits for the long-running create operation and yields the resulting node.
      return tpuClient.createNodeAsync(request).get();
    }
  }
}
//[END tpu_vm_create_spot]
23 changes: 23 additions & 0 deletions tpu/src/test/java/tpu/TpuVmIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -210,4 +210,27 @@ public void testStopTpuVm() throws IOException, ExecutionException, InterruptedE
assertEquals(returnedNode, mockNode);
}
}

@Test
public void testCreateSpotTpuVm() throws Exception {
  // Intercept the static TpuClient.create() factory for the duration of this test.
  try (MockedStatic<TpuClient> tpuClientStatic = mockStatic(TpuClient.class)) {
    TpuClient clientStub = mock(TpuClient.class);
    OperationFuture pendingOperation = mock(OperationFuture.class);
    Node stubbedNode = mock(Node.class);

    // Wire the chain used by the sample: create() -> createNodeAsync() -> get().
    when(pendingOperation.get()).thenReturn(stubbedNode);
    when(clientStub.createNodeAsync(any(CreateNodeRequest.class)))
        .thenReturn(pendingOperation);
    tpuClientStatic.when(TpuClient::create).thenReturn(clientStub);

    Node returnedNode =
        CreateSpotTpuVm.createSpotTpuVm(
            PROJECT_ID, ZONE, NODE_NAME, TPU_TYPE, TPU_SOFTWARE_VERSION);

    // The sample must issue exactly one create request and wait on its future exactly once.
    verify(clientStub, times(1))
        .createNodeAsync(any(CreateNodeRequest.class));
    verify(pendingOperation, times(1)).get();
    // The node handed back must be the one produced by the stubbed future.
    assertEquals(returnedNode, stubbedNode);
  }
}
}
Loading