Skip to content

feat: New Queued resources Samples: Create, Create Spot, Get, Delete operations + tests #12716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tpu/create_tpu_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def create_cloud_tpu_with_topology(
node = tpu_v2.Node()
# Here we are creating a TPU v2-8 with 2x2 topology.
node.accelerator_config = tpu_v2.AcceleratorConfig(
type_=tpu_v2.AcceleratorConfig.Type.V3,
type_=tpu_v2.AcceleratorConfig.Type.V2,
topology="2x2",
)
node.runtime_version = runtime_version
Expand Down
2 changes: 1 addition & 1 deletion tpu/delete_tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ def delete_cloud_tpu(project_id: str, zone: str, tpu_name: str = "tpu-name") ->
if __name__ == "__main__":
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
ZONE = "us-central1-b"
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name12")
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name")
80 changes: 80 additions & 0 deletions tpu/queued_resources_create.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node


def create_queued_resource(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource that requests a single TPU node.

    Args:
        project_id: ID of the Google Cloud project used in the parent path.
        zone: Zone used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node described in the request.
        tpu_type: Accelerator type set on the node.
        runtime_version: Runtime version set on the node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.

    NOTE(review): the return annotation says ``Node``, but the code reads
    ``name`` and ``state.state`` from the result, which suggests it is the
    queued resource itself -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_create]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The same location path is used for the node and for the request.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    node.runtime_version = runtime_version

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    # The request type is referenced through the module imported above so
    # that the snippet between the region tags is self-contained.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create]
    return response


if __name__ == "__main__":
    # Script entry point: create a queued resource in the project named by
    # the GOOGLE_CLOUD_PROJECT environment variable.
    create_queued_resource(
        project_id=os.environ.get("GOOGLE_CLOUD_PROJECT"),
        zone="us-central1-b",
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
    )
90 changes: 90 additions & 0 deletions tpu/queued_resources_create_network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node


def create_queued_resource_network(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
    network: str = "default",
) -> Node:
    """Create a queued resource whose TPU node has an explicit network config.

    Args:
        project_id: ID of the Google Cloud project used in the parent path.
        zone: Zone used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node described in the request.
        tpu_type: Accelerator type set on the node.
        runtime_version: Runtime version set on the node.
        queued_resource_name: ID of the queued resource to create.
        network: Network name placed in the node's network configuration.

    Returns:
        The result of the create operation.

    NOTE(review): the return annotation says ``Node``, but the code reads
    ``name`` and ``tpu.node_spec`` from the result, which suggests it is the
    queued resource itself -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_network]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"
    # network = "default"

    # The same location path is used for the node and for the request.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    node.runtime_version = runtime_version
    # Setting network configuration
    node.network_config = tpu_v2alpha1.NetworkConfig(
        network=network,  # Update if you want to use a specific network
        subnetwork="default",  # Update if you want to use a specific subnetwork
        enable_external_ips=True,
        can_ip_forward=True,
    )

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    # The request type is referenced through the module imported above so
    # that the snippet between the region tags is self-contained.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.tpu.node_spec[0].node.network_config)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # network: "default"
    # subnetwork: "default"
    # enable_external_ips: true
    # can_ip_forward: true

    # [END tpu_queued_resources_network]
    return response


if __name__ == "__main__":
    # Script entry point: create a queued resource with a network config in
    # the project named by the GOOGLE_CLOUD_PROJECT environment variable.
    create_queued_resource_network(
        project_id=os.environ.get("GOOGLE_CLOUD_PROJECT"),
        zone="us-central1-b",
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
    )
82 changes: 82 additions & 0 deletions tpu/queued_resources_create_spot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node


def create_queued_resource_spot(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource for a single TPU node, marked as spot.

    Args:
        project_id: ID of the Google Cloud project used in the parent path.
        zone: Zone used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node described in the request.
        tpu_type: Accelerator type set on the node.
        runtime_version: Runtime version set on the node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.

    NOTE(review): the return annotation says ``Node``, but the code reads
    ``name`` and ``state.state`` from the result, which suggests it is the
    queued resource itself -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_create_spot]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The same location path is used for the node and for the request.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    node.runtime_version = runtime_version

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])
    # Create a spot resource
    resource.spot = tpu_v2alpha1.QueuedResource.Spot()

    # The request type is referenced through the module imported above so
    # that the snippet between the region tags is self-contained.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)
    response = operation.result()

    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create_spot]
    return response


if __name__ == "__main__":
    # Script entry point: create a spot queued resource in the project named
    # by the GOOGLE_CLOUD_PROJECT environment variable.
    create_queued_resource_spot(
        project_id=os.environ.get("GOOGLE_CLOUD_PROJECT"),
        zone="us-central1-b",
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
    )
93 changes: 93 additions & 0 deletions tpu/queued_resources_create_startup_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node


def create_queued_resource_startup_script(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource whose TPU node carries a startup script.

    The script is attached to the node as the "startup-script" metadata
    entry, and external IPs are enabled in the node's network configuration.

    Args:
        project_id: ID of the Google Cloud project used in the parent path.
        zone: Zone used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node described in the request.
        tpu_type: Accelerator type set on the node.
        runtime_version: Runtime version set on the node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.

    NOTE(review): the return annotation says ``Node``, but the code reads
    ``name`` and ``tpu.node_spec`` from the result, which suggests it is the
    queued resource itself -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_startup_script]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The same location path is used for the node and for the request.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    node.runtime_version = runtime_version
    # This startup script updates numpy to the latest version and logs the output to a file.
    # It is written one line per literal so the shebang stays at the very
    # start of the script regardless of how this code is indented.
    startup_script = (
        "#!/bin/bash\n"
        'echo "Hello World" > /var/log/hello.log\n'
        "sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1\n"
    )
    node.metadata = {"startup-script": startup_script}
    # Enabling external IPs for internet access from the TPU node for updating numpy
    node.network_config = tpu_v2alpha1.NetworkConfig(
        enable_external_ips=True,
    )

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    # The request type is referenced through the module imported above so
    # that the snippet between the region tags is self-contained.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.tpu.node_spec[0].node.metadata)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # {'startup-script': '#!/bin/bash\necho "Hello World" > /var/log/hello.log\n
    # sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1\n'}

    # [END tpu_queued_resources_startup_script]
    return response


if __name__ == "__main__":
    # Script entry point: create a queued resource with a startup script in
    # the project named by the GOOGLE_CLOUD_PROJECT environment variable.
    create_queued_resource_startup_script(
        project_id=os.environ.get("GOOGLE_CLOUD_PROJECT"),
        zone="us-central1-b",
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
    )
Loading