|
6 | 6 | "source": [
|
7 | 7 | "# Test Training Operator Integration\n",
|
8 | 8 | "\n",
|
9 |
| - "This example notebook is loosely based on [this](https://github.com/kubeflow/training-operator/blob/master/sdk/python/examples/kubeflow-tfjob-sdk.ipynb) upstream example.\n", |
| 9 | + "This example notebook is loosely based on the following upstream examples:\n", |
| 10 | + "* [TFJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/tensorflow/image-classification/create-tfjob.ipynb)\n", |
| 11 | + "* [PyTorchJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/pytorch/image-classification/create-pytorchjob.ipynb)\n", |
| 12 | + "* [PaddleJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/paddlepaddle/simple-cpu.yaml)\n", |
10 | 13 | "\n",
|
11 |
| - "- create training job of type: TFJob, PyTorchJob, and PaddleJob\n", |
| 14 | + "Note that the examples above can drift out of sync with the tests upstream actually runs, so also check the [upstream E2E tests](https://github.com/kubeflow/training-operator/tree/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/sdk/python/test/e2e) when updating this notebook.\n", |
| 15 | + "\n", |
| 16 | + "The workflow for each job (TFJob, PyTorchJob, and PaddleJob) is:\n", |
| 17 | + "- create training job\n", |
12 | 18 | "- monitor its execution\n",
|
13 | 19 | "- get training logs\n",
|
14 | 20 | "- delete job"
|
|
142 | 148 | "source": [
|
143 | 149 | "### Define a TFJob\n",
|
144 | 150 | "\n",
|
145 |
| - "Define a TFJob object before deploying it. This TFJob is similar to [this](https://github.com/kubeflow/training-operator/blob/master/sdk/python/examples/kubeflow-tfjob-sdk.ipynb) example." |
| 151 | + "Define a TFJob object before deploying it." |
146 | 152 | ]
|
147 | 153 | },
|
148 | 154 | {
|
|
411 | 417 | "source": [
|
412 | 418 | "PYTORCHJOB_NAME = \"pytorch-dist-mnist-gloo\"\n",
|
413 | 419 | "PYTORCHJOB_CONTAINER = \"pytorch\"\n",
|
414 |
| - "PYTORCHJOB_IMAGE = \"gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0\"" |
| 420 | + "PYTORCHJOB_IMAGE = \"kubeflow/pytorch-dist-mnist:v1-3a360ba\"\n", |
| 421 | + "# Update the image above on each release to the latest tag available in the registry." |
415 | 422 | ]
|
416 | 423 | },
|
417 | 424 | {
|
|
633 | 640 | "source": [
|
634 | 641 | "### Define a PaddleJob\n",
|
635 | 642 | "\n",
|
636 |
| - "Define a PaddleJob object before deploying it. This PaddleJob is loosely based on [this](https://github.com/kubeflow/training-operator/blob/11b7a115e6538caeab405344af98f0d5b42a4c96/examples/paddlepaddle/simple-cpu.yaml) example." |
| 643 | + "Define a PaddleJob object before deploying it." |
637 | 644 | ]
|
638 | 645 | },
|
639 | 646 | {
|
|
644 | 651 | "source": [
|
645 | 652 | "PADDLEJOB_NAME = \"paddle-simple-cpu\"\n",
|
646 | 653 | "PADDLEJOB_CONTAINER = \"paddle\"\n",
|
647 |
| - "PADDLEJOB_IMAGE = \"registry.baidubce.com/paddlepaddle/paddle:2.4.0rc0-cpu\"" |
| 654 | + "PADDLEJOB_IMAGE = \"docker.io/paddlepaddle/paddle:2.4.0rc0-cpu\"" |
648 | 655 | ]
|
649 | 656 | },
|
650 | 657 | {
|
|
0 commit comments