
Commit b0588cc

minor cosmetic updates to lpg and checkpoint conversion syntax (#846)
minor updates to lpg and checkpoint conversion
1 parent 559e4c8 commit b0588cc

File tree: 3 files changed, +12 −6 lines changed

benchmarks/benchmark/tools/profile-generator/README.md (+9 −3)

@@ -25,7 +25,7 @@ It currently supports the following frameworks:
 - text generation inference (tgi)
 - vllm
 - sax
--jetstream
+- jetstream

 ## Instructions

@@ -50,6 +50,12 @@ not logged into gcloud, run the following:
 gcloud auth application-default login
 ```

+If you do not already have an output bucket, create one by running:
+
+```
+gcloud storage buckets create gs://OUTPUT_BUCKET
+```
+
 To give viewer permissions on the gcs bucket to the gcloud service account,
 run the following:

@@ -88,13 +94,13 @@ gcloud artifacts repositories create ai-benchmark --location=us-central1 --repos

 ### Step 4: create and configure terraform.tfvars

-Create a `terraform.tfvars` file. `./sample-tfvars` is provided as an example
+Create a `terraform.tfvars` file. `./sample.tfvars` is provided as an example
 file. You can copy the file as a starting point.
 Note that at a minimum you will have to change the existing
 `credentials_config`, `project_id`, and `artifact_registry`.

 ```bash
-cp ./sample-tfvars terraform.tfvars
+cp ./sample.tfvars terraform.tfvars
 ```

 Fill out your `terraform.tfvars` with the desired model and server configuration, referring to the list of required and optional variables [here](#variables). The following variables are required:

benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py (+2 −2)

@@ -555,8 +555,8 @@ def main(args: argparse.Namespace):

   # NOTE: The latency below includes requests awaiting time on server side.
   # It's not comparable with the model inference latency for batch size 1.
-  **(get_stats_for_set("latency", "Average milliseconds/request (includes waiting time on server)" ,[1000 * latency for _, _, latency in REQUEST_LATENCY])),
-  **(get_stats_for_set("per_output_token_latency", "Average milliseconds/output_token (includes waiting time on server)", [1000 * latency / output_len for _, output_len, latency in REQUEST_LATENCY])),
+  **(get_stats_for_set("latency", "milliseconds/request (includes waiting time on server)" ,[1000 * latency for _, _, latency in REQUEST_LATENCY])),
+  **(get_stats_for_set("per_output_token_latency", "milliseconds/output_token (includes waiting time on server)", [1000 * latency / output_len for _, output_len, latency in REQUEST_LATENCY])),
   **(get_stats_for_set("input_len", "input length", [float(prompt_len) for prompt_len, _, _ in REQUEST_LATENCY])),
   **(get_stats_for_set("output_len", "output length", [float(output_len) for _, output_len, _ in REQUEST_LATENCY]))
 }
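The label change drops "Average" because each metric expands to several statistics, not a single average. The real `get_stats_for_set` is not shown in this diff; the sketch below is a hypothetical stand-in (its key names and choice of statistics are assumptions) that illustrates how a `REQUEST_LATENCY` list of `(prompt_len, output_len, latency)` tuples could feed such a helper:

```python
import statistics

# Hypothetical sketch of a stats helper like get_stats_for_set: one metric
# name fans out into several keyed statistics, so the human-readable
# description gives only the unit, not "Average ...".
def get_stats_for_set(name, description, points):
    return {
        f"{name}_description": description,
        f"avg_{name}": statistics.mean(points),
        f"median_{name}": statistics.median(points),
        f"min_{name}": min(points),
        f"max_{name}": max(points),
    }

# Example data: (prompt_len, output_len, latency_in_seconds) per request.
REQUEST_LATENCY = [(10, 5, 0.8), (12, 7, 1.2), (9, 6, 1.0)]

# Per-request latency in milliseconds, as in the diff above.
stats = get_stats_for_set(
    "latency",
    "milliseconds/request (includes waiting time on server)",
    [1000 * latency for _, _, latency in REQUEST_LATENCY],
)
print(stats["avg_latency"])  # 1000.0
```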

tutorials-and-examples/inference-servers/checkpoints/checkpoint_converter.sh (+1 −1)

@@ -114,7 +114,7 @@ convert_maxtext_checkpoint() {
   git checkout ${VERSION}
   python3 -m pip install -r requirements.txt

-  if [ $VERSION == "jetstream-v0.2.2" || $VERSION == "jetstream-v0.2.1" || $VERSION == "jetstream-v0.2.0" ]; then
+  if [[ $VERSION == "jetstream-v0.2.2" || $VERSION == "jetstream-v0.2.1" || $VERSION == "jetstream-v0.2.0" ]]; then
     pip3 install orbax-checkpoint==0.5.20
   else
     pip3 install orbax-checkpoint==0.6.0
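This one-character-pair fix matters because `||` is not an operator inside the POSIX `[` builtin: Bash parses it as a command separator, so the pre-fix line fails at runtime with `[: missing ']'`. A minimal sketch of the difference (the version string here is only an illustration):

```shell
#!/usr/bin/env bash
VERSION="jetstream-v0.2.1"

# Broken (pre-fix): with single brackets, bash splits the line at '||' and
# runs '[ $VERSION == "jetstream-v0.2.2"' as its own command, which errors
# with "[: missing `]'". Left commented out on purpose:
# if [ $VERSION == "jetstream-v0.2.2" || $VERSION == "jetstream-v0.2.1" ]; then

# Fixed: the [[ ]] conditional keyword supports || (and &&) natively.
if [[ $VERSION == "jetstream-v0.2.2" || $VERSION == "jetstream-v0.2.1" ]]; then
  echo "older JetStream release: pin orbax-checkpoint==0.5.20"
else
  echo "newer JetStream release: use orbax-checkpoint==0.6.0"
fi
```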
