diff --git a/dags/inference/maxtext_inference.py b/dags/inference/maxtext_inference.py index d0219fa14..e117aff50 100644 --- a/dags/inference/maxtext_inference.py +++ b/dags/inference/maxtext_inference.py @@ -457,7 +457,7 @@ "quant_mode": W_INT8_KV_INT8, "quantization": "int8", "quantize_kvcache": "true", - "per_device_batch_size": 258, + "per_device_batch_size": 128, "kv_quant_axis": "heads_and_dkv", "run_eval": True, },