@@ -239,6 +239,7 @@ def tune_model(
         accelerator_count: Optional[int] = None,
         accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
         max_context_length: Optional[str] = None,
+        output_dimensionality: Optional[int] = None,
     ) -> "_LanguageModelTuningJob":
         """Tunes a model based on training data.
@@ -273,6 +274,8 @@ def tune_model(
             accelerator_type: Type of accelerator to use. Type can be "TPU" or "GPU". Type is ignored, if accelerator is specified.
             max_context_length: The max context length used for tuning.
                 Can be either '8k' or '32k'
+            output_dimensionality: The output dimensionality of the tuned model,
+                for text embedding tuning.

         Returns:
             A `LanguageModelTuningJob` object that represents the tuning job.
@@ -293,6 +296,8 @@ def tune_model(
             tuning_parameters["batch_size"] = batch_size
         if train_steps is not None:
             tuning_parameters["train_steps"] = train_steps
+        if output_dimensionality is not None:
+            tuning_parameters["output_dimensionality"] = output_dimensionality
         if learning_rate is not None:
             _LOGGER.warning(
                 "The learning_rate parameter is deprecated."
@@ -2189,7 +2194,7 @@ async def get_embeddings_async(
 # for corpus, queries, test and validation data.
 # TODO(b/625884109): Validate input args, batch_size >0 and train_steps >30, and
 # task_type must be 'DEFAULT' or None if _model_id is textembedding-gecko@001.
-class _TunableTextEmbeddingModelMixin(_TunableModelMixin):
+class _PreviewTunableTextEmbeddingModelMixin(_TunableModelMixin):
     @classmethod
     def get_tuned_model(cls, *args, **kwargs):
         del args, kwargs  # Unused.
@@ -2213,7 +2218,9 @@ def tune_model(
         machine_type: Optional[str] = None,
         accelerator: Optional[str] = None,
         accelerator_count: Optional[int] = None,
-    ) -> "_LanguageModelTuningJob":
+        output_dimensionality: Optional[int] = None,
+        learning_rate_multiplier: Optional[float] = None,
+    ) -> "_TextEmbeddingModelTuningJob":
         """Tunes a model based on training data.

         This method launches and returns an asynchronous model tuning job.
@@ -2229,14 +2236,30 @@ def tune_model(
             queries_data: URI pointing to data in JSON lines format.
             test_data: URI pointing to data in TSV format.
             validation_data: URI pointing to data in TSV format.
-            batch_size: Size of batch.
-            train_steps: Number of training batches to tune on.
+            batch_size: The training batch size.
+            train_steps: The number of steps to perform model tuning. Must
+                be greater than 30.
             tuned_model_location: GCP location where the tuned model should be deployed.
             model_display_name: Custom display name for the tuned model.
-            task_type: Type of task. Can be "RETRIEVAL_QUERY", "RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", or "FACT_VERIFICATION".
-            machine_type: Machine type. E.g., "a2-highgpu-1g". See also: https://cloud.google.com/vertex-ai/docs/training/configure-compute.
-            accelerator_count: Count of accelerators.
-            accelerator: Kind of accelerator. E.g., "NVIDIA_TESLA_A100". See also: https://cloud.google.com/vertex-ai/docs/training/configure-compute.
+            task_type: The task type expected to be used during inference.
+                Valid values are `DEFAULT`, `RETRIEVAL_QUERY`, `RETRIEVAL_DOCUMENT`,
+                `SEMANTIC_SIMILARITY`, `CLASSIFICATION`, `CLUSTERING`,
+                `FACT_VERIFICATION`, and `QUESTION_ANSWERING`.
+            machine_type: The machine type to use for training. For information
+                about selecting the machine type that matches the accelerator
+                type and count you have selected, see
+                https://cloud.google.com/compute/docs/gpus.
+            accelerator: The accelerator type to use for tuning, for example
+                `NVIDIA_TESLA_V100`. For possible values, see
+                https://cloud.google.com/vertex-ai/generative-ai/docs/models/tune-embeddings#using-accelerators.
+            accelerator_count: The number of accelerators to use when training.
+                Using a greater number of accelerators may make training faster,
+                but has no effect on quality.
+            output_dimensionality: The desired embedding dimension of your
+                tuned model, up to 768. This is only supported for models
+                `text-embedding-004` and `text-multilingual-embedding-002`.
+            learning_rate_multiplier: A multiplier to apply to the
+                recommended learning rate during tuning.

         Returns:
             A `LanguageModelTuningJob` object that represents the tuning job.
             Calling `job.result()` blocks until the tuning is complete and returns a `LanguageModel` object.
@@ -2260,6 +2283,8 @@ def tune_model(
             machine_type=machine_type,
             accelerator=accelerator,
             accelerator_count=accelerator_count,
+            output_dimensionality=output_dimensionality,
+            learning_rate_multiplier=learning_rate_multiplier,
         )

     def _bundle_up_tuning_job(self, pipeline_job):
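As a hedged usage sketch of the preview surface after this change (project initialization elided; the model ID and GCS URIs are illustrative, not prescribed by the diff):

```python
# Illustrative call exercising the two preview-only kwargs added above.
from vertexai.preview.language_models import TextEmbeddingModel

model = TextEmbeddingModel.from_pretrained("text-embedding-004")
tuning_job = model.tune_model(
    training_data="gs://my-bucket/train.tsv",         # hypothetical URI
    validation_data="gs://my-bucket/validation.tsv",  # hypothetical URI
    task_type="RETRIEVAL_QUERY",
    train_steps=100,               # docstring requires > 30
    output_dimensionality=256,     # up to 768 on supported models
    learning_rate_multiplier=0.5,  # scales the recommended learning rate
)
tuned_model = tuning_job.get_tuned_model()  # blocks until tuning completes
```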
@@ -2318,14 +2343,95 @@ def deploy_tuned_model(
         return model


+class _TunableTextEmbeddingModelMixin(_PreviewTunableTextEmbeddingModelMixin):
+    def tune_model(
+        self,
+        *,
+        training_data: Optional[str] = None,
+        corpus_data: Optional[str] = None,
+        queries_data: Optional[str] = None,
+        test_data: Optional[str] = None,
+        validation_data: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        train_steps: Optional[int] = None,
+        tuned_model_location: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        task_type: Optional[str] = None,
+        machine_type: Optional[str] = None,
+        accelerator: Optional[str] = None,
+        accelerator_count: Optional[int] = None,
+    ) -> "_TextEmbeddingModelTuningJob":
+        """Tunes a model based on training data.
+
+        This method launches and returns an asynchronous model tuning job.
+        Usage:
+        ```
+        tuning_job = model.tune_model(...)
+        ... do some other work
+        tuned_model = tuning_job.get_tuned_model()  # Blocks until tuning is complete
+        ```
+
+        Args:
+            training_data: URI pointing to training data in TSV format.
+            corpus_data: URI pointing to data in JSON lines format.
+            queries_data: URI pointing to data in JSON lines format.
+            test_data: URI pointing to data in TSV format.
+            validation_data: URI pointing to data in TSV format.
+            batch_size: The training batch size.
+            train_steps: The number of steps to perform model tuning. Must
+                be greater than 30.
+            tuned_model_location: GCP location where the tuned model should be deployed.
+            model_display_name: Custom display name for the tuned model.
+            task_type: The task type expected to be used during inference.
+                Valid values are `DEFAULT`, `RETRIEVAL_QUERY`, `RETRIEVAL_DOCUMENT`,
+                `SEMANTIC_SIMILARITY`, `CLASSIFICATION`, `CLUSTERING`,
+                `FACT_VERIFICATION`, and `QUESTION_ANSWERING`.
+            machine_type: The machine type to use for training. For information
+                about selecting the machine type that matches the accelerator
+                type and count you have selected, see
+                https://cloud.google.com/compute/docs/gpus.
+            accelerator: The accelerator type to use for tuning, for example
+                `NVIDIA_TESLA_V100`. For possible values, see
+                https://cloud.google.com/vertex-ai/generative-ai/docs/models/tune-embeddings#using-accelerators.
+            accelerator_count: The number of accelerators to use when training.
+                Using a greater number of accelerators may make training faster,
+                but has no effect on quality.
+
+        Returns:
+            A `LanguageModelTuningJob` object that represents the tuning job.
+            Calling `job.result()` blocks until the tuning is complete and
+            returns a `LanguageModel` object.
+
+        Raises:
+            ValueError: If the provided parameter combinations or values are not
+                supported.
+            RuntimeError: If the model does not support tuning.
+        """
+
+        return super().tune_model(
+            training_data=training_data,
+            corpus_data=corpus_data,
+            queries_data=queries_data,
+            test_data=test_data,
+            validation_data=validation_data,
+            task_type=task_type,
+            batch_size=batch_size,
+            train_steps=train_steps,
+            tuned_model_location=tuned_model_location,
+            model_display_name=model_display_name,
+            machine_type=machine_type,
+            accelerator=accelerator,
+            accelerator_count=accelerator_count,
+        )
+
+
 class TextEmbeddingModel(_TextEmbeddingModel, _TunableTextEmbeddingModelMixin):
     __module__ = "vertexai.language_models"


 class _PreviewTextEmbeddingModel(
-    TextEmbeddingModel,
+    _TextEmbeddingModel,
     _ModelWithBatchPredict,
     _CountTokensMixin,
+    _PreviewTunableTextEmbeddingModelMixin,
 ):
     __name__ = "TextEmbeddingModel"
     __module__ = "vertexai.preview.language_models"