
Commit 60d540a

b4sjoo authored and rithin-pullela-aws committed
Use model type to check local or remote model (opensearch-project#3597)
* use model type to check local or remote model
* spotless
* Ignore test resource
* Add java doc
* Handle when model not in cache
* Handle when model not in cache

Signed-off-by: Sicheng Song <[email protected]>
(cherry picked from commit 696b1e1)
1 parent f68b0ed commit 60d540a

File tree

2 files changed (+34, -23 lines)

plugin/src/main/java/org/opensearch/ml/rest/RestMLPredictionAction.java (+27, -18)

@@ -18,6 +18,7 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.Optional;
 
 import org.opensearch.client.node.NodeClient;
@@ -82,27 +83,30 @@ public List<Route> routes() {
 
     @Override
     public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
-        String algorithm = request.param(PARAMETER_ALGORITHM);
+        String userAlgorithm = request.param(PARAMETER_ALGORITHM);
         String modelId = getParameterId(request, PARAMETER_MODEL_ID);
         Optional<FunctionName> functionName = modelManager.getOptionalModelFunctionName(modelId);
 
-        if (algorithm == null && functionName.isPresent()) {
-            algorithm = functionName.get().name();
-        }
-
-        if (algorithm != null) {
-            MLPredictionTaskRequest mlPredictionTaskRequest = getRequest(modelId, algorithm, request);
-            return channel -> client
-                .execute(MLPredictionTaskAction.INSTANCE, mlPredictionTaskRequest, new RestToXContentListener<>(channel));
+        // check if the model is in cache
+        if (functionName.isPresent()) {
+            MLPredictionTaskRequest predictionRequest = getRequest(
+                modelId,
+                functionName.get().name(),
+                Objects.requireNonNullElse(userAlgorithm, functionName.get().name()),
+                request
+            );
+            return channel -> client.execute(MLPredictionTaskAction.INSTANCE, predictionRequest, new RestToXContentListener<>(channel));
         }
 
+        // If the model isn't in cache
         return channel -> {
             ActionListener<MLModel> listener = ActionListener.wrap(mlModel -> {
-                String algoName = mlModel.getAlgorithm().name();
+                String modelType = mlModel.getAlgorithm().name();
+                String modelAlgorithm = Objects.requireNonNullElse(userAlgorithm, mlModel.getAlgorithm().name());
                 client
                     .execute(
                         MLPredictionTaskAction.INSTANCE,
-                        getRequest(modelId, algoName, request),
+                        getRequest(modelId, modelType, modelAlgorithm, request),
                         new RestToXContentListener<>(channel)
                     );
             }, e -> {
@@ -120,17 +124,22 @@ public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client
     }
 
     /**
-     * Creates a MLPredictionTaskRequest from a RestRequest
+     * Creates a MLPredictionTaskRequest from a RestRequest. This method validates the request based on
+     * enabled features and model types, and parses the input data for prediction.
      *
-     * @param request RestRequest
-     * @return MLPredictionTaskRequest
+     * @param modelId The ID of the ML model to use for prediction
+     * @param modelType The type of the ML model, extracted from the model cache to specify whether it is a remote model or a local model
+     * @param userAlgorithm The algorithm specified by the user for prediction; this is used to determine the interface of the model
+     * @param request The REST request containing prediction input data
+     * @return MLPredictionTaskRequest configured with the model and input parameters
      */
     @VisibleForTesting
-    MLPredictionTaskRequest getRequest(String modelId, String algorithm, RestRequest request) throws IOException {
+    MLPredictionTaskRequest getRequest(String modelId, String modelType, String userAlgorithm, RestRequest request) throws IOException {
         ActionType actionType = ActionType.from(getActionTypeFromRestRequest(request));
-        if (FunctionName.REMOTE.name().equals(algorithm) && !mlFeatureEnabledSetting.isRemoteInferenceEnabled()) {
+        if (FunctionName.REMOTE.name().equals(modelType) && !mlFeatureEnabledSetting.isRemoteInferenceEnabled()) {
             throw new IllegalStateException(REMOTE_INFERENCE_DISABLED_ERR_MSG);
-        } else if (FunctionName.isDLModel(FunctionName.from(algorithm.toUpperCase())) && !mlFeatureEnabledSetting.isLocalModelEnabled()) {
+        } else if (FunctionName.isDLModel(FunctionName.from(modelType.toUpperCase(Locale.ROOT)))
+            && !mlFeatureEnabledSetting.isLocalModelEnabled()) {
             throw new IllegalStateException(LOCAL_MODEL_DISABLED_ERR_MSG);
         } else if (ActionType.BATCH_PREDICT == actionType && !mlFeatureEnabledSetting.isOfflineBatchInferenceEnabled()) {
             throw new IllegalStateException(BATCH_INFERENCE_DISABLED_ERR_MSG);
@@ -140,7 +149,7 @@ MLPredictionTaskRequest getRequest(String modelId, String algorithm, RestRequest
 
         XContentParser parser = request.contentParser();
         ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
-        MLInput mlInput = MLInput.parse(parser, algorithm, actionType);
+        MLInput mlInput = MLInput.parse(parser, userAlgorithm, actionType);
         return new MLPredictionTaskRequest(modelId, mlInput, null);
     }
 
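For context on the behavior this change introduces: the feature-flag checks in getRequest now key off the model type (taken from the model cache, or from the fetched MLModel when the cache misses), while MLInput parsing keys off the user-supplied algorithm, which defaults to the model type when the request omits the algorithm parameter. The sketch below only illustrates that Objects.requireNonNullElse fallback; the resolveAlgorithm helper and the class name are hypothetical and not part of the plugin.

import java.util.Objects;

public final class AlgorithmFallbackSketch {

    // Hypothetical helper mirroring the fallback used in prepareRequest: the
    // user-supplied algorithm wins, and the cached model type is the default.
    static String resolveAlgorithm(String userAlgorithm, String modelType) {
        return Objects.requireNonNullElse(userAlgorithm, modelType);
    }

    public static void main(String[] args) {
        String cachedModelType = "REMOTE"; // function name found in the model cache
        // Request without an ?algorithm= parameter: parsing falls back to the model type.
        System.out.println(resolveAlgorithm(null, cachedModelType));             // REMOTE
        // Request that sets the algorithm explicitly: the user's choice is kept.
        System.out.println(resolveAlgorithm("text_embedding", cachedModelType)); // text_embedding
    }
}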

plugin/src/test/java/org/opensearch/ml/rest/RestMLPredictionActionTests.java (+7, -5)

@@ -69,7 +69,7 @@ public class RestMLPredictionActionTests extends OpenSearchTestCase {
     @Before
     public void setup() {
         MockitoAnnotations.openMocks(this);
-        when(modelManager.getOptionalModelFunctionName(anyString())).thenReturn(Optional.empty());
+        when(modelManager.getOptionalModelFunctionName(anyString())).thenReturn(Optional.of(FunctionName.REMOTE));
         when(mlFeatureEnabledSetting.isRemoteInferenceEnabled()).thenReturn(true);
         when(mlFeatureEnabledSetting.isLocalModelEnabled()).thenReturn(true);
         restMLPredictionAction = new RestMLPredictionAction(modelManager, mlFeatureEnabledSetting);
@@ -121,7 +121,8 @@ public void testRoutes_Batch() {
 
     public void testGetRequest() throws IOException {
         RestRequest request = getRestRequest_PredictModel();
-        MLPredictionTaskRequest mlPredictionTaskRequest = restMLPredictionAction.getRequest("modelId", FunctionName.KMEANS.name(), request);
+        MLPredictionTaskRequest mlPredictionTaskRequest = restMLPredictionAction
+            .getRequest("modelId", FunctionName.KMEANS.name(), FunctionName.KMEANS.name(), request);
 
         MLInput mlInput = mlPredictionTaskRequest.getMlInput();
         verifyParsedKMeansMLInput(mlInput);
@@ -133,7 +134,8 @@ public void testGetRequest_RemoteInferenceDisabled() throws IOException {
 
         when(mlFeatureEnabledSetting.isRemoteInferenceEnabled()).thenReturn(false);
         RestRequest request = getRestRequest_PredictModel();
-        MLPredictionTaskRequest mlPredictionTaskRequest = restMLPredictionAction.getRequest("modelId", FunctionName.REMOTE.name(), request);
+        MLPredictionTaskRequest mlPredictionTaskRequest = restMLPredictionAction
+            .getRequest("modelId", FunctionName.REMOTE.name(), "text_embedding", request);
     }
 
     public void testGetRequest_LocalModelInferenceDisabled() throws IOException {
@@ -143,7 +145,7 @@ public void testGetRequest_LocalModelInferenceDisabled() throws IOException {
         when(mlFeatureEnabledSetting.isLocalModelEnabled()).thenReturn(false);
         RestRequest request = getRestRequest_PredictModel();
         MLPredictionTaskRequest mlPredictionTaskRequest = restMLPredictionAction
-            .getRequest("modelId", FunctionName.TEXT_EMBEDDING.name(), request);
+            .getRequest("modelId", FunctionName.TEXT_EMBEDDING.name(), "text_embedding", request);
     }
 
     public void testPrepareRequest() throws Exception {
@@ -182,7 +184,7 @@ public void testPrepareBatchRequest_WrongActionType() throws Exception {
         thrown.expectMessage("Wrong Action Type");
 
         RestRequest request = getBatchRestRequest_WrongActionType();
-        restMLPredictionAction.getRequest("model id", "remote", request);
+        restMLPredictionAction.getRequest("model id", "remote", "text_embedding", request);
     }
 
     @Ignore
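Because the shared setup() stub now returns Optional.of(FunctionName.REMOTE), the existing prepareRequest tests exercise the cache-hit branch by default. Below is a hypothetical sketch (not part of this commit) of a test that exercises the cache-miss branch instead; it assumes it is added to RestMLPredictionActionTests and that channel and client are the RestChannel and NodeClient mocks the other prepareRequest tests already use.

    // Hypothetical test sketch: override the shared stub so prepareRequest takes the
    // cache-miss branch and fetches the model before dispatching the prediction request.
    public void testPrepareRequest_ModelNotInCache() throws Exception {
        when(modelManager.getOptionalModelFunctionName(anyString())).thenReturn(Optional.empty());
        RestRequest request = getRestRequest_PredictModel();
        restMLPredictionAction.handleRequest(request, channel, client);
        // With no cached function name, the action should look the model up first and only
        // then dispatch MLPredictionTaskAction with the resolved model type and algorithm.
    }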
