opensearch-project
diff --git a/CHANGELOG.md
Lines changed: 1 addition & 0 deletions b/CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/RestNeuralStatsActionIT.java
Lines changed: 87 additions & 99 deletions b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/RestNeuralStatsActionIT.java
Lines changed: 87 additions & 99 deletions
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessor.java
Lines changed: 20 additions & 5 deletions b/src/main/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessor.java
Lines changed: 20 additions & 5 deletions
diff --git a/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java
Lines changed: 6 additions & 11 deletions b/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java
Lines changed: 6 additions & 11 deletions
@@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - [Stats] Add stats for text embedding processor with different settings ([#1332](https://github.com/opensearch-project/neural-search/pull/1332))
 - Validate model id and analyzer should not be provided at the same time for the neural sparse query ([#1359](https://github.com/opensearch-project/neural-search/pull/1359))
 - [Stats] Add stats for score based and rank based normalization processors ([#1326](https://github.com/opensearch-project/neural-search/pull/1326))
+- [Stats] Add stats tracking for semantic field ([#1362](https://github.com/opensearch-project/neural-search/pull/1362))
 
 ### Bug Fixes
 - Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))
 
@@ -4,18 +4,6 @@
  */
 package org.opensearch.neuralsearch.bwc.rolling;
 
-import org.opensearch.neuralsearch.stats.events.EventStatName;
-import org.opensearch.neuralsearch.stats.info.InfoStatName;
-
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Map;
-
-import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
-import static org.opensearch.neuralsearch.util.TestUtils.TEXT_EMBEDDING_PROCESSOR;
-import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
-
 public class RestNeuralStatsActionIT extends AbstractRollingUpgradeTestCase {
     private static final String PIPELINE_NAME = "nlp-pipeline-stats";
     private static final String TEST_FIELD = "passage_text";
@@ -32,91 +20,91 @@ public class RestNeuralStatsActionIT extends AbstractRollingUpgradeTestCase {
     // TODO: There is a bug in the stats API which needs to be fixed before enabling the following tests
     // https://github.com/opensearch-project/neural-search/issues/1368
 
-//    public void testStats_E2EFlow() throws Exception {
-//
-//        waitForClusterHealthGreen(NODES_BWC_CLUSTER, 90);
-//        updateClusterSettings("plugins.neural_search.stats_enabled", true);
-//
-//        // Get initial stats
-//        String responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
-//        logger.info("Initial:" + responseBody);
-//        Map<String, Object> infoStats = parseInfoStatsResponse(responseBody);
-//        Map<String, Object> aggregatedNodeStats = parseAggregatedNodeStatsResponse(responseBody);
-//
-//        int numberOfExecution = (int) getNestedValue(aggregatedNodeStats, EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS);
-//        int numberOfProcessor = (int) getNestedValue(infoStats, InfoStatName.TEXT_EMBEDDING_PROCESSORS);
-//
-//        switch (getClusterType()) {
-//            case OLD:
-//                modelId = uploadTextEmbeddingModel();
-//                loadModel(modelId);
-//                createPipelineProcessor(modelId, PIPELINE_NAME);
-//                createIndexWithConfiguration(
-//                    getIndexNameForTest(),
-//                    Files.readString(Path.of(classLoader.getResource("processor/IndexMappings.json").toURI())),
-//                    PIPELINE_NAME
-//                );
-//                addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, null, null);
-//                addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT, null, null);
-//                addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT, null, null);
-//
-//                responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
-//                logger.info("Old after insert:" + responseBody);
-//                assertEquals(
-//                    numberOfExecution + 3,
-//                    getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
-//                );
-//                assertEquals(
-//                    numberOfProcessor + 1,
-//                    getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
-//                );
-//                break;
-//            case MIXED:
-//                modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
-//                loadModel(modelId);
-//                addDocument(getIndexNameForTest(), "3", TEST_FIELD, TEXT_MIXED, null, null);
-//                addDocument(getIndexNameForTest(), "4", TEST_FIELD, TEXT_MIXED, null, null);
-//                addDocument(getIndexNameForTest(), "5", TEST_FIELD, TEXT_MIXED, null, null);
-//
-//                // Get stats
-//                responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
-//                logger.info("Mixed after insert:" + responseBody);
-//
-//                assertEquals(
-//                    numberOfExecution + 3,
-//                    getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
-//                );
-//                assertEquals(
-//                    numberOfProcessor,
-//                    getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
-//                );
-//                break;
-//            case UPGRADED:
-//                try {
-//                    modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
-//                    loadModel(modelId);
-//                    addDocument(getIndexNameForTest(), "6", TEST_FIELD, TEXT_UPGRADED, null, null);
-//                    addDocument(getIndexNameForTest(), "7", TEST_FIELD, TEXT_UPGRADED, null, null);
-//                    addDocument(getIndexNameForTest(), "8", TEST_FIELD, TEXT_UPGRADED, null, null);
-//
-//                    // Get stats
-//                    responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
-//                    logger.info("Upgraded after insert:" + responseBody);
-//
-//                    assertEquals(
-//                        numberOfExecution + 3,
-//                        getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
-//                    );
-//                    assertEquals(
-//                        numberOfProcessor,
-//                        getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
-//                    );
-//                } finally {
-//                    wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
-//                }
-//                break;
-//            default:
-//                throw new IllegalStateException("Unexpected value: " + getClusterType());
-//        }
-//    }
+    // public void testStats_E2EFlow() throws Exception {
+    //
+    // waitForClusterHealthGreen(NODES_BWC_CLUSTER, 90);
+    // updateClusterSettings("plugins.neural_search.stats_enabled", true);
+    //
+    // // Get initial stats
+    // String responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
+    // logger.info("Initial:" + responseBody);
+    // Map<String, Object> infoStats = parseInfoStatsResponse(responseBody);
+    // Map<String, Object> aggregatedNodeStats = parseAggregatedNodeStatsResponse(responseBody);
+    //
+    // int numberOfExecution = (int) getNestedValue(aggregatedNodeStats, EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS);
+    // int numberOfProcessor = (int) getNestedValue(infoStats, InfoStatName.TEXT_EMBEDDING_PROCESSORS);
+    //
+    // switch (getClusterType()) {
+    // case OLD:
+    // modelId = uploadTextEmbeddingModel();
+    // loadModel(modelId);
+    // createPipelineProcessor(modelId, PIPELINE_NAME);
+    // createIndexWithConfiguration(
+    // getIndexNameForTest(),
+    // Files.readString(Path.of(classLoader.getResource("processor/IndexMappings.json").toURI())),
+    // PIPELINE_NAME
+    // );
+    // addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, null, null);
+    // addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT, null, null);
+    // addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT, null, null);
+    //
+    // responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
+    // logger.info("Old after insert:" + responseBody);
+    // assertEquals(
+    // numberOfExecution + 3,
+    // getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
+    // );
+    // assertEquals(
+    // numberOfProcessor + 1,
+    // getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
+    // );
+    // break;
+    // case MIXED:
+    // modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
+    // loadModel(modelId);
+    // addDocument(getIndexNameForTest(), "3", TEST_FIELD, TEXT_MIXED, null, null);
+    // addDocument(getIndexNameForTest(), "4", TEST_FIELD, TEXT_MIXED, null, null);
+    // addDocument(getIndexNameForTest(), "5", TEST_FIELD, TEXT_MIXED, null, null);
+    //
+    // // Get stats
+    // responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
+    // logger.info("Mixed after insert:" + responseBody);
+    //
+    // assertEquals(
+    // numberOfExecution + 3,
+    // getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
+    // );
+    // assertEquals(
+    // numberOfProcessor,
+    // getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
+    // );
+    // break;
+    // case UPGRADED:
+    // try {
+    // modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
+    // loadModel(modelId);
+    // addDocument(getIndexNameForTest(), "6", TEST_FIELD, TEXT_UPGRADED, null, null);
+    // addDocument(getIndexNameForTest(), "7", TEST_FIELD, TEXT_UPGRADED, null, null);
+    // addDocument(getIndexNameForTest(), "8", TEST_FIELD, TEXT_UPGRADED, null, null);
+    //
+    // // Get stats
+    // responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
+    // logger.info("Upgraded after insert:" + responseBody);
+    //
+    // assertEquals(
+    // numberOfExecution + 3,
+    // getNestedValue(parseAggregatedNodeStatsResponse(responseBody), EventStatName.TEXT_EMBEDDING_PROCESSOR_EXECUTIONS)
+    // );
+    // assertEquals(
+    // numberOfProcessor,
+    // getNestedValue(parseInfoStatsResponse(responseBody), InfoStatName.TEXT_EMBEDDING_PROCESSORS)
+    // );
+    // } finally {
+    // wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
+    // }
+    // break;
+    // default:
+    // throw new IllegalStateException("Unexpected value: " + getClusterType());
+    // }
+    // }
 }
@@ -20,6 +20,8 @@
 import org.opensearch.neuralsearch.processor.chunker.Chunker;
 import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
 import org.opensearch.neuralsearch.processor.dto.SemanticFieldInfo;
+import org.opensearch.neuralsearch.stats.events.EventStatName;
+import org.opensearch.neuralsearch.stats.events.EventStatsManager;
 import org.opensearch.neuralsearch.util.TokenWeightUtil;
 import org.opensearch.neuralsearch.util.prune.PruneType;
 import org.opensearch.neuralsearch.util.prune.PruneUtils;
@@ -118,6 +120,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
      */
     @Override
     public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
+        EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
         try {
             unflattenIngestDoc(ingestDocument);
             // Collect all the semantic field info based on the path of semantic fields found in the index mapping
@@ -173,7 +176,10 @@ private void process(
     ) {
         setModelInfo(ingestDocument, semanticFieldInfoList);
 
-        chunk(ingestDocument, semanticFieldInfoList);
+        boolean isChunked = chunk(ingestDocument, semanticFieldInfoList);
+        if (isChunked) {
+            EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
+        }
 
         generateAndSetEmbedding(ingestDocument, semanticFieldInfoList, handler);
     }
@@ -277,12 +283,13 @@ private List<SemanticFieldInfo> getSemanticFieldInfo(IngestDocument ingestDocume
         return semanticFieldInfos;
     }
 
-    private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
+    private boolean chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
         final Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
         int maxTokenCount = getMaxTokenCount(sourceAndMetadataMap, environment.settings(), clusterService);
-
+        boolean isChunked = false;
         for (SemanticFieldInfo semanticFieldInfo : semanticFieldInfoList) {
             if (semanticFieldInfo.getChunkingEnabled()) {
+                isChunked = true;
                 if (semanticFieldInfo.getChunkers() == null || semanticFieldInfo.getChunkers().isEmpty()) {
                     semanticFieldInfo.setChunkers(List.of(defaultTextChunker));
                 }
@@ -294,6 +301,7 @@ private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final
                 semanticFieldInfo.setChunks(List.of(semanticFieldInfo.getValue()));
             }
         }
+        return isChunked;
     }
 
     private void setChunkedText(@NonNull final IngestDocument ingestDocument, @NonNull final SemanticFieldInfo semanticFieldInfo) {
@@ -386,6 +394,7 @@ private void collectSemanticFieldInfo(
 
     @Override
     public void subBatchExecute(List<IngestDocumentWrapper> ingestDocumentWrappers, Consumer<List<IngestDocumentWrapper>> handler) {
+        EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
         if (ingestDocumentWrappers == null || ingestDocumentWrappers.isEmpty()) {
             handler.accept(ingestDocumentWrappers);
             return;
@@ -451,15 +460,17 @@ private void batchProcess(
         @NonNull final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap,
         @NonNull final Consumer<List<IngestDocumentWrapper>> handler
     ) {
-
+        boolean isChunked = false;
         for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
             final IngestDocumentWrapper ingestDocumentWrapper = entry.getKey();
             final IngestDocument ingestDocument = entry.getKey().getIngestDocument();
             final List<SemanticFieldInfo> semanticFieldInfoList = entry.getValue();
             try {
                 setModelInfo(ingestDocument, semanticFieldInfoList);
 
-                chunk(ingestDocument, semanticFieldInfoList);
+                if (chunk(ingestDocument, semanticFieldInfoList)) {
+                    isChunked = true;
+                }
             } catch (Exception e) {
                 log.error(
                     String.format(
@@ -476,6 +487,10 @@ private void batchProcess(
             }
         }
 
+        if (isChunked) {
+            EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
+        }
+
         batchGenerateAndSetEmbedding(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
     }
 
 
@@ -17,7 +17,6 @@
 import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForSemanticFieldType;
 import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray;
 import static org.opensearch.neuralsearch.constants.MappingConstants.PATH_SEPARATOR;
-import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX;
 import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.EMBEDDING_FIELD_NAME;
 import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.CHUNKS_FIELD_NAME;
 import static org.opensearch.neuralsearch.processor.TextImageEmbeddingProcessor.INPUT_IMAGE;
@@ -83,6 +82,8 @@
 import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
 import org.opensearch.neuralsearch.mapper.SemanticFieldMapper;
 import org.opensearch.neuralsearch.query.dto.NeuralQueryBuildStage;
+import org.opensearch.neuralsearch.stats.events.EventStatName;
+import org.opensearch.neuralsearch.stats.events.EventStatsManager;
 import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
 
 import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
@@ -551,6 +552,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
      * @throws IOException can be thrown by parser
      */
     public static NeuralQueryBuilder fromXContent(XContentParser parser) throws IOException {
+        EventStatsManager.increment(EventStatName.NEURAL_QUERY_REQUESTS);
         final Builder builder = new Builder();
         if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
             throw new ParsingException(parser.getTokenLocation(), "Token must be START_OBJECT");
@@ -708,6 +710,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
         final String chunksPath = targetFieldConfig.getChunksPath();
 
         if (KNNVectorFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
+            EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_REQUESTS);
             if (modelIdToVectorSupplierMap == null
                 || modelIdToVectorSupplierMap.get(searchModelId) == null
                 || modelIdToVectorSupplierMap.get(searchModelId).get() == null) {
@@ -723,6 +726,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
                 return neuralKNNQueryBuilder;
             }
         } else if (RankFeaturesFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
+            EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_REQUESTS);
             Supplier<Map<String, Float>> queryTokensSupplier = queryTokensMapSupplier;
             // If the raw token is not provided or no search analyzer provided
             // then try to find the token generated by the ml model
@@ -798,7 +802,7 @@ private QueryBuilder rewriteQueryAgainstKnnField(QueryRewriteContext queryRewrit
             if (vectorSupplier().get() == null) {
                 return this;
             }
-
+            EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_KNN_REQUESTS);
             return createKNNQueryBuilder(fieldName(), vectorSupplier.get());
         }
 
@@ -977,15 +981,6 @@ private String getErrorMessageWithBaseErrorForSemantic(@NonNull final String err
         return "Failed to rewrite the neural query against the semantic field " + fieldName + ". " + errorMessage;
     }
 
-    private String getSemanticInfoFieldPath(SemanticFieldMapper.SemanticFieldType semanticFieldType) {
-        final String[] paths = semanticFieldType.name().split("\\.");
-        final String semanticInfoFieldName = semanticFieldType.getSemanticParameters().getSemanticInfoFieldName();
-        paths[paths.length - 1] = semanticInfoFieldName == null
-            ? paths[paths.length - 1] + DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX
-            : semanticInfoFieldName;
-        return String.join(PATH_SEPARATOR, paths) + PATH_SEPARATOR + CHUNKS_FIELD_NAME;
-    }
-
     private QueryBuilder inferenceForSemanticField(
         @NonNull final QueryRewriteContext queryRewriteContext,
         @NonNull final Set<String> modelIdsFromTargetFields,