Skip to content

Commit e462617

Browse files
committed
Add stats tracking for semantic field
Signed-off-by: Bo Zhang <[email protected]>
1 parent 981411f commit e462617

File tree

9 files changed

+65
-15
lines changed

9 files changed

+65
-15
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1919
- Support custom weights in RRF normalization processor ([#1322](https://github.com/opensearch-project/neural-search/pull/1322))
2020
- [Stats] Add stats tracking for semantic highlighting ([#1327](https://github.com/opensearch-project/neural-search/pull/1327))
2121
- [Stats] Add stats for text embedding processor with different settings ([#1332](https://github.com/opensearch-project/neural-search/pull/1332))
22+
- [Stats] Add stats tracking for semantic field ([#1362](https://github.com/opensearch-project/neural-search/pull/1362))
23+
2224
### Bug Fixes
2325
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))
2426
- Return bad request for stats API calls with invalid stat names instead of ignoring them ([#1291](https://github.com/opensearch-project/neural-search/pull/1291))

src/main/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessor.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.opensearch.neuralsearch.processor.chunker.Chunker;
2121
import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
2222
import org.opensearch.neuralsearch.processor.dto.SemanticFieldInfo;
23+
import org.opensearch.neuralsearch.stats.events.EventStatName;
24+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
2325
import org.opensearch.neuralsearch.util.TokenWeightUtil;
2426
import org.opensearch.neuralsearch.util.prune.PruneType;
2527
import org.opensearch.neuralsearch.util.prune.PruneUtils;
@@ -118,6 +120,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
118120
*/
119121
@Override
120122
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
123+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
121124
try {
122125
unflattenIngestDoc(ingestDocument);
123126
// Collect all the semantic field info based on the path of semantic fields found in the index mapping
@@ -173,11 +176,23 @@ private void process(
173176
) {
174177
setModelInfo(ingestDocument, semanticFieldInfoList);
175178

179+
recordChunking(List.of(semanticFieldInfoList));
176180
chunk(ingestDocument, semanticFieldInfoList);
177181

178182
generateAndSetEmbedding(ingestDocument, semanticFieldInfoList, handler);
179183
}
180184

185+
private void recordChunking(@NonNull Collection<List<SemanticFieldInfo>> semanticFieldInfoLists) {
186+
for (List<SemanticFieldInfo> semanticFieldInfoList : semanticFieldInfoLists) {
187+
for (SemanticFieldInfo semanticFieldInfo : semanticFieldInfoList) {
188+
if (semanticFieldInfo.getChunkingEnabled()) {
189+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
190+
break;
191+
}
192+
}
193+
}
194+
}
195+
181196
private void setModelInfo(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
182197
final Map<String, Map<String, Object>> modelIdToInfoMap = new HashMap<>();
183198
for (final Map.Entry<String, MLModel> entry : modelIdToModelMap.entrySet()) {
@@ -386,6 +401,7 @@ private void collectSemanticFieldInfo(
386401

387402
@Override
388403
public void subBatchExecute(List<IngestDocumentWrapper> ingestDocumentWrappers, Consumer<List<IngestDocumentWrapper>> handler) {
404+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
389405
if (ingestDocumentWrappers == null || ingestDocumentWrappers.isEmpty()) {
390406
handler.accept(ingestDocumentWrappers);
391407
return;
@@ -451,7 +467,7 @@ private void batchProcess(
451467
@NonNull final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap,
452468
@NonNull final Consumer<List<IngestDocumentWrapper>> handler
453469
) {
454-
470+
recordChunking(docToSemanticFieldInfoMap.values());
455471
for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
456472
final IngestDocumentWrapper ingestDocumentWrapper = entry.getKey();
457473
final IngestDocument ingestDocument = entry.getKey().getIngestDocument();

src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForSemanticFieldType;
1818
import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray;
1919
import static org.opensearch.neuralsearch.constants.MappingConstants.PATH_SEPARATOR;
20-
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX;
2120
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.EMBEDDING_FIELD_NAME;
2221
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.CHUNKS_FIELD_NAME;
2322
import static org.opensearch.neuralsearch.processor.TextImageEmbeddingProcessor.INPUT_IMAGE;
@@ -83,6 +82,8 @@
8382
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
8483
import org.opensearch.neuralsearch.mapper.SemanticFieldMapper;
8584
import org.opensearch.neuralsearch.query.dto.NeuralQueryBuildStage;
85+
import org.opensearch.neuralsearch.stats.events.EventStatName;
86+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
8687
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
8788

8889
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
@@ -551,6 +552,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
551552
* @throws IOException can be thrown by parser
552553
*/
553554
public static NeuralQueryBuilder fromXContent(XContentParser parser) throws IOException {
555+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_COUNT);
554556
final Builder builder = new Builder();
555557
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
556558
throw new ParsingException(parser.getTokenLocation(), "Token must be START_OBJECT");
@@ -707,6 +709,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
707709
final String chunksPath = targetFieldConfig.getChunksPath();
708710

709711
if (KNNVectorFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
712+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_COUNT);
710713
if (modelIdToVectorSupplierMap == null
711714
|| modelIdToVectorSupplierMap.get(searchModelId) == null
712715
|| modelIdToVectorSupplierMap.get(searchModelId).get() == null) {
@@ -722,6 +725,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
722725
return neuralKNNQueryBuilder;
723726
}
724727
} else if (RankFeaturesFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
728+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_COUNT);
725729
Supplier<Map<String, Float>> queryTokensSupplier = queryTokensMapSupplier;
726730
// If the raw token is not provided or no search analyzer provided
727731
// then try to find the token generated by the ml model
@@ -797,7 +801,7 @@ private QueryBuilder rewriteQueryAgainstKnnField(QueryRewriteContext queryRewrit
797801
if (vectorSupplier().get() == null) {
798802
return this;
799803
}
800-
804+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_DENSE_COUNT);
801805
return createKNNQueryBuilder(fieldName(), vectorSupplier.get());
802806
}
803807

@@ -961,15 +965,6 @@ private String getErrorMessageWithBaseErrorForSemantic(@NonNull final String err
961965
return "Failed to rewrite the neural query against the semantic field " + fieldName + ". " + errorMessage;
962966
}
963967

964-
private String getSemanticInfoFieldPath(SemanticFieldMapper.SemanticFieldType semanticFieldType) {
965-
final String[] paths = semanticFieldType.name().split("\\.");
966-
final String semanticInfoFieldName = semanticFieldType.getSemanticParameters().getSemanticInfoFieldName();
967-
paths[paths.length - 1] = semanticInfoFieldName == null
968-
? paths[paths.length - 1] + DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX
969-
: semanticInfoFieldName;
970-
return String.join(PATH_SEPARATOR, paths) + PATH_SEPARATOR + CHUNKS_FIELD_NAME;
971-
}
972-
973968
private QueryBuilder inferenceForSemanticField(
974969
@NonNull final QueryRewriteContext queryRewriteContext,
975970
@NonNull final Set<String> modelIdsFromTargetFields,

src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.apache.lucene.search.Query;
2929
import org.opensearch.OpenSearchException;
3030
import org.opensearch.Version;
31+
import org.opensearch.neuralsearch.stats.events.EventStatName;
32+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
3133
import org.opensearch.transport.client.Client;
3234
import org.opensearch.common.SetOnce;
3335
import org.opensearch.common.collect.Tuple;
@@ -250,6 +252,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
250252
* @throws IOException can be thrown by parser
251253
*/
252254
public static NeuralSparseQueryBuilder fromXContent(XContentParser parser) throws IOException {
255+
EventStatsManager.increment(EventStatName.NEURAL_SPARSE_QUERY_COUNT);
253256
NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder();
254257
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
255258
throw new ParsingException(parser.getTokenLocation(), "First token of " + NAME + "query must be START_OBJECT");

src/main/java/org/opensearch/neuralsearch/stats/events/EventStatName.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,30 @@ public enum EventStatName implements StatName {
3131
EventStatType.TIMESTAMPED_EVENT_COUNTER
3232
),
3333
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
34+
SEMANTIC_FIELD_PROCESSOR_EXECUTIONS("semantic_field_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
35+
SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS(
36+
"semantic_field_chunking_executions",
37+
"processors.ingest",
38+
EventStatType.TIMESTAMPED_EVENT_COUNTER
39+
),
3440
SEMANTIC_HIGHLIGHTING_REQUEST_COUNT(
3541
"semantic_highlighting_request_count",
3642
"semantic_highlighting",
3743
EventStatType.TIMESTAMPED_EVENT_COUNTER
38-
);
44+
),
45+
NEURAL_QUERY_COUNT("neural_query_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
46+
NEURAL_QUERY_AGAINST_DENSE_COUNT("neural_query_against_dense_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
47+
NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_COUNT(
48+
"neural_query_against_semantic_dense_requests",
49+
"query.neural",
50+
EventStatType.TIMESTAMPED_EVENT_COUNTER
51+
),
52+
NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_COUNT(
53+
"neural_query_against_semantic_sparse_requests",
54+
"query.neural",
55+
EventStatType.TIMESTAMPED_EVENT_COUNTER
56+
),
57+
NEURAL_SPARSE_QUERY_COUNT("neural_sparse_requests", "query.neural_sparse", EventStatType.TIMESTAMPED_EVENT_COUNTER);
3958

4059
private final String nameString;
4160
private final String path;

src/test/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessorTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.CHUNKING;
5555
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.MODEL_ID;
5656
import static org.opensearch.neuralsearch.processor.TextChunkingProcessorTests.getAnalysisRegistry;
57+
import org.opensearch.neuralsearch.util.TestUtils;
5758

5859
public class SemanticFieldProcessorTests extends OpenSearchTestCase {
5960
@Mock
@@ -80,6 +81,9 @@ public class SemanticFieldProcessorTests extends OpenSearchTestCase {
8081

8182
@Before
8283
public void setup() {
84+
// Initialize EventStatsManager for tests
85+
TestUtils.initializeEventStatsManager();
86+
8387
MockitoAnnotations.openMocks(this);
8488
// mock env
8589
final Settings settings = Settings.builder()

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderRewriteTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.opensearch.neuralsearch.processor.TextInferenceRequest;
3939
import org.opensearch.neuralsearch.util.NeuralSearchClusterTestUtils;
4040
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
41+
import org.opensearch.neuralsearch.util.TestUtils;
4142
import org.opensearch.test.OpenSearchTestCase;
4243
import org.opensearch.transport.client.Client;
4344

@@ -99,6 +100,8 @@ public class NeuralQueryBuilderRewriteTests extends OpenSearchTestCase {
99100
public void setup() throws Exception {
100101
mlClient = mock(MLCommonsClientAccessor.class);
101102
NeuralQueryBuilder.initialize(mlClient);
103+
// Initialize EventStatsManager for tests
104+
TestUtils.initializeEventStatsManager();
102105
}
103106

104107
@SneakyThrows

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.opensearch.index.query.QueryShardContext;
6161
import org.opensearch.knn.index.query.rescore.RescoreContext;
6262
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
63+
import org.opensearch.neuralsearch.util.TestUtils;
6364
import org.opensearch.test.OpenSearchTestCase;
6465

6566
import lombok.SneakyThrows;
@@ -90,7 +91,10 @@ public class NeuralQueryBuilderTests extends OpenSearchTestCase {
9091
private static final QueryBuilder ADDITIONAL_TEST_FILTER = new TermQueryBuilder(TERM_QUERY_FIELD_NAME, TERM_QUERY_FIELD_VALUE);
9192

9293
@Before
93-
public void setup() throws Exception {}
94+
public void setup() throws Exception {
95+
// Initialize EventStatsManager for tests
96+
TestUtils.initializeEventStatsManager();
97+
}
9498

9599
@SneakyThrows
96100
public void testFromXContent_whenBuiltWithDefaults_thenBuildSuccessfully() {

src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.index.analysis.AnalyzerScope;
5050
import org.opensearch.index.analysis.IndexAnalyzers;
5151
import org.opensearch.index.analysis.NamedAnalyzer;
52+
import org.opensearch.neuralsearch.util.TestUtils;
5253
import org.opensearch.transport.client.Client;
5354
import org.opensearch.cluster.service.ClusterService;
5455
import org.opensearch.common.SetOnce;
@@ -87,8 +88,11 @@ public class NeuralSparseQueryBuilderTests extends OpenSearchTestCase {
8788
private static final Supplier<Map<String, Float>> QUERY_TOKENS_SUPPLIER = () -> Map.of("hello", 1.f, "world", 2.f);
8889

8990
@Before
90-
public void setupClusterServiceToCurrentVersion() {
91+
public void setup() {
9192
setUpClusterService(Version.CURRENT);
93+
94+
// Initialize EventStatsManager for tests
95+
TestUtils.initializeEventStatsManager();
9296
}
9397

9498
@SneakyThrows

0 commit comments

Comments
 (0)