Skip to content

Commit 301d6b5

Browse files
committed
Add stats tracking for semantic field
Signed-off-by: Bo Zhang <[email protected]>
1 parent 8068294 commit 301d6b5

File tree

11 files changed

+111
-19
lines changed

11 files changed

+111
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2222
- [Stats] Add stats for text embedding processor with different settings ([#1332](https://github.com/opensearch-project/neural-search/pull/1332))
2323
- Validate model id and analyzer should not be provided at the same time for the neural sparse query ([#1359](https://github.com/opensearch-project/neural-search/pull/1359))
2424
- [Stats] Add stats for score based and rank based normalization processors ([#1326](https://github.com/opensearch-project/neural-search/pull/1326))
25+
- [Stats] Add stats tracking for semantic field ([#1362](https://github.com/opensearch-project/neural-search/pull/1362))
2526

2627
### Bug Fixes
2728
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))

src/main/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessor.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.opensearch.neuralsearch.processor.chunker.Chunker;
2121
import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
2222
import org.opensearch.neuralsearch.processor.dto.SemanticFieldInfo;
23+
import org.opensearch.neuralsearch.stats.events.EventStatName;
24+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
2325
import org.opensearch.neuralsearch.util.TokenWeightUtil;
2426
import org.opensearch.neuralsearch.util.prune.PruneType;
2527
import org.opensearch.neuralsearch.util.prune.PruneUtils;
@@ -118,6 +120,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
118120
*/
119121
@Override
120122
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
123+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
121124
try {
122125
unflattenIngestDoc(ingestDocument);
123126
// Collect all the semantic field info based on the path of semantic fields found in the index mapping
@@ -173,7 +176,10 @@ private void process(
173176
) {
174177
setModelInfo(ingestDocument, semanticFieldInfoList);
175178

176-
chunk(ingestDocument, semanticFieldInfoList);
179+
boolean isChunked = chunk(ingestDocument, semanticFieldInfoList);
180+
if (isChunked) {
181+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
182+
}
177183

178184
generateAndSetEmbedding(ingestDocument, semanticFieldInfoList, handler);
179185
}
@@ -277,12 +283,13 @@ private List<SemanticFieldInfo> getSemanticFieldInfo(IngestDocument ingestDocume
277283
return semanticFieldInfos;
278284
}
279285

280-
private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
286+
private boolean chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
281287
final Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
282288
int maxTokenCount = getMaxTokenCount(sourceAndMetadataMap, environment.settings(), clusterService);
283-
289+
boolean isChunked = false;
284290
for (SemanticFieldInfo semanticFieldInfo : semanticFieldInfoList) {
285291
if (semanticFieldInfo.getChunkingEnabled()) {
292+
isChunked = true;
286293
if (semanticFieldInfo.getChunkers() == null || semanticFieldInfo.getChunkers().isEmpty()) {
287294
semanticFieldInfo.setChunkers(List.of(defaultTextChunker));
288295
}
@@ -294,6 +301,7 @@ private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final
294301
semanticFieldInfo.setChunks(List.of(semanticFieldInfo.getValue()));
295302
}
296303
}
304+
return isChunked;
297305
}
298306

299307
private void setChunkedText(@NonNull final IngestDocument ingestDocument, @NonNull final SemanticFieldInfo semanticFieldInfo) {
@@ -386,6 +394,7 @@ private void collectSemanticFieldInfo(
386394

387395
@Override
388396
public void subBatchExecute(List<IngestDocumentWrapper> ingestDocumentWrappers, Consumer<List<IngestDocumentWrapper>> handler) {
397+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
389398
if (ingestDocumentWrappers == null || ingestDocumentWrappers.isEmpty()) {
390399
handler.accept(ingestDocumentWrappers);
391400
return;
@@ -451,15 +460,17 @@ private void batchProcess(
451460
@NonNull final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap,
452461
@NonNull final Consumer<List<IngestDocumentWrapper>> handler
453462
) {
454-
463+
boolean isChunked = false;
455464
for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
456465
final IngestDocumentWrapper ingestDocumentWrapper = entry.getKey();
457466
final IngestDocument ingestDocument = entry.getKey().getIngestDocument();
458467
final List<SemanticFieldInfo> semanticFieldInfoList = entry.getValue();
459468
try {
460469
setModelInfo(ingestDocument, semanticFieldInfoList);
461470

462-
chunk(ingestDocument, semanticFieldInfoList);
471+
if (chunk(ingestDocument, semanticFieldInfoList)) {
472+
isChunked = true;
473+
}
463474
} catch (Exception e) {
464475
log.error(
465476
String.format(
@@ -476,6 +487,10 @@ private void batchProcess(
476487
}
477488
}
478489

490+
if (isChunked) {
491+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
492+
}
493+
479494
batchGenerateAndSetEmbedding(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
480495
}
481496

src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForSemanticFieldType;
1818
import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray;
1919
import static org.opensearch.neuralsearch.constants.MappingConstants.PATH_SEPARATOR;
20-
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX;
2120
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.EMBEDDING_FIELD_NAME;
2221
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.CHUNKS_FIELD_NAME;
2322
import static org.opensearch.neuralsearch.processor.TextImageEmbeddingProcessor.INPUT_IMAGE;
@@ -83,6 +82,8 @@
8382
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
8483
import org.opensearch.neuralsearch.mapper.SemanticFieldMapper;
8584
import org.opensearch.neuralsearch.query.dto.NeuralQueryBuildStage;
85+
import org.opensearch.neuralsearch.stats.events.EventStatName;
86+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
8687
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
8788

8889
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
@@ -551,6 +552,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
551552
* @throws IOException can be thrown by parser
552553
*/
553554
public static NeuralQueryBuilder fromXContent(XContentParser parser) throws IOException {
555+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_REQUESTS);
554556
final Builder builder = new Builder();
555557
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
556558
throw new ParsingException(parser.getTokenLocation(), "Token must be START_OBJECT");
@@ -708,6 +710,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
708710
final String chunksPath = targetFieldConfig.getChunksPath();
709711

710712
if (KNNVectorFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
713+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_REQUESTS);
711714
if (modelIdToVectorSupplierMap == null
712715
|| modelIdToVectorSupplierMap.get(searchModelId) == null
713716
|| modelIdToVectorSupplierMap.get(searchModelId).get() == null) {
@@ -723,6 +726,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
723726
return neuralKNNQueryBuilder;
724727
}
725728
} else if (RankFeaturesFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
729+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_REQUESTS);
726730
Supplier<Map<String, Float>> queryTokensSupplier = queryTokensMapSupplier;
727731
// If the raw token is not provided or no search analyzer provided
728732
// then try to find the token generated by the ml model
@@ -798,7 +802,7 @@ private QueryBuilder rewriteQueryAgainstKnnField(QueryRewriteContext queryRewrit
798802
if (vectorSupplier().get() == null) {
799803
return this;
800804
}
801-
805+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_KNN_REQUESTS);
802806
return createKNNQueryBuilder(fieldName(), vectorSupplier.get());
803807
}
804808

@@ -977,15 +981,6 @@ private String getErrorMessageWithBaseErrorForSemantic(@NonNull final String err
977981
return "Failed to rewrite the neural query against the semantic field " + fieldName + ". " + errorMessage;
978982
}
979983

980-
private String getSemanticInfoFieldPath(SemanticFieldMapper.SemanticFieldType semanticFieldType) {
981-
final String[] paths = semanticFieldType.name().split("\\.");
982-
final String semanticInfoFieldName = semanticFieldType.getSemanticParameters().getSemanticInfoFieldName();
983-
paths[paths.length - 1] = semanticInfoFieldName == null
984-
? paths[paths.length - 1] + DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX
985-
: semanticInfoFieldName;
986-
return String.join(PATH_SEPARATOR, paths) + PATH_SEPARATOR + CHUNKS_FIELD_NAME;
987-
}
988-
989984
private QueryBuilder inferenceForSemanticField(
990985
@NonNull final QueryRewriteContext queryRewriteContext,
991986
@NonNull final Set<String> modelIdsFromTargetFields,

src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import org.opensearch.OpenSearchException;
3030
import org.opensearch.Version;
3131
import org.opensearch.neuralsearch.processor.NeuralQueryEnricherProcessor;
32+
import org.opensearch.neuralsearch.stats.events.EventStatName;
33+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
3234
import org.opensearch.transport.client.Client;
3335
import org.opensearch.common.SetOnce;
3436
import org.opensearch.common.collect.Tuple;
@@ -256,6 +258,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
256258
* @throws IOException can be thrown by parser
257259
*/
258260
public static NeuralSparseQueryBuilder fromXContent(XContentParser parser) throws IOException {
261+
EventStatsManager.increment(EventStatName.NEURAL_SPARSE_QUERY_REQUESTS);
259262
NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder();
260263
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
261264
throw new ParsingException(parser.getTokenLocation(), "First token of " + NAME + "query must be START_OBJECT");

src/main/java/org/opensearch/neuralsearch/stats/events/EventStatName.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ public enum EventStatName implements StatName {
3131
EventStatType.TIMESTAMPED_EVENT_COUNTER
3232
),
3333
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
34+
SEMANTIC_FIELD_PROCESSOR_EXECUTIONS("semantic_field_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
35+
SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS(
36+
"semantic_field_chunking_executions",
37+
"processors.ingest",
38+
EventStatType.TIMESTAMPED_EVENT_COUNTER
39+
),
3440
SEMANTIC_HIGHLIGHTING_REQUEST_COUNT(
3541
"semantic_highlighting_request_count",
3642
"semantic_highlighting",
@@ -59,7 +65,21 @@ public enum EventStatName implements StatName {
5965
HYBRID_QUERY_REQUESTS("hybrid_query_requests", "query.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
6066
HYBRID_QUERY_INNER_HITS_REQUESTS("hybrid_query_with_inner_hits_requests", "query.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
6167
HYBRID_QUERY_FILTER_REQUESTS("hybrid_query_with_filter_requests", "query.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
62-
HYBRID_QUERY_PAGINATION_REQUESTS("hybrid_query_with_pagination_requests", "query.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),;
68+
HYBRID_QUERY_PAGINATION_REQUESTS("hybrid_query_with_pagination_requests", "query.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
69+
// Neural query stats
70+
NEURAL_QUERY_REQUESTS("neural_query_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
71+
NEURAL_QUERY_AGAINST_KNN_REQUESTS("neural_query_against_knn_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
72+
NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_REQUESTS(
73+
"neural_query_against_semantic_dense_requests",
74+
"query.neural",
75+
EventStatType.TIMESTAMPED_EVENT_COUNTER
76+
),
77+
NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_REQUESTS(
78+
"neural_query_against_semantic_sparse_requests",
79+
"query.neural",
80+
EventStatType.TIMESTAMPED_EVENT_COUNTER
81+
),
82+
NEURAL_SPARSE_QUERY_REQUESTS("neural_sparse_query_requests", "query.neural_sparse", EventStatType.TIMESTAMPED_EVENT_COUNTER);
6383

6484
private final String nameString;
6585
private final String path;

src/test/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessorTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.MODEL_ID;
5656
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.SEMANTIC_FIELD_SEARCH_ANALYZER;
5757
import static org.opensearch.neuralsearch.processor.TextChunkingProcessorTests.getAnalysisRegistry;
58+
import org.opensearch.neuralsearch.util.TestUtils;
5859

5960
public class SemanticFieldProcessorTests extends OpenSearchTestCase {
6061
@Mock
@@ -81,6 +82,9 @@ public class SemanticFieldProcessorTests extends OpenSearchTestCase {
8182

8283
@Before
8384
public void setup() {
85+
// Initialize EventStatsManager for tests
86+
TestUtils.initializeEventStatsManager();
87+
8488
MockitoAnnotations.openMocks(this);
8589
// mock env
8690
final Settings settings = Settings.builder()

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderRewriteTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.opensearch.neuralsearch.processor.TextInferenceRequest;
3939
import org.opensearch.neuralsearch.util.NeuralSearchClusterTestUtils;
4040
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
41+
import org.opensearch.neuralsearch.util.TestUtils;
4142
import org.opensearch.test.OpenSearchTestCase;
4243
import org.opensearch.transport.client.Client;
4344

@@ -99,6 +100,8 @@ public class NeuralQueryBuilderRewriteTests extends OpenSearchTestCase {
99100
public void setup() throws Exception {
100101
mlClient = mock(MLCommonsClientAccessor.class);
101102
NeuralQueryBuilder.initialize(mlClient);
103+
// Initialize EventStatsManager for tests
104+
TestUtils.initializeEventStatsManager();
102105
}
103106

104107
@SneakyThrows

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.opensearch.index.query.QueryShardContext;
6161
import org.opensearch.knn.index.query.rescore.RescoreContext;
6262
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
63+
import org.opensearch.neuralsearch.util.TestUtils;
6364
import org.opensearch.test.OpenSearchTestCase;
6465

6566
import lombok.SneakyThrows;
@@ -90,7 +91,10 @@ public class NeuralQueryBuilderTests extends OpenSearchTestCase {
9091
private static final QueryBuilder ADDITIONAL_TEST_FILTER = new TermQueryBuilder(TERM_QUERY_FIELD_NAME, TERM_QUERY_FIELD_VALUE);
9192

9293
@Before
93-
public void setup() throws Exception {}
94+
public void setup() throws Exception {
95+
// Initialize EventStatsManager for tests
96+
TestUtils.initializeEventStatsManager();
97+
}
9498

9599
@SneakyThrows
96100
public void testFromXContent_whenBuiltWithDefaults_thenBuildSuccessfully() {

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryIT.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
*/
55
package org.opensearch.neuralsearch.query;
66

7+
import static org.opensearch.neuralsearch.settings.NeuralSearchSettings.NEURAL_STATS_ENABLED;
78
import static org.opensearch.neuralsearch.util.TestUtils.DELTA_FOR_SCORE_ASSERTION;
89
import static org.opensearch.neuralsearch.util.TestUtils.TEST_DIMENSION;
910
import static org.opensearch.neuralsearch.util.TestUtils.TEST_SPACE_TYPE;
1011
import static org.opensearch.neuralsearch.util.TestUtils.createRandomVector;
1112
import static org.opensearch.neuralsearch.util.TestUtils.objectToFloat;
1213

14+
import java.util.ArrayList;
1315
import java.util.Collections;
1416
import java.util.List;
1517
import java.util.Map;
@@ -24,6 +26,7 @@
2426
import com.google.common.primitives.Floats;
2527

2628
import lombok.SneakyThrows;
29+
import org.opensearch.neuralsearch.stats.events.EventStatName;
2730

2831
public class NeuralQueryIT extends BaseNeuralSearchIT {
2932
private static final String TEST_BASIC_INDEX_NAME = "test-neural-basic-index";
@@ -98,6 +101,8 @@ public void setUp() throws Exception {
98101
*/
99102
@SneakyThrows
100103
public void testQueryWithBoostAndImageQueryAndRadialQuery() {
104+
// Enable stats for the test
105+
updateClusterSettings(NEURAL_STATS_ENABLED.getKey(), true);
101106
String modelId = null;
102107
initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME);
103108
modelId = prepareModel();
@@ -174,6 +179,24 @@ public void testQueryWithBoostAndImageQueryAndRadialQuery() {
174179
objectToFloat(firstInnerHitWithMinScoreQuery.get("_score")),
175180
DELTA_FOR_SCORE_ASSERTION
176181
);
182+
183+
// Get stats
184+
String responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
185+
Map<String, Object> allNodesStats = parseAggregatedNodeStatsResponse(responseBody);
186+
187+
// Parse json to get stats
188+
assertEquals(
189+
"Stats should contain the expected number of neural_query_against_knn_requests",
190+
4,
191+
getNestedValue(allNodesStats, EventStatName.NEURAL_QUERY_AGAINST_KNN_REQUESTS)
192+
);
193+
assertEquals(
194+
"Stats should contain the expected number of neural_query_requests",
195+
4,
196+
getNestedValue(allNodesStats, EventStatName.NEURAL_QUERY_REQUESTS)
197+
);
198+
// Disable stats to not impact other tests
199+
updateClusterSettings(NEURAL_STATS_ENABLED.getKey(), false);
177200
}
178201

179202
/**

src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.index.analysis.AnalyzerScope;
5050
import org.opensearch.index.analysis.IndexAnalyzers;
5151
import org.opensearch.index.analysis.NamedAnalyzer;
52+
import org.opensearch.neuralsearch.util.TestUtils;
5253
import org.opensearch.transport.client.Client;
5354
import org.opensearch.cluster.service.ClusterService;
5455
import org.opensearch.common.SetOnce;
@@ -87,8 +88,11 @@ public class NeuralSparseQueryBuilderTests extends OpenSearchTestCase {
8788
private static final Supplier<Map<String, Float>> QUERY_TOKENS_SUPPLIER = () -> Map.of("hello", 1.f, "world", 2.f);
8889

8990
@Before
90-
public void setupClusterServiceToCurrentVersion() {
91+
public void setup() {
9192
setUpClusterService(Version.CURRENT);
93+
94+
// Initialize EventStatsManager for tests
95+
TestUtils.initializeEventStatsManager();
9296
}
9397

9498
@SneakyThrows

0 commit comments

Comments
 (0)