Skip to content

Commit 45b6130

Browse files
committed
Add stats tracking for semantic field
Signed-off-by: Bo Zhang <[email protected]>
1 parent 981411f commit 45b6130

File tree

9 files changed

+68
-19
lines changed

9 files changed

+68
-19
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1919
- Support custom weights in RRF normalization processor ([#1322](https://github.com/opensearch-project/neural-search/pull/1322))
2020
- [Stats] Add stats tracking for semantic highlighting ([#1327](https://github.com/opensearch-project/neural-search/pull/1327))
2121
- [Stats] Add stats for text embedding processor with different settings ([#1332](https://github.com/opensearch-project/neural-search/pull/1332))
22+
- [Stats] Add stats tracking for semantic field ([#1362](https://github.com/opensearch-project/neural-search/pull/1362))
23+
2224
### Bug Fixes
2325
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))
2426
- Return bad request for stats API calls with invalid stat names instead of ignoring them ([#1291](https://github.com/opensearch-project/neural-search/pull/1291))

src/main/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessor.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.opensearch.neuralsearch.processor.chunker.Chunker;
2121
import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
2222
import org.opensearch.neuralsearch.processor.dto.SemanticFieldInfo;
23+
import org.opensearch.neuralsearch.stats.events.EventStatName;
24+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
2325
import org.opensearch.neuralsearch.util.TokenWeightUtil;
2426
import org.opensearch.neuralsearch.util.prune.PruneType;
2527
import org.opensearch.neuralsearch.util.prune.PruneUtils;
@@ -118,6 +120,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
118120
*/
119121
@Override
120122
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
123+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
121124
try {
122125
unflattenIngestDoc(ingestDocument);
123126
// Collect all the semantic field info based on the path of semantic fields found in the index mapping
@@ -173,7 +176,10 @@ private void process(
173176
) {
174177
setModelInfo(ingestDocument, semanticFieldInfoList);
175178

176-
chunk(ingestDocument, semanticFieldInfoList);
179+
boolean shouldRecordChunking = chunk(ingestDocument, semanticFieldInfoList);
180+
if (shouldRecordChunking) {
181+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
182+
}
177183

178184
generateAndSetEmbedding(ingestDocument, semanticFieldInfoList, handler);
179185
}
@@ -277,12 +283,13 @@ private List<SemanticFieldInfo> getSemanticFieldInfo(IngestDocument ingestDocume
277283
return semanticFieldInfos;
278284
}
279285

280-
private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
286+
private boolean chunk(@NonNull final IngestDocument ingestDocument, @NonNull final List<SemanticFieldInfo> semanticFieldInfoList) {
281287
final Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
282288
int maxTokenCount = getMaxTokenCount(sourceAndMetadataMap, environment.settings(), clusterService);
283-
289+
boolean shouldRecordChunking = false;
284290
for (SemanticFieldInfo semanticFieldInfo : semanticFieldInfoList) {
285291
if (semanticFieldInfo.getChunkingEnabled()) {
292+
shouldRecordChunking = true;
286293
if (semanticFieldInfo.getChunkers() == null || semanticFieldInfo.getChunkers().isEmpty()) {
287294
semanticFieldInfo.setChunkers(List.of(defaultTextChunker));
288295
}
@@ -294,6 +301,7 @@ private void chunk(@NonNull final IngestDocument ingestDocument, @NonNull final
294301
semanticFieldInfo.setChunks(List.of(semanticFieldInfo.getValue()));
295302
}
296303
}
304+
return shouldRecordChunking;
297305
}
298306

299307
private void setChunkedText(@NonNull final IngestDocument ingestDocument, @NonNull final SemanticFieldInfo semanticFieldInfo) {
@@ -386,6 +394,7 @@ private void collectSemanticFieldInfo(
386394

387395
@Override
388396
public void subBatchExecute(List<IngestDocumentWrapper> ingestDocumentWrappers, Consumer<List<IngestDocumentWrapper>> handler) {
397+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
389398
if (ingestDocumentWrappers == null || ingestDocumentWrappers.isEmpty()) {
390399
handler.accept(ingestDocumentWrappers);
391400
return;
@@ -451,15 +460,17 @@ private void batchProcess(
451460
@NonNull final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap,
452461
@NonNull final Consumer<List<IngestDocumentWrapper>> handler
453462
) {
454-
463+
boolean shouldRecordChunking = false;
455464
for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
456465
final IngestDocumentWrapper ingestDocumentWrapper = entry.getKey();
457466
final IngestDocument ingestDocument = entry.getKey().getIngestDocument();
458467
final List<SemanticFieldInfo> semanticFieldInfoList = entry.getValue();
459468
try {
460469
setModelInfo(ingestDocument, semanticFieldInfoList);
461470

462-
chunk(ingestDocument, semanticFieldInfoList);
471+
if (chunk(ingestDocument, semanticFieldInfoList)) {
472+
shouldRecordChunking = true;
473+
}
463474
} catch (Exception e) {
464475
log.error(
465476
String.format(
@@ -476,6 +487,10 @@ private void batchProcess(
476487
}
477488
}
478489

490+
if (shouldRecordChunking) {
491+
EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
492+
}
493+
479494
batchGenerateAndSetEmbedding(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
480495
}
481496

src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForSemanticFieldType;
1818
import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray;
1919
import static org.opensearch.neuralsearch.constants.MappingConstants.PATH_SEPARATOR;
20-
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX;
2120
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.EMBEDDING_FIELD_NAME;
2221
import static org.opensearch.neuralsearch.constants.SemanticInfoFieldConstants.CHUNKS_FIELD_NAME;
2322
import static org.opensearch.neuralsearch.processor.TextImageEmbeddingProcessor.INPUT_IMAGE;
@@ -83,6 +82,8 @@
8382
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
8483
import org.opensearch.neuralsearch.mapper.SemanticFieldMapper;
8584
import org.opensearch.neuralsearch.query.dto.NeuralQueryBuildStage;
85+
import org.opensearch.neuralsearch.stats.events.EventStatName;
86+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
8687
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
8788

8889
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
@@ -551,6 +552,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
551552
* @throws IOException can be thrown by parser
552553
*/
553554
public static NeuralQueryBuilder fromXContent(XContentParser parser) throws IOException {
555+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_REQUESTS);
554556
final Builder builder = new Builder();
555557
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
556558
throw new ParsingException(parser.getTokenLocation(), "Token must be START_OBJECT");
@@ -707,6 +709,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
707709
final String chunksPath = targetFieldConfig.getChunksPath();
708710

709711
if (KNNVectorFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
712+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_REQUESTS);
710713
if (modelIdToVectorSupplierMap == null
711714
|| modelIdToVectorSupplierMap.get(searchModelId) == null
712715
|| modelIdToVectorSupplierMap.get(searchModelId).get() == null) {
@@ -722,6 +725,7 @@ private QueryBuilder rewriteQueryForSemanticField(@NonNull final NeuralQueryTarg
722725
return neuralKNNQueryBuilder;
723726
}
724727
} else if (RankFeaturesFieldMapper.CONTENT_TYPE.equals(embeddingFieldType)) {
728+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_REQUESTS);
725729
Supplier<Map<String, Float>> queryTokensSupplier = queryTokensMapSupplier;
726730
// If the raw token is not provided or no search analyzer provided
727731
// then try to find the token generated by the ml model
@@ -797,7 +801,7 @@ private QueryBuilder rewriteQueryAgainstKnnField(QueryRewriteContext queryRewrit
797801
if (vectorSupplier().get() == null) {
798802
return this;
799803
}
800-
804+
EventStatsManager.increment(EventStatName.NEURAL_QUERY_AGAINST_DENSE_REQUESTS);
801805
return createKNNQueryBuilder(fieldName(), vectorSupplier.get());
802806
}
803807

@@ -961,15 +965,6 @@ private String getErrorMessageWithBaseErrorForSemantic(@NonNull final String err
961965
return "Failed to rewrite the neural query against the semantic field " + fieldName + ". " + errorMessage;
962966
}
963967

964-
private String getSemanticInfoFieldPath(SemanticFieldMapper.SemanticFieldType semanticFieldType) {
965-
final String[] paths = semanticFieldType.name().split("\\.");
966-
final String semanticInfoFieldName = semanticFieldType.getSemanticParameters().getSemanticInfoFieldName();
967-
paths[paths.length - 1] = semanticInfoFieldName == null
968-
? paths[paths.length - 1] + DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX
969-
: semanticInfoFieldName;
970-
return String.join(PATH_SEPARATOR, paths) + PATH_SEPARATOR + CHUNKS_FIELD_NAME;
971-
}
972-
973968
private QueryBuilder inferenceForSemanticField(
974969
@NonNull final QueryRewriteContext queryRewriteContext,
975970
@NonNull final Set<String> modelIdsFromTargetFields,

src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.apache.lucene.search.Query;
2929
import org.opensearch.OpenSearchException;
3030
import org.opensearch.Version;
31+
import org.opensearch.neuralsearch.stats.events.EventStatName;
32+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
3133
import org.opensearch.transport.client.Client;
3234
import org.opensearch.common.SetOnce;
3335
import org.opensearch.common.collect.Tuple;
@@ -250,6 +252,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
250252
* @throws IOException can be thrown by parser
251253
*/
252254
public static NeuralSparseQueryBuilder fromXContent(XContentParser parser) throws IOException {
255+
EventStatsManager.increment(EventStatName.NEURAL_SPARSE_QUERY_REQUESTS);
253256
NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder();
254257
if (parser.currentToken() != XContentParser.Token.START_OBJECT) {
255258
throw new ParsingException(parser.getTokenLocation(), "First token of " + NAME + "query must be START_OBJECT");

src/main/java/org/opensearch/neuralsearch/stats/events/EventStatName.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,30 @@ public enum EventStatName implements StatName {
3131
EventStatType.TIMESTAMPED_EVENT_COUNTER
3232
),
3333
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
34+
SEMANTIC_FIELD_PROCESSOR_EXECUTIONS("semantic_field_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
35+
SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS(
36+
"semantic_field_chunking_executions",
37+
"processors.ingest",
38+
EventStatType.TIMESTAMPED_EVENT_COUNTER
39+
),
3440
SEMANTIC_HIGHLIGHTING_REQUEST_COUNT(
3541
"semantic_highlighting_request_count",
3642
"semantic_highlighting",
3743
EventStatType.TIMESTAMPED_EVENT_COUNTER
38-
);
44+
),
45+
NEURAL_QUERY_REQUESTS("neural_query_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
46+
NEURAL_QUERY_AGAINST_DENSE_REQUESTS("neural_query_against_dense_requests", "query.neural", EventStatType.TIMESTAMPED_EVENT_COUNTER),
47+
NEURAL_QUERY_AGAINST_SEMANTIC_DENSE_REQUESTS(
48+
"neural_query_against_semantic_dense_requests",
49+
"query.neural",
50+
EventStatType.TIMESTAMPED_EVENT_COUNTER
51+
),
52+
NEURAL_QUERY_AGAINST_SEMANTIC_SPARSE_REQUESTS(
53+
"neural_query_against_semantic_sparse_requests",
54+
"query.neural",
55+
EventStatType.TIMESTAMPED_EVENT_COUNTER
56+
),
57+
NEURAL_SPARSE_QUERY_REQUESTS("neural_sparse_query_requests", "query.neural_sparse", EventStatType.TIMESTAMPED_EVENT_COUNTER);
3958

4059
private final String nameString;
4160
private final String path;

src/test/java/org/opensearch/neuralsearch/processor/semantic/SemanticFieldProcessorTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.CHUNKING;
5555
import static org.opensearch.neuralsearch.constants.SemanticFieldConstants.MODEL_ID;
5656
import static org.opensearch.neuralsearch.processor.TextChunkingProcessorTests.getAnalysisRegistry;
57+
import org.opensearch.neuralsearch.util.TestUtils;
5758

5859
public class SemanticFieldProcessorTests extends OpenSearchTestCase {
5960
@Mock
@@ -80,6 +81,9 @@ public class SemanticFieldProcessorTests extends OpenSearchTestCase {
8081

8182
@Before
8283
public void setup() {
84+
// Initialize EventStatsManager for tests
85+
TestUtils.initializeEventStatsManager();
86+
8387
MockitoAnnotations.openMocks(this);
8488
// mock env
8589
final Settings settings = Settings.builder()

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderRewriteTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.opensearch.neuralsearch.processor.TextInferenceRequest;
3939
import org.opensearch.neuralsearch.util.NeuralSearchClusterTestUtils;
4040
import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil;
41+
import org.opensearch.neuralsearch.util.TestUtils;
4142
import org.opensearch.test.OpenSearchTestCase;
4243
import org.opensearch.transport.client.Client;
4344

@@ -99,6 +100,8 @@ public class NeuralQueryBuilderRewriteTests extends OpenSearchTestCase {
99100
public void setup() throws Exception {
100101
mlClient = mock(MLCommonsClientAccessor.class);
101102
NeuralQueryBuilder.initialize(mlClient);
103+
// Initialize EventStatsManager for tests
104+
TestUtils.initializeEventStatsManager();
102105
}
103106

104107
@SneakyThrows

src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.opensearch.index.query.QueryShardContext;
6161
import org.opensearch.knn.index.query.rescore.RescoreContext;
6262
import org.opensearch.neuralsearch.common.MinClusterVersionUtil;
63+
import org.opensearch.neuralsearch.util.TestUtils;
6364
import org.opensearch.test.OpenSearchTestCase;
6465

6566
import lombok.SneakyThrows;
@@ -90,7 +91,10 @@ public class NeuralQueryBuilderTests extends OpenSearchTestCase {
9091
private static final QueryBuilder ADDITIONAL_TEST_FILTER = new TermQueryBuilder(TERM_QUERY_FIELD_NAME, TERM_QUERY_FIELD_VALUE);
9192

9293
@Before
93-
public void setup() throws Exception {}
94+
public void setup() throws Exception {
95+
// Initialize EventStatsManager for tests
96+
TestUtils.initializeEventStatsManager();
97+
}
9498

9599
@SneakyThrows
96100
public void testFromXContent_whenBuiltWithDefaults_thenBuildSuccessfully() {

src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.index.analysis.AnalyzerScope;
5050
import org.opensearch.index.analysis.IndexAnalyzers;
5151
import org.opensearch.index.analysis.NamedAnalyzer;
52+
import org.opensearch.neuralsearch.util.TestUtils;
5253
import org.opensearch.transport.client.Client;
5354
import org.opensearch.cluster.service.ClusterService;
5455
import org.opensearch.common.SetOnce;
@@ -87,8 +88,11 @@ public class NeuralSparseQueryBuilderTests extends OpenSearchTestCase {
8788
private static final Supplier<Map<String, Float>> QUERY_TOKENS_SUPPLIER = () -> Map.of("hello", 1.f, "world", 2.f);
8889

8990
@Before
90-
public void setupClusterServiceToCurrentVersion() {
91+
public void setup() {
9192
setUpClusterService(Version.CURRENT);
93+
94+
// Initialize EventStatsManager for tests
95+
TestUtils.initializeEventStatsManager();
9296
}
9397

9498
@SneakyThrows

0 commit comments

Comments
 (0)