Skip to content

Commit 331f29e

Browse files
[Star Tree] Lucene Abstractions for Star Tree File Formats (#15278) (#15436)
--------- Signed-off-by: Sarthak Aggarwal <[email protected]> (cherry picked from commit 9e5604b) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 4241c13 commit 331f29e

17 files changed

+588
-7
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.codecs.lucene90;
10+
11+
import org.apache.lucene.codecs.DocValuesConsumer;
12+
import org.apache.lucene.index.SegmentWriteState;
13+
14+
import java.io.Closeable;
15+
import java.io.IOException;
16+
17+
/**
18+
* This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure.
19+
* It is responsible to consume various types of document values (numeric, binary, sorted, sorted numeric,
20+
* and sorted set) for fields in the Star Tree index.
21+
*
22+
* @opensearch.experimental
23+
*/
24+
public class Lucene90DocValuesConsumerWrapper implements Closeable {
25+
26+
private final Lucene90DocValuesConsumer lucene90DocValuesConsumer;
27+
28+
public Lucene90DocValuesConsumerWrapper(
29+
SegmentWriteState state,
30+
String dataCodec,
31+
String dataExtension,
32+
String metaCodec,
33+
String metaExtension
34+
) throws IOException {
35+
lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension);
36+
}
37+
38+
public Lucene90DocValuesConsumer getLucene90DocValuesConsumer() {
39+
return lucene90DocValuesConsumer;
40+
}
41+
42+
@Override
43+
public void close() throws IOException {
44+
lucene90DocValuesConsumer.close();
45+
}
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.codecs.lucene90;
10+
11+
import org.apache.lucene.codecs.DocValuesProducer;
12+
import org.apache.lucene.index.SegmentReadState;
13+
14+
import java.io.Closeable;
15+
import java.io.IOException;
16+
17+
/**
18+
* This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure.
19+
* It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric,
20+
* and sorted set) for fields in the Star Tree index.
21+
*
22+
* @opensearch.experimental
23+
*/
24+
public class Lucene90DocValuesProducerWrapper implements Closeable {
25+
26+
private final Lucene90DocValuesProducer lucene90DocValuesProducer;
27+
28+
public Lucene90DocValuesProducerWrapper(
29+
SegmentReadState state,
30+
String dataCodec,
31+
String dataExtension,
32+
String metaCodec,
33+
String metaExtension
34+
) throws IOException {
35+
lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension);
36+
}
37+
38+
public DocValuesProducer getLucene90DocValuesProducer() {
39+
return lucene90DocValuesProducer;
40+
}
41+
42+
@Override
43+
public void close() throws IOException {
44+
lucene90DocValuesProducer.close();
45+
}
46+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.index;
10+
11+
import org.apache.lucene.util.Counter;
12+
13+
/**
14+
* A wrapper class for writing sorted numeric doc values.
15+
* <p>
16+
* This class provides a convenient way to add sorted numeric doc values to a field
17+
* and retrieve the corresponding {@link SortedNumericDocValues} instance.
18+
*
19+
* @opensearch.experimental
20+
*/
21+
public class SortedNumericDocValuesWriterWrapper {
22+
23+
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;
24+
25+
/**
26+
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance.
27+
*
28+
* @param fieldInfo the field information for the field being written
29+
* @param counter a counter for tracking memory usage
30+
*/
31+
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
32+
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter);
33+
}
34+
35+
/**
36+
* Adds a value to the sorted numeric doc values for the specified document.
37+
*
38+
* @param docID the document ID
39+
* @param value the value to add
40+
*/
41+
public void addValue(int docID, long value) {
42+
sortedNumericDocValuesWriter.addValue(docID, value);
43+
}
44+
45+
/**
46+
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values
47+
*
48+
* @return the {@link SortedNumericDocValues} instance
49+
*/
50+
public SortedNumericDocValues getDocValues() {
51+
return sortedNumericDocValuesWriter.getDocValues();
52+
}
53+
}

server/src/main/java/org/opensearch/index/codec/composite/CompositeCodecFactory.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.codecs.Codec;
1313
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
1414
import org.opensearch.common.annotation.ExperimentalApi;
15+
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
1516
import org.opensearch.index.mapper.MapperService;
1617

1718
import java.util.HashMap;
@@ -29,6 +30,10 @@
2930
*/
3031
@ExperimentalApi
3132
public class CompositeCodecFactory {
33+
34+
// we can use this to track the latest composite codec
35+
public static final String COMPOSITE_CODEC = Composite99Codec.COMPOSITE_INDEX_CODEC_NAME;
36+
3237
public CompositeCodecFactory() {}
3338

3439
public Map<String, Codec> getCompositeIndexCodecs(MapperService mapperService, Logger logger) {
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.composite;
10+
11+
import org.apache.lucene.codecs.DocValuesConsumer;
12+
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper;
13+
import org.apache.lucene.index.SegmentWriteState;
14+
15+
import java.io.IOException;
16+
17+
/**
18+
* A factory class that provides a factory method for creating {@link DocValuesConsumer} instances
19+
* for the latest composite codec.
20+
* <p>
21+
* The segments are written using the latest composite codec. The codec
22+
* internally manages calling the appropriate consumer factory for its abstractions.
23+
* <p>
24+
* This design ensures forward compatibility for writing operations
25+
*
26+
* @opensearch.experimental
27+
*/
28+
public class LuceneDocValuesConsumerFactory {
29+
30+
public static DocValuesConsumer getDocValuesConsumerForCompositeCodec(
31+
SegmentWriteState state,
32+
String dataCodec,
33+
String dataExtension,
34+
String metaCodec,
35+
String metaExtension
36+
) throws IOException {
37+
try (
38+
Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper(
39+
state,
40+
dataCodec,
41+
dataExtension,
42+
metaCodec,
43+
metaExtension
44+
)
45+
) {
46+
return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer();
47+
}
48+
}
49+
50+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.composite;
10+
11+
import org.apache.lucene.codecs.DocValuesProducer;
12+
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper;
13+
import org.apache.lucene.index.SegmentReadState;
14+
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
15+
16+
import java.io.IOException;
17+
18+
/**
19+
* A factory class that provides a factory method for creating {@link DocValuesProducer} instances
20+
* based on the specified composite codec.
21+
* <p>
22+
* In producers, we want to ensure compatibility with older codec versions during the segment reads.
23+
* This approach allows for writing with only the latest codec while maintaining
24+
* the ability to read data encoded with any codec version present in the segment.
25+
* <p>
26+
* This design ensures backward compatibility for reads across different codec versions.
27+
*
28+
* @opensearch.experimental
29+
*/
30+
public class LuceneDocValuesProducerFactory {
31+
32+
public static DocValuesProducer getDocValuesProducerForCompositeCodec(
33+
String compositeCodec,
34+
SegmentReadState state,
35+
String dataCodec,
36+
String dataExtension,
37+
String metaCodec,
38+
String metaExtension
39+
) throws IOException {
40+
41+
switch (compositeCodec) {
42+
case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME:
43+
try (
44+
Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper(
45+
state,
46+
dataCodec,
47+
dataExtension,
48+
metaCodec,
49+
metaExtension
50+
)
51+
) {
52+
return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer();
53+
}
54+
default:
55+
throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]");
56+
}
57+
58+
}
59+
60+
}

server/src/main/java/org/opensearch/index/codec/composite/Composite99Codec.java renamed to server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99Codec.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* compatible open source license.
77
*/
88

9-
package org.opensearch.index.codec.composite;
9+
package org.opensearch.index.codec.composite.composite99;
1010

1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.codecs.Codec;

server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java renamed to server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* compatible open source license.
77
*/
88

9-
package org.opensearch.index.codec.composite;
9+
package org.opensearch.index.codec.composite.composite99;
1010

1111
import org.apache.lucene.codecs.DocValuesConsumer;
1212
import org.apache.lucene.codecs.DocValuesFormat;

server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java renamed to server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* compatible open source license.
77
*/
88

9-
package org.opensearch.index.codec.composite;
9+
package org.opensearch.index.codec.composite.composite99;
1010

1111
import org.apache.lucene.codecs.DocValuesProducer;
1212
import org.apache.lucene.index.BinaryDocValues;
@@ -17,6 +17,9 @@
1717
import org.apache.lucene.index.SortedNumericDocValues;
1818
import org.apache.lucene.index.SortedSetDocValues;
1919
import org.opensearch.common.annotation.ExperimentalApi;
20+
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
21+
import org.opensearch.index.codec.composite.CompositeIndexReader;
22+
import org.opensearch.index.codec.composite.CompositeIndexValues;
2023

2124
import java.io.IOException;
2225
import java.util.ArrayList;

server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java renamed to server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesWriter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* compatible open source license.
77
*/
88

9-
package org.opensearch.index.codec.composite;
9+
package org.opensearch.index.codec.composite.composite99;
1010

1111
import org.apache.lucene.codecs.DocValuesConsumer;
1212
import org.apache.lucene.codecs.DocValuesProducer;
@@ -18,6 +18,9 @@
1818
import org.apache.lucene.index.SegmentWriteState;
1919
import org.apache.lucene.index.SortedNumericDocValues;
2020
import org.opensearch.common.annotation.ExperimentalApi;
21+
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
22+
import org.opensearch.index.codec.composite.CompositeIndexReader;
23+
import org.opensearch.index.codec.composite.CompositeIndexValues;
2124
import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues;
2225
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
2326
import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder;
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
/**
10+
* Responsible for handling all composite index codecs and operations associated with the Composite99 codec.
11+
*/
12+
package org.opensearch.index.codec.composite.composite99;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
org.opensearch.index.codec.composite.Composite99Codec
1+
org.opensearch.index.codec.composite.composite99.Composite99Codec

server/src/test/java/org/opensearch/index/codec/CodecTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
import org.opensearch.env.Environment;
4949
import org.opensearch.index.IndexSettings;
5050
import org.opensearch.index.analysis.IndexAnalyzers;
51-
import org.opensearch.index.codec.composite.Composite99Codec;
51+
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
5252
import org.opensearch.index.engine.EngineConfig;
5353
import org.opensearch.index.mapper.MapperService;
5454
import org.opensearch.index.similarity.SimilarityService;

0 commit comments

Comments
 (0)