Skip to content

Commit 6b4f163

Browse files
committed
Merge branch 'main' into vectorized_exhaustive_evaluation
2 parents 3833804 + 0a40f9e commit 6b4f163

File tree

42 files changed

+882
-610
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+882
-610
lines changed

.github/actions/prepare-for-build/action.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@ runs:
2525
java-version: ${{ inputs.java-version }}
2626
java-package: jdk
2727

28-
# This includes "smart" caching of the wrapper and dependencies.
28+
- name: Cache gradle-wrapper.jar
29+
uses: actions/cache@v4
30+
with:
31+
path: gradle/wrapper/gradle-wrapper.jar
32+
key: gradle-wrapper-${{ hashFiles('gradle/wrapper/gradle-wrapper.jar.sha256') }}
33+
34+
# This includes "smart" caching of gradle dependencies.
2935
- name: Set up Gradle
3036
uses: gradle/actions/setup-gradle@v4
3137
with:

.github/workflows/verify-changelog-and-set-milestone.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
changelog-verifier-and-milestone-setter:
1515
name: Verify Change Log Entry and Set Milestone
1616
runs-on: ubuntu-latest
17+
if: github.event.pull_request.draft == false
1718

1819
steps:
1920
- name: Checkout repository

build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ public void run(Path destination) throws IOException, NoSuchAlgorithmException {
159159
}
160160

161161
switch (connection.getResponseCode()) {
162+
case /* TOO_MANY_REQUESTS */ 429:
163+
// it may not be possible to recover from this using a short delay
164+
// but try anyway.
162165
case HttpURLConnection.HTTP_INTERNAL_ERROR:
163166
case HttpURLConnection.HTTP_UNAVAILABLE:
164167
case HttpURLConnection.HTTP_BAD_GATEWAY:

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ apply from: file('gradle/documentation/markdown.gradle')
200200
apply from: file('gradle/documentation/render-javadoc.gradle')
201201
apply from: file('gradle/documentation/check-broken-links.gradle')
202202

203+
apply from: file('gradle/hacks/downloader-retries.gradle')
203204
apply from: file('gradle/hacks/gradle-archives.gradle')
204205
apply from: file('gradle/hacks/wipe-temp.gradle')
205206
apply from: file('gradle/hacks/hashmapAssertions.gradle')
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
allprojects {project ->
19+
// Limit configuration to just those projects that actually have the plugin enabled.
20+
plugins.matching { Plugin plugin ->
21+
return plugin.class.name == "de.undercouch.gradle.tasks.download.DownloadTaskPlugin"
22+
}.configureEach {
23+
project.tasks.withType(Download).configureEach {
24+
it.retries 3
25+
}
26+
}
27+
}

gradle/libs.versions.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ procfork = "1.0.6"
5151
# unit tests
5252
randomizedtesting = "2.8.3"
5353
# license checks
54-
rat = "0.14"
54+
rat = "0.15"
5555
# spatial-extras/ support
5656
s2-geometry = "1.0.0"
5757
# spatial-extras/ support

gradlew

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,12 @@ if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
222222
fi
223223

224224
GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar"
225-
if ! ( cd "$APP_HOME/gradle/wrapper" && sha256sum --status -c "${GRADLE_WRAPPER_JAR}.sha256" ); then
225+
if "$darwin"; then
226+
shasumcmd=shasum
227+
else
228+
shasumcmd=sha256sum
229+
fi
230+
if [ ! -e "$GRADLE_WRAPPER_JAR" ] || ! ( cd "$APP_HOME/gradle/wrapper" && "$shasumcmd" --status -c "${GRADLE_WRAPPER_JAR}.sha256" ); then
226231
"$JAVACMD" $JAVA_OPTS "$APP_HOME/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR"
227232
WRAPPER_STATUS=$?
228233
if [ "$WRAPPER_STATUS" -eq 1 ]; then

lucene/CHANGES.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ Improvements
9090
* GITHUB#14602: Refactor the expressions compiler to use official ClassData BSM with indexed lookup
9191
(Uwe Schindler)
9292

93+
* GITHUB#14443: Support adaptive refresh in Searcher Managers (Vigya Sharma)
94+
9395
Optimizations
9496
---------------------
9597
* GITHUB#14418: Quick exit on filter query matching no docs when rewriting knn query. (Pan Guixin)
@@ -116,7 +118,13 @@ Optimizations
116118

117119
Bug Fixes
118120
---------------------
119-
(No changes)
121+
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when
122+
used if the DoubleValuesSource needed scores. (David Smiley)
123+
124+
* GITHUB#14682: Fix adding char and token filters in Luke Analysis tab. (Amir Raza)
125+
126+
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
127+
instead of UnsupportedOperationException when values are out of order. (Shubham Sharma)
120128

121129
Build
122130
---------------------

lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,19 +194,23 @@ private TopDocs getLeafResults(
194194
final int cost = acceptDocs.cardinality();
195195
QueryTimeout queryTimeout = timeLimitingKnnCollectorManager.getQueryTimeout();
196196

197-
if (cost <= k) {
198-
// If there are <= k possible matches, short-circuit and perform exact search, since HNSW
199-
// must always visit at least k documents
197+
float leafProportion = ctx.reader().maxDoc() / (float) ctx.parent.reader().maxDoc();
198+
int perLeafTopK = perLeafTopKCalculation(k, leafProportion);
199+
200+
if (cost <= perLeafTopK) {
201+
// If there are <= perLeafTopK possible matches, short-circuit and perform exact search, since
202+
// HNSW must always visit at least perLeafTopK documents
200203
return exactSearch(ctx, new BitSetIterator(acceptDocs, cost), queryTimeout);
201204
}
202205

203206
// Perform the approximate kNN search
204207
// We pass cost + 1 here to account for the edge case when we explore exactly cost vectors
205208
TopDocs results = approximateSearch(ctx, acceptDocs, cost + 1, timeLimitingKnnCollectorManager);
209+
206210
if ((results.totalHits.relation() == TotalHits.Relation.EQUAL_TO
207-
// We know that there are more than `k` available docs, if we didn't even get `k`
208-
// something weird happened, and we need to drop to exact search
209-
&& results.scoreDocs.length >= k)
211+
// We know that there are more than `perLeafTopK` available docs. If we didn't even get
212+
// `perLeafTopK`, something weird happened, and we need to drop to exact search
213+
&& results.scoreDocs.length >= perLeafTopK)
210214
// Return partial results only when timeout is met
211215
|| (queryTimeout != null && queryTimeout.shouldExit())) {
212216
return results;

lucene/core/src/java/org/apache/lucene/search/DISIDocIdStream.java

Lines changed: 0 additions & 68 deletions
This file was deleted.

lucene/core/src/java/org/apache/lucene/search/DenseConjunctionBulkScorer.java

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -198,17 +198,7 @@ private int scoreWindow(
198198
int bitsetWindowMax = (int) Math.min(minDocIDRunEnd, (long) WINDOW_SIZE + min);
199199

200200
if (windowTwoPhases.isEmpty()) {
201-
if (acceptDocs == null && windowApproximations.size() == 1) {
202-
// We have a range of doc IDs where all matches of an iterator are matches of the
203-
// conjunction.
204-
DocIdSetIterator iterator = windowApproximations.get(0);
205-
if (iterator.docID() < min) {
206-
iterator.advance(min);
207-
}
208-
collector.collect(new DISIDocIdStream(iterator, bitsetWindowMax, clauseWindowMatches));
209-
} else {
210-
scoreWindowUsingBitSet(collector, acceptDocs, windowApproximations, min, bitsetWindowMax);
211-
}
201+
scoreWindowUsingBitSet(collector, acceptDocs, windowApproximations, min, bitsetWindowMax);
212202
} else {
213203
windowTwoPhases.sort(Comparator.comparingDouble(TwoPhaseIterator::matchCost));
214204
scoreWindowUsingLeapFrog(

lucene/core/src/java/org/apache/lucene/search/HnswQueueSaturationCollector.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,13 @@ public void nextCandidate() {
9494
@Override
9595
public KnnSearchStrategy getSearchStrategy() {
9696
KnnSearchStrategy delegateStrategy = delegate.getSearchStrategy();
97-
assert delegateStrategy instanceof KnnSearchStrategy.Hnsw;
98-
return new KnnSearchStrategy.Patience(
99-
this, ((KnnSearchStrategy.Hnsw) delegateStrategy).filteredSearchThreshold());
97+
if (delegateStrategy instanceof KnnSearchStrategy.Hnsw hnswStrategy) {
98+
return new KnnSearchStrategy.Patience(this, hnswStrategy.filteredSearchThreshold());
99+
} else if (delegateStrategy instanceof KnnSearchStrategy.Seeded seededStrategy) {
100+
if (seededStrategy.originalStrategy() instanceof KnnSearchStrategy.Hnsw hnswStrategy) {
101+
return new KnnSearchStrategy.Patience(this, hnswStrategy.filteredSearchThreshold());
102+
}
103+
}
104+
return delegateStrategy;
100105
}
101106
}

lucene/core/src/java/org/apache/lucene/search/PatienceKnnVectorQuery.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ public class PatienceKnnVectorQuery extends AbstractKnnVectorQuery {
4343

4444
private final int patience;
4545
private final double saturationThreshold;
46-
47-
final AbstractKnnVectorQuery delegate;
46+
private AbstractKnnVectorQuery delegate;
4847

4948
/**
5049
* Construct a new PatienceKnnVectorQuery instance for a float vector field
@@ -234,4 +233,18 @@ public KnnCollector newCollector(
234233
patience);
235234
}
236235
}
236+
237+
@Override
238+
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
239+
if (delegate instanceof SeededKnnVectorQuery seededKnnVectorQuery) {
240+
// this is required because SeededKnnVectorQuery now requires its own rewriting logic (to
241+
// create the seed Weight)
242+
delegate =
243+
new SeededKnnVectorQuery(
244+
seededKnnVectorQuery.delegate,
245+
seededKnnVectorQuery.seed,
246+
seededKnnVectorQuery.createSeedWeight(indexSearcher));
247+
}
248+
return super.rewrite(indexSearcher);
249+
}
237250
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.search;
18+
19+
import java.io.IOException;
20+
import org.apache.lucene.index.DirectoryReader;
21+
import org.apache.lucene.index.IndexCommit;
22+
23+
/**
24+
* Expert: Interface to supply commit for searcher refresh.
25+
*
26+
* @lucene.experimental
27+
*/
28+
public interface RefreshCommitSupplier {
29+
30+
/**
31+
* Expert: Returns the index commit that the searcher should refresh on. A null return value (the
32+
* default) indicates that the reader should refresh on the latest commit.
33+
*
34+
* @param reader DirectoryReader to refresh
35+
*/
36+
default IndexCommit getSearcherRefreshCommit(DirectoryReader reader) throws IOException {
37+
return null;
38+
}
39+
}

0 commit comments

Comments
 (0)