Skip to content

Commit 607db38

Browse files
authored
Merge pull request #237 from milderhc/vector-attributes-defaults
Adjust IndexKind and DistanceFunction defaults
2 parents 8bee95b + e8daf1e commit 607db38

File tree

22 files changed

+117
-104
lines changed

aiservices/openai/src/test/java/com/microsoft/semantickernel/aiservices/openai/chatcompletion/JsonSchemaTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public void jacksonGenerationTest() throws JsonProcessingException {
1818
Assertions.assertEquals("foo", format.getJsonSchema().getName());
1919

2020
Assertions.assertTrue(format.getJsonSchema().getSchema()
21-
.replaceAll("\n", "")
21+
.replaceAll("\\r\\n|\\r|\\n", "")
2222
.replaceAll(" +", "")
2323
.contains(
2424
"\"type\":\"object\",\"properties\":{\"bar\":{}}"));

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/Hotel.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
77
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
88
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
9+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
910

1011
import java.util.List;
1112

@@ -37,7 +38,7 @@ public class Hotel {
3738
private final List<Float> dotProduct;
3839

3940
@JsonProperty("indexedSummaryEmbedding")
40-
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = "hnsw", distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
41+
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
4142
private final List<Float> indexedEuclidean;
4243
@VectorStoreRecordDataAttribute
4344
private double rating;

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java

+17-4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import com.microsoft.semantickernel.data.vectorstorage.options.GetRecordOptions;
1818
import com.microsoft.semantickernel.data.vectorstorage.options.VectorSearchOptions;
1919
import com.mysql.cj.jdbc.MysqlDataSource;
20+
21+
import java.io.IOException;
2022
import java.nio.file.Files;
2123
import java.nio.file.Path;
2224
import java.util.ArrayList;
@@ -61,6 +63,16 @@ public enum QueryProvider {
6163
HSQLDB
6264
}
6365

66+
static Path createTempDbFile(String prefix) {
67+
try {
68+
Path file = Files.createTempFile(prefix, ".db");
69+
file.toFile().deleteOnExit();
70+
return file;
71+
} catch (IOException e) {
72+
throw new RuntimeException(e);
73+
}
74+
}
75+
6476
private JDBCVectorStoreRecordCollection<Hotel> buildRecordCollection(QueryProvider provider,
6577
@Nonnull String collectionName) {
6678
SQLVectorStoreQueryProvider queryProvider;
@@ -88,17 +100,18 @@ private JDBCVectorStoreRecordCollection<Hotel> buildRecordCollection(QueryProvid
88100
.build();
89101
break;
90102
case SQLite:
103+
Path sqliteDb = createTempDbFile("sqliteDb");
91104
SQLiteDataSource sqliteDataSource = new SQLiteDataSource();
92-
sqliteDataSource.setUrl("jdbc:sqlite:file:testdb");
105+
sqliteDataSource.setUrl("jdbc:sqlite:file:" + sqliteDb.toFile().getAbsolutePath());
93106
dataSource = sqliteDataSource;
107+
94108
queryProvider = SQLiteVectorStoreQueryProvider.builder()
95-
.withDataSource(sqliteDataSource)
109+
.withDataSource(dataSource)
96110
.build();
97111
break;
98112
case HSQLDB:
99113
try {
100-
Path file = Files.createTempFile("testdb", ".db");
101-
file.toFile().deleteOnExit();
114+
Path file = createTempDbFile("testHSQLDB");
102115

103116
Properties properties = new Properties();
104117
properties.putAll(

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreTest.java

+7-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.testcontainers.utility.DockerImageName;
2222

2323
import javax.sql.DataSource;
24+
import java.io.IOException;
2425
import java.nio.file.Files;
2526
import java.nio.file.Path;
2627
import java.util.Arrays;
@@ -30,6 +31,7 @@
3031

3132
import com.microsoft.semantickernel.tests.connectors.memory.jdbc.JDBCVectorStoreRecordCollectionTest.QueryProvider;
3233

34+
import static com.microsoft.semantickernel.tests.connectors.memory.jdbc.JDBCVectorStoreRecordCollectionTest.createTempDbFile;
3335
import static org.junit.jupiter.api.Assertions.assertEquals;
3436
import static org.junit.jupiter.api.Assertions.assertNotNull;
3537
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -72,17 +74,18 @@ private JDBCVectorStore buildVectorStore(QueryProvider provider) {
7274
.build();
7375
break;
7476
case SQLite:
77+
Path sqliteDb = createTempDbFile("testSQLite");
7578
SQLiteDataSource sqliteDataSource = new SQLiteDataSource();
76-
sqliteDataSource.setUrl("jdbc:sqlite:file:test");
79+
sqliteDataSource.setUrl("jdbc:sqlite:file:" + sqliteDb.toFile().getAbsolutePath());
7780
dataSource = sqliteDataSource;
81+
7882
queryProvider = SQLiteVectorStoreQueryProvider.builder()
79-
.withDataSource(sqliteDataSource)
83+
.withDataSource(dataSource)
8084
.build();
8185
break;
8286
case HSQLDB:
8387
try {
84-
Path file = Files.createTempFile("testdb", ".db");
85-
file.toFile().deleteOnExit();
88+
Path file = createTempDbFile("testHSQLDB");
8689

8790
Properties properties = new Properties();
8891
properties.putAll(

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/Hotel.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
77
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
88
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
9+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
10+
911
import java.util.List;
1012

1113
public class Hotel {
@@ -24,15 +26,15 @@ public class Hotel {
2426
private final String description;
2527

2628
@JsonProperty("summaryEmbedding1")
27-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
29+
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
2830
private final List<Float> euclidean;
2931

3032
@JsonProperty("summaryEmbedding2")
31-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
33+
@VectorStoreRecordVectorAttribute(dimensions = 8)
3234
private final List<Float> cosineDistance;
3335

3436
@JsonProperty("summaryEmbedding3")
35-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
37+
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.DOT_PRODUCT)
3638
private final List<Float> dotProduct;
3739
@VectorStoreRecordDataAttribute
3840
private double rating;

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.Map;
2121
import java.util.stream.Collectors;
2222
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
23+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
2324
import reactor.core.publisher.Flux;
2425
import reactor.core.publisher.Mono;
2526

@@ -44,7 +45,7 @@ static class GitHubFile {
4445
private final String description;
4546
@VectorStoreRecordDataAttribute
4647
private final String link;
47-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
48+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
4849
private final List<Float> embedding;
4950

5051
public GitHubFile(

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.util.List;
2929
import java.util.Map;
3030
import java.util.stream.Collectors;
31+
32+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
3133
import reactor.core.publisher.Flux;
3234
import reactor.core.publisher.Mono;
3335

@@ -58,7 +60,7 @@ static class GitHubFile {
5860
private final String description;
5961
@VectorStoreRecordDataAttribute
6062
private final String link;
61-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
63+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
6264
private final List<Float> embedding;
6365

6466
public GitHubFile() {

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithJDBC.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static class GitHubFile {
4949
private final String description;
5050
@VectorStoreRecordDataAttribute
5151
private final String link;
52-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
52+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, distanceFunction = DistanceFunction.COSINE_DISTANCE)
5353
private final List<Float> embedding;
5454

5555
public GitHubFile() {

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithRedis.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.stream.Collectors;
2929

3030
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
31+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
3132
import reactor.core.publisher.Flux;
3233
import reactor.core.publisher.Mono;
3334
import redis.clients.jedis.JedisPooled;
@@ -52,7 +53,7 @@ public static class GitHubFile {
5253
private final String description;
5354
@VectorStoreRecordDataAttribute
5455
private final String link;
55-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
56+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
5657
private final List<Float> embedding;
5758

5859
public GitHubFile() {

semantickernel-api/src/test/java/com/microsoft/semantickernel/templateengine/handlebars/HandlebarsPromptTemplateTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ public void testSerializesObject() {
156156

157157
String result = instance.renderAsync(Kernel.builder().build(), arguments, null)
158158
.block();
159-
Assertions.assertEquals(expResult, result.replaceAll("\\n", ""));
159+
Assertions.assertEquals(expResult, result.replaceAll("\\r\\n|\\r|\\n", ""));
160160
}
161161

162162
@Test

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreCollectionCreateMapping.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmConfiguration;
1111
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmMetric;
1212
import com.azure.search.documents.indexes.models.VectorSearchProfile;
13+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
14+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
1315
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField;
1416
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordKeyField;
1517
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordVectorField;
@@ -31,7 +33,7 @@ private static String getAlgorithmConfigName(VectorStoreRecordVectorField vector
3133

3234
private static VectorSearchAlgorithmMetric getAlgorithmMetric(
3335
@Nonnull VectorStoreRecordVectorField vectorField) {
34-
if (vectorField.getDistanceFunction() == null) {
36+
if (vectorField.getDistanceFunction() == DistanceFunction.UNDEFINED) {
3537
return VectorSearchAlgorithmMetric.COSINE;
3638
}
3739

@@ -50,7 +52,7 @@ private static VectorSearchAlgorithmMetric getAlgorithmMetric(
5052

5153
private static VectorSearchAlgorithmConfiguration getAlgorithmConfig(
5254
@Nonnull VectorStoreRecordVectorField vectorField) {
53-
if (vectorField.getIndexKind() == null) {
55+
if (vectorField.getIndexKind() == IndexKind.UNDEFINED) {
5456
return new HnswAlgorithmConfiguration(getAlgorithmConfigName(vectorField))
5557
.setParameters(new HnswParameters().setMetric(getAlgorithmMetric(vectorField)));
5658
}

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreQueryProvider.java

+8-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
66
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper;
77
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
8+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
89
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition;
910
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordField;
1011
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordVectorField;
@@ -263,7 +264,9 @@ public void createCollection(String collectionName,
263264

264265
// No approximate search is supported in JDBCVectorStoreQueryProvider
265266
if (recordDefinition.getVectorFields().stream()
266-
.anyMatch(field -> field.getIndexKind() != null)) {
267+
.anyMatch(
268+
field -> field.getIndexKind() != null && field.getIndexKind() != IndexKind.FLAT
269+
&& field.getIndexKind() != IndexKind.UNDEFINED)) {
267270
LOGGER
268271
.warn(String.format("Indexes are not supported in %s. Ignoring indexKind property.",
269272
this.getClass().getName()));
@@ -532,9 +535,10 @@ public <Record> List<VectorSearchResult<Record>> search(String collectionName,
532535
List<Record> records = getRecordsWithFilter(collectionName, recordDefinition, mapper,
533536
new GetRecordOptions(true), filter, parameters);
534537

535-
DistanceFunction distanceFunction = vectorField.getDistanceFunction() == null
536-
? DistanceFunction.EUCLIDEAN_DISTANCE
537-
: vectorField.getDistanceFunction();
538+
DistanceFunction distanceFunction = vectorField
539+
.getDistanceFunction() == DistanceFunction.UNDEFINED
540+
? DistanceFunction.EUCLIDEAN_DISTANCE
541+
: vectorField.getDistanceFunction();
538542

539543
return VectorOperations.exactSimilaritySearch(records, vector, vectorField,
540544
distanceFunction, options);

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorDistanceFunction.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
public enum PostgreSQLVectorDistanceFunction {
77
L2("vector_l2_ops", "<->"), COSINE("vector_cosine_ops", "<=>"), INNER_PRODUCT("vector_ip_ops",
8-
"<#>");
8+
"<#>"), UNDEFINED(null, null);
99

1010
private final String value;
1111
private final String operator;
@@ -24,17 +24,15 @@ public String getOperator() {
2424
}
2525

2626
public static PostgreSQLVectorDistanceFunction fromDistanceFunction(DistanceFunction function) {
27-
if (function == null) {
28-
return null;
29-
}
30-
3127
switch (function) {
3228
case EUCLIDEAN_DISTANCE:
3329
return L2;
3430
case COSINE_DISTANCE:
3531
return COSINE;
3632
case DOT_PRODUCT:
3733
return INNER_PRODUCT;
34+
case UNDEFINED:
35+
return UNDEFINED;
3836
default:
3937
throw new IllegalArgumentException("Unsupported distance function: " + function);
4038
}

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorIndexKind.java

+5-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
55

66
public enum PostgreSQLVectorIndexKind {
7-
HNSW("hnsw"), IVFFLAT("ivfflat");
7+
HNSW("hnsw"), IVFFLAT("ivfflat"), UNDEFINED(null);
88

99
private final String value;
1010

@@ -17,15 +17,14 @@ public String getValue() {
1717
}
1818

1919
public static PostgreSQLVectorIndexKind fromIndexKind(IndexKind indexKind) {
20-
if (indexKind == null) {
21-
return null;
22-
}
23-
2420
switch (indexKind) {
2521
case HNSW:
2622
return HNSW;
27-
case FLAT:
23+
case IVFFLAT:
2824
return IVFFLAT;
25+
case FLAT:
26+
case UNDEFINED:
27+
return UNDEFINED;
2928
default:
3029
throw new IllegalArgumentException("Unsupported index kind: " + indexKind);
3130
}

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorStoreQueryProvider.java

+9-8
Original file line numberDiff line numberDiff line change
@@ -164,12 +164,12 @@ private String createIndexForVectorField(String collectionName,
164164
PostgreSQLVectorDistanceFunction distanceFunction = PostgreSQLVectorDistanceFunction
165165
.fromDistanceFunction(vectorField.getDistanceFunction());
166166

167-
// If indexKind is not specified, no index is created
168-
// and pgvector performs exact nearest neighbor search.
169-
if (indexKind == null) {
167+
// If there is no approximate search index associated to the vector field,
168+
// there is no need to create an index and pgvector performs exact nearest neighbor search.
169+
if (indexKind == PostgreSQLVectorIndexKind.UNDEFINED) {
170170
return null;
171171
}
172-
if (distanceFunction == null) {
172+
if (distanceFunction == PostgreSQLVectorDistanceFunction.UNDEFINED) {
173173
throw new SKException(
174174
"Distance function is required for vector field: " + vectorField.getName());
175175
}
@@ -358,10 +358,11 @@ public <Record> List<VectorSearchResult<Record>> search(String collectionName,
358358
PostgreSQLVectorDistanceFunction distanceFunction = PostgreSQLVectorDistanceFunction
359359
.fromDistanceFunction(vectorField.getDistanceFunction());
360360

361-
// If indexKind is not specified, there is no index associated to the vector field
362-
// and pgvector performs exact nearest neighbor search.
363-
// If indexKind is specified, a distance function is required.
364-
if (indexKind != null && distanceFunction == null) {
361+
// If there is no approximate search index associated to the vector field,
362+
// there is no index defined in the database and pgvector performs exact nearest neighbor search.
363+
// If indexKind is defined, distance function is required.
364+
if (indexKind != PostgreSQLVectorIndexKind.UNDEFINED
365+
&& distanceFunction == PostgreSQLVectorDistanceFunction.UNDEFINED) {
365366
throw new SKException(
366367
"Distance function is required for vector field: " + vectorField.getName());
367368
}

semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreCollectionCreateMapping.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) Microsoft. All rights reserved.
22
package com.microsoft.semantickernel.connectors.data.redis;
33

4+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
5+
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
46
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField;
57
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordField;
68
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordKeyField;
@@ -32,8 +34,8 @@ public class RedisVectorStoreCollectionCreateMapping {
3234

3335
private static String getAlgorithmMetric(
3436
VectorStoreRecordVectorField vectorField) {
35-
if (vectorField.getDistanceFunction() == null) {
36-
return RedisVectorDistanceMetric.EUCLIDEAN;
37+
if (vectorField.getDistanceFunction() == DistanceFunction.UNDEFINED) {
38+
return RedisVectorDistanceMetric.COSINE;
3739
}
3840

3941
switch (vectorField.getDistanceFunction()) {
@@ -51,7 +53,7 @@ private static String getAlgorithmMetric(
5153

5254
private static Schema.VectorField.VectorAlgo getAlgorithmConfig(
5355
VectorStoreRecordVectorField vectorField) {
54-
if (vectorField.getIndexKind() == null) {
56+
if (vectorField.getIndexKind() == IndexKind.UNDEFINED) {
5557
return Schema.VectorField.VectorAlgo.HNSW;
5658
}
5759

0 commit comments

Comments
 (0)