Skip to content

Commit ffb1a30

Browse files
westey-mglorious-beard
authored and committed
.Net: Add hybrid search sample (microsoft#11262)
### Description Closes microsoft#10896 Add hybrid search sample ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄
1 parent a6db655 commit ffb1a30

8 files changed

+156
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using Azure;
4+
using Azure.Identity;
5+
using Azure.Search.Documents.Indexes;
6+
using Microsoft.Extensions.VectorData;
7+
using Microsoft.SemanticKernel.Connectors.AzureAISearch;
8+
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
9+
using Microsoft.SemanticKernel.Embeddings;
10+
11+
namespace Memory;
12+
13+
/// <summary>
/// A simple example showing how to ingest data into a vector store and then use hybrid search to find related records to a given string and set of keywords.
/// Each hybrid search call passes both an embedding vector for the search string and a list of keywords.
///
/// The example shows the following steps:
/// 1. Create an embedding generator.
/// 2. Create an AzureAISearch Vector Store.
/// 3. Ingest some data into the vector store.
/// 4. Do a hybrid search on the vector store with various text+keyword and filtering options.
/// </summary>
public class VectorStore_HybridSearch_Simple_AzureAISearch(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Ingests the sample glossary into the "skglossary" collection and runs three hybrid searches:
    /// two unfiltered searches that print the top result, and one search filtered on
    /// <see cref="Glossary.Category"/> that prints every returned record with its score.
    /// </summary>
    [Fact]
    public async Task IngestDataAndUseHybridSearch()
    {
        // Create an embedding generation service.
        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            new AzureCliCredential());

        // Construct the AzureAISearch VectorStore.
        var searchIndexClient = new SearchIndexClient(
            new Uri(TestConfiguration.AzureAISearch.Endpoint),
            new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey));
        var vectorStore = new AzureAISearchVectorStore(searchIndexClient);

        // Get and create collection if it doesn't exist.
        var collection = vectorStore.GetCollection<string, Glossary>("skglossary");
        await collection.CreateCollectionIfNotExistsAsync();

        // Hybrid search is exposed through a separate interface, so cast the collection to it.
        var hybridSearchCollection = (IKeywordHybridSearch<Glossary>)collection;

        // Create glossary entries and generate embeddings for them.
        // Embedding generation is already asynchronous, so the async lambda is used directly
        // instead of being wrapped in Task.Run; Task.WhenAll still runs the requests concurrently.
        var glossaryEntries = CreateGlossaryEntries().ToList();
        await Task.WhenAll(glossaryEntries.Select(async entry =>
        {
            entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
        }));

        // Upsert the glossary entries into the collection. The returned keys are not needed by this sample.
        await Task.WhenAll(glossaryEntries.Select(entry => collection.UpsertAsync(entry)));

        // Search the collection using a hybrid search (embedding vector + keywords).
        var searchString = "What is an Application Programming Interface";
        var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
        var searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Application", "Programming", "Interface"], new() { Top = 1 });
        var resultRecords = await searchResult.Results.ToListAsync();

        // First() throws if the search returned nothing, which makes the sample fail visibly.
        Console.WriteLine("Search string: " + searchString);
        Console.WriteLine("Result: " + resultRecords.First().Record.Definition);
        Console.WriteLine();

        // Search the collection using a hybrid search with a different search string and keywords.
        searchString = "What is Retrieval Augmented Generation";
        searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 1 });
        resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine("Search string: " + searchString);
        Console.WriteLine("Result: " + resultRecords.First().Record.Definition);
        Console.WriteLine();

        // Search the collection using a hybrid search with pre-filtering.
        searchString = "What is Retrieval Augmented Generation";
        searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 3, Filter = g => g.Category == "External Definitions" });
        resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine("Search string: " + searchString);
        Console.WriteLine("Number of results: " + resultRecords.Count);

        // Print every returned record rather than indexing a fixed number of them, so the
        // output is correct whether the filter matches fewer or more than two records.
        for (var i = 0; i < resultRecords.Count; i++)
        {
            Console.WriteLine("Result " + (i + 1) + " Score: " + resultRecords[i].Score);
            Console.WriteLine("Result " + (i + 1) + ": " + resultRecords[i].Record.Definition);
        }
    }

    /// <summary>
    /// Sample model class that represents a glossary entry.
    /// </summary>
    /// <remarks>
    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store.
    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
    /// </remarks>
    private sealed class Glossary
    {
        /// <summary>The record key.</summary>
        [VectorStoreRecordKey]
        public string Key { get; set; }

        /// <summary>The entry's category; marked filterable and used by the filtered search in this sample.</summary>
        [VectorStoreRecordData(IsFilterable = true)]
        public string Category { get; set; }

        /// <summary>The term being defined.</summary>
        [VectorStoreRecordData]
        public string Term { get; set; }

        /// <summary>The definition text; marked full-text searchable and used as the source for the embedding.</summary>
        [VectorStoreRecordData(IsFullTextSearchable = true)]
        public string Definition { get; set; }

        /// <summary>The embedding generated from <see cref="Definition"/>, declared with 1536 dimensions.</summary>
        [VectorStoreRecordVector(1536)]
        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
    }

    /// <summary>
    /// Create some sample glossary entries.
    /// </summary>
    /// <returns>A lazily-evaluated sequence of three sample glossary entries without embeddings.</returns>
    private static IEnumerable<Glossary> CreateGlossaryEntries()
    {
        yield return new Glossary
        {
            Key = "1",
            Category = "External Definitions",
            Term = "API",
            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
        };

        yield return new Glossary
        {
            Key = "2",
            Category = "Core Definitions",
            Term = "Connectors",
            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
        };

        yield return new Glossary
        {
            Key = "3",
            Category = "External Definitions",
            Term = "RAG",
            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
        };
    }
}

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_AzureAISearch.cs

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ namespace Memory;
2020
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
2121
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
2222
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
23+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
2324
///
2425
/// To run this sample, you need an already existing Azure AI Search instance.
2526
/// To set your secrets use:

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ namespace Memory;
1515
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
1616
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
1717
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
18+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
1819
/// </summary>
1920
/// <param name="vectorStore">The vector store to ingest data into.</param>
2021
/// <param name="textEmbeddingGenerationService">The service to use for generating embeddings.</param>

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_InMemory.cs

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ namespace Memory;
1818
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
1919
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
2020
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
21+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
2122
/// </summary>
2223
public class VectorStore_VectorSearch_MultiStore_InMemory(ITestOutputHelper output) : BaseTest(output)
2324
{

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Postgres.cs

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ namespace Memory;
1818
///
1919
/// For other databases, see the following classes:
2020
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
21+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
2122
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
2223
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
2324
///

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Qdrant.cs

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ namespace Memory;
2020
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
2121
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
2222
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
23+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
2324
///
2425
/// To run this sample, you need a local instance of Docker running, since the associated fixture will try and start a Qdrant container in the local docker instance.
2526
/// </summary>

dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Redis.cs

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ namespace Memory;
2020
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
2121
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
2222
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
23+
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
2324
///
2425
/// Redis supports two record storage types: Json and HashSet.
2526
/// Note the use of the <see cref="RedisStorageType"/> enum to specify the preferred storage type.

dotnet/samples/Concepts/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
138138
- [VectorStore_DataIngestion_Simple: A simple example of how to do data ingestion into a vector store when getting started.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs)
139139
- [VectorStore_DataIngestion_MultiStore: An example of data ingestion that uses the same code to ingest into multiple vector stores types.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs)
140140
- [VectorStore_DataIngestion_CustomMapper: An example that shows how to use a custom mapper for when your data model and storage model don't match.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs)
141+
- [VectorStore_VectorSearch_Simple: A simple example of how to do data ingestion into a vector store and then do a vector similarity search over the data.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Simple.cs)
142+
- [VectorStore_VectorSearch_Paging: An example showing how to do vector search with paging.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Paging.cs)
143+
- [VectorStore_VectorSearch_MultiVector: An example showing how to pick a target vector when doing vector search on a record that contains multiple vectors.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiVector.cs)
144+
- [VectorStore_VectorSearch_MultiStore_Common: An example showing how to write vector database agnostic code with different vector databases.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs)
145+
- [VectorStore_HybridSearch_Simple_AzureAISearch: An example showing how to do hybrid search using AzureAISearch.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_HybridSearch_Simple_AzureAISearch.cs)
141146
- [VectorStore_GenericDataModel_Interop: An example that shows how you can use the built-in, generic data model from Semantic Kernel to read and write to a Vector Store.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs)
142147
- [VectorStore_ConsumeFromMemoryStore_AzureAISearch: An example that shows how you can use the AzureAISearchVectorStore to consume data that was ingested using the AzureAISearchMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_AzureAISearch.cs)
143148
- [VectorStore_ConsumeFromMemoryStore_Qdrant: An example that shows how you can use the QdrantVectorStore to consume data that was ingested using the QdrantMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_Qdrant.cs)

0 commit comments

Comments
 (0)