Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

.Net: Add hybrid search sample #11262

Merged
merged 8 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright (c) Microsoft. All rights reserved.

using Azure;
using Azure.Identity;
using Azure.Search.Documents.Indexes;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.AzureAISearch;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Embeddings;

namespace Memory;

/// <summary>
/// A simple example showing how to ingest data into a vector store and then use hybrid search
/// (vector similarity combined with keyword matching) to find records related to a given string and set of keywords.
///
/// The example shows the following steps:
/// 1. Create an embedding generator.
/// 2. Create an AzureAISearch Vector Store.
/// 3. Ingest some data into the vector store.
/// 4. Do a hybrid search on the vector store with various text+keyword and filtering options.
/// </summary>
public class VectorStore_HybridSearch_Simple_AzureAISearch(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Ingests the sample glossary into the "skglossary" collection and runs three hybrid searches against it:
    /// two unfiltered searches returning the top match, and one search restricted to a single category.
    /// </summary>
    [Fact]
    public async Task IngestDataAndUseHybridSearch()
    {
        // Create an embedding generation service.
        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            new AzureCliCredential());

        // Construct the AzureAISearch VectorStore.
        var searchIndexClient = new SearchIndexClient(
            new Uri(TestConfiguration.AzureAISearch.Endpoint),
            new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey));
        var vectorStore = new AzureAISearchVectorStore(searchIndexClient);

        // Get and create collection if it doesn't exist.
        var collection = vectorStore.GetCollection<string, Glossary>("skglossary");
        await collection.CreateCollectionIfNotExistsAsync();

        // Hybrid search is exposed through a separate interface, so the collection is cast to it.
        var hybridSearchCollection = (IKeywordHybridSearch<Glossary>)collection;

        // Create glossary entries and generate embeddings for them.
        // The embedding calls are already asynchronous, so they are started directly
        // and awaited together instead of being wrapped in Task.Run.
        var glossaryEntries = CreateGlossaryEntries().ToList();
        await Task.WhenAll(glossaryEntries.Select(async entry =>
        {
            entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
        }));

        // Upsert the glossary entries into the collection. The returned keys are not needed here.
        await Task.WhenAll(glossaryEntries.Select(entry => collection.UpsertAsync(entry)));

        // Search the collection using a hybrid search (embedding + keywords), returning only the best match.
        var searchString = "What is an Application Programming Interface";
        var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
        var searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Application", "Programming", "Interface"], new() { Top = 1 });
        var resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine($"Search string: {searchString}");
        Console.WriteLine($"Result: {resultRecords.First().Record.Definition}");
        Console.WriteLine();

        // Search the collection using a hybrid search with a different query.
        searchString = "What is Retrieval Augmented Generation";
        searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 1 });
        resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine($"Search string: {searchString}");
        Console.WriteLine($"Result: {resultRecords.First().Record.Definition}");
        Console.WriteLine();

        // Search the collection using a hybrid search with pre-filtering on the Category property.
        // The query text is the same as in the previous search, so its embedding is reused
        // rather than generated a second time.
        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 3, Filter = g => g.Category == "External Definitions" });
        resultRecords = await searchResult.Results.ToListAsync();

        Console.WriteLine($"Search string: {searchString}");
        Console.WriteLine($"Number of results: {resultRecords.Count}");

        // Print every returned record instead of indexing fixed positions, so that a result
        // set smaller than expected does not throw ArgumentOutOfRangeException.
        for (var i = 0; i < resultRecords.Count; i++)
        {
            Console.WriteLine($"Result {i + 1} Score: {resultRecords[i].Score}");
            Console.WriteLine($"Result {i + 1}: {resultRecords[i].Record.Definition}");
        }
    }

    /// <summary>
    /// Sample model class that represents a glossary entry.
    /// </summary>
    /// <remarks>
    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store.
    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
    /// </remarks>
    private sealed class Glossary
    {
        /// <summary>Unique key of the record.</summary>
        [VectorStoreRecordKey]
        public string Key { get; set; }

        /// <summary>Category of the entry; marked filterable and used by the filtered search in this sample.</summary>
        [VectorStoreRecordData(IsFilterable = true)]
        public string Category { get; set; }

        /// <summary>The term being defined.</summary>
        [VectorStoreRecordData]
        public string Term { get; set; }

        /// <summary>Definition text; marked full-text searchable and used as the source text for the embedding.</summary>
        [VectorStoreRecordData(IsFullTextSearchable = true)]
        public string Definition { get; set; }

        /// <summary>Embedding generated from <see cref="Definition"/>; declared with 1536 dimensions.</summary>
        [VectorStoreRecordVector(1536)]
        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
    }

    /// <summary>
    /// Create some sample glossary entries.
    /// </summary>
    /// <returns>A lazily evaluated sequence of three sample glossary entries without embeddings.</returns>
    private static IEnumerable<Glossary> CreateGlossaryEntries()
    {
        yield return new Glossary
        {
            Key = "1",
            Category = "External Definitions",
            Term = "API",
            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
        };

        yield return new Glossary
        {
            Key = "2",
            Category = "Core Definitions",
            Term = "Connectors",
            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
        };

        yield return new Glossary
        {
            Key = "3",
            Category = "External Definitions",
            Term = "RAG",
            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
        };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace Memory;
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
///
/// To run this sample, you need an already existing Azure AI Search instance.
/// To set your secrets use:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Memory;
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
/// </summary>
/// <param name="vectorStore">The vector store to ingest data into.</param>
/// <param name="textEmbeddingGenerationService">The service to use for generating embeddings.</param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace Memory;
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
/// </summary>
public class VectorStore_VectorSearch_MultiStore_InMemory(ITestOutputHelper output) : BaseTest(output)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace Memory;
///
/// For other databases, see the following classes:
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace Memory;
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
///
/// To run this sample, you need a local instance of Docker running, since the associated fixture will try and start a Qdrant container in the local docker instance.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace Memory;
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
///
/// Redis supports two record storage types: Json and HashSet.
/// Note the use of the <see cref="RedisStorageType"/> enum to specify the preferred storage type.
Expand Down
5 changes: 5 additions & 0 deletions dotnet/samples/Concepts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
- [VectorStore_DataIngestion_Simple: A simple example of how to do data ingestion into a vector store when getting started.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs)
- [VectorStore_DataIngestion_MultiStore: An example of data ingestion that uses the same code to ingest into multiple vector store types.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs)
- [VectorStore_DataIngestion_CustomMapper: An example that shows how to use a custom mapper for when your data model and storage model don't match.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs)
- [VectorStore_VectorSearch_Simple: A simple example of how to do data ingestion into a vector store and then do a vector similarity search over the data.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Simple.cs)
- [VectorStore_VectorSearch_Paging: An example showing how to do vector search with paging.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Paging.cs)
- [VectorStore_VectorSearch_MultiVector: An example showing how to pick a target vector when doing vector search on a record that contains multiple vectors.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiVector.cs)
- [VectorStore_VectorSearch_MultiStore_Common: An example showing how to write vector database agnostic code with different vector databases.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs)
- [VectorStore_HybridSearch_Simple_AzureAISearch: An example showing how to do hybrid search using AzureAISearch.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_HybridSearch_Simple_AzureAISearch.cs)
- [VectorStore_GenericDataModel_Interop: An example that shows how you can use the built-in, generic data model from Semantic Kernel to read and write to a Vector Store.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs)
- [VectorStore_ConsumeFromMemoryStore_AzureAISearch: An example that shows how you can use the AzureAISearchVectorStore to consume data that was ingested using the AzureAISearchMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_AzureAISearch.cs)
- [VectorStore_ConsumeFromMemoryStore_Qdrant: An example that shows how you can use the QdrantVectorStore to consume data that was ingested using the QdrantMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_Qdrant.cs)
Expand Down
Loading