.Net: Add hybrid search sample (microsoft#11262)

westey-m · glorious-beard · commit ffb1a30a991a · 2025-04-06T16:24:26.000-07:00
### Description Closes microsoft#10896 Add hybrid search sample ### Contribution Checklist  - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_HybridSearch_Simple_AzureAISearch.cs b/dotnet/samples/Concepts/Memory/VectorStore_HybridSearch_Simple_AzureAISearch.cs
@@ -0,0 +1,145 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using Azure;
+using Azure.Identity;
+using Azure.Search.Documents.Indexes;
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.AzureAISearch;
+using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
+using Microsoft.SemanticKernel.Embeddings;
+
+namespace Memory;
+
+/// <summary>
+/// A simple example showing how to ingest data into a vector store and then use hybrid search to find records related to a given string and set of keywords.
+///
+/// The example shows the following steps:
+/// 1. Create an embedding generator.
+/// 2. Create an AzureAISearch Vector Store.
+/// 3. Ingest some data into the vector store.
+/// 4. Do a hybrid search on the vector store with various text+keyword and filtering options.
+/// </summary>
+public class VectorStore_HybridSearch_Simple_AzureAISearch(ITestOutputHelper output) : BaseTest(output)
+{
+    [Fact]
+    public async Task IngestDataAndUseHybridSearch()
+    {
+        // Create an embedding generation service.
+        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
+                TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
+                TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
+                new AzureCliCredential());
+
+        // Construct the AzureAISearch VectorStore.
+        var searchIndexClient = new SearchIndexClient(
+            new Uri(TestConfiguration.AzureAISearch.Endpoint),
+            new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey));
+        var vectorStore = new AzureAISearchVectorStore(searchIndexClient);
+
+        // Get and create collection if it doesn't exist, then view it through the keyword hybrid search interface.
+        var collection = vectorStore.GetCollection<string, Glossary>("skglossary");
+        await collection.CreateCollectionIfNotExistsAsync();
+        var hybridSearchCollection = (IKeywordHybridSearch<Glossary>)collection;
+
+        // Create glossary entries and generate embeddings for them; the requests are started together and awaited as a group.
+        var glossaryEntries = CreateGlossaryEntries().ToList();
+        var embeddingTasks = glossaryEntries.Select(async entry =>
+        {
+            entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
+        });
+        await Task.WhenAll(embeddingTasks);
+
+        // Upsert the glossary entries into the collection; the returned keys are not needed by this sample.
+        await Task.WhenAll(glossaryEntries.Select(entry => collection.UpsertAsync(entry)));
+
+        // Search the collection using a hybrid search: the embedding of the search string plus a set of keywords.
+        var searchString = "What is an Application Programming Interface";
+        var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
+        var searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Application", "Programming", "Interface"], new() { Top = 1 });
+        var resultRecords = await searchResult.Results.ToListAsync();
+
+        Console.WriteLine($"Search string: {searchString}");
+        Console.WriteLine($"Result: {resultRecords.First().Record.Definition}");
+        Console.WriteLine();
+
+        // Search the collection using a hybrid search with a different search string and keyword set.
+        searchString = "What is Retrieval Augmented Generation";
+        searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
+        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 1 });
+        resultRecords = await searchResult.Results.ToListAsync();
+
+        Console.WriteLine($"Search string: {searchString}");
+        Console.WriteLine($"Result: {resultRecords.First().Record.Definition}");
+        Console.WriteLine();
+
+        // Search the collection using a hybrid search with pre-filtering on the Category property, printing every returned record.
+        searchString = "What is Retrieval Augmented Generation";
+        searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
+        searchResult = await hybridSearchCollection.HybridSearchAsync(searchVector, ["Retrieval", "Augmented", "Generation"], new() { Top = 3, Filter = g => g.Category == "External Definitions" });
+        resultRecords = await searchResult.Results.ToListAsync();
+
+        Console.WriteLine($"Search string: {searchString}");
+        Console.WriteLine($"Number of results: {resultRecords.Count}");
+        for (var i = 0; i < resultRecords.Count; i++)
+        {
+            Console.WriteLine($"Result {i + 1} Score: {resultRecords[i].Score}");
+            Console.WriteLine($"Result {i + 1}: {resultRecords[i].Record.Definition}");
+        }
+    }
+
+    /// <summary>
+    /// Sample model class that represents a glossary entry.
+    /// </summary>
+    /// <remarks>
+    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store (key, filterable data, full-text searchable data, or vector).
+    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
+    /// </remarks>
+    private sealed class Glossary
+    {
+        [VectorStoreRecordKey]
+        public string Key { get; set; }
+
+        [VectorStoreRecordData(IsFilterable = true)]
+        public string Category { get; set; }
+
+        [VectorStoreRecordData]
+        public string Term { get; set; }
+
+        [VectorStoreRecordData(IsFullTextSearchable = true)]
+        public string Definition { get; set; }
+
+        [VectorStoreRecordVector(1536)]
+        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
+    }
+
+    /// <summary>
+    /// Create some sample glossary entries. The embedding of each entry is left unset and is filled in by the caller.
+    /// </summary>
+    /// <returns>A lazily produced sequence of three sample glossary entries.</returns>
+    private static IEnumerable<Glossary> CreateGlossaryEntries()
+    {
+        yield return new Glossary
+        {
+            Key = "1",
+            Category = "External Definitions",
+            Term = "API",
+            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
+        };
+
+        yield return new Glossary
+        {
+            Key = "2",
+            Category = "Core Definitions",
+            Term = "Connectors",
+            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
+        };
+
+        yield return new Glossary
+        {
+            Key = "3",
+            Category = "External Definitions",
+            Term = "RAG",
+            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
+        };
+    }
+}
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_AzureAISearch.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_AzureAISearch.cs
@@ -20,6 +20,7 @@ namespace Memory;
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
 ///
 /// To run this sample, you need an already existing Azure AI Search instance.
 /// To set your secrets use:
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs
@@ -15,6 +15,7 @@ namespace Memory;
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
 /// </summary>
 /// <param name="vectorStore">The vector store to ingest data into.</param>
 /// <param name="textEmbeddingGenerationService">The service to use for generating embeddings.</param>
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_InMemory.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_InMemory.cs
@@ -18,6 +18,7 @@ namespace Memory;
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
 /// </summary>
 public class VectorStore_VectorSearch_MultiStore_InMemory(ITestOutputHelper output) : BaseTest(output)
 {
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Postgres.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Postgres.cs
@@ -18,6 +18,7 @@ namespace Memory;
 ///
 /// For other databases, see the following classes:
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
 ///
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Qdrant.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Qdrant.cs
@@ -20,6 +20,7 @@ namespace Memory;
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
 ///
 /// To run this sample, you need a local instance of Docker running, since the associated fixture will try and start a Qdrant container in the local docker instance.
 /// </summary>
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Redis.cs b/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Redis.cs
@@ -20,6 +20,7 @@ namespace Memory;
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>
 /// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>
 ///
 /// Redis supports two record storage types: Json and HashSet.
 /// Note the use of the <see cref="RedisStorageType"/> enum to specify the preferred storage type.
diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md
@@ -138,6 +138,11 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
 - [VectorStore_DataIngestion_Simple: A simple example of how to do data ingestion into a vector store when getting started.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs)
 - [VectorStore_DataIngestion_MultiStore: An example of data ingestion that uses the same code to ingest into multiple vector stores types.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs)
 - [VectorStore_DataIngestion_CustomMapper: An example that shows how to use a custom mapper for when your data model and storage model doesn't match.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs)
+- [VectorStore_VectorSearch_Simple: A simple example of how to ingest data into a vector store and then do a vector similarity search over the data.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Simple.cs)
+- [VectorStore_VectorSearch_Paging: An example showing how to do vector search with paging.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_Paging.cs)
+- [VectorStore_VectorSearch_MultiVector: An example showing how to pick a target vector when doing vector search on a record that contains multiple vectors.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiVector.cs)
+- [VectorStore_VectorSearch_MultiStore_Common: An example showing how to write vector database agnostic code with different vector databases.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Common.cs)
+- [VectorStore_HybridSearch_Simple_AzureAISearch: An example showing how to do hybrid search using AzureAISearch.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_HybridSearch_Simple_AzureAISearch.cs)
 - [VectorStore_GenericDataModel_Interop: An example that shows how you can use the built-in, generic data model from Semantic Kernel to read and write to a Vector Store.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_GenericDataModel_Interop.cs)
 - [VectorStore_ConsumeFromMemoryStore_AzureAISearch: An example that shows how you can use the AzureAISearchVectorStore to consume data that was ingested using the AzureAISearchMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_AzureAISearch.cs)
 - [VectorStore_ConsumeFromMemoryStore_Qdrant: An example that shows how you can use the QdrantVectorStore to consume data that was ingested using the QdrantMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_Qdrant.cs)

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ namespace Memory;`
`18`	`18`	`/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>`
`19`	`19`	`/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>`
`20`	`20`	`/// <para><see cref="VectorStore_VectorSearch_MultiStore_Qdrant"/></para>`
	`21`	`+/// <para><see cref="VectorStore_VectorSearch_MultiStore_Postgres"/></para>`
`21`	`22`	`/// </summary>`
`22`	`23`	`public class VectorStore_VectorSearch_MultiStore_InMemory(ITestOutputHelper output) : BaseTest(output)`
`23`	`24`	`{`