Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python: restructured data folder and multiple improvements to vector stores #11302

Merged
merged 3 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions python/samples/concepts/caching/semantic_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,22 @@
from typing import Annotated
from uuid import uuid4

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
from semantic_kernel.connectors.memory.in_memory.in_memory_store import InMemoryVectorStore
from semantic_kernel.data.record_definition import vectorstoremodel
from semantic_kernel.data.record_definition.vector_store_record_fields import (
from semantic_kernel.data import (
VectorizedSearchMixin,
VectorSearchOptions,
VectorStore,
VectorStoreRecordCollection,
VectorStoreRecordDataField,
VectorStoreRecordKeyField,
VectorStoreRecordVectorField,
vectorstoremodel,
)
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin
from semantic_kernel.data.vector_storage.vector_store import VectorStore
from semantic_kernel.data.vector_storage.vector_store_record_collection import VectorStoreRecordCollection
from semantic_kernel.filters.filter_types import FilterTypes
from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext
from semantic_kernel.filters.prompts.prompt_render_context import PromptRenderContext
from semantic_kernel.functions.function_result import FunctionResult
from semantic_kernel.kernel import Kernel
from semantic_kernel.filters import FilterTypes, FunctionInvocationContext, PromptRenderContext
from semantic_kernel.functions import FunctionResult

COLLECTION_NAME = "llm_responses"
RECORD_ID_KEY = "cache_record_id"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@

from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai import FunctionChoiceBehavior
from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_store import AzureCosmosDBNoSQLStore
from semantic_kernel.contents import ChatHistory
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents import ChatHistory, ChatMessageContent
from semantic_kernel.core_plugins.math_plugin import MathPlugin
from semantic_kernel.core_plugins.time_plugin import TimePlugin
from semantic_kernel.data.record_definition.vector_store_model_decorator import vectorstoremodel
from semantic_kernel.data.record_definition.vector_store_record_fields import (
from semantic_kernel.data import (
VectorStore,
VectorStoreRecordCollection,
VectorStoreRecordDataField,
VectorStoreRecordKeyField,
vectorstoremodel,
)
from semantic_kernel.data.vector_storage.vector_store import VectorStore
from semantic_kernel.data.vector_storage.vector_store_record_collection import VectorStoreRecordCollection

"""
This sample demonstrates how to build a conversational chatbot
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

import asyncio

from step_0_data_model import HotelSampleClass

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection

###
# The data model used for this sample is based on the hotel data model from the Azure AI Search samples.
# When deploying a new index in Azure AI Search using the import wizard you can choose to deploy the 'hotel-samples'
Expand All @@ -13,24 +19,19 @@
# This sample assumes the index is deployed, the vector fields can be empty.
# If the vector fields are empty, change the first_run parameter to True to add the vectors.
###
from step_0_data_model import HotelSampleClass

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.data import (
VectorSearchOptions,
VectorStoreRecordUtils,
)
from semantic_kernel.data.vector_search import add_vector_to_records

first_run = False

# Note: you may need to update this `collection_name` depending upon how your index is named.
COLLECTION_NAME = "hotels-sample-index"


async def add_vectors(collection: AzureAISearchCollection, vectorizer: VectorStoreRecordUtils):
"""This is a simple function that uses the VectorStoreRecordUtils to add vectors to the records in the collection.
async def add_vectors(collection: AzureAISearchCollection, kernel: Kernel):
"""This is a simple function that uses the add_vector_to_records function to add vectors.

It first uses the search_client within the collection to get a list of ids,
and then uses the upsert to add the vectors to the records.
Expand All @@ -42,7 +43,7 @@ async def add_vectors(collection: AzureAISearchCollection, vectorizer: VectorSto
if hotels is not None and isinstance(hotels, list):
for hotel in hotels:
if not hotel.description_vector or not hotel.description_fr_vector:
hotel = await vectorizer.add_vector_to_records(hotel, HotelSampleClass)
hotel = await add_vector_to_records(kernel, hotel, HotelSampleClass)
await collection.upsert(hotel)


Expand All @@ -52,10 +53,8 @@ async def main(query: str, first_run: bool = False):
# Add the OpenAI text embedding service
embeddings = OpenAITextEmbedding(service_id="embedding", ai_model_id="text-embedding-3-small")
kernel.add_service(embeddings)
# Create the VectorStoreRecordUtils object
vectorizer = VectorStoreRecordUtils(kernel)
# Create the Azure AI Search collection
collection = AzureAISearchCollection[HotelSampleClass](
collection = AzureAISearchCollection[str, HotelSampleClass](
collection_name=COLLECTION_NAME, data_model_type=HotelSampleClass
)
# Check if the collection exists.
Expand All @@ -71,7 +70,7 @@ async def main(query: str, first_run: bool = False):

# If it is the first run and there are no vectors, add them.
if first_run:
await add_vectors(collection, vectorizer)
await add_vectors(collection, kernel)

# Search using just text; by default this will search all the searchable text fields in the index.
results = await collection.text_search(search_text=query)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,21 @@


import asyncio
from collections.abc import Coroutine
from collections.abc import Awaitable, Callable
from typing import Any

from step_0_data_model import HotelSampleClass

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import (
OpenAIChatCompletion,
OpenAIChatPromptExecutionSettings,
OpenAITextEmbedding,
)
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.contents import ChatHistory

###
# The data model used for this sample is based on the hotel data model from the Azure AI Search samples.
# When deploying a new index in Azure AI Search using the import wizard you can choose to deploy the 'hotel-samples'
Expand All @@ -16,26 +28,12 @@
# This sample assumes the index is deployed, and the vectors have been filled.
# Use the step_1_interact_with_the_collection.py sample, with `first_run = True` to fill the vectors.
###
from step_0_data_model import HotelSampleClass

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import (
OpenAIChatCompletion,
OpenAIChatPromptExecutionSettings,
OpenAITextEmbedding,
)
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.contents import ChatHistory
from semantic_kernel.data import (
VectorSearchFilter,
VectorSearchOptions,
VectorStoreRecordUtils,
)
from semantic_kernel.data.search_options import SearchOptions
from semantic_kernel.data.text_search.vector_store_text_search import VectorStoreTextSearch
from semantic_kernel.filters.filter_types import FilterTypes
from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext
from semantic_kernel.data.text_search import SearchOptions
from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
from semantic_kernel.functions import (
KernelArguments,
KernelParameterMetadata,
Expand All @@ -50,15 +48,15 @@
kernel.add_service(OpenAIChatCompletion(service_id=service_id))
embeddings = OpenAITextEmbedding(service_id="embedding", ai_model_id="text-embedding-3-small")
kernel.add_service(embeddings)
vectorizer = VectorStoreRecordUtils(kernel)

# Create a Text Search object, with an Azure AI Search collection.
# Using the `from_vector_text_search` method means that this plugin will only use text search.
# You can also choose to use the `from_vectorized_search` method to use vector search.
# Or use the `from_vectorizable_text_search` method if the collection is set up to vectorize incoming texts.
text_search = VectorStoreTextSearch.from_vector_text_search(
AzureAISearchCollection[HotelSampleClass](collection_name=COLLECTION_NAME, data_model_type=HotelSampleClass)
collection = AzureAISearchCollection[str, HotelSampleClass](
collection_name=COLLECTION_NAME, data_model_type=HotelSampleClass
)
text_search = collection.create_text_search_from_vector_text_search()


# Before we create the plugin, we want to create a function that will help the plugin work the way we want it to.
Expand Down Expand Up @@ -195,7 +193,9 @@ def update_options_search(
# This allows us to see what parameters are being passed to the plugin.
# And this gives us a way to debug the search experience and if necessary tweak the parameters and descriptions.
@kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
async def log_search_filter(context: FunctionInvocationContext, next: Coroutine[FunctionInvocationContext, Any, None]):
async def log_search_filter(
context: FunctionInvocationContext, next: Callable[[FunctionInvocationContext], Awaitable[None]]
):
if context.function.plugin_name == "azure_ai_search":
print(f"Calling Azure AI Search ({context.function.name}) with arguments:")
for arg in context.arguments:
Expand Down
12 changes: 4 additions & 8 deletions python/samples/concepts/memory/complex_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
from semantic_kernel.connectors.memory.redis import RedisHashsetCollection, RedisJsonCollection
from semantic_kernel.connectors.memory.weaviate import WeaviateCollection
from semantic_kernel.data import (
DISTANCE_FUNCTION_DIRECTION_HELPER,
DistanceFunction,
IndexKind,
VectorizableTextSearchMixin,
VectorizedSearchMixin,
VectorSearchFilter,
Expand All @@ -45,7 +42,8 @@
VectorTextSearchMixin,
vectorstoremodel,
)
from semantic_kernel.data.record_definition.vector_store_record_utils import VectorStoreRecordUtils
from semantic_kernel.data.const import DISTANCE_FUNCTION_DIRECTION_HELPER, DistanceFunction, IndexKind
from semantic_kernel.data.vector_search import add_vector_to_records

# This is a rather complex sample, showing how to use the vector store
# with a number of different collections.
Expand Down Expand Up @@ -254,14 +252,12 @@ async def main(collection: str, use_azure_openai: bool):
)

print_with_color("Adding records!", Colors.CBLUE)
records = await VectorStoreRecordUtils(kernel).add_vector_to_records(
[record1, record2, record3], data_model_type=DataModel
)
records = await add_vector_to_records(kernel, [record1, record2, record3], data_model_type=DataModel)
records = [record1, record2, record3]
keys = await record_collection.upsert_batch(records)
print(f" Upserted {keys=}")
print_with_color("Getting records!", Colors.CBLUE)
results = await record_collection.get_batch([record1.id, record2.id, record3.id])
results = await record_collection.get([record1.id, record2.id, record3.id])
if results:
[print_record(record=result) for result in results]
else:
Expand Down
21 changes: 3 additions & 18 deletions python/samples/concepts/memory/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,6 @@ class DataModelPydantic(BaseModel):
other: str | None = None


# Data model using Pydantic BaseModels with mixed annotations (from pydantic and SK)
@vectorstoremodel
class DataModelPydanticComplex(BaseModel):
vector: Annotated[list[float], VectorStoreRecordVectorField]
key: Annotated[str, Field(default_factory=lambda: str(uuid4())), VectorStoreRecordKeyField()]
content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = (
"content1"
)
other: str | None = None


# Data model using Python classes
# This one includes a custom serialize and deserialize method
@vectorstoremodel
Expand Down Expand Up @@ -133,25 +122,21 @@ def deserialize(cls, obj: dict[str, Any]) -> "DataModelDataclass":
if __name__ == "__main__":
data_item1 = DataModelDataclass(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item2 = DataModelPydantic(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item3 = DataModelPydanticComplex(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item4 = DataModelPython(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item3 = DataModelPython(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
print("Example records:")
print(f"DataClass:\n {data_item1}", end="\n\n")
print(f"Pydantic:\n {data_item2}", end="\n\n")
print(f"Pydantic with annotations:\n {data_item3}", end="\n\n")
print(f"Python:\n {data_item4}", end="\n\n")
print(f"Python:\n {data_item3}", end="\n\n")

print("Item definitions:")
print(f"DataClass:\n {data_item1.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Pydantic:\n {data_item2.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Pydantic with annotations:\n {data_item3.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Python:\n {data_item4.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Python:\n {data_item3.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Definition for use with Pandas:\n {data_model_definition_pandas}", end="\n\n")
if (
data_item1.__kernel_vectorstoremodel_definition__.fields
== data_item2.__kernel_vectorstoremodel_definition__.fields
== data_item3.__kernel_vectorstoremodel_definition__.fields
== data_item4.__kernel_vectorstoremodel_definition__.fields
== data_model_definition_pandas.fields
):
print("All data models are the same")
Expand Down
4 changes: 2 additions & 2 deletions python/samples/concepts/memory/memory_with_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
VectorStoreRecordDataField,
VectorStoreRecordDefinition,
VectorStoreRecordKeyField,
VectorStoreRecordUtils,
VectorStoreRecordVectorField,
)
from semantic_kernel.data.vector_search import add_vector_to_records

model_fields = VectorStoreRecordDefinition(
container_mode=True,
Expand Down Expand Up @@ -51,7 +51,7 @@ async def main():

# create the dataframe and add the embeddings
df = pd.DataFrame(records)
df = await VectorStoreRecordUtils(kernel).add_vector_to_records(df, None, data_model_definition=model_fields)
df = await add_vector_to_records(kernel, df, None, data_model_definition=model_fields)
print("Records with embeddings:")
print(df.shape)
print(df.head(5))
Expand Down
10 changes: 3 additions & 7 deletions python/samples/concepts/memory/simple_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,15 @@
)
from semantic_kernel.connectors.memory.in_memory import InMemoryVectorCollection
from semantic_kernel.data import (
DISTANCE_FUNCTION_DIRECTION_HELPER,
DistanceFunction,
IndexKind,
VectorSearchFilter,
VectorSearchOptions,
VectorStoreRecordDataField,
VectorStoreRecordKeyField,
VectorStoreRecordUtils,
VectorStoreRecordVectorField,
vectorstoremodel,
)
from semantic_kernel.data.const import DISTANCE_FUNCTION_DIRECTION_HELPER, DistanceFunction, IndexKind
from semantic_kernel.data.vector_search import add_vector_to_records

# This is the most basic example of a vector store and collection
# For a more complex example, using different collection types, see "complex_memory.py"
Expand Down Expand Up @@ -115,9 +113,7 @@ async def main():

# First add vectors to the records
print_with_color("Adding records!", Colors.CBLUE)
records_with_embedding = await VectorStoreRecordUtils(kernel).add_vector_to_records(
records, data_model_type=DataModel
)
records_with_embedding = await add_vector_to_records(kernel, records, data_model_type=DataModel)
# Next upsert them to the store.
keys = await record_collection.upsert_batch(records_with_embedding)
print(f" Upserted {keys=}")
Expand Down
5 changes: 1 addition & 4 deletions python/samples/concepts/memory/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.


from typing import TypeVar

from samples.concepts.resources.utils import Colors, print_with_color
from semantic_kernel.data import (
VectorSearchResult,
)
from semantic_kernel.data import VectorSearchResult

_T = TypeVar("_T")

Expand Down
Loading
Loading