KxSystems · bu2kx · Apr 25, 2024 · Nov 16, 2023 · Dec 4, 2023 · Feb 8, 2024
diff --git a/.dockerignore b/.dockerignore
@@ -5,6 +5,7 @@ scripts/
 tests/
 examples/
 local_server/
+assets/
 *.md
 *.pyc
 .dockerignore

diff --git a/.env.example b/.env.example
@@ -0,0 +1,106 @@
+# Core environment variables
+DATASTORE="<your_datastore>"
+BEARER_TOKEN="<your_bearer_token>"
+OPENAI_API_KEY="<your_openai_api_key>"
+EMBEDDING_DIMENSION=256 # edit this value based on the dimension of the embeddings you want to use
+EMBEDDING_MODEL="text-embedding-3-large" # edit this value based on the model you want to use e.g. text-embedding-3-small, text-embedding-ada-002
+
+# Optional environment variables for Azure OpenAI
+OPENAI_API_BASE="https://<AzureOpenAIName>.openai.azure.com/"
+OPENAI_API_TYPE="azure"
+OPENAI_EMBEDDINGMODEL_DEPLOYMENTID="<Name of embedding model deployment>"
+OPENAI_METADATA_EXTRACTIONMODEL_DEPLOYMENTID="<Name of deployment of model for metadata>"
+OPENAI_COMPLETIONMODEL_DEPLOYMENTID="<Name of general model deployment used for completion>"
+OPENAI_EMBEDDING_BATCH_SIZE="<Batch size for embedding requests; for Azure OpenAI, this value needs to be set to 1>"
+
+# Pinecone configuration
+PINECONE_API_KEY="<your_pinecone_api_key>"
+PINECONE_ENVIRONMENT="<your_pinecone_environment>"
+PINECONE_INDEX="<your_pinecone_index>"
+
+# Weaviate configuration
+WEAVIATE_URL="<your_weaviate_instance_url>"
+WEAVIATE_API_KEY="<your_api_key_for_WCS>"
+WEAVIATE_CLASS="<your_optional_weaviate_class>"
+
+# Zilliz configuration
+ZILLIZ_COLLECTION="<your_zilliz_collection>"
+ZILLIZ_URI="<your_zilliz_uri>"
+ZILLIZ_USER="<your_zilliz_username>"
+ZILLIZ_PASSWORD="<your_zilliz_password>"
+
+# Milvus configuration
+MILVUS_COLLECTION="<your_milvus_collection>"
+MILVUS_HOST="<your_milvus_host>"
+MILVUS_PORT="<your_milvus_port>"
+MILVUS_USER="<your_milvus_username>"
+MILVUS_PASSWORD="<your_milvus_password>"
+
+# Qdrant configuration
+QDRANT_URL="<your_qdrant_url>"
+QDRANT_PORT="<your_qdrant_port>"
+QDRANT_GRPC_PORT="<your_qdrant_grpc_port>"
+QDRANT_API_KEY="<your_qdrant_api_key>"
+QDRANT_COLLECTION="<your_qdrant_collection>"
+
+# AnalyticDB configuration
+PG_HOST="<your_analyticdb_host>"
+PG_PORT="<your_analyticdb_port>"
+PG_USER="<your_analyticdb_username>"
+PG_PASSWORD="<your_analyticdb_password>"
+PG_DATABASE="<your_analyticdb_database>"
+PG_COLLECTION="<your_analyticdb_collection>"
+
+# Redis configuration
+REDIS_HOST="<your_redis_host>"
+REDIS_PORT="<your_redis_port>"
+REDIS_PASSWORD="<your_redis_password>"
+REDIS_INDEX_NAME="<your_redis_index_name>"
+REDIS_DOC_PREFIX="<your_redis_doc_prefix>"
+REDIS_DISTANCE_METRIC="<your_redis_distance_metric>"
+REDIS_INDEX_TYPE="<your_redis_index_type>"
+
+# Llama configuration
+LLAMA_INDEX_TYPE="<gpt_vector_index_type>"
+LLAMA_INDEX_JSON_PATH="<path_to_saved_index_json_file>"
+LLAMA_QUERY_KWARGS_JSON_PATH="<path_to_saved_query_kwargs_json_file>"
+LLAMA_RESPONSE_MODE="<response_mode_for_query>"
+
+# Chroma configuration
+CHROMA_COLLECTION="<your_chroma_collection>"
+CHROMA_IN_MEMORY="<true_or_false>"
+CHROMA_PERSISTENCE_DIR="<your_chroma_persistence_directory>"
+CHROMA_HOST="<your_chroma_host>"
+CHROMA_PORT="<your_chroma_port>"
+
+# Azure Cognitive Search configuration
+AZURESEARCH_SERVICE="<your_search_service_name>"
+AZURESEARCH_INDEX="<your_search_index_name>"
+AZURESEARCH_API_KEY="<your_api_key>" # (optional, uses key-free managed identity if not set)
+
+# Azure CosmosDB Mongo vCore configuration
+AZCOSMOS_API="<your Azure Cosmos DB API; currently only mongo is supported>"
+AZCOSMOS_CONNSTR="<your azure cosmos db mongo vcore connection string>"
+AZCOSMOS_DATABASE_NAME="<your mongo database name>"
+AZCOSMOS_CONTAINER_NAME="<your mongo container name>"
+
+# Supabase configuration
+SUPABASE_URL="<supabase_project_url>"
+SUPABASE_ANON_KEY="<supabase_project_api_anon_key>"
+
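+# Postgres configuration (reuses the PG_HOST/PG_PORT/PG_USER/PG_PASSWORD names from the AnalyticDB section; fill in only the block for the datastore you use)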
+PG_HOST="<postgres_host>"
+PG_PORT="<postgres_port>"
+PG_USER="<postgres_user>"
+PG_PASSWORD="<postgres_password>"
+PG_DB="<postgres_database>"
+
+# Elasticsearch configuration
+ELASTICSEARCH_URL="<elasticsearch_host_and_port>" # (either specify host or cloud_id)
+ELASTICSEARCH_CLOUD_ID="<elasticsearch_cloud_id>"
+ELASTICSEARCH_USERNAME="<elasticsearch_username>"
+ELASTICSEARCH_PASSWORD="<elasticsearch_password>"
+ELASTICSEARCH_API_KEY="<elasticsearch_api_key>"
+ELASTICSEARCH_INDEX="<elasticsearch_index_name>"
+ELASTICSEARCH_REPLICAS="<elasticsearch_replicas>"
+ELASTICSEARCH_SHARDS="<elasticsearch_shards>"
diff --git a/.gitignore b/.gitignore
@@ -116,6 +116,9 @@ env.bak/
 venv.bak/
 myvenv/
 
+# Exception for .env.example
+!.env.example
+
 # Spyder project settings
 .spyderproject
 .spyproject

diff --git a/.well-known/openapi.yaml b/.well-known/openapi.yaml
@@ -3,8 +3,8 @@ info:
   title: Retrieval Plugin API
   description: A retrieval API for querying and filtering documents based on natural language queries and metadata
   version: 1.0.0
-  servers:
-    - url: https://your-app-url.com
+servers:
+  - url: https://your-app-url.com
 paths:
   /query:
     post:

diff --git a/README.md b/README.md
diff --git a/assets/example.png b/assets/example.png
diff --git a/datastore/factory.py b/datastore/factory.py
@@ -36,6 +36,12 @@ async def get_datastore() -> DataStore:
             from datastore.providers.redis_datastore import RedisDataStore
 
             return await RedisDataStore.init()
+        case "azurecosmosdb":
+            from datastore.providers.azurecosmosdb_datastore import (
+                AzureCosmosDBDataStore,
+            )
+
+            return await AzureCosmosDBDataStore.create()
         case "qdrant":
             from datastore.providers.qdrant_datastore import QdrantDataStore
 

diff --git a/datastore/providers/analyticdb_datastore.py b/datastore/providers/analyticdb_datastore.py
@@ -30,7 +30,7 @@
     "host": os.environ.get("PG_HOST", "localhost"),
     "port": int(os.environ.get("PG_PORT", "5432")),
 }
-OUTPUT_DIM = 1536
+OUTPUT_DIM = int(os.environ.get("EMBEDDING_DIMENSION", 256))
 
 
 class AnalyticDBDataStore(DataStore):
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,6 +5,7 @@ scripts/ @@
     tests/
     examples/
     local_server/
+    assets/
     *.md
     *.pyc
     .dockerignore
@@ Expand Down @@