Skip to content

Commit f78b953

Browse files
speedstorm1 authored and copybara-github committed
feat: Adding Pinecone Vector DB option for RAG corpuses to SDK
PiperOrigin-RevId: 675328511
1 parent a6cbb74 commit f78b953

File tree

6 files changed

+97
-6
lines changed

6 files changed

+97
-6
lines changed

tests/unit/vertex_rag/test_rag_constants.py

+28
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from vertexai.preview.rag import (
2222
EmbeddingModelConfig,
23+
Pinecone,
2324
RagCorpus,
2425
RagFile,
2526
RagResource,
@@ -69,6 +70,14 @@
6970
collection_name=TEST_WEAVIATE_COLLECTION_NAME,
7071
api_key=TEST_WEAVIATE_API_KEY_SECRET_VERSION,
7172
)
73+
# Pinecone fixtures: the index name, the Secret Manager secret version that
# holds the Pinecone API key, and the SDK-level ``Pinecone`` config built from
# both.
TEST_PINECONE_INDEX_NAME = "test-pinecone-index"
TEST_PINECONE_API_KEY_SECRET_VERSION = (
    "projects/test-project/secrets/test-secret/versions/1"
)
TEST_PINECONE_CONFIG = Pinecone(
    index_name=TEST_PINECONE_INDEX_NAME,
    api_key=TEST_PINECONE_API_KEY_SECRET_VERSION,
)
7281
TEST_VERTEX_FEATURE_STORE_RESOURCE_NAME = "test-feature-view-resource-name"
7382
TEST_GAPIC_RAG_CORPUS = GapicRagCorpus(
7483
name=TEST_RAG_CORPUS_RESOURCE_NAME,
@@ -106,6 +115,19 @@
106115
),
107116
),
108117
)
118+
# GAPIC-level corpus backed by Pinecone. The index name is carried under
# ``pinecone`` and the API key secret version under ``api_auth``; both are
# siblings inside ``RagVectorDbConfig``.
TEST_GAPIC_RAG_CORPUS_PINECONE = GapicRagCorpus(
    name=TEST_RAG_CORPUS_RESOURCE_NAME,
    display_name=TEST_CORPUS_DISPLAY_NAME,
    description=TEST_CORPUS_DISCRIPTION,
    rag_vector_db_config=RagVectorDbConfig(
        pinecone=RagVectorDbConfig.Pinecone(index_name=TEST_PINECONE_INDEX_NAME),
        api_auth=api_auth.ApiAuth(
            api_key_config=api_auth.ApiAuth.ApiKeyConfig(
                api_key_secret_version=TEST_PINECONE_API_KEY_SECRET_VERSION
            ),
        ),
    ),
)
109131
TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig(
110132
publisher_model="publishers/google/models/textembedding-gecko",
111133
)
@@ -130,6 +152,12 @@
130152
description=TEST_CORPUS_DISCRIPTION,
131153
vector_db=TEST_VERTEX_FEATURE_STORE_CONFIG,
132154
)
155+
# SDK-level corpus whose vector DB is the Pinecone test config.
TEST_RAG_CORPUS_PINECONE = RagCorpus(
    name=TEST_RAG_CORPUS_RESOURCE_NAME,
    display_name=TEST_CORPUS_DISPLAY_NAME,
    description=TEST_CORPUS_DISCRIPTION,
    vector_db=TEST_PINECONE_CONFIG,
)
133161
TEST_PAGE_TOKEN = "test-page-token"
134162

135163
# RagFiles

tests/unit/vertex_rag/test_rag_data.py

+24
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,21 @@ def create_rag_corpus_mock_vertex_feature_store():
7979
yield create_rag_corpus_mock_vertex_feature_store
8080

8181

82+
@pytest.fixture
def create_rag_corpus_mock_pinecone():
    """Patch ``VertexRagDataServiceClient.create_rag_corpus`` for Pinecone tests.

    While the fixture is active, ``create_rag_corpus`` returns a mock
    operation that reports itself as done and whose ``result()`` is
    ``tc.TEST_GAPIC_RAG_CORPUS_PINECONE``.

    Yields:
        The mock that replaces ``create_rag_corpus``.
    """
    with mock.patch.object(
        VertexRagDataServiceClient,
        "create_rag_corpus",
    ) as create_rag_corpus_mock_pinecone:
        # Mock specced against the operation type, already marked finished.
        create_rag_corpus_lro_mock = mock.Mock(ga_operation.Operation)
        create_rag_corpus_lro_mock.done.return_value = True
        create_rag_corpus_lro_mock.result.return_value = (
            tc.TEST_GAPIC_RAG_CORPUS_PINECONE
        )
        create_rag_corpus_mock_pinecone.return_value = create_rag_corpus_lro_mock
        yield create_rag_corpus_mock_pinecone
95+
96+
8297
@pytest.fixture
8398
def list_rag_corpora_pager_mock():
8499
with mock.patch.object(
@@ -242,6 +257,15 @@ def test_create_corpus_vertex_feature_store_success(self):
242257

243258
rag_corpus_eq(rag_corpus, tc.TEST_RAG_CORPUS_VERTEX_FEATURE_STORE)
244259

260+
@pytest.mark.usefixtures("create_rag_corpus_mock_pinecone")
def test_create_corpus_pinecone_success(self):
    """Check that ``rag.create_corpus`` with a Pinecone ``vector_db`` succeeds.

    The ``create_rag_corpus_mock_pinecone`` fixture is active for this test.
    The returned corpus is compared against ``tc.TEST_RAG_CORPUS_PINECONE``
    with ``rag_corpus_eq``.
    """
    rag_corpus = rag.create_corpus(
        display_name=tc.TEST_CORPUS_DISPLAY_NAME,
        vector_db=tc.TEST_PINECONE_CONFIG,
    )

    rag_corpus_eq(rag_corpus, tc.TEST_RAG_CORPUS_PINECONE)
268+
245269
@pytest.mark.usefixtures("rag_data_client_mock_exception")
246270
def test_create_corpus_failure(self):
247271
with pytest.raises(RuntimeError) as e:

vertexai/preview/rag/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
EmbeddingModelConfig,
4141
JiraQuery,
4242
JiraSource,
43+
Pinecone,
4344
RagCorpus,
4445
RagFile,
4546
RagResource,
@@ -54,6 +55,7 @@
5455
"EmbeddingModelConfig",
5556
"JiraQuery",
5657
"JiraSource",
58+
"Pinecone",
5759
"RagCorpus",
5860
"RagFile",
5961
"RagResource",

vertexai/preview/rag/rag_data.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from vertexai.preview.rag.utils.resources import (
4646
EmbeddingModelConfig,
4747
JiraSource,
48+
Pinecone,
4849
RagCorpus,
4950
RagFile,
5051
SlackChannelsSource,
@@ -57,7 +58,7 @@ def create_corpus(
5758
display_name: Optional[str] = None,
5859
description: Optional[str] = None,
5960
embedding_model_config: Optional[EmbeddingModelConfig] = None,
60-
vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None,
61+
vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None,
6162
) -> RagCorpus:
6263
"""Creates a new RagCorpus resource.
6364

vertexai/preview/rag/utils/_gapic_utils.py

+26-4
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
)
3939
from vertexai.preview.rag.utils.resources import (
4040
EmbeddingModelConfig,
41+
Pinecone,
4142
RagCorpus,
4243
RagFile,
4344
SlackChannelsSource,
@@ -98,8 +99,8 @@ def convert_gapic_to_embedding_model_config(
9899

99100
def convert_gapic_to_vector_db(
100101
gapic_vector_db: RagVectorDbConfig,
101-
) -> Union[Weaviate, VertexFeatureStore]:
102-
"""Convert Gapic RagVectorDbConfig to Weaviate or VertexFeatureStore."""
102+
) -> Union[Weaviate, VertexFeatureStore, Pinecone]:
103+
"""Convert Gapic RagVectorDbConfig to Weaviate, VertexFeatureStore, or Pinecone."""
103104
if gapic_vector_db.__contains__("weaviate"):
104105
return Weaviate(
105106
weaviate_http_endpoint=gapic_vector_db.weaviate.http_endpoint,
@@ -110,6 +111,11 @@ def convert_gapic_to_vector_db(
110111
return VertexFeatureStore(
111112
resource_name=gapic_vector_db.vertex_feature_store.feature_view_resource_name,
112113
)
114+
elif gapic_vector_db.__contains__("pinecone"):
115+
return Pinecone(
116+
index_name=gapic_vector_db.pinecone.index_name,
117+
api_key=gapic_vector_db.api_auth.api_key_config.api_key_secret_version,
118+
)
113119
else:
114120
return None
115121

@@ -395,7 +401,7 @@ def set_embedding_model_config(
395401

396402

397403
def set_vector_db(
398-
vector_db: Union[Weaviate, VertexFeatureStore],
404+
vector_db: Union[Weaviate, VertexFeatureStore, Pinecone],
399405
rag_corpus: GapicRagCorpus,
400406
) -> None:
401407
"""Sets the vector db configuration for the rag corpus."""
@@ -423,5 +429,21 @@ def set_vector_db(
423429
feature_view_resource_name=resource_name,
424430
),
425431
)
432+
elif isinstance(vector_db, Pinecone):
433+
index_name = vector_db.index_name
434+
api_key = vector_db.api_key
435+
436+
rag_corpus.rag_vector_db_config = RagVectorDbConfig(
437+
pinecone=RagVectorDbConfig.Pinecone(
438+
index_name=index_name,
439+
),
440+
api_auth=api_auth.ApiAuth(
441+
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
442+
api_key_secret_version=api_key
443+
),
444+
),
445+
)
426446
else:
427-
raise TypeError("vector_db must be a Weaviate or VertexFeatureStore.")
447+
raise TypeError(
448+
"vector_db must be a Weaviate, VertexFeatureStore, or Pinecone."
449+
)

vertexai/preview/rag/utils/resources.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ class VertexFeatureStore:
9898
resource_name: str
9999

100100

101+
@dataclasses.dataclass
class Pinecone:
    """Pinecone vector database configuration.

    Attributes:
        index_name: The Pinecone index name.
        api_key: The SecretManager resource name for the Pinecone DB API token. Format:
            ``projects/{project}/secrets/{secret}/versions/{version}``
    """

    index_name: str
    api_key: str
113+
114+
101115
@dataclasses.dataclass
102116
class RagCorpus:
103117
"""RAG corpus(output only).
@@ -115,7 +129,7 @@ class RagCorpus:
115129
display_name: Optional[str] = None
116130
description: Optional[str] = None
117131
embedding_model_config: Optional[EmbeddingModelConfig] = None
118-
vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None
132+
vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None
119133

120134

121135
@dataclasses.dataclass

0 commit comments

Comments
 (0)