Skip to content

Commit 1c3a6c4

Browse files
Destination Pinecone: Add source_tag for attribution + unit tests (#38151)
Co-authored-by: Aaron ("AJ") Steers <[email protected]>
1 parent bc83bee commit 1c3a6c4

File tree

7 files changed

+66
-3
lines changed

7 files changed

+66
-3
lines changed

airbyte-integrations/connectors/destination-pinecone/destination_pinecone/indexer.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

5+
import os
56
import uuid
67
from typing import Optional
78

@@ -25,14 +26,17 @@
2526

2627
MAX_IDS_PER_DELETE = 1000
2728

29+
# Attribution tags returned by PineconeIndexer.get_source_tag: AIRBYTE_TAG for
# regular runs, AIRBYTE_TEST_TAG when a pytest/CI marker variable is present
# in the environment.
AIRBYTE_TAG = "airbyte"
30+
AIRBYTE_TEST_TAG = "airbyte_test"
31+
2832

2933
class PineconeIndexer(Indexer):
3034
config: PineconeIndexingModel
3135

3236
def __init__(self, config: PineconeIndexingModel, embedding_dimensions: int):
3337
super().__init__(config)
3438
try:
35-
self.pc = PineconeGRPC(api_key=config.pinecone_key, threaded=True)
39+
# source_tag must receive the tag value itself, so get_source_tag is called here; passing the bound method would hand the client a callable.
self.pc = PineconeGRPC(api_key=config.pinecone_key, source_tag=self.get_source_tag(), threaded=True)
3640
except PineconeException as e:
3741
return AirbyteConnectionStatus(status=Status.FAILED, message=str(e))
3842

@@ -62,6 +66,10 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog):
6266
def post_sync(self):
6367
return []
6468

69+
# Choose the attribution tag for the Pinecone client.
# Returns AIRBYTE_TEST_TAG when either PYTEST_CURRENT_TEST or RUN_IN_AIRBYTE_CI
# exists in the process environment, otherwise AIRBYTE_TAG.
def get_source_tag(self):
70+
# Only the presence of the variables is checked; their values are ignored.
is_test = "PYTEST_CURRENT_TEST" in os.environ or "RUN_IN_AIRBYTE_CI" in os.environ
71+
return AIRBYTE_TEST_TAG if is_test else AIRBYTE_TAG
72+
6573
def delete_vectors(self, filter, namespace=None, prefix=None):
6674
if self._pod_type == "starter":
6775
# Starter pod types have a maximum of 100000 rows

airbyte-integrations/connectors/destination-pinecone/integration_tests/pinecone_integration_test.py

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import json
66
import logging
7+
import os
78
import time
89

910
from airbyte_cdk.destinations.vector_db_based.embedder import OPEN_AI_VECTOR_SIZE
@@ -48,6 +49,8 @@ def tearDown(self):
4849
else :
4950
print("Noting to delete. No data in the index/namespace.")
5051

52+
# Sanity check that PYTEST_CURRENT_TEST is present in the environment while
# the integration tests run.
# NOTE(review): presumably this guards the indexer's test-tag selection, which
# keys off the same variable - confirm against indexer.get_source_tag.
def test_integration_test_flag_is_set(self):
53+
assert "PYTEST_CURRENT_TEST" in os.environ
5154

5255
def test_check_valid_config(self):
5356
outcome = DestinationPinecone().check(logging.getLogger("airbyte"), self.config)

airbyte-integrations/connectors/destination-pinecone/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ data:
1313
connectorSubtype: vectorstore
1414
connectorType: destination
1515
definitionId: 3d2b6f84-7f0d-4e3f-a5e5-7c7d4b50eabd
16-
dockerImageTag: 0.1.0
16+
dockerImageTag: 0.1.1
1717
dockerRepository: airbyte/destination-pinecone
1818
documentationUrl: https://docs.airbyte.com/integrations/destinations/pinecone
1919
githubIssueLabel: destination-pinecone

airbyte-integrations/connectors/destination-pinecone/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "airbyte-destination-pinecone"
7-
version = "0.1.0"
7+
version = "0.1.1"
88
description = "Airbyte destination implementation for Pinecone."
99
authors = ["Airbyte <[email protected]>"]
1010
license = "MIT"

airbyte-integrations/connectors/destination-pinecone/unit_tests/destination_test.py

+20
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,26 @@ def test_check_with_errors(self, MockedEmbedder, MockedPineconeIndexer):
6666
mock_embedder.check.assert_called_once()
6767
mock_indexer.check.assert_called_once()
6868

69+
# check() is expected to report Status.FAILED for a config it cannot accept.
def test_check_with_config_errors(self):
70+
bad_config = {
71+
"processing": {"text_fields": ["str_col"], "metadata_fields": [], "chunk_size": 1000},
72+
# NOTE(review): presumably the misnamed "embedding_2" key (rather than
# "embedding") is what makes this config invalid - confirm against the spec.
"embedding_2": {"mode": "openai", "openai_key": "mykey"},
73+
"indexing": {
74+
"pinecone_key": "mykey",
75+
"pinecone_environment": "myenv",
76+
"index": "myindex",
77+
},
78+
}
79+
destination = DestinationPinecone()
80+
result = destination.check(self.logger, bad_config)
81+
self.assertEqual(result.status, Status.FAILED)
82+
83+
# check() is expected to report Status.FAILED when building the indexer raises.
def test_check_with_init_indexer_errors(self):
84+
destination = DestinationPinecone()
85+
# Replace PineconeIndexer as seen by the destination module with a mock that
# raises Exception("Indexer Error") as soon as it is called.
with patch("destination_pinecone.destination.PineconeIndexer", side_effect=Exception("Indexer Error")):
86+
result = destination.check(self.logger, self.config)
87+
self.assertEqual(result.status, Status.FAILED)
88+
6989
@patch("destination_pinecone.destination.Writer")
7090
@patch("destination_pinecone.destination.PineconeIndexer")
7191
@patch("destination_pinecone.destination.create_from_config")

airbyte-integrations/connectors/destination-pinecone/unit_tests/pinecone_indexer_test.py

+31
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

5+
import os
56
from unittest.mock import ANY, MagicMock, Mock, call, patch
67

78
import pytest
@@ -55,6 +56,36 @@ def mock_determine_spec_type():
5556
mock.return_value = "pod"
5657
yield mock
5758

59+
60+
def test_get_source_tag_default():
    """With neither test marker in the environment the tag is "airbyte".

    The markers are removed inside a ``patch.dict`` scope, which snapshots
    ``os.environ`` on entry and restores it on exit, so the removal cannot
    leak into tests that run later in the same process.
    """
    with patch.dict("os.environ"):
        # case when no test env variables are set
        os.environ.pop("PYTEST_CURRENT_TEST", None)
        os.environ.pop("RUN_IN_AIRBYTE_CI", None)
        indexer = create_pinecone_indexer()
        assert indexer.get_source_tag() == "airbyte"
66+
67+
68+
# The test tag is expected whenever PYTEST_CURRENT_TEST is in the environment,
# with or without RUN_IN_AIRBYTE_CI alongside it.
def test_get_source_tag_with_pytest():
69+
# pytest is running by default here
70+
indexer = create_pinecone_indexer()
71+
assert indexer.get_source_tag() == "airbyte_test"
72+
73+
# pytest plus ci is running
74+
# patch.dict adds the CI marker only for the duration of the with-block.
with patch.dict("os.environ", {"RUN_IN_AIRBYTE_CI": "value does not matter"}):
75+
assert indexer.get_source_tag() == "airbyte_test"
76+
77+
78+
# The test tag is expected when RUN_IN_AIRBYTE_CI is in the environment; the
# decorator adds that variable for the duration of this test.
@patch.dict("os.environ", {"RUN_IN_AIRBYTE_CI": "Value does not matter"})
79+
def test_get_source_tag_with_ci():
80+
# CI and pytest is running
81+
indexer = create_pinecone_indexer()
82+
assert indexer.get_source_tag() == "airbyte_test"
83+
84+
# CI is running and PYTEST_CURRENT_TEST is set explicitly as well
# (this block adds the pytest marker; it does not remove it)
85+
with patch.dict("os.environ", {"PYTEST_CURRENT_TEST": "Value does not matter"}):
86+
assert indexer.get_source_tag() == "airbyte_test"
87+
88+
5889
def test_pinecone_index_upsert_and_delete(mock_describe_index):
5990
indexer = create_pinecone_indexer()
6091
indexer._pod_type = "p1"

docs/integrations/destinations/pinecone.md

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ OpenAI and Fake embeddings produce vectors with 1536 dimensions, and the Cohere
7676

7777
| Version | Date | Pull Request | Subject |
7878
| :------ | :--------- | :-------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------- |
79+
| 0.1.1 | 2023-05-14 | [#38151](https://github.com/airbytehq/airbyte/pull/38151) | Add airbyte source tag for attribution |
7980
| 0.1.0 | 2023-05-06 | [#37756](https://github.com/airbytehq/airbyte/pull/37756) | Add support for Pinecone Serverless |
8081
| 0.0.24 | 2023-04-15 | [#37333](https://github.com/airbytehq/airbyte/pull/37333) | Update CDK & pytest version to fix security vulnerabilities. |
8182
| 0.0.23 | 2023-03-22 | [#35911](https://github.com/airbytehq/airbyte/pull/35911) | Bump versions to latest, resolves test failures. |

0 commit comments

Comments
 (0)