Skip to content

Destination Pinecone: Add source_tag for attribution + unit tests #38151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
import uuid
from typing import Optional

Expand All @@ -25,14 +26,17 @@

# Presumably the maximum number of ids sent in one delete request; its use is outside this excerpt — confirm.
MAX_IDS_PER_DELETE = 1000

# Tag values returned by PineconeIndexer.get_source_tag(): AIRBYTE_TEST_TAG when a
# pytest / Airbyte CI environment variable is present, AIRBYTE_TAG otherwise.
AIRBYTE_TAG = "airbyte"
AIRBYTE_TEST_TAG = "airbyte_test"


class PineconeIndexer(Indexer):
config: PineconeIndexingModel

def __init__(self, config: PineconeIndexingModel, embedding_dimensions: int):
super().__init__(config)
try:
self.pc = PineconeGRPC(api_key=config.pinecone_key, threaded=True)
self.pc = PineconeGRPC(api_key=config.pinecone_key, source_tag=self.get_source_tag, threaded=True)
except PineconeException as e:
return AirbyteConnectionStatus(status=Status.FAILED, message=str(e))

Expand Down Expand Up @@ -62,6 +66,10 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog):
def post_sync(self):
    """Return a fresh empty list; this implementation produces nothing after a sync."""
    nothing_to_report = list()
    return nothing_to_report

def get_source_tag(self):
    """Choose the source tag for the current runtime environment.

    Returns AIRBYTE_TEST_TAG when either ``PYTEST_CURRENT_TEST`` or
    ``RUN_IN_AIRBYTE_CI`` is present in the process environment (their values
    are ignored), and AIRBYTE_TAG otherwise.
    """
    test_markers = ("PYTEST_CURRENT_TEST", "RUN_IN_AIRBYTE_CI")
    if any(marker in os.environ for marker in test_markers):
        return AIRBYTE_TEST_TAG
    return AIRBYTE_TAG

def delete_vectors(self, filter, namespace=None, prefix=None):
if self._pod_type == "starter":
# Starter pod types have a maximum of 100000 rows
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import logging
import os
import time

from airbyte_cdk.destinations.vector_db_based.embedder import OPEN_AI_VECTOR_SIZE
Expand Down Expand Up @@ -48,6 +49,8 @@ def tearDown(self):
else :
print("Noting to delete. No data in the index/namespace.")

def test_integration_test_flag_is_set(self):
    """Check that the PYTEST_CURRENT_TEST variable is visible in the environment of this test."""
    flag_present = "PYTEST_CURRENT_TEST" in os.environ
    assert flag_present

def test_check_valid_config(self):
outcome = DestinationPinecone().check(logging.getLogger("airbyte"), self.config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ data:
connectorSubtype: vectorstore
connectorType: destination
definitionId: 3d2b6f84-7f0d-4e3f-a5e5-7c7d4b50eabd
dockerImageTag: 0.1.0
dockerImageTag: 0.1.1
dockerRepository: airbyte/destination-pinecone
documentationUrl: https://docs.airbyte.com/integrations/destinations/pinecone
githubIssueLabel: destination-pinecone
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "airbyte-destination-pinecone"
version = "0.1.0"
version = "0.1.1"
description = "Airbyte destination implementation for Pinecone."
authors = ["Airbyte <[email protected]>"]
license = "MIT"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,26 @@ def test_check_with_errors(self, MockedEmbedder, MockedPineconeIndexer):
mock_embedder.check.assert_called_once()
mock_indexer.check.assert_called_once()

def test_check_with_config_errors(self):
    """check() must report FAILED for a config that carries "embedding_2" in place of an embedding section."""
    processing_section = {"text_fields": ["str_col"], "metadata_fields": [], "chunk_size": 1000}
    indexing_section = {
        "pinecone_key": "mykey",
        "pinecone_environment": "myenv",
        "index": "myindex",
    }
    malformed = {
        "processing": processing_section,
        # presumably a deliberately wrong key (the valid one looks to be "embedding") — confirm against the spec
        "embedding_2": {"mode": "openai", "openai_key": "mykey"},
        "indexing": indexing_section,
    }
    outcome = DestinationPinecone().check(self.logger, malformed)
    self.assertEqual(outcome.status, Status.FAILED)

def test_check_with_init_indexer_errors(self):
    """check() must report FAILED when constructing PineconeIndexer raises."""
    destination = DestinationPinecone()
    failing_indexer = patch(
        "destination_pinecone.destination.PineconeIndexer",
        side_effect=Exception("Indexer Error"),
    )
    with failing_indexer:
        outcome = destination.check(self.logger, self.config)
        self.assertEqual(outcome.status, Status.FAILED)

@patch("destination_pinecone.destination.Writer")
@patch("destination_pinecone.destination.PineconeIndexer")
@patch("destination_pinecone.destination.create_from_config")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
from unittest.mock import ANY, MagicMock, Mock, call, patch

import pytest
Expand Down Expand Up @@ -55,6 +56,36 @@ def mock_determine_spec_type():
mock.return_value = "pod"
yield mock


def test_get_source_tag_default():
    """With neither test marker in the environment, the indexer reports the "airbyte" tag."""
    # patch.dict snapshots os.environ and restores it on exit, so removing the
    # markers inside the block does not leak into tests that run afterwards.
    with patch.dict(os.environ):
        os.environ.pop("PYTEST_CURRENT_TEST", None)
        os.environ.pop("RUN_IN_AIRBYTE_CI", None)
        indexer = create_pinecone_indexer()
        assert indexer.get_source_tag() == "airbyte"


def test_get_source_tag_with_pytest():
    """Under pytest the tag is "airbyte_test", with or without the CI marker."""
    expected_tag = "airbyte_test"
    indexer = create_pinecone_indexer()

    # pytest marker only (relies on pytest being the runner)
    assert indexer.get_source_tag() == expected_tag

    # CI marker added on top of the pytest one
    ci_env = {"RUN_IN_AIRBYTE_CI": "value does not matter"}
    with patch.dict("os.environ", ci_env):
        assert indexer.get_source_tag() == expected_tag


@patch.dict("os.environ", {"RUN_IN_AIRBYTE_CI": "Value does not matter"})
def test_get_source_tag_with_ci():
    """With the CI marker set the tag is "airbyte_test", whether or not the pytest marker is present."""
    indexer = create_pinecone_indexer()

    # CI and pytest markers both present
    assert indexer.get_source_tag() == "airbyte_test"

    # CI marker only: remove the pytest marker for the duration of the check so
    # the CI branch is exercised on its own; patch.dict restores the environment on exit.
    with patch.dict("os.environ"):
        os.environ.pop("PYTEST_CURRENT_TEST", None)
        assert indexer.get_source_tag() == "airbyte_test"


def test_pinecone_index_upsert_and_delete(mock_describe_index):
indexer = create_pinecone_indexer()
indexer._pod_type = "p1"
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/destinations/pinecone.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ OpenAI and Fake embeddings produce vectors with 1536 dimensions, and the Cohere

| Version | Date | Pull Request | Subject |
| :------ | :--------- | :-------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------- |
| 0.1.1 | 2023-05-14 | [#38151](https://github.com/airbytehq/airbyte/pull/38151) | Add airbyte source tag for attribution |
| 0.1.0 | 2023-05-06 | [#37756](https://github.com/airbytehq/airbyte/pull/37756) | Add support for Pinecone Serverless |
| 0.0.24 | 2023-04-15 | [#37333](https://github.com/airbytehq/airbyte/pull/37333) | Update CDK & pytest version to fix security vulnerabilities. |
| 0.0.23 | 2023-03-22 | [#35911](https://github.com/airbytehq/airbyte/pull/35911) | Bump versions to latest, resolves test failures. |
Expand Down
Loading