Skip to content

Commit 751b7af

Browse files
midavadim, lazebnyi, octavia-squidington-iii, katmarkham
authored
šŸ› Source Mixpanel - Add new datatime formats for state for cohort_members stream, added obsolete state reset for cohort_members stream (#38066)
Co-authored-by: Serhii Lazebnyi <[email protected]> Co-authored-by: Octavia Squidington III <[email protected]> Co-authored-by: katmarkham <[email protected]> Co-authored-by: Serhii Lazebnyi <[email protected]>
1 parent b45b38d commit 751b7af

File tree

12 files changed

+464
-60
lines changed

12 files changed

+464
-60
lines changed

airbyte-integrations/connectors/source-mixpanel/integration_tests/abnormal_state.json

+22-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,28 @@
3535
{
3636
"type": "STREAM",
3737
"stream": {
38-
"stream_state": { "last_seen": "2030-01-01T00:00:00" },
38+
"stream_state": {
39+
"states": [
40+
{
41+
"partition": {
42+
"id": 4269289,
43+
"parent_slice": {}
44+
},
45+
"cursor": {
46+
"last_seen": "2030-01-01T00:00:00"
47+
}
48+
},
49+
{
50+
"partition": {
51+
"id": 1343181,
52+
"parent_slice": {}
53+
},
54+
"cursor": {
55+
"last_seen": "2030-01-01T00:00:00"
56+
}
57+
}
58+
]
59+
},
3960
"stream_descriptor": { "name": "cohort_members" }
4061
}
4162
},

airbyte-integrations/connectors/source-mixpanel/metadata.yaml

+17-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ data:
1111
connectorSubtype: api
1212
connectorType: source
1313
definitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a
14-
dockerImageTag: 2.3.1
14+
dockerImageTag: 3.0.0
1515
dockerRepository: airbyte/source-mixpanel
1616
documentationUrl: https://docs.airbyte.com/integrations/sources/mixpanel
1717
githubIssueLabel: source-mixpanel
@@ -26,18 +26,30 @@ data:
2626
registries:
2727
cloud:
2828
enabled: true
29-
dockerImageTag: 2.2.0 # temporary pin due to a bug in 2.3.0 https://github.com/airbytehq/airbyte/pull/38106
3029
oss:
3130
enabled: true
32-
dockerImageTag: 2.2.0 # temporary pin due to a bug in 2.3.0 https://github.com/airbytehq/airbyte/pull/38106
3331
releaseStage: generally_available
3432
releases:
3533
breakingChanges:
34+
3.0.0:
35+
message:
36+
In this release, CohortMembers stream has changed due to changes in primary key and an improper state format.
37+
Please reset CohortMembers stream. For more information, see our migration documentation.
38+
upgradeDeadline: "2024-06-03"
3639
2.0.0:
37-
message: In this release, the default primary key for stream Export has been deleted, allowing users to select the key that best fits their data. Refreshing the source schema and resetting affected streams is necessary only if new primary keys are to be applied following the upgrade.
40+
message:
41+
In this release, the default primary key for stream Export has been
42+
deleted, allowing users to select the key that best fits their data. Refreshing
43+
the source schema and resetting affected streams is necessary only if new
44+
primary keys are to be applied following the upgrade.
3845
upgradeDeadline: "2023-11-30"
3946
1.0.0:
40-
message: In this release, the datetime field of stream engage has had its type changed from date-time to string due to inconsistent data from Mixpanel. Additionally, the primary key for stream export has been fixed to uniquely identify records. Users will need to refresh the source schema and reset affected streams after upgrading.
47+
message:
48+
In this release, the datetime field of stream engage has had its
49+
type changed from date-time to string due to inconsistent data from Mixpanel.
50+
Additionally, the primary key for stream export has been fixed to uniquely
51+
identify records. Users will need to refresh the source schema and reset
52+
affected streams after upgrading.
4153
upgradeDeadline: "2023-10-31"
4254
suggestedStreams:
4355
streams:

airbyte-integrations/connectors/source-mixpanel/poetry.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-integrations/connectors/source-mixpanel/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "2.3.1"
6+
version = "3.0.0"
77
name = "source-mixpanel"
88
description = "Source implementation for Mixpanel."
99
authors = [ "Airbyte <[email protected]>",]

airbyte-integrations/connectors/source-mixpanel/source_mixpanel/components.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
from airbyte_cdk.models import AirbyteMessage, SyncMode, Type
1010
from airbyte_cdk.sources.declarative.extractors import DpathExtractor
1111
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
12-
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import LegacyToPerPartitionStateMigration
13-
from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor
1412
from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter
1513
from airbyte_cdk.sources.declarative.requesters import HttpRequester
1614
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import PageIncrement
@@ -261,6 +259,8 @@ class EngagePaginationStrategy(PageIncrement):
261259
page - incremental page number
262260
"""
263261

262+
_total = 0
263+
264264
def next_page_token(self, response, last_records: List[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
265265
"""
266266
Determines page and subpage numbers for the `items` stream
@@ -281,6 +281,10 @@ def next_page_token(self, response, last_records: List[Mapping[str, Any]]) -> Op
281281
self._total = None
282282
return None
283283

284+
def reset(self) -> None:
285+
super().reset()
286+
self._total = 0
287+
284288

285289
class EngageJsonFileSchemaLoader(JsonFileSchemaLoader):
286290
"""Engage schema combines static and dynamic approaches"""

airbyte-integrations/connectors/source-mixpanel/source_mixpanel/manifest.yaml

+13-4
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,14 @@ definitions:
129129
record_selector:
130130
$ref: "#/definitions/selector_empty_dpath"
131131
record_filter:
132-
condition: "{{ record['created'] >= stream_state.created if stream_state.created else true }}"
132+
condition: "{{ record.created >= stream_interval.start_time }}"
133133
incremental_sync:
134134
type: DatetimeBasedCursor
135135
cursor_field: created
136136
cursor_datetime_formats:
137+
- "%Y-%m-%dT%H:%M:%S"
137138
- "%Y-%m-%d %H:%M:%S"
139+
- "%Y-%m-%dT%H:%M:%SZ"
138140
- "%Y-%m-%dT%H:%M:%S%z"
139141
datetime_format: "%Y-%m-%d %H:%M:%S"
140142
start_datetime:
@@ -173,7 +175,7 @@ definitions:
173175
record_selector:
174176
$ref: "#/definitions/selector"
175177
record_filter:
176-
condition: "{{ record['$properties']['$last_seen'] >= stream_state.last_seen if stream_state.last_seen else true }}"
178+
condition: "{{ record['$properties']['$last_seen'] >= stream_interval.start_time }}"
177179
incremental_sync:
178180
type: DatetimeBasedCursor
179181
cursor_field: last_seen
@@ -191,14 +193,17 @@ definitions:
191193
fields:
192194
- path:
193195
- browser_version
194-
value: "{{ record.browser_version | string }}"
196+
value: "{{ record.browser_version | string if record.browser_version else '' }}"
195197
schema_loader:
196198
type: CustomSchemaLoader
197199
class_name: "source_mixpanel.components.EngageJsonFileSchemaLoader"
198200
file_path: "./source_mixpanel/schemas/{{ parameters['name'] }}.json"
199201

200202
cohort_members_stream:
201203
$ref: "#/definitions/engage_stream"
204+
primary_key:
205+
- distinct_id
206+
- cohort_id
202207
$parameters:
203208
name: cohort_members
204209
path: 2.0/engage
@@ -210,6 +215,10 @@ definitions:
210215
http_method: POST
211216
paginator:
212217
$ref: "#/definitions/paginator"
218+
record_selector:
219+
$ref: "#/definitions/selector"
220+
record_filter:
221+
condition: "{{ record['$properties']['$last_seen'] >= stream_interval.start_time }}"
213222
partition_router:
214223
class_name: "source_mixpanel.components.CohortMembersSubstreamPartitionRouter"
215224
parent_stream_configs:
@@ -232,7 +241,7 @@ definitions:
232241
fields:
233242
- path:
234243
- browser_version
235-
value: "{{ record.browser_version | string }}"
244+
value: "{{ record.browser_version | string if record.browser_version else '' }}"
236245

237246
# No API docs! build based on singer source
238247
revenue_stream:

airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from .engage import EngageSchema
33
from .export import Export, ExportSchema
44

5-
65
__all__ = [
76
"IncrementalMixpanelStream",
87
"MixpanelStream",

airbyte-integrations/connectors/source-mixpanel/unit_tests/conftest.py

+19
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
4+
import os
5+
from pathlib import Path
46

57
import pendulum
68
import pytest
@@ -37,3 +39,20 @@ def config_raw(config):
3739
@pytest.fixture(autouse=True)
3840
def patch_time(mocker):
3941
mocker.patch("time.sleep")
42+
43+
44+
ENV_REQUEST_CACHE_PATH = "REQUEST_CACHE_PATH"
45+
os.environ["REQUEST_CACHE_PATH"] = ENV_REQUEST_CACHE_PATH
46+
47+
def delete_cache_files(cache_directory):
48+
directory_path = Path(cache_directory)
49+
if directory_path.exists() and directory_path.is_dir():
50+
for file_path in directory_path.glob("*.sqlite"):
51+
file_path.unlink()
52+
53+
@pytest.fixture(autouse=True)
54+
def clear_cache_before_each_test():
55+
# The problem: Once the first request is cached, we will keep getting the cached result no matter what setup we prepared for a particular test.
56+
# Solution: We must delete the cache before each test because for the same URL, we want to define multiple responses and status codes.
57+
delete_cache_files(os.getenv(ENV_REQUEST_CACHE_PATH))
58+
yield

airbyte-integrations/connectors/source-mixpanel/unit_tests/test_source.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ def test_streams_string_date(requests_mock, config_raw):
142142
),
143143
)
144144
def test_config_validation(config, success, expected_error_message, requests_mock):
145-
requests_mock.get("https://mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1}])
146-
requests_mock.get("https://mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1}])
147-
requests_mock.get("https://eu.mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1}])
145+
requests_mock.get("https://mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1, 'created':'2021-02-11T00:00:00Z'}])
146+
requests_mock.get("https://mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1, 'created':'2021-02-11T00:00:00Z'}])
147+
requests_mock.get("https://eu.mixpanel.com/api/2.0/cohorts/list", status_code=200, json=[{'a': 1, 'created':'2021-02-11T00:00:00Z'}])
148148
try:
149149
is_success, message = SourceMixpanel().check_connection(None, config)
150150
except AirbyteTracedException as e:

0 commit comments

Comments
(0)