Skip to content

Commit 8d9a3aa

Browse files
drrestlazebnyioctavia-squidington-iii
authored
🎉 Source Okta: added parameter 'start_date' (#15050)
* Added parameter 'start_date' in Okta source added: parameter 'start_date' to source Okta changed: unit tests * changes: fix in the case of ISSUE: #14196 * Okta documentation in new format * changes: fix to use super() instead of instance of stream parent * changes: additional changes into OKTA documentaton * changes: switch release to beta * changed: set dockerImageTag -> 0.1.11 * changed: source_specs * ... * ... * Rollback releaseStage * Refactored start date logic * Deleted microseconds from state * Add start date to all streams * Updated to linter * Fixed unit tests * Updated unit tests * auto-bump connector version [ci skip] Co-authored-by: Serhii <[email protected]> Co-authored-by: Octavia Squidington III <[email protected]>
1 parent 8282a45 commit 8d9a3aa

File tree

13 files changed

+431
-269
lines changed

13 files changed

+431
-269
lines changed

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@
628628
- name: Okta
629629
sourceDefinitionId: 1d4fdb25-64fc-4569-92da-fcdca79a8372
630630
dockerRepository: airbyte/source-okta
631-
dockerImageTag: 0.1.11
631+
dockerImageTag: 0.1.12
632632
documentationUrl: https://docs.airbyte.io/integrations/sources/okta
633633
icon: okta.svg
634634
sourceType: api

airbyte-config/init/src/main/resources/seed/source_specs.yaml

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6031,7 +6031,7 @@
60316031
- - "client_secret"
60326032
oauthFlowOutputParameters:
60336033
- - "access_token"
6034-
- dockerImage: "airbyte/source-okta:0.1.11"
6034+
- dockerImage: "airbyte/source-okta:0.1.12"
60356035
spec:
60366036
documentationUrl: "https://docs.airbyte.io/integrations/sources/okta"
60376037
connectionSpecification:
@@ -6047,6 +6047,14 @@
60476047
description: "The Okta domain. See the <a href=\"https://docs.airbyte.io/integrations/sources/okta\"\
60486048
>docs</a> for instructions on how to find it."
60496049
airbyte_secret: false
6050+
start_date:
6051+
type: "string"
6052+
pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$"
6053+
description: "UTC date and time in the format YYYY-MM-DDTHH:MM:SSZ. Any\
6054+
\ data before this date will not be replicated."
6055+
examples:
6056+
- "2022-07-22T00:00:00Z"
6057+
title: "Start Date"
60506058
credentials:
60516059
title: "Authorization Method *"
60526060
type: "object"
@@ -6107,15 +6115,15 @@
61076115
oauth_config_specification:
61086116
oauth_user_input_from_connector_config_specification:
61096117
type: "object"
6110-
additionalProperties: false
6118+
additionalProperties: true
61116119
properties:
61126120
domain:
61136121
type: "string"
61146122
path_in_connector_config:
61156123
- "domain"
61166124
complete_oauth_output_specification:
61176125
type: "object"
6118-
additionalProperties: false
6126+
additionalProperties: true
61196127
properties:
61206128
refresh_token:
61216129
type: "string"
@@ -6124,15 +6132,15 @@
61246132
- "refresh_token"
61256133
complete_oauth_server_input_specification:
61266134
type: "object"
6127-
additionalProperties: false
6135+
additionalProperties: true
61286136
properties:
61296137
client_id:
61306138
type: "string"
61316139
client_secret:
61326140
type: "string"
61336141
complete_oauth_server_output_specification:
61346142
type: "object"
6135-
additionalProperties: false
6143+
additionalProperties: true
61366144
properties:
61376145
client_id:
61386146
type: "string"

airbyte-integrations/connectors/source-okta/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ RUN pip install .
1212
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
1313
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
1414

15-
LABEL io.airbyte.version=0.1.11
15+
LABEL io.airbyte.version=0.1.12
1616
LABEL io.airbyte.name=airbyte/source-okta

airbyte-integrations/connectors/source-okta/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Make sure to familiarize yourself with [pytest test discovery](https://docs.pyte
9797
First install test dependencies into your virtual environment:
9898

9999
```shell
100-
pip install .[tests]
100+
pip install .'[tests]'
101101
```
102102

103103
### Unit Tests
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#
2+
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from typing import Any, Mapping, Tuple
6+
7+
import requests
8+
from airbyte_cdk.sources.streams.http.auth import Oauth2Authenticator
9+
10+
11+
class OktaOauth2Authenticator(Oauth2Authenticator):
12+
def get_refresh_request_body(self) -> Mapping[str, Any]:
13+
return {
14+
"grant_type": "refresh_token",
15+
"refresh_token": self.refresh_token,
16+
}
17+
18+
def refresh_access_token(self) -> Tuple[str, int]:
19+
try:
20+
response = requests.request(
21+
method="POST",
22+
url=self.token_refresh_endpoint,
23+
data=self.get_refresh_request_body(),
24+
auth=(self.client_id, self.client_secret),
25+
)
26+
response.raise_for_status()
27+
response_json = response.json()
28+
return response_json["access_token"], response_json["expires_in"]
29+
except Exception as e:
30+
raise Exception(f"Error while refreshing access token: {e}") from e

airbyte-integrations/connectors/source-okta/source_okta/source.py

Lines changed: 64 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,18 @@
1313
from airbyte_cdk.sources import AbstractSource
1414
from airbyte_cdk.sources.streams import Stream
1515
from airbyte_cdk.sources.streams.http import HttpStream
16-
from airbyte_cdk.sources.streams.http.auth import Oauth2Authenticator, TokenAuthenticator
16+
17+
from .utils import datetime_to_string, delete_milliseconds, get_api_endpoint, get_start_date, initialize_authenticator
1718

1819

1920
class OktaStream(HttpStream, ABC):
2021
page_size = 200
2122

22-
def __init__(self, url_base: str, *args, **kwargs):
23+
def __init__(self, url_base: str, start_date: pendulum.datetime, *args, **kwargs):
2324
super().__init__(*args, **kwargs)
2425
# Inject custom url base to the stream
2526
self._url_base = url_base.rstrip("/") + "/"
27+
self.start_date = start_date
2628

2729
@property
2830
def url_base(self) -> str:
@@ -97,11 +99,10 @@ def request_params(
9799
stream_slice: Mapping[str, any] = None,
98100
next_page_token: Mapping[str, Any] = None,
99101
) -> MutableMapping[str, Any]:
100-
stream_state = stream_state or {}
101102
params = super().request_params(stream_state, stream_slice, next_page_token)
102-
latest_entry = stream_state.get(self.cursor_field)
103-
if latest_entry:
104-
params["filter"] = f'{self.cursor_field} gt "{latest_entry}"'
103+
latest_entry = stream_state.get(self.cursor_field) if stream_state else datetime_to_string(self.start_date)
104+
filter_param = {"filter": f'{self.cursor_field} gt "{latest_entry}"'}
105+
params.update(filter_param)
105106
return params
106107

107108

@@ -120,7 +121,7 @@ class GroupMembers(IncrementalOktaStream):
120121
use_cache = True
121122

122123
def stream_slices(self, **kwargs):
123-
group_stream = Groups(authenticator=self.authenticator, url_base=self.url_base)
124+
group_stream = Groups(authenticator=self.authenticator, url_base=self.url_base, start_date=self.start_date)
124125
for group in group_stream.read_records(sync_mode=SyncMode.full_refresh):
125126
yield {"group_id": group["id"]}
126127

@@ -134,10 +135,12 @@ def request_params(
134135
stream_slice: Mapping[str, any] = None,
135136
next_page_token: Mapping[str, Any] = None,
136137
) -> MutableMapping[str, Any]:
137-
params = OktaStream.request_params(self, stream_state, stream_slice, next_page_token)
138-
latest_entry = stream_state.get(self.cursor_field)
139-
if latest_entry:
140-
params["after"] = latest_entry
138+
# The filter param should be ignored: SCIM filter expressions can't use the published
139+
# attribute since it may conflict with the logic of the since, after, and until query params.
140+
# Docs: https://developer.okta.com/docs/reference/api/system-log/#expression-filter
141+
params = super(IncrementalOktaStream, self).request_params(stream_state, stream_slice, next_page_token)
142+
latest_entry = stream_state.get(self.cursor_field) if stream_state else self.min_user_id
143+
params["after"] = latest_entry
141144
return params
142145

143146
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
@@ -154,7 +157,7 @@ class GroupRoleAssignments(OktaStream):
154157
use_cache = True
155158

156159
def stream_slices(self, **kwargs):
157-
group_stream = Groups(authenticator=self.authenticator, url_base=self.url_base)
160+
group_stream = Groups(authenticator=self.authenticator, url_base=self.url_base, start_date=self.start_date)
158161
for group in group_stream.read_records(sync_mode=SyncMode.full_refresh):
159162
yield {"group_id": group["id"]}
160163

@@ -168,6 +171,28 @@ class Logs(IncrementalOktaStream):
168171
cursor_field = "published"
169172
primary_key = "uuid"
170173

174+
def __init__(self, url_base, **kwargs):
175+
super().__init__(url_base=url_base, **kwargs)
176+
self._raise_on_http_errors: bool = True
177+
178+
@property
179+
def raise_on_http_errors(self) -> bool:
180+
return self._raise_on_http_errors
181+
182+
def should_retry(self, response: requests.Response) -> bool:
183+
"""
184+
When the connector gets an abnormal state, the API returns an error with a 400 status code
185+
and internal error code E0000001. The connector ignores an error with 400 code
186+
to finish the sync successfully and inform the user about the error in logs with an
187+
error message.
188+
"""
189+
190+
if response.status_code == 400 and response.json().get("errorCode") == "E0000001":
191+
self.logger.info(f"{response.json()['errorSummary']}")
192+
self._raise_on_http_errors = False
193+
return False
194+
return HttpStream.should_retry(self, response)
195+
171196
def path(self, **kwargs) -> str:
172197
return "logs"
173198

@@ -177,24 +202,27 @@ def request_params(
177202
stream_slice: Mapping[str, any] = None,
178203
next_page_token: Mapping[str, Any] = None,
179204
) -> MutableMapping[str, Any]:
180-
# The log stream use a different params to get data
181-
# https://developer.okta.com/docs/reference/api/system-log/#datetime-filter
182-
stream_state = stream_state or {}
183-
params = OktaStream.request_params(self, stream_state, stream_slice, next_page_token)
184-
latest_entry = stream_state.get(self.cursor_field)
185-
if latest_entry:
186-
params["since"] = latest_entry
187-
# [Test-driven Development] Set until When the cursor value from the stream state
188-
# is abnormally large, otherwise the server side that sets now to until
189-
# will throw an error: The "until" date must be later than the "since" date
190-
# https://developer.okta.com/docs/reference/api/system-log/#request-parameters
191-
parsed = pendulum.parse(latest_entry)
192-
utc_now = pendulum.utcnow()
193-
if parsed > utc_now:
194-
params["until"] = latest_entry
195-
205+
# The log stream uses different params to get data.
206+
# Docs: https://developer.okta.com/docs/reference/api/system-log/#datetime-filter
207+
# The filter param should be ignored: SCIM filter expressions can't use the published
208+
# attribute since it may conflict with the logic of the since, after, and until query params.
209+
# Docs: https://developer.okta.com/docs/reference/api/system-log/#expression-filter
210+
params = super(IncrementalOktaStream, self).request_params(stream_state, stream_slice, next_page_token)
211+
latest_entry = stream_state.get(self.cursor_field) if stream_state else self.start_date
212+
params["since"] = latest_entry
196213
return params
197214

215+
def parse_response(
216+
self,
217+
response: requests.Response,
218+
**kwargs,
219+
) -> Iterable[Mapping]:
220+
data = response.json() if isinstance(response.json(), list) else []
221+
222+
for record in data:
223+
record[self.cursor_field] = delete_milliseconds(record[self.cursor_field])
224+
yield record
225+
198226

199227
class Users(IncrementalOktaStream):
200228
cursor_field = "lastUpdated"
@@ -242,7 +270,7 @@ class UserRoleAssignments(OktaStream):
242270
use_cache = True
243271

244272
def stream_slices(self, **kwargs):
245-
user_stream = Users(authenticator=self.authenticator, url_base=self.url_base)
273+
user_stream = Users(authenticator=self.authenticator, url_base=self.url_base, start_date=self.start_date)
246274
for user in user_stream.read_records(sync_mode=SyncMode.full_refresh):
247275
yield {"user_id": user["id"]}
248276

@@ -264,7 +292,7 @@ def parse_response(
264292
yield from response.json()["permissions"]
265293

266294
def stream_slices(self, **kwargs):
267-
custom_roles = CustomRoles(authenticator=self.authenticator, url_base=self.url_base)
295+
custom_roles = CustomRoles(authenticator=self.authenticator, url_base=self.url_base, start_date=self.start_date)
268296
for role in custom_roles.read_records(sync_mode=SyncMode.full_refresh):
269297
yield {"role_id": role["id"]}
270298

@@ -273,66 +301,11 @@ def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
273301
return f"iam/roles/{role_id}/permissions"
274302

275303

276-
class OktaOauth2Authenticator(Oauth2Authenticator):
277-
def get_refresh_request_body(self) -> Mapping[str, Any]:
278-
return {
279-
"grant_type": "refresh_token",
280-
"refresh_token": self.refresh_token,
281-
}
282-
283-
def refresh_access_token(self) -> Tuple[str, int]:
284-
try:
285-
response = requests.request(
286-
method="POST",
287-
url=self.token_refresh_endpoint,
288-
data=self.get_refresh_request_body(),
289-
auth=(self.client_id, self.client_secret),
290-
)
291-
response.raise_for_status()
292-
response_json = response.json()
293-
return response_json["access_token"], response_json["expires_in"]
294-
except Exception as e:
295-
raise Exception(f"Error while refreshing access token: {e}") from e
296-
297-
298304
class SourceOkta(AbstractSource):
299-
def initialize_authenticator(self, config: Mapping[str, Any]):
300-
if "token" in config:
301-
return TokenAuthenticator(config["token"], auth_method="SSWS")
302-
303-
creds = config.get("credentials")
304-
if not creds:
305-
raise Exception("Config validation error. `credentials` not specified.")
306-
307-
auth_type = creds.get("auth_type")
308-
if not auth_type:
309-
raise Exception("Config validation error. `auth_type` not specified.")
310-
311-
if auth_type == "api_token":
312-
return TokenAuthenticator(creds["api_token"], auth_method="SSWS")
313-
314-
if auth_type == "oauth2.0":
315-
return OktaOauth2Authenticator(
316-
token_refresh_endpoint=self.get_token_refresh_endpoint(config),
317-
client_secret=creds["client_secret"],
318-
client_id=creds["client_id"],
319-
refresh_token=creds["refresh_token"],
320-
)
321-
322-
@staticmethod
323-
def get_url_base(config: Mapping[str, Any]) -> str:
324-
return config.get("base_url") or f"https://{config['domain']}.okta.com"
325-
326-
def get_api_endpoint(self, config: Mapping[str, Any]) -> str:
327-
return parse.urljoin(self.get_url_base(config), "/api/v1/")
328-
329-
def get_token_refresh_endpoint(self, config: Mapping[str, Any]) -> str:
330-
return parse.urljoin(self.get_url_base(config), "/oauth2/v1/token")
331-
332305
def check_connection(self, logger, config) -> Tuple[bool, any]:
333306
try:
334-
auth = self.initialize_authenticator(config)
335-
api_endpoint = self.get_api_endpoint(config)
307+
auth = initialize_authenticator(config)
308+
api_endpoint = get_api_endpoint(config)
336309
url = parse.urljoin(api_endpoint, "users")
337310

338311
response = requests.get(
@@ -349,13 +322,11 @@ def check_connection(self, logger, config) -> Tuple[bool, any]:
349322
return False, "Failed to authenticate with the provided credentials"
350323

351324
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
352-
auth = self.initialize_authenticator(config)
353-
api_endpoint = self.get_api_endpoint(config)
325+
auth = initialize_authenticator(config)
326+
api_endpoint = get_api_endpoint(config)
327+
start_date = get_start_date(config)
354328

355-
initialization_params = {
356-
"authenticator": auth,
357-
"url_base": api_endpoint,
358-
}
329+
initialization_params = {"authenticator": auth, "url_base": api_endpoint, "start_date": start_date}
359330

360331
return [
361332
Groups(**initialization_params),

0 commit comments

Comments
 (0)