Skip to content

Commit 7180c59

Browse files
committed
update Google Sheets Source after review #2
1 parent 44847d8 commit 7180c59

File tree

5 files changed: +51 additions, -49 deletions (lines changed)

5 files changed: +51 additions, -49 deletions (lines changed)

airbyte-integrations/connectors/source-google-sheets/google_sheets_source/client.py

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -22,32 +22,35 @@
2222
SOFTWARE.
2323
"""
2424

25+
from typing import Dict, List
26+
2527
import backoff
26-
from apiclient import discovery, errors
27-
from requests.status_codes import codes as status_codes
28+
from googleapiclient import errors
29+
from requests import codes as status_codes
30+
31+
from .helpers import SCOPES, Helpers
2832

2933

3034
def error_handler(error):
3135
return error.resp.status != status_codes.TOO_MANY_REQUESTS
3236

3337

3438
class GoogleSheetsClient:
35-
@staticmethod
39+
def __init__(self, credentials: Dict[str, str], scopes: List[str] = SCOPES):
40+
self.client = Helpers.get_authenticated_sheets_client(credentials, scopes)
41+
3642
@backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
37-
def get(client: discovery.Resource, **kwargs):
38-
return client.get(**kwargs).execute()
43+
def get(self, **kwargs):
44+
return self.client.get(**kwargs).execute()
3945

40-
@staticmethod
4146
@backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
42-
def create(client: discovery.Resource, **kwargs):
43-
return client.create(**kwargs).execute()
47+
def create(self, **kwargs):
48+
return self.client.create(**kwargs).execute()
4449

45-
@staticmethod
4650
@backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
47-
def get_values(client: discovery.Resource, **kwargs):
48-
return client.values().batchGet(**kwargs).execute()
51+
def get_values(self, **kwargs):
52+
return self.client.values().batchGet(**kwargs).execute()
4953

50-
@staticmethod
5154
@backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
52-
def update_values(client: discovery.Resource, **kwargs):
53-
return client.values().batchUpdate(**kwargs).execute()
55+
def update_values(self, **kwargs):
56+
return self.client.values().batchUpdate(**kwargs).execute()

airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@ def __init__(self):
4848

4949
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
5050
# Check involves verifying that the specified spreadsheet is reachable with our credentials.
51-
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
51+
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
5252
spreadsheet_id = config["spreadsheet_id"]
5353
try:
5454
# Attempt to get first row of sheet
55-
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
55+
client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
5656
except errors.HttpError as err:
5757
reason = str(err)
5858
# Give a clearer message if it's a common error like 404.
@@ -64,13 +64,11 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
6464
return AirbyteConnectionStatus(status=Status.SUCCEEDED)
6565

6666
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
67-
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
67+
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
6868
spreadsheet_id = config["spreadsheet_id"]
6969
try:
7070
logger.info(f"Running discovery on sheet {spreadsheet_id}")
71-
spreadsheet_metadata = Spreadsheet.parse_obj(
72-
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False)
73-
)
71+
spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
7472
sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets]
7573
streams = []
7674
for sheet_name in sheet_names:
@@ -88,7 +86,7 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
8886
def read(
8987
self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
9088
) -> Generator[AirbyteMessage, None, None]:
91-
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
89+
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
9290

9391
sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
9492
spreadsheet_id = config["spreadsheet_id"]
@@ -106,7 +104,7 @@ def read(
106104
range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}"
107105
logger.info(f"Fetching range {range}")
108106
row_batch = SpreadsheetValues.parse_obj(
109-
GoogleSheetsClient.get_values(client, spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS")
107+
client.get_values(spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS")
110108
)
111109
row_cursor += ROW_BATCH_SIZE + 1
112110
# there should always be one range since we requested only one

airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -27,28 +27,27 @@
2727
from typing import Dict, FrozenSet, Iterable, List
2828

2929
from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog
30-
from apiclient import discovery
3130
from google.oauth2 import service_account
31+
from googleapiclient import discovery
3232

33-
from .client import GoogleSheetsClient
3433
from .models.spreadsheet import RowData, Spreadsheet
3534

3635
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/drive.readonly"]
3736

3837

3938
class Helpers(object):
4039
@staticmethod
41-
def get_authenticated_sheets_client(credentials: Dict[str, str], scopes=SCOPES) -> discovery.Resource:
40+
def get_authenticated_sheets_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource:
4241
creds = Helpers.get_authenticated_google_credentials(credentials, scopes)
4342
return discovery.build("sheets", "v4", credentials=creds).spreadsheets()
4443

4544
@staticmethod
46-
def get_authenticated_drive_client(credentials: Dict[str, str], scopes=SCOPES) -> discovery.Resource:
45+
def get_authenticated_drive_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource:
4746
creds = Helpers.get_authenticated_google_credentials(credentials, scopes)
4847
return discovery.build("drive", "v3", credentials=creds)
4948

5049
@staticmethod
51-
def get_authenticated_google_credentials(credentials: Dict[str, str], scopes=SCOPES):
50+
def get_authenticated_google_credentials(credentials: Dict[str, str], scopes: List[str] = SCOPES):
5251
return service_account.Credentials.from_service_account_info(credentials, scopes=scopes)
5352

5453
@staticmethod
@@ -87,10 +86,8 @@ def get_formatted_row_values(row_data: RowData) -> List[str]:
8786
return [value.formattedValue for value in row_data.values]
8887

8988
@staticmethod
90-
def get_first_row(client: discovery.Resource, spreadsheet_id: str, sheet_name: str) -> List[str]:
91-
spreadsheet = Spreadsheet.parse_obj(
92-
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1")
93-
)
89+
def get_first_row(client, spreadsheet_id: str, sheet_name: str) -> List[str]:
90+
spreadsheet = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1"))
9491

9592
# There is only one sheet since we are specifying the sheet in the requested ranges.
9693
returned_sheets = spreadsheet.sheets
@@ -134,7 +131,7 @@ def row_data_to_record_message(sheet_name: str, cell_values: List[str], column_i
134131

135132
@staticmethod
136133
def get_available_sheets_to_column_index_to_name(
137-
client: discovery.Resource, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]]
134+
client, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]]
138135
) -> Dict[str, Dict[int, str]]:
139136
available_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id)
140137

@@ -151,8 +148,8 @@ def get_available_sheets_to_column_index_to_name(
151148
return available_sheets_to_column_index_to_name
152149

153150
@staticmethod
154-
def get_sheets_in_spreadsheet(client: discovery.Resource, spreadsheet_id: str):
155-
spreadsheet_metadata = Spreadsheet.parse_obj(GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False))
151+
def get_sheets_in_spreadsheet(client, spreadsheet_id: str):
152+
spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
156153
return [sheet.properties.title for sheet in spreadsheet_metadata.sheets]
157154

158155
@staticmethod

airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@
2929
from typing import Dict
3030

3131
from airbyte_protocol import ConfiguredAirbyteCatalog, ConnectorSpecification
32-
from apiclient import discovery
3332
from base_python_test import StandardSourceTestIface
3433
from google_sheets_source.client import GoogleSheetsClient
3534
from google_sheets_source.helpers import Helpers
@@ -62,7 +61,7 @@ def get_catalog(self) -> ConfiguredAirbyteCatalog:
6261
def setup(self) -> None:
6362
Path(self._get_tmp_dir()).mkdir(parents=True, exist_ok=True)
6463

65-
sheets_client = Helpers.get_authenticated_sheets_client(self._get_creds(), SCOPES)
64+
sheets_client = GoogleSheetsClient(self._get_creds(), SCOPES)
6665
spreadsheet_id = self._create_spreadsheet(sheets_client)
6766
self._write_spreadsheet_id(spreadsheet_id)
6867

@@ -90,7 +89,7 @@ def _get_creds(self) -> Dict[str, str]:
9089
def _get_tmp_dir():
9190
return "/test_root/gsheet_test"
9291

93-
def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
92+
def _create_spreadsheet(self, sheets_client: GoogleSheetsClient) -> str:
9493
"""
9594
:return: spreadsheetId
9695
"""
@@ -99,7 +98,7 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
9998
"sheets": [{"properties": {"title": "sheet1"}}, {"properties": {"title": "sheet2"}}],
10099
}
101100

102-
spreadsheet = Spreadsheet.parse_obj(GoogleSheetsClient.create(sheets_client, body=request))
101+
spreadsheet = Spreadsheet.parse_obj(sheets_client.create(body=request))
103102
spreadsheet_id = spreadsheet.spreadsheetId
104103

105104
rows = [["header1", "irrelevant", "header3", "", "ignored"]]
@@ -109,13 +108,11 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
109108
rows.append(["", "", ""])
110109
rows.append(["orphan1", "orphan2", "orphan3"])
111110

112-
GoogleSheetsClient.update_values(
113-
sheets_client,
111+
sheets_client.update_values(
114112
spreadsheetId=spreadsheet_id,
115113
body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet1"}, "valueInputOption": "RAW"},
116114
)
117-
GoogleSheetsClient.update_values(
118-
sheets_client,
115+
sheets_client.update_values(
119116
spreadsheetId=spreadsheet_id,
120117
body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet2"}, "valueInputOption": "RAW"},
121118
)

airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,10 @@
2323
"""
2424

2525
import unittest
26-
from unittest.mock import Mock
26+
from unittest.mock import Mock, patch
2727

2828
from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream
29+
from google_sheets_source.client import GoogleSheetsClient
2930
from google_sheets_source.helpers import Helpers
3031
from google_sheets_source.models import CellData, GridData, RowData, Sheet, SheetProperties, Spreadsheet
3132

@@ -142,8 +143,10 @@ def test_get_first_row(self):
142143

143144
client = Mock()
144145
client.get.return_value.execute.return_value = fake_response
145-
146-
actual = Helpers.get_first_row(client, spreadsheet_id, sheet)
146+
with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
147+
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
148+
sheet_client.client = client
149+
actual = Helpers.get_first_row(sheet_client, spreadsheet_id, sheet)
147150
self.assertEqual(expected_first_row, actual)
148151
client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet}!1:1")
149152

@@ -154,8 +157,10 @@ def test_get_sheets_in_spreadsheet(self):
154157
client.get.return_value.execute.return_value = Spreadsheet(
155158
spreadsheetId=spreadsheet_id, sheets=[Sheet(properties=SheetProperties(title=t)) for t in expected_sheets]
156159
)
157-
158-
actual_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id)
160+
with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
161+
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
162+
sheet_client.client = client
163+
actual_sheets = Helpers.get_sheets_in_spreadsheet(sheet_client, spreadsheet_id)
159164

160165
self.assertEqual(expected_sheets, actual_sheets)
161166
client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=False)
@@ -186,9 +191,11 @@ def mock_client_call(spreadsheetId, includeGridData, ranges=None):
186191

187192
client = Mock()
188193
client.get.side_effect = mock_client_call
189-
194+
with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
195+
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
196+
sheet_client.client = client
190197
actual = Helpers.get_available_sheets_to_column_index_to_name(
191-
client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])}
198+
sheet_client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])}
192199
)
193200
expected = {sheet1: {0: "1", 1: "2", 2: "3", 3: "4"}}
194201

0 commit comments

Comments
 (0)