Skip to content

Commit a1da5fc

Browse files
authored
🐛 Source Google Sheets: revert 'add row_id to rows and use as primary key' (#19574)
* revert PR 19215 * bump
1 parent 5c7affc commit a1da5fc

File tree

7 files changed

+15
-42
lines changed

7 files changed

+15
-42
lines changed

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@
610610
- name: Google Sheets
611611
sourceDefinitionId: 71607ba1-c0ac-4799-8049-7f4b90dd50f7
612612
dockerRepository: airbyte/source-google-sheets
613-
dockerImageTag: 0.2.30
613+
dockerImageTag: 0.2.31
614614
documentationUrl: https://docs.airbyte.com/integrations/sources/google-sheets
615615
icon: google-sheets.svg
616616
sourceType: file

airbyte-config/init/src/main/resources/seed/source_specs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5509,7 +5509,7 @@
55095509
oauthFlowOutputParameters:
55105510
- - "access_token"
55115511
- - "refresh_token"
5512-
- dockerImage: "airbyte/source-google-sheets:0.2.30"
5512+
- dockerImage: "airbyte/source-google-sheets:0.2.31"
55135513
spec:
55145514
documentationUrl: "https://docs.airbyte.com/integrations/sources/google-sheets"
55155515
connectionSpecification:

airbyte-integrations/connectors/source-google-sheets/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ COPY google_sheets_source ./google_sheets_source
3434
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
3535
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
3636

37-
LABEL io.airbyte.version=0.2.30
37+
LABEL io.airbyte.version=0.2.31
3838
LABEL io.airbyte.name=airbyte/source-google-sheets

airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,15 +169,9 @@ def read(
169169
if len(row_values) == 0:
170170
break
171171

172-
row_id = row_cursor
173172
for row in row_values:
174173
if not Helpers.is_row_empty(row) and Helpers.row_contains_relevant_data(row, column_index_to_name.keys()):
175-
yield AirbyteMessage(
176-
type=Type.RECORD, record=Helpers.row_data_to_record_message(sheet, row_id, row, column_index_to_name)
177-
)
178-
row_id += 1
179-
180-
row_cursor += ROW_BATCH_SIZE + 1
174+
yield AirbyteMessage(type=Type.RECORD, record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name))
181175
logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
182176

183177
@staticmethod

airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,22 +53,14 @@ def headers_to_airbyte_stream(logger: AirbyteLogger, sheet_name: str, header_row
5353
if duplicate_fields:
5454
logger.warn(f"Duplicate headers found in {sheet_name}. Ignoring them :{duplicate_fields}")
5555

56-
props = {field: {"type": "string"} for field in fields}
57-
props["row_id"] = {"type": "integer"}
5856
sheet_json_schema = {
5957
"$schema": "http://json-schema.org/draft-07/schema#",
6058
"type": "object",
61-
"required": ["row_id"],
6259
# For simplicity, the type of every cell is a string
63-
"properties": props,
60+
"properties": {field: {"type": "string"} for field in fields},
6461
}
6562

66-
return AirbyteStream(
67-
name=sheet_name,
68-
json_schema=sheet_json_schema,
69-
supported_sync_modes=[SyncMode.full_refresh],
70-
source_defined_primary_key=[["row_id"]],
71-
)
63+
return AirbyteStream(name=sheet_name, json_schema=sheet_json_schema, supported_sync_modes=[SyncMode.full_refresh])
7264

7365
@staticmethod
7466
def get_valid_headers_and_duplicates(header_row_values: List[str]) -> (List[str], List[str]):
@@ -129,10 +121,8 @@ def parse_sheet_and_column_names_from_catalog(catalog: ConfiguredAirbyteCatalog)
129121
return sheet_to_column_name
130122

131123
@staticmethod
132-
def row_data_to_record_message(
133-
sheet_name: str, row_id: int, cell_values: List[str], column_index_to_name: Dict[int, str]
134-
) -> AirbyteRecordMessage:
135-
data = {"row_id": row_id}
124+
def row_data_to_record_message(sheet_name: str, cell_values: List[str], column_index_to_name: Dict[int, str]) -> AirbyteRecordMessage:
125+
data = {}
136126
for relevant_index in sorted(column_index_to_name.keys()):
137127
if relevant_index >= len(cell_values):
138128
break

airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,14 @@ def test_headers_to_airbyte_stream(self):
2727
sheet_name = "sheet1"
2828
header_values = ["h1", "h2", "h3"]
2929

30-
props = {header: {"type": "string"} for header in header_values}
31-
props["row_id"] = {"type": "integer"}
3230
expected_stream = AirbyteStream(
3331
name=sheet_name,
3432
json_schema={
3533
"$schema": "http://json-schema.org/draft-07/schema#",
3634
"type": "object",
37-
"required": ["row_id"],
3835
# For simplicity, the type of every cell is a string
39-
"properties": props,
36+
"properties": {header: {"type": "string"} for header in header_values},
4037
},
41-
source_defined_primary_key=[["row_id"]],
4238
supported_sync_modes=[SyncMode.full_refresh],
4339
)
4440

@@ -61,20 +57,15 @@ def test_duplicate_headers_to_ab_stream_ignores_duplicates(self):
6157
header_values = ["h1", "h1", "h3"]
6258

6359
# h1 is ignored because it is duplicate
64-
props = {
65-
"h3": {"type": "string"},
66-
"row_id": {"type": "integer"},
67-
}
60+
expected_stream_header_values = ["h3"]
6861
expected_stream = AirbyteStream(
6962
name=sheet_name,
7063
json_schema={
7164
"$schema": "http://json-schema.org/draft-07/schema#",
7265
"type": "object",
73-
"required": ["row_id"],
7466
# For simplicity, the type of every cell is a string
75-
"properties": props,
67+
"properties": {header: {"type": "string"} for header in expected_stream_header_values},
7668
},
77-
source_defined_primary_key=[["row_id"]],
7869
supported_sync_modes=[SyncMode.full_refresh],
7970
)
8071

@@ -90,11 +81,9 @@ def test_headers_to_airbyte_stream_blank_values_terminate_row(self):
9081
json_schema={
9182
"$schema": "http://json-schema.org/draft-07/schema#",
9283
"type": "object",
93-
"required": ["row_id"],
9484
# For simplicity, the type of every cell is a string
95-
"properties": {"h1": {"type": "string"}, "row_id": {"type": "integer"}},
85+
"properties": {"h1": {"type": "string"}},
9686
},
97-
source_defined_primary_key=[["row_id"]],
9887
supported_sync_modes=[SyncMode.full_refresh],
9988
)
10089
actual_stream = Helpers.headers_to_airbyte_stream(logger, sheet_name, header_values)
@@ -154,11 +143,10 @@ def test_row_data_to_record_message(self):
154143
sheet = "my_sheet"
155144
cell_values = ["v1", "v2", "v3", "v4"]
156145
column_index_to_name = {0: "c1", 3: "c4"}
157-
row_id = 1
158146

159-
actual = Helpers.row_data_to_record_message(sheet, row_id, cell_values, column_index_to_name)
147+
actual = Helpers.row_data_to_record_message(sheet, cell_values, column_index_to_name)
160148

161-
expected = AirbyteRecordMessage(stream=sheet, data={"row_id": row_id, "c1": "v1", "c4": "v4"}, emitted_at=1)
149+
expected = AirbyteRecordMessage(stream=sheet, data={"c1": "v1", "c4": "v4"}, emitted_at=1)
162150
self.assertEqual(expected.stream, actual.stream)
163151
self.assertEqual(expected.data, actual.data)
164152

docs/integrations/sources/google-sheets.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ The [Google API rate limit](https://developers.google.com/sheets/api/limits) is
7676

7777
| Version | Date | Pull Request | Subject |
7878
| ------- | ---------- | -------------------------------------------------------- | ----------------------------------------------------------------------------- |
79+
| 0.2.31  | 2022-10-09 | [19574](https://github.com/airbytehq/airbyte/pull/19574) | Revert 'Add row_id to rows and use as primary key'                            |
7980
| 0.2.30  | 2022-10-09 | [19215](https://github.com/airbytehq/airbyte/pull/19215) | Add row_id to rows and use as primary key                                     |
8081
| 0.2.21 | 2022-10-04 | [15591](https://github.com/airbytehq/airbyte/pull/15591) | Clean instantiation of AirbyteStream |
8182
| 0.2.20 | 2022-10-10 | [17766](https://github.com/airbytehq/airbyte/pull/17766) | Fix null pointer exception when parsing the spreadsheet id. |

0 commit comments

Comments
 (0)