
✨ [source-google-sheets] add row_batch_size as an input parameter with higher increase #35404


Merged
Changes from 3 commits
@@ -21,7 +21,7 @@ class Backoff:
     @classmethod
     def increase_row_batch_size(cls, details):
         if details["exception"].status_code == status_codes.TOO_MANY_REQUESTS and cls.row_batch_size < 1000:
-            cls.row_batch_size = cls.row_batch_size + 10
+            cls.row_batch_size = cls.row_batch_size + 100
             logger.info(f"Increasing number of records fetching due to rate limits. Current value: {cls.row_batch_size}")

     @staticmethod
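The patched backoff can be sketched standalone. Note the `logger`, the `429` constant (standing in for `status_codes.TOO_MANY_REQUESTS`), the starting value of 200, and the `FakeError` class are assumptions reconstructed from the hunk and the spec default, not the connector's actual module:

```python
import logging

logger = logging.getLogger(__name__)
TOO_MANY_REQUESTS = 429  # assumed value of status_codes.TOO_MANY_REQUESTS

class Backoff:
    # Sketch of the patched class; the real connector defines more members.
    row_batch_size = 200  # assumed starting value, matching the spec default

    @classmethod
    def increase_row_batch_size(cls, details):
        # On a rate-limit error, grow the batch by 100 (previously 10),
        # capped at 1000 rows per request.
        if details["exception"].status_code == TOO_MANY_REQUESTS and cls.row_batch_size < 1000:
            cls.row_batch_size = cls.row_batch_size + 100
            logger.info(f"Increasing number of records fetching due to rate limits. Current value: {cls.row_batch_size}")

class FakeError(Exception):
    status_code = 429  # simulated rate-limit response

Backoff.increase_row_batch_size({"exception": FakeError()})
print(Backoff.row_batch_size)  # 300
```

With the old +10 step, a connector starting at 200 needed 80 rate-limit errors to reach the 1000-row cap; with +100 it gets there in 8, which is the point of the change.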
@@ -149,6 +149,7 @@ def _read(
         catalog: ConfiguredAirbyteCatalog,
     ) -> Generator[AirbyteMessage, None, None]:
         client = GoogleSheetsClient(self.get_credentials(config))
+        client.Backoff.row_batch_size = config["batch_size"]
         sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
         stream_name_to_stream = {stream.stream.name: stream for stream in catalog.streams}

Collaborator:

Suggested change:
-        client.Backoff.row_batch_size = config["batch_size"]
+        client.Backoff.row_batch_size = config.get("batch_size", 200)

for old configs without batch_size.

Contributor Author:

Fixed
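The reviewer's backward-compatibility point can be sketched with plain dicts; both config shapes below are hypothetical examples, not real connector configs:

```python
new_config = {"spreadsheet_id": "abc", "batch_size": 500}
old_config = {"spreadsheet_id": "abc"}  # saved before batch_size existed

def resolve_batch_size(config: dict) -> int:
    # config["batch_size"] would raise KeyError on old_config;
    # dict.get falls back to the spec default of 200 instead.
    return config.get("batch_size", 200)

print(resolve_batch_size(new_config))  # 500
print(resolve_batch_size(old_config))  # 200
```

This matters because existing source configurations are not migrated when a spec gains a new property, so the code path must tolerate its absence.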
@@ -8,6 +8,12 @@ connectionSpecification:
     - credentials
   additionalProperties: true
   properties:
+    batch_size:
+      type: integer
+      title: Row Batch Size
+      description: >-
+        An integer representing row batch size with each sent request to Google Sheets API. Defaults to 200.
+      default: 200
     spreadsheet_id:
       type: string
       title: Spreadsheet Link

Collaborator:

@tautvydas-v lgtm, could you please add a more detailed description here and in the .md file for Row Batch Size: info about what impact this value has on rate limits, syncs, etc., and how users can choose the most suitable value.

Contributor Author:

Added in more documentation
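The `default: 200` in the spec is advisory in JSON Schema — validation alone does not inject it into a config. A minimal sketch of merging spec defaults into an incoming config; `apply_spec_defaults` is a hypothetical helper for illustration, not connector code:

```python
# Fragment of the connection spec's "properties" section, as added in this PR.
SPEC_FRAGMENT = {
    "batch_size": {
        "type": "integer",
        "title": "Row Batch Size",
        "default": 200,
    }
}

def apply_spec_defaults(config: dict, properties: dict) -> dict:
    # JSON-Schema "default" is not applied by validators by default;
    # callers typically merge it in themselves, as sketched here.
    merged = dict(config)
    for name, prop in properties.items():
        if name not in merged and "default" in prop:
            merged[name] = prop["default"]
    return merged

print(apply_spec_defaults({"spreadsheet_id": "abc"}, SPEC_FRAGMENT))
# {'spreadsheet_id': 'abc', 'batch_size': 200}
```

This is why the reviewer's `config.get("batch_size", 200)` suggestion repeats the spec default in code: the two defaults must be kept in sync by hand.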
@@ -24,8 +24,8 @@ def test_backoff_increase_row_batch_size():
     e = requests.HTTPError("error")
     e.status_code = 429
     client.Backoff.increase_row_batch_size({"exception": e})
-    assert client.Backoff.row_batch_size == 210
-    assert client._create_range("spreadsheet_id", 0) == "spreadsheet_id!0:210"
+    assert client.Backoff.row_batch_size == 300
+    assert client._create_range("spreadsheet_id", 0) == "spreadsheet_id!0:300"
     client.Backoff.row_batch_size = 1000
     client.Backoff.increase_row_batch_size({"exception": e})
     assert client.Backoff.row_batch_size == 1000

@@ -57,12 +57,12 @@ def test_client_get_values_on_backoff(caplog):
     e = requests.HTTPError("error")
     e.status_code = 429
     client_google_sheets.Backoff.increase_row_batch_size({"exception": e})
-    assert client_google_sheets.Backoff.row_batch_size == 220
+    assert client_google_sheets.Backoff.row_batch_size == 310
     client_google_sheets.get_values(
         sheet="sheet",
         row_cursor=0,
         spreadsheetId="spreadsheet_id",
         majorDimension="ROWS",
     )

-    assert "Fetching range sheet!0:220" in caplog.text
+    assert "Fetching range sheet!0:310" in caplog.text
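The `sheet!0:310` strings asserted above follow the A1-style `sheet!start:end` row-range pattern. A hypothetical reconstruction of the range building that these tests exercise — the real `_create_range` is a client method and may differ in detail:

```python
def create_range(sheet: str, row_cursor: int, row_batch_size: int) -> str:
    # Builds the row-range request string sent to the Google Sheets API,
    # e.g. "sheet!0:310" for cursor 0 and batch size 310. The end row is
    # simply cursor + batch size, consistent with the test expectations.
    return f"{sheet}!{row_cursor}:{row_cursor + row_batch_size}"

print(create_range("sheet", 0, 310))  # sheet!0:310
print(create_range("spreadsheet_id", 0, 300))  # spreadsheet_id!0:300
```

Note why the second test expects 310 rather than 300: the two tests share the class-level `Backoff.row_batch_size`, which the first test leaves at 210 (pre-change) or 1000 reset paths aside, so each +100 step compounds across tests.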
5 changes: 3 additions & 2 deletions docs/integrations/sources/google-sheets.md
@@ -97,8 +97,9 @@ If your spreadsheet is viewable by anyone with its link, no further action is ne
 - To authenticate your Google account via OAuth, select **Authenticate via Google (OAuth)** from the dropdown and enter your Google application's client ID, client secret, and refresh token.
 <!-- /env:oss -->
 6. For **Spreadsheet Link**, enter the link to the Google spreadsheet. To get the link, go to the Google spreadsheet you want to sync, click **Share** in the top right corner, and click **Copy Link**.
-7. (Optional) You may enable the option to **Convert Column Names to SQL-Compliant Format**. Enabling this option will allow the connector to convert column names to a standardized, SQL-friendly format. For example, a column name of `Café Earnings 2022` will be converted to `cafe_earnings_2022`. We recommend enabling this option if your target destination is SQL-based (ie Postgres, MySQL). Set to false by default.
-8. Click **Set up source** and wait for the tests to complete.
+7. For **Batch Size**, enter an integer which represents batch size when processing a Google Sheet. Default value is 200.
+8. (Optional) You may enable the option to **Convert Column Names to SQL-Compliant Format**. Enabling this option will allow the connector to convert column names to a standardized, SQL-friendly format. For example, a column name of `Café Earnings 2022` will be converted to `cafe_earnings_2022`. We recommend enabling this option if your target destination is SQL-based (ie Postgres, MySQL). Set to false by default.
+9. Click **Set up source** and wait for the tests to complete.

 <HideInUI>