Skip to content

🎉Source Google Search Console: increase unit test coverage at least 90% #13037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@
documentationUrl: https://docs.airbyte.io/integrations/sources/google-search-console
icon: googlesearchconsole.svg
sourceType: api
releaseStage: alpha
releaseStage: beta
- name: Google Sheets
sourceDefinitionId: 71607ba1-c0ac-4799-8049-7f4b90dd50f7
dockerRepository: airbyte/source-google-sheets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# Packages needed to run the connector's tests (as the name indicates).
TEST_REQUIREMENTS = [
"pytest~=6.1",
"requests-mock",
"source-acceptance-test",
]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

from pytest import fixture


@fixture(name="config")
def config_fixture(requests_mock):
    """Return a connector config dict and mock the OAuth token endpoint.

    A POST mock for ``https://oauth2.googleapis.com/token`` is registered on
    ``requests_mock`` so that a token request made during a test receives a
    canned JSON payload. The returned config uses the "Client" auth type and
    placeholder strings for the dates and credentials.
    """
    requests_mock.post(
        "https://oauth2.googleapis.com/token",
        json={"access_token": "token", "expires_in": 10},
    )

    authorization = {
        "auth_type": "Client",
        "client_id": "client_id",
        "client_secret": "client_secret",
        "refresh_token": "refresh_token",
    }
    return {
        "site_urls": ["https://example.com"],
        "start_date": "start_date",
        "end_date": "end_date",
        "authorization": authorization,
    }
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import logging
from unittest.mock import MagicMock, patch
from urllib.parse import quote_plus

import pytest
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http.auth import NoAuth
from source_google_search_console.streams import ROW_LIMIT, SearchAnalyticsByDate
from source_google_search_console.streams import ROW_LIMIT, SearchAnalyticsByDate, GoogleSearchConsole
from source_google_search_console.source import SourceGoogleSearchConsole

logger = logging.getLogger('airbyte')


class MockResponse:
Expand All @@ -27,7 +31,7 @@ def json(self):
],
)
def test_pagination(count, expected):
stream = SearchAnalyticsByDate(NoAuth(), ["https://example.com"], "start_date", "end_date")
stream = SearchAnalyticsByDate(None, ["https://example.com"], "start_date", "end_date")
response = MockResponse(stream.data_field, count)
stream.next_page_token(response)
assert stream.start_row == expected
Expand All @@ -43,7 +47,7 @@ def test_pagination(count, expected):
],
)
def test_slice(site_urls, sync_mode):
stream = SearchAnalyticsByDate(NoAuth(), site_urls, "2021-09-01", "2021-09-07")
stream = SearchAnalyticsByDate(None, site_urls, "2021-09-01", "2021-09-07")

search_types = stream.search_types
stream_slice = stream.stream_slices(sync_mode=sync_mode)
Expand Down Expand Up @@ -85,14 +89,14 @@ def test_slice(site_urls, sync_mode):
],
)
def test_state(current_stream_state, latest_record, expected):
stream = SearchAnalyticsByDate(NoAuth(), ["https://example.com"], "start_date", "end_date")
stream = SearchAnalyticsByDate(None, ["https://example.com"], "start_date", "end_date")

value = stream.get_updated_state(current_stream_state, latest_record)
assert value == expected


def test_updated_state():
stream = SearchAnalyticsByDate(NoAuth(), ["https://domain1.com", "https://domain2.com"], "start_date", "end_date")
stream = SearchAnalyticsByDate(None, ["https://domain1.com", "https://domain2.com"], "start_date", "end_date")

state = {}
record = {"site_url": "https://domain1.com", "search_type": "web", "date": "2022-01-01"}
Expand All @@ -105,3 +109,68 @@ def test_updated_state():
"https://domain2.com": {"web": {"date": "2022-01-01"}},
"date": "2022-01-01",
}


@pytest.mark.parametrize(
    "stream_class, expected",
    [
        (
            GoogleSearchConsole,
            {"keys": ["keys"]},
        ),
        (
            SearchAnalyticsByDate,
            {"date": "keys", "search_type": "web", "site_url": "https://domain1.com"},
        ),
    ],
)
@patch.multiple(GoogleSearchConsole, __abstractmethods__=set())
def test_parse_response(stream_class, expected):
    """Check the first record parsed from a mocked response per stream class.

    ``__abstractmethods__`` is patched to an empty set on the base class,
    presumably so that ``GoogleSearchConsole`` itself can be instantiated.
    """
    site_urls = ["https://domain1.com", "https://domain2.com"]
    stream = stream_class(None, site_urls, "2021-09-01", "2021-09-07")
    stream.data_field = "data_field"

    # Only the first slice is needed to drive a single parse_response call.
    first_slice = next(stream.stream_slices(sync_mode=SyncMode.full_refresh))

    payload = {"data_field": [{"keys": ["keys"]}]}
    response = MagicMock()
    response.json = MagicMock(return_value=payload)

    records = stream.parse_response(response, stream_state={}, stream_slice=first_slice)

    assert next(records) == expected


def test_check_connection_ok(config, requests_mock):
    """check_connection succeeds when the mocked sites endpoint responds."""
    requests_mock.get(
        "https://www.googleapis.com/webmasters/v3/sites/https%3A%2F%2Fexample.com",
        json={},
    )

    source = SourceGoogleSearchConsole()
    is_ok, error = source.check_connection(logger, config=config)

    assert is_ok
    assert not error


def test_check_connection_invalid_config(config):
    """check_connection fails with a message when ``start_date`` is missing."""
    del config["start_date"]

    source = SourceGoogleSearchConsole()
    is_ok, error = source.check_connection(logger, config=config)

    assert not is_ok
    assert error


def test_check_connection_exception(config):
    """check_connection is expected to fail and return a message.

    No mock for the sites endpoint is registered in this test.
    """
    source = SourceGoogleSearchConsole()
    is_ok, error = source.check_connection(logger, config=config)

    assert not is_ok
    assert error


def test_streams(config):
    """The source builds eight streams from the fixture config."""
    source = SourceGoogleSearchConsole()

    assert len(source.streams(config)) == 8


def test_get_start_date():
    """_get_start_date returns the date stored in state for the site and type."""
    site_url = "https://domain1.com"
    expected = "2021-09-07"
    stream = SearchAnalyticsByDate(None, [site_url, "https://domain2.com"], "2021-09-01", "2021-09-07")

    state = {site_url: {"web": {"date": expected}}}
    start = stream._get_start_date(stream_state=state, site_url=site_url, search_type="web")

    assert str(start) == expected
110 changes: 66 additions & 44 deletions docs/integrations/sources/google-search-console.md
Original file line number Diff line number Diff line change
@@ -1,51 +1,17 @@
# Google Search Console

## Overview
This page contains the setup guide and reference information for the Google Search Console source connector.

The Google Search Console source supports both Full Refresh and Incremental syncs. You can choose if this connector will copy only the new or updated data, or all rows in the tables and columns you set up for replication, every time a sync is run.

### Output schema
## Prerequisites

This Source is capable of syncing the following Streams:

* [Sites](https://developers.google.com/webmaster-tools/search-console-api-original/v3/sites/get)
* [Sitemaps](https://developers.google.com/webmaster-tools/search-console-api-original/v3/sitemaps/list)
* [Full Analytics report](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query) \(this stream has a long sync time because it is very detailed, use with care\)
* [Analytics report by country](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by date](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by device](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by page](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by query](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)

### Data type mapping

| Integration Type | Airbyte Type | Notes |
| :--- | :--- | :--- |
| `string` | `string` | |
| `number` | `number` | |
| `array` | `array` | |
| `object` | `object` | |

### Features

| Feature | Supported?\(Yes/No\) | Notes |
| :--- | :--- | :--- |
| Full Refresh Sync | Yes | |
| Incremental Sync | Yes | except Sites and Sitemaps |
| SSL connection | Yes | |
| Namespaces | No | |

### Performance considerations

This connector attempts to back off gracefully when it hits Reports API's rate limits. To find more information about limits, see [Usage Limits](https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits) documentation.
* Credentials to a Google Service Account \(or Google Service Account with delegated Domain Wide Authority\) or Google User Account

## Getting started

### Requirements
## Setup guide
### Step 1: Set up Google Search Console

* Credentials to a Google Service Account \(or Google Service Account with delegated Domain Wide Authority\) or Google User Account

## How to create the client credentials for Google Search Console, to use with Airbyte?
#### How to create the client credentials for Google Search Console, to use with Airbyte?

You can either:

Expand Down Expand Up @@ -90,13 +56,70 @@ Follow the Google Documentation for performing [Delegating domain-wide authority

At the end of this process, you should have JSON credentials to this Google Service Account.

You should now be ready to use the Google Workspace Admin Reports API connector in Airbyte.
## Step 2: Set up the Google Search Console connector in Airbyte

### For Airbyte Cloud:

1. [Log into your Airbyte Cloud](https://cloud.airbyte.io/workspaces) account.
2. In the left navigation bar, click **Sources**. In the top-right corner, click **+new source**.
3. On the Set up the source page, enter the name for the Google Search Console connector and select **Google Search Console** from the Source type dropdown.
4. Click Authenticate your account to sign in with Google and authorize your account.
5. Fill in the `site_urls` field.
6. Fill in the `start date` field.
7. You should be ready to sync data.

### For Airbyte OSS:

1. Fill in the `service_account_info` and `email` fields for authentication.
2. Fill in the `site_urls` field.
3. Fill in the `start date` field.
4. You should be ready to sync data.


## CHANGELOG
## Supported sync modes

The Google Search Console source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):

| Feature | Supported?\(Yes/No\) | Notes |
| :--- | :--- | :--- |
| Full Refresh Sync | Yes | |
| Incremental Sync | Yes | except Sites and Sitemaps |
| SSL connection | Yes | |
| Namespaces | No | |


## Supported Streams

* [Sites](https://developers.google.com/webmaster-tools/search-console-api-original/v3/sites/get)
* [Sitemaps](https://developers.google.com/webmaster-tools/search-console-api-original/v3/sitemaps/list)
* [Full Analytics report](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query) \(this stream has a long sync time because it is very detailed, use with care\)
* [Analytics report by country](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by date](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by device](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by page](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)
* [Analytics report by query](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query)


## Performance considerations

This connector attempts to back off gracefully when it hits the Search Console API's rate limits. For more information about these limits, see the [Usage Limits](https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits) documentation.


## Data type map

| Integration Type | Airbyte Type | Notes |
| :--- | :--- | :--- |
| `string` | `string` | |
| `number` | `number` | |
| `array` | `array` | |
| `object` | `object` | |


## Changelog

| Version | Date | Pull Request | Subject |
|:---------| :--- | :--- | :--- |
| `0.1.12` | 2022-05-04 | [\#12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
| `0.1.12` | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
| `0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) [9194](https://github.com/airbytehq/airbyte/pull/9194) | Fix incremental sync: keep all urls in state object |
| `0.1.10` | 2021-12-23 | [9073](https://github.com/airbytehq/airbyte/pull/9073) | Add slicing by date range |
| `0.1.9` | 2021-12-22 | [9047](https://github.com/airbytehq/airbyte/pull/9047) | Add 'order' to spec.json props |
Expand All @@ -108,4 +131,3 @@ You should now be ready to use the Google Workspace Admin Reports API connector
| `0.1.2` | 2021-09-17 | [6222](https://github.com/airbytehq/airbyte/pull/6222) | Correct Spec File |
| `0.1.1` | 2021-09-22 | [6315](https://github.com/airbytehq/airbyte/pull/6315) | Verify access to all sites when performing connection check |
| `0.1.0` | 2021-09-03 | [5350](https://github.com/airbytehq/airbyte/pull/5350) | Initial Release |