Skip to content

feat(qa_check): enable checking connector docs structure via qa check #39326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e33561e
enable CheckDocumentationStructure qa check + extended test suite
darynaishchenko May 31, 2024
8b5993b
added keywords for creds check, valid http statuses, spec for low-code
darynaishchenko May 31, 2024
ae8e1f8
added specific heading for cloud/oss setup
darynaishchenko May 31, 2024
8d082e5
updated _replace_link func
darynaishchenko May 31, 2024
c5648db
updated headers and description templates. added unit tests.
darynaishchenko Jun 6, 2024
c6d52f8
added CheckDocumentationLinks
darynaishchenko Jun 10, 2024
ff7ae9c
refactored documentation qa checks
darynaishchenko Jun 10, 2024
2f10134
refactored changelog checking
darynaishchenko Jun 11, 2024
58f5c34
updated unit tests
darynaishchenko Jun 11, 2024
3aa5061
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Jun 11, 2024
e96ebea
fix tests
darynaishchenko Jun 11, 2024
ce250d7
bump version
darynaishchenko Jun 11, 2024
d39e7f6
refactor type hints
darynaishchenko Jun 11, 2024
f118b61
updated connector_spec_file_content comment
darynaishchenko Jun 21, 2024
122cb39
deleted separate doc templates
darynaishchenko Jun 21, 2024
c3bf40b
deleted documentation utils from common utils
darynaishchenko Jun 21, 2024
3bba5be
added documentation models and helpers
darynaishchenko Jun 21, 2024
3769314
refactor documentation checks
darynaishchenko Jun 21, 2024
9685475
added one standard template
darynaishchenko Jun 21, 2024
8966149
deleted old documentation file
darynaishchenko Jun 21, 2024
92844a2
added templates for checks descriptions
darynaishchenko Jun 21, 2024
0a0609d
generated qa-checks doc
darynaishchenko Jun 21, 2024
075de5c
updated init.py
darynaishchenko Jun 21, 2024
b5bc9bf
updated unit tests
darynaishchenko Jun 21, 2024
27a6f27
fix bugs in qa checks
darynaishchenko Jun 25, 2024
ca0bd36
format fix
darynaishchenko Jun 25, 2024
77b61a6
renamed templates
darynaishchenko Jul 3, 2024
bbfb429
refactor code
darynaishchenko Jul 4, 2024
8d549db
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Jul 4, 2024
c3372fe
updated qa-checks.md
darynaishchenko Jul 4, 2024
ec08f63
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Jul 8, 2024
cf571cc
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Aug 12, 2024
9316069
bump versions
darynaishchenko Aug 12, 2024
05e48ea
fixed docs
darynaishchenko Aug 12, 2024
00f4289
updated link to template
darynaishchenko Aug 12, 2024
4e1ee54
removed invalid link
darynaishchenko Aug 12, 2024
0fd84a4
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Aug 12, 2024
7a2a9d3
updated CheckDocumentationLinks to skip example urls and 406 status code
darynaishchenko Aug 12, 2024
3ea9b9a
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Aug 13, 2024
86bce76
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Aug 14, 2024
4611ef2
fixed documentation
darynaishchenko Aug 14, 2024
8bd8d65
updated links validation
darynaishchenko Aug 14, 2024
5e1d905
fixed tests
darynaishchenko Aug 14, 2024
c9c609d
Merge branch 'master' into daryna/move-TestConnectorDocumentation-fro…
darynaishchenko Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions airbyte-ci/connectors/connector_ops/connector_ops/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#

import functools
import json
import logging
import os
import re
Expand Down Expand Up @@ -383,6 +384,25 @@ def metadata(self) -> Optional[dict]:
return None
return yaml.safe_load((self.code_directory / METADATA_FILE_NAME).read_text())["data"]

@property
def connector_spec_file_content(self) -> Optional[dict]:
    """Return the connector spec read from the first spec source that exists.

    Sources are tried in this order: ``spec.yaml`` and ``spec.json`` inside
    ``self.python_source_dir_path``, then the ``spec`` section of the file at
    ``self.manifest_path``.
    This spec is not a "source of truth" for connector specification and can't be used as is.

    Returns:
        Optional[dict]: the parsed spec content, or None when no spec source exists
        or the manifest has no ``spec`` section.
    """
    yaml_spec = Path(self.python_source_dir_path / "spec.yaml")
    if yaml_spec.exists():
        return yaml.safe_load(yaml_spec.read_text())

    json_spec = Path(self.python_source_dir_path / "spec.json")
    if json_spec.exists():
        with open(json_spec) as f:
            return json.load(f)

    if self.manifest_path.exists():
        # An empty manifest parses to None and a manifest is not guaranteed to
        # carry a "spec" section; return None instead of raising in both cases.
        manifest = yaml.safe_load(self.manifest_path.read_text()) or {}
        return manifest.get("spec")

    return None

@property
def language(self) -> ConnectorLanguage:
if Path(self.code_directory / self.technical_name.replace("-", "_") / "manifest.yaml").is_file():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<details>
<summary>Expand to review</summary>
</details>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
2. Click Sources and then click + New source.
3. On the Set up the source page, select {connector_name} from the Source type dropdown.
4. Enter a name for the {connector_name} connector.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

1. Navigate to the Airbyte Open Source dashboard.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

<HideInUI>

This page contains the setup guide and reference information for the [{connector_name}]({docs_link}) source connector.

</HideInUI>
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

The {connector_name} source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

Now that you have set up the {connector_name} source connector, check out the following {connector_name} tutorials:

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions airbyte-ci/connectors/connectors_qa/src/connectors_qa/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ def applies_to_connector_types(self) -> List[str]:
"""
return ALL_TYPES

@property
def applies_to_connector_ab_internal_sl(self) -> int:
    """The connector ab_internal_sl level that the QA check applies to.

    Returns:
        int: integer value for connector ab_internal_sl level
    """
    default_sl = 0
    return default_sl

@property
@abstractmethod
def category(self) -> CheckCategory:
Expand Down Expand Up @@ -187,6 +196,11 @@ def run(self, connector: Connector) -> CheckResult:
connector,
f"Check does not apply to {connector.cloud_usage} connectors",
)
if connector.ab_internal_sl < self.applies_to_connector_ab_internal_sl:
return self.skip(
connector,
f"Check does not apply to connectors with sl < {self.applies_to_connector_ab_internal_sl}",
)
return self._run(connector)

def _run(self, connector: Connector) -> CheckResult:
Expand Down
150 changes: 149 additions & 1 deletion airbyte-ci/connectors/connectors_qa/src/connectors_qa/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

import re
from difflib import get_close_matches
from pathlib import Path
from typing import Set
from typing import Any, Set

from connector_ops.utils import Connector # type: ignore
from connectors_qa import consts
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode


def remove_strict_encrypt_suffix(connector_technical_name: str) -> str:
Expand Down Expand Up @@ -42,3 +46,147 @@ def get_all_connectors_in_directory(directory: Path) -> Set[Connector]:
if connector_directory.is_dir() and (connector_directory / consts.METADATA_FILE_NAME).exists():
connectors.append(Connector(connector_directory.name))
return set(connectors)


def remove_step_from_heading(heading: str) -> str:
    """Strip the "Step 1: " or "Step 2: " marker from a heading.

    Only the first marker kind found in the heading is removed (all of its
    occurrences); "Step 1: " takes precedence over "Step 2: ". Headings with
    neither marker are returned unchanged.
    """
    for step_marker in ("Step 1: ", "Step 2: "):
        if step_marker in heading:
            return heading.replace(step_marker, "")
    return heading


def required_titles_from_spec(spec: dict[str, Any]) -> tuple[list[str], bool]:
    """Collect the lowercased titles of the required, non-credential spec fields.

    Args:
        spec: connector spec with an optional "required" list and a "properties"
            mapping whose entries carry a "title".

    Returns:
        A pair ``(titles, has_credentials)``. ``titles`` holds the lowercased title of
        every required field that is not a credential-like field. ``has_credentials``
        is True when any credential-like key appears in "required" or "properties".
        Returns ``([], False)`` when the spec has no (or an empty) "required" entry.

    Raises:
        KeyError: if "properties" is missing, or a required field has no entry or
            no "title" in "properties".
    """
    spec_required = spec.get("required")
    if not spec_required:
        return [], False

    spec_properties = spec["properties"]
    creds = ("credentials", "client_id", "client_secret", "access_token", "refresh_token", "authorization")

    has_credentials = any(cred in spec_required or cred in spec_properties for cred in creds)

    # Filter into a new list instead of removing items from spec["required"]:
    # the caller's spec must not be modified by this helper.
    required_fields = [field for field in spec_required if field not in creds]

    titles = [spec_properties[field]["title"].lower() for field in required_fields]
    return titles, has_credentials


def documentation_node(connector_documentation: str) -> SyntaxTreeNode:
    """Parse markdown text with the "commonmark" preset and wrap the tokens in a SyntaxTreeNode."""
    parser = MarkdownIt("commonmark")
    return SyntaxTreeNode(parser.parse(connector_documentation))


def header_name(n: SyntaxTreeNode) -> str:
    """Return the text of a heading node.

    Reads the content of the first child of the node's second token.
    NOTE(review): presumably the tokens of a heading node are open / inline / close,
    so the second token is the inline one holding the heading text - confirm against markdown-it.
    """
    node_tokens = n.to_tokens()
    inline_token = node_tokens[1]
    first_child = inline_token.children[0]
    return first_child.content


def prepare_lines_to_compare(connector_name: str, docs_line: str, template_line: str) -> tuple[str, str]:
    """Normalize a documentation line and a template line so they can be compared.

    The "{connector_name}" placeholder in the template line is replaced with the
    connector name. If the template line contains the "{docs_link}" placeholder,
    every http(s) URL in the documentation line is replaced with that placeholder.

    Args:
        connector_name: name substituted for "{connector_name}" in the template line.
        docs_line: line taken from the connector documentation.
        template_line: line taken from the documentation template.

    Returns:
        The pair ``(docs_line, template_line)`` after normalization.
    """
    connector_name_to_replace = "{connector_name}"
    link_to_replace = "{docs_link}"

    # str.replace is a no-op when the placeholder is absent, so no membership check is needed.
    template_line = template_line.replace(connector_name_to_replace, connector_name)

    if link_to_replace in template_line:
        # Substitute every URL match in one pass. Replacing the found links one by one
        # with str.replace corrupts a URL when another URL in the line is its prefix.
        # A callable replacement keeps the placeholder free from escape processing.
        docs_line = re.sub(r"https?://[^\s)]+", lambda _match: link_to_replace, docs_line)

    return docs_line, template_line


def remove_not_required_step_headers(headers: tuple[str]) -> tuple[str]:
    """
    Removes headers like Step 1.1 Step 3 Step 2.3 from actual headers, if they are placed after the Step 1: header.
    from: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 1.11: do something 11",
    "Step 2: do something 2", "Step 2.1: do something 2.1", "Changelog"
    To: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 2: do something 2", "Changelog"
    These are connector specific headers, so we can ignore them.

    Accepts a list or a tuple of headers. The input is never modified: it is returned as is
    when it has no "Step 1: " header, otherwise a new sequence of the same kind
    (tuple for a tuple, list otherwise) is returned.
    """
    step_one_index = None
    for header in headers:
        if "Step 1: " in header:
            step_one_index = headers.index(header)
    # Compare with None explicitly: index 0 is a valid position for the Step 1 header.
    if step_one_index is None:  # docs doesn't have Step 1 headers
        return headers

    # Work on a list copy so tuple input is supported as well.
    step_headers = list(headers[step_one_index:])
    step_pattern = re.compile(r"Step \d+.?\d*: ")
    step = "Step 1: "
    i = 0
    while i < len(step_headers) and step in step_headers[i]:  # current header is the Step 1/2 header
        has_next_step = i + 1 < len(step_headers) and step_pattern.match(step_headers[i + 1])
        if not has_next_step:  # next header is not a Step x: header, nothing left to remove
            break
        if "Step 2: " in step_headers[i + 1]:  # found Step 2, it's a required header, move to it
            step = "Step 2: "
            i += 1
        else:
            # Delete by position: removing by value could drop an earlier header with the same text.
            del step_headers[i + 1]

    result = list(headers[:step_one_index]) + step_headers
    return tuple(result) if isinstance(headers, tuple) else result


def reason_titles_not_match(heading_names_value: str, template_headings_value: str, template_headings: list[str]) -> str:
    """Build the failure message for a heading that differs from the expected template heading.

    When difflib finds a close template heading whose best match is not the actual
    heading itself, the list of close matches is included as a suggestion.
    """
    close_titles = get_close_matches(heading_names_value, template_headings)
    suggestion_available = bool(close_titles) and close_titles[0] != heading_names_value

    diff = (
        f"Diff:\nActual Heading: '{heading_names_value}'. Possible correct heading: '{close_titles}'. Expected Heading: '{template_headings_value}'"
        if suggestion_available
        else f"Diff:\nActual Heading: '{heading_names_value}'. Expected Heading: '{template_headings_value}'"
    )
    reason = f"Heading '{heading_names_value}' is not in the right place, the name of heading is incorrect or not expected.\n"
    return reason + diff


def reason_missing_titles(template_headings_index: int, template_headings: list[str], not_required_headers: list[str]) -> str:
    """Build the message listing the template headings missing from the documentation.

    Every template heading from ``template_headings_index`` onwards counts as missing;
    those not listed in ``not_required_headers`` are reported as required.
    """
    missing = template_headings[template_headings_index:]
    required = []
    for title in missing:
        if title not in not_required_headers:
            required.append(title)
    return f"Required missing headers: {required}. All missing headers: {missing}"


def description_end_line_index(heading: str, actual_headings: list[str], header_line_map: dict[str, int]) -> int:
    """Return the mapped line of the heading that follows ``heading``.

    The value is looked up in ``header_line_map`` under the name of the next heading
    in ``actual_headings``. Returns None when ``heading`` is the last heading.

    Raises:
        ValueError: if ``heading`` is not in ``actual_headings``.
        KeyError: if the next heading has no entry in ``header_line_map``.
    """
    next_position = actual_headings.index(heading) + 1
    if next_position == len(actual_headings):
        return None
    next_heading = actual_headings[next_position]
    return header_line_map[next_heading]


def prepare_headers(connector_documentation: str) -> tuple[str, ...]:
    """Extract the headings of a connector documentation page, normalized for template comparison.

    Args:
        connector_documentation: markdown text of the documentation page.

    Returns:
        A tuple of heading names with the not required "Step x" headings dropped and
        the "Step 1: " / "Step 2: " markers stripped from the remaining names.
    """
    node = documentation_node(connector_documentation)
    headers = [header_name(n) for n in node if n.type == "heading"]  # find all headers
    headers = remove_not_required_step_headers(headers)  # remove Step 1.1 Step 3 ... headers
    # remove Step 1 and Step 2 from header name
    return tuple(remove_step_from_heading(h) for h in headers)


def prepare_changelog_to_compare(docs: list[str]) -> list[str]:
    """Flatten the parsed documentation lines into a list of lines to compare.

    The lines are joined and parsed as markdown. For every sibling of the first node,
    its right-stripped content is taken: multi-line content is split into lines that
    each get a trailing newline, single-line content is kept as is. Empty strings are dropped.
    """
    newline = "\n"
    tree = documentation_node("".join(docs))
    stripped_contents = [sibling.content.rstrip() for sibling in tree[0].siblings]

    lines_to_compare = []
    for content in stripped_contents:
        if newline in content:
            lines_to_compare.extend(piece + newline for piece in content.split(newline))
        else:
            lines_to_compare.append(content)

    return [line for line in lines_to_compare if line != ""]
Loading
Loading