Skip to content

Add pre-commit hook to validate provider spec #33608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,13 @@ repos:
require_serial: true
files: ^airflow/providers/.*\.py$
additional_dependencies: ['rich>=12.4.4']
- id: check-provider-yaml-value-importable
name: Check values in provider.yaml are importable
entry: ./scripts/ci/pre_commit/pre_commit_provider_import_path_exists.py
language: python
pass_filenames: true
files: ^airflow/providers/.*$
additional_dependencies: ['pyyaml']
- id: update-local-yml-file
name: Update mounts in the local yml file
entry: ./scripts/ci/pre_commit/pre_commit_local_yml_mounts.py
Expand Down
2 changes: 2 additions & 0 deletions STATIC_CODE_CHECKS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ require Breeze Docker image to be built locally.
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| check-provider-yaml-valid | Validate provider.yaml files | * |
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| check-provider-yaml-value-importable | Check values in provider.yaml are importable | |
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| check-providers-init-file-missing | Provider init file is missing | |
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| check-providers-subpackages-init-file-exist | Provider subpackage init files are there | |
Expand Down
4 changes: 3 additions & 1 deletion airflow/providers/apache/pinot/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,7 @@ hooks:
- airflow.providers.apache.pinot.hooks.pinot

connection-types:
- hook-class-name: airflow.providers.apache.pinot.hooks.pinot.PinotHook
- hook-class-name: airflow.providers.apache.pinot.hooks.pinot.PinotDbApiHook
connection-type: pinot
- hook-class-name: airflow.providers.apache.pinot.hooks.pinot.PinotAdminHook
connection-type: pinot_admin
2 changes: 1 addition & 1 deletion airflow/providers/redis/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ connection-types:
connection-type: redis

logging:
- airflow.providers.redis.redis_task_handler.RedisTaskHandler
- airflow.providers.redis.log.redis_task_handler.RedisTaskHandler
2 changes: 1 addition & 1 deletion airflow/providers/slack/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,4 @@ connection-types:
connection-type: slackwebhook

notifications:
- airflow.providers.slack.notifications.slack_notifier.SlackNotifier
- airflow.providers.slack.notifications.slack.SlackNotifier
1 change: 1 addition & 0 deletions dev/breeze/src/airflow_breeze/pre_commit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"check-pre-commit-information-consistent",
"check-provide-create-sessions-imports",
"check-provider-yaml-valid",
"check-provider-yaml-value-importable",
"check-providers-init-file-missing",
"check-providers-subpackages-init-file-exist",
"check-pydevd-left-in-code",
Expand Down
2 changes: 1 addition & 1 deletion images/breeze/output-commands-hash.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ setup:version:be116d90a21c2afe01087f7609774e1e
setup:fd391bab5277ad3aca65987a84144d51
shell:c7adf57a354ecb496ae12fc0b9eaea80
start-airflow:23df4528b3972977e58f7d29336500f7
static-checks:5ded21248cd4f5617779f58ecf6c554c
static-checks:3c6b570f3a05f12cc08eb0d0d2462c29
testing:docker-compose-tests:0c810047fc66a0cfe91119e2d08b3507
testing:helm-tests:8e491da2e01ebd815322c37562059d77
testing:integration-tests:f2a400ac9f722b9c279abbd4e3313e71
Expand Down
108 changes: 54 additions & 54 deletions images/breeze/output-commands.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
158 changes: 81 additions & 77 deletions images/breeze/output_static-checks.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
136 changes: 136 additions & 0 deletions scripts/ci/pre_commit/pre_commit_provider_import_path_exists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations

import ast
import functools
import itertools
import os
import pathlib
import sys
import typing

import yaml

if typing.TYPE_CHECKING:
import typing_extensions

# Provider packages live under this root, relative to the repository root
# (pre-commit invokes hooks from the top of the checkout, so relative
# resolution in _get_module works from here).
AIRFLOW_PROVIDERS_ROOT = pathlib.Path("airflow/providers")

# The AST node type a provider.yaml value must resolve to, or None when
# only the module itself needs to exist.
CheckType = typing.Union[
    None,  # Checking for the module only.
    typing.Type[ast.ClassDef],
    typing.Type[ast.FunctionDef],
]

# Tuple items are:
# - Section: Which top-level section in provider.yaml to check.
# - Subkey: If None, each value in the section is treated as an import path.
#   If not None, each value is a dict, with the value to the specified key
#   being an import path.
# - Check type: If None, the value is a module path. Otherwise the value is a
#   (module-global) object's full name and this specifies its AST type.
SECTIONS_TO_CHECK: list[tuple[str, str | None, CheckType]] = [
    ("auth-backends", None, None),
    ("connection-types", "hook-class-name", ast.ClassDef),
    ("executors", None, ast.ClassDef),
    ("extra-links", None, ast.ClassDef),
    ("secrets-backends", None, ast.ClassDef),
    ("logging", None, ast.ClassDef),
    ("notifications", None, ast.ClassDef),
    ("task-decorators", "class-name", ast.FunctionDef),
]


@functools.lru_cache
def _get_module(module_name: str) -> ast.Module | None:
path = pathlib.Path(*module_name.split(".")).with_suffix(".py")
if not path.is_file():
return None
return ast.parse(path.read_text("utf-8"), os.fspath(path))


def _is_check_type(v: ast.AST, t: type) -> typing_extensions.TypeGuard[ast.ClassDef | ast.FunctionDef]:
return isinstance(v, t)


def _iter_unimportable_paths(
    data: dict,
    section: str,
    subkey: str | None,
    check_type: CheckType,
) -> typing.Iterator[str]:
    """Yield import paths under *section* of parsed provider.yaml *data* that do not resolve.

    A path fails to resolve when its module has no backing source file, or
    (if *check_type* is given) when the module does not define a top-level
    object of that AST type with the expected name. *subkey* selects the
    import-path entry when section items are dicts rather than plain strings.
    """
    for item in data.get(section, ()):
        import_path = item if subkey is None else item[subkey]

        if check_type is None:
            # Only the module itself needs to exist.
            module_name, object_name = import_path, ""
        else:
            module_name, object_name = import_path.rsplit(".", 1)

        module = _get_module(module_name)
        if module is None:
            yield import_path
        elif check_type is not None and not any(
            _is_check_type(node, check_type) and node.name == object_name
            for node in ast.iter_child_nodes(module)
        ):
            yield import_path


def _iter_provider_yaml_errors(path: pathlib.Path) -> typing.Iterator[tuple[pathlib.Path, str, str]]:
    """Yield ``(provider.yaml path, section, broken import path)`` for every unresolvable value in *path*."""
    content = yaml.safe_load(path.read_text())
    for section, subkey, check_type in SECTIONS_TO_CHECK:
        for broken_path in _iter_unimportable_paths(content, section, subkey, check_type):
            yield path, section, broken_path


def _iter_provider_paths(argv: list[str]) -> typing.Iterator[pathlib.Path]:
    """Map each changed file in *argv* to the ``provider.yaml`` file(s) governing it.

    Arguments outside ``airflow/providers`` are skipped. For each provider
    file, every ancestor directory between the file and the providers root
    is probed for a ``provider.yaml``; duplicates across arguments are
    possible (the caller deduplicates).
    """
    for argument in argv:
        ancestors = list(pathlib.Path(argument).parents)
        if AIRFLOW_PROVIDERS_ROOT not in ancestors:
            continue  # Not in providers directory.
        providers_root_index = ancestors.index(AIRFLOW_PROVIDERS_ROOT)
        for ancestor in ancestors[:providers_root_index]:
            candidate = ancestor.joinpath("provider.yaml")
            if candidate.is_file():
                yield candidate


def main(argv: list[str]) -> int:
    """Validate provider.yaml files relevant to the paths in *argv*.

    Prints one line per broken import path and returns the number of
    errors found, so the return value doubles as the process exit status
    (0 means everything resolved).
    """
    provider_yamls = sorted(set(_iter_provider_paths(argv)))
    error_count = 0
    for provider_yaml in provider_yamls:
        for provider, section, value in _iter_provider_yaml_errors(provider_yaml):
            print(f"Broken {provider.relative_to(AIRFLOW_PROVIDERS_ROOT)} {section!r}: {value}")
            error_count += 1
    return error_count


# Exit status is main()'s return value: the number of broken import paths
# (0 = success). Passing sys.argv whole is harmless — argv[0] (the script
# path) is never under airflow/providers, so _iter_provider_paths skips it.
if __name__ == "__main__":
    sys.exit(main(sys.argv))