Skip to content

Commit 5389595

Browse files
committed
Add pre-commit hook to validate provider spec
Also fix errors found by the new check.
1 parent bcd119f commit 5389595

File tree

9 files changed

+284
-134
lines changed

9 files changed

+284
-134
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,13 @@ repos:
372372
require_serial: true
373373
files: ^airflow/providers/.*\.py$
374374
additional_dependencies: ['rich>=12.4.4']
375+
- id: check-provider-yaml-value-importable
376+
name: Check values in provider.yaml are importable
377+
entry: ./scripts/ci/pre_commit/pre_commit_provider_import_path_exists.py
378+
language: python
379+
pass_filenames: true
380+
files: ^airflow/providers/.*$
381+
additional_dependencies: ['pyyaml']
375382
- id: update-local-yml-file
376383
name: Update mounts in the local yml file
377384
entry: ./scripts/ci/pre_commit/pre_commit_local_yml_mounts.py

STATIC_CODE_CHECKS.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ require Breeze Docker image to be built locally.
219219
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
220220
| check-provider-yaml-valid | Validate provider.yaml files | * |
221221
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
222+
| check-provider-yaml-value-importable | Check values in provider.yaml are importable | |
223+
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
222224
| check-providers-init-file-missing | Provider init file is missing | |
223225
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
224226
| check-providers-subpackages-init-file-exist | Provider subpackage init files are there | |

airflow/providers/redis/provider.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,4 @@ connection-types:
6868
connection-type: redis
6969

7070
logging:
71-
- airflow.providers.redis.redis_task_handler.RedisTaskHandler
71+
- airflow.providers.redis.log.redis_task_handler.RedisTaskHandler

airflow/providers/slack/provider.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,4 @@ connection-types:
8282
connection-type: slackwebhook
8383

8484
notifications:
85-
- airflow.providers.slack.notifications.slack_notifier.SlackNotifier
85+
- airflow.providers.slack.notifications.slack.SlackNotifier

dev/breeze/src/airflow_breeze/pre_commit_ids.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
"check-pre-commit-information-consistent",
6464
"check-provide-create-sessions-imports",
6565
"check-provider-yaml-valid",
66+
"check-provider-yaml-value-importable",
6667
"check-providers-init-file-missing",
6768
"check-providers-subpackages-init-file-exist",
6869
"check-pydevd-left-in-code",

images/breeze/output-commands-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ setup:version:be116d90a21c2afe01087f7609774e1e
6464
setup:fd391bab5277ad3aca65987a84144d51
6565
shell:c7adf57a354ecb496ae12fc0b9eaea80
6666
start-airflow:23df4528b3972977e58f7d29336500f7
67-
static-checks:5ded21248cd4f5617779f58ecf6c554c
67+
static-checks:3c6b570f3a05f12cc08eb0d0d2462c29
6868
testing:docker-compose-tests:0c810047fc66a0cfe91119e2d08b3507
6969
testing:helm-tests:8e491da2e01ebd815322c37562059d77
7070
testing:integration-tests:f2a400ac9f722b9c279abbd4e3313e71

images/breeze/output-commands.svg

Lines changed: 54 additions & 54 deletions
Loading

images/breeze/output_static-checks.svg

Lines changed: 81 additions & 77 deletions
Loading
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env python
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
from __future__ import annotations
21+
22+
import ast
23+
import functools
24+
import itertools
25+
import os
26+
import pathlib
27+
import sys
28+
import typing
29+
30+
import yaml
31+
32+
if typing.TYPE_CHECKING:
33+
import typing_extensions
34+
35+
AIRFLOW_PROVIDERS_ROOT = pathlib.Path("airflow/providers")
36+
37+
CheckType = typing.Union[
38+
None, # Checking for the module only.
39+
typing.Type[ast.ClassDef],
40+
typing.Type[ast.FunctionDef],
41+
]
42+
43+
# Tuple items are:
44+
# - Section: Which top-level section in provider.yaml to check.
45+
# - Subkey: If None, each value in the section is treated as an import path.
46+
# If not None, each value is a dict, with the value to the specified key
47+
# being an import path.
48+
# - Check type: If None, the value is a module path. Otherwise the value is a
49+
# (module-global) object's full name and this specifies its AST type.
50+
SECTIONS_TO_CHECK: list[tuple[str, str | None, CheckType]] = [
51+
("auth-backends", None, None),
52+
("connection-types", "hook-class-name", ast.ClassDef),
53+
("executors", None, ast.ClassDef),
54+
("extra-links", None, ast.ClassDef),
55+
("secrets-backends", None, ast.ClassDef),
56+
("logging", None, ast.ClassDef),
57+
("notifications", None, ast.ClassDef),
58+
("task-decorators", "class-name", ast.FunctionDef),
59+
]
60+
61+
62+
@functools.lru_cache
63+
def _get_module(module_name: str) -> ast.Module | None:
64+
path = pathlib.Path(*module_name.split(".")).with_suffix(".py")
65+
if not path.is_file():
66+
return None
67+
return ast.parse(path.read_text("utf-8"), os.fspath(path))
68+
69+
70+
def _is_check_type(v: ast.AST, t: type) -> typing_extensions.TypeGuard[ast.ClassDef | ast.FunctionDef]:
71+
return isinstance(v, t)
72+
73+
74+
def _iter_unimportable_paths(
75+
data: dict,
76+
section: str,
77+
subkey: str | None,
78+
check_type: CheckType,
79+
) -> typing.Iterator[str]:
80+
for item in data.get(section, ()):
81+
if subkey is None:
82+
import_path = item
83+
else:
84+
import_path = item[subkey]
85+
86+
if check_type is None:
87+
module_name = import_path
88+
class_name = ""
89+
else:
90+
module_name, class_name = import_path.rsplit(".", 1)
91+
92+
if (module := _get_module(module_name)) is None:
93+
yield import_path
94+
continue
95+
96+
if check_type is None:
97+
continue
98+
99+
has_class = any(
100+
_is_check_type(child, check_type) and child.name == class_name
101+
for child in ast.iter_child_nodes(module)
102+
)
103+
if not has_class:
104+
yield import_path
105+
106+
107+
def _iter_provider_yaml_errors(path: pathlib.Path) -> typing.Iterator[tuple[pathlib.Path, str, str]]:
108+
with path.open() as f:
109+
data = yaml.safe_load(f)
110+
for section, subkey, check_type in SECTIONS_TO_CHECK:
111+
for error in _iter_unimportable_paths(data, section, subkey, check_type):
112+
yield path, section, error
113+
114+
115+
def _iter_provider_paths(argv: list[str]) -> typing.Iterator[pathlib.Path]:
116+
for argument in argv:
117+
parents = list(pathlib.Path(argument).parents)
118+
try:
119+
root_index = parents.index(AIRFLOW_PROVIDERS_ROOT)
120+
except ValueError: # Not in providers directory.
121+
continue
122+
for parent in parents[:root_index]:
123+
if (provider_yaml := parent.joinpath("provider.yaml")).is_file():
124+
yield provider_yaml
125+
126+
127+
def main(argv: list[str]) -> int:
128+
providers_to_check = sorted(set(_iter_provider_paths(argv)))
129+
errors = list(itertools.chain.from_iterable(_iter_provider_yaml_errors(p) for p in providers_to_check))
130+
for provider, section, value in errors:
131+
print(f"Broken {provider.relative_to(AIRFLOW_PROVIDERS_ROOT)} {section!r}: {value}")
132+
return len(errors)
133+
134+
135+
if __name__ == "__main__":
136+
sys.exit(main(sys.argv))

0 commit comments

Comments
 (0)