Skip to content

Commit fd51173

Browse files
authored
chore: filter CFN docs (#2853)
1 parent 2eed7d7 commit fd51173

File tree

2 files changed

+23
-76
lines changed

2 files changed

+23
-76
lines changed

bin/parse_docs.py

+23 -7
Original file line number | Diff line number | Diff line change
@@ -13,18 +13,16 @@
1313
import argparse
1414
import json
1515
import re
16-
from contextlib import suppress
1716
from pathlib import Path
1817
from typing import Dict, Iterator, Tuple
1918

2019

2120
def parse(s: str) -> Iterator[Tuple[str, str]]:
2221
"""Parse an AWS docs page in Markdown format, yielding each property."""
2322
# Prevent from parsing return values accidentally
24-
with suppress(ValueError):
25-
s = s[: s.index("# Return Value")]
26-
with suppress(ValueError):
27-
s = s[: s.index("# Return value")]
23+
s = stringbetween(s, "#\s+Properties", "#\s+Return values")
24+
if not s:
25+
return
2826
parts = s.split("\n\n")
2927
for part in parts:
3028
match = re.match(r"^\s*`(\w+)`\s+<a", part)
@@ -59,7 +57,18 @@ def convert_to_full_path(description: str, prefix: str) -> str:
5957

6058

6159
def stringbetween(s: str, sep1: str, sep2: str) -> str:
62-
return s.split(sep1, 1)[1].split(sep2, 1)[0]
60+
"""
61+
Return string between regexes. Case-insensitive. If sep2 doesn't match,
62+
returns to end of string.
63+
"""
64+
start = re.search(sep1, s, re.IGNORECASE)
65+
if not start:
66+
return ""
67+
s = s[start.end() :]
68+
end = re.search(sep2, s, re.IGNORECASE)
69+
if not end:
70+
return s
71+
return s[: end.start()]
6372

6473

6574
def main() -> None:
@@ -71,7 +80,14 @@ def main() -> None:
7180
props: Dict[str, Dict[str, str]] = {}
7281
for path in args.dir.glob("*.md"):
7382
text = path.read_text()
74-
title = stringbetween(text, "# ", "<a")
83+
title = stringbetween(text, r"#\s+", r"<a")
84+
if not title:
85+
raise Exception(f"{path} has no title")
86+
# In CFN docs, always expect either `AWS::Foo::Bar`, or `AWS::Foo::Bar SomeProperty`,
87+
# which maps to the definition names in GoFormation schema
88+
# Tangentially related: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-resource-specification-format.html
89+
if args.cfn and not re.match(r"^\w+::\w+::\w+( \w+)?$", title):
90+
continue
7591
page = title if args.cfn else path.stem
7692
for name, description in parse(text):
7793
if page not in props:

0 commit comments

Comments
 (0)