Skip to content

Commit 460d1e9

Browse files
more informative output about standardization and resolving some type hints
1 parent fd76049 commit 460d1e9

File tree

1 file changed

+47
-18
lines changed

1 file changed

+47
-18
lines changed

scripts/generate-catalog.py

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
import os
55
from pathlib import Path
66
import time
7-
import urllib
7+
from typing import Dict
8+
import urllib.request
89
import tarfile
910
import re
1011
from datetime import timedelta, datetime
1112
import git
13+
from git import Repo as GitRepo
1214
import yaml
15+
from github.PaginatedList import PaginatedList
1316

1417
from common import (
1518
store_data,
@@ -48,11 +51,12 @@ def __init__(
4851
linting,
4952
formatting,
5053
config_readme,
51-
settings: dict,
54+
settings: Dict | None,
5255
release,
5356
updated_at,
5457
topics,
5558
):
59+
self.full_name: str
5660
for attr in [
5761
"full_name",
5862
"description",
@@ -85,12 +89,22 @@ def __init__(
8589
)
8690
self.config_readme = config_readme
8791
self.standardized = True
92+
self.non_standardized_reason = None
8893
else:
8994
self.mandatory_flags = []
9095
self.software_stack_deployment = None
9196
self.config_readme = None
9297
self.report = False
9398
self.standardized = False
99+
self.non_standardized_reason = []
100+
if settings is None:
101+
self.non_standardized_reason.append(
102+
"no .snakemake-workflow-catalog.yml found in repo root"
103+
)
104+
if config_readme is None:
105+
self.non_standardized_reason.append(
106+
"no config/README.md found in repo root"
107+
)
94108

95109
# increase this if fields above change
96110
self.data_format = Repo.data_format
@@ -101,11 +115,12 @@ def __init__(
101115
total_count = 1
102116
offset = 0
103117
else:
104-
latest_commit = int(os.environ.get("LATEST_COMMIT"))
118+
assert "LATEST_COMMIT" in os.environ
119+
latest_commit = int(os.environ["LATEST_COMMIT"])
105120

106121
date_threshold = datetime.today() - timedelta(latest_commit)
107122
date_threshold = datetime.strftime(date_threshold, "%Y-%m-%d")
108-
repo_search = g.search_repositories(
123+
repo_search: PaginatedList = g.search_repositories(
109124
f"snakemake workflow in:readme archived:false pushed:>={date_threshold}",
110125
sort="updated",
111126
)
@@ -176,16 +191,17 @@ def __init__(
176191
# download release tag (use hardcoded url, because repo.tarball_url can sometimes
177192
# cause ambiguity errors if a branch is called the same as the release).
178193
tarball_url = f"https://github.com/{repo.full_name}/tarball/refs/tags/{release.tag_name}"
179-
get_tarfile = lambda: tarfile.open(
180-
fileobj=urllib.request.urlopen(tarball_url), mode="r|gz"
181-
)
194+
def get_tarfile():
195+
return tarfile.open(
196+
fileobj=urllib.request.urlopen(tarball_url), mode="r|gz"
197+
)
182198
root_dir = get_tarfile().getmembers()[0].name
183199
get_tarfile().extractall(path=tmp, filter="tar")
184200
tmp /= root_dir
185201
else:
186202
# no latest release, clone main branch
187203
try:
188-
gitrepo = git.Repo.clone_from(repo.clone_url, str(tmp), depth=1)
204+
gitrepo = GitRepo.clone_from(repo.clone_url, str(tmp), depth=1)
189205
except git.GitCommandError:
190206
log_skip("error cloning repository")
191207
register_skip(repo)
@@ -220,6 +236,13 @@ def __init__(
220236
with open(settings_file) as settings_file:
221237
try:
222238
settings = yaml.load(settings_file, yaml.SafeLoader)
239+
if not isinstance(settings, dict):
240+
logging.info(
241+
"No standardized usage possible because "
242+
".snakemake-workflow-catalog.yml does not contain a YAML "
243+
"mapping."
244+
)
245+
settings = None
223246
except yaml.scanner.ScannerError as e:
224247
logging.info(
225248
"No standardized usage possible because "
@@ -266,17 +289,23 @@ def __init__(
266289

267290
topics = call_rate_limit_aware(repo.get_topics)
268291

292+
repo_obj = Repo(
293+
repo,
294+
linting,
295+
formatting,
296+
config_readme,
297+
settings,
298+
release,
299+
updated_at,
300+
topics,
301+
)
302+
logging.info(
303+
f"Repo {repo_obj.full_name} processed successfully as "
304+
f"{'standardized' if repo_obj.standardized else 'non-standardized'} workflow. "
305+
)
306+
269307
repos.append(
270-
Repo(
271-
repo,
272-
linting,
273-
formatting,
274-
config_readme,
275-
settings,
276-
release,
277-
updated_at,
278-
topics,
279-
).__dict__
308+
repo_obj.__dict__
280309
)
281310

282311
if test_repo is None:

0 commit comments

Comments
 (0)