4
4
import os
5
5
from pathlib import Path
6
6
import time
7
- import urllib
7
+ from typing import Dict
8
+ import urllib .request
8
9
import tarfile
9
10
import re
10
11
from datetime import timedelta , datetime
11
12
import git
13
+ from git import Repo as GitRepo
12
14
import yaml
15
+ from github .PaginatedList import PaginatedList
13
16
14
17
from common import (
15
18
store_data ,
@@ -48,11 +51,12 @@ def __init__(
48
51
linting ,
49
52
formatting ,
50
53
config_readme ,
51
- settings : dict ,
54
+ settings : Dict | None ,
52
55
release ,
53
56
updated_at ,
54
57
topics ,
55
58
):
59
+ self .full_name : str
56
60
for attr in [
57
61
"full_name" ,
58
62
"description" ,
@@ -85,12 +89,22 @@ def __init__(
85
89
)
86
90
self .config_readme = config_readme
87
91
self .standardized = True
92
+ self .non_standardized_reason = None
88
93
else :
89
94
self .mandatory_flags = []
90
95
self .software_stack_deployment = None
91
96
self .config_readme = None
92
97
self .report = False
93
98
self .standardized = False
99
+ self .non_standardized_reason = []
100
+ if settings is None :
101
+ self .non_standardized_reason .append (
102
+ "no .snakemake-workflow-catalog.yml found in repo root"
103
+ )
104
+ if config_readme is None :
105
+ self .non_standardized_reason .append (
106
+ "no config/README.md found in repo root"
107
+ )
94
108
95
109
# increase this if fields above change
96
110
self .data_format = Repo .data_format
@@ -101,11 +115,12 @@ def __init__(
101
115
total_count = 1
102
116
offset = 0
103
117
else :
104
- latest_commit = int (os .environ .get ("LATEST_COMMIT" ))
118
+ assert "LATEST_COMMIT" in os .environ
119
+ latest_commit = int (os .environ ["LATEST_COMMIT" ])
105
120
106
121
date_threshold = datetime .today () - timedelta (latest_commit )
107
122
date_threshold = datetime .strftime (date_threshold , "%Y-%m-%d" )
108
- repo_search = g .search_repositories (
123
+ repo_search : PaginatedList = g .search_repositories (
109
124
f"snakemake workflow in:readme archived:false pushed:>={ date_threshold } " ,
110
125
sort = "updated" ,
111
126
)
@@ -176,16 +191,17 @@ def __init__(
176
191
# download release tag (use hardcoded url, because repo.tarball_url can sometimes
177
192
# cause ambiguity errors if a branch is called the same as the release).
178
193
tarball_url = f"https://github.com/{ repo .full_name } /tarball/refs/tags/{ release .tag_name } "
179
- get_tarfile = lambda : tarfile .open (
180
- fileobj = urllib .request .urlopen (tarball_url ), mode = "r|gz"
181
- )
194
+ def get_tarfile ():
195
+ return tarfile .open (
196
+ fileobj = urllib .request .urlopen (tarball_url ), mode = "r|gz"
197
+ )
182
198
root_dir = get_tarfile ().getmembers ()[0 ].name
183
199
get_tarfile ().extractall (path = tmp , filter = "tar" )
184
200
tmp /= root_dir
185
201
else :
186
202
# no latest release, clone main branch
187
203
try :
188
- gitrepo = git . Repo .clone_from (repo .clone_url , str (tmp ), depth = 1 )
204
+ gitrepo = GitRepo .clone_from (repo .clone_url , str (tmp ), depth = 1 )
189
205
except git .GitCommandError :
190
206
log_skip ("error cloning repository" )
191
207
register_skip (repo )
@@ -220,6 +236,13 @@ def __init__(
220
236
with open (settings_file ) as settings_file :
221
237
try :
222
238
settings = yaml .load (settings_file , yaml .SafeLoader )
239
+ if not isinstance (settings , dict ):
240
+ logging .info (
241
+ "No standardized usage possible because "
242
+ ".snakemake-workflow-catalog.yml does not contain a YAML "
243
+ "mapping."
244
+ )
245
+ settings = None
223
246
except yaml .scanner .ScannerError as e :
224
247
logging .info (
225
248
"No standardized usage possible because "
@@ -266,17 +289,23 @@ def __init__(
266
289
267
290
topics = call_rate_limit_aware (repo .get_topics )
268
291
292
+ repo_obj = Repo (
293
+ repo ,
294
+ linting ,
295
+ formatting ,
296
+ config_readme ,
297
+ settings ,
298
+ release ,
299
+ updated_at ,
300
+ topics ,
301
+ )
302
+ logging .info (
303
+ f"Repo { repo_obj .full_name } processed successfully as "
304
+ f"{ 'standardized' if repo_obj .standardized else 'non-standardized' } workflow. "
305
+ )
306
+
269
307
repos .append (
270
- Repo (
271
- repo ,
272
- linting ,
273
- formatting ,
274
- config_readme ,
275
- settings ,
276
- release ,
277
- updated_at ,
278
- topics ,
279
- ).__dict__
308
+ repo_obj .__dict__
280
309
)
281
310
282
311
if test_repo is None :
0 commit comments