Skip to content

Commit b7df846

Browse files
committed
python scripts: enable standard type checking (#14321)
Fix all the type issues and enable checking so that no more creep in. A couple of third-party libraries have issues of their own (such as totally wrong typing); in those cases I just disabled the check on the affected line. The fixes are mostly basic stuff, such as adding missing null checks and the occasional cast or type hint.
1 parent aa437a8 commit b7df846

11 files changed

+118
-95
lines changed

dev-tools/scripts/addBackcompatIndexes.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818

1919
# For usage information, see:
20-
#
20+
#
2121
# http://wiki.apache.org/lucene-java/ReleaseTodo#Generate_Backcompat_Indexes
2222

2323

@@ -49,7 +49,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
4949
filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
5050
else:
5151
filename = '%s.%s.zip' % (prefix, index_version)
52-
52+
5353
print(' creating %s...' % filename, end='', flush=True)
5454
module = 'backward-codecs'
5555
index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
@@ -76,7 +76,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
7676
])
7777
base_dir = os.getcwd()
7878
bc_index_file = os.path.join(temp_dir, filename)
79-
79+
8080
if os.path.exists(bc_index_file):
8181
print('alreadyexists')
8282
else:
@@ -85,7 +85,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
8585
if not os.path.exists(bc_index_file):
8686
raise Exception("Expected file can't be found: %s" %bc_index_file)
8787
print('done')
88-
88+
8989
print(' adding %s...' % filename, end='', flush=True)
9090
scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
9191
os.chdir(base_dir)
@@ -125,7 +125,7 @@ def append(buffer, changed):
125125
buffer.append('\n')
126126
buffer.append(('%s\n') % index_version)
127127
return True
128-
128+
129129
changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
130130
print('done' if changed else 'uptodate')
131131

@@ -139,7 +139,7 @@ def download_from_cdn(version, remotename, localname):
139139
try:
140140
urllib.request.urlretrieve(url, localname)
141141
return True
142-
except urllib.error.URLError as e:
142+
except urllib.error.HTTPError as e:
143143
if e.code == 404:
144144
return False
145145
raise e
@@ -149,14 +149,14 @@ def download_from_archives(version, remotename, localname):
149149
try:
150150
urllib.request.urlretrieve(url, localname)
151151
return True
152-
except urllib.error.URLError as e:
152+
except urllib.error.HTTPError as e:
153153
if e.code == 404:
154154
return False
155155
raise e
156156

157157
def download_release(version, temp_dir, force):
158158
print(' downloading %s source release...' % version, end='', flush=True)
159-
source = os.path.join(temp_dir, 'lucene-%s' % version)
159+
source = os.path.join(temp_dir, 'lucene-%s' % version)
160160
if os.path.exists(source):
161161
if force:
162162
shutil.rmtree(source)
@@ -173,7 +173,7 @@ def download_release(version, temp_dir, force):
173173
olddir = os.getcwd()
174174
os.chdir(temp_dir)
175175
scriptutil.run('tar -xvzf %s' % source_tgz)
176-
os.chdir(olddir)
176+
os.chdir(olddir)
177177
print('done')
178178
return source
179179

@@ -195,9 +195,9 @@ def read_config():
195195
c = parser.parse_args()
196196

197197
return c
198-
198+
199199
def main():
200-
c = read_config()
200+
c = read_config()
201201
if not os.path.exists(c.temp_dir):
202202
os.makedirs(c.temp_dir)
203203

@@ -216,7 +216,7 @@ def main():
216216
create_and_add_index(source, 'dvupdates', c.version, current_version, c.temp_dir)
217217
create_and_add_index(source, 'emptyIndex', c.version, current_version, c.temp_dir)
218218
print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')
219-
219+
220220
print('\nAdding backwards compatibility tests')
221221
update_backcompat_tests(c.version, current_version)
222222

dev-tools/scripts/addVersion.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def edit(buffer, match, line):
3737
buffer.append('%s\n---------------------\n(No changes)\n\n' % header)
3838
buffer.append(line)
3939
return match is not None
40-
40+
4141
changed = update_file(filename, matcher, edit)
4242
print('done' if changed else 'uptodate')
4343

@@ -53,7 +53,7 @@ def ensure_deprecated(buffer):
5353
if last.strip() != '@Deprecated':
5454
spaces = ' ' * (len(last) - len(last.lstrip()) - 1)
5555
del buffer[-1] # Remove comment closer line
56-
if (len(buffer) >= 4 and re.search('for Lucene.\s*$', buffer[-1]) is not None):
56+
if (len(buffer) >= 4 and re.search(r'for Lucene.\s*$', buffer[-1]) is not None):
5757
del buffer[-3:] # drop the trailing lines '<p> / Use this to get the latest ... / ... for Lucene.'
5858
buffer.append(( '{0} * @deprecated ({1}) Use latest\n'
5959
+ '{0} */\n'
@@ -73,7 +73,7 @@ def buffer_constant(buffer, line):
7373
buffer.append('%s@Deprecated\n' % spaces)
7474
buffer.append('{0}public static final Version {1} = new Version({2}, {3}, {4});\n'.format
7575
(spaces, new_version.constant, new_version.major, new_version.minor, new_version.bugfix))
76-
76+
7777
class Edit(object):
7878
found = -1
7979
def __call__(self, buffer, match, line):
@@ -97,14 +97,14 @@ def __call__(self, buffer, match, line):
9797

9898
buffer.append(line)
9999
return False
100-
100+
101101
changed = update_file(filename, matcher, Edit())
102102
print('done' if changed else 'uptodate')
103103

104104
def update_build_version(new_version):
105105
print(' changing baseVersion...', end='', flush=True)
106106
filename = 'build.gradle'
107-
def edit(buffer, match, line):
107+
def edit(buffer, _, line):
108108
if new_version.dot in line:
109109
return None
110110
buffer.append(' String baseVersion = \'' + new_version.dot + '\'\n')
@@ -118,7 +118,7 @@ def update_latest_constant(new_version):
118118
print(' changing Version.LATEST to %s...' % new_version.constant, end='', flush=True)
119119
filename = 'lucene/core/src/java/org/apache/lucene/util/Version.java'
120120
matcher = re.compile('public static final Version LATEST')
121-
def edit(buffer, match, line):
121+
def edit(buffer, _, line):
122122
if new_version.constant in line:
123123
return None
124124
buffer.append(line.rpartition('=')[0] + ('= %s;\n' % new_version.constant))

dev-tools/scripts/buildAndPushRelease.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def runAndSendGPGPassword(command, password):
5151
p = subprocess.Popen(command, shell=True, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
5252
f = open(LOG, 'ab')
5353
while True:
54+
assert p.stdout
55+
assert p.stdin
5456
p.stdout.flush()
5557
line = p.stdout.readline()
5658
if len(line) == 0:
@@ -176,8 +178,7 @@ def checkDOAPfiles(version):
176178
treeRoot = ET.parse(doapFile).getroot()
177179
doapRevisions = set()
178180
for revision in treeRoot.findall(xpathRevision):
179-
match = reDoapRevision.match(revision.text)
180-
if (match is not None):
181+
if (revision.text and (match := reDoapRevision.match(revision.text))):
181182
if (match.group(1) not in ('0', '1', '2')): # Ignore 0.X, 1.X and 2.X revisions
182183
doapRevisions.add(normalizeVersion(match.groups()))
183184
else:
@@ -412,6 +413,7 @@ def main():
412413
print('Next run the smoker tester:')
413414
p = re.compile(".*/")
414415
m = p.match(sys.argv[0])
416+
assert m
415417
if not c.sign:
416418
signed = "--not-signed"
417419
else:

dev-tools/scripts/create_line_file_docs.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def compress_with_seek_points(file_name_in, file_name_out, num_seek_points):
6161
break
6262

6363
bytes_in_chunk += len(line)
64-
f_out.write(line)
64+
f_out.write(line) # false positive in python's crazy typing # pyright: ignore[reportArgumentType]
6565

6666
if bytes_in_chunk > bytes_per_chunk and chunk_count < num_seek_points:
6767
f_out.close()
@@ -72,12 +72,12 @@ def compress_with_seek_points(file_name_in, file_name_out, num_seek_points):
7272
for seek_point in seek_points:
7373
f_out.write('%d\n' % seek_point)
7474

75-
re_tag = re.compile('<[^>]+?>')
76-
re_newlines = re.compile('\n+')
77-
re_space = re.compile('\s')
75+
re_tag = re.compile(r'<[^>]+?>')
76+
re_newlines = re.compile(r'\n+')
77+
re_space = re.compile(r'\s')
7878

7979
# used to find word break, for splitting docs into ~1 KB sized smaller docs:
80-
re_next_non_word_character = re.compile('\W', re.U)
80+
re_next_non_word_character = re.compile(r'\W', re.U)
8181

8282
EUROPARL_V7_URL = 'https://www.statmt.org/europarl/v7/europarl.tgz'
8383

@@ -101,7 +101,7 @@ def split_docs(all_out, title_string, date_string, body_string):
101101
char_count = len(body_string)
102102

103103
body_string_fragment = body_string[:char_count].strip()
104-
104+
105105
#print('write title %d, body %d' % (len(title_string), len(body_string_fragment)))
106106
all_out.write('%s\t%s\t%s\n' % (title_string, date_string, body_string_fragment))
107107
body_string = body_string[char_count:]
@@ -143,7 +143,7 @@ def sample_europarl():
143143
next_print_time = start_time + 3
144144
# normalize text a bit and concatenate all lines into single file, counting total lines/bytes
145145
with open(all_txt_file_name, 'w', encoding='utf-8') as all_out:
146-
for dir_path, dir_names, file_names in os.walk('%s/txt' % tmp_dir_path):
146+
for dir_path, _, file_names in os.walk('%s/txt' % tmp_dir_path):
147147
for file_name in file_names:
148148
if file_name.endswith('.txt'):
149149
file_count += 1
@@ -155,7 +155,7 @@ def sample_europarl():
155155
year = 2000 + year
156156

157157
date_string = '%04d-%02d-%02d' % (year, month, day)
158-
158+
159159
# unfortunately we need errors='ignore' since in Europarl v7, one file (pl/ep-09-10-22-009.txt) has invalid utf-8:
160160
chapter_count = 0
161161
with open('%s/%s' % (dir_path, file_name), 'r', encoding='utf-8', errors='ignore') as f_in:
@@ -176,7 +176,7 @@ def sample_europarl():
176176
doc_count += split_docs(all_out, last_title, date_string, s)
177177
else:
178178
skip_count += 1
179-
179+
180180
last_text = []
181181
chapter_count += 1
182182
while True:
@@ -248,7 +248,7 @@ def sample_europarl():
248248
compress_with_seek_points(file_name_out,
249249
file_name_out + '.gz',
250250
mb)
251-
251+
252252
finally:
253253
print('Removing tmp dir "%s"...' % tmp_dir_path)
254254
if not DEBUG:

dev-tools/scripts/diff_lucene_changes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ def get_changes_url(branch_name):
4242
return url
4343

4444
def extract_release_section(changes_txt, release_name):
45-
return re.search(f'=======+ Lucene {re.escape(release_name)} =======+(.*?)=======+ Lucene .*? =======+$',
46-
changes_txt.decode('utf-8'), re.MULTILINE | re.DOTALL).group(1).encode('utf-8')
45+
match = re.search(f'=======+ Lucene {re.escape(release_name)} =======+(.*?)=======+ Lucene .*? =======+$',
46+
changes_txt.decode('utf-8'), re.MULTILINE | re.DOTALL)
47+
assert match
48+
return match.group(1).encode('utf-8')
4749

4850
def main():
4951
if len(sys.argv) < 3 or len(sys.argv) > 5:

dev-tools/scripts/githubPRs.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
import argparse
2727
import json
2828
import re
29+
from typing import cast
2930
from github import Github
30-
from jira import JIRA
31+
from jira import JIRA, Issue
32+
from jira.client import ResultList
3133
from jinja2 import Environment, BaseLoader
3234

3335
def read_config():
@@ -46,7 +48,7 @@ def out(text):
4648

4749
def make_html(dict):
4850
global conf
49-
template = Environment(loader=BaseLoader).from_string("""
51+
template = Environment(loader=BaseLoader()).from_string("""
5052
<h1>Lucene Github PR report</h1>
5153
5254
<p>Number of open Pull Requests: {{ open_count }}</p>
@@ -75,7 +77,7 @@ def main():
7577
gh = Github(token)
7678
else:
7779
gh = Github()
78-
jira = JIRA('https://issues.apache.org/jira')
80+
jira = JIRA('https://issues.apache.org/jira') # this ctor has broken types in jira library. # pyright: ignore[reportArgumentType]
7981
result = {}
8082
repo = gh.get_repo('apache/lucene')
8183
open_prs = repo.get_pulls(state='open')
@@ -100,19 +102,22 @@ def main():
100102
issue_ids = []
101103
issue_to_pr = {}
102104
for pr in has_jira:
103-
jira_issue_str = re.match(r'.*\b((LUCENE)-\d{3,6})\b', pr.title).group(1)
105+
match = re.match(r'.*\b((LUCENE)-\d{3,6})\b', pr.title)
106+
assert match
107+
jira_issue_str = match.group(1)
104108
issue_ids.append(jira_issue_str)
105109
issue_to_pr[jira_issue_str] = pr
106110

107-
resolved_jiras = jira.search_issues(jql_str="key in (%s) AND status in ('Closed', 'Resolved')" % ", ".join(issue_ids))
111+
resolved_jiras = cast(ResultList[Issue], jira.search_issues(jql_str="key in (%s) AND status in ('Closed', 'Resolved')" % ", ".join(issue_ids)))
108112
closed_jiras = []
109113
for issue in resolved_jiras:
110114
pr_title = issue_to_pr[issue.key].title
111115
pr_number = issue_to_pr[issue.key].number
112116
assignee = issue.fields.assignee.name if issue.fields.assignee else None
117+
resolution = issue.fields.resolution.name if issue.fields.resolution else None
113118
closed_jiras.append({ 'issue_key': issue.key,
114119
'status': issue.fields.status.name,
115-
'resolution': issue.fields.resolution.name,
120+
'resolution': resolution,
116121
'resolution_date': issue.fields.resolutiondate[:10],
117122
'pr_number': pr_number,
118123
'pr_title': pr_title,

dev-tools/scripts/pyproject.toml

+1-10
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,7 @@ venv = ".venv"
44
# TODO: improve!
55
# typeCheckingMode = "strict"
66
reportUnnecessaryTypeIgnoreComment = "error"
7-
typeCheckingMode = "basic"
8-
# TODO: we should fix these
9-
reportArgumentType = "none"
10-
reportAttributeAccessIssue = "none"
11-
reportCallIssue = "none"
12-
reportInvalidStringEscapeSequence = "none"
13-
reportOperatorIssue = "none"
14-
reportOptionalIterable = "none"
15-
reportOptionalMemberAccess = "none"
16-
reportOptionalSubscript = "none"
7+
typeCheckingMode = "standard"
178

189
[tool.ruff]
1910
line-length = 200

0 commit comments

Comments
 (0)