Skip to content

python scripts: enable standard type checking #14321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions dev-tools/scripts/addBackcompatIndexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


# For usage information, see:
#
#
# http://wiki.apache.org/lucene-java/ReleaseTodo#Generate_Backcompat_Indexes


Expand Down Expand Up @@ -49,7 +49,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
else:
filename = '%s.%s.zip' % (prefix, index_version)

print(' creating %s...' % filename, end='', flush=True)
module = 'backward-codecs'
index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
Expand All @@ -76,7 +76,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
])
base_dir = os.getcwd()
bc_index_file = os.path.join(temp_dir, filename)

if os.path.exists(bc_index_file):
print('alreadyexists')
else:
Expand All @@ -85,7 +85,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
if not os.path.exists(bc_index_file):
raise Exception("Expected file can't be found: %s" %bc_index_file)
print('done')

print(' adding %s...' % filename, end='', flush=True)
scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
os.chdir(base_dir)
Expand Down Expand Up @@ -125,7 +125,7 @@ def append(buffer, changed):
buffer.append('\n')
buffer.append(('%s\n') % index_version)
return True

changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
print('done' if changed else 'uptodate')

Expand All @@ -139,7 +139,7 @@ def download_from_cdn(version, remotename, localname):
try:
urllib.request.urlretrieve(url, localname)
return True
except urllib.error.URLError as e:
except urllib.error.HTTPError as e:
if e.code == 404:
return False
raise e
Expand All @@ -149,14 +149,14 @@ def download_from_archives(version, remotename, localname):
try:
urllib.request.urlretrieve(url, localname)
return True
except urllib.error.URLError as e:
except urllib.error.HTTPError as e:
if e.code == 404:
return False
raise e

def download_release(version, temp_dir, force):
print(' downloading %s source release...' % version, end='', flush=True)
source = os.path.join(temp_dir, 'lucene-%s' % version)
source = os.path.join(temp_dir, 'lucene-%s' % version)
if os.path.exists(source):
if force:
shutil.rmtree(source)
Expand All @@ -173,7 +173,7 @@ def download_release(version, temp_dir, force):
olddir = os.getcwd()
os.chdir(temp_dir)
scriptutil.run('tar -xvzf %s' % source_tgz)
os.chdir(olddir)
os.chdir(olddir)
print('done')
return source

Expand All @@ -195,9 +195,9 @@ def read_config():
c = parser.parse_args()

return c

def main():
c = read_config()
c = read_config()
if not os.path.exists(c.temp_dir):
os.makedirs(c.temp_dir)

Expand All @@ -216,7 +216,7 @@ def main():
create_and_add_index(source, 'dvupdates', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'emptyIndex', c.version, current_version, c.temp_dir)
print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')

print('\nAdding backwards compatibility tests')
update_backcompat_tests(c.version, current_version)

Expand Down
12 changes: 6 additions & 6 deletions dev-tools/scripts/addVersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def edit(buffer, match, line):
buffer.append('%s\n---------------------\n(No changes)\n\n' % header)
buffer.append(line)
return match is not None

changed = update_file(filename, matcher, edit)
print('done' if changed else 'uptodate')

Expand All @@ -53,7 +53,7 @@ def ensure_deprecated(buffer):
if last.strip() != '@Deprecated':
spaces = ' ' * (len(last) - len(last.lstrip()) - 1)
del buffer[-1] # Remove comment closer line
if (len(buffer) >= 4 and re.search('for Lucene.\s*$', buffer[-1]) is not None):
if (len(buffer) >= 4 and re.search(r'for Lucene.\s*$', buffer[-1]) is not None):
del buffer[-3:] # drop the trailing lines '<p> / Use this to get the latest ... / ... for Lucene.'
buffer.append(( '{0} * @deprecated ({1}) Use latest\n'
+ '{0} */\n'
Expand All @@ -73,7 +73,7 @@ def buffer_constant(buffer, line):
buffer.append('%s@Deprecated\n' % spaces)
buffer.append('{0}public static final Version {1} = new Version({2}, {3}, {4});\n'.format
(spaces, new_version.constant, new_version.major, new_version.minor, new_version.bugfix))

class Edit(object):
found = -1
def __call__(self, buffer, match, line):
Expand All @@ -97,14 +97,14 @@ def __call__(self, buffer, match, line):

buffer.append(line)
return False

changed = update_file(filename, matcher, Edit())
print('done' if changed else 'uptodate')

def update_build_version(new_version):
print(' changing baseVersion...', end='', flush=True)
filename = 'build.gradle'
def edit(buffer, match, line):
def edit(buffer, _, line):
if new_version.dot in line:
return None
buffer.append(' String baseVersion = \'' + new_version.dot + '\'\n')
Expand All @@ -118,7 +118,7 @@ def update_latest_constant(new_version):
print(' changing Version.LATEST to %s...' % new_version.constant, end='', flush=True)
filename = 'lucene/core/src/java/org/apache/lucene/util/Version.java'
matcher = re.compile('public static final Version LATEST')
def edit(buffer, match, line):
def edit(buffer, _, line):
if new_version.constant in line:
return None
buffer.append(line.rpartition('=')[0] + ('= %s;\n' % new_version.constant))
Expand Down
6 changes: 4 additions & 2 deletions dev-tools/scripts/buildAndPushRelease.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def runAndSendGPGPassword(command, password):
p = subprocess.Popen(command, shell=True, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
f = open(LOG, 'ab')
while True:
assert p.stdout
assert p.stdin
p.stdout.flush()
line = p.stdout.readline()
if len(line) == 0:
Expand Down Expand Up @@ -176,8 +178,7 @@ def checkDOAPfiles(version):
treeRoot = ET.parse(doapFile).getroot()
doapRevisions = set()
for revision in treeRoot.findall(xpathRevision):
match = reDoapRevision.match(revision.text)
if (match is not None):
if (revision.text and (match := reDoapRevision.match(revision.text))):
if (match.group(1) not in ('0', '1', '2')): # Ignore 0.X, 1.X and 2.X revisions
doapRevisions.add(normalizeVersion(match.groups()))
else:
Expand Down Expand Up @@ -412,6 +413,7 @@ def main():
print('Next run the smoker tester:')
p = re.compile(".*/")
m = p.match(sys.argv[0])
assert m
if not c.sign:
signed = "--not-signed"
else:
Expand Down
20 changes: 10 additions & 10 deletions dev-tools/scripts/create_line_file_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def compress_with_seek_points(file_name_in, file_name_out, num_seek_points):
break

bytes_in_chunk += len(line)
f_out.write(line)
f_out.write(line) # false positive in python's crazy typing # pyright: ignore[reportArgumentType]

if bytes_in_chunk > bytes_per_chunk and chunk_count < num_seek_points:
f_out.close()
Expand All @@ -72,12 +72,12 @@ def compress_with_seek_points(file_name_in, file_name_out, num_seek_points):
for seek_point in seek_points:
f_out.write('%d\n' % seek_point)

re_tag = re.compile('<[^>]+?>')
re_newlines = re.compile('\n+')
re_space = re.compile('\s')
re_tag = re.compile(r'<[^>]+?>')
re_newlines = re.compile(r'\n+')
re_space = re.compile(r'\s')

# used to find word break, for splitting docs into ~1 KB sized smaller docs:
re_next_non_word_character = re.compile('\W', re.U)
re_next_non_word_character = re.compile(r'\W', re.U)

EUROPARL_V7_URL = 'https://www.statmt.org/europarl/v7/europarl.tgz'

Expand All @@ -101,7 +101,7 @@ def split_docs(all_out, title_string, date_string, body_string):
char_count = len(body_string)

body_string_fragment = body_string[:char_count].strip()

#print('write title %d, body %d' % (len(title_string), len(body_string_fragment)))
all_out.write('%s\t%s\t%s\n' % (title_string, date_string, body_string_fragment))
body_string = body_string[char_count:]
Expand Down Expand Up @@ -143,7 +143,7 @@ def sample_europarl():
next_print_time = start_time + 3
# normalize text a bit and concatenate all lines into single file, counting total lines/bytes
with open(all_txt_file_name, 'w', encoding='utf-8') as all_out:
for dir_path, dir_names, file_names in os.walk('%s/txt' % tmp_dir_path):
for dir_path, _, file_names in os.walk('%s/txt' % tmp_dir_path):
for file_name in file_names:
if file_name.endswith('.txt'):
file_count += 1
Expand All @@ -155,7 +155,7 @@ def sample_europarl():
year = 2000 + year

date_string = '%04d-%02d-%02d' % (year, month, day)

# unfortunately we need errors='ignore' since in Europarl v7, one file (pl/ep-09-10-22-009.txt) has invalid utf-8:
chapter_count = 0
with open('%s/%s' % (dir_path, file_name), 'r', encoding='utf-8', errors='ignore') as f_in:
Expand All @@ -176,7 +176,7 @@ def sample_europarl():
doc_count += split_docs(all_out, last_title, date_string, s)
else:
skip_count += 1

last_text = []
chapter_count += 1
while True:
Expand Down Expand Up @@ -248,7 +248,7 @@ def sample_europarl():
compress_with_seek_points(file_name_out,
file_name_out + '.gz',
mb)

finally:
print('Removing tmp dir "%s"...' % tmp_dir_path)
if not DEBUG:
Expand Down
6 changes: 4 additions & 2 deletions dev-tools/scripts/diff_lucene_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ def get_changes_url(branch_name):
return url

def extract_release_section(changes_txt, release_name):
return re.search(f'=======+ Lucene {re.escape(release_name)} =======+(.*?)=======+ Lucene .*? =======+$',
changes_txt.decode('utf-8'), re.MULTILINE | re.DOTALL).group(1).encode('utf-8')
match = re.search(f'=======+ Lucene {re.escape(release_name)} =======+(.*?)=======+ Lucene .*? =======+$',
changes_txt.decode('utf-8'), re.MULTILINE | re.DOTALL)
assert match
return match.group(1).encode('utf-8')

def main():
if len(sys.argv) < 3 or len(sys.argv) > 5:
Expand Down
17 changes: 11 additions & 6 deletions dev-tools/scripts/githubPRs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
import argparse
import json
import re
from typing import cast
from github import Github
from jira import JIRA
from jira import JIRA, Issue
from jira.client import ResultList
from jinja2 import Environment, BaseLoader

def read_config():
Expand All @@ -46,7 +48,7 @@ def out(text):

def make_html(dict):
global conf
template = Environment(loader=BaseLoader).from_string("""
template = Environment(loader=BaseLoader()).from_string("""
<h1>Lucene Github PR report</h1>

<p>Number of open Pull Requests: {{ open_count }}</p>
Expand Down Expand Up @@ -75,7 +77,7 @@ def main():
gh = Github(token)
else:
gh = Github()
jira = JIRA('https://issues.apache.org/jira')
jira = JIRA('https://issues.apache.org/jira') # this ctor has broken types in jira library. # pyright: ignore[reportArgumentType]
result = {}
repo = gh.get_repo('apache/lucene')
open_prs = repo.get_pulls(state='open')
Expand All @@ -100,19 +102,22 @@ def main():
issue_ids = []
issue_to_pr = {}
for pr in has_jira:
jira_issue_str = re.match(r'.*\b((LUCENE)-\d{3,6})\b', pr.title).group(1)
match = re.match(r'.*\b((LUCENE)-\d{3,6})\b', pr.title)
assert match
jira_issue_str = match.group(1)
issue_ids.append(jira_issue_str)
issue_to_pr[jira_issue_str] = pr

resolved_jiras = jira.search_issues(jql_str="key in (%s) AND status in ('Closed', 'Resolved')" % ", ".join(issue_ids))
resolved_jiras = cast(ResultList[Issue], jira.search_issues(jql_str="key in (%s) AND status in ('Closed', 'Resolved')" % ", ".join(issue_ids)))
closed_jiras = []
for issue in resolved_jiras:
pr_title = issue_to_pr[issue.key].title
pr_number = issue_to_pr[issue.key].number
assignee = issue.fields.assignee.name if issue.fields.assignee else None
resolution = issue.fields.resolution.name if issue.fields.resolution else None
closed_jiras.append({ 'issue_key': issue.key,
'status': issue.fields.status.name,
'resolution': issue.fields.resolution.name,
'resolution': resolution,
'resolution_date': issue.fields.resolutiondate[:10],
'pr_number': pr_number,
'pr_title': pr_title,
Expand Down
11 changes: 1 addition & 10 deletions dev-tools/scripts/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,7 @@ venv = ".venv"
# TODO: improve!
# typeCheckingMode = "strict"
reportUnnecessaryTypeIgnoreComment = "error"
typeCheckingMode = "basic"
# TODO: we should fix these
reportArgumentType = "none"
reportAttributeAccessIssue = "none"
reportCallIssue = "none"
reportInvalidStringEscapeSequence = "none"
reportOperatorIssue = "none"
reportOptionalIterable = "none"
reportOptionalMemberAccess = "none"
reportOptionalSubscript = "none"
typeCheckingMode = "standard"

[tool.ruff]
line-length = 200
Expand Down
Loading