Skip to content

Commit a467d17

Browse files
tomasr8 authored and srinivasreddy committed
pythongh-104400: pygettext: Prepare to replace TokenEater with a NodeVisitor (python#129672)
* Update the module docstring
* Move ``key_for`` inside the class
* Move ``write_pot_file`` outside the class
1 parent 68160cc commit a467d17

File tree

1 file changed

+69
-68
lines changed

1 file changed

+69
-68
lines changed

Tools/i18n/pygettext.py

+69-68
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,9 @@
77
the programming language and can be used from within Python programs.
88
Martin von Loewis' work[1] helps considerably in this regard.
99
10-
There's one problem though; xgettext is the program that scans source code
11-
looking for message strings, but it groks only C (or C++). Python
12-
introduces a few wrinkles, such as dual quoting characters, triple quoted
13-
strings, and raw strings. xgettext understands none of this.
14-
15-
Enter pygettext, which uses Python's standard tokenize module to scan
16-
Python source code, generating .pot files identical to what GNU xgettext[2]
17-
generates for C and C++ code. From there, the standard GNU tools can be
18-
used.
10+
pygettext uses Python's standard tokenize module to scan Python source
11+
code, generating .pot files identical to what GNU xgettext[2] generates
12+
for C and C++ code. From there, the standard GNU tools can be used.
1913
2014
A word about marking Python strings as candidates for translation. GNU
2115
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
@@ -41,6 +35,9 @@
4135
option arguments is broken, and in these cases, pygettext just defines
4236
additional switches.
4337
38+
NOTE: The public interface of pygettext is limited to the command-line
39+
interface only. The internal API is subject to change without notice.
40+
4441
Usage: pygettext [options] inputfile ...
4542
4643
Options:
@@ -328,12 +325,6 @@ def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=Fals
328325
self.is_docstring |= is_docstring
329326

330327

331-
def key_for(msgid, msgctxt=None):
332-
if msgctxt is not None:
333-
return (msgctxt, msgid)
334-
return msgid
335-
336-
337328
class TokenEater:
338329
def __init__(self, options):
339330
self.__options = options
@@ -354,6 +345,10 @@ def __call__(self, ttype, tstring, stup, etup, line):
354345
## file=sys.stderr)
355346
self.__state(ttype, tstring, stup[0])
356347

348+
@property
349+
def messages(self):
350+
return self.__messages
351+
357352
def __waiting(self, ttype, tstring, lineno):
358353
opts = self.__options
359354
# Do docstring extractions, if enabled
@@ -513,7 +508,7 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
513508
lineno = self.__lineno
514509
msgctxt = msg.get('msgctxt')
515510
msgid_plural = msg.get('msgid_plural')
516-
key = key_for(msgid, msgctxt)
511+
key = self._key_for(msgid, msgctxt)
517512
if key in self.__messages:
518513
self.__messages[key].add_location(
519514
self.__curfile,
@@ -530,6 +525,12 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
530525
is_docstring=is_docstring,
531526
)
532527

528+
@staticmethod
529+
def _key_for(msgid, msgctxt=None):
530+
if msgctxt is not None:
531+
return (msgctxt, msgid)
532+
return msgid
533+
533534
def warn_unexpected_token(self, token):
534535
print((
535536
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
@@ -543,58 +544,58 @@ def set_filename(self, filename):
543544
self.__curfile = filename
544545
self.__freshmodule = 1
545546

546-
def write(self, fp):
547-
options = self.__options
548-
timestamp = time.strftime('%Y-%m-%d %H:%M%z')
549-
encoding = fp.encoding if fp.encoding else 'UTF-8'
550-
print(pot_header % {'time': timestamp, 'version': __version__,
551-
'charset': encoding,
552-
'encoding': '8bit'}, file=fp)
553-
554-
# Sort locations within each message by filename and lineno
555-
sorted_keys = [
556-
(key, sorted(msg.locations))
557-
for key, msg in self.__messages.items()
558-
]
559-
# Sort messages by locations
560-
# For example, a message with locations [('test.py', 1), ('test.py', 2)] will
561-
# appear before a message with locations [('test.py', 1), ('test.py', 3)]
562-
sorted_keys.sort(key=itemgetter(1))
563-
564-
for key, locations in sorted_keys:
565-
msg = self.__messages[key]
566-
if options.writelocations:
567-
# location comments are different b/w Solaris and GNU:
568-
if options.locationstyle == options.SOLARIS:
569-
for location in locations:
570-
print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
571-
elif options.locationstyle == options.GNU:
572-
# fit as many locations on one line, as long as the
573-
# resulting line length doesn't exceed 'options.width'
574-
locline = '#:'
575-
for location in locations:
576-
s = f' {location.filename}:{location.lineno}'
577-
if len(locline) + len(s) <= options.width:
578-
locline = locline + s
579-
else:
580-
print(locline, file=fp)
581-
locline = f'#:{s}'
582-
if len(locline) > 2:
547+
548+
def write_pot_file(messages, options, fp):
549+
timestamp = time.strftime('%Y-%m-%d %H:%M%z')
550+
encoding = fp.encoding if fp.encoding else 'UTF-8'
551+
print(pot_header % {'time': timestamp, 'version': __version__,
552+
'charset': encoding,
553+
'encoding': '8bit'}, file=fp)
554+
555+
# Sort locations within each message by filename and lineno
556+
sorted_keys = [
557+
(key, sorted(msg.locations))
558+
for key, msg in messages.items()
559+
]
560+
# Sort messages by locations
561+
# For example, a message with locations [('test.py', 1), ('test.py', 2)] will
562+
# appear before a message with locations [('test.py', 1), ('test.py', 3)]
563+
sorted_keys.sort(key=itemgetter(1))
564+
565+
for key, locations in sorted_keys:
566+
msg = messages[key]
567+
if options.writelocations:
568+
# location comments are different b/w Solaris and GNU:
569+
if options.locationstyle == options.SOLARIS:
570+
for location in locations:
571+
print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
572+
elif options.locationstyle == options.GNU:
573+
# fit as many locations on one line, as long as the
574+
# resulting line length doesn't exceed 'options.width'
575+
locline = '#:'
576+
for location in locations:
577+
s = f' {location.filename}:{location.lineno}'
578+
if len(locline) + len(s) <= options.width:
579+
locline = locline + s
580+
else:
583581
print(locline, file=fp)
584-
if msg.is_docstring:
585-
# If the entry was gleaned out of a docstring, then add a
586-
# comment stating so. This is to aid translators who may wish
587-
# to skip translating some unimportant docstrings.
588-
print('#, docstring', file=fp)
589-
if msg.msgctxt is not None:
590-
print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
591-
print('msgid', normalize(msg.msgid, encoding), file=fp)
592-
if msg.msgid_plural is not None:
593-
print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
594-
print('msgstr[0] ""', file=fp)
595-
print('msgstr[1] ""\n', file=fp)
596-
else:
597-
print('msgstr ""\n', file=fp)
582+
locline = f'#:{s}'
583+
if len(locline) > 2:
584+
print(locline, file=fp)
585+
if msg.is_docstring:
586+
# If the entry was gleaned out of a docstring, then add a
587+
# comment stating so. This is to aid translators who may wish
588+
# to skip translating some unimportant docstrings.
589+
print('#, docstring', file=fp)
590+
if msg.msgctxt is not None:
591+
print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
592+
print('msgid', normalize(msg.msgid, encoding), file=fp)
593+
if msg.msgid_plural is not None:
594+
print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
595+
print('msgstr[0] ""', file=fp)
596+
print('msgstr[1] ""\n', file=fp)
597+
else:
598+
print('msgstr ""\n', file=fp)
598599

599600

600601
def main():
@@ -752,7 +753,7 @@ class Options:
752753
fp = open(options.outfile, 'w')
753754
closep = 1
754755
try:
755-
eater.write(fp)
756+
write_pot_file(eater.messages, options, fp)
756757
finally:
757758
if closep:
758759
fp.close()

0 commit comments

Comments
 (0)