Skip to content

Commit 9df4953

Browse files
serhiy-storchaka authored and ebonnal committed
pythongh-126727: Fix locale.nl_langinfo(locale.ERA) (pythonGH-126730)
It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc.
1 parent b2163b7 commit 9df4953

File tree

4 files changed

+95
-28
lines changed

4 files changed

+95
-28
lines changed

Doc/library/locale.rst

+6-4
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
281281

282282
.. data:: ERA
283283

284-
Get a string that represents the era used in the current locale.
284+
Get a string which describes how years are counted and displayed for
285+
each era in a locale.
285286

286287
Most locales do not define this value. An example of a locale which does
287288
define this value is the Japanese one. In Japan, the traditional
@@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
290291

291292
Normally it should not be necessary to use this value directly. Specifying
292293
the ``E`` modifier in their format strings causes the :func:`time.strftime`
293-
function to use this information. The format of the returned string is not
294-
specified, and therefore you should not assume knowledge of it on different
295-
systems.
294+
function to use this information.
295+
The format of the returned string is specified in *The Open Group Base
296+
Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
297+
<https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
296298

297299
.. data:: ERA_D_T_FMT
298300

Lib/test/test__locale.py

+45
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ def accept(loc):
9090
'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
9191
}
9292

93+
# Expected nl_langinfo(ERA) results, keyed by locale name without the
# encoding suffix.  Each value is (count, sample):
#   * count  -- minimum number of semicolon-separated era segments expected;
#               0 means the locale must report an empty ERA string.
#   * sample -- a segment that must occur somewhere in the returned string.
known_era = {
    'C': (0, ''),
    'en_US': (0, ''),
    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
    # Thai Buddhist era.  The Thai locale is th_TH; a 'th_TW' key would
    # never match a candidate locale and the entry would be dead.
    'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
}
100+
93101
if sys.platform == 'win32':
94102
# ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
95103
del known_numerics['ps_AF']
@@ -230,6 +238,43 @@ def test_alt_digits_nl_langinfo(self):
230238
if not tested:
231239
self.skipTest('no suitable locales')
232240

241+
@unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
@unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
@unittest.skipIf(
    support.is_emscripten or support.is_wasi,
    "musl libc issue on Emscripten, bpo-46390"
)
def test_era_nl_langinfo(self):
    """Check nl_langinfo(ERA) for every candidate locale.

    For each locale that can be selected for LC_TIME the result must be a
    str.  A non-empty result must contain exactly five colons per
    semicolon-separated segment.  Locales listed in known_era are also
    checked against their expected minimum segment count and sample
    segment.  The whole test is skipped if no locale could be checked.
    """
    tested = False
    for loc in candidate_locales:
        # One sub-test per locale.  skipTest() raises SkipTest, which
        # subTest() records and swallows, so everything that depends on
        # the locale having been set must live in this same block;
        # otherwise the checks would run against the previous locale.
        with self.subTest(locale=loc):
            try:
                setlocale(LC_TIME, loc)
            except Error:
                self.skipTest(f'no locale {loc!r}')

            era = nl_langinfo(locale.ERA)
            self.assertIsInstance(era, str)
            if era:
                # Each segment has six colon-separated fields.
                self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)

            # Drop the encoding suffix ("ja_JP.UTF-8" -> "ja_JP").
            loc1 = loc.split('.', 1)[0]
            if loc1 in known_era:
                count, sample = known_era[loc1]
                if count:
                    if not era:
                        self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
                    self.assertGreaterEqual(era.count(';') + 1, count)
                    self.assertIn(sample, era)
                else:
                    self.assertEqual(era, '')
            tested = True
    if not tested:
        self.skipTest('no suitable locales')
277+
233278
def test_float_parsing(self):
234279
# Bug #1391872: Test whether float parsing is okay on European
235280
# locales.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
2+
segments separated by semicolons. Previously it only returned the first
3+
segment on platforms with Glibc.

Modules/_localemodule.c

+41-24
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,37 @@ restore_locale(char *oldloc)
636636
}
637637
}
638638

639+
#ifdef __GLIBC__
#if defined(ALT_DIGITS) || defined(ERA)
/* Convert a sequence of NUL-separated C strings to a Python string
 * containing semicolon separated items.
 *
 * result:    start of the sequence; scanning stops at the first empty
 *            string or after max_count items, whichever comes first.
 * max_count: upper bound on the number of items to take.
 *
 * Returns a new reference to a str, or NULL with an exception set if
 * memory allocation or decoding fails.  An empty sequence (or a
 * max_count of 0) yields an empty str. */
static PyObject *
decode_strings(const char *result, size_t max_count)
{
    /* Measure the sequence: afterwards i is the number of bytes taken by
     * the first count items, each including its terminating NUL. */
    size_t i = 0;
    size_t count = 0;
    for (; count < max_count && result[i]; count++) {
        i += strlen(result + i) + 1;
    }
    if (count == 0) {
        /* Nothing to join.  Returning here also prevents the unsigned
         * "--count" below from wrapping around and running past a
         * zero-sized buffer. */
        return PyUnicode_DecodeLocale("", NULL);
    }
    char *buf = PyMem_Malloc(i);
    if (buf == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(buf, result, i);
    /* Replace the NUL after every item except the last with a semicolon;
     * the last NUL is kept as the terminator of the joined string. */
    i = 0;
    while (--count) {
        i += strlen(buf + i);
        buf[i++] = ';';
    }
    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
    PyMem_Free(buf);
    return pyresult;
}
#endif
#endif
669+
639670
/*[clinic input]
640671
_locale.nl_langinfo
641672
@@ -668,32 +699,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
668699
}
669700
PyObject *pyresult;
670701
#ifdef __GLIBC__
702+
/* According to the POSIX specification the result must be
703+
* a sequence of semicolon-separated strings.
704+
* But in Glibc they are NUL-separated. */
671705
#ifdef ALT_DIGITS
672706
if (item == ALT_DIGITS && *result) {
673-
/* According to the POSIX specification the result must be
674-
* a sequence of up to 100 semicolon-separated strings.
675-
* But in Glibc they are NUL-separated. */
676-
Py_ssize_t i = 0;
677-
int count = 0;
678-
for (; count < 100 && result[i]; count++) {
679-
i += strlen(result + i) + 1;
680-
}
681-
char *buf = PyMem_Malloc(i);
682-
if (buf == NULL) {
683-
PyErr_NoMemory();
684-
pyresult = NULL;
685-
}
686-
else {
687-
memcpy(buf, result, i);
688-
/* Replace all NULs with semicolons. */
689-
i = 0;
690-
while (--count) {
691-
i += strlen(buf + i);
692-
buf[i++] = ';';
693-
}
694-
pyresult = PyUnicode_DecodeLocale(buf, NULL);
695-
PyMem_Free(buf);
696-
}
707+
pyresult = decode_strings(result, 100);
708+
}
709+
else
710+
#endif
711+
#ifdef ERA
712+
if (item == ERA && *result) {
713+
pyresult = decode_strings(result, SIZE_MAX);
697714
}
698715
else
699716
#endif

0 commit comments

Comments
 (0)