Skip to content

Commit 4b4e0db

Browse files
[3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730) (GH-127098)
It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc. (cherry picked from commit 4803cd0)
1 parent d997be0 commit 4b4e0db

File tree

4 files changed

+96
-28
lines changed

4 files changed

+96
-28
lines changed

Doc/library/locale.rst

+6-4
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
281281

282282
.. data:: ERA
283283

284-
Get a string that represents the era used in the current locale.
284+
Get a string which describes how years are counted and displayed for
285+
each era in a locale.
285286

286287
Most locales do not define this value. An example of a locale which does
287288
define this value is the Japanese one. In Japan, the traditional
@@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
290291

291292
Normally it should not be necessary to use this value directly. Specifying
292293
the ``E`` modifier in the format string causes the :func:`time.strftime`
293-
function to use this information. The format of the returned string is not
294-
specified, and therefore you should not assume knowledge of it on different
295-
systems.
294+
function to use this information.
295+
The format of the returned string is specified in *The Open Group Base
296+
Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
297+
<https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
296298

297299
.. data:: ERA_D_T_FMT
298300

Lib/test/test__locale.py

+46
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ def accept(loc):
9090
'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
9191
}
9292

93+
# Expected ERA data for selected locales, keyed by the locale name without
# its encoding suffix.  Each value is a pair
# (minimum number of era segments, one segment that must be present);
# a count of 0 means the locale is expected to define no era at all.
known_era = {
    'C': (0, ''),
    'en_US': (0, ''),
    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
    # Thai Buddhist era; the locale is th_TH (Thailand), not th_TW.
    'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
}
100+
93101
if sys.platform == 'win32':
94102
# ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
95103
del known_numerics['ps_AF']
@@ -228,6 +236,44 @@ def test_alt_digits_nl_langinfo(self):
228236
if not tested:
229237
self.skipTest('no suitable locales')
230238

239+
@unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
@unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
@unittest.skipIf(
    support.is_emscripten or support.is_wasi,
    "musl libc issue on Emscripten, bpo-46390"
)
def test_era_nl_langinfo(self):
    # Test nl_langinfo(ERA): for every candidate locale that can be set,
    # check the general shape of the result and, for locales listed in
    # known_era, the expected number of segments and a sample segment.
    tested = False
    for loc in candidate_locales:
        with self.subTest(locale=loc):
            try:
                setlocale(LC_TIME, loc)
                setlocale(LC_CTYPE, loc)
            except Error:
                # skipTest() raises, subTest() records the skip and the
                # loop moves on to the next locale, so nothing below runs
                # with a locale that could not be set.
                self.skipTest(f'no locale {loc!r}')

            era = nl_langinfo(locale.ERA)
            self.assertIsInstance(era, str)
            if era:
                # Segments are separated by ';' and each segment must
                # contain exactly 5 colons.
                self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)

            # known_era is keyed by the locale name without the encoding.
            loc1 = loc.split('.', 1)[0]
            if loc1 in known_era:
                count, sample = known_era[loc1]
                if count:
                    if not era:
                        self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
                    self.assertGreaterEqual(era.count(';') + 1, count)
                    self.assertIn(sample, era)
                else:
                    self.assertEqual(era, '')
                tested = True
    if not tested:
        self.skipTest('no suitable locales')
276+
231277
def test_float_parsing(self):
232278
# Bug #1391872: Test whether float parsing is okay on European
233279
# locales.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
2+
segments separated by semicolons. Previously it only returned the first
3+
segment on platforms with Glibc.

Modules/_localemodule.c

+41-24
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,37 @@ static struct langinfo_constant{
595595
{0, 0}
596596
};
597597

598+
#ifdef __GLIBC__
599+
#if defined(ALT_DIGITS) || defined(ERA)
600+
static PyObject *
601+
decode_strings(const char *result, size_t max_count)
602+
{
603+
/* Convert a sequence of NUL-separated C strings to a Python string
604+
* containing semicolon separated items. */
605+
size_t i = 0;
606+
size_t count = 0;
607+
for (; count < max_count && result[i]; count++) {
608+
i += strlen(result + i) + 1;
609+
}
610+
char *buf = PyMem_Malloc(i);
611+
if (buf == NULL) {
612+
PyErr_NoMemory();
613+
return NULL;
614+
}
615+
memcpy(buf, result, i);
616+
/* Replace all NULs with semicolons. */
617+
i = 0;
618+
while (--count) {
619+
i += strlen(buf + i);
620+
buf[i++] = ';';
621+
}
622+
PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
623+
PyMem_Free(buf);
624+
return pyresult;
625+
}
626+
#endif
627+
#endif
628+
598629
/*[clinic input]
599630
_locale.nl_langinfo
600631
@@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
620651
result = result != NULL ? result : "";
621652
PyObject *pyresult;
622653
#ifdef __GLIBC__
654+
/* According to the POSIX specification the result must be
655+
* a sequence of semicolon-separated strings.
656+
* But in Glibc they are NUL-separated. */
623657
#ifdef ALT_DIGITS
624658
if (item == ALT_DIGITS && *result) {
625-
/* According to the POSIX specification the result must be
626-
* a sequence of up to 100 semicolon-separated strings.
627-
* But in Glibc they are NUL-separated. */
628-
Py_ssize_t i = 0;
629-
int count = 0;
630-
for (; count < 100 && result[i]; count++) {
631-
i += strlen(result + i) + 1;
632-
}
633-
char *buf = PyMem_Malloc(i);
634-
if (buf == NULL) {
635-
PyErr_NoMemory();
636-
pyresult = NULL;
637-
}
638-
else {
639-
memcpy(buf, result, i);
640-
/* Replace all NULs with semicolons. */
641-
i = 0;
642-
while (--count) {
643-
i += strlen(buf + i);
644-
buf[i++] = ';';
645-
}
646-
pyresult = PyUnicode_DecodeLocale(buf, NULL);
647-
PyMem_Free(buf);
648-
}
659+
pyresult = decode_strings(result, 100);
660+
}
661+
else
662+
#endif
663+
#ifdef ERA
664+
if (item == ERA && *result) {
665+
pyresult = decode_strings(result, SIZE_MAX);
649666
}
650667
else
651668
#endif

0 commit comments

Comments
 (0)