Skip to content

Commit f39a07b

Browse files
authored
gh-87790: support thousands separators for formatting fractional part of floats (#125304)
```pycon
>>> f"{123_456.123_456:_._f}"  # Whole and fractional
'123_456.123_456'
>>> f"{123_456.123_456:_f}"    # Integer component only
'123_456.123456'
>>> f"{123_456.123_456:._f}"   # Fractional component only
'123456.123_456'
>>> f"{123_456.123_456:.4_f}"  # with precision
'123456.123_5'
```
1 parent fa6a814 commit f39a07b

File tree

9 files changed

+218
-45
lines changed

9 files changed

+218
-45
lines changed

Doc/library/string.rst

+23-3
Original file line numberDiff line numberDiff line change
@@ -319,14 +319,19 @@ non-empty format specification typically modifies the result.
319319
The general form of a *standard format specifier* is:
320320

321321
.. productionlist:: format-spec
322-
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`]
322+
format_spec: [`options`][`width_and_precision`][`type`]
323+
options: [[`fill`]`align`][`sign`]["z"]["#"]["0"]
323324
fill: <any character>
324325
align: "<" | ">" | "=" | "^"
325326
sign: "+" | "-" | " "
327+
width_and_precision: [`width_with_grouping`][`precision_with_grouping`]
328+
width_with_grouping: [`width`][`grouping_option`]
329+
precision_with_grouping: "." `precision`[`grouping_option`] | "." `grouping_option`
326330
width: `~python-grammar:digit`+
327331
grouping_option: "_" | ","
328332
precision: `~python-grammar:digit`+
329-
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
333+
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g"
334+
: | "G" | "n" | "o" | "s" | "x" | "X" | "%"
330335

331336
If a valid *align* value is specified, it can be preceded by a *fill*
332337
character that can be any character and defaults to a space if omitted.
@@ -458,6 +463,13 @@ indicates the maximum field size - in other words, how many characters will be
458463
used from the field content. The *precision* is not allowed for integer
459464
presentation types.
460465

466+
The ``'_'`` or ``','`` option after *precision* means the use of an underscore
467+
or a comma for a thousands separator of the fractional part for floating-point
468+
presentation types.
469+
470+
.. versionchanged:: 3.14
471+
Support thousands separators for the fractional part.
472+
461473
Finally, the *type* determines how the data should be presented.
462474

463475
The available string presentation types are:
@@ -704,10 +716,18 @@ Replacing ``%x`` and ``%o`` and converting the value to different bases::
704716
>>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42)
705717
'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010'
706718

707-
Using the comma as a thousands separator::
719+
Using the comma or the underscore as a thousands separator::
708720

709721
>>> '{:,}'.format(1234567890)
710722
'1,234,567,890'
723+
>>> '{:_}'.format(1234567890)
724+
'1_234_567_890'
725+
>>> '{:_}'.format(123456789.123456789)
726+
'123_456_789.12345679'
727+
>>> '{:._}'.format(123456789.123456789)
728+
'123456789.123_456_79'
729+
>>> '{:_._}'.format(123456789.123456789)
730+
'123_456_789.123_456_79'
711731

712732
Expressing a percentage::
713733

Doc/whatsnew/3.14.rst

+5
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,11 @@ Other language changes
336336
making it a :term:`generic type`.
337337
(Contributed by Brian Schubert in :gh:`126012`.)
338338

339+
* Support underscore and comma as thousands separators in the fractional part
340+
for floating-point presentation types of the new-style string formatting
341+
(with :func:`format` or :ref:`f-strings`).
342+
(Contributed by Sergey B Kirpichev in :gh:`87790`.)
343+
339344
* ``\B`` in :mod:`regular expression <re>` now matches empty input string.
340345
Now it is always the opposite of ``\b``.
341346
(Contributed by Serhiy Storchaka in :gh:`124130`.)

Include/internal/pycore_unicodeobject.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
246246
Py_ssize_t min_width,
247247
const char *grouping,
248248
PyObject *thousands_sep,
249-
Py_UCS4 *maxchar);
249+
Py_UCS4 *maxchar,
250+
int forward);
250251

251252
/* --- Misc functions ----------------------------------------------------- */
252253

Lib/test/test_float.py

+22
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,28 @@ def test_format(self):
754754
self.assertEqual(format(INF, 'f'), 'inf')
755755
self.assertEqual(format(INF, 'F'), 'INF')
756756

757+
# thousands separators
758+
x = 123_456.123_456
759+
self.assertEqual(format(x, '_f'), '123_456.123456')
760+
self.assertEqual(format(x, ',f'), '123,456.123456')
761+
self.assertEqual(format(x, '._f'), '123456.123_456')
762+
self.assertEqual(format(x, '.,f'), '123456.123,456')
763+
self.assertEqual(format(x, '_._f'), '123_456.123_456')
764+
self.assertEqual(format(x, ',.,f'), '123,456.123,456')
765+
self.assertEqual(format(x, '.10_f'), '123456.123_456_000_0')
766+
self.assertEqual(format(x, '.10,f'), '123456.123,456,000,0')
767+
self.assertEqual(format(x, '>21._f'), ' 123456.123_456')
768+
self.assertEqual(format(x, '<21._f'), '123456.123_456 ')
769+
self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05')
770+
self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05')
771+
772+
self.assertRaises(ValueError, format, x, '._6f')
773+
self.assertRaises(ValueError, format, x, '.,_f')
774+
self.assertRaises(ValueError, format, x, '.6,_f')
775+
self.assertRaises(ValueError, format, x, '.6_,f')
776+
self.assertRaises(ValueError, format, x, '.6_n')
777+
self.assertRaises(ValueError, format, x, '.6,n')
778+
757779
@support.requires_IEEE_754
758780
def test_format_testfile(self):
759781
with open(format_testfile, encoding="utf-8") as testfile:

Lib/test/test_format.py

+4
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,15 @@ def test_with_a_commas_and_an_underscore_in_format_specifier(self):
515515
error_msg = re.escape("Cannot specify both ',' and '_'.")
516516
with self.assertRaisesRegex(ValueError, error_msg):
517517
'{:,_}'.format(1)
518+
with self.assertRaisesRegex(ValueError, error_msg):
519+
'{:.,_f}'.format(1.1)
518520

519521
def test_with_an_underscore_and_a_comma_in_format_specifier(self):
520522
error_msg = re.escape("Cannot specify both ',' and '_'.")
521523
with self.assertRaisesRegex(ValueError, error_msg):
522524
'{:_,}'.format(1)
525+
with self.assertRaisesRegex(ValueError, error_msg):
526+
'{:._,f}'.format(1.1)
523527

524528
def test_better_error_message_format(self):
525529
# https://bugs.python.org/issue20524
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Support underscore and comma as thousands separators in the fractional part for
2+
floating-point presentation types of the new-style string formatting (with
3+
:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev.

Objects/stringlib/localeutil.h

+21-6
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
4747
PyObject *digits, Py_ssize_t *digits_pos,
4848
Py_ssize_t n_chars, Py_ssize_t n_zeros,
4949
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
50-
Py_UCS4 *maxchar)
50+
Py_UCS4 *maxchar, int forward)
5151
{
5252
if (!writer) {
5353
/* if maxchar > 127, maxchar is already set */
@@ -59,24 +59,39 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
5959
}
6060

6161
if (thousands_sep) {
62-
*buffer_pos -= thousands_sep_len;
63-
62+
if (!forward) {
63+
*buffer_pos -= thousands_sep_len;
64+
}
6465
/* Copy the thousands_sep chars into the buffer. */
6566
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
6667
thousands_sep, 0,
6768
thousands_sep_len);
69+
if (forward) {
70+
*buffer_pos += thousands_sep_len;
71+
}
6872
}
6973

70-
*buffer_pos -= n_chars;
71-
*digits_pos -= n_chars;
74+
if (!forward) {
75+
*buffer_pos -= n_chars;
76+
*digits_pos -= n_chars;
77+
}
7278
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
7379
digits, *digits_pos,
7480
n_chars);
81+
if (forward) {
82+
*buffer_pos += n_chars;
83+
*digits_pos += n_chars;
84+
}
7585

7686
if (n_zeros) {
77-
*buffer_pos -= n_zeros;
87+
if (!forward) {
88+
*buffer_pos -= n_zeros;
89+
}
7890
int kind = PyUnicode_KIND(writer->buffer);
7991
void *data = PyUnicode_DATA(writer->buffer);
8092
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
93+
if (forward) {
94+
*buffer_pos += n_zeros;
95+
}
8196
}
8297
}

Objects/unicodeobject.c

+7-6
Original file line numberDiff line numberDiff line change
@@ -9772,7 +9772,8 @@ _PyUnicode_InsertThousandsGrouping(
97729772
Py_ssize_t min_width,
97739773
const char *grouping,
97749774
PyObject *thousands_sep,
9775-
Py_UCS4 *maxchar)
9775+
Py_UCS4 *maxchar,
9776+
int forward)
97769777
{
97779778
min_width = Py_MAX(0, min_width);
97789779
if (writer) {
@@ -9809,14 +9810,14 @@ _PyUnicode_InsertThousandsGrouping(
98099810
should be an empty string */
98109811
assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
98119812

9812-
digits_pos = d_pos + n_digits;
9813+
digits_pos = d_pos + (forward ? 0 : n_digits);
98139814
if (writer) {
9814-
buffer_pos = writer->pos + n_buffer;
9815+
buffer_pos = writer->pos + (forward ? 0 : n_buffer);
98159816
assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
98169817
assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
98179818
}
98189819
else {
9819-
buffer_pos = n_buffer;
9820+
buffer_pos = forward ? 0 : n_buffer;
98209821
}
98219822

98229823
if (!writer) {
@@ -9838,7 +9839,7 @@ _PyUnicode_InsertThousandsGrouping(
98389839
digits, &digits_pos,
98399840
n_chars, n_zeros,
98409841
use_separator ? thousands_sep : NULL,
9841-
thousands_sep_len, maxchar);
9842+
thousands_sep_len, maxchar, forward);
98429843

98439844
/* Use a separator next time. */
98449845
use_separator = 1;
@@ -9867,7 +9868,7 @@ _PyUnicode_InsertThousandsGrouping(
98679868
digits, &digits_pos,
98689869
n_chars, n_zeros,
98699870
use_separator ? thousands_sep : NULL,
9870-
thousands_sep_len, maxchar);
9871+
thousands_sep_len, maxchar, forward);
98719872
}
98729873
return count;
98739874
}

0 commit comments

Comments
 (0)