Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-45708: Support underscore separators when formatting Decimal objects #29438

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/library/decimal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,11 @@ Decimal objects
Underscores are allowed for grouping, as with integral and floating-point
literals in code.

.. versionchanged:: 3.11
The underscore grouping option in the :ref:`formatting mini-language
<formatspec>` is now supported for :class:`Decimal` objects:
``f"{Decimal(1234567):_}"`` gives ``'1_234_567'``.

Decimal floating point objects share many properties with the other built-in
numeric types such as :class:`float` and :class:`int`. All of the usual math
operations and special methods apply. Likewise, decimal objects can be
Expand Down
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,15 @@ New Modules
Improved Modules
================

decimal
-------

* Formatting of :class:`~decimal.Decimal` objects now supports underscores
for grouping, as outlined in :PEP:`515`. For example,
``f"{Decimal(1234567):_}"`` gives ``'1_234_567'``. Previously, only commas
were supported for grouping. (Contributed by Mark Dickinson in
:issue:`45708`.)

fractions
---------

Expand Down
2 changes: 1 addition & 1 deletion Lib/_pydecimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6154,7 +6154,7 @@ def _convert_for_comparison(self, other, equality_op=False):
(?P<alt>\#)?
(?P<zeropad>0)?
(?P<minimumwidth>(?!0)\d+)?
(?P<thousands_sep>,)?
(?P<thousands_sep>[_,])?
(?:\.(?P<precision>0|(?!0)\d+))?
(?P<type>[eEfFgGn%])?
\Z
Expand Down
34 changes: 33 additions & 1 deletion Lib/test/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ def test_formatting(self):
('\x00>10', '1.2345', '\x00\x00\x00\x001.2345'),
('\x00<10', '1.2345', '1.2345\x00\x00\x00\x00'),

# thousands separator
# thousands separator: ','
(',', '1234567', '1,234,567'),
(',', '123456', '123,456'),
(',', '12345', '12,345'),
Expand Down Expand Up @@ -1082,6 +1082,38 @@ def test_formatting(self):
(',e', '123456', '1.23456e+5'),
(',E', '123456', '1.23456E+5'),

# thousands separator: '_'
('_', '1234567', '1_234_567'),
('_', '123456', '123_456'),
('_', '12345', '12_345'),
('_', '1234', '1_234'),
('_', '123', '123'),
('_', '12', '12'),
('_', '1', '1'),
('_', '0', '0'),
('_', '-1234567', '-1_234_567'),
('_', '-123456', '-123_456'),
('7_', '123456', '123_456'),
('8_', '123456', ' 123_456'),
('08_', '123456', '0_123_456'), # special case: extra 0 needed
('+08_', '123456', '+123_456'), # but not if there's a sign
(' 08_', '123456', ' 123_456'),
('08_', '-123456', '-123_456'),
('+09_', '123456', '+0_123_456'),
# ... with fractional part...
('07_', '1234.56', '1_234.56'),
('08_', '1234.56', '1_234.56'),
('09_', '1234.56', '01_234.56'),
('010_', '1234.56', '001_234.56'),
('011_', '1234.56', '0_001_234.56'),
('012_', '1234.56', '0_001_234.56'),
('08_.1f', '1234.5', '01_234.5'),
# no thousands separators in fraction part
('_', '1.23456789', '1.23456789'),
('_%', '123.456789', '12_345.6789%'),
('_e', '123456', '1.23456e+5'),
('_E', '123456', '1.23456E+5'),

# issue 6850
('a=-7.0', '0.12345', 'aaaa0.1'),

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Support underscores in :class:`~decimal.Decimal` object formatting: for
example, ``f"{Decimal(1234567):_}"`` now gives ``'1_234_567'``. (Previously,
this gave an "invalid format string" :exc:`ValueError`.)
241 changes: 238 additions & 3 deletions Modules/_decimal/_decimal.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "mpdecimal.h"

#include <stdlib.h>
#include <locale.h>

#include "docstrings.h"

Expand Down Expand Up @@ -3208,6 +3209,240 @@ dict_get_item_string(PyObject *dict, const char *key, PyObject **valueobj, const
return 0;
}

/* Mini-language format string parser.

This is a version of libmpdec's mpd_parse_fmt_str, adapted for CPython. It
currently differs from the libmpdec version in only one respect: it
supports the use of underscores for thousands separators. */

/* Copy a single UTF-8 char to dest. See: The Unicode Standard, version 5.2,
   chapter 3.9: Well-formed UTF-8 byte sequences.

   dest must have room for 5 bytes: at most 4 encoded bytes plus the
   terminating NUL.  dest is always NUL-terminated on return.

   Returns:
      0      if s is the empty string (dest is set to "")
      1..4   the number of bytes in the well-formed sequence copied to dest
     -1      if s does not start with a well-formed UTF-8 sequence
             (dest is set to "") */
static int
_mpd_copy_utf8(char dest[5], const char *s)
{
    const unsigned char *cp = (const unsigned char *)s;
    unsigned char lb, ub;   /* allowed range of the second byte */
    int count, i;

    if (*cp == 0) {
        /* empty string */
        dest[0] = '\0';
        return 0;
    }
    else if (*cp <= 0x7f) {
        /* ascii */
        dest[0] = (char)*cp;
        dest[1] = '\0';
        return 1;
    }
    else if (0xc2 <= *cp && *cp <= 0xdf) {
        lb = 0x80; ub = 0xbf;
        count = 2;
    }
    else if (*cp == 0xe0) {
        /* second byte restricted to exclude overlong encodings */
        lb = 0xa0; ub = 0xbf;
        count = 3;
    }
    else if (0xe1 <= *cp && *cp <= 0xec) {
        /* The explicit lower bound matters: without it, the invalid lead
           bytes 0x80..0xc1 (stray continuation bytes and the overlong
           leads 0xc0/0xc1) would be accepted as 3-byte sequence starts. */
        lb = 0x80; ub = 0xbf;
        count = 3;
    }
    else if (*cp == 0xed) {
        /* second byte restricted to exclude UTF-16 surrogates */
        lb = 0x80; ub = 0x9f;
        count = 3;
    }
    else if (0xee <= *cp && *cp <= 0xef) {
        lb = 0x80; ub = 0xbf;
        count = 3;
    }
    else if (*cp == 0xf0) {
        /* second byte restricted to exclude overlong encodings */
        lb = 0x90; ub = 0xbf;
        count = 4;
    }
    else if (0xf1 <= *cp && *cp <= 0xf3) {
        lb = 0x80; ub = 0xbf;
        count = 4;
    }
    else if (*cp == 0xf4) {
        /* second byte restricted to stay at or below U+10FFFF */
        lb = 0x80; ub = 0x8f;
        count = 4;
    }
    else {
        /* invalid lead byte: 0x80..0xc1 or 0xf5..0xff */
        goto error;
    }

    dest[0] = (char)*cp++;
    if (*cp < lb || ub < *cp) {
        goto error;
    }
    dest[1] = (char)*cp++;
    /* Any remaining bytes are plain continuation bytes.  A premature NUL
       fails the range check, so we never read past the end of s. */
    for (i = 2; i < count; i++) {
        if (*cp < 0x80 || 0xbf < *cp) {
            goto error;
        }
        dest[i] = (char)*cp++;
    }
    dest[i] = '\0';

    return count;

error:
    dest[0] = '\0';
    return -1;
}

#if SIZE_MAX == MPD_SIZE_MAX
#define mpd_strtossize _mpd_strtossize
#else
#include <errno.h>

static inline mpd_ssize_t
mpd_strtossize(const char *s, char **end, int base)
{
int64_t retval;

errno = 0;
retval = _mpd_strtossize(s, end, base);
if (errno == 0 && (retval > MPD_SSIZE_MAX || retval < MPD_SSIZE_MIN)) {
errno = ERANGE;
}
if (errno == ERANGE) {
return (retval < 0) ? MPD_SSIZE_MIN : MPD_SSIZE_MAX;
}

return (mpd_ssize_t)retval;
}
#endif

/* Return nonzero if c is one of the alignment directives '<', '>', '=', '^'. */
static int
fmt_is_align_char(char c)
{
    return c == '<' || c == '>' || c == '=' || c == '^';
}

/* Parse the format specification string fmt into *spec.

   The fields are consumed in this order:

       [[fill]align][sign][0][minimumwidth][, or _][.precision][type]

   caps selects the default type when none is given: 'G' if nonzero,
   'g' otherwise.  Both ',' and '_' are accepted as thousands separators;
   either one sets the decimal point to "." and groups digits in threes.
   The 'n'/'N' types take dot, sep and grouping from localeconv() and may
   not be combined with an explicit separator.

   Returns 1 on success and 0 if fmt is not a valid format string. */
static int
mpd_parse_fmt_str_ex(mpd_spec_t *spec, const char *fmt, int caps)
{
    /* Non-const only because mpd_strtossize() wants a char ** end pointer;
       the string itself is never written through this pointer. */
    char *pos = (char *)fmt;
    int align_given = 0;
    int fill_then_align = 0;
    int fill_len;

    /* defaults */
    spec->min_width = 0;
    spec->prec = -1;
    spec->type = caps ? 'G' : 'g';
    spec->align = '>';
    spec->sign = '-';
    spec->dot = "";
    spec->sep = "";
    spec->grouping = "";

    /* Tentatively treat the first character as a UTF-8 fill character. */
    fill_len = _mpd_copy_utf8(spec->fill, pos);
    if (fill_len < 0) {
        return 0;
    }

    /* It really is a fill character only if an alignment directive
       follows it immediately. */
    if (*pos != '\0') {
        fill_then_align = fmt_is_align_char(pos[fill_len]);
    }

    if (fill_then_align) {
        pos += fill_len;
        spec->align = *pos++;
        align_given = 1;
    }
    else {
        /* No explicit fill: fall back to a space ... */
        spec->fill[0] = ' ';
        spec->fill[1] = '\0';
        /* ... and accept a bare alignment directive. */
        if (fmt_is_align_char(*pos)) {
            spec->align = *pos++;
            align_given = 1;
        }
    }

    /* sign formatting */
    if (*pos == '+' || *pos == '-' || *pos == ' ') {
        spec->sign = *pos++;
    }

    /* zero padding */
    if (*pos == '0') {
        /* Zero padding implies an alignment, so it must not be combined
           with an explicit one. */
        if (align_given) {
            return 0;
        }
        spec->align = 'z';
        spec->fill[0] = *pos++;
        spec->fill[1] = '\0';
    }

    /* minimum width */
    if (isdigit((unsigned char)*pos)) {
        /* A leading zero here would be a second zero-padding flag. */
        if (*pos == '0') {
            return 0;
        }
        errno = 0;
        spec->min_width = mpd_strtossize(pos, &pos, 10);
        if (errno == ERANGE || errno == EINVAL) {
            return 0;
        }
    }

    /* thousands separator: ',' or '_' */
    if (*pos == ',' || *pos == '_') {
        spec->dot = ".";
        spec->sep = (*pos == ',') ? "," : "_";
        spec->grouping = "\003\003";
        pos++;
    }

    /* fraction digits or significant digits */
    if (*pos == '.') {
        pos++;
        if (!isdigit((unsigned char)*pos)) {
            return 0;
        }
        errno = 0;
        spec->prec = mpd_strtossize(pos, &pos, 10);
        if (errno == ERANGE || errno == EINVAL) {
            return 0;
        }
    }

    /* type */
    switch (*pos) {
    case 'E': case 'e':
    case 'F': case 'f':
    case 'G': case 'g':
    case '%':
        spec->type = *pos++;
        break;

    case 'N': case 'n': {
        /* locale specific conversion */
        struct lconv *lc;

        /* separator has already been specified */
        if (*spec->sep) {
            return 0;
        }
        spec->type = (*pos++ == 'N') ? 'G' : 'g';
        lc = localeconv();
        spec->dot = lc->decimal_point;
        spec->sep = lc->thousands_sep;
        spec->grouping = lc->grouping;
        if (mpd_validate_lconv(spec) < 0) {
            return 0; /* GCOV_NOT_REACHED */
        }
        break;
    }

    default:
        /* no type given: keep the default chosen above */
        break;
    }

    /* The whole string must have been consumed. */
    return (*pos == '\0') ? 1 : 0;
}

/* Formatted representation of a PyDecObject. */
static PyObject *
dec_format(PyObject *dec, PyObject *args)
Expand Down Expand Up @@ -3239,7 +3474,7 @@ dec_format(PyObject *dec, PyObject *args)
}
if (size > 0 && fmt[0] == '\0') {
/* NUL fill character: must be replaced with a valid UTF-8 char
before calling mpd_parse_fmt_str(). */
before calling mpd_parse_fmt_str_ex(). */
replace_fillchar = 1;
fmt = dec_strdup(fmt, size);
if (fmt == NULL) {
Expand All @@ -3254,7 +3489,7 @@ dec_format(PyObject *dec, PyObject *args)
return NULL;
}

if (!mpd_parse_fmt_str(&spec, fmt, CtxCaps(context))) {
if (!mpd_parse_fmt_str_ex(&spec, fmt, CtxCaps(context))) {
PyErr_SetString(PyExc_ValueError,
"invalid format string");
goto finish;
Expand All @@ -3271,7 +3506,7 @@ dec_format(PyObject *dec, PyObject *args)
/* Values for decimal_point, thousands_sep and grouping can
be explicitly specified in the override dict. These values
take precedence over the values obtained from localeconv()
in mpd_parse_fmt_str(). The feature is not documented and
in mpd_parse_fmt_str_ex(). The feature is not documented and
is only used in test_decimal. */
if (!PyDict_Check(override)) {
PyErr_SetString(PyExc_TypeError,
Expand Down