Skip to content

Commit

Permalink
gh-106320: Move _PyUnicodeWriter to the internal C API (#106342)
Browse files Browse the repository at this point in the history
Move also _PyUnicode_FormatAdvancedWriter().

CJK codecs and multibytecodec.c now define the Py_BUILD_CORE_MODULE
macro.
  • Loading branch information
vstinner authored Jul 3, 2023
1 parent d65b783 commit 5ccbbe5
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 144 deletions.
139 changes: 0 additions & 139 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,131 +480,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
Py_ssize_t start,
Py_ssize_t end);

/* --- _PyUnicodeWriter API ----------------------------------------------- */

typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;

/* Initialize a Unicode writer.
*
* By default, the minimum buffer size is 0 character and overallocation is
* disabled. Set min_length, min_char and overallocate attributes to control
* the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);

/* --- Manage the default encoding ---------------------------------------- */

/* Returns a pointer to the default encoding (UTF-8) of the
Expand Down Expand Up @@ -774,20 +649,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
PyObject *sepobj
);

/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);

/* === Characters Type APIs =============================================== */

/* These should not be used directly. Use the Py_UNICODE_IS* and
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_complexobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_unicodeobject.h" // _PyUnicodeWriter

/* Operations on complex numbers from complexmodule.c */

PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex);
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_floatobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ extern "C" {
#endif


#include "pycore_unicodeobject.h" // _PyUnicodeWriter

/* runtime lifecycle */

extern void _PyFloat_InitState(PyInterpreterState *);
Expand Down
145 changes: 143 additions & 2 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,148 @@ extern "C" {
void _PyUnicode_ExactDealloc(PyObject *op);
Py_ssize_t _PyUnicode_InternedSize(void);

/* runtime lifecycle */
/* --- _PyUnicodeWriter API ----------------------------------------------- */

typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;

/* Initialize a Unicode writer.
*
* By default, the minimum buffer size is 0 character and overallocation is
* disabled. Set min_length, min_char and overallocate attributes to control
* the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);

/* --- Methods & Slots ---------------------------------------------------- */

/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);

/* --- Runtime lifecycle -------------------------------------------------- */

extern void _PyUnicode_InitState(PyInterpreterState *);
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
Expand All @@ -24,7 +165,7 @@ extern void _PyUnicode_FiniTypes(PyInterpreterState *);

extern PyTypeObject _PyUnicodeASCIIIter_Type;

/* other API */
/* --- Other API ---------------------------------------------------------- */

struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
Expand Down
4 changes: 4 additions & 0 deletions Modules/cjkcodecs/cjkcodecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_

#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "multibytecodec.h"

Expand Down
4 changes: 4 additions & 0 deletions Modules/cjkcodecs/multibytecodec.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/

#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "structmember.h" // PyMemberDef
#include "multibytecodec.h"
Expand Down
2 changes: 2 additions & 0 deletions Modules/cjkcodecs/multibytecodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
extern "C" {
#endif

#include "pycore_unicodeobject.h" // _PyUnicodeWriter

#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
Expand Down
12 changes: 9 additions & 3 deletions Tools/c-analyzer/c_parser/preprocessor/gcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

from . import common as _common

# Modules/socketmodule.h uses pycore_time.h which needs the Py_BUILD_CORE
# macro. Usually it's defined by the C file which includes it.
# Other header files have a similar issue.
NEED_BUILD_CORE = {
'cjkcodecs.h',
'multibytecodec.h',
'socketmodule.h',
}

TOOL = 'gcc'

Expand Down Expand Up @@ -62,9 +70,7 @@ def preprocess(filename,
filename = _normpath(filename, cwd)

postargs = POST_ARGS
if os.path.basename(filename) == 'socketmodule.h':
# Modules/socketmodule.h uses pycore_time.h which needs Py_BUILD_CORE.
# Usually it's defined by the C file which includes it.
if os.path.basename(filename) in NEED_BUILD_CORE:
postargs += ('-DPy_BUILD_CORE=1',)

text = _common.preprocess(
Expand Down

0 comments on commit 5ccbbe5

Please sign in to comment.