Skip to content

Commit

Permalink
gh-103656: Transfer f-string buffers to parser to avoid use-after-free (GH-103896)
Browse files Browse the repository at this point in the history

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
  • Loading branch information
lysnikolaou and pablogsal authored Apr 27, 2023
1 parent 76632b8 commit 9169a56
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 66 deletions.
11 changes: 5 additions & 6 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -881,14 +881,13 @@ fstring_middle[expr_ty]:
| fstring_replacement_field
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
fstring_replacement_field[expr_ty]:
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
}
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
| invalid_replacement_field
fstring_conversion[expr_ty]:
fstring_conversion[ResultTokenWithMetadata*]:
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
fstring_full_format_spec[expr_ty]:
| ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
fstring_full_format_spec[ResultTokenWithMetadata*]:
| colon=':' spec=fstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
fstring_format_spec[expr_ty]:
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
| fstring_replacement_field
Expand Down
14 changes: 14 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,5 +1535,19 @@ def test_not_closing_quotes(self):
self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
['f"""', "f'''"])

def test_syntax_error_after_debug(self):
    """Malformed replacement fields that follow a `{1=}` debug field must raise SyntaxError."""
    # The field after the debug expression has no valid expression at all.
    missing_expression_cases = [
        "f'{1=}{;'",
        "f'{1=}{+;'",
        "f'{1=}{2}{;'",
        "f'{1=}{3}{;'",
    ]
    # The field after the debug expression starts validly but is followed by ';'.
    bad_terminator_cases = [
        "f'{1=}{1;'",
        "f'{1=}{1;}'",
    ]

    self.assertAllRaise(
        SyntaxError,
        "f-string: expecting a valid expression after '{'",
        missing_expression_cases,
    )
    self.assertAllRaise(
        SyntaxError,
        "f-string: expecting '=', or '!', or ':', or '}'",
        bad_terminator_cases,
    )

if __name__ == '__main__':
unittest.main()
92 changes: 51 additions & 41 deletions Parser/action_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,17 +965,43 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
return 0;
}

expr_ty
_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
/* Allocate a ResultTokenWithMetadata from the parser's arena and fill it with
 * the given `result` and `metadata` pointers. The pointers are stored as-is;
 * no reference count is touched here.
 *
 * Returns the new wrapper, or NULL if the arena allocation fails. */
static ResultTokenWithMetadata *
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
{
    ResultTokenWithMetadata *wrapped = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
    if (wrapped != NULL) {
        wrapped->result = result;
        wrapped->metadata = metadata;
    }
    return wrapped;
}

/* Validate an f-string conversion (`!name`) and wrap it for the caller.
 *
 * conv_token is the "!" token and conv is the NAME expression that follows it.
 * The name must start on the same line and at the exact column where the "!"
 * token ends; otherwise a SyntaxError covering both is raised.
 *
 * On success, returns conv paired with conv_token's metadata object.
 *
 * NOTE(review): this listing comes from a diff view with the +/- markers
 * stripped. `symbol, conv,` and `return conv;` look like the removed
 * (pre-change) lines sitting next to their replacements -- confirm against
 * the real file before editing. */
ResultTokenWithMetadata *
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
{
/* Reject any gap (or line break) between the "!" and the conversion name. */
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
/* NOTE(review): "exclamanation" in the message below is a typo for
 * "exclamation"; left untouched because tests may match the exact text. */
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
symbol, conv,
conv_token, conv,
"f-string: conversion type must come right after the exclamanation mark"
);
}
return conv;
/* Carry the "!" token's metadata along with the parsed conversion name. */
return result_token_with_metadata(p, conv, conv_token->metadata);
}

/* Build the format-spec part of an f-string replacement field.
 *
 * Joins the `spec` pieces into a JoinedStr node located at the given
 * position and pairs it with the metadata object of the `colon` token.
 *
 * Returns NULL when `spec` is NULL, when the JoinedStr node cannot be
 * created, or when the wrapper allocation fails. The `arena` parameter is not
 * read; allocations go through p->arena. */
ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
                                int end_lineno, int end_col_offset, PyArena *arena)
{
    expr_ty joined = NULL;

    if (spec != NULL) {
        joined = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);
    }
    if (joined == NULL) {
        return NULL;
    }

    return result_token_with_metadata(p, joined, colon->metadata);
}

const char *
_PyPegen_get_expr_name(expr_ty e)
Expand Down Expand Up @@ -1197,27 +1223,6 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq

// Fstring stuff

/* Decode the current tokenizer mode's last-expression buffer as UTF-8 and
 * wrap the resulting string in a Constant node at the given position.
 *
 * The number of bytes decoded is last_expr_size - last_expr_end. The decoded
 * string is registered with the parser's arena.
 *
 * Returns NULL if decoding or arena registration fails.
 *
 * NOTE(review): this reads a buffer held by the tokenizer mode; the commit
 * title suggests that is the use-after-free being addressed -- confirm. */
static expr_ty
decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
                      int end_col_offset)
{
    tokenizer_mode *mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);

    assert(mode->last_expr_buffer != NULL);
    assert(mode->last_expr_size >= 0 && mode->last_expr_end >= 0);

    PyObject *text = PyUnicode_DecodeUTF8(mode->last_expr_buffer,
                                          mode->last_expr_size - mode->last_expr_end,
                                          NULL);
    if (text == NULL) {
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, text) < 0) {
        /* Registration failed, so we still hold the reference: drop it. */
        Py_DECREF(text);
        return NULL;
    }

    return _PyAST_Constant(text, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
}

static expr_ty
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
assert(PyUnicode_CheckExact(constant->v.Constant.value));
Expand Down Expand Up @@ -1386,19 +1391,20 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
}

expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
PyArena *arena) {
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
int end_lineno, int end_col_offset, PyArena *arena) {
int conversion_val = -1;
if (conversion != NULL) {
assert(conversion->kind == Name_kind);
Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
expr_ty conversion_expr = (expr_ty) conversion->result;
assert(conversion_expr->kind == Name_kind);
Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);

if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
!(first == 's' || first == 'r' || first == 'a')) {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
conversion->v.Name.id);
conversion_expr->v.Name.id);
return NULL;
}

Expand All @@ -1410,30 +1416,34 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
}

expr_ty formatted_value = _PyAST_FormattedValue(
expression, conversion_val, format,
expression, conversion_val, format ? (expr_ty) format->result : NULL,
lineno, col_offset, end_lineno,
end_col_offset, arena
);

if (debug) {
/* Find the non whitespace token after the "=" */
int debug_end_line, debug_end_offset;
PyObject *debug_metadata;

if (conversion) {
debug_end_line = conversion->lineno;
debug_end_offset = conversion->col_offset;
debug_end_line = ((expr_ty) conversion->result)->lineno;
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
debug_metadata = conversion->metadata;
}
else if (format) {
debug_end_line = format->lineno;
debug_end_offset = format->col_offset + 1; // HACK: ??
debug_end_line = ((expr_ty) format->result)->lineno;
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
debug_metadata = format->metadata;
}
else {
debug_end_line = end_lineno;
debug_end_offset = end_col_offset;
debug_metadata = closing_brace->metadata;
}

expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
debug_end_line, debug_end_offset - 1);
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
debug_end_offset - 1, p->arena);
if (!debug_text) {
return NULL;
}
Expand Down
24 changes: 12 additions & 12 deletions Parser/parser.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 17 additions & 3 deletions Parser/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,16 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
return -1;
}

/* Move the tokenizer-provided metadata object (if any) onto the parser token.
 * On success the object is registered with the parser's arena and
 * new_token->metadata is reset to NULL so the caller will not release it. */
parser_token->metadata = NULL;
if (new_token->metadata != NULL) {
    if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
        /* parser_token->metadata is still NULL at this point, so passing it
         * to Py_DECREF would dereference NULL. The object that was not
         * registered is new_token->metadata: release that one, and clear the
         * pointer so nothing else tries to release it again. */
        Py_CLEAR(new_token->metadata);
        return -1;
    }
    parser_token->metadata = new_token->metadata;
    new_token->metadata = NULL;
}

parser_token->level = new_token->level;
parser_token->lineno = new_token->lineno;
parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
Expand Down Expand Up @@ -198,6 +208,7 @@ int
_PyPegen_fill_token(Parser *p)
{
struct token new_token;
new_token.metadata = NULL;
int type = _PyTokenizer_Get(p->tok, &new_token);

// Record and skip '# type: ignore' comments
Expand All @@ -206,14 +217,14 @@ _PyPegen_fill_token(Parser *p)
char *tag = PyMem_Malloc(len + 1);
if (tag == NULL) {
PyErr_NoMemory();
return -1;
goto error;
}
strncpy(tag, new_token.start, len);
tag[len] = '\0';
// Ownership of tag passes to the growable array
if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
PyErr_NoMemory();
return -1;
goto error;
}
type = _PyTokenizer_Get(p->tok, &new_token);
}
Expand All @@ -234,11 +245,14 @@ _PyPegen_fill_token(Parser *p)

// Check if we are at the limit of the token array capacity and resize if needed
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
return -1;
goto error;
}

Token *t = p->tokens[p->fill];
return initialize_token(p, t, &new_token, type);
error:
Py_XDECREF(new_token.metadata);
return -1;
}

#if defined(Py_DEBUG)
Expand Down
13 changes: 11 additions & 2 deletions Parser/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ typedef struct {
int level;
int lineno, col_offset, end_lineno, end_col_offset;
Memo *memo;
PyObject *metadata;
} Token;

typedef struct {
Expand Down Expand Up @@ -118,6 +119,11 @@ typedef struct {
int is_keyword;
} KeywordOrStarred;

/* A parse result bundled with the metadata object of a related token.
 * Both fields are plain pointers set by whoever builds the struct. */
typedef struct {
/* The parsed value; consumers cast it to the concrete type they expect. */
void *result;
/* Metadata object taken from a Token's `metadata` field; may be NULL. */
PyObject *metadata;
} ResultTokenWithMetadata;

// Internal parser functions
#if defined(Py_DEBUG)
void _PyPegen_clear_memo_statistics(void);
Expand Down Expand Up @@ -310,7 +316,8 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
asdl_arg_seq *, asdl_seq *, StarEtc *);
arguments_ty _PyPegen_empty_arguments(Parser *);
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
int, int, int, int, PyArena *);
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
Expand All @@ -329,7 +336,9 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
int, int, PyArena *);
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
Expand Down
Loading

0 comments on commit 9169a56

Please sign in to comment.