From 71949f3b0dfdacbddc0c012accefbb8e8ec759d4 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Mon, 16 May 2016 14:10:54 +0800 Subject: [PATCH 01/10] libfmt_macros: resolve all implicit refs while parsing --- src/libfmt_macros/lib.rs | 57 ++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 07d26cddfdb87..983f92d9e19d8 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -144,6 +144,8 @@ pub struct Parser<'a> { cur: iter::Peekable>, /// Error messages accumulated during parsing pub errors: Vec, + /// Current position of implicit positional argument pointer + curarg: usize, } impl<'a> Iterator for Parser<'a> { @@ -186,6 +188,7 @@ impl<'a> Parser<'a> { input: s, cur: s.char_indices().peekable(), errors: vec![], + curarg: 0, } } @@ -259,9 +262,41 @@ impl<'a> Parser<'a> { /// Parses an Argument structure, or what's contained within braces inside /// the format string fn argument(&mut self) -> Argument<'a> { + let mut pos = self.position(); + let mut format = self.format(); + + // Resolve CountIsNextParam's into absolute references. + // Current argument's position must be known so this is done after + // format parsing. + // Curiously, currently {:.*} for named arguments is implemented, + // and it consumes a positional arg slot just like a positional {:.*} + // does. The current behavior is reproduced to prevent any + // incompatibilities. + match format.precision { + CountIsNextParam => { + // eat the current implicit arg + let i = self.curarg; + self.curarg += 1; + format.precision = CountIsParam(i); + } + _ => {} + } + + // Resolve ArgumentNext's into absolute references. + // This must come after count resolution because we may consume one + // more arg if precision is CountIsNextParam. + match pos { + ArgumentNext => { + let i = self.curarg; + self.curarg += 1; + pos = ArgumentIs(i); + } + _ => {} + } + Argument { - position: self.position(), - format: self.format(), + position: pos, + format: format, } } @@ -487,7 +522,7 @@ mod tests { fn format_nothing() { same("{}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: fmtdflt(), })]); } @@ -565,7 +600,7 @@ mod tests { fn format_counts() { same("{:10s}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, @@ -577,7 +612,7 @@ mod tests { })]); same("{:10$.10s}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, @@ -589,19 +624,19 @@ mod tests { })]); same("{:.*s}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(1), format: FormatSpec { fill: None, align: AlignUnknown, flags: 0, - precision: CountIsNextParam, + precision: CountIsParam(0), width: CountImplied, ty: "s", }, })]); same("{:.10$s}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, @@ -613,7 +648,7 @@ mod tests { })]); same("{:a$.b$s}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, @@ -628,7 +663,7 @@ mod tests { fn format_flags() { same("{:-}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, @@ -640,7 +675,7 @@ mod tests { })]); same("{:+#}", &[NextArgument(Argument { - position: ArgumentNext, + position: ArgumentIs(0), format: FormatSpec { fill: None, align: AlignUnknown, From 06b034ae8b418beab7d0767a9a8d29023558d701 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Tue, 17 May 2016 01:02:42 +0800 Subject: [PATCH 02/10] format: remove all implicit ref handling outside of libfmt_macros format: beautifully get rid of ArgumentNext and CountIsNextParam Now that CountIsNextParam and ArgumentNext are resolved during parse, the need for handling them outside of libfmt_macros is obviated. Note: *one* instance of implicit reference handling still remains, and that's for implementing `all_args_simple`. It's trivial enough though, so in this case it may be tolerable. --- src/libfmt_macros/lib.rs | 59 +++++++++++++------------------- src/librustc_typeck/check/mod.rs | 2 +- src/libsyntax_ext/format.rs | 29 +++++++--------- 3 files changed, 36 insertions(+), 54 deletions(-) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 983f92d9e19d8..e7d401f0929fe 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -80,8 +80,6 @@ pub struct FormatSpec<'a> { /// Enum describing where an argument for a format can be located. #[derive(Copy, Clone, PartialEq)] pub enum Position<'a> { - /// The argument will be in the next position. This is the default. - ArgumentNext, /// The argument is located at a specific index. ArgumentIs(usize), /// The argument has a name. @@ -127,8 +125,6 @@ pub enum Count<'a> { CountIsName(&'a str), /// The count is specified by the argument at the given index. CountIsParam(usize), - /// The count is specified by the next parameter. - CountIsNextParam, /// The count is implied and cannot be explicitly specified. CountImplied, } @@ -262,37 +258,18 @@ impl<'a> Parser<'a> { /// Parses an Argument structure, or what's contained within braces inside /// the format string fn argument(&mut self) -> Argument<'a> { - let mut pos = self.position(); - let mut format = self.format(); - - // Resolve CountIsNextParam's into absolute references. - // Current argument's position must be known so this is done after - // format parsing. - // Curiously, currently {:.*} for named arguments is implemented, - // and it consumes a positional arg slot just like a positional {:.*} - // does. The current behavior is reproduced to prevent any - // incompatibilities. - match format.precision { - CountIsNextParam => { - // eat the current implicit arg - let i = self.curarg; - self.curarg += 1; - format.precision = CountIsParam(i); - } - _ => {} - } + let pos = self.position(); + let format = self.format(); - // Resolve ArgumentNext's into absolute references. - // This must come after count resolution because we may consume one - // more arg if precision is CountIsNextParam. - match pos { - ArgumentNext => { + // Resolve position after parsing format spec. + let pos = match pos { + Some(position) => position, + None => { let i = self.curarg; self.curarg += 1; - pos = ArgumentIs(i); + ArgumentIs(i) } - _ => {} - } + }; Argument { position: pos, @@ -302,13 +279,19 @@ impl<'a> Parser<'a> { /// Parses a positional argument for a format. This could either be an /// integer index of an argument, a named argument, or a blank string. - fn position(&mut self) -> Position<'a> { + /// Returns `Some(parsed_position)` if the position is not implicitly + /// consuming a macro argument, `None` if it's the case. + fn position(&mut self) -> Option> { if let Some(i) = self.integer() { - ArgumentIs(i) + Some(ArgumentIs(i)) } else { match self.cur.peek() { - Some(&(_, c)) if c.is_alphabetic() => ArgumentNamed(self.word()), - _ => ArgumentNext, + Some(&(_, c)) if c.is_alphabetic() => Some(ArgumentNamed(self.word())), + + // This is an `ArgumentNext`. + // Record the fact and do the resolution after parsing the + // format spec, to make things like `{:.*}` work. + _ => None, } } } @@ -375,7 +358,11 @@ impl<'a> Parser<'a> { } if self.consume('.') { if self.consume('*') { - spec.precision = CountIsNextParam; + // Resolve `CountIsNextParam`. + // We can do this immediately as `position` is resolved later. + let i = self.curarg; + self.curarg += 1; + spec.precision = CountIsParam(i); } else { spec.precision = self.count(); } diff --git a/src/librustc_typeck/check/mod.rs b/src/librustc_typeck/check/mod.rs index 8daa16180a905..fc1d2236f3fea 100644 --- a/src/librustc_typeck/check/mod.rs +++ b/src/librustc_typeck/check/mod.rs @@ -881,7 +881,7 @@ fn check_on_unimplemented<'a, 'tcx>(ccx: &CrateCtxt<'a, 'tcx>, } }, // `{:1}` and `{}` are not to be used - Position::ArgumentIs(_) | Position::ArgumentNext => { + Position::ArgumentIs(_) => { span_err!(ccx.tcx.sess, attr.span, E0231, "only named substitution \ parameters are allowed"); diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index dc572e652c671..c0150e5ce1d95 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -68,8 +68,10 @@ struct Context<'a, 'b:'a> { name_positions: HashMap, - /// Updated as arguments are consumed - next_arg: usize, + /// Current position of the implicit positional arg pointer, as if it + /// still existed in this phase of processing. + /// Used only for `all_pieces_simple` tracking in `trans_piece`. + curarg: usize, } /// Parses the arguments from the given list of tokens, returning None @@ -159,11 +161,6 @@ impl<'a, 'b> Context<'a, 'b> { // argument second, if it's an implicit positional parameter // it's written second, so it should come after width/precision. let pos = match arg.position { - parse::ArgumentNext => { - let i = self.next_arg; - self.next_arg += 1; - Exact(i) - } parse::ArgumentIs(i) => Exact(i), parse::ArgumentNamed(s) => Named(s.to_string()), }; @@ -183,11 +180,6 @@ impl<'a, 'b> Context<'a, 'b> { parse::CountIsName(s) => { self.verify_arg_type(Named(s.to_string()), Unsigned); } - parse::CountIsNextParam => { - let next_arg = self.next_arg; - self.verify_arg_type(Exact(next_arg), Unsigned); - self.next_arg += 1; - } } } @@ -309,7 +301,6 @@ impl<'a, 'b> Context<'a, 'b> { count("Param", Some(self.ecx.expr_usize(sp, i))) } parse::CountImplied => count("Implied", None), - parse::CountIsNextParam => count("NextParam", None), parse::CountIsName(n) => { let i = match self.name_positions.get(n) { Some(&i) => i, @@ -355,8 +346,6 @@ impl<'a, 'b> Context<'a, 'b> { } }; match arg.position { - // These two have a direct mapping - parse::ArgumentNext => pos("Next", None), parse::ArgumentIs(i) => pos("At", Some(i)), // Named arguments are converted to positional arguments @@ -373,7 +362,13 @@ impl<'a, 'b> Context<'a, 'b> { }; let simple_arg = parse::Argument { - position: parse::ArgumentNext, + position: { + // We don't have ArgumentNext any more, so we have to + // track the current argument ourselves. + let i = self.curarg; + self.curarg += 1; + parse::ArgumentIs(i) + }, format: parse::FormatSpec { fill: arg.format.fill, align: parse::AlignUnknown, @@ -640,7 +635,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, name_positions: HashMap::new(), name_types: HashMap::new(), name_ordering: name_ordering, - next_arg: 0, + curarg: 0, literal: String::new(), pieces: Vec::new(), str_pieces: Vec::new(), From 0e2a96321a368c62cf98e73c67507a9bcdded01e Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sun, 5 Jun 2016 18:01:37 +0800 Subject: [PATCH 03/10] syntax_ext: format: separate verification and translation of pieces --- src/libsyntax_ext/format.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index c0150e5ce1d95..8a329126ca85b 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -651,21 +651,27 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, }; let mut parser = parse::Parser::new(&fmt); + let mut pieces = vec![]; loop { match parser.next() { Some(piece) => { if !parser.errors.is_empty() { break } cx.verify_piece(&piece); - if let Some(piece) = cx.trans_piece(&piece) { - let s = cx.trans_literal_string(); - cx.str_pieces.push(s); - cx.pieces.push(piece); - } + pieces.push(piece); } None => break } } + + for piece in pieces { + if let Some(piece) = cx.trans_piece(&piece) { + let s = cx.trans_literal_string(); + cx.str_pieces.push(s); + cx.pieces.push(piece); + } + } + if !parser.errors.is_empty() { cx.ecx.span_err(cx.fmtsp, &format!("invalid format string: {}", parser.errors.remove(0))); From 1ace7f0d49f9e482efe680d691a9d1a8f7e53f60 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sun, 5 Jun 2016 20:39:05 +0800 Subject: [PATCH 04/10] syntax_ext: format: resolve named arguments early Converts named argument references into indices, right after verification as suggested by @alexcrichton. This drastically simplifies the whole process! --- src/libsyntax_ext/format.rs | 147 +++++++++++++++--------------------- 1 file changed, 62 insertions(+), 85 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index 8a329126ca85b..2635213f58e60 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -46,15 +46,12 @@ struct Context<'a, 'b:'a> { /// Parsed argument expressions and the types that we've found so far for /// them. + /// Named expressions are resolved early, and are appended to the end of + /// argument expressions. args: Vec>, arg_types: Vec>, - /// Parsed named expressions and the types that we've found for them so far. - /// Note that we keep a side-array of the ordering of the named arguments - /// found to be sure that we can translate them in the same order that they - /// were declared in. - names: HashMap>, - name_types: HashMap, - name_ordering: Vec, + /// Map from named arguments to their resolved indices. + names: HashMap, /// The latest consecutive literal strings, or empty if there weren't any. literal: String, @@ -66,8 +63,6 @@ struct Context<'a, 'b:'a> { /// Stays `true` if all formatting parameters are default (as in "{}{}"). all_pieces_simple: bool, - name_positions: HashMap, - /// Current position of the implicit positional arg pointer, as if it /// still existed in this phase of processing. /// Used only for `all_pieces_simple` tracking in `trans_piece`. @@ -80,15 +75,12 @@ struct Context<'a, 'b:'a> { /// /// If parsing succeeds, the return value is: /// ```ignore -/// Some((fmtstr, unnamed arguments, ordering of named arguments, -/// named arguments)) +/// Some((fmtstr, parsed arguments, index map for named arguments)) /// ``` fn parse_args(ecx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenTree]) - -> Option<(P, Vec>, Vec, - HashMap>)> { - let mut args = Vec::new(); - let mut names = HashMap::>::new(); - let mut order = Vec::new(); + -> Option<(P, Vec>, HashMap)> { + let mut args = Vec::>::new(); + let mut names = HashMap::::new(); let mut p = ecx.new_parser_from_tts(tts); @@ -132,20 +124,45 @@ fn parse_args(ecx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenTree]) ecx.struct_span_err(e.span, &format!("duplicate argument named `{}`", name)) - .span_note(prev.span, "previously here") + .span_note(args[*prev].span, "previously here") .emit(); continue; } - order.push(name.to_string()); - names.insert(name.to_string(), e); + + // Resolve names into slots early. + // Since all the positional args are already seen at this point + // if the input is valid, we can simply append to the positional + // args. And remember the names. + let slot = args.len(); + names.insert(name.to_string(), slot); + args.push(e); } else { args.push(panictry!(p.parse_expr())); } } - Some((fmtstr, args, order, names)) + Some((fmtstr, args, names)) } impl<'a, 'b> Context<'a, 'b> { + fn resolve_name_inplace(&self, p: &mut parse::Piece) { + let lookup = |s| *self.names.get(s).unwrap_or(&0); + + match *p { + parse::String(_) => {} + parse::NextArgument(ref mut arg) => { + if let parse::ArgumentNamed(s) = arg.position { + arg.position = parse::ArgumentIs(lookup(s)); + } + if let parse::CountIsName(s) = arg.format.width { + arg.format.width = parse::CountIsParam(lookup(s)); + } + if let parse::CountIsName(s) = arg.format.precision { + arg.format.precision = parse::CountIsParam(lookup(s)); + } + } + } + } + /// Verifies one piece of a parse string. All errors are not emitted as /// fatal so we can continue giving errors about this and possibly other /// format strings. @@ -214,24 +231,16 @@ impl<'a, 'b> Context<'a, 'b> { } Named(name) => { - let span = match self.names.get(&name) { - Some(e) => e.span, + let idx = match self.names.get(&name) { + Some(e) => *e, None => { let msg = format!("there is no argument named `{}`", name); self.ecx.span_err(self.fmtsp, &msg[..]); return; } }; - self.verify_same(span, &ty, self.name_types.get(&name)); - if !self.name_types.contains_key(&name) { - self.name_types.insert(name.clone(), ty); - } - // Assign this named argument a slot in the arguments array if - // it hasn't already been assigned a slot. - if !self.name_positions.contains_key(&name) { - let slot = self.name_positions.len(); - self.name_positions.insert(name, slot); - } + // Treat as positional arg. + self.verify_arg_type(Exact(idx), ty) } } } @@ -301,14 +310,8 @@ impl<'a, 'b> Context<'a, 'b> { count("Param", Some(self.ecx.expr_usize(sp, i))) } parse::CountImplied => count("Implied", None), - parse::CountIsName(n) => { - let i = match self.name_positions.get(n) { - Some(&i) => i, - None => 0, // error already emitted elsewhere - }; - let i = i + self.args.len(); - count("Param", Some(self.ecx.expr_usize(sp, i))) - } + // should never be the case, names are already resolved + parse::CountIsName(_) => panic!("should never happen"), } } @@ -348,16 +351,9 @@ impl<'a, 'b> Context<'a, 'b> { match arg.position { parse::ArgumentIs(i) => pos("At", Some(i)), - // Named arguments are converted to positional arguments - // at the end of the list of arguments - parse::ArgumentNamed(n) => { - let i = match self.name_positions.get(n) { - Some(&i) => i, - None => 0, // error already emitted elsewhere - }; - let i = i + self.args.len(); - pos("At", Some(i)) - } + // should never be the case, because names are already + // resolved. + parse::ArgumentNamed(_) => panic!("should never happen"), } }; @@ -448,7 +444,6 @@ impl<'a, 'b> Context<'a, 'b> { /// to fn into_expr(mut self) -> P { let mut locals = Vec::new(); - let mut names = vec![None; self.name_positions.len()]; let mut pats = Vec::new(); let mut heads = Vec::new(); @@ -485,29 +480,11 @@ impl<'a, 'b> Context<'a, 'b> { self.ecx.expr_ident(e.span, name))); heads.push(self.ecx.expr_addr_of(e.span, e)); } - for name in &self.name_ordering { - let e = match self.names.remove(name) { - Some(e) => e, - None => continue - }; - let arg_ty = match self.name_types.get(name) { - Some(ty) => ty, - None => continue - }; - - let lname = self.ecx.ident_of(&format!("__arg{}", - *name)); - pats.push(self.ecx.pat_ident(DUMMY_SP, lname)); - names[*self.name_positions.get(name).unwrap()] = - Some(Context::format_arg(self.ecx, self.macsp, e.span, arg_ty, - self.ecx.expr_ident(e.span, lname))); - heads.push(self.ecx.expr_addr_of(e.span, e)); - } // Now create a vector containing all the arguments - let args = locals.into_iter().chain(names.into_iter().map(|a| a.unwrap())); + let args = locals.into_iter().collect(); - let args_array = self.ecx.expr_vec(self.fmtsp, args.collect()); + let args_array = self.ecx.expr_vec(self.fmtsp, args); // Constructs an AST equivalent to: // @@ -605,9 +582,9 @@ pub fn expand_format_args<'cx>(ecx: &'cx mut ExtCtxt, sp: Span, -> Box { match parse_args(ecx, sp, tts) { - Some((efmt, args, order, names)) => { + Some((efmt, args, names)) => { MacEager::expr(expand_preparsed_format_args(ecx, sp, efmt, - args, order, names)) + args, names)) } None => DummyResult::expr(sp) } @@ -618,8 +595,7 @@ pub fn expand_format_args<'cx>(ecx: &'cx mut ExtCtxt, sp: Span, pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, efmt: P, args: Vec>, - name_ordering: Vec, - names: HashMap>) + names: HashMap) -> P { let arg_types: Vec<_> = (0..args.len()).map(|_| None).collect(); let macsp = ecx.call_site(); @@ -632,9 +608,6 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, args: args, arg_types: arg_types, names: names, - name_positions: HashMap::new(), - name_types: HashMap::new(), - name_ordering: name_ordering, curarg: 0, literal: String::new(), pieces: Vec::new(), @@ -655,9 +628,10 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, loop { match parser.next() { - Some(piece) => { + Some(mut piece) => { if !parser.errors.is_empty() { break } cx.verify_piece(&piece); + cx.resolve_name_inplace(&mut piece); pieces.push(piece); } None => break @@ -683,14 +657,17 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, } // Make sure that all arguments were used and all arguments have types. + let num_pos_args = cx.args.len() - cx.names.len(); for (i, ty) in cx.arg_types.iter().enumerate() { if ty.is_none() { - cx.ecx.span_err(cx.args[i].span, "argument never used"); - } - } - for (name, e) in &cx.names { - if !cx.name_types.contains_key(name) { - cx.ecx.span_err(e.span, "named argument never used"); + let msg = if i >= num_pos_args { + // named argument + "named argument never used" + } else { + // positional argument + "argument never used" + }; + cx.ecx.span_err(cx.args[i].span, msg); } } From eb5417bf12ea7f709f6603308d86b61a1fc845c5 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sat, 21 May 2016 12:58:17 +0800 Subject: [PATCH 05/10] syntax_ext: format: rename variants of ArgumentType for clarity --- src/libsyntax_ext/format.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index 2635213f58e60..9e6a83a1863b1 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -27,8 +27,8 @@ use std::collections::HashMap; #[derive(PartialEq)] enum ArgumentType { - Known(String), - Unsigned + Placeholder(String), + Count, } enum Position { @@ -182,7 +182,7 @@ impl<'a, 'b> Context<'a, 'b> { parse::ArgumentNamed(s) => Named(s.to_string()), }; - let ty = Known(arg.format.ty.to_string()); + let ty = Placeholder(arg.format.ty.to_string()); self.verify_arg_type(pos, ty); } } @@ -192,10 +192,10 @@ impl<'a, 'b> Context<'a, 'b> { match c { parse::CountImplied | parse::CountIs(..) => {} parse::CountIsParam(i) => { - self.verify_arg_type(Exact(i), Unsigned); + self.verify_arg_type(Exact(i), Count); } parse::CountIsName(s) => { - self.verify_arg_type(Named(s.to_string()), Unsigned); + self.verify_arg_type(Named(s.to_string()), Count); } } } @@ -545,7 +545,7 @@ impl<'a, 'b> Context<'a, 'b> { ty: &ArgumentType, arg: P) -> P { let trait_ = match *ty { - Known(ref tyname) => { + Placeholder(ref tyname) => { match &tyname[..] { "" => "Display", "?" => "Debug", @@ -564,7 +564,7 @@ impl<'a, 'b> Context<'a, 'b> { } } } - Unsigned => { + Count => { let path = ecx.std_path(&["fmt", "ArgumentV1", "from_usize"]); return ecx.expr_call_global(macsp, path, vec![arg]) } From 5e55a4411684a9cf3932b0597607cf82433ff3ba Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sat, 14 May 2016 20:42:47 +0800 Subject: [PATCH 06/10] syntax_ext: format: allow multiple formats for one argument This commit removed the restriction of only allowing one type per argument. This is achieved by adding mappings between macro arguments and format placeholders, then taking the mapping into consideration when emitting the Arguments expression. syntax_ext: format: fix implicit positional arguments syntax_ext: format: don't panic if no args given for implicit positional args Check the list lengths before use. Fixes regression of `compile-fail/macro-backtrace-println.rs`. syntax_ext: format: also map CountIsParam indices to expanded args syntax_ext: format: fix ICE in case of malformed format args --- src/libsyntax_ext/format.rs | 126 ++++++++++++-------------- src/test/compile-fail/ifmt-bad-arg.rs | 3 - 2 files changed, 57 insertions(+), 72 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index 9e6a83a1863b1..e6a5ff1d44482 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -49,7 +49,7 @@ struct Context<'a, 'b:'a> { /// Named expressions are resolved early, and are appended to the end of /// argument expressions. args: Vec>, - arg_types: Vec>, + arg_types: Vec>, /// Map from named arguments to their resolved indices. names: HashMap, @@ -63,6 +63,13 @@ struct Context<'a, 'b:'a> { /// Stays `true` if all formatting parameters are default (as in "{}{}"). all_pieces_simple: bool, + /// Mapping between positional argument references and indices into the + /// final generated static argument array. We record the starting indices + /// corresponding to each positional argument, and number of references + /// consumed so far for each argument, to facilitate correct `Position` + /// mapping in `trans_piece`. + arg_index_map: Vec, + /// Current position of the implicit positional arg pointer, as if it /// still existed in this phase of processing. /// Used only for `all_pieces_simple` tracking in `trans_piece`. @@ -218,16 +225,7 @@ impl<'a, 'b> Context<'a, 'b> { self.ecx.span_err(self.fmtsp, &msg[..]); return; } - { - let arg_type = match self.arg_types[arg] { - None => None, - Some(ref x) => Some(x) - }; - self.verify_same(self.args[arg].span, &ty, arg_type); - } - if self.arg_types[arg].is_none() { - self.arg_types[arg] = Some(ty); - } + self.arg_types[arg].push(ty); } Named(name) => { @@ -245,48 +243,17 @@ impl<'a, 'b> Context<'a, 'b> { } } - /// When we're keeping track of the types that are declared for certain - /// arguments, we assume that `None` means we haven't seen this argument - /// yet, `Some(None)` means that we've seen the argument, but no format was - /// specified, and `Some(Some(x))` means that the argument was declared to - /// have type `x`. - /// - /// Obviously `Some(Some(x)) != Some(Some(y))`, but we consider it true - /// that: `Some(None) == Some(Some(x))` - fn verify_same(&self, - sp: Span, - ty: &ArgumentType, - before: Option<&ArgumentType>) { - let cur = match before { - None => return, - Some(t) => t, - }; - if *ty == *cur { - return - } - match (cur, ty) { - (&Known(ref cur), &Known(ref ty)) => { - self.ecx.span_err(sp, - &format!("argument redeclared with type `{}` when \ - it was previously `{}`", - *ty, - *cur)); - } - (&Known(ref cur), _) => { - self.ecx.span_err(sp, - &format!("argument used to format with `{}` was \ - attempted to not be used for formatting", - *cur)); - } - (_, &Known(ref ty)) => { - self.ecx.span_err(sp, - &format!("argument previously used as a format \ - argument attempted to be used as `{}`", - *ty)); - } - (_, _) => { - self.ecx.span_err(sp, "argument declared with multiple formats"); - } + // NOTE: Keep the ordering the same as `into_expr`'s expansion would do! + fn build_index_map(&mut self) { + let args_len = self.args.len(); + self.arg_index_map.reserve(args_len); + + let mut sofar = 0usize; + + // Generate mapping for positional args + for i in 0..args_len { + self.arg_index_map.push(sofar); + sofar += self.arg_types[i].len(); } } @@ -294,7 +261,9 @@ impl<'a, 'b> Context<'a, 'b> { ecx.std_path(&["fmt", "rt", "v1", s]) } - fn trans_count(&self, c: parse::Count) -> P { + fn trans_count(&self, + c: parse::Count, + arg_index_consumed: &mut Vec) -> P { let sp = self.macsp; let count = |c, arg| { let mut path = Context::rtpath(self.ecx, "Count"); @@ -307,7 +276,11 @@ impl<'a, 'b> Context<'a, 'b> { match c { parse::CountIs(i) => count("Is", Some(self.ecx.expr_usize(sp, i))), parse::CountIsParam(i) => { - count("Param", Some(self.ecx.expr_usize(sp, i))) + // This needs mapping too, as `i` is referring to a macro + // argument. + let arg_idx = self.arg_index_map[i] + arg_index_consumed[i]; + arg_index_consumed[i] += 1; + count("Param", Some(self.ecx.expr_usize(sp, arg_idx))) } parse::CountImplied => count("Implied", None), // should never be the case, names are already resolved @@ -325,7 +298,10 @@ impl<'a, 'b> Context<'a, 'b> { /// Translate a `parse::Piece` to a static `rt::Argument` or append /// to the `literal` string. - fn trans_piece(&mut self, piece: &parse::Piece) -> Option> { + fn trans_piece(&mut self, + piece: &parse::Piece, + arg_index_consumed: &mut Vec) + -> Option> { let sp = self.macsp; match *piece { parse::String(s) => { @@ -349,7 +325,18 @@ impl<'a, 'b> Context<'a, 'b> { } }; match arg.position { - parse::ArgumentIs(i) => pos("At", Some(i)), + parse::ArgumentIs(i) => { + // Map to index in final generated argument array + // in case of multiple types specified + let arg_idx = if self.args.len() > i { + let arg_idx = self.arg_index_map[i] + arg_index_consumed[i]; + arg_index_consumed[i] += 1; + arg_idx + } else { + 0 // error already emitted elsewhere + }; + pos("At", Some(arg_idx)) + } // should never be the case, because names are already // resolved. @@ -396,8 +383,8 @@ impl<'a, 'b> Context<'a, 'b> { }; let align = self.ecx.expr_path(align); let flags = self.ecx.expr_u32(sp, arg.format.flags); - let prec = self.trans_count(arg.format.precision); - let width = self.trans_count(arg.format.width); + let prec = self.trans_count(arg.format.precision, arg_index_consumed); + let width = self.trans_count(arg.format.width, arg_index_consumed); let path = self.ecx.path_global(sp, Context::rtpath(self.ecx, "FormatSpec")); let fmt = self.ecx.expr_struct(sp, path, vec!( self.ecx.field_imm(sp, self.ecx.ident_of("fill"), fill), @@ -469,15 +456,12 @@ impl<'a, 'b> Context<'a, 'b> { // of each variable because we don't want to move out of the arguments // passed to this function. for (i, e) in self.args.into_iter().enumerate() { - let arg_ty = match self.arg_types[i].as_ref() { - Some(ty) => ty, - None => continue // error already generated - }; - let name = self.ecx.ident_of(&format!("__arg{}", i)); pats.push(self.ecx.pat_ident(DUMMY_SP, name)); - locals.push(Context::format_arg(self.ecx, self.macsp, e.span, arg_ty, - self.ecx.expr_ident(e.span, name))); + for ref arg_ty in self.arg_types[i].iter() { + locals.push(Context::format_arg(self.ecx, self.macsp, e.span, arg_ty, + self.ecx.expr_ident(e.span, name))); + } heads.push(self.ecx.expr_addr_of(e.span, e)); } @@ -597,7 +581,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, args: Vec>, names: HashMap) -> P { - let arg_types: Vec<_> = (0..args.len()).map(|_| None).collect(); + let arg_types: Vec<_> = (0..args.len()).map(|_| Vec::new()).collect(); let macsp = ecx.call_site(); // Expand the format literal so that efmt.span will have a backtrace. This // is essential for locating a bug when the format literal is generated in @@ -609,6 +593,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, arg_types: arg_types, names: names, curarg: 0, + arg_index_map: Vec::new(), literal: String::new(), pieces: Vec::new(), str_pieces: Vec::new(), @@ -638,8 +623,11 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, } } + cx.build_index_map(); + + let mut arg_index_consumed = vec![0usize; cx.arg_index_map.len()]; for piece in pieces { - if let Some(piece) = cx.trans_piece(&piece) { + if let Some(piece) = cx.trans_piece(&piece, &mut arg_index_consumed) { let s = cx.trans_literal_string(); cx.str_pieces.push(s); cx.pieces.push(piece); @@ -659,7 +647,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, // Make sure that all arguments were used and all arguments have types. let num_pos_args = cx.args.len() - cx.names.len(); for (i, ty) in cx.arg_types.iter().enumerate() { - if ty.is_none() { + if ty.len() == 0 { let msg = if i >= num_pos_args { // named argument "named argument never used" diff --git a/src/test/compile-fail/ifmt-bad-arg.rs b/src/test/compile-fail/ifmt-bad-arg.rs index 1368702b160bf..272ad980feb46 100644 --- a/src/test/compile-fail/ifmt-bad-arg.rs +++ b/src/test/compile-fail/ifmt-bad-arg.rs @@ -23,9 +23,6 @@ fn main() { format!("{foo}", 1, foo=2); //~ ERROR: argument never used format!("", foo=2); //~ ERROR: named argument never used - format!("{0:x} {0:X}", 1); //~ ERROR: redeclared with type `X` - format!("{foo:x} {foo:X}", foo=1); //~ ERROR: redeclared with type `X` - format!("{foo}", foo=1, foo=2); //~ ERROR: duplicate argument format!("", foo=1, 2); //~ ERROR: positional arguments cannot follow From f457e6c3e0724ba3a7d37460ea7088cf67a6af4b Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sat, 21 May 2016 16:00:01 +0800 Subject: [PATCH 07/10] syntax_ext: format: process counts uniquely and separately --- src/libsyntax_ext/format.rs | 73 +++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index e6a5ff1d44482..606840cd1cb8e 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -24,6 +24,7 @@ use syntax_pos::{Span, DUMMY_SP}; use syntax::tokenstream; use std::collections::HashMap; +use std::collections::hash_map::Entry; #[derive(PartialEq)] enum ArgumentType { @@ -70,6 +71,12 @@ struct Context<'a, 'b:'a> { /// mapping in `trans_piece`. arg_index_map: Vec, + count_args_index_offset: usize, + + count_args: Vec, + count_positions: HashMap, + count_positions_count: usize, + /// Current position of the implicit positional arg pointer, as if it /// still existed in this phase of processing. /// Used only for `all_pieces_simple` tracking in `trans_piece`. @@ -225,7 +232,22 @@ impl<'a, 'b> Context<'a, 'b> { self.ecx.span_err(self.fmtsp, &msg[..]); return; } - self.arg_types[arg].push(ty); + match ty { + Placeholder(_) => { + self.arg_types[arg].push(ty); + } + Count => { + match self.count_positions.entry(arg) { + Entry::Vacant(e) => { + let i = self.count_positions_count; + e.insert(i); + self.count_args.push(Exact(arg)); + self.count_positions_count += 1; + } + Entry::Occupied(_) => {} + } + } + } } Named(name) => { @@ -255,15 +277,17 @@ impl<'a, 'b> Context<'a, 'b> { self.arg_index_map.push(sofar); sofar += self.arg_types[i].len(); } + + // Record starting index for counts, which appear just + // after the positional args + self.count_args_index_offset = sofar; } fn rtpath(ecx: &ExtCtxt, s: &str) -> Vec { ecx.std_path(&["fmt", "rt", "v1", s]) } - fn trans_count(&self, - c: parse::Count, - arg_index_consumed: &mut Vec) -> P { + fn trans_count(&self, c: parse::Count) -> P { let sp = self.macsp; let count = |c, arg| { let mut path = Context::rtpath(self.ecx, "Count"); @@ -278,9 +302,12 @@ impl<'a, 'b> Context<'a, 'b> { parse::CountIsParam(i) => { // This needs mapping too, as `i` is referring to a macro // argument. - let arg_idx = self.arg_index_map[i] + arg_index_consumed[i]; - arg_index_consumed[i] += 1; - count("Param", Some(self.ecx.expr_usize(sp, arg_idx))) + let i = match self.count_positions.get(&i) { + Some(&i) => i, + None => 0, // error already emitted elsewhere + }; + let i = i + self.count_args_index_offset; + count("Param", Some(self.ecx.expr_usize(sp, i))) } parse::CountImplied => count("Implied", None), // should never be the case, names are already resolved @@ -383,8 +410,8 @@ impl<'a, 'b> Context<'a, 'b> { }; let align = self.ecx.expr_path(align); let flags = self.ecx.expr_u32(sp, arg.format.flags); - let prec = self.trans_count(arg.format.precision, arg_index_consumed); - let width = self.trans_count(arg.format.width, arg_index_consumed); + let prec = self.trans_count(arg.format.precision); + let width = self.trans_count(arg.format.width); let path = self.ecx.path_global(sp, Context::rtpath(self.ecx, "FormatSpec")); let fmt = self.ecx.expr_struct(sp, path, vec!( self.ecx.field_imm(sp, self.ecx.ident_of("fill"), fill), @@ -431,6 +458,7 @@ impl<'a, 'b> Context<'a, 'b> { /// to fn into_expr(mut self) -> P { let mut locals = Vec::new(); + let mut counts = Vec::new(); let mut pats = Vec::new(); let mut heads = Vec::new(); @@ -447,6 +475,10 @@ impl<'a, 'b> Context<'a, 'b> { piece_ty, self.str_pieces); + // Before consuming the expressions, we have to remember spans for + // count arguments as they are now generated separate from other + // arguments, hence have no access to the `P`'s. + let spans_pos: Vec<_> = self.args.iter().map(|e| e.span.clone()).collect(); // Right now there is a bug such that for the expression: // foo(bar(&1)) @@ -464,11 +496,23 @@ impl<'a, 'b> Context<'a, 'b> { } heads.push(self.ecx.expr_addr_of(e.span, e)); } + for pos in self.count_args { + let name = self.ecx.ident_of(&match pos { + Exact(i) => format!("__arg{}", i), + _ => panic!("should never happen"), + }); + let span = match pos { + Exact(i) => spans_pos[i], + _ => panic!("should never happen"), + }; + counts.push(Context::format_arg(self.ecx, self.macsp, span, &Count, + self.ecx.expr_ident(span, name))); + } // Now create a vector containing all the arguments - let args = locals.into_iter().collect(); + let args = locals.into_iter().chain(counts.into_iter()); - let args_array = self.ecx.expr_vec(self.fmtsp, args); + let args_array = self.ecx.expr_vec(self.fmtsp, args.collect()); // Constructs an AST equivalent to: // @@ -594,6 +638,10 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, names: names, curarg: 0, arg_index_map: Vec::new(), + count_args: Vec::new(), + count_positions: HashMap::new(), + count_positions_count: 0, + count_args_index_offset: 0, literal: String::new(), pieces: Vec::new(), str_pieces: Vec::new(), @@ -648,6 +696,9 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, let num_pos_args = cx.args.len() - cx.names.len(); for (i, ty) in cx.arg_types.iter().enumerate() { if ty.len() == 0 { + if cx.count_positions.contains_key(&i) { + continue; + } let msg = if i >= num_pos_args { // named argument "named argument never used" From 718099435bfc1b76650f3cbdc78d334a16d99176 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 2 Jun 2016 18:16:24 +0800 Subject: [PATCH 08/10] syntax_ext: format: de-duplicate argument objects --- src/libsyntax_ext/format.rs | 43 ++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index 606840cd1cb8e..916a2aa572ccb 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -50,7 +50,8 @@ struct Context<'a, 'b:'a> { /// Named expressions are resolved early, and are appended to the end of /// argument expressions. args: Vec>, - arg_types: Vec>, + arg_types: Vec>, + arg_unique_types: Vec>, /// Map from named arguments to their resolved indices. names: HashMap, @@ -69,7 +70,7 @@ struct Context<'a, 'b:'a> { /// corresponding to each positional argument, and number of references /// consumed so far for each argument, to facilitate correct `Position` /// mapping in `trans_piece`. - arg_index_map: Vec, + arg_index_map: Vec>, count_args_index_offset: usize, @@ -234,7 +235,17 @@ impl<'a, 'b> Context<'a, 'b> { } match ty { Placeholder(_) => { - self.arg_types[arg].push(ty); + // record every (position, type) combination only once + let ref mut seen_ty = self.arg_unique_types[arg]; + let i = match seen_ty.iter().position(|x| *x == ty) { + Some(i) => i, + None => { + let i = seen_ty.len(); + seen_ty.push(ty); + i + } + }; + self.arg_types[arg].push(i); } Count => { match self.count_positions.entry(arg) { @@ -274,8 +285,13 @@ impl<'a, 'b> Context<'a, 'b> { // Generate mapping for positional args for i in 0..args_len { - self.arg_index_map.push(sofar); - sofar += self.arg_types[i].len(); + let ref arg_types = self.arg_types[i]; + let mut arg_offsets = Vec::with_capacity(arg_types.len()); + for offset in arg_types { + arg_offsets.push(sofar + *offset); + } + self.arg_index_map.push(arg_offsets); + sofar += self.arg_unique_types[i].len(); } // Record starting index for counts, which appear just @@ -355,12 +371,13 @@ impl<'a, 'b> Context<'a, 'b> { parse::ArgumentIs(i) => { // Map to index in final generated argument array // in case of multiple types specified - let arg_idx = if self.args.len() > i { - let arg_idx = self.arg_index_map[i] + arg_index_consumed[i]; - arg_index_consumed[i] += 1; - arg_idx - } else { - 0 // error already emitted elsewhere + let arg_idx = match arg_index_consumed.get_mut(i) { + None => 0, // error already emitted elsewhere + Some(offset) => { + let arg_idx = self.arg_index_map[i][*offset]; + *offset += 1; + arg_idx + } }; pos("At", Some(arg_idx)) } @@ -490,7 +507,7 @@ impl<'a, 'b> Context<'a, 'b> { for (i, e) in self.args.into_iter().enumerate() { let name = self.ecx.ident_of(&format!("__arg{}", i)); pats.push(self.ecx.pat_ident(DUMMY_SP, name)); - for ref arg_ty in self.arg_types[i].iter() { + for ref arg_ty in self.arg_unique_types[i].iter() { locals.push(Context::format_arg(self.ecx, self.macsp, e.span, arg_ty, self.ecx.expr_ident(e.span, name))); } @@ -626,6 +643,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, names: HashMap) -> P { let arg_types: Vec<_> = (0..args.len()).map(|_| Vec::new()).collect(); + let arg_unique_types: Vec<_> = (0..args.len()).map(|_| Vec::new()).collect(); let macsp = ecx.call_site(); // Expand the format literal so that efmt.span will have a backtrace. This // is essential for locating a bug when the format literal is generated in @@ -635,6 +653,7 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, ecx: ecx, args: args, arg_types: arg_types, + arg_unique_types: arg_unique_types, names: names, curarg: 0, arg_index_map: Vec::new(), From 03563b12e8705f7d8fedf6e1bc48f2bcf22c20bf Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Sat, 21 May 2016 23:08:30 +0800 Subject: [PATCH 09/10] format: add tests for ergonomic format_args! format: workaround pretty-printer to pass tests --- src/test/run-pass/ifmt.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index 27cafeacc203d..0a69ccf47dd9f 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -163,6 +163,24 @@ pub fn main() { t!(format!("{:?}", 0.0), "0"); + // Ergonomic format_args! + t!(format!("{0:x} {0:X}", 15), "f F"); + t!(format!("{0:x} {0:X} {}", 15), "f F 15"); + // NOTE: For now the longer test cases must not be followed immediately by + // >1 empty lines, or the pretty printer will break. Since no one wants to + // touch the current pretty printer (#751), we have no choice but to work + // around it. Some of the following test cases are also affected. + t!(format!("{:x}{0:X}{a:x}{:X}{1:x}{a:X}", 13, 14, a=15), "dDfEeF"); + t!(format!("{a:x} {a:X}", a=15), "f F"); + + // And its edge cases + t!(format!("{a:.0$} {b:.0$} {0:.0$}\n{a:.c$} {b:.c$} {c:.c$}", + 4, a="abcdefg", b="hijklmn", c=3), + "abcd hijk 4\nabc hij 3"); + t!(format!("{a:.*} {0} {:.*}", 4, 3, "efgh", a="abcdef"), "abcd 4 efg"); + t!(format!("{:.a$} {a} {a:#x}", "aaaaaa", a=2), "aa 2 0x2"); + + // Test that pointers don't get truncated. { let val = usize::MAX; @@ -177,6 +195,7 @@ pub fn main() { test_write(); test_print(); test_order(); + test_once(); // make sure that format! doesn't move out of local variables let a: Box<_> = box 3; @@ -260,3 +279,17 @@ fn test_order() { foo(), foo(), foo(), a=foo(), b=foo(), c=foo()), "1 2 4 5 3 6".to_string()); } + +fn test_once() { + // Make sure each argument are evaluted only once even though it may be + // formatted multiple times + fn foo() -> isize { + static mut FOO: isize = 0; + unsafe { + FOO += 1; + FOO + } + } + assert_eq!(format!("{0} {0} {0} {a} {a} {a}", foo(), a=foo()), + "1 1 1 2 2 2".to_string()); +} From 51e54e57a45d7b956e3e3390db76ea2651f5d3b3 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 2 Jun 2016 21:47:34 +0800 Subject: [PATCH 10/10] syntax_ext: format: better code documentation --- src/libsyntax_ext/format.rs | 58 +++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/src/libsyntax_ext/format.rs b/src/libsyntax_ext/format.rs index 916a2aa572ccb..94bb78edaacdb 100644 --- a/src/libsyntax_ext/format.rs +++ b/src/libsyntax_ext/format.rs @@ -45,12 +45,22 @@ struct Context<'a, 'b:'a> { /// The span of the format string literal. fmtsp: Span, - /// Parsed argument expressions and the types that we've found so far for - /// them. + /// List of parsed argument expressions. /// Named expressions are resolved early, and are appended to the end of /// argument expressions. + /// + /// Example showing the various data structures in motion: + /// + /// * Original: `"{foo:o} {:o} {foo:x} {0:x} {1:o} {:x} {1:x} {0:o}"` + /// * Implicit argument resolution: `"{foo:o} {0:o} {foo:x} {0:x} {1:o} {1:x} {1:x} {0:o}"` + /// * Name resolution: `"{2:o} {0:o} {2:x} {0:x} {1:o} {1:x} {1:x} {0:o}"` + /// * `arg_types` (in JSON): `[[0, 1, 0], [0, 1, 1], [0, 1]]` + /// * `arg_unique_types` (in simplified JSON): `[["o", "x"], ["o", "x"], ["o", "x"]]` + /// * `names` (in JSON): `{"foo": 2}` args: Vec>, + /// Placeholder slot numbers indexed by argument. arg_types: Vec>, + /// Unique format specs seen for each argument. arg_unique_types: Vec>, /// Map from named arguments to their resolved indices. names: HashMap, @@ -69,13 +79,31 @@ struct Context<'a, 'b:'a> { /// final generated static argument array. We record the starting indices /// corresponding to each positional argument, and number of references /// consumed so far for each argument, to facilitate correct `Position` - /// mapping in `trans_piece`. + /// mapping in `trans_piece`. In effect this can be seen as a "flattened" + /// version of `arg_unique_types`. + /// + /// Again with the example described above in docstring for `args`: + /// + /// * `arg_index_map` (in JSON): `[[0, 1, 0], [2, 3, 3], [4, 5]]` arg_index_map: Vec>, + /// Starting offset of count argument slots. count_args_index_offset: usize, + /// Count argument slots and tracking data structures. + /// Count arguments are separately tracked for de-duplication in case + /// multiple references are made to one argument. For example, in this + /// format string: + /// + /// * Original: `"{:.*} {:.foo$} {1:.*} {:.0$}"` + /// * Implicit argument resolution: `"{1:.0$} {2:.foo$} {1:.3$} {4:.0$}"` + /// * Name resolution: `"{1:.0$} {2:.5$} {1:.3$} {4:.0$}"` + /// * `count_positions` (in JSON): `{0: 0, 5: 1, 3: 2}` + /// * `count_args`: `vec![Exact(0), Exact(5), Exact(3)]` count_args: Vec, + /// Relative slot numbers for count arguments. count_positions: HashMap, + /// Number of count slots assigned. count_positions_count: usize, /// Current position of the implicit positional arg pointer, as if it @@ -160,6 +188,8 @@ fn parse_args(ecx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenTree]) impl<'a, 'b> Context<'a, 'b> { fn resolve_name_inplace(&self, p: &mut parse::Piece) { + // NOTE: the `unwrap_or` branch is needed in case of invalid format + // arguments, e.g. `format_args!("{foo}")`. let lookup = |s| *self.names.get(s).unwrap_or(&0); match *p { @@ -178,9 +208,9 @@ impl<'a, 'b> Context<'a, 'b> { } } - /// Verifies one piece of a parse string. All errors are not emitted as - /// fatal so we can continue giving errors about this and possibly other - /// format strings. + /// Verifies one piece of a parse string, and remembers it if valid. + /// All errors are not emitted as fatal so we can continue giving errors + /// about this and possibly other format strings. fn verify_piece(&mut self, p: &parse::Piece) { match *p { parse::String(..) => {} @@ -223,6 +253,8 @@ impl<'a, 'b> Context<'a, 'b> { } } + /// Actually verifies and tracks a given format placeholder + /// (a.k.a. argument). fn verify_arg_type(&mut self, arg: Position, ty: ArgumentType) { match arg { Exact(arg) => { @@ -276,14 +308,15 @@ impl<'a, 'b> Context<'a, 'b> { } } - // NOTE: Keep the ordering the same as `into_expr`'s expansion would do! + /// Builds the mapping between format placeholders and argument objects. fn build_index_map(&mut self) { + // NOTE: Keep the ordering the same as `into_expr`'s expansion would do! let args_len = self.args.len(); self.arg_index_map.reserve(args_len); let mut sofar = 0usize; - // Generate mapping for positional args + // Map the arguments for i in 0..args_len { let ref arg_types = self.arg_types[i]; let mut arg_offsets = Vec::with_capacity(arg_types.len()); @@ -294,8 +327,7 @@ impl<'a, 'b> Context<'a, 'b> { sofar += self.arg_unique_types[i].len(); } - // Record starting index for counts, which appear just - // after the positional args + // Record starting index for counts, which appear just after arguments self.count_args_index_offset = sofar; } @@ -471,8 +503,8 @@ impl<'a, 'b> Context<'a, 'b> { ecx.expr_block(ecx.block(sp, vec![stmt, ecx.stmt_expr(ecx.expr_ident(sp, name))])) } - /// Actually builds the expression which the iformat! block will be expanded - /// to + /// Actually builds the expression which the format_args! block will be + /// expanded to fn into_expr(mut self) -> P { let mut locals = Vec::new(); let mut counts = Vec::new(); @@ -642,6 +674,8 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span, args: Vec>, names: HashMap) -> P { + // NOTE: this verbose way of initializing `Vec>` is because + // `ArgumentType` does not derive `Clone`. let arg_types: Vec<_> = (0..args.len()).map(|_| Vec::new()).collect(); let arg_unique_types: Vec<_> = (0..args.len()).map(|_| Vec::new()).collect(); let macsp = ecx.call_site();