diff --git a/doc/rust.md b/doc/rust.md index e998f97869f7a..59ff973605217 100644 --- a/doc/rust.md +++ b/doc/rust.md @@ -239,13 +239,14 @@ literal : string_lit | char_lit | num_lit ; ~~~~~~~~ {.ebnf .gram} char_lit : '\x27' char_body '\x27' ; -string_lit : '"' string_body * '"' ; +string_lit : '"' string_body * '"' | 'r' raw_string ; char_body : non_single_quote | '\x5c' [ '\x27' | common_escape ] ; string_body : non_double_quote | '\x5c' [ '\x22' | common_escape ] ; +raw_string : '"' raw_string_body '"' | '#' raw_string '#' ; common_escape : '\x5c' | 'n' | 'r' | 't' | '0' @@ -267,9 +268,10 @@ which must be _escaped_ by a preceding U+005C character (`\`). A _string literal_ is a sequence of any Unicode characters enclosed within two `U+0022` (double-quote) characters, with the exception of `U+0022` -itself, which must be _escaped_ by a preceding `U+005C` character (`\`). +itself, which must be _escaped_ by a preceding `U+005C` character (`\`), +or a _raw string literal_. -Some additional _escapes_ are available in either character or string +Some additional _escapes_ are available in either character or non-raw string literals. An escape starts with a `U+005C` (`\`) and continues with one of the following forms: @@ -285,9 +287,35 @@ the following forms: * A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072` (`r`), or `U+0074` (`t`), denoting the unicode values `U+000A` (LF), `U+000D` (CR) or `U+0009` (HT) respectively. - * The _backslash escape_ is the character U+005C (`\`) which must be + * The _backslash escape_ is the character `U+005C` (`\`) which must be escaped in order to denote *itself*. +Raw string literals do not process any escapes. They start with the character +`U+0072` (`r`), followed by zero or more of the character `U+0023` (`#`) and a +`U+0022` (double-quote) character. 
The _raw string body_ is not defined in the +EBNF grammar above: it can contain any sequence of Unicode characters and is +terminated only by another `U+0022` (double-quote) character, followed by the +same number of `U+0023` (`#`) characters that preceded the opening `U+0022` +(double-quote) character. + +All Unicode characters contained in the raw string body represent themselves; +the characters `U+0022` (double-quote) (except when followed by at least as +many `U+0023` (`#`) characters as were used to start the raw string literal) or +`U+005C` (`\`) do not have any special meaning. + +Examples for string literals: + +~~~ +"foo"; r"foo"; // foo +"\"foo\""; r#""foo""#; // "foo" + +"foo #\"# bar"; +r##"foo #"# bar"##; // foo #"# bar + +"\x52"; "R"; r"R"; // R +"\\x52"; r"\x52"; // \x52 +~~~ + #### Number literals ~~~~~~~~ {.ebnf .gram} diff --git a/doc/tutorial.md b/doc/tutorial.md index 49ba38954b386..fcb8f2836a4a9 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -353,7 +353,12 @@ whose literals are written between single quotes, as in `'x'`. Just like C, Rust understands a number of character escapes, using the backslash character, such as `\n`, `\r`, and `\t`. String literals, written between double quotes, allow the same escape sequences. -More on strings [later](#vectors-and-strings). + +On the other hand, raw string literals do not process any escape sequences. +They are written as `r##"blah"##`, with a matching number of zero or more `#` +before the opening and after the closing quote, and can contain any sequence of +characters except their closing delimiter. More on strings +[later](#vectors-and-strings). The nil type, written `()`, has a single value, also written `()`. 
diff --git a/src/etc/vim/syntax/rust.vim b/src/etc/vim/syntax/rust.vim index c465f076c74ab..7d51c3b847588 100644 --- a/src/etc/vim/syntax/rust.vim +++ b/src/etc/vim/syntax/rust.vim @@ -148,6 +148,7 @@ syn match rustFormat display "%%" contained syn match rustSpecial display contained /\\\([nrt\\'"]\|x\x\{2}\|u\x\{4}\|U\x\{8}\)/ syn match rustStringContinuation display contained /\\\n\s*/ syn region rustString start=+"+ skip=+\\\\\|\\"+ end=+"+ contains=rustTodo,rustFormat,rustSpecial,rustStringContinuation +syn region rustString start='r\z(#*\)"' end='"\z1' syn region rustAttribute start="#\[" end="\]" contains=rustString,rustDeriving syn region rustDeriving start="deriving(" end=")" contained contains=rustTrait diff --git a/src/librustc/front/test.rs b/src/librustc/front/test.rs index e4904863a94bc..aad2228f66fda 100644 --- a/src/librustc/front/test.rs +++ b/src/librustc/front/test.rs @@ -407,7 +407,7 @@ fn mk_test_desc_and_fn_rec(cx: &TestCtxt, test: &Test) -> @ast::Expr { debug2!("encoding {}", ast_util::path_name_i(path)); let name_lit: ast::lit = - nospan(ast::lit_str(ast_util::path_name_i(path).to_managed())); + nospan(ast::lit_str(ast_util::path_name_i(path).to_managed(), ast::CookedStr)); let name_expr = @ast::Expr { id: ast::DUMMY_NODE_ID, diff --git a/src/librustc/metadata/creader.rs b/src/librustc/metadata/creader.rs index c41e1d78f6486..fd4f31c3dc75f 100644 --- a/src/librustc/metadata/creader.rs +++ b/src/librustc/metadata/creader.rs @@ -142,7 +142,7 @@ fn visit_view_item(e: @mut Env, i: &ast::view_item) { let ident = token::ident_to_str(&ident); let meta_items = match path_opt { None => meta_items.clone(), - Some(p) => { + Some((p, _path_str_style)) => { let p_path = Path(p); match p_path.filestem() { Some(s) => diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index e88ee70105195..75564f039bf3f 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -1446,7 +1446,7 @@ fn 
encode_meta_item(ebml_w: &mut writer::Encoder, mi: @MetaItem) { } MetaNameValue(name, value) => { match value.node { - lit_str(value) => { + lit_str(value, _) => { ebml_w.start_tag(tag_meta_item_name_value); ebml_w.start_tag(tag_meta_item_name); ebml_w.writer.write(name.as_bytes()); diff --git a/src/librustc/middle/check_const.rs b/src/librustc/middle/check_const.rs index 30cf827cb72cf..dd624a882839a 100644 --- a/src/librustc/middle/check_const.rs +++ b/src/librustc/middle/check_const.rs @@ -86,7 +86,7 @@ pub fn check_pat(v: &mut CheckCrateVisitor, p: @Pat, _is_const: bool) { match e.node { ExprVstore( @Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), + node: lit_str(*), _}), _ }, ExprVstoreUniq @@ -120,7 +120,7 @@ pub fn check_expr(v: &mut CheckCrateVisitor, "disallowed operator in constant expression"); return; } - ExprLit(@codemap::Spanned {node: lit_str(_), _}) => { } + ExprLit(@codemap::Spanned {node: lit_str(*), _}) => { } ExprBinary(*) | ExprUnary(*) => { if method_map.contains_key(&e.id) { sess.span_err(e.span, "user-defined operators are not \ diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs index cc818c9c00120..77cba0f33eae2 100644 --- a/src/librustc/middle/const_eval.rs +++ b/src/librustc/middle/const_eval.rs @@ -475,7 +475,7 @@ pub fn eval_const_expr_partial(tcx: &T, e: &Expr) pub fn lit_to_const(lit: &lit) -> const_val { match lit.node { - lit_str(s) => const_str(s), + lit_str(s, _) => const_str(s), lit_char(n) => const_uint(n as u64), lit_int(n, _) => const_int(n), lit_uint(n, _) => const_uint(n), diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs index 25d600de1e92b..dd938b5a60f8d 100644 --- a/src/librustc/middle/trans/consts.rs +++ b/src/librustc/middle/trans/consts.rs @@ -71,7 +71,7 @@ pub fn const_lit(cx: &mut CrateContext, e: &ast::Expr, lit: ast::lit) } ast::lit_bool(b) => C_bool(b), ast::lit_nil => C_nil(), - ast::lit_str(s) => C_estr_slice(cx, s) + 
ast::lit_str(s, _) => C_estr_slice(cx, s) } } diff --git a/src/librustc/middle/trans/expr.rs b/src/librustc/middle/trans/expr.rs index 098f0e3db7c90..df5e69f0b4fdb 100644 --- a/src/librustc/middle/trans/expr.rs +++ b/src/librustc/middle/trans/expr.rs @@ -705,7 +705,7 @@ fn trans_rvalue_dps_unadjusted(bcx: @mut Block, expr: &ast::Expr, args.iter().enumerate().map(|(i, arg)| (i, *arg)).collect(); return trans_adt(bcx, repr, 0, numbered_fields, None, dest); } - ast::ExprLit(@codemap::Spanned {node: ast::lit_str(s), _}) => { + ast::ExprLit(@codemap::Spanned {node: ast::lit_str(s, _), _}) => { return tvec::trans_lit_str(bcx, expr, s, dest); } ast::ExprVstore(contents, ast::ExprVstoreSlice) | diff --git a/src/librustc/middle/trans/tvec.rs b/src/librustc/middle/trans/tvec.rs index 896ce4be33726..7bcbedf50f75f 100644 --- a/src/librustc/middle/trans/tvec.rs +++ b/src/librustc/middle/trans/tvec.rs @@ -205,7 +205,7 @@ pub fn trans_slice_vstore(bcx: @mut Block, // Handle the &"..." case: match content_expr.node { - ast::ExprLit(@codemap::Spanned {node: ast::lit_str(s), span: _}) => { + ast::ExprLit(@codemap::Spanned {node: ast::lit_str(s, _), span: _}) => { return trans_lit_str(bcx, content_expr, s, dest); } _ => {} @@ -296,7 +296,7 @@ pub fn trans_uniq_or_managed_vstore(bcx: @mut Block, heap: heap, vstore_expr: &a heap_exchange => { match content_expr.node { ast::ExprLit(@codemap::Spanned { - node: ast::lit_str(s), span + node: ast::lit_str(s, _), span }) => { let llptrval = C_cstr(bcx.ccx(), s); let llptrval = PointerCast(bcx, llptrval, Type::i8p()); @@ -357,7 +357,7 @@ pub fn write_content(bcx: @mut Block, let _indenter = indenter(); match content_expr.node { - ast::ExprLit(@codemap::Spanned { node: ast::lit_str(s), _ }) => { + ast::ExprLit(@codemap::Spanned { node: ast::lit_str(s, _), _ }) => { match dest { Ignore => { return bcx; @@ -490,7 +490,7 @@ pub fn elements_required(bcx: @mut Block, content_expr: &ast::Expr) -> uint { //! 
Figure out the number of elements we need to store this content match content_expr.node { - ast::ExprLit(@codemap::Spanned { node: ast::lit_str(s), _ }) => { + ast::ExprLit(@codemap::Spanned { node: ast::lit_str(s, _), _ }) => { s.len() }, ast::ExprVec(ref es, _) => es.len(), diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index d21852751b459..d14e81244596a 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -3266,7 +3266,7 @@ pub fn expr_kind(tcx: ctxt, ast::ExprDoBody(*) | ast::ExprBlock(*) | ast::ExprRepeat(*) | - ast::ExprLit(@codemap::Spanned {node: lit_str(_), _}) | + ast::ExprLit(@codemap::Spanned {node: lit_str(*), _}) | ast::ExprVstore(_, ast::ExprVstoreSlice) | ast::ExprVstore(_, ast::ExprVstoreMutSlice) | ast::ExprVec(*) => { diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index 1fc04c7a0f67a..ec5ee3838b187 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -2259,7 +2259,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, match expr.node { ast::ExprVstore(ev, vst) => { let typ = match ev.node { - ast::ExprLit(@codemap::Spanned { node: ast::lit_str(_), _ }) => { + ast::ExprLit(@codemap::Spanned { node: ast::lit_str(*), _ }) => { let tt = ast_expr_vstore_to_vstore(fcx, ev, vst); ty::mk_estr(tcx, tt) } diff --git a/src/librustdoc/clean.rs b/src/librustdoc/clean.rs index b9100d6e36511..54b8a5c038d96 100644 --- a/src/librustdoc/clean.rs +++ b/src/librustdoc/clean.rs @@ -1008,7 +1008,7 @@ impl Clean for ast::view_item_ { fn clean(&self) -> ViewItemInner { match self { &ast::view_item_extern_mod(ref i, ref p, ref mi, ref id) => - ExternMod(i.clean(), p.map(|x| x.to_owned()), mi.clean(), *id), + ExternMod(i.clean(), p.map(|&(ref x, _)| x.to_owned()), mi.clean(), *id), &ast::view_item_use(ref vp) => Import(vp.clean()) } } @@ -1114,7 +1114,7 @@ impl ToSource for syntax::codemap::Span { fn lit_to_str(lit: &ast::lit) -> ~str { 
match lit.node { - ast::lit_str(st) => st.to_owned(), + ast::lit_str(st, _) => st.to_owned(), ast::lit_char(c) => ~"'" + std::char::from_u32(c).unwrap().to_str() + "'", ast::lit_int(i, _t) => i.to_str(), ast::lit_uint(u, _t) => u.to_str(), diff --git a/src/librustpkg/util.rs b/src/librustpkg/util.rs index 78b5321ae13e6..d7138139bc1b6 100644 --- a/src/librustpkg/util.rs +++ b/src/librustpkg/util.rs @@ -406,7 +406,7 @@ impl<'self> Visitor<()> for ViewItemVisitor<'self> { // ignore metadata, I guess ast::view_item_extern_mod(lib_ident, path_opt, _, _) => { let lib_name = match path_opt { - Some(p) => p, + Some((p, _)) => p, None => self.sess.str_of(lib_ident) }; debug2!("Finding and installing... {}", lib_name); @@ -513,7 +513,7 @@ pub fn find_and_install_dependencies(context: &BuildContext, pub fn mk_string_lit(s: @str) -> ast::lit { Spanned { - node: ast::lit_str(s), + node: ast::lit_str(s, ast::CookedStr), span: dummy_sp() } } diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 6de3b7aa0b099..63c2f0e519147 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -680,11 +680,17 @@ pub enum mac_ { mac_invoc_tt(Path,~[token_tree],SyntaxContext), // new macro-invocation } +#[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] +pub enum StrStyle { + CookedStr, + RawStr(uint) +} + pub type lit = Spanned; #[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] pub enum lit_ { - lit_str(@str), + lit_str(@str, StrStyle), lit_char(u32), lit_int(i64, int_ty), lit_uint(u64, uint_ty), @@ -862,6 +868,7 @@ pub enum asm_dialect { #[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] pub struct inline_asm { asm: @str, + asm_str_style: StrStyle, clobbers: @str, inputs: ~[(@str, @Expr)], outputs: ~[(@str, @Expr)], @@ -1027,7 +1034,7 @@ pub enum view_item_ { // optional @str: if present, this is a location (containing // arbitrary characters) from which to fetch the crate sources // For example, extern mod whatever = "github.com/mozilla/rust" - 
view_item_extern_mod(Ident, Option<@str>, ~[@MetaItem], NodeId), + view_item_extern_mod(Ident, Option<(@str, StrStyle)>, ~[@MetaItem], NodeId), view_item_use(~[@view_path]), } diff --git a/src/libsyntax/attr.rs b/src/libsyntax/attr.rs index df31fece5eaee..d9a23f6eb35cf 100644 --- a/src/libsyntax/attr.rs +++ b/src/libsyntax/attr.rs @@ -67,7 +67,7 @@ impl AttrMetaMethods for MetaItem { match self.node { MetaNameValue(_, ref v) => { match v.node { - ast::lit_str(s) => Some(s), + ast::lit_str(s, _) => Some(s), _ => None, } }, @@ -127,7 +127,7 @@ impl AttributeMethods for Attribute { /* Constructors */ pub fn mk_name_value_item_str(name: @str, value: @str) -> @MetaItem { - let value_lit = dummy_spanned(ast::lit_str(value)); + let value_lit = dummy_spanned(ast::lit_str(value, ast::CookedStr)); mk_name_value_item(name, value_lit) } @@ -153,7 +153,7 @@ pub fn mk_attr(item: @MetaItem) -> Attribute { pub fn mk_sugared_doc_attr(text: @str, lo: BytePos, hi: BytePos) -> Attribute { let style = doc_comment_style(text); - let lit = spanned(lo, hi, ast::lit_str(text)); + let lit = spanned(lo, hi, ast::lit_str(text, ast::CookedStr)); let attr = Attribute_ { style: style, value: @spanned(lo, hi, MetaNameValue(@"doc", lit)), diff --git a/src/libsyntax/ext/asm.rs b/src/libsyntax/ext/asm.rs index 9241e8c4fbcb1..e836367555a74 100644 --- a/src/libsyntax/ext/asm.rs +++ b/src/libsyntax/ext/asm.rs @@ -44,6 +44,7 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) tts.to_owned()); let mut asm = @""; + let mut asm_str_style = None; let mut outputs = ~[]; let mut inputs = ~[]; let mut cons = ~""; @@ -58,8 +59,11 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) while continue_ { match state { Asm => { - asm = expr_to_str(cx, p.parse_expr(), - "inline assembly must be a string literal."); + let (s, style) = + expr_to_str(cx, p.parse_expr(), + "inline assembly must be a string literal."); + asm = s; + asm_str_style = Some(style); } Outputs => { while 
*p.token != token::EOF && @@ -70,7 +74,7 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) p.eat(&token::COMMA); } - let constraint = p.parse_str(); + let (constraint, _str_style) = p.parse_str(); p.expect(&token::LPAREN); let out = p.parse_expr(); p.expect(&token::RPAREN); @@ -93,7 +97,7 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) p.eat(&token::COMMA); } - let constraint = p.parse_str(); + let (constraint, _str_style) = p.parse_str(); p.expect(&token::LPAREN); let input = p.parse_expr(); p.expect(&token::RPAREN); @@ -111,14 +115,15 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) p.eat(&token::COMMA); } - let clob = format!("~\\{{}\\}", p.parse_str()); + let (s, _str_style) = p.parse_str(); + let clob = format!("~\\{{}\\}", s); clobs.push(clob); } cons = clobs.connect(","); } Options => { - let option = p.parse_str(); + let (option, _str_style) = p.parse_str(); if "volatile" == option { volatile = true; @@ -175,6 +180,7 @@ pub fn expand_asm(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) id: ast::DUMMY_NODE_ID, node: ast::ExprInlineAsm(ast::inline_asm { asm: asm, + asm_str_style: asm_str_style.unwrap(), clobbers: cons.to_managed(), inputs: inputs, outputs: outputs, diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index 3b4be1de3e81b..7f89271927c63 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -410,10 +410,10 @@ impl ExtCtxt { } } -pub fn expr_to_str(cx: @ExtCtxt, expr: @ast::Expr, err_msg: &str) -> @str { +pub fn expr_to_str(cx: @ExtCtxt, expr: @ast::Expr, err_msg: &str) -> (@str, ast::StrStyle) { match expr.node { ast::ExprLit(l) => match l.node { - ast::lit_str(s) => s, + ast::lit_str(s, style) => (s, style), _ => cx.span_fatal(l.span, err_msg) }, _ => cx.span_fatal(expr.span, err_msg) @@ -437,7 +437,8 @@ pub fn get_single_str_from_tts(cx: @ExtCtxt, } match tts[0] { - ast::tt_tok(_, token::LIT_STR(ident)) => cx.str_of(ident), + ast::tt_tok(_, 
token::LIT_STR(ident)) + | ast::tt_tok(_, token::LIT_STR_RAW(ident, _)) => cx.str_of(ident), _ => cx.span_fatal(sp, format!("{} requires a string.", name)), } } diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs index 65a6572fa5e99..a533618720083 100644 --- a/src/libsyntax/ext/build.rs +++ b/src/libsyntax/ext/build.rs @@ -562,7 +562,7 @@ impl AstBuilder for @ExtCtxt { self.expr_vstore(sp, self.expr_vec(sp, exprs), ast::ExprVstoreSlice) } fn expr_str(&self, sp: Span, s: @str) -> @ast::Expr { - self.expr_lit(sp, ast::lit_str(s)) + self.expr_lit(sp, ast::lit_str(s, ast::CookedStr)) } fn expr_str_uniq(&self, sp: Span, s: @str) -> @ast::Expr { self.expr_vstore(sp, self.expr_str(sp, s), ast::ExprVstoreUniq) diff --git a/src/libsyntax/ext/bytes.rs b/src/libsyntax/ext/bytes.rs index b27fcb6c9b9f7..5ebaea2ce44c8 100644 --- a/src/libsyntax/ext/bytes.rs +++ b/src/libsyntax/ext/bytes.rs @@ -28,7 +28,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) -> bas // expression is a literal ast::ExprLit(lit) => match lit.node { // string literal, push each byte to vector expression - ast::lit_str(s) => { + ast::lit_str(s, _) => { for byte in s.byte_iter() { bytes.push(cx.expr_u8(expr.span, byte)); } diff --git a/src/libsyntax/ext/deriving/generic.rs b/src/libsyntax/ext/deriving/generic.rs index b3fd4f920d88f..c31c609d4e74a 100644 --- a/src/libsyntax/ext/deriving/generic.rs +++ b/src/libsyntax/ext/deriving/generic.rs @@ -361,7 +361,7 @@ impl<'self> TraitDef<'self> { span, cx.meta_name_value(span, @"doc", - ast::lit_str(@"Automatically derived."))); + ast::lit_str(@"Automatically derived.", ast::CookedStr))); cx.item( span, ::parse::token::special_idents::clownshoes_extensions, diff --git a/src/libsyntax/ext/env.rs b/src/libsyntax/ext/env.rs index 63a45b06e1644..15630e37eadd5 100644 --- a/src/libsyntax/ext/env.rs +++ b/src/libsyntax/ext/env.rs @@ -41,10 +41,13 @@ pub fn expand_env(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) 
cx.span_fatal(sp, "env! takes 1 or 2 arguments"); } - let var = expr_to_str(cx, exprs[0], "expected string literal"); + let (var, _var_str_style) = expr_to_str(cx, exprs[0], "expected string literal"); let msg = match exprs.len() { 1 => format!("Environment variable {} not defined", var).to_managed(), - 2 => expr_to_str(cx, exprs[1], "expected string literal"), + 2 => { + let (s, _style) = expr_to_str(cx, exprs[1], "expected string literal"); + s + } _ => cx.span_fatal(sp, "env! takes 1 or 2 arguments") }; diff --git a/src/libsyntax/ext/fmt.rs b/src/libsyntax/ext/fmt.rs index cd364e7ad64a7..8258048a04db1 100644 --- a/src/libsyntax/ext/fmt.rs +++ b/src/libsyntax/ext/fmt.rs @@ -30,7 +30,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) if args.len() == 0 { cx.span_fatal(sp, "fmt! takes at least 1 argument."); } - let fmt = + let (fmt, _fmt_str_style) = expr_to_str(cx, args[0], "first argument to fmt! must be a string literal."); let fmtspan = args[0].span; diff --git a/src/libsyntax/ext/format.rs b/src/libsyntax/ext/format.rs index 8d327de6d6111..171748e9b2e99 100644 --- a/src/libsyntax/ext/format.rs +++ b/src/libsyntax/ext/format.rs @@ -722,8 +722,8 @@ pub fn expand_args(ecx: @ExtCtxt, sp: Span, (_, None) => { return MRExpr(ecx.expr_uint(sp, 2)); } }; cx.fmtsp = efmt.span; - let fmt = expr_to_str(ecx, efmt, - "format argument must be a string literal."); + let (fmt, _fmt_str_style) = expr_to_str(ecx, efmt, + "format argument must be a string literal."); let mut err = false; do parse::parse_error::cond.trap(|m| { diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index bfd1e9cc9943a..4bef960185523 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -118,7 +118,7 @@ pub mod rt { impl<'self> ToSource for &'self str { fn to_source(&self) -> @str { - let lit = dummy_spanned(ast::lit_str(self.to_managed())); + let lit = dummy_spanned(ast::lit_str(self.to_managed(), ast::CookedStr)); 
pprust::lit_to_str(@lit).to_managed() } } @@ -464,6 +464,13 @@ fn mk_token(cx: @ExtCtxt, sp: Span, tok: &token::Token) -> @ast::Expr { ~[mk_ident(cx, sp, ident)]); } + LIT_STR_RAW(ident, n) => { + return cx.expr_call_ident(sp, + id_ext("LIT_STR_RAW"), + ~[mk_ident(cx, sp, ident), + cx.expr_uint(sp, n)]); + } + IDENT(ident, b) => { return cx.expr_call_ident(sp, id_ext("IDENT"), diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 79c330c473714..a43e018cf4949 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -213,10 +213,22 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos { (pos - rdr.filemap.start_pos) } +/// Calls `f` with a string slice of the source text spanning from `start` +/// up to but excluding `rdr.last_pos`, meaning the slice does not include +/// the character `rdr.curr`. pub fn with_str_from(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T { + with_str_from_to(rdr, start, rdr.last_pos, f) +} + +/// Calls `f` with a string slice of the source text spanning from `start` +/// up to but excluding `end`. +fn with_str_from_to(rdr: @mut StringReader, + start: BytePos, + end: BytePos, + f: &fn(s: &str) -> T) -> T { f(rdr.src.slice( byte_offset(rdr, start).to_uint(), - byte_offset(rdr, rdr.last_pos).to_uint())) + byte_offset(rdr, end).to_uint())) } // EFFECT: advance the StringReader by one character. If a newline is @@ -612,7 +624,10 @@ fn ident_continue(c: char) -> bool { // EFFECT: updates the interner fn next_token_inner(rdr: @mut StringReader) -> token::Token { let c = rdr.curr; - if ident_start(c) { + if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' { + // Note: r as in r" or r#" is part of a raw string literal, + // not an identifier, and is handled further down. 
+ let start = rdr.last_pos; while ident_continue(rdr.curr) { bump(rdr); @@ -829,6 +844,47 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { bump(rdr); return token::LIT_STR(str_to_ident(accum_str)); } + 'r' => { + let start_bpos = rdr.last_pos; + bump(rdr); + let mut hash_count = 0u; + while rdr.curr == '#' { + bump(rdr); + hash_count += 1; + } + if rdr.curr != '"' { + fatal_span_char(rdr, start_bpos, rdr.last_pos, + ~"only `#` is allowed in raw string delimitation; \ + found illegal character", + rdr.curr); + } + bump(rdr); + let content_start_bpos = rdr.last_pos; + let mut content_end_bpos; + 'outer: loop { + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated raw string"); + } + if rdr.curr == '"' { + content_end_bpos = rdr.last_pos; + for _ in range(0, hash_count) { + bump(rdr); + if rdr.curr != '#' { + continue 'outer; + } + } + break; + } + bump(rdr); + } + bump(rdr); + let str_content = with_str_from_to(rdr, + content_start_bpos, + content_end_bpos, + str_to_ident); + return token::LIT_STR_RAW(str_content, hash_count); + } '-' => { if nextch(rdr) == '>' { bump(rdr); @@ -987,6 +1043,14 @@ mod test { assert_eq!(tok, token::LIFETIME(id)); } + #[test] fn raw_string() { + let env = setup(@"r###\"\"#a\\b\x00c\"\"###"); + let TokenAndSpan {tok, sp: _} = + env.string_reader.next_token(); + let id = token::str_to_ident("\"#a\\b\x00c\""); + assert_eq!(tok, token::LIT_STR_RAW(id, 3)); + } + #[test] fn line_doc_comments() { assert!(!is_line_non_doc_comment("///")); assert!(!is_line_non_doc_comment("/// blah")); diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index cad19543608ba..ffebe7980bf51 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -48,6 +48,7 @@ use ast::{BiRem, required}; use ast::{ret_style, return_val, BiShl, BiShr, Stmt, StmtDecl}; use ast::{StmtExpr, StmtSemi, StmtMac, struct_def, struct_field}; use ast::{struct_variant_kind, BiSub}; +use ast::StrStyle; 
use ast::{sty_box, sty_region, sty_static, sty_uniq, sty_value}; use ast::{token_tree, trait_method, trait_ref, tt_delim, tt_seq, tt_tok}; use ast::{tt_nonterminal, tuple_variant_kind, Ty, ty_, ty_bot, ty_box}; @@ -1282,7 +1283,8 @@ impl Parser { token::LIT_FLOAT(s, ft) => lit_float(self.id_to_str(s), ft), token::LIT_FLOAT_UNSUFFIXED(s) => lit_float_unsuffixed(self.id_to_str(s)), - token::LIT_STR(s) => lit_str(self.id_to_str(s)), + token::LIT_STR(s) => lit_str(self.id_to_str(s), ast::CookedStr), + token::LIT_STR_RAW(s, n) => lit_str(self.id_to_str(s), ast::RawStr(n)), token::LPAREN => { self.expect(&token::RPAREN); lit_nil }, _ => { self.unexpected_last(tok); } } @@ -2157,7 +2159,7 @@ impl Parser { // HACK: turn &[...] into a &-evec ex = match e.node { ExprVec(*) | ExprLit(@codemap::Spanned { - node: lit_str(_), span: _ + node: lit_str(*), span: _ }) if m == MutImmutable => { ExprVstore(e, ExprVstoreSlice) @@ -2181,7 +2183,7 @@ impl Parser { ExprVec(*) | ExprRepeat(*) if m == MutMutable => ExprVstore(e, ExprVstoreMutBox), ExprVec(*) | - ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) | + ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) | ExprRepeat(*) if m == MutImmutable => ExprVstore(e, ExprVstoreBox), _ => self.mk_unary(UnBox(m), e) }; @@ -2194,7 +2196,7 @@ impl Parser { // HACK: turn ~[...] 
into a ~-evec ex = match e.node { ExprVec(*) | - ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) | + ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) | ExprRepeat(*) => ExprVstore(e, ExprVstoreUniq), _ => self.mk_unary(UnUniq, e) }; @@ -2706,7 +2708,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { @@ -2734,7 +2736,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { @@ -2763,7 +2765,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), span: _}), _ + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { id: ast::DUMMY_NODE_ID, @@ -4345,7 +4347,8 @@ impl Parser { // parse a string as an ABI spec on an extern type or module fn parse_opt_abis(&self) -> Option { match *self.token { - token::LIT_STR(s) => { + token::LIT_STR(s) + | token::LIT_STR_RAW(s, _) => { self.bump(); let the_string = ident_to_str(&s); let mut abis = AbiSet::empty(); @@ -4371,15 +4374,15 @@ impl Parser { abi::all_names().connect(", "), word)); } - } - } + } + } Some(abis) } _ => { None - } - } + } + } } // parse one of the items or view items allowed by the @@ -4930,17 +4933,17 @@ impl Parser { } } - pub fn parse_optional_str(&self) -> Option<@str> { - match *self.token { - token::LIT_STR(s) => { - self.bump(); - Some(ident_to_str(&s)) - } - _ => None - } + pub fn parse_optional_str(&self) -> Option<(@str, ast::StrStyle)> { + let (s, style) = match *self.token { + token::LIT_STR(s) => (s, ast::CookedStr), + token::LIT_STR_RAW(s, n) => (s, ast::RawStr(n)), + _ => return None + }; + self.bump(); + Some((ident_to_str(&s), style)) } - pub fn parse_str(&self) -> @str { + pub fn parse_str(&self) -> (@str, StrStyle) { match self.parse_optional_str() { Some(s) => { s } _ => 
self.fatal("expected string literal") diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index d0faf917688d7..ba4c2637d10e3 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -79,6 +79,7 @@ pub enum Token { LIT_FLOAT(ast::Ident, ast::float_ty), LIT_FLOAT_UNSUFFIXED(ast::Ident), LIT_STR(ast::Ident), + LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ /* Name components */ // an identifier contains an "is_mod_name" boolean, @@ -194,6 +195,10 @@ pub fn to_str(input: @ident_interner, t: &Token) -> ~str { body } LIT_STR(ref s) => { format!("\"{}\"", ident_to_str(s).escape_default()) } + LIT_STR_RAW(ref s, n) => { + format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n), string=ident_to_str(s)) + } /* Name components */ IDENT(s, _) => input.get(s.name).to_owned(), @@ -243,6 +248,7 @@ pub fn can_begin_expr(t: &Token) -> bool { LIT_FLOAT(_, _) => true, LIT_FLOAT_UNSUFFIXED(_) => true, LIT_STR(_) => true, + LIT_STR_RAW(_, _) => true, POUND => true, AT => true, NOT => true, @@ -284,6 +290,7 @@ pub fn is_lit(t: &Token) -> bool { LIT_FLOAT(_, _) => true, LIT_FLOAT_UNSUFFIXED(_) => true, LIT_STR(_) => true, + LIT_STR_RAW(_, _) => true, _ => false } } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 2a3add059ce33..7091a2d551877 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -1433,10 +1433,10 @@ pub fn print_expr(s: @ps, expr: &ast::Expr) { word(s.s, "asm!"); } popen(s); - print_string(s, a.asm); + print_string(s, a.asm, a.asm_str_style); word_space(s, ":"); for &(co, o) in a.outputs.iter() { - print_string(s, co); + print_string(s, co, ast::CookedStr); popen(s); print_expr(s, o); pclose(s); @@ -1444,14 +1444,14 @@ pub fn print_expr(s: @ps, expr: &ast::Expr) { } word_space(s, ":"); for &(co, o) in a.inputs.iter() { - print_string(s, co); + print_string(s, co, ast::CookedStr); popen(s); print_expr(s, o); pclose(s); word_space(s, ","); } 
word_space(s, ":"); - print_string(s, a.clobbers); + print_string(s, a.clobbers, ast::CookedStr); pclose(s); } ast::ExprMac(ref m) => print_mac(s, m), @@ -1894,9 +1894,11 @@ pub fn print_view_item(s: @ps, item: &ast::view_item) { ast::view_item_extern_mod(id, ref optional_path, ref mta, _) => { head(s, "extern mod"); print_ident(s, id); - for p in optional_path.iter() { + for &(ref p, style) in optional_path.iter() { + space(s.s); word(s.s, "="); - print_string(s, *p); + space(s.s); + print_string(s, *p, style); } if !mta.is_empty() { popen(s); @@ -2058,7 +2060,7 @@ pub fn print_literal(s: @ps, lit: &ast::lit) { _ => () } match lit.node { - ast::lit_str(st) => print_string(s, st), + ast::lit_str(st, style) => print_string(s, st, style), ast::lit_char(ch) => { let mut res = ~"'"; do char::from_u32(ch).unwrap().escape_default |c| { @@ -2178,10 +2180,13 @@ pub fn print_comment(s: @ps, cmnt: &comments::cmnt) { } } -pub fn print_string(s: @ps, st: &str) { - word(s.s, "\""); - word(s.s, st.escape_default()); - word(s.s, "\""); +pub fn print_string(s: @ps, st: &str, style: ast::StrStyle) { + let st = match style { + ast::CookedStr => format!("\"{}\"", st.escape_default()), + ast::RawStr(n) => format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n), string=st) + }; + word(s.s, st); } pub fn to_str(t: &T, f: &fn(@ps, &T), intr: @ident_interner) -> ~str { diff --git a/src/test/compile-fail/raw-str-delim.rs b/src/test/compile-fail/raw-str-delim.rs new file mode 100644 index 0000000000000..83afb33b641cf --- /dev/null +++ b/src/test/compile-fail/raw-str-delim.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +static s: &'static str = + r#x"#"x# //~ ERROR only `#` is allowed in raw string delimitation; found illegal character +; diff --git a/src/test/compile-fail/raw-str-unbalanced.rs b/src/test/compile-fail/raw-str-unbalanced.rs new file mode 100644 index 0000000000000..3e161041711db --- /dev/null +++ b/src/test/compile-fail/raw-str-unbalanced.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static s: &'static str = + r#" + "## //~ ERROR expected `;` but found `#` +; diff --git a/src/test/compile-fail/raw-str-unterminated.rs b/src/test/compile-fail/raw-str-unterminated.rs new file mode 100644 index 0000000000000..4151cf32346ce --- /dev/null +++ b/src/test/compile-fail/raw-str-unterminated.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static s: &'static str = + r#" string literal goes on + and on + //~^^ ERROR unterminated raw string diff --git a/src/test/pretty/raw-str-nonexpr.rs b/src/test/pretty/raw-str-nonexpr.rs new file mode 100644 index 0000000000000..d7960a7ea8040 --- /dev/null +++ b/src/test/pretty/raw-str-nonexpr.rs @@ -0,0 +1,16 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +// pp-exact + +#[cfg = r#"just parse this"#] +extern mod blah = r##"blah"##; + +fn main() { unsafe { asm!(r###"blah"###); } } diff --git a/src/test/run-pass/raw-str.rs b/src/test/run-pass/raw-str.rs new file mode 100644 index 0000000000000..bfe5326043983 Binary files /dev/null and b/src/test/run-pass/raw-str.rs differ