]> git.lizzy.rs Git - rust.git/commitdiff
syntax: Handle \r\n in byte string literals
author Alex Crichton <alex@alexcrichton.com>
Tue, 5 Aug 2014 22:13:57 +0000 (15:13 -0700)
committer Alex Crichton <alex@alexcrichton.com>
Wed, 6 Aug 2014 00:02:55 +0000 (17:02 -0700)
This ended up passing through the lexer but dying later on in parsing when it
wasn't handled. The strategy taken was to copy the `str_lit` function, but adapt
it for bytes.

Closes #16278

src/libsyntax/parse/mod.rs
src/test/run-pass/.gitattributes
src/test/run-pass/issue-16278.rs [new file with mode: 0644]

index 8f960e37de2e74f062c2f20f8b7df32353a277d9..5b70ed609d98f05ff06ca31cb2901a57107a1796 100644 (file)
@@ -21,6 +21,7 @@
 use std::io::File;
 use std::rc::Rc;
 use std::str;
+use std::iter;
 
 pub mod lexer;
 pub mod parser;
@@ -327,7 +328,7 @@ pub fn str_lit(lit: &str) -> String {
     let error = |i| format!("lexer should have rejected {} at {}", lit, i);
 
     /// Eat everything up to a non-whitespace
-    fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
+    fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharOffsets<'a>>) {
         loop {
             match it.peek().map(|x| x.val1()) {
                 Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
@@ -471,35 +472,54 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
     // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
     let error = |i| format!("lexer should have rejected {} at {}", lit, i);
 
+    /// Eat everything up to a non-whitespace
+    fn eat<'a, I: Iterator<(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) {
+        loop {
+            match it.peek().map(|x| x.val1()) {
+                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
+                    it.next();
+                },
+                _ => { break; }
+            }
+        }
+    }
+
     // binary literals *must* be ASCII, but the escapes don't have to be
-    let mut chars = lit.as_bytes().iter().enumerate().peekable();
+    let mut chars = lit.bytes().enumerate().peekable();
     loop {
         match chars.next() {
-            Some((i, &c)) => {
-                if c == b'\\' {
-                    if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
-                        loop {
-                            // eat everything up to a non-whitespace
-                            match chars.peek().map(|x| *x.val1()) {
-                                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
-                                    chars.next();
-                                },
-                                _ => { break; }
-                            }
+            Some((i, b'\\')) => {
+                let em = error(i);
+                match chars.peek().expect(em.as_slice()).val1() {
+                    b'\n' => eat(&mut chars),
+                    b'\r' => {
+                        chars.next();
+                        if chars.peek().expect(em.as_slice()).val1() != b'\n' {
+                            fail!("lexer accepted bare CR");
                         }
-                    } else {
+                        eat(&mut chars);
+                    }
+                    _ => {
                         // otherwise, a normal escape
                         let (c, n) = byte_lit(lit.slice_from(i));
-                        for _ in range(0, n - 1) { // we don't need to move past the first \
+                        // we don't need to move past the first \
+                        for _ in range(0, n - 1) {
                             chars.next();
                         }
                         res.push(c);
                     }
-                } else {
-                    res.push(c);
                 }
             },
-            None => { break; }
+            Some((i, b'\r')) => {
+                let em = error(i);
+                if chars.peek().expect(em.as_slice()).val1() != b'\n' {
+                    fail!("lexer accepted bare CR");
+                }
+                chars.next();
+                res.push(b'\n');
+            }
+            Some((_, c)) => res.push(c),
+            None => break,
         }
     }
 
index c6a6f23074de0633519ae33822f4a0f18bfb024f..46db548a8c49799dc191fe556631f0aad566bc2a 100644 (file)
@@ -1 +1,2 @@
 lexer-crlf-line-endings-string-literal-doc-comment.rs -text
+issue-16278.rs -text
diff --git a/src/test/run-pass/issue-16278.rs b/src/test/run-pass/issue-16278.rs
new file mode 100644 (file)
index 0000000..f92426d
--- /dev/null
@@ -0,0 +1,20 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// ignore-tidy-cr
+
+// this file has some special \r\n endings (use xxd to see them)
+
+fn main() {assert_eq!(b"", b"\\r
+                                   ");
+assert_eq!(b"\n", b"\r
+");
+}
+