mod parser;
-mod byte;
-mod byte_string;
-mod char;
mod string;
pub use self::{
- byte::parse_byte_literal,
- byte_string::parse_byte_string_literal,
- char::parse_char_literal,
- parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind},
- string::parse_string_literal,
+ parser::{StringComponent, StringComponentKind},
+ string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal},
};
+++ /dev/null
-use super::parser::Parser;
-use super::CharComponent;
-
-pub fn parse_byte_literal(src: &str) -> ByteComponentIterator {
- ByteComponentIterator {
- parser: Parser::new(src),
- has_closing_quote: false,
- }
-}
-
-pub struct ByteComponentIterator<'a> {
- parser: Parser<'a>,
- pub has_closing_quote: bool,
-}
-
-impl<'a> Iterator for ByteComponentIterator<'a> {
- type Item = CharComponent;
- fn next(&mut self) -> Option<CharComponent> {
- if self.parser.pos == 0 {
- assert!(
- self.parser.advance() == 'b',
- "Byte literal should start with a `b`"
- );
-
- assert!(
- self.parser.advance() == '\'',
- "Byte literal should start with a `b`, followed by a quote"
- );
- }
-
- if let Some(component) = self.parser.parse_char_component() {
- return Some(component);
- }
-
- // We get here when there are no char components left to parse
- if self.parser.peek() == Some('\'') {
- self.parser.advance();
- self.has_closing_quote = true;
- }
-
- assert!(
- self.parser.peek() == None,
- "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
- self.parser.src,
- self.parser.pos,
- self.parser.src.len()
- );
-
- None
- }
-}
+++ /dev/null
-use super::parser::Parser;
-use super::StringComponent;
-
-pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator {
- ByteStringComponentIterator {
- parser: Parser::new(src),
- has_closing_quote: false,
- }
-}
-
-pub struct ByteStringComponentIterator<'a> {
- parser: Parser<'a>,
- pub has_closing_quote: bool,
-}
-
-impl<'a> Iterator for ByteStringComponentIterator<'a> {
- type Item = StringComponent;
- fn next(&mut self) -> Option<StringComponent> {
- if self.parser.pos == 0 {
- assert!(
- self.parser.advance() == 'b',
- "byte string literal should start with a `b`"
- );
-
- assert!(
- self.parser.advance() == '"',
- "byte string literal should start with a `b`, followed by double quotes"
- );
- }
-
- if let Some(component) = self.parser.parse_string_component() {
- return Some(component);
- }
-
- // We get here when there are no char components left to parse
- if self.parser.peek() == Some('"') {
- self.parser.advance();
- self.has_closing_quote = true;
- }
-
- assert!(
- self.parser.peek() == None,
- "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
- self.parser.src,
- self.parser.pos,
- self.parser.src.len()
- );
-
- None
- }
-}
+++ /dev/null
-use super::parser::Parser;
-use super::CharComponent;
-
-pub fn parse_char_literal(src: &str) -> CharComponentIterator {
- CharComponentIterator {
- parser: Parser::new(src),
- has_closing_quote: false,
- }
-}
-
-pub struct CharComponentIterator<'a> {
- parser: Parser<'a>,
- pub has_closing_quote: bool,
-}
-
-impl<'a> Iterator for CharComponentIterator<'a> {
- type Item = CharComponent;
- fn next(&mut self) -> Option<CharComponent> {
- if self.parser.pos == 0 {
- assert!(
- self.parser.advance() == '\'',
- "char literal should start with a quote"
- );
- }
-
- if let Some(component) = self.parser.parse_char_component() {
- return Some(component);
- }
-
- // We get here when there are no char components left to parse
- if self.parser.peek() == Some('\'') {
- self.parser.advance();
- self.has_closing_quote = true;
- }
-
- assert!(
- self.parser.peek() == None,
- "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
- self.parser.src,
- self.parser.pos,
- self.parser.src.len()
- );
-
- None
- }
-}
-
-#[cfg(test)]
-mod tests {
- use rowan::TextRange;
- use crate::string_lexing::{
- CharComponent,
- CharComponentKind::*,
-};
-
- fn parse(src: &str) -> (bool, Vec<CharComponent>) {
- let component_iterator = &mut super::parse_char_literal(src);
- let components: Vec<_> = component_iterator.collect();
- (component_iterator.has_closing_quote, components)
- }
-
- fn unclosed_char_component(src: &str) -> CharComponent {
- let (has_closing_quote, components) = parse(src);
- assert!(!has_closing_quote, "char should not have closing quote");
- assert!(components.len() == 1);
- components[0].clone()
- }
-
- fn closed_char_component(src: &str) -> CharComponent {
- let (has_closing_quote, components) = parse(src);
- assert!(has_closing_quote, "char should have closing quote");
- assert!(
- components.len() == 1,
- "Literal: {}\nComponents: {:#?}",
- src,
- components
- );
- components[0].clone()
- }
-
- fn closed_char_components(src: &str) -> Vec<CharComponent> {
- let (has_closing_quote, components) = parse(src);
- assert!(has_closing_quote, "char should have closing quote");
- components
- }
-
- fn range_closed(src: &str) -> TextRange {
- TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
- }
-
- fn range_unclosed(src: &str) -> TextRange {
- TextRange::from_to(1.into(), (src.len() as u32).into())
- }
-
- #[test]
- fn test_unicode_escapes() {
- let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
- for escape in unicode_escapes {
- let escape_sequence = format!(r"'\u{}'", escape);
- let component = closed_char_component(&escape_sequence);
- let expected_range = range_closed(&escape_sequence);
- assert_eq!(component.kind, UnicodeEscape);
- assert_eq!(component.range, expected_range);
- }
- }
-
- #[test]
- fn test_unicode_escapes_unclosed() {
- let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
- for escape in unicode_escapes {
- let escape_sequence = format!(r"'\u{}'", escape);
- let component = unclosed_char_component(&escape_sequence);
- let expected_range = range_unclosed(&escape_sequence);
- assert_eq!(component.kind, UnicodeEscape);
- assert_eq!(component.range, expected_range);
- }
- }
-
- #[test]
- fn test_empty_char() {
- let (has_closing_quote, components) = parse("''");
- assert!(has_closing_quote, "char should have closing quote");
- assert!(components.len() == 0);
- }
-
- #[test]
- fn test_unclosed_char() {
- let component = unclosed_char_component("'a");
- assert!(component.kind == CodePoint);
- assert!(component.range == TextRange::from_to(1.into(), 2.into()));
- }
-
- #[test]
- fn test_digit_escapes() {
- let literals = &[r"", r"5", r"55"];
-
- for literal in literals {
- let lit_text = format!(r"'\x{}'", literal);
- let component = closed_char_component(&lit_text);
- assert!(component.kind == AsciiCodeEscape);
- assert!(component.range == range_closed(&lit_text));
- }
-
- // More than 2 digits starts a new codepoint
- let components = closed_char_components(r"'\x555'");
- assert!(components.len() == 2);
- assert!(components[1].kind == CodePoint);
- }
-
- #[test]
- fn test_ascii_escapes() {
- let literals = &[
- r"\'", "\\\"", // equivalent to \"
- r"\n", r"\r", r"\t", r"\\", r"\0",
- ];
-
- for literal in literals {
- let lit_text = format!("'{}'", literal);
- let component = closed_char_component(&lit_text);
- assert!(component.kind == AsciiEscape);
- assert!(component.range == range_closed(&lit_text));
- }
- }
-
- #[test]
- fn test_no_escapes() {
- let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
-
- for &literal in literals {
- let lit_text = format!("'{}'", literal);
- let component = closed_char_component(&lit_text);
- assert!(component.kind == CodePoint);
- assert!(component.range == range_closed(&lit_text));
- }
- }
-}
use rowan::{TextRange, TextUnit};
-use self::CharComponentKind::*;
+use self::StringComponentKind::*;
pub struct Parser<'a> {
+ pub(super) quote: u8, // byte of the closing delimiter; parse_component stops when it peeks this character
pub(super) src: &'a str,
pub(super) pos: usize,
}
impl<'a> Parser<'a> {
- pub fn new(src: &'a str) -> Parser<'a> {
- Parser { src, pos: 0 }
+ pub fn new(src: &'a str, quote: u8) -> Parser<'a> { // quote: delimiter byte stored for later checks; pos starts at 0
+ Parser { quote, src, pos: 0 }
}
// Utility methods
// Char parsing methods
- fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent {
+ fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
match self.peek() {
Some('{') => {
self.advance();
}
let end = self.get_pos();
- CharComponent::new(TextRange::from_to(start, end), UnicodeEscape)
+ StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
}
Some(_) | None => {
let end = self.get_pos();
- CharComponent::new(TextRange::from_to(start, end), UnicodeEscape)
+ StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
}
}
}
- fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent {
+ fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
let code_start = self.get_pos();
while let Some(next) = self.peek() {
if next == '\'' || (self.get_pos() - code_start == 2.into()) {
}
let end = self.get_pos();
- CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
+ StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
}
- fn parse_escape(&mut self, start: TextUnit) -> CharComponent {
+ fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
if self.peek().is_none() {
- return CharComponent::new(TextRange::from_to(start, start), AsciiEscape);
+ return StringComponent::new(TextRange::from_to(start, start), AsciiEscape);
}
let next = self.advance();
match next {
'x' => self.parse_ascii_code_escape(start),
'u' => self.parse_unicode_escape(start),
- _ => CharComponent::new(range, AsciiEscape),
- }
- }
-
- pub fn parse_char_component(&mut self) -> Option<CharComponent> {
- let next = self.peek()?;
-
- // Ignore character close
- if next == '\'' {
- return None;
- }
-
- let start = self.get_pos();
- self.advance();
-
- if next == '\\' {
- Some(self.parse_escape(start))
- } else {
- let end = self.get_pos();
- Some(CharComponent::new(
- TextRange::from_to(start, end),
- CodePoint,
- ))
+ _ => StringComponent::new(range, AsciiEscape),
}
}
}
}
- pub fn parse_string_component(&mut self) -> Option<StringComponent> {
+ pub fn parse_component(&mut self) -> Option<StringComponent> {
let next = self.peek()?;
// Ignore string close
- if next == '"' {
+ if next == self.quote as char {
return None;
}
if next == '\\' {
// Strings can use `\` to ignore newlines, so we first try to parse one of those
// before falling back to parsing char escapes
- self.parse_ignore_newline(start).or_else(|| {
- let char_component = self.parse_escape(start);
- Some(StringComponent::new(
- char_component.range,
- StringComponentKind::Char(char_component.kind),
- ))
- })
+ if self.quote == b'"' {
+ if let Some(component) = self.parse_ignore_newline(start) {
+ return Some(component);
+ }
+ }
+
+ Some(self.parse_escape(start))
} else {
let end = self.get_pos();
Some(StringComponent::new(
TextRange::from_to(start, end),
- StringComponentKind::Char(CodePoint),
+ CodePoint,
))
}
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum StringComponentKind {
IgnoreNewline,
- Char(CharComponentKind),
-}
-
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub struct CharComponent {
- pub range: TextRange,
- pub kind: CharComponentKind,
-}
-
-impl CharComponent {
- fn new(range: TextRange, kind: CharComponentKind) -> CharComponent {
- CharComponent { range, kind }
- }
-}
-
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub enum CharComponentKind {
CodePoint,
AsciiEscape,
AsciiCodeEscape,
-use super::parser::Parser;
-use super::StringComponent;
+use crate::string_lexing::{
+ parser::Parser,
+ StringComponent,
+};
pub fn parse_string_literal(src: &str) -> StringComponentIterator {
StringComponentIterator {
- parser: Parser::new(src),
+ parser: Parser::new(src, b'"'),
has_closing_quote: false,
+ prefix: None,
+ quote: b'"', // plain string literal: no prefix character, body delimited by double quotes
+ }
+}
+
+pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { // byte string: a `b` prefix, then a double-quoted body
+ StringComponentIterator {
+ parser: Parser::new(src, b'"'),
+ has_closing_quote: false,
+ prefix: Some(b'b'),
+ quote: b'"',
+ }
+}
+
+pub fn parse_char_literal(src: &str) -> StringComponentIterator { // char literal: no prefix, single-quoted body
+ StringComponentIterator {
+ parser: Parser::new(src, b'\''),
+ has_closing_quote: false,
+ prefix: None,
+ quote: b'\'',
+ }
+}
+
+pub fn parse_byte_literal(src: &str) -> StringComponentIterator { // byte literal: a `b` prefix, then a single-quoted body
+ StringComponentIterator {
+ parser: Parser::new(src, b'\''),
+ has_closing_quote: false,
+ prefix: Some(b'b'),
+ quote: b'\'',
}
}
pub struct StringComponentIterator<'a> {
parser: Parser<'a>,
pub has_closing_quote: bool,
+ prefix: Option<u8>, // byte that must come before the opening delimiter, if the literal kind has one
+ quote: u8, // delimiter byte asserted at the start and looked for at the end of the literal
}
impl<'a> Iterator for StringComponentIterator<'a> {
type Item = StringComponent;
fn next(&mut self) -> Option<StringComponent> {
if self.parser.pos == 0 {
+ if let Some(prefix) = self.prefix { // first call only: a configured prefix byte must open the literal
+ assert!(
+ self.parser.advance() == prefix as char,
+ "literal should start with a {:?}",
+ prefix as char,
+ );
+ }
assert!(
- self.parser.advance() == '"',
- "string literal should start with double quotes"
+ self.parser.advance() == self.quote as char, // then the opening delimiter must follow
+ "literal should start with a {:?}",
+ self.quote as char,
);
}
- if let Some(component) = self.parser.parse_string_component() {
+ if let Some(component) = self.parser.parse_component() { // hand out components for as long as the parser produces them
return Some(component);
}
// We get here when there are no char components left to parse
- if self.parser.peek() == Some('"') {
+ if self.parser.peek() == Some(self.quote as char) { // a delimiter here is the closing one: consume it and record it
self.parser.advance();
self.has_closing_quote = true;
}
assert!(
self.parser.peek() == None,
- "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
+ "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
self.parser.src,
self.parser.pos,
self.parser.src.len()
None
}
}
+
+#[cfg(test)]
+mod tests { // these tests drive the shared iterator through parse_char_literal only
+ use rowan::TextRange;
+ use crate::string_lexing::{
+ StringComponent,
+ StringComponentKind::*,
+};
+
+ fn parse(src: &str) -> (bool, Vec<StringComponent>) { // runs the char-literal iterator to the end; returns (closing quote seen, components)
+ let component_iterator = &mut super::parse_char_literal(src);
+ let components: Vec<_> = component_iterator.collect();
+ (component_iterator.has_closing_quote, components)
+ }
+
+ fn unclosed_char_component(src: &str) -> StringComponent { // requires exactly one component and no closing quote
+ let (has_closing_quote, components) = parse(src);
+ assert!(!has_closing_quote, "char should not have closing quote");
+ assert!(components.len() == 1);
+ components[0].clone()
+ }
+
+ fn closed_char_component(src: &str) -> StringComponent { // requires exactly one component and a closing quote
+ let (has_closing_quote, components) = parse(src);
+ assert!(has_closing_quote, "char should have closing quote");
+ assert!(
+ components.len() == 1,
+ "Literal: {}\nComponents: {:#?}",
+ src,
+ components
+ );
+ components[0].clone()
+ }
+
+ fn closed_char_components(src: &str) -> Vec<StringComponent> { // requires a closing quote but accepts any number of components
+ let (has_closing_quote, components) = parse(src);
+ assert!(has_closing_quote, "char should have closing quote");
+ components
+ }
+
+ fn range_closed(src: &str) -> TextRange { // offsets 1 to src.len() - 1: src without its first and last byte
+ TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
+ }
+
+ fn range_unclosed(src: &str) -> TextRange { // offsets 1 to src.len(): src without its first byte
+ TextRange::from_to(1.into(), (src.len() as u32).into())
+ }
+
+ #[test]
+ fn test_unicode_escapes() { // a \u escape, braced or bare, is expected to form one UnicodeEscape component
+ let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
+ for escape in unicode_escapes {
+ let escape_sequence = format!(r"'\u{}'", escape);
+ let component = closed_char_component(&escape_sequence);
+ let expected_range = range_closed(&escape_sequence);
+ assert_eq!(component.kind, UnicodeEscape);
+ assert_eq!(component.range, expected_range);
+ }
+ }
+
+ #[test]
+ fn test_unicode_escapes_unclosed() { // with an unterminated brace the component is expected to run through the final quote, leaving the literal unclosed
+ let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
+ for escape in unicode_escapes {
+ let escape_sequence = format!(r"'\u{}'", escape);
+ let component = unclosed_char_component(&escape_sequence);
+ let expected_range = range_unclosed(&escape_sequence);
+ assert_eq!(component.kind, UnicodeEscape);
+ assert_eq!(component.range, expected_range);
+ }
+ }
+
+ #[test]
+ fn test_empty_char() { // two quotes with nothing between them: closed, zero components
+ let (has_closing_quote, components) = parse("''");
+ assert!(has_closing_quote, "char should have closing quote");
+ assert!(components.len() == 0);
+ }
+
+ #[test]
+ fn test_unclosed_char() { // a missing closing quote still yields the single CodePoint component
+ let component = unclosed_char_component("'a");
+ assert!(component.kind == CodePoint);
+ assert!(component.range == TextRange::from_to(1.into(), 2.into()));
+ }
+
+ #[test]
+ fn test_digit_escapes() { // \x followed by zero, one or two digits is expected to be one AsciiCodeEscape component
+ let literals = &[r"", r"5", r"55"];
+
+ for literal in literals {
+ let lit_text = format!(r"'\x{}'", literal);
+ let component = closed_char_component(&lit_text);
+ assert!(component.kind == AsciiCodeEscape);
+ assert!(component.range == range_closed(&lit_text));
+ }
+
+ // More than 2 digits starts a new codepoint
+ let components = closed_char_components(r"'\x555'");
+ assert!(components.len() == 2);
+ assert!(components[1].kind == CodePoint);
+ }
+
+ #[test]
+ fn test_ascii_escapes() { // each listed backslash-plus-character pair is expected to be one AsciiEscape component
+ let literals = &[
+ r"\'", "\\\"", // equivalent to \"
+ r"\n", r"\r", r"\t", r"\\", r"\0",
+ ];
+
+ for literal in literals {
+ let lit_text = format!("'{}'", literal);
+ let component = closed_char_component(&lit_text);
+ assert!(component.kind == AsciiEscape);
+ assert!(component.range == range_closed(&lit_text));
+ }
+ }
+
+ #[test]
+ fn test_no_escapes() { // characters that are only special after a backslash are expected to parse as CodePoint on their own
+ let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
+
+ for &literal in literals {
+ let lit_text = format!("'{}'", literal);
+ let component = closed_char_component(&lit_text);
+ assert!(component.kind == CodePoint);
+ assert!(component.range == range_closed(&lit_text));
+ }
+ }
+}
use crate::{
ast::{self, AstNode},
- string_lexing::{self, CharComponentKind},
+ string_lexing::{self, StringComponentKind},
TextRange,
validation::char,
yellow::{
pub(super) fn validate_byte_component(
text: &str,
- kind: CharComponentKind,
+ kind: StringComponentKind,
range: TextRange,
errors: &mut Vec<SyntaxError>,
) {
- use self::CharComponentKind::*;
+ use self::StringComponentKind::*;
match kind {
AsciiEscape => validate_byte_escape(text, range, errors),
AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
errors.push(SyntaxError::new(ByteOutOfRange, range));
}
}
+ IgnoreNewline => { /* always valid */ }
}
}
let range = component.range + literal_range.start();
match component.kind {
- StringComponentKind::Char(kind) => {
+ StringComponentKind::IgnoreNewline => { /* always valid */ }
+ _ => {
// Chars must escape \t, \n and \r codepoints, but strings don't
let text = &literal_text[component.range];
match text {
"\t" | "\n" | "\r" => { /* always valid */ }
- _ => byte::validate_byte_component(text, kind, range, errors),
+ _ => byte::validate_byte_component(text, component.kind, range, errors),
}
}
- StringComponentKind::IgnoreNewline => { /* always valid */ }
}
}
use crate::{
ast::{self, AstNode},
- string_lexing::{self, CharComponentKind},
+ string_lexing::{self, StringComponentKind},
TextRange,
yellow::{
SyntaxError,
pub(super) fn validate_char_component(
text: &str,
- kind: CharComponentKind,
+ kind: StringComponentKind,
range: TextRange,
errors: &mut Vec<SyntaxError>,
) {
// Validate escapes
- use self::CharComponentKind::*;
+ use self::StringComponentKind::*;
match kind {
AsciiEscape => validate_ascii_escape(text, range, errors),
AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
errors.push(SyntaxError::new(UnescapedCodepoint, range));
}
}
+ StringComponentKind::IgnoreNewline => { /* always valid */ }
}
}
use crate::{
ast::{self, AstNode},
- string_lexing::{self, StringComponentKind},
+ string_lexing,
yellow::{
SyntaxError,
SyntaxErrorKind::*,
for component in &mut components {
let range = component.range + literal_range.start();
- match component.kind {
- StringComponentKind::Char(kind) => {
- // Chars must escape \t, \n and \r codepoints, but strings don't
- let text = &literal_text[component.range];
- match text {
- "\t" | "\n" | "\r" => { /* always valid */ }
- _ => char::validate_char_component(text, kind, range, errors),
- }
- }
- StringComponentKind::IgnoreNewline => { /* always valid */ }
+ // Chars must escape \t, \n and \r codepoints, but strings don't
+ let text = &literal_text[component.range];
+ match text {
+ "\t" | "\n" | "\r" => { /* always valid */ }
+ _ => char::validate_char_component(text, component.kind, range, errors),
}
}