1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
15 The CodeMap tracks all the source code used within a single crate, mapping
16 from integer byte positions to the original source code location. Each bit of
17 source parsed during crate parsing (typically files, in-memory strings, or
18 various bits of macro expansion) covers a contiguous range of bytes in the
19 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
20 and used pervasively in the compiler. They are absolute positions within the
21 CodeMap, which upon request can be converted to line and column information,
22 source code snippets, etc.
26 use serialize::{Encodable, Decodable, Encoder, Decoder};
27 use std::cell::RefCell;
32 fn from_uint(n: uint) -> Self;
33 fn to_uint(&self) -> uint;
36 /// A byte offset. Keep this small (currently 32-bits), as AST contains
38 #[deriving(Clone, PartialEq, Eq, Hash, PartialOrd, Show)]
39 pub struct BytePos(pub u32);
41 /// A character offset. Because of multibyte utf8 characters, a byte offset
42 /// is not equivalent to a character offset. The CodeMap will convert BytePos
43 /// values to CharPos values as necessary.
44 #[deriving(PartialEq, Hash, PartialOrd, Show)]
45 pub struct CharPos(pub uint);
47 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
48 // have been unsuccessful
50 impl Pos for BytePos {
51 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
52 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
55 impl Add<BytePos, BytePos> for BytePos {
56 fn add(&self, rhs: &BytePos) -> BytePos {
57 BytePos((self.to_uint() + rhs.to_uint()) as u32)
61 impl Sub<BytePos, BytePos> for BytePos {
62 fn sub(&self, rhs: &BytePos) -> BytePos {
63 BytePos((self.to_uint() - rhs.to_uint()) as u32)
67 impl Pos for CharPos {
68 fn from_uint(n: uint) -> CharPos { CharPos(n) }
69 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
72 impl Add<CharPos,CharPos> for CharPos {
73 fn add(&self, rhs: &CharPos) -> CharPos {
74 CharPos(self.to_uint() + rhs.to_uint())
78 impl Sub<CharPos,CharPos> for CharPos {
79 fn sub(&self, rhs: &CharPos) -> CharPos {
80 CharPos(self.to_uint() - rhs.to_uint())
85 Spans represent a region of code, used for error reporting. Positions in spans
86 are *absolute* positions from the beginning of the codemap, not positions
87 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
88 to the original source.
90 #[deriving(Clone, Show, Hash)]
94 /// Information about where the macro came from, if this piece of
95 /// code was created by a macro expansion.
99 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_id: NO_EXPANSION };
101 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)]
102 pub struct Spanned<T> {
107 impl PartialEq for Span {
108 fn eq(&self, other: &Span) -> bool {
109 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
111 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
116 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
117 /* Note #1972 -- spans are encoded but not decoded */
118 fn encode(&self, s: &mut S) -> Result<(), E> {
123 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
124 fn decode(_d: &mut D) -> Result<Span, E> {
129 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
130 respan(mk_sp(lo, hi), t)
133 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
134 Spanned {node: t, span: sp}
137 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
141 /* assuming that we're not in macro expansion */
142 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
143 Span {lo: lo, hi: hi, expn_id: NO_EXPANSION}
146 /// Return the span itself if it doesn't come from a macro expansion,
147 /// otherwise return the call site span up to the `enclosing_sp` by
148 /// following the `expn_info` chain.
149 pub fn original_sp(cm: &CodeMap, sp: Span, enclosing_sp: Span) -> Span {
150 let call_site1 = cm.with_expn_info(sp.expn_id, |ei| ei.map(|ei| ei.call_site));
151 let call_site2 = cm.with_expn_info(enclosing_sp.expn_id, |ei| ei.map(|ei| ei.call_site));
152 match (call_site1, call_site2) {
154 (Some(call_site1), Some(call_site2)) if call_site1 == call_site2 => sp,
155 (Some(call_site1), _) => original_sp(cm, call_site1, enclosing_sp),
159 /// A source code location used for error reporting
161 /// Information about the original source
162 pub file: Rc<FileMap>,
163 /// The (1-based) line number
165 /// The (0-based) column offset
169 /// A source code location used as the result of lookup_char_pos_adj
170 // Actually, *none* of the clients use the filename *or* file field;
171 // perhaps they should just be removed.
172 pub struct LocWithOpt {
173 pub filename: FileName,
176 pub file: Option<Rc<FileMap>>,
179 // used to be structural records. Better names, anyone?
180 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
181 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
183 /// The syntax with which a macro was invoked.
184 #[deriving(Clone, Hash, Show)]
185 pub enum MacroFormat {
186 /// e.g. #[deriving(...)] <item>
192 #[deriving(Clone, Hash, Show)]
193 pub struct NameAndSpan {
194 /// The name of the macro that was invoked to create the thing
197 /// The format with which the macro was invoked.
198 pub format: MacroFormat,
199 /// The span of the macro definition itself. The macro may not
200 /// have a sensible definition span (e.g. something defined
201 /// completely inside libsyntax) in which case this is None.
202 pub span: Option<Span>
205 /// Extra information for tracking macro expansion of spans
206 #[deriving(Hash, Show)]
207 pub struct ExpnInfo {
208 /// The location of the actual macro invocation, e.g. `let x =
211 /// This may recursively refer to other macro invocations, e.g. if
212 /// `foo!()` invoked `bar!()` internally, and there was an
213 /// expression inside `bar!`; the call_site of the expression in
214 /// the expansion would point to the `bar!` invocation; that
215 /// call_site span would have its own ExpnInfo, with the call_site
216 /// pointing to the `foo!` invocation.
218 /// Information about the macro and its definition.
220 /// The `callee` of the inner expression in the `call_site`
221 /// example would point to the `macro_rules! bar { ... }` and that
222 /// of the `bar!()` invocation would point to the `macro_rules!
224 pub callee: NameAndSpan
227 #[deriving(PartialEq, Eq, Clone, Show, Hash, Encodable, Decodable)]
228 pub struct ExpnId(u32);
230 pub static NO_EXPANSION: ExpnId = ExpnId(-1);
233 pub fn from_llvm_cookie(cookie: c_uint) -> ExpnId {
234 ExpnId(cookie as u32)
237 pub fn to_llvm_cookie(self) -> i32 {
238 let ExpnId(cookie) = self;
243 pub type FileName = String;
245 pub struct FileLines {
246 pub file: Rc<FileMap>,
250 /// Identifies an offset of a multi-byte character in a FileMap
251 pub struct MultiByteChar {
252 /// The absolute offset of the character in the CodeMap
254 /// The number of bytes, >=2
258 /// A single source in the CodeMap
260 /// The name of the file that the source came from, source that doesn't
261 /// originate from files has names between angle brackets by convention,
264 /// The complete source code
266 /// The start position of this source in the CodeMap
267 pub start_pos: BytePos,
268 /// Locations of line beginnings in the source code
269 pub lines: RefCell<Vec<BytePos> >,
270 /// Locations of multi-byte characters in the source code
271 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
275 /// EFFECT: register a start-of-line offset in the
276 /// table of line-beginnings.
277 /// UNCHECKED INVARIANT: these offsets must be added in the right
278 /// order and must be in the right places; there is shared knowledge
279 /// about what ends a line between this file and parse.rs
280 /// WARNING: pos param here is the offset relative to start of CodeMap,
281 /// and CodeMap will append a newline when adding a filemap without a newline at the end,
282 /// so the safe way to call this is with value calculated as
283 /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
284 pub fn next_line(&self, pos: BytePos) {
285 // the new charpos must be > the last one (or it's the first one).
286 let mut lines = self.lines.borrow_mut();;
287 let line_len = lines.len();
288 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
292 /// get a line from the list of pre-computed line-beginnings
294 /// NOTE(stage0, pcwalton): Remove `#[allow(unused_mut)]` after snapshot.
296 pub fn get_line(&self, line: int) -> String {
297 let mut lines = self.lines.borrow_mut();
298 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
299 let begin = begin.to_uint();
300 let slice = self.src.as_slice().slice_from(begin);
301 match slice.find('\n') {
302 Some(e) => slice.slice_to(e).to_string(),
303 None => slice.to_string()
307 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
308 assert!(bytes >=2 && bytes <= 4);
309 let mbc = MultiByteChar {
313 self.multibyte_chars.borrow_mut().push(mbc);
316 pub fn is_real_file(&self) -> bool {
317 !(self.name.as_slice().starts_with("<") &&
318 self.name.as_slice().ends_with(">"))
323 pub files: RefCell<Vec<Rc<FileMap>>>,
324 expansions: RefCell<Vec<ExpnInfo>>
328 pub fn new() -> CodeMap {
330 files: RefCell::new(Vec::new()),
331 expansions: RefCell::new(Vec::new()),
335 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
336 let mut files = self.files.borrow_mut();
337 let start_pos = match files.last() {
339 Some(last) => last.start_pos.to_uint() + last.src.len(),
342 // Remove utf-8 BOM if any.
343 // FIXME #12884: no efficient/safe way to remove from the start of a string
344 // and reuse the allocation.
345 let mut src = if src.as_slice().starts_with("\ufeff") {
346 String::from_str(src.as_slice().slice_from(3))
348 String::from_str(src.as_slice())
351 // Append '\n' in case it's not already there.
352 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
353 // overflowing into the next filemap in case the last byte of span is also the last
354 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
355 if src.len() > 0 && !src.as_slice().ends_with("\n") {
359 let filemap = Rc::new(FileMap {
361 src: src.to_string(),
362 start_pos: Pos::from_uint(start_pos),
363 lines: RefCell::new(Vec::new()),
364 multibyte_chars: RefCell::new(Vec::new()),
367 files.push(filemap.clone());
372 pub fn mk_substr_filename(&self, sp: Span) -> String {
373 let pos = self.lookup_char_pos(sp.lo);
374 (format!("<{}:{}:{}>",
377 pos.col.to_uint() + 1)).to_string()
380 /// Lookup source information about a BytePos
381 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
385 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
386 let loc = self.lookup_char_pos(pos);
388 filename: loc.file.name.to_string(),
395 pub fn span_to_string(&self, sp: Span) -> String {
396 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
397 return "no-location".to_string();
400 let lo = self.lookup_char_pos_adj(sp.lo);
401 let hi = self.lookup_char_pos_adj(sp.hi);
402 return (format!("{}:{}:{}: {}:{}",
405 lo.col.to_uint() + 1,
407 hi.col.to_uint() + 1)).to_string()
410 pub fn span_to_filename(&self, sp: Span) -> FileName {
411 self.lookup_char_pos(sp.lo).file.name.to_string()
414 pub fn span_to_lines(&self, sp: Span) -> FileLines {
415 let lo = self.lookup_char_pos(sp.lo);
416 let hi = self.lookup_char_pos(sp.hi);
417 let mut lines = Vec::new();
418 for i in range(lo.line - 1u, hi.line as uint) {
421 FileLines {file: lo.file, lines: lines}
424 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
425 let begin = self.lookup_byte_offset(sp.lo);
426 let end = self.lookup_byte_offset(sp.hi);
428 // FIXME #8256: this used to be an assert but whatever precondition
429 // it's testing isn't true for all spans in the AST, so to allow the
430 // caller to not have to fail (and it can't catch it since the CodeMap
431 // isn't sendable), return None
432 if begin.fm.start_pos != end.fm.start_pos {
435 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
436 end.pos.to_uint()).to_string())
440 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
441 for fm in self.files.borrow().iter() {
442 if filename == fm.name.as_slice() {
446 fail!("asking for {} which we don't know about", filename);
449 pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
450 let idx = self.lookup_filemap_idx(bpos);
451 let fm = self.files.borrow().get(idx).clone();
452 let offset = bpos - fm.start_pos;
453 FileMapAndBytePos {fm: fm, pos: offset}
456 /// Converts an absolute BytePos to a CharPos relative to the start of its filemap.
457 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
458 let idx = self.lookup_filemap_idx(bpos);
459 let files = self.files.borrow();
460 let map = files.get(idx);
462 // The number of extra bytes due to multibyte chars in the FileMap
463 let mut total_extra_bytes = 0;
465 for mbc in map.multibyte_chars.borrow().iter() {
466 debug!("{}-byte char at {}", mbc.bytes, mbc.pos);
468 // every character is at least one byte, so we only
469 // count the actual extra bytes.
470 total_extra_bytes += mbc.bytes - 1;
471 // We should never see a byte position in the middle of a
473 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
479 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
480 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
483 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
484 let files = self.files.borrow();
486 let len = files.len();
490 let m = (a + b) / 2u;
491 if files.get(m).start_pos > pos {
497 // There can be filemaps with length 0. These have the same start_pos as the previous
498 // filemap, but are not the filemaps we want (because they are length 0, they cannot
499 // contain what we are looking for). So, rewind until we find a useful filemap.
501 let lines = files.get(a).lines.borrow();
507 fail!("position {} does not resolve to a source location", pos.to_uint());
512 fail!("position {} does not resolve to a source location", pos.to_uint())
518 // NOTE(stage0, pcwalton): Remove `#[allow(unused_mut)]` after snapshot.
520 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
521 let idx = self.lookup_filemap_idx(pos);
523 let files = self.files.borrow();
524 let f = files.get(idx).clone();
527 let mut lines = f.lines.borrow_mut();
528 let mut b = lines.len();
530 let m = (a + b) / 2u;
531 if *lines.get(m) > pos { b = m; } else { a = m; }
534 FileMapAndLine {fm: f, line: a}
537 fn lookup_pos(&self, pos: BytePos) -> Loc {
538 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
539 let line = a + 1u; // Line numbers start at 1
540 let chpos = self.bytepos_to_file_charpos(pos);
541 let linebpos = *f.lines.borrow().get(a);
542 let linechpos = self.bytepos_to_file_charpos(linebpos);
543 debug!("byte pos {} is on the line at byte pos {}",
545 debug!("char pos {} is on the line at char pos {}",
547 debug!("byte is on line: {}", line);
548 assert!(chpos >= linechpos);
552 col: chpos - linechpos
556 pub fn record_expansion(&self, expn_info: ExpnInfo) -> ExpnId {
557 let mut expansions = self.expansions.borrow_mut();
558 expansions.push(expn_info);
559 ExpnId(expansions.len().to_u32().expect("too many ExpnInfo's!") - 1)
562 pub fn with_expn_info<T>(&self, id: ExpnId, f: |Option<&ExpnInfo>| -> T) -> T {
564 NO_EXPANSION => f(None),
565 ExpnId(i) => f(Some(&(*self.expansions.borrow())[i as uint]))
576 let cm = CodeMap::new();
577 let fm = cm.new_filemap("blork.rs".to_string(),
578 "first line.\nsecond line".to_string());
579 fm.next_line(BytePos(0));
580 assert_eq!(&fm.get_line(0),&"first line.".to_string());
581 // TESTING BROKEN BEHAVIOR:
582 fm.next_line(BytePos(10));
583 assert_eq!(&fm.get_line(1), &".".to_string());
589 let cm = CodeMap::new();
590 let fm = cm.new_filemap("blork.rs".to_string(),
591 "first line.\nsecond line".to_string());
592 // TESTING *REALLY* BROKEN BEHAVIOR:
593 fm.next_line(BytePos(0));
594 fm.next_line(BytePos(10));
595 fm.next_line(BytePos(2));
598 fn init_code_map() -> CodeMap {
599 let cm = CodeMap::new();
600 let fm1 = cm.new_filemap("blork.rs".to_string(),
601 "first line.\nsecond line".to_string());
602 let fm2 = cm.new_filemap("empty.rs".to_string(),
604 let fm3 = cm.new_filemap("blork2.rs".to_string(),
605 "first line.\nsecond line".to_string());
607 fm1.next_line(BytePos(0));
608 fm1.next_line(BytePos(12));
609 fm2.next_line(BytePos(24));
610 fm3.next_line(BytePos(24));
611 fm3.next_line(BytePos(34));
618 // Test lookup_byte_offset
619 let cm = init_code_map();
621 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
622 assert_eq!(fmabp1.fm.name, "blork.rs".to_string());
623 assert_eq!(fmabp1.pos, BytePos(22));
625 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
626 assert_eq!(fmabp2.fm.name, "blork2.rs".to_string());
627 assert_eq!(fmabp2.pos, BytePos(0));
632 // Test bytepos_to_file_charpos
633 let cm = init_code_map();
635 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
636 assert_eq!(cp1, CharPos(22));
638 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
639 assert_eq!(cp2, CharPos(0));
644 // Test zero-length filemaps.
645 let cm = init_code_map();
647 let loc1 = cm.lookup_char_pos(BytePos(22));
648 assert_eq!(loc1.file.name, "blork.rs".to_string());
649 assert_eq!(loc1.line, 2);
650 assert_eq!(loc1.col, CharPos(10));
652 let loc2 = cm.lookup_char_pos(BytePos(24));
653 assert_eq!(loc2.file.name, "blork2.rs".to_string());
654 assert_eq!(loc2.line, 1);
655 assert_eq!(loc2.col, CharPos(0));
658 fn init_code_map_mbc() -> CodeMap {
659 let cm = CodeMap::new();
660 // € is a three byte utf8 char.
662 cm.new_filemap("blork.rs".to_string(),
663 "fir€st €€€€ line.\nsecond line".to_string());
664 let fm2 = cm.new_filemap("blork2.rs".to_string(),
665 "first line€€.\n€ second line".to_string());
667 fm1.next_line(BytePos(0));
668 fm1.next_line(BytePos(22));
669 fm2.next_line(BytePos(40));
670 fm2.next_line(BytePos(58));
672 fm1.record_multibyte_char(BytePos(3), 3);
673 fm1.record_multibyte_char(BytePos(9), 3);
674 fm1.record_multibyte_char(BytePos(12), 3);
675 fm1.record_multibyte_char(BytePos(15), 3);
676 fm1.record_multibyte_char(BytePos(18), 3);
677 fm2.record_multibyte_char(BytePos(50), 3);
678 fm2.record_multibyte_char(BytePos(53), 3);
679 fm2.record_multibyte_char(BytePos(58), 3);
686 // Test bytepos_to_file_charpos in the presence of multi-byte chars
687 let cm = init_code_map_mbc();
689 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
690 assert_eq!(cp1, CharPos(3));
692 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
693 assert_eq!(cp2, CharPos(4));
695 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
696 assert_eq!(cp3, CharPos(12));
698 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
699 assert_eq!(cp4, CharPos(15));
704 // Test span_to_lines for a span ending at the end of filemap
705 let cm = init_code_map();
706 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
707 let file_lines = cm.span_to_lines(span);
709 assert_eq!(file_lines.file.name, "blork.rs".to_string());
710 assert_eq!(file_lines.lines.len(), 1);
711 assert_eq!(*file_lines.lines.get(0), 1u);
716 // Test span_to_snippet for a span ending at the end of filemap
717 let cm = init_code_map();
718 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
719 let snippet = cm.span_to_snippet(span);
721 assert_eq!(snippet, Some("second line".to_string()));
726 // Test span_to_string for a span ending at the end of filemap
727 let cm = init_code_map();
728 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
729 let sstr = cm.span_to_string(span);
731 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_string());