1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
15 The CodeMap tracks all the source code used within a single crate, mapping
16 from integer byte positions to the original source code location. Each bit of
17 source parsed during crate parsing (typically files, in-memory strings, or
18 various bits of macro expansion) covers a contiguous range of bytes in the
19 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
20 and used pervasively in the compiler. They are absolute positions within the
21 CodeMap, which upon request can be converted to line and column information,
22 source code snippets, etc.
26 pub use self::MacroFormat::*;
28 use serialize::{Encodable, Decodable, Encoder, Decoder};
29 use std::cell::RefCell;
/// Builds a position of the implementing type from a plain `uint` offset.
34 fn from_uint(n: uint) -> Self;
/// Returns this position as a plain `uint` offset.
35 fn to_uint(&self) -> uint;
38 /// A byte offset. Keep this small (currently 32-bits), as AST contains
40 #[deriving(Clone, PartialEq, Eq, Hash, PartialOrd, Show)]
41 pub struct BytePos(pub u32);
43 /// A character offset. Because of multibyte utf8 characters, a byte offset
44 /// is not equivalent to a character offset. The CodeMap will convert BytePos
45 /// values to CharPos values as necessary.
46 #[deriving(PartialEq, Hash, PartialOrd, Show)]
47 pub struct CharPos(pub uint);
49 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
50 // have been unsuccessful
// `Pos` for byte offsets; the wrapped value is a `u32`.
52 impl Pos for BytePos {
// Narrows with `as u32`, so an `n` that does not fit in 32 bits is truncated.
53 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
// Unwraps the inner `u32` and converts it to `uint`.
54 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
// Addition of two byte offsets.
57 impl Add<BytePos, BytePos> for BytePos {
58 fn add(&self, rhs: &BytePos) -> BytePos {
// The sum is computed in `uint` and then cast back down to `u32` with `as`.
59 BytePos((self.to_uint() + rhs.to_uint()) as u32)
// Subtraction of two byte offsets.
63 impl Sub<BytePos, BytePos> for BytePos {
64 fn sub(&self, rhs: &BytePos) -> BytePos {
// The difference is computed in `uint` and then cast back down to `u32` with `as`.
// NOTE(review): the subtraction is unsigned; presumably callers ensure `rhs <= self` -- confirm.
65 BytePos((self.to_uint() - rhs.to_uint()) as u32)
// `Pos` for character offsets; the wrapped value is already a `uint`, so no cast is needed.
69 impl Pos for CharPos {
// Wraps `n` unchanged.
70 fn from_uint(n: uint) -> CharPos { CharPos(n) }
// Unwraps and returns the inner `uint`.
71 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
// Addition of two character offsets.
74 impl Add<CharPos,CharPos> for CharPos {
75 fn add(&self, rhs: &CharPos) -> CharPos {
// Adds the two underlying `uint` values and re-wraps the result.
76 CharPos(self.to_uint() + rhs.to_uint())
// Subtraction of two character offsets.
80 impl Sub<CharPos,CharPos> for CharPos {
81 fn sub(&self, rhs: &CharPos) -> CharPos {
// Subtracts the underlying `uint` values and re-wraps the result.
// NOTE(review): the subtraction is unsigned; presumably callers ensure `rhs <= self` -- confirm.
82 CharPos(self.to_uint() - rhs.to_uint())
87 Spans represent a region of code, used for error reporting. Positions in spans
88 are *absolute* positions from the beginning of the codemap, not positions
89 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
90 to the original source.
92 #[deriving(Clone, Show, Hash)]
96 /// Information about where the macro came from, if this piece of
97 /// code was created by a macro expansion.
/// A placeholder span: `lo` and `hi` are both byte 0 and `expn_id` is `NO_EXPANSION`.
101 pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_id: NO_EXPANSION };
103 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)]
104 pub struct Spanned<T> {
// Span equality looks only at the `lo` and `hi` endpoints; `expn_id` is not compared.
109 impl PartialEq for Span {
110 fn eq(&self, other: &Span) -> bool {
111 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
// Defined as the negation of `eq`.
113 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
118 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
119 /* Note #1972 -- spans are encoded but not decoded */
120 fn encode(&self, s: &mut S) -> Result<(), E> {
125 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
126 fn decode(_d: &mut D) -> Result<Span, E> {
/// Wraps `t` in a `Spanned` whose span runs from `lo` to `hi`.
/// The span is built with `mk_sp` and attached with `respan`.
131 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
132 respan(mk_sp(lo, hi), t)
/// Pairs the value `t` (stored in `node`) with the span `sp` (stored in `span`).
135 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
136 Spanned {node: t, span: sp}
139 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
/// Builds a `Span` from `lo` to `hi` with `expn_id` set to `NO_EXPANSION`.
143 /* assuming that we're not in macro expansion */
144 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
145 Span {lo: lo, hi: hi, expn_id: NO_EXPANSION}
148 /// Return the span itself if it doesn't come from a macro expansion,
149 /// otherwise return the call site span up to the `enclosing_sp` by
150 /// following the `expn_info` chain.
151 pub fn original_sp(cm: &CodeMap, sp: Span, enclosing_sp: Span) -> Span {
// Call site recorded for `sp.expn_id`; `None` when there is no expansion info for it.
152 let call_site1 = cm.with_expn_info(sp.expn_id, |ei| ei.map(|ei| ei.call_site));
// Same lookup for `enclosing_sp`.
153 let call_site2 = cm.with_expn_info(enclosing_sp.expn_id, |ei| ei.map(|ei| ei.call_site));
154 match (call_site1, call_site2) {
// Both spans have expansion info with equal call sites: `sp` is returned unchanged.
156 (Some(call_site1), Some(call_site2)) if call_site1 == call_site2 => sp,
// `sp` has a call site that did not match above: recurse on that call site.
157 (Some(call_site1), _) => original_sp(cm, call_site1, enclosing_sp),
161 /// A source code location used for error reporting
163 /// Information about the original source
164 pub file: Rc<FileMap>,
165 /// The (1-based) line number
167 /// The (0-based) column offset
171 /// A source code location used as the result of lookup_char_pos_adj
172 // Actually, *none* of the clients use the filename *or* file field;
173 // perhaps they should just be removed.
174 pub struct LocWithOpt {
175 pub filename: FileName,
178 pub file: Option<Rc<FileMap>>,
181 // used to be structural records. Better names, anyone?
/// A filemap together with a line index into it. `lookup_line` returns this, and
/// `lookup_pos` adds one to `line` to obtain a 1-based line number.
182 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
/// A filemap together with a byte position. `lookup_byte_offset` sets `pos` to the
/// queried position minus the filemap's `start_pos`.
183 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
185 /// The syntax with which a macro was invoked.
186 #[deriving(Clone, Hash, Show)]
187 pub enum MacroFormat {
188 /// e.g. #[deriving(...)] <item>
194 #[deriving(Clone, Hash, Show)]
195 pub struct NameAndSpan {
196 /// The name of the macro that was invoked to create the thing
199 /// The format with which the macro was invoked.
200 pub format: MacroFormat,
201 /// The span of the macro definition itself. The macro may not
202 /// have a sensible definition span (e.g. something defined
203 /// completely inside libsyntax) in which case this is None.
204 pub span: Option<Span>
207 /// Extra information for tracking macro expansion of spans
208 #[deriving(Hash, Show)]
209 pub struct ExpnInfo {
210 /// The location of the actual macro invocation, e.g. `let x =
213 /// This may recursively refer to other macro invocations, e.g. if
214 /// `foo!()` invoked `bar!()` internally, and there was an
215 /// expression inside `bar!`; the call_site of the expression in
216 /// the expansion would point to the `bar!` invocation; that
217 /// call_site span would have its own ExpnInfo, with the call_site
218 /// pointing to the `foo!` invocation.
220 /// Information about the macro and its definition.
222 /// The `callee` of the inner expression in the `call_site`
223 /// example would point to the `macro_rules! bar { ... }` and that
224 /// of the `bar!()` invocation would point to the `macro_rules!
226 pub callee: NameAndSpan
/// Identifier of a recorded macro expansion. `CodeMap::record_expansion` returns one
/// per stored `ExpnInfo`, and `CodeMap::with_expn_info` uses the wrapped value as an
/// index into the stored expansions.
229 #[deriving(PartialEq, Eq, Clone, Show, Hash, Encodable, Decodable)]
230 pub struct ExpnId(u32);
/// Sentinel id for "no expansion": `CodeMap::with_expn_info` passes `None` to its
/// closure for this value.
// NOTE(review): `-1` inside a `u32` wrapper presumably wraps to the maximum `u32` -- confirm.
232 pub const NO_EXPANSION: ExpnId = ExpnId(-1);
235 pub fn from_llvm_cookie(cookie: c_uint) -> ExpnId {
236 ExpnId(cookie as u32)
239 pub fn to_llvm_cookie(self) -> i32 {
240 let ExpnId(cookie) = self;
245 pub type FileName = String;
247 pub struct FileLines {
248 pub file: Rc<FileMap>,
252 /// Identifies an offset of a multi-byte character in a FileMap
253 pub struct MultiByteChar {
254 /// The absolute offset of the character in the CodeMap
256 /// The number of bytes, >=2
260 /// A single source in the CodeMap
262 /// The name of the file that the source came from. Source that doesn't
263 /// originate from files has names between angle brackets by convention,
266 /// The complete source code
268 /// The start position of this source in the CodeMap
269 pub start_pos: BytePos,
270 /// Locations of line beginnings in the source code
271 pub lines: RefCell<Vec<BytePos> >,
272 /// Locations of multi-byte characters in the source code
273 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
277 /// EFFECT: register a start-of-line offset in the
278 /// table of line-beginnings.
279 /// UNCHECKED INVARIANT: these offsets must be added in the right
280 /// order and must be in the right places; there is shared knowledge
281 /// about what ends a line between this file and parse.rs
282 /// WARNING: pos param here is the offset relative to start of CodeMap,
283 /// and CodeMap will append a newline when adding a filemap without a newline at the end,
284 /// so the safe way to call this is with value calculated as
285 /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
286 pub fn next_line(&self, pos: BytePos) {
287 // the new charpos must be > the last one (or it's the first one).
288 let mut lines = self.lines.borrow_mut();
289 let line_len = lines.len();
290 assert!(line_len == 0 || ((*lines)[line_len - 1] < pos))
294 /// get a line from the list of pre-computed line-beginnings
296 pub fn get_line(&self, line_number: uint) -> Option<String> {
297 let lines = self.lines.borrow();
298 lines.get(line_number).map(|&line| {
299 let begin: BytePos = line - self.start_pos;
300 let begin = begin.to_uint();
301 let slice = self.src.as_slice().slice_from(begin);
302 match slice.find('\n') {
303 Some(e) => slice.slice_to(e),
309 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
310 assert!(bytes >=2 && bytes <= 4);
311 let mbc = MultiByteChar {
315 self.multibyte_chars.borrow_mut().push(mbc);
/// Returns `false` only when the name both starts with `<` and ends with `>`;
/// every other name is reported as a real file.
318 pub fn is_real_file(&self) -> bool {
319 !(self.name.as_slice().starts_with("<") &&
320 self.name.as_slice().ends_with(">"))
325 pub files: RefCell<Vec<Rc<FileMap>>>,
326 expansions: RefCell<Vec<ExpnInfo>>
330 pub fn new() -> CodeMap {
332 files: RefCell::new(Vec::new()),
333 expansions: RefCell::new(Vec::new()),
337 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
338 let mut files = self.files.borrow_mut();
339 let start_pos = match files.last() {
341 Some(last) => last.start_pos.to_uint() + last.src.len(),
344 // Remove utf-8 BOM if any.
345 // FIXME #12884: no efficient/safe way to remove from the start of a string
346 // and reuse the allocation.
347 let mut src = if src.as_slice().starts_with("\ufeff") {
348 String::from_str(src.as_slice().slice_from(3))
350 String::from_str(src.as_slice())
353 // Append '\n' in case it's not already there.
354 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
355 // overflowing into the next filemap in case the last byte of span is also the last
356 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
357 if src.len() > 0 && !src.as_slice().ends_with("\n") {
361 let filemap = Rc::new(FileMap {
363 src: src.to_string(),
364 start_pos: Pos::from_uint(start_pos),
365 lines: RefCell::new(Vec::new()),
366 multibyte_chars: RefCell::new(Vec::new()),
369 files.push(filemap.clone());
374 pub fn mk_substr_filename(&self, sp: Span) -> String {
375 let pos = self.lookup_char_pos(sp.lo);
376 (format!("<{}:{}:{}>",
379 pos.col.to_uint() + 1)).to_string()
382 /// Lookup source information about a BytePos
383 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
387 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
388 let loc = self.lookup_char_pos(pos);
390 filename: loc.file.name.to_string(),
397 pub fn span_to_string(&self, sp: Span) -> String {
398 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
399 return "no-location".to_string();
402 let lo = self.lookup_char_pos_adj(sp.lo);
403 let hi = self.lookup_char_pos_adj(sp.hi);
404 return (format!("{}:{}:{}: {}:{}",
407 lo.col.to_uint() + 1,
409 hi.col.to_uint() + 1)).to_string()
/// Returns a copy of the file name found by looking up the start (`sp.lo`) of the span.
412 pub fn span_to_filename(&self, sp: Span) -> FileName {
413 self.lookup_char_pos(sp.lo).file.name.to_string()
416 pub fn span_to_lines(&self, sp: Span) -> FileLines {
417 let lo = self.lookup_char_pos(sp.lo);
418 let hi = self.lookup_char_pos(sp.hi);
419 let mut lines = Vec::new();
420 for i in range(lo.line - 1u, hi.line as uint) {
423 FileLines {file: lo.file, lines: lines}
426 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
427 let begin = self.lookup_byte_offset(sp.lo);
428 let end = self.lookup_byte_offset(sp.hi);
430 // FIXME #8256: this used to be an assert but whatever precondition
431 // it's testing isn't true for all spans in the AST, so to allow the
432 // caller to not have to panic (and it can't catch it since the CodeMap
433 // isn't sendable), return None
434 if begin.fm.start_pos != end.fm.start_pos {
437 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
438 end.pos.to_uint()).to_string())
442 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
443 for fm in self.files.borrow().iter() {
444 if filename == fm.name.as_slice() {
448 panic!("asking for {} which we don't know about", filename);
/// Finds the filemap selected by `lookup_filemap_idx` for `bpos` and returns it together
/// with `bpos` expressed as an offset from that filemap's `start_pos`.
451 pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
452 let idx = self.lookup_filemap_idx(bpos);
// Clone the `Rc` handle out of the borrowed `files` vector.
453 let fm = (*self.files.borrow())[idx].clone();
// Convert the absolute CodeMap position into one relative to this filemap.
454 let offset = bpos - fm.start_pos;
455 FileMapAndBytePos {fm: fm, pos: offset}
458 /// Converts an absolute BytePos to a CharPos relative to the start of its filemap.
459 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
460 let idx = self.lookup_filemap_idx(bpos);
461 let files = self.files.borrow();
462 let map = &(*files)[idx];
464 // The number of extra bytes due to multibyte chars in the FileMap
465 let mut total_extra_bytes = 0;
467 for mbc in map.multibyte_chars.borrow().iter() {
468 debug!("{}-byte char at {}", mbc.bytes, mbc.pos);
470 // every character is at least one byte, so we only
471 // count the actual extra bytes.
472 total_extra_bytes += mbc.bytes - 1;
473 // We should never see a byte position in the middle of a
475 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
481 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
482 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
485 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
486 let files = self.files.borrow();
488 let len = files.len();
492 let m = (a + b) / 2u;
493 if files[m].start_pos > pos {
499 // There can be filemaps with length 0. These have the same start_pos as
500 // the previous filemap, but are not the filemaps we want (because they
501 // are length 0, they cannot contain what we are looking for). So,
502 // rewind until we find a useful filemap.
504 let lines = files[a].lines.borrow();
510 panic!("position {} does not resolve to a source location",
516 panic!("position {} does not resolve to a source location",
523 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
524 let idx = self.lookup_filemap_idx(pos);
526 let files = self.files.borrow();
527 let f = (*files)[idx].clone();
530 let lines = f.lines.borrow();
531 let mut b = lines.len();
533 let m = (a + b) / 2u;
534 if (*lines)[m] > pos { b = m; } else { a = m; }
537 FileMapAndLine {fm: f, line: a}
540 fn lookup_pos(&self, pos: BytePos) -> Loc {
541 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
542 let line = a + 1u; // Line numbers start at 1
543 let chpos = self.bytepos_to_file_charpos(pos);
544 let linebpos = (*f.lines.borrow())[a];
545 let linechpos = self.bytepos_to_file_charpos(linebpos);
546 debug!("byte pos {} is on the line at byte pos {}",
548 debug!("char pos {} is on the line at char pos {}",
550 debug!("byte is on line: {}", line);
551 assert!(chpos >= linechpos);
555 col: chpos - linechpos
/// Stores `expn_info` and returns the `ExpnId` for it, which is its index in `expansions`.
/// Panics with "too many ExpnInfo's!" if the new length does not convert to `u32`.
559 pub fn record_expansion(&self, expn_info: ExpnInfo) -> ExpnId {
560 let mut expansions = self.expansions.borrow_mut();
561 expansions.push(expn_info);
// The entry just pushed sits at index `len - 1`.
562 ExpnId(expansions.len().to_u32().expect("too many ExpnInfo's!") - 1)
/// Calls `f` with the `ExpnInfo` stored for `id`, or with `None` when `id` is
/// `NO_EXPANSION`, and returns whatever `f` returns.
565 pub fn with_expn_info<T>(&self, id: ExpnId, f: |Option<&ExpnInfo>| -> T) -> T {
567 NO_EXPANSION => f(None),
// Any other id is used as an index into `expansions`.
568 ExpnId(i) => f(Some(&(*self.expansions.borrow())[i as uint]))
579 let cm = CodeMap::new();
580 let fm = cm.new_filemap("blork.rs".to_string(),
581 "first line.\nsecond line".to_string());
582 fm.next_line(BytePos(0));
583 assert_eq!(fm.get_line(0), Some("first line.".to_string()));
584 // TESTING BROKEN BEHAVIOR:
585 fm.next_line(BytePos(10));
586 assert_eq!(fm.get_line(1), Some(".".to_string()));
592 let cm = CodeMap::new();
593 let fm = cm.new_filemap("blork.rs".to_string(),
594 "first line.\nsecond line".to_string());
595 // TESTING *REALLY* BROKEN BEHAVIOR:
596 fm.next_line(BytePos(0));
597 fm.next_line(BytePos(10));
598 fm.next_line(BytePos(2));
601 fn init_code_map() -> CodeMap {
602 let cm = CodeMap::new();
603 let fm1 = cm.new_filemap("blork.rs".to_string(),
604 "first line.\nsecond line".to_string());
605 let fm2 = cm.new_filemap("empty.rs".to_string(),
607 let fm3 = cm.new_filemap("blork2.rs".to_string(),
608 "first line.\nsecond line".to_string());
610 fm1.next_line(BytePos(0));
611 fm1.next_line(BytePos(12));
612 fm2.next_line(BytePos(24));
613 fm3.next_line(BytePos(24));
614 fm3.next_line(BytePos(34));
621 // Test lookup_byte_offset
622 let cm = init_code_map();
624 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
625 assert_eq!(fmabp1.fm.name, "blork.rs".to_string());
626 assert_eq!(fmabp1.pos, BytePos(22));
628 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
629 assert_eq!(fmabp2.fm.name, "blork2.rs".to_string());
630 assert_eq!(fmabp2.pos, BytePos(0));
635 // Test bytepos_to_file_charpos
636 let cm = init_code_map();
638 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
639 assert_eq!(cp1, CharPos(22));
641 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
642 assert_eq!(cp2, CharPos(0));
647 // Test zero-length filemaps.
648 let cm = init_code_map();
650 let loc1 = cm.lookup_char_pos(BytePos(22));
651 assert_eq!(loc1.file.name, "blork.rs".to_string());
652 assert_eq!(loc1.line, 2);
653 assert_eq!(loc1.col, CharPos(10));
655 let loc2 = cm.lookup_char_pos(BytePos(24));
656 assert_eq!(loc2.file.name, "blork2.rs".to_string());
657 assert_eq!(loc2.line, 1);
658 assert_eq!(loc2.col, CharPos(0));
661 fn init_code_map_mbc() -> CodeMap {
662 let cm = CodeMap::new();
663 // € is a three byte utf8 char.
665 cm.new_filemap("blork.rs".to_string(),
666 "fir€st €€€€ line.\nsecond line".to_string());
667 let fm2 = cm.new_filemap("blork2.rs".to_string(),
668 "first line€€.\n€ second line".to_string());
670 fm1.next_line(BytePos(0));
671 fm1.next_line(BytePos(22));
672 fm2.next_line(BytePos(40));
673 fm2.next_line(BytePos(58));
675 fm1.record_multibyte_char(BytePos(3), 3);
676 fm1.record_multibyte_char(BytePos(9), 3);
677 fm1.record_multibyte_char(BytePos(12), 3);
678 fm1.record_multibyte_char(BytePos(15), 3);
679 fm1.record_multibyte_char(BytePos(18), 3);
680 fm2.record_multibyte_char(BytePos(50), 3);
681 fm2.record_multibyte_char(BytePos(53), 3);
682 fm2.record_multibyte_char(BytePos(58), 3);
689 // Test bytepos_to_file_charpos in the presence of multi-byte chars
690 let cm = init_code_map_mbc();
692 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
693 assert_eq!(cp1, CharPos(3));
695 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
696 assert_eq!(cp2, CharPos(4));
698 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
699 assert_eq!(cp3, CharPos(12));
701 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
702 assert_eq!(cp4, CharPos(15));
707 // Test span_to_lines for a span ending at the end of filemap
708 let cm = init_code_map();
709 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
710 let file_lines = cm.span_to_lines(span);
712 assert_eq!(file_lines.file.name, "blork.rs".to_string());
713 assert_eq!(file_lines.lines.len(), 1);
714 assert_eq!(file_lines.lines[0], 1u);
719 // Test span_to_snippet for a span ending at the end of filemap
720 let cm = init_code_map();
721 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
722 let snippet = cm.span_to_snippet(span);
724 assert_eq!(snippet, Some("second line".to_string()));
729 // Test span_to_string for a span ending at the end of filemap
730 let cm = init_code_map();
731 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
732 let sstr = cm.span_to_string(span);
734 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_string());