1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
The CodeMap tracks all the source code used within a single crate, mapping
from integer byte positions to the original source code location. Each bit of
source parsed during crate parsing (typically files, in-memory strings, or
various bits of macro expansion) covers a contiguous range of bytes in the
CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
and used pervasively in the compiler. They are absolute positions within the
CodeMap, which upon request can be converted to line and column information,
source code snippets, etc.
26 use serialize::{Encodable, Decodable, Encoder, Decoder};
27 use std::cell::RefCell;
31 fn from_uint(n: uint) -> Self;
32 fn to_uint(&self) -> uint;
/// A byte offset. Keep this small (currently 32-bits), as the AST contains
/// a lot of them.
37 #[deriving(Clone, PartialEq, Eq, Hash, PartialOrd, Show)]
38 pub struct BytePos(pub u32);
40 /// A character offset. Because of multibyte utf8 characters, a byte offset
41 /// is not equivalent to a character offset. The CodeMap will convert BytePos
42 /// values to CharPos values as necessary.
43 #[deriving(PartialEq, Hash, PartialOrd, Show)]
44 pub struct CharPos(pub uint);
46 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
47 // have been unsuccessful
49 impl Pos for BytePos {
50 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
51 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
54 impl Add<BytePos, BytePos> for BytePos {
55 fn add(&self, rhs: &BytePos) -> BytePos {
56 BytePos((self.to_uint() + rhs.to_uint()) as u32)
60 impl Sub<BytePos, BytePos> for BytePos {
61 fn sub(&self, rhs: &BytePos) -> BytePos {
62 BytePos((self.to_uint() - rhs.to_uint()) as u32)
66 impl Pos for CharPos {
67 fn from_uint(n: uint) -> CharPos { CharPos(n) }
68 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
71 impl Add<CharPos,CharPos> for CharPos {
72 fn add(&self, rhs: &CharPos) -> CharPos {
73 CharPos(self.to_uint() + rhs.to_uint())
77 impl Sub<CharPos,CharPos> for CharPos {
78 fn sub(&self, rhs: &CharPos) -> CharPos {
79 CharPos(self.to_uint() - rhs.to_uint())
84 Spans represent a region of code, used for error reporting. Positions in spans
85 are *absolute* positions from the beginning of the codemap, not positions
86 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
87 to the original source.
89 #[deriving(Clone, Show, Hash)]
93 /// Information about where the macro came from, if this piece of
94 /// code was created by a macro expansion.
98 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_id: NO_EXPANSION };
100 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)]
101 pub struct Spanned<T> {
106 impl PartialEq for Span {
107 fn eq(&self, other: &Span) -> bool {
108 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
110 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
115 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
116 /* Note #1972 -- spans are encoded but not decoded */
117 fn encode(&self, s: &mut S) -> Result<(), E> {
122 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
123 fn decode(_d: &mut D) -> Result<Span, E> {
128 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
129 respan(mk_sp(lo, hi), t)
132 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
133 Spanned {node: t, span: sp}
136 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
140 /* assuming that we're not in macro expansion */
141 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
142 Span {lo: lo, hi: hi, expn_id: NO_EXPANSION}
145 /// Return the span itself if it doesn't come from a macro expansion,
146 /// otherwise return the call site span up to the `enclosing_sp` by
147 /// following the `expn_info` chain.
148 pub fn original_sp(cm: &CodeMap, sp: Span, enclosing_sp: Span) -> Span {
149 let call_site1 = cm.with_expn_info(sp.expn_id, |ei| ei.map(|ei| ei.call_site));
150 let call_site2 = cm.with_expn_info(enclosing_sp.expn_id, |ei| ei.map(|ei| ei.call_site));
151 match (call_site1, call_site2) {
153 (Some(call_site1), Some(call_site2)) if call_site1 == call_site2 => sp,
154 (Some(call_site1), _) => original_sp(cm, call_site1, enclosing_sp),
158 /// A source code location used for error reporting
160 /// Information about the original source
161 pub file: Rc<FileMap>,
162 /// The (1-based) line number
164 /// The (0-based) column offset
168 /// A source code location used as the result of lookup_char_pos_adj
169 // Actually, *none* of the clients use the filename *or* file field;
170 // perhaps they should just be removed.
171 pub struct LocWithOpt {
172 pub filename: FileName,
175 pub file: Option<Rc<FileMap>>,
178 // used to be structural records. Better names, anyone?
179 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
180 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
182 /// The syntax with which a macro was invoked.
183 #[deriving(Clone, Hash, Show)]
184 pub enum MacroFormat {
185 /// e.g. #[deriving(...)] <item>
191 #[deriving(Clone, Hash, Show)]
192 pub struct NameAndSpan {
193 /// The name of the macro that was invoked to create the thing
196 /// The format with which the macro was invoked.
197 pub format: MacroFormat,
198 /// The span of the macro definition itself. The macro may not
199 /// have a sensible definition span (e.g. something defined
200 /// completely inside libsyntax) in which case this is None.
201 pub span: Option<Span>
204 /// Extra information for tracking macro expansion of spans
205 #[deriving(Hash, Show)]
206 pub struct ExpnInfo {
207 /// The location of the actual macro invocation, e.g. `let x =
210 /// This may recursively refer to other macro invocations, e.g. if
211 /// `foo!()` invoked `bar!()` internally, and there was an
212 /// expression inside `bar!`; the call_site of the expression in
213 /// the expansion would point to the `bar!` invocation; that
214 /// call_site span would have its own ExpnInfo, with the call_site
215 /// pointing to the `foo!` invocation.
217 /// Information about the macro and its definition.
219 /// The `callee` of the inner expression in the `call_site`
220 /// example would point to the `macro_rules! bar { ... }` and that
221 /// of the `bar!()` invocation would point to the `macro_rules!
223 pub callee: NameAndSpan
226 #[deriving(PartialEq, Eq, Clone, Show, Hash)]
227 pub struct ExpnId(u32);
229 pub static NO_EXPANSION: ExpnId = ExpnId(-1);
231 pub type FileName = String;
233 pub struct FileLines {
234 pub file: Rc<FileMap>,
238 /// Identifies an offset of a multi-byte character in a FileMap
239 pub struct MultiByteChar {
240 /// The absolute offset of the character in the CodeMap
242 /// The number of bytes, >=2
246 /// A single source in the CodeMap
248 /// The name of the file that the source came from, source that doesn't
249 /// originate from files has names between angle brackets by convention,
252 /// The complete source code
254 /// The start position of this source in the CodeMap
255 pub start_pos: BytePos,
256 /// Locations of lines beginnings in the source code
257 pub lines: RefCell<Vec<BytePos> >,
258 /// Locations of multi-byte characters in the source code
259 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
263 /// EFFECT: register a start-of-line offset in the
264 /// table of line-beginnings.
265 /// UNCHECKED INVARIANT: these offsets must be added in the right
266 /// order and must be in the right places; there is shared knowledge
267 /// about what ends a line between this file and parse.rs
268 /// WARNING: pos param here is the offset relative to start of CodeMap,
269 /// and CodeMap will append a newline when adding a filemap without a newline at the end,
270 /// so the safe way to call this is with value calculated as
271 /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
272 pub fn next_line(&self, pos: BytePos) {
273 // the new charpos must be > the last one (or it's the first one).
274 let mut lines = self.lines.borrow_mut();;
275 let line_len = lines.len();
276 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
280 /// get a line from the list of pre-computed line-beginnings
281 pub fn get_line(&self, line: int) -> String {
282 let mut lines = self.lines.borrow_mut();
283 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
284 let begin = begin.to_uint();
285 let slice = self.src.as_slice().slice_from(begin);
286 match slice.find('\n') {
287 Some(e) => slice.slice_to(e).to_string(),
288 None => slice.to_string()
292 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
293 assert!(bytes >=2 && bytes <= 4);
294 let mbc = MultiByteChar {
298 self.multibyte_chars.borrow_mut().push(mbc);
301 pub fn is_real_file(&self) -> bool {
302 !(self.name.as_slice().starts_with("<") &&
303 self.name.as_slice().ends_with(">"))
308 pub files: RefCell<Vec<Rc<FileMap>>>,
309 expansions: RefCell<Vec<ExpnInfo>>
313 pub fn new() -> CodeMap {
315 files: RefCell::new(Vec::new()),
316 expansions: RefCell::new(Vec::new()),
320 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
321 let mut files = self.files.borrow_mut();
322 let start_pos = match files.last() {
324 Some(last) => last.start_pos.to_uint() + last.src.len(),
327 // Remove utf-8 BOM if any.
328 // FIXME #12884: no efficient/safe way to remove from the start of a string
329 // and reuse the allocation.
330 let mut src = if src.as_slice().starts_with("\ufeff") {
331 String::from_str(src.as_slice().slice_from(3))
333 String::from_str(src.as_slice())
336 // Append '\n' in case it's not already there.
337 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
338 // overflowing into the next filemap in case the last byte of span is also the last
339 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
340 if src.len() > 0 && !src.as_slice().ends_with("\n") {
344 let filemap = Rc::new(FileMap {
346 src: src.to_string(),
347 start_pos: Pos::from_uint(start_pos),
348 lines: RefCell::new(Vec::new()),
349 multibyte_chars: RefCell::new(Vec::new()),
352 files.push(filemap.clone());
357 pub fn mk_substr_filename(&self, sp: Span) -> String {
358 let pos = self.lookup_char_pos(sp.lo);
359 (format!("<{}:{}:{}>",
362 pos.col.to_uint() + 1)).to_string()
365 /// Lookup source information about a BytePos
366 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
370 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
371 let loc = self.lookup_char_pos(pos);
373 filename: loc.file.name.to_string(),
380 pub fn span_to_string(&self, sp: Span) -> String {
381 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
382 return "no-location".to_string();
385 let lo = self.lookup_char_pos_adj(sp.lo);
386 let hi = self.lookup_char_pos_adj(sp.hi);
387 return (format!("{}:{}:{}: {}:{}",
390 lo.col.to_uint() + 1,
392 hi.col.to_uint() + 1)).to_string()
395 pub fn span_to_filename(&self, sp: Span) -> FileName {
396 self.lookup_char_pos(sp.lo).file.name.to_string()
399 pub fn span_to_lines(&self, sp: Span) -> FileLines {
400 let lo = self.lookup_char_pos(sp.lo);
401 let hi = self.lookup_char_pos(sp.hi);
402 let mut lines = Vec::new();
403 for i in range(lo.line - 1u, hi.line as uint) {
406 FileLines {file: lo.file, lines: lines}
409 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
410 let begin = self.lookup_byte_offset(sp.lo);
411 let end = self.lookup_byte_offset(sp.hi);
413 // FIXME #8256: this used to be an assert but whatever precondition
414 // it's testing isn't true for all spans in the AST, so to allow the
415 // caller to not have to fail (and it can't catch it since the CodeMap
416 // isn't sendable), return None
417 if begin.fm.start_pos != end.fm.start_pos {
420 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
421 end.pos.to_uint()).to_string())
425 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
426 for fm in self.files.borrow().iter() {
427 if filename == fm.name.as_slice() {
431 fail!("asking for {} which we don't know about", filename);
434 pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
435 let idx = self.lookup_filemap_idx(bpos);
436 let fm = self.files.borrow().get(idx).clone();
437 let offset = bpos - fm.start_pos;
438 FileMapAndBytePos {fm: fm, pos: offset}
441 /// Converts an absolute BytePos to a CharPos relative to the filemap and above.
442 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
443 let idx = self.lookup_filemap_idx(bpos);
444 let files = self.files.borrow();
445 let map = files.get(idx);
447 // The number of extra bytes due to multibyte chars in the FileMap
448 let mut total_extra_bytes = 0;
450 for mbc in map.multibyte_chars.borrow().iter() {
451 debug!("{}-byte char at {}", mbc.bytes, mbc.pos);
453 // every character is at least one byte, so we only
454 // count the actual extra bytes.
455 total_extra_bytes += mbc.bytes - 1;
456 // We should never see a byte position in the middle of a
458 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
464 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
465 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
468 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
469 let files = self.files.borrow();
471 let len = files.len();
475 let m = (a + b) / 2u;
476 if files.get(m).start_pos > pos {
482 // There can be filemaps with length 0. These have the same start_pos as the previous
483 // filemap, but are not the filemaps we want (because they are length 0, they cannot
484 // contain what we are looking for). So, rewind until we find a useful filemap.
486 let lines = files.get(a).lines.borrow();
492 fail!("position {} does not resolve to a source location", pos.to_uint());
497 fail!("position {} does not resolve to a source location", pos.to_uint())
503 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
504 let idx = self.lookup_filemap_idx(pos);
506 let files = self.files.borrow();
507 let f = files.get(idx).clone();
510 let mut lines = f.lines.borrow_mut();
511 let mut b = lines.len();
513 let m = (a + b) / 2u;
514 if *lines.get(m) > pos { b = m; } else { a = m; }
517 FileMapAndLine {fm: f, line: a}
520 fn lookup_pos(&self, pos: BytePos) -> Loc {
521 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
522 let line = a + 1u; // Line numbers start at 1
523 let chpos = self.bytepos_to_file_charpos(pos);
524 let linebpos = *f.lines.borrow().get(a);
525 let linechpos = self.bytepos_to_file_charpos(linebpos);
526 debug!("byte pos {} is on the line at byte pos {}",
528 debug!("char pos {} is on the line at char pos {}",
530 debug!("byte is on line: {}", line);
531 assert!(chpos >= linechpos);
535 col: chpos - linechpos
539 pub fn record_expansion(&self, expn_info: ExpnInfo) -> ExpnId {
540 let mut expansions = self.expansions.borrow_mut();
541 expansions.push(expn_info);
542 ExpnId(expansions.len().to_u32().expect("too many ExpnInfo's!") - 1)
545 pub fn with_expn_info<T>(&self, id: ExpnId, f: |Option<&ExpnInfo>| -> T) -> T {
547 NO_EXPANSION => f(None),
548 ExpnId(i) => f(Some(&(*self.expansions.borrow())[i as uint]))
559 let cm = CodeMap::new();
560 let fm = cm.new_filemap("blork.rs".to_string(),
561 "first line.\nsecond line".to_string());
562 fm.next_line(BytePos(0));
563 assert_eq!(&fm.get_line(0),&"first line.".to_string());
564 // TESTING BROKEN BEHAVIOR:
565 fm.next_line(BytePos(10));
566 assert_eq!(&fm.get_line(1), &".".to_string());
572 let cm = CodeMap::new();
573 let fm = cm.new_filemap("blork.rs".to_string(),
574 "first line.\nsecond line".to_string());
575 // TESTING *REALLY* BROKEN BEHAVIOR:
576 fm.next_line(BytePos(0));
577 fm.next_line(BytePos(10));
578 fm.next_line(BytePos(2));
581 fn init_code_map() -> CodeMap {
582 let cm = CodeMap::new();
583 let fm1 = cm.new_filemap("blork.rs".to_string(),
584 "first line.\nsecond line".to_string());
585 let fm2 = cm.new_filemap("empty.rs".to_string(),
587 let fm3 = cm.new_filemap("blork2.rs".to_string(),
588 "first line.\nsecond line".to_string());
590 fm1.next_line(BytePos(0));
591 fm1.next_line(BytePos(12));
592 fm2.next_line(BytePos(24));
593 fm3.next_line(BytePos(24));
594 fm3.next_line(BytePos(34));
601 // Test lookup_byte_offset
602 let cm = init_code_map();
604 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
605 assert_eq!(fmabp1.fm.name, "blork.rs".to_string());
606 assert_eq!(fmabp1.pos, BytePos(22));
608 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
609 assert_eq!(fmabp2.fm.name, "blork2.rs".to_string());
610 assert_eq!(fmabp2.pos, BytePos(0));
615 // Test bytepos_to_file_charpos
616 let cm = init_code_map();
618 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
619 assert_eq!(cp1, CharPos(22));
621 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
622 assert_eq!(cp2, CharPos(0));
627 // Test zero-length filemaps.
628 let cm = init_code_map();
630 let loc1 = cm.lookup_char_pos(BytePos(22));
631 assert_eq!(loc1.file.name, "blork.rs".to_string());
632 assert_eq!(loc1.line, 2);
633 assert_eq!(loc1.col, CharPos(10));
635 let loc2 = cm.lookup_char_pos(BytePos(24));
636 assert_eq!(loc2.file.name, "blork2.rs".to_string());
637 assert_eq!(loc2.line, 1);
638 assert_eq!(loc2.col, CharPos(0));
641 fn init_code_map_mbc() -> CodeMap {
642 let cm = CodeMap::new();
643 // € is a three byte utf8 char.
645 cm.new_filemap("blork.rs".to_string(),
646 "fir€st €€€€ line.\nsecond line".to_string());
647 let fm2 = cm.new_filemap("blork2.rs".to_string(),
648 "first line€€.\n€ second line".to_string());
650 fm1.next_line(BytePos(0));
651 fm1.next_line(BytePos(22));
652 fm2.next_line(BytePos(40));
653 fm2.next_line(BytePos(58));
655 fm1.record_multibyte_char(BytePos(3), 3);
656 fm1.record_multibyte_char(BytePos(9), 3);
657 fm1.record_multibyte_char(BytePos(12), 3);
658 fm1.record_multibyte_char(BytePos(15), 3);
659 fm1.record_multibyte_char(BytePos(18), 3);
660 fm2.record_multibyte_char(BytePos(50), 3);
661 fm2.record_multibyte_char(BytePos(53), 3);
662 fm2.record_multibyte_char(BytePos(58), 3);
669 // Test bytepos_to_file_charpos in the presence of multi-byte chars
670 let cm = init_code_map_mbc();
672 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
673 assert_eq!(cp1, CharPos(3));
675 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
676 assert_eq!(cp2, CharPos(4));
678 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
679 assert_eq!(cp3, CharPos(12));
681 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
682 assert_eq!(cp4, CharPos(15));
687 // Test span_to_lines for a span ending at the end of filemap
688 let cm = init_code_map();
689 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
690 let file_lines = cm.span_to_lines(span);
692 assert_eq!(file_lines.file.name, "blork.rs".to_string());
693 assert_eq!(file_lines.lines.len(), 1);
694 assert_eq!(*file_lines.lines.get(0), 1u);
699 // Test span_to_snippet for a span ending at the end of filemap
700 let cm = init_code_map();
701 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
702 let snippet = cm.span_to_snippet(span);
704 assert_eq!(snippet, Some("second line".to_string()));
709 // Test span_to_str for a span ending at the end of filemap
710 let cm = init_code_map();
711 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_id: NO_EXPANSION};
712 let sstr = cm.span_to_string(span);
714 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_string());