1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a continuous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
24 use serialize::{Encodable, Decodable, Encoder, Decoder};
25 use std::cell::RefCell;
30 fn from_uint(n: uint) -> Self;
31 fn to_uint(&self) -> uint;
34 /// A byte offset. Keep this small (currently 32 bits), as the AST contains
/// these (byte positions are stored in spans, which are used pervasively
/// in the compiler).
36 #[deriving(Clone, PartialEq, Eq, Hash, PartialOrd, Show)]
37 pub struct BytePos(pub u32);
39 /// A character offset. Because of multibyte utf8 characters, a byte offset
40 /// is not equivalent to a character offset. The CodeMap will convert BytePos
41 /// values to CharPos values as necessary.
42 #[deriving(PartialEq, Hash, PartialOrd, Show)]
43 pub struct CharPos(pub uint);
45 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
46 // have been unsuccessful
48 impl Pos for BytePos {
49 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
50 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
53 impl Add<BytePos, BytePos> for BytePos {
54 fn add(&self, rhs: &BytePos) -> BytePos {
55 BytePos((self.to_uint() + rhs.to_uint()) as u32)
59 impl Sub<BytePos, BytePos> for BytePos {
60 fn sub(&self, rhs: &BytePos) -> BytePos {
61 BytePos((self.to_uint() - rhs.to_uint()) as u32)
65 impl Pos for CharPos {
66 fn from_uint(n: uint) -> CharPos { CharPos(n) }
67 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
70 impl Add<CharPos,CharPos> for CharPos {
71 fn add(&self, rhs: &CharPos) -> CharPos {
72 CharPos(self.to_uint() + rhs.to_uint())
76 impl Sub<CharPos,CharPos> for CharPos {
77 fn sub(&self, rhs: &CharPos) -> CharPos {
78 CharPos(self.to_uint() - rhs.to_uint())
83 Spans represent a region of code, used for error reporting. Positions in spans
84 are *absolute* positions from the beginning of the codemap, not positions
85 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
86 to the original source.
88 #[deriving(Clone, Show, Hash)]
92 /// Information about where the macro came from, if this piece of
93 /// code was created by a macro expansion.
94 pub expn_info: Option<Gc<ExpnInfo>>
/// Placeholder span: both ends at byte position 0 and no macro-expansion info.
97 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None };
99 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
100 pub struct Spanned<T> {
105 impl PartialEq for Span {
106 fn eq(&self, other: &Span) -> bool {
107 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
109 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
114 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
115 /* Note #1972 -- spans are encoded but not decoded */
116 fn encode(&self, s: &mut S) -> Result<(), E> {
121 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
122 fn decode(_d: &mut D) -> Result<Span, E> {
127 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
128 respan(mk_sp(lo, hi), t)
131 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
132 Spanned {node: t, span: sp}
135 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
139 /* assuming that we're not in macro expansion */
140 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
141 Span {lo: lo, hi: hi, expn_info: None}
144 /// Return the span itself if it doesn't come from a macro expansion,
145 /// otherwise return the call site span up to the `enclosing_sp` by
146 /// following the `expn_info` chain.
147 pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
148 match (sp.expn_info, enclosing_sp.expn_info) {
// Both spans carry expansion info with the same call site: `sp` is returned as is.
150 (Some(expn1), Some(expn2)) if expn1.call_site == expn2.call_site => sp,
// Otherwise step out one level and retry with the call site of `sp`'s expansion.
151 (Some(expn1), _) => original_sp(expn1.call_site, enclosing_sp),
155 /// A source code location used for error reporting
157 /// Information about the original source
158 pub file: Rc<FileMap>,
159 /// The (1-based) line number
161 /// The (0-based) column offset
165 /// A source code location used as the result of lookup_char_pos_adj
166 // Actually, *none* of the clients use the filename *or* file field;
167 // perhaps they should just be removed.
168 pub struct LocWithOpt {
169 pub filename: FileName,
172 pub file: Option<Rc<FileMap>>,
175 // used to be structural records. Better names, anyone?
/// A filemap paired with a line index into its `lines` table
/// (0-based; `lookup_pos` adds 1 to obtain the reported line number).
176 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
/// A filemap paired with a byte position; `lookup_byte_offset` fills `pos`
/// with the offset relative to the filemap's `start_pos`.
177 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
179 /// The syntax with which a macro was invoked.
180 #[deriving(Clone, Hash, Show)]
181 pub enum MacroFormat {
182 /// e.g. #[deriving(...)] <item>
188 #[deriving(Clone, Hash, Show)]
189 pub struct NameAndSpan {
190 /// The name of the macro that was invoked to create the thing
193 /// The format with which the macro was invoked.
194 pub format: MacroFormat,
195 /// The span of the macro definition itself. The macro may not
196 /// have a sensible definition span (e.g. something defined
197 /// completely inside libsyntax) in which case this is None.
198 pub span: Option<Span>
201 /// Extra information for tracking macro expansion of spans
202 #[deriving(Hash, Show)]
203 pub struct ExpnInfo {
204 /// The location of the actual macro invocation, e.g. `let x =
207 /// This may recursively refer to other macro invocations, e.g. if
208 /// `foo!()` invoked `bar!()` internally, and there was an
209 /// expression inside `bar!`; the call_site of the expression in
210 /// the expansion would point to the `bar!` invocation; that
211 /// call_site span would have its own ExpnInfo, with the call_site
212 /// pointing to the `foo!` invocation.
214 /// Information about the macro and its definition.
216 /// The `callee` of the inner expression in the `call_site`
217 /// example would point to the `macro_rules! bar { ... }` and that
218 /// of the `bar!()` invocation would point to the `macro_rules!
220 pub callee: NameAndSpan
223 pub type FileName = String;
225 pub struct FileLines {
226 pub file: Rc<FileMap>,
230 /// Identifies an offset of a multi-byte character in a FileMap
231 pub struct MultiByteChar {
232 /// The absolute offset of the character in the CodeMap
234 /// The number of bytes, >=2
238 /// A single source in the CodeMap
240 /// The name of the file that the source came from, source that doesn't
241 /// originate from files has names between angle brackets by convention,
244 /// The complete source code
246 /// The start position of this source in the CodeMap
247 pub start_pos: BytePos,
248 /// Locations of line beginnings in the source code
249 pub lines: RefCell<Vec<BytePos> >,
250 /// Locations of multi-byte characters in the source code
251 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
255 /// EFFECT: register a start-of-line offset in the
256 /// table of line-beginnings.
257 /// UNCHECKED INVARIANT: these offsets must be added in the right
258 /// order and must be in the right places; there is shared knowledge
259 /// about what ends a line between this file and parse.rs
260 /// WARNING: pos param here is the offset relative to start of CodeMap,
261 /// and CodeMap will append a newline when adding a filemap without a newline at the end,
262 /// so the safe way to call this is with value calculated as
263 /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
///
/// The only ordering check performed here is the assertion below: `pos` must be
/// strictly greater than the most recently registered line start.
264 pub fn next_line(&self, pos: BytePos) {
265 // the new BytePos must be > the last one (or it's the first one).
266 let mut lines = self.lines.borrow_mut();;
267 let line_len = lines.len();
268 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
272 /// get a line from the list of pre-computed line-beginnings
273 pub fn get_line(&self, line: int) -> String {
274 let mut lines = self.lines.borrow_mut();
275 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
276 let begin = begin.to_uint();
277 let slice = self.src.as_slice().slice_from(begin);
278 match slice.find('\n') {
279 Some(e) => slice.slice_to(e).to_string(),
280 None => slice.to_string()
/// Appends a `MultiByteChar` entry to this filemap's `multibyte_chars` table.
/// `bytes` must be between 2 and 4 inclusive (checked by assertion).
284 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
285 assert!(bytes >=2 && bytes <= 4);
286 let mbc = MultiByteChar {
// NOTE(review): the initializer fields are not visible in this listing;
// presumably they are filled from `pos` and `bytes` -- confirm.
290 self.multibyte_chars.borrow_mut().push(mbc);
293 pub fn is_real_file(&self) -> bool {
294 !(self.name.as_slice().starts_with("<") &&
295 self.name.as_slice().ends_with(">"))
300 pub files: RefCell<Vec<Rc<FileMap>>>
304 pub fn new() -> CodeMap {
306 files: RefCell::new(Vec::new()),
/// Registers a new source under `filename` and returns a shared handle to its
/// `FileMap`. When other filemaps exist, the new one starts right after the
/// end of the last one (its `start_pos` plus the length of its source).
310 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
311 let mut files = self.files.borrow_mut();
312 let start_pos = match files.last() {
314 Some(last) => last.start_pos.to_uint() + last.src.len(),
317 // Remove utf-8 BOM if any.
318 // FIXME #12884: no efficient/safe way to remove from the start of a string
319 // and reuse the allocation.
320 let mut src = if src.as_slice().starts_with("\ufeff") {
// U+FEFF occupies three bytes in UTF-8, hence the slice from byte 3.
321 String::from_str(src.as_slice().slice_from(3))
323 String::from_str(src.as_slice())
326 // Append '\n' in case it's not already there.
327 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
328 // overflowing into the next filemap in case the last byte of span is also the last
329 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
330 if src.len() > 0 && !src.as_slice().ends_with("\n") {
334 let filemap = Rc::new(FileMap {
336 src: src.to_string(),
337 start_pos: Pos::from_uint(start_pos),
// Line and multi-byte tables start out empty.
338 lines: RefCell::new(Vec::new()),
339 multibyte_chars: RefCell::new(Vec::new()),
// The CodeMap keeps its own reference-counted handle to the new filemap.
342 files.push(filemap.clone());
347 pub fn mk_substr_filename(&self, sp: Span) -> String {
348 let pos = self.lookup_char_pos(sp.lo);
349 (format!("<{}:{}:{}>",
352 pos.col.to_uint() + 1)).to_string()
355 /// Lookup source information about a BytePos
356 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
360 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
361 let loc = self.lookup_char_pos(pos);
363 filename: loc.file.name.to_string(),
370 pub fn span_to_string(&self, sp: Span) -> String {
371 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
372 return "no-location".to_string();
375 let lo = self.lookup_char_pos_adj(sp.lo);
376 let hi = self.lookup_char_pos_adj(sp.hi);
377 return (format!("{}:{}:{}: {}:{}",
380 lo.col.to_uint() + 1,
382 hi.col.to_uint() + 1)).to_string()
385 pub fn span_to_filename(&self, sp: Span) -> FileName {
386 self.lookup_char_pos(sp.lo).file.name.to_string()
389 pub fn span_to_lines(&self, sp: Span) -> FileLines {
390 let lo = self.lookup_char_pos(sp.lo);
391 let hi = self.lookup_char_pos(sp.hi);
392 let mut lines = Vec::new();
393 for i in range(lo.line - 1u, hi.line as uint) {
396 FileLines {file: lo.file, lines: lines}
/// Returns the source text between `sp.lo` and `sp.hi`.
/// Per the FIXME below, `None` is returned when the two ends resolve to
/// different filemaps (detected by comparing their `start_pos`).
399 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
400 let begin = self.lookup_byte_offset(sp.lo);
401 let end = self.lookup_byte_offset(sp.hi);
403 // FIXME #8256: this used to be an assert but whatever precondition
404 // it's testing isn't true for all spans in the AST, so to allow the
405 // caller to not have to fail (and it can't catch it since the CodeMap
406 // isn't sendable), return None
407 if begin.fm.start_pos != end.fm.start_pos {
// Both offsets are relative to the same filemap here, so they index its source directly.
410 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
411 end.pos.to_uint()).to_string())
415 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
416 for fm in self.files.borrow().iter() {
417 if filename == fm.name.as_slice() {
421 fail!("asking for {} which we don't know about", filename);
424 pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
425 let idx = self.lookup_filemap_idx(bpos);
426 let fm = self.files.borrow().get(idx).clone();
427 let offset = bpos - fm.start_pos;
428 FileMapAndBytePos {fm: fm, pos: offset}
431 /// Converts an absolute BytePos to a CharPos relative to the start of its filemap.
432 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
433 let idx = self.lookup_filemap_idx(bpos);
434 let files = self.files.borrow();
435 let map = files.get(idx);
437 // The number of extra bytes due to multibyte chars in the FileMap
438 let mut total_extra_bytes = 0;
440 for mbc in map.multibyte_chars.borrow().iter() {
441 debug!("{}-byte char at {}", mbc.bytes, mbc.pos);
443 // every character is at least one byte, so we only
444 // count the actual extra bytes.
445 total_extra_bytes += mbc.bytes - 1;
446 // We should never see a byte position in the middle of a
// multi-byte character: `bpos` must lie at or past the character's end.
448 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
454 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
// Character offset = byte offset within the filemap minus the surplus bytes counted above.
455 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
458 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
459 let files = self.files.borrow();
461 let len = files.len();
465 let m = (a + b) / 2u;
466 if files.get(m).start_pos > pos {
472 // There can be filemaps with length 0. These have the same start_pos as the previous
473 // filemap, but are not the filemaps we want (because they are length 0, they cannot
474 // contain what we are looking for). So, rewind until we find a useful filemap.
476 let lines = files.get(a).lines.borrow();
482 fail!("position {} does not resolve to a source location", pos.to_uint());
487 fail!("position {} does not resolve to a source location", pos.to_uint())
/// Finds the filemap containing `pos` and an index `a` into that filemap's
/// `lines` table, returned together as a `FileMapAndLine`.
493 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
494 let idx = self.lookup_filemap_idx(pos);
496 let files = self.files.borrow();
497 let f = files.get(idx).clone();
// NOTE(review): in the visible code the line table is only read (`len`, `get`);
// a shared `borrow()` would likely suffice -- confirm against the full loop.
500 let mut lines = f.lines.borrow_mut();
501 let mut b = lines.len();
// Bisection step: halve the candidate range [a, b) around the midpoint `m`.
503 let m = (a + b) / 2u;
504 if *lines.get(m) > pos { b = m; } else { a = m; }
507 FileMapAndLine {fm: f, line: a}
510 fn lookup_pos(&self, pos: BytePos) -> Loc {
511 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
512 let line = a + 1u; // Line numbers start at 1
513 let chpos = self.bytepos_to_file_charpos(pos);
514 let linebpos = *f.lines.borrow().get(a);
515 let linechpos = self.bytepos_to_file_charpos(linebpos);
516 debug!("byte pos {} is on the line at byte pos {}",
518 debug!("char pos {} is on the line at char pos {}",
520 debug!("byte is on line: {}", line);
521 assert!(chpos >= linechpos);
525 col: chpos - linechpos
536 let cm = CodeMap::new();
537 let fm = cm.new_filemap("blork.rs".to_string(),
538 "first line.\nsecond line".to_string());
539 fm.next_line(BytePos(0));
540 assert_eq!(&fm.get_line(0),&"first line.".to_string());
541 // TESTING BROKEN BEHAVIOR:
542 fm.next_line(BytePos(10));
543 assert_eq!(&fm.get_line(1), &".".to_string());
549 let cm = CodeMap::new();
550 let fm = cm.new_filemap("blork.rs".to_string(),
551 "first line.\nsecond line".to_string());
552 // TESTING *REALLY* BROKEN BEHAVIOR:
553 fm.next_line(BytePos(0));
554 fm.next_line(BytePos(10));
555 fm.next_line(BytePos(2));
558 fn init_code_map() -> CodeMap {
559 let cm = CodeMap::new();
560 let fm1 = cm.new_filemap("blork.rs".to_string(),
561 "first line.\nsecond line".to_string());
562 let fm2 = cm.new_filemap("empty.rs".to_string(),
564 let fm3 = cm.new_filemap("blork2.rs".to_string(),
565 "first line.\nsecond line".to_string());
567 fm1.next_line(BytePos(0));
568 fm1.next_line(BytePos(12));
569 fm2.next_line(BytePos(24));
570 fm3.next_line(BytePos(24));
571 fm3.next_line(BytePos(34));
578 // Test lookup_byte_offset
579 let cm = init_code_map();
581 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
582 assert_eq!(fmabp1.fm.name, "blork.rs".to_string());
583 assert_eq!(fmabp1.pos, BytePos(22));
585 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
586 assert_eq!(fmabp2.fm.name, "blork2.rs".to_string());
587 assert_eq!(fmabp2.pos, BytePos(0));
592 // Test bytepos_to_file_charpos
593 let cm = init_code_map();
595 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
596 assert_eq!(cp1, CharPos(22));
598 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
599 assert_eq!(cp2, CharPos(0));
604 // Test zero-length filemaps.
605 let cm = init_code_map();
607 let loc1 = cm.lookup_char_pos(BytePos(22));
608 assert_eq!(loc1.file.name, "blork.rs".to_string());
609 assert_eq!(loc1.line, 2);
610 assert_eq!(loc1.col, CharPos(10));
612 let loc2 = cm.lookup_char_pos(BytePos(24));
613 assert_eq!(loc2.file.name, "blork2.rs".to_string());
614 assert_eq!(loc2.line, 1);
615 assert_eq!(loc2.col, CharPos(0));
618 fn init_code_map_mbc() -> CodeMap {
619 let cm = CodeMap::new();
620 // € is a three byte utf8 char.
622 cm.new_filemap("blork.rs".to_string(),
623 "fir€st €€€€ line.\nsecond line".to_string());
624 let fm2 = cm.new_filemap("blork2.rs".to_string(),
625 "first line€€.\n€ second line".to_string());
627 fm1.next_line(BytePos(0));
628 fm1.next_line(BytePos(22));
629 fm2.next_line(BytePos(40));
630 fm2.next_line(BytePos(58));
632 fm1.record_multibyte_char(BytePos(3), 3);
633 fm1.record_multibyte_char(BytePos(9), 3);
634 fm1.record_multibyte_char(BytePos(12), 3);
635 fm1.record_multibyte_char(BytePos(15), 3);
636 fm1.record_multibyte_char(BytePos(18), 3);
637 fm2.record_multibyte_char(BytePos(50), 3);
638 fm2.record_multibyte_char(BytePos(53), 3);
639 fm2.record_multibyte_char(BytePos(58), 3);
646 // Test bytepos_to_file_charpos in the presence of multi-byte chars
647 let cm = init_code_map_mbc();
649 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
650 assert_eq!(cp1, CharPos(3));
652 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
653 assert_eq!(cp2, CharPos(4));
655 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
656 assert_eq!(cp3, CharPos(12));
658 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
659 assert_eq!(cp4, CharPos(15));
664 // Test span_to_lines for a span ending at the end of filemap
665 let cm = init_code_map();
666 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
667 let file_lines = cm.span_to_lines(span);
669 assert_eq!(file_lines.file.name, "blork.rs".to_string());
670 assert_eq!(file_lines.lines.len(), 1);
671 assert_eq!(*file_lines.lines.get(0), 1u);
676 // Test span_to_snippet for a span ending at the end of filemap
677 let cm = init_code_map();
678 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
679 let snippet = cm.span_to_snippet(span);
681 assert_eq!(snippet, Some("second line".to_string()));
686 // Test span_to_string for a span ending at the end of filemap
687 let cm = init_code_map();
688 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
689 let sstr = cm.span_to_string(span);
691 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_string());