1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
//! The CodeMap tracks all the source code used within a single crate, mapping
//! from integer byte positions to the original source code location. Each bit of
//! source parsed during crate parsing (typically files, in-memory strings, or
//! various bits of macro expansion) covers a contiguous range of bytes in the
//! CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
//! and used pervasively in the compiler. They are absolute positions within the
//! CodeMap, which upon request can be converted to line and column information,
//! source code snippets, etc.
26 use serialize::{Encodable, Decodable, Encoder, Decoder};
27 use std::cell::RefCell;
32 fn from_uint(n: uint) -> Self;
33 fn to_uint(&self) -> uint;
36 /// A byte offset. Keep this small (currently 32-bits), as AST contains
38 #[deriving(Clone, PartialEq, Eq, Hash, PartialOrd, Show)]
39 pub struct BytePos(pub u32);
41 /// A character offset. Because of multibyte utf8 characters, a byte offset
42 /// is not equivalent to a character offset. The CodeMap will convert BytePos
43 /// values to CharPos values as necessary.
44 #[deriving(PartialEq, Hash, PartialOrd, Show)]
45 pub struct CharPos(pub uint);
47 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
48 // have been unsuccessful
50 impl Pos for BytePos {
51 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
52 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
55 impl Add<BytePos, BytePos> for BytePos {
56 fn add(&self, rhs: &BytePos) -> BytePos {
57 BytePos((self.to_uint() + rhs.to_uint()) as u32)
61 impl Sub<BytePos, BytePos> for BytePos {
62 fn sub(&self, rhs: &BytePos) -> BytePos {
63 BytePos((self.to_uint() - rhs.to_uint()) as u32)
67 impl Pos for CharPos {
68 fn from_uint(n: uint) -> CharPos { CharPos(n) }
69 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
72 impl Add<CharPos,CharPos> for CharPos {
73 fn add(&self, rhs: &CharPos) -> CharPos {
74 CharPos(self.to_uint() + rhs.to_uint())
78 impl Sub<CharPos,CharPos> for CharPos {
79 fn sub(&self, rhs: &CharPos) -> CharPos {
80 CharPos(self.to_uint() - rhs.to_uint())
/// Spans represent a region of code, used for error reporting. Positions in spans
/// are *absolute* positions from the beginning of the codemap, not positions
/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
/// to the original source.
90 #[deriving(Clone, Show, Hash)]
94 /// Information about where the macro came from, if this piece of
95 /// code was created by a macro expansion.
96 pub expn_info: Option<Gc<ExpnInfo>>
99 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None };
101 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)]
102 pub struct Spanned<T> {
107 impl PartialEq for Span {
108 fn eq(&self, other: &Span) -> bool {
109 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
111 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
116 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
117 /* Note #1972 -- spans are encoded but not decoded */
118 fn encode(&self, s: &mut S) -> Result<(), E> {
123 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
124 fn decode(_d: &mut D) -> Result<Span, E> {
129 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
130 respan(mk_sp(lo, hi), t)
133 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
134 Spanned {node: t, span: sp}
137 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
141 /* assuming that we're not in macro expansion */
142 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
143 Span {lo: lo, hi: hi, expn_info: None}
146 /// Return the span itself if it doesn't come from a macro expansion,
147 /// otherwise return the call site span up to the `enclosing_sp` by
148 /// following the `expn_info` chain.
149 pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
150 match (sp.expn_info, enclosing_sp.expn_info) {
152 (Some(expn1), Some(expn2)) if expn1.call_site == expn2.call_site => sp,
153 (Some(expn1), _) => original_sp(expn1.call_site, enclosing_sp),
157 /// A source code location used for error reporting
159 /// Information about the original source
160 pub file: Rc<FileMap>,
161 /// The (1-based) line number
163 /// The (0-based) column offset
167 /// A source code location used as the result of lookup_char_pos_adj
168 // Actually, *none* of the clients use the filename *or* file field;
169 // perhaps they should just be removed.
170 pub struct LocWithOpt {
171 pub filename: FileName,
174 pub file: Option<Rc<FileMap>>,
177 // used to be structural records. Better names, anyone?
178 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
179 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
181 /// The syntax with which a macro was invoked.
182 #[deriving(Clone, Hash, Show)]
183 pub enum MacroFormat {
184 /// e.g. #[deriving(...)] <item>
190 #[deriving(Clone, Hash, Show)]
191 pub struct NameAndSpan {
192 /// The name of the macro that was invoked to create the thing
195 /// The format with which the macro was invoked.
196 pub format: MacroFormat,
197 /// The span of the macro definition itself. The macro may not
198 /// have a sensible definition span (e.g. something defined
199 /// completely inside libsyntax) in which case this is None.
200 pub span: Option<Span>
203 /// Extra information for tracking macro expansion of spans
204 #[deriving(Hash, Show)]
205 pub struct ExpnInfo {
206 /// The location of the actual macro invocation, e.g. `let x =
209 /// This may recursively refer to other macro invocations, e.g. if
210 /// `foo!()` invoked `bar!()` internally, and there was an
211 /// expression inside `bar!`; the call_site of the expression in
212 /// the expansion would point to the `bar!` invocation; that
213 /// call_site span would have its own ExpnInfo, with the call_site
214 /// pointing to the `foo!` invocation.
216 /// Information about the macro and its definition.
218 /// The `callee` of the inner expression in the `call_site`
219 /// example would point to the `macro_rules! bar { ... }` and that
220 /// of the `bar!()` invocation would point to the `macro_rules!
222 pub callee: NameAndSpan
225 pub type FileName = String;
227 pub struct FileLines {
228 pub file: Rc<FileMap>,
232 /// Identifies an offset of a multi-byte character in a FileMap
233 pub struct MultiByteChar {
234 /// The absolute offset of the character in the CodeMap
236 /// The number of bytes, >=2
240 /// A single source in the CodeMap
242 /// The name of the file that the source came from, source that doesn't
243 /// originate from files has names between angle brackets by convention,
246 /// The complete source code
248 /// The start position of this source in the CodeMap
249 pub start_pos: BytePos,
/// Locations of line beginnings in the source code
251 pub lines: RefCell<Vec<BytePos> >,
252 /// Locations of multi-byte characters in the source code
253 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
257 /// EFFECT: register a start-of-line offset in the
258 /// table of line-beginnings.
259 /// UNCHECKED INVARIANT: these offsets must be added in the right
260 /// order and must be in the right places; there is shared knowledge
261 /// about what ends a line between this file and parse.rs
262 /// WARNING: pos param here is the offset relative to start of CodeMap,
263 /// and CodeMap will append a newline when adding a filemap without a newline at the end,
264 /// so the safe way to call this is with value calculated as
265 /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
266 pub fn next_line(&self, pos: BytePos) {
// the new bytepos must be > the last one (or it's the first one).
268 let mut lines = self.lines.borrow_mut();;
269 let line_len = lines.len();
270 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
274 /// get a line from the list of pre-computed line-beginnings
275 pub fn get_line(&self, line: int) -> String {
276 let mut lines = self.lines.borrow_mut();
277 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
278 let begin = begin.to_uint();
279 let slice = self.src.as_slice().slice_from(begin);
280 match slice.find('\n') {
281 Some(e) => slice.slice_to(e).to_string(),
282 None => slice.to_string()
286 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
287 assert!(bytes >=2 && bytes <= 4);
288 let mbc = MultiByteChar {
292 self.multibyte_chars.borrow_mut().push(mbc);
295 pub fn is_real_file(&self) -> bool {
296 !(self.name.as_slice().starts_with("<") &&
297 self.name.as_slice().ends_with(">"))
302 pub files: RefCell<Vec<Rc<FileMap>>>
306 pub fn new() -> CodeMap {
308 files: RefCell::new(Vec::new()),
312 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
313 let mut files = self.files.borrow_mut();
314 let start_pos = match files.last() {
316 Some(last) => last.start_pos.to_uint() + last.src.len(),
319 // Remove utf-8 BOM if any.
320 // FIXME #12884: no efficient/safe way to remove from the start of a string
321 // and reuse the allocation.
322 let mut src = if src.as_slice().starts_with("\ufeff") {
323 String::from_str(src.as_slice().slice_from(3))
325 String::from_str(src.as_slice())
328 // Append '\n' in case it's not already there.
329 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
330 // overflowing into the next filemap in case the last byte of span is also the last
331 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
332 if src.len() > 0 && !src.as_slice().ends_with("\n") {
336 let filemap = Rc::new(FileMap {
338 src: src.to_string(),
339 start_pos: Pos::from_uint(start_pos),
340 lines: RefCell::new(Vec::new()),
341 multibyte_chars: RefCell::new(Vec::new()),
344 files.push(filemap.clone());
349 pub fn mk_substr_filename(&self, sp: Span) -> String {
350 let pos = self.lookup_char_pos(sp.lo);
351 (format!("<{}:{}:{}>",
354 pos.col.to_uint() + 1)).to_string()
357 /// Lookup source information about a BytePos
358 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
362 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
363 let loc = self.lookup_char_pos(pos);
365 filename: loc.file.name.to_string(),
372 pub fn span_to_string(&self, sp: Span) -> String {
373 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
374 return "no-location".to_string();
377 let lo = self.lookup_char_pos_adj(sp.lo);
378 let hi = self.lookup_char_pos_adj(sp.hi);
379 return (format!("{}:{}:{}: {}:{}",
382 lo.col.to_uint() + 1,
384 hi.col.to_uint() + 1)).to_string()
387 pub fn span_to_filename(&self, sp: Span) -> FileName {
388 self.lookup_char_pos(sp.lo).file.name.to_string()
391 pub fn span_to_lines(&self, sp: Span) -> FileLines {
392 let lo = self.lookup_char_pos(sp.lo);
393 let hi = self.lookup_char_pos(sp.hi);
394 let mut lines = Vec::new();
395 for i in range(lo.line - 1u, hi.line as uint) {
398 FileLines {file: lo.file, lines: lines}
401 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
402 let begin = self.lookup_byte_offset(sp.lo);
403 let end = self.lookup_byte_offset(sp.hi);
405 // FIXME #8256: this used to be an assert but whatever precondition
406 // it's testing isn't true for all spans in the AST, so to allow the
407 // caller to not have to fail (and it can't catch it since the CodeMap
408 // isn't sendable), return None
409 if begin.fm.start_pos != end.fm.start_pos {
412 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
413 end.pos.to_uint()).to_string())
417 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
418 for fm in self.files.borrow().iter() {
419 if filename == fm.name.as_slice() {
423 fail!("asking for {} which we don't know about", filename);
426 pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
427 let idx = self.lookup_filemap_idx(bpos);
428 let fm = self.files.borrow().get(idx).clone();
429 let offset = bpos - fm.start_pos;
430 FileMapAndBytePos {fm: fm, pos: offset}
/// Converts an absolute BytePos to a CharPos relative to the start of the filemap that contains it.
434 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
435 let idx = self.lookup_filemap_idx(bpos);
436 let files = self.files.borrow();
437 let map = files.get(idx);
439 // The number of extra bytes due to multibyte chars in the FileMap
440 let mut total_extra_bytes = 0;
442 for mbc in map.multibyte_chars.borrow().iter() {
443 debug!("{}-byte char at {}", mbc.bytes, mbc.pos);
445 // every character is at least one byte, so we only
446 // count the actual extra bytes.
447 total_extra_bytes += mbc.bytes - 1;
448 // We should never see a byte position in the middle of a
450 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
456 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
457 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
460 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
461 let files = self.files.borrow();
463 let len = files.len();
467 let m = (a + b) / 2u;
468 if files.get(m).start_pos > pos {
474 // There can be filemaps with length 0. These have the same start_pos as the previous
475 // filemap, but are not the filemaps we want (because they are length 0, they cannot
476 // contain what we are looking for). So, rewind until we find a useful filemap.
478 let lines = files.get(a).lines.borrow();
484 fail!("position {} does not resolve to a source location", pos.to_uint());
489 fail!("position {} does not resolve to a source location", pos.to_uint())
495 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
496 let idx = self.lookup_filemap_idx(pos);
498 let files = self.files.borrow();
499 let f = files.get(idx).clone();
502 let mut lines = f.lines.borrow_mut();
503 let mut b = lines.len();
505 let m = (a + b) / 2u;
506 if *lines.get(m) > pos { b = m; } else { a = m; }
509 FileMapAndLine {fm: f, line: a}
512 fn lookup_pos(&self, pos: BytePos) -> Loc {
513 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
514 let line = a + 1u; // Line numbers start at 1
515 let chpos = self.bytepos_to_file_charpos(pos);
516 let linebpos = *f.lines.borrow().get(a);
517 let linechpos = self.bytepos_to_file_charpos(linebpos);
518 debug!("byte pos {} is on the line at byte pos {}",
520 debug!("char pos {} is on the line at char pos {}",
522 debug!("byte is on line: {}", line);
523 assert!(chpos >= linechpos);
527 col: chpos - linechpos
538 let cm = CodeMap::new();
539 let fm = cm.new_filemap("blork.rs".to_string(),
540 "first line.\nsecond line".to_string());
541 fm.next_line(BytePos(0));
542 assert_eq!(&fm.get_line(0),&"first line.".to_string());
543 // TESTING BROKEN BEHAVIOR:
544 fm.next_line(BytePos(10));
545 assert_eq!(&fm.get_line(1), &".".to_string());
551 let cm = CodeMap::new();
552 let fm = cm.new_filemap("blork.rs".to_string(),
553 "first line.\nsecond line".to_string());
554 // TESTING *REALLY* BROKEN BEHAVIOR:
555 fm.next_line(BytePos(0));
556 fm.next_line(BytePos(10));
557 fm.next_line(BytePos(2));
560 fn init_code_map() -> CodeMap {
561 let cm = CodeMap::new();
562 let fm1 = cm.new_filemap("blork.rs".to_string(),
563 "first line.\nsecond line".to_string());
564 let fm2 = cm.new_filemap("empty.rs".to_string(),
566 let fm3 = cm.new_filemap("blork2.rs".to_string(),
567 "first line.\nsecond line".to_string());
569 fm1.next_line(BytePos(0));
570 fm1.next_line(BytePos(12));
571 fm2.next_line(BytePos(24));
572 fm3.next_line(BytePos(24));
573 fm3.next_line(BytePos(34));
580 // Test lookup_byte_offset
581 let cm = init_code_map();
583 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
584 assert_eq!(fmabp1.fm.name, "blork.rs".to_string());
585 assert_eq!(fmabp1.pos, BytePos(22));
587 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
588 assert_eq!(fmabp2.fm.name, "blork2.rs".to_string());
589 assert_eq!(fmabp2.pos, BytePos(0));
594 // Test bytepos_to_file_charpos
595 let cm = init_code_map();
597 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
598 assert_eq!(cp1, CharPos(22));
600 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
601 assert_eq!(cp2, CharPos(0));
606 // Test zero-length filemaps.
607 let cm = init_code_map();
609 let loc1 = cm.lookup_char_pos(BytePos(22));
610 assert_eq!(loc1.file.name, "blork.rs".to_string());
611 assert_eq!(loc1.line, 2);
612 assert_eq!(loc1.col, CharPos(10));
614 let loc2 = cm.lookup_char_pos(BytePos(24));
615 assert_eq!(loc2.file.name, "blork2.rs".to_string());
616 assert_eq!(loc2.line, 1);
617 assert_eq!(loc2.col, CharPos(0));
620 fn init_code_map_mbc() -> CodeMap {
621 let cm = CodeMap::new();
622 // € is a three byte utf8 char.
624 cm.new_filemap("blork.rs".to_string(),
625 "fir€st €€€€ line.\nsecond line".to_string());
626 let fm2 = cm.new_filemap("blork2.rs".to_string(),
627 "first line€€.\n€ second line".to_string());
629 fm1.next_line(BytePos(0));
630 fm1.next_line(BytePos(22));
631 fm2.next_line(BytePos(40));
632 fm2.next_line(BytePos(58));
634 fm1.record_multibyte_char(BytePos(3), 3);
635 fm1.record_multibyte_char(BytePos(9), 3);
636 fm1.record_multibyte_char(BytePos(12), 3);
637 fm1.record_multibyte_char(BytePos(15), 3);
638 fm1.record_multibyte_char(BytePos(18), 3);
639 fm2.record_multibyte_char(BytePos(50), 3);
640 fm2.record_multibyte_char(BytePos(53), 3);
641 fm2.record_multibyte_char(BytePos(58), 3);
648 // Test bytepos_to_file_charpos in the presence of multi-byte chars
649 let cm = init_code_map_mbc();
651 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
652 assert_eq!(cp1, CharPos(3));
654 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
655 assert_eq!(cp2, CharPos(4));
657 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
658 assert_eq!(cp3, CharPos(12));
660 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
661 assert_eq!(cp4, CharPos(15));
666 // Test span_to_lines for a span ending at the end of filemap
667 let cm = init_code_map();
668 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
669 let file_lines = cm.span_to_lines(span);
671 assert_eq!(file_lines.file.name, "blork.rs".to_string());
672 assert_eq!(file_lines.lines.len(), 1);
673 assert_eq!(*file_lines.lines.get(0), 1u);
678 // Test span_to_snippet for a span ending at the end of filemap
679 let cm = init_code_map();
680 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
681 let snippet = cm.span_to_snippet(span);
683 assert_eq!(snippet, Some("second line".to_string()));
// Test span_to_string for a span ending at the end of filemap
689 let cm = init_code_map();
690 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
691 let sstr = cm.span_to_string(span);
693 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_string());