src/librustc_back/svh.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! Calculation and management of a Strict Version Hash for crates
  12 //!
  13 //! # Today's ABI problem
  14 //!
  15 //! In today's implementation of rustc, it is incredibly difficult to achieve
  16 //! forward binary compatibility without resorting to C-like interfaces. Within
  17 //! rust code itself, abi details such as symbol names suffer from a variety of
  18 //! unrelated factors to code changing such as the "def id drift" problem. This
  19 //! ends up yielding confusing error messages about metadata mismatches and
  20 //! such.
  21 //!
  22 //! The core of this problem is when an upstream dependency changes and
  23 //! downstream dependents are not recompiled. This causes compile errors because
  24 //! the upstream crate's metadata has changed but the downstream crates are
  25 //! still referencing the older crate's metadata.
  26 //!
  27 //! This problem exists for many reasons, the primary of which is that rust does
  28 //! not currently support forwards ABI compatibility (in place upgrades of a
  29 //! crate).
  30 //!
  31 //! # SVH and how it alleviates the problem
  32 //!
  33 //! With all of this knowledge on hand, this module contains the implementation
  34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
  35 //! hash of all contents of a crate which can somehow be exposed to downstream
  36 //! crates.
  37 //!
  38 //! This hash is currently calculated by just hashing the AST, but this is
  39 //! obviously wrong (doc changes should not result in an incompatible ABI).
  40 //! Implementation-wise, this is required at this moment in time.
  41 //!
  42 //! By encoding this strict version hash into all crate's metadata, stale crates
  43 //! can be detected immediately and error'd about by rustc itself.
  44 //!
  45 //! # Relevant links
  46 //!
  47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
  48
  49 use std::fmt;
  50 use std::hash::Hash;
  51 use std::hash::sip::SipState;
  52 use std::iter::range_step;
  53 use syntax::ast;
  54 use syntax::visit;
  55
  56 #[derive(Clone, PartialEq)]
  57 pub struct Svh {
  58     hash: String,
  59 }
  60
  61 impl Svh {
  62     pub fn new(hash: &str) -> Svh {
  63         assert!(hash.len() == 16);
  64         Svh { hash: hash.to_string() }
  65     }
  66
  67     pub fn as_str<'a>(&'a self) -> &'a str {
  68         self.hash[]
  69     }
  70
  71     pub fn calculate(metadata: &Vec<String>, krate: &ast::Crate) -> Svh {
  72         // FIXME (#14132): This is better than it used to be, but it still not
  73         // ideal. We now attempt to hash only the relevant portions of the
  74         // Crate AST as well as the top-level crate attributes. (However,
  75         // the hashing of the crate attributes should be double-checked
  76         // to ensure it is not incorporating implementation artifacts into
  77         // the hash that are not otherwise visible.)
  78
  79         // FIXME: this should use SHA1, not SipHash. SipHash is not built to
  80         //        avoid collisions.
  81         let mut state = SipState::new();
  82
  83         for data in metadata.iter() {
  84             data.hash(&mut state);
  85         }
  86
  87         {
  88             let mut visit = svh_visitor::make(&mut state);
  89             visit::walk_crate(&mut visit, krate);
  90         }
  91
  92         // FIXME (#14132): This hash is still sensitive to e.g. the
  93         // spans of the crate Attributes and their underlying
  94         // MetaItems; we should make ContentHashable impl for those
  95         // types and then use hash_content.  But, since all crate
  96         // attributes should appear near beginning of the file, it is
  97         // not such a big deal to be sensitive to their spans for now.
  98         //
  99         // We hash only the MetaItems instead of the entire Attribute
 100         // to avoid hashing the AttrId
 101         for attr in krate.attrs.iter() {
 102             attr.node.value.hash(&mut state);
 103         }
 104
 105         let hash = state.result();
 106         return Svh {
 107             hash: range_step(0u, 64u, 4u).map(|i| hex(hash >> i)).collect()
 108         };
 109
 110         fn hex(b: u64) -> char {
 111             let b = (b & 0xf) as u8;
 112             let b = match b {
 113                 0 ... 9 => '0' as u8 + b,
 114                 _ => 'a' as u8 + b - 10,
 115             };
 116             b as char
 117         }
 118     }
 119 }
 120
 121 impl fmt::Show for Svh {
 122     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 123         f.pad(self.as_str())
 124     }
 125 }
 126
 127 // FIXME (#14132): Even this SVH computation still has implementation
 128 // artifacts: namely, the order of item declaration will affect the
 129 // hash computation, but for many kinds of items the order of
 130 // declaration should be irrelevant to the ABI.
 131
 132 mod svh_visitor {
 133     pub use self::SawExprComponent::*;
 134     pub use self::SawStmtComponent::*;
 135     use self::SawAbiComponent::*;
 136     use syntax::ast;
 137     use syntax::ast::*;
 138     use syntax::codemap::Span;
 139     use syntax::parse::token;
 140     use syntax::print::pprust;
 141     use syntax::visit;
 142     use syntax::visit::{Visitor, FnKind};
 143
 144     use std::hash::Hash;
 145     use std::hash::sip::SipState;
 146
 147     pub struct StrictVersionHashVisitor<'a> {
 148         pub st: &'a mut SipState,
 149     }
 150
 151     pub fn make<'a>(st: &'a mut SipState) -> StrictVersionHashVisitor<'a> {
 152         StrictVersionHashVisitor { st: st }
 153     }
 154
 155     // To off-load the bulk of the hash-computation on deriving(Hash),
 156     // we define a set of enums corresponding to the content that our
 157     // crate visitor will encounter as it traverses the ast.
 158     //
 159     // The important invariant is that all of the Saw*Component enums
 160     // do not carry any Spans, Names, or Idents.
 161     //
 162     // Not carrying any Names/Idents is the important fix for problem
 163     // noted on PR #13948: using the ident.name as the basis for a
 164     // hash leads to unstable SVH, because ident.name is just an index
 165     // into intern table (i.e. essentially a random address), not
 166     // computed from the name content.
 167     //
 168     // With the below enums, the SVH computation is not sensitive to
 169     // artifacts of how rustc was invoked nor of how the source code
 170     // was laid out.  (Or at least it is *less* sensitive.)
 171
 172     // This enum represents the different potential bits of code the
 173     // visitor could encounter that could affect the ABI for the crate,
 174     // and assigns each a distinct tag to feed into the hash computation.
 175     #[derive(Hash)]
 176     enum SawAbiComponent<'a> {
 177
 178         // FIXME (#14132): should we include (some function of)
 179         // ident.ctxt as well?
 180         SawIdent(token::InternedString),
 181         SawStructDef(token::InternedString),
 182
 183         SawLifetimeRef(token::InternedString),
 184         SawLifetimeDef(token::InternedString),
 185
 186         SawMod,
 187         SawViewItem,
 188         SawForeignItem,
 189         SawItem,
 190         SawDecl,
 191         SawTy,
 192         SawGenerics,
 193         SawFn,
 194         SawTyMethod,
 195         SawTraitMethod,
 196         SawStructField,
 197         SawVariant,
 198         SawExplicitSelf,
 199         SawPath,
 200         SawOptLifetimeRef,
 201         SawBlock,
 202         SawPat,
 203         SawLocal,
 204         SawArm,
 205         SawExpr(SawExprComponent<'a>),
 206         SawStmt(SawStmtComponent),
 207     }
 208
 209     /// SawExprComponent carries all of the information that we want
 210     /// to include in the hash that *won't* be covered by the
 211     /// subsequent recursive traversal of the expression's
 212     /// substructure by the visitor.
 213     ///
 214     /// We know every Expr_ variant is covered by a variant because
 215     /// `fn saw_expr` maps each to some case below.  Ensuring that
 216     /// each variant carries an appropriate payload has to be verified
 217     /// by hand.
 218     ///
 219     /// (However, getting that *exactly* right is not so important
 220     /// because the SVH is just a developer convenience; there is no
 221     /// guarantee of collision-freedom, hash collisions are just
 222     /// (hopefully) unlikely.)
 223     #[derive(Hash)]
 224     pub enum SawExprComponent<'a> {
 225
 226         SawExprLoop(Option<token::InternedString>),
 227         SawExprField(token::InternedString),
 228         SawExprTupField(uint),
 229         SawExprBreak(Option<token::InternedString>),
 230         SawExprAgain(Option<token::InternedString>),
 231
 232         SawExprBox,
 233         SawExprVec,
 234         SawExprCall,
 235         SawExprMethodCall,
 236         SawExprTup,
 237         SawExprBinary(ast::BinOp),
 238         SawExprUnary(ast::UnOp),
 239         SawExprLit(ast::Lit_),
 240         SawExprCast,
 241         SawExprIf,
 242         SawExprWhile,
 243         SawExprMatch,
 244         SawExprClosure,
 245         SawExprBlock,
 246         SawExprAssign,
 247         SawExprAssignOp(ast::BinOp),
 248         SawExprIndex,
 249         SawExprRange,
 250         SawExprPath,
 251         SawExprAddrOf(ast::Mutability),
 252         SawExprRet,
 253         SawExprInlineAsm(&'a ast::InlineAsm),
 254         SawExprStruct,
 255         SawExprRepeat,
 256         SawExprParen,
 257         SawExprForLoop,
 258     }
 259
 260     fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
 261         match *node {
 262             ExprBox(..)              => SawExprBox,
 263             ExprVec(..)              => SawExprVec,
 264             ExprCall(..)             => SawExprCall,
 265             ExprMethodCall(..)       => SawExprMethodCall,
 266             ExprTup(..)              => SawExprTup,
 267             ExprBinary(op, _, _)     => SawExprBinary(op),
 268             ExprUnary(op, _)         => SawExprUnary(op),
 269             ExprLit(ref lit)         => SawExprLit(lit.node.clone()),
 270             ExprCast(..)             => SawExprCast,
 271             ExprIf(..)               => SawExprIf,
 272             ExprWhile(..)            => SawExprWhile,
 273             ExprLoop(_, id)          => SawExprLoop(id.map(content)),
 274             ExprMatch(..)            => SawExprMatch,
 275             ExprClosure(..)          => SawExprClosure,
 276             ExprBlock(..)            => SawExprBlock,
 277             ExprAssign(..)           => SawExprAssign,
 278             ExprAssignOp(op, _, _)   => SawExprAssignOp(op),
 279             ExprField(_, id)         => SawExprField(content(id.node)),
 280             ExprTupField(_, id)      => SawExprTupField(id.node),
 281             ExprIndex(..)            => SawExprIndex,
 282             ExprRange(..)            => SawExprRange,
 283             ExprPath(..)             => SawExprPath,
 284             ExprAddrOf(m, _)         => SawExprAddrOf(m),
 285             ExprBreak(id)            => SawExprBreak(id.map(content)),
 286             ExprAgain(id)            => SawExprAgain(id.map(content)),
 287             ExprRet(..)              => SawExprRet,
 288             ExprInlineAsm(ref asm)   => SawExprInlineAsm(asm),
 289             ExprStruct(..)           => SawExprStruct,
 290             ExprRepeat(..)           => SawExprRepeat,
 291             ExprParen(..)            => SawExprParen,
 292             ExprForLoop(..)          => SawExprForLoop,
 293
 294             // just syntactic artifacts, expanded away by time of SVH.
 295             ExprIfLet(..)            => unreachable!(),
 296             ExprWhileLet(..)         => unreachable!(),
 297             ExprMac(..)              => unreachable!(),
 298         }
 299     }
 300
 301     /// SawStmtComponent is analogous to SawExprComponent, but for statements.
 302     #[derive(Hash)]
 303     pub enum SawStmtComponent {
 304         SawStmtDecl,
 305         SawStmtExpr,
 306         SawStmtSemi,
 307     }
 308
 309     fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
 310         match *node {
 311             StmtDecl(..) => SawStmtDecl,
 312             StmtExpr(..) => SawStmtExpr,
 313             StmtSemi(..) => SawStmtSemi,
 314             StmtMac(..)  => unreachable!(),
 315         }
 316     }
 317
 318     // Ad-hoc overloading between Ident and Name to their intern table lookups.
 319     trait InternKey { fn get_content(self) -> token::InternedString; }
 320     impl InternKey for Ident {
 321         fn get_content(self) -> token::InternedString { token::get_ident(self) }
 322     }
 323     impl InternKey for Name {
 324         fn get_content(self) -> token::InternedString { token::get_name(self) }
 325     }
 326     fn content<K:InternKey>(k: K) -> token::InternedString { k.get_content() }
 327
 328     impl<'a, 'v> Visitor<'v> for StrictVersionHashVisitor<'a> {
 329
 330         fn visit_mac(&mut self, macro: &Mac) {
 331             // macro invocations, namely macro_rules definitions,
 332             // *can* appear as items, even in the expanded crate AST.
 333
 334             if macro_name(macro).get() == "macro_rules" {
 335                 // Pretty-printing definition to a string strips out
 336                 // surface artifacts (currently), such as the span
 337                 // information, yielding a content-based hash.
 338
 339                 // FIXME (#14132): building temporary string is
 340                 // expensive; a direct content-based hash on token
 341                 // trees might be faster. Implementing this is far
 342                 // easier in short term.
 343                 let macro_defn_as_string = pprust::to_string(|pp_state| {
 344                     pp_state.print_mac(macro, token::Paren)
 345                 });
 346                 macro_defn_as_string.hash(self.st);
 347             } else {
 348                 // It is not possible to observe any kind of macro
 349                 // invocation at this stage except `macro_rules!`.
 350                 panic!("reached macro somehow: {}",
 351                       pprust::to_string(|pp_state| {
 352                           pp_state.print_mac(macro, token::Paren)
 353                       }));
 354             }
 355
 356             visit::walk_mac(self, macro);
 357
 358             fn macro_name(macro: &Mac) -> token::InternedString {
 359                 match &macro.node {
 360                     &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => {
 361                         let s = path.segments[];
 362                         assert_eq!(s.len(), 1);
 363                         content(s[0].identifier)
 364                     }
 365                 }
 366             }
 367         }
 368
 369         fn visit_struct_def(&mut self, s: &StructDef, ident: Ident,
 370                             g: &Generics, _: NodeId) {
 371             SawStructDef(content(ident)).hash(self.st);
 372             visit::walk_generics(self, g);
 373             visit::walk_struct_def(self, s)
 374         }
 375
 376         fn visit_variant(&mut self, v: &Variant, g: &Generics) {
 377             SawVariant.hash(self.st);
 378             // walk_variant does not call walk_generics, so do it here.
 379             visit::walk_generics(self, g);
 380             visit::walk_variant(self, v, g)
 381         }
 382
 383         fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option<Lifetime>) {
 384             SawOptLifetimeRef.hash(self.st);
 385             // (This is a strange method in the visitor trait, in that
 386             // it does not expose a walk function to do the subroutine
 387             // calls.)
 388             match *l {
 389                 Some(ref l) => self.visit_lifetime_ref(l),
 390                 None => ()
 391             }
 392         }
 393
 394         // All of the remaining methods just record (in the hash
 395         // SipState) that the visitor saw that particular variant
 396         // (with its payload), and continue walking as the default
 397         // visitor would.
 398         //
 399         // Some of the implementations have some notes as to how one
 400         // might try to make their SVH computation less discerning
 401         // (e.g. by incorporating reachability analysis).  But
 402         // currently all of their implementations are uniform and
 403         // uninteresting.
 404         //
 405         // (If you edit a method such that it deviates from the
 406         // pattern, please move that method up above this comment.)
 407
 408         fn visit_ident(&mut self, _: Span, ident: Ident) {
 409             SawIdent(content(ident)).hash(self.st);
 410         }
 411
 412         fn visit_lifetime_ref(&mut self, l: &Lifetime) {
 413             SawLifetimeRef(content(l.name)).hash(self.st);
 414         }
 415
 416         fn visit_lifetime_def(&mut self, l: &LifetimeDef) {
 417             SawLifetimeDef(content(l.lifetime.name)).hash(self.st);
 418         }
 419
 420         // We do recursively walk the bodies of functions/methods
 421         // (rather than omitting their bodies from the hash) since
 422         // monomorphization and cross-crate inlining generally implies
 423         // that a change to a crate body will require downstream
 424         // crates to be recompiled.
 425         fn visit_expr(&mut self, ex: &Expr) {
 426             SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex)
 427         }
 428
 429         fn visit_stmt(&mut self, s: &Stmt) {
 430             SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s)
 431         }
 432
 433         fn visit_view_item(&mut self, i: &ViewItem) {
 434             // Two kinds of view items can affect the ABI for a crate:
 435             // exported `pub use` view items (since that may expose
 436             // items that downstream crates can call), and `use
 437             // foo::Trait`, since changing that may affect method
 438             // resolution.
 439             //
 440             // The simplest approach to handling both of the above is
 441             // just to adopt the same simple-minded (fine-grained)
 442             // hash that I am deploying elsewhere here.
 443             SawViewItem.hash(self.st); visit::walk_view_item(self, i)
 444         }
 445
 446         fn visit_foreign_item(&mut self, i: &ForeignItem) {
 447             // FIXME (#14132) ideally we would incorporate privacy (or
 448             // perhaps reachability) somewhere here, so foreign items
 449             // that do not leak into downstream crates would not be
 450             // part of the ABI.
 451             SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i)
 452         }
 453
 454         fn visit_item(&mut self, i: &Item) {
 455             // FIXME (#14132) ideally would incorporate reachability
 456             // analysis somewhere here, so items that never leak into
 457             // downstream crates (e.g. via monomorphisation or
 458             // inlining) would not be part of the ABI.
 459             SawItem.hash(self.st); visit::walk_item(self, i)
 460         }
 461
 462         fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId) {
 463             SawMod.hash(self.st); visit::walk_mod(self, m)
 464         }
 465
 466         fn visit_decl(&mut self, d: &Decl) {
 467             SawDecl.hash(self.st); visit::walk_decl(self, d)
 468         }
 469
 470         fn visit_ty(&mut self, t: &Ty) {
 471             SawTy.hash(self.st); visit::walk_ty(self, t)
 472         }
 473
 474         fn visit_generics(&mut self, g: &Generics) {
 475             SawGenerics.hash(self.st); visit::walk_generics(self, g)
 476         }
 477
 478         fn visit_fn(&mut self, fk: FnKind<'v>, fd: &'v FnDecl,
 479                     b: &'v Block, s: Span, _: NodeId) {
 480             SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s)
 481         }
 482
 483         fn visit_ty_method(&mut self, t: &TypeMethod) {
 484             SawTyMethod.hash(self.st); visit::walk_ty_method(self, t)
 485         }
 486
 487         fn visit_trait_item(&mut self, t: &TraitItem) {
 488             SawTraitMethod.hash(self.st); visit::walk_trait_item(self, t)
 489         }
 490
 491         fn visit_struct_field(&mut self, s: &StructField) {
 492             SawStructField.hash(self.st); visit::walk_struct_field(self, s)
 493         }
 494
 495         fn visit_explicit_self(&mut self, es: &ExplicitSelf) {
 496             SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es)
 497         }
 498
 499         fn visit_path(&mut self, path: &Path, _: ast::NodeId) {
 500             SawPath.hash(self.st); visit::walk_path(self, path)
 501         }
 502
 503         fn visit_block(&mut self, b: &Block) {
 504             SawBlock.hash(self.st); visit::walk_block(self, b)
 505         }
 506
 507         fn visit_pat(&mut self, p: &Pat) {
 508             SawPat.hash(self.st); visit::walk_pat(self, p)
 509         }
 510
 511         fn visit_local(&mut self, l: &Local) {
 512             SawLocal.hash(self.st); visit::walk_local(self, l)
 513         }
 514
 515         fn visit_arm(&mut self, a: &Arm) {
 516             SawArm.hash(self.st); visit::walk_arm(self, a)
 517         }
 518     }
 519 }