1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Calculation and management of a Strict Version Hash for crates
13 //! # Today's ABI problem
15 //! In today's implementation of rustc, it is incredibly difficult to achieve
16 //! forward binary compatibility without resorting to C-like interfaces. Within
17 //! rust code itself, abi details such as symbol names suffer from a variety of
18 //! unrelated factors to code changing such as the "def id drift" problem. This
19 //! ends up yielding confusing error messages about metadata mismatches and
22 //! The core of this problem is when an upstream dependency changes and
23 //! downstream dependents are not recompiled. This causes compile errors because
24 //! the upstream crate's metadata has changed but the downstream crates are
25 //! still referencing the older crate's metadata.
27 //! This problem exists for many reasons, the primary of which is that rust does
28 //! not currently support forwards ABI compatibility (in place upgrades of a
31 //! # SVH and how it alleviates the problem
33 //! With all of this knowledge on hand, this module contains the implementation
34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35 //! hash of all contents of a crate which can somehow be exposed to downstream
38 //! This hash is currently calculated by just hashing the AST, but this is
39 //! obviously wrong (doc changes should not result in an incompatible ABI).
40 //! Implementation-wise, this is required at this moment in time.
42 //! By encoding this strict version hash into all crate's metadata, stale crates
43 //! can be detected immediately and error'd about by rustc itself.
47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
51 use std::hash::sip::SipState;
52 use std::iter::range_step;
56 #[deriving(Clone, PartialEq)]
62 pub fn new(hash: &str) -> Svh {
63 assert!(hash.len() == 16);
64 Svh { hash: hash.to_string() }
67 pub fn as_str<'a>(&'a self) -> &'a str {
71 pub fn calculate(krate: &ast::Crate) -> Svh {
72 // FIXME (#14132): This is better than it used to be, but it still not
73 // ideal. We now attempt to hash only the relevant portions of the
74 // Crate AST as well as the top-level crate attributes. (However,
75 // the hashing of the crate attributes should be double-checked
76 // to ensure it is not incorporating implementation artifacts into
77 // the hash that are not otherwise visible.)
79 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
81 let mut state = SipState::new();
84 let mut visit = svh_visitor::make(&mut state);
85 visit::walk_crate(&mut visit, krate, ());
88 // FIXME (#14132): This hash is still sensitive to e.g. the
89 // spans of the crate Attributes and their underlying
90 // MetaItems; we should make ContentHashable impl for those
91 // types and then use hash_content. But, since all crate
92 // attributes should appear near beginning of the file, it is
93 // not such a big deal to be sensitive to their spans for now.
95 // We hash only the MetaItems instead of the entire Attribute
96 // to avoid hashing the AttrId
97 for attr in krate.attrs.iter() {
98 attr.node.value.hash(&mut state);
101 let hash = state.result();
103 hash: range_step(0u, 64u, 4u).map(|i| hex(hash >> i)).collect()
106 fn hex(b: u64) -> char {
107 let b = (b & 0xf) as u8;
109 0 .. 9 => '0' as u8 + b,
110 _ => 'a' as u8 + b - 10,
117 impl fmt::Show for Svh {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123 // FIXME (#14132): Even this SVH computation still has implementation
124 // artifacts: namely, the order of item declaration will affect the
125 // hash computation, but for many kinds of items the order of
126 // declaration should be irrelevant to the ABI.
131 use syntax::codemap::Span;
132 use syntax::parse::token;
133 use syntax::print::pprust;
135 use syntax::visit::{Visitor, FnKind};
138 use std::hash::sip::SipState;
140 pub struct StrictVersionHashVisitor<'a> {
141 pub st: &'a mut SipState,
144 pub fn make<'a>(st: &'a mut SipState) -> StrictVersionHashVisitor<'a> {
145 StrictVersionHashVisitor { st: st }
148 // To off-load the bulk of the hash-computation on deriving(Hash),
149 // we define a set of enums corresponding to the content that our
150 // crate visitor will encounter as it traverses the ast.
152 // The important invariant is that all of the Saw*Component enums
153 // do not carry any Spans, Names, or Idents.
155 // Not carrying any Names/Idents is the important fix for problem
156 // noted on PR #13948: using the ident.name as the basis for a
157 // hash leads to unstable SVH, because ident.name is just an index
158 // into intern table (i.e. essentially a random address), not
159 // computed from the name content.
161 // With the below enums, the SVH computation is not sensitive to
162 // artifacts of how rustc was invoked nor of how the source code
163 // was laid out. (Or at least it is *less* sensitive.)
165 // This enum represents the different potential bits of code the
166 // visitor could encounter that could affect the ABI for the crate,
167 // and assigns each a distinct tag to feed into the hash computation.
169 enum SawAbiComponent<'a> {
171 // FIXME (#14132): should we include (some function of)
172 // ident.ctxt as well?
173 SawIdent(token::InternedString),
174 SawStructDef(token::InternedString),
176 SawLifetimeRef(token::InternedString),
177 SawLifetimeDecl(token::InternedString),
198 SawExpr(SawExprComponent<'a>),
199 SawStmt(SawStmtComponent),
202 /// SawExprComponent carries all of the information that we want
203 /// to include in the hash that *won't* be covered by the
204 /// subsequent recursive traversal of the expression's
205 /// substructure by the visitor.
207 /// We know every Expr_ variant is covered by a variant because
208 /// `fn saw_expr` maps each to some case below. Ensuring that
209 /// each variant carries an appropriate payload has to be verified
212 /// (However, getting that *exactly* right is not so important
213 /// because the SVH is just a developer convenience; there is no
214 /// guarantee of collision-freedom, hash collisions are just
215 /// (hopefully) unlikely.)
217 pub enum SawExprComponent<'a> {
219 SawExprLoop(Option<token::InternedString>),
220 SawExprField(token::InternedString),
221 SawExprBreak(Option<token::InternedString>),
222 SawExprAgain(Option<token::InternedString>),
230 SawExprBinary(ast::BinOp),
231 SawExprUnary(ast::UnOp),
232 SawExprLit(ast::Lit_),
241 SawExprAssignOp(ast::BinOp),
244 SawExprAddrOf(ast::Mutability),
246 SawExprInlineAsm(&'a ast::InlineAsm),
252 fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
254 ExprVstore(..) => SawExprVstore,
255 ExprBox(..) => SawExprBox,
256 ExprVec(..) => SawExprVec,
257 ExprCall(..) => SawExprCall,
258 ExprMethodCall(..) => SawExprMethodCall,
259 ExprTup(..) => SawExprTup,
260 ExprBinary(op, _, _) => SawExprBinary(op),
261 ExprUnary(op, _) => SawExprUnary(op),
262 ExprLit(lit) => SawExprLit(lit.node.clone()),
263 ExprCast(..) => SawExprCast,
264 ExprIf(..) => SawExprIf,
265 ExprWhile(..) => SawExprWhile,
266 ExprLoop(_, id) => SawExprLoop(id.map(content)),
267 ExprMatch(..) => SawExprMatch,
268 ExprFnBlock(..) => SawExprFnBlock,
269 ExprProc(..) => SawExprProc,
270 ExprBlock(..) => SawExprBlock,
271 ExprAssign(..) => SawExprAssign,
272 ExprAssignOp(op, _, _) => SawExprAssignOp(op),
273 ExprField(_, id, _) => SawExprField(content(id.node)),
274 ExprIndex(..) => SawExprIndex,
275 ExprPath(..) => SawExprPath,
276 ExprAddrOf(m, _) => SawExprAddrOf(m),
277 ExprBreak(id) => SawExprBreak(id.map(content)),
278 ExprAgain(id) => SawExprAgain(id.map(content)),
279 ExprRet(..) => SawExprRet,
280 ExprInlineAsm(ref asm) => SawExprInlineAsm(asm),
281 ExprStruct(..) => SawExprStruct,
282 ExprRepeat(..) => SawExprRepeat,
283 ExprParen(..) => SawExprParen,
285 // just syntactic artifacts, expanded away by time of SVH.
286 ExprForLoop(..) => unreachable!(),
287 ExprMac(..) => unreachable!(),
291 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
293 pub enum SawStmtComponent {
299 fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
301 StmtDecl(..) => SawStmtDecl,
302 StmtExpr(..) => SawStmtExpr,
303 StmtSemi(..) => SawStmtSemi,
304 StmtMac(..) => unreachable!(),
308 // Ad-hoc overloading between Ident and Name to their intern table lookups.
309 trait InternKey { fn get_content(self) -> token::InternedString; }
310 impl InternKey for Ident {
311 fn get_content(self) -> token::InternedString { token::get_ident(self) }
313 impl InternKey for Name {
314 fn get_content(self) -> token::InternedString { token::get_name(self) }
316 fn content<K:InternKey>(k: K) -> token::InternedString { k.get_content() }
318 // local short-hand eases writing signatures of syntax::visit mod.
321 impl<'a> Visitor<E> for StrictVersionHashVisitor<'a> {
323 fn visit_mac(&mut self, macro: &Mac, e: E) {
324 // macro invocations, namely macro_rules definitions,
325 // *can* appear as items, even in the expanded crate AST.
327 if macro_name(macro).get() == "macro_rules" {
328 // Pretty-printing definition to a string strips out
329 // surface artifacts (currently), such as the span
330 // information, yielding a content-based hash.
332 // FIXME (#14132): building temporary string is
333 // expensive; a direct content-based hash on token
334 // trees might be faster. Implementing this is far
335 // easier in short term.
336 let macro_defn_as_string =
337 pprust::to_str(|pp_state| pp_state.print_mac(macro));
338 macro_defn_as_string.hash(self.st);
340 // It is not possible to observe any kind of macro
341 // invocation at this stage except `macro_rules!`.
342 fail!("reached macro somehow: {}",
343 pprust::to_str(|pp_state| pp_state.print_mac(macro)));
346 visit::walk_mac(self, macro, e);
348 fn macro_name(macro: &Mac) -> token::InternedString {
350 &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => {
351 let s = path.segments.as_slice();
352 assert_eq!(s.len(), 1);
353 content(s[0].identifier)
359 fn visit_struct_def(&mut self, s: &StructDef, ident: Ident,
360 g: &Generics, _: NodeId, e: E) {
361 SawStructDef(content(ident)).hash(self.st);
362 visit::walk_generics(self, g, e.clone());
363 visit::walk_struct_def(self, s, e)
366 fn visit_variant(&mut self, v: &Variant, g: &Generics, e: E) {
367 SawVariant.hash(self.st);
368 // walk_variant does not call walk_generics, so do it here.
369 visit::walk_generics(self, g, e.clone());
370 visit::walk_variant(self, v, g, e)
373 fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option<Lifetime>, env: E) {
374 SawOptLifetimeRef.hash(self.st);
375 // (This is a strange method in the visitor trait, in that
376 // it does not expose a walk function to do the subroutine
379 Some(ref l) => self.visit_lifetime_ref(l, env),
384 // All of the remaining methods just record (in the hash
385 // SipState) that the visitor saw that particular variant
386 // (with its payload), and continue walking as the default
389 // Some of the implementations have some notes as to how one
390 // might try to make their SVH computation less discerning
391 // (e.g. by incorporating reachability analysis). But
392 // currently all of their implementations are uniform and
395 // (If you edit a method such that it deviates from the
396 // pattern, please move that method up above this comment.)
398 fn visit_ident(&mut self, _: Span, ident: Ident, _: E) {
399 SawIdent(content(ident)).hash(self.st);
402 fn visit_lifetime_ref(&mut self, l: &Lifetime, _: E) {
403 SawLifetimeRef(content(l.name)).hash(self.st);
406 fn visit_lifetime_decl(&mut self, l: &Lifetime, _: E) {
407 SawLifetimeDecl(content(l.name)).hash(self.st);
410 // We do recursively walk the bodies of functions/methods
411 // (rather than omitting their bodies from the hash) since
412 // monomorphization and cross-crate inlining generally implies
413 // that a change to a crate body will require downstream
414 // crates to be recompiled.
415 fn visit_expr(&mut self, ex: &Expr, e: E) {
416 SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex, e)
419 fn visit_stmt(&mut self, s: &Stmt, e: E) {
420 SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s, e)
423 fn visit_view_item(&mut self, i: &ViewItem, e: E) {
424 // Two kinds of view items can affect the ABI for a crate:
425 // exported `pub use` view items (since that may expose
426 // items that downstream crates can call), and `use
427 // foo::Trait`, since changing that may affect method
430 // The simplest approach to handling both of the above is
431 // just to adopt the same simple-minded (fine-grained)
432 // hash that I am deploying elsewhere here.
433 SawViewItem.hash(self.st); visit::walk_view_item(self, i, e)
436 fn visit_foreign_item(&mut self, i: &ForeignItem, e: E) {
437 // FIXME (#14132) ideally we would incorporate privacy (or
438 // perhaps reachability) somewhere here, so foreign items
439 // that do not leak into downstream crates would not be
441 SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i, e)
444 fn visit_item(&mut self, i: &Item, e: E) {
445 // FIXME (#14132) ideally would incorporate reachability
446 // analysis somewhere here, so items that never leak into
447 // downstream crates (e.g. via monomorphisation or
448 // inlining) would not be part of the ABI.
449 SawItem.hash(self.st); visit::walk_item(self, i, e)
452 fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId, e: E) {
453 SawMod.hash(self.st); visit::walk_mod(self, m, e)
456 fn visit_decl(&mut self, d: &Decl, e: E) {
457 SawDecl.hash(self.st); visit::walk_decl(self, d, e)
460 fn visit_ty(&mut self, t: &Ty, e: E) {
461 SawTy.hash(self.st); visit::walk_ty(self, t, e)
464 fn visit_generics(&mut self, g: &Generics, e: E) {
465 SawGenerics.hash(self.st); visit::walk_generics(self, g, e)
468 fn visit_fn(&mut self, fk: &FnKind, fd: &FnDecl, b: &Block, s: Span, _: NodeId, e: E) {
469 SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s, e)
472 fn visit_ty_method(&mut self, t: &TypeMethod, e: E) {
473 SawTyMethod.hash(self.st); visit::walk_ty_method(self, t, e)
476 fn visit_trait_method(&mut self, t: &TraitMethod, e: E) {
477 SawTraitMethod.hash(self.st); visit::walk_trait_method(self, t, e)
480 fn visit_struct_field(&mut self, s: &StructField, e: E) {
481 SawStructField.hash(self.st); visit::walk_struct_field(self, s, e)
484 fn visit_explicit_self(&mut self, es: &ExplicitSelf, e: E) {
485 SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es, e)
488 fn visit_path(&mut self, path: &Path, _: ast::NodeId, e: E) {
489 SawPath.hash(self.st); visit::walk_path(self, path, e)
492 fn visit_block(&mut self, b: &Block, e: E) {
493 SawBlock.hash(self.st); visit::walk_block(self, b, e)
496 fn visit_pat(&mut self, p: &Pat, e: E) {
497 SawPat.hash(self.st); visit::walk_pat(self, p, e)
500 fn visit_local(&mut self, l: &Local, e: E) {
501 SawLocal.hash(self.st); visit::walk_local(self, l, e)
504 fn visit_arm(&mut self, a: &Arm, e: E) {
505 SawArm.hash(self.st); visit::walk_arm(self, a, e)