1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Calculation and management of a Strict Version Hash for crates
13 //! # Today's ABI problem
15 //! In today's implementation of rustc, it is incredibly difficult to achieve
16 //! forward binary compatibility without resorting to C-like interfaces. Within
17 //! rust code itself, abi details such as symbol names suffer from a variety of
18 //! unrelated factors to code changing such as the "def id drift" problem. This
19 //! ends up yielding confusing error messages about metadata mismatches and
22 //! The core of this problem is when an upstream dependency changes and
23 //! downstream dependents are not recompiled. This causes compile errors because
24 //! the upstream crate's metadata has changed but the downstream crates are
25 //! still referencing the older crate's metadata.
27 //! This problem exists for many reasons, the primary of which is that rust does
28 //! not currently support forwards ABI compatibility (in place upgrades of a
31 //! # SVH and how it alleviates the problem
33 //! With all of this knowledge on hand, this module contains the implementation
34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35 //! hash of all contents of a crate which can somehow be exposed to downstream
38 //! This hash is currently calculated by just hashing the AST, but this is
39 //! obviously wrong (doc changes should not result in an incompatible ABI).
40 //! Implementation-wise, this is required at this moment in time.
42 //! By encoding this strict version hash into all crate's metadata, stale crates
43 //! can be detected immediately and error'd about by rustc itself.
47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
50 use std::hash::{Hash, SipHasher, Hasher};
52 use rustc_front::visit;
54 #[derive(Clone, PartialEq, Debug)]
60 pub fn new(hash: &str) -> Svh {
61 assert!(hash.len() == 16);
62 Svh { hash: hash.to_string() }
65 pub fn as_str<'a>(&'a self) -> &'a str {
69 pub fn calculate(metadata: &Vec<String>, krate: &hir::Crate) -> Svh {
70 // FIXME (#14132): This is better than it used to be, but it still not
71 // ideal. We now attempt to hash only the relevant portions of the
72 // Crate AST as well as the top-level crate attributes. (However,
73 // the hashing of the crate attributes should be double-checked
74 // to ensure it is not incorporating implementation artifacts into
75 // the hash that are not otherwise visible.)
77 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
79 let mut state = SipHasher::new();
81 for data in metadata {
82 data.hash(&mut state);
86 let mut visit = svh_visitor::make(&mut state);
87 visit::walk_crate(&mut visit, krate);
90 // FIXME (#14132): This hash is still sensitive to e.g. the
91 // spans of the crate Attributes and their underlying
92 // MetaItems; we should make ContentHashable impl for those
93 // types and then use hash_content. But, since all crate
94 // attributes should appear near beginning of the file, it is
95 // not such a big deal to be sensitive to their spans for now.
97 // We hash only the MetaItems instead of the entire Attribute
98 // to avoid hashing the AttrId
99 for attr in &krate.attrs {
100 attr.node.value.hash(&mut state);
103 let hash = state.finish();
105 hash: (0..64).step_by(4).map(|i| hex(hash >> i)).collect()
108 fn hex(b: u64) -> char {
109 let b = (b & 0xf) as u8;
111 0 ... 9 => '0' as u8 + b,
112 _ => 'a' as u8 + b - 10,
119 impl fmt::Display for Svh {
120 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125 // FIXME (#14132): Even this SVH computation still has implementation
126 // artifacts: namely, the order of item declaration will affect the
127 // hash computation, but for many kinds of items the order of
128 // declaration should be irrelevant to the ABI.
131 pub use self::SawExprComponent::*;
132 pub use self::SawStmtComponent::*;
133 use self::SawAbiComponent::*;
134 use syntax::ast::{self, Name, NodeId};
135 use syntax::codemap::Span;
136 use syntax::parse::token;
137 use rustc_front::visit;
138 use rustc_front::visit::{Visitor, FnKind};
139 use rustc_front::hir::*;
140 use rustc_front::hir;
142 use std::hash::{Hash, SipHasher};
144 pub struct StrictVersionHashVisitor<'a> {
145 pub st: &'a mut SipHasher,
148 pub fn make<'a>(st: &'a mut SipHasher) -> StrictVersionHashVisitor<'a> {
149 StrictVersionHashVisitor { st: st }
152 // To off-load the bulk of the hash-computation on #[derive(Hash)],
153 // we define a set of enums corresponding to the content that our
154 // crate visitor will encounter as it traverses the ast.
156 // The important invariant is that all of the Saw*Component enums
157 // do not carry any Spans, Names, or Idents.
159 // Not carrying any Names/Idents is the important fix for problem
160 // noted on PR #13948: using the ident.name as the basis for a
161 // hash leads to unstable SVH, because ident.name is just an index
162 // into intern table (i.e. essentially a random address), not
163 // computed from the name content.
165 // With the below enums, the SVH computation is not sensitive to
166 // artifacts of how rustc was invoked nor of how the source code
167 // was laid out. (Or at least it is *less* sensitive.)
169 // This enum represents the different potential bits of code the
170 // visitor could encounter that could affect the ABI for the crate,
171 // and assigns each a distinct tag to feed into the hash computation.
173 enum SawAbiComponent<'a> {
175 // FIXME (#14132): should we include (some function of)
176 // ident.ctxt as well?
177 SawIdent(token::InternedString),
178 SawStructDef(token::InternedString),
180 SawLifetime(token::InternedString),
181 SawLifetimeDef(token::InternedString),
200 SawExpr(SawExprComponent<'a>),
201 SawStmt(SawStmtComponent),
204 /// SawExprComponent carries all of the information that we want
205 /// to include in the hash that *won't* be covered by the
206 /// subsequent recursive traversal of the expression's
207 /// substructure by the visitor.
209 /// We know every Expr_ variant is covered by a variant because
210 /// `fn saw_expr` maps each to some case below. Ensuring that
211 /// each variant carries an appropriate payload has to be verified
214 /// (However, getting that *exactly* right is not so important
215 /// because the SVH is just a developer convenience; there is no
216 /// guarantee of collision-freedom, hash collisions are just
217 /// (hopefully) unlikely.)
219 pub enum SawExprComponent<'a> {
221 SawExprLoop(Option<token::InternedString>),
222 SawExprField(token::InternedString),
223 SawExprTupField(usize),
224 SawExprBreak(Option<token::InternedString>),
225 SawExprAgain(Option<token::InternedString>),
232 SawExprBinary(hir::BinOp_),
233 SawExprUnary(hir::UnOp),
234 SawExprLit(ast::Lit_),
242 SawExprAssignOp(hir::BinOp_),
245 SawExprPath(Option<usize>),
246 SawExprAddrOf(hir::Mutability),
248 SawExprInlineAsm(&'a hir::InlineAsm),
253 fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
255 ExprBox(..) => SawExprBox,
256 ExprVec(..) => SawExprVec,
257 ExprCall(..) => SawExprCall,
258 ExprMethodCall(..) => SawExprMethodCall,
259 ExprTup(..) => SawExprTup,
260 ExprBinary(op, _, _) => SawExprBinary(op.node),
261 ExprUnary(op, _) => SawExprUnary(op),
262 ExprLit(ref lit) => SawExprLit(lit.node.clone()),
263 ExprCast(..) => SawExprCast,
264 ExprIf(..) => SawExprIf,
265 ExprWhile(..) => SawExprWhile,
266 ExprLoop(_, id) => SawExprLoop(id.map(|id| id.name.as_str())),
267 ExprMatch(..) => SawExprMatch,
268 ExprClosure(..) => SawExprClosure,
269 ExprBlock(..) => SawExprBlock,
270 ExprAssign(..) => SawExprAssign,
271 ExprAssignOp(op, _, _) => SawExprAssignOp(op.node),
272 ExprField(_, name) => SawExprField(name.node.as_str()),
273 ExprTupField(_, id) => SawExprTupField(id.node),
274 ExprIndex(..) => SawExprIndex,
275 ExprRange(..) => SawExprRange,
276 ExprPath(ref qself, _) => SawExprPath(qself.as_ref().map(|q| q.position)),
277 ExprAddrOf(m, _) => SawExprAddrOf(m),
278 ExprBreak(id) => SawExprBreak(id.map(|id| id.node.name.as_str())),
279 ExprAgain(id) => SawExprAgain(id.map(|id| id.node.name.as_str())),
280 ExprRet(..) => SawExprRet,
281 ExprInlineAsm(ref asm) => SawExprInlineAsm(asm),
282 ExprStruct(..) => SawExprStruct,
283 ExprRepeat(..) => SawExprRepeat,
287 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
289 pub enum SawStmtComponent {
295 fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
297 StmtDecl(..) => SawStmtDecl,
298 StmtExpr(..) => SawStmtExpr,
299 StmtSemi(..) => SawStmtSemi,
303 impl<'a, 'v> Visitor<'v> for StrictVersionHashVisitor<'a> {
304 fn visit_struct_def(&mut self, s: &StructDef, name: Name,
305 g: &Generics, _: NodeId, _: Span) {
306 SawStructDef(name.as_str()).hash(self.st);
307 visit::walk_generics(self, g);
308 visit::walk_struct_def(self, s)
311 fn visit_variant(&mut self, v: &Variant, g: &Generics, item_id: NodeId) {
312 SawVariant.hash(self.st);
313 // walk_variant does not call walk_generics, so do it here.
314 visit::walk_generics(self, g);
315 visit::walk_variant(self, v, g, item_id)
318 // All of the remaining methods just record (in the hash
319 // SipHasher) that the visitor saw that particular variant
320 // (with its payload), and continue walking as the default
323 // Some of the implementations have some notes as to how one
324 // might try to make their SVH computation less discerning
325 // (e.g. by incorporating reachability analysis). But
326 // currently all of their implementations are uniform and
329 // (If you edit a method such that it deviates from the
330 // pattern, please move that method up above this comment.)
332 fn visit_name(&mut self, _: Span, name: Name) {
333 SawIdent(name.as_str()).hash(self.st);
336 fn visit_lifetime(&mut self, l: &Lifetime) {
337 SawLifetime(l.name.as_str()).hash(self.st);
340 fn visit_lifetime_def(&mut self, l: &LifetimeDef) {
341 SawLifetimeDef(l.lifetime.name.as_str()).hash(self.st);
344 // We do recursively walk the bodies of functions/methods
345 // (rather than omitting their bodies from the hash) since
346 // monomorphization and cross-crate inlining generally implies
347 // that a change to a crate body will require downstream
348 // crates to be recompiled.
349 fn visit_expr(&mut self, ex: &Expr) {
350 SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex)
353 fn visit_stmt(&mut self, s: &Stmt) {
354 SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s)
357 fn visit_foreign_item(&mut self, i: &ForeignItem) {
358 // FIXME (#14132) ideally we would incorporate privacy (or
359 // perhaps reachability) somewhere here, so foreign items
360 // that do not leak into downstream crates would not be
362 SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i)
365 fn visit_item(&mut self, i: &Item) {
366 // FIXME (#14132) ideally would incorporate reachability
367 // analysis somewhere here, so items that never leak into
368 // downstream crates (e.g. via monomorphisation or
369 // inlining) would not be part of the ABI.
370 SawItem.hash(self.st); visit::walk_item(self, i)
373 fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId) {
374 SawMod.hash(self.st); visit::walk_mod(self, m)
377 fn visit_decl(&mut self, d: &Decl) {
378 SawDecl.hash(self.st); visit::walk_decl(self, d)
381 fn visit_ty(&mut self, t: &Ty) {
382 SawTy.hash(self.st); visit::walk_ty(self, t)
385 fn visit_generics(&mut self, g: &Generics) {
386 SawGenerics.hash(self.st); visit::walk_generics(self, g)
389 fn visit_fn(&mut self, fk: FnKind<'v>, fd: &'v FnDecl,
390 b: &'v Block, s: Span, _: NodeId) {
391 SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s)
394 fn visit_trait_item(&mut self, ti: &TraitItem) {
395 SawTraitItem.hash(self.st); visit::walk_trait_item(self, ti)
398 fn visit_impl_item(&mut self, ii: &ImplItem) {
399 SawImplItem.hash(self.st); visit::walk_impl_item(self, ii)
402 fn visit_struct_field(&mut self, s: &StructField) {
403 SawStructField.hash(self.st); visit::walk_struct_field(self, s)
406 fn visit_explicit_self(&mut self, es: &ExplicitSelf) {
407 SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es)
410 fn visit_path(&mut self, path: &Path, _: ast::NodeId) {
411 SawPath.hash(self.st); visit::walk_path(self, path)
414 fn visit_path_list_item(&mut self, prefix: &Path, item: &'v PathListItem) {
415 SawPath.hash(self.st); visit::walk_path_list_item(self, prefix, item)
418 fn visit_block(&mut self, b: &Block) {
419 SawBlock.hash(self.st); visit::walk_block(self, b)
422 fn visit_pat(&mut self, p: &Pat) {
423 SawPat.hash(self.st); visit::walk_pat(self, p)
426 fn visit_local(&mut self, l: &Local) {
427 SawLocal.hash(self.st); visit::walk_local(self, l)
430 fn visit_arm(&mut self, a: &Arm) {
431 SawArm.hash(self.st); visit::walk_arm(self, a)