From 82b5f1d8690ec538557ce3f355add91e3809ba51 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Tue, 1 Mar 2016 08:18:21 -0500 Subject: [PATCH] Remove old symbol naming code. --- src/librustc_trans/back/link.rs | 144 ------------------------ src/librustc_trans/back/symbol_names.rs | 92 ++++++++++++++- 2 files changed, 88 insertions(+), 148 deletions(-) diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 2a9bd6618a3..a8e83bdb6b7 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -22,9 +22,7 @@ use session::Session; use middle::cstore::{self, CrateStore, LinkMeta}; use middle::cstore::{LinkagePreference, NativeLibraryKind}; -use middle::def_id::DefId; use middle::dependency_format::Linkage; -use middle::ty::TyCtxt; use trans::CrateTranslation; use util::common::time; use util::fs::fix_windows_verbatim_for_gcc; @@ -43,7 +41,6 @@ use flate; use syntax::ast; use syntax::codemap::Span; -use syntax::parse::token::InternedString; use syntax::attr::AttrMetaMethods; use rustc_front::hir; @@ -78,58 +75,6 @@ RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8; -/* - * Name mangling and its relationship to metadata. This is complex. Read - * carefully. - * - * The semantic model of Rust linkage is, broadly, that "there's no global - * namespace" between crates. Our aim is to preserve the illusion of this - * model despite the fact that it's not *quite* possible to implement on - * modern linkers. We initially didn't use system linkers at all, but have - * been convinced of their utility. - * - * There are a few issues to handle: - * - * - Linkers operate on a flat namespace, so we have to flatten names. - * We do this using the C++ namespace-mangling technique. Foo::bar - * symbols and such. - * - * - Symbols with the same name but different types need to get different - * linkage-names. 
We do this by hashing a string-encoding of the type into - * a fixed-size (currently 16-byte hex) cryptographic hash function (CHF: - * we use SHA256) to "prevent collisions". This is not airtight but 16 hex - * digits on uniform probability means you're going to need 2**32 same-name - * symbols in the same process before you're even hitting birthday-paradox - * collision probability. - * - * - Symbols in different crates but with same names "within" the crate need - * to get different linkage-names. - * - * - The hash shown in the filename needs to be predictable and stable for - * build tooling integration. It also needs to be using a hash function - * which is easy to use from Python, make, etc. - * - * So here is what we do: - * - * - Consider the package id; every crate has one (specified with crate_id - * attribute). If a package id isn't provided explicitly, we infer a - * versionless one from the output name. The version will end up being 0.0 - * in this case. CNAME and CVERS are taken from this package id. For - * example, github.com/mozilla/CNAME#CVERS. - * - * - Define CMH as SHA256(crateid). - * - * - Define CMH8 as the first 8 characters of CMH. - * - * - Compile our crate to lib CNAME-CMH8-CVERS.so - * - * - Define STH(sym) as SHA256(CMH, type_str(sym)) - * - * - Suffix a mangled sym with ::STH@CVERS, so that it is unique in the - * name, non-name metadata, and type sense, and versioned in the way - * system linkers understand. - */ - pub fn find_crate_name(sess: Option<&Session>, attrs: &[ast::Attribute], input: &Input) -> String { @@ -191,95 +136,6 @@ pub fn build_link_meta(sess: &Session, return r; } -pub fn def_to_string(_tcx: &TyCtxt, did: DefId) -> String { - format!("{}:{}", did.krate, did.index.as_usize()) -} - -// Name sanitation. LLVM will happily accept identifiers with weird names, but -// gas doesn't! 
-// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $ -pub fn sanitize(s: &str) -> String { - let mut result = String::new(); - for c in s.chars() { - match c { - // Escape these with $ sequences - '@' => result.push_str("$SP$"), - '*' => result.push_str("$BP$"), - '&' => result.push_str("$RF$"), - '<' => result.push_str("$LT$"), - '>' => result.push_str("$GT$"), - '(' => result.push_str("$LP$"), - ')' => result.push_str("$RP$"), - ',' => result.push_str("$C$"), - - // '.' doesn't occur in types and functions, so reuse it - // for ':' and '-' - '-' | ':' => result.push('.'), - - // These are legal symbols - 'a' ... 'z' - | 'A' ... 'Z' - | '0' ... '9' - | '_' | '.' | '$' => result.push(c), - - _ => { - result.push('$'); - for c in c.escape_unicode().skip(1) { - match c { - '{' => {}, - '}' => result.push('$'), - c => result.push(c), - } - } - } - } - } - - // Underscore-qualify anything that didn't start as an ident. - if !result.is_empty() && - result.as_bytes()[0] != '_' as u8 && - ! (result.as_bytes()[0] as char).is_xid_start() { - return format!("_{}", &result[..]); - } - - return result; -} - -pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String { - // Follow C++ namespace-mangling style, see - // http://en.wikipedia.org/wiki/Name_mangling for more info. - // - // It turns out that on OSX you can actually have arbitrary symbols in - // function names (at least when given to LLVM), but this is not possible - // when using unix's linker. Perhaps one day when we just use a linker from LLVM - // we won't need to do this name mangling. The problem with name mangling is - // that it seriously limits the available characters. For example we can't - // have things like &T in symbol names when one would theoretically - // want them for things like impls of traits on that type. - // - // To be able to work on all platforms and get *some* reasonable output, we - // use C++ name-mangling. 
- - let mut n = String::from("_ZN"); // _Z == Begin name-sequence, N == nested - - fn push(n: &mut String, s: &str) { - let sani = sanitize(s); - n.push_str(&format!("{}{}", sani.len(), sani)); - } - - // First, connect each component with <len, name> pairs. - for data in path { - push(&mut n, &data); - } - - if let Some(s) = hash { - push(&mut n, s) - } - - n.push('E'); // End name-sequence. - n -} - pub fn get_linker(sess: &Session) -> (String, Command) { if let Some(ref linker) = sess.opts.cg.linker { (linker.clone(), Command::new(linker)) diff --git a/src/librustc_trans/back/symbol_names.rs b/src/librustc_trans/back/symbol_names.rs index 6edf8db9bf7..7f42cc673c7 100644 --- a/src/librustc_trans/back/symbol_names.rs +++ b/src/librustc_trans/back/symbol_names.rs @@ -107,9 +107,8 @@ use std::fmt::Write; use syntax::ast; -use syntax::parse::token; +use syntax::parse::token::{self, InternedString}; use serialize::hex::ToHex; -use super::link; pub fn def_id_to_string<'tcx>(tcx: &ty::TyCtxt<'tcx>, def_id: DefId) -> String { @@ -207,7 +206,7 @@ fn exported_name_with_opt_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, path.push(token::intern_and_get_ident(suffix)); } - link::mangle(path.into_iter(), Some(&hash[..])) + mangle(path.into_iter(), Some(&hash[..])) } pub fn exported_name<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, @@ -232,5 +231,90 @@ pub fn internal_name_from_type_and_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx> let path = [token::intern(&t.to_string()).as_str(), gensym_name(suffix).as_str()]; let hash = get_symbol_hash(ccx, &Vec::new(), cstore::LOCAL_CRATE, &[t]); - link::mangle(path.iter().cloned(), Some(&hash[..])) + mangle(path.iter().cloned(), Some(&hash[..])) +} + +// Name sanitation. LLVM will happily accept identifiers with weird names, but +// gas doesn't! 
+// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $ +pub fn sanitize(s: &str) -> String { + let mut result = String::new(); + for c in s.chars() { + match c { + // Escape these with $ sequences + '@' => result.push_str("$SP$"), + '*' => result.push_str("$BP$"), + '&' => result.push_str("$RF$"), + '<' => result.push_str("$LT$"), + '>' => result.push_str("$GT$"), + '(' => result.push_str("$LP$"), + ')' => result.push_str("$RP$"), + ',' => result.push_str("$C$"), + + // '.' doesn't occur in types and functions, so reuse it + // for ':' and '-' + '-' | ':' => result.push('.'), + + // These are legal symbols + 'a' ... 'z' + | 'A' ... 'Z' + | '0' ... '9' + | '_' | '.' | '$' => result.push(c), + + _ => { + result.push('$'); + for c in c.escape_unicode().skip(1) { + match c { + '{' => {}, + '}' => result.push('$'), + c => result.push(c), + } + } + } + } + } + + // Underscore-qualify anything that didn't start as an ident. + if !result.is_empty() && + result.as_bytes()[0] != '_' as u8 && + ! (result.as_bytes()[0] as char).is_xid_start() { + return format!("_{}", &result[..]); + } + + return result; +} + +pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String { + // Follow C++ namespace-mangling style, see + // http://en.wikipedia.org/wiki/Name_mangling for more info. + // + // It turns out that on OSX you can actually have arbitrary symbols in + // function names (at least when given to LLVM), but this is not possible + // when using unix's linker. Perhaps one day when we just use a linker from LLVM + // we won't need to do this name mangling. The problem with name mangling is + // that it seriously limits the available characters. For example we can't + // have things like &T in symbol names when one would theoretically + // want them for things like impls of traits on that type. + // + // To be able to work on all platforms and get *some* reasonable output, we + // use C++ name-mangling. 
+ + let mut n = String::from("_ZN"); // _Z == Begin name-sequence, N == nested + + fn push(n: &mut String, s: &str) { + let sani = sanitize(s); + n.push_str(&format!("{}{}", sani.len(), sani)); + } + + // First, connect each component with <len, name> pairs. + for data in path { + push(&mut n, &data); + } + + if let Some(s) = hash { + push(&mut n, s) + } + + n.push('E'); // End name-sequence. + n } -- 2.44.0