diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 04a7714d4137e..546cc4d0597f7 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -8,7 +8,8 @@ use rustc_session::config::{ FunctionReturn, InliningThreshold, Input, InstrumentCoverage, InstrumentXRay, LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirSpanview, NextSolverConfig, OomStrategy, Options, OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, Passes, Polonius, - ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, + ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingDigest, + SymbolManglingVersion, WasiExecModel, }; use rustc_session::lint::Level; use rustc_session::search_paths::SearchPath; @@ -702,6 +703,7 @@ fn test_unstable_options_tracking_hash() { untracked!(self_profile_events, Some(vec![String::new()])); untracked!(span_debug, true); untracked!(span_free_formats, true); + untracked!(symbol_mangling_digest, SymbolManglingDigest::new()); untracked!(temps_dir, Some(String::from("abc"))); untracked!(threads, 99); untracked!(time_llvm_passes, true); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 0c21e4eb43e78..18004dbfff09c 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -21,6 +21,7 @@ use rustc_target::abi::Align; use rustc_target::spec::LinkSelfContainedComponents; use rustc_target::spec::{PanicStrategy, RelocModel, SanitizerSet, SplitDebuginfo}; use rustc_target::spec::{Target, TargetTriple, TargetWarnings, TARGETS}; +use std::cmp::PartialEq; use std::collections::btree_map::{ Iter as BTreeMapIter, Keys as BTreeMapKeysIter, Values as BTreeMapValuesIter, }; @@ -28,7 +29,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::ffi::OsStr; use std::fmt; use std::fs; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::iter; use 
std::path::{Path, PathBuf}; use std::str::{self, FromStr}; @@ -364,6 +365,127 @@ pub enum SymbolManglingVersion { V0, } +#[derive(Clone, Debug)] +pub struct SymbolManglingDigest { + fulls: FxHashSet, + prefixes: Vec, + salt: String, + level: u8, + excluded: bool, +} + +impl SymbolManglingDigest { + pub fn new() -> Self { + Self { + fulls: FxHashSet::default(), + prefixes: Vec::new(), + salt: String::new(), + level: 2, + excluded: false, + } + } + + pub fn enabled(&self) -> bool { + !self.fulls.is_empty() || !self.prefixes.is_empty() || self.excluded + } + + pub fn hasher_enable(&mut self, args: &str) -> bool { + let cloned = self.clone(); + if self.hasher_reinit(args) { + return true; + } + self.fulls = cloned.fulls; + self.prefixes = cloned.prefixes; + self.level = cloned.level; + self.salt = cloned.salt; + self.excluded = cloned.excluded; + false + } + + pub fn hasher_args(&self) -> (&str, u8) { + (&self.salt, self.level) + } + + pub fn hasher_contains(&self, val: &str) -> bool { + if self.fulls.contains(val) { + return self.excluded ^ true; + } + for prefix in self.prefixes.iter() { + if val.starts_with(prefix) { + return self.excluded ^ true; + } + } + self.excluded ^ false + } + + fn hasher_reinit(&mut self, args: &str) -> bool { + for arg in args.split(',') { + let mut it = arg.split('='); + let Some(name) = it.next() else { + continue; + }; + if let Some(value) = it.next() { + match name { + "salt" => self.salt = value.to_string(), + "level" => match value { + "1" => self.level = 1, + "2" => self.level = 2, + _ => return false, + }, + "excluded" => match value { + "true" => self.excluded = true, + "false" => self.excluded = false, + _ => return false, + }, + _ => return false, + } + } else if name.ends_with("*") { + let _ = self.prefixes.push(name[..name.len() - 1].to_string()); + } else { + let _ = self.fulls.insert(name.to_string()); + } + } + true + } + + fn to_vec(&self) -> Vec<&str> { + let mut ret = Vec::with_capacity(self.fulls.len() + 
self.prefixes.len()); + #[allow(rustc::potential_query_instability)] + self.fulls.iter().for_each(|val| ret.push(val.as_str())); + ret.sort(); + self.prefixes.iter().for_each(|val| ret.push(val.as_str())); + ret[self.fulls.len()..].sort(); + ret + } +} + +impl Hash for SymbolManglingDigest { + fn hash(&self, hasher: &mut H) + where + H: Hasher, + { + for val in self.to_vec() { + val.hash(hasher); + } + self.fulls.len().hash(hasher); + self.prefixes.len().hash(hasher); + self.salt.hash(hasher); + self.level.hash(hasher); + self.excluded.hash(hasher); + } +} + +impl PartialEq for SymbolManglingDigest { + fn eq(&self, other: &Self) -> bool { + self.excluded == other.excluded + && self.level == other.level + && self.salt == other.salt + && self.fulls.len() == other.fulls.len() + && self.prefixes.len() == other.prefixes.len() + && self.to_vec() == other.to_vec() + } +} + #[derive(Clone, Copy, Debug, PartialEq, Hash)] pub enum DebugInfo { None, @@ -2763,6 +2885,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M ); } + if unstable_opts.symbol_mangling_digest.enabled() { + early_dcx.early_error( + "option `-C instrument-coverage` is not compatible with `-Z symbol_mangling_digest`" + ); + } + // `-C instrument-coverage` implies `-C symbol-mangling-version=v0` - to ensure consistent // and reversible name mangling. 
Note, LLVM coverage tools can analyze coverage over // multiple runs, including some changes to source code; so mangled names must be consistent @@ -3226,8 +3354,8 @@ pub(crate) mod dep_tracking { ErrorOutputType, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay, LinkerPluginLto, LocationDetail, LtoCli, NextSolverConfig, OomStrategy, OptLevel, OutFileName, OutputType, OutputTypes, Polonius, RemapPathScopeComponents, ResolveDocLinks, - SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion, - TrimmedDefPaths, WasiExecModel, + SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingDigest, + SymbolManglingVersion, TrimmedDefPaths, WasiExecModel, }; use crate::lint; use crate::utils::NativeLib; @@ -3319,6 +3447,7 @@ pub(crate) mod dep_tracking { SplitDwarfKind, StackProtector, SwitchWithOptPath, + SymbolManglingDigest, SymbolManglingVersion, RemapPathScopeComponents, SourceFileHashAlgorithm, diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 06b554e8e63b1..4504eb2fcb48a 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -405,6 +405,7 @@ mod desc { pub const parse_switch_with_opt_path: &str = "an optional path to the profiling data output directory"; pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`"; + pub const parse_symbol_mangling_digest: &str = "configuing parameters for shortening symbol names: `[*],...[,excluded=true|false][,salt=value][,level=1|2]`"; pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)"; pub const parse_src_file_hash: &str = "either `md5` or `sha1`"; pub const parse_relocation_model: &str = @@ -1206,6 +1207,16 @@ mod parse { true } + pub(crate) fn parse_symbol_mangling_digest( + slot: &mut SymbolManglingDigest, + v: Option<&str>, + ) -> bool { + if let Some(v) = v { + return slot.hasher_enable(v); + } + true + } + 
pub(crate) fn parse_src_file_hash( slot: &mut Option, v: Option<&str>, @@ -1905,6 +1916,8 @@ written to standard error output)"), "prefer dynamic linking to static linking for staticlibs (default: no)"), strict_init_checks: bool = (false, parse_bool, [TRACKED], "control if mem::uninitialized and mem::zeroed panic on more UB"), + symbol_mangling_digest: SymbolManglingDigest = (SymbolManglingDigest::new(), parse_symbol_mangling_digest, [UNTRACKED], + "configuring parameters for shortening symbol names(default: disable)"), #[rustc_lint_opt_deny_field_access("use `Session::teach` instead of this field")] teach: bool = (false, parse_bool, [TRACKED], "show extended diagnostic help (default: no)"), diff --git a/compiler/rustc_symbol_mangling/src/digest.rs b/compiler/rustc_symbol_mangling/src/digest.rs new file mode 100644 index 0000000000000..891e37bf39a90 --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/digest.rs @@ -0,0 +1,43 @@ +use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher}; +use rustc_hir::def_id::DefId; +use rustc_middle::ty::TyCtxt; + +pub(super) fn generate<'tcx>(tcx: TyCtxt<'tcx>, symbol: String, def_id: DefId) -> String { + let crate_name = tcx.crate_name(def_id.krate); + let crate_name = crate_name.as_str(); + let symbol_mangling_digest = &tcx.sess.opts.unstable_opts.symbol_mangling_digest; + if !symbol_mangling_digest.hasher_contains(crate_name) { + return symbol; + } + + let (salt, level) = symbol_mangling_digest.hasher_args(); + + let hash = tcx.with_stable_hashing_context(|mut hcx| { + let mut hasher = StableHasher::new(); + symbol.hash_stable(&mut hcx, &mut hasher); + salt.hash_stable(&mut hcx, &mut hasher); + hasher.finish::().as_u64() + }); + + match level { + 1 => encode_1(tcx, crate_name, hash, def_id), + _ => encode_2(tcx, crate_name, hash, def_id), + } +} + +fn encode_1<'tcx>(tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, def_id: DefId) -> String { + if let Some(item_name) = tcx.opt_item_name(def_id) { + let 
item_name = item_name.as_str(); + format!( + "_ZN{}{crate_name}.{item_name}.{:08x}E", + crate_name.len() + item_name.len() + 11, + hash >> 8 + ) + } else { + encode_2(tcx, crate_name, hash, def_id) + } +} + +fn encode_2<'tcx>(_tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, _def_id: DefId) -> String { + format!("_ZN{}{crate_name}.{hash:016x}E", crate_name.len() + 18) +} diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index 8c035ba948b15..44aff72c89dba 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -111,6 +111,7 @@ use rustc_middle::query::Providers; use rustc_middle::ty::{self, Instance, TyCtxt}; use rustc_session::config::SymbolManglingVersion; +mod digest; mod legacy; mod v0; @@ -267,6 +268,12 @@ fn compute_symbol_name<'tcx>( SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate), }; + let symbol = if tcx.sess.opts.unstable_opts.symbol_mangling_digest.enabled() { + digest::generate(tcx, symbol, def_id) + } else { + symbol + }; + debug_assert!( rustc_demangle::try_demangle(&symbol).is_ok(), "compute_symbol_name: `{symbol}` cannot be demangled" diff --git a/src/doc/unstable-book/src/compiler-flags/symbol_mangling_digest.md b/src/doc/unstable-book/src/compiler-flags/symbol_mangling_digest.md new file mode 100644 index 0000000000000..a241a73be8a86 --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/symbol_mangling_digest.md @@ -0,0 +1,10 @@ +# `symbol_mangling_digest` + +This optimization option allows users to generate shorter symbol names for dylibs. At the expense of capabilities such as the readability of symbol names, this option eliminates the space bottleneck encountered when using Rust to replace existing C/C++ functional modules in resource-constrained scenarios. + +The new option is defined as follows: `-Z symbol_mangling_digest=<crate_name>[*],...[,excluded=<true|false>][,salt=<value>][,level=<1|2>]`.
+ +- `crate_name[*],...`: Name of a crate. Multiple crate names are allowed. If the suffix `*` is present, the name is treated as a prefix of the crate name. Together with `excluded`, it determines the range of symbols to be optimized. Users must be very clear about the optimization range. If crate names supported regular expression matching, the optimization range would be difficult to determine, which may cause confusion. Defaults to empty. +- `excluded=<true|false>`: If the value is `false`, only the names of symbols whose crate names are successfully matched are optimized. If the value is `true`, only the names of symbols whose crate names fail to match are optimized. The default value is `false`. +- `salt=<value>`: User-specified salt value used in hash calculation. The default value is empty. +- `level=<1|2>`: Specifies the combination policy of the final symbol name. If the value is `1`, the final combination format is `{crate}.{item}.{hash32}`. If the value is `2`, the final combination format is `{crate}.{hash64}`. The default value is `2`.
diff --git a/tests/run-make/symbol-mangling-digest/Makefile b/tests/run-make/symbol-mangling-digest/Makefile new file mode 100644 index 0000000000000..e42d49f3d4775 --- /dev/null +++ b/tests/run-make/symbol-mangling-digest/Makefile @@ -0,0 +1,5 @@ +include ../tools.mk + +all: + $(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo foo.rs + $(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo --extern foo=$(TMPDIR)/libfoo.so bar.rs diff --git a/tests/run-make/symbol-mangling-digest/bar.rs b/tests/run-make/symbol-mangling-digest/bar.rs new file mode 100644 index 0000000000000..a9832a09a996f --- /dev/null +++ b/tests/run-make/symbol-mangling-digest/bar.rs @@ -0,0 +1,7 @@ +#![crate_type = "bin"] + +extern crate foo; + +fn main() { + foo::foo(); +} diff --git a/tests/run-make/symbol-mangling-digest/foo.rs b/tests/run-make/symbol-mangling-digest/foo.rs new file mode 100644 index 0000000000000..20e6d723729c0 --- /dev/null +++ b/tests/run-make/symbol-mangling-digest/foo.rs @@ -0,0 +1,4 @@ +#![crate_type = "dylib"] +pub fn foo() { + println!("hello foo"); +}