Skip to content

Commit

Permalink
MCP #705: Provide the option `-Csymbol-mangling-version=hashed -Z uns…
Browse files Browse the repository at this point in the history
…table-options` to shorten symbol names by replacing them with a digest.

Enrich test cases
  • Loading branch information
h1467792822 committed Jan 25, 2024
1 parent 7ffc697 commit d2163b9
Show file tree
Hide file tree
Showing 15 changed files with 210 additions and 49 deletions.
15 changes: 15 additions & 0 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ impl SwitchWithOptPath {
/// Mangling schemes that can be requested with `-C symbol-mangling-version`.
pub enum SymbolManglingVersion {
/// Selected with `legacy`. Requesting it explicitly requires `-Z unstable-options`.
Legacy,
/// Selected with `v0` (RFC 2603). Accepted as a stable value.
V0,
/// Selected with `hashed`: shortens symbol names by replacing them with a digest.
/// Requires `-Z unstable-options`.
Hashed,
}

#[derive(Clone, Copy, Debug, PartialEq, Hash)]
Expand Down Expand Up @@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
match cg.symbol_mangling_version {
// Stable values:
None | Some(SymbolManglingVersion::V0) => {}

// Unstable values:
Some(SymbolManglingVersion::Legacy) => {
if !unstable_opts.unstable_options {
Expand All @@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
);
}
}
Some(SymbolManglingVersion::Hashed) => {
if !unstable_opts.unstable_options {
early_dcx.early_fatal(
"`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
);
}
}
}

// Check for unstable values of `-C instrument-coverage`.
Expand Down Expand Up @@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
);
}
Some(SymbolManglingVersion::V0) => {}
Some(SymbolManglingVersion::Hashed) => {
early_dcx.early_warn(
"-C instrument-coverage requires symbol mangling version `v0`, \
but `-C symbol-mangling-version=hashed` was specified",
);
}
}
}

Expand Down
6 changes: 4 additions & 2 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ mod desc {
pub const parse_switch_with_opt_path: &str =
"an optional path to the profiling data output directory";
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
pub const parse_symbol_mangling_version: &str =
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
pub const parse_relocation_model: &str =
"one of supported relocation models (`rustc --print relocation-models`)";
Expand Down Expand Up @@ -1180,6 +1181,7 @@ mod parse {
*slot = match v {
Some("legacy") => Some(SymbolManglingVersion::Legacy),
Some("v0") => Some(SymbolManglingVersion::V0),
Some("hashed") => Some(SymbolManglingVersion::Hashed),
_ => return false,
};
true
Expand Down Expand Up @@ -1504,7 +1506,7 @@ options! {
"tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
parse_symbol_mangling_version, [TRACKED],
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
"which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
"select target processor (`rustc --print target-cpus` for details)"),
target_feature: String = (String::new(), parse_target_feature, [TRACKED],
Expand Down
43 changes: 43 additions & 0 deletions compiler/rustc_symbol_mangling/src/hashed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use crate::v0;
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_hir::def_id::CrateNum;
use rustc_middle::ty::{Instance, TyCtxt};

use std::fmt::Write;

/// Builds the `hashed` symbol name for `instance`.
///
/// The result is the literal prefix `_RNxC`, followed by a crate name written with
/// `v0::push_ident`, followed by a 64-bit stable hash of the string returned by
/// `full_mangling_name`, written with `push_hash64`.
///
/// The crate name is that of `instantiating_crate` when it is `Some`, and that of the crate
/// defining `instance` otherwise. `full_mangling_name` is called exactly once.
pub(super) fn mangle<'tcx>(
    tcx: TyCtxt<'tcx>,
    instance: Instance<'tcx>,
    instantiating_crate: Option<CrateNum>,
    full_mangling_name: impl FnOnce() -> String,
) -> String {
    // Instantiations of a generic function may be scattered across several downstream dylibs.
    // If such a symbol still carried the name of the defining crate, a hash conflict between
    // the generic function and another symbol of that crate could not be detected at build
    // time and would only show up at run time. Using the name of the instantiating crate
    // instead eliminates that risk.
    let crate_num = instantiating_crate.unwrap_or_else(|| instance.def_id().krate);

    let mut symbol = String::from("_RNxC");
    v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);

    let digest = tcx.with_stable_hashing_context(|mut hcx| {
        let mut hasher = StableHasher::new();
        let full_name = full_mangling_name();
        full_name.hash_stable(&mut hcx, &mut hasher);
        hasher.finish::<Hash64>().as_u64()
    });
    push_hash64(digest, &mut symbol);

    symbol
}

/// Appends `hash` to `output` as a length-prefixed identifier that starts with `H`.
///
/// The hash is encoded in base 62 with `v0::encode_integer_62`. The final `_` terminator of
/// that encoding is dropped because it does not help prevent hash collisions. The length
/// prefix is the length of the full encoding (terminator included), which is also the length
/// of `H` plus the remaining digits.
fn push_hash64(hash: u64, output: &mut String) {
    let encoded = v0::encode_integer_62(hash);
    let ident_len = encoded.len();
    let digits = &encoded[..ident_len - 1];
    let _ = write!(output, "{ident_len}H{digits}");
}
4 changes: 4 additions & 0 deletions compiler/rustc_symbol_mangling/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ use rustc_middle::query::Providers;
use rustc_middle::ty::{self, Instance, TyCtxt};
use rustc_session::config::SymbolManglingVersion;

mod hashed;
mod legacy;
mod v0;

Expand Down Expand Up @@ -265,6 +266,9 @@ fn compute_symbol_name<'tcx>(
let symbol = match mangling_version {
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
v0::mangle(tcx, instance, instantiating_crate)
}),
};

debug_assert!(
Expand Down
104 changes: 61 additions & 43 deletions compiler/rustc_symbol_mangling/src/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
fn push_integer_62(&mut self, x: u64) {
if let Some(x) = x.checked_sub(1) {
base_n::push_str(x as u128, 62, &mut self.out);
}
self.push("_");
push_integer_62(x, &mut self.out)
}

/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
Expand All @@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
}

fn push_ident(&mut self, ident: &str) {
let mut use_punycode = false;
for b in ident.bytes() {
match b {
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
0x80..=0xff => use_punycode = true,
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
}
}

let punycode_string;
let ident = if use_punycode {
self.push("u");

// FIXME(eddyb) we should probably roll our own punycode implementation.
let mut punycode_bytes = match punycode::encode(ident) {
Ok(s) => s.into_bytes(),
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
};

// Replace `-` with `_`.
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
*c = b'_';
}

// FIXME(eddyb) avoid rechecking UTF-8 validity.
punycode_string = String::from_utf8(punycode_bytes).unwrap();
&punycode_string
} else {
ident
};

let _ = write!(self.out, "{}", ident.len());

// Write a separating `_` if necessary (leading digit or `_`).
if let Some('_' | '0'..='9') = ident.chars().next() {
self.push("_");
}

self.push(ident);
push_ident(ident, &mut self.out)
}

fn path_append_ns(
Expand Down Expand Up @@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
Ok(())
}
}
/// Appends `x` to `output` as a `_`-terminated base 62 integer, in the format the RFC calls
/// `<base-62-number>`:
/// * `0` is written as the bare `"_"` terminator;
/// * any `x > 0` is written as `x - 1` in base 62 and then `"_"`,
///   e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
    if x != 0 {
        base_n::push_str(u128::from(x - 1), 62, output);
    }
    output.push('_');
}

/// Returns the `_`-terminated base 62 encoding of `x` as a fresh `String`.
///
/// This is `push_integer_62` applied to an empty buffer.
pub(crate) fn encode_integer_62(x: u64) -> String {
    let mut encoded = String::new();
    push_integer_62(x, &mut encoded);
    encoded
}

/// Appends `ident` to `output` as a length-prefixed identifier.
///
/// An identifier made only of ASCII letters, ASCII digits and `_` is written unchanged. If it
/// contains any non-ASCII byte, a `u` marker is pushed first and the identifier is written in
/// its Punycode form, with the last `-` of that form replaced by `_`. Any other byte is
/// reported through `bug!`.
///
/// A separating `_` is written between the length and the text when the text starts with a
/// digit or `_`.
pub(crate) fn push_ident(ident: &str, output: &mut String) {
    // Every byte is inspected (no early exit), so an invalid ASCII byte is still reported
    // when it comes after a non-ASCII one.
    let mut needs_punycode = false;
    for byte in ident.bytes() {
        if byte == b'_' || byte.is_ascii_alphanumeric() {
            continue;
        }
        if byte >= 0x80 {
            needs_punycode = true;
        } else {
            bug!("symbol_names: bad byte {} in ident {:?}", byte, ident);
        }
    }

    let encoded;
    let ident = if !needs_punycode {
        ident
    } else {
        output.push('u');

        // FIXME(eddyb) we should probably roll our own punycode implementation.
        let mut bytes = match punycode::encode(ident) {
            Ok(text) => text.into_bytes(),
            Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
        };

        // Turn the last `-` into `_`.
        if let Some(idx) = bytes.iter().rposition(|&b| b == b'-') {
            bytes[idx] = b'_';
        }

        // FIXME(eddyb) avoid rechecking UTF-8 validity.
        encoded = String::from_utf8(bytes).unwrap();
        encoded.as_str()
    };

    let _ = write!(output, "{}", ident.len());

    // A leading digit or `_` would run into the length prefix, so separate them with `_`.
    if matches!(ident.as_bytes().first(), Some(b'_' | b'0'..=b'9')) {
        output.push('_');
    }

    output.push_str(ident);
}
47 changes: 47 additions & 0 deletions tests/run-make/symbol-mangling-hashed/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
include ../tools.mk

# only-linux
# only-x86_64

# Default tool and artifact names: `nm -D` and `.so` library names.
# NOTE(review): RLIB_NAME is not referenced by any recipe in this Makefile.
NM=nm -D
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.so
SO_NAME=libb_dylib.so
BIN_NAME=b_bin

# Overrides for Darwin: `.dylib` library names and different `nm` flags.
# NOTE(review): the `only-linux` directive above presumably keeps this branch and the
# Windows one below from being exercised -- confirm before relying on them.
ifeq ($(UNAME),Darwin)
NM=nm -gU
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.dylib
SO_NAME=libb_dylib.dylib
BIN_NAME=b_bin
endif

# Overrides for Windows: `.dll` library names and an `.exe` binary.
ifdef IS_WINDOWS
NM=nm -g
RLIB_NAME=liba_rlib.dll.a
DYLIB_NAME=liba_dylib.dll
SO_NAME=libb_dylib.dll
BIN_NAME=b_bin.exe
endif

# Build the two `a_*` crates with hashed mangling, build the two `b_*` crates without it,
# then inspect the symbol tables and run the binary.
all:
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs

# Check hashed symbol name

# a_dylib library: no listed symbol may still contain `hello`, and exactly one `T`
# (defined) symbol must carry the `_RNxC7a_dylib` prefix.
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]

# b_dylib library: its own `hello` keeps a readable name, the a_rlib symbol is listed as
# `T` (defined) and the a_dylib symbol as `U` (undefined).
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]

# b_bin: each of the three `hello` functions must be listed once as `U` (undefined).
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]

# Finally run the binary.
$(call RUN,$(BIN_NAME))
4 changes: 4 additions & 0 deletions tests/run-make/symbol-mangling-hashed/a_dylib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#![crate_type="dylib"]
// Public function of the `dylib` fixture. This crate is built with hashed mangling, and the
// Makefile checks that no listed symbol of the resulting library still contains `hello`.
pub fn hello() {
println!("hello dylib");
}
5 changes: 5 additions & 0 deletions tests/run-make/symbol-mangling-hashed/a_rlib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#![crate_type="rlib"]

// Public function of the `rlib` fixture. This crate is built with hashed mangling; the
// Makefile expects its `_RNxC6a_rlib...` symbol to be defined in the `b_dylib` library and
// undefined in the `b_bin` binary.
pub fn hello() {
println!("hello rlib");
}
9 changes: 9 additions & 0 deletions tests/run-make/symbol-mangling-hashed/b_bin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
extern crate a_rlib;
extern crate a_dylib;
extern crate b_dylib;

// Calls `hello` from each of the three crates so that the binary references one symbol
// from each; the Makefile checks those references and then runs this binary.
fn main() {
a_rlib::hello();
a_dylib::hello();
b_dylib::hello();
}
9 changes: 9 additions & 0 deletions tests/run-make/symbol-mangling-hashed/b_dylib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#![crate_type="dylib"]

extern crate a_rlib;
extern crate a_dylib;

// Forwards to both `a_*` crates. The Makefile builds this crate without
// `-C symbol-mangling-version=hashed` and expects one symbol containing both `b_dylib`
// and `hello` in the resulting library.
pub fn hello() {
a_rlib::hello();
a_dylib::hello();
}
2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.bad.stderr
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected

2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.blank.stderr
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected

2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.no-value.stderr
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)

2 changes: 2 additions & 0 deletions tests/ui/symbol-mangling-version/unstable.hashed.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`

5 changes: 4 additions & 1 deletion tests/ui/symbol-mangling-version/unstable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// revisions: legacy legacy-ok
// revisions: legacy legacy-ok hashed hashed-ok
// [legacy] compile-flags: -Csymbol-mangling-version=legacy
// [legacy-ok] check-pass
// [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
// [hashed] compile-flags: -Csymbol-mangling-version=hashed
// [hashed-ok] check-pass
// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed

// Nothing to execute: the revisions above only exercise handling of the
// `-Csymbol-mangling-version` flag.
fn main() {}

0 comments on commit d2163b9

Please sign in to comment.