Skip to content

Commit

Permalink
Auto merge of #26411 - dotdash:fat_in_registers, r=aatch
Browse files Browse the repository at this point in the history

This has a number of advantages compared to creating a copy in memory
and passing a pointer. The obvious one is that we don't have to put the
data into memory but can keep it in registers. Since we're currently
passing a pointer anyway (instead of using e.g. a known offset on the
stack, which is what the `byval` attribute would achieve), we only use a
single additional register for each fat pointer, but save at least two
pointers worth of stack in exchange (sometimes more because more than
one copy gets eliminated). On archs that pass arguments on the stack, we
save a pointer worth of stack even without considering the omitted
copies.

Additionally, LLVM can optimize the code a lot better, to a large degree
due to the fact that lots of copies are gone or can be optimized away.
Additionally, we can now emit attributes like nonnull on the data and/or
vtable pointers contained in the fat pointer, potentially allowing for
even more optimizations.

This results in LLVM passes being about 3-7% faster (depending on the
crate), and the resulting code is also a few percent smaller, for
example:

|text|data|filename|
|----|----|--------|
|5671479|3941461|before/librustc-d8ace771.so|
|5447663|3905745|after/librustc-d8ace771.so|
| | | |
|1944425|2394024|before/libstd-d8ace771.so|
|1896769|2387610|after/libstd-d8ace771.so|

I had to remove a call in the backtrace-debuginfo test, because LLVM can
now merge the tails of some blocks when optimizations are turned on,
which can't correctly preserve line info.

Fixes #22924

Cc #22891 (at least for fat pointers the code is good now)
  • Loading branch information
bors committed Jun 20, 2015
2 parents 40d19bf + f777562 commit 306a99e
Show file tree
Hide file tree
Showing 17 changed files with 322 additions and 256 deletions.
12 changes: 12 additions & 0 deletions src/librustc_llvm/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2252,6 +2252,18 @@ pub fn get_param(llfn: ValueRef, index: c_uint) -> ValueRef {
}
}

pub fn get_params(llfn: ValueRef) -> Vec<ValueRef> {
unsafe {
let num_params = LLVMCountParams(llfn);
let mut params = Vec::with_capacity(num_params as usize);
for idx in 0..num_params {
params.push(LLVMGetParam(llfn, idx));
}

params
}
}

#[allow(missing_copy_implementations)]
pub enum RustString_opaque {}
pub type RustStringRef = *mut RustString_opaque;
Expand Down
18 changes: 5 additions & 13 deletions src/librustc_trans/trans/_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,19 +828,11 @@ fn compare_values<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
None,
&format!("comparison of `{}`", rhs_t),
StrEqFnLangItem);
let t = ty::mk_str_slice(cx.tcx(), cx.tcx().mk_region(ty::ReStatic), ast::MutImmutable);
// The comparison function gets the slices by value, so we have to make copies here. Even
// if the function doesn't write through the pointer, things like lifetime intrinsics
// require that we do this properly
let lhs_arg = alloc_ty(cx, t, "lhs");
let rhs_arg = alloc_ty(cx, t, "rhs");
memcpy_ty(cx, lhs_arg, lhs, t);
memcpy_ty(cx, rhs_arg, rhs, t);
let res = callee::trans_lang_call(cx, did, &[lhs_arg, rhs_arg], None, debug_loc);
call_lifetime_end(res.bcx, lhs_arg);
call_lifetime_end(res.bcx, rhs_arg);

res
let lhs_data = Load(cx, expr::get_dataptr(cx, lhs));
let lhs_len = Load(cx, expr::get_len(cx, lhs));
let rhs_data = Load(cx, expr::get_dataptr(cx, rhs));
let rhs_len = Load(cx, expr::get_len(cx, rhs));
callee::trans_lang_call(cx, did, &[lhs_data, lhs_len, rhs_data, rhs_len], None, debug_loc)
}

let _icx = push_ctxt("compare_values");
Expand Down
25 changes: 12 additions & 13 deletions src/librustc_trans/trans/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,32 +45,31 @@ pub fn trans_inline_asm<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, ia: &ast::InlineAsm)
output_types.push(type_of::type_of(bcx.ccx(), out_datum.ty));
let val = out_datum.val;
if is_rw {
ext_inputs.push(unpack_result!(bcx, {
callee::trans_arg_datum(bcx,
expr_ty(bcx, &**out),
out_datum,
cleanup::CustomScope(temp_scope),
callee::DontAutorefArg)
}));
bcx = callee::trans_arg_datum(bcx,
expr_ty(bcx, &**out),
out_datum,
cleanup::CustomScope(temp_scope),
callee::DontAutorefArg,
&mut ext_inputs);
ext_constraints.push(i.to_string());
}
val

}).collect::<Vec<_>>();

// Now the input operands
let mut inputs = ia.inputs.iter().map(|&(ref c, ref input)| {
let mut inputs = Vec::new();
for &(ref c, ref input) in &ia.inputs {
constraints.push((*c).clone());

let in_datum = unpack_datum!(bcx, expr::trans(bcx, &**input));
unpack_result!(bcx, {
callee::trans_arg_datum(bcx,
bcx = callee::trans_arg_datum(bcx,
expr_ty(bcx, &**input),
in_datum,
cleanup::CustomScope(temp_scope),
callee::DontAutorefArg)
})
}).collect::<Vec<_>>();
callee::DontAutorefArg,
&mut inputs);
}
inputs.push_all(&ext_inputs[..]);

// no failure occurred preparing operands, no need to cleanup
Expand Down
81 changes: 47 additions & 34 deletions src/librustc_trans/trans/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
};

// Index 0 is the return value of the llvm func, so we start at 1
let mut first_arg_offset = 1;
let mut idx = 1;
if let ty::FnConverging(ret_ty) = ret_ty {
// A function pointer is called without the declaration
// available, so we have to apply any attributes with ABI
Expand All @@ -206,7 +206,7 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
.arg(1, llvm::DereferenceableAttribute(llret_sz));

// Add one more since there's an outptr
first_arg_offset += 1;
idx += 1;
} else {
// The `noalias` attribute on the return value is useful to a
// function ptr caller.
Expand Down Expand Up @@ -236,10 +236,9 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
}
}

for (idx, &t) in input_tys.iter().enumerate().map(|(i, v)| (i + first_arg_offset, v)) {
for &t in input_tys.iter() {
match t.sty {
// this needs to be first to prevent fat pointers from falling through
_ if !common::type_is_immediate(ccx, t) => {
_ if type_of::arg_is_indirect(ccx, t) => {
let llarg_sz = machine::llsize_of_real(ccx, type_of::type_of(ccx, t));

// For non-immediate arguments the callee gets its own copy of
Expand All @@ -256,49 +255,63 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx

// `Box` pointer parameters never alias because ownership is transferred
ty::TyBox(inner) => {
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, inner));

attrs.arg(idx, llvm::Attribute::NoAlias)
.arg(idx, llvm::DereferenceableAttribute(llsz));
attrs.arg(idx, llvm::Attribute::NoAlias);

if common::type_is_sized(ccx.tcx(), inner) {
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, inner));
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
} else {
attrs.arg(idx, llvm::NonNullAttribute);
if ty::type_is_trait(inner) {
attrs.arg(idx + 1, llvm::NonNullAttribute);
}
}
}

// `&mut` pointer parameters never alias other parameters, or mutable global data
//
// `&T` where `T` contains no `UnsafeCell<U>` is immutable, and can be marked as both
// `readonly` and `noalias`, as LLVM's definition of `noalias` is based solely on
// memory dependencies rather than pointer equality
ty::TyRef(b, mt) if mt.mutbl == ast::MutMutable ||
!ty::type_contents(ccx.tcx(), mt.ty).interior_unsafe() => {
ty::TyRef(b, mt) => {
// `&mut` pointer parameters never alias other parameters, or mutable global data
//
// `&T` where `T` contains no `UnsafeCell<U>` is immutable, and can be marked as
// both `readonly` and `noalias`, as LLVM's definition of `noalias` is based solely
// on memory dependencies rather than pointer equality
let interior_unsafe = ty::type_contents(ccx.tcx(), mt.ty).interior_unsafe();

let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::Attribute::NoAlias)
.arg(idx, llvm::DereferenceableAttribute(llsz));
if mt.mutbl == ast::MutMutable || !interior_unsafe {
attrs.arg(idx, llvm::Attribute::NoAlias);
}

if mt.mutbl == ast::MutImmutable {
if mt.mutbl == ast::MutImmutable && !interior_unsafe {
attrs.arg(idx, llvm::Attribute::ReadOnly);
}

// & pointer parameters are also never null and for sized types we also know
// exactly how many bytes we can dereference
if common::type_is_sized(ccx.tcx(), mt.ty) {
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
} else {
attrs.arg(idx, llvm::NonNullAttribute);
if ty::type_is_trait(mt.ty) {
attrs.arg(idx + 1, llvm::NonNullAttribute);
}
}

// When a reference in an argument has no named lifetime, it's
// impossible for that reference to escape this function
// (returned or stored beyond the call by a closure).
if let ReLateBound(_, BrAnon(_)) = *b {
attrs.arg(idx, llvm::Attribute::NoCapture);
}
}

// When a reference in an argument has no named lifetime, it's impossible for that
// reference to escape this function (returned or stored beyond the call by a closure).
ty::TyRef(&ReLateBound(_, BrAnon(_)), mt) => {
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::Attribute::NoCapture)
.arg(idx, llvm::DereferenceableAttribute(llsz));
}

// & pointer parameters are also never null and we know exactly how
// many bytes we can dereference
ty::TyRef(_, mt) => {
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
}
_ => ()
}

if common::type_is_fat_ptr(ccx.tcx(), t) {
idx += 2;
} else {
idx += 1;
}
}

attrs
Expand Down
63 changes: 46 additions & 17 deletions src/librustc_trans/trans/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1297,16 +1297,28 @@ pub type RvalueDatum<'tcx> = datum::Datum<'tcx, datum::Rvalue>;
// create_datums_for_fn_args: creates rvalue datums for each of the
// incoming function arguments. These will later be stored into
// appropriate lvalue datums.
pub fn create_datums_for_fn_args<'a, 'tcx>(fcx: &FunctionContext<'a, 'tcx>,
pub fn create_datums_for_fn_args<'a, 'tcx>(bcx: Block<'a, 'tcx>,
arg_tys: &[Ty<'tcx>])
-> Vec<RvalueDatum<'tcx>> {
let _icx = push_ctxt("create_datums_for_fn_args");
let fcx = bcx.fcx;

// Return an array wrapping the ValueRefs that we get from `get_param` for
// each argument into datums.
arg_tys.iter().enumerate().map(|(i, &arg_ty)| {
let llarg = get_param(fcx.llfn, fcx.arg_pos(i) as c_uint);
datum::Datum::new(llarg, arg_ty, arg_kind(fcx, arg_ty))
let mut i = fcx.arg_offset() as c_uint;
arg_tys.iter().map(|&arg_ty| {
if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
let llty = type_of::type_of(bcx.ccx(), arg_ty);
let data = get_param(fcx.llfn, i);
let extra = get_param(fcx.llfn, i + 1);
let fat_ptr = expr::make_fat_ptr(bcx, llty, data, extra);
i += 2;
datum::Datum::new(fat_ptr, arg_ty, datum::Rvalue { mode: datum::ByValue })
} else {
let llarg = get_param(fcx.llfn, i);
i += 1;
datum::Datum::new(llarg, arg_ty, arg_kind(fcx, arg_ty))
}
}).collect()
}

Expand All @@ -1321,12 +1333,23 @@ fn create_datums_for_fn_args_under_call_abi<'blk, 'tcx>(
arg_tys: &[Ty<'tcx>])
-> Vec<RvalueDatum<'tcx>> {
let mut result = Vec::new();
let mut idx = bcx.fcx.arg_offset() as c_uint;
for (i, &arg_ty) in arg_tys.iter().enumerate() {
if i < arg_tys.len() - 1 {
// Regular argument.
let llarg = get_param(bcx.fcx.llfn, bcx.fcx.arg_pos(i) as c_uint);
result.push(datum::Datum::new(llarg, arg_ty, arg_kind(bcx.fcx,
arg_ty)));
result.push(if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
let llty = type_of::type_of(bcx.ccx(), arg_ty);
let data = get_param(bcx.fcx.llfn, idx);
let extra = get_param(bcx.fcx.llfn, idx + 1);
idx += 2;
let fat_ptr = expr::make_fat_ptr(bcx, llty, data, extra);
datum::Datum::new(fat_ptr, arg_ty, datum::Rvalue { mode: datum::ByValue })
} else {
let val = get_param(bcx.fcx.llfn, idx);
idx += 1;
datum::Datum::new(val, arg_ty, arg_kind(bcx.fcx, arg_ty))
});

continue
}

Expand All @@ -1346,15 +1369,21 @@ fn create_datums_for_fn_args_under_call_abi<'blk, 'tcx>(
llval| {
for (j, &tupled_arg_ty) in
tupled_arg_tys.iter().enumerate() {
let llarg =
get_param(bcx.fcx.llfn,
bcx.fcx.arg_pos(i + j) as c_uint);
let lldest = GEPi(bcx, llval, &[0, j]);
let datum = datum::Datum::new(
llarg,
tupled_arg_ty,
arg_kind(bcx.fcx, tupled_arg_ty));
bcx = datum.store_to(bcx, lldest);
if common::type_is_fat_ptr(bcx.tcx(), tupled_arg_ty) {
let data = get_param(bcx.fcx.llfn, idx);
let extra = get_param(bcx.fcx.llfn, idx + 1);
Store(bcx, data, expr::get_dataptr(bcx, lldest));
Store(bcx, extra, expr::get_len(bcx, lldest));
idx += 2;
} else {
let datum = datum::Datum::new(
get_param(bcx.fcx.llfn, idx),
tupled_arg_ty,
arg_kind(bcx.fcx, tupled_arg_ty));
idx += 1;
bcx = datum.store_to(bcx, lldest);
};
}
bcx
}));
Expand Down Expand Up @@ -1566,7 +1595,7 @@ pub fn trans_closure<'a, 'b, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
}
_ => {
let arg_tys = untuple_arguments_if_necessary(ccx, &monomorphized_arg_types, abi);
create_datums_for_fn_args(&fcx, &arg_tys)
create_datums_for_fn_args(bcx, &arg_tys)
}
};

Expand Down Expand Up @@ -1773,7 +1802,7 @@ fn trans_enum_variant_or_tuple_like_struct<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx
ty::erase_late_bound_regions(
ccx.tcx(), &ty::ty_fn_args(ctor_ty));

let arg_datums = create_datums_for_fn_args(&fcx, &arg_tys[..]);
let arg_datums = create_datums_for_fn_args(bcx, &arg_tys[..]);

if !type_is_zero_size(fcx.ccx, result_ty.unwrap()) {
let dest = fcx.get_ret_slot(bcx, result_ty, "eret_slot");
Expand Down
Loading

0 comments on commit 306a99e

Please sign in to comment.