Merge commit '266e96785ab71834b917bf474f130a6d8fdecd4b' into sync_cg_clif-2022-10-23

This commit is contained in:
bjorn3
2022-10-23 16:22:55 +02:00
45 changed files with 1041 additions and 634 deletions

View File

@@ -14,6 +14,10 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
target: Option<BasicBlock>,
) {
match intrinsic {
"llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
// Spin loop hint
}
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
"llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => {
intrinsic_args!(fx, args => (a); intrinsic);
@@ -25,8 +29,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
let mut res = fx.bcx.ins().iconst(types::I32, 0);
for lane in (0..lane_count).rev() {
let a_lane =
a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
// cast float to int
let a_lane = match lane_ty {

View File

@@ -84,6 +84,30 @@ fn simd_for_each_lane<'tcx>(
}
}
/// Runs `f` on every pair of same-index lanes of `x` and `y` and stores each result in the
/// same-index lane of `ret`.
///
/// Lanes are handed to `f` as `CValue`s and are visited in ascending lane order.
///
/// # Panics
///
/// Panics if `x` and `y` do not share the same layout, or if `ret` has a different lane count
/// than the inputs.
fn simd_pair_for_each_lane_typed<'tcx>(
    fx: &mut FunctionCx<'_, '_, 'tcx>,
    x: CValue<'tcx>,
    y: CValue<'tcx>,
    ret: CPlace<'tcx>,
    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, CValue<'tcx>, CValue<'tcx>) -> CValue<'tcx>,
) {
    // Both operands must be the very same vector type.
    let operand_layout = x.layout();
    assert_eq!(operand_layout, y.layout());

    // Only the lane counts are needed here; the element types are left to `f`.
    let (lanes, _) = operand_layout.ty.simd_size_and_type(fx.tcx);
    let (out_lanes, _) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lanes, out_lanes);

    (0..lanes).for_each(|idx| {
        let lhs = x.value_lane(fx, idx);
        let rhs = y.value_lane(fx, idx);
        let combined = f(fx, lhs, rhs);
        let dest = ret.place_lane(fx, idx);
        dest.write_cvalue(fx, combined);
    });
}
fn simd_pair_for_each_lane<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
x: CValue<'tcx>,
@@ -504,37 +528,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
_ => unreachable!(),
};
let signed = type_sign(lhs.layout().ty);
let checked_res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs);
let (val, has_overflow) = checked_res.load_scalar_pair(fx);
let clif_ty = fx.clif_type(lhs.layout().ty).unwrap();
let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
let val = match (intrinsic, signed) {
(sym::saturating_add, false) => fx.bcx.ins().select(has_overflow, max, val),
(sym::saturating_sub, false) => fx.bcx.ins().select(has_overflow, min, val),
(sym::saturating_add, true) => {
let rhs = rhs.load_scalar(fx);
let rhs_ge_zero =
fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
fx.bcx.ins().select(has_overflow, sat_val, val)
}
(sym::saturating_sub, true) => {
let rhs = rhs.load_scalar(fx);
let rhs_ge_zero =
fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
let sat_val = fx.bcx.ins().select(rhs_ge_zero, min, max);
fx.bcx.ins().select(has_overflow, sat_val, val)
}
_ => unreachable!(),
};
let res = CValue::by_val(val, lhs.layout());
let res = crate::num::codegen_saturating_int_binop(fx, bin_op, lhs, rhs);
ret.write_cvalue(fx, res);
}
sym::rotate_left => {
@@ -819,8 +813,8 @@ fn codegen_regular_intrinsic_call<'tcx>(
sym::ptr_guaranteed_cmp => {
intrinsic_args!(fx, args => (a, b); intrinsic);
let val = crate::num::codegen_ptr_binop(fx, BinOp::Eq, a, b);
ret.write_cvalue(fx, val);
let val = crate::num::codegen_ptr_binop(fx, BinOp::Eq, a, b).load_scalar(fx);
ret.write_cvalue(fx, CValue::by_val(val, fx.layout_of(fx.tcx.types.u8)));
}
sym::caller_location => {
@@ -1206,7 +1200,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
// FIXME once unwinding is supported, change this to actually catch panics
let f_sig = fx.bcx.func.import_signature(Signature {
call_conv: fx.target_config.default_call_conv,
params: vec![AbiParam::new(fx.bcx.func.dfg.value_type(data))],
params: vec![AbiParam::new(pointer_ty(fx.tcx))],
returns: vec![],
});

View File

@@ -2,6 +2,7 @@
use rustc_middle::ty::subst::SubstsRef;
use rustc_span::Symbol;
use rustc_target::abi::Endian;
use super::*;
use crate::prelude::*;
@@ -26,7 +27,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
span: Span,
) {
match intrinsic {
sym::simd_cast => {
sym::simd_as | sym::simd_cast => {
intrinsic_args!(fx, args => (a); intrinsic);
if !a.layout().ty.is_simd() {
@@ -162,6 +163,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
}
}
} else {
// FIXME remove this case
intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap()
};
@@ -650,8 +652,128 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
}
}
// simd_saturating_*
// simd_bitmask
// Lane-wise select driven by an integer bitmask: result lane `i` is taken from `a` when bit `i`
// of `m` is set, and from `b` when that bit is clear.
sym::simd_select_bitmask => {
    intrinsic_args!(fx, args => (m, a, b); intrinsic);

    // Reject non-vector operands with the shared SIMD validation diagnostic.
    if !a.layout().ty.is_simd() {
        report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
        return;
    }
    assert_eq!(a.layout(), b.layout());

    let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
    let lane_layout = fx.layout_of(lane_ty);

    // The mask is loaded as a single scalar value.
    let m = m.load_scalar(fx);

    for lane in 0..lane_count {
        // Isolate bit `lane` of the mask. `lane` is a `u64`; the shift amount is passed as an
        // `i64` immediate.
        let m_lane = fx.bcx.ins().ushr_imm(m, lane as i64);
        let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
        let a_lane = a.value_lane(fx, lane).load_scalar(fx);
        let b_lane = b.value_lane(fx, lane).load_scalar(fx);

        // The condition is "mask bit is zero", hence `b` is the value picked when it holds.
        let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
        let res_lane =
            CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);

        ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
    }
}
// Packs the most significant bit of every lane of `a` into a single integer bitmask and
// writes it to `ret`.
sym::simd_bitmask => {
intrinsic_args!(fx, args => (a); intrinsic);
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
let lane_clif_ty = fx.clif_type(lane_ty).unwrap();
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
// vector mask and returns the most significant bit (MSB) of each lane in the form
// of either:
// * an unsigned integer
// * an array of `u8`
// If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
//
// The bit order of the result depends on the byte endianness, LSB-first for little
// endian and MSB-first for big endian.
// One bit per lane, but never fewer than 8 bits; the byte count is rounded up.
let expected_int_bits = lane_count.max(8);
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
// Only integer lane types are accepted; anything else is reported as a fatal error.
match lane_ty.kind() {
ty::Int(_) | ty::Uint(_) => {}
_ => {
fx.tcx.sess.span_fatal(
span,
&format!(
"invalid monomorphization of `simd_bitmask` intrinsic: \
vector argument `{}`'s element type `{}`, expected integer element \
type",
a.layout().ty,
lane_ty
),
);
}
}
// Integer type used to accumulate the mask. Both `unwrap`s panic if `expected_bytes` does
// not fit in a `u16` or if `int_with_byte_size` yields no type for that size.
let res_type =
Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap();
let mut res = fx.bcx.ins().iconst(res_type, 0);
// The accumulator is shifted left before each new bit is OR-ed in, so the first lane visited
// ends up in the highest used bit. Little endian therefore walks the lanes in reverse so
// that lane 0 lands in the least significant bit.
let lanes = match fx.tcx.sess.target.endian {
Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>,
Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>,
};
for lane in lanes {
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
// extract sign bit of an int
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1));
// shift sign bit into result
// NOTE(review): the trailing `false` presumably requests an unsigned (zero-extending)
// conversion to `res_type` - confirm against `clif_intcast`.
let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false);
res = fx.bcx.ins().ishl_imm(res, 1);
res = fx.bcx.ins().bor(res, a_lane_sign);
}
// Validate the requested return type: either an unsigned integer that is exactly
// `expected_int_bits` wide, or a `[u8; expected_bytes]` array. Anything else is fatal.
match ret.layout().ty.kind() {
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {}
ty::Array(elem, len)
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
&& len.try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all())
== Some(expected_bytes) => {}
_ => {
fx.tcx.sess.span_fatal(
span,
&format!(
"invalid monomorphization of `simd_bitmask` intrinsic: \
cannot return `{}`, expected `u{}` or `[u8; {}]`",
ret.layout().ty,
expected_int_bits,
expected_bytes
),
);
}
}
// The accumulated integer is written out using the layout of the return place.
let res = CValue::by_val(res, ret.layout());
ret.write_cvalue(fx, res);
}
// Lane-wise saturating addition/subtraction: each pair of lanes is handed to the scalar
// saturating helper in `crate::num`, and the result is stored in the matching lane of `ret`.
sym::simd_saturating_add | sym::simd_saturating_sub => {
intrinsic_args!(fx, args => (x, y); intrinsic);
// Map the intrinsic onto the binary operation it saturates; the arm pattern above admits
// only these two symbols.
let bin_op = match intrinsic {
sym::simd_saturating_add => BinOp::Add,
sym::simd_saturating_sub => BinOp::Sub,
_ => unreachable!(),
};
// FIXME use vector instructions when possible
simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane)
});
}
// simd_arith_offset
// simd_scatter
// simd_gather
_ => {