#![feature(core_intrinsics)]
+use std::arch::x86_64::*;
use std::io::Write;
use std::intrinsics;
#[target_feature(enable = "sse2")]
unsafe fn test_simd() {
- use std::arch::x86_64::*;
-
let x = _mm_setzero_si128();
let y = _mm_set1_epi16(7);
let or = _mm_or_si128(x, y);
test_mm_slli_si128();
test_mm_movemask_epi8();
test_mm256_movemask_epi8();
+ test_mm_add_epi8();
+ test_mm_add_pd();
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
assert_eq!(mask1, 1);
#[target_feature(enable = "sse2")]
unsafe fn test_mm_slli_si128() {
- use std::arch::x86_64::*;
-
#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
#[target_feature(enable = "sse2")]
unsafe fn test_mm_movemask_epi8() {
- use std::arch::x86_64::*;
-
#[rustfmt::skip]
let a = _mm_setr_epi8(
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
#[target_feature(enable = "avx2")]
unsafe fn test_mm256_movemask_epi8() {
- use std::arch::x86_64::*;
-
let a = _mm256_set1_epi8(-1);
let r = _mm256_movemask_epi8(a);
let e = -1;
assert_eq!(r, e);
}
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_add_epi8() {
+ let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ #[rustfmt::skip]
+ let b = _mm_setr_epi8(
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ );
+ let r = _mm_add_epi8(a, b);
+ #[rustfmt::skip]
+ let e = _mm_setr_epi8(
+ 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
+ );
+ assert_eq_m128i(r, e);
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_add_pd() {
+ let a = _mm_setr_pd(1.0, 2.0);
+ let b = _mm_setr_pd(5.0, 10.0);
+ let r = _mm_add_pd(a, b);
+ assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
+}
+
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
unsafe {
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(y));
}
}
+#[target_feature(enable = "sse2")]
+pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
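+ // `_mm_cmpeq_pd` sets a lane to all ones when the corresponding lanes are equal;
+ // `_mm_movemask_pd` gathers the two lane sign bits, so `0b11` means both lanes compared equal.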
+ if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
+ panic!("{:?} != {:?}", a, b);
+ }
+}
+
#[derive(PartialEq)]
enum LoopState {
Continue(()),
(lane_layout, lane_count)
}
-fn simd_for_each_lane<'tcx, B: Backend>(
+pub fn simd_for_each_lane<'tcx, B: Backend>(
fx: &mut FunctionCx<'_, 'tcx, B>,
intrinsic: &str,
x: CValue<'tcx>,
}
}
-fn bool_to_zero_or_max_uint<'tcx>(
+pub fn bool_to_zero_or_max_uint<'tcx>(
fx: &mut FunctionCx<'_, 'tcx, impl Backend>,
layout: TyLayout<'tcx>,
val: Value,
) -> CValue<'tcx> {
let ty = fx.clif_type(layout.ty).unwrap();
- let zero = fx.bcx.ins().iconst(ty, 0);
- let max = fx.bcx.ins().iconst(ty, (u64::max_value() >> (64 - ty.bits())) as i64);
- let res = crate::common::codegen_select(&mut fx.bcx, val, max, zero);
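+ // `iconst` can only materialize integer values, so build the zero/all-ones mask as an
+ // integer of the same width and bitcast it back to the float type below when needed.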
+ let int_ty = match ty {
+ types::F32 => types::I32,
+ types::F64 => types::I64,
+ ty => ty,
+ };
+
+ let zero = fx.bcx.ins().iconst(int_ty, 0);
+ let max = fx.bcx.ins().iconst(int_ty, (u64::max_value() >> (64 - int_ty.bits())) as i64);
+ let mut res = crate::common::codegen_select(&mut fx.bcx, val, max, zero);
+
+ if ty.is_float() {
+ res = fx.bcx.ins().bitcast(ty, res);
+ }
+
CValue::by_val(res, layout)
}
macro_rules! simd_cmp {
($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
- simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, res_lane_layout, x_lane, y_lane| {
- let res_lane = fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane);
+ simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
});
};
}
-macro_rules! simd_binop {
+macro_rules! simd_int_binop {
($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
- simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| {
- let res_lane = fx.bcx.ins().$op(x_lane, y_lane);
+ simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
CValue::by_val(res_lane, ret_lane_layout)
});
};
};
}
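+// Lane-wise binop that uses `$op` for integer lanes and `$op_f` for float lanes.
+// The second arm additionally distinguishes unsigned (`$op_u`) and signed (`$op_s`) integer ops.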
+macro_rules! simd_int_flt_binop {
+ ($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+ simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
+ ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
+ CValue::by_val(res_lane, ret_lane_layout)
+ });
+ };
+ ($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+ simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+ ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+ ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
+ CValue::by_val(res_lane, ret_lane_layout)
+ });
+ };
+}
+
+macro_rules! simd_flt_binop {
+ ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
+ simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
+ CValue::by_val(res_lane, ret_lane_layout)
+ });
+ }
+}
+
pub fn codegen_intrinsic_call<'a, 'tcx: 'a>(
fx: &mut FunctionCx<'a, 'tcx, impl Backend>,
def_id: DefId,
let indexes = {
use rustc::mir::interpret::*;
- let idx_place = match idx {
- Operand::Copy(idx_place) => {
- idx_place
- }
- _ => panic!("simd_shuffle* idx is not Operand::Copy, but {:?}", idx),
- };
-
- assert!(idx_place.projection.is_none());
- let static_ = match &idx_place.base {
- PlaceBase::Static(static_) => {
- static_
- }
- PlaceBase::Local(_) => panic!("simd_shuffle* idx is not constant, but a local"),
- };
-
- let idx_const = match &static_.kind {
- StaticKind::Static(_) => unimplemented!(),
- StaticKind::Promoted(promoted) => {
- fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId {
- instance: fx.instance,
- promoted: Some(*promoted),
- })).unwrap()
- }
- };
+ let idx_const = crate::constant::mir_operand_get_const_val(fx, idx).expect("simd_shuffle* idx not const");
let idx_bytes = match idx_const.val {
ConstValue::ByRef { align: _, offset, alloc } => {
};
simd_add, (c x, c y) {
- simd_binop!(fx, intrinsic, iadd(x, y) -> ret);
+ simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret);
};
simd_sub, (c x, c y) {
- simd_binop!(fx, intrinsic, isub(x, y) -> ret);
+ simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret);
};
simd_mul, (c x, c y) {
- simd_binop!(fx, intrinsic, imul(x, y) -> ret);
+ simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret);
};
simd_div, (c x, c y) {
- simd_binop!(fx, intrinsic, udiv|sdiv(x, y) -> ret);
- };
- simd_rem, (c x, c y) {
- simd_binop!(fx, intrinsic, urem|srem(x, y) -> ret);
+ simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret);
};
simd_shl, (c x, c y) {
- simd_binop!(fx, intrinsic, ishl(x, y) -> ret);
+ simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret);
};
simd_shr, (c x, c y) {
- simd_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
+ simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
};
simd_and, (c x, c y) {
- simd_binop!(fx, intrinsic, band(x, y) -> ret);
+ simd_int_binop!(fx, intrinsic, band(x, y) -> ret);
};
simd_or, (c x, c y) {
- simd_binop!(fx, intrinsic, bor(x, y) -> ret);
+ simd_int_binop!(fx, intrinsic, bor(x, y) -> ret);
};
simd_xor, (c x, c y) {
- simd_binop!(fx, intrinsic, bxor(x, y) -> ret);
+ simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret);
};
simd_fmin, (c x, c y) {
- simd_binop!(fx, intrinsic, fmin(x, y) -> ret);
+ simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret);
};
simd_fmax, (c x, c y) {
- simd_binop!(fx, intrinsic, fmax(x, y) -> ret);
+ simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret);
};
}
use crate::prelude::*;
+use crate::intrinsics::*;
use rustc::ty::subst::SubstsRef;
}
};
- crate::intrinsics::intrinsic_match! {
+ intrinsic_match! {
fx, intrinsic, substs, args,
_ => {
fx.tcx.sess.warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic));
};
- // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
+ // Used by `_mm_movemask_epi8`, `_mm256_movemask_epi8` and `_mm_movemask_pd`
- llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) {
- let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic);
- assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty);
- assert!(lane_count == 16 || lane_count == 32);
+ llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb | llvm.x86.sse2.movmsk.pd, (c a) {
+ let (lane_layout, lane_count) = lane_type_and_count(fx, a.layout(), intrinsic);
+ let lane_ty = fx.clif_type(lane_layout.ty).unwrap();
+ assert!(lane_count <= 32);
let mut res = fx.bcx.ins().iconst(types::I32, 0);
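+ // iterate lanes from highest to lowest so that lane 0's sign bit ends up in bit 0 of the result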
for lane in (0..lane_count).rev() {
let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
- let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int
- let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);
+
+ // cast float to int
+ let a_lane = match lane_ty {
+ types::F32 => fx.bcx.ins().bitcast(types::I32, a_lane),
+ types::F64 => fx.bcx.ins().bitcast(types::I64, a_lane),
+ _ => a_lane,
+ };
+
+ // extract sign bit of an int
+ let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1));
+
+ // cast the sign bit to i32 and shift it into the result below
+ let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false);
res = fx.bcx.ins().ishl_imm(res, 1);
res = fx.bcx.ins().bor(res, a_lane_sign);
}
let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32));
ret.write_cvalue(fx, res);
};
+ llvm.x86.sse2.cmp.ps | llvm.x86.sse2.cmp.pd, (c x, c y, o kind) {
+ let kind_const = crate::constant::mir_operand_get_const_val(fx, kind).expect("llvm.x86.sse2.cmp.* kind not const");
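+ // `kind` is the cmpps/cmppd predicate immediate:
+ // 0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD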
+ let flt_cc = match kind_const.val.try_to_bits(Size::from_bytes(1)).expect(&format!("kind not scalar: {:?}", kind_const)) {
+ 0 => FloatCC::Equal,
+ 1 => FloatCC::LessThan,
+ 2 => FloatCC::LessThanOrEqual,
+ 3 => {
+ unimplemented!("Compares corresponding elements in `a` and `b` to see if either is `NaN`.");
+ }
+ 4 => FloatCC::NotEqual,
+ 5 => {
+ unimplemented!("not less than");
+ }
+ 6 => {
+ unimplemented!("not less than or equal");
+ }
+ 7 => {
+ unimplemented!("Compares corresponding elements in `a` and `b` to see if neither is `NaN`.");
+ }
+ kind => unreachable!("kind {:?}", kind),
+ };
+
+ simd_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+ let res_lane = match lane_layout.ty.sty {
+ ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
+ _ => unreachable!("{:?}", lane_layout.ty),
+ };
+ bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+ });
+ };
}
if let Some((_, dest)) = destination {