]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
Merge commit 'e228f0c16ea8c34794a6285bf57aab627c26b147' into libgccjit-codegen
[rust.git] / compiler / rustc_codegen_gcc / src / intrinsic / mod.rs
1 pub mod llvm;
2 mod simd;
3
4 use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp};
5 use rustc_codegen_ssa::MemFlags;
6 use rustc_codegen_ssa::base::wants_msvc_seh;
7 use rustc_codegen_ssa::common::{IntPredicate, span_invalid_monomorphization_error};
8 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
9 use rustc_codegen_ssa::mir::place::PlaceRef;
10 use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
11 use rustc_middle::bug;
12 use rustc_middle::ty::{self, Instance, Ty};
13 use rustc_span::{Span, Symbol, symbol::kw, sym};
14 use rustc_target::abi::{HasDataLayout, LayoutOf};
15 use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
16 use rustc_target::spec::PanicStrategy;
17
18 use crate::abi::GccType;
19 use crate::builder::Builder;
20 use crate::common::TypeReflection;
21 use crate::context::CodegenCx;
22 use crate::type_of::LayoutGccExt;
23 use crate::intrinsic::simd::generic_simd_intrinsic;
24
25 fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> {
26     let gcc_name = match name {
27         sym::sqrtf32 => "sqrtf",
28         sym::sqrtf64 => "sqrt",
29         sym::powif32 => "__builtin_powif",
30         sym::powif64 => "__builtin_powi",
31         sym::sinf32 => "sinf",
32         sym::sinf64 => "sin",
33         sym::cosf32 => "cosf",
34         sym::cosf64 => "cos",
35         sym::powf32 => "powf",
36         sym::powf64 => "pow",
37         sym::expf32 => "expf",
38         sym::expf64 => "exp",
39         sym::exp2f32 => "exp2f",
40         sym::exp2f64 => "exp2",
41         sym::logf32 => "logf",
42         sym::logf64 => "log",
43         sym::log10f32 => "log10f",
44         sym::log10f64 => "log10",
45         sym::log2f32 => "log2f",
46         sym::log2f64 => "log2",
47         sym::fmaf32 => "fmaf",
48         sym::fmaf64 => "fma",
49         sym::fabsf32 => "fabsf",
50         sym::fabsf64 => "fabs",
51         sym::minnumf32 => "fminf",
52         sym::minnumf64 => "fmin",
53         sym::maxnumf32 => "fmaxf",
54         sym::maxnumf64 => "fmax",
55         sym::copysignf32 => "copysignf",
56         sym::copysignf64 => "copysign",
57         sym::floorf32 => "floorf",
58         sym::floorf64 => "floor",
59         sym::ceilf32 => "ceilf",
60         sym::ceilf64 => "ceil",
61         sym::truncf32 => "truncf",
62         sym::truncf64 => "trunc",
63         sym::rintf32 => "rintf",
64         sym::rintf64 => "rint",
65         sym::nearbyintf32 => "nearbyintf",
66         sym::nearbyintf64 => "nearbyint",
67         sym::roundf32 => "roundf",
68         sym::roundf64 => "round",
69         sym::abort => "abort",
70         _ => return None,
71     };
72     Some(cx.context.get_builtin_function(&gcc_name))
73 }
74
75 impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
76     fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) {
77         let tcx = self.tcx;
78         let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
79
80         let (def_id, substs) = match *callee_ty.kind() {
81             ty::FnDef(def_id, substs) => (def_id, substs),
82             _ => bug!("expected fn item type, found {}", callee_ty),
83         };
84
85         let sig = callee_ty.fn_sig(tcx);
86         let sig = tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), sig);
87         let arg_tys = sig.inputs();
88         let ret_ty = sig.output();
89         let name = tcx.item_name(def_id);
90         let name_str = &*name.as_str();
91
92         let llret_ty = self.layout_of(ret_ty).gcc_type(self, true);
93         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
94
95         let simple = get_simple_intrinsic(self, name);
96         let llval =
97             match name {
98                 _ if simple.is_some() => {
99                     // FIXME(antoyo): remove this cast when the API supports function.
100                     let func = unsafe { std::mem::transmute(simple.expect("simple")) };
101                     self.call(self.type_void(), func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
102                 },
103                 sym::likely => {
104                     self.expect(args[0].immediate(), true)
105                 }
106                 sym::unlikely => {
107                     self.expect(args[0].immediate(), false)
108                 }
109                 kw::Try => {
110                     try_intrinsic(
111                         self,
112                         args[0].immediate(),
113                         args[1].immediate(),
114                         args[2].immediate(),
115                         llresult,
116                     );
117                     return;
118                 }
119                 sym::breakpoint => {
120                     unimplemented!();
121                 }
122                 sym::va_copy => {
123                     unimplemented!();
124                 }
125                 sym::va_arg => {
126                     unimplemented!();
127                 }
128
129                 sym::volatile_load | sym::unaligned_volatile_load => {
130                     let tp_ty = substs.type_at(0);
131                     let mut ptr = args[0].immediate();
132                     if let PassMode::Cast(ty) = fn_abi.ret.mode {
133                         ptr = self.pointercast(ptr, self.type_ptr_to(ty.gcc_type(self)));
134                     }
135                     let load = self.volatile_load(ptr.get_type(), ptr);
136                     // TODO(antoyo): set alignment.
137                     self.to_immediate(load, self.layout_of(tp_ty))
138                 }
139                 sym::volatile_store => {
140                     let dst = args[0].deref(self.cx());
141                     args[1].val.volatile_store(self, dst);
142                     return;
143                 }
144                 sym::unaligned_volatile_store => {
145                     let dst = args[0].deref(self.cx());
146                     args[1].val.unaligned_volatile_store(self, dst);
147                     return;
148                 }
149                 sym::prefetch_read_data
150                     | sym::prefetch_write_data
151                     | sym::prefetch_read_instruction
152                     | sym::prefetch_write_instruction => {
153                         unimplemented!();
154                     }
155                 sym::ctlz
156                     | sym::ctlz_nonzero
157                     | sym::cttz
158                     | sym::cttz_nonzero
159                     | sym::ctpop
160                     | sym::bswap
161                     | sym::bitreverse
162                     | sym::rotate_left
163                     | sym::rotate_right
164                     | sym::saturating_add
165                     | sym::saturating_sub => {
166                         let ty = arg_tys[0];
167                         match int_type_width_signed(ty, self) {
168                             Some((width, signed)) => match name {
169                                 sym::ctlz | sym::cttz => {
170                                     let func = self.current_func.borrow().expect("func");
171                                     let then_block = func.new_block("then");
172                                     let else_block = func.new_block("else");
173                                     let after_block = func.new_block("after");
174
175                                     let arg = args[0].immediate();
176                                     let result = func.new_local(None, arg.get_type(), "zeros");
177                                     let zero = self.cx.context.new_rvalue_zero(arg.get_type());
178                                     let cond = self.cx.context.new_comparison(None, ComparisonOp::Equals, arg, zero);
179                                     self.block.expect("block").end_with_conditional(None, cond, then_block, else_block);
180
181                                     let zero_result = self.cx.context.new_rvalue_from_long(arg.get_type(), width as i64);
182                                     then_block.add_assignment(None, result, zero_result);
183                                     then_block.end_with_jump(None, after_block);
184
185                                     // NOTE: since jumps were added in a place
186                                     // count_leading_zeroes() does not expect, the current blocks
187                                     // in the state need to be updated.
188                                     *self.current_block.borrow_mut() = Some(else_block);
189                                     self.block = Some(else_block);
190
191                                     let zeros =
192                                         match name {
193                                             sym::ctlz => self.count_leading_zeroes(width, arg),
194                                             sym::cttz => self.count_trailing_zeroes(width, arg),
195                                             _ => unreachable!(),
196                                         };
197                                     else_block.add_assignment(None, result, zeros);
198                                     else_block.end_with_jump(None, after_block);
199
200                                     // NOTE: since jumps were added in a place rustc does not
201                                     // expect, the current blocks in the state need to be updated.
202                                     *self.current_block.borrow_mut() = Some(after_block);
203                                     self.block = Some(after_block);
204
205                                     result.to_rvalue()
206                                 }
207                                 sym::ctlz_nonzero => {
208                                     self.count_leading_zeroes(width, args[0].immediate())
209                                 },
210                                 sym::cttz_nonzero => {
211                                     self.count_trailing_zeroes(width, args[0].immediate())
212                                 }
213                                 sym::ctpop => self.pop_count(args[0].immediate()),
214                                 sym::bswap => {
215                                     if width == 8 {
216                                         args[0].immediate() // byte swap a u8/i8 is just a no-op
217                                     }
218                                     else {
219                                         // TODO(antoyo): check if it's faster to use string literals and a
220                                         // match instead of format!.
221                                         let bswap = self.cx.context.get_builtin_function(&format!("__builtin_bswap{}", width));
222                                         let mut arg = args[0].immediate();
223                                         // FIXME(antoyo): this cast should not be necessary. Remove
224                                         // when having proper sized integer types.
225                                         let param_type = bswap.get_param(0).to_rvalue().get_type();
226                                         if param_type != arg.get_type() {
227                                             arg = self.bitcast(arg, param_type);
228                                         }
229                                         self.cx.context.new_call(None, bswap, &[arg])
230                                     }
231                                 },
232                                 sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
233                                 sym::rotate_left | sym::rotate_right => {
234                                     // TODO(antoyo): implement using algorithm from:
235                                     // https://blog.regehr.org/archives/1063
236                                     // for other platforms.
237                                     let is_left = name == sym::rotate_left;
238                                     let val = args[0].immediate();
239                                     let raw_shift = args[1].immediate();
240                                     if is_left {
241                                         self.rotate_left(val, raw_shift, width)
242                                     }
243                                     else {
244                                         self.rotate_right(val, raw_shift, width)
245                                     }
246                                 },
247                                 sym::saturating_add => {
248                                     self.saturating_add(args[0].immediate(), args[1].immediate(), signed, width)
249                                 },
250                                 sym::saturating_sub => {
251                                     self.saturating_sub(args[0].immediate(), args[1].immediate(), signed, width)
252                                 },
253                                 _ => bug!(),
254                             },
255                             None => {
256                                 span_invalid_monomorphization_error(
257                                     tcx.sess,
258                                     span,
259                                     &format!(
260                                         "invalid monomorphization of `{}` intrinsic: \
261                                       expected basic integer type, found `{}`",
262                                       name, ty
263                                     ),
264                                 );
265                                 return;
266                             }
267                         }
268                     }
269
270                 sym::raw_eq => {
271                     use rustc_target::abi::Abi::*;
272                     let tp_ty = substs.type_at(0);
273                     let layout = self.layout_of(tp_ty).layout;
274                     let use_integer_compare = match layout.abi {
275                         Scalar(_) | ScalarPair(_, _) => true,
276                         Uninhabited | Vector { .. } => false,
277                         Aggregate { .. } => {
278                             // For rusty ABIs, small aggregates are actually passed
279                             // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
280                             // so we re-use that same threshold here.
281                             layout.size <= self.data_layout().pointer_size * 2
282                         }
283                     };
284
285                     let a = args[0].immediate();
286                     let b = args[1].immediate();
287                     if layout.size.bytes() == 0 {
288                         self.const_bool(true)
289                     }
290                     /*else if use_integer_compare {
291                         let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
292                         let ptr_ty = self.type_ptr_to(integer_ty);
293                         let a_ptr = self.bitcast(a, ptr_ty);
294                         let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
295                         let b_ptr = self.bitcast(b, ptr_ty);
296                         let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
297                         self.icmp(IntPredicate::IntEQ, a_val, b_val)
298                     }*/
299                     else {
300                         let void_ptr_type = self.context.new_type::<*const ()>();
301                         let a_ptr = self.bitcast(a, void_ptr_type);
302                         let b_ptr = self.bitcast(b, void_ptr_type);
303                         let n = self.context.new_cast(None, self.const_usize(layout.size.bytes()), self.sizet_type);
304                         let builtin = self.context.get_builtin_function("memcmp");
305                         let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
306                         self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
307                     }
308                 }
309
310                 _ if name_str.starts_with("simd_") => {
311                     match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
312                         Ok(llval) => llval,
313                         Err(()) => return,
314                     }
315                 }
316
317                 _ => bug!("unknown intrinsic '{}'", name),
318             };
319
320         if !fn_abi.ret.is_ignore() {
321             if let PassMode::Cast(ty) = fn_abi.ret.mode {
322                 let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
323                 let ptr = self.pointercast(result.llval, ptr_llty);
324                 self.store(llval, ptr, result.align);
325             }
326             else {
327                 OperandRef::from_immediate_or_packed_pair(self, llval, result.layout)
328                     .val
329                     .store(self, result);
330             }
331         }
332     }
333
334     fn abort(&mut self) {
335         let func = self.context.get_builtin_function("abort");
336         let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
337         self.call(self.type_void(), func, &[], None);
338     }
339
340     fn assume(&mut self, value: Self::Value) {
341         // TODO(antoyo): switch to asumme when it exists.
342         // Or use something like this:
343         // #define __assume(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
344         self.expect(value, true);
345     }
346
347     fn expect(&mut self, cond: Self::Value, _expected: bool) -> Self::Value {
348         // TODO(antoyo)
349         cond
350     }
351
352     fn sideeffect(&mut self) {
353         // TODO(antoyo)
354     }
355
356     fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
357         unimplemented!();
358     }
359
360     fn va_end(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
361         unimplemented!();
362     }
363 }
364
365 impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
366     fn store_fn_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, idx: &mut usize, dst: PlaceRef<'tcx, Self::Value>) {
367         arg_abi.store_fn_arg(self, idx, dst)
368     }
369
370     fn store_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
371         arg_abi.store(self, val, dst)
372     }
373
374     fn arg_memory_ty(&self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
375         arg_abi.memory_ty(self)
376     }
377 }
378
379 pub trait ArgAbiExt<'gcc, 'tcx> {
380     fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
381     fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>);
382     fn store_fn_arg(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>);
383 }
384
385 impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
386     /// Gets the LLVM type for a place of the original Rust type of
387     /// this argument/return, i.e., the result of `type_of::type_of`.
388     fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
389         self.layout.gcc_type(cx, true)
390     }
391
392     /// Stores a direct/indirect value described by this ArgAbi into a
393     /// place for the original Rust type of this argument/return.
394     /// Can be used for both storing formal arguments into Rust variables
395     /// or results of call/invoke instructions into their destinations.
396     fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
397         if self.is_ignore() {
398             return;
399         }
400         if self.is_sized_indirect() {
401             OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
402         }
403         else if self.is_unsized_indirect() {
404             bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
405         }
406         else if let PassMode::Cast(cast) = self.mode {
407             // FIXME(eddyb): Figure out when the simpler Store is safe, clang
408             // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
409             let can_store_through_cast_ptr = false;
410             if can_store_through_cast_ptr {
411                 let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx));
412                 let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty);
413                 bx.store(val, cast_dst, self.layout.align.abi);
414             }
415             else {
416                 // The actual return type is a struct, but the ABI
417                 // adaptation code has cast it into some scalar type.  The
418                 // code that follows is the only reliable way I have
419                 // found to do a transform like i64 -> {i32,i32}.
420                 // Basically we dump the data onto the stack then memcpy it.
421                 //
422                 // Other approaches I tried:
423                 // - Casting rust ret pointer to the foreign type and using Store
424                 //   is (a) unsafe if size of foreign type > size of rust type and
425                 //   (b) runs afoul of strict aliasing rules, yielding invalid
426                 //   assembly under -O (specifically, the store gets removed).
427                 // - Truncating foreign type to correct integral type and then
428                 //   bitcasting to the struct type yields invalid cast errors.
429
430                 // We instead thus allocate some scratch space...
431                 let scratch_size = cast.size(bx);
432                 let scratch_align = cast.align(bx);
433                 let llscratch = bx.alloca(cast.gcc_type(bx), scratch_align);
434                 bx.lifetime_start(llscratch, scratch_size);
435
436                 // ... where we first store the value...
437                 bx.store(val, llscratch, scratch_align);
438
439                 // ... and then memcpy it to the intended destination.
440                 bx.memcpy(
441                     dst.llval,
442                     self.layout.align.abi,
443                     llscratch,
444                     scratch_align,
445                     bx.const_usize(self.layout.size.bytes()),
446                     MemFlags::empty(),
447                 );
448
449                 bx.lifetime_end(llscratch, scratch_size);
450             }
451         }
452         else {
453             OperandValue::Immediate(val).store(bx, dst);
454         }
455     }
456
457     fn store_fn_arg<'a>(&self, bx: &mut Builder<'a, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>) {
458         let mut next = || {
459             let val = bx.current_func().get_param(*idx as i32);
460             *idx += 1;
461             val.to_rvalue()
462         };
463         match self.mode {
464             PassMode::Ignore => {}
465             PassMode::Pair(..) => {
466                 OperandValue::Pair(next(), next()).store(bx, dst);
467             }
468             PassMode::Indirect { extra_attrs: Some(_), .. } => {
469                 OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst);
470             }
471             PassMode::Direct(_) | PassMode::Indirect { extra_attrs: None, .. } | PassMode::Cast(_) => {
472                 let next_arg = next();
473                 self.store(bx, next_arg.to_rvalue(), dst);
474             }
475         }
476     }
477 }
478
479 fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -> Option<(u64, bool)> {
480     match ty.kind() {
481         ty::Int(t) => Some((
482             match t {
483                 rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width),
484                 rustc_middle::ty::IntTy::I8 => 8,
485                 rustc_middle::ty::IntTy::I16 => 16,
486                 rustc_middle::ty::IntTy::I32 => 32,
487                 rustc_middle::ty::IntTy::I64 => 64,
488                 rustc_middle::ty::IntTy::I128 => 128,
489             },
490             true,
491         )),
492         ty::Uint(t) => Some((
493             match t {
494                 rustc_middle::ty::UintTy::Usize => u64::from(cx.tcx.sess.target.pointer_width),
495                 rustc_middle::ty::UintTy::U8 => 8,
496                 rustc_middle::ty::UintTy::U16 => 16,
497                 rustc_middle::ty::UintTy::U32 => 32,
498                 rustc_middle::ty::UintTy::U64 => 64,
499                 rustc_middle::ty::UintTy::U128 => 128,
500             },
501             false,
502         )),
503         _ => None,
504     }
505 }
506
507 impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
508     fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
509         let typ = value.get_type();
510         let context = &self.cx.context;
511         match width {
512             8 => {
513                 // First step.
514                 let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
515                 let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
516                 let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
517                 let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
518                 let step1 = self.or(left, right);
519
520                 // Second step.
521                 let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
522                 let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
523                 let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
524                 let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
525                 let step2 = self.or(left, right);
526
527                 // Third step.
528                 let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
529                 let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
530                 let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
531                 let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
532                 let step3 = self.or(left, right);
533
534                 step3
535             },
536             16 => {
537                 // First step.
538                 let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
539                 let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
540                 let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
541                 let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
542                 let step1 = self.or(left, right);
543
544                 // Second step.
545                 let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
546                 let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
547                 let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
548                 let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
549                 let step2 = self.or(left, right);
550
551                 // Third step.
552                 let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
553                 let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
554                 let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
555                 let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
556                 let step3 = self.or(left, right);
557
558                 // Fourth step.
559                 let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
560                 let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
561                 let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
562                 let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
563                 let step4 = self.or(left, right);
564
565                 step4
566             },
567             32 => {
568                 // TODO(antoyo): Refactor with other implementations.
569                 // First step.
570                 let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
571                 let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
572                 let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
573                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
574                 let step1 = self.or(left, right);
575
576                 // Second step.
577                 let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
578                 let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
579                 let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
580                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
581                 let step2 = self.or(left, right);
582
583                 // Third step.
584                 let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
585                 let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
586                 let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
587                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
588                 let step3 = self.or(left, right);
589
590                 // Fourth step.
591                 let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
592                 let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
593                 let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
594                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
595                 let step4 = self.or(left, right);
596
597                 // Fifth step.
598                 let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
599                 let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
600                 let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
601                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
602                 let step5 = self.or(left, right);
603
604                 step5
605             },
606             64 => {
607                 // First step.
608                 let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
609                 let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
610                 let step1 = self.or(left, right);
611
612                 // Second step.
613                 let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
614                 let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
615                 let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
616                 let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
617                 let step2 = self.or(left, right);
618
619                 // Third step.
620                 let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
621                 let left = self.xor(step2, left);
622                 let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
623
624                 let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
625                 let left = self.or(temp, left);
626                 let step3 = self.xor(left, step2);
627
628                 // Fourth step.
629                 let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
630                 let left = self.xor(step3, left);
631                 let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
632
633                 let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
634                 let left = self.or(temp, left);
635                 let step4 = self.xor(left, step3);
636
637                 // Fifth step.
638                 let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
639                 let left = self.xor(step4, left);
640                 let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
641
642                 let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
643                 let left = self.or(temp, left);
644                 let step5 = self.xor(left, step4);
645
646                 step5
647             },
648             128 => {
649                 // TODO(antoyo): find a more efficient implementation?
650                 let sixty_four = self.context.new_rvalue_from_long(typ, 64);
651                 let high = self.context.new_cast(None, value >> sixty_four, self.u64_type);
652                 let low = self.context.new_cast(None, value, self.u64_type);
653
654                 let reversed_high = self.bit_reverse(64, high);
655                 let reversed_low = self.bit_reverse(64, low);
656
657                 let new_low = self.context.new_cast(None, reversed_high, typ);
658                 let new_high = self.context.new_cast(None, reversed_low, typ) << sixty_four;
659
660                 new_low | new_high
661             },
662             _ => {
663                 panic!("cannot bit reverse with width = {}", width);
664             },
665         }
666     }
667
668     fn count_leading_zeroes(&self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
669         // TODO(antoyo): use width?
670         let arg_type = arg.get_type();
671         let count_leading_zeroes =
672             if arg_type.is_uint(&self.cx) {
673                 "__builtin_clz"
674             }
675             else if arg_type.is_ulong(&self.cx) {
676                 "__builtin_clzl"
677             }
678             else if arg_type.is_ulonglong(&self.cx) {
679                 "__builtin_clzll"
680             }
681             else if width == 128 {
682                 // Algorithm from: https://stackoverflow.com/a/28433850/389119
683                 let array_type = self.context.new_array_type(None, arg_type, 3);
684                 let result = self.current_func()
685                     .new_local(None, array_type, "count_loading_zeroes_results");
686
687                 let sixty_four = self.context.new_rvalue_from_long(arg_type, 64);
688                 let high = self.context.new_cast(None, arg >> sixty_four, self.u64_type);
689                 let low = self.context.new_cast(None, arg, self.u64_type);
690
691                 let zero = self.context.new_rvalue_zero(self.usize_type);
692                 let one = self.context.new_rvalue_one(self.usize_type);
693                 let two = self.context.new_rvalue_from_long(self.usize_type, 2);
694
695                 let clzll = self.context.get_builtin_function("__builtin_clzll");
696
697                 let first_elem = self.context.new_array_access(None, result, zero);
698                 let first_value = self.context.new_cast(None, self.context.new_call(None, clzll, &[high]), arg_type);
699                 self.llbb()
700                     .add_assignment(None, first_elem, first_value);
701
702                 let second_elem = self.context.new_array_access(None, result, one);
703                 let second_value = self.context.new_cast(None, self.context.new_call(None, clzll, &[low]), arg_type) + sixty_four;
704                 self.llbb()
705                     .add_assignment(None, second_elem, second_value);
706
707                 let third_elem = self.context.new_array_access(None, result, two);
708                 let third_value = self.context.new_rvalue_from_long(arg_type, 128);
709                 self.llbb()
710                     .add_assignment(None, third_elem, third_value);
711
712                 let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
713                 let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
714                 let not_low_and_not_high = not_low & not_high;
715                 let index = not_high + not_low_and_not_high;
716
717                 let res = self.context.new_array_access(None, result, index);
718
719                 return self.context.new_cast(None, res, arg_type);
720             }
721             else {
722                 let count_leading_zeroes = self.context.get_builtin_function("__builtin_clz");
723                 let arg = self.context.new_cast(None, arg, self.uint_type);
724                 let diff = self.int_width(self.uint_type) - self.int_width(arg_type);
725                 let diff = self.context.new_rvalue_from_long(self.int_type, diff);
726                 let res = self.context.new_call(None, count_leading_zeroes, &[arg]) - diff;
727                 return self.context.new_cast(None, res, arg_type);
728             };
729         let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
730         let res = self.context.new_call(None, count_leading_zeroes, &[arg]);
731         self.context.new_cast(None, res, arg_type)
732     }
733
734     fn count_trailing_zeroes(&self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
735         let arg_type = arg.get_type();
736         let (count_trailing_zeroes, expected_type) =
737             if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
738                 // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
739                 ("__builtin_ctz", self.cx.uint_type)
740             }
741             else if arg_type.is_ulong(&self.cx) {
742                 ("__builtin_ctzl", self.cx.ulong_type)
743             }
744             else if arg_type.is_ulonglong(&self.cx) {
745                 ("__builtin_ctzll", self.cx.ulonglong_type)
746             }
747             else if arg_type.is_u128(&self.cx) {
748                 // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
749                 let array_type = self.context.new_array_type(None, arg_type, 3);
750                 let result = self.current_func()
751                     .new_local(None, array_type, "count_loading_zeroes_results");
752
753                 let sixty_four = self.context.new_rvalue_from_long(arg_type, 64);
754                 let high = self.context.new_cast(None, arg >> sixty_four, self.u64_type);
755                 let low = self.context.new_cast(None, arg, self.u64_type);
756
757                 let zero = self.context.new_rvalue_zero(self.usize_type);
758                 let one = self.context.new_rvalue_one(self.usize_type);
759                 let two = self.context.new_rvalue_from_long(self.usize_type, 2);
760
761                 let ctzll = self.context.get_builtin_function("__builtin_ctzll");
762
763                 let first_elem = self.context.new_array_access(None, result, zero);
764                 let first_value = self.context.new_cast(None, self.context.new_call(None, ctzll, &[low]), arg_type);
765                 self.llbb()
766                     .add_assignment(None, first_elem, first_value);
767
768                 let second_elem = self.context.new_array_access(None, result, one);
769                 let second_value = self.context.new_cast(None, self.context.new_call(None, ctzll, &[high]), arg_type) + sixty_four;
770                 self.llbb()
771                     .add_assignment(None, second_elem, second_value);
772
773                 let third_elem = self.context.new_array_access(None, result, two);
774                 let third_value = self.context.new_rvalue_from_long(arg_type, 128);
775                 self.llbb()
776                     .add_assignment(None, third_elem, third_value);
777
778                 let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
779                 let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
780                 let not_low_and_not_high = not_low & not_high;
781                 let index = not_low + not_low_and_not_high;
782
783                 let res = self.context.new_array_access(None, result, index);
784
785                 return self.context.new_cast(None, res, arg_type);
786             }
787             else {
788                 unimplemented!("count_trailing_zeroes for {:?}", arg_type);
789             };
790         let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes);
791         let arg =
792             if arg_type != expected_type {
793                 self.context.new_cast(None, arg, expected_type)
794             }
795             else {
796                 arg
797             };
798         let res = self.context.new_call(None, count_trailing_zeroes, &[arg]);
799         self.context.new_cast(None, res, arg_type)
800     }
801
802     fn int_width(&self, typ: Type<'gcc>) -> i64 {
803         self.cx.int_width(typ) as i64
804     }
805
806     fn pop_count(&self, value: RValue<'gcc>) -> RValue<'gcc> {
807         // TODO(antoyo): use the optimized version with fewer operations.
808         let value_type = value.get_type();
809
810         if value_type.is_u128(&self.cx) {
811             // TODO(antoyo): implement in the normal algorithm below to have a more efficient
812             // implementation (that does not require a call to __popcountdi2).
813             let popcount = self.context.get_builtin_function("__builtin_popcountll");
814             let sixty_four = self.context.new_rvalue_from_long(value_type, 64);
815             let high = self.context.new_cast(None, value >> sixty_four, self.cx.ulonglong_type);
816             let high = self.context.new_call(None, popcount, &[high]);
817             let low = self.context.new_cast(None, value, self.cx.ulonglong_type);
818             let low = self.context.new_call(None, popcount, &[low]);
819             return high + low;
820         }
821
822         // First step.
823         let mask = self.context.new_rvalue_from_long(value_type, 0x5555555555555555);
824         let left = value & mask;
825         let shifted = value >> self.context.new_rvalue_from_int(value_type, 1);
826         let right = shifted & mask;
827         let value = left + right;
828
829         // Second step.
830         let mask = self.context.new_rvalue_from_long(value_type, 0x3333333333333333);
831         let left = value & mask;
832         let shifted = value >> self.context.new_rvalue_from_int(value_type, 2);
833         let right = shifted & mask;
834         let value = left + right;
835
836         // Third step.
837         let mask = self.context.new_rvalue_from_long(value_type, 0x0F0F0F0F0F0F0F0F);
838         let left = value & mask;
839         let shifted = value >> self.context.new_rvalue_from_int(value_type, 4);
840         let right = shifted & mask;
841         let value = left + right;
842
843         if value_type.is_u8(&self.cx) {
844             return value;
845         }
846
847         // Fourth step.
848         let mask = self.context.new_rvalue_from_long(value_type, 0x00FF00FF00FF00FF);
849         let left = value & mask;
850         let shifted = value >> self.context.new_rvalue_from_int(value_type, 8);
851         let right = shifted & mask;
852         let value = left + right;
853
854         if value_type.is_u16(&self.cx) {
855             return value;
856         }
857
858         // Fifth step.
859         let mask = self.context.new_rvalue_from_long(value_type, 0x0000FFFF0000FFFF);
860         let left = value & mask;
861         let shifted = value >> self.context.new_rvalue_from_int(value_type, 16);
862         let right = shifted & mask;
863         let value = left + right;
864
865         if value_type.is_u32(&self.cx) {
866             return value;
867         }
868
869         // Sixth step.
870         let mask = self.context.new_rvalue_from_long(value_type, 0x00000000FFFFFFFF);
871         let left = value & mask;
872         let shifted = value >> self.context.new_rvalue_from_int(value_type, 32);
873         let right = shifted & mask;
874         let value = left + right;
875
876         value
877     }
878
879     // Algorithm from: https://blog.regehr.org/archives/1063
880     fn rotate_left(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
881         let max = self.context.new_rvalue_from_long(shift.get_type(), width as i64);
882         let shift = shift % max;
883         let lhs = self.shl(value, shift);
884         let result_and =
885             self.and(
886                 self.context.new_unary_op(None, UnaryOp::Minus, shift.get_type(), shift),
887                 self.context.new_rvalue_from_long(shift.get_type(), width as i64 - 1),
888             );
889         let rhs = self.lshr(value, result_and);
890         self.or(lhs, rhs)
891     }
892
893     // Algorithm from: https://blog.regehr.org/archives/1063
894     fn rotate_right(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
895         let max = self.context.new_rvalue_from_long(shift.get_type(), width as i64);
896         let shift = shift % max;
897         let lhs = self.lshr(value, shift);
898         let result_and =
899             self.and(
900                 self.context.new_unary_op(None, UnaryOp::Minus, shift.get_type(), shift),
901                 self.context.new_rvalue_from_long(shift.get_type(), width as i64 - 1),
902             );
903         let rhs = self.shl(value, result_and);
904         self.or(lhs, rhs)
905     }
906
907     fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
908         let func = self.current_func.borrow().expect("func");
909
910         if signed {
911             // Algorithm from: https://stackoverflow.com/a/56531252/389119
912             let after_block = func.new_block("after");
913             let func_name =
914                 match width {
915                     8 => "__builtin_add_overflow",
916                     16 => "__builtin_add_overflow",
917                     32 => "__builtin_sadd_overflow",
918                     64 => "__builtin_saddll_overflow",
919                     128 => "__builtin_add_overflow",
920                     _ => unreachable!(),
921                 };
922             let overflow_func = self.context.get_builtin_function(func_name);
923             let result_type = lhs.get_type();
924             let res = func.new_local(None, result_type, "saturating_sum");
925             let overflow = self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None);
926
927             let then_block = func.new_block("then");
928
929             let unsigned_type = self.context.new_int_type(width as i32 / 8, false);
930             let shifted = self.context.new_cast(None, lhs, unsigned_type) >> self.context.new_rvalue_from_int(unsigned_type, width as i32 - 1);
931             let uint_max = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, unsigned_type,
932                 self.context.new_rvalue_from_int(unsigned_type, 0)
933             );
934             let int_max = uint_max >> self.context.new_rvalue_one(unsigned_type);
935             then_block.add_assignment(None, res, self.context.new_cast(None, shifted + int_max, result_type));
936             then_block.end_with_jump(None, after_block);
937
938             self.block.expect("block").end_with_conditional(None, overflow, then_block, after_block);
939
940             // NOTE: since jumps were added in a place rustc does not
941             // expect, the current blocks in the state need to be updated.
942             *self.current_block.borrow_mut() = Some(after_block);
943             self.block = Some(after_block);
944
945             res.to_rvalue()
946         }
947         else {
948             // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
949             let res = lhs + rhs;
950             let res_type = res.get_type();
951             let cond = self.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
952             let value = self.context.new_unary_op(None, UnaryOp::Minus, res_type, self.context.new_cast(None, cond, res_type));
953             res | value
954         }
955     }
956
957     // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
958     fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
959         if signed {
960             // Also based on algorithm from: https://stackoverflow.com/a/56531252/389119
961             let func_name =
962                 match width {
963                     8 => "__builtin_sub_overflow",
964                     16 => "__builtin_sub_overflow",
965                     32 => "__builtin_ssub_overflow",
966                     64 => "__builtin_ssubll_overflow",
967                     128 => "__builtin_sub_overflow",
968                     _ => unreachable!(),
969                 };
970             let overflow_func = self.context.get_builtin_function(func_name);
971             let result_type = lhs.get_type();
972             let func = self.current_func.borrow().expect("func");
973             let res = func.new_local(None, result_type, "saturating_diff");
974             let overflow = self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None);
975
976             let then_block = func.new_block("then");
977             let after_block = func.new_block("after");
978
979             let unsigned_type = self.context.new_int_type(width as i32 / 8, false);
980             let shifted = self.context.new_cast(None, lhs, unsigned_type) >> self.context.new_rvalue_from_int(unsigned_type, width as i32 - 1);
981             let uint_max = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, unsigned_type,
982                 self.context.new_rvalue_from_int(unsigned_type, 0)
983             );
984             let int_max = uint_max >> self.context.new_rvalue_one(unsigned_type);
985             then_block.add_assignment(None, res, self.context.new_cast(None, shifted + int_max, result_type));
986             then_block.end_with_jump(None, after_block);
987
988             self.block.expect("block").end_with_conditional(None, overflow, then_block, after_block);
989
990             // NOTE: since jumps were added in a place rustc does not
991             // expect, the current blocks in the state need to be updated.
992             *self.current_block.borrow_mut() = Some(after_block);
993             self.block = Some(after_block);
994
995             res.to_rvalue()
996         }
997         else {
998             let res = lhs - rhs;
999             let comparison = self.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
1000             let comparison = self.context.new_cast(None, comparison, lhs.get_type());
1001             let unary_op = self.context.new_unary_op(None, UnaryOp::Minus, comparison.get_type(), comparison);
1002             self.and(res, unary_op)
1003         }
1004     }
1005 }
1006
1007 fn try_intrinsic<'gcc, 'tcx>(bx: &mut Builder<'_, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
1008     if bx.sess().panic_strategy() == PanicStrategy::Abort {
1009         bx.call(bx.type_void(), try_func, &[data], None);
1010         // Return 0 unconditionally from the intrinsic call;
1011         // we can never unwind.
1012         let ret_align = bx.tcx.data_layout.i32_align.abi;
1013         bx.store(bx.const_i32(0), dest, ret_align);
1014     }
1015     else if wants_msvc_seh(bx.sess()) {
1016         unimplemented!();
1017     }
1018     else {
1019         unimplemented!();
1020     }
1021 }