From: Denys Zariaiev Date: Sat, 19 Jan 2019 20:59:34 +0000 (+0100) Subject: Create `nvptx64-nvidia-cuda` target specification X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=d3903d5f9c4e58cc3fa256ec5be52717b84e6308;p=rust.git Create `nvptx64-nvidia-cuda` target specification --- diff --git a/src/bootstrap/lib.rs b/src/bootstrap/lib.rs index 37451a74dfa..b0bbf2395e2 100644 --- a/src/bootstrap/lib.rs +++ b/src/bootstrap/lib.rs @@ -831,6 +831,7 @@ fn linker(&self, target: Interned) -> Option<&Path> { !target.contains("msvc") && !target.contains("emscripten") && !target.contains("wasm32") && + !target.contains("nvptx") && !target.contains("fuchsia") { Some(self.cc(target)) } else { diff --git a/src/bootstrap/sanity.rs b/src/bootstrap/sanity.rs index fe547a6b151..ff4fb85bbfa 100644 --- a/src/bootstrap/sanity.rs +++ b/src/bootstrap/sanity.rs @@ -156,7 +156,7 @@ pub fn check(build: &mut Build) { panic!("the iOS target is only supported on macOS"); } - if target.contains("-none-") { + if target.contains("-none-") || target.contains("nvptx") { if build.no_std(*target).is_none() { let target = build.config.target_config.entry(target.clone()) .or_default(); @@ -165,7 +165,7 @@ pub fn check(build: &mut Build) { } if build.no_std(*target) == Some(false) { - panic!("All the *-none-* targets are no-std targets") + panic!("All the *-none-* and nvptx* targets are no-std targets") } } diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs index 4c8f8141116..c126bff4258 100644 --- a/src/librustc/ty/context.rs +++ b/src/librustc/ty/context.rs @@ -1675,6 +1675,12 @@ pub fn is_bound_region_in_impl_item( } false } + + /// Determine whether identifiers in the assembly have strict naming rules. + /// Currently, only NVPTX* targets need it. 
+ pub fn has_strict_asm_symbol_naming(&self) -> bool { + self.gcx.sess.target.target.arch.contains("nvptx") + } } impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> { diff --git a/src/librustc_codegen_ssa/back/link.rs b/src/librustc_codegen_ssa/back/link.rs index d03bb0a3d73..2a5ecf9a059 100644 --- a/src/librustc_codegen_ssa/back/link.rs +++ b/src/librustc_codegen_ssa/back/link.rs @@ -149,6 +149,7 @@ fn infer_from( LinkerFlavor::Ld => "ld", LinkerFlavor::Msvc => "link.exe", LinkerFlavor::Lld(_) => "lld", + LinkerFlavor::PtxLinker => "rust-ptx-linker", }), flavor)), (Some(linker), None) => { let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") { diff --git a/src/librustc_codegen_ssa/back/linker.rs b/src/librustc_codegen_ssa/back/linker.rs index ad61f8f01d8..5e9aeed7107 100644 --- a/src/librustc_codegen_ssa/back/linker.rs +++ b/src/librustc_codegen_ssa/back/linker.rs @@ -83,6 +83,10 @@ pub fn to_linker<'a>( LinkerFlavor::Lld(LldFlavor::Wasm) => { Box::new(WasmLd::new(cmd, sess, self)) as Box } + + LinkerFlavor::PtxLinker => { + Box::new(PtxLinker { cmd, sess }) as Box + } } } } @@ -1080,3 +1084,124 @@ fn exported_symbols(tcx: TyCtxt, crate_type: CrateType) -> Vec { symbols } + +/// Much simplified and explicit CLI for the NVPTX linker. The linker operates +/// with bitcode and uses LLVM backend to generate a PTX assembly. 
+pub struct PtxLinker<'a> { + cmd: Command, + sess: &'a Session, +} + +impl<'a> Linker for PtxLinker<'a> { + fn link_rlib(&mut self, path: &Path) { + self.cmd.arg("--rlib").arg(path); + } + + fn link_whole_rlib(&mut self, path: &Path) { + self.cmd.arg("--rlib").arg(path); + } + + fn include_path(&mut self, path: &Path) { + self.cmd.arg("-L").arg(path); + } + + fn debuginfo(&mut self) { + self.cmd.arg("--debug"); + } + + fn add_object(&mut self, path: &Path) { + self.cmd.arg("--bitcode").arg(path); + } + + fn args(&mut self, args: &[String]) { + self.cmd.args(args); + } + + fn optimize(&mut self) { + self.cmd.arg(match self.sess.opts.optimize { + OptLevel::No => "-O0", + OptLevel::Less => "-O1", + OptLevel::Default => "-O2", + OptLevel::Aggressive => "-O3", + OptLevel::Size => "-Os", + OptLevel::SizeMin => "-Os" + }); + } + + fn output_filename(&mut self, path: &Path) { + self.cmd.arg("-o").arg(path); + } + + fn finalize(&mut self) -> Command { + ::std::mem::replace(&mut self.cmd, Command::new("")) + } + + fn link_dylib(&mut self, _lib: &str) { + panic!("external dylibs not supported") + } + + fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) { + panic!("external dylibs not supported") + } + + fn link_staticlib(&mut self, _lib: &str) { + panic!("staticlibs not supported") + } + + fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) { + panic!("staticlibs not supported") + } + + fn framework_path(&mut self, _path: &Path) { + panic!("frameworks not supported") + } + + fn link_framework(&mut self, _framework: &str) { + panic!("frameworks not supported") + } + + fn position_independent_executable(&mut self) { + } + + fn full_relro(&mut self) { + } + + fn partial_relro(&mut self) { + } + + fn no_relro(&mut self) { + } + + fn build_static_executable(&mut self) { + } + + fn gc_sections(&mut self, _keep_metadata: bool) { + } + + fn pgo_gen(&mut self) { + } + + fn no_default_libraries(&mut self) { + } + + fn build_dylib(&mut self, _out_filename: 
&Path) { + } + + fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) { + } + + fn subsystem(&mut self, _subsystem: &str) { + } + + fn no_position_independent_executable(&mut self) { + } + + fn group_start(&mut self) { + } + + fn group_end(&mut self) { + } + + fn cross_lang_lto(&mut self) { + } +} diff --git a/src/librustc_codegen_utils/lib.rs b/src/librustc_codegen_utils/lib.rs index 1f590d46ed8..8e96f985401 100644 --- a/src/librustc_codegen_utils/lib.rs +++ b/src/librustc_codegen_utils/lib.rs @@ -12,6 +12,7 @@ #![feature(nll)] #![allow(unused_attributes)] #![feature(rustc_diagnostic_macros)] +#![feature(in_band_lifetimes)] #![recursion_limit="256"] diff --git a/src/librustc_codegen_utils/symbol_names.rs b/src/librustc_codegen_utils/symbol_names.rs index 9267f14f242..f2014f74212 100644 --- a/src/librustc_codegen_utils/symbol_names.rs +++ b/src/librustc_codegen_utils/symbol_names.rs @@ -103,7 +103,7 @@ use syntax_pos::symbol::Symbol; -use std::fmt::Write; +use std::fmt::{self, Write}; use std::mem::discriminant; pub fn provide(providers: &mut Providers) { @@ -221,7 +221,7 @@ fn get_symbol_hash<'a, 'tcx>( } fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName { - let mut buffer = SymbolPathBuffer::new(); + let mut buffer = SymbolPathBuffer::new(tcx); item_path::with_forced_absolute_paths(|| { tcx.push_item_path(&mut buffer, def_id, false); }); @@ -317,7 +317,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs); - let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id)); + let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx); if instance.is_vtable_shim() { buf.push("{{vtable-shim}}"); @@ -339,26 +339,28 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance // // To be able to work on all platforms and get *some* reasonable output, we // 
use C++ name-mangling. -#[derive(Debug)] -struct SymbolPathBuffer { +struct SymbolPathBuffer<'a, 'tcx> { + tcx: TyCtxt<'a, 'tcx, 'tcx>, result: String, temp_buf: String, } -impl SymbolPathBuffer { - fn new() -> Self { +impl SymbolPathBuffer<'a, 'tcx> { + fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self { let mut result = SymbolPathBuffer { result: String::with_capacity(64), temp_buf: String::with_capacity(16), + tcx, }; result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested result } - fn from_interned(symbol: ty::SymbolName) -> Self { + fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self { let mut result = SymbolPathBuffer { result: String::with_capacity(64), temp_buf: String::with_capacity(16), + tcx, }; result.result.push_str(&symbol.as_str()); result @@ -377,7 +379,7 @@ fn finish(mut self, hash: u64) -> String { } } -impl ItemPathBuffer for SymbolPathBuffer { +impl ItemPathBuffer for SymbolPathBuffer<'a, 'tcx> { fn root_mode(&self) -> &RootMode { const ABSOLUTE: &RootMode = &RootMode::Absolute; ABSOLUTE @@ -385,7 +387,7 @@ fn root_mode(&self) -> &RootMode { fn push(&mut self, text: &str) { self.temp_buf.clear(); - let need_underscore = sanitize(&mut self.temp_buf, text); + let need_underscore = sanitize(&mut self.temp_buf, text, self.tcx); let _ = write!( self.result, "{}", @@ -398,12 +400,24 @@ fn push(&mut self, text: &str) { } } +// Manual Debug implementation to omit non-Debug `tcx` field. +impl fmt::Debug for SymbolPathBuffer<'_, '_> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("SymbolPathBuffer") + .field("result", &self.result) + .field("temp_buf", &self.temp_buf) + .finish() + } +} + // Name sanitation. LLVM will happily accept identifiers with weird names, but // gas doesn't! // gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $ +// NVPTX assembly has more strict naming rules than gas, so additionally, dots +// are replaced with '$' there. 
// // returns true if an underscore must be added at the start -pub fn sanitize(result: &mut String, s: &str) -> bool { +pub fn sanitize(result: &mut String, s: &str, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool { for c in s.chars() { match c { // Escape these with $ sequences @@ -416,12 +430,25 @@ pub fn sanitize(result: &mut String, s: &str) -> bool { ')' => result.push_str("$RP$"), ',' => result.push_str("$C$"), - // '.' doesn't occur in types and functions, so reuse it - // for ':' and '-' - '-' | ':' => result.push('.'), + '-' | ':' => if tcx.has_strict_asm_symbol_naming() { + // NVPTX doesn't support these characters in symbol names. + result.push('$') + } + else { + // '.' doesn't occur in types and functions, so reuse it + // for ':' and '-' + result.push('.') + }, + + '.' => if tcx.has_strict_asm_symbol_naming() { + result.push('$') + } + else { + result.push('.') + }, // These are legal symbols - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(c), + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => result.push(c), _ => { result.push('$'); diff --git a/src/librustc_target/spec/mod.rs b/src/librustc_target/spec/mod.rs index e47da3cff95..aeecce49b0c 100644 --- a/src/librustc_target/spec/mod.rs +++ b/src/librustc_target/spec/mod.rs @@ -75,6 +75,7 @@ pub enum LinkerFlavor { Ld, Msvc, Lld(LldFlavor), + PtxLinker, } #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash, @@ -143,6 +144,7 @@ pub fn desc(&self) -> &str { ((LinkerFlavor::Gcc), "gcc"), ((LinkerFlavor::Ld), "ld"), ((LinkerFlavor::Msvc), "msvc"), + ((LinkerFlavor::PtxLinker), "ptx-linker"), ((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"), ((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"), ((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"), @@ -455,6 +457,8 @@ fn $module() { ("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx), ("x86_64-unknown-uefi", x86_64_unknown_uefi), + + ("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda), } /// Everything `rustc` knows about how to compile 
for a specific target. diff --git a/src/librustc_target/spec/nvptx64_nvidia_cuda.rs b/src/librustc_target/spec/nvptx64_nvidia_cuda.rs new file mode 100644 index 00000000000..ed5d0f24506 --- /dev/null +++ b/src/librustc_target/spec/nvptx64_nvidia_cuda.rs @@ -0,0 +1,73 @@ +use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions}; +use spec::abi::Abi; + +pub fn target() -> TargetResult { + Ok(Target { + arch: "nvptx64".to_string(), + data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(), + llvm_target: "nvptx64-nvidia-cuda".to_string(), + + target_os: "cuda".to_string(), + target_vendor: "nvidia".to_string(), + target_env: String::new(), + + linker_flavor: LinkerFlavor::PtxLinker, + + target_endian: "little".to_string(), + target_pointer_width: "64".to_string(), + target_c_int_width: "32".to_string(), + + options: TargetOptions { + // The linker can be installed from `crates.io`. + linker: Some("rust-ptx-linker".to_string()), + + // With `ptx-linker` approach, it can be later overridden via link flags. + cpu: "sm_20".to_string(), + + // TODO(denzp): create tests for the atomics. + max_atomic_width: Some(64), + + // Unwinding on CUDA is neither feasible nor useful. + panic_strategy: PanicStrategy::Abort, + + // Needed to use `dylib` and `bin` crate types and the linker. + dynamic_linking: true, + executables: true, + + // Avoid using dylib because it contains metadata not supported + // by LLVM NVPTX backend. + only_cdylib: true, + + // Let the `ptx-linker` handle LLVM lowering into MC / assembly. + obj_is_bitcode: true, + + // Convenient and predictable naming scheme. + dll_prefix: "".to_string(), + dll_suffix: ".ptx".to_string(), + exe_suffix: ".ptx".to_string(), + + // Disable MergeFunctions LLVM optimisation pass because it can + // produce kernel functions that call other kernel functions. + // This behavior is not supported by PTX ISA. 
+ merge_functions: MergeFunctions::Disabled, + + // TODO(denzp): enable compilation tests for the target and + // create the tests for this. + abi_blacklist: vec![ + Abi::Cdecl, + Abi::Stdcall, + Abi::Fastcall, + Abi::Vectorcall, + Abi::Thiscall, + Abi::Aapcs, + Abi::Win64, + Abi::SysV64, + Abi::Msp430Interrupt, + Abi::X86Interrupt, + Abi::AmdGpuKernel, + ], + + .. Default::default() + }, + }) +} diff --git a/src/test/run-make/nvptx-binary-crate/Makefile b/src/test/run-make/nvptx-binary-crate/Makefile new file mode 100644 index 00000000000..4c22dae265c --- /dev/null +++ b/src/test/run-make/nvptx-binary-crate/Makefile @@ -0,0 +1,9 @@ +-include ../../run-make-fulldeps/tools.mk + +ifeq ($(TARGET),nvptx64-nvidia-cuda) +all: + $(RUSTC) main.rs -Clink-arg=--arch=sm_60 --crate-type="bin" -O --target $(TARGET) + FileCheck main.rs --input-file $(TMPDIR)/main.ptx +else +all: +endif diff --git a/src/test/run-make/nvptx-binary-crate/main.rs b/src/test/run-make/nvptx-binary-crate/main.rs new file mode 100644 index 00000000000..826bc3a47bb --- /dev/null +++ b/src/test/run-make/nvptx-binary-crate/main.rs @@ -0,0 +1,28 @@ +#![no_std] +#![no_main] +#![deny(warnings)] +#![feature(abi_ptx, core_intrinsics)] + +// Check the overridden CUDA arch. +// CHECK: .target sm_60 +// CHECK: .address_size 64 + +// Verify that no extra function declarations are present. +// CHECK-NOT: .func + +// CHECK-LABEL: .visible .entry top_kernel( +#[no_mangle] +pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) { + // CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5; + *b = *a + 5; +} + +// Verify that no extra function definitions are there. +// CHECK-NOT: .func +// CHECK-NOT: .entry + +#[panic_handler] +unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! 
{ + core::intrinsics::breakpoint(); + core::hint::unreachable_unchecked(); +} diff --git a/src/test/run-make/nvptx-dylib-crate/Makefile b/src/test/run-make/nvptx-dylib-crate/Makefile new file mode 100644 index 00000000000..7284e9d1a7c --- /dev/null +++ b/src/test/run-make/nvptx-dylib-crate/Makefile @@ -0,0 +1,10 @@ +-include ../../run-make-fulldeps/tools.mk + +ifeq ($(TARGET),nvptx64-nvidia-cuda) +all: + $(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET) + $(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET) + FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx +else +all: +endif diff --git a/src/test/run-make/nvptx-dylib-crate/dep.rs b/src/test/run-make/nvptx-dylib-crate/dep.rs new file mode 100644 index 00000000000..57f3ee87cdb --- /dev/null +++ b/src/test/run-make/nvptx-dylib-crate/dep.rs @@ -0,0 +1,14 @@ +#![no_std] +#![deny(warnings)] + +#[inline(never)] +#[no_mangle] +pub fn wrapping_external_fn(a: u32) -> u32 { + a.wrapping_mul(a) +} + +#[inline(never)] +#[no_mangle] +pub fn panicking_external_fn(a: u32) -> u32 { + a * a +} diff --git a/src/test/run-make/nvptx-dylib-crate/kernel.rs b/src/test/run-make/nvptx-dylib-crate/kernel.rs new file mode 100644 index 00000000000..a889e23018d --- /dev/null +++ b/src/test/run-make/nvptx-dylib-crate/kernel.rs @@ -0,0 +1,67 @@ +#![no_std] +#![deny(warnings)] +#![feature(abi_ptx, core_intrinsics)] + +extern crate dep; + +// Verify the default CUDA arch. +// CHECK: .target sm_20 +// CHECK: .address_size 64 + +// Make sure declarations are there. 
+// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn +// CHECK: .func (.param .b32 func_retval0) panicking_external_fn +// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]] +// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]] + +// CHECK-LABEL: .visible .entry top_kernel( +#[no_mangle] +pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) { + // CHECK: call.uni (retval0), + // CHECK-NEXT: wrapping_external_fn + // CHECK: ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0]; + let lhs = dep::wrapping_external_fn(*a); + + // CHECK: call.uni (retval0), + // CHECK-NEXT: panicking_external_fn + // CHECK: ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0]; + let rhs = dep::panicking_external_fn(*a); + + // CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]]; + // CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]]; + *b = lhs + rhs; +} + +// Verify that external function bodies are available. +// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn +// CHECK: { +// CHECK: st.param.b32 [func_retval0+0], %{{r[0-9]+}}; +// CHECK: } + +// Also verify panic behavior. +// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn +// CHECK: { +// CHECK: %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]]; +// CHECK: [[PANIC_LABEL]]: +// CHECK: call.uni +// CHECK: [[PANIC_HANDLER]] +// CHECK: } + +// Verify whether panic handler is present. +// CHECK: .func [[PANIC_HANDLER]]() +// CHECK: { +// CHECK: call.uni +// CHECK: [[PANIC_FMT]] +// CHECK: } + +// And finally, check the dummy panic formatter. +// CHECK: .func [[PANIC_FMT]]() +// CHECK: { +// CHECK: trap; +// CHECK: } + +#[panic_handler] +unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! 
{ + core::intrinsics::breakpoint(); + core::hint::unreachable_unchecked(); +} diff --git a/src/test/run-make/nvptx-emit-asm/Makefile b/src/test/run-make/nvptx-emit-asm/Makefile new file mode 100644 index 00000000000..e03601878bd --- /dev/null +++ b/src/test/run-make/nvptx-emit-asm/Makefile @@ -0,0 +1,9 @@ +-include ../../run-make-fulldeps/tools.mk + +ifeq ($(TARGET),nvptx64-nvidia-cuda) +all: + $(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET) + FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s +else +all: +endif diff --git a/src/test/run-make/nvptx-emit-asm/kernel.rs b/src/test/run-make/nvptx-emit-asm/kernel.rs new file mode 100644 index 00000000000..070a6efd2d5 --- /dev/null +++ b/src/test/run-make/nvptx-emit-asm/kernel.rs @@ -0,0 +1,41 @@ +#![no_std] +#![deny(warnings)] +#![feature(abi_ptx)] + +// Verify the default CUDA arch. +// CHECK: .target sm_20 +// CHECK: .address_size 64 + +// Verify function name doesn't contain unacceptable characters. +// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]] + +// CHECK-LABEL: .visible .entry top_kernel( +#[no_mangle] +pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) { + // CHECK: call.uni (retval0), + // CHECK-NEXT: [[IMPL_FN]] + *b = deep::private::MyStruct::new(*a).square(); +} + +pub mod deep { + pub mod private { + pub struct MyStruct(T); + + impl MyStruct { + pub fn new(a: u32) -> Self { + MyStruct(a) + } + + #[inline(never)] + pub fn square(&self) -> u32 { + self.0.wrapping_mul(self.0) + } + } + } +} + +// Verify that external function bodies are available. +// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]] +// CHECK: { +// CHECK: mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}} +// CHECK: }