]> git.lizzy.rs Git - rust.git/commitdiff
Create `nvptx64-nvidia-cuda` target specification
authorDenys Zariaiev <denys.zariaiev@gmail.com>
Sat, 19 Jan 2019 20:59:34 +0000 (21:59 +0100)
committerDenys Zariaiev <denys.zariaiev@gmail.com>
Sun, 27 Jan 2019 15:04:09 +0000 (16:04 +0100)
16 files changed:
src/bootstrap/lib.rs
src/bootstrap/sanity.rs
src/librustc/ty/context.rs
src/librustc_codegen_ssa/back/link.rs
src/librustc_codegen_ssa/back/linker.rs
src/librustc_codegen_utils/lib.rs
src/librustc_codegen_utils/symbol_names.rs
src/librustc_target/spec/mod.rs
src/librustc_target/spec/nvptx64_nvidia_cuda.rs [new file with mode: 0644]
src/test/run-make/nvptx-binary-crate/Makefile [new file with mode: 0644]
src/test/run-make/nvptx-binary-crate/main.rs [new file with mode: 0644]
src/test/run-make/nvptx-dylib-crate/Makefile [new file with mode: 0644]
src/test/run-make/nvptx-dylib-crate/dep.rs [new file with mode: 0644]
src/test/run-make/nvptx-dylib-crate/kernel.rs [new file with mode: 0644]
src/test/run-make/nvptx-emit-asm/Makefile [new file with mode: 0644]
src/test/run-make/nvptx-emit-asm/kernel.rs [new file with mode: 0644]

index 37451a74dfad631819972c2a55068835fc38598b..b0bbf2395e2fc0db50ff26fe3a88b435f94d32bc 100644 (file)
@@ -831,6 +831,7 @@ fn linker(&self, target: Interned<String>) -> Option<&Path> {
                   !target.contains("msvc") &&
                   !target.contains("emscripten") &&
                   !target.contains("wasm32") &&
+                  !target.contains("nvptx") &&
                   !target.contains("fuchsia") {
             Some(self.cc(target))
         } else {
index fe547a6b151c229bda8408f143448fa7219cf56d..ff4fb85bbfad335b4a7d41b8e1bba4a41f36e40c 100644 (file)
@@ -156,7 +156,7 @@ pub fn check(build: &mut Build) {
             panic!("the iOS target is only supported on macOS");
         }
 
-        if target.contains("-none-") {
+        if target.contains("-none-") || target.contains("nvptx") {
             if build.no_std(*target).is_none() {
                 let target = build.config.target_config.entry(target.clone())
                     .or_default();
@@ -165,7 +165,7 @@ pub fn check(build: &mut Build) {
             }
 
             if build.no_std(*target) == Some(false) {
-                panic!("All the *-none-* targets are no-std targets")
+                panic!("All the *-none-* and nvptx* targets are no-std targets")
             }
         }
 
index 4c8f81411163c374a2c464815376fef0f41bf4db..c126bff42582a327974871bb1a5d815bf41fabbe 100644 (file)
@@ -1675,6 +1675,12 @@ pub fn is_bound_region_in_impl_item(
         }
         false
     }
+
+    /// Determine whether identifiers in the assembly have strict naming rules.
+    /// Currently, only NVPTX* targets need it.
+    pub fn has_strict_asm_symbol_naming(&self) -> bool {
+        self.gcx.sess.target.target.arch.contains("nvptx")
+    }
 }
 
 impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> {
index d03bb0a3d73a46fbe4d6e809061abf4feb56f9cd..2a5ecf9a0593ff400a301a73c6e15371b8b1ee32 100644 (file)
@@ -149,6 +149,7 @@ fn infer_from(
                 LinkerFlavor::Ld => "ld",
                 LinkerFlavor::Msvc => "link.exe",
                 LinkerFlavor::Lld(_) => "lld",
+                LinkerFlavor::PtxLinker => "rust-ptx-linker",
             }), flavor)),
             (Some(linker), None) => {
                 let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") {
index ad61f8f01d8d416692f70e678474e2b0ab15ce07..5e9aeed7107acbf2a208b3c878036702ab57e17b 100644 (file)
@@ -83,6 +83,10 @@ pub fn to_linker<'a>(
             LinkerFlavor::Lld(LldFlavor::Wasm) => {
                 Box::new(WasmLd::new(cmd, sess, self)) as Box<dyn Linker>
             }
+
+            LinkerFlavor::PtxLinker => {
+                Box::new(PtxLinker { cmd, sess }) as Box<dyn Linker>
+            }
         }
     }
 }
@@ -1080,3 +1084,124 @@ fn exported_symbols(tcx: TyCtxt, crate_type: CrateType) -> Vec<String> {
 
     symbols
 }
+
+/// Much simplified and explicit CLI for the NVPTX linker. The linker operates
+/// with bitcode and uses LLVM backend to generate a PTX assembly.
+pub struct PtxLinker<'a> {
+    cmd: Command,
+    sess: &'a Session,
+}
+
+impl<'a> Linker for PtxLinker<'a> {
+    fn link_rlib(&mut self, path: &Path) {
+        self.cmd.arg("--rlib").arg(path);
+    }
+
+    fn link_whole_rlib(&mut self, path: &Path) {
+        self.cmd.arg("--rlib").arg(path);
+    }
+
+    fn include_path(&mut self, path: &Path) {
+        self.cmd.arg("-L").arg(path);
+    }
+
+    fn debuginfo(&mut self) {
+        self.cmd.arg("--debug");
+    }
+
+    fn add_object(&mut self, path: &Path) {
+        self.cmd.arg("--bitcode").arg(path);
+    }
+
+    fn args(&mut self, args: &[String]) {
+        self.cmd.args(args);
+    }
+
+    fn optimize(&mut self) {
+        self.cmd.arg(match self.sess.opts.optimize {
+            OptLevel::No => "-O0",
+            OptLevel::Less => "-O1",
+            OptLevel::Default => "-O2",
+            OptLevel::Aggressive => "-O3",
+            OptLevel::Size => "-Os",
+            OptLevel::SizeMin => "-Os"
+        });
+    }
+
+    fn output_filename(&mut self, path: &Path) {
+        self.cmd.arg("-o").arg(path);
+    }
+
+    fn finalize(&mut self) -> Command {
+        ::std::mem::replace(&mut self.cmd, Command::new(""))
+    }
+
+    fn link_dylib(&mut self, _lib: &str) {
+        panic!("external dylibs not supported")
+    }
+
+    fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) {
+        panic!("external dylibs not supported")
+    }
+
+    fn link_staticlib(&mut self, _lib: &str) {
+        panic!("staticlibs not supported")
+    }
+
+    fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) {
+        panic!("staticlibs not supported")
+    }
+
+    fn framework_path(&mut self, _path: &Path) {
+        panic!("frameworks not supported")
+    }
+
+    fn link_framework(&mut self, _framework: &str) {
+        panic!("frameworks not supported")
+    }
+
+    fn position_independent_executable(&mut self) {
+    }
+
+    fn full_relro(&mut self) {
+    }
+
+    fn partial_relro(&mut self) {
+    }
+
+    fn no_relro(&mut self) {
+    }
+
+    fn build_static_executable(&mut self) {
+    }
+
+    fn gc_sections(&mut self, _keep_metadata: bool) {
+    }
+
+    fn pgo_gen(&mut self) {
+    }
+
+    fn no_default_libraries(&mut self) {
+    }
+
+    fn build_dylib(&mut self, _out_filename: &Path) {
+    }
+
+    fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) {
+    }
+
+    fn subsystem(&mut self, _subsystem: &str) {
+    }
+
+    fn no_position_independent_executable(&mut self) {
+    }
+
+    fn group_start(&mut self) {
+    }
+
+    fn group_end(&mut self) {
+    }
+
+    fn cross_lang_lto(&mut self) {
+    }
+}
index 1f590d46ed8c657bb18b48b173ce8c2b06a75366..8e96f98540117cfd0dc7eeead57b0ff34e8c753f 100644 (file)
@@ -12,6 +12,7 @@
 #![feature(nll)]
 #![allow(unused_attributes)]
 #![feature(rustc_diagnostic_macros)]
+#![feature(in_band_lifetimes)]
 
 #![recursion_limit="256"]
 
index 9267f14f2423456e7de2dc19a023d10b2b55a271..f2014f7421289460ea18d7b0f708656ba25b4b41 100644 (file)
 
 use syntax_pos::symbol::Symbol;
 
-use std::fmt::Write;
+use std::fmt::{self, Write};
 use std::mem::discriminant;
 
 pub fn provide(providers: &mut Providers) {
@@ -221,7 +221,7 @@ fn get_symbol_hash<'a, 'tcx>(
 }
 
 fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName {
-    let mut buffer = SymbolPathBuffer::new();
+    let mut buffer = SymbolPathBuffer::new(tcx);
     item_path::with_forced_absolute_paths(|| {
         tcx.push_item_path(&mut buffer, def_id, false);
     });
@@ -317,7 +317,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
 
     let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
 
-    let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id));
+    let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx);
 
     if instance.is_vtable_shim() {
         buf.push("{{vtable-shim}}");
@@ -339,26 +339,28 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
 //
 // To be able to work on all platforms and get *some* reasonable output, we
 // use C++ name-mangling.
-#[derive(Debug)]
-struct SymbolPathBuffer {
+struct SymbolPathBuffer<'a, 'tcx> {
+    tcx: TyCtxt<'a, 'tcx, 'tcx>,
     result: String,
     temp_buf: String,
 }
 
-impl SymbolPathBuffer {
-    fn new() -> Self {
+impl SymbolPathBuffer<'a, 'tcx> {
+    fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
         let mut result = SymbolPathBuffer {
             result: String::with_capacity(64),
             temp_buf: String::with_capacity(16),
+            tcx,
         };
         result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested
         result
     }
 
-    fn from_interned(symbol: ty::SymbolName) -> Self {
+    fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
         let mut result = SymbolPathBuffer {
             result: String::with_capacity(64),
             temp_buf: String::with_capacity(16),
+            tcx,
         };
         result.result.push_str(&symbol.as_str());
         result
@@ -377,7 +379,7 @@ fn finish(mut self, hash: u64) -> String {
     }
 }
 
-impl ItemPathBuffer for SymbolPathBuffer {
+impl ItemPathBuffer for SymbolPathBuffer<'a, 'tcx> {
     fn root_mode(&self) -> &RootMode {
         const ABSOLUTE: &RootMode = &RootMode::Absolute;
         ABSOLUTE
@@ -385,7 +387,7 @@ fn root_mode(&self) -> &RootMode {
 
     fn push(&mut self, text: &str) {
         self.temp_buf.clear();
-        let need_underscore = sanitize(&mut self.temp_buf, text);
+        let need_underscore = sanitize(&mut self.temp_buf, text, self.tcx);
         let _ = write!(
             self.result,
             "{}",
@@ -398,12 +400,24 @@ fn push(&mut self, text: &str) {
     }
 }
 
+// Manual Debug implementation to omit non-Debug `tcx` field.
+impl fmt::Debug for SymbolPathBuffer<'_, '_> {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.debug_struct("SymbolPathBuffer")
+            .field("result", &self.result)
+            .field("temp_buf", &self.temp_buf)
+            .finish()
+    }
+}
+
 // Name sanitation. LLVM will happily accept identifiers with weird names, but
 // gas doesn't!
 // gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
+// NVPTX assembly has more strict naming rules than gas, so additionally, dots
+// are replaced with '$' there.
 //
 // returns true if an underscore must be added at the start
-pub fn sanitize(result: &mut String, s: &str) -> bool {
+pub fn sanitize(result: &mut String, s: &str, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
     for c in s.chars() {
         match c {
             // Escape these with $ sequences
@@ -416,12 +430,25 @@ pub fn sanitize(result: &mut String, s: &str) -> bool {
             ')' => result.push_str("$RP$"),
             ',' => result.push_str("$C$"),
 
-            // '.' doesn't occur in types and functions, so reuse it
-            // for ':' and '-'
-            '-' | ':' => result.push('.'),
+            '-' | ':' => if tcx.has_strict_asm_symbol_naming() {
+                // NVPTX doesn't support these characters in symbol names.
+                result.push('$')
+            }
+            else {
+                // '.' doesn't occur in types and functions, so reuse it
+                // for ':' and '-'
+                result.push('.')
+            },
+
+            '.' => if tcx.has_strict_asm_symbol_naming() {
+                result.push('$')
+            }
+            else {
+                result.push('.')
+            },
 
             // These are legal symbols
-            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(c),
+            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => result.push(c),
 
             _ => {
                 result.push('$');
index e47da3cff95b695e2545a8b28bcc5f20724cfab9..aeecce49b0c67689c1e07638e288581ebc4d04fa 100644 (file)
@@ -75,6 +75,7 @@ pub enum LinkerFlavor {
     Ld,
     Msvc,
     Lld(LldFlavor),
+    PtxLinker,
 }
 
 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash,
@@ -143,6 +144,7 @@ pub fn desc(&self) -> &str {
     ((LinkerFlavor::Gcc), "gcc"),
     ((LinkerFlavor::Ld), "ld"),
     ((LinkerFlavor::Msvc), "msvc"),
+    ((LinkerFlavor::PtxLinker), "ptx-linker"),
     ((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"),
     ((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"),
     ((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"),
@@ -455,6 +457,8 @@ fn $module() {
     ("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx),
 
     ("x86_64-unknown-uefi", x86_64_unknown_uefi),
+
+    ("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda),
 }
 
 /// Everything `rustc` knows about how to compile for a specific target.
diff --git a/src/librustc_target/spec/nvptx64_nvidia_cuda.rs b/src/librustc_target/spec/nvptx64_nvidia_cuda.rs
new file mode 100644 (file)
index 0000000..ed5d0f2
--- /dev/null
@@ -0,0 +1,73 @@
+use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions};
+use spec::abi::Abi;
+
+pub fn target() -> TargetResult {
+    Ok(Target {
+        arch: "nvptx64".to_string(),
+        data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(),
+        llvm_target: "nvptx64-nvidia-cuda".to_string(),
+
+        target_os: "cuda".to_string(),
+        target_vendor: "nvidia".to_string(),
+        target_env: String::new(),
+
+        linker_flavor: LinkerFlavor::PtxLinker,
+
+        target_endian: "little".to_string(),
+        target_pointer_width: "64".to_string(),
+        target_c_int_width: "32".to_string(),
+
+        options: TargetOptions {
+            // The linker can be installed from `crates.io`.
+            linker: Some("rust-ptx-linker".to_string()),
+
+            // With `ptx-linker` approach, it can be later overriden via link flags.
+            cpu: "sm_20".to_string(),
+
+            // TODO(denzp): create tests for the atomics.
+            max_atomic_width: Some(64),
+
+            // Unwinding on CUDA is neither feasible nor useful.
+            panic_strategy: PanicStrategy::Abort,
+
+            // Needed to use `dylib` and `bin` crate types and the linker.
+            dynamic_linking: true,
+            executables: true,
+
+            // Avoid using dylib because it contain metadata not supported
+            // by LLVM NVPTX backend.
+            only_cdylib: true,
+
+            // Let the `ptx-linker` to handle LLVM lowering into MC / assembly.
+            obj_is_bitcode: true,
+
+            // Convinient and predicable naming scheme.
+            dll_prefix: "".to_string(),
+            dll_suffix: ".ptx".to_string(),
+            exe_suffix: ".ptx".to_string(),
+
+            // Disable MergeFunctions LLVM optimisation pass because it can
+            // produce kernel functions that call other kernel functions.
+            // This behavior is not supported by PTX ISA.
+            merge_functions: MergeFunctions::Disabled,
+
+            // TODO(denzp): enable compilation tests for the target and
+            // create the tests for this.
+            abi_blacklist: vec![
+                Abi::Cdecl,
+                Abi::Stdcall,
+                Abi::Fastcall,
+                Abi::Vectorcall,
+                Abi::Thiscall,
+                Abi::Aapcs,
+                Abi::Win64,
+                Abi::SysV64,
+                Abi::Msp430Interrupt,
+                Abi::X86Interrupt,
+                Abi::AmdGpuKernel,
+            ],
+
+            .. Default::default()
+        },
+    })
+}
diff --git a/src/test/run-make/nvptx-binary-crate/Makefile b/src/test/run-make/nvptx-binary-crate/Makefile
new file mode 100644 (file)
index 0000000..4c22dae
--- /dev/null
@@ -0,0 +1,9 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+       $(RUSTC) main.rs -Clink-arg=--arch=sm_60 --crate-type="bin" -O --target $(TARGET)
+       FileCheck main.rs --input-file $(TMPDIR)/main.ptx
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-binary-crate/main.rs b/src/test/run-make/nvptx-binary-crate/main.rs
new file mode 100644 (file)
index 0000000..826bc3a
--- /dev/null
@@ -0,0 +1,28 @@
+#![no_std]
+#![no_main]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+// Check the overriden CUDA arch.
+// CHECK: .target sm_60
+// CHECK: .address_size 64
+
+// Verify that no extra function declarations are present.
+// CHECK-NOT: .func
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5;
+    *b = *a + 5;
+}
+
+// Verify that no extra function definitions are there.
+// CHECK-NOT: .func
+// CHECK-NOT: .entry
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+    core::intrinsics::breakpoint();
+    core::hint::unreachable_unchecked();
+}
diff --git a/src/test/run-make/nvptx-dylib-crate/Makefile b/src/test/run-make/nvptx-dylib-crate/Makefile
new file mode 100644 (file)
index 0000000..7284e9d
--- /dev/null
@@ -0,0 +1,10 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+       $(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET)
+       $(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET)
+       FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-dylib-crate/dep.rs b/src/test/run-make/nvptx-dylib-crate/dep.rs
new file mode 100644 (file)
index 0000000..57f3ee8
--- /dev/null
@@ -0,0 +1,14 @@
+#![no_std]
+#![deny(warnings)]
+
+#[inline(never)]
+#[no_mangle]
+pub fn wrapping_external_fn(a: u32) -> u32 {
+    a.wrapping_mul(a)
+}
+
+#[inline(never)]
+#[no_mangle]
+pub fn panicking_external_fn(a: u32) -> u32 {
+    a * a
+}
diff --git a/src/test/run-make/nvptx-dylib-crate/kernel.rs b/src/test/run-make/nvptx-dylib-crate/kernel.rs
new file mode 100644 (file)
index 0000000..a889e23
--- /dev/null
@@ -0,0 +1,67 @@
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+extern crate dep;
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_20
+// CHECK: .address_size 64
+
+// Make sure declarations are there.
+// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]]
+// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: wrapping_external_fn
+    // CHECK:      ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0];
+    let lhs = dep::wrapping_external_fn(*a);
+
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: panicking_external_fn
+    // CHECK:      ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0];
+    let rhs = dep::panicking_external_fn(*a);
+
+    // CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]];
+    // CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]];
+    *b = lhs + rhs;
+}
+
+// Verify that external function bodies are available.
+// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: {
+// CHECK:   st.param.b32 [func_retval0+0], %{{r[0-9]+}};
+// CHECK: }
+
+// Also verify panic behavior.
+// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: {
+// CHECK:   %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]];
+// CHECK: [[PANIC_LABEL]]:
+// CHECK:   call.uni
+// CHECK:   [[PANIC_HANDLER]]
+// CHECK: }
+
+// Verify whether panic handler is present.
+// CHECK: .func [[PANIC_HANDLER]]()
+// CHECK: {
+// CHECK:   call.uni
+// CHECK:   [[PANIC_FMT]]
+// CHECK: }
+
+// And finally, check the dummy panic formatter.
+// CHECK: .func [[PANIC_FMT]]()
+// CHECK: {
+// CHECK:   trap;
+// CHECK: }
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+    core::intrinsics::breakpoint();
+    core::hint::unreachable_unchecked();
+}
diff --git a/src/test/run-make/nvptx-emit-asm/Makefile b/src/test/run-make/nvptx-emit-asm/Makefile
new file mode 100644 (file)
index 0000000..e036018
--- /dev/null
@@ -0,0 +1,9 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+       $(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET)
+       FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-emit-asm/kernel.rs b/src/test/run-make/nvptx-emit-asm/kernel.rs
new file mode 100644 (file)
index 0000000..070a6ef
--- /dev/null
@@ -0,0 +1,41 @@
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx)]
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_20
+// CHECK: .address_size 64
+
+// Verify function name doesn't contain unacceaptable characters.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: [[IMPL_FN]]
+    *b = deep::private::MyStruct::new(*a).square();
+}
+
+pub mod deep {
+    pub mod private {
+        pub struct MyStruct<T>(T);
+
+        impl MyStruct<u32> {
+            pub fn new(a: u32) -> Self {
+                MyStruct(a)
+            }
+
+            #[inline(never)]
+            pub fn square(&self) -> u32 {
+                self.0.wrapping_mul(self.0)
+            }
+        }
+    }
+}
+
+// Verify that external function bodies are available.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]]
+// CHECK: {
+// CHECK:   mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}}
+// CHECK: }