!target.contains("msvc") &&
!target.contains("emscripten") &&
!target.contains("wasm32") &&
+ !target.contains("nvptx") &&
!target.contains("fuchsia") {
Some(self.cc(target))
} else {
panic!("the iOS target is only supported on macOS");
}
- if target.contains("-none-") {
+ if target.contains("-none-") || target.contains("nvptx") {
if build.no_std(*target).is_none() {
let target = build.config.target_config.entry(target.clone())
.or_default();
}
if build.no_std(*target) == Some(false) {
- panic!("All the *-none-* targets are no-std targets")
+ panic!("All the *-none-* and nvptx* targets are no-std targets")
}
}
}
false
}
+
+ /// Determine whether identifiers in the assembly have strict naming rules.
+ /// Currently, only NVPTX* targets need it.
+ pub fn has_strict_asm_symbol_naming(&self) -> bool {
+ self.gcx.sess.target.target.arch.contains("nvptx")
+ }
}
impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> {
LinkerFlavor::Ld => "ld",
LinkerFlavor::Msvc => "link.exe",
LinkerFlavor::Lld(_) => "lld",
+ LinkerFlavor::PtxLinker => "rust-ptx-linker",
}), flavor)),
(Some(linker), None) => {
let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") {
LinkerFlavor::Lld(LldFlavor::Wasm) => {
Box::new(WasmLd::new(cmd, sess, self)) as Box<dyn Linker>
}
+
+ LinkerFlavor::PtxLinker => {
+ Box::new(PtxLinker { cmd, sess }) as Box<dyn Linker>
+ }
}
}
}
symbols
}
+
+/// A much simplified and explicit CLI for the NVPTX linker. The linker operates
+/// on bitcode and uses the LLVM backend to generate PTX assembly.
+pub struct PtxLinker<'a> {
+ cmd: Command,
+ sess: &'a Session,
+}
+
+impl<'a> Linker for PtxLinker<'a> {
+ fn link_rlib(&mut self, path: &Path) {
+ self.cmd.arg("--rlib").arg(path);
+ }
+
+ fn link_whole_rlib(&mut self, path: &Path) {
+ self.cmd.arg("--rlib").arg(path);
+ }
+
+ fn include_path(&mut self, path: &Path) {
+ self.cmd.arg("-L").arg(path);
+ }
+
+ fn debuginfo(&mut self) {
+ self.cmd.arg("--debug");
+ }
+
+ fn add_object(&mut self, path: &Path) {
+ self.cmd.arg("--bitcode").arg(path);
+ }
+
+ fn args(&mut self, args: &[String]) {
+ self.cmd.args(args);
+ }
+
+ fn optimize(&mut self) {
+ self.cmd.arg(match self.sess.opts.optimize {
+ OptLevel::No => "-O0",
+ OptLevel::Less => "-O1",
+ OptLevel::Default => "-O2",
+ OptLevel::Aggressive => "-O3",
+ OptLevel::Size => "-Os",
+ OptLevel::SizeMin => "-Os"
+ });
+ }
+
+ fn output_filename(&mut self, path: &Path) {
+ self.cmd.arg("-o").arg(path);
+ }
+
+ fn finalize(&mut self) -> Command {
+ ::std::mem::replace(&mut self.cmd, Command::new(""))
+ }
+
+ fn link_dylib(&mut self, _lib: &str) {
+ panic!("external dylibs not supported")
+ }
+
+ fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) {
+ panic!("external dylibs not supported")
+ }
+
+ fn link_staticlib(&mut self, _lib: &str) {
+ panic!("staticlibs not supported")
+ }
+
+ fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) {
+ panic!("staticlibs not supported")
+ }
+
+ fn framework_path(&mut self, _path: &Path) {
+ panic!("frameworks not supported")
+ }
+
+ fn link_framework(&mut self, _framework: &str) {
+ panic!("frameworks not supported")
+ }
+
+ fn position_independent_executable(&mut self) {
+ }
+
+ fn full_relro(&mut self) {
+ }
+
+ fn partial_relro(&mut self) {
+ }
+
+ fn no_relro(&mut self) {
+ }
+
+ fn build_static_executable(&mut self) {
+ }
+
+ fn gc_sections(&mut self, _keep_metadata: bool) {
+ }
+
+ fn pgo_gen(&mut self) {
+ }
+
+ fn no_default_libraries(&mut self) {
+ }
+
+ fn build_dylib(&mut self, _out_filename: &Path) {
+ }
+
+ fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) {
+ }
+
+ fn subsystem(&mut self, _subsystem: &str) {
+ }
+
+ fn no_position_independent_executable(&mut self) {
+ }
+
+ fn group_start(&mut self) {
+ }
+
+ fn group_end(&mut self) {
+ }
+
+ fn cross_lang_lto(&mut self) {
+ }
+}
#![feature(nll)]
#![allow(unused_attributes)]
#![feature(rustc_diagnostic_macros)]
+#![feature(in_band_lifetimes)]
#![recursion_limit="256"]
use syntax_pos::symbol::Symbol;
-use std::fmt::Write;
+use std::fmt::{self, Write};
use std::mem::discriminant;
pub fn provide(providers: &mut Providers) {
}
fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName {
- let mut buffer = SymbolPathBuffer::new();
+ let mut buffer = SymbolPathBuffer::new(tcx);
item_path::with_forced_absolute_paths(|| {
tcx.push_item_path(&mut buffer, def_id, false);
});
let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
- let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id));
+ let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx);
if instance.is_vtable_shim() {
buf.push("{{vtable-shim}}");
//
// To be able to work on all platforms and get *some* reasonable output, we
// use C++ name-mangling.
-#[derive(Debug)]
-struct SymbolPathBuffer {
+struct SymbolPathBuffer<'a, 'tcx> {
+ tcx: TyCtxt<'a, 'tcx, 'tcx>,
result: String,
temp_buf: String,
}
-impl SymbolPathBuffer {
- fn new() -> Self {
+impl SymbolPathBuffer<'a, 'tcx> {
+ fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
let mut result = SymbolPathBuffer {
result: String::with_capacity(64),
temp_buf: String::with_capacity(16),
+ tcx,
};
result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested
result
}
- fn from_interned(symbol: ty::SymbolName) -> Self {
+ fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
let mut result = SymbolPathBuffer {
result: String::with_capacity(64),
temp_buf: String::with_capacity(16),
+ tcx,
};
result.result.push_str(&symbol.as_str());
result
}
}
-impl ItemPathBuffer for SymbolPathBuffer {
+impl ItemPathBuffer for SymbolPathBuffer<'a, 'tcx> {
fn root_mode(&self) -> &RootMode {
const ABSOLUTE: &RootMode = &RootMode::Absolute;
ABSOLUTE
fn push(&mut self, text: &str) {
self.temp_buf.clear();
- let need_underscore = sanitize(&mut self.temp_buf, text);
+ let need_underscore = sanitize(&mut self.temp_buf, text, self.tcx);
let _ = write!(
self.result,
"{}",
}
}
+// Manual Debug implementation to omit non-Debug `tcx` field.
+impl fmt::Debug for SymbolPathBuffer<'_, '_> {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ fmt.debug_struct("SymbolPathBuffer")
+ .field("result", &self.result)
+ .field("temp_buf", &self.temp_buf)
+ .finish()
+ }
+}
+
// Name sanitation. LLVM will happily accept identifiers with weird names, but
// gas doesn't!
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
+// NVPTX assembly has stricter naming rules than gas: for NVPTX targets '-', ':'
+// and '.' are all replaced with '$'.
//
// returns true if an underscore must be added at the start
-pub fn sanitize(result: &mut String, s: &str) -> bool {
+pub fn sanitize(result: &mut String, s: &str, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
for c in s.chars() {
match c {
// Escape these with $ sequences
')' => result.push_str("$RP$"),
',' => result.push_str("$C$"),
- // '.' doesn't occur in types and functions, so reuse it
- // for ':' and '-'
- '-' | ':' => result.push('.'),
+ '-' | ':' => if tcx.has_strict_asm_symbol_naming() {
+ // NVPTX doesn't support these characters in symbol names.
+ result.push('$')
+ }
+ else {
+ // '.' doesn't occur in types and functions, so reuse it
+ // for ':' and '-'
+ result.push('.')
+ },
+
+ '.' => if tcx.has_strict_asm_symbol_naming() {
+ result.push('$')
+ }
+ else {
+ result.push('.')
+ },
// These are legal symbols
- 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(c),
+ 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => result.push(c),
_ => {
result.push('$');
Ld,
Msvc,
Lld(LldFlavor),
+ PtxLinker,
}
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash,
((LinkerFlavor::Gcc), "gcc"),
((LinkerFlavor::Ld), "ld"),
((LinkerFlavor::Msvc), "msvc"),
+ ((LinkerFlavor::PtxLinker), "ptx-linker"),
((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"),
((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"),
((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"),
("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx),
("x86_64-unknown-uefi", x86_64_unknown_uefi),
+
+ ("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda),
}
/// Everything `rustc` knows about how to compile for a specific target.
--- /dev/null
+use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions};
+use spec::abi::Abi;
+
+pub fn target() -> TargetResult {
+ Ok(Target {
+ arch: "nvptx64".to_string(),
+ data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(),
+ llvm_target: "nvptx64-nvidia-cuda".to_string(),
+
+ target_os: "cuda".to_string(),
+ target_vendor: "nvidia".to_string(),
+ target_env: String::new(),
+
+ linker_flavor: LinkerFlavor::PtxLinker,
+
+ target_endian: "little".to_string(),
+ target_pointer_width: "64".to_string(),
+ target_c_int_width: "32".to_string(),
+
+ options: TargetOptions {
+ // The linker can be installed from `crates.io`.
+ linker: Some("rust-ptx-linker".to_string()),
+
+ // With the `ptx-linker` approach, it can be overridden later via link flags.
+ cpu: "sm_20".to_string(),
+
+ // TODO(denzp): create tests for the atomics.
+ max_atomic_width: Some(64),
+
+ // Unwinding on CUDA is neither feasible nor useful.
+ panic_strategy: PanicStrategy::Abort,
+
+ // Needed to use `dylib` and `bin` crate types and the linker.
+ dynamic_linking: true,
+ executables: true,
+
+ // Avoid using dylib because it contains metadata not supported
+ // by the LLVM NVPTX backend.
+ only_cdylib: true,
+
+ // Let the `ptx-linker` handle LLVM lowering into MC / assembly.
+ obj_is_bitcode: true,
+
+ // Convenient and predictable naming scheme.
+ dll_prefix: "".to_string(),
+ dll_suffix: ".ptx".to_string(),
+ exe_suffix: ".ptx".to_string(),
+
+ // Disable the MergeFunctions LLVM optimisation pass because it can
+ // produce kernel functions that call other kernel functions.
+ // This behavior is not supported by the PTX ISA.
+ merge_functions: MergeFunctions::Disabled,
+
+ // TODO(denzp): enable compilation tests for the target and
+ // create the tests for this.
+ abi_blacklist: vec![
+ Abi::Cdecl,
+ Abi::Stdcall,
+ Abi::Fastcall,
+ Abi::Vectorcall,
+ Abi::Thiscall,
+ Abi::Aapcs,
+ Abi::Win64,
+ Abi::SysV64,
+ Abi::Msp430Interrupt,
+ Abi::X86Interrupt,
+ Abi::AmdGpuKernel,
+ ],
+
+ .. Default::default()
+ },
+ })
+}
--- /dev/null
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+ $(RUSTC) main.rs -Clink-arg=--arch=sm_60 --crate-type="bin" -O --target $(TARGET)
+ FileCheck main.rs --input-file $(TMPDIR)/main.ptx
+else
+all:
+endif
--- /dev/null
+#![no_std]
+#![no_main]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+// Check the overridden CUDA arch.
+// CHECK: .target sm_60
+// CHECK: .address_size 64
+
+// Verify that no extra function declarations are present.
+// CHECK-NOT: .func
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+ // CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5;
+ *b = *a + 5;
+}
+
+// Verify that no extra function definitions are there.
+// CHECK-NOT: .func
+// CHECK-NOT: .entry
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+ core::intrinsics::breakpoint();
+ core::hint::unreachable_unchecked();
+}
--- /dev/null
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+ $(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET)
+ $(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET)
+ FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx
+else
+all:
+endif
--- /dev/null
+#![no_std]
+#![deny(warnings)]
+
+#[inline(never)]
+#[no_mangle]
+pub fn wrapping_external_fn(a: u32) -> u32 {
+ a.wrapping_mul(a)
+}
+
+#[inline(never)]
+#[no_mangle]
+pub fn panicking_external_fn(a: u32) -> u32 {
+ a * a
+}
--- /dev/null
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+extern crate dep;
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_20
+// CHECK: .address_size 64
+
+// Make sure declarations are there.
+// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]]
+// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+ // CHECK: call.uni (retval0),
+ // CHECK-NEXT: wrapping_external_fn
+ // CHECK: ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0];
+ let lhs = dep::wrapping_external_fn(*a);
+
+ // CHECK: call.uni (retval0),
+ // CHECK-NEXT: panicking_external_fn
+ // CHECK: ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0];
+ let rhs = dep::panicking_external_fn(*a);
+
+ // CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]];
+ // CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]];
+ *b = lhs + rhs;
+}
+
+// Verify that external function bodies are available.
+// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: {
+// CHECK: st.param.b32 [func_retval0+0], %{{r[0-9]+}};
+// CHECK: }
+
+// Also verify panic behavior.
+// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: {
+// CHECK: %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]];
+// CHECK: [[PANIC_LABEL]]:
+// CHECK: call.uni
+// CHECK: [[PANIC_HANDLER]]
+// CHECK: }
+
+// Verify whether panic handler is present.
+// CHECK: .func [[PANIC_HANDLER]]()
+// CHECK: {
+// CHECK: call.uni
+// CHECK: [[PANIC_FMT]]
+// CHECK: }
+
+// And finally, check the dummy panic formatter.
+// CHECK: .func [[PANIC_FMT]]()
+// CHECK: {
+// CHECK: trap;
+// CHECK: }
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+ core::intrinsics::breakpoint();
+ core::hint::unreachable_unchecked();
+}
--- /dev/null
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+ $(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET)
+ FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s
+else
+all:
+endif
--- /dev/null
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx)]
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_20
+// CHECK: .address_size 64
+
+// Verify that the function name doesn't contain unacceptable characters.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+ // CHECK: call.uni (retval0),
+ // CHECK-NEXT: [[IMPL_FN]]
+ *b = deep::private::MyStruct::new(*a).square();
+}
+
+pub mod deep {
+ pub mod private {
+ pub struct MyStruct<T>(T);
+
+ impl MyStruct<u32> {
+ pub fn new(a: u32) -> Self {
+ MyStruct(a)
+ }
+
+ #[inline(never)]
+ pub fn square(&self) -> u32 {
+ self.0.wrapping_mul(self.0)
+ }
+ }
+ }
+}
+
+// Verify that the implementation function body is available.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]]
+// CHECK: {
+// CHECK: mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}}
+// CHECK: }