
These classes are redundant, as the untyped "Int" classes can be used for all float operations. This change is intended to be as minimal as possible and leaves the many potential simplifications and refactors this exposes as future work.
163 lines
4.7 KiB
LLVM
163 lines
4.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
;
; The test is compiled twice for sm_20: once with the nvptx triple (checked
; under the PTX32 prefix) and once with the nvptx64 triple (checked under the
; PTX64 prefix). When the ptxas feature is available the generated PTX is also
; passed to the ptxas verifier; the nvptx variant of that step is additionally
; skipped when the ptxas-12.0 feature is set.
|
|
|
|
|
|
;; i8
|
|
; Loads an i8 through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 under the PTX32 prefix, b64 under PTX64), the value
; is fetched with a plain `ld.b8` into a 32-bit register, and that register is
; stored to func_retval0 as b32.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define i8 @ld_global_i8(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_i8(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i8_param_0];
; PTX32-NEXT: ld.b8 %r2, [%r1];
; PTX32-NEXT: st.param.b32 [func_retval0], %r2;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_i8(
; PTX64: {
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i8_param_0];
; PTX64-NEXT: ld.b8 %r1, [%rd1];
; PTX64-NEXT: st.param.b32 [func_retval0], %r1;
; PTX64-NEXT: ret;
  %a = load i8, ptr addrspace(0) %ptr
  ret i8 %a
}
|
|
|
|
;; i16
|
|
; Loads an i16 through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 under the PTX32 prefix, b64 under PTX64), the value
; is fetched with a plain `ld.b16` into a 32-bit register, and that register
; is stored to func_retval0 as b32.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define i16 @ld_global_i16(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_i16(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i16_param_0];
; PTX32-NEXT: ld.b16 %r2, [%r1];
; PTX32-NEXT: st.param.b32 [func_retval0], %r2;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_i16(
; PTX64: {
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i16_param_0];
; PTX64-NEXT: ld.b16 %r1, [%rd1];
; PTX64-NEXT: st.param.b32 [func_retval0], %r1;
; PTX64-NEXT: ret;
  %a = load i16, ptr addrspace(0) %ptr
  ret i16 %a
}
|
|
|
|
;; i32
|
|
; Loads an i32 through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 under the PTX32 prefix, b64 under PTX64), the value
; is fetched with a plain `ld.b32` into a 32-bit register, and that register
; is stored to func_retval0 as b32.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define i32 @ld_global_i32(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_i32(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i32_param_0];
; PTX32-NEXT: ld.b32 %r2, [%r1];
; PTX32-NEXT: st.param.b32 [func_retval0], %r2;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_i32(
; PTX64: {
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i32_param_0];
; PTX64-NEXT: ld.b32 %r1, [%rd1];
; PTX64-NEXT: st.param.b32 [func_retval0], %r1;
; PTX64-NEXT: ret;
  %a = load i32, ptr addrspace(0) %ptr
  ret i32 %a
}
|
|
|
|
;; i64
|
|
; Loads an i64 through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 into a 32-bit register under the PTX32 prefix, b64
; into a 64-bit register under PTX64), the value is fetched with a plain
; `ld.b64` into a 64-bit register, and that register is stored to
; func_retval0 as b64.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define i64 @ld_global_i64(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_i64(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<2>;
; PTX32-NEXT: .reg .b64 %rd<2>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i64_param_0];
; PTX32-NEXT: ld.b64 %rd1, [%r1];
; PTX32-NEXT: st.param.b64 [func_retval0], %rd1;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_i64(
; PTX64: {
; PTX64-NEXT: .reg .b64 %rd<3>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i64_param_0];
; PTX64-NEXT: ld.b64 %rd2, [%rd1];
; PTX64-NEXT: st.param.b64 [func_retval0], %rd2;
; PTX64-NEXT: ret;
  %a = load i64, ptr addrspace(0) %ptr
  ret i64 %a
}
|
|
|
|
;; f32
|
|
; Loads a float through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 under the PTX32 prefix, b64 under PTX64), the value
; is fetched with a plain `ld.b32` into an untyped 32-bit register, and that
; register is stored to func_retval0 as b32. No float-typed register or
; instruction appears in the expected output.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define float @ld_global_f32(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_f32(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_f32_param_0];
; PTX32-NEXT: ld.b32 %r2, [%r1];
; PTX32-NEXT: st.param.b32 [func_retval0], %r2;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_f32(
; PTX64: {
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_f32_param_0];
; PTX64-NEXT: ld.b32 %r1, [%rd1];
; PTX64-NEXT: st.param.b32 [func_retval0], %r1;
; PTX64-NEXT: ret;
  %a = load float, ptr addrspace(0) %ptr
  ret float %a
}
|
|
|
|
;; f64
|
|
; Loads a double through %ptr (address space 0) and returns it.
; Expected PTX, per the autogenerated checks below: the pointer parameter is
; read with ld.param (b32 into a 32-bit register under the PTX32 prefix, b64
; into a 64-bit register under PTX64), the value is fetched with a plain
; `ld.b64` into an untyped 64-bit register, and that register is stored to
; func_retval0 as b64. No float-typed register or instruction appears in the
; expected output.
; NOTE(review): the function is named ld_global_* but the expected load has no
; `.global` qualifier; address space 0 is presumably the NVPTX generic space
; (confirm against the NVPTX usage guide). The name is kept because the check
; lines and the parameter symbol are derived from it.
define double @ld_global_f64(ptr addrspace(0) %ptr) {
; PTX32-LABEL: ld_global_f64(
; PTX32: {
; PTX32-NEXT: .reg .b32 %r<2>;
; PTX32-NEXT: .reg .b64 %rd<2>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: ld.param.b32 %r1, [ld_global_f64_param_0];
; PTX32-NEXT: ld.b64 %rd1, [%r1];
; PTX32-NEXT: st.param.b64 [func_retval0], %rd1;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: ld_global_f64(
; PTX64: {
; PTX64-NEXT: .reg .b64 %rd<3>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_f64_param_0];
; PTX64-NEXT: ld.b64 %rd2, [%rd1];
; PTX64-NEXT: st.param.b64 [func_retval0], %rd2;
; PTX64-NEXT: ret;
  %a = load double, ptr addrspace(0) %ptr
  ret double %a
}
|