Reland #154439. Reverted with #155914. Account for:
- Windows `ptxas` outputting error messages to `stdout` instead of `stderr`
- Tests in `llvm/test/DebugInfo/NVPTX`
95 lines
3.9 KiB
LLVM
95 lines
3.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Negative invocations: each one misses a single requirement named in the
; expected diagnostic (PTX 7.2 with sm_52, then PTX 7.3 with sm_50), so llc
; must fail and print that diagnostic.
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS

; Positive invocations with PTX 7.3 and sm_52: one for the 32-bit triple and
; one for the 64-bit triple, followed by optional ptxas verification of the
; same two configurations when the corresponding lit features are available.
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
; RUN: %if ptxas-isa-7.3 && ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; RUN: %if ptxas-isa-7.3 %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}

; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.

; Dynamically sized allocation of %n bytes with 16-byte alignment; the
; resulting pointer is passed to the external function @bar and the call's
; result is returned.
; The expected PTX rounds the requested size up to a multiple of 8 (add 7,
; then mask with -8), emits the alloca instruction with alignment operand 16,
; and converts the returned address with cvta.local before storing it as the
; call argument.
define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-LABEL: test_dynamic_stackalloc(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<7>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_param_0];
; CHECK-32-NEXT: add.s32 %r2, %r1, 7;
; CHECK-32-NEXT: and.b32 %r3, %r2, -8;
; CHECK-32-NEXT: alloca.u32 %r4, %r3, 16;
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: .param .b32 param0;
; CHECK-32-NEXT: .param .b32 retval0;
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<2>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_param_0];
; CHECK-64-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-64-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-64-NEXT: alloca.u64 %rd4, %rd3, 16;
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: .param .b64 param0;
; CHECK-64-NEXT: .param .b32 retval0;
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
  %alloca = alloca i8, i64 %n, align 16
  %call = call i32 @bar(ptr %alloca)
  ret i32 %call
}

; Dynamically sized allocation of %0 floats requesting only 4-byte alignment;
; the first element is loaded from the fresh allocation and returned.
; The expected PTX scales the element count by 4 (shl by 2), rounds the byte
; size up to a multiple of 8, and emits the alloca instruction with alignment
; operand 8 even though the IR asks for align 4.
; NOTE(review): 8 is presumably the target's default stack alignment - confirm
; against the NVPTX lowering code.
; The load is expected to be an ld.local straight from the alloca result, with
; no cvta.local conversion in between.
define float @test_dynamic_stackalloc_unaligned(i64 %0) {
; CHECK-32-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<7>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-32-NEXT: shl.b32 %r2, %r1, 2;
; CHECK-32-NEXT: add.s32 %r3, %r2, 7;
; CHECK-32-NEXT: and.b32 %r4, %r3, -8;
; CHECK-32-NEXT: alloca.u32 %r5, %r4, 8;
; CHECK-32-NEXT: ld.local.b32 %r6, [%r5];
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<2>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-64-NEXT: shl.b64 %rd2, %rd1, 2;
; CHECK-64-NEXT: add.s64 %rd3, %rd2, 7;
; CHECK-64-NEXT: and.b64 %rd4, %rd3, -8;
; CHECK-64-NEXT: alloca.u64 %rd5, %rd4, 8;
; CHECK-64-NEXT: ld.local.b32 %r1, [%rd5];
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
  %4 = alloca float, i64 %0, align 4
  %5 = getelementptr float, ptr %4, i64 0
  %6 = load float, ptr %5, align 4
  ret float %6
}

; External function taking a pointer and returning i32; only declared here, so
; calls to it cannot be inlined or folded away within this module.
declare i32 @bar(ptr)