llvm-project/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
Alex MacLean 831592d617
[NVPTX] Fixup under-aligned dynamic alloca lowering (#139628)
The alignment on an ISD::DYNAMIC_STACKALLOC node may be 0 to indicate
that the default stack alignment should be used. Prior to this change,
we passed this alignment through unchanged leading to an error in
ptxas. Now, we use the stack-alignment in this case. Also did a little
cleanup while I'm here.
2025-05-13 09:56:41 -07:00

30 lines
1.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s
; The llc command above compiles this file for the nvptx64 triple with
; -mattr=+ptx73 and -mcpu=sm_52, and pipes the generated output to FileCheck,
; which matches it against the assertions embedded in this same file.
target triple = "nvptx64-nvidia-cuda"
; @foo makes two dynamically sized stack allocations of %a bytes each (i8
; elements, count %a), both requesting align 16, and stores the two resulting
; pointers through %p0 and %p1 respectively.
;
; The expected PTX below rounds %a up to a multiple of 8 a single time
; (add 7, then and with -8) and feeds that one size to both alloca.u64
; instructions, each with an alignment operand of 16. Each alloca result goes
; through cvta.local.u64 before being written out with st.b64.
define void @foo(i64 %a, ptr %p0, ptr %p1) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
; CHECK-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-NEXT: alloca.u64 %rd4, %rd3, 16;
; CHECK-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-NEXT: ld.param.b64 %rd6, [foo_param_1];
; CHECK-NEXT: alloca.u64 %rd7, %rd3, 16;
; CHECK-NEXT: cvta.local.u64 %rd8, %rd7;
; CHECK-NEXT: ld.param.b64 %rd9, [foo_param_2];
; CHECK-NEXT: st.b64 [%rd6], %rd5;
; CHECK-NEXT: st.b64 [%rd9], %rd8;
; CHECK-NEXT: ret;
; Two variable-length allocations that share the same runtime size %a.
%b = alloca i8, i64 %a, align 16
%c = alloca i8, i64 %a, align 16
; Write each allocation's address out through the pointer arguments.
store ptr %b, ptr %p0, align 8
store ptr %c, ptr %p1, align 8
ret void
}