Him188 77af9d1023
[AArch64][GlobalISel] Implement selectVaStartAAPCS (#106979)
This commit adds the missing support for varargs in the instruction
selection pass for AAPCS. Previously we only implemented this for
Darwin.

The implementation follows the AAPCS and mirrors SelectionDAG's
LowerAAPCS_VASTART.

It resolves all VA_START fallbacks in RAJAperf, llvm-test-suite, and
SPEC CPU2017. These benchmarks now compile and pass without fallbacks
due to varargs.

---------

Co-authored-by: Madhur Amilkanthwar <madhura@nvidia.com>
2024-09-19 11:48:14 +05:30

385 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=0 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=1 -global-isel-abort=1 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Explicit data layout for the module; both RUN lines supply the triple
; (aarch64-unknown-linux-gnu) on the llc command line instead.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
; va_list record: three pointers followed by two i32 fields (presumably the
; AAPCS64 __stack, __gr_top, __vr_top, __gr_offs, __vr_offs layout -- confirm).
; NOTE(review): no function visible in this file references this type; the
; tests below allocate a bare `ptr` slot for va_start instead.
%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
; Intrinsic declarations. Of these, only @llvm.va_start is called by the
; functions visible in this file; the va_end, lifetime and *.p0 variants are
; declared but unused here.
declare void @llvm.va_start(ptr) nounwind
declare void @llvm.va_end(ptr) nounwind
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @llvm.va_start.p0(ptr)
declare void @llvm.va_end.p0(ptr)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
; Baseline case: no named parameters, so every argument is variadic.
; The body only calls va_start on a local slot and returns 1. The autogenerated
; assertions below pin the -O0 output of both RUN lines (SelectionDAG first,
; then GlobalISel): all of x0-x7 and q0-q7 are spilled, and the two 32-bit
; va_list fields are initialised to -64 and -128.
; NOTE(review): %g is a single pointer-sized slot, yet the assertions show
; va_start storing at offsets 0, 8, 16, 24 and 28 from one base address
; (presumably the address of %g), i.e. past the end of the alloca. That is
; harmless for a compile-only test, but allocating %struct.__va_list would be
; more faithful -- confirm, and regenerate the assertions if it is changed.
define i64 @vararg(...) #0 {
; CHECK-SD-LABEL: vararg:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #224
; CHECK-SD-NEXT: .cfi_def_cfa_offset 224
; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-SD-NEXT: add x29, sp, #208
; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
; CHECK-SD-NEXT: .cfi_offset w30, -8
; CHECK-SD-NEXT: .cfi_offset w29, -16
; CHECK-SD-NEXT: str q7, [sp, #112]
; CHECK-SD-NEXT: str q6, [sp, #96]
; CHECK-SD-NEXT: str q5, [sp, #80]
; CHECK-SD-NEXT: str q4, [sp, #64]
; CHECK-SD-NEXT: str q3, [sp, #48]
; CHECK-SD-NEXT: str q2, [sp, #32]
; CHECK-SD-NEXT: str q1, [sp, #16]
; CHECK-SD-NEXT: str q0, [sp]
; CHECK-SD-NEXT: stur x7, [x29, #-16]
; CHECK-SD-NEXT: stur x6, [x29, #-24]
; CHECK-SD-NEXT: stur x5, [x29, #-32]
; CHECK-SD-NEXT: stur x4, [x29, #-40]
; CHECK-SD-NEXT: stur x3, [x29, #-48]
; CHECK-SD-NEXT: stur x2, [x29, #-56]
; CHECK-SD-NEXT: stur x1, [x29, #-64]
; CHECK-SD-NEXT: stur x0, [x29, #-72]
; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
; CHECK-SD-NEXT: str w8, [x29, #20]
; CHECK-SD-NEXT: mov w8, #-64 // =0xffffffc0
; CHECK-SD-NEXT: str w8, [x29, #16]
; CHECK-SD-NEXT: add x8, x29, #16
; CHECK-SD-NEXT: stur x8, [x29, #-8]
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: add x8, x8, #128
; CHECK-SD-NEXT: str x8, [x29, #8]
; CHECK-SD-NEXT: sub x8, x29, #72
; CHECK-SD-NEXT: add x8, x8, #64
; CHECK-SD-NEXT: str x8, [x29]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, w8
; CHECK-SD-NEXT: .cfi_def_cfa wsp, 224
; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #224
; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
; CHECK-SD-NEXT: .cfi_restore w30
; CHECK-SD-NEXT: .cfi_restore w29
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vararg:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #224
; CHECK-GI-NEXT: .cfi_def_cfa_offset 224
; CHECK-GI-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-GI-NEXT: add x29, sp, #208
; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
; CHECK-GI-NEXT: .cfi_offset w30, -8
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: stur x0, [x29, #-64]
; CHECK-GI-NEXT: stur x1, [x29, #-56]
; CHECK-GI-NEXT: stur x2, [x29, #-48]
; CHECK-GI-NEXT: stur x3, [x29, #-40]
; CHECK-GI-NEXT: stur x4, [x29, #-32]
; CHECK-GI-NEXT: stur x5, [x29, #-24]
; CHECK-GI-NEXT: stur x6, [x29, #-16]
; CHECK-GI-NEXT: stur x7, [x29, #-8]
; CHECK-GI-NEXT: str q0, [sp, #16]
; CHECK-GI-NEXT: str q1, [sp, #32]
; CHECK-GI-NEXT: str q2, [sp, #48]
; CHECK-GI-NEXT: str q3, [sp, #64]
; CHECK-GI-NEXT: str q4, [sp, #80]
; CHECK-GI-NEXT: str q5, [sp, #96]
; CHECK-GI-NEXT: str q6, [sp, #112]
; CHECK-GI-NEXT: str q7, [sp, #128]
; CHECK-GI-NEXT: add x9, sp, #8
; CHECK-GI-NEXT: add x8, x29, #16
; CHECK-GI-NEXT: str x8, [x9]
; CHECK-GI-NEXT: add x8, x29, #0
; CHECK-GI-NEXT: str x8, [x9, #8]
; CHECK-GI-NEXT: add x8, sp, #144
; CHECK-GI-NEXT: str x8, [x9, #16]
; CHECK-GI-NEXT: mov w8, #-64 // =0xffffffc0
; CHECK-GI-NEXT: str w8, [x9, #24]
; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
; CHECK-GI-NEXT: str w8, [x9, #28]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: .cfi_def_cfa wsp, 224
; CHECK-GI-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #224
; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
; CHECK-GI-NEXT: .cfi_restore w30
; CHECK-GI-NEXT: .cfi_restore w29
; CHECK-GI-NEXT: ret
entry:
; Local slot handed to va_start; it is never read back.
%g = alloca ptr, align 4
call void @llvm.va_start(ptr %g)
; Constant return value; the variadic arguments are never consumed.
ret i64 1
}
; Seven named i64 parameters precede the ellipsis. The assertions below show
; that only one general-purpose register (x7) is spilled for the variadic
; part, while all of q0-q7 are still spilled; the two 32-bit va_list fields are
; initialised to -8 and -128 accordingly.
; The SelectionDAG and GlobalISel outputs differ in frame size (160 vs 176
; bytes) and in spill order, which is why each has its own set of assertions.
; NOTE(review): as in @vararg, %g is only pointer-sized while va_start stores
; up to offset 28 from one base address -- confirm whether the slot should be
; %struct.__va_list.
define i64 @vararg_many_gpr(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) #0 {
; CHECK-SD-LABEL: vararg_many_gpr:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #160
; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
; CHECK-SD-NEXT: stp x29, x30, [sp, #144] // 16-byte Folded Spill
; CHECK-SD-NEXT: add x29, sp, #144
; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
; CHECK-SD-NEXT: .cfi_offset w30, -8
; CHECK-SD-NEXT: .cfi_offset w29, -16
; CHECK-SD-NEXT: str q7, [sp, #112]
; CHECK-SD-NEXT: str q6, [sp, #96]
; CHECK-SD-NEXT: str q5, [sp, #80]
; CHECK-SD-NEXT: str q4, [sp, #64]
; CHECK-SD-NEXT: str q3, [sp, #48]
; CHECK-SD-NEXT: str q2, [sp, #32]
; CHECK-SD-NEXT: str q1, [sp, #16]
; CHECK-SD-NEXT: str q0, [sp]
; CHECK-SD-NEXT: stur x7, [x29, #-16]
; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
; CHECK-SD-NEXT: str w8, [x29, #20]
; CHECK-SD-NEXT: mov w8, #-8 // =0xfffffff8
; CHECK-SD-NEXT: str w8, [x29, #16]
; CHECK-SD-NEXT: add x8, x29, #16
; CHECK-SD-NEXT: stur x8, [x29, #-8]
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: add x8, x8, #128
; CHECK-SD-NEXT: str x8, [x29, #8]
; CHECK-SD-NEXT: sub x8, x29, #16
; CHECK-SD-NEXT: add x8, x8, #8
; CHECK-SD-NEXT: str x8, [x29]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, w8
; CHECK-SD-NEXT: .cfi_def_cfa wsp, 160
; CHECK-SD-NEXT: ldp x29, x30, [sp, #144] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #160
; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
; CHECK-SD-NEXT: .cfi_restore w30
; CHECK-SD-NEXT: .cfi_restore w29
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vararg_many_gpr:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #176
; CHECK-GI-NEXT: .cfi_def_cfa_offset 176
; CHECK-GI-NEXT: stp x29, x30, [sp, #160] // 16-byte Folded Spill
; CHECK-GI-NEXT: add x29, sp, #160
; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
; CHECK-GI-NEXT: .cfi_offset w30, -8
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: stur x7, [x29, #-8]
; CHECK-GI-NEXT: str q0, [sp, #16]
; CHECK-GI-NEXT: str q1, [sp, #32]
; CHECK-GI-NEXT: str q2, [sp, #48]
; CHECK-GI-NEXT: str q3, [sp, #64]
; CHECK-GI-NEXT: str q4, [sp, #80]
; CHECK-GI-NEXT: str q5, [sp, #96]
; CHECK-GI-NEXT: str q6, [sp, #112]
; CHECK-GI-NEXT: str q7, [sp, #128]
; CHECK-GI-NEXT: add x9, sp, #8
; CHECK-GI-NEXT: add x8, x29, #16
; CHECK-GI-NEXT: str x8, [x9]
; CHECK-GI-NEXT: add x8, x29, #0
; CHECK-GI-NEXT: str x8, [x9, #8]
; CHECK-GI-NEXT: add x8, sp, #144
; CHECK-GI-NEXT: str x8, [x9, #16]
; CHECK-GI-NEXT: mov w8, #-8 // =0xfffffff8
; CHECK-GI-NEXT: str w8, [x9, #24]
; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
; CHECK-GI-NEXT: str w8, [x9, #28]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: .cfi_def_cfa wsp, 176
; CHECK-GI-NEXT: ldp x29, x30, [sp, #160] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #176
; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
; CHECK-GI-NEXT: .cfi_restore w30
; CHECK-GI-NEXT: .cfi_restore w29
; CHECK-GI-NEXT: ret
entry:
; Local slot handed to va_start; the named parameters %a1-%a7 are unused.
%g = alloca ptr, align 4
call void @llvm.va_start(ptr %g)
ret i64 1
}
; Seven named float parameters precede the ellipsis. The assertions below show
; the mirror image of @vararg_many_gpr: only one FP/SIMD register (q7) is
; spilled for the variadic part, while all of x0-x7 are spilled; the two
; 32-bit va_list fields are initialised to -64 and -16.
; Both selectors use a 112-byte frame here, but they place the spills at
; different stack offsets and emit them in a different order.
; NOTE(review): as in @vararg, %g is only pointer-sized while va_start stores
; up to offset 28 from one base address -- confirm whether the slot should be
; %struct.__va_list.
define i64 @vararg_many_float(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, ...) #0 {
; CHECK-SD-LABEL: vararg_many_float:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #112
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: add x29, sp, #96
; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
; CHECK-SD-NEXT: .cfi_offset w30, -8
; CHECK-SD-NEXT: .cfi_offset w29, -16
; CHECK-SD-NEXT: str q7, [sp]
; CHECK-SD-NEXT: str x7, [sp, #80]
; CHECK-SD-NEXT: str x6, [sp, #72]
; CHECK-SD-NEXT: str x5, [sp, #64]
; CHECK-SD-NEXT: str x4, [sp, #56]
; CHECK-SD-NEXT: str x3, [sp, #48]
; CHECK-SD-NEXT: str x2, [sp, #40]
; CHECK-SD-NEXT: str x1, [sp, #32]
; CHECK-SD-NEXT: str x0, [sp, #24]
; CHECK-SD-NEXT: mov w8, #-16 // =0xfffffff0
; CHECK-SD-NEXT: str w8, [x29, #20]
; CHECK-SD-NEXT: mov w8, #-64 // =0xffffffc0
; CHECK-SD-NEXT: str w8, [x29, #16]
; CHECK-SD-NEXT: add x8, x29, #16
; CHECK-SD-NEXT: stur x8, [x29, #-8]
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: add x8, x8, #16
; CHECK-SD-NEXT: str x8, [x29, #8]
; CHECK-SD-NEXT: add x8, sp, #24
; CHECK-SD-NEXT: add x8, x8, #64
; CHECK-SD-NEXT: str x8, [x29]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, w8
; CHECK-SD-NEXT: .cfi_def_cfa wsp, 112
; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #112
; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
; CHECK-SD-NEXT: .cfi_restore w30
; CHECK-SD-NEXT: .cfi_restore w29
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vararg_many_float:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #112
; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
; CHECK-GI-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: add x29, sp, #96
; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
; CHECK-GI-NEXT: .cfi_offset w30, -8
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: str x0, [sp, #32]
; CHECK-GI-NEXT: str x1, [sp, #40]
; CHECK-GI-NEXT: str x2, [sp, #48]
; CHECK-GI-NEXT: str x3, [sp, #56]
; CHECK-GI-NEXT: str x4, [sp, #64]
; CHECK-GI-NEXT: str x5, [sp, #72]
; CHECK-GI-NEXT: str x6, [sp, #80]
; CHECK-GI-NEXT: str x7, [sp, #88]
; CHECK-GI-NEXT: str q7, [sp, #16]
; CHECK-GI-NEXT: add x9, sp, #8
; CHECK-GI-NEXT: add x8, x29, #16
; CHECK-GI-NEXT: str x8, [x9]
; CHECK-GI-NEXT: add x8, sp, #96
; CHECK-GI-NEXT: str x8, [x9, #8]
; CHECK-GI-NEXT: add x8, sp, #32
; CHECK-GI-NEXT: str x8, [x9, #16]
; CHECK-GI-NEXT: mov w8, #-64 // =0xffffffc0
; CHECK-GI-NEXT: str w8, [x9, #24]
; CHECK-GI-NEXT: mov w8, #-16 // =0xfffffff0
; CHECK-GI-NEXT: str w8, [x9, #28]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: .cfi_def_cfa wsp, 112
; CHECK-GI-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #112
; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
; CHECK-GI-NEXT: .cfi_restore w30
; CHECK-GI-NEXT: .cfi_restore w29
; CHECK-GI-NEXT: ret
entry:
; Local slot handed to va_start; the named parameters %a1-%a7 are unused.
%g = alloca ptr, align 4
call void @llvm.va_start(ptr %g)
ret i64 1
}
; Mixed case: one named integer (%i) and one named float (%f) precede the
; ellipsis. The assertions below show x1-x7 and q1-q7 being spilled (x0 and q0
; are not), and the two 32-bit va_list fields initialised to -56 and -112.
; The SelectionDAG and GlobalISel outputs differ in frame size (192 vs 208
; bytes) and in spill order.
; NOTE(review): as in @vararg, %g is only pointer-sized while va_start stores
; up to offset 28 from one base address -- confirm whether the slot should be
; %struct.__va_list.
define i64 @gpr1_fpr1(i32 %i, float %f, ...) #0 {
; CHECK-SD-LABEL: gpr1_fpr1:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #192
; CHECK-SD-NEXT: .cfi_def_cfa_offset 192
; CHECK-SD-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
; CHECK-SD-NEXT: add x29, sp, #176
; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
; CHECK-SD-NEXT: .cfi_offset w30, -8
; CHECK-SD-NEXT: .cfi_offset w29, -16
; CHECK-SD-NEXT: str q7, [sp, #96]
; CHECK-SD-NEXT: str q6, [sp, #80]
; CHECK-SD-NEXT: str q5, [sp, #64]
; CHECK-SD-NEXT: str q4, [sp, #48]
; CHECK-SD-NEXT: str q3, [sp, #32]
; CHECK-SD-NEXT: str q2, [sp, #16]
; CHECK-SD-NEXT: str q1, [sp]
; CHECK-SD-NEXT: stur x7, [x29, #-16]
; CHECK-SD-NEXT: stur x6, [x29, #-24]
; CHECK-SD-NEXT: stur x5, [x29, #-32]
; CHECK-SD-NEXT: stur x4, [x29, #-40]
; CHECK-SD-NEXT: stur x3, [x29, #-48]
; CHECK-SD-NEXT: stur x2, [x29, #-56]
; CHECK-SD-NEXT: stur x1, [x29, #-64]
; CHECK-SD-NEXT: mov w8, #-112 // =0xffffff90
; CHECK-SD-NEXT: str w8, [x29, #20]
; CHECK-SD-NEXT: mov w8, #-56 // =0xffffffc8
; CHECK-SD-NEXT: str w8, [x29, #16]
; CHECK-SD-NEXT: add x8, x29, #16
; CHECK-SD-NEXT: stur x8, [x29, #-8]
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: add x8, x8, #112
; CHECK-SD-NEXT: str x8, [x29, #8]
; CHECK-SD-NEXT: sub x8, x29, #64
; CHECK-SD-NEXT: add x8, x8, #56
; CHECK-SD-NEXT: str x8, [x29]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, w8
; CHECK-SD-NEXT: .cfi_def_cfa wsp, 192
; CHECK-SD-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #192
; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
; CHECK-SD-NEXT: .cfi_restore w30
; CHECK-SD-NEXT: .cfi_restore w29
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: gpr1_fpr1:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #208
; CHECK-GI-NEXT: .cfi_def_cfa_offset 208
; CHECK-GI-NEXT: stp x29, x30, [sp, #192] // 16-byte Folded Spill
; CHECK-GI-NEXT: add x29, sp, #192
; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
; CHECK-GI-NEXT: .cfi_offset w30, -8
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: stur x1, [x29, #-56]
; CHECK-GI-NEXT: stur x2, [x29, #-48]
; CHECK-GI-NEXT: stur x3, [x29, #-40]
; CHECK-GI-NEXT: stur x4, [x29, #-32]
; CHECK-GI-NEXT: stur x5, [x29, #-24]
; CHECK-GI-NEXT: stur x6, [x29, #-16]
; CHECK-GI-NEXT: stur x7, [x29, #-8]
; CHECK-GI-NEXT: str q1, [sp, #16]
; CHECK-GI-NEXT: str q2, [sp, #32]
; CHECK-GI-NEXT: str q3, [sp, #48]
; CHECK-GI-NEXT: str q4, [sp, #64]
; CHECK-GI-NEXT: str q5, [sp, #80]
; CHECK-GI-NEXT: str q6, [sp, #96]
; CHECK-GI-NEXT: str q7, [sp, #112]
; CHECK-GI-NEXT: add x9, sp, #8
; CHECK-GI-NEXT: add x8, x29, #16
; CHECK-GI-NEXT: str x8, [x9]
; CHECK-GI-NEXT: add x8, x29, #0
; CHECK-GI-NEXT: str x8, [x9, #8]
; CHECK-GI-NEXT: add x8, sp, #128
; CHECK-GI-NEXT: str x8, [x9, #16]
; CHECK-GI-NEXT: mov w8, #-56 // =0xffffffc8
; CHECK-GI-NEXT: str w8, [x9, #24]
; CHECK-GI-NEXT: mov w8, #-112 // =0xffffff90
; CHECK-GI-NEXT: str w8, [x9, #28]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: .cfi_def_cfa wsp, 208
; CHECK-GI-NEXT: ldp x29, x30, [sp, #192] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #208
; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
; CHECK-GI-NEXT: .cfi_restore w30
; CHECK-GI-NEXT: .cfi_restore w29
; CHECK-GI-NEXT: ret
entry:
; Local slot handed to va_start; the named parameters %i and %f are unused.
%g = alloca ptr, align 4
call void @llvm.va_start(ptr %g)
ret i64 1
}
; To make the outputs more readable: every function visible in this file uses
; this one attribute group, so they all share the same frame and unwind setup.
; "frame-pointer"="all" presumably accounts for the x29/x30 frame record seen
; in each prologue, and uwtable for the .cfi_* directives -- confirm against
; the LangRef before relying on this.
attributes #0 = { uwtable "frame-pointer"="all" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}