
Previously, when a packed struct containing vector data types such as uint16x8_t was passed as a function argument, the alignment of the struct used by the function caller and the alignment used by the callee to load the argument from the stack did not match. This patch implements section 6.8.2, stage C.4 of the Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64): "If the argument is an HFA, an HVA, a Quad-precision Floating-point or short vector type then the NSAA is rounded up to the next multiple of 8 if its natural alignment is ≤ 8 or the next multiple of 16 if its natural alignment is ≥ 16." This ensures that the alignment of packed structs used as function arguments is the same, as described in the AAPCS64, for both the caller and the callee. Reference: AAPCS64 (https://github.com/ARM-software/abi-aa/blob/latest-release/aapcs64/aapcs64.rst) Reviewed By: olista01, rjmccall, tmatheson Differential Revision: https://reviews.llvm.org/D146242
52 lines
2.4 KiB
C
52 lines
2.4 KiB
C
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s

// AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN
// is rounded up to the next even number.

// CHECK: void @test1(i32 noundef %x0, i128 noundef %x2_x3, i128 noundef %x4_x5, i128 noundef %x6_x7, i128 %sp.coerce)
// Small is a union whose only member is an __int128. The pattern above expects
// an argument of this type to be passed coerced to a plain i128.
typedef union { __int128 a; } Small;
// Only the emitted signature is matched, so the body is intentionally empty.
// NOTE(review): the parameter names appear to record the intended placement
// (x0, then the register pairs x2/x3, x4/x5, x6/x7, then the stack); the
// placement itself is not visible in the IR signature.
void test1(int x0, __int128 x2_x3, __int128 x4_x5, __int128 x6_x7, Small sp) {
}

// CHECK: void @test2(i32 noundef %x0, i128 %x2_x3.coerce, i32 noundef %x4, i128 %x6_x7.coerce, i32 noundef %sp, i128 %sp16.coerce)
|
|
void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) {
|
|
}
|
|
|
|
// We coerce HFAs into a contiguous [N x double] type if they're going on the
// stack in order to avoid holes. Make sure we get all of them, and not just the
// first:

// CHECK: void @test3([4 x float] alignstack(8) %s0_s3.coerce, float noundef %s4, [4 x float] alignstack(8) %sp.coerce, [4 x float] alignstack(8) %sp16.coerce)
// HFA is a struct holding exactly four floats. The pattern above expects each
// HFA argument to be passed as [4 x float] with alignstack(8).
typedef struct { float arr[4]; } HFA;
// Only the emitted signature is matched, so the body is intentionally empty.
// Three HFA arguments are passed so that every one of them is covered by the
// pattern, not just the first.
void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
}

// However, we shouldn't perform the [N x double] coercion on types which have
// sufficient alignment to avoid holes on their own. We could coerce to [N x
// fp128] or something, but leaving them as-is retains more information for
// users to debug.

// CHECK: void @test4([3 x <16 x i8>] alignstack(16) %v0_v2.coerce, [3 x <16 x i8>] alignstack(16) %v3_v5.coerce, [3 x <16 x i8>] alignstack(16) %sp.coerce, double noundef %sp48, [3 x <16 x i8>] alignstack(16) %sp64.coerce)
// 16-lane vector of signed char, declared with clang's neon_vector_type
// attribute instead of including a NEON header.
typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
// Aggregate of three such vectors. The pattern above expects it to be passed
// as [3 x <16 x i8>] with alignstack(16).
typedef struct { int8x16_t arr[3]; } BigHFA;
// Only the emitted signature is matched, so the body is intentionally empty.
// NOTE(review): the parameter names appear to record the intended placement
// (v0-v2, v3-v5, then stack offsets 0, 48 and 64); that placement is not
// visible in the IR signature.
void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
}

// It's the job of the argument *consumer* to perform the required sign & zero
// extensions under AAPCS. There shouldn't be any signext or zeroext attributes
// on the parameters of the definition.

// CHECK: define{{.*}} i8 @test5(i8 noundef %a, i16 noundef %b)
// Takes one unsigned 8-bit and one signed 16-bit argument; the pattern above
// expects them as plain i8 and i16 parameters. The body is intentionally
// empty because only the emitted signature is matched, and the run line at
// the top of the file passes -w, so the missing return is not diagnosed.
unsigned char test5(unsigned char a, signed short b) {
}

// __fp16 can be used as a function argument or return type (ACLE 2.0)
|
|
// CHECK: define{{.*}} half @test_half(half noundef %{{.*}})
|
|
__fp16 test_half(__fp16 A) { }
|
|
|
|
// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
|
|
// CHECK: define{{.*}} %struct.HFA_half @test_half_hfa([4 x half] alignstack(8) %{{.*}})
|
|
struct HFA_half { __fp16 a[4]; };
|
|
struct HFA_half test_half_hfa(struct HFA_half A) { }
|