llvm-project/clang/test/CodeGen/alloc-align-attr.c
T0b1-iOS d35931c49e
[Clang][CodeGen][X86] don't coerce int128 into {i64,i64} for SysV-like ABIs (#135230)
Currently, clang coerces (u)int128_t to two i64 IR parameters when they
are passed in registers. This leads to broken debug info for them after
applying SROA+InstCombine. SROA generates IR like this
([godbolt](https://godbolt.org/z/YrTa4chfc)):
```llvm
define dso_local { i64, i64 } @add(i64 noundef %a.coerce0, i64 noundef %a.coerce1)  {
entry:
  %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
  %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
  %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
  %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
    #dbg_value(i128 %a.sroa.0.0.insert.insert, !17, !DIExpression(), !18)
// ...
!17 = !DILocalVariable(name: "a", arg: 1, scope: !10, file: !11, line: 1, type: !14)
// ...
```
  
and InstCombine then removes the `or`, moving it into the
`DIExpression`, and the `shl` at which point the debug info salvaging in
`Transforms/Local` replaces the arguments with `poison` as it does not
allow constants larger than 64 bit in `DIExpression`s.
  
I'm working under the assumption that there is interest in fixing this.
If not, please tell me.
By not coercing `int128_t`s into `{i64, i64}` but keeping them as
`i128`, the debug info stays intact and SelectionDAG then generates two
`DW_OP_LLVM_fragment` expressions for the two corresponding argument
registers.

Given that the ABI code for x64 seems to not coerce the argument when it
is passed on the stack, it should not lead to any problems keeping it as
an `i128` when it is passed in registers.

Alternatively, this could be fixed by checking if a constant value fits
in 64 bits in the debug info salvaging code and then extending the value
on the expression stack to the necessary width. This fixes InstCombine
breaking the debug info but then SelectionDAG removes the expression and
that seems significantly more complex to debug.

Another fix may be to generate `DW_OP_LLVM_fragment` expressions when
removing the `or` as it gets marked as disjoint by InstCombine. However,
I don't know if the KnownBits information is still available at the time
the `or` gets removed and it would probably require refactoring of the
debug info salvaging code as that currently only seems to replace single
expressions and is not designed to support generating new debug records.

Converting `(u)int128_t` arguments to `i128` in the IR seems like the
simpler solution, if it doesn't cause any ABI issues.
2025-07-17 09:57:32 -07:00

115 lines
5.1 KiB
C

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
__INT32_TYPE__*m1(__INT32_TYPE__ i) __attribute__((alloc_align(1)));
// Condition where parameter to m1 is not size_t.
// CHECK-LABEL: @test1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m1(i32 noundef [[TMP0]])
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[TMP0]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test1(__INT32_TYPE__ a) {
return *m1(a);
}
// Condition where test2 param needs casting.
// CHECK-LABEL: @test2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m1(i32 noundef [[CONV]])
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[CONV]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test2(__SIZE_TYPE__ a) {
return *m1(a);
}
__INT32_TYPE__ *m2(__SIZE_TYPE__ i) __attribute__((alloc_align(1)));
// test3 param needs casting, but 'm2' is correct.
// CHECK-LABEL: @test3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m2(i64 noundef [[CONV]])
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CONV]]) ]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test3(__INT32_TYPE__ a) {
return *m2(a);
}
// Every type matches, canonical example.
// CHECK-LABEL: @test4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m2(i64 noundef [[TMP0]])
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[TMP0]]) ]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test4(__SIZE_TYPE__ a) {
return *m2(a);
}
struct Empty {};
struct MultiArgs { __INT64_TYPE__ a, b;};
// Struct parameter doesn't take up an IR parameter, 'i' takes up 1.
// Truncation to i64 is permissible, since alignments of greater than 2^64 are insane.
__INT32_TYPE__ *m3(struct Empty s, __int128_t i) __attribute__((alloc_align(2)));
// CHECK-LABEL: @test5(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m3(i128 noundef [[TMP0]])
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test5(__int128_t a) {
struct Empty e;
return *m3(e, a);
}
// Struct parameter takes up 2 parameters, 'i' takes up 1.
__INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align(2)));
// CHECK-LABEL: @test6(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_MULTIARGS:%.*]], align 8
// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m4(i64 [[TMP2]], i64 [[TMP4]], i128 noundef [[TMP0]])
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT: ret i32 [[TMP5]]
//
__INT32_TYPE__ test6(__int128_t a) {
struct MultiArgs e;
return *m4(e, a);
}