
Currently, clang coerces (u)int128_t to two i64 IR parameters when they are passed in registers. This leads to broken debug info for them after applying SROA+InstCombine. SROA generates IR like this ([godbolt](https://godbolt.org/z/YrTa4chfc)): ```llvm define dso_local { i64, i64 } @add(i64 noundef %a.coerce0, i64 noundef %a.coerce1) { entry: %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128 %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64 %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128 %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext #dbg_value(i128 %a.sroa.0.0.insert.insert, !17, !DIExpression(), !18) // ... !17 = !DILocalVariable(name: "a", arg: 1, scope: !10, file: !11, line: 1, type: !14) // ... ``` and InstCombine then removes the `or`, moving it into the `DIExpression`, and the `shl` at which point the debug info salvaging in `Transforms/Local` replaces the arguments with `poison` as it does not allow constants larger than 64 bit in `DIExpression`s. I'm working under the assumption that there is interest in fixing this. If not, please tell me. By not coercing `int128_t`s into `{i64, i64}` but keeping them as `i128`, the debug info stays intact and SelectionDAG then generates two `DW_OP_LLVM_fragment` expressions for the two corresponding argument registers. Given that the ABI code for x64 seems to not coerce the argument when it is passed on the stack, it should not lead to any problems keeping it as an `i128` when it is passed in registers. Alternatively, this could be fixed by checking if a constant value fits in 64 bits in the debug info salvaging code and then extending the value on the expression stack to the necessary width. This fixes InstCombine breaking the debug info but then SelectionDAG removes the expression and that seems significantly more complex to debug. 
Another possible fix would be to generate `DW_OP_LLVM_fragment` expressions when removing the `or`, since InstCombine marks it as disjoint. However, I don't know whether the KnownBits information is still available at the point where the `or` is removed, and it would probably require refactoring the debug info salvaging code: that code currently only replaces single expressions and is not designed to generate new debug records. Converting `(u)int128_t` arguments to `i128` in the IR therefore seems like the simpler solution, provided it doesn't cause any ABI issues.
115 lines
5.1 KiB
C
115 lines
5.1 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
|
// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
|
|
|
|
// m1 returns a pointer whose alignment is promised (via alloc_align) to equal
// its first argument at runtime.
__INT32_TYPE__*m1(__INT32_TYPE__ i) __attribute__((alloc_align(1)));

// Condition where parameter to m1 is not size_t: the i32 alignment operand
// must be zero-extended to i64 before feeding the align assume bundle.
// CHECK-LABEL: @test1(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT:    [[CALL:%.*]] = call ptr @m1(i32 noundef [[TMP0]])
// CHECK-NEXT:    [[CASTED_ALIGN:%.*]] = zext i32 [[TMP0]] to i64
// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT:    ret i32 [[TMP1]]
//
__INT32_TYPE__ test1(__INT32_TYPE__ a) {
  return *m1(a);
}
|
|
// Condition where test2 param needs casting.
|
|
// CHECK-LABEL: @test2(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
|
|
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
|
|
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m1(i32 noundef [[CONV]])
|
|
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[CONV]] to i64
|
|
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
|
|
// CHECK-NEXT: ret i32 [[TMP1]]
|
|
//
|
|
__INT32_TYPE__ test2(__SIZE_TYPE__ a) {
|
|
return *m1(a);
|
|
}
|
|
// Same contract as m1, but the alignment parameter is already size_t.
__INT32_TYPE__ *m2(__SIZE_TYPE__ i) __attribute__((alloc_align(1)));

// test3 param needs casting, but 'm2' is correct: the i32 argument is
// sign-extended to i64 for the call, and that i64 value feeds the assume
// bundle directly (no separate cast needed).
// CHECK-LABEL: @test3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP0]] to i64
// CHECK-NEXT:    [[CALL:%.*]] = call ptr @m2(i64 noundef [[CONV]])
// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CONV]]) ]
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT:    ret i32 [[TMP1]]
//
__INT32_TYPE__ test3(__INT32_TYPE__ a) {
  return *m2(a);
}
|
|
|
|
// Every type matches, canonical example.
|
|
// CHECK-LABEL: @test4(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
|
|
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m2(i64 noundef [[TMP0]])
|
|
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[TMP0]]) ]
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
|
|
// CHECK-NEXT: ret i32 [[TMP1]]
|
|
//
|
|
__INT32_TYPE__ test4(__SIZE_TYPE__ a) {
|
|
return *m2(a);
|
|
}
|
|
|
|
|
|
struct Empty {};
struct MultiArgs { __INT64_TYPE__ a, b;};

// Struct parameter doesn't take up an IR parameter, 'i' takes up 1.
// Truncation to i64 is permissible, since alignments of greater than 2^64 are insane.
// NOTE(review): the __int128_t argument stays a single i128 IR parameter here
// (not coerced to two i64s), matching the x86-64 ABI change described above.
__INT32_TYPE__ *m3(struct Empty s, __int128_t i) __attribute__((alloc_align(2)));
// CHECK-LABEL: @test5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i128, align 16
// CHECK-NEXT:    [[E:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
// CHECK-NEXT:    store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[CALL:%.*]] = call ptr @m3(i128 noundef [[TMP0]])
// CHECK-NEXT:    [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
// CHECK-NEXT:    ret i32 [[TMP1]]
//
__INT32_TYPE__ test5(__int128_t a) {
  struct Empty e;
  return *m3(e, a);
}
|
|
// Struct parameter takes up 2 parameters, 'i' takes up 1.
|
|
__INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align(2)));
|
|
// CHECK-LABEL: @test6(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16
|
|
// CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_MULTIARGS:%.*]], align 8
|
|
// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
|
|
// CHECK-NEXT: [[CALL:%.*]] = call ptr @m4(i64 [[TMP2]], i64 [[TMP4]], i128 noundef [[TMP0]])
|
|
// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
|
|
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CALL]], align 4
|
|
// CHECK-NEXT: ret i32 [[TMP5]]
|
|
//
|
|
__INT32_TYPE__ test6(__int128_t a) {
|
|
struct MultiArgs e;
|
|
return *m4(e, a);
|
|
}
|
|
|