llvm-project/flang/test/Fir/target-rewrite-complex-10-x86.fir
Tom Eccles d16ecad968
[flang] Disable noalias by default (#142128)
With noalias enabled we see a 70% performance regression for exchange2_r
on neoverse-v1 (AWS Graviton 3) using `-mcpu=native -Ofast -flto`. There
is also a smaller regression on neoverse-v2.

This appears to be because function specialization is no longer kicking
in during LTO for digits_2. This can be seen in the output executable:
previously it contained specialized copies of the function with names
like `_QMbrute_forcePdigits_2.specialized.4`. Now there are no names
like this.

The bug is not in flang but in the function specialization pass. However,
due to the size of the regression, I would like to request that noalias be
disabled by default until function specialization has been fixed.
2025-05-30 17:35:41 +01:00

35 lines
1.8 KiB
Plaintext

// Test COMPLEX(10) passing and returning on X86
// RUN: fir-opt --target-rewrite="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64
// RUN: tco -target="x86_64-unknown-linux-gnu" --force-no-alias %s | FileCheck %s --check-prefix=AMD64_LLVM
module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
// Input function with a complex<f80> result: it builds a zero-bits
// complex<f80> value and returns it directly by value.
func.func @returncomplex10() -> complex<f80> {
  %zero = fir.zero_bits complex<f80>
  return %zero : complex<f80>
}
// AMD64-LABEL: func.func @returncomplex10() -> tuple<f80, f80> {
// AMD64: %[[VAL_0:.*]] = fir.zero_bits complex<f80>
// AMD64: %[[VAL_1:.*]] = fir.alloca tuple<f80, f80>
// AMD64: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<tuple<f80, f80>>) -> !fir.ref<complex<f80>>
// AMD64: fir.store %[[VAL_0]] to %[[VAL_2]] : !fir.ref<complex<f80>>
// AMD64: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<tuple<f80, f80>>
// AMD64: return %[[VAL_3]] : tuple<f80, f80>
// AMD64_LLVM: define { x86_fp80, x86_fp80 } @returncomplex10()
// Input function with a by-value complex<f80> argument: the argument is
// stored into a local fir.alloca slot so that it has a use in the body.
func.func @takecomplex10(%z: complex<f80>) {
  %slot = fir.alloca complex<f80>
  fir.store %z to %slot : !fir.ref<complex<f80>>
  return
}
// AMD64-LABEL: func.func @takecomplex10(
// AMD64-SAME: %[[VAL_0:.*]]: !fir.ref<tuple<f80, f80>> {llvm.align = 16 : i32, llvm.byval = tuple<f80, f80>}) {
// AMD64: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<tuple<f80, f80>>) -> !fir.ref<complex<f80>>
// AMD64: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<complex<f80>>
// AMD64: %[[VAL_3:.*]] = fir.alloca complex<f80>
// AMD64: fir.store %[[VAL_2]] to %[[VAL_3]] : !fir.ref<complex<f80>>
// AMD64_LLVM: define void @takecomplex10(ptr noalias byval({ x86_fp80, x86_fp80 }) align 16 captures(none) %0)
}