Florian Hahn 40d952b874
[CGP] Avoid replacing a free ext with multiple other exts. (#77094)
Replacing a free extension with 2 or more extensions unnecessarily
increases the number of IR instructions without providing any benefits.
It also causes operations to be performed on wider types
than necessary.

In some cases, the extra extensions also pessimize codegen (see
bfis-in-loop.ll).

The changes in arm64-codegen-prepare-extload.ll also show that we avoid
promotions that should only be performed in stress mode.

PR: https://github.com/llvm/llvm-project/pull/77094
2024-01-18 10:48:10 +00:00

146 lines
5.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=aarch64-linux-gnu -passes=typepromotion < %s | llc -mtriple=aarch64-linux-gnu -o - | FileCheck %s
; Module-level declarations used by the test functions in this file.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; %struct.bar and %struct.foo only give @global its type; each wraps a single
; pointer-sized member.
%struct.bar = type { %struct.foo }
%struct.foo = type { ptr }
; One-pointer struct used as the element type of the GEPs in the loop bodies.
%struct.wobble = type { ptr }
; Packed struct (the <{ ... }> form); field index 2 is the second bare pointer,
; which the latch blocks load to obtain the next loop pointer.
%struct.zot = type <{ %struct.wobble, ptr, ptr, i8, [7 x i8] }>
; External global: declared here, defined elsewhere. The expected assembly
; reaches it through the GOT (adrp/ldr with :got: relocations).
@global = external global %struct.bar, align 8
; Loop that follows a chain of pointers starting at the value stored in
; @global. Each iteration loads an i16, rebuilds a 16-bit field from it and
; merges that field into an i64 accumulator that is returned on exit.
; In this variant the accumulator and the carried byte both start at 0.
; The expected assembly below contains a single bfi inside the loop.
define i64 @bfis_in_loop_zero() {
; CHECK-LABEL: bfis_in_loop_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:global
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: ldr x8, [x8, :got_lo12:global]
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: .LBB0_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x8, #72]
; CHECK-NEXT: ldr x13, [x8, #8]
; CHECK-NEXT: lsr w11, w10, #8
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ldr x8, [x13, #16]
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w9, w9, w11, eq
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
; CHECK-NEXT: bfi w10, w9, #8, #24
; CHECK-NEXT: orr x11, x11, x12, lsl #16
; CHECK-NEXT: orr x0, x11, x10
; CHECK-NEXT: cbnz x13, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; Initial loop pointer: the pointer stored at @global.
%var = load ptr, ptr @global, align 8
br label %preheader
preheader:
br label %header
header: ; preds = %latch, %preheader
; Loop-carried state:
;   %var4 - i64 accumulator (0 on entry)
;   %var5 - pointer examined in this iteration
;   %var6 - byte selected in the previous iteration (0 on entry)
%var4 = phi i64 [ %var30, %latch ], [ 0, %preheader ]
%var5 = phi ptr [ %var38, %latch ], [ %var, %preheader ]
%var6 = phi i8 [ %var21, %latch ], [ 0, %preheader ]
br label %midblock
midblock: ; preds = %header
; Load the i16 at element index 9 of %struct.wobble from %var5 (byte offset 72
; in the expected assembly).
%var15 = getelementptr inbounds %struct.wobble, ptr %var5, i64 9
%var17 = load i16, ptr %var15, align 8
%var18 = icmp eq i16 %var17, 0
; High byte of the loaded value.
%var19 = lshr i16 %var17, 8
%var20 = trunc i16 %var19 to i8
; Keep the previously selected byte when the loaded value is zero, otherwise
; take the new high byte.
%var21 = select i1 %var18, i8 %var6, i8 %var20
; Rebuild an i16: selected byte in bits 8-15, low byte of the load in bits 0-7.
%var22 = zext i8 %var21 to i16
%var23 = shl nuw i16 %var22, 8
%var24 = and i16 %var17, 255
%var25 = or i16 %var23, %var24
; 65536 sets bit 16 when the loaded value is non-zero.
%var26 = select i1 %var18, i64 0, i64 65536
%var27 = zext i16 %var25 to i64
; Keep only the upper 32 bits of the accumulator, then OR in the flag bit and
; the rebuilt 16-bit field.
%var28 = and i64 %var4, -4294967296
%var29 = or i64 %var26, %var28
%var30 = or i64 %var29, %var27
br label %latch
latch: ; preds = %midblock
; %var35 is the pointer stored at element index 1 of %var5 (byte offset 8).
%var34 = getelementptr inbounds %struct.wobble, ptr %var5, i64 1, i32 0
%var35 = load ptr, ptr %var34, align 8
%var36 = icmp eq ptr %var35, null
; The next loop pointer is field 2 of the %struct.zot at %var35. Note that this
; load is issued before the branch on the null test.
%var37 = getelementptr inbounds %struct.zot, ptr %var35, i64 0, i32 2
%var38 = load ptr, ptr %var37, align 8
; Leave the loop when %var35 is null; otherwise run another iteration.
br i1 %var36, label %exit, label %header
exit:
ret i64 %var30
}
; Same loop shape as a pointer-chain walk that merges an i16-derived field into
; an i64 accumulator, but here the accumulator and the carried byte enter the
; loop as undef instead of a constant. The expected assembly accordingly marks
; x0 as implicit-def before the loop.
define i64 @bfis_in_loop_undef() {
; CHECK-LABEL: bfis_in_loop_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x9, :got:global
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: // implicit-def: $x0
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: .LBB1_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
; CHECK-NEXT: ldr x13, [x9, #8]
; CHECK-NEXT: lsr w11, w10, #8
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ldr x9, [x13, #16]
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
; CHECK-NEXT: bfi w10, w8, #8, #24
; CHECK-NEXT: orr x11, x11, x12, lsl #16
; CHECK-NEXT: orr x0, x11, x10
; CHECK-NEXT: cbnz x13, .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; Initial loop pointer: the pointer stored at @global.
%var = load ptr, ptr @global, align 8
br label %preheader
preheader:
br label %header
header: ; preds = %latch, %preheader
; Loop-carried state:
;   %var4 - i64 accumulator (undef on entry)
;   %var5 - pointer examined in this iteration
;   %var6 - byte selected in the previous iteration (undef on entry)
%var4 = phi i64 [ %var30, %latch ], [ undef, %preheader ]
%var5 = phi ptr [ %var38, %latch ], [ %var, %preheader ]
%var6 = phi i8 [ %var21, %latch ], [ undef, %preheader ]
br label %midblock
midblock: ; preds = %header
; Load the i16 at element index 9 of %struct.wobble from %var5 (byte offset 72
; in the expected assembly).
%var15 = getelementptr inbounds %struct.wobble, ptr %var5, i64 9
%var17 = load i16, ptr %var15, align 8
%var18 = icmp eq i16 %var17, 0
; High byte of the loaded value.
%var19 = lshr i16 %var17, 8
%var20 = trunc i16 %var19 to i8
; Keep the previously selected byte when the loaded value is zero, otherwise
; take the new high byte.
%var21 = select i1 %var18, i8 %var6, i8 %var20
; Rebuild an i16: selected byte in bits 8-15, low byte of the load in bits 0-7.
%var22 = zext i8 %var21 to i16
%var23 = shl nuw i16 %var22, 8
%var24 = and i16 %var17, 255
%var25 = or i16 %var23, %var24
; 65536 sets bit 16 when the loaded value is non-zero.
%var26 = select i1 %var18, i64 0, i64 65536
%var27 = zext i16 %var25 to i64
; Keep only the upper 32 bits of the accumulator, then OR in the flag bit and
; the rebuilt 16-bit field.
%var28 = and i64 %var4, -4294967296
%var29 = or i64 %var26, %var28
%var30 = or i64 %var29, %var27
br label %latch
latch: ; preds = %midblock
; %var35 is the pointer stored at element index 1 of %var5 (byte offset 8).
%var34 = getelementptr inbounds %struct.wobble, ptr %var5, i64 1, i32 0
%var35 = load ptr, ptr %var34, align 8
%var36 = icmp eq ptr %var35, null
; The next loop pointer is field 2 of the %struct.zot at %var35. Note that this
; load is issued before the branch on the null test.
%var37 = getelementptr inbounds %struct.zot, ptr %var35, i64 0, i32 2
%var38 = load ptr, ptr %var37, align 8
; Leave the loop when %var35 is null; otherwise run another iteration.
br i1 %var36, label %exit, label %header
exit:
ret i64 %var30
}