
Replacing a free extension with 2 or more extensions unnecessarily increases the number of IR instructions without providing any benefits. It also unnecessarily causes operations to be performed on wider types than necessary. In some cases, the extra extensions also pessimize codegen (see bfis-in-loop.ll). The changes in arm64-codegen-prepare-extload.ll also show that we avoid promotions that should only be performed in stress mode. PR: https://github.com/llvm/llvm-project/pull/77094
146 lines
5.0 KiB
LLVM
146 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=aarch64-linux-gnu -passes=typepromotion < %s | llc -mtriple=aarch64-linux-gnu -o - | FileCheck %s

; The pipeline above runs only the typepromotion pass through opt, feeds the
; resulting IR to llc for aarch64-linux-gnu, and matches the emitted assembly
; against the autogenerated assertions inside each function below.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Aggregate types used by the pointer arithmetic in the tests.
; %struct.zot is a packed struct (declared with <{ ... }>).
%struct.bar = type { %struct.foo }
%struct.foo = type { ptr }
%struct.wobble = type { ptr }
%struct.zot = type <{ %struct.wobble, ptr, ptr, i8, [7 x i8] }>

; Externally defined global; both tests load their initial pointer from it.
@global = external global %struct.bar, align 8

; Loop whose carried values (%var4: i64 accumulator, %var6: i8) both start at
; zero. Every iteration loads an i16, chooses between its high byte and the
; carried byte, re-packs that byte with the low byte of the loaded value, and
; ORs the 16-bit result into the accumulator together with a flag at bit 16.
; The loop follows a chain of pointers and exits once the next pointer is null.
; The assertions pin the AArch64 code produced for this loop after the
; typepromotion pass; they are autogenerated and must not be edited by hand.
define i64 @bfis_in_loop_zero() {
; CHECK-LABEL: bfis_in_loop_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:global
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: ldr x8, [x8, :got_lo12:global]
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: .LBB0_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x8, #72]
; CHECK-NEXT: ldr x13, [x8, #8]
; CHECK-NEXT: lsr w11, w10, #8
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ldr x8, [x13, #16]
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w9, w9, w11, eq
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
; CHECK-NEXT: bfi w10, w9, #8, #24
; CHECK-NEXT: orr x11, x11, x12, lsl #16
; CHECK-NEXT: orr x0, x11, x10
; CHECK-NEXT: cbnz x13, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  %var = load ptr, ptr @global, align 8
  br label %preheader

preheader:                                        ; preds = %entry
  br label %header

header:                                           ; preds = %latch, %preheader
  %var4 = phi i64 [ %var30, %latch ], [ 0, %preheader ]
  %var5 = phi ptr [ %var38, %latch ], [ %var, %preheader ]
  %var6 = phi i8 [ %var21, %latch ], [ 0, %preheader ]
  br label %midblock

midblock:                                         ; preds = %header
  ; Element 9 of %struct.wobble: byte offset 72 in the ldrh assertion above.
  %var15 = getelementptr inbounds %struct.wobble, ptr %var5, i64 9
  %var17 = load i16, ptr %var15, align 8
  %var18 = icmp eq i16 %var17, 0
  ; High byte of the loaded halfword.
  %var19 = lshr i16 %var17, 8
  %var20 = trunc i16 %var19 to i8
  ; Keep the carried byte when the loaded value is zero, else take the high byte.
  %var21 = select i1 %var18, i8 %var6, i8 %var20
  ; Re-pack: selected byte in bits 15:8, low byte of the loaded value in bits 7:0.
  %var22 = zext i8 %var21 to i16
  %var23 = shl nuw i16 %var22, 8
  %var24 = and i16 %var17, 255
  %var25 = or i16 %var23, %var24
  ; 65536 sets bit 16 when the loaded value is non-zero.
  %var26 = select i1 %var18, i64 0, i64 65536
  %var27 = zext i16 %var25 to i64
  ; -4294967296 == 0xffffffff00000000: keep only the upper 32 accumulator bits.
  %var28 = and i64 %var4, -4294967296
  %var29 = or i64 %var26, %var28
  %var30 = or i64 %var29, %var27
  br label %latch

latch:                                            ; preds = %midblock
  ; Next pointer lives in element 1 of %struct.wobble (byte offset 8 above).
  %var34 = getelementptr inbounds %struct.wobble, ptr %var5, i64 1, i32 0
  %var35 = load ptr, ptr %var34, align 8
  %var36 = icmp eq ptr %var35, null
  ; Field 2 of %struct.zot (byte offset 16 above); this load is issued before
  ; the branch on the null test.
  %var37 = getelementptr inbounds %struct.zot, ptr %var35, i64 0, i32 2
  %var38 = load ptr, ptr %var37, align 8
  br i1 %var36, label %exit, label %header

exit:                                             ; preds = %latch
  ret i64 %var30
}

; Same loop as the zero-initialised variant in this file, except that the
; carried values (%var4: i64 accumulator, %var6: i8) enter the loop as undef.
; Every iteration loads an i16, chooses between its high byte and the carried
; byte, re-packs that byte with the low byte of the loaded value, and ORs the
; 16-bit result into the accumulator together with a flag at bit 16.
; The assertions pin the AArch64 code produced for this loop after the
; typepromotion pass; they are autogenerated and must not be edited by hand.
define i64 @bfis_in_loop_undef() {
; CHECK-LABEL: bfis_in_loop_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x9, :got:global
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: // implicit-def: $x0
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: .LBB1_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
; CHECK-NEXT: ldr x13, [x9, #8]
; CHECK-NEXT: lsr w11, w10, #8
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ldr x9, [x13, #16]
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
; CHECK-NEXT: bfi w10, w8, #8, #24
; CHECK-NEXT: orr x11, x11, x12, lsl #16
; CHECK-NEXT: orr x0, x11, x10
; CHECK-NEXT: cbnz x13, .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  %var = load ptr, ptr @global, align 8
  br label %preheader

preheader:                                        ; preds = %entry
  br label %header

header:                                           ; preds = %latch, %preheader
  ; Both non-pointer carried values start as undef on entry from %preheader.
  %var4 = phi i64 [ %var30, %latch ], [ undef, %preheader ]
  %var5 = phi ptr [ %var38, %latch ], [ %var, %preheader ]
  %var6 = phi i8 [ %var21, %latch ], [ undef, %preheader ]
  br label %midblock

midblock:                                         ; preds = %header
  ; Element 9 of %struct.wobble: byte offset 72 in the ldrh assertion above.
  %var15 = getelementptr inbounds %struct.wobble, ptr %var5, i64 9
  %var17 = load i16, ptr %var15, align 8
  %var18 = icmp eq i16 %var17, 0
  ; High byte of the loaded halfword.
  %var19 = lshr i16 %var17, 8
  %var20 = trunc i16 %var19 to i8
  ; Keep the carried byte when the loaded value is zero, else take the high byte.
  %var21 = select i1 %var18, i8 %var6, i8 %var20
  ; Re-pack: selected byte in bits 15:8, low byte of the loaded value in bits 7:0.
  %var22 = zext i8 %var21 to i16
  %var23 = shl nuw i16 %var22, 8
  %var24 = and i16 %var17, 255
  %var25 = or i16 %var23, %var24
  ; 65536 sets bit 16 when the loaded value is non-zero.
  %var26 = select i1 %var18, i64 0, i64 65536
  %var27 = zext i16 %var25 to i64
  ; -4294967296 == 0xffffffff00000000: keep only the upper 32 accumulator bits.
  %var28 = and i64 %var4, -4294967296
  %var29 = or i64 %var26, %var28
  %var30 = or i64 %var29, %var27
  br label %latch

latch:                                            ; preds = %midblock
  ; Next pointer lives in element 1 of %struct.wobble (byte offset 8 above).
  %var34 = getelementptr inbounds %struct.wobble, ptr %var5, i64 1, i32 0
  %var35 = load ptr, ptr %var34, align 8
  %var36 = icmp eq ptr %var35, null
  ; Field 2 of %struct.zot (byte offset 16 above); this load is issued before
  ; the branch on the null test.
  %var37 = getelementptr inbounds %struct.zot, ptr %var35, i64 0, i32 2
  %var38 = load ptr, ptr %var37, align 8
  br i1 %var36, label %exit, label %header

exit:                                             ; preds = %latch
  ret i64 %var30
}