
Minor improvement on cc39c3b17fb2598e20ca0854f9fe6d69169d85c7. Use an aligned stack slot to store the shifted value. Use the native register width as shifting unit, so the load of the shift result is aligned. If the shift amount is a multiple of the native register width, there is no need to do a follow-up shift after the load. I added new tests for these cases. Co-authored-by: Gergely Futo <gergely.futo@hightec-rt.com>
218 lines
6.7 KiB
LLVM
218 lines
6.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
;
; Codegen test for variable-amount i128 shifts (shl, ashr, lshr) performed
; inside fixed-trip-count loops. The target below is big-endian ("E") with
; 32-bit pointers and a 32-bit native integer width ("n32"), so each i128
; value is handled as four 32-bit words in the expected assembly.
target datalayout = "E-m:e-p:32:32-i128:64-n32"
target triple = "powerpc-ellcc-linux"

; foo1: i128 left shift (shl) by a variable amount, repeated 2048 times.
;
; Every iteration reloads the value from %b and the shift amount from %c,
; shifts, and stores the result to %a. The expected assembly below pins:
;   * the loop is driven by the count register (li 2048 / mtctr / bdnz);
;   * only the low word of the shift amount is read (lwz 12, 12(5));
;   * the four value words are stored at 16..28(1) with four zero words
;     at 32..44(1), and the result words are reloaded with lwzux/lwz from
;     the base 16(1) plus the offset computed by "rlwinm 8, 12, 29, 28, 29",
;     i.e. (amount >> 3) & 12, which is always a multiple of 4 bytes;
;   * the residual amount "clrlwi 10, 12, 27" (amount & 31) and its
;     complement (subfic ..., 32) are applied with slw/srw/or.
; Function Attrs: nounwind
define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-LABEL: foo1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -64(1)
; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill
; CHECK-NEXT: li 7, 2048
; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: mtctr 7
; CHECK-NEXT: addi 7, 1, 16
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz 8, 0(4)
; CHECK-NEXT: lwz 9, 4(4)
; CHECK-NEXT: lwz 10, 8(4)
; CHECK-NEXT: lwz 11, 12(4)
; CHECK-NEXT: lwz 12, 12(5)
; CHECK-NEXT: stw 6, 44(1)
; CHECK-NEXT: stw 6, 40(1)
; CHECK-NEXT: stw 6, 36(1)
; CHECK-NEXT: stw 6, 32(1)
; CHECK-NEXT: stw 11, 28(1)
; CHECK-NEXT: stw 10, 24(1)
; CHECK-NEXT: clrlwi 10, 12, 27
; CHECK-NEXT: stw 9, 20(1)
; CHECK-NEXT: stw 8, 16(1)
; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29
; CHECK-NEXT: lwzux 9, 8, 7
; CHECK-NEXT: subfic 12, 10, 32
; CHECK-NEXT: lwz 11, 8(8)
; CHECK-NEXT: slw 9, 9, 10
; CHECK-NEXT: lwz 0, 4(8)
; CHECK-NEXT: lwz 8, 12(8)
; CHECK-NEXT: srw 30, 11, 12
; CHECK-NEXT: slw 29, 0, 10
; CHECK-NEXT: srw 0, 0, 12
; CHECK-NEXT: srw 12, 8, 12
; CHECK-NEXT: slw 11, 11, 10
; CHECK-NEXT: slw 8, 8, 10
; CHECK-NEXT: stw 8, 12(3)
; CHECK-NEXT: or 8, 11, 12
; CHECK-NEXT: stw 8, 8(3)
; CHECK-NEXT: or 8, 9, 0
; CHECK-NEXT: stw 8, 0(3)
; CHECK-NEXT: or 8, 29, 30
; CHECK-NEXT: stw 8, 4(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: blr
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %0 = load i128, ptr %b, align 16                ; value to shift
  %1 = load i128, ptr %c, align 16                ; variable shift amount
  %shl = shl i128 %0, %1
  store i128 %shl, ptr %a, align 16
  %inc = add nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %inc, 2048              ; fixed trip count of 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; foo2: i128 arithmetic right shift (ashr) by a variable amount, repeated
; 2048 times.
;
; Every iteration reloads the value from %b and the shift amount from %c,
; shifts, and stores the result to %a. The expected assembly below pins:
;   * the loop is driven by the count register (li 2048 / mtctr / bdnz);
;   * only the low word of the shift amount is read (lwz 11, 12(5));
;   * the four value words are stored at 24..36(1), preceded at 8..20(1)
;     by four copies of the sign word (srawi 7, 7, 31 of the top word);
;   * the result words are reloaded with plain lwz from the base 24(1)
;     minus the offset "rlwinm 8, 11, 29, 28, 29", i.e. (amount >> 3) & 12,
;     which is always a multiple of 4 bytes;
;   * the residual amount "clrlwi 10, 11, 27" (amount & 31) and its
;     complement (subfic ..., 32) are applied with srw/slw/or, with sraw
;     used for the most significant result word.
; Function Attrs: nounwind
define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -48(1)
; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: mtctr 6
; CHECK-NEXT: addi 6, 1, 24
; CHECK-NEXT: .LBB1_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz 7, 0(4)
; CHECK-NEXT: lwz 8, 4(4)
; CHECK-NEXT: lwz 11, 12(5)
; CHECK-NEXT: lwz 9, 8(4)
; CHECK-NEXT: lwz 10, 12(4)
; CHECK-NEXT: stw 8, 28(1)
; CHECK-NEXT: rlwinm 8, 11, 29, 28, 29
; CHECK-NEXT: stw 7, 24(1)
; CHECK-NEXT: srawi 7, 7, 31
; CHECK-NEXT: stw 10, 36(1)
; CHECK-NEXT: clrlwi 10, 11, 27
; CHECK-NEXT: stw 9, 32(1)
; CHECK-NEXT: subfic 12, 10, 32
; CHECK-NEXT: stw 7, 20(1)
; CHECK-NEXT: stw 7, 16(1)
; CHECK-NEXT: stw 7, 12(1)
; CHECK-NEXT: stw 7, 8(1)
; CHECK-NEXT: sub 7, 6, 8
; CHECK-NEXT: lwz 8, 4(7)
; CHECK-NEXT: lwz 9, 0(7)
; CHECK-NEXT: lwz 11, 12(7)
; CHECK-NEXT: srw 0, 8, 10
; CHECK-NEXT: lwz 7, 8(7)
; CHECK-NEXT: slw 30, 9, 12
; CHECK-NEXT: slw 8, 8, 12
; CHECK-NEXT: srw 11, 11, 10
; CHECK-NEXT: slw 12, 7, 12
; CHECK-NEXT: srw 7, 7, 10
; CHECK-NEXT: or 7, 8, 7
; CHECK-NEXT: stw 7, 8(3)
; CHECK-NEXT: or 7, 12, 11
; CHECK-NEXT: sraw 9, 9, 10
; CHECK-NEXT: stw 7, 12(3)
; CHECK-NEXT: or 7, 30, 0
; CHECK-NEXT: stw 9, 0(3)
; CHECK-NEXT: stw 7, 4(3)
; CHECK-NEXT: bdnz .LBB1_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: blr
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %0 = load i128, ptr %b, align 16                ; value to shift
  %1 = load i128, ptr %c, align 16                ; variable shift amount
  %shr = ashr i128 %0, %1
  store i128 %shr, ptr %a, align 16
  %inc = add nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %inc, 2048              ; fixed trip count of 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; foo3: i128 logical right shift (lshr) by a variable amount, repeated
; 2048 times.
;
; Every iteration reloads the value from %b and the shift amount from %c,
; shifts, and stores the result to %a. The expected assembly below pins:
;   * the loop is driven by the count register (li 2048 / mtctr / bdnz);
;   * only the low word of the shift amount is read (lwz 12, 12(5));
;   * the four value words are stored at 32..44(1), preceded at 16..28(1)
;     by four zero words (li 6, 0);
;   * the result words are reloaded with plain lwz from the base 32(1)
;     minus the offset "rlwinm 8, 12, 29, 28, 29", i.e. (amount >> 3) & 12,
;     which is always a multiple of 4 bytes;
;   * the residual amount "clrlwi 11, 12, 27" (amount & 31) and its
;     complement (subfic ..., 32) are applied with srw/slw/or.
; Function Attrs: nounwind
define void @foo3(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-LABEL: foo3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -64(1)
; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill
; CHECK-NEXT: li 7, 2048
; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: mtctr 7
; CHECK-NEXT: addi 7, 1, 32
; CHECK-NEXT: .LBB2_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz 8, 0(4)
; CHECK-NEXT: lwz 12, 12(5)
; CHECK-NEXT: lwz 9, 4(4)
; CHECK-NEXT: lwz 10, 8(4)
; CHECK-NEXT: lwz 11, 12(4)
; CHECK-NEXT: stw 8, 32(1)
; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29
; CHECK-NEXT: stw 6, 28(1)
; CHECK-NEXT: sub 8, 7, 8
; CHECK-NEXT: stw 6, 24(1)
; CHECK-NEXT: stw 6, 20(1)
; CHECK-NEXT: stw 6, 16(1)
; CHECK-NEXT: stw 11, 44(1)
; CHECK-NEXT: clrlwi 11, 12, 27
; CHECK-NEXT: stw 10, 40(1)
; CHECK-NEXT: subfic 0, 11, 32
; CHECK-NEXT: stw 9, 36(1)
; CHECK-NEXT: lwz 9, 4(8)
; CHECK-NEXT: lwz 10, 0(8)
; CHECK-NEXT: lwz 12, 12(8)
; CHECK-NEXT: srw 30, 9, 11
; CHECK-NEXT: lwz 8, 8(8)
; CHECK-NEXT: slw 29, 10, 0
; CHECK-NEXT: slw 9, 9, 0
; CHECK-NEXT: srw 12, 12, 11
; CHECK-NEXT: slw 0, 8, 0
; CHECK-NEXT: srw 8, 8, 11
; CHECK-NEXT: or 8, 9, 8
; CHECK-NEXT: stw 8, 8(3)
; CHECK-NEXT: or 8, 0, 12
; CHECK-NEXT: srw 10, 10, 11
; CHECK-NEXT: stw 8, 12(3)
; CHECK-NEXT: or 8, 29, 30
; CHECK-NEXT: stw 10, 0(3)
; CHECK-NEXT: stw 8, 4(3)
; CHECK-NEXT: bdnz .LBB2_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: blr
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %0 = load i128, ptr %b, align 16                ; value to shift
  %1 = load i128, ptr %c, align 16                ; variable shift amount
  %shr = lshr i128 %0, %1
  store i128 %shr, ptr %a, align 16
  %inc = add nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %inc, 2048              ; fixed trip count of 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Attribute group #0, referenced by the function definitions in this file.
attributes #0 = { nounwind }