QingShan Zhang 4bd186c0ff [PowerPC] Exploit the rldicl + rldicl when and with mask
If we are and the constant like 0xFFFFFFC00000, for now, we are using several
instructions to generate this 48bit constant and final an "and". However, we
could exploit it with two rotate instructions.

       MB          ME               MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63
Rotate left ME + 1 bit first, and then, mask it with (MB + 63 - ME, 63),
finally, rotate back. Notice that, we need to round it with 64 bit for the
wrapping case.

Reviewed by: ChenZheng, Nemanjai

Differential Revision: https://reviews.llvm.org/D71831
2020-04-17 05:24:00 +00:00

511 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -ppc-gpr-icmps=all -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown -ppc-convert-rr-to-ri=true | FileCheck %s
define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
%a = icmp eq i32 %P, 0
%b = icmp eq i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
define zeroext i1 @all_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: li 5, -1
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: xor 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
define zeroext i1 @any_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%a = icmp ne i32 %P, 0
%b = icmp ne i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: li 5, -1
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: xor 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
define zeroext i1 @any_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
define i32 @all_bits_clear_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: all_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: bne 0, .LBB8_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB8_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp eq i32* %P, null
%b = icmp eq i32* %Q, null
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 3, 0
; CHECK-NEXT: blt 0, .LBB9_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB9_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @all_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 3, -1
; CHECK-NEXT: bne 0, .LBB10_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB10_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 3, -1
; CHECK-NEXT: bgt 0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB11_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
define i32 @any_bits_set_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: any_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: beq 0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB12_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp ne i32* %P, null
%b = icmp ne i32* %Q, null
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 3, -1
; CHECK-NEXT: bgt 0, .LBB13_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB13_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 3, -1
; CHECK-NEXT: beq 0, .LBB14_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB14_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 3, 0
; CHECK-NEXT: blt 0, .LBB15_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB15_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp eq <4 x i32> %P, zeroinitializer
%b = icmp eq <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, zeroinitializer
%b = icmp ne <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp ne <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: subfic 3, 3, 1
; CHECK-NEXT: subfe 3, 4, 4
; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: blr
%cmp1 = icmp ne i64 %x, -1
%cmp2 = icmp ne i64 %x, 0
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: and_eq:
; CHECK: # %bb.0:
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: xor 4, 5, 6
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
%cmp1 = icmp eq i16 %a, %b
%cmp2 = icmp eq i16 %c, %d
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: or_ne:
; CHECK: # %bb.0:
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: xor 4, 5, 6
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d
%or = or i1 %cmp1, %cmp2
ret i1 %or
}
; This should not be transformed because vector compares + bitwise logic are faster.
define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: and_eq_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: vcmpequw 3, 4, 5
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: blr
%cmp1 = icmp eq <4 x i32> %a, %b
%cmp2 = icmp eq <4 x i32> %c, %d
%and = and <4 x i1> %cmp1, %cmp2
ret <4 x i1> %and
}
define i1 @or_icmps_const_1bit_diff(i64 %x) {
; CHECK-LABEL: or_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 3, 3, -13
; CHECK-NEXT: rldicl 3, 3, 61, 1
; CHECK-NEXT: rotldi 3, 3, 3
; CHECK-NEXT: cntlzd 3, 3
; CHECK-NEXT: rldicl 3, 3, 58, 63
; CHECK-NEXT: blr
%a = icmp eq i64 %x, 17
%b = icmp eq i64 %x, 13
%r = or i1 %a, %b
ret i1 %r
}
define i1 @and_icmps_const_1bit_diff(i32 %x) {
; CHECK-LABEL: and_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 3, 3, -4625
; CHECK-NEXT: rlwinm 3, 3, 0, 28, 26
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%a = icmp ne i32 %x, 4625
%b = icmp ne i32 %x, 4641
%r = and i1 %a, %b
ret i1 %r
}