llvm-project/llvm/test/CodeGen/X86/combine-and.ll
Roman Lebedev 0aef747b84
[NFC][X86][Codegen] Megacommit: mass-regenerate all check lines that were already autogenerated
The motivation is that the update script now has at least two deviations
(emitting `<...>@GOT`/`<...>@PLT` suffixes, and no longer hiding pointer
arithmetic) from what pretty much all the check lines were originally
generated with, and most of the tests have not been regenerated since.
So each time an out-of-date test is regenerated to show the effect of a
code change, the diff picks up a lot of unrelated noise. Instead of
dealing with that every time, let's just deal with everything at once.

This has been done via:
```
cd llvm-project/llvm/test/CodeGen/X86
grep -rl "; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py" | xargs -L1 <...>/llvm-project/llvm/utils/update_llc_test_checks.py --llc-binary <...>/llvm-project/build/bin/llc
```

Not all tests were regenerated, however.
2021-06-11 23:57:02 +03:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
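; 'and x, x' is trivially x, so the two functions below should lower to (at
; most) a plain register move.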
define i32 @and_self(i32 %x) {
; CHECK-LABEL: and_self:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%and = and i32 %x, %x
ret i32 %and
}
define <4 x i32> @and_self_vec(<4 x i32> %x) {
; CHECK-LABEL: and_self_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%and = and <4 x i32> %x, %x
ret <4 x i32> %and
}
;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;
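; As a worked instance (reasoning only, not an extra test): in test1 below the
; mask <i32 -1, i32 0, i32 0, i32 0> keeps only element 0 of %A, which is
; exactly a blend taking element 0 from %A and elements 1-3 from a zero
; vector, hence the xorps + blendps pair in the checks.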
define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test9(<4 x i32> %A) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
}
define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
}
define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
}
; X & undef must fold to 0. So lane 0 must choose from the zero vector.
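; (4294967295 is the unsigned spelling of all-ones, i.e. i32 -1; picking 0 for
; the undef lane makes the mask match test6's <0, -1, 0, -1> pattern.)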
define <4 x i32> @undef_lane(<4 x i32> %x) {
; CHECK-LABEL: undef_lane:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%r = and <4 x i32> %x, <i32 undef, i32 4294967295, i32 0, i32 4294967295>
ret <4 x i32> %r
}
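;
; (A & M) | (B & ~M) with complementary constant masks should fold to a single
; two-source blend.
;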
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
%2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
;
; fold (and (or x, C), D) -> D if (C & D) == D
;
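; A minimal worked instance (bit math only): in and_or_v2i64 below, C = 255
; (0xff) and D = 8 (0x08), so C & D = 8 == D; hence (x | 255) & 8 == 8 for any
; x, and the result folds to the constant <8, 8>.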
define <2 x i64> @and_or_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: and_or_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,8]
; CHECK-NEXT: retq
%1 = or <2 x i64> %a0, <i64 255, i64 255>
%2 = and <2 x i64> %1, <i64 8, i64 8>
ret <2 x i64> %2
}
define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: and_or_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: retq
%1 = or <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
%2 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %2
}
define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: and_or_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; CHECK-NEXT: retq
%1 = or <8 x i16> %a0, <i16 255, i16 127, i16 63, i16 31, i16 15, i16 31, i16 63, i16 -1>
%2 = and <8 x i16> %1, <i16 15, i16 7, i16 3, i16 1, i16 14, i16 10, i16 2, i16 32767>
ret <8 x i16> %2
}
;
; known bits folding
;
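; E.g. in and_or_zext_v2i32 below, the zext leaves bits 32..63 known zero and
; the `or 1` only touches bit 0, so masking with 4294967296 (1 << 32, a known
; zero bit) folds the whole expression to zero.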
define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: and_or_zext_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = zext <2 x i32> %a0 to <2 x i64>
%2 = or <2 x i64> %1, <i64 1, i64 1>
%3 = and <2 x i64> %2, <i64 4294967296, i64 4294967296>
ret <2 x i64> %3
}
define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: and_or_zext_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = zext <4 x i16> %a0 to <4 x i32>
%2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = and <4 x i32> %2, <i32 65536, i32 65536, i32 65536, i32 65536>
ret <4 x i32> %3
}
;
; known sign bits folding
;
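; E.g. `ashr 15` on i16 broadcasts the sign bit to all 16 bits, so masking the
; result with 1 is equivalent to a single logical shift right (psrlw $15);
; with mask 7, three sign-extended bits survive, matching an arithmetic shift
; followed by a logical shift (psrad $31; psrld $29).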
define <8 x i16> @ashr_mask1_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: ashr_mask1_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: psrlw $15, %xmm0
; CHECK-NEXT: retq
%1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %2
}
define <4 x i32> @ashr_mask7_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: ashr_mask7_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: psrld $29, %xmm0
; CHECK-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
%2 = and <4 x i32> %1, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %2
}
;
; SimplifyDemandedBits
;
; PR34620 - redundant PAND after vector shift of a byte vector (PSRLW)
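; SSE has no 8-bit shift, so the <16 x i8> lshr is lowered as a 16-bit psrlw
; plus a pand that clears the bits shifted across byte boundaries; the
; IR-level `and 1` should be merged into that mask so only one pand survives.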
define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: PR34620:
; CHECK: # %bb.0:
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = add <16 x i8> %2, %a1
ret <16 x i8> %3
}