dnsampaio 28d0718033
[DAGCombiner] Add combine avg from shifts (#113909)
This teaches dagcombiner to fold:
`(asr (add nsw x, y), 1) -> (avgfloors x, y)`
`(lsr (add nuw x, y), 1) -> (avgflooru x, y)`

as well as combining them into a ceil variant:
`(avgfloors (add nsw x, 1), y) -> (avgceils x, y)`
`(avgflooru (add nuw x, 1), y) -> (avgceilu x, y)`

iff valid for the target.

Removes some of the ARM MVE patterns that are now dead code.
It adds the avg opcodes to `IsQRMVEInstruction` so as to preserve the
immediate splatting as before.
2024-10-31 10:57:27 +01:00

351 lines
12 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; (x0 & x1) + ((x0 ^ x1) >> 1) is the overflow-free form of avgflooru.
; With both inputs zero-extended from i8, the whole i16 computation narrows
; to a single i8 uhadd whose result is then re-widened (ushll/ushll2).
define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgflooru:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = zext <16 x i8> %a0 to <16 x i16>
%x1 = zext <16 x i8> %a1 to <16 x i16>
%and = and <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = add <16 x i16> %and, %shift
ret <16 x i16> %avg
}
; Same avgflooru pattern but with mismatched source widths (i8 vs i4).
; The i4 operand is materialized as an i8 masked with #15, after which the
; combine still narrows to an i8 uhadd.
define <16 x i16> @zext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: zext_avgflooru_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #15
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = zext <16 x i8> %a0 to <16 x i16>
%x1 = zext <16 x i4> %a1 to <16 x i16>
%and = and <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = add <16 x i16> %and, %shift
ret <16 x i16> %avg
}
; (x0 | x1) - ((x0 ^ x1) >> 1) is the overflow-free form of avgceilu.
; Both inputs are zero-extended from i8, so this narrows to an i8 urhadd
; followed by re-widening.
define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = zext <16 x i8> %a0 to <16 x i16>
%x1 = zext <16 x i8> %a1 to <16 x i16>
%or = or <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = sub <16 x i16> %or, %shift
ret <16 x i16> %avg
}
; avgceilu with mismatched source widths (i4 vs i8): the i4 operand is
; masked with #15 and the combine still produces an i8 urhadd.
define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #15
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = zext <16 x i4> %a0 to <16 x i16>
%x1 = zext <16 x i8> %a1 to <16 x i16>
%or = or <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = sub <16 x i16> %or, %shift
ret <16 x i16> %avg
}
; Signed floor average: (x0 & x1) + ((x0 ^ x1) ashr 1) with sign-extended
; inputs narrows to an i8 shadd, then re-widens with sshll/sshll2.
define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgfloors:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = sext <16 x i8> %a0 to <16 x i16>
%x1 = sext <16 x i8> %a1 to <16 x i16>
%and = and <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = add <16 x i16> %and, %shift
ret <16 x i16> %avg
}
; Signed floor average with mismatched widths (i8 vs i4): the i4 sign
; extension needs an shl/sshr by 12 at i16, so the avg cannot narrow to i8;
; the combine still fires but as shadd at the wide v8i16 type.
define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: sext_avgfloors_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: sshll v3.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v0.8h, v0.16b, #0
; CHECK-NEXT: shl v1.8h, v1.8h, #12
; CHECK-NEXT: shl v2.8h, v2.8h, #12
; CHECK-NEXT: sshr v4.8h, v1.8h, #12
; CHECK-NEXT: sshr v1.8h, v2.8h, #12
; CHECK-NEXT: shadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT: shadd v0.8h, v3.8h, v4.8h
; CHECK-NEXT: ret
%x0 = sext <16 x i8> %a0 to <16 x i16>
%x1 = sext <16 x i4> %a1 to <16 x i16>
%and = and <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = add <16 x i16> %and, %shift
ret <16 x i16> %avg
}
; Signed ceil average: (x0 | x1) - ((x0 ^ x1) ashr 1) with sign-extended
; inputs narrows to an i8 srhadd, then re-widens.
define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
%x0 = sext <16 x i8> %a0 to <16 x i16>
%x1 = sext <16 x i8> %a1 to <16 x i16>
%or = or <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = sub <16 x i16> %or, %shift
ret <16 x i16> %avg
}
; Signed ceil average with mismatched widths (i4 vs i8): the i4 input is
; sign-extended in-register via shl/sshr #12, so no narrowing to i8; the
; combine still produces srhadd at the wide v8i16 type.
define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT: sshll v3.8h, v1.8b, #0
; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0
; CHECK-NEXT: shl v2.8h, v2.8h, #12
; CHECK-NEXT: shl v0.8h, v0.8h, #12
; CHECK-NEXT: sshr v2.8h, v2.8h, #12
; CHECK-NEXT: sshr v0.8h, v0.8h, #12
; CHECK-NEXT: srhadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT: srhadd v0.8h, v2.8h, v3.8h
; CHECK-NEXT: ret
%x0 = sext <16 x i4> %a0 to <16 x i16>
%x1 = sext <16 x i8> %a1 to <16 x i16>
%or = or <16 x i16> %x0, %x1
%xor = xor <16 x i16> %x0, %x1
%shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%avg = sub <16 x i16> %or, %shift
ret <16 x i16> %avg
}
; The new combine under test: (lshr (add nuw x, y), 1) -> avgflooru, i.e. a
; single uhadd. nuw guarantees the add cannot wrap, making the fold legal.
define <8 x i16> @add_avgflooru(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%add = add nuw <8 x i16> %a0, %a1
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: without nuw the add may wrap, so the fold must not fire;
; plain add + ushr are kept.
define <8 x i16> @add_avgflooru_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add = add <8 x i16> %a0, %a1
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Ceil variant: (a0 + 1 + a1) >> 1 with nuw on both adds folds all the way
; to a single urhadd (rounding halving add).
define <8 x i16> @add_avgceilu(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%add0 = add nuw <8 x i16> %a0, splat(i16 1)
%add = add nuw <8 x i16> %a1, %add0
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Same ceil fold with the +1 applied after the operand sum; still a single
; urhadd (operands commuted in the output).
define <8 x i16> @add_avgceilu2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu2:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%add0 = add nuw <8 x i16> %a1, %a0
%add = add nuw <8 x i16> %add0, splat(i16 1)
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: the inner a1+a0 add lacks nuw, so only the outer
; (sum + 1) >> 1 folds — producing uhadd against a splat of 1 rather than
; the full urhadd.
define <8 x i16> @add_avgceilu_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.8h, #1
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ret
%add0 = add <8 x i16> %a1, %a0
%add = add nuw <8 x i16> %add0, splat(i16 1)
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: the +1 add lacks nuw, so no avg fold fires; the +1 is
; instead canonicalized to sub-of-not and followed by a plain ushr.
define <8 x i16> @add_avgceilu_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch2:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add0 = add nuw <8 x i16> %a1, %a0
%add = add <8 x i16> %add0, splat(i16 1)
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; NOTE(review): this IR body is byte-identical to add_avgceilu_mismatch3's
; sibling add_avgceilu_mismatch2 above — possibly one of them was meant to
; flip a different flag; worth confirming upstream.
; Negative test: the +1 add lacks nuw, so no avg fold fires.
define <8 x i16> @add_avgceilu_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch3:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add0 = add nuw <8 x i16> %a1, %a0
%add = add <8 x i16> %add0, splat(i16 1)
%avg = lshr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Signed floor combine: (ashr (add nsw x, y), 1) -> avgfloors, i.e. a
; single shadd. nsw guarantees no signed overflow, making the fold legal.
define <8 x i16> @add_avgfloors(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%add = add nsw <8 x i16> %a0, %a1
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: without nsw the signed fold must not fire; plain add +
; sshr are kept.
define <8 x i16> @add_avgfloors_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add = add <8 x i16> %a0, %a1
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: shift amount is 2, not 1, so this is not an average and
; the fold must not fire.
; NOTE(review): function name has a typo ("avgfoor" vs "avgfloor"); kept
; as-is since renaming would also churn the autogenerated CHECK-LABEL.
define <8 x i16> @add_avgfoor_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfoor_mismatch2:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #2
; CHECK-NEXT: ret
%add = add nsw <8 x i16> %a0, %a1
%avg = ashr <8 x i16> %add, splat(i16 2)
ret <8 x i16> %avg
}
; Signed ceil variant: (a0 + 1 + a1) ashr 1 with nsw on both adds folds to
; a single srhadd (signed rounding halving add).
define <8 x i16> @add_avgceils(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%add0 = add nsw <8 x i16> %a0, splat(i16 1)
%add = add nsw <8 x i16> %a1, %add0
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Same signed ceil fold with the +1 applied after the operand sum; still a
; single srhadd (operands commuted in the output).
define <8 x i16> @add_avgceils2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils2:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%add0 = add nsw <8 x i16> %a1, %a0
%add = add nsw <8 x i16> %add0, splat(i16 1)
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: the inner a1+a0 add lacks nsw, so only the outer
; (sum + 1) ashr 1 folds — producing shadd against a splat of 1 rather
; than the full srhadd.
define <8 x i16> @add_avgceils_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.8h, #1
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-NEXT: shadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ret
%add0 = add <8 x i16> %a1, %a0
%add = add nsw <8 x i16> %add0, splat(i16 1)
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: the +1 add lacks nsw, so no signed avg fold fires; the +1
; is canonicalized to sub-of-not followed by a plain sshr.
define <8 x i16> @add_avgceils_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch2:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add0 = add nsw <8 x i16> %a1, %a0
%add = add <8 x i16> %add0, splat(i16 1)
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; NOTE(review): this IR body is byte-identical to add_avgceils_mismatch2
; above — possibly one of them was meant to flip a different flag; worth
; confirming upstream.
; Negative test: the +1 add lacks nsw, so no signed avg fold fires.
define <8 x i16> @add_avgceils_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch3:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%add0 = add nsw <8 x i16> %a1, %a0
%add = add <8 x i16> %add0, splat(i16 1)
%avg = ashr <8 x i16> %add, splat(i16 1)
ret <8 x i16> %avg
}
; Negative test: all adds carry nsw but the shift amount is 2, so this is
; not an average; no srhadd is formed.
define <8 x i16> @add_avgceils_mismatch4(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch4:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #2
; CHECK-NEXT: ret
%add0 = add nsw <8 x i16> %a0, splat(i16 1)
%add = add nsw <8 x i16> %a1, %add0
%avg = ashr <8 x i16> %add, splat(i16 2)
ret <8 x i16> %avg
}
; Negative test: unsigned variant with shift amount 2 — nuw is present but
; the shift is not 1, so no avg fold fires.
define <8 x i16> @add_avgceilu_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.8h, #1
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #2
; CHECK-NEXT: ret
%add0 = add nuw <8 x i16> %a1, %a0
%add = add nuw <8 x i16> %add0, splat(i16 1)
%avg = lshr <8 x i16> %add, splat(i16 2)
ret <8 x i16> %avg
}