
; - Sink splat operands to mul instructions for types where we can use the
;   lane-indexed variants.
; - When sinking operands for [su]mull, also sink the ext instruction.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s

; Widening multiply-accumulate with a splat operand: sext first, then splat
; (i8 -> i16).
;
; The multiplier comes from %x: its <8 x i8> value is sign-extended to
; <8 x i16> and lane 3 of the widened vector is broadcast to every lane in the
; entry block. The loop loads <8 x i8> from %y, sign-extends it, multiplies it
; by the splat and accumulates into %q. The loop repeats only while %p == 0,
; so the body runs for %p = 0 and %p = 1.
;
; Expected codegen: the splat is materialised once, before the loop, as
; `dup v1.8b, v1.b[3]` on the narrow source, and the loop body is a
; vector-by-vector `smlal v0.8h, v2.8b, v1.8b`.
; NOTE(review): presumably the vector form is used because smlal has no
; by-element encoding for byte lanes -- confirm against the ISA reference.
define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    dup v1.8b, v1.b[3]
; CHECK-NEXT:  .LBB0_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #4
; CHECK-NEXT:    cmp w8, #4
; CHECK-NEXT:    smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT:    b.eq .LBB0_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: widen, then broadcast lane 3.
  %x.val = load <8 x i8>, ptr %x
  %x.ext = sext <8 x i8> %x.val to <8 x i16>
  %a = shufflevector <8 x i16> %x.ext, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i8 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
  %y.val = load <8 x i8>, ptr %y.ptr
  %y.ext = sext <8 x i8> %y.val to <8 x i16>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <8 x i16> %y.ext, %a
  %c = add <8 x i16> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <8 x i16> %c
}

; Widening multiply-accumulate with a splat operand: sext first, then splat
; (i16 -> i32).
;
; The multiplier comes from %x: its <4 x i16> value is sign-extended to
; <4 x i32> and lane 3 of the widened vector is broadcast to every lane in the
; entry block. The loop loads <4 x i16> from %y, sign-extends it, multiplies it
; by the splat and accumulates into %q. The loop repeats only while %p == 0,
; so the body runs for %p = 0 and %p = 1.
;
; Expected codegen: no separate dup is emitted; the splat is folded into the
; lane-indexed `smlal v0.4s, v2.4h, v1.h[3]` inside the loop, reading the lane
; directly from the register loaded from %x.
define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB1_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #8
; CHECK-NEXT:    cmp w8, #8
; CHECK-NEXT:    smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT:    b.eq .LBB1_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: widen, then broadcast lane 3.
  %x.val = load <4 x i16>, ptr %x
  %x.ext = sext <4 x i16> %x.val to <4 x i32>
  %a = shufflevector <4 x i32> %x.ext, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i16 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
  %y.val = load <4 x i16>, ptr %y.ptr
  %y.ext = sext <4 x i16> %y.val to <4 x i32>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <4 x i32> %y.ext, %a
  %c = add <4 x i32> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <4 x i32> %c
}

; Widening multiply-accumulate with a splat operand: sext first, then splat
; (i32 -> i64).
;
; The multiplier comes from %x: its <2 x i32> value is sign-extended to
; <2 x i64> and lane 1 of the widened vector is broadcast to both lanes in the
; entry block. The loop loads <2 x i32> from %y, sign-extends it, multiplies it
; by the splat and accumulates into %q. The loop repeats only while %p == 0,
; so the body runs for %p = 0 and %p = 1.
;
; Expected codegen: no separate dup is emitted; the splat is folded into the
; lane-indexed `smlal v0.2d, v2.2s, v1.s[1]` inside the loop, reading the lane
; directly from the register loaded from %x.
define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB2_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    cmp w8, #16
; CHECK-NEXT:    smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT:    b.eq .LBB2_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: widen, then broadcast lane 1.
  %x.val = load <2 x i32>, ptr %x
  %x.ext = sext <2 x i32> %x.val to <2 x i64>
  %a = shufflevector <2 x i64> %x.ext, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i32 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
  %y.val = load <2 x i32>, ptr %y.ptr
  %y.ext = sext <2 x i32> %y.val to <2 x i64>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <2 x i64> %y.ext, %a
  %c = add <2 x i64> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <2 x i64> %c
}

; Widening multiply-accumulate with a splat operand: splat first, then sext
; (i8 -> i16).
;
; The multiplier comes from %x: lane 3 of its <8 x i8> value is broadcast to
; every lane and the resulting narrow splat is then sign-extended to <8 x i16>,
; all in the entry block. The loop loads <8 x i8> from %y, sign-extends it,
; multiplies it by the widened splat and accumulates into %q. The loop repeats
; only while %p == 0, so the body runs for %p = 0 and %p = 1.
;
; Expected codegen matches the sext-then-splat variant of this element type:
; `dup v1.8b, v1.b[3]` before the loop and a vector-by-vector
; `smlal v0.8h, v2.8b, v1.8b` in the loop body.
; NOTE(review): presumably the vector form is used because smlal has no
; by-element encoding for byte lanes -- confirm against the ISA reference.
define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    dup v1.8b, v1.b[3]
; CHECK-NEXT:  .LBB3_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #4
; CHECK-NEXT:    cmp w8, #4
; CHECK-NEXT:    smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT:    b.eq .LBB3_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: broadcast lane 3 on the narrow type, then widen.
  %x.val = load <8 x i8>, ptr %x
  %x.spt = shufflevector <8 x i8> %x.val, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %a = sext <8 x i8> %x.spt to <8 x i16>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i8 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
  %y.val = load <8 x i8>, ptr %y.ptr
  %y.ext = sext <8 x i8> %y.val to <8 x i16>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <8 x i16> %y.ext, %a
  %c = add <8 x i16> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <8 x i16> %c
}

; Widening multiply-accumulate with a splat operand: splat first, then sext
; (i16 -> i32).
;
; The multiplier comes from %x: lane 3 of its <4 x i16> value is broadcast to
; every lane and the resulting narrow splat is then sign-extended to <4 x i32>,
; all in the entry block. The loop loads <4 x i16> from %y, sign-extends it,
; multiplies it by the widened splat and accumulates into %q. The loop repeats
; only while %p == 0, so the body runs for %p = 0 and %p = 1.
;
; Expected codegen: no separate dup is emitted; the splat is folded into the
; lane-indexed `smlal v0.4s, v2.4h, v1.h[3]` inside the loop, reading the lane
; directly from the register loaded from %x.
define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB4_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #8
; CHECK-NEXT:    cmp w8, #8
; CHECK-NEXT:    smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT:    b.eq .LBB4_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: broadcast lane 3 on the narrow type, then widen.
  %x.val = load <4 x i16>, ptr %x
  %x.spt = shufflevector <4 x i16> %x.val, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %a = sext <4 x i16> %x.spt to <4 x i32>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i16 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
  %y.val = load <4 x i16>, ptr %y.ptr
  %y.ext = sext <4 x i16> %y.val to <4 x i32>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <4 x i32> %y.ext, %a
  %c = add <4 x i32> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <4 x i32> %c
}

; Widening multiply-accumulate with a splat operand: splat first, then sext
; (i32 -> i64).
;
; The multiplier comes from %x: lane 1 of its <2 x i32> value is broadcast to
; both lanes and the resulting narrow splat is then sign-extended to <2 x i64>,
; all in the entry block. The loop loads <2 x i32> from %y, sign-extends it,
; multiplies it by the widened splat and accumulates into %q. The loop repeats
; only while %p == 0, so the body runs for %p = 0 and %p = 1.
;
; Expected codegen: no separate dup is emitted; the splat is folded into the
; lane-indexed `smlal v0.2d, v2.2s, v1.s[1]` inside the loop, reading the lane
; directly from the register loaded from %x.
define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB5_1: // %l1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr d2, [x1, x8]
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    cmp w8, #16
; CHECK-NEXT:    smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT:    b.eq .LBB5_1
; CHECK-NEXT:  // %bb.2: // %l2
; CHECK-NEXT:    ret
entry:
  ; Loop-invariant multiplier: broadcast lane 1 on the narrow type, then widen.
  %x.val = load <2 x i32>, ptr %x
  %x.spt = shufflevector <2 x i32> %x.val, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %a = sext <2 x i32> %x.spt to <2 x i64>
  br label %l1

l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Advance through %y by 4 i32 elements per iteration.
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
  %y.val = load <2 x i32>, ptr %y.ptr
  %y.ext = sext <2 x i32> %y.val to <2 x i64>
  ; The mul + add pair is what the assertions expect to become smlal.
  %b = mul <2 x i64> %y.ext, %a
  %c = add <2 x i64> %q, %b
  %pa = add i32 %p, 1
  ; Back edge is taken only when %p == 0.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2

l2:
  ret <2 x i64> %c
}