llvm-project/llvm/test/CodeGen/AArch64/neon-dotpattern.ll
Harvin Iriawan db158c7c83 [AArch64] Update generic sched model to A510
Refresh of the generic scheduling model to use A510 instead of A55.
  The main benefits are for the little core and the introduction of SVE scheduling information.
  The changes were tested on various OoO cores; no performance degradation was seen.

  Differential Revision: https://reviews.llvm.org/D156799
2023-08-21 12:25:15 +01:00

95 lines
3.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s
; Signed 4-element i8 dot product written as fully scalar IR.
;
; The body loads bytes 0..3 from %1 and from %2, sign-extends each to i32,
; multiplies the pairs, sums the four products, and stores the i32 sum to %0.
; The assertions below expect llc (run with +dotprod, see the RUN line) to
; load each 4-byte operand with one 32-bit ldr, zero the accumulator with
; dup from wzr, and emit a single sdot instead of four scalar multiply/adds.
define fastcc void @test_sdot_v4i8(ptr noalias nocapture %0, ptr noalias nocapture readonly %1, ptr noalias nocapture readonly %2) {
; CHECK-LABEL: test_sdot_v4i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x2]
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: dup v0.2s, wzr
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
entry:
; Element 0: sext(%1[0]) * sext(%2[0]).
%3 = load i8, ptr %1, align 1
%4 = sext i8 %3 to i32
%5 = load i8, ptr %2, align 1
%6 = sext i8 %5 to i32
%7 = mul nsw i32 %6, %4
; Element 1: product at byte offset 1, added to the element-0 product.
%8 = getelementptr inbounds i8, ptr %1, i64 1
%9 = load i8, ptr %8, align 1
%10 = sext i8 %9 to i32
%11 = getelementptr inbounds i8, ptr %2, i64 1
%12 = load i8, ptr %11, align 1
%13 = sext i8 %12 to i32
%14 = mul nsw i32 %13, %10
%15 = add nsw i32 %14, %7
; Element 2: product at byte offset 2, added to the running sum.
%16 = getelementptr inbounds i8, ptr %1, i64 2
%17 = load i8, ptr %16, align 1
%18 = sext i8 %17 to i32
%19 = getelementptr inbounds i8, ptr %2, i64 2
%20 = load i8, ptr %19, align 1
%21 = sext i8 %20 to i32
%22 = mul nsw i32 %21, %18
%23 = add nsw i32 %22, %15
; Element 3: product at byte offset 3, completing the four-term sum.
%24 = getelementptr inbounds i8, ptr %1, i64 3
%25 = load i8, ptr %24, align 1
%26 = sext i8 %25 to i32
%27 = getelementptr inbounds i8, ptr %2, i64 3
%28 = load i8, ptr %27, align 1
%29 = sext i8 %28 to i32
%30 = mul nsw i32 %29, %26
%31 = add nsw i32 %30, %23
; Write the accumulated i32 sum through the output pointer %0.
store i32 %31, ptr %0, align 64
ret void
}
; Unsigned 4-element i8 dot product written as fully scalar IR.
;
; Same shape as the signed variant, but every byte is zero-extended (zext)
; to i32 before the multiply. The body loads bytes 0..3 from %1 and from %2,
; multiplies the pairs, sums the four products, and stores the i32 sum to %0.
; The assertions below expect llc (run with +dotprod, see the RUN line) to
; load each 4-byte operand with one 32-bit ldr, zero the accumulator with
; dup from wzr, and emit a single udot instead of four scalar multiply/adds.
define fastcc void @test_udot_v4i8(ptr noalias nocapture %0, ptr noalias nocapture readonly %1, ptr noalias nocapture readonly %2) {
; CHECK-LABEL: test_udot_v4i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x2]
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: dup v0.2s, wzr
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
entry:
; Element 0: zext(%1[0]) * zext(%2[0]).
%3 = load i8, ptr %1, align 1
%4 = zext i8 %3 to i32
%5 = load i8, ptr %2, align 1
%6 = zext i8 %5 to i32
%7 = mul nsw i32 %6, %4
; Element 1: product at byte offset 1, added to the element-0 product.
%8 = getelementptr inbounds i8, ptr %1, i64 1
%9 = load i8, ptr %8, align 1
%10 = zext i8 %9 to i32
%11 = getelementptr inbounds i8, ptr %2, i64 1
%12 = load i8, ptr %11, align 1
%13 = zext i8 %12 to i32
%14 = mul nsw i32 %13, %10
%15 = add nsw i32 %14, %7
; Element 2: product at byte offset 2, added to the running sum.
%16 = getelementptr inbounds i8, ptr %1, i64 2
%17 = load i8, ptr %16, align 1
%18 = zext i8 %17 to i32
%19 = getelementptr inbounds i8, ptr %2, i64 2
%20 = load i8, ptr %19, align 1
%21 = zext i8 %20 to i32
%22 = mul nsw i32 %21, %18
%23 = add nsw i32 %22, %15
; Element 3: product at byte offset 3, completing the four-term sum.
%24 = getelementptr inbounds i8, ptr %1, i64 3
%25 = load i8, ptr %24, align 1
%26 = zext i8 %25 to i32
%27 = getelementptr inbounds i8, ptr %2, i64 3
%28 = load i8, ptr %27, align 1
%29 = zext i8 %28 to i32
%30 = mul nsw i32 %29, %26
%31 = add nsw i32 %30, %23
; Write the accumulated i32 sum through the output pointer %0.
store i32 %31, ptr %0, align 64
ret void
}