
Update to use REG_SEQUENCE when possible. This patch only updates the td patterns to use REG_SEQUENCE in place of INSERT_SUBREG in cases where doing so does not produce nested REG_SEQUENCEs. This seems to show some improvement in code generation for `llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll`. Fixes part of https://github.com/llvm/llvm-project/issues/125502
22 lines
1.0 KiB
LLVM
22 lines
1.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
|
|
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
|
|
; RUN: | FileCheck %s
|
|
|
|
; Test body: assembles a <512 x i1> accumulator from four <16 x i8> operands
; (the first and last are zeroinitializer, the middle two are undef),
; disassembles it again, extracts element 3 of the resulting struct and stores
; that vector to a null pointer before hitting `unreachable`.
; The autogenerated assertions below expect the entry block to contain two
; xxlxor instructions (on vs0 and on vs3) followed by a single stxv of vs0.
; NOTE(review): the name suggests this covers an accumulator copy that must not
; use VSR-pair registers -- confirm against the originating bug report.
define void @copy_novsrp() local_unnamed_addr {
|
|
; CHECK-LABEL: copy_novsrp:
|
|
; CHECK: # %bb.0: # %dmblvi_entry
|
|
; CHECK-NEXT: xxlxor vs0, vs0, vs0
|
|
; CHECK-NEXT: xxlxor vs3, vs3, vs3
|
|
; CHECK-NEXT: stxv vs0, 0(0)
|
|
dmblvi_entry:
|
|
%0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> undef, <16 x i8> undef, <16 x i8> zeroinitializer)
|
|
%1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
|
|
%2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
|
|
store <16 x i8> %2, ptr null, align 1
|
|
unreachable
|
|
}
|
|
|
|
; Declarations of the two PowerPC MMA intrinsics called by @copy_novsrp:
; assemble.acc takes four <16 x i8> vectors and returns a <512 x i1> value;
; disassemble.acc takes a <512 x i1> value and returns a struct of four
; <16 x i8> vectors.
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
|
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
|