When unable to widen a vector load/store, we can replace the operation with a masked variant. Support for extending loads largely came for free, hence its inclusion, but truncating stores require more work. Fixes https://github.com/llvm/llvm-project/issues/159995
50 lines
1.7 KiB
LLVM
50 lines
1.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
|
|
|
|
; This would assert because VE specified that all setcc
|
|
; nodes (even with vector operands) return a scalar value.
|
|
|
|
; Unsigned division of every lane of a <4 x i8> vector by 255, the all-ones
; i8 value (-1 when read as signed, hence the function name). For an i8 lane
; the quotient is 1 when the lane equals 255 and 0 otherwise.
; The expected output below consists only of and / or / cmpu.w / cmov.w.eq
; instructions followed by the return branch: one compare plus one
; conditional move per lane, and no divide instruction.
; NOTE(review): "(56)0" presumably encodes the 0xFF constant and "(63)0" the
; value 1 in VE immediate notation -- confirm against the VE ISA docs.
define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
|
|
; CHECK-LABEL: udiv_by_minus_one:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: and %s4, %s0, (56)0
|
|
; CHECK-NEXT: and %s1, %s1, (56)0
|
|
; CHECK-NEXT: and %s2, %s2, (56)0
|
|
; CHECK-NEXT: and %s3, %s3, (56)0
|
|
; CHECK-NEXT: or %s0, 0, (0)1
|
|
; CHECK-NEXT: cmpu.w %s5, %s3, (56)0
|
|
; CHECK-NEXT: or %s3, 0, (0)1
|
|
; CHECK-NEXT: cmov.w.eq %s3, (63)0, %s5
|
|
; CHECK-NEXT: cmpu.w %s5, %s2, (56)0
|
|
; CHECK-NEXT: or %s2, 0, (0)1
|
|
; CHECK-NEXT: cmov.w.eq %s2, (63)0, %s5
|
|
; CHECK-NEXT: cmpu.w %s5, %s1, (56)0
|
|
; CHECK-NEXT: or %s1, 0, (0)1
|
|
; CHECK-NEXT: cmov.w.eq %s1, (63)0, %s5
|
|
; CHECK-NEXT: cmpu.w %s4, %s4, (56)0
|
|
; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s4
|
|
; CHECK-NEXT: b.l.t (, %s10)
|
|
%r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
|
|
ret <4 x i8> %r
|
|
}
|
|
|
|
; Unsigned remainder of every lane of a <4 x i8> vector by 255, the all-ones
; i8 value (-1 when read as signed, hence the function name). For an i8 lane
; the remainder is 0 when the lane equals 255 and the lane itself otherwise.
; The expected output below masks each input register into a scratch
; register (%s4-%s7), compares the scratch value, and issues a cmov.w.eq
; whose destination is the corresponding input register (%s0-%s3); no
; divide or remainder instruction appears.
; NOTE(review): "(56)0" presumably encodes the 0xFF constant and "(0)1" the
; value 0 in VE immediate notation -- confirm against the VE ISA docs.
define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
|
|
; CHECK-LABEL: urem_by_minus_one:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: and %s4, %s0, (56)0
|
|
; CHECK-NEXT: and %s5, %s1, (56)0
|
|
; CHECK-NEXT: and %s6, %s2, (56)0
|
|
; CHECK-NEXT: and %s7, %s3, (56)0
|
|
; CHECK-NEXT: cmpu.w %s7, %s7, (56)0
|
|
; CHECK-NEXT: cmov.w.eq %s3, (0)1, %s7
|
|
; CHECK-NEXT: cmpu.w %s6, %s6, (56)0
|
|
; CHECK-NEXT: cmov.w.eq %s2, (0)1, %s6
|
|
; CHECK-NEXT: cmpu.w %s5, %s5, (56)0
|
|
; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s5
|
|
; CHECK-NEXT: cmpu.w %s4, %s4, (56)0
|
|
; CHECK-NEXT: cmov.w.eq %s0, (0)1, %s4
|
|
; CHECK-NEXT: b.l.t (, %s10)
|
|
%r = urem <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
|
|
ret <4 x i8> %r
|
|
}
|