
The main change in this patch is that we go from emitting the expression: @ cfa - NumBytes - NumScalableBytes * VG to: @ cfa - VG * NumScalableBytes - NumBytes. That is, VG is the first expression. This is for a future patch that adds an alternative way to resolve VG (which uses the CFA, so it is convenient for the CFA to be at the top of the stack). Since doing this is fairly churn-heavy, I took the opportunity to also save up to 4 bytes per SVE CFI expression. This is done by folding LEB128 constants to literals when they are in the range 0 to 31, and by using the offset in `DW_OP_breg*` expressions.
619 lines
21 KiB
LLVM
619 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
|
|
|
|
; Insert the constant i8 30 into lane 0 of a <vscale x 16 x i8> vector.
; The expected output builds a one-element predicate (ptrue ..., vl1) and
; uses a merging mov of the scalar held in w8.
define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
|
|
; CHECK-LABEL: test_lane0_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.b, vl1
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: mov z0.b, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
|
|
ret <vscale x 16 x i8> %b
|
|
}
|
|
|
|
; Insert the constant i16 30 into lane 0 of a <vscale x 8 x i16> vector.
; Same shape as the i8 case, with .h elements in the expected output.
define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
|
|
; CHECK-LABEL: test_lane0_8xi16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl1
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: mov z0.h, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
|
|
ret <vscale x 8 x i16> %b
|
|
}
|
|
|
|
; Insert the constant i32 30 into lane 0 of a <vscale x 4 x i32> vector.
; The expected output uses a vl1 predicate and a merging mov on .s elements.
define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
|
|
; CHECK-LABEL: test_lane0_4xi32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl1
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: mov z0.s, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
|
|
ret <vscale x 4 x i32> %b
|
|
}
|
|
|
|
; Insert the constant i64 30 into lane 0 of a <vscale x 2 x i64> vector.
; The constant is materialised in w8 and the merging mov reads it as x8.
define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
|
|
; CHECK-LABEL: test_lane0_2xi64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl1
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: mov z0.d, p0/m, x8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
|
|
ret <vscale x 2 x i64> %b
|
|
}
|
|
|
|
; Insert the constant double 1.0 into lane 0 of a <vscale x 2 x double> vector.
; The expected output materialises 1.0 with fmov into d1 and merges it from
; z1.d under a vl1 predicate.
define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
|
|
; CHECK-LABEL: test_lane0_2xf64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov d1, #1.00000000
|
|
; CHECK-NEXT: ptrue p0.d, vl1
|
|
; CHECK-NEXT: mov z0.d, p0/m, z1.d
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
|
|
ret <vscale x 2 x double> %b
|
|
}
|
|
|
|
; Insert the constant float 1.0 into lane 0 of a <vscale x 4 x float> vector.
; The expected output materialises 1.0 in s1 and merges it from z1.s under a
; vl1 predicate.
define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
|
|
; CHECK-LABEL: test_lane0_4xf32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov s1, #1.00000000
|
|
; CHECK-NEXT: ptrue p0.s, vl1
|
|
; CHECK-NEXT: mov z0.s, p0/m, z1.s
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
|
|
ret <vscale x 4 x float> %b
|
|
}
|
|
|
|
; Insert the constant half 1.0 into lane 0 of a <vscale x 8 x half> vector.
; The expected output materialises 1.0 in h1 and merges it from z1.h under a
; vl1 predicate.
define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
|
|
; CHECK-LABEL: test_lane0_8xf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov h1, #1.00000000
|
|
; CHECK-NEXT: ptrue p0.h, vl1
|
|
; CHECK-NEXT: mov z0.h, p0/m, z1.h
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
|
|
ret <vscale x 8 x half> %b
|
|
}
|
|
|
|
; Insert the bfloat argument %x into lane 0 of a <vscale x 8 x bfloat> vector.
; The scalar arrives in h1, so the expected output only needs a vl1 predicate
; and a merging mov from z1.h.
define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
|
|
; CHECK-LABEL: test_lane0_8xbf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl1
|
|
; CHECK-NEXT: // kill: def $h1 killed $h1 def $z1
|
|
; CHECK-NEXT: mov z0.h, p0/m, z1.h
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
|
|
ret <vscale x 8 x bfloat> %b
|
|
}
|
|
|
|
; Undefined lane insert
|
|
; Insert the constant i64 30 at constant lane 4 of a <vscale x 2 x i64> vector.
; The expected output selects the lane by comparing an index vector
; (index z1.d, #0, #1) against a splat of 4, then does a merging mov under
; the resulting predicate.
define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
|
|
; CHECK-LABEL: test_lane4_2xi64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-NEXT: index z1.d, #0, #1
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: mov z2.d, x8
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
|
|
; CHECK-NEXT: mov z0.d, p0/m, x8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
|
|
ret <vscale x 2 x i64> %b
|
|
}
|
|
|
|
; Undefined lane insert
|
|
; Insert the constant half 1.0 at constant lane 9 of a <vscale x 8 x half>
; vector. The expected output builds the lane predicate with index + cmpeq
; against a splat of 9 and merges the scalar h1 into z0.h.
define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
|
|
; CHECK-LABEL: test_lane9_8xf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #9 // =0x9
|
|
; CHECK-NEXT: index z1.h, #0, #1
|
|
; CHECK-NEXT: ptrue p0.h
|
|
; CHECK-NEXT: mov z2.h, w8
|
|
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
|
|
; CHECK-NEXT: fmov h1, #1.00000000
|
|
; CHECK-NEXT: mov z0.h, p0/m, h1
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
|
|
ret <vscale x 8 x half> %b
|
|
}
|
|
|
|
; Insert the bfloat argument %x at constant lane 9 of a <vscale x 8 x bfloat>
; vector. Same index + cmpeq lane selection as the half case; the scalar is
; already in h1, so no fmov is expected.
define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
|
|
; CHECK-LABEL: test_lane9_8xbf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #9 // =0x9
|
|
; CHECK-NEXT: index z2.h, #0, #1
|
|
; CHECK-NEXT: ptrue p0.h
|
|
; CHECK-NEXT: mov z3.h, w8
|
|
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
|
|
; CHECK-NEXT: mov z0.h, p0/m, h1
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
|
|
ret <vscale x 8 x bfloat> %b
|
|
}
|
|
|
|
; Insert the constant i8 30 at constant lane 1 of a <vscale x 16 x i8> vector.
; Unlike lane 0, the expected output uses the generic index + cmpeq lane
; selection rather than a vl1 predicate.
define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
|
|
; CHECK-LABEL: test_lane1_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #1 // =0x1
|
|
; CHECK-NEXT: index z1.b, #0, #1
|
|
; CHECK-NEXT: ptrue p0.b
|
|
; CHECK-NEXT: mov z2.b, w8
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
|
|
; CHECK-NEXT: mov z0.b, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
|
|
ret <vscale x 16 x i8> %b
|
|
}
|
|
|
|
; Insert the constant i8 30 at a runtime lane index (%x) of a
; <vscale x 16 x i8> vector. The expected output splats the index from w0
; (via w8) and compares it against an index vector to form the predicate.
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
|
|
; CHECK-LABEL: test_lanex_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.b, #0, #1
|
|
; CHECK-NEXT: mov w8, w0
|
|
; CHECK-NEXT: ptrue p0.b
|
|
; CHECK-NEXT: mov z2.b, w8
|
|
; CHECK-NEXT: mov w8, #30 // =0x1e
|
|
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
|
|
; CHECK-NEXT: mov z0.b, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
|
|
ret <vscale x 16 x i8> %b
|
|
}
|
|
|
|
|
|
; Redundant lane insert
|
|
; Extract lane 2 of %a and insert it back into lane 2 of the same vector.
; The expected output contains only ret, i.e. the pair folds away entirely.
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
|
|
; CHECK-LABEL: extract_insert_4xi32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ret
|
|
%b = extractelement <vscale x 4 x i32> %a, i32 2
|
|
%c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
|
|
ret <vscale x 4 x i32> %c
|
|
}
|
|
|
|
; Insert the i16 argument %a at constant lane 6 of a poison
; <vscale x 8 x i16> vector. The expected output still uses index + cmpeq
; lane selection and a merging mov of w0 (z0 doubles as the index vector).
define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
|
|
; CHECK-LABEL: test_lane6_undef_8xi16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #6 // =0x6
|
|
; CHECK-NEXT: index z0.h, #0, #1
|
|
; CHECK-NEXT: ptrue p0.h
|
|
; CHECK-NEXT: mov z1.h, w8
|
|
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
|
|
; CHECK-NEXT: mov z0.h, p0/m, w0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x i16> poison, i16 %a, i32 6
|
|
ret <vscale x 8 x i16> %b
|
|
}
|
|
|
|
; Insert the i8 argument %a into lane 0 of a poison <vscale x 16 x i8>
; vector. The expected output is a single fmov s0, w0 with no predicate.
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
|
|
; CHECK-LABEL: test_lane0_undef_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov s0, w0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
|
|
ret <vscale x 16 x i8> %b
|
|
}
|
|
|
|
; Extract lane 0 of %b and insert it into lane 0 of %a.
; The expected output reads the element with fmov w8, s1 and merges it into
; z0.b under a vl1 predicate.
define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
|
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov w8, s1
|
|
; CHECK-NEXT: ptrue p0.b, vl1
|
|
; CHECK-NEXT: mov z0.b, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%c = extractelement <vscale x 16 x i8> %b, i32 0
|
|
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
|
|
ret <vscale x 16 x i8> %d
|
|
}
|
|
|
|
; Extract lane 64 of %b and insert it into lane 64 of %a.
; In the expected output the extract uses whilels + lastb and the insert
; uses index + cmpeq lane selection; both reuse the constant 64 held in w8.
define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
|
; CHECK-LABEL: test_insert64_of_extract64_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #64 // =0x40
|
|
; CHECK-NEXT: whilels p0.b, xzr, x8
|
|
; CHECK-NEXT: mov z2.b, w8
|
|
; CHECK-NEXT: lastb w9, p0, z1.b
|
|
; CHECK-NEXT: index z1.b, #0, #1
|
|
; CHECK-NEXT: ptrue p0.b
|
|
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
|
|
; CHECK-NEXT: mov z0.b, p0/m, w9
|
|
; CHECK-NEXT: ret
|
|
%c = extractelement <vscale x 16 x i8> %b, i32 64
|
|
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
|
|
ret <vscale x 16 x i8> %d
|
|
}
|
|
|
|
; Extract lane 1 of %b and insert it into lane 3 of %a.
; In the expected output the extract is a umov from v1.b[1] and the insert
; uses index + cmpeq lane selection against a splat of 3.
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
|
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #3 // =0x3
|
|
; CHECK-NEXT: index z2.b, #0, #1
|
|
; CHECK-NEXT: ptrue p0.b
|
|
; CHECK-NEXT: mov z3.b, w8
|
|
; CHECK-NEXT: umov w8, v1.b[1]
|
|
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
|
|
; CHECK-NEXT: mov z0.b, p0/m, w8
|
|
; CHECK-NEXT: ret
|
|
%c = extractelement <vscale x 16 x i8> %b, i32 1
|
|
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
|
|
ret <vscale x 16 x i8> %d
|
|
}
|
|
|
|
; Insert the half argument into lane 0 of a poison <vscale x 8 x half> vector.
; The expected output has no instructions besides ret: only a register-kill
; annotation treating h0 as part of z0.
define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv8f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x half> poison, half %a, i32 0
|
|
ret <vscale x 8 x half> %b
|
|
}
|
|
|
|
; Insert the half argument into lane 0 of a poison <vscale x 4 x half> vector.
; The expected output is only a register-kill annotation (h0 -> z0) and ret.
define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv4f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 4 x half> poison, half %a, i32 0
|
|
ret <vscale x 4 x half> %b
|
|
}
|
|
|
|
; Insert the half argument into lane 0 of a poison <vscale x 2 x half> vector.
; The expected output is only a register-kill annotation (h0 -> z0) and ret.
define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv2f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x half> poison, half %a, i32 0
|
|
ret <vscale x 2 x half> %b
|
|
}
|
|
|
|
; Insert the bfloat argument into lane 0 of a poison <vscale x 8 x bfloat>
; vector. The expected output is only a register-kill annotation (h0 -> z0)
; and ret.
define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 8 x bfloat> poison, bfloat %a, i32 0
|
|
ret <vscale x 8 x bfloat> %b
|
|
}
|
|
|
|
; Insert the bfloat argument into lane 0 of a poison <vscale x 4 x bfloat>
; vector. The expected output is only a register-kill annotation (h0 -> z0)
; and ret.
define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 4 x bfloat> poison, bfloat %a, i32 0
|
|
ret <vscale x 4 x bfloat> %b
|
|
}
|
|
|
|
; Insert the bfloat argument into lane 0 of a poison <vscale x 2 x bfloat>
; vector. The expected output is only a register-kill annotation (h0 -> z0)
; and ret.
define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x bfloat> poison, bfloat %a, i32 0
|
|
ret <vscale x 2 x bfloat> %b
|
|
}
|
|
|
|
; Insert the float argument into lane 0 of a poison <vscale x 4 x float>
; vector. The expected output is only a register-kill annotation (s0 -> z0)
; and ret.
define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv4f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 4 x float> poison, float %a, i32 0
|
|
ret <vscale x 4 x float> %b
|
|
}
|
|
|
|
; Insert the float argument into lane 0 of a poison <vscale x 2 x float>
; vector. The expected output is only a register-kill annotation (s0 -> z0)
; and ret.
define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv2f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x float> poison, float %a, i32 0
|
|
ret <vscale x 2 x float> %b
|
|
}
|
|
|
|
; Insert the double argument into lane 0 of a poison <vscale x 2 x double>
; vector. The expected output is only a register-kill annotation (d0 -> z0)
; and ret.
define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
|
|
; CHECK-LABEL: test_insert_into_undef_nxv2f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: ret
|
|
%b = insertelement <vscale x 2 x double> poison, double %a, i32 0
|
|
ret <vscale x 2 x double> %b
|
|
}
|
|
|
|
; Insert scalar at index
|
|
; Insert the half %h at runtime index %idx into a poison <vscale x 2 x half>
; vector. In the expected output the lane predicate is computed on .d
; elements (index vector vs. splat of x0) while the merging mov writes z0.h.
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv2f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.d, #0, #1
|
|
; CHECK-NEXT: mov z2.d, x0
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
|
|
ret <vscale x 2 x half> %res
|
|
}
|
|
|
|
; Insert the half %h at runtime index %idx into a poison <vscale x 4 x half>
; vector. In the expected output the lane predicate is computed on .s
; elements (index vector vs. splat of w0) while the merging mov writes z0.h.
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv4f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.s, #0, #1
|
|
; CHECK-NEXT: mov z2.s, w0
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
|
|
ret <vscale x 4 x half> %res
|
|
}
|
|
|
|
; Insert the half %h at runtime index %idx into a poison <vscale x 8 x half>
; vector. In the expected output both the lane predicate and the merging mov
; operate on .h elements; the index is splatted from w0.
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv8f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.h, #0, #1
|
|
; CHECK-NEXT: mov z2.h, w0
|
|
; CHECK-NEXT: ptrue p0.h
|
|
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
|
|
ret <vscale x 8 x half> %res
|
|
}
|
|
|
|
; Insert the bfloat %h at runtime index %idx into a poison
; <vscale x 2 x bfloat> vector. In the expected output the lane predicate is
; computed on .d elements (splat of x0) and the merging mov writes z0.h.
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.d, #0, #1
|
|
; CHECK-NEXT: mov z2.d, x0
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
|
|
ret <vscale x 2 x bfloat> %res
|
|
}
|
|
|
|
; Insert the bfloat %h at runtime index %idx into a poison
; <vscale x 4 x bfloat> vector. In the expected output the lane predicate is
; computed on .s elements (splat of w0) and the merging mov writes z0.h.
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.s, #0, #1
|
|
; CHECK-NEXT: mov z2.s, w0
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
|
|
ret <vscale x 4 x bfloat> %res
|
|
}
|
|
|
|
; Insert the bfloat %h at runtime index %idx into a poison
; <vscale x 8 x bfloat> vector. In the expected output both the lane
; predicate and the merging mov operate on .h elements (splat of w0).
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.h, #0, #1
|
|
; CHECK-NEXT: mov z2.h, w0
|
|
; CHECK-NEXT: ptrue p0.h
|
|
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
|
|
; CHECK-NEXT: mov z0.h, p0/m, h0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
|
|
ret <vscale x 8 x bfloat> %res
|
|
}
|
|
|
|
; Insert the float %f at runtime index %idx into a poison
; <vscale x 2 x float> vector. In the expected output the lane predicate is
; computed on .d elements (splat of x0) and the merging mov writes z0.s.
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv2f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.d, #0, #1
|
|
; CHECK-NEXT: mov z2.d, x0
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
|
|
; CHECK-NEXT: mov z0.s, p0/m, s0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
|
|
ret <vscale x 2 x float> %res
|
|
}
|
|
|
|
; Insert the float %f at runtime index %idx into a poison
; <vscale x 4 x float> vector. In the expected output both the lane
; predicate and the merging mov operate on .s elements (splat of w0).
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv4f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.s, #0, #1
|
|
; CHECK-NEXT: mov z2.s, w0
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
|
|
; CHECK-NEXT: mov z0.s, p0/m, s0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
|
|
ret <vscale x 4 x float> %res
|
|
}
|
|
|
|
; Insert the double %d at runtime index %idx into a poison
; <vscale x 2 x double> vector. In the expected output both the lane
; predicate and the merging mov operate on .d elements (splat of x0).
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
|
|
; CHECK-LABEL: test_insert_with_index_nxv2f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z1.d, #0, #1
|
|
; CHECK-NEXT: mov z2.d, x0
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
|
|
; CHECK-NEXT: mov z0.d, p0/m, d0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x double> poison, double %d, i64 %idx
|
|
ret <vscale x 2 x double> %res
|
|
}
|
|
|
|
; Predicate insert
|
|
; Insert the i1 argument %elt into lane 0 of a <vscale x 2 x i1> predicate.
; The expected output expands the predicate to a 0/1 vector
; (mov z0.d, p0/z, #1), merges x0 into lane 0 under a vl1 predicate, masks
; with #0x1 and converts back to a predicate with cmpne against #0.
define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
|
|
; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: ptrue p0.d, vl1
|
|
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
|
; CHECK-NEXT: mov z0.d, p0/m, x0
|
|
; CHECK-NEXT: ptrue p0.d
|
|
; CHECK-NEXT: and z0.d, z0.d, #0x1
|
|
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
|
|
ret <vscale x 2 x i1> %res
|
|
}
|
|
|
|
; Insert the i1 argument %elt at constant lane 2 of a <vscale x 4 x i1>
; predicate. The expected output selects lane 2 with index + cmpeq, expands
; the input predicate to a 0/1 vector, merges w0 under the lane predicate,
; masks with #0x1 and converts back with cmpne against #0.
define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
|
|
; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #2 // =0x2
|
|
; CHECK-NEXT: index z0.s, #0, #1
|
|
; CHECK-NEXT: ptrue p1.s
|
|
; CHECK-NEXT: mov z1.s, w8
|
|
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
|
|
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.s, p2/m, w0
|
|
; CHECK-NEXT: and z0.s, z0.s, #0x1
|
|
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
|
|
ret <vscale x 4 x i1> %res
|
|
}
|
|
|
|
; Insert the constant i1 1 at runtime index %idx of a <vscale x 8 x i1>
; predicate (here the inserted value, not the index, is the immediate).
; The expected output selects the lane with index + cmpeq on the splatted
; index, merges the constant 1 from w8 into the expanded 0/1 vector, then
; masks and converts back with cmpne against #0.
define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
|
|
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z0.h, #0, #1
|
|
; CHECK-NEXT: mov w8, w0
|
|
; CHECK-NEXT: ptrue p1.h
|
|
; CHECK-NEXT: mov z1.h, w8
|
|
; CHECK-NEXT: mov w8, #1 // =0x1
|
|
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
|
|
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.h, p2/m, w8
|
|
; CHECK-NEXT: and z0.h, z0.h, #0x1
|
|
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
|
|
ret <vscale x 8 x i1> %res
|
|
}
|
|
|
|
; Insert the constant i1 0 at constant lane 4 of a <vscale x 16 x i1>
; predicate. The expected output selects lane 4 with index + cmpeq, merges
; zero (w8 loaded from wzr) into the expanded 0/1 vector, then masks and
; converts back with cmpne against #0.
define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
|
|
; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-NEXT: index z0.b, #0, #1
|
|
; CHECK-NEXT: ptrue p1.b
|
|
; CHECK-NEXT: mov z1.b, w8
|
|
; CHECK-NEXT: mov w8, wzr
|
|
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
|
|
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.b, p2/m, w8
|
|
; CHECK-NEXT: and z0.b, z0.b, #0x1
|
|
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
|
|
ret <vscale x 16 x i1> %res
|
|
}
|
|
|
|
|
|
; Insert the i1 argument %elt at runtime index %idx of a <vscale x 2 x i1>
; predicate. The expected output splats the index (w1, via x8) for an
; index + cmpeq lane predicate on .d elements, merges x0 into the expanded
; 0/1 vector, then masks and converts back with cmpne against #0.
define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
|
|
; CHECK-LABEL: test_predicate_insert_2xi1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z0.d, #0, #1
|
|
; CHECK-NEXT: mov w8, w1
|
|
; CHECK-NEXT: ptrue p1.d
|
|
; CHECK-NEXT: mov z1.d, x8
|
|
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
|
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z1.d
|
|
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.d, p2/m, x0
|
|
; CHECK-NEXT: and z0.d, z0.d, #0x1
|
|
; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
|
|
ret <vscale x 2 x i1> %res
|
|
}
|
|
|
|
; Insert the i1 argument %elt at runtime index %idx of a <vscale x 4 x i1>
; predicate. Same expected sequence as the 2xi1 case, on .s elements with
; the index splatted from w1 (via w8) and the value merged from w0.
define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
|
|
; CHECK-LABEL: test_predicate_insert_4xi1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z0.s, #0, #1
|
|
; CHECK-NEXT: mov w8, w1
|
|
; CHECK-NEXT: ptrue p1.s
|
|
; CHECK-NEXT: mov z1.s, w8
|
|
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
|
|
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.s, p2/m, w0
|
|
; CHECK-NEXT: and z0.s, z0.s, #0x1
|
|
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
|
|
ret <vscale x 4 x i1> %res
|
|
}
|
|
; Insert the i1 argument %elt at runtime index %idx of a <vscale x 8 x i1>
; predicate. Same expected sequence as the narrower cases, on .h elements
; with the index splatted from w1 (via w8) and the value merged from w0.
define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
|
|
; CHECK-LABEL: test_predicate_insert_8xi1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z0.h, #0, #1
|
|
; CHECK-NEXT: mov w8, w1
|
|
; CHECK-NEXT: ptrue p1.h
|
|
; CHECK-NEXT: mov z1.h, w8
|
|
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
|
|
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.h, p2/m, w0
|
|
; CHECK-NEXT: and z0.h, z0.h, #0x1
|
|
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
|
|
ret <vscale x 8 x i1> %res
|
|
}
|
|
|
|
; Insert the i1 argument %elt at runtime index %idx of a <vscale x 16 x i1>
; predicate. Same expected sequence as the narrower cases, on .b elements
; with the index splatted from w1 (via w8) and the value merged from w0.
define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
|
|
; CHECK-LABEL: test_predicate_insert_16xi1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: index z0.b, #0, #1
|
|
; CHECK-NEXT: mov w8, w1
|
|
; CHECK-NEXT: ptrue p1.b
|
|
; CHECK-NEXT: mov z1.b, w8
|
|
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
|
|
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z0.b, p2/m, w0
|
|
; CHECK-NEXT: and z0.b, z0.b, #0x1
|
|
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
|
|
ret <vscale x 16 x i1> %res
|
|
}
|
|
|
|
; Insert the i1 argument %elt at runtime index %idx of a <vscale x 32 x i1>
; predicate, which arrives in two predicate registers (p0 and p1).
; The function is marked uwtable and the expected output includes CFI
; directives. The expected sequence goes through the stack: it reserves two
; vector-length slots (addvl sp, sp, #-2), stores both halves expanded to 0/1
; byte vectors, clamps the index to (rdvl #2) - 1 with cmp/csel, writes the
; byte with strb, reloads both halves and converts each back to a predicate.
; NOTE(review): the .cfi_escape bytes appear to decode as
; DW_CFA_def_cfa_expression (0x0f) of length 8: DW_OP_breg31 +16,
; DW_OP_bregx reg 46 +0, DW_OP_lit16, DW_OP_mul, DW_OP_plus, matching the
; trailing "sp + 16 + 16 * VG" annotation; confirm against the DWARF opcode
; tables before relying on this.
define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
|
|
; CHECK-LABEL: test_predicate_insert_32xi1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: .cfi_offset w29, -16
|
|
; CHECK-NEXT: addvl sp, sp, #-2
|
|
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
|
|
; CHECK-NEXT: rdvl x8, #2
|
|
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
|
|
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
|
|
; CHECK-NEXT: sub x8, x8, #1
|
|
; CHECK-NEXT: mov w9, w1
|
|
; CHECK-NEXT: ptrue p1.b
|
|
; CHECK-NEXT: cmp x9, x8
|
|
; CHECK-NEXT: csel x8, x9, x8, lo
|
|
; CHECK-NEXT: mov x9, sp
|
|
; CHECK-NEXT: str z0, [sp, #1, mul vl]
|
|
; CHECK-NEXT: str z1, [sp]
|
|
; CHECK-NEXT: strb w0, [x9, x8]
|
|
; CHECK-NEXT: ldr z0, [sp]
|
|
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
|
|
; CHECK-NEXT: and z0.b, z0.b, #0x1
|
|
; CHECK-NEXT: and z1.b, z1.b, #0x1
|
|
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
|
|
; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0
|
|
; CHECK-NEXT: addvl sp, sp, #2
|
|
; CHECK-NEXT: .cfi_def_cfa wsp, 16
|
|
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
; CHECK-NEXT: .cfi_restore w29
|
|
; CHECK-NEXT: ret
|
|
%res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
|
|
ret <vscale x 32 x i1> %res
|
|
}
|