
The default legalization of unsupported vector types is to promote the integers in each lane, which leads to extra sign or zero extending and masking when moving data into and out of vectors. Switch our preferred type legalization from the default to vector widening, which keeps the data in the low lanes of the vector rather than in the low bits of each lane. The unused high lanes can be ignored. Half-wide vectors are now loaded from memory into the low 64 bits of the v128 rather than spread out among the lanes. As a result, v128.load64_splat is a much more common operation, so add new patterns to support it.

Differential Revision: https://reviews.llvm.org/D107502
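As a rough illustration of the change (not part of the commit): the function below uses the generic Clang/GCC vector extension and assumes clang targeting wasm32 with -msimd128. Under the old promotion-based legalization each 32-bit lane of the half-wide vector would be promoted to a wider integer and masked when moved into and out of the vector; with vector widening the 8 bytes stay in the low lanes of a v128, which is why v128.load64_splat becomes much more common. The exact instructions emitted depend on the surrounding code.

#include <stdint.h>

/* A "half-wide" vector: 64 bits of data on a target whose SIMD registers are
 * 128 bits wide. */
typedef uint32_t u32x2 __attribute__((vector_size(8)));

u32x2 add_halves(const u32x2 *a, const u32x2 *b) {
  /* Each 8-byte load can now land in the low 64 bits of a v128 register
   * (e.g. via v128.load64_splat) instead of being promoted lane by lane. */
  return *a + *b;
}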
// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Instructions requiring HasSIMD128 and the simd128 prefix byte
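// Every SIMD instruction carries the 0xfd SIMD prefix: the final opcode field
// is computed as 0xfd00 | (simdop & 0xff) for opcodes below 0x100 and as
// 0xfd0000 | (simdop & 0xffff) for larger opcodes.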
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
|
|
list<dag> pattern_r, string asmstr_r = "",
|
|
string asmstr_s = "", bits<32> simdop = -1> {
|
|
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
|
|
!if(!ge(simdop, 0x100),
|
|
!or(0xfd0000, !and(0xffff, simdop)),
|
|
!or(0xfd00, !and(0xff, simdop)))>,
|
|
Requires<[HasSIMD128]>;
|
|
}
|
|
|
|
defm "" : ARGUMENT<V128, v16i8>;
|
|
defm "" : ARGUMENT<V128, v8i16>;
|
|
defm "" : ARGUMENT<V128, v4i32>;
|
|
defm "" : ARGUMENT<V128, v2i64>;
|
|
defm "" : ARGUMENT<V128, v4f32>;
|
|
defm "" : ARGUMENT<V128, v2f64>;
|
|
|
|
// Constrained immediate argument types
|
|
foreach SIZE = [8, 16] in
|
|
def ImmI#SIZE : ImmLeaf<i32,
|
|
"return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
|
|
>;
|
|
foreach SIZE = [2, 4, 8, 16, 32] in
|
|
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
|
|
|
|
// Create vector with identical lanes: splat
|
|
def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>;
|
|
def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>;
|
|
def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x,
|
|
$x, $x, $x, $x)>;
|
|
def splat16 : PatFrag<(ops node:$x),
|
|
(build_vector $x, $x, $x, $x, $x, $x, $x, $x,
|
|
$x, $x, $x, $x, $x, $x, $x, $x)>;
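
// A Vec bundles the information needed to define instructions and patterns
// for one lane interpretation of the v128 register class: its value type and
// integer analogue, the lane value type, register class and width, the
// in-range lane index immediate, the splat fragment, the assembly prefix, and
// the vector with twice as many half-width lanes that it splits into (used by
// the narrowing and extending operations).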
class Vec {
|
|
ValueType vt;
|
|
ValueType int_vt;
|
|
ValueType lane_vt;
|
|
WebAssemblyRegClass lane_rc;
|
|
int lane_bits;
|
|
ImmLeaf lane_idx;
|
|
PatFrag splat;
|
|
string prefix;
|
|
Vec split;
|
|
}
|
|
|
|
def I8x16 : Vec {
|
|
let vt = v16i8;
|
|
let int_vt = vt;
|
|
let lane_vt = i32;
|
|
let lane_rc = I32;
|
|
let lane_bits = 8;
|
|
let lane_idx = LaneIdx16;
|
|
let splat = splat16;
|
|
let prefix = "i8x16";
|
|
}
|
|
|
|
def I16x8 : Vec {
|
|
let vt = v8i16;
|
|
let int_vt = vt;
|
|
let lane_vt = i32;
|
|
let lane_rc = I32;
|
|
let lane_bits = 16;
|
|
let lane_idx = LaneIdx8;
|
|
let splat = splat8;
|
|
let prefix = "i16x8";
|
|
let split = I8x16;
|
|
}
|
|
|
|
def I32x4 : Vec {
|
|
let vt = v4i32;
|
|
let int_vt = vt;
|
|
let lane_vt = i32;
|
|
let lane_rc = I32;
|
|
let lane_bits = 32;
|
|
let lane_idx = LaneIdx4;
|
|
let splat = splat4;
|
|
let prefix = "i32x4";
|
|
let split = I16x8;
|
|
}
|
|
|
|
def I64x2 : Vec {
|
|
let vt = v2i64;
|
|
let int_vt = vt;
|
|
let lane_vt = i64;
|
|
let lane_rc = I64;
|
|
let lane_bits = 64;
|
|
let lane_idx = LaneIdx2;
|
|
let splat = splat2;
|
|
let prefix = "i64x2";
|
|
let split = I32x4;
|
|
}
|
|
|
|
def F32x4 : Vec {
|
|
let vt = v4f32;
|
|
let int_vt = v4i32;
|
|
let lane_vt = f32;
|
|
let lane_rc = F32;
|
|
let lane_bits = 32;
|
|
let lane_idx = LaneIdx4;
|
|
let splat = splat4;
|
|
let prefix = "f32x4";
|
|
}
|
|
|
|
def F64x2 : Vec {
|
|
let vt = v2f64;
|
|
let int_vt = v2i64;
|
|
let lane_vt = f64;
|
|
let lane_rc = F64;
|
|
let lane_bits = 64;
|
|
let lane_idx = LaneIdx2;
|
|
let splat = splat2;
|
|
let prefix = "f64x2";
|
|
}
|
|
|
|
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
|
|
defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Load and store
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load: v128.load
|
|
let mayLoad = 1, UseNamedOperandTable = 1 in {
|
|
defm LOAD_V128_A32 :
|
|
SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
|
|
"v128.load\t$dst, ${off}(${addr})$p2align",
|
|
"v128.load\t$off$p2align", 0>;
|
|
defm LOAD_V128_A64 :
|
|
SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
|
|
"v128.load\t$dst, ${off}(${addr})$p2align",
|
|
"v128.load\t$off$p2align", 0>;
|
|
}
|
|
|
|
// Def load patterns from WebAssemblyInstrMemory.td for vector types
|
|
foreach vec = AllVecs in {
|
|
defm : LoadPatNoOffset<vec.vt, load, "LOAD_V128">;
|
|
defm : LoadPatImmOff<vec.vt, load, regPlusImm, "LOAD_V128">;
|
|
defm : LoadPatImmOff<vec.vt, load, or_is_add, "LOAD_V128">;
|
|
defm : LoadPatOffsetOnly<vec.vt, load, "LOAD_V128">;
|
|
defm : LoadPatGlobalAddrOffOnly<vec.vt, load, "LOAD_V128">;
|
|
}
|
|
|
|
// v128.loadX_splat
|
|
multiclass SIMDLoadSplat<int size, bits<32> simdop> {
|
|
let mayLoad = 1, UseNamedOperandTable = 1 in {
|
|
defm LOAD#size#_SPLAT_A32 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
|
|
(outs),
|
|
(ins P2Align:$p2align, offset32_op:$off), [],
|
|
"v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
|
|
"v128.load"#size#"_splat\t$off$p2align", simdop>;
|
|
defm LOAD#size#_SPLAT_A64 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
|
|
(outs),
|
|
(ins P2Align:$p2align, offset64_op:$off), [],
|
|
"v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
|
|
"v128.load"#size#"_splat\t$off$p2align", simdop>;
|
|
}
|
|
}
|
|
|
|
defm "" : SIMDLoadSplat<8, 7>;
|
|
defm "" : SIMDLoadSplat<16, 8>;
|
|
defm "" : SIMDLoadSplat<32, 9>;
|
|
defm "" : SIMDLoadSplat<64, 10>;
|
|
|
|
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
|
|
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
|
|
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
|
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;
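
// Note: with the switch to vector widening type legalization, loads of
// half-wide vectors keep their data in the low 64 bits of the v128, so the
// 64-bit splat load in particular is now a common way to materialize them
// (see the commit description above).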
foreach vec = AllVecs in {
|
|
defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
|
|
defm : LoadPatNoOffset<vec.vt, load_splat, inst>;
|
|
defm : LoadPatImmOff<vec.vt, load_splat, regPlusImm, inst>;
|
|
defm : LoadPatImmOff<vec.vt, load_splat, or_is_add, inst>;
|
|
defm : LoadPatOffsetOnly<vec.vt, load_splat, inst>;
|
|
defm : LoadPatGlobalAddrOffOnly<vec.vt, load_splat, inst>;
|
|
}
|
|
|
|
// Load and extend
|
|
multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> {
|
|
defvar signed = vec.prefix#".load"#loadPat#"_s";
|
|
defvar unsigned = vec.prefix#".load"#loadPat#"_u";
|
|
let mayLoad = 1, UseNamedOperandTable = 1 in {
|
|
defm LOAD_EXTEND_S_#vec#_A32 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
|
|
signed#"\t$dst, ${off}(${addr})$p2align",
|
|
signed#"\t$off$p2align", simdop>;
|
|
defm LOAD_EXTEND_U_#vec#_A32 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
|
|
unsigned#"\t$dst, ${off}(${addr})$p2align",
|
|
unsigned#"\t$off$p2align", !add(simdop, 1)>;
|
|
defm LOAD_EXTEND_S_#vec#_A64 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
|
|
signed#"\t$dst, ${off}(${addr})$p2align",
|
|
signed#"\t$off$p2align", simdop>;
|
|
defm LOAD_EXTEND_U_#vec#_A64 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
|
|
unsigned#"\t$dst, ${off}(${addr})$p2align",
|
|
unsigned#"\t$off$p2align", !add(simdop, 1)>;
|
|
}
|
|
}
|
|
|
|
defm "" : SIMDLoadExtend<I16x8, "8x8", 1>;
|
|
defm "" : SIMDLoadExtend<I32x4, "16x4", 3>;
|
|
defm "" : SIMDLoadExtend<I64x2, "32x2", 5>;
|
|
|
|
foreach vec = [I16x8, I32x4, I64x2] in
|
|
foreach exts = [["sextloadvi", "_S"],
|
|
["zextloadvi", "_U"],
|
|
["extloadvi", "_U"]] in {
|
|
defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
|
|
defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec;
|
|
defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
|
|
defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
|
|
defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
|
|
defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
|
|
defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
|
|
}
|
|
|
|
// Load lane into zero vector
|
|
multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
|
|
defvar name = "v128.load"#vec.lane_bits#"_zero";
|
|
let mayLoad = 1, UseNamedOperandTable = 1 in {
|
|
defm LOAD_ZERO_#vec#_A32 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
|
|
name#"\t$dst, ${off}(${addr})$p2align",
|
|
name#"\t$off$p2align", simdop>;
|
|
defm LOAD_ZERO_#vec#_A64 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
|
|
name#"\t$dst, ${off}(${addr})$p2align",
|
|
name#"\t$off$p2align", simdop>;
|
|
} // mayLoad = 1, UseNamedOperandTable = 1
|
|
}
|
|
|
|
defm "" : SIMDLoadZero<I32x4, 0x5c>;
|
|
defm "" : SIMDLoadZero<I64x2, 0x5d>;
|
|
|
|
// Use load_zero to load scalars into vectors as well where possible.
|
|
// TODO: i32, i16, and i8 scalars
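// (scalar_to_vector only defines the low lane, so it is safe to implement it
// with v128.load64_zero, which loads 8 bytes into the low lanes and clears
// the rest.)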
def load_scalar :
|
|
PatFrag<(ops node:$addr), (scalar_to_vector (i64 (load $addr)))>;
|
|
defm : LoadPatNoOffset<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
|
|
defm : LoadPatImmOff<v2i64, load_scalar, regPlusImm, "LOAD_ZERO_I64x2">;
|
|
defm : LoadPatImmOff<v2i64, load_scalar, or_is_add, "LOAD_ZERO_I64x2">;
|
|
defm : LoadPatOffsetOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
|
|
defm : LoadPatGlobalAddrOffOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
|
|
|
|
// TODO: f32x4 and f64x2 as well
|
|
foreach vec = [I32x4, I64x2] in {
|
|
defvar inst = "LOAD_ZERO_"#vec;
|
|
defvar pat = PatFrag<(ops node:$ptr),
|
|
(vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
|
|
defm : LoadPatNoOffset<vec.vt, pat, inst>;
|
|
defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
|
|
defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
|
|
defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
|
|
defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
|
|
}
|
|
|
|
// Load lane
|
|
multiclass SIMDLoadLane<Vec vec, bits<32> simdop> {
|
|
defvar name = "v128.load"#vec.lane_bits#"_lane";
|
|
let mayLoad = 1, UseNamedOperandTable = 1 in {
|
|
defm LOAD_LANE_#vec#_A32 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
|
|
I32:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
|
|
[], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
|
|
name#"\t$off$p2align, $idx", simdop>;
|
|
defm LOAD_LANE_#vec#_A64 :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
|
|
I64:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
|
|
[], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
|
|
name#"\t$off$p2align, $idx", simdop>;
|
|
} // mayLoad = 1, UseNamedOperandTable = 1
|
|
}
|
|
|
|
defm "" : SIMDLoadLane<I8x16, 0x54>;
|
|
defm "" : SIMDLoadLane<I16x8, 0x55>;
|
|
defm "" : SIMDLoadLane<I32x4, 0x56>;
|
|
defm "" : SIMDLoadLane<I64x2, 0x57>;
|
|
|
|
// Select loads with no constant offset.
|
|
multiclass LoadLanePatNoOffset<Vec vec, SDPatternOperator kind> {
|
|
defvar load_lane_a32 = !cast<NI>("LOAD_LANE_"#vec#"_A32");
|
|
defvar load_lane_a64 = !cast<NI>("LOAD_LANE_"#vec#"_A64");
|
|
def : Pat<(vec.vt (kind (i32 I32:$addr),
|
|
(vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
|
|
(load_lane_a32 0, 0, imm:$idx, $addr, $vec)>,
|
|
Requires<[HasAddr32]>;
|
|
def : Pat<(vec.vt (kind (i64 I64:$addr),
|
|
(vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
|
|
(load_lane_a64 0, 0, imm:$idx, $addr, $vec)>,
|
|
Requires<[HasAddr64]>;
|
|
}
|
|
|
|
def load8_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
|
|
def load16_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>;
|
|
def load32_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(vector_insert $vec, (i32 (load $ptr)), $idx)>;
|
|
def load64_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(vector_insert $vec, (i64 (load $ptr)), $idx)>;
|
|
// TODO: floating point lanes as well
|
|
|
|
defm : LoadLanePatNoOffset<I8x16, load8_lane>;
|
|
defm : LoadLanePatNoOffset<I16x8, load16_lane>;
|
|
defm : LoadLanePatNoOffset<I32x4, load32_lane>;
|
|
defm : LoadLanePatNoOffset<I64x2, load64_lane>;
|
|
|
|
// TODO: Also support the other load patterns for load_lane once the instructions
|
|
// are merged to the proposal.
|
|
|
|
// Store: v128.store
|
|
let mayStore = 1, UseNamedOperandTable = 1 in {
|
|
defm STORE_V128_A32 :
|
|
SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
|
|
"v128.store\t${off}(${addr})$p2align, $vec",
|
|
"v128.store\t$off$p2align", 11>;
|
|
defm STORE_V128_A64 :
|
|
SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
|
|
"v128.store\t${off}(${addr})$p2align, $vec",
|
|
"v128.store\t$off$p2align", 11>;
|
|
}
|
|
|
|
// Def store patterns from WebAssemblyInstrMemory.td for vector types
|
|
foreach vec = AllVecs in {
|
|
defm : StorePatNoOffset<vec.vt, store, "STORE_V128">;
|
|
defm : StorePatImmOff<vec.vt, store, regPlusImm, "STORE_V128">;
|
|
defm : StorePatImmOff<vec.vt, store, or_is_add, "STORE_V128">;
|
|
defm : StorePatOffsetOnly<vec.vt, store, "STORE_V128">;
|
|
defm : StorePatGlobalAddrOffOnly<vec.vt, store, "STORE_V128">;
|
|
}
|
|
|
|
// Store lane
|
|
multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
|
|
defvar name = "v128.store"#vec.lane_bits#"_lane";
|
|
let mayStore = 1, UseNamedOperandTable = 1 in {
|
|
defm STORE_LANE_#vec#_A32 :
|
|
SIMD_I<(outs),
|
|
(ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
|
|
I32:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
|
|
[], name#"\t${off}(${addr})$p2align, $vec, $idx",
|
|
name#"\t$off$p2align, $idx", simdop>;
|
|
defm STORE_LANE_#vec#_A64 :
|
|
    SIMD_I<(outs),
|
|
(ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
|
|
I64:$addr, V128:$vec),
|
|
(outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
|
|
[], name#"\t${off}(${addr})$p2align, $vec, $idx",
|
|
name#"\t$off$p2align, $idx", simdop>;
|
|
} // mayStore = 1, UseNamedOperandTable = 1
|
|
}
|
|
|
|
defm "" : SIMDStoreLane<I8x16, 0x58>;
|
|
defm "" : SIMDStoreLane<I16x8, 0x59>;
|
|
defm "" : SIMDStoreLane<I32x4, 0x5a>;
|
|
defm "" : SIMDStoreLane<I64x2, 0x5b>;
|
|
|
|
// Select stores with no constant offset.
|
|
multiclass StoreLanePatNoOffset<Vec vec, SDPatternOperator kind> {
|
|
def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
|
|
(!cast<NI>("STORE_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>,
|
|
Requires<[HasAddr32]>;
|
|
def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
|
|
(!cast<NI>("STORE_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>,
|
|
Requires<[HasAddr64]>;
|
|
}
|
|
|
|
def store8_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(truncstorei8 (i32 (vector_extract $vec, $idx)), $ptr)>;
|
|
def store16_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(truncstorei16 (i32 (vector_extract $vec, $idx)), $ptr)>;
|
|
def store32_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(store (i32 (vector_extract $vec, $idx)), $ptr)>;
|
|
def store64_lane :
|
|
PatFrag<(ops node:$ptr, node:$vec, node:$idx),
|
|
(store (i64 (vector_extract $vec, $idx)), $ptr)>;
|
|
// TODO: floating point lanes as well
|
|
|
|
let AddedComplexity = 1 in {
|
|
defm : StoreLanePatNoOffset<I8x16, store8_lane>;
|
|
defm : StoreLanePatNoOffset<I16x8, store16_lane>;
|
|
defm : StoreLanePatNoOffset<I32x4, store32_lane>;
|
|
defm : StoreLanePatNoOffset<I64x2, store64_lane>;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Constructing SIMD values
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Constant: v128.const
|
|
multiclass ConstVec<Vec vec, dag ops, dag pat, string args> {
|
|
let isMoveImm = 1, isReMaterializable = 1 in
|
|
defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops,
|
|
[(set V128:$dst, (vec.vt pat))],
|
|
"v128.const\t$dst, "#args,
|
|
"v128.const\t"#args, 12>;
|
|
}
|
|
|
|
defm "" : ConstVec<I8x16,
|
|
(ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
|
|
vec_i8imm_op:$i2, vec_i8imm_op:$i3,
|
|
vec_i8imm_op:$i4, vec_i8imm_op:$i5,
|
|
vec_i8imm_op:$i6, vec_i8imm_op:$i7,
|
|
vec_i8imm_op:$i8, vec_i8imm_op:$i9,
|
|
vec_i8imm_op:$iA, vec_i8imm_op:$iB,
|
|
vec_i8imm_op:$iC, vec_i8imm_op:$iD,
|
|
vec_i8imm_op:$iE, vec_i8imm_op:$iF),
|
|
(build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
|
|
ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
|
|
ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
|
|
ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
|
|
!strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
|
|
"$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
|
|
defm "" : ConstVec<I16x8,
|
|
(ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
|
|
vec_i16imm_op:$i2, vec_i16imm_op:$i3,
|
|
vec_i16imm_op:$i4, vec_i16imm_op:$i5,
|
|
vec_i16imm_op:$i6, vec_i16imm_op:$i7),
|
|
(build_vector
|
|
ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
|
|
ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
|
|
"$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
|
|
let IsCanonical = 1 in
|
|
defm "" : ConstVec<I32x4,
|
|
(ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
|
|
vec_i32imm_op:$i2, vec_i32imm_op:$i3),
|
|
(build_vector (i32 imm:$i0), (i32 imm:$i1),
|
|
(i32 imm:$i2), (i32 imm:$i3)),
|
|
"$i0, $i1, $i2, $i3">;
|
|
defm "" : ConstVec<I64x2,
|
|
(ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
|
|
(build_vector (i64 imm:$i0), (i64 imm:$i1)),
|
|
"$i0, $i1">;
|
|
defm "" : ConstVec<F32x4,
|
|
(ins f32imm_op:$i0, f32imm_op:$i1,
|
|
f32imm_op:$i2, f32imm_op:$i3),
|
|
(build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
|
|
(f32 fpimm:$i2), (f32 fpimm:$i3)),
|
|
"$i0, $i1, $i2, $i3">;
|
|
defm "" : ConstVec<F64x2,
|
|
(ins f64imm_op:$i0, f64imm_op:$i1),
|
|
(build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
|
|
"$i0, $i1">;
|
|
|
|
// Shuffle lanes: shuffle
|
|
defm SHUFFLE :
|
|
SIMD_I<(outs V128:$dst),
|
|
(ins V128:$x, V128:$y,
|
|
vec_i8imm_op:$m0, vec_i8imm_op:$m1,
|
|
vec_i8imm_op:$m2, vec_i8imm_op:$m3,
|
|
vec_i8imm_op:$m4, vec_i8imm_op:$m5,
|
|
vec_i8imm_op:$m6, vec_i8imm_op:$m7,
|
|
vec_i8imm_op:$m8, vec_i8imm_op:$m9,
|
|
vec_i8imm_op:$mA, vec_i8imm_op:$mB,
|
|
vec_i8imm_op:$mC, vec_i8imm_op:$mD,
|
|
vec_i8imm_op:$mE, vec_i8imm_op:$mF),
|
|
(outs),
|
|
(ins
|
|
vec_i8imm_op:$m0, vec_i8imm_op:$m1,
|
|
vec_i8imm_op:$m2, vec_i8imm_op:$m3,
|
|
vec_i8imm_op:$m4, vec_i8imm_op:$m5,
|
|
vec_i8imm_op:$m6, vec_i8imm_op:$m7,
|
|
vec_i8imm_op:$m8, vec_i8imm_op:$m9,
|
|
vec_i8imm_op:$mA, vec_i8imm_op:$mB,
|
|
vec_i8imm_op:$mC, vec_i8imm_op:$mD,
|
|
vec_i8imm_op:$mE, vec_i8imm_op:$mF),
|
|
[],
|
|
"i8x16.shuffle\t$dst, $x, $y, "#
|
|
"$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
|
|
"$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
|
|
"i8x16.shuffle\t"#
|
|
"$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
|
|
"$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
|
|
13>;
|
|
|
|
// Shuffles after custom lowering
|
|
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
|
|
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
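// The custom SHUFFLE node carries 18 operands: the two input vectors followed
// by the 16 lane indices as i32 immediates.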
foreach vec = AllVecs in {
|
|
def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
|
|
(i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
|
|
(i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
|
|
(i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
|
|
(i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
|
|
(i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
|
|
(i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
|
|
(i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
|
|
(i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
|
|
(SHUFFLE $x, $y,
|
|
imm:$m0, imm:$m1, imm:$m2, imm:$m3,
|
|
imm:$m4, imm:$m5, imm:$m6, imm:$m7,
|
|
imm:$m8, imm:$m9, imm:$mA, imm:$mB,
|
|
imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
|
|
}
|
|
|
|
// Swizzle lanes: i8x16.swizzle
|
|
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
|
|
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
|
|
defm SWIZZLE :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
|
|
[(set (v16i8 V128:$dst),
|
|
(wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
|
|
"i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>;
|
|
|
|
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
|
|
(SWIZZLE $src, $mask)>;
|
|
|
|
multiclass Splat<Vec vec, bits<32> simdop> {
|
|
defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x),
|
|
(outs), (ins),
|
|
[(set (vec.vt V128:$dst),
|
|
(vec.splat vec.lane_rc:$x))],
|
|
vec.prefix#".splat\t$dst, $x", vec.prefix#".splat",
|
|
simdop>;
|
|
}
|
|
|
|
defm "" : Splat<I8x16, 15>;
|
|
defm "" : Splat<I16x8, 16>;
|
|
defm "" : Splat<I32x4, 17>;
|
|
defm "" : Splat<I64x2, 18>;
|
|
defm "" : Splat<F32x4, 19>;
|
|
defm "" : Splat<F64x2, 20>;
|
|
|
|
// scalar_to_vector leaves high lanes undefined, so can be a splat
|
|
foreach vec = AllVecs in
|
|
def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
|
|
(!cast<Instruction>("SPLAT_"#vec) $x)>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Accessing lanes
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
|
|
multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> {
|
|
defm EXTRACT_LANE_#vec#suffix :
|
|
SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
|
|
(outs), (ins vec_i8imm_op:$idx), [],
|
|
vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx",
|
|
vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>;
|
|
}
|
|
|
|
defm "" : ExtractLane<I8x16, 21, "_s">;
|
|
defm "" : ExtractLane<I8x16, 22, "_u">;
|
|
defm "" : ExtractLane<I16x8, 24, "_s">;
|
|
defm "" : ExtractLane<I16x8, 25, "_u">;
|
|
defm "" : ExtractLane<I32x4, 27>;
|
|
defm "" : ExtractLane<I64x2, 29>;
|
|
defm "" : ExtractLane<F32x4, 31>;
|
|
defm "" : ExtractLane<F64x2, 33>;
|
|
|
|
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
|
|
(EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
|
|
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
|
|
(EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
|
|
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
|
|
(EXTRACT_LANE_I32x4 $vec, imm:$idx)>;
|
|
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
|
|
(EXTRACT_LANE_F32x4 $vec, imm:$idx)>;
|
|
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
|
|
(EXTRACT_LANE_I64x2 $vec, imm:$idx)>;
|
|
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
|
|
(EXTRACT_LANE_F64x2 $vec, imm:$idx)>;
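
// When the extracted i8/i16 lane is immediately sign-extended or masked, fold
// the extension into the extract by selecting extract_lane_s / extract_lane_u
// directly, so no separate extend or mask instruction is emitted.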
def : Pat<
|
|
(sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
|
|
(EXTRACT_LANE_I8x16_s $vec, imm:$idx)>;
|
|
def : Pat<
|
|
(and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
|
|
(EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
|
|
def : Pat<
|
|
(sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
|
|
(EXTRACT_LANE_I16x8_s $vec, imm:$idx)>;
|
|
def : Pat<
|
|
(and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
|
|
(EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
|
|
|
|
// Replace lane value: replace_lane
|
|
multiclass ReplaceLane<Vec vec, bits<32> simdop> {
|
|
defm REPLACE_LANE_#vec :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x),
|
|
(outs), (ins vec_i8imm_op:$idx),
|
|
[(set V128:$dst, (vector_insert
|
|
(vec.vt V128:$vec),
|
|
(vec.lane_vt vec.lane_rc:$x),
|
|
(i32 vec.lane_idx:$idx)))],
|
|
vec.prefix#".replace_lane\t$dst, $vec, $idx, $x",
|
|
vec.prefix#".replace_lane\t$idx", simdop>;
|
|
}
|
|
|
|
defm "" : ReplaceLane<I8x16, 23>;
|
|
defm "" : ReplaceLane<I16x8, 26>;
|
|
defm "" : ReplaceLane<I32x4, 28>;
|
|
defm "" : ReplaceLane<I64x2, 30>;
|
|
defm "" : ReplaceLane<F32x4, 32>;
|
|
defm "" : ReplaceLane<F64x2, 34>;
|
|
|
|
// Lower undef lane indices to zero
|
|
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
|
|
(REPLACE_LANE_I8x16 $vec, 0, $x)>;
|
|
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
|
|
(REPLACE_LANE_I16x8 $vec, 0, $x)>;
|
|
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
|
|
(REPLACE_LANE_I32x4 $vec, 0, $x)>;
|
|
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
|
|
(REPLACE_LANE_I64x2 $vec, 0, $x)>;
|
|
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
|
|
(REPLACE_LANE_F32x4 $vec, 0, $x)>;
|
|
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
|
|
(REPLACE_LANE_F64x2 $vec, 0, $x)>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Comparisons
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop> {
|
|
defm _#vec :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
|
|
[(set (vec.int_vt V128:$dst),
|
|
(setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))],
|
|
vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
|
|
vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
|
|
defm "" : SIMDCondition<I8x16, name, cond, baseInst>;
|
|
defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>;
|
|
defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>;
|
|
}
|
|
|
|
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
|
|
defm "" : SIMDCondition<F32x4, name, cond, baseInst>;
|
|
defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>;
|
|
}
|
|
|
|
// Equality: eq
|
|
let isCommutable = 1 in {
|
|
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
|
|
defm EQ : SIMDCondition<I64x2, "eq", SETEQ, 214>;
|
|
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
|
|
} // isCommutable = 1
|
|
|
|
// Non-equality: ne
|
|
let isCommutable = 1 in {
|
|
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
|
|
defm NE : SIMDCondition<I64x2, "ne", SETNE, 215>;
|
|
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
|
|
} // isCommutable = 1
|
|
|
|
// Less than: lt_s / lt_u / lt
|
|
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
|
|
defm LT_S : SIMDCondition<I64x2, "lt_s", SETLT, 216>;
|
|
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
|
|
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;
|
|
|
|
// Greater than: gt_s / gt_u / gt
|
|
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
|
|
defm GT_S : SIMDCondition<I64x2, "gt_s", SETGT, 217>;
|
|
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
|
|
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;
|
|
|
|
// Less than or equal: le_s / le_u / le
|
|
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
|
|
defm LE_S : SIMDCondition<I64x2, "le_s", SETLE, 218>;
|
|
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
|
|
defm LE : SIMDConditionFP<"le", SETOLE, 69>;
|
|
|
|
// Greater than or equal: ge_s / ge_u / ge
|
|
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
|
|
defm GE_S : SIMDCondition<I64x2, "ge_s", SETGE, 219>;
|
|
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
|
|
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;
|
|
|
|
// Lower float comparisons that don't care about NaN to standard WebAssembly
|
|
// float comparisons. These instructions are generated with nnan and in the
|
|
// target-independent expansion of unordered comparisons and ordered ne.
|
|
foreach nodes = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4],
|
|
[setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in
|
|
def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
|
|
(nodes[1] $lhs, $rhs)>;
|
|
|
|
foreach nodes = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2],
|
|
[setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in
|
|
def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
|
|
(nodes[1] $lhs, $rhs)>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Bitwise operations
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
|
|
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
|
|
(outs), (ins),
|
|
[(set (vec.vt V128:$dst),
|
|
(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
|
|
vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
|
|
vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
|
|
bit commutable = false> {
|
|
let isCommutable = commutable in
|
|
defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
|
|
(outs), (ins), [],
|
|
"v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>;
|
|
foreach vec = IntVecs in
|
|
def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
|
(!cast<NI>(NAME) $lhs, $rhs)>;
|
|
}
|
|
|
|
multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
|
|
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
|
|
[(set (vec.vt V128:$dst),
|
|
(vec.vt (node (vec.vt V128:$v))))],
|
|
vec.prefix#"."#name#"\t$dst, $v",
|
|
vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
// Bitwise logic: v128.not
|
|
defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [],
|
|
"v128.not\t$dst, $v", "v128.not", 77>;
|
|
foreach vec = IntVecs in
|
|
def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>;
|
|
|
|
// Bitwise logic: v128.and / v128.or / v128.xor
|
|
defm AND : SIMDBitwise<and, "and", 78, true>;
|
|
defm OR : SIMDBitwise<or, "or", 80, true>;
|
|
defm XOR : SIMDBitwise<xor, "xor", 81, true>;
|
|
|
|
// Bitwise logic: v128.andnot
|
|
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
|
|
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;
|
|
|
|
// Bitwise select: v128.bitselect
|
|
defm BITSELECT :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [],
|
|
"v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
|
|
|
|
foreach vec = AllVecs in
|
|
def : Pat<(vec.vt (int_wasm_bitselect
|
|
(vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))),
|
|
(BITSELECT $v1, $v2, $c)>;
|
|
|
|
// Bitselect is equivalent to (c & v1) | (~c & v2)
|
|
foreach vec = IntVecs in
|
|
def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
|
|
(and (vnot V128:$c), (vec.vt V128:$v2)))),
|
|
(BITSELECT $v1, $v2, $c)>;
|
|
|
|
// Also implement vselect in terms of bitselect
|
|
foreach vec = AllVecs in
|
|
def : Pat<(vec.vt (vselect
|
|
(vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))),
|
|
(BITSELECT $v1, $v2, $c)>;
|
|
|
|
// MVP select on v128 values
|
|
defm SELECT_V128 :
|
|
I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [],
|
|
"v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;
|
|
|
|
foreach vec = AllVecs in {
|
|
def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
|
(SELECT_V128 $lhs, $rhs, $cond)>;
|
|
|
|
// ISD::SELECT requires its operand to conform to getBooleanContents, but
|
|
// WebAssembly's select interprets any non-zero value as true, so we can fold
|
|
// a setne with 0 into a select.
|
|
def : Pat<(select
|
|
(i32 (setne I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
|
(SELECT_V128 $lhs, $rhs, $cond)>;
|
|
|
|
// And again, this time with seteq instead of setne and the arms reversed.
|
|
def : Pat<(select
|
|
(i32 (seteq I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
|
(SELECT_V128 $rhs, $lhs, $cond)>;
|
|
} // foreach vec
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Integer unary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDUnaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDUnary<I8x16, node, name, baseInst>;
|
|
defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>;
|
|
defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>;
|
|
defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
|
|
}
|
|
|
|
// Integer vector negation
|
|
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;
|
|
|
|
// Integer absolute value: abs
|
|
defm ABS : SIMDUnaryInt<abs, "abs", 96>;
|
|
|
|
// Integer negation: neg
|
|
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;
|
|
|
|
// Population count: popcnt
|
|
defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>;
|
|
|
|
// Any lane true: any_true
|
|
defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
|
|
"v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;
|
|
|
|
foreach vec = IntVecs in
|
|
def : Pat<(int_wasm_anytrue (vec.vt V128:$vec)), (ANYTRUE V128:$vec)>;
|
|
|
|
// All lanes true: all_true
|
|
multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
|
|
defm ALLTRUE_#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
|
|
[(set I32:$dst,
|
|
(i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
|
|
vec.prefix#".all_true\t$dst, $vec",
|
|
vec.prefix#".all_true", simdop>;
|
|
}
|
|
|
|
defm "" : SIMDAllTrue<I8x16, 0x63>;
|
|
defm "" : SIMDAllTrue<I16x8, 0x83>;
|
|
defm "" : SIMDAllTrue<I32x4, 0xa3>;
|
|
defm "" : SIMDAllTrue<I64x2, 0xc3>;
|
|
|
|
// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
|
|
// can be folded out
|
|
foreach reduction =
|
|
[["int_wasm_anytrue", "ANYTRUE", "I8x16"],
|
|
["int_wasm_anytrue", "ANYTRUE", "I16x8"],
|
|
["int_wasm_anytrue", "ANYTRUE", "I32x4"],
|
|
["int_wasm_anytrue", "ANYTRUE", "I64x2"],
|
|
["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
|
|
["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
|
|
["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
|
|
["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
|
|
defvar intrinsic = !cast<Intrinsic>(reduction[0]);
|
|
defvar inst = !cast<NI>(reduction[1]);
|
|
defvar vec = !cast<Vec>(reduction[2]);
|
|
def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
|
|
def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
|
|
def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
|
|
}
|
|
|
|
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
|
|
defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
|
|
[(set I32:$dst,
|
|
(i32 (int_wasm_bitmask (vec.vt V128:$vec))))],
|
|
vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask",
|
|
simdop>;
|
|
}
|
|
|
|
defm BITMASK : SIMDBitmask<I8x16, 100>;
|
|
defm BITMASK : SIMDBitmask<I16x8, 132>;
|
|
defm BITMASK : SIMDBitmask<I32x4, 164>;
|
|
defm BITMASK : SIMDBitmask<I64x2, 196>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Bit shifts
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> {
|
|
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins),
|
|
[(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))],
|
|
vec.prefix#"."#name#"\t$dst, $vec, $x",
|
|
vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDShift<I8x16, node, name, baseInst>;
|
|
defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>;
|
|
defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>;
|
|
defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>;
|
|
}
|
|
|
|
// WebAssembly SIMD shifts are nonstandard in that the shift amount is
|
|
// an i32 rather than a vector, so they need custom nodes.
|
|
def wasm_shift_t :
|
|
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
|
|
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
|
|
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
|
|
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
|
|
|
|
// Left shift by scalar: shl
|
|
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;
|
|
|
|
// Right shift by scalar: shr_s / shr_u
|
|
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
|
|
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
|
|
|
|
// Optimize away an explicit mask on a shift count.
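// The shift instructions interpret the shift count modulo the lane width, so
// masking the count with lane_bits - 1 is redundant.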
def : Pat<(wasm_shl (v16i8 V128:$lhs), (and I32:$rhs, 7)),
|
|
(SHL_I8x16 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_s (v16i8 V128:$lhs), (and I32:$rhs, 7)),
|
|
(SHR_S_I8x16 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_u (v16i8 V128:$lhs), (and I32:$rhs, 7)),
|
|
(SHR_U_I8x16 V128:$lhs, I32:$rhs)>;
|
|
|
|
def : Pat<(wasm_shl (v8i16 V128:$lhs), (and I32:$rhs, 15)),
|
|
(SHL_I16x8 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_s (v8i16 V128:$lhs), (and I32:$rhs, 15)),
|
|
(SHR_S_I16x8 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_u (v8i16 V128:$lhs), (and I32:$rhs, 15)),
|
|
(SHR_U_I16x8 V128:$lhs, I32:$rhs)>;
|
|
|
|
def : Pat<(wasm_shl (v4i32 V128:$lhs), (and I32:$rhs, 31)),
|
|
(SHL_I32x4 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)),
|
|
(SHR_S_I32x4 V128:$lhs, I32:$rhs)>;
|
|
def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)),
|
|
(SHR_U_I32x4 V128:$lhs, I32:$rhs)>;
|
|
|
|
def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
|
|
(SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
|
|
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
|
|
(SHR_S_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
|
|
def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
|
|
(SHR_U_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Integer binary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDBinaryIntNoI8x16<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
|
|
defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
|
|
defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
|
|
}
|
|
|
|
multiclass SIMDBinaryIntSmall<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDBinary<I8x16, node, name, baseInst>;
|
|
defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
|
|
}
|
|
|
|
multiclass SIMDBinaryIntNoI64x2<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
|
|
defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
|
|
}
|
|
|
|
multiclass SIMDBinaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
|
|
defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
|
|
}
|
|
|
|
// Integer addition: add / add_sat_s / add_sat_u
|
|
let isCommutable = 1 in {
|
|
defm ADD : SIMDBinaryInt<add, "add", 110>;
|
|
defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_sat_s", 111>;
|
|
defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>;
|
|
} // isCommutable = 1
|
|
|
|
// Integer subtraction: sub / sub_sat_s / sub_sat_u
|
|
defm SUB : SIMDBinaryInt<sub, "sub", 113>;
|
|
defm SUB_SAT_S :
|
|
SIMDBinaryIntSmall<int_wasm_sub_sat_signed, "sub_sat_s", 114>;
|
|
defm SUB_SAT_U :
|
|
SIMDBinaryIntSmall<int_wasm_sub_sat_unsigned, "sub_sat_u", 115>;
|
|
|
|
// Integer multiplication: mul
|
|
let isCommutable = 1 in
|
|
defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;
|
|
|
|
// Integer min_s / min_u / max_s / max_u
|
|
let isCommutable = 1 in {
|
|
defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
|
|
defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
|
|
defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
|
|
defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
|
|
} // isCommutable = 1
|
|
|
|
// Integer unsigned rounding average: avgr_u
|
|
let isCommutable = 1 in {
|
|
defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>;
|
|
defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>;
|
|
}
|
|
|
|
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs),
|
|
"return N->getFlags().hasNoUnsignedWrap();">;
foreach vec = [I8x16, I16x8] in {
|
|
defvar inst = !cast<NI>("AVGR_U_"#vec);
|
|
def : Pat<(wasm_shr_u
|
|
(add_nuw
|
|
(add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
|
(vec.splat (i32 1))),
|
|
(i32 1)),
|
|
(inst $lhs, $rhs)>;
|
|
}
|
|
|
|
// Widening dot product: i32x4.dot_i16x8_s
|
|
let isCommutable = 1 in
|
|
defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
|
|
[(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
|
|
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
|
|
186>;
|
|
|
|
// Extending multiplication: extmul_{low,high}_P, extmul_high
|
|
def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
|
def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
|
|
def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
|
|
def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
|
|
def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;
|
|
|
|
multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name,
|
|
bits<32> simdop> {
|
|
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
|
|
(outs), (ins),
|
|
[(set (vec.vt V128:$dst), (node
|
|
(vec.split.vt V128:$lhs),(vec.split.vt V128:$rhs)))],
|
|
vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
|
|
vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
class ExtMulPat<SDNode extend> :
|
|
PatFrag<(ops node:$lhs, node:$rhs),
|
|
(mul (extend $lhs), (extend $rhs))> {}
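
// Each extmul fragment is a widening multiply: both operands are sign- or
// zero-extended from the split vector type before the full-width multiply.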
def extmul_low_s : ExtMulPat<extend_low_s>;
|
|
def extmul_high_s : ExtMulPat<extend_high_s>;
|
|
def extmul_low_u : ExtMulPat<extend_low_u>;
|
|
def extmul_high_u : ExtMulPat<extend_high_u>;
|
|
|
|
defm EXTMUL_LOW_S :
|
|
SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>;
|
|
defm EXTMUL_HIGH_S :
|
|
SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>;
|
|
defm EXTMUL_LOW_U :
|
|
SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>;
|
|
defm EXTMUL_HIGH_U :
|
|
SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>;
|
|
|
|
defm EXTMUL_LOW_S :
|
|
SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>;
|
|
defm EXTMUL_HIGH_S :
|
|
SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>;
|
|
defm EXTMUL_LOW_U :
|
|
SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>;
|
|
defm EXTMUL_HIGH_U :
|
|
SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>;
|
|
|
|
defm EXTMUL_LOW_S :
|
|
SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>;
|
|
defm EXTMUL_HIGH_S :
|
|
SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>;
|
|
defm EXTMUL_LOW_U :
|
|
SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>;
|
|
defm EXTMUL_HIGH_U :
|
|
SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Floating-point unary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDUnary<F32x4, node, name, baseInst>;
|
|
defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>;
|
|
}
|
|
|
|
// Absolute value: abs
|
|
defm ABS : SIMDUnaryFP<fabs, "abs", 224>;
|
|
|
|
// Negation: neg
|
|
defm NEG : SIMDUnaryFP<fneg, "neg", 225>;
|
|
|
|
// Square root: sqrt
|
|
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
|
|
|
|
// Rounding: ceil, floor, trunc, nearest
|
|
defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>;
|
|
defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>;
|
|
defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>;
|
|
defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>;
|
|
defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
|
|
defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
|
|
defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
|
|
defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Floating-point binary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDBinaryFP<SDPatternOperator node, string name, bits<32> baseInst> {
|
|
defm "" : SIMDBinary<F32x4, node, name, baseInst>;
|
|
defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
|
|
}
|
|
|
|
// Addition: add
|
|
let isCommutable = 1 in
|
|
defm ADD : SIMDBinaryFP<fadd, "add", 228>;
|
|
|
|
// Subtraction: sub
|
|
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;
|
|
|
|
// Multiplication: mul
|
|
let isCommutable = 1 in
|
|
defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
|
|
|
|
// Division: div
|
|
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;
|
|
|
|
// NaN-propagating minimum: min
|
|
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
|
|
|
|
// NaN-propagating maximum: max
|
|
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
|
|
|
|
// Pseudo-minimum: pmin
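// (Lane-wise "$rhs < $lhs ? $rhs : $lhs"; unlike min it does not propagate
// NaNs or order -0.0 relative to +0.0.)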
def pmin : PatFrag<(ops node:$lhs, node:$rhs),
|
|
(vselect (setolt $rhs, $lhs), $rhs, $lhs)>;
|
|
defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
|
|
|
|
// Pseudo-maximum: pmax
|
|
def pmax : PatFrag<(ops node:$lhs, node:$rhs),
|
|
(vselect (setolt $lhs, $rhs), $rhs, $lhs)>;
|
|
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
|
|
|
|
// Also match the pmin/pmax cases where the operands are int vectors (but the
|
|
// comparison is still a floating point comparison). This can happen when using
|
|
// the wasm_simd128.h intrinsics because v128_t is an integer vector.
|
|
foreach vec = [F32x4, F64x2] in {
|
|
defvar pmin = !cast<NI>("PMIN_"#vec);
|
|
defvar pmax = !cast<NI>("PMAX_"#vec);
|
|
def : Pat<(vec.int_vt (vselect
|
|
(setolt (vec.vt (bitconvert V128:$rhs)),
|
|
(vec.vt (bitconvert V128:$lhs))),
|
|
V128:$rhs, V128:$lhs)),
|
|
(pmin $lhs, $rhs)>;
|
|
def : Pat<(vec.int_vt (vselect
|
|
(setolt (vec.vt (bitconvert V128:$lhs)),
|
|
(vec.vt (bitconvert V128:$rhs))),
|
|
V128:$rhs, V128:$lhs)),
|
|
(pmax $lhs, $rhs)>;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Conversions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
|
|
bits<32> simdop> {
|
|
defm op#_#vec :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
|
|
[(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
|
|
vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
|
|
}
|
|
|
|
// Floating point to integer with saturation: trunc_sat
|
|
defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
|
|
defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
|
|
|
|
// Support the saturating variety as well.
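// The trunc_sat instructions already clamp out-of-range values (and map NaN
// to zero), so the explicitly saturating fp-to-int nodes can use the same
// instructions as the ordinary conversions above.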
def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>;
|
|
def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>;
|
|
def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>;
|
|
def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>;
|
|
|
|
def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
|
def trunc_sat_zero_s :
|
|
SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
|
|
def trunc_sat_zero_u :
|
|
SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>;
|
|
defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s, "trunc_sat_zero_f64x2_s",
|
|
0xfc>;
|
|
defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u, "trunc_sat_zero_f64x2_u",
|
|
0xfd>;
|
|
|
|
// Integer to floating point: convert
|
|
def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
|
def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
|
|
def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
|
|
defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
|
|
defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
|
|
defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
|
|
defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
|
|
|
|
// Extending operations
|
|
// TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
|
|
multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
|
|
defm "" : SIMDConvert<vec, vec.split, extend_low_s,
|
|
"extend_low_"#vec.split.prefix#"_s", baseInst>;
|
|
defm "" : SIMDConvert<vec, vec.split, extend_high_s,
|
|
"extend_high_"#vec.split.prefix#"_s", !add(baseInst, 1)>;
|
|
defm "" : SIMDConvert<vec, vec.split, extend_low_u,
|
|
"extend_low_"#vec.split.prefix#"_u", !add(baseInst, 2)>;
|
|
defm "" : SIMDConvert<vec, vec.split, extend_high_u,
|
|
"extend_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>;
|
|
}
|
|
|
|
defm "" : SIMDExtend<I16x8, 0x87>;
|
|
defm "" : SIMDExtend<I32x4, 0xa7>;
|
|
defm "" : SIMDExtend<I64x2, 0xc7>;
|
|
|
|
// Narrowing operations
|
|
multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
|
|
defvar name = vec.split.prefix#".narrow_"#vec.prefix;
|
|
defm NARROW_S_#vec.split :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
|
|
[(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_signed
|
|
(vec.vt V128:$low), (vec.vt V128:$high))))],
|
|
name#"_s\t$dst, $low, $high", name#"_s", baseInst>;
|
|
defm NARROW_U_#vec.split :
|
|
SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
|
|
[(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_unsigned
|
|
(vec.vt V128:$low), (vec.vt V128:$high))))],
|
|
name#"_u\t$dst, $low, $high", name#"_u", !add(baseInst, 1)>;
|
|
}
|
|
|
|
defm "" : SIMDNarrow<I16x8, 101>;
|
|
defm "" : SIMDNarrow<I32x4, 133>;
|
|
|
|
// Bitcasts are nops
|
|
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
|
|
foreach t1 = AllVecs in
|
|
foreach t2 = AllVecs in
|
|
if !ne(t1, t2) then
|
|
def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>;
|
|
|
|
// Extended pairwise addition
|
|
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
|
|
"extadd_pairwise_i8x16_s", 0x7c>;
|
|
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
|
|
"extadd_pairwise_i8x16_u", 0x7d>;
|
|
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
|
|
"extadd_pairwise_i16x8_s", 0x7e>;
|
|
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
|
|
"extadd_pairwise_i16x8_u", 0x7f>;
|
|
|
|
// f64x2 <-> f32x4 conversions
|
|
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
|
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
|
|
defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
|
|
"demote_zero_f64x2", 0x5e>;
|
|
|
|
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
|
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
|
|
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Saturating Rounding Q-Format Multiplication
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
defm Q15MULR_SAT_S :
|
|
SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
|