[CIR][AArch64] Added vector intrinsics for shift left (#187516)

Added vector intrinsics for 
vshlq_n_s8
vshlq_n_s16
vshlq_n_s32
vshlq_n_s64
vshlq_n_u8
vshlq_n_u16
vshlq_n_u32
vshlq_n_u64

vshl_n_s8
vshl_n_s16
vshl_n_s32
vshl_n_s64
vshl_n_u8
vshl_n_u16
vshl_n_u32
vshl_n_u64

These cover all the vector intrinsics for shift left by a constant.

the method followed 

1) Quad-word vectors have the form `64x2`, `32x4`, `16x8` or `8x16`,
while the shift amount is a constant value. Shift left needs both
operands to be vectors, so we take the constant shift and convert it into
a vector of the matching form; for `64x2` we convert the constant to
`64x2`. I have learnt that this process is also called a **splat**.
2) After the splat, the lhs and rhs have the same size, hence
the shift left can be applied.
3) There is one issue though: ops[0] is not of the right type. For
quad words it falls back to the default int8x16 in the function, so I am
converting it to the required type using a bitcast. `8x16` and `64x2` have
the same total size, so the bitcast yields the vector in the right form.


Wrote the test cases for all the intrinsics listed above

#185382
This commit is contained in:
albertbolt1 2026-04-06 21:30:38 +05:30 committed by GitHub
parent 34a16392fa
commit 8d7823ea8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 262 additions and 230 deletions

View File

@ -182,6 +182,31 @@ static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createVecShuffle(loc, v, shuffleMask);
}
/// Splat a scalar shift amount across every lane of `vecTy`.
///
/// If the type of `shiftVal` differs from the element type of `vecTy`, it is
/// first integer-cast to that element type. The (possibly cast) scalar is then
/// broadcast with a `cir::VecSplatOp`, and the resulting value of type `vecTy`
/// is returned.
static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder,
                                       mlir::Value shiftVal,
                                       cir::VectorType vecTy,
                                       mlir::Location loc) {
  mlir::Type laneTy = vecTy.getElementType();
  mlir::Value laneAmt = shiftVal;
  // The splat operand is given the lane type before broadcasting.
  if (laneAmt.getType() != laneTy)
    laneAmt = builder.createIntCast(laneAmt, laneTy);
  return cir::VecSplatOp::create(builder, loc, vecTy, laneAmt);
}
/// Emit a vector shift of `shifTgt` by the scalar amount `shiftAmt`.
///
/// The scalar amount is splatted into a vector of `resTy`, `shifTgt` is
/// bitcast to `resTy`, and both are fed to a `cir::ShiftOp` whose direction
/// is selected by `shiftLeft`. Returns the result of that op.
static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder,
                                       mlir::Location loc,
                                       cir::VectorType resTy,
                                       mlir::Value shifTgt,
                                       mlir::Value shiftAmt, bool shiftLeft) {
  // Build the amount vector first, then the bitcast, so the ops are emitted
  // in the same order as before.
  mlir::Value amountVec = emitNeonShiftVector(builder, shiftAmt, resTy, loc);
  mlir::Value typedTarget = builder.createBitcast(shifTgt, resTy);
  return cir::ShiftOp::create(builder, loc, resTy, typedTarget, amountVec,
                              shiftLeft);
}
static mlir::Value emitCommonNeonBuiltinExpr(
CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
@ -413,8 +438,14 @@ static mlir::Value emitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vsha512h2q_u64:
case NEON::BI__builtin_neon_vsha512su0q_u64:
case NEON::BI__builtin_neon_vsha512su1q_u64:
cgf.cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented AArch64 builtin call: ") +
ctx.BuiltinInfo.getName(builtinID));
return mlir::Value{};
case NEON::BI__builtin_neon_vshl_n_v:
case NEON::BI__builtin_neon_vshlq_n_v:
return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1],
/*shiftLeft=*/true);
case NEON::BI__builtin_neon_vshll_n_v:
case NEON::BI__builtin_neon_vshrn_n_v:
case NEON::BI__builtin_neon_vshr_n_v:
@ -1961,7 +1992,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
// defer to common code if it's been added to our special map.
builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
aarch64SIMDIntrinsicsProvenSorted);
if (builtin)
return emitCommonNeonBuiltinExpr(
*this, builtin->BuiltinID, builtin->LLVMIntrinsic,

View File

@ -6342,166 +6342,6 @@ float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
return vmulxq_f64(a, b);
}
// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_s8(
// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
//
int8x8_t test_vshl_n_s8(int8x8_t a) {
return vshl_n_s8(a, 3);
}
// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_s16(
// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
//
int16x4_t test_vshl_n_s16(int16x4_t a) {
return vshl_n_s16(a, 3);
}
// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_s32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
//
int32x2_t test_vshl_n_s32(int32x2_t a) {
return vshl_n_s32(a, 3);
}
// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_s8(
// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
//
int8x16_t test_vshlq_n_s8(int8x16_t a) {
return vshlq_n_s8(a, 3);
}
// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_s16(
// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
//
int16x8_t test_vshlq_n_s16(int16x8_t a) {
return vshlq_n_s16(a, 3);
}
// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_s32(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
//
int32x4_t test_vshlq_n_s32(int32x4_t a) {
return vshlq_n_s32(a, 3);
}
// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_s64(
// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
//
int64x2_t test_vshlq_n_s64(int64x2_t a) {
return vshlq_n_s64(a, 3);
}
// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_u8(
// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
//
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
return vshl_n_u8(a, 3);
}
// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_u16(
// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
//
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
return vshl_n_u16(a, 3);
}
// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_u32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
//
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
return vshl_n_u32(a, 3);
}
// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_u8(
// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
//
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
return vshlq_n_u8(a, 3);
}
// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_u16(
// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
//
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
return vshlq_n_u16(a, 3);
}
// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_u32(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
//
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
return vshlq_n_u32(a, 3);
}
// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_u64(
// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
//
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
return vshlq_n_u64(a, 3);
}
// CHECK-LABEL: define dso_local <8 x i8> @test_vshr_n_s8(
// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@ -17413,30 +17253,6 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
return vrsra_n_u64(a, b, 1);
}
// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_s64(
// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
//
int64x1_t test_vshl_n_s64(int64x1_t a) {
return vshl_n_s64(a, 1);
}
// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_u64(
// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
//
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
return vshl_n_u64(a, 1);
}
// CHECK-LABEL: define dso_local i8 @test_vqshlb_n_s8(
// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]

View File

@ -936,51 +936,6 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
// LLVM-NEXT: ret <4 x i32> [[ADD_I]]
return vabaq_u32(v1, v2, v3);
}
//===------------------------------------------------------===//
// 2.1.3.1.1. Vector Shift Left
//===------------------------------------------------------===//
// ALL-LABEL: test_vshld_n_s64
int64_t test_vshld_n_s64(int64_t a) {
// CIR: cir.shift(left, {{.*}})
// LLVM-SAME: i64 {{.*}} [[A:%.*]])
// LLVM: [[SHL_N:%.*]] = shl i64 [[A]], 1
// LLVM: ret i64 [[SHL_N]]
return (int64_t)vshld_n_s64(a, 1);
}
// ALL-LABEL: test_vshld_n_u64
int64_t test_vshld_n_u64(int64_t a) {
// CIR: cir.shift(left, {{.*}})
// LLVM-SAME: i64 {{.*}} [[A:%.*]])
// LLVM: [[SHL_N:%.*]] = shl i64 [[A]], 1
// LLVM: ret i64 [[SHL_N]]
return (int64_t)vshld_n_u64(a, 1);
}
// LLVM-LABEL: test_vshld_s64
// CIR-LABEL: vshld_s64
int64_t test_vshld_s64(int64_t a,int64_t b) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sshl" %{{.*}}, %{{.*}} : (!s64i, !s64i) -> !s64i
// LLVM-SAME: i64 {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// LLVM: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[A]], i64 [[B]])
// LLVM: ret i64 [[VSHLD_S64_I]]
return (int64_t)vshld_s64(a, b);
}
// LLVM-LABEL: test_vshld_u64
// CIR-LABEL: vshld_u64
int64_t test_vshld_u64(int64_t a,int64_t b) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.ushl" %{{.*}}, %{{.*}} : (!u64i, !s64i) -> !u64i
// LLVM-SAME: i64 {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// LLVM: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[A]], i64 [[B]])
// LLVM: ret i64 [[VSHLD_S64_I]]
return (int64_t)vshld_u64(a, b);
}
//===----------------------------------------------------------------------===//
// 2.1.1.7. Maximum
@ -1518,3 +1473,234 @@ poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
// LLVM-NEXT: ret <8 x i16> [[VMULL_I_I]]
return vmull_high_p8(a, b);
}
//===------------------------------------------------------===//
// 2.1.3.1.1. Vector Shift Left
//===------------------------------------------------------===//
// Scalar vshld_n_s64 with an immediate of 1: the CIR check expects a left
// cir.shift and the LLVM checks expect `shl i64` of the argument by 1.
// ALL-LABEL: test_vshld_n_s64
int64_t test_vshld_n_s64(int64_t a) {
// CIR: cir.shift(left, {{.*}})
// LLVM-SAME: i64 {{.*}} [[A:%.*]])
// LLVM: [[SHL_N:%.*]] = shl i64 [[A]], 1
// LLVM: ret i64 [[SHL_N]]
return (int64_t)vshld_n_s64(a, 1);
}
// Scalar vshld_n_u64 with an immediate of 1: the CIR check expects a left
// cir.shift and the LLVM checks expect `shl i64` of the argument by 1.
// ALL-LABEL: test_vshld_n_u64
int64_t test_vshld_n_u64(int64_t a) {
// CIR: cir.shift(left, {{.*}})
// LLVM-SAME: i64 {{.*}} [[A:%.*]])
// LLVM: [[SHL_N:%.*]] = shl i64 [[A]], 1
// LLVM: ret i64 [[SHL_N]]
return (int64_t)vshld_n_u64(a, 1);
}
// Scalar vshld_s64 with a run-time shift amount: expected to become a call to
// the aarch64.neon.sshl intrinsic on two 64-bit signed operands.
// LLVM-LABEL: test_vshld_s64
// CIR-LABEL: vshld_s64
int64_t test_vshld_s64(int64_t a,int64_t b) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sshl" %{{.*}}, %{{.*}} : (!s64i, !s64i) -> !s64i
// LLVM-SAME: i64 {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// LLVM: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[A]], i64 [[B]])
// LLVM: ret i64 [[VSHLD_S64_I]]
return (int64_t)vshld_s64(a, b);
}
// Scalar vshld_u64 with a run-time shift amount: expected to become a call to
// the aarch64.neon.ushl intrinsic taking an unsigned 64-bit value and a signed
// 64-bit shift amount.
// LLVM-LABEL: test_vshld_u64
// CIR-LABEL: vshld_u64
int64_t test_vshld_u64(int64_t a,int64_t b) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.ushl" %{{.*}}, %{{.*}} : (!u64i, !s64i) -> !u64i
// LLVM-SAME: i64 {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// LLVM: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[A]], i64 [[B]])
// LLVM: ret i64 [[VSHLD_S64_I]]
return (int64_t)vshld_u64(a, b);
}
// vshlq_n_s8 with an immediate of 3: expects a left cir.shift on 16 x s8
// lanes; in LLVM IR the shl operates directly on the argument with a
// splat (i8 3) amount.
// ALL-LABEL: test_vshlq_n_s8
int8x16_t test_vshlq_n_s8(int8x16_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !s8i>, %{{.*}} : !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
// LLVM: ret <16 x i8> [[VSHL_N]]
//
return vshlq_n_s8(a, 3);
}
// vshlq_n_s16 with an immediate of 3: expects a left cir.shift on 8 x s16
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i16 3).
// ALL-LABEL: test_vshlq_n_s16
int16x8_t test_vshlq_n_s16(int16x8_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, %{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// LLVM: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// LLVM: ret <8 x i16> [[VSHL_N]]
return vshlq_n_s16(a, 3);
}
// vshlq_n_s32 with an immediate of 3: expects a left cir.shift on 4 x s32
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i32 3).
// ALL-LABEL: test_vshlq_n_s32
int32x4_t test_vshlq_n_s32(int32x4_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, %{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// LLVM: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// LLVM: ret <4 x i32> [[VSHL_N]]
return vshlq_n_s32(a, 3);
}
// vshlq_n_s64 with an immediate of 3: expects a left cir.shift on 2 x s64
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i64 3).
// ALL-LABEL: test_vshlq_n_s64
int64x2_t test_vshlq_n_s64(int64x2_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, %{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i>
// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// LLVM: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// LLVM: ret <2 x i64> [[VSHL_N]]
return vshlq_n_s64(a, 3);
}
// vshlq_n_u8 with an immediate of 3: expects a left cir.shift on 16 x u8
// lanes; in LLVM IR the shl operates directly on the argument with a
// splat (i8 3) amount.
// ALL-LABEL: test_vshlq_n_u8
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !u8i>, %{{.*}} : !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
// LLVM: ret <16 x i8> [[VSHL_N]]
return vshlq_n_u8(a, 3);
}
// vshlq_n_u16 with an immediate of 3: expects a left cir.shift on 8 x u16
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i16 3).
// ALL-LABEL: test_vshlq_n_u16
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, %{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i>
// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// LLVM: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// LLVM: ret <8 x i16> [[VSHL_N]]
return vshlq_n_u16(a, 3);
}
// vshlq_n_u32 with an immediate of 3: expects a left cir.shift on 4 x u32
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i32 3).
// ALL-LABEL: test_vshlq_n_u32
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u32i>, %{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// LLVM: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// LLVM: ret <4 x i32> [[VSHL_N]]
return vshlq_n_u32(a, 3);
}
// vshlq_n_u64 with an immediate of 3: expects a left cir.shift on 2 x u64
// lanes; in LLVM IR the argument round-trips through <16 x i8> bitcasts
// before a shl by splat (i64 3).
// ALL-LABEL: test_vshlq_n_u64
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, %{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x !u64i>
// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// LLVM: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// LLVM: ret <2 x i64> [[VSHL_N]]
return vshlq_n_u64(a, 3);
}
// vshl_n_s8 with an immediate of 3: expects a left cir.shift on 8 x s8
// lanes; in LLVM IR the shl operates directly on the argument with a
// splat (i8 3) amount.
// ALL-LABEL: test_vshl_n_s8
int8x8_t test_vshl_n_s8(int8x8_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s8i>, %{{.*}} : !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
// LLVM: ret <8 x i8> [[VSHL_N]]
return vshl_n_s8(a, 3);
}
// vshl_n_s16 with an immediate of 3: expects a left cir.shift on 4 x s16
// lanes; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i16 3).
// ALL-LABEL: test_vshl_n_s16
int16x4_t test_vshl_n_s16(int16x4_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s16i>, %{{.*}} : !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i>
// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// LLVM: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// LLVM: ret <4 x i16> [[VSHL_N]]
return vshl_n_s16(a, 3);
}
// vshl_n_s32 with an immediate of 3: expects a left cir.shift on 2 x s32
// lanes; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i32 3).
// ALL-LABEL: test_vshl_n_s32
int32x2_t test_vshl_n_s32(int32x2_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s32i>, %{{.*}} : !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i>
// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// LLVM: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// LLVM: ret <2 x i32> [[VSHL_N]]
return vshl_n_s32(a, 3);
}
// vshl_n_s64 with an immediate of 1: expects a left cir.shift on a single
// s64 lane; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i64 1).
// ALL-LABEL: test_vshl_n_s64
int64x1_t test_vshl_n_s64(int64x1_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !s64i>, %{{.*}} : !cir.vector<1 x !s64i>) -> !cir.vector<1 x !s64i>
// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// LLVM: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
// LLVM: ret <1 x i64> [[VSHL_N]]
return vshl_n_s64(a, 1);
}
// vshl_n_u8 with an immediate of 3: expects a left cir.shift on 8 x u8
// lanes; in LLVM IR the shl operates directly on the argument with a
// splat (i8 3) amount.
// ALL-LABEL: test_vshl_n_u8
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u8i>, %{{.*}} : !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
// LLVM: ret <8 x i8> [[VSHL_N]]
return vshl_n_u8(a, 3);
}
// vshl_n_u16 with an immediate of 3: expects a left cir.shift on 4 x u16
// lanes; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i16 3).
// ALL-LABEL: test_vshl_n_u16
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u16i>, %{{.*}} : !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i>
// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// LLVM: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// LLVM: ret <4 x i16> [[VSHL_N]]
return vshl_n_u16(a, 3);
}
// vshl_n_u32 with an immediate of 3: expects a left cir.shift on 2 x u32
// lanes; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i32 3).
// ALL-LABEL: test_vshl_n_u32
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u32i>, %{{.*}} : !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i>
// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// LLVM: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// LLVM: ret <2 x i32> [[VSHL_N]]
return vshl_n_u32(a, 3);
}
// vshl_n_u64 with an immediate of 1: expects a left cir.shift on a single
// u64 lane; in LLVM IR the argument round-trips through <8 x i8> bitcasts
// before a shl by splat (i64 1).
// ALL-LABEL: test_vshl_n_u64
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !u64i>, %{{.*}} : !cir.vector<1 x !u64i>) -> !cir.vector<1 x !u64i>
// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) {{.*}} {
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// LLVM: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
// LLVM: ret <1 x i64> [[VSHL_N]]
return vshl_n_u64(a, 1);
}