
In arm_neon.h, we insert shufflevectors around each intrinsic when the target is big-endian, to compensate for the difference between the ABI-defined memory format of vectors (with the whole vector stored as one big-endian access) and LLVM's target-independent expectations (with the lowest-numbered lane in the lowest address). However, this code was written for the AArch64 ABI, and the AArch32 ABI differs slightly: it requires that vectors are stored in memory as if stored with VSTM, which does a series of 64-bit accesses, instead of the AArch64 VSTR, which does a single 128-bit access. This means that for AArch32 we need to reverse the lanes in each 64-bit chunk of the vector, instead of in the whole vector. Since there are only a small number of different shufflevector orderings needed, I've split them out into macros, so that this doesn't need separate conditions in each intrinsic definition.
116 lines
4.9 KiB
C
116 lines
4.9 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
|
|
|
// REQUIRES: arm-registered-target
|
|
|
|
// RUN: %clang_cc1 -triple armv8a-arm-none-eabihf -target-cpu generic -emit-llvm -o - %s -disable-O0-optnone | \
|
|
// RUN: opt -S -passes=instcombine -o - | FileCheck %s --check-prefix=LE
|
|
// RUN: %clang_cc1 -triple armebv8a-arm-none-eabihf -target-cpu generic -emit-llvm -o - %s -disable-O0-optnone | \
|
|
// RUN: opt -S -passes=instcombine -o - | FileCheck %s --check-prefix=BE
|
|
|
|
#include <arm_neon.h>
|
|
|
|
// LE-LABEL: define dso_local i32 @int32x4_t_lane_0(
|
|
// LE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 0
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x4_t_lane_0(
|
|
// BE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 1
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 0 of an int32x4_t with vgetq_lane_s32. The checks above expect
// IR element 0 on the little-endian target and element 1 on the big-endian
// target, i.e. lanes 0 and 1 trade places within their 64-bit pair.
int int32x4_t_lane_0(int32x4_t a) { return vgetq_lane_s32(a, 0); }
|
|
// LE-LABEL: define dso_local i32 @int32x4_t_lane_1(
|
|
// LE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 1
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x4_t_lane_1(
|
|
// BE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 0
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 1 of an int32x4_t with vgetq_lane_s32. The checks above expect
// IR element 1 on the little-endian target and element 0 on the big-endian
// target, i.e. lanes 0 and 1 trade places within their 64-bit pair.
int int32x4_t_lane_1(int32x4_t a) { return vgetq_lane_s32(a, 1); }
|
|
// LE-LABEL: define dso_local i32 @int32x4_t_lane_2(
|
|
// LE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 2
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x4_t_lane_2(
|
|
// BE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 3
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 2 of an int32x4_t with vgetq_lane_s32. The checks above expect
// IR element 2 on the little-endian target and element 3 on the big-endian
// target, i.e. lanes 2 and 3 trade places within their 64-bit pair rather
// than being mirrored across the whole vector.
int int32x4_t_lane_2(int32x4_t a) { return vgetq_lane_s32(a, 2); }
|
|
// LE-LABEL: define dso_local i32 @int32x4_t_lane_3(
|
|
// LE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 3
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x4_t_lane_3(
|
|
// BE-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i64 2
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 3 of an int32x4_t with vgetq_lane_s32. The checks above expect
// IR element 3 on the little-endian target and element 2 on the big-endian
// target, i.e. lanes 2 and 3 trade places within their 64-bit pair rather
// than being mirrored across the whole vector.
int int32x4_t_lane_3(int32x4_t a) { return vgetq_lane_s32(a, 3); }
|
|
// LE-LABEL: define dso_local i32 @int32x2_t_lane_0(
|
|
// LE-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i64 0
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x2_t_lane_0(
|
|
// BE-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i64 1
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 0 of a 64-bit int32x2_t with vget_lane_s32. The checks above
// expect IR element 0 on the little-endian target and element 1 on the
// big-endian target (the two lanes of the single 64-bit chunk are swapped).
int int32x2_t_lane_0(int32x2_t a) { return vget_lane_s32(a, 0); }
|
|
// LE-LABEL: define dso_local i32 @int32x2_t_lane_1(
|
|
// LE-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i64 1
|
|
// LE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i32 @int32x2_t_lane_1(
|
|
// BE-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i64 0
|
|
// BE-NEXT: ret i32 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 1 of a 64-bit int32x2_t with vget_lane_s32. The checks above
// expect IR element 1 on the little-endian target and element 0 on the
// big-endian target (the two lanes of the single 64-bit chunk are swapped).
int int32x2_t_lane_1(int32x2_t a) { return vget_lane_s32(a, 1); }
|
|
// LE-LABEL: define dso_local i64 @int64x2_t_lane_0(
|
|
// LE-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i64 0
|
|
// LE-NEXT: ret i64 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i64 @int64x2_t_lane_0(
|
|
// BE-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i64 0
|
|
// BE-NEXT: ret i64 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 0 of an int64x2_t with vgetq_lane_s64. The checks above expect
// IR element 0 on both the little-endian and big-endian targets. Each 64-bit
// chunk holds exactly one lane here, so a per-chunk lane reversal would leave
// the index unchanged.
int64_t int64x2_t_lane_0(int64x2_t a) { return vgetq_lane_s64(a, 0); }
|
|
// LE-LABEL: define dso_local i64 @int64x2_t_lane_1(
|
|
// LE-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// LE-NEXT: [[ENTRY:.*:]]
|
|
// LE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i64 1
|
|
// LE-NEXT: ret i64 [[VGET_LANE]]
|
|
//
|
|
// BE-LABEL: define dso_local i64 @int64x2_t_lane_1(
|
|
// BE-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// BE-NEXT: [[ENTRY:.*:]]
|
|
// BE-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i64 1
|
|
// BE-NEXT: ret i64 [[VGET_LANE]]
|
|
//
|
|
// Reads lane 1 of an int64x2_t with vgetq_lane_s64. The checks above expect
// IR element 1 on both the little-endian and big-endian targets. Each 64-bit
// chunk holds exactly one lane here, so a per-chunk lane reversal would leave
// the index unchanged.
int64_t int64x2_t_lane_1(int64x2_t a) { return vgetq_lane_s64(a, 1); }
|