llvm-project/clang/test/CodeGen/arm64-vrnd-constrained.c
Lukacma 6c3adaafe3
[AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate (#127043)
Currently arm_neon.h emits C-style casts to perform vector type casts. This
relies on implicit conversion between vector types being enabled, which is
currently deprecated behaviour and will soon be removed. To ensure NEON code
keeps working afterwards, this patch changes all of these vector type casts
into bitcasts.


Co-authored-by: Momchil Velikov <momchil.velikov@arm.com>
2025-04-01 09:45:16 +01:00
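
To illustrate the change described above, here is a minimal, hypothetical sketch (not the actual arm_neon.h source; the helper names are invented): a same-size vector reinterpretation that was previously written as a C-style cast is now spelled as an explicit bit cast, for example via Clang's __builtin_bit_cast:

#include <arm_neon.h>

// Hypothetical illustration only; arm_neon.h uses its own internal names.
static inline uint64x2_t reinterpret_before(float64x2_t v) {
  return (uint64x2_t)v;                      // old style: C-style vector cast
}

static inline uint64x2_t reinterpret_after(float64x2_t v) {
  return __builtin_bit_cast(uint64x2_t, v);  // new style: explicit bitcast
}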

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN: | FileCheck --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -ffp-exception-behavior=strict -emit-llvm -o - %s \
// RUN: | FileCheck --check-prefix=CONSTRAINED %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
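// Each test below verifies that a NEON rounding intrinsic lowers to the plain
// LLVM rounding intrinsic (trunc, floor, ceil, round, rint) in the default FP
// environment, and to the matching @llvm.experimental.constrained.* intrinsic
// with !"fpexcept.strict" metadata under -ffp-exception-behavior=strict.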
// UNCONSTRAINED-LABEL: define <2 x double> @rnd5(
// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// UNCONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]])
// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDZ1_I]], ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
// CONSTRAINED-LABEL: define <2 x double> @rnd5(
// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CONSTRAINED-NEXT: [[ENTRY:.*:]]
// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> [[VRNDZ_I]], metadata !"fpexcept.strict") #[[ATTR2:[0-9]+]]
// CONSTRAINED-NEXT: store <2 x double> [[VRNDZ1_I]], ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
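// vrndq_f64 rounds each lane toward zero (FRINTZ): plain @llvm.trunc.v2f64 by
// default, @llvm.experimental.constrained.trunc.v2f64 under strict exception
// behaviour, as checked above.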
// UNCONSTRAINED-LABEL: define <2 x double> @rnd13(
// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// UNCONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]])
// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDM1_I]], ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
// CONSTRAINED-LABEL: define <2 x double> @rnd13(
// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CONSTRAINED-NEXT: [[ENTRY:.*:]]
// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> [[VRNDM_I]], metadata !"fpexcept.strict") #[[ATTR2]]
// CONSTRAINED-NEXT: store <2 x double> [[VRNDM1_I]], ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
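// vrndmq_f64 rounds toward minus infinity (FRINTM): @llvm.floor.v2f64, or its
// constrained counterpart, as checked above.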
// UNCONSTRAINED-LABEL: define <2 x double> @rnd18(
// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// UNCONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]])
// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDP1_I]], ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
// CONSTRAINED-LABEL: define <2 x double> @rnd18(
// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CONSTRAINED-NEXT: [[ENTRY:.*:]]
// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> [[VRNDP_I]], metadata !"fpexcept.strict") #[[ATTR2]]
// CONSTRAINED-NEXT: store <2 x double> [[VRNDP1_I]], ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); }
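// vrndpq_f64 rounds toward plus infinity (FRINTP): @llvm.ceil.v2f64, or its
// constrained counterpart, as checked above.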
// UNCONSTRAINED-LABEL: define <2 x double> @rnd22(
// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// UNCONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]])
// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDA1_I]], ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
// CONSTRAINED-LABEL: define <2 x double> @rnd22(
// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CONSTRAINED-NEXT: [[ENTRY:.*:]]
// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> [[VRNDA_I]], metadata !"fpexcept.strict") #[[ATTR2]]
// CONSTRAINED-NEXT: store <2 x double> [[VRNDA1_I]], ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); }
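// vrndaq_f64 rounds to nearest with ties away from zero (FRINTA):
// @llvm.round.v2f64, or its constrained counterpart, as checked above.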
// UNCONSTRAINED-LABEL: define <2 x double> @rnd25(
// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// UNCONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// UNCONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]])
// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDX1_I]], ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
// CONSTRAINED-LABEL: define <2 x double> @rnd25(
// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CONSTRAINED-NEXT: [[ENTRY:.*:]]
// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
// CONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> [[VRNDX_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
// CONSTRAINED-NEXT: store <2 x double> [[VRNDX1_I]], ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); }
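// vrndxq_f64 rounds using the current rounding mode and can raise the inexact
// exception (FRINTX): @llvm.rint.v2f64, or the constrained rint with
// !"round.tonearest" metadata, as checked above.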