[clang] Introduce elementwise ctlz/cttz builtins (#131995)
These builtins are modeled on the clzg/ctzg builtins, which accept an optional second argument. This second argument is returned if the first argument is 0. These builtins unconditionally exhibit zero-is-undef behaviour, regardless of target preference for the other ctz/clz builtins. The builtins have constexpr support. Fixes #154113
This commit is contained in:
parent
d770567a51
commit
8b128388b5
@ -760,8 +760,9 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
|
||||
The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
|
||||
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
|
||||
``__builtin_elementwise_sub_sat``, ``__builtin_elementwise_max``,
|
||||
``__builtin_elementwise_min``, and ``__builtin_elementwise_abs``
|
||||
can be called in a ``constexpr`` context.
|
||||
``__builtin_elementwise_min``, ``__builtin_elementwise_abs``,
|
||||
``__builtin_elementwise_ctlz``, and ``__builtin_elementwise_cttz`` can be
|
||||
called in a ``constexpr`` context.
|
||||
|
||||
No implicit promotion of integer types takes place. The mixing of integer types
|
||||
of different sizes and signs is forbidden in binary and ternary builtins.
|
||||
@ -870,6 +871,14 @@ T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Co
|
||||
significant bits of the wide value), the combined value is shifted
|
||||
right by z, and the least significant bits are extracted to produce
|
||||
a result that is the same size as the original arguments.
|
||||
T __builtin_elementwise_ctlz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types
|
||||
the first argument is 0 and an optional second argument is provided,
|
||||
the second argument is returned. It is undefined behaviour if the
|
||||
first argument is 0 and no second argument is provided.
|
||||
T __builtin_elementwise_cttz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types
|
||||
the first argument is 0 and an optional second argument is provided,
|
||||
the second argument is returned. It is undefined behaviour if the
|
||||
first argument is 0 and no second argument is provided.
|
||||
============================================== ====================================================================== =========================================
|
||||
|
||||
|
||||
|
@ -1526,6 +1526,18 @@ def ElementwiseFshr : Builtin {
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def ElementwiseCtlz : Builtin {
|
||||
let Spellings = ["__builtin_elementwise_ctlz"];
|
||||
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def ElementwiseCttz : Builtin {
|
||||
let Spellings = ["__builtin_elementwise_cttz"];
|
||||
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def ReduceMax : Builtin {
|
||||
let Spellings = ["__builtin_reduce_max"];
|
||||
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
|
||||
|
@ -400,6 +400,9 @@ def note_constexpr_non_const_vectorelements : Note<
|
||||
"cannot determine number of elements for sizeless vectors in a constant expression">;
|
||||
def note_constexpr_assumption_failed : Note<
|
||||
"assumption evaluated to false">;
|
||||
def note_constexpr_countzeroes_zero : Note<
|
||||
"evaluation of %select{__builtin_elementwise_ctlz|__builtin_elementwise_cttz}0 "
|
||||
"with a zero value is undefined">;
|
||||
def err_experimental_clang_interp_failed : Error<
|
||||
"the experimental clang interpreter failed to evaluate an expression">;
|
||||
|
||||
|
@ -1785,6 +1785,93 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Can be called with an integer or vector as the first and only parameter.
|
||||
static bool interp__builtin_elementwise_countzeroes(InterpState &S,
|
||||
CodePtr OpPC,
|
||||
const InterpFrame *Frame,
|
||||
const CallExpr *Call,
|
||||
unsigned BuiltinID) {
|
||||
const bool HasZeroArg = Call->getNumArgs() == 2;
|
||||
const bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_cttz;
|
||||
assert(Call->getNumArgs() == 1 || HasZeroArg);
|
||||
if (Call->getArg(0)->getType()->isIntegerType()) {
|
||||
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
|
||||
APSInt Val = popToAPSInt(S.Stk, ArgT);
|
||||
std::optional<APSInt> ZeroVal;
|
||||
if (HasZeroArg) {
|
||||
ZeroVal = Val;
|
||||
Val = popToAPSInt(S.Stk, ArgT);
|
||||
}
|
||||
|
||||
if (Val.isZero()) {
|
||||
if (ZeroVal) {
|
||||
pushInteger(S, *ZeroVal, Call->getType());
|
||||
return true;
|
||||
}
|
||||
// If we haven't been provided the second argument, the result is
|
||||
// undefined
|
||||
S.FFDiag(S.Current->getSource(OpPC),
|
||||
diag::note_constexpr_countzeroes_zero)
|
||||
<< /*IsTrailing=*/IsCTTZ;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (BuiltinID == Builtin::BI__builtin_elementwise_ctlz) {
|
||||
pushInteger(S, Val.countLeadingZeros(), Call->getType());
|
||||
} else {
|
||||
pushInteger(S, Val.countTrailingZeros(), Call->getType());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Otherwise, the argument must be a vector.
|
||||
const ASTContext &ASTCtx = S.getASTContext();
|
||||
Pointer ZeroArg;
|
||||
if (HasZeroArg) {
|
||||
assert(Call->getArg(1)->getType()->isVectorType() &&
|
||||
ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
|
||||
Call->getArg(1)->getType()));
|
||||
ZeroArg = S.Stk.pop<Pointer>();
|
||||
assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
|
||||
}
|
||||
assert(Call->getArg(0)->getType()->isVectorType());
|
||||
const Pointer &Arg = S.Stk.pop<Pointer>();
|
||||
assert(Arg.getFieldDesc()->isPrimitiveArray());
|
||||
const Pointer &Dst = S.Stk.peek<Pointer>();
|
||||
assert(Dst.getFieldDesc()->isPrimitiveArray());
|
||||
assert(Arg.getFieldDesc()->getNumElems() ==
|
||||
Dst.getFieldDesc()->getNumElems());
|
||||
|
||||
QualType ElemType = Arg.getFieldDesc()->getElemQualType();
|
||||
PrimType ElemT = *S.getContext().classify(ElemType);
|
||||
unsigned NumElems = Arg.getNumElems();
|
||||
|
||||
// FIXME: Reading from uninitialized vector elements?
|
||||
for (unsigned I = 0; I != NumElems; ++I) {
|
||||
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
|
||||
APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
|
||||
if (EltVal.isZero()) {
|
||||
if (HasZeroArg) {
|
||||
Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
|
||||
} else {
|
||||
// If we haven't been provided the second argument, the result is
|
||||
// undefined
|
||||
S.FFDiag(S.Current->getSource(OpPC),
|
||||
diag::note_constexpr_countzeroes_zero)
|
||||
<< /*IsTrailing=*/IsCTTZ;
|
||||
return false;
|
||||
}
|
||||
} else if (IsCTTZ) {
|
||||
Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
|
||||
} else {
|
||||
Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
|
||||
}
|
||||
Dst.atIndex(I).initialize();
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
|
||||
const InterpFrame *Frame,
|
||||
const CallExpr *Call, unsigned ID) {
|
||||
@ -2903,6 +2990,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
|
||||
case Builtin::BI__builtin_ctzg:
|
||||
return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
|
||||
|
||||
case Builtin::BI__builtin_elementwise_ctlz:
|
||||
case Builtin::BI__builtin_elementwise_cttz:
|
||||
return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
|
||||
BuiltinID);
|
||||
|
||||
case Builtin::BI__builtin_bswap16:
|
||||
case Builtin::BI__builtin_bswap32:
|
||||
case Builtin::BI__builtin_bswap64:
|
||||
|
@ -11827,6 +11827,53 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
|
||||
|
||||
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
|
||||
}
|
||||
case Builtin::BI__builtin_elementwise_ctlz:
|
||||
case Builtin::BI__builtin_elementwise_cttz: {
|
||||
APValue SourceLHS;
|
||||
std::optional<APValue> Fallback;
|
||||
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS))
|
||||
return false;
|
||||
if (E->getNumArgs() > 1) {
|
||||
APValue FallbackTmp;
|
||||
if (!EvaluateAsRValue(Info, E->getArg(1), FallbackTmp))
|
||||
return false;
|
||||
Fallback = FallbackTmp;
|
||||
}
|
||||
|
||||
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
|
||||
unsigned SourceLen = SourceLHS.getVectorLength();
|
||||
SmallVector<APValue, 4> ResultElements;
|
||||
ResultElements.reserve(SourceLen);
|
||||
|
||||
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
|
||||
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
|
||||
if (!LHS) {
|
||||
// Without a fallback, a zero element is undefined
|
||||
if (!Fallback) {
|
||||
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
|
||||
<< /*IsTrailing=*/(E->getBuiltinCallee() ==
|
||||
Builtin::BI__builtin_elementwise_cttz);
|
||||
return false;
|
||||
}
|
||||
ResultElements.push_back(Fallback->getVectorElt(EltNum));
|
||||
continue;
|
||||
}
|
||||
switch (E->getBuiltinCallee()) {
|
||||
case Builtin::BI__builtin_elementwise_ctlz:
|
||||
ResultElements.push_back(APValue(
|
||||
APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countl_zero()),
|
||||
DestEltTy->isUnsignedIntegerOrEnumerationType())));
|
||||
break;
|
||||
case Builtin::BI__builtin_elementwise_cttz:
|
||||
ResultElements.push_back(APValue(
|
||||
APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countr_zero()),
|
||||
DestEltTy->isUnsignedIntegerOrEnumerationType())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -13382,6 +13429,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
case Builtin::BI__builtin_clzll:
|
||||
case Builtin::BI__builtin_clzs:
|
||||
case Builtin::BI__builtin_clzg:
|
||||
case Builtin::BI__builtin_elementwise_ctlz:
|
||||
case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
|
||||
case Builtin::BI__lzcnt:
|
||||
case Builtin::BI__lzcnt64: {
|
||||
@ -13390,7 +13438,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
return false;
|
||||
|
||||
std::optional<APSInt> Fallback;
|
||||
if (BuiltinOp == Builtin::BI__builtin_clzg && E->getNumArgs() > 1) {
|
||||
if ((BuiltinOp == Builtin::BI__builtin_clzg ||
|
||||
BuiltinOp == Builtin::BI__builtin_elementwise_ctlz) &&
|
||||
E->getNumArgs() > 1) {
|
||||
APSInt FallbackTemp;
|
||||
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
|
||||
return false;
|
||||
@ -13408,6 +13458,11 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
BuiltinOp != Builtin::BI__lzcnt &&
|
||||
BuiltinOp != Builtin::BI__lzcnt64;
|
||||
|
||||
if (BuiltinOp == Builtin::BI__builtin_elementwise_ctlz) {
|
||||
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
|
||||
<< /*IsTrailing=*/false;
|
||||
}
|
||||
|
||||
if (ZeroIsUndefined)
|
||||
return Error(E);
|
||||
}
|
||||
@ -13462,13 +13517,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
case Builtin::BI__builtin_ctzl:
|
||||
case Builtin::BI__builtin_ctzll:
|
||||
case Builtin::BI__builtin_ctzs:
|
||||
case Builtin::BI__builtin_ctzg: {
|
||||
case Builtin::BI__builtin_ctzg:
|
||||
case Builtin::BI__builtin_elementwise_cttz: {
|
||||
APSInt Val;
|
||||
if (!EvaluateInteger(E->getArg(0), Val, Info))
|
||||
return false;
|
||||
|
||||
std::optional<APSInt> Fallback;
|
||||
if (BuiltinOp == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1) {
|
||||
if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
|
||||
BuiltinOp == Builtin::BI__builtin_elementwise_cttz) &&
|
||||
E->getNumArgs() > 1) {
|
||||
APSInt FallbackTemp;
|
||||
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
|
||||
return false;
|
||||
@ -13479,6 +13537,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
if (Fallback)
|
||||
return Success(*Fallback, E);
|
||||
|
||||
if (BuiltinOp == Builtin::BI__builtin_elementwise_cttz) {
|
||||
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
|
||||
<< /*IsTrailing=*/true;
|
||||
}
|
||||
return Error(E);
|
||||
}
|
||||
|
||||
|
@ -3326,9 +3326,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||
case Builtin::BI__builtin_ctz:
|
||||
case Builtin::BI__builtin_ctzl:
|
||||
case Builtin::BI__builtin_ctzll:
|
||||
case Builtin::BI__builtin_ctzg: {
|
||||
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
|
||||
E->getNumArgs() > 1;
|
||||
case Builtin::BI__builtin_ctzg:
|
||||
case Builtin::BI__builtin_elementwise_cttz: {
|
||||
bool HasFallback =
|
||||
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg ||
|
||||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_cttz) &&
|
||||
E->getNumArgs() > 1;
|
||||
|
||||
Value *ArgValue =
|
||||
HasFallback ? EmitScalarExpr(E->getArg(0))
|
||||
@ -3338,8 +3341,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
|
||||
|
||||
llvm::Type *ResultType = ConvertType(E->getType());
|
||||
Value *ZeroUndef =
|
||||
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
|
||||
// The elementwise builtins always exhibit zero-is-undef behaviour
|
||||
Value *ZeroUndef = Builder.getInt1(
|
||||
HasFallback || getTarget().isCLZForZeroUndef() ||
|
||||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_cttz);
|
||||
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
|
||||
if (Result->getType() != ResultType)
|
||||
Result =
|
||||
@ -3358,9 +3363,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||
case Builtin::BI__builtin_clz:
|
||||
case Builtin::BI__builtin_clzl:
|
||||
case Builtin::BI__builtin_clzll:
|
||||
case Builtin::BI__builtin_clzg: {
|
||||
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
|
||||
E->getNumArgs() > 1;
|
||||
case Builtin::BI__builtin_clzg:
|
||||
case Builtin::BI__builtin_elementwise_ctlz: {
|
||||
bool HasFallback =
|
||||
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg ||
|
||||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctlz) &&
|
||||
E->getNumArgs() > 1;
|
||||
|
||||
Value *ArgValue =
|
||||
HasFallback ? EmitScalarExpr(E->getArg(0))
|
||||
@ -3370,8 +3378,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
|
||||
|
||||
llvm::Type *ResultType = ConvertType(E->getType());
|
||||
Value *ZeroUndef =
|
||||
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
|
||||
// The elementwise builtins always exhibit zero-is-undef behaviour
|
||||
Value *ZeroUndef = Builder.getInt1(
|
||||
HasFallback || getTarget().isCLZForZeroUndef() ||
|
||||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctlz);
|
||||
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
|
||||
if (Result->getType() != ResultType)
|
||||
Result =
|
||||
|
@ -3080,6 +3080,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
|
||||
TheCall->setType(Magnitude.get()->getType());
|
||||
break;
|
||||
}
|
||||
case Builtin::BI__builtin_elementwise_ctlz:
|
||||
case Builtin::BI__builtin_elementwise_cttz:
|
||||
// These builtins can be unary or binary. Note for empty calls we call the
|
||||
// unary checker in order to not emit an error that says the function
|
||||
// expects 2 arguments, which would be misleading.
|
||||
if (TheCall->getNumArgs() <= 1) {
|
||||
if (PrepareBuiltinElementwiseMathOneArgCall(
|
||||
TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
|
||||
return ExprError();
|
||||
} else if (BuiltinElementwiseMath(
|
||||
TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
|
||||
return ExprError();
|
||||
break;
|
||||
case Builtin::BI__builtin_reduce_max:
|
||||
case Builtin::BI__builtin_reduce_min: {
|
||||
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
|
||||
|
@ -1265,3 +1265,99 @@ void test_builtin_elementwise_fshl(long long int i1, long long int i2,
|
||||
u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3);
|
||||
u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_ctlz(si8 vs1, si8 vs2, u4 vu1,
|
||||
long long int lli, short si,
|
||||
_BitInt(31) bi, int i,
|
||||
char ci) {
|
||||
// CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
|
||||
// CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
|
||||
vs1 = __builtin_elementwise_ctlz(vs1);
|
||||
|
||||
// CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
|
||||
// CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
|
||||
// CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
|
||||
// CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
|
||||
// CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[CLZ]]
|
||||
vs1 = __builtin_elementwise_ctlz(vs1, vs2);
|
||||
|
||||
// CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
|
||||
// CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true)
|
||||
vu1 = __builtin_elementwise_ctlz(vu1);
|
||||
|
||||
// CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr
|
||||
// CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true)
|
||||
lli = __builtin_elementwise_ctlz(lli);
|
||||
|
||||
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr
|
||||
// CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true)
|
||||
si = __builtin_elementwise_ctlz(si);
|
||||
|
||||
// CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
|
||||
// CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
|
||||
// CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
|
||||
bi = __builtin_elementwise_ctlz(bi);
|
||||
|
||||
// CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
|
||||
// CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
|
||||
// CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
|
||||
// CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
|
||||
// CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]]
|
||||
bi = __builtin_elementwise_ctlz(bi, (_BitInt(31))1);
|
||||
|
||||
// CHECK: [[I:%.+]] = load i32, ptr %i.addr
|
||||
// CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true)
|
||||
i = __builtin_elementwise_ctlz(i);
|
||||
|
||||
// CHECK: [[CI:%.+]] = load i8, ptr %ci.addr
|
||||
// CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true)
|
||||
ci = __builtin_elementwise_ctlz(ci);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_cttz(si8 vs1, si8 vs2, u4 vu1,
|
||||
long long int lli, short si,
|
||||
_BitInt(31) bi, int i,
|
||||
char ci) {
|
||||
// CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
|
||||
// CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
|
||||
vs1 = __builtin_elementwise_cttz(vs1);
|
||||
|
||||
// CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
|
||||
// CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
|
||||
// CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
|
||||
// CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
|
||||
// CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[ctz]]
|
||||
vs1 = __builtin_elementwise_cttz(vs1, vs2);
|
||||
|
||||
// CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
|
||||
// CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true)
|
||||
vu1 = __builtin_elementwise_cttz(vu1);
|
||||
|
||||
// CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr
|
||||
// CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true)
|
||||
lli = __builtin_elementwise_cttz(lli);
|
||||
|
||||
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr
|
||||
// CHECK-NEXT: call i16 @llvm.cttz.i16(i16 [[SI]], i1 true)
|
||||
si = __builtin_elementwise_cttz(si);
|
||||
|
||||
// CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
|
||||
// CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
|
||||
// CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
|
||||
bi = __builtin_elementwise_cttz(bi);
|
||||
|
||||
// CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
|
||||
// CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
|
||||
// CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
|
||||
// CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
|
||||
// CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]]
|
||||
bi = __builtin_elementwise_cttz(bi, (_BitInt(31))1);
|
||||
|
||||
// CHECK: [[I:%.+]] = load i32, ptr %i.addr
|
||||
// CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true)
|
||||
i = __builtin_elementwise_cttz(i);
|
||||
|
||||
// CHECK: [[CI:%.+]] = load i8, ptr %ci.addr
|
||||
// CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true)
|
||||
ci = __builtin_elementwise_cttz(ci);
|
||||
}
|
||||
|
@ -1339,3 +1339,47 @@ float3 foo(float3 a,const struct_float3* hi) {
|
||||
float3 b = __builtin_elementwise_max((float3)(0.0f), a);
|
||||
return __builtin_elementwise_pow(b, hi->b.yyy);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_ctlz(int i32, int2 v2i32, short i16,
|
||||
double f64, double2 v2f64) {
|
||||
f64 = __builtin_elementwise_ctlz(f64);
|
||||
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
|
||||
|
||||
_Complex float c1;
|
||||
c1 = __builtin_elementwise_ctlz(c1);
|
||||
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_ctlz(v2i32, i32);
|
||||
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_ctlz(v2i32, f64);
|
||||
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_ctlz();
|
||||
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
|
||||
|
||||
v2i32 = __builtin_elementwise_ctlz(v2i32, v2i32, f64);
|
||||
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_cttz(int i32, int2 v2i32, short i16,
|
||||
double f64, double2 v2f64) {
|
||||
f64 = __builtin_elementwise_cttz(f64);
|
||||
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
|
||||
|
||||
_Complex float c1;
|
||||
c1 = __builtin_elementwise_cttz(c1);
|
||||
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_cttz(v2i32, i32);
|
||||
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_cttz(v2i32, f64);
|
||||
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
|
||||
|
||||
v2i32 = __builtin_elementwise_cttz();
|
||||
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
|
||||
|
||||
v2i32 = __builtin_elementwise_cttz(v2i32, v2i32, f64);
|
||||
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
|
||||
}
|
||||
|
@ -894,3 +894,45 @@ CHECK_FOUR_FLOAT_VEC(__builtin_elementwise_abs((vector4float){-1.123, 2.123, -3.
|
||||
CHECK_FOUR_FLOAT_VEC(__builtin_elementwise_abs((vector4double){-1.123, 2.123, -3.123, 4.123}), ((vector4double){1.123, 2.123, 3.123, 4.123}))
|
||||
static_assert(__builtin_elementwise_abs((float)-1.123) - (float)1.123 < 1e-6); // making sure one element works
|
||||
#undef CHECK_FOUR_FLOAT_VEC
|
||||
|
||||
static_assert(__builtin_elementwise_ctlz(2) == 30);
|
||||
static_assert(__builtin_elementwise_ctlz(2, 8) == 30);
|
||||
static_assert(__builtin_elementwise_ctlz(0, 8) == 8);
|
||||
static_assert(__builtin_elementwise_ctlz(0, 0) == 0);
|
||||
static_assert(__builtin_elementwise_ctlz((char)2) == 6);
|
||||
static_assert(__builtin_elementwise_ctlz((short)2) == 14);
|
||||
static_assert(__builtin_elementwise_ctlz((char)1) == 0x7);
|
||||
static_assert(__builtin_elementwise_ctlz((char)4) == 0x5);
|
||||
static_assert(__builtin_elementwise_ctlz((char)127) == 0x1);
|
||||
static_assert(__builtin_elementwise_ctlz((char)128) == 0x0);
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctlz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 0x00010507 : 0x07050100));
|
||||
|
||||
constexpr int clz0 = __builtin_elementwise_ctlz(0);
|
||||
// expected-error@-1 {{must be initialized by a constant expression}} \
|
||||
// expected-note@-1 {{evaluation of __builtin_elementwise_ctlz with a zero value is undefined}}
|
||||
constexpr vector4char clz1 = __builtin_elementwise_ctlz((vector4char){1, 0, 3, 4});
|
||||
// expected-error@-1 {{must be initialized by a constant expression}} \
|
||||
// expected-note@-1 {{evaluation of __builtin_elementwise_ctlz with a zero value is undefined}}
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctlz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE));
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctlz((vector4char){0, 0, 0, 0}, (vector4char){0, 0, 0, 0})) == 0);
|
||||
|
||||
static_assert(__builtin_elementwise_cttz(2) == 1);
|
||||
static_assert(__builtin_elementwise_cttz(2, 8) == 1);
|
||||
static_assert(__builtin_elementwise_cttz(0, 8) == 8);
|
||||
static_assert(__builtin_elementwise_cttz(0, 0) == 0);
|
||||
static_assert(__builtin_elementwise_cttz((char)2) == 1);
|
||||
static_assert(__builtin_elementwise_cttz((short)2) == 1);
|
||||
static_assert(__builtin_elementwise_cttz((char)8) == 0x3);
|
||||
static_assert(__builtin_elementwise_cttz((char)32) == 0x5);
|
||||
static_assert(__builtin_elementwise_cttz((char)127) == 0x0);
|
||||
static_assert(__builtin_elementwise_cttz((char)128) == 0x7);
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_cttz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 0x07000503 : 0x03050007));
|
||||
|
||||
constexpr int ctz0 = __builtin_elementwise_cttz(0);
|
||||
// expected-error@-1 {{must be initialized by a constant expression}} \
|
||||
// expected-note@-1 {{evaluation of __builtin_elementwise_cttz with a zero value is undefined}}
|
||||
constexpr vector4char ctz1 = __builtin_elementwise_cttz((vector4char){1, 0, 3, 4});
|
||||
// expected-error@-1 {{must be initialized by a constant expression}} \
|
||||
// expected-note@-1 {{evaluation of __builtin_elementwise_cttz with a zero value is undefined}}
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_cttz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE00FF03 : 0x03FF00FE));
|
||||
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_cttz((vector4char){0, 0, 0, 0}, (vector4char){0, 0, 0, 0})) == 0);
|
||||
|
Loading…
x
Reference in New Issue
Block a user