Compare commits


4 Commits

Author SHA1 Message Date
pvanhout 6cd7c41646 [AMDGPU] Expand scratch atomics to flat atomics if GAS is enabled 2025-08-22 12:45:16 +02:00
pvanhout 3c6b5f75a5 [AMDGPU] Precommit memory legalizer tests for private AS 2025-08-22 12:45:15 +02:00
pvanhout 9cdf588d22 Rename "Expand" to "ExpandCustom" 2025-08-22 12:44:48 +02:00
pvanhout d05704bce4 [CodeGen][TLI] Allow targets to custom expand atomic load/stores
Loads didn't have the `Expand` option in `AtomicExpandPass`. Stores had `Expand`, but it didn't defer to TLI and instead performed an action directly.
Move the old behavior to an `XChg` expansion and make `Expand` behave like all other instructions.
2025-08-22 10:14:26 +02:00
178 changed files with 118200 additions and 10156 deletions
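
Context note, not part of the changeset: a minimal, self-contained C++ sketch of the dispatch described by commit d05704bce4, assuming a simple address-space check in place of the real TLI query. The enum members and function names here are illustrative only; the in-tree enum is TargetLoweringBase::AtomicExpansionKind, the exact hook this patch adds is not shown here, and 9cdf588d22 later renames the new kind to "ExpandCustom".

```cpp
#include <cstdio>

// Conceptual stand-ins; the real LLVM enum has more members.
enum class AtomicExpansionKind { None, CmpXChg, XChg, Expand };

// Assumed target policy: only private-address-space atomics request a custom
// expansion (in the spirit of the AMDGPU scratch->flat rewrite in 6cd7c41646).
AtomicExpansionKind shouldExpandAtomicStore(bool IsPrivateAS) {
  return IsPrivateAS ? AtomicExpansionKind::Expand : AtomicExpansionKind::None;
}

void runAtomicExpand(bool IsPrivateAS) {
  switch (shouldExpandAtomicStore(IsPrivateAS)) {
  case AtomicExpansionKind::XChg:
    // Old `Expand` behavior for stores: rewrite the store as an atomicrmw xchg.
    std::puts("expand store to atomicrmw xchg");
    break;
  case AtomicExpansionKind::Expand:
    // New behavior: defer to the target's custom expansion hook.
    std::puts("call the target's custom expansion");
    break;
  default:
    std::puts("keep the atomic store as-is");
    break;
  }
}

int main() {
  runAtomicExpand(/*IsPrivateAS=*/true);
  runAtomicExpand(/*IsPrivateAS=*/false);
  return 0;
}
```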

View File

@ -138,12 +138,6 @@
Dump function CFGs to graphviz format after each stage;enable '-print-loops'
for color-coded blocks
- `--dump-dot-func=<func1,func2,func3...>`
Dump function CFGs to graphviz format for specified functions only;
takes function name patterns (regex supported). Note: C++ function names
must be passed using their mangled names
- `--dump-linux-exceptions`
Dump Linux kernel exception table

View File

@ -15,12 +15,6 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
namespace bolt {
class BinaryFunction;
}
} // namespace llvm
namespace opts {
enum HeatmapModeKind {
@ -106,9 +100,6 @@ extern llvm::cl::opt<unsigned> Verbosity;
/// Return true if we should process all functions in the binary.
bool processAllFunctions();
/// Return true if we should dump dot graphs for the given function.
bool shouldDumpDot(const llvm::bolt::BinaryFunction &Function);
enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };
extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun;

View File

@ -52,7 +52,6 @@ namespace opts {
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
extern bool shouldDumpDot(const bolt::BinaryFunction &Function);
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
@ -341,7 +340,7 @@ Error BinaryFunctionPassManager::runPasses() {
Function.print(BC.outs(), Message);
if (opts::shouldDumpDot(Function))
if (opts::DumpDotAll)
Function.dumpGraphForPass(PassIdName);
}
}

View File

@ -115,35 +115,6 @@ cl::opt<bool> DumpDotAll(
"enable '-print-loops' for color-coded blocks"),
cl::Hidden, cl::cat(BoltCategory));
cl::list<std::string> DumpDotFunc(
"dump-dot-func", cl::CommaSeparated,
cl::desc(
"dump function CFGs to graphviz format for specified functions only;"
"takes function name patterns (regex supported)"),
cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
bool shouldDumpDot(const bolt::BinaryFunction &Function) {
// If dump-dot-all is enabled, dump all functions
if (DumpDotAll)
return !Function.isIgnored();
// If no specific functions specified in dump-dot-func, don't dump any
if (DumpDotFunc.empty())
return false;
if (Function.isIgnored())
return false;
// Check if function matches any of the specified patterns
for (const std::string &Name : DumpDotFunc) {
if (Function.hasNameRegex(Name)) {
return true;
}
}
return false;
}
static cl::list<std::string>
ForceFunctionNames("funcs",
cl::CommaSeparated,
@ -3598,7 +3569,7 @@ void RewriteInstance::postProcessFunctions() {
if (opts::PrintAll || opts::PrintCFG)
Function.print(BC->outs(), "after building cfg");
if (opts::shouldDumpDot(Function))
if (opts::DumpDotAll)
Function.dumpGraphForPass("00_build-cfg");
if (opts::PrintLoopInfo) {

View File

@ -1,24 +0,0 @@
#include <iostream>
// Multiple functions to test selective dumping
int add(int a, int b) { return a + b; }
int multiply(int a, int b) { return a * b; }
int main_helper() {
std::cout << "Helper function" << std::endl;
return 42;
}
int main_secondary() { return add(5, 3); }
void other_function() { std::cout << "Other function" << std::endl; }
int main() {
int result = add(10, 20);
result = multiply(result, 2);
main_helper();
main_secondary();
other_function();
return result;
}

View File

@ -1,52 +0,0 @@
# Test the --dump-dot-func option with multiple functions
# (includes tests for both mangled/unmangled names)
RUN: %clang++ %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
# Test 1: --dump-dot-func with specific function name (mangled)
RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD
# Test 2: --dump-dot-func with regex pattern (main.*)
RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX
# Test 3: --dump-dot-func with multiple specific functions (mangled names)
RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI
# Test 4: No option specified should create no dot files
RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE
# Test 5: --dump-dot-func with non-existent function
RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT
# Test 6: Backward compatibility - --dump-dot-all should still work
RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL
# Test 7: Test with unmangled function name (main function)
RUN: llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED
# Check that specific functions are dumped
ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
# Should be no dumping messages when no option is specified
NONE-NOT: BOLT-INFO: dumping CFG
# Should be no dumping messages for non-existent function
NONEXISTENT-NOT: BOLT-INFO: dumping CFG
ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot

View File

@ -15,12 +15,14 @@ using namespace clang::ast_matchers;
namespace clang::tidy::bugprone {
namespace {
// Determine if the result of an expression is "stored" in some way.
// It is true if the value is stored into a variable or used as initialization
// or passed to a function or constructor.
// For this use case compound assignments are not counted as a "store" (the 'E'
// expression should have pointer type).
static bool isExprValueStored(const Expr *E, ASTContext &C) {
bool isExprValueStored(const Expr *E, ASTContext &C) {
E = E->IgnoreParenCasts();
// Get first non-paren, non-cast parent.
ParentMapContext &PMap = C.getParentMapContext();
@ -47,8 +49,6 @@ static bool isExprValueStored(const Expr *E, ASTContext &C) {
return isa<CallExpr, CXXConstructExpr>(ParentE);
}
namespace {
AST_MATCHER_P(CXXTryStmt, hasHandlerFor,
ast_matchers::internal::Matcher<QualType>, InnerMatcher) {
for (unsigned NH = Node.getNumHandlers(), I = 0; I < NH; ++I) {

View File

@ -14,8 +14,10 @@ using namespace clang::ast_matchers;
namespace clang::tidy::bugprone {
static bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
const StringLiteral *Lit) {
namespace {
bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
const StringLiteral *Lit) {
// String literals surrounded by parentheses are assumed to be on purpose.
// i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
@ -56,8 +58,6 @@ static bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
return false;
}
namespace {
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
MaxConcatenatedTokens) {
return Node.getNumConcatenated() > 1 &&

View File

@ -46,9 +46,7 @@ enum class ConversionKind {
ToLongDouble
};
} // namespace
static ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
return llvm::StringSwitch<ConversionKind>(FD->getName())
.Cases("atoi", "atol", ConversionKind::ToInt)
.Case("atoll", ConversionKind::ToLongInt)
@ -56,8 +54,8 @@ static ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
.Default(ConversionKind::None);
}
static ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
const TargetInfo &TI) {
ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
const TargetInfo &TI) {
// Scan the format string for the first problematic format specifier, then
// report that as the conversion type. This will miss additional conversion
// specifiers, but that is acceptable behavior.
@ -130,7 +128,7 @@ static ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
return H.get();
}
static StringRef classifyConversionType(ConversionKind K) {
StringRef classifyConversionType(ConversionKind K) {
switch (K) {
case ConversionKind::None:
llvm_unreachable("Unexpected conversion kind");
@ -150,7 +148,7 @@ static StringRef classifyConversionType(ConversionKind K) {
llvm_unreachable("Unknown conversion kind");
}
static StringRef classifyReplacement(ConversionKind K) {
StringRef classifyReplacement(ConversionKind K) {
switch (K) {
case ConversionKind::None:
llvm_unreachable("Unexpected conversion kind");
@ -175,6 +173,7 @@ static StringRef classifyReplacement(ConversionKind K) {
}
llvm_unreachable("Unknown conversion kind");
}
} // unnamed namespace
void StrToNumCheck::check(const MatchFinder::MatchResult &Result) {
const auto *Call = Result.Nodes.getNodeAs<CallExpr>("expr");

View File

@ -59,9 +59,7 @@ AST_MATCHER(FunctionDecl, isPlacementOverload) {
return true;
}
} // namespace
static OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
switch (FD->getOverloadedOperator()) {
default:
break;
@ -77,7 +75,7 @@ static OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
llvm_unreachable("Not an overloaded allocation operator");
}
static const char *getOperatorName(OverloadedOperatorKind K) {
const char *getOperatorName(OverloadedOperatorKind K) {
switch (K) {
default:
break;
@ -93,14 +91,13 @@ static const char *getOperatorName(OverloadedOperatorKind K) {
llvm_unreachable("Not an overloaded allocation operator");
}
static bool areCorrespondingOverloads(const FunctionDecl *LHS,
const FunctionDecl *RHS) {
bool areCorrespondingOverloads(const FunctionDecl *LHS,
const FunctionDecl *RHS) {
return RHS->getOverloadedOperator() == getCorrespondingOverload(LHS);
}
static bool
hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
const CXXRecordDecl *RD = nullptr) {
bool hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
const CXXRecordDecl *RD = nullptr) {
if (RD) {
// Check the methods in the given class and accessible to derived classes.
for (const auto *BMD : RD->methods())
@ -127,6 +124,8 @@ hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
return false;
}
} // anonymous namespace
void NewDeleteOverloadsCheck::registerMatchers(MatchFinder *Finder) {
// Match all operator new and operator delete overloads (including the array
// forms). Do not match implicit operators, placement operators, or

View File

@ -395,12 +395,16 @@ void MacroToEnumCallbacks::Endif(SourceLocation Loc, SourceLocation IfLoc) {
--CurrentFile->ConditionScopes;
}
namespace {
template <size_t N>
static bool textEquals(const char (&Needle)[N], const char *HayStack) {
bool textEquals(const char (&Needle)[N], const char *HayStack) {
return StringRef{HayStack, N - 1} == Needle;
}
template <size_t N> static size_t len(const char (&)[N]) { return N - 1; }
template <size_t N> size_t len(const char (&)[N]) { return N - 1; }
} // namespace
void MacroToEnumCallbacks::PragmaDirective(SourceLocation Loc,
PragmaIntroducerKind Introducer) {

View File

@ -16,13 +16,14 @@ using namespace clang::ast_matchers;
namespace clang::tidy::modernize {
static constexpr char ConstructorCall[] = "constructorCall";
static constexpr char ResetCall[] = "resetCall";
static constexpr char NewExpression[] = "newExpression";
namespace {
static std::string getNewExprName(const CXXNewExpr *NewExpr,
const SourceManager &SM,
const LangOptions &Lang) {
constexpr char ConstructorCall[] = "constructorCall";
constexpr char ResetCall[] = "resetCall";
constexpr char NewExpression[] = "newExpression";
std::string getNewExprName(const CXXNewExpr *NewExpr, const SourceManager &SM,
const LangOptions &Lang) {
StringRef WrittenName = Lexer::getSourceText(
CharSourceRange::getTokenRange(
NewExpr->getAllocatedTypeSourceInfo()->getTypeLoc().getSourceRange()),
@ -33,6 +34,8 @@ static std::string getNewExprName(const CXXNewExpr *NewExpr,
return WrittenName.str();
}
} // namespace
const char MakeSmartPtrCheck::PointerType[] = "pointerType";
MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context,

View File

@ -19,7 +19,9 @@ using namespace clang::ast_matchers;
namespace clang::tidy::modernize {
static bool containsEscapes(StringRef HayStack, StringRef Escapes) {
namespace {
bool containsEscapes(StringRef HayStack, StringRef Escapes) {
size_t BackSlash = HayStack.find('\\');
if (BackSlash == StringRef::npos)
return false;
@ -33,16 +35,16 @@ static bool containsEscapes(StringRef HayStack, StringRef Escapes) {
return true;
}
static bool isRawStringLiteral(StringRef Text) {
bool isRawStringLiteral(StringRef Text) {
// Already a raw string literal if R comes before ".
const size_t QuotePos = Text.find('"');
assert(QuotePos != StringRef::npos);
return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
}
static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
const StringLiteral *Literal,
const CharsBitSet &DisallowedChars) {
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
const StringLiteral *Literal,
const CharsBitSet &DisallowedChars) {
// FIXME: Handle L"", u8"", u"" and U"" literals.
if (!Literal->isOrdinary())
return false;
@ -62,12 +64,14 @@ static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
return containsEscapes(Text, R"('\"?x01)");
}
static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
return Bytes.find(Delimiter.empty()
? std::string(R"lit()")lit")
: (")" + Delimiter + R"(")")) != StringRef::npos;
}
} // namespace
RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -29,13 +29,12 @@
using namespace clang::ast_matchers;
namespace clang::tidy::objc {
namespace {
static constexpr StringRef WeakText = "__weak";
static constexpr StringRef StrongText = "__strong";
static constexpr StringRef UnsafeUnretainedText = "__unsafe_unretained";
namespace {
/// Matches ObjCIvarRefExpr, DeclRefExpr, or MemberExpr that reference
/// Objective-C object (or block) variables or fields whose object lifetimes
/// are not __unsafe_unretained.
@ -50,8 +49,6 @@ AST_POLYMORPHIC_MATCHER(isObjCManagedLifetime,
QT.getQualifiers().getObjCLifetime() > Qualifiers::OCL_ExplicitNone;
}
} // namespace
static std::optional<FixItHint>
fixItHintReplacementForOwnershipString(StringRef Text, CharSourceRange Range,
StringRef Ownership) {
@ -96,6 +93,8 @@ fixItHintForVarDecl(const VarDecl *VD, const SourceManager &SM,
return FixItHint::CreateInsertion(Range.getBegin(), "__unsafe_unretained ");
}
} // namespace
void NSInvocationArgumentLifetimeCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(
traverse(

View File

@ -27,14 +27,11 @@ enum NamingStyle {
CategoryProperty = 2,
};
} // namespace
/// For now we will only fix 'CamelCase' or 'abc_CamelCase' property to
/// 'camelCase' or 'abc_camelCase'. For other cases the users need to
/// come up with a proper name by their own.
/// FIXME: provide fix for snake_case to snakeCase
static FixItHint generateFixItHint(const ObjCPropertyDecl *Decl,
NamingStyle Style) {
FixItHint generateFixItHint(const ObjCPropertyDecl *Decl, NamingStyle Style) {
auto Name = Decl->getName();
auto NewName = Decl->getName().str();
size_t Index = 0;
@ -53,7 +50,7 @@ static FixItHint generateFixItHint(const ObjCPropertyDecl *Decl,
return {};
}
static std::string validPropertyNameRegex(bool UsedInMatcher) {
std::string validPropertyNameRegex(bool UsedInMatcher) {
// Allow any of these names:
// foo
// fooBar
@ -75,13 +72,13 @@ static std::string validPropertyNameRegex(bool UsedInMatcher) {
return StartMatcher + "([a-z]|[A-Z][A-Z0-9])[a-z0-9A-Z]*$";
}
static bool hasCategoryPropertyPrefix(llvm::StringRef PropertyName) {
bool hasCategoryPropertyPrefix(llvm::StringRef PropertyName) {
auto RegexExp =
llvm::Regex("^[a-zA-Z][a-zA-Z0-9]*_[a-zA-Z0-9][a-zA-Z0-9_]+$");
return RegexExp.match(PropertyName);
}
static bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
size_t Start = PropertyName.find_first_of('_');
assert(Start != llvm::StringRef::npos && Start + 1 < PropertyName.size());
auto Prefix = PropertyName.substr(0, Start);
@ -91,6 +88,7 @@ static bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
auto RegexExp = llvm::Regex(llvm::StringRef(validPropertyNameRegex(false)));
return RegexExp.match(PropertyName.substr(Start + 1));
}
} // namespace
void PropertyDeclarationCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(objcPropertyDecl(

View File

@ -17,6 +17,7 @@
#include <optional>
namespace clang::tidy::performance {
namespace {
using namespace ::clang::ast_matchers;
using llvm::StringRef;
@ -29,8 +30,8 @@ static constexpr StringRef MethodDeclId = "methodDecl";
static constexpr StringRef FunctionDeclId = "functionDecl";
static constexpr StringRef OldVarDeclId = "oldVarDecl";
static void recordFixes(const VarDecl &Var, ASTContext &Context,
DiagnosticBuilder &Diagnostic) {
void recordFixes(const VarDecl &Var, ASTContext &Context,
DiagnosticBuilder &Diagnostic) {
Diagnostic << utils::fixit::changeVarDeclToReference(Var, Context);
if (!Var.getType().isLocalConstQualified()) {
if (std::optional<FixItHint> Fix = utils::fixit::addQualifierToVarDecl(
@ -39,8 +40,8 @@ static void recordFixes(const VarDecl &Var, ASTContext &Context,
}
}
static std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
SourceManager &SM) {
std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
SourceManager &SM) {
bool Invalid = false;
const char *TextAfter = SM.getCharacterData(Loc, &Invalid);
if (Invalid) {
@ -50,8 +51,8 @@ static std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
return Loc.getLocWithOffset(TextAfter[Offset] == '\0' ? Offset : Offset + 1);
}
static void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
DiagnosticBuilder &Diagnostic) {
void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
DiagnosticBuilder &Diagnostic) {
auto &SM = Context.getSourceManager();
// Attempt to remove trailing comments as well.
auto Tok = utils::lexer::findNextTokenSkippingComments(Stmt.getEndLoc(), SM,
@ -73,8 +74,6 @@ static void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
}
}
namespace {
AST_MATCHER_FUNCTION_P(StatementMatcher,
isRefReturningMethodCallWithConstOverloads,
std::vector<StringRef>, ExcludedContainerTypes) {
@ -131,8 +130,6 @@ AST_MATCHER_FUNCTION_P(StatementMatcher, initializerReturnsReferenceToConst,
hasUnaryOperand(OldVarDeclRef)))));
}
} // namespace
// This checks that the variable itself is only used as const, and also makes
// sure that it does not reference another variable that could be modified in
// the BlockStmt. It does this by checking the following:
@ -183,13 +180,13 @@ static bool isInitializingVariableImmutable(
return false;
}
static bool isVariableUnused(const VarDecl &Var, const Stmt &BlockStmt,
ASTContext &Context) {
bool isVariableUnused(const VarDecl &Var, const Stmt &BlockStmt,
ASTContext &Context) {
return allDeclRefExprs(Var, BlockStmt, Context).empty();
}
static const SubstTemplateTypeParmType *
getSubstitutedType(const QualType &Type, ASTContext &Context) {
const SubstTemplateTypeParmType *getSubstitutedType(const QualType &Type,
ASTContext &Context) {
auto Matches = match(
qualType(anyOf(substTemplateTypeParmType().bind("subst"),
hasDescendant(substTemplateTypeParmType().bind("subst")))),
@ -197,9 +194,9 @@ getSubstitutedType(const QualType &Type, ASTContext &Context) {
return selectFirst<SubstTemplateTypeParmType>("subst", Matches);
}
static bool differentReplacedTemplateParams(const QualType &VarType,
const QualType &InitializerType,
ASTContext &Context) {
bool differentReplacedTemplateParams(const QualType &VarType,
const QualType &InitializerType,
ASTContext &Context) {
if (const SubstTemplateTypeParmType *VarTmplType =
getSubstitutedType(VarType, Context)) {
if (const SubstTemplateTypeParmType *InitializerTmplType =
@ -215,8 +212,8 @@ static bool differentReplacedTemplateParams(const QualType &VarType,
return false;
}
static QualType constructorArgumentType(const VarDecl *OldVar,
const BoundNodes &Nodes) {
QualType constructorArgumentType(const VarDecl *OldVar,
const BoundNodes &Nodes) {
if (OldVar) {
return OldVar->getType();
}
@ -227,6 +224,8 @@ static QualType constructorArgumentType(const VarDecl *OldVar,
return MethodDecl->getReturnType();
}
} // namespace
UnnecessaryCopyInitialization::UnnecessaryCopyInitialization(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -21,14 +21,16 @@ using namespace clang::ast_matchers;
namespace clang::tidy::performance {
static std::string paramNameOrIndex(StringRef Name, size_t Index) {
namespace {
std::string paramNameOrIndex(StringRef Name, size_t Index) {
return (Name.empty() ? llvm::Twine('#') + llvm::Twine(Index + 1)
: llvm::Twine('\'') + Name + llvm::Twine('\''))
.str();
}
static bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
ASTContext &Context) {
bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
ASTContext &Context) {
auto Matches = match(
traverse(TK_AsIs,
decl(forEachDescendant(declRefExpr(
@ -39,6 +41,8 @@ static bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
return Matches.empty();
}
} // namespace
UnnecessaryValueParamCheck::UnnecessaryValueParamCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -122,15 +122,15 @@ AST_MATCHER(EnumDecl, hasSequentialInitialValues) {
return !AllEnumeratorsArePowersOfTwo;
}
} // namespace
static std::string getName(const EnumDecl *Decl) {
std::string getName(const EnumDecl *Decl) {
if (!Decl->getDeclName())
return "<unnamed>";
return Decl->getQualifiedNameAsString();
}
} // namespace
EnumInitialValueCheck::EnumInitialValueCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -144,8 +144,6 @@ struct CognitiveComplexity final {
void account(SourceLocation Loc, unsigned short Nesting, Criteria C);
};
} // namespace
// All the possible messages that can be output. The choice of the message
// to use is based of the combination of the CognitiveComplexity::Criteria.
// It would be nice to have it in CognitiveComplexity struct, but then it is
@ -165,27 +163,23 @@ static const std::array<const StringRef, 4> Msgs = {{
}};
// Criteria is a bitset, thus a few helpers are needed.
static CognitiveComplexity::Criteria
operator|(CognitiveComplexity::Criteria LHS,
CognitiveComplexity::Criteria RHS) {
CognitiveComplexity::Criteria operator|(CognitiveComplexity::Criteria LHS,
CognitiveComplexity::Criteria RHS) {
return static_cast<CognitiveComplexity::Criteria>(llvm::to_underlying(LHS) |
llvm::to_underlying(RHS));
}
static CognitiveComplexity::Criteria
operator&(CognitiveComplexity::Criteria LHS,
CognitiveComplexity::Criteria RHS) {
CognitiveComplexity::Criteria operator&(CognitiveComplexity::Criteria LHS,
CognitiveComplexity::Criteria RHS) {
return static_cast<CognitiveComplexity::Criteria>(llvm::to_underlying(LHS) &
llvm::to_underlying(RHS));
}
static CognitiveComplexity::Criteria &
operator|=(CognitiveComplexity::Criteria &LHS,
CognitiveComplexity::Criteria RHS) {
CognitiveComplexity::Criteria &operator|=(CognitiveComplexity::Criteria &LHS,
CognitiveComplexity::Criteria RHS) {
LHS = operator|(LHS, RHS);
return LHS;
}
static CognitiveComplexity::Criteria &
operator&=(CognitiveComplexity::Criteria &LHS,
CognitiveComplexity::Criteria RHS) {
CognitiveComplexity::Criteria &operator&=(CognitiveComplexity::Criteria &LHS,
CognitiveComplexity::Criteria RHS) {
LHS = operator&(LHS, RHS);
return LHS;
}
@ -205,8 +199,6 @@ void CognitiveComplexity::account(SourceLocation Loc, unsigned short Nesting,
Total += Increase;
}
namespace {
class FunctionASTVisitor final
: public RecursiveASTVisitor<FunctionASTVisitor> {
using Base = RecursiveASTVisitor<FunctionASTVisitor>;

View File

@ -41,11 +41,9 @@ AST_MATCHER(Stmt, isNULLMacroExpansion) {
return isNULLMacroExpansion(&Node, Finder->getASTContext());
}
} // namespace
static StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
QualType Type,
ASTContext &Context) {
StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
QualType Type,
ASTContext &Context) {
switch (CastExprKind) {
case CK_IntegralToBoolean:
return Type->isUnsignedIntegerType() ? "0u" : "0";
@ -64,15 +62,15 @@ static StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
}
}
static bool isUnaryLogicalNotOperator(const Stmt *Statement) {
bool isUnaryLogicalNotOperator(const Stmt *Statement) {
const auto *UnaryOperatorExpr = dyn_cast<UnaryOperator>(Statement);
return UnaryOperatorExpr && UnaryOperatorExpr->getOpcode() == UO_LNot;
}
static void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
const ImplicitCastExpr *Cast,
const Stmt *Parent, ASTContext &Context,
bool UseUpperCaseLiteralSuffix) {
void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
const ImplicitCastExpr *Cast, const Stmt *Parent,
ASTContext &Context,
bool UseUpperCaseLiteralSuffix) {
// In case of expressions like (! integer), we should remove the redundant not
// operator and use inverted comparison (integer == 0).
bool InvertComparison =
@ -135,8 +133,8 @@ static void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
Diag << FixItHint::CreateInsertion(EndLoc, EndLocInsertion);
}
static StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
ASTContext &Context) {
StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
ASTContext &Context) {
if (isNULLMacroExpansion(Expression, Context)) {
return "false";
}
@ -163,7 +161,7 @@ static StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
return {};
}
static bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
SourceRange PrefixRange(Loc.getLocWithOffset(-1), Loc);
StringRef SpaceBeforeStmtStr = Lexer::getSourceText(
CharSourceRange::getCharRange(PrefixRange), Context.getSourceManager(),
@ -175,10 +173,9 @@ static bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
return !AllowedCharacters.contains(SpaceBeforeStmtStr.back());
}
static void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
const ImplicitCastExpr *Cast,
ASTContext &Context,
StringRef OtherType) {
void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
const ImplicitCastExpr *Cast,
ASTContext &Context, StringRef OtherType) {
if (!Context.getLangOpts().CPlusPlus) {
Diag << FixItHint::CreateInsertion(Cast->getBeginLoc(),
(Twine("(") + OtherType + ")").str());
@ -203,9 +200,8 @@ static void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
}
}
static StringRef
getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
QualType DestType, ASTContext &Context) {
StringRef getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
QualType DestType, ASTContext &Context) {
// Prior to C++11, false literal could be implicitly converted to pointer.
if (!Context.getLangOpts().CPlusPlus11 &&
(DestType->isPointerType() || DestType->isMemberPointerType()) &&
@ -226,8 +222,8 @@ getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
return BoolLiteral->getValue() ? "1" : "0";
}
static bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
ASTContext &Context) {
bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
ASTContext &Context) {
std::queue<const Stmt *> Q;
Q.push(Cast);
@ -255,6 +251,8 @@ static bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
return false;
}
} // anonymous namespace
ImplicitBoolConversionCheck::ImplicitBoolConversionCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -28,11 +28,8 @@ AST_MATCHER_P(QualType, hasUnqualifiedType,
enum class Qualifier { Const, Volatile, Restrict };
} // namespace
static std::optional<Token>
findQualToken(const VarDecl *Decl, Qualifier Qual,
const MatchFinder::MatchResult &Result) {
std::optional<Token> findQualToken(const VarDecl *Decl, Qualifier Qual,
const MatchFinder::MatchResult &Result) {
// Since either of the locs can be in a macro, use `makeFileCharRange` to be
// sure that we have a consistent `CharSourceRange`, located entirely in the
// source file.
@ -61,7 +58,7 @@ findQualToken(const VarDecl *Decl, Qualifier Qual,
*Result.SourceManager);
}
static std::optional<SourceRange>
std::optional<SourceRange>
getTypeSpecifierLocation(const VarDecl *Var,
const MatchFinder::MatchResult &Result) {
SourceRange TypeSpecifier(
@ -76,8 +73,8 @@ getTypeSpecifierLocation(const VarDecl *Var,
return TypeSpecifier;
}
static std::optional<SourceRange>
mergeReplacementRange(SourceRange &TypeSpecifier, const Token &ConstToken) {
std::optional<SourceRange> mergeReplacementRange(SourceRange &TypeSpecifier,
const Token &ConstToken) {
if (TypeSpecifier.getBegin().getLocWithOffset(-1) == ConstToken.getEndLoc()) {
TypeSpecifier.setBegin(ConstToken.getLocation());
return std::nullopt;
@ -89,19 +86,21 @@ mergeReplacementRange(SourceRange &TypeSpecifier, const Token &ConstToken) {
return SourceRange(ConstToken.getLocation(), ConstToken.getEndLoc());
}
static bool isPointerConst(QualType QType) {
bool isPointerConst(QualType QType) {
QualType Pointee = QType->getPointeeType();
assert(!Pointee.isNull() && "can't have a null Pointee");
return Pointee.isConstQualified();
}
static bool isAutoPointerConst(QualType QType) {
bool isAutoPointerConst(QualType QType) {
QualType Pointee =
cast<AutoType>(QType->getPointeeType().getTypePtr())->desugar();
assert(!Pointee.isNull() && "can't have a null Pointee");
return Pointee.isConstQualified();
}
} // namespace
QualifiedAutoCheck::QualifiedAutoCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),

View File

@ -14,18 +14,19 @@ using namespace clang::ast_matchers;
namespace clang::tidy::readability {
static const char *const RedundantReturnDiag =
"redundant return statement at the end "
"of a function with a void return type";
static const char *const RedundantContinueDiag =
"redundant continue statement at the "
"end of loop statement";
namespace {
static bool isLocationInMacroExpansion(const SourceManager &SM,
SourceLocation Loc) {
const char *const RedundantReturnDiag = "redundant return statement at the end "
"of a function with a void return type";
const char *const RedundantContinueDiag = "redundant continue statement at the "
"end of loop statement";
bool isLocationInMacroExpansion(const SourceManager &SM, SourceLocation Loc) {
return SM.isMacroBodyExpansion(Loc) || SM.isMacroArgExpansion(Loc);
}
} // namespace
void RedundantControlFlowCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(
functionDecl(isDefinition(), returns(voidType()),

View File

@ -13,14 +13,16 @@
namespace clang::tidy::utils::type_traits {
static bool classHasTrivialCopyAndDestroy(QualType Type) {
namespace {
bool classHasTrivialCopyAndDestroy(QualType Type) {
auto *Record = Type->getAsCXXRecordDecl();
return Record && Record->hasDefinition() &&
!Record->hasNonTrivialCopyConstructor() &&
!Record->hasNonTrivialDestructor();
}
static bool hasDeletedCopyConstructor(QualType Type) {
bool hasDeletedCopyConstructor(QualType Type) {
auto *Record = Type->getAsCXXRecordDecl();
if (!Record || !Record->hasDefinition())
return false;
@ -31,6 +33,8 @@ static bool hasDeletedCopyConstructor(QualType Type) {
return false;
}
} // namespace
std::optional<bool> isExpensiveToCopy(QualType Type,
const ASTContext &Context) {
if (Type->isDependentType() || Type->isIncompleteType())

View File

@ -309,13 +309,6 @@ NVPTX Support
X86 Support
^^^^^^^^^^^
- More SSE, AVX and AVX512 intrinsics, including initializers and general
arithmetic can now be used in C++ constant expressions.
- Some SSE, AVX and AVX512 intrinsics have been converted to wrap
generic __builtin intrinsics.
- NOTE: Please avoid use of the __builtin_ia32_* intrinsics - these are not
guaranteed to exist in future releases, or match behaviour with previous
releases of clang or other compilers.
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^

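Illustrative aside, not part of the diff: the release-note bullets above describe _mm* intrinsics usable in C++ constant expressions. A minimal example using one of the conversion intrinsics that this changeset covers with TEST_CONSTEXPR; it assumes a Clang new enough to mark these intrinsics constexpr and must be built with -mavx2 in C++ mode.

```cpp
// Hedged sketch: _mm256_cvtepi8_epi16 sign-extends 16 x i8 to 16 x i16 and,
// per the TEST_CONSTEXPR coverage later in this changeset, can be evaluated
// in a constant expression by sufficiently new Clang.
#include <immintrin.h>

constexpr __m256i Widened = _mm256_cvtepi8_epi16(
    _mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12));

// Prefer the _mm* wrappers over raw __builtin_ia32_* calls, which the note
// above says are not guaranteed to stay stable across releases.
static_assert(sizeof(Widened) == 32, "256-bit result");

int main() { return 0; }
```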
View File

@ -627,23 +627,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
}
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
@ -666,6 +654,46 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">;
}

View File

@ -11669,24 +11669,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_pmulhuw512:
case clang::X86::BI__builtin_ia32_pmulhw128:
case clang::X86::BI__builtin_ia32_pmulhw256:
case clang::X86::BI__builtin_ia32_pmulhw512:
case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
case clang::X86::BI__builtin_ia32_psllv8si:
case clang::X86::BI__builtin_ia32_psrav4si:
case clang::X86::BI__builtin_ia32_psrav8si:
case clang::X86::BI__builtin_ia32_psrlv2di:
case clang::X86::BI__builtin_ia32_psrlv4di:
case clang::X86::BI__builtin_ia32_psrlv4si:
case clang::X86::BI__builtin_ia32_psrlv8si:{
case clang::X86::BI__builtin_ia32_pmulhw512: {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
unsigned SourceLen = SourceLHS.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen);
@ -11698,12 +11687,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case Builtin::BI__builtin_elementwise_add_sat:
ResultElements.push_back(APValue(
APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
DestUnsigned)));
DestEltTy->isUnsignedIntegerOrEnumerationType())));
break;
case Builtin::BI__builtin_elementwise_sub_sat:
ResultElements.push_back(APValue(
APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
DestUnsigned)));
DestEltTy->isUnsignedIntegerOrEnumerationType())));
break;
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
@ -11717,40 +11706,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
/*isUnsigned=*/false)));
break;
case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
case clang::X86::BI__builtin_ia32_psllv8si:
if (RHS.uge(RHS.getBitWidth())) {
ResultElements.push_back(
APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
break;
}
ResultElements.push_back(
APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
break;
case clang::X86::BI__builtin_ia32_psrav4si:
case clang::X86::BI__builtin_ia32_psrav8si:
if (RHS.uge(RHS.getBitWidth())) {
ResultElements.push_back(
APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned)));
break;
}
ResultElements.push_back(
APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
break;
case clang::X86::BI__builtin_ia32_psrlv2di:
case clang::X86::BI__builtin_ia32_psrlv4di:
case clang::X86::BI__builtin_ia32_psrlv4si:
case clang::X86::BI__builtin_ia32_psrlv8si:
if (RHS.uge(RHS.getBitWidth())) {
ResultElements.push_back(
APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
break;
}
ResultElements.push_back(
APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
break;
}
}

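Context note, not part of the changeset: the hunk above removes the constant-evaluator cases for the AVX2 variable-shift builtins. Below is a short standalone C++ sketch of the per-lane rule that code applied (helper names invented for illustration); the first printed line reproduces the values from the TEST_CONSTEXPR expectation for _mm_sllv_epi32 removed further down.

```cpp
// Standalone sketch (not LLVM code) of the per-lane behavior of the removed
// evaluator: a lane's shift count >= the element width yields 0 for
// psllv/psrlv and the sign fill for psrav.
#include <cstdint>
#include <cstdio>

int32_t sllv_lane(int32_t v, uint32_t count) {              // psllv
  return count >= 32 ? 0 : int32_t(uint32_t(v) << count);
}
int32_t srlv_lane(int32_t v, uint32_t count) {               // psrlv
  return count >= 32 ? 0 : int32_t(uint32_t(v) >> count);
}
int32_t srav_lane(int32_t v, uint32_t count) {               // psrav
  return v >> (count >= 32 ? 31 : count);  // arithmetic shift of a signed value
}

int main() {
  // Matches the removed TEST_CONSTEXPR expectation:
  // _mm_sllv_epi32({1,-2,3,-4}, {1,2,3,-4}) == {2, -8, 24, 0}.
  std::printf("%d %d %d %d\n", sllv_lane(1, 1), sllv_lane(-2, 2),
              sllv_lane(3, 3), sllv_lane(-4, uint32_t(-4)));
  // Out-of-range counts: psrav keeps the sign (-1), psrlv produces 0.
  std::printf("%d %d\n", srav_lane(-4, 40), srlv_lane(-4, 40));
  return 0;
}
```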
View File

@ -3721,7 +3721,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sllv_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
@ -3743,7 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_sllv_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
@ -3765,7 +3765,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
/// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
/// bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sllv_epi64(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
@ -3787,7 +3787,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
/// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
/// bits).
/// \returns A 128-bit vector of [2 x i64] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_sllv_epi64(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
@ -3810,7 +3810,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_srav_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
@ -3833,7 +3833,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_srav_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
@ -3855,7 +3855,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_srlv_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
@ -3877,7 +3877,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_srlv_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
@ -3899,7 +3899,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
/// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
/// bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_srlv_epi64(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
@ -3921,7 +3921,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
/// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
/// bits).
/// \returns A 128-bit vector of [2 x i64] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_srlv_epi64(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);

View File

@ -327,6 +327,7 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) {
// CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
return _mm256_cvtepi8_epi16(a);
}
TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12));
__m256i test_mm256_cvtepi8_epi32(__m128i a) {
@ -335,6 +336,7 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) {
// CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
return _mm256_cvtepi8_epi32(a);
}
TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4));
__m256i test_mm256_cvtepi8_epi64(__m128i a) {
@ -343,6 +345,7 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) {
// CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
return _mm256_cvtepi8_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0));
__m256i test_mm256_cvtepi16_epi32(__m128i a) {
@ -350,6 +353,7 @@ __m256i test_mm256_cvtepi16_epi32(__m128i a) {
// CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
return _mm256_cvtepi16_epi32(a);
}
TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4));
__m256i test_mm256_cvtepi16_epi64(__m128i a) {
@ -358,6 +362,7 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) {
// CHECK: sext <4 x i16> %{{.*}} to <4 x i64>
return _mm256_cvtepi16_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0));
__m256i test_mm256_cvtepi32_epi64(__m128i a) {
@ -365,6 +370,7 @@ __m256i test_mm256_cvtepi32_epi64(__m128i a) {
// CHECK: sext <4 x i32> %{{.*}} to <4 x i64>
return _mm256_cvtepi32_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2, -1, 0));
__m256i test_mm256_cvtepu8_epi16(__m128i a) {
@ -372,6 +378,7 @@ __m256i test_mm256_cvtepu8_epi16(__m128i a) {
// CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
return _mm256_cvtepu8_epi16(a);
}
TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244));
__m256i test_mm256_cvtepu8_epi32(__m128i a) {
@ -380,6 +387,7 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) {
// CHECK: zext <8 x i8> %{{.*}} to <8 x i32>
return _mm256_cvtepu8_epi32(a);
}
TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252));
__m256i test_mm256_cvtepu8_epi64(__m128i a) {
@ -388,6 +396,7 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) {
// CHECK: zext <4 x i8> %{{.*}} to <4 x i64>
return _mm256_cvtepu8_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0));
__m256i test_mm256_cvtepu16_epi32(__m128i a) {
@ -395,6 +404,7 @@ __m256i test_mm256_cvtepu16_epi32(__m128i a) {
// CHECK: zext <8 x i16> {{.*}} to <8 x i32>
return _mm256_cvtepu16_epi32(a);
}
TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
__m256i test_mm256_cvtepu16_epi64(__m128i a) {
@ -403,6 +413,7 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) {
// CHECK: zext <4 x i16> %{{.*}} to <4 x i64>
return _mm256_cvtepu16_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0));
__m256i test_mm256_cvtepu32_epi64(__m128i a) {
@ -410,6 +421,7 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) {
// CHECK: zext <4 x i32> %{{.*}} to <4 x i64>
return _mm256_cvtepu32_epi64(a);
}
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2, 4294967295, 0));
__m128i test0_mm256_extracti128_si256_0(__m256i a) {
@ -1108,28 +1120,24 @@ __m128i test_mm_sllv_epi32(__m128i a, __m128i b) {
// CHECK: call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sllv_epi32(a, b);
}
TEST_CONSTEXPR(match_v4si(_mm_sllv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 2, -8, 24, 0));
__m256i test_mm256_sllv_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sllv_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_sllv_epi32(a, b);
}
TEST_CONSTEXPR(match_v8si(_mm256_sllv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 2, -8, 24, -64, 0, 0, 0, 0));
__m128i test_mm_sllv_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sllv_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_sllv_epi64(a, b);
}
TEST_CONSTEXPR(match_m128i(_mm_sllv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 256, 0x8000000000000000ULL));
__m256i test_mm256_sllv_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sllv_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_sllv_epi64(a, b);
}
TEST_CONSTEXPR(match_m256i(_mm256_sllv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 2, -8, 24, 0));
__m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sra_epi16
@ -1172,14 +1180,12 @@ __m128i test_mm_srav_epi32(__m128i a, __m128i b) {
// CHECK: call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_srav_epi32(a, b);
}
TEST_CONSTEXPR(match_v4si(_mm_srav_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, -1, 0, -1));
__m256i test_mm256_srav_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_srav_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_srav_epi32(a, b);
}
TEST_CONSTEXPR(match_v8si(_mm256_srav_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, -1, 0, -1));
__m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi16
@ -1246,28 +1252,24 @@ __m128i test_mm_srlv_epi32(__m128i a, __m128i b) {
// CHECK: call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_srlv_epi32(a, b);
}
TEST_CONSTEXPR(match_v4si(_mm_srlv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 1073741823, 0, 0));
__m256i test_mm256_srlv_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_srlv_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_srlv_epi32(a, b);
}
TEST_CONSTEXPR(match_v8si(_mm256_srlv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7));
__m128i test_mm_srlv_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_srlv_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_srlv_epi64(a, b);
}
TEST_CONSTEXPR(match_m128i(_mm_srlv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 1));
__m256i test_mm256_srlv_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_srlv_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_srlv_epi64(a, b);
}
TEST_CONSTEXPR(match_m256i(_mm256_srlv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 0, 0x3FFFFFFFFFFFFFFFULL, 0, 0));
__m256i test_mm256_stream_load_si256(__m256i const *a) {
// CHECK-LABEL: test_mm256_stream_load_si256

View File

@ -106,12 +106,6 @@ if(MSVC)
endif()
set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
# Win/ASan relies on the runtime functions being hotpatchable. See
# https://github.com/llvm/llvm-project/pull/149444
if(MSVC)
list(APPEND ASAN_CFLAGS /hotpatch)
endif()
append_list_if(MSVC /Zl ASAN_CFLAGS)
set(ASAN_COMMON_DEFINITIONS "")

View File

@ -792,7 +792,7 @@ static void PrintNoOriginTrackingWarning() {
static void PrintNoTaintWarning(const void *address) {
Decorator d;
Printf(" %sDFSan: no tainted value at %x%s\n", d.Warning(), address,
Printf(" %sDFSan: no tainted value at %zx%s\n", d.Warning(), (uptr)address,
d.Default());
}

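Aside, not sanitizer code: this hunk and the HWASan hunks that follow apply the same pattern, matching each Printf conversion to an argument of the expected type, e.g. %p with an explicit (void *) cast, or a cast to an integer of the right width instead of passing a pointer to %x. A minimal illustration:

```cpp
// Minimal illustration (not from the diff) of the format-fix pattern:
// print pointers with %p, or cast them to a size_t-width integer for %zx,
// rather than passing a pointer where %x expects an unsigned int.
#include <cstdint>
#include <cstdio>

int main() {
  const int object = 42;
  const void *address = &object;

  // Before: printf("no tainted value at %x\n", address);  // mismatched types
  std::printf("no tainted value at %zx\n",
              static_cast<size_t>(reinterpret_cast<uintptr_t>(address)));
  std::printf("no tainted value at %p\n", address);
  return 0;
}
```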
View File

@ -176,7 +176,7 @@ static void HwasanFormatMemoryUsage(InternalScopedString &s) {
"HWASAN pid: %d rss: %zd threads: %zd stacks: %zd"
" thr_aux: %zd stack_depot: %zd uniq_stacks: %zd"
" heap: %zd",
internal_getpid(), GetRSS(), thread_stats.n_live_threads,
(int)internal_getpid(), GetRSS(), thread_stats.n_live_threads,
thread_stats.total_stack_size,
thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(),
sds.allocated, sds.n_uniq_ids, asc[AllocatorStatMapped]);
@ -692,7 +692,7 @@ void __hwasan_handle_longjmp(const void *sp_dst) {
"WARNING: HWASan is ignoring requested __hwasan_handle_longjmp: "
"stack top: %p; target %p; distance: %p (%zd)\n"
"False positive error reports may follow\n",
(void *)sp, (void *)dst, dst - sp, dst - sp);
(void *)sp, (void *)dst, (void *)(dst - sp), dst - sp);
return;
}
TagMemory(sp, dst - sp, 0);

View File

@ -41,7 +41,7 @@ static inline bool malloc_bisect(StackTrace *stack, uptr orig_size) {
if (h < left || h > right)
return false;
if (flags()->malloc_bisect_dump) {
Printf("[alloc] %u %zu\n", h, orig_size);
Printf("[alloc] %u %zu\n", (u32)h, orig_size);
stack->Print();
}
return true;

View File

@ -306,8 +306,9 @@ static void PrintStackAllocations(const StackAllocationsRingBuffer *sa,
"%p is located %zd bytes %s a %zd-byte local variable %s "
"[%p,%p) "
"in %s %s\n",
untagged_addr, offset, whence, local.size, local.name, best_beg,
best_beg + local.size, local.function_name, location.data());
(void *)untagged_addr, offset, whence, local.size, local.name,
(void *)best_beg, (void *)(best_beg + local.size),
local.function_name, location.data());
location.clear();
Printf("%s\n", d.Default());
}
@ -738,8 +739,8 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
Printf("%s", d.Location());
Printf("%p is located %zd bytes %s a %zd-byte region [%p,%p)\n",
untagged_addr, offset, whence,
candidate.heap.end - candidate.heap.begin, candidate.heap.begin,
candidate.heap.end);
candidate.heap.end - candidate.heap.begin,
(void *)candidate.heap.begin, (void *)candidate.heap.end);
Printf("%s", d.Allocation());
Printf("allocated by thread T%u here:\n", candidate.heap.thread_id);
Printf("%s", d.Default());
@ -762,11 +763,11 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
Printf(
"%p is located %zd bytes %s a %zd-byte global variable "
"%s [%p,%p) in %s\n",
untagged_addr,
(void *)untagged_addr,
candidate.after ? untagged_addr - (info.start + info.size)
: info.start - untagged_addr,
candidate.after ? "after" : "before", info.size, info.name,
info.start, info.start + info.size, module_name);
(void *)info.start, (void *)(info.start + info.size), module_name);
} else {
uptr size = GetGlobalSizeFromDescriptor(candidate.untagged_addr);
if (size == 0)
@ -774,14 +775,14 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
Printf(
"%p is located %s a global variable in "
"\n #0 0x%x (%s+0x%x)\n",
untagged_addr, candidate.after ? "after" : "before",
candidate.untagged_addr, module_name, module_address);
(void *)untagged_addr, candidate.after ? "after" : "before",
(void *)candidate.untagged_addr, module_name, (u32)module_address);
else
Printf(
"%p is located %s a %zd-byte global variable in "
"\n #0 0x%x (%s+0x%x)\n",
untagged_addr, candidate.after ? "after" : "before", size,
candidate.untagged_addr, module_name, module_address);
(void *)untagged_addr, candidate.after ? "after" : "before", size,
(void *)candidate.untagged_addr, module_name, (u32)module_address);
}
Printf("%s", d.Default());
}
@ -792,8 +793,8 @@ void BaseReport::PrintAddressDescription() const {
int num_descriptions_printed = 0;
if (MemIsShadow(untagged_addr)) {
Printf("%s%p is HWAsan shadow memory.\n%s", d.Location(), untagged_addr,
d.Default());
Printf("%s%p is HWAsan shadow memory.\n%s", d.Location(),
(void *)untagged_addr, d.Default());
return;
}
@ -802,7 +803,7 @@ void BaseReport::PrintAddressDescription() const {
Printf(
"%s[%p,%p) is a %s %s heap chunk; "
"size: %zd offset: %zd\n%s",
d.Location(), heap.begin, heap.begin + heap.size,
d.Location(), (void *)heap.begin, (void *)(heap.begin + heap.size),
heap.from_small_heap ? "small" : "large",
heap.is_allocated ? "allocated" : "unallocated", heap.size,
untagged_addr - heap.begin, d.Default());
@ -821,8 +822,8 @@ void BaseReport::PrintAddressDescription() const {
Printf("%s", d.Error());
Printf("\nCause: stack tag-mismatch\n");
Printf("%s", d.Location());
Printf("Address %p is located in stack of thread T%zd\n", untagged_addr,
sa.thread_id());
Printf("Address %p is located in stack of thread T%zd\n",
(void *)untagged_addr, (ssize)sa.thread_id());
Printf("%s", d.Default());
announce_by_id(sa.thread_id());
PrintStackAllocations(sa.get(), ptr_tag, untagged_addr);
@ -842,9 +843,9 @@ void BaseReport::PrintAddressDescription() const {
Printf("\nCause: use-after-free\n");
Printf("%s", d.Location());
Printf("%p is located %zd bytes inside a %zd-byte region [%p,%p)\n",
untagged_addr, untagged_addr - UntagAddr(har.tagged_addr),
har.requested_size, UntagAddr(har.tagged_addr),
UntagAddr(har.tagged_addr) + har.requested_size);
(void *)untagged_addr, untagged_addr - UntagAddr(har.tagged_addr),
(ssize)har.requested_size, UntagAddr(har.tagged_addr),
(void *)(UntagAddr(har.tagged_addr) + har.requested_size));
Printf("%s", d.Allocation());
Printf("freed by thread T%u here:\n", ha.free_thread_id);
Printf("%s", d.Default());
@ -858,7 +859,7 @@ void BaseReport::PrintAddressDescription() const {
// Print a developer note: the index of this heap object
// in the thread's deallocation ring buffer.
Printf("hwasan_dev_note_heap_rb_distance: %zd %zd\n", ha.ring_index + 1,
flags()->heap_history_size);
(ssize)flags()->heap_history_size);
Printf("hwasan_dev_note_num_matching_addrs: %zd\n", ha.num_matching_addrs);
Printf("hwasan_dev_note_num_matching_addrs_4b: %zd\n",
ha.num_matching_addrs_4b);
@ -915,10 +916,11 @@ InvalidFreeReport::~InvalidFreeReport() {
const Thread *thread = GetCurrentThread();
if (thread) {
Report("ERROR: %s: %s on address %p at pc %p on thread T%zd\n",
SanitizerToolName, bug_type, untagged_addr, pc, thread->unique_id());
SanitizerToolName, bug_type, (void *)untagged_addr, (void *)pc,
(ssize)thread->unique_id());
} else {
Report("ERROR: %s: %s on address %p at pc %p on unknown thread\n",
SanitizerToolName, bug_type, untagged_addr, pc);
SanitizerToolName, bug_type, (void *)untagged_addr, (void *)pc);
}
Printf("%s", d.Access());
if (shadow.addr) {
@ -967,7 +969,8 @@ TailOverwrittenReport::~TailOverwrittenReport() {
Printf("%s", d.Error());
const char *bug_type = "allocation-tail-overwritten";
Report("ERROR: %s: %s; heap object [%p,%p) of size %zd\n", SanitizerToolName,
bug_type, untagged_addr, untagged_addr + orig_size, orig_size);
bug_type, (void *)untagged_addr, (void *)(untagged_addr + orig_size),
orig_size);
Printf("\n%s", d.Default());
Printf(
"Stack of invalid access unknown. Issue detected at deallocation "
@ -1037,7 +1040,7 @@ TagMismatchReport::~TagMismatchReport() {
uptr pc = GetTopPc(stack);
Printf("%s", d.Error());
Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type,
untagged_addr, pc);
(void *)untagged_addr, (void *)pc);
Thread *t = GetCurrentThread();
@ -1049,12 +1052,12 @@ TagMismatchReport::~TagMismatchReport() {
GetShortTagCopy(MemToShadow(untagged_addr + mismatch_offset));
Printf(
"%s of size %zu at %p tags: %02x/%02x(%02x) (ptr/mem) in thread T%zd\n",
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
mem_tag, short_tag, t->unique_id());
is_store ? "WRITE" : "READ", access_size, (void *)untagged_addr,
ptr_tag, mem_tag, short_tag, (ssize)t->unique_id());
} else {
Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
mem_tag, t->unique_id());
is_store ? "WRITE" : "READ", access_size, (void *)untagged_addr,
ptr_tag, mem_tag, (ssize)t->unique_id());
}
if (mismatch_offset)
Printf("Invalid access starting at offset %zu\n", mismatch_offset);
@ -1093,7 +1096,7 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size,
// See the frame breakdown defined in __hwasan_tag_mismatch (from
// hwasan_tag_mismatch_{aarch64,riscv64}.S).
void ReportRegisters(const uptr *frame, uptr pc) {
Printf("\nRegisters where the failure occurred (pc %p):\n", pc);
Printf("\nRegisters where the failure occurred (pc %p):\n", (void *)pc);
// We explicitly print a single line (4 registers/line) each iteration to
// reduce the amount of logcat error messages printed. Each Printf() will

View File

@ -173,9 +173,10 @@ uptr Thread::stack_size() {
}
void Thread::Print(const char *Prefix) {
Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, unique_id_,
(void *)this, stack_bottom(), stack_top(),
stack_top() - stack_bottom(), tls_begin(), tls_end());
Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix,
(ssize_t)unique_id_, (void *)this, (void *)stack_bottom(),
(void *)stack_top(), stack_top() - stack_bottom(), (void *)tls_begin(),
(void *)tls_end());
}
static u32 xorshift(u32 state) {

View File

@ -806,7 +806,7 @@ static bool ReportUnsuspendedThreads(
succeded = false;
Report(
"Running thread %zu was not suspended. False leaks are possible.\n",
os_id);
(usize)os_id);
}
}
return succeded;

View File

@ -29,7 +29,7 @@ static void ProtectGap(uptr addr, uptr size) {
Printf("protect_shadow_gap=0:"
" not protecting shadow gap, allocating gap's shadow\n"
"|| `[%p, %p]` || ShadowGap's shadow ||\n",
GapShadowBeg, GapShadowEnd);
(void *)GapShadowBeg, (void *)GapShadowEnd);
ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
"unprotected gap shadow");
return;

View File

@ -105,7 +105,7 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
}
if (Verbosity())
Report("Registering %d new functions!\n", SledMap.Functions);
Report("Registering %d new functions!\n", (int)SledMap.Functions);
{
SpinMutexLock Guard(&XRayInstrMapMutex);

View File

@ -308,7 +308,8 @@ XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
return XRayPatchingStatus::NOT_INITIALIZED;
if (Verbosity())
Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);
Report("Patching object %d with %d functions.\n", ObjId,
(int)InstrMap.Entries);
// Check if the corresponding DSO has been unloaded.
if (!InstrMap.Loaded) {

View File

@ -280,7 +280,6 @@ set(TARGET_LIBC_ENTRYPOINTS
set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acos
libc.src.math.acosf
libc.src.math.acoshf
libc.src.math.asin

View File

@ -2432,6 +2432,14 @@ functions:
return_type: double
arguments:
- type: double
- name: sincosf
standards:
- gnu
return_type: void
arguments:
- type: float
- type: float *
- type: float *
- name: sinf
standards:
- stdc
@ -2445,22 +2453,6 @@ functions:
arguments:
- type: _Float16
guard: LIBC_TYPES_HAS_FLOAT16
- name: sincos
standards:
- gnu
return_type: void
arguments:
- type: double
- type: double *
- type: double *
- name: sincosf
standards:
- gnu
return_type: void
arguments:
- type: float
- type: float *
- type: float *
- name: sinhf
standards:
- stdc

View File

@ -978,11 +978,11 @@ public:
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI map(map&& __m) = default;
_LIBCPP_HIDE_FROM_ABI map(map&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
_LIBCPP_HIDE_FROM_ABI map(map&& __m, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) = default;
_LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
_LIBCPP_HIDE_FROM_ABI map(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
: __tree_(__vc(__comp)) {
@ -1646,11 +1646,12 @@ public:
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) = default;
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI multimap& operator=(multimap&& __m) = default;
_LIBCPP_HIDE_FROM_ABI multimap&
operator=(multimap&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
_LIBCPP_HIDE_FROM_ABI multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
: __tree_(__vc(__comp)) {

View File

@ -667,7 +667,7 @@ public:
_LIBCPP_HIDE_FROM_ABI set& operator=(const set& __s) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI set(set&& __s) = default;
_LIBCPP_HIDE_FROM_ABI set(set&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
# endif // _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI explicit set(const allocator_type& __a) : __tree_(__a) {}
@ -699,7 +699,10 @@ public:
return *this;
}
_LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) = default;
_LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) noexcept(is_nothrow_move_assignable<__base>::value) {
__tree_ = std::move(__s.__tree_);
return *this;
}
# endif // _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI ~set() { static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), ""); }
@ -1123,7 +1126,7 @@ public:
_LIBCPP_HIDE_FROM_ABI multiset& operator=(const multiset& __s) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) = default;
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s, const allocator_type& __a);
# endif // _LIBCPP_CXX03_LANG
@ -1155,7 +1158,10 @@ public:
return *this;
}
_LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) = default;
_LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) _NOEXCEPT_(is_nothrow_move_assignable<__base>::value) {
__tree_ = std::move(__s.__tree_);
return *this;
}
# endif // _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI ~multiset() {

View File

@ -1049,7 +1049,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u, const allocator_type& __a);
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u)
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI unordered_map(initializer_list<value_type> __il);
_LIBCPP_HIDE_FROM_ABI
@ -1101,7 +1102,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(const unordered_map& __u) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u)
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(initializer_list<value_type> __il);
# endif // _LIBCPP_CXX03_LANG
@ -1821,7 +1823,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u, const allocator_type& __a);
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u)
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI unordered_multimap(initializer_list<value_type> __il);
_LIBCPP_HIDE_FROM_ABI unordered_multimap(
@ -1873,7 +1876,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(const unordered_multimap& __u) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u)
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(initializer_list<value_type> __il);
# endif // _LIBCPP_CXX03_LANG

View File

@ -706,7 +706,7 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u, const allocator_type& __a);
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI unordered_set(initializer_list<value_type> __il);
_LIBCPP_HIDE_FROM_ABI
@ -735,7 +735,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(const unordered_set& __u) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u)
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(initializer_list<value_type> __il);
# endif // _LIBCPP_CXX03_LANG
@ -1075,6 +1076,11 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const unordered_set&
# ifndef _LIBCPP_CXX03_LANG
template <class _Value, class _Hash, class _Pred, class _Alloc>
inline unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u)
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
: __table_(std::move(__u.__table_)) {}
template <class _Value, class _Hash, class _Pred, class _Alloc>
unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u, const allocator_type& __a)
: __table_(std::move(__u.__table_), __a) {
@ -1288,7 +1294,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u, const allocator_type& __a);
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u)
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u, const allocator_type& __a);
_LIBCPP_HIDE_FROM_ABI unordered_multiset(initializer_list<value_type> __il);
_LIBCPP_HIDE_FROM_ABI unordered_multiset(
@ -1317,7 +1324,8 @@ public:
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(const unordered_multiset& __u) = default;
# ifndef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u)
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(initializer_list<value_type> __il);
# endif // _LIBCPP_CXX03_LANG
@ -1667,6 +1675,11 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
# ifndef _LIBCPP_CXX03_LANG
template <class _Value, class _Hash, class _Pred, class _Alloc>
inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(unordered_multiset&& __u)
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
: __table_(std::move(__u.__table_)) {}
template <class _Value, class _Hash, class _Pred, class _Alloc>
unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
unordered_multiset&& __u, const allocator_type& __a)

View File

@ -1399,7 +1399,6 @@ void SymbolTable::resolveAlternateNames() {
auto toUndef = dyn_cast<Undefined>(toSym);
if (toUndef && (!toUndef->weakAlias || toUndef->isAntiDep))
continue;
toSym->isUsedInRegularObj = true;
if (toSym->isLazy())
forceLazy(toSym);
u->setWeakAlias(toSym);

View File

@ -1,25 +0,0 @@
; REQUIRES: x86
; RUN: mkdir -p %t.dir
; RUN: llvm-as -o %t.obj %s
; RUN: lld-link -out:%t.dll -dll -noentry %t.obj -export:test
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-windows-msvc19.33.0"
$alt = comdat any
@alt = weak_odr dso_local global i32 0, comdat, align 4
@ext = external dso_local global i32, align 4
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @test() #0 {
entry:
%0 = load i32, ptr @ext, align 4
ret i32 %0
}
attributes #0 = { noinline nounwind optnone uwtable }
!llvm.linker.options = !{!0}
!0 = !{!"/alternatename:ext=alt"}

View File

@ -347,9 +347,6 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
else
data.SetAddressByteSize(sizeof(void *));
if (!type_size)
return Status::FromErrorString("type does not have a size");
uint32_t result_byte_size = *type_size;
if (m_value.GetData(data, result_byte_size))
return error; // Success;

View File

@ -15,7 +15,6 @@ add_lldb_unittest(LLDBCoreTests
SourceManagerTest.cpp
TelemetryTest.cpp
UniqueCStringMapTest.cpp
Value.cpp
LINK_COMPONENTS
Support

View File

@ -1,39 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Core/Value.h"
#include "Plugins/Platform/MacOSX/PlatformMacOSX.h"
#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
#include "TestingSupport/SubsystemRAII.h"
#include "TestingSupport/Symbol/ClangTestUtils.h"
#include "lldb/Utility/DataExtractor.h"
#include "gtest/gtest.h"
using namespace lldb_private;
using namespace lldb_private::clang_utils;
TEST(ValueTest, GetValueAsData) {
SubsystemRAII<FileSystem, HostInfo, PlatformMacOSX> subsystems;
auto holder = std::make_unique<clang_utils::TypeSystemClangHolder>("test");
auto *clang = holder->GetAST();
Value v(Scalar(42));
DataExtractor extractor;
// no compiler type
Status status = v.GetValueAsData(nullptr, extractor, nullptr);
ASSERT_TRUE(status.Fail());
// with compiler type
v.SetCompilerType(clang->GetBasicType(lldb::BasicType::eBasicTypeChar));
status = v.GetValueAsData(nullptr, extractor, nullptr);
ASSERT_TRUE(status.Success());
}

View File

@ -268,6 +268,7 @@ public:
CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
CustomExpand, // Custom target-specific expansion using TLI hooks.
// Rewrite to a non-atomic form for use in a known non-preemptible
// environment.
@ -2273,6 +2274,18 @@ public:
"Generic atomicrmw expansion unimplemented on this target");
}
/// Perform an atomic store in a target-specific way.
virtual void emitExpandAtomicStore(StoreInst *SI) const {
llvm_unreachable(
"Generic atomic store expansion unimplemented on this target");
}
/// Perform an atomic load in a target-specific way.
virtual void emitExpandAtomicLoad(LoadInst *LI) const {
llvm_unreachable(
"Generic atomic load expansion unimplemented on this target");
}
/// Perform a cmpxchg expansion using a target-specific method.
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
@ -2377,8 +2390,8 @@ public:
}
/// Returns how the given (atomic) store should be expanded by the IR-level
/// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
/// to use an atomicrmw xchg.
/// AtomicExpand pass into. For instance AtomicExpansionKind::CustomExpand
/// will try to use an atomicrmw xchg.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return AtomicExpansionKind::None;
}

View File

@ -84,7 +84,7 @@ private:
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool tryExpandAtomicStore(StoreInst *SI);
void expandAtomicStore(StoreInst *SI);
void expandAtomicStoreToXChg(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
Value *
@ -537,6 +537,9 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
LI->setAtomic(AtomicOrdering::NotAtomic);
return true;
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicLoad(LI);
return true;
default:
llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
@ -546,8 +549,11 @@ bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicStore(SI);
return true;
case TargetLoweringBase::AtomicExpansionKind::Expand:
expandAtomicStore(SI);
expandAtomicStoreToXChg(SI);
return true;
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
SI->setAtomic(AtomicOrdering::NotAtomic);
@ -620,7 +626,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
return NewSI;
}
void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
@ -741,7 +747,7 @@ bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicRMWInst(AI);
case TargetLoweringBase::AtomicExpansionKind::Expand:
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicRMW(AI);
return true;
default:
@ -1695,7 +1701,7 @@ bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicCmpXchgInst(CI);
case TargetLoweringBase::AtomicExpansionKind::Expand: {
case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
TLI->emitExpandAtomicCmpXchg(CI);
return true;
}
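
Note on the two store paths dispatched above: `Expand` keeps the generic IR-level rewrite (now renamed `expandAtomicStoreToXChg`), while `CustomExpand` hands the instruction to the target via `emitExpandAtomicStore`. A minimal sketch of what the generic xchg path produces, with illustrative types and value names (not taken from this patch):

; a store too wide to remain a native atomic store
store atomic i128 %val, ptr %addr seq_cst, align 16

; after expandAtomicStoreToXChg: the store becomes a swap whose result is unused,
; and the new atomicrmw is then lowered again through tryExpandAtomicRMW
%unused = atomicrmw xchg ptr %addr, i128 %val seq_cst, align 16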

View File

@ -18983,9 +18983,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
if ((N->getFlags().hasAllowContract() &&
N0->getFlags().hasAllowContract()) ||
N0IsTrunc)
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
return DAG.getNode(
ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));

View File

@ -18,7 +18,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@ -27,7 +26,6 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"

View File

@ -27,7 +27,6 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#ifdef EXPENSIVE_CHECKS

View File

@ -6135,19 +6135,6 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
}
}
bool AMDGPUTargetLowering::canCreateUndefOrPoisonForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32:
return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
unsigned Depth) const {

View File

@ -323,12 +323,6 @@ public:
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
bool PoisonOnly, bool ConsiderFlags,
unsigned Depth) const override;
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;

View File

@ -21,7 +21,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

View File

@ -225,7 +225,6 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/AttributeMask.h"
@ -244,7 +243,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/Alignment.h"

View File

@ -190,14 +190,12 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

View File

@ -22,6 +22,7 @@
namespace llvm {
class AsmPrinter;
class MCContext;
} // namespace llvm
class AMDGPUMCInstLower {
MCContext &Ctx;
@ -65,5 +66,4 @@ static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
return nullptr;
}
} // namespace
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H

View File

@ -90,7 +90,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
@ -126,44 +125,6 @@ using namespace llvm;
using namespace llvm::PatternMatch;
namespace {
//===----------------------------------------------------------------------===//
// AMDGPU CodeGen Pass Builder interface.
//===----------------------------------------------------------------------===//
class AMDGPUCodeGenPassBuilder
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
public:
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
void addIRPasses(AddIRPass &) const;
void addCodeGenPrepare(AddIRPass &) const;
void addPreISel(AddIRPass &addPass) const;
void addILPOpts(AddMachinePass &) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addPreRewrite(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
void addPreEmitPass(AddMachinePass &) const;
void addPreEmitRegAlloc(AddMachinePass &) const;
Error addRegAssignmentOptimized(AddMachinePass &) const;
void addPreRegAlloc(AddMachinePass &) const;
void addOptimizedRegAlloc(AddMachinePass &) const;
void addPreSched2(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
/// given that a pass shall work at an optimization \p Level minimum.
bool isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
void addEarlyCSEOrGVNPass(AddIRPass &) const;
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
};
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
public:
SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)

View File

@ -18,6 +18,7 @@
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include <optional>
#include <utility>
@ -157,6 +158,44 @@ public:
}
};
//===----------------------------------------------------------------------===//
// AMDGPU CodeGen Pass Builder interface.
//===----------------------------------------------------------------------===//
class AMDGPUCodeGenPassBuilder
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
public:
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
void addIRPasses(AddIRPass &) const;
void addCodeGenPrepare(AddIRPass &) const;
void addPreISel(AddIRPass &addPass) const;
void addILPOpts(AddMachinePass &) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addPreRewrite(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
void addPreEmitPass(AddMachinePass &) const;
void addPreEmitRegAlloc(AddMachinePass &) const;
Error addRegAssignmentOptimized(AddMachinePass &) const;
void addPreRegAlloc(AddMachinePass &) const;
void addOptimizedRegAlloc(AddMachinePass &) const;
void addPreSched2(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
/// given that a pass shall work at an optimization \p Level minimum.
bool isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
void addEarlyCSEOrGVNPass(AddIRPass &) const;
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H

View File

@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
using namespace llvm;

View File

@ -20,8 +20,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;
namespace {
class R600MCInstLower : public AMDGPUMCInstLower {
public:

View File

@ -19,7 +19,6 @@
#include "R600MachineFunctionInfo.h"
#include "R600MachineScheduler.h"
#include "R600TargetTransformInfo.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include <optional>
@ -47,21 +46,6 @@ static MachineSchedRegistry R600SchedRegistry("r600",
"Run R600's custom scheduler",
createR600MachineScheduler);
//===----------------------------------------------------------------------===//
// R600 CodeGen Pass Builder interface.
//===----------------------------------------------------------------------===//
class R600CodeGenPassBuilder
: public CodeGenPassBuilder<R600CodeGenPassBuilder, R600TargetMachine> {
public:
R600CodeGenPassBuilder(R600TargetMachine &TM, const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
void addPreISel(AddIRPass &addPass) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
};
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

View File

@ -57,6 +57,21 @@ public:
createMachineScheduler(MachineSchedContext *C) const override;
};
//===----------------------------------------------------------------------===//
// R600 CodeGen Pass Builder interface.
//===----------------------------------------------------------------------===//
class R600CodeGenPassBuilder
: public CodeGenPassBuilder<R600CodeGenPassBuilder, R600TargetMachine> {
public:
R600CodeGenPassBuilder(R600TargetMachine &TM, const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
void addPreISel(AddIRPass &addPass) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H

View File

@ -16,14 +16,12 @@
#include "GCNSubtarget.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"

View File

@ -17808,11 +17808,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
!AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
}
static TargetLowering::AtomicExpansionKind
getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
// For GAS, lower to flat atomic.
return STI.hasGloballyAddressableScratch()
? TargetLowering::AtomicExpansionKind::Expand
: TargetLowering::AtomicExpansionKind::NotAtomic;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
return getPrivateAtomicExpansionKind(*getSubtarget());
// 64-bit flat atomics that dynamically reside in private memory will silently
// be dropped.
@ -17823,7 +17831,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (AS == AMDGPUAS::FLAT_ADDRESS &&
DL.getTypeSizeInBits(RMW->getType()) == 64 &&
flatInstrMayAccessPrivate(RMW))
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
auto ReportUnsafeHWInst = [=](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
@ -17898,7 +17906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// does. InstCombine transforms these with 0 to or, so undo that.
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
ConstVal && ConstVal->isNullValue())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
}
// If the allocation could be in remote, fine-grained memory, the rmw
@ -18027,9 +18035,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// fadd.
if (Subtarget->hasLDSFPAtomicAddF32()) {
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
}
}
}
@ -18083,14 +18091,14 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
? getPrivateAtomicExpansionKind(*getSubtarget())
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
? getPrivateAtomicExpansionKind(*getSubtarget())
: AtomicExpansionKind::None;
}
@ -18098,7 +18106,7 @@ TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
unsigned AddrSpace = CmpX->getPointerAddressSpace();
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
return getPrivateAtomicExpansionKind(*getSubtarget());
if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
return AtomicExpansionKind::None;
@ -18109,7 +18117,7 @@ SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
// If a 64-bit flat atomic may alias private, we need to avoid using the
// atomic in the private case.
return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::Expand
return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::CustomExpand
: AtomicExpansionKind::None;
}
@ -18468,9 +18476,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Builder.CreateBr(ExitBB);
}
static void convertScratchAtomicToFlatAtomic(Instruction *I,
unsigned PtrOpIdx) {
Value *PtrOp = I->getOperand(PtrOpIdx);
assert(PtrOp->getType()->getPointerAddressSpace() ==
AMDGPUAS::PRIVATE_ADDRESS);
Type *FlatPtr = PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
Value *ASCast = CastInst::CreatePointerCast(PtrOp, FlatPtr, "scratch.ascast",
I->getIterator());
I->setOperand(PtrOpIdx, ASCast);
}
void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
AtomicRMWInst::BinOp Op = AI->getOperation();
if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
Op == AtomicRMWInst::Xor) {
if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
@ -18493,9 +18516,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
}
void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
if (CI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
emitExpandAtomicAddrSpacePredicate(CI);
}
void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
if (LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
llvm_unreachable(
"Expand Atomic Load only handles SCRATCH -> FLAT conversion");
}
void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
if (SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
llvm_unreachable(
"Expand Atomic Store only handles SCRATCH -> FLAT conversion");
}
LoadInst *
SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);

View File

@ -562,6 +562,8 @@ public:
void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
void emitExpandAtomicLoad(LoadInst *LI) const override;
void emitExpandAtomicStore(StoreInst *SI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

View File

@ -32,15 +32,11 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
#define DEBUG_TYPE "si-insert-waitcnts"

View File

@ -17,7 +17,6 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/InitializePasses.h"
using namespace llvm;

View File

@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"
using namespace llvm;

View File

@ -80,7 +80,6 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

View File

@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;

View File

@ -7893,7 +7893,7 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
AI->getOperation() == AtomicRMWInst::Or ||
AI->getOperation() == AtomicRMWInst::Xor))
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
return AtomicExpansionKind::CmpXChg;
}

View File

@ -2073,23 +2073,15 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
}
// Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
// (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
// value.
// (getelementptr i8, %Base, %Offset) if all users are ICmps.
Value *Base;
Value *Offset;
auto UsesPointerAsInt = [](User *U) {
if (isa<ICmpInst, PtrToIntInst>(U))
return true;
if (auto *P = dyn_cast<PHINode>(U))
return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
return false;
};
if (match(CI.getOperand(0),
m_OneUse(m_c_Add(m_PtrToIntSameSize(DL, m_Value(Base)),
m_Value(Offset)))) &&
CI.getType()->getPointerAddressSpace() ==
Base->getType()->getPointerAddressSpace() &&
all_of(CI.users(), UsesPointerAsInt)) {
all_of(CI.users(), IsaPred<ICmpInst>)) {
return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
}
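
For reference, the fold that remains after this change (only applied when every user of the inttoptr is an icmp, the add has a single use, and the address spaces match) looks roughly like this in IR; names are illustrative:

%base.int = ptrtoint ptr %base to i64
%sum = add i64 %base.int, %off
%p = inttoptr i64 %sum to ptr
%cmp = icmp eq ptr %p, %q

; becomes
%p1 = getelementptr i8, ptr %base, i64 %off
%cmp = icmp eq ptr %p1, %q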

View File

@ -642,13 +642,6 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
Instruction *Ext = I->clone();
Ext->setOperand(0, Current);
// In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
// we assume that it is ok to redistribute trunc over add/sub/or. But for
// example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
// nuw (add A, B))). To make such redistributions legal we drop all the
// poison generating flags from cloned trunc instructions here.
if (isa<TruncInst>(Ext))
Ext->dropPoisonGeneratingFlags();
Ext->insertBefore(*IP->getParent(), IP);
Current = Ext;
}

View File

@ -153,7 +153,11 @@ template <typename LTy, typename RTy> struct match_combine_or {
match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
template <typename ITy> bool match(ITy *V) const {
return L.match(V) || R.match(V);
if (L.match(V))
return true;
if (R.match(V))
return true;
return false;
}
};

View File

@ -344,7 +344,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.cvt = fptrunc float %a to bfloat
@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.abs = call float @llvm.fabs.f32(float %a)
@ -417,7 +417,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.neg = fneg float %a
@ -480,7 +480,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.cvt = fptrunc double %a to bfloat
@ -543,7 +543,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.neg = fneg double %a
@ -607,7 +607,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
; GFX1250-NEXT: s_endpgm
entry:
%a.abs = call double @llvm.fabs.f64(double %a)

View File

@ -1582,22 +1582,28 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s0, v4
; SI-NEXT: v_readfirstlane_b32 s1, v5
; SI-NEXT: s_bfe_u32 s2, s1, 0xb0014
; SI-NEXT: s_add_i32 s8, s2, 0xfffffc01
; SI-NEXT: s_mov_b32 s3, 0xfffff
; SI-NEXT: s_mov_b32 s2, s6
; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s8
; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_and_b32 s9, s1, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s8, 0
; SI-NEXT: s_cselect_b32 s2, 0, s2
; SI-NEXT: s_cselect_b32 s3, s9, s3
; SI-NEXT: s_cmp_gt_i32 s8, 51
; SI-NEXT: s_cselect_b32 s1, s1, s3
; SI-NEXT: s_cselect_b32 s0, s0, s2
; SI-NEXT: v_fma_f64 v[0:1], -s[0:1], v[2:3], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s2, v5
; SI-NEXT: s_bfe_u32 s0, s2, 0xb0014
; SI-NEXT: s_add_i32 s3, s0, 0xfffffc01
; SI-NEXT: s_mov_b32 s1, 0xfffff
; SI-NEXT: s_mov_b32 s0, s6
; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s3
; SI-NEXT: v_not_b32_e32 v6, s0
; SI-NEXT: v_and_b32_e32 v6, v4, v6
; SI-NEXT: v_not_b32_e32 v7, s1
; SI-NEXT: v_and_b32_e32 v5, v5, v7
; SI-NEXT: s_and_b32 s0, s2, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s3, 0
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; SI-NEXT: v_mov_b32_e32 v7, s0
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: s_cmp_gt_i32 s3, 51
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_mov_b32_e32 v7, s2
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@ -1853,22 +1859,28 @@ define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1)
; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5]
; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1]
; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
; SI-NEXT: v_readfirstlane_b32 s4, v4
; SI-NEXT: v_readfirstlane_b32 s5, v5
; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
; SI-NEXT: s_add_i32 s8, s6, 0xfffffc01
; SI-NEXT: s_mov_b32 s7, 0xfffff
; SI-NEXT: s_mov_b32 s6, s2
; SI-NEXT: s_lshr_b64 s[6:7], s[6:7], s8
; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
; SI-NEXT: s_and_b32 s9, s5, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s8, 0
; SI-NEXT: s_cselect_b32 s6, 0, s6
; SI-NEXT: s_cselect_b32 s7, s9, s7
; SI-NEXT: s_cmp_gt_i32 s8, 51
; SI-NEXT: s_cselect_b32 s5, s5, s7
; SI-NEXT: s_cselect_b32 s4, s4, s6
; SI-NEXT: v_fma_f64 v[0:1], -s[4:5], v[2:3], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s6, v5
; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014
; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01
; SI-NEXT: s_mov_b32 s5, 0xfffff
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
; SI-NEXT: v_not_b32_e32 v6, s4
; SI-NEXT: v_and_b32_e32 v6, v4, v6
; SI-NEXT: v_not_b32_e32 v7, s5
; SI-NEXT: v_and_b32_e32 v5, v5, v7
; SI-NEXT: s_and_b32 s4, s6, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s7, 0
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; SI-NEXT: v_mov_b32_e32 v7, s4
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: s_cmp_gt_i32 s7, 51
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_mov_b32_e32 v7, s6
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@ -2097,22 +2109,28 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5]
; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1]
; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
; SI-NEXT: v_readfirstlane_b32 s4, v4
; SI-NEXT: v_readfirstlane_b32 s5, v5
; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
; SI-NEXT: s_add_i32 s8, s6, 0xfffffc01
; SI-NEXT: s_mov_b32 s7, 0xfffff
; SI-NEXT: s_mov_b32 s6, s2
; SI-NEXT: s_lshr_b64 s[6:7], s[6:7], s8
; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
; SI-NEXT: s_and_b32 s9, s5, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s8, 0
; SI-NEXT: s_cselect_b32 s6, 0, s6
; SI-NEXT: s_cselect_b32 s7, s9, s7
; SI-NEXT: s_cmp_gt_i32 s8, 51
; SI-NEXT: s_cselect_b32 s5, s5, s7
; SI-NEXT: s_cselect_b32 s4, s4, s6
; SI-NEXT: v_fma_f64 v[0:1], -s[4:5], v[2:3], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s6, v5
; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014
; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01
; SI-NEXT: s_mov_b32 s5, 0xfffff
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
; SI-NEXT: v_not_b32_e32 v6, s4
; SI-NEXT: v_and_b32_e32 v6, v4, v6
; SI-NEXT: v_not_b32_e32 v7, s5
; SI-NEXT: v_and_b32_e32 v5, v5, v7
; SI-NEXT: s_and_b32 s4, s6, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s7, 0
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; SI-NEXT: v_mov_b32_e32 v7, s4
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: s_cmp_gt_i32 s7, 51
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_mov_b32_e32 v7, s6
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@ -5233,22 +5251,27 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
; SI-NEXT: v_readfirstlane_b32 s0, v8
; SI-NEXT: v_readfirstlane_b32 s1, v9
; SI-NEXT: s_bfe_u32 s2, s1, 0xb0014
; SI-NEXT: s_add_i32 s10, s2, 0xfffffc01
; SI-NEXT: v_readfirstlane_b32 s8, v9
; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014
; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01
; SI-NEXT: s_mov_b32 s3, 0xfffff
; SI-NEXT: s_mov_b32 s2, s6
; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s10
; SI-NEXT: s_andn2_b64 s[8:9], s[0:1], s[8:9]
; SI-NEXT: s_and_b32 s11, s1, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s10, 0
; SI-NEXT: s_cselect_b32 s8, 0, s8
; SI-NEXT: s_cselect_b32 s9, s11, s9
; SI-NEXT: s_cmp_gt_i32 s10, 51
; SI-NEXT: s_cselect_b32 s1, s1, s9
; SI-NEXT: s_cselect_b32 s0, s0, s8
; SI-NEXT: v_fma_f64 v[2:3], -s[0:1], v[6:7], v[2:3]
; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9
; SI-NEXT: v_not_b32_e32 v10, s0
; SI-NEXT: v_and_b32_e32 v10, v8, v10
; SI-NEXT: v_not_b32_e32 v11, s1
; SI-NEXT: v_and_b32_e32 v9, v9, v11
; SI-NEXT: s_and_b32 s0, s8, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s9, 0
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
; SI-NEXT: v_mov_b32_e32 v11, s0
; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; SI-NEXT: s_cmp_gt_i32 s9, 51
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_mov_b32_e32 v11, s8
; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; SI-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc
; SI-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3]
; SI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1]
; SI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7]
; SI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
@ -5264,20 +5287,26 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s0, v6
; SI-NEXT: v_readfirstlane_b32 s1, v7
; SI-NEXT: s_bfe_u32 s8, s1, 0xb0014
; SI-NEXT: s_addk_i32 s8, 0xfc01
; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s8
; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_and_b32 s9, s1, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s8, 0
; SI-NEXT: s_cselect_b32 s2, 0, s2
; SI-NEXT: s_cselect_b32 s3, s9, s3
; SI-NEXT: s_cmp_gt_i32 s8, 51
; SI-NEXT: s_cselect_b32 s1, s1, s3
; SI-NEXT: s_cselect_b32 s0, s0, s2
; SI-NEXT: v_fma_f64 v[0:1], -s[0:1], v[4:5], v[0:1]
; SI-NEXT: v_readfirstlane_b32 s8, v7
; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014
; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01
; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9
; SI-NEXT: v_not_b32_e32 v8, s0
; SI-NEXT: v_and_b32_e32 v8, v6, v8
; SI-NEXT: v_not_b32_e32 v9, s1
; SI-NEXT: v_and_b32_e32 v7, v7, v9
; SI-NEXT: s_and_b32 s0, s8, 0x80000000
; SI-NEXT: s_cmp_lt_i32 s9, 0
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
; SI-NEXT: v_mov_b32_e32 v9, s0
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
; SI-NEXT: s_cmp_gt_i32 s9, 51
; SI-NEXT: s_cselect_b64 vcc, -1, 0
; SI-NEXT: v_mov_b32_e32 v9, s8
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
; SI-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
; SI-NEXT: v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1]
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;

View File

@ -86,15 +86,3 @@ entry:
store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
ret void
}
; GCN: scratch_atomic_store:
; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
; GCN: .amdhsa_kernel scratch_atomic_store
; CU: .amdhsa_uses_cu_stores 1
; NOCU: .amdhsa_uses_cu_stores 0
define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
entry:
store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
ret void
}

View File

@ -18,16 +18,6 @@ define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -84,16 +74,6 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_cube:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -152,16 +132,6 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_2darray:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -220,16 +190,6 @@ define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -286,16 +246,6 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -356,16 +306,6 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -422,16 +362,6 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_b_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -488,16 +418,6 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_b_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -556,16 +476,6 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_b_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -628,16 +538,6 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_b_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@ -691,13 +591,6 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
@ -743,13 +636,6 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
@ -791,13 +677,6 @@ define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_lz_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
@ -839,13 +718,6 @@ define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: gather4_c_lz_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
@ -901,4 +773,5 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX12: {{.*}}

View File

@ -76,13 +76,12 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_movk_i32 s4, 0xfc01
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s3, 0xfffff
; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_bfe_u32 v4, v3, 20, 11
; SI-NEXT: v_add_i32_e32 v6, vcc, s4, v4
; SI-NEXT: v_add_i32_e32 v6, vcc, 0xfffffc01, v4
; SI-NEXT: v_lshr_b64 v[4:5], s[2:3], v6
; SI-NEXT: v_and_b32_e32 v7, 0x80000000, v3
; SI-NEXT: v_not_b32_e32 v5, v5

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_or_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_smax_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_smin_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_umax_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_umin_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umin_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_umin_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,95 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define void @vec_reduce_xor_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_xor_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_xor_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_xor_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,168 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define void @vec_reduce_and_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v8i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v4i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v2i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v4i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v2i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_and_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,168 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define void @vec_reduce_or_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v8i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v4i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v2i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v4i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v2i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_or_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,168 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define void @vec_reduce_smax_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v8i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v4i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v2i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v4i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v2i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smax_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,168 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define void @vec_reduce_smin_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v8i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v4i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v2i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v4i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v2i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_smin_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
ret void
}

View File

@ -1,168 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define void @vec_reduce_umax_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v8i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v4i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v2i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v4i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v2i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
ret void
}
define void @vec_reduce_umax_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
ret void
}

Some files were not shown because too many files have changed in this diff.