Compare commits
29 Commits
users/pier...main

Commits in this comparison:

- 2a79ef66eb
- b149fc7755
- 7c1d2467f1
- 15a192cde5
- 2014890c09
- d8769bb5b7
- 3292edb7b4
- d2b810e24f
- a8aacb1b66
- d2dee948a4
- a6fcd1a663
- 2975e674ec
- ee5367bedb
- d5af08a221
- d6fcaef281
- 17dbb92612
- 8439777131
- fda24dbc16
- 7594b4b8d1
- bfc16510c7
- fd52f4d232
- 8bc038daf2
- faca8c9ed4
- 1b4fe26343
- 00a405f666
- 8bf105cb01
- 2d3167f8d8
- 4ff7ac2330
- 8d7df8bba1
@@ -138,6 +138,12 @@

   Dump function CFGs to graphviz format after each stage; enable '-print-loops'
   for color-coded blocks

+- `--dump-dot-func=<func1,func2,func3...>`
+
+  Dump function CFGs to graphviz format for specified functions only;
+  takes function name patterns (regex supported). Note: C++ function names
+  must be passed using their mangled names
+
 - `--dump-linux-exceptions`

   Dump Linux kernel exception table
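The note about mangled names matters in practice: BOLT matches `--dump-dot-func` patterns against symbol names rather than source-level names. A small illustrative mapping, assuming the Itanium C++ ABI used on Linux (these are the symbols the multi-func.cpp test input further down actually produces):

```cpp
// Illustrative only: source signatures from the test input below and the
// Itanium-ABI symbol names that --dump-dot-func must be given.
int add(int a, int b);      // symbol: _Z3addii
int multiply(int a, int b); // symbol: _Z8multiplyii
int main_helper();          // symbol: _Z11main_helperv
// main itself is never mangled, so --dump-dot-func=main works unmangled.
```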
@@ -15,6 +15,12 @@

 #include "llvm/Support/CommandLine.h"

+namespace llvm {
+namespace bolt {
+class BinaryFunction;
+}
+} // namespace llvm
+
 namespace opts {

 enum HeatmapModeKind {

@@ -100,6 +106,9 @@ extern llvm::cl::opt<unsigned> Verbosity;
 /// Return true if we should process all functions in the binary.
 bool processAllFunctions();

+/// Return true if we should dump dot graphs for the given function.
+bool shouldDumpDot(const llvm::bolt::BinaryFunction &Function);
+
 enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };

 extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun;
@@ -52,6 +52,7 @@ namespace opts {
 extern cl::opt<bool> PrintAll;
 extern cl::opt<bool> PrintDynoStats;
 extern cl::opt<bool> DumpDotAll;
+extern bool shouldDumpDot(const bolt::BinaryFunction &Function);
 extern cl::opt<std::string> AsmDump;
 extern cl::opt<bolt::PLTCall::OptType> PLT;
 extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,

@@ -340,7 +341,7 @@ Error BinaryFunctionPassManager::runPasses() {
       Function.print(BC.outs(), Message);

-      if (opts::DumpDotAll)
+      if (opts::shouldDumpDot(Function))
         Function.dumpGraphForPass(PassIdName);
     }
   }
@@ -115,6 +115,35 @@ cl::opt<bool> DumpDotAll(
              "enable '-print-loops' for color-coded blocks"),
     cl::Hidden, cl::cat(BoltCategory));

+cl::list<std::string> DumpDotFunc(
+    "dump-dot-func", cl::CommaSeparated,
+    cl::desc(
+        "dump function CFGs to graphviz format for specified functions only; "
+        "takes function name patterns (regex supported)"),
+    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
+
+bool shouldDumpDot(const bolt::BinaryFunction &Function) {
+  // If dump-dot-all is enabled, dump all functions.
+  if (DumpDotAll)
+    return !Function.isIgnored();
+
+  // If no functions were named via dump-dot-func, don't dump any.
+  if (DumpDotFunc.empty())
+    return false;
+
+  if (Function.isIgnored())
+    return false;
+
+  // Check whether the function matches any of the specified patterns.
+  for (const std::string &Name : DumpDotFunc) {
+    if (Function.hasNameRegex(Name)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 static cl::list<std::string>
     ForceFunctionNames("funcs",
                        cl::CommaSeparated,

@@ -3569,7 +3598,7 @@ void RewriteInstance::postProcessFunctions() {
     if (opts::PrintAll || opts::PrintCFG)
       Function.print(BC->outs(), "after building cfg");

-    if (opts::DumpDotAll)
+    if (opts::shouldDumpDot(Function))
       Function.dumpGraphForPass("00_build-cfg");

     if (opts::PrintLoopInfo) {
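As a quick illustration of how the selection in `shouldDumpDot` behaves, here is a minimal, self-contained sketch that uses `std::regex` as a stand-in for `BinaryFunction::hasNameRegex` (whether and how the real matcher anchors the pattern is an assumption here); the names are the symbols produced by the multi-func.cpp test input added below:

```cpp
#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
  // Symbols from the test binary and two example --dump-dot-func patterns.
  const std::vector<std::string> FunctionNames = {"main", "_Z3addii",
                                                  "_Z8multiplyii"};
  const std::vector<std::string> DumpDotFunc = {"_Z3addii", "main.*"};

  for (const std::string &Name : FunctionNames) {
    bool Dump = false;
    for (const std::string &Pattern : DumpDotFunc)
      if (std::regex_match(Name, std::regex(Pattern))) // anchored full match
        Dump = true;
    std::cout << Name << (Dump ? ": dump CFG\n" : ": skip\n");
  }
}
```

With these inputs, `main` and `_Z3addii` are selected and `_Z8multiplyii` is skipped, which matches what the lit test below expects.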
bolt/test/Inputs/multi-func.cpp (new file, 24 lines)

@@ -0,0 +1,24 @@
#include <iostream>

// Multiple functions to test selective dumping
int add(int a, int b) { return a + b; }

int multiply(int a, int b) { return a * b; }

int main_helper() {
  std::cout << "Helper function" << std::endl;
  return 42;
}

int main_secondary() { return add(5, 3); }

void other_function() { std::cout << "Other function" << std::endl; }

int main() {
  int result = add(10, 20);
  result = multiply(result, 2);
  main_helper();
  main_secondary();
  other_function();
  return result;
}
bolt/test/dump-dot-func.test (new file, 52 lines)

@@ -0,0 +1,52 @@
# Test the --dump-dot-func option with multiple functions
# (includes tests for both mangled/unmangled names)

RUN: %clang++ %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q

# Test 1: --dump-dot-func with specific function name (mangled)
RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD

# Test 2: --dump-dot-func with regex pattern (main.*)
RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX

# Test 3: --dump-dot-func with multiple specific functions (mangled names)
RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI

# Test 4: No option specified should create no dot files
RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE

# Test 5: --dump-dot-func with non-existent function
RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT

# Test 6: Backward compatibility - --dump-dot-all should still work
RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL

# Test 7: Test with unmangled function name (main function)
RUN: llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED

# Check that specific functions are dumped
ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot

MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot

MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot

# Should be no dumping messages when no option is specified
NONE-NOT: BOLT-INFO: dumping CFG

# Should be no dumping messages for non-existent function
NONEXISTENT-NOT: BOLT-INFO: dumping CFG

ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot

MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
@@ -15,14 +15,12 @@ using namespace clang::ast_matchers;

 namespace clang::tidy::bugprone {

-namespace {
-
 // Determine if the result of an expression is "stored" in some way.
 // It is true if the value is stored into a variable or used as initialization
 // or passed to a function or constructor.
 // For this use case compound assignments are not counted as a "store" (the 'E'
 // expression should have pointer type).
-bool isExprValueStored(const Expr *E, ASTContext &C) {
+static bool isExprValueStored(const Expr *E, ASTContext &C) {
   E = E->IgnoreParenCasts();
   // Get first non-paren, non-cast parent.
   ParentMapContext &PMap = C.getParentMapContext();

@@ -49,6 +47,8 @@ bool isExprValueStored(const Expr *E, ASTContext &C) {
   return isa<CallExpr, CXXConstructExpr>(ParentE);
 }

+namespace {
+
 AST_MATCHER_P(CXXTryStmt, hasHandlerFor,
               ast_matchers::internal::Matcher<QualType>, InnerMatcher) {
   for (unsigned NH = Node.getNumHandlers(), I = 0; I < NH; ++I) {
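The clang-tidy changes in this comparison largely follow the same NFC pattern shown above: file-local helper functions move out of anonymous namespaces and become `static`, while the anonymous namespace is kept (or re-opened) only for entities such as AST matchers and local classes that must live in one. A hedged, self-contained sketch of the before/after shape (names are illustrative, not taken from any of these patches):

```cpp
#include <string>

// Before: a helper with internal linkage only by virtue of the anonymous
// namespace it sits in.
// namespace {
// bool startsWithUnderscore(const std::string &Name) {
//   return !Name.empty() && Name.front() == '_';
// }
// } // namespace

// After: the helper is a static free function. Linkage is unchanged, but the
// intent is explicit, and the anonymous namespace can be reserved for types
// and AST matchers.
static bool startsWithUnderscore(const std::string &Name) {
  return !Name.empty() && Name.front() == '_';
}

int main() { return startsWithUnderscore("_foo") ? 0 : 1; }
```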
@ -14,10 +14,8 @@ using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::bugprone {
|
||||
|
||||
namespace {
|
||||
|
||||
bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
|
||||
const StringLiteral *Lit) {
|
||||
static bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
|
||||
const StringLiteral *Lit) {
|
||||
// String literals surrounded by parentheses are assumed to be on purpose.
|
||||
// i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
|
||||
|
||||
@ -58,6 +56,8 @@ bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
|
||||
MaxConcatenatedTokens) {
|
||||
return Node.getNumConcatenated() > 1 &&
|
||||
|
@ -46,7 +46,9 @@ enum class ConversionKind {
|
||||
ToLongDouble
|
||||
};
|
||||
|
||||
ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
|
||||
} // namespace
|
||||
|
||||
static ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
|
||||
return llvm::StringSwitch<ConversionKind>(FD->getName())
|
||||
.Cases("atoi", "atol", ConversionKind::ToInt)
|
||||
.Case("atoll", ConversionKind::ToLongInt)
|
||||
@ -54,8 +56,8 @@ ConversionKind classifyConversionFunc(const FunctionDecl *FD) {
|
||||
.Default(ConversionKind::None);
|
||||
}
|
||||
|
||||
ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
|
||||
const TargetInfo &TI) {
|
||||
static ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
|
||||
const TargetInfo &TI) {
|
||||
// Scan the format string for the first problematic format specifier, then
|
||||
// report that as the conversion type. This will miss additional conversion
|
||||
// specifiers, but that is acceptable behavior.
|
||||
@ -128,7 +130,7 @@ ConversionKind classifyFormatString(StringRef Fmt, const LangOptions &LO,
|
||||
return H.get();
|
||||
}
|
||||
|
||||
StringRef classifyConversionType(ConversionKind K) {
|
||||
static StringRef classifyConversionType(ConversionKind K) {
|
||||
switch (K) {
|
||||
case ConversionKind::None:
|
||||
llvm_unreachable("Unexpected conversion kind");
|
||||
@ -148,7 +150,7 @@ StringRef classifyConversionType(ConversionKind K) {
|
||||
llvm_unreachable("Unknown conversion kind");
|
||||
}
|
||||
|
||||
StringRef classifyReplacement(ConversionKind K) {
|
||||
static StringRef classifyReplacement(ConversionKind K) {
|
||||
switch (K) {
|
||||
case ConversionKind::None:
|
||||
llvm_unreachable("Unexpected conversion kind");
|
||||
@ -173,7 +175,6 @@ StringRef classifyReplacement(ConversionKind K) {
|
||||
}
|
||||
llvm_unreachable("Unknown conversion kind");
|
||||
}
|
||||
} // unnamed namespace
|
||||
|
||||
void StrToNumCheck::check(const MatchFinder::MatchResult &Result) {
|
||||
const auto *Call = Result.Nodes.getNodeAs<CallExpr>("expr");
|
||||
|
@ -59,7 +59,9 @@ AST_MATCHER(FunctionDecl, isPlacementOverload) {
|
||||
return true;
|
||||
}
|
||||
|
||||
OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
|
||||
} // namespace
|
||||
|
||||
static OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
|
||||
switch (FD->getOverloadedOperator()) {
|
||||
default:
|
||||
break;
|
||||
@ -75,7 +77,7 @@ OverloadedOperatorKind getCorrespondingOverload(const FunctionDecl *FD) {
|
||||
llvm_unreachable("Not an overloaded allocation operator");
|
||||
}
|
||||
|
||||
const char *getOperatorName(OverloadedOperatorKind K) {
|
||||
static const char *getOperatorName(OverloadedOperatorKind K) {
|
||||
switch (K) {
|
||||
default:
|
||||
break;
|
||||
@ -91,13 +93,14 @@ const char *getOperatorName(OverloadedOperatorKind K) {
|
||||
llvm_unreachable("Not an overloaded allocation operator");
|
||||
}
|
||||
|
||||
bool areCorrespondingOverloads(const FunctionDecl *LHS,
|
||||
const FunctionDecl *RHS) {
|
||||
static bool areCorrespondingOverloads(const FunctionDecl *LHS,
|
||||
const FunctionDecl *RHS) {
|
||||
return RHS->getOverloadedOperator() == getCorrespondingOverload(LHS);
|
||||
}
|
||||
|
||||
bool hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
|
||||
const CXXRecordDecl *RD = nullptr) {
|
||||
static bool
|
||||
hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
|
||||
const CXXRecordDecl *RD = nullptr) {
|
||||
if (RD) {
|
||||
// Check the methods in the given class and accessible to derived classes.
|
||||
for (const auto *BMD : RD->methods())
|
||||
@ -124,8 +127,6 @@ bool hasCorrespondingOverloadInBaseClass(const CXXMethodDecl *MD,
|
||||
return false;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void NewDeleteOverloadsCheck::registerMatchers(MatchFinder *Finder) {
|
||||
// Match all operator new and operator delete overloads (including the array
|
||||
// forms). Do not match implicit operators, placement operators, or
|
||||
|
@ -395,16 +395,12 @@ void MacroToEnumCallbacks::Endif(SourceLocation Loc, SourceLocation IfLoc) {
|
||||
--CurrentFile->ConditionScopes;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <size_t N>
|
||||
bool textEquals(const char (&Needle)[N], const char *HayStack) {
|
||||
static bool textEquals(const char (&Needle)[N], const char *HayStack) {
|
||||
return StringRef{HayStack, N - 1} == Needle;
|
||||
}
|
||||
|
||||
template <size_t N> size_t len(const char (&)[N]) { return N - 1; }
|
||||
|
||||
} // namespace
|
||||
template <size_t N> static size_t len(const char (&)[N]) { return N - 1; }
|
||||
|
||||
void MacroToEnumCallbacks::PragmaDirective(SourceLocation Loc,
|
||||
PragmaIntroducerKind Introducer) {
|
||||
|
@ -16,14 +16,13 @@ using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::modernize {
|
||||
|
||||
namespace {
|
||||
static constexpr char ConstructorCall[] = "constructorCall";
|
||||
static constexpr char ResetCall[] = "resetCall";
|
||||
static constexpr char NewExpression[] = "newExpression";
|
||||
|
||||
constexpr char ConstructorCall[] = "constructorCall";
|
||||
constexpr char ResetCall[] = "resetCall";
|
||||
constexpr char NewExpression[] = "newExpression";
|
||||
|
||||
std::string getNewExprName(const CXXNewExpr *NewExpr, const SourceManager &SM,
|
||||
const LangOptions &Lang) {
|
||||
static std::string getNewExprName(const CXXNewExpr *NewExpr,
|
||||
const SourceManager &SM,
|
||||
const LangOptions &Lang) {
|
||||
StringRef WrittenName = Lexer::getSourceText(
|
||||
CharSourceRange::getTokenRange(
|
||||
NewExpr->getAllocatedTypeSourceInfo()->getTypeLoc().getSourceRange()),
|
||||
@ -34,8 +33,6 @@ std::string getNewExprName(const CXXNewExpr *NewExpr, const SourceManager &SM,
|
||||
return WrittenName.str();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
const char MakeSmartPtrCheck::PointerType[] = "pointerType";
|
||||
|
||||
MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context,
|
||||
|
@ -19,9 +19,7 @@ using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::modernize {
|
||||
|
||||
namespace {
|
||||
|
||||
bool containsEscapes(StringRef HayStack, StringRef Escapes) {
|
||||
static bool containsEscapes(StringRef HayStack, StringRef Escapes) {
|
||||
size_t BackSlash = HayStack.find('\\');
|
||||
if (BackSlash == StringRef::npos)
|
||||
return false;
|
||||
@ -35,16 +33,16 @@ bool containsEscapes(StringRef HayStack, StringRef Escapes) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isRawStringLiteral(StringRef Text) {
|
||||
static bool isRawStringLiteral(StringRef Text) {
|
||||
// Already a raw string literal if R comes before ".
|
||||
const size_t QuotePos = Text.find('"');
|
||||
assert(QuotePos != StringRef::npos);
|
||||
return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
|
||||
}
|
||||
|
||||
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
|
||||
const StringLiteral *Literal,
|
||||
const CharsBitSet &DisallowedChars) {
|
||||
static bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
|
||||
const StringLiteral *Literal,
|
||||
const CharsBitSet &DisallowedChars) {
|
||||
// FIXME: Handle L"", u8"", u"" and U"" literals.
|
||||
if (!Literal->isOrdinary())
|
||||
return false;
|
||||
@ -64,14 +62,12 @@ bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
|
||||
return containsEscapes(Text, R"('\"?x01)");
|
||||
}
|
||||
|
||||
bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
|
||||
static bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
|
||||
return Bytes.find(Delimiter.empty()
|
||||
? std::string(R"lit()")lit")
|
||||
: (")" + Delimiter + R"(")")) != StringRef::npos;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
|
||||
ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -29,12 +29,13 @@
|
||||
using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::objc {
|
||||
namespace {
|
||||
|
||||
static constexpr StringRef WeakText = "__weak";
|
||||
static constexpr StringRef StrongText = "__strong";
|
||||
static constexpr StringRef UnsafeUnretainedText = "__unsafe_unretained";
|
||||
|
||||
namespace {
|
||||
|
||||
/// Matches ObjCIvarRefExpr, DeclRefExpr, or MemberExpr that reference
|
||||
/// Objective-C object (or block) variables or fields whose object lifetimes
|
||||
/// are not __unsafe_unretained.
|
||||
@ -49,6 +50,8 @@ AST_POLYMORPHIC_MATCHER(isObjCManagedLifetime,
|
||||
QT.getQualifiers().getObjCLifetime() > Qualifiers::OCL_ExplicitNone;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static std::optional<FixItHint>
|
||||
fixItHintReplacementForOwnershipString(StringRef Text, CharSourceRange Range,
|
||||
StringRef Ownership) {
|
||||
@ -93,8 +96,6 @@ fixItHintForVarDecl(const VarDecl *VD, const SourceManager &SM,
|
||||
return FixItHint::CreateInsertion(Range.getBegin(), "__unsafe_unretained ");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void NSInvocationArgumentLifetimeCheck::registerMatchers(MatchFinder *Finder) {
|
||||
Finder->addMatcher(
|
||||
traverse(
|
||||
|
@ -27,11 +27,14 @@ enum NamingStyle {
|
||||
CategoryProperty = 2,
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// For now we will only fix 'CamelCase' or 'abc_CamelCase' property to
|
||||
/// 'camelCase' or 'abc_camelCase'. For other cases the users need to
|
||||
/// come up with a proper name by their own.
|
||||
/// FIXME: provide fix for snake_case to snakeCase
|
||||
FixItHint generateFixItHint(const ObjCPropertyDecl *Decl, NamingStyle Style) {
|
||||
static FixItHint generateFixItHint(const ObjCPropertyDecl *Decl,
|
||||
NamingStyle Style) {
|
||||
auto Name = Decl->getName();
|
||||
auto NewName = Decl->getName().str();
|
||||
size_t Index = 0;
|
||||
@ -50,7 +53,7 @@ FixItHint generateFixItHint(const ObjCPropertyDecl *Decl, NamingStyle Style) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string validPropertyNameRegex(bool UsedInMatcher) {
|
||||
static std::string validPropertyNameRegex(bool UsedInMatcher) {
|
||||
// Allow any of these names:
|
||||
// foo
|
||||
// fooBar
|
||||
@ -72,13 +75,13 @@ std::string validPropertyNameRegex(bool UsedInMatcher) {
|
||||
return StartMatcher + "([a-z]|[A-Z][A-Z0-9])[a-z0-9A-Z]*$";
|
||||
}
|
||||
|
||||
bool hasCategoryPropertyPrefix(llvm::StringRef PropertyName) {
|
||||
static bool hasCategoryPropertyPrefix(llvm::StringRef PropertyName) {
|
||||
auto RegexExp =
|
||||
llvm::Regex("^[a-zA-Z][a-zA-Z0-9]*_[a-zA-Z0-9][a-zA-Z0-9_]+$");
|
||||
return RegexExp.match(PropertyName);
|
||||
}
|
||||
|
||||
bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
|
||||
static bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
|
||||
size_t Start = PropertyName.find_first_of('_');
|
||||
assert(Start != llvm::StringRef::npos && Start + 1 < PropertyName.size());
|
||||
auto Prefix = PropertyName.substr(0, Start);
|
||||
@ -88,7 +91,6 @@ bool prefixedPropertyNameValid(llvm::StringRef PropertyName) {
|
||||
auto RegexExp = llvm::Regex(llvm::StringRef(validPropertyNameRegex(false)));
|
||||
return RegexExp.match(PropertyName.substr(Start + 1));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void PropertyDeclarationCheck::registerMatchers(MatchFinder *Finder) {
|
||||
Finder->addMatcher(objcPropertyDecl(
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include <optional>
|
||||
|
||||
namespace clang::tidy::performance {
|
||||
namespace {
|
||||
|
||||
using namespace ::clang::ast_matchers;
|
||||
using llvm::StringRef;
|
||||
@ -30,8 +29,8 @@ static constexpr StringRef MethodDeclId = "methodDecl";
|
||||
static constexpr StringRef FunctionDeclId = "functionDecl";
|
||||
static constexpr StringRef OldVarDeclId = "oldVarDecl";
|
||||
|
||||
void recordFixes(const VarDecl &Var, ASTContext &Context,
|
||||
DiagnosticBuilder &Diagnostic) {
|
||||
static void recordFixes(const VarDecl &Var, ASTContext &Context,
|
||||
DiagnosticBuilder &Diagnostic) {
|
||||
Diagnostic << utils::fixit::changeVarDeclToReference(Var, Context);
|
||||
if (!Var.getType().isLocalConstQualified()) {
|
||||
if (std::optional<FixItHint> Fix = utils::fixit::addQualifierToVarDecl(
|
||||
@ -40,8 +39,8 @@ void recordFixes(const VarDecl &Var, ASTContext &Context,
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
|
||||
SourceManager &SM) {
|
||||
static std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
|
||||
SourceManager &SM) {
|
||||
bool Invalid = false;
|
||||
const char *TextAfter = SM.getCharacterData(Loc, &Invalid);
|
||||
if (Invalid) {
|
||||
@ -51,8 +50,8 @@ std::optional<SourceLocation> firstLocAfterNewLine(SourceLocation Loc,
|
||||
return Loc.getLocWithOffset(TextAfter[Offset] == '\0' ? Offset : Offset + 1);
|
||||
}
|
||||
|
||||
void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
|
||||
DiagnosticBuilder &Diagnostic) {
|
||||
static void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
|
||||
DiagnosticBuilder &Diagnostic) {
|
||||
auto &SM = Context.getSourceManager();
|
||||
// Attempt to remove trailing comments as well.
|
||||
auto Tok = utils::lexer::findNextTokenSkippingComments(Stmt.getEndLoc(), SM,
|
||||
@ -74,6 +73,8 @@ void recordRemoval(const DeclStmt &Stmt, ASTContext &Context,
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
AST_MATCHER_FUNCTION_P(StatementMatcher,
|
||||
isRefReturningMethodCallWithConstOverloads,
|
||||
std::vector<StringRef>, ExcludedContainerTypes) {
|
||||
@ -130,6 +131,8 @@ AST_MATCHER_FUNCTION_P(StatementMatcher, initializerReturnsReferenceToConst,
|
||||
hasUnaryOperand(OldVarDeclRef)))));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// This checks that the variable itself is only used as const, and also makes
|
||||
// sure that it does not reference another variable that could be modified in
|
||||
// the BlockStmt. It does this by checking the following:
|
||||
@ -180,13 +183,13 @@ static bool isInitializingVariableImmutable(
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isVariableUnused(const VarDecl &Var, const Stmt &BlockStmt,
|
||||
ASTContext &Context) {
|
||||
static bool isVariableUnused(const VarDecl &Var, const Stmt &BlockStmt,
|
||||
ASTContext &Context) {
|
||||
return allDeclRefExprs(Var, BlockStmt, Context).empty();
|
||||
}
|
||||
|
||||
const SubstTemplateTypeParmType *getSubstitutedType(const QualType &Type,
|
||||
ASTContext &Context) {
|
||||
static const SubstTemplateTypeParmType *
|
||||
getSubstitutedType(const QualType &Type, ASTContext &Context) {
|
||||
auto Matches = match(
|
||||
qualType(anyOf(substTemplateTypeParmType().bind("subst"),
|
||||
hasDescendant(substTemplateTypeParmType().bind("subst")))),
|
||||
@ -194,9 +197,9 @@ const SubstTemplateTypeParmType *getSubstitutedType(const QualType &Type,
|
||||
return selectFirst<SubstTemplateTypeParmType>("subst", Matches);
|
||||
}
|
||||
|
||||
bool differentReplacedTemplateParams(const QualType &VarType,
|
||||
const QualType &InitializerType,
|
||||
ASTContext &Context) {
|
||||
static bool differentReplacedTemplateParams(const QualType &VarType,
|
||||
const QualType &InitializerType,
|
||||
ASTContext &Context) {
|
||||
if (const SubstTemplateTypeParmType *VarTmplType =
|
||||
getSubstitutedType(VarType, Context)) {
|
||||
if (const SubstTemplateTypeParmType *InitializerTmplType =
|
||||
@ -212,8 +215,8 @@ bool differentReplacedTemplateParams(const QualType &VarType,
|
||||
return false;
|
||||
}
|
||||
|
||||
QualType constructorArgumentType(const VarDecl *OldVar,
|
||||
const BoundNodes &Nodes) {
|
||||
static QualType constructorArgumentType(const VarDecl *OldVar,
|
||||
const BoundNodes &Nodes) {
|
||||
if (OldVar) {
|
||||
return OldVar->getType();
|
||||
}
|
||||
@ -224,8 +227,6 @@ QualType constructorArgumentType(const VarDecl *OldVar,
|
||||
return MethodDecl->getReturnType();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
UnnecessaryCopyInitialization::UnnecessaryCopyInitialization(
|
||||
StringRef Name, ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -21,16 +21,14 @@ using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::performance {
|
||||
|
||||
namespace {
|
||||
|
||||
std::string paramNameOrIndex(StringRef Name, size_t Index) {
|
||||
static std::string paramNameOrIndex(StringRef Name, size_t Index) {
|
||||
return (Name.empty() ? llvm::Twine('#') + llvm::Twine(Index + 1)
|
||||
: llvm::Twine('\'') + Name + llvm::Twine('\''))
|
||||
.str();
|
||||
}
|
||||
|
||||
bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
|
||||
ASTContext &Context) {
|
||||
static bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
|
||||
ASTContext &Context) {
|
||||
auto Matches = match(
|
||||
traverse(TK_AsIs,
|
||||
decl(forEachDescendant(declRefExpr(
|
||||
@ -41,8 +39,6 @@ bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl,
|
||||
return Matches.empty();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
UnnecessaryValueParamCheck::UnnecessaryValueParamCheck(
|
||||
StringRef Name, ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -122,15 +122,15 @@ AST_MATCHER(EnumDecl, hasSequentialInitialValues) {
|
||||
return !AllEnumeratorsArePowersOfTwo;
|
||||
}
|
||||
|
||||
std::string getName(const EnumDecl *Decl) {
|
||||
} // namespace
|
||||
|
||||
static std::string getName(const EnumDecl *Decl) {
|
||||
if (!Decl->getDeclName())
|
||||
return "<unnamed>";
|
||||
|
||||
return Decl->getQualifiedNameAsString();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
EnumInitialValueCheck::EnumInitialValueCheck(StringRef Name,
|
||||
ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -144,6 +144,8 @@ struct CognitiveComplexity final {
|
||||
void account(SourceLocation Loc, unsigned short Nesting, Criteria C);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// All the possible messages that can be output. The choice of the message
|
||||
// to use is based of the combination of the CognitiveComplexity::Criteria.
|
||||
// It would be nice to have it in CognitiveComplexity struct, but then it is
|
||||
@ -163,23 +165,27 @@ static const std::array<const StringRef, 4> Msgs = {{
|
||||
}};
|
||||
|
||||
// Criteria is a bitset, thus a few helpers are needed.
|
||||
CognitiveComplexity::Criteria operator|(CognitiveComplexity::Criteria LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
static CognitiveComplexity::Criteria
|
||||
operator|(CognitiveComplexity::Criteria LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
return static_cast<CognitiveComplexity::Criteria>(llvm::to_underlying(LHS) |
|
||||
llvm::to_underlying(RHS));
|
||||
}
|
||||
CognitiveComplexity::Criteria operator&(CognitiveComplexity::Criteria LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
static CognitiveComplexity::Criteria
|
||||
operator&(CognitiveComplexity::Criteria LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
return static_cast<CognitiveComplexity::Criteria>(llvm::to_underlying(LHS) &
|
||||
llvm::to_underlying(RHS));
|
||||
}
|
||||
CognitiveComplexity::Criteria &operator|=(CognitiveComplexity::Criteria &LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
static CognitiveComplexity::Criteria &
|
||||
operator|=(CognitiveComplexity::Criteria &LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
LHS = operator|(LHS, RHS);
|
||||
return LHS;
|
||||
}
|
||||
CognitiveComplexity::Criteria &operator&=(CognitiveComplexity::Criteria &LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
static CognitiveComplexity::Criteria &
|
||||
operator&=(CognitiveComplexity::Criteria &LHS,
|
||||
CognitiveComplexity::Criteria RHS) {
|
||||
LHS = operator&(LHS, RHS);
|
||||
return LHS;
|
||||
}
|
||||
@ -199,6 +205,8 @@ void CognitiveComplexity::account(SourceLocation Loc, unsigned short Nesting,
|
||||
Total += Increase;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class FunctionASTVisitor final
|
||||
: public RecursiveASTVisitor<FunctionASTVisitor> {
|
||||
using Base = RecursiveASTVisitor<FunctionASTVisitor>;
|
||||
|
@ -41,9 +41,11 @@ AST_MATCHER(Stmt, isNULLMacroExpansion) {
|
||||
return isNULLMacroExpansion(&Node, Finder->getASTContext());
|
||||
}
|
||||
|
||||
StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
|
||||
QualType Type,
|
||||
ASTContext &Context) {
|
||||
} // namespace
|
||||
|
||||
static StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
|
||||
QualType Type,
|
||||
ASTContext &Context) {
|
||||
switch (CastExprKind) {
|
||||
case CK_IntegralToBoolean:
|
||||
return Type->isUnsignedIntegerType() ? "0u" : "0";
|
||||
@ -62,15 +64,15 @@ StringRef getZeroLiteralToCompareWithForType(CastKind CastExprKind,
|
||||
}
|
||||
}
|
||||
|
||||
bool isUnaryLogicalNotOperator(const Stmt *Statement) {
|
||||
static bool isUnaryLogicalNotOperator(const Stmt *Statement) {
|
||||
const auto *UnaryOperatorExpr = dyn_cast<UnaryOperator>(Statement);
|
||||
return UnaryOperatorExpr && UnaryOperatorExpr->getOpcode() == UO_LNot;
|
||||
}
|
||||
|
||||
void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
|
||||
const ImplicitCastExpr *Cast, const Stmt *Parent,
|
||||
ASTContext &Context,
|
||||
bool UseUpperCaseLiteralSuffix) {
|
||||
static void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
|
||||
const ImplicitCastExpr *Cast,
|
||||
const Stmt *Parent, ASTContext &Context,
|
||||
bool UseUpperCaseLiteralSuffix) {
|
||||
// In case of expressions like (! integer), we should remove the redundant not
|
||||
// operator and use inverted comparison (integer == 0).
|
||||
bool InvertComparison =
|
||||
@ -133,8 +135,8 @@ void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
|
||||
Diag << FixItHint::CreateInsertion(EndLoc, EndLocInsertion);
|
||||
}
|
||||
|
||||
StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
|
||||
ASTContext &Context) {
|
||||
static StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
|
||||
ASTContext &Context) {
|
||||
if (isNULLMacroExpansion(Expression, Context)) {
|
||||
return "false";
|
||||
}
|
||||
@ -161,7 +163,7 @@ StringRef getEquivalentBoolLiteralForExpr(const Expr *Expression,
|
||||
return {};
|
||||
}
|
||||
|
||||
bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
|
||||
static bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
|
||||
SourceRange PrefixRange(Loc.getLocWithOffset(-1), Loc);
|
||||
StringRef SpaceBeforeStmtStr = Lexer::getSourceText(
|
||||
CharSourceRange::getCharRange(PrefixRange), Context.getSourceManager(),
|
||||
@ -173,9 +175,10 @@ bool needsSpacePrefix(SourceLocation Loc, ASTContext &Context) {
|
||||
return !AllowedCharacters.contains(SpaceBeforeStmtStr.back());
|
||||
}
|
||||
|
||||
void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
|
||||
const ImplicitCastExpr *Cast,
|
||||
ASTContext &Context, StringRef OtherType) {
|
||||
static void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
|
||||
const ImplicitCastExpr *Cast,
|
||||
ASTContext &Context,
|
||||
StringRef OtherType) {
|
||||
if (!Context.getLangOpts().CPlusPlus) {
|
||||
Diag << FixItHint::CreateInsertion(Cast->getBeginLoc(),
|
||||
(Twine("(") + OtherType + ")").str());
|
||||
@ -200,8 +203,9 @@ void fixGenericExprCastFromBool(DiagnosticBuilder &Diag,
|
||||
}
|
||||
}
|
||||
|
||||
StringRef getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
|
||||
QualType DestType, ASTContext &Context) {
|
||||
static StringRef
|
||||
getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
|
||||
QualType DestType, ASTContext &Context) {
|
||||
// Prior to C++11, false literal could be implicitly converted to pointer.
|
||||
if (!Context.getLangOpts().CPlusPlus11 &&
|
||||
(DestType->isPointerType() || DestType->isMemberPointerType()) &&
|
||||
@ -222,8 +226,8 @@ StringRef getEquivalentForBoolLiteral(const CXXBoolLiteralExpr *BoolLiteral,
|
||||
return BoolLiteral->getValue() ? "1" : "0";
|
||||
}
|
||||
|
||||
bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
|
||||
ASTContext &Context) {
|
||||
static bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
|
||||
ASTContext &Context) {
|
||||
std::queue<const Stmt *> Q;
|
||||
Q.push(Cast);
|
||||
|
||||
@ -251,8 +255,6 @@ bool isCastAllowedInCondition(const ImplicitCastExpr *Cast,
|
||||
return false;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
ImplicitBoolConversionCheck::ImplicitBoolConversionCheck(
|
||||
StringRef Name, ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -28,8 +28,11 @@ AST_MATCHER_P(QualType, hasUnqualifiedType,
|
||||
|
||||
enum class Qualifier { Const, Volatile, Restrict };
|
||||
|
||||
std::optional<Token> findQualToken(const VarDecl *Decl, Qualifier Qual,
|
||||
const MatchFinder::MatchResult &Result) {
|
||||
} // namespace
|
||||
|
||||
static std::optional<Token>
|
||||
findQualToken(const VarDecl *Decl, Qualifier Qual,
|
||||
const MatchFinder::MatchResult &Result) {
|
||||
// Since either of the locs can be in a macro, use `makeFileCharRange` to be
|
||||
// sure that we have a consistent `CharSourceRange`, located entirely in the
|
||||
// source file.
|
||||
@ -58,7 +61,7 @@ std::optional<Token> findQualToken(const VarDecl *Decl, Qualifier Qual,
|
||||
*Result.SourceManager);
|
||||
}
|
||||
|
||||
std::optional<SourceRange>
|
||||
static std::optional<SourceRange>
|
||||
getTypeSpecifierLocation(const VarDecl *Var,
|
||||
const MatchFinder::MatchResult &Result) {
|
||||
SourceRange TypeSpecifier(
|
||||
@ -73,8 +76,8 @@ getTypeSpecifierLocation(const VarDecl *Var,
|
||||
return TypeSpecifier;
|
||||
}
|
||||
|
||||
std::optional<SourceRange> mergeReplacementRange(SourceRange &TypeSpecifier,
|
||||
const Token &ConstToken) {
|
||||
static std::optional<SourceRange>
|
||||
mergeReplacementRange(SourceRange &TypeSpecifier, const Token &ConstToken) {
|
||||
if (TypeSpecifier.getBegin().getLocWithOffset(-1) == ConstToken.getEndLoc()) {
|
||||
TypeSpecifier.setBegin(ConstToken.getLocation());
|
||||
return std::nullopt;
|
||||
@ -86,21 +89,19 @@ std::optional<SourceRange> mergeReplacementRange(SourceRange &TypeSpecifier,
|
||||
return SourceRange(ConstToken.getLocation(), ConstToken.getEndLoc());
|
||||
}
|
||||
|
||||
bool isPointerConst(QualType QType) {
|
||||
static bool isPointerConst(QualType QType) {
|
||||
QualType Pointee = QType->getPointeeType();
|
||||
assert(!Pointee.isNull() && "can't have a null Pointee");
|
||||
return Pointee.isConstQualified();
|
||||
}
|
||||
|
||||
bool isAutoPointerConst(QualType QType) {
|
||||
static bool isAutoPointerConst(QualType QType) {
|
||||
QualType Pointee =
|
||||
cast<AutoType>(QType->getPointeeType().getTypePtr())->desugar();
|
||||
assert(!Pointee.isNull() && "can't have a null Pointee");
|
||||
return Pointee.isConstQualified();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
QualifiedAutoCheck::QualifiedAutoCheck(StringRef Name,
|
||||
ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
|
@ -14,19 +14,18 @@ using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang::tidy::readability {
|
||||
|
||||
namespace {
|
||||
static const char *const RedundantReturnDiag =
|
||||
"redundant return statement at the end "
|
||||
"of a function with a void return type";
|
||||
static const char *const RedundantContinueDiag =
|
||||
"redundant continue statement at the "
|
||||
"end of loop statement";
|
||||
|
||||
const char *const RedundantReturnDiag = "redundant return statement at the end "
|
||||
"of a function with a void return type";
|
||||
const char *const RedundantContinueDiag = "redundant continue statement at the "
|
||||
"end of loop statement";
|
||||
|
||||
bool isLocationInMacroExpansion(const SourceManager &SM, SourceLocation Loc) {
|
||||
static bool isLocationInMacroExpansion(const SourceManager &SM,
|
||||
SourceLocation Loc) {
|
||||
return SM.isMacroBodyExpansion(Loc) || SM.isMacroArgExpansion(Loc);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void RedundantControlFlowCheck::registerMatchers(MatchFinder *Finder) {
|
||||
Finder->addMatcher(
|
||||
functionDecl(isDefinition(), returns(voidType()),
|
||||
|
@@ -13,16 +13,14 @@

 namespace clang::tidy::utils::type_traits {

-namespace {
-
-bool classHasTrivialCopyAndDestroy(QualType Type) {
+static bool classHasTrivialCopyAndDestroy(QualType Type) {
   auto *Record = Type->getAsCXXRecordDecl();
   return Record && Record->hasDefinition() &&
          !Record->hasNonTrivialCopyConstructor() &&
          !Record->hasNonTrivialDestructor();
 }

-bool hasDeletedCopyConstructor(QualType Type) {
+static bool hasDeletedCopyConstructor(QualType Type) {
   auto *Record = Type->getAsCXXRecordDecl();
   if (!Record || !Record->hasDefinition())
     return false;

@@ -33,8 +31,6 @@ bool hasDeletedCopyConstructor(QualType Type) {
   return false;
 }

-} // namespace
-
 std::optional<bool> isExpensiveToCopy(QualType Type,
                                       const ASTContext &Context) {
   if (Type->isDependentType() || Type->isIncompleteType())
@@ -309,6 +309,13 @@ NVPTX Support

 X86 Support
 ^^^^^^^^^^^
+- More SSE, AVX and AVX512 intrinsics, including initializers and general
+  arithmetic, can now be used in C++ constant expressions.
+- Some SSE, AVX and AVX512 intrinsics have been converted to wrap
+  generic __builtin intrinsics.
+- NOTE: Please avoid use of the __builtin_ia32_* intrinsics - these are not
+  guaranteed to exist in future releases, or match behaviour with previous
+  releases of clang or other compilers.
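To make the first bullet concrete, here is a hedged sketch of what constant-expression support for these intrinsics allows. It assumes a clang that includes this change and compilation with `-mavx2`; the vector-literal casts and element access mirror how the avx2-builtins.c tests later in this diff verify results:

```cpp
#include <immintrin.h>

// Each 32-bit lane of the first operand is shifted left by the matching lane
// of the second operand, entirely at compile time.
constexpr __m256i Shifted = _mm256_sllv_epi32(
    (__m256i)(__v8si){1, 1, 1, 1, 1, 1, 1, 1},
    (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7});
static_assert(((__v8si)Shifted)[7] == 128, "1 << 7 in the last lane");
```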

 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
@ -627,11 +627,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
|
||||
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
|
||||
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
|
||||
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
|
||||
|
||||
def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
|
||||
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
|
||||
def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
|
||||
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
|
||||
@ -654,46 +666,6 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
|
||||
def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, _Vector<2, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
|
||||
def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
|
||||
def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
|
||||
def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
|
||||
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
|
||||
def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
|
||||
def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
|
||||
def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
|
||||
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
|
||||
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
|
||||
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
|
||||
}
|
||||
|
||||
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
|
||||
def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">;
|
||||
}
|
||||
|
@@ -11669,13 +11669,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case clang::X86::BI__builtin_ia32_pmulhuw512:
   case clang::X86::BI__builtin_ia32_pmulhw128:
   case clang::X86::BI__builtin_ia32_pmulhw256:
-  case clang::X86::BI__builtin_ia32_pmulhw512: {
+  case clang::X86::BI__builtin_ia32_pmulhw512:
+  case clang::X86::BI__builtin_ia32_psllv2di:
+  case clang::X86::BI__builtin_ia32_psllv4di:
+  case clang::X86::BI__builtin_ia32_psllv4si:
+  case clang::X86::BI__builtin_ia32_psllv8si:
+  case clang::X86::BI__builtin_ia32_psrav4si:
+  case clang::X86::BI__builtin_ia32_psrav8si:
+  case clang::X86::BI__builtin_ia32_psrlv2di:
+  case clang::X86::BI__builtin_ia32_psrlv4di:
+  case clang::X86::BI__builtin_ia32_psrlv4si:
+  case clang::X86::BI__builtin_ia32_psrlv8si: {
     APValue SourceLHS, SourceRHS;
     if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
         !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
       return false;

     QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
     unsigned SourceLen = SourceLHS.getVectorLength();
     SmallVector<APValue, 4> ResultElements;
     ResultElements.reserve(SourceLen);

@@ -11687,12 +11698,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     case Builtin::BI__builtin_elementwise_add_sat:
       ResultElements.push_back(APValue(
           APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
-                 DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                 DestUnsigned)));
       break;
     case Builtin::BI__builtin_elementwise_sub_sat:
       ResultElements.push_back(APValue(
           APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
-                 DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                 DestUnsigned)));
       break;
     case clang::X86::BI__builtin_ia32_pmulhuw128:
     case clang::X86::BI__builtin_ia32_pmulhuw256:

@@ -11706,6 +11717,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
                                               /*isUnsigned=*/false)));
       break;
+    case clang::X86::BI__builtin_ia32_psllv2di:
+    case clang::X86::BI__builtin_ia32_psllv4di:
+    case clang::X86::BI__builtin_ia32_psllv4si:
+    case clang::X86::BI__builtin_ia32_psllv8si:
+      if (RHS.uge(RHS.getBitWidth())) {
+        ResultElements.push_back(
+            APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+        break;
+      }
+      ResultElements.push_back(
+          APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
+      break;
+    case clang::X86::BI__builtin_ia32_psrav4si:
+    case clang::X86::BI__builtin_ia32_psrav8si:
+      if (RHS.uge(RHS.getBitWidth())) {
+        ResultElements.push_back(
+            APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned)));
+        break;
+      }
+      ResultElements.push_back(
+          APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
+      break;
+    case clang::X86::BI__builtin_ia32_psrlv2di:
+    case clang::X86::BI__builtin_ia32_psrlv4di:
+    case clang::X86::BI__builtin_ia32_psrlv4si:
+    case clang::X86::BI__builtin_ia32_psrlv8si:
+      if (RHS.uge(RHS.getBitWidth())) {
+        ResultElements.push_back(
+            APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+        break;
+      }
+      ResultElements.push_back(
+          APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
+      break;
     }
   }
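The per-element semantics the block above encodes are easy to state on a single lane: out-of-range shift counts make the logical shifts (psllv/psrlv) produce 0, while the arithmetic right shift (psrav) fills with the sign bit. A minimal, self-contained scalar sketch of those rules, with plain 32-bit integers standing in for one vector lane (assumes C++20, where right-shifting a negative value is defined as arithmetic):

```cpp
#include <cstdint>

// One 32-bit lane of psllv: shift counts >= the element width give 0.
constexpr uint32_t sllv_lane(uint32_t Val, uint32_t Count) {
  return Count >= 32 ? 0u : Val << Count;
}
// One 32-bit lane of psrlv: logical right shift, 0 on out-of-range counts.
constexpr uint32_t srlv_lane(uint32_t Val, uint32_t Count) {
  return Count >= 32 ? 0u : Val >> Count;
}
// One 32-bit lane of psrav: arithmetic right shift, sign-fill on
// out-of-range counts (mirrors LHS.ashr(BitWidth - 1) above).
constexpr int32_t srav_lane(int32_t Val, uint32_t Count) {
  return Count >= 32 ? (Val < 0 ? -1 : 0) : Val >> Count;
}

static_assert(sllv_lane(1u, 33) == 0u);
static_assert(srlv_lane(0x80000000u, 31) == 1u);
static_assert(srav_lane(-2, 40) == -1);
```

These are the same cases the TEST_CONSTEXPR checks added to avx2-builtins.c exercise with out-of-range counts such as 33 and -17.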
@ -3721,7 +3721,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
|
||||
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 256-bit vector of [8 x i32] containing the result.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
|
||||
_mm256_sllv_epi32(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
|
||||
@ -3743,7 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
|
||||
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the result.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
|
||||
_mm_sllv_epi32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
|
||||
@ -3765,7 +3765,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
|
||||
/// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 256-bit vector of [4 x i64] containing the result.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
|
||||
_mm256_sllv_epi64(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
|
||||
@ -3787,7 +3787,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
|
||||
/// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the result.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
|
||||
_mm_sllv_epi64(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
|
||||
@ -3810,7 +3810,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
|
||||
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 256-bit vector of [8 x i32] containing the result.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
|
||||
_mm256_srav_epi32(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
|
||||
@ -3833,7 +3833,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
|
||||
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the result.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
|
||||
_mm_srav_epi32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
|
||||
@ -3855,7 +3855,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
|
||||
/// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 256-bit vector of [8 x i32] containing the result.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
|
||||
_mm256_srlv_epi32(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
|
||||
@ -3877,7 +3877,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
|
||||
/// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the result.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
|
||||
_mm_srlv_epi32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
|
||||
@ -3899,7 +3899,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
|
||||
/// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 256-bit vector of [4 x i64] containing the result.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
|
||||
_mm256_srlv_epi64(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
|
||||
@ -3921,7 +3921,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
|
||||
/// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
|
||||
/// bits).
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the result.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
|
||||
_mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
|
||||
|
@ -327,7 +327,6 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) {
|
||||
// CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
|
||||
return _mm256_cvtepi8_epi16(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12));
|
||||
|
||||
__m256i test_mm256_cvtepi8_epi32(__m128i a) {
|
||||
@ -336,7 +335,6 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) {
|
||||
// CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
|
||||
return _mm256_cvtepi8_epi32(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4));
|
||||
|
||||
__m256i test_mm256_cvtepi8_epi64(__m128i a) {
|
||||
@ -345,7 +343,6 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) {
|
||||
// CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepi8_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0));
|
||||
|
||||
__m256i test_mm256_cvtepi16_epi32(__m128i a) {
|
||||
@ -353,7 +350,6 @@ __m256i test_mm256_cvtepi16_epi32(__m128i a) {
|
||||
// CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
|
||||
return _mm256_cvtepi16_epi32(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4));
|
||||
|
||||
__m256i test_mm256_cvtepi16_epi64(__m128i a) {
|
||||
@ -362,7 +358,6 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) {
|
||||
// CHECK: sext <4 x i16> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepi16_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0));
|
||||
|
||||
__m256i test_mm256_cvtepi32_epi64(__m128i a) {
|
||||
@ -370,7 +365,6 @@ __m256i test_mm256_cvtepi32_epi64(__m128i a) {
|
||||
// CHECK: sext <4 x i32> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepi32_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2, -1, 0));
|
||||
|
||||
__m256i test_mm256_cvtepu8_epi16(__m128i a) {
|
||||
@ -378,7 +372,6 @@ __m256i test_mm256_cvtepu8_epi16(__m128i a) {
|
||||
// CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
|
||||
return _mm256_cvtepu8_epi16(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244));
|
||||
|
||||
__m256i test_mm256_cvtepu8_epi32(__m128i a) {
|
||||
@ -387,7 +380,6 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) {
|
||||
// CHECK: zext <8 x i8> %{{.*}} to <8 x i32>
|
||||
return _mm256_cvtepu8_epi32(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252));
|
||||
|
||||
__m256i test_mm256_cvtepu8_epi64(__m128i a) {
|
||||
@ -396,7 +388,6 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) {
|
||||
// CHECK: zext <4 x i8> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepu8_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0));
|
||||
|
||||
__m256i test_mm256_cvtepu16_epi32(__m128i a) {
|
||||
@ -404,7 +395,6 @@ __m256i test_mm256_cvtepu16_epi32(__m128i a) {
|
||||
// CHECK: zext <8 x i16> {{.*}} to <8 x i32>
|
||||
return _mm256_cvtepu16_epi32(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
|
||||
|
||||
__m256i test_mm256_cvtepu16_epi64(__m128i a) {
|
||||
@ -413,7 +403,6 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) {
|
||||
// CHECK: zext <4 x i16> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepu16_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0));
|
||||
|
||||
__m256i test_mm256_cvtepu32_epi64(__m128i a) {
|
||||
@ -421,7 +410,6 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) {
|
||||
// CHECK: zext <4 x i32> %{{.*}} to <4 x i64>
|
||||
return _mm256_cvtepu32_epi64(a);
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2, 4294967295, 0));
|
||||
|
||||
__m128i test0_mm256_extracti128_si256_0(__m256i a) {
|
||||
@ -1120,24 +1108,28 @@ __m128i test_mm_sllv_epi32(__m128i a, __m128i b) {
|
||||
// CHECK: call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
|
||||
return _mm_sllv_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v4si(_mm_sllv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 2, -8, 24, 0));
|
||||
|
||||
__m256i test_mm256_sllv_epi32(__m256i a, __m256i b) {
|
||||
// CHECK-LABEL: test_mm256_sllv_epi32
|
||||
// CHECK: call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
|
||||
return _mm256_sllv_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_sllv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 2, -8, 24, -64, 0, 0, 0, 0));
|
||||
|
||||
__m128i test_mm_sllv_epi64(__m128i a, __m128i b) {
|
||||
// CHECK-LABEL: test_mm_sllv_epi64
|
||||
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
|
||||
return _mm_sllv_epi64(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_m128i(_mm_sllv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 256, 0x8000000000000000ULL));
|
||||
|
||||
__m256i test_mm256_sllv_epi64(__m256i a, __m256i b) {
|
||||
// CHECK-LABEL: test_mm256_sllv_epi64
|
||||
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
|
||||
return _mm256_sllv_epi64(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_m256i(_mm256_sllv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 2, -8, 24, 0));
|
||||
|
||||
__m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
|
||||
// CHECK-LABEL: test_mm256_sra_epi16
|
||||
@ -1180,12 +1172,14 @@ __m128i test_mm_srav_epi32(__m128i a, __m128i b) {
|
||||
// CHECK: call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
|
||||
return _mm_srav_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v4si(_mm_srav_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, -1, 0, -1));
|
||||
|
||||
__m256i test_mm256_srav_epi32(__m256i a, __m256i b) {
|
||||
// CHECK-LABEL: test_mm256_srav_epi32
|
||||
// CHECK: call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
|
||||
return _mm256_srav_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_srav_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, -1, 0, -1));
|
||||
|
||||
__m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
|
||||
// CHECK-LABEL: test_mm256_srl_epi16
|
||||
@ -1252,24 +1246,28 @@ __m128i test_mm_srlv_epi32(__m128i a, __m128i b) {
|
||||
// CHECK: call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
|
||||
return _mm_srlv_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v4si(_mm_srlv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 1073741823, 0, 0));
|
||||
|
||||
__m256i test_mm256_srlv_epi32(__m256i a, __m256i b) {
|
||||
// CHECK-LABEL: test_mm256_srlv_epi32
|
||||
// CHECK: call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
|
||||
return _mm256_srlv_epi32(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_v8si(_mm256_srlv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7));
|
||||
|
||||
__m128i test_mm_srlv_epi64(__m128i a, __m128i b) {
|
||||
// CHECK-LABEL: test_mm_srlv_epi64
|
||||
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
|
||||
return _mm_srlv_epi64(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_m128i(_mm_srlv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 1));
|
||||
|
||||
__m256i test_mm256_srlv_epi64(__m256i a, __m256i b) {
|
||||
// CHECK-LABEL: test_mm256_srlv_epi64
|
||||
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
|
||||
return _mm256_srlv_epi64(a, b);
|
||||
}
|
||||
TEST_CONSTEXPR(match_m256i(_mm256_srlv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 0, 0x3FFFFFFFFFFFFFFFULL, 0, 0));
|
||||
|
||||
__m256i test_mm256_stream_load_si256(__m256i const *a) {
|
||||
// CHECK-LABEL: test_mm256_stream_load_si256
|
||||
|
@ -106,6 +106,12 @@ if(MSVC)
endif()
set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})

# Win/ASan relies on the runtime functions being hotpatchable. See
# https://github.com/llvm/llvm-project/pull/149444
if(MSVC)
  list(APPEND ASAN_CFLAGS /hotpatch)
endif()

append_list_if(MSVC /Zl ASAN_CFLAGS)

set(ASAN_COMMON_DEFINITIONS "")
|
||||
|
@ -792,7 +792,7 @@ static void PrintNoOriginTrackingWarning() {

static void PrintNoTaintWarning(const void *address) {
  Decorator d;
  Printf("  %sDFSan: no tainted value at %zx%s\n", d.Warning(), (uptr)address,
  Printf("  %sDFSan: no tainted value at %x%s\n", d.Warning(), address,
         d.Default());
}
|
||||
|
||||
|
@ -176,7 +176,7 @@ static void HwasanFormatMemoryUsage(InternalScopedString &s) {
|
||||
"HWASAN pid: %d rss: %zd threads: %zd stacks: %zd"
|
||||
" thr_aux: %zd stack_depot: %zd uniq_stacks: %zd"
|
||||
" heap: %zd",
|
||||
(int)internal_getpid(), GetRSS(), thread_stats.n_live_threads,
|
||||
internal_getpid(), GetRSS(), thread_stats.n_live_threads,
|
||||
thread_stats.total_stack_size,
|
||||
thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(),
|
||||
sds.allocated, sds.n_uniq_ids, asc[AllocatorStatMapped]);
|
||||
@ -692,7 +692,7 @@ void __hwasan_handle_longjmp(const void *sp_dst) {
|
||||
"WARNING: HWASan is ignoring requested __hwasan_handle_longjmp: "
|
||||
"stack top: %p; target %p; distance: %p (%zd)\n"
|
||||
"False positive error reports may follow\n",
|
||||
(void *)sp, (void *)dst, (void *)(dst - sp), dst - sp);
|
||||
(void *)sp, (void *)dst, dst - sp, dst - sp);
|
||||
return;
|
||||
}
|
||||
TagMemory(sp, dst - sp, 0);
|
||||
|
@ -41,7 +41,7 @@ static inline bool malloc_bisect(StackTrace *stack, uptr orig_size) {
|
||||
if (h < left || h > right)
|
||||
return false;
|
||||
if (flags()->malloc_bisect_dump) {
|
||||
Printf("[alloc] %u %zu\n", (u32)h, orig_size);
|
||||
Printf("[alloc] %u %zu\n", h, orig_size);
|
||||
stack->Print();
|
||||
}
|
||||
return true;
|
||||
|
@ -306,9 +306,8 @@ static void PrintStackAllocations(const StackAllocationsRingBuffer *sa,
|
||||
"%p is located %zd bytes %s a %zd-byte local variable %s "
|
||||
"[%p,%p) "
|
||||
"in %s %s\n",
|
||||
(void *)untagged_addr, offset, whence, local.size, local.name,
|
||||
(void *)best_beg, (void *)(best_beg + local.size),
|
||||
local.function_name, location.data());
|
||||
untagged_addr, offset, whence, local.size, local.name, best_beg,
|
||||
best_beg + local.size, local.function_name, location.data());
|
||||
location.clear();
|
||||
Printf("%s\n", d.Default());
|
||||
}
|
||||
@ -739,8 +738,8 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
|
||||
Printf("%s", d.Location());
|
||||
Printf("%p is located %zd bytes %s a %zd-byte region [%p,%p)\n",
|
||||
untagged_addr, offset, whence,
|
||||
candidate.heap.end - candidate.heap.begin,
|
||||
(void *)candidate.heap.begin, (void *)candidate.heap.end);
|
||||
candidate.heap.end - candidate.heap.begin, candidate.heap.begin,
|
||||
candidate.heap.end);
|
||||
Printf("%s", d.Allocation());
|
||||
Printf("allocated by thread T%u here:\n", candidate.heap.thread_id);
|
||||
Printf("%s", d.Default());
|
||||
@ -763,11 +762,11 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
|
||||
Printf(
|
||||
"%p is located %zd bytes %s a %zd-byte global variable "
|
||||
"%s [%p,%p) in %s\n",
|
||||
(void *)untagged_addr,
|
||||
untagged_addr,
|
||||
candidate.after ? untagged_addr - (info.start + info.size)
|
||||
: info.start - untagged_addr,
|
||||
candidate.after ? "after" : "before", info.size, info.name,
|
||||
(void *)info.start, (void *)(info.start + info.size), module_name);
|
||||
info.start, info.start + info.size, module_name);
|
||||
} else {
|
||||
uptr size = GetGlobalSizeFromDescriptor(candidate.untagged_addr);
|
||||
if (size == 0)
|
||||
@ -775,14 +774,14 @@ void BaseReport::PrintHeapOrGlobalCandidate() const {
|
||||
Printf(
|
||||
"%p is located %s a global variable in "
|
||||
"\n #0 0x%x (%s+0x%x)\n",
|
||||
(void *)untagged_addr, candidate.after ? "after" : "before",
|
||||
(void *)candidate.untagged_addr, module_name, (u32)module_address);
|
||||
untagged_addr, candidate.after ? "after" : "before",
|
||||
candidate.untagged_addr, module_name, module_address);
|
||||
else
|
||||
Printf(
|
||||
"%p is located %s a %zd-byte global variable in "
|
||||
"\n #0 0x%x (%s+0x%x)\n",
|
||||
(void *)untagged_addr, candidate.after ? "after" : "before", size,
|
||||
(void *)candidate.untagged_addr, module_name, (u32)module_address);
|
||||
untagged_addr, candidate.after ? "after" : "before", size,
|
||||
candidate.untagged_addr, module_name, module_address);
|
||||
}
|
||||
Printf("%s", d.Default());
|
||||
}
|
||||
@ -793,8 +792,8 @@ void BaseReport::PrintAddressDescription() const {
|
||||
int num_descriptions_printed = 0;
|
||||
|
||||
if (MemIsShadow(untagged_addr)) {
|
||||
Printf("%s%p is HWAsan shadow memory.\n%s", d.Location(),
|
||||
(void *)untagged_addr, d.Default());
|
||||
Printf("%s%p is HWAsan shadow memory.\n%s", d.Location(), untagged_addr,
|
||||
d.Default());
|
||||
return;
|
||||
}
|
||||
|
||||
@ -803,7 +802,7 @@ void BaseReport::PrintAddressDescription() const {
|
||||
Printf(
|
||||
"%s[%p,%p) is a %s %s heap chunk; "
|
||||
"size: %zd offset: %zd\n%s",
|
||||
d.Location(), (void *)heap.begin, (void *)(heap.begin + heap.size),
|
||||
d.Location(), heap.begin, heap.begin + heap.size,
|
||||
heap.from_small_heap ? "small" : "large",
|
||||
heap.is_allocated ? "allocated" : "unallocated", heap.size,
|
||||
untagged_addr - heap.begin, d.Default());
|
||||
@ -822,8 +821,8 @@ void BaseReport::PrintAddressDescription() const {
|
||||
Printf("%s", d.Error());
|
||||
Printf("\nCause: stack tag-mismatch\n");
|
||||
Printf("%s", d.Location());
|
||||
Printf("Address %p is located in stack of thread T%zd\n",
|
||||
(void *)untagged_addr, (ssize)sa.thread_id());
|
||||
Printf("Address %p is located in stack of thread T%zd\n", untagged_addr,
|
||||
sa.thread_id());
|
||||
Printf("%s", d.Default());
|
||||
announce_by_id(sa.thread_id());
|
||||
PrintStackAllocations(sa.get(), ptr_tag, untagged_addr);
|
||||
@ -843,9 +842,9 @@ void BaseReport::PrintAddressDescription() const {
|
||||
Printf("\nCause: use-after-free\n");
|
||||
Printf("%s", d.Location());
|
||||
Printf("%p is located %zd bytes inside a %zd-byte region [%p,%p)\n",
|
||||
(void *)untagged_addr, untagged_addr - UntagAddr(har.tagged_addr),
|
||||
(ssize)har.requested_size, UntagAddr(har.tagged_addr),
|
||||
(void *)(UntagAddr(har.tagged_addr) + har.requested_size));
|
||||
untagged_addr, untagged_addr - UntagAddr(har.tagged_addr),
|
||||
har.requested_size, UntagAddr(har.tagged_addr),
|
||||
UntagAddr(har.tagged_addr) + har.requested_size);
|
||||
Printf("%s", d.Allocation());
|
||||
Printf("freed by thread T%u here:\n", ha.free_thread_id);
|
||||
Printf("%s", d.Default());
|
||||
@ -859,7 +858,7 @@ void BaseReport::PrintAddressDescription() const {
|
||||
// Print a developer note: the index of this heap object
|
||||
// in the thread's deallocation ring buffer.
|
||||
Printf("hwasan_dev_note_heap_rb_distance: %zd %zd\n", ha.ring_index + 1,
|
||||
(ssize)flags()->heap_history_size);
|
||||
flags()->heap_history_size);
|
||||
Printf("hwasan_dev_note_num_matching_addrs: %zd\n", ha.num_matching_addrs);
|
||||
Printf("hwasan_dev_note_num_matching_addrs_4b: %zd\n",
|
||||
ha.num_matching_addrs_4b);
|
||||
@ -916,11 +915,10 @@ InvalidFreeReport::~InvalidFreeReport() {
|
||||
const Thread *thread = GetCurrentThread();
|
||||
if (thread) {
|
||||
Report("ERROR: %s: %s on address %p at pc %p on thread T%zd\n",
|
||||
SanitizerToolName, bug_type, (void *)untagged_addr, (void *)pc,
|
||||
(ssize)thread->unique_id());
|
||||
SanitizerToolName, bug_type, untagged_addr, pc, thread->unique_id());
|
||||
} else {
|
||||
Report("ERROR: %s: %s on address %p at pc %p on unknown thread\n",
|
||||
SanitizerToolName, bug_type, (void *)untagged_addr, (void *)pc);
|
||||
SanitizerToolName, bug_type, untagged_addr, pc);
|
||||
}
|
||||
Printf("%s", d.Access());
|
||||
if (shadow.addr) {
|
||||
@ -969,8 +967,7 @@ TailOverwrittenReport::~TailOverwrittenReport() {
|
||||
Printf("%s", d.Error());
|
||||
const char *bug_type = "allocation-tail-overwritten";
|
||||
Report("ERROR: %s: %s; heap object [%p,%p) of size %zd\n", SanitizerToolName,
|
||||
bug_type, (void *)untagged_addr, (void *)(untagged_addr + orig_size),
|
||||
orig_size);
|
||||
bug_type, untagged_addr, untagged_addr + orig_size, orig_size);
|
||||
Printf("\n%s", d.Default());
|
||||
Printf(
|
||||
"Stack of invalid access unknown. Issue detected at deallocation "
|
||||
@ -1040,7 +1037,7 @@ TagMismatchReport::~TagMismatchReport() {
|
||||
uptr pc = GetTopPc(stack);
|
||||
Printf("%s", d.Error());
|
||||
Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type,
|
||||
(void *)untagged_addr, (void *)pc);
|
||||
untagged_addr, pc);
|
||||
|
||||
Thread *t = GetCurrentThread();
|
||||
|
||||
@ -1052,12 +1049,12 @@ TagMismatchReport::~TagMismatchReport() {
|
||||
GetShortTagCopy(MemToShadow(untagged_addr + mismatch_offset));
|
||||
Printf(
|
||||
"%s of size %zu at %p tags: %02x/%02x(%02x) (ptr/mem) in thread T%zd\n",
|
||||
is_store ? "WRITE" : "READ", access_size, (void *)untagged_addr,
|
||||
ptr_tag, mem_tag, short_tag, (ssize)t->unique_id());
|
||||
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
|
||||
mem_tag, short_tag, t->unique_id());
|
||||
} else {
|
||||
Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
|
||||
is_store ? "WRITE" : "READ", access_size, (void *)untagged_addr,
|
||||
ptr_tag, mem_tag, (ssize)t->unique_id());
|
||||
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
|
||||
mem_tag, t->unique_id());
|
||||
}
|
||||
if (mismatch_offset)
|
||||
Printf("Invalid access starting at offset %zu\n", mismatch_offset);
|
||||
@ -1096,7 +1093,7 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size,
|
||||
// See the frame breakdown defined in __hwasan_tag_mismatch (from
|
||||
// hwasan_tag_mismatch_{aarch64,riscv64}.S).
|
||||
void ReportRegisters(const uptr *frame, uptr pc) {
|
||||
Printf("\nRegisters where the failure occurred (pc %p):\n", (void *)pc);
|
||||
Printf("\nRegisters where the failure occurred (pc %p):\n", pc);
|
||||
|
||||
// We explicitly print a single line (4 registers/line) each iteration to
|
||||
// reduce the amount of logcat error messages printed. Each Printf() will
|
||||
|
@ -173,10 +173,9 @@ uptr Thread::stack_size() {
}

void Thread::Print(const char *Prefix) {
  Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix,
         (ssize_t)unique_id_, (void *)this, (void *)stack_bottom(),
         (void *)stack_top(), stack_top() - stack_bottom(), (void *)tls_begin(),
         (void *)tls_end());
  Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, unique_id_,
         (void *)this, stack_bottom(), stack_top(),
         stack_top() - stack_bottom(), tls_begin(), tls_end());
}

static u32 xorshift(u32 state) {
|
||||
|
@ -806,7 +806,7 @@ static bool ReportUnsuspendedThreads(
      succeded = false;
      Report(
          "Running thread %zu was not suspended. False leaks are possible.\n",
          (usize)os_id);
          os_id);
    }
  }
  return succeded;
|
||||
|
@ -29,7 +29,7 @@ static void ProtectGap(uptr addr, uptr size) {
    Printf("protect_shadow_gap=0:"
           " not protecting shadow gap, allocating gap's shadow\n"
           "|| `[%p, %p]` || ShadowGap's shadow ||\n",
           (void *)GapShadowBeg, (void *)GapShadowEnd);
           GapShadowBeg, GapShadowEnd);
    ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
                             "unprotected gap shadow");
    return;
|
||||
|
@ -105,7 +105,7 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
  }

  if (Verbosity())
    Report("Registering %d new functions!\n", (int)SledMap.Functions);
    Report("Registering %d new functions!\n", SledMap.Functions);

  {
    SpinMutexLock Guard(&XRayInstrMapMutex);
@ -308,8 +308,7 @@ XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
    return XRayPatchingStatus::NOT_INITIALIZED;

  if (Verbosity())
    Report("Patching object %d with %d functions.\n", ObjId,
           (int)InstrMap.Entries);
    Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);

  // Check if the corresponding DSO has been unloaded.
  if (!InstrMap.Loaded) {
|
||||
|
@ -280,6 +280,7 @@ set(TARGET_LIBC_ENTRYPOINTS

set(TARGET_LIBM_ENTRYPOINTS
    # math.h entrypoints
    libc.src.math.acos
    libc.src.math.acosf
    libc.src.math.acoshf
    libc.src.math.asin
|
||||
|
@ -2432,14 +2432,6 @@ functions:
|
||||
return_type: double
|
||||
arguments:
|
||||
- type: double
|
||||
- name: sincosf
|
||||
standards:
|
||||
- gnu
|
||||
return_type: void
|
||||
arguments:
|
||||
- type: float
|
||||
- type: float *
|
||||
- type: float *
|
||||
- name: sinf
|
||||
standards:
|
||||
- stdc
|
||||
@ -2453,6 +2445,22 @@ functions:
|
||||
arguments:
|
||||
- type: _Float16
|
||||
guard: LIBC_TYPES_HAS_FLOAT16
|
||||
- name: sincos
|
||||
standards:
|
||||
- gnu
|
||||
return_type: void
|
||||
arguments:
|
||||
- type: double
|
||||
- type: double *
|
||||
- type: double *
|
||||
- name: sincosf
|
||||
standards:
|
||||
- gnu
|
||||
return_type: void
|
||||
arguments:
|
||||
- type: float
|
||||
- type: float *
|
||||
- type: float *
|
||||
- name: sinhf
|
||||
standards:
|
||||
- stdc
|
||||
|
@ -978,11 +978,11 @@ public:
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI map(map&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI map(map&& __m) = default;
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI map(map&& __m, const allocator_type& __a);
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) = default;
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI map(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
|
||||
: __tree_(__vc(__comp)) {
|
||||
@ -1646,12 +1646,11 @@ public:
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) = default;
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m, const allocator_type& __a);
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multimap&
|
||||
operator=(multimap&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI multimap& operator=(multimap&& __m) = default;
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
|
||||
: __tree_(__vc(__comp)) {
|
||||
|
@ -667,7 +667,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI set& operator=(const set& __s) = default;
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI set(set&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI set(set&& __s) = default;
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI explicit set(const allocator_type& __a) : __tree_(__a) {}
|
||||
@ -699,10 +699,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) noexcept(is_nothrow_move_assignable<__base>::value) {
|
||||
__tree_ = std::move(__s.__tree_);
|
||||
return *this;
|
||||
}
|
||||
_LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) = default;
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI ~set() { static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), ""); }
|
||||
@ -1126,7 +1123,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI multiset& operator=(const multiset& __s) = default;
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) = default;
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s, const allocator_type& __a);
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
@ -1158,10 +1155,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) _NOEXCEPT_(is_nothrow_move_assignable<__base>::value) {
|
||||
__tree_ = std::move(__s.__tree_);
|
||||
return *this;
|
||||
}
|
||||
_LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) = default;
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI ~multiset() {
|
||||
|
@ -1049,8 +1049,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u, const allocator_type& __a);
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u, const allocator_type& __a);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map(initializer_list<value_type> __il);
|
||||
_LIBCPP_HIDE_FROM_ABI
|
||||
@ -1102,8 +1101,7 @@ public:
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(const unordered_map& __u) = default;
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_map& operator=(initializer_list<value_type> __il);
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
@ -1823,8 +1821,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u, const allocator_type& __a);
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u, const allocator_type& __a);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(initializer_list<value_type> __il);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap(
|
||||
@ -1876,8 +1873,7 @@ public:
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(const unordered_multimap& __u) = default;
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(initializer_list<value_type> __il);
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
|
@ -706,7 +706,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u, const allocator_type& __a);
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u, const allocator_type& __a);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set(initializer_list<value_type> __il);
|
||||
_LIBCPP_HIDE_FROM_ABI
|
||||
@ -735,8 +735,7 @@ public:
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(const unordered_set& __u) = default;
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_set& operator=(initializer_list<value_type> __il);
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
@ -1076,11 +1075,6 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const unordered_set&
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
|
||||
template <class _Value, class _Hash, class _Pred, class _Alloc>
|
||||
inline unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
|
||||
: __table_(std::move(__u.__table_)) {}
|
||||
|
||||
template <class _Value, class _Hash, class _Pred, class _Alloc>
|
||||
unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u, const allocator_type& __a)
|
||||
: __table_(std::move(__u.__table_), __a) {
|
||||
@ -1294,8 +1288,7 @@ public:
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u, const allocator_type& __a);
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u, const allocator_type& __a);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(initializer_list<value_type> __il);
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset(
|
||||
@ -1324,8 +1317,7 @@ public:
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(const unordered_multiset& __u) = default;
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_assignable<__table>::value) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u) = default;
|
||||
_LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(initializer_list<value_type> __il);
|
||||
# endif // _LIBCPP_CXX03_LANG
|
||||
|
||||
@ -1675,11 +1667,6 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
|
||||
|
||||
# ifndef _LIBCPP_CXX03_LANG
|
||||
|
||||
template <class _Value, class _Hash, class _Pred, class _Alloc>
|
||||
inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(unordered_multiset&& __u)
|
||||
_NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
|
||||
: __table_(std::move(__u.__table_)) {}
|
||||
|
||||
template <class _Value, class _Hash, class _Pred, class _Alloc>
|
||||
unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
|
||||
unordered_multiset&& __u, const allocator_type& __a)
|
||||
|
@ -1399,6 +1399,7 @@ void SymbolTable::resolveAlternateNames() {
      auto toUndef = dyn_cast<Undefined>(toSym);
      if (toUndef && (!toUndef->weakAlias || toUndef->isAntiDep))
        continue;
      toSym->isUsedInRegularObj = true;
      if (toSym->isLazy())
        forceLazy(toSym);
      u->setWeakAlias(toSym);
|
||||
|
25
lld/test/COFF/alternatename-lto.ll
Normal file
@ -0,0 +1,25 @@
; REQUIRES: x86
; RUN: mkdir -p %t.dir
; RUN: llvm-as -o %t.obj %s
; RUN: lld-link -out:%t.dll -dll -noentry %t.obj -export:test

target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-windows-msvc19.33.0"

$alt = comdat any

@alt = weak_odr dso_local global i32 0, comdat, align 4
@ext = external dso_local global i32, align 4

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @test() #0 {
entry:
  %0 = load i32, ptr @ext, align 4
  ret i32 %0
}

attributes #0 = { noinline nounwind optnone uwtable }

!llvm.linker.options = !{!0}

!0 = !{!"/alternatename:ext=alt"}
|
@ -347,6 +347,9 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
  else
    data.SetAddressByteSize(sizeof(void *));

  if (!type_size)
    return Status::FromErrorString("type does not have a size");

  uint32_t result_byte_size = *type_size;
  if (m_value.GetData(data, result_byte_size))
    return error; // Success;
|
||||
|
@ -15,6 +15,7 @@ add_lldb_unittest(LLDBCoreTests
  SourceManagerTest.cpp
  TelemetryTest.cpp
  UniqueCStringMapTest.cpp
  Value.cpp

  LINK_COMPONENTS
    Support
|
||||
|
39
lldb/unittests/Core/Value.cpp
Normal file
@ -0,0 +1,39 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "lldb/Core/Value.h"
#include "Plugins/Platform/MacOSX/PlatformMacOSX.h"
#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
#include "TestingSupport/SubsystemRAII.h"
#include "TestingSupport/Symbol/ClangTestUtils.h"

#include "lldb/Utility/DataExtractor.h"

#include "gtest/gtest.h"

using namespace lldb_private;
using namespace lldb_private::clang_utils;

TEST(ValueTest, GetValueAsData) {
  SubsystemRAII<FileSystem, HostInfo, PlatformMacOSX> subsystems;
  auto holder = std::make_unique<clang_utils::TypeSystemClangHolder>("test");
  auto *clang = holder->GetAST();

  Value v(Scalar(42));
  DataExtractor extractor;

  // no compiler type
  Status status = v.GetValueAsData(nullptr, extractor, nullptr);
  ASSERT_TRUE(status.Fail());

  // with compiler type
  v.SetCompilerType(clang->GetBasicType(lldb::BasicType::eBasicTypeChar));

  status = v.GetValueAsData(nullptr, extractor, nullptr);
  ASSERT_TRUE(status.Success());
}
|
@ -18983,7 +18983,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
    if ((N->getFlags().hasAllowContract() &&
         N0->getFlags().hasAllowContract()) ||
        N0IsTrunc)
      return DAG.getNode(
          ISD::FP_ROUND, DL, VT, N0.getOperand(0),
          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
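A worked example of the double-rounding caveat mentioned in the comment above (illustrative values only, not taken from this change):

; Take x = 1 + 2^-11 + 2^-25, exactly representable as a double.
;   fptrunc double -> half  : x is just above the midpoint between 1 and 1 + 2^-10, so it rounds up to 1 + 2^-10
;   fptrunc double -> float : the 2^-25 term is below float precision, giving exactly 1 + 2^-11
;   fptrunc float  -> half  : 1 + 2^-11 sits exactly on the midpoint, and ties-to-even rounds down to 1.0
; The two-step result (1.0) differs from the one-step result (1 + 2^-10), which is why the fold
; is only done when both fp_rounds allow contraction or both are value-preserving truncations.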
|
||||
|
@ -18,6 +18,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@ -26,6 +27,7 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
|
||||
|
@ -27,6 +27,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"

#ifdef EXPENSIVE_CHECKS
|
||||
|
@ -6135,6 +6135,19 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
    }
  }

bool AMDGPUTargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32:
    return false;
  }
  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}

bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
    unsigned Depth) const {
|
||||
|
@ -323,6 +323,12 @@ public:
                                           const MachineRegisterInfo &MRI,
                                           unsigned Depth = 0) const override;

  bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
                                    const SelectionDAG &DAG, bool SNaN = false,
                                    unsigned Depth = 0) const override;
|
||||
|
@ -21,6 +21,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
|
||||
|
@ -225,6 +225,7 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/AttributeMask.h"
@ -243,6 +244,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/Alignment.h"
|
||||
|
@ -190,12 +190,14 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
|
||||
|
@ -22,7 +22,6 @@
namespace llvm {
class AsmPrinter;
class MCContext;
} // namespace llvm

class AMDGPUMCInstLower {
  MCContext &Ctx;
@ -66,4 +65,5 @@ static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
  return nullptr;
}
} // namespace
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
|
||||
|
@ -90,6 +90,7 @@
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Passes/CodeGenPassBuilder.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
@ -125,6 +126,44 @@ using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
namespace {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU CodeGen Pass Builder interface.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDGPUCodeGenPassBuilder
|
||||
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
|
||||
using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
|
||||
|
||||
public:
|
||||
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
|
||||
const CGPassBuilderOption &Opts,
|
||||
PassInstrumentationCallbacks *PIC);
|
||||
|
||||
void addIRPasses(AddIRPass &) const;
|
||||
void addCodeGenPrepare(AddIRPass &) const;
|
||||
void addPreISel(AddIRPass &addPass) const;
|
||||
void addILPOpts(AddMachinePass &) const;
|
||||
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
|
||||
Error addInstSelector(AddMachinePass &) const;
|
||||
void addPreRewrite(AddMachinePass &) const;
|
||||
void addMachineSSAOptimization(AddMachinePass &) const;
|
||||
void addPostRegAlloc(AddMachinePass &) const;
|
||||
void addPreEmitPass(AddMachinePass &) const;
|
||||
void addPreEmitRegAlloc(AddMachinePass &) const;
|
||||
Error addRegAssignmentOptimized(AddMachinePass &) const;
|
||||
void addPreRegAlloc(AddMachinePass &) const;
|
||||
void addOptimizedRegAlloc(AddMachinePass &) const;
|
||||
void addPreSched2(AddMachinePass &) const;
|
||||
|
||||
/// Check if a pass is enabled given \p Opt option. The option always
|
||||
/// overrides defaults if explicitly used. Otherwise its default will be used
|
||||
/// given that a pass shall work at an optimization \p Level minimum.
|
||||
bool isPassEnabled(const cl::opt<bool> &Opt,
|
||||
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
|
||||
void addEarlyCSEOrGVNPass(AddIRPass &) const;
|
||||
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
|
||||
};
|
||||
|
||||
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
|
||||
public:
|
||||
SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Passes/CodeGenPassBuilder.h"
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
@ -158,44 +157,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU CodeGen Pass Builder interface.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDGPUCodeGenPassBuilder
|
||||
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
|
||||
using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
|
||||
|
||||
public:
|
||||
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
|
||||
const CGPassBuilderOption &Opts,
|
||||
PassInstrumentationCallbacks *PIC);
|
||||
|
||||
void addIRPasses(AddIRPass &) const;
|
||||
void addCodeGenPrepare(AddIRPass &) const;
|
||||
void addPreISel(AddIRPass &addPass) const;
|
||||
void addILPOpts(AddMachinePass &) const;
|
||||
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
|
||||
Error addInstSelector(AddMachinePass &) const;
|
||||
void addPreRewrite(AddMachinePass &) const;
|
||||
void addMachineSSAOptimization(AddMachinePass &) const;
|
||||
void addPostRegAlloc(AddMachinePass &) const;
|
||||
void addPreEmitPass(AddMachinePass &) const;
|
||||
void addPreEmitRegAlloc(AddMachinePass &) const;
|
||||
Error addRegAssignmentOptimized(AddMachinePass &) const;
|
||||
void addPreRegAlloc(AddMachinePass &) const;
|
||||
void addOptimizedRegAlloc(AddMachinePass &) const;
|
||||
void addPreSched2(AddMachinePass &) const;
|
||||
|
||||
/// Check if a pass is enabled given \p Opt option. The option always
|
||||
/// overrides defaults if explicitly used. Otherwise its default will be used
|
||||
/// given that a pass shall work at an optimization \p Level minimum.
|
||||
bool isPassEnabled(const cl::opt<bool> &Opt,
|
||||
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
|
||||
void addEarlyCSEOrGVNPass(AddIRPass &) const;
|
||||
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
||||
#include "llvm/IR/IntrinsicsR600.h"
|
||||
#include "llvm/Passes/CodeGenPassBuilder.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class R600MCInstLower : public AMDGPUMCInstLower {
|
||||
public:
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "R600MachineScheduler.h"
|
||||
#include "R600TargetTransformInfo.h"
|
||||
#include "llvm/Passes/CodeGenPassBuilder.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include <optional>
|
||||
|
||||
@ -46,6 +47,21 @@ static MachineSchedRegistry R600SchedRegistry("r600",
|
||||
"Run R600's custom scheduler",
|
||||
createR600MachineScheduler);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 CodeGen Pass Builder interface.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class R600CodeGenPassBuilder
|
||||
: public CodeGenPassBuilder<R600CodeGenPassBuilder, R600TargetMachine> {
|
||||
public:
|
||||
R600CodeGenPassBuilder(R600TargetMachine &TM, const CGPassBuilderOption &Opts,
|
||||
PassInstrumentationCallbacks *PIC);
|
||||
|
||||
void addPreISel(AddIRPass &addPass) const;
|
||||
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
|
||||
Error addInstSelector(AddMachinePass &) const;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 Target Machine (R600 -> Cayman)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -57,21 +57,6 @@ public:
|
||||
createMachineScheduler(MachineSchedContext *C) const override;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 CodeGen Pass Builder interface.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class R600CodeGenPassBuilder
|
||||
: public CodeGenPassBuilder<R600CodeGenPassBuilder, R600TargetMachine> {
|
||||
public:
|
||||
R600CodeGenPassBuilder(R600TargetMachine &TM, const CGPassBuilderOption &Opts,
|
||||
PassInstrumentationCallbacks *PIC);
|
||||
|
||||
void addPreISel(AddIRPass &addPass) const;
|
||||
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
|
||||
Error addInstSelector(AddMachinePass &) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
|
||||
|
@ -16,12 +16,14 @@
|
||||
#include "GCNSubtarget.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/UniformityAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
|
@ -32,11 +32,15 @@
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/Sequence.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachinePassManager.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Support/DebugCounter.h"
|
||||
#include "llvm/TargetParser/TargetParser.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "si-insert-waitcnts"
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachinePassManager.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/RegisterClassInfo.h"
|
||||
#include "llvm/CodeGen/VirtRegMap.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -2073,15 +2073,23 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
  }

  // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
  // (getelementptr i8, %Base, %Offset) if all users are ICmps.
  // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
  // value.
  Value *Base;
  Value *Offset;
  auto UsesPointerAsInt = [](User *U) {
    if (isa<ICmpInst, PtrToIntInst>(U))
      return true;
    if (auto *P = dyn_cast<PHINode>(U))
      return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
    return false;
  };
  if (match(CI.getOperand(0),
            m_OneUse(m_c_Add(m_PtrToIntSameSize(DL, m_Value(Base)),
                             m_Value(Offset)))) &&
      CI.getType()->getPointerAddressSpace() ==
          Base->getType()->getPointerAddressSpace() &&
      all_of(CI.users(), IsaPred<ICmpInst>)) {
      all_of(CI.users(), UsesPointerAsInt)) {
    return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
  }
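A rough sketch of the kind of IR this widened check is meant to accept (function and value names are made up for illustration and are not part of the change): the inttoptr result is only observed as an integer value, so it can be rewritten as a byte-based getelementptr on the original pointer.

define i1 @sketch(ptr %base, i64 %off, ptr %other) {
  %pi  = ptrtoint ptr %base to i64
  %sum = add i64 %pi, %off
  %p   = inttoptr i64 %sum to ptr   ; every user below only looks at the integer value
  %cmp = icmp eq ptr %p, %other     ; icmp user (a ptrtoint, or a single-use phi feeding one, also qualifies)
  ret i1 %cmp
}
; which the combine can then turn into:
;   %p   = getelementptr i8, ptr %base, i64 %off
;   %cmp = icmp eq ptr %p, %other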
|
||||
|
||||
|
@ -642,6 +642,13 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {

    Instruction *Ext = I->clone();
    Ext->setOperand(0, Current);
    // In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
    // we assume that it is ok to redistribute trunc over add/sub/or. But for
    // example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
    // nuw (add A, B))). To make such redistributions legal we drop all the
    // poison generating flags from cloned trunc instructions here.
    if (isa<TruncInst>(Ext))
      Ext->dropPoisonGeneratingFlags();
    Ext->insertBefore(*IP->getParent(), IP);
    Current = Ext;
  }
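A small, made-up example of why the flags are dropped (values chosen only to illustrate the comment above):

; With %a = %b = 2^63:
;   %s = add i64 %a, %b             ; wraps around to 0
;   %t = trunc nuw i64 %s to i32    ; 0 fits in 32 bits, so %t is a well-defined 0
; but after redistributing the trunc over the add, keeping the flag would give
;   %ta = trunc nuw i64 %a to i32   ; 2^63 does not fit, so %ta is poison
;   %tb = trunc nuw i64 %b to i32   ; likewise poison
;   %t  = add i32 %ta, %tb          ; poison
; so the cloned trunc instructions are emitted without their poison-generating flags.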
|
||||
|
@ -153,11 +153,7 @@ template <typename LTy, typename RTy> struct match_combine_or {
  match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) const {
    if (L.match(V))
      return true;
    if (R.match(V))
      return true;
    return false;
    return L.match(V) || R.match(V);
  }
};
|
||||
|
||||
|
@ -344,7 +344,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.cvt = fptrunc float %a to bfloat
|
||||
@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.abs = call float @llvm.fabs.f32(float %a)
|
||||
@ -417,7 +417,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.neg = fneg float %a
|
||||
@ -480,7 +480,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
|
||||
; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
|
||||
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.cvt = fptrunc double %a to bfloat
|
||||
@ -543,7 +543,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
|
||||
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.neg = fneg double %a
|
||||
@ -607,7 +607,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
|
||||
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
|
||||
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.abs = call double @llvm.fabs.f64(double %a)
|
||||
|
@ -1582,28 +1582,22 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
||||
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s2, v5
|
||||
; SI-NEXT: s_bfe_u32 s0, s2, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s3, s0, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s1, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s0, s6
|
||||
; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s3
|
||||
; SI-NEXT: v_not_b32_e32 v6, s0
|
||||
; SI-NEXT: v_and_b32_e32 v6, v4, v6
|
||||
; SI-NEXT: v_not_b32_e32 v7, s1
|
||||
; SI-NEXT: v_and_b32_e32 v5, v5, v7
|
||||
; SI-NEXT: s_and_b32 s0, s2, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s3, 0
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s0
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: s_cmp_gt_i32 s3, 51
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s2
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s0, v4
|
||||
; SI-NEXT: v_readfirstlane_b32 s1, v5
|
||||
; SI-NEXT: s_bfe_u32 s2, s1, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s8, s2, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s3, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s2, s6
|
||||
; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s8
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
|
||||
; SI-NEXT: s_and_b32 s9, s1, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s8, 0
|
||||
; SI-NEXT: s_cselect_b32 s2, 0, s2
|
||||
; SI-NEXT: s_cselect_b32 s3, s9, s3
|
||||
; SI-NEXT: s_cmp_gt_i32 s8, 51
|
||||
; SI-NEXT: s_cselect_b32 s1, s1, s3
|
||||
; SI-NEXT: s_cselect_b32 s0, s0, s2
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -s[0:1], v[2:3], v[0:1]
|
||||
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
@ -1859,28 +1853,22 @@ define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1)
|
||||
; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5]
|
||||
; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1]
|
||||
; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
|
||||
; SI-NEXT: v_readfirstlane_b32 s6, v5
|
||||
; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s5, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s4, s2
|
||||
; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
|
||||
; SI-NEXT: v_not_b32_e32 v6, s4
|
||||
; SI-NEXT: v_and_b32_e32 v6, v4, v6
|
||||
; SI-NEXT: v_not_b32_e32 v7, s5
|
||||
; SI-NEXT: v_and_b32_e32 v5, v5, v7
|
||||
; SI-NEXT: s_and_b32 s4, s6, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s7, 0
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s4
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: s_cmp_gt_i32 s7, 51
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s6
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s4, v4
|
||||
; SI-NEXT: v_readfirstlane_b32 s5, v5
|
||||
; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s8, s6, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s7, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s6, s2
|
||||
; SI-NEXT: s_lshr_b64 s[6:7], s[6:7], s8
|
||||
; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
|
||||
; SI-NEXT: s_and_b32 s9, s5, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s8, 0
|
||||
; SI-NEXT: s_cselect_b32 s6, 0, s6
|
||||
; SI-NEXT: s_cselect_b32 s7, s9, s7
|
||||
; SI-NEXT: s_cmp_gt_i32 s8, 51
|
||||
; SI-NEXT: s_cselect_b32 s5, s5, s7
|
||||
; SI-NEXT: s_cselect_b32 s4, s4, s6
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -s[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
@ -2109,28 +2097,22 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
|
||||
; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5]
|
||||
; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1]
|
||||
; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
|
||||
; SI-NEXT: v_readfirstlane_b32 s6, v5
|
||||
; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s5, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s4, s2
|
||||
; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
|
||||
; SI-NEXT: v_not_b32_e32 v6, s4
|
||||
; SI-NEXT: v_and_b32_e32 v6, v4, v6
|
||||
; SI-NEXT: v_not_b32_e32 v7, s5
|
||||
; SI-NEXT: v_and_b32_e32 v5, v5, v7
|
||||
; SI-NEXT: s_and_b32 s4, s6, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s7, 0
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s4
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: s_cmp_gt_i32 s7, 51
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_mov_b32_e32 v7, s6
|
||||
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
|
||||
; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s4, v4
|
||||
; SI-NEXT: v_readfirstlane_b32 s5, v5
|
||||
; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s8, s6, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s7, 0xfffff
|
||||
; SI-NEXT: s_mov_b32 s6, s2
|
||||
; SI-NEXT: s_lshr_b64 s[6:7], s[6:7], s8
|
||||
; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
|
||||
; SI-NEXT: s_and_b32 s9, s5, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s8, 0
|
||||
; SI-NEXT: s_cselect_b32 s6, 0, s6
|
||||
; SI-NEXT: s_cselect_b32 s7, s9, s7
|
||||
; SI-NEXT: s_cmp_gt_i32 s8, 51
|
||||
; SI-NEXT: s_cselect_b32 s5, s5, s7
|
||||
; SI-NEXT: s_cselect_b32 s4, s4, s6
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -s[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
@ -5251,27 +5233,22 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
|
||||
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
|
||||
; SI-NEXT: v_readfirstlane_b32 s8, v9
|
||||
; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01
|
||||
; SI-NEXT: v_readfirstlane_b32 s0, v8
|
||||
; SI-NEXT: v_readfirstlane_b32 s1, v9
|
||||
; SI-NEXT: s_bfe_u32 s2, s1, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s10, s2, 0xfffffc01
|
||||
; SI-NEXT: s_mov_b32 s3, 0xfffff
|
||||
; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9
|
||||
; SI-NEXT: v_not_b32_e32 v10, s0
|
||||
; SI-NEXT: v_and_b32_e32 v10, v8, v10
|
||||
; SI-NEXT: v_not_b32_e32 v11, s1
|
||||
; SI-NEXT: v_and_b32_e32 v9, v9, v11
|
||||
; SI-NEXT: s_and_b32 s0, s8, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s9, 0
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
|
||||
; SI-NEXT: v_mov_b32_e32 v11, s0
|
||||
; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
|
||||
; SI-NEXT: s_cmp_gt_i32 s9, 51
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_mov_b32_e32 v11, s8
|
||||
; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
|
||||
; SI-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc
|
||||
; SI-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3]
|
||||
; SI-NEXT: s_mov_b32 s2, s6
|
||||
; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s10
|
||||
; SI-NEXT: s_andn2_b64 s[8:9], s[0:1], s[8:9]
|
||||
; SI-NEXT: s_and_b32 s11, s1, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s10, 0
|
||||
; SI-NEXT: s_cselect_b32 s8, 0, s8
|
||||
; SI-NEXT: s_cselect_b32 s9, s11, s9
|
||||
; SI-NEXT: s_cmp_gt_i32 s10, 51
|
||||
; SI-NEXT: s_cselect_b32 s1, s1, s9
|
||||
; SI-NEXT: s_cselect_b32 s0, s0, s8
|
||||
; SI-NEXT: v_fma_f64 v[2:3], -s[0:1], v[6:7], v[2:3]
|
||||
; SI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1]
|
||||
; SI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7]
|
||||
; SI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
|
||||
@ -5287,26 +5264,20 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
|
||||
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s8, v7
|
||||
; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014
|
||||
; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01
|
||||
; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9
|
||||
; SI-NEXT: v_not_b32_e32 v8, s0
|
||||
; SI-NEXT: v_and_b32_e32 v8, v6, v8
|
||||
; SI-NEXT: v_not_b32_e32 v9, s1
|
||||
; SI-NEXT: v_and_b32_e32 v7, v7, v9
|
||||
; SI-NEXT: s_and_b32 s0, s8, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s9, 0
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
|
||||
; SI-NEXT: v_mov_b32_e32 v9, s0
|
||||
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
|
||||
; SI-NEXT: s_cmp_gt_i32 s9, 51
|
||||
; SI-NEXT: s_cselect_b64 vcc, -1, 0
|
||||
; SI-NEXT: v_mov_b32_e32 v9, s8
|
||||
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
|
||||
; SI-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1]
|
||||
; SI-NEXT: v_readfirstlane_b32 s0, v6
|
||||
; SI-NEXT: v_readfirstlane_b32 s1, v7
|
||||
; SI-NEXT: s_bfe_u32 s8, s1, 0xb0014
|
||||
; SI-NEXT: s_addk_i32 s8, 0xfc01
|
||||
; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s8
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
|
||||
; SI-NEXT: s_and_b32 s9, s1, 0x80000000
|
||||
; SI-NEXT: s_cmp_lt_i32 s8, 0
|
||||
; SI-NEXT: s_cselect_b32 s2, 0, s2
|
||||
; SI-NEXT: s_cselect_b32 s3, s9, s3
|
||||
; SI-NEXT: s_cmp_gt_i32 s8, 51
|
||||
; SI-NEXT: s_cselect_b32 s1, s1, s3
|
||||
; SI-NEXT: s_cselect_b32 s0, s0, s2
|
||||
; SI-NEXT: v_fma_f64 v[0:1], -s[0:1], v[4:5], v[0:1]
|
||||
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
|
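The frem.ll changes above all follow the same pattern: the quotient is refined with the v_div_scale/v_rcp/v_fma sequence, its integer part is taken, and the remainder is formed with a final fused multiply-add. As a point of reference only (this sketch and its function name are not part of the diff), the scalar LLVM IR equivalent of that expansion, r = x - trunc(x / y) * y, looks like:

; Illustrative sketch of the frem expansion exercised by the tests above;
; the closing llvm.fma call corresponds to the final v_fma_f64 with a negated
; first operand in the SI check lines.
define double @frem_f64_sketch(double %x, double %y) {
  %q  = fdiv double %x, %y
  %qt = call double @llvm.trunc.f64(double %q)
  %nq = fneg double %qt
  %r  = call double @llvm.fma.f64(double %nq, double %y, double %x)
  ret double %r
}
declare double @llvm.trunc.f64(double)
declare double @llvm.fma.f64(double, double, double)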
@ -18,6 +18,16 @@ define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -74,6 +84,16 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_cube:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_cube:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -132,6 +152,16 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_2darray:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_2darray:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -190,6 +220,16 @@ define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -246,6 +286,16 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_cl_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -306,6 +356,16 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_cl_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -362,6 +422,16 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_b_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_b_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -418,6 +488,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_b_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_b_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -476,6 +556,16 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_b_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_b_cl_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -538,6 +628,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_b_cl_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: s_mov_b32 s12, exec_lo
|
||||
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
|
||||
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
||||
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_b_cl_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
|
||||
@ -591,6 +691,13 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_l_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_l_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
|
||||
@ -636,6 +743,13 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_l_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_l_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
|
||||
@ -677,6 +791,13 @@ define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_lz_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
||||
; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_lz_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
@ -718,6 +839,13 @@ define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: gather4_c_lz_2d:
|
||||
; GFX10: ; %bb.0: ; %main_body
|
||||
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
|
||||
; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-TRUE16-LABEL: gather4_c_lz_2d:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
|
||||
@ -773,5 +901,4 @@ attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone }
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX10: {{.*}}
|
||||
; GFX12: {{.*}}
|
||||
|
@ -76,12 +76,13 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
|
||||
; SI-NEXT: s_movk_i32 s4, 0xfc01
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_mov_b32 s3, 0xfffff
|
||||
; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_bfe_u32 v4, v3, 20, 11
|
||||
; SI-NEXT: v_add_i32_e32 v6, vcc, 0xfffffc01, v4
|
||||
; SI-NEXT: v_add_i32_e32 v6, vcc, s4, v4
|
||||
; SI-NEXT: v_lshr_b64 v[4:5], s[2:3], v6
|
||||
; SI-NEXT: v_and_b32_e32 v7, 0x80000000, v3
|
||||
; SI-NEXT: v_not_b32_e32 v5, v5
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
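The new LASX reduction tests above (and the ones that follow) all lower @llvm.vector.reduce.* the same way: the upper half of the vector is repeatedly folded onto the lower half with xvpermi.d / xvrepl128vei.* shuffles and the combining operation, until the result sits in lane 0 for xvstelm. A minimal generic-IR sketch of that halving scheme for the <4 x i64> AND case, written purely for illustration (the function name is invented and the IR is not part of this patch):

define i64 @reduce_and_v4i64_sketch(<4 x i64> %v) {
  ; step 1: fold elements <2,3> onto <0,1>
  %hi = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %s1 = and <4 x i64> %v, %hi
  ; step 2: fold element 1 onto element 0
  %e1 = shufflevector <4 x i64> %s1, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %s2 = and <4 x i64> %s1, %e1
  ; the reduced value is now in lane 0
  %r = extractelement <4 x i64> %s2, i32 0
  ret i64 %r
}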
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_or_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_smax_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_smin_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_umax_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_umin_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
95
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_xor_v32i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v32i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <32 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v16i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v16i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v8i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v4i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v4i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
|
||||
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
|
||||
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
|
||||
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
|
||||
; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
|
||||
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_and_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_and_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_and_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
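The 128-bit LSX variants need log2(N) of these folding steps, so the <2 x i64> case directly above collapses to a single vreplvei.d + vand.v pair. Again purely as an illustration (not part of the patch), its generic-IR equivalent is:

define i64 @reduce_and_v2i64_sketch(<2 x i64> %v) {
  ; one halving step: fold element 1 onto element 0, then read lane 0
  %e1 = shufflevector <2 x i64> %v, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %s  = and <2 x i64> %v, %e1
  %r  = extractelement <2 x i64> %s, i32 0
  ret i64 %r
}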
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_or_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_or_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_or_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_smax_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smax_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smax_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_smin_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_smin_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_smin_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_umax_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umax_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umax_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_umin_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_umin_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_umin_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
Normal file
@ -0,0 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
|
||||
|
||||
define void @vec_reduce_xor_v16i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <16 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v8i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v4i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v2i8(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.h $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i8>, ptr %src
|
||||
%res = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> %v)
|
||||
store i8 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v8i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <8 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v4i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v4i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v2i16(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.w $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i16>, ptr %src
|
||||
%res = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %v)
|
||||
store i16 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v4i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <4 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v2i32(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld.d $a0, $a0, 0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i32>, ptr %src
|
||||
%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %v)
|
||||
store i32 %res, ptr %dst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_reduce_xor_v2i64(ptr %src, ptr %dst) nounwind {
|
||||
; CHECK-LABEL: vec_reduce_xor_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
|
||||
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
|
||||
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
|
||||
; CHECK-NEXT: ret
|
||||
%v = load <2 x i64>, ptr %src
|
||||
%res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %v)
|
||||
store i64 %res, ptr %dst
|
||||
ret void
|
||||
}
|
@ -1,20 +1,53 @@
; RUN: llc < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SAFE
; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK --check-prefix=UNSAFE
; RUN: llc < %s | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"

; CHECK-LABEL: double_rounding_safe:
; CHECK: callq __trunctfdf2
; CHECK-NEXT: cvtsd2ss %xmm0
define void @double_rounding_safe(ptr %x, ptr %f) {
entry:
%x.fp128 = load fp128, ptr %x, align 16
%x.double = fptrunc fp128 %x.fp128 to double
%x.float = fptrunc double %x.double to float
store float %x.float, ptr %f, align 4
ret void
}

; CHECK-LABEL: double_rounding_contract_fst:
; CHECK: callq __trunctfdf2
; CHECK-NEXT: cvtsd2ss %xmm0
define void @double_rounding_contract_fst(ptr %x, ptr %f) {
entry:
%x.fp128 = load fp128, ptr %x, align 16
%x.double = fptrunc contract fp128 %x.fp128 to double
%x.float = fptrunc double %x.double to float
store float %x.float, ptr %f, align 4
ret void
}

; CHECK-LABEL: double_rounding_contract_snd:
; CHECK: callq __trunctfdf2
; CHECK-NEXT: cvtsd2ss %xmm0
define void @double_rounding_contract_snd(ptr %x, ptr %f) {
entry:
%x.fp128 = load fp128, ptr %x, align 16
%x.double = fptrunc fp128 %x.fp128 to double
%x.float = fptrunc contract double %x.double to float
store float %x.float, ptr %f, align 4
ret void
}

; CHECK-LABEL: double_rounding:
; SAFE: callq __trunctfdf2
; SAFE-NEXT: cvtsd2ss %xmm0
; UNSAFE: callq __trunctfsf2
; UNSAFE-NOT: cvt
; CHECK: callq __trunctfsf2
; CHECK-NOT: cvt
define void @double_rounding(ptr %x, ptr %f) {
entry:
%0 = load fp128, ptr %x, align 16
%1 = fptrunc fp128 %0 to double
%2 = fptrunc double %1 to float
store float %2, ptr %f, align 4
%x.fp128 = load fp128, ptr %x, align 16
%x.double = fptrunc contract fp128 %x.fp128 to double
%x.float = fptrunc contract double %x.double to float
store float %x.float, ptr %f, align 4
ret void
}
69
llvm/test/Transforms/InstCombine/inttoptr-add-phi.ll
Normal file
@ -0,0 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=instcombine -S %s | FileCheck %s

define i64 @inttoptr_used_by_phi_with_ptrtoint(i1 %c, ptr %src, ptr %p2) {
; CHECK-LABEL: define i64 @inttoptr_used_by_phi_with_ptrtoint(
; CHECK-SAME: i1 [[C:%.*]], ptr [[SRC:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, ptr [[SRC]], i64 10
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[THEN]] ], [ 0, %[[ELSE]] ]
; CHECK-NEXT: ret i64 [[PHI]]
;
%i = ptrtoint ptr %src to i64
%a = add i64 %i, 10
%p = inttoptr i64 %a to ptr
br i1 %c, label %then, label %else

then:
br label %exit

else:
br label %exit

exit:
%phi = phi ptr [ %p, %then ], [ null, %else ]
%i.2 = ptrtoint ptr %phi to i64
ret i64 %i.2
}

declare void @foo(ptr)

define i64 @inttoptr_used_by_phi_with_other_users(i1 %c, ptr %src, ptr %p2) {
; CHECK-LABEL: define i64 @inttoptr_used_by_phi_with_other_users(
; CHECK-SAME: i1 [[C:%.*]], ptr [[SRC:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[I:%.*]] = ptrtoint ptr [[SRC]] to i64
; CHECK-NEXT: [[A:%.*]] = add i64 [[I]], 10
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[A]], %[[THEN]] ], [ 0, %[[ELSE]] ]
; CHECK-NEXT: [[P:%.*]] = inttoptr i64 [[A]] to ptr
; CHECK-NEXT: call void @foo(ptr [[P]])
; CHECK-NEXT: ret i64 [[PHI]]
;
%i = ptrtoint ptr %src to i64
%a = add i64 %i, 10
%p = inttoptr i64 %a to ptr
br i1 %c, label %then, label %else

then:
br label %exit

else:
br label %exit

exit:
%phi = phi ptr [ %p, %then ], [ null, %else ]
call void @foo(ptr %p)
%i.2 = ptrtoint ptr %phi to i64
ret i64 %i.2
}
@ -9,27 +9,34 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ]
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[FIRST_COERCE]] to i64
; CHECK-NEXT: [[COERCE_VAL_PI_I:%.*]] = add i64 [[TMP0]], 256
; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = inttoptr i64 [[COERCE_VAL_PI_I]] to ptr
; CHECK-NEXT: [[CMP_NOT6_I_I:%.*]] = icmp eq ptr [[FIRST_COERCE]], [[COERCE_VAL_IP]]
; CHECK-NEXT: br i1 [[CMP_NOT6_I_I]], label %[[RETURN:.*]], label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST_COERCE]], %[[ENTRY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[PTR_IV]], align 2
; CHECK-NEXT: [[CMP2_I_I:%.*]] = icmp eq i16 [[TMP1]], [[S]]
; CHECK-NEXT: br i1 [[CMP2_I_I]], label %[[RETURN_LOOPEXIT:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 2
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[COERCE_VAL_IP]]
; CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[RETURN_LOOPEXIT]], label %[[LOOP_HEADER]]
; CHECK: [[RETURN_LOOPEXIT]]:
; CHECK-NEXT: [[MERGE_PH:%.*]] = phi ptr [ [[COERCE_VAL_IP]], %[[LOOP_LATCH]] ], [ [[PTR_IV]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[DOTPRE:%.*]] = ptrtoint ptr [[MERGE_PH]] to i64
; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[TMP0]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_SPLIT]]:
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[TMP7]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RES_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE]], %[[RETURN_LOOPEXIT]] ], [ [[TMP0]], %[[ENTRY]] ]
; CHECK-NEXT: ret i64 [[RES_PRE_PHI]]
; CHECK-NEXT: [[__FIRST_ADDR_0_LCSSA_I_I_PH:%.*]] = phi ptr [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ], [ [[COERCE_VAL_IP]], %[[MIDDLE_SPLIT]] ]
; CHECK-NEXT: [[DOTPRE:%.*]] = ptrtoint ptr [[__FIRST_ADDR_0_LCSSA_I_I_PH]] to i64
; CHECK-NEXT: ret i64 [[DOTPRE]]
;
entry:
%first = alloca { ptr }, align 8
@ -71,27 +78,21 @@ define i64 @std_find_i16_constant_offset_no_assumptions(ptr %first.coerce, i16 n
; CHECK-LABEL: define i64 @std_find_i16_constant_offset_no_assumptions(
; CHECK-SAME: ptr [[FIRST_COERCE:%.*]], i16 noundef signext [[S:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[FIRST_COERCE]] to i64
; CHECK-NEXT: [[COERCE_VAL_PI_I:%.*]] = add i64 [[TMP0]], 256
; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = inttoptr i64 [[COERCE_VAL_PI_I]] to ptr
; CHECK-NEXT: [[CMP_NOT6_I_I:%.*]] = icmp eq ptr [[FIRST_COERCE]], [[COERCE_VAL_IP]]
; CHECK-NEXT: br i1 [[CMP_NOT6_I_I]], label %[[RETURN:.*]], label %[[LOOP_HEADER:.*]]
; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST_COERCE]], %[[ENTRY]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[FIRST_COERCE]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[PTR_IV]], align 2
; CHECK-NEXT: [[CMP2_I_I:%.*]] = icmp eq i16 [[TMP1]], [[S]]
; CHECK-NEXT: br i1 [[CMP2_I_I]], label %[[RETURN_LOOPEXIT:.*]], label %[[LOOP_LATCH]]
; CHECK-NEXT: br i1 [[CMP2_I_I]], label %[[RETURN:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 2
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[COERCE_VAL_IP]]
; CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[RETURN_LOOPEXIT]], label %[[LOOP_HEADER]]
; CHECK: [[RETURN_LOOPEXIT]]:
; CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[RETURN]], label %[[LOOP_HEADER]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[MERGE_PH:%.*]] = phi ptr [ [[COERCE_VAL_IP]], %[[LOOP_LATCH]] ], [ [[PTR_IV]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[DOTPRE:%.*]] = ptrtoint ptr [[MERGE_PH]] to i64
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RES_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE]], %[[RETURN_LOOPEXIT]] ], [ [[TMP0]], %[[ENTRY]] ]
; CHECK-NEXT: ret i64 [[RES_PRE_PHI]]
; CHECK-NEXT: ret i64 [[DOTPRE]]
;
entry:
%first = alloca { ptr }, align 8
@ -128,3 +129,8 @@ return:
}

declare void @llvm.assume(i1 noundef)
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
;.
@ -311,7 +311,7 @@ entry:
define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) {
; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s

; Verify that we drop "nuw" from trunc.
define ptr @pr154116_nuw(ptr %p, i128 %i) {
; CHECK-LABEL: define ptr @pr154116_nuw(
; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[I]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 80
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
;
%idx = add i128 %i, 20
%idx.conv = trunc nuw i128 %idx to i64
%arrayidx = getelementptr i32, ptr %p, i64 %idx.conv
ret ptr %arrayidx
}

; Verify that we drop "nsw" from trunc.
define ptr @pr154116_nsw(ptr %p, i128 %i) {
; CHECK-LABEL: define ptr @pr154116_nsw(
; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[I]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 4
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
;
%idx = add i128 %i, 1
%idx.conv = trunc nsw i128 %idx to i64
%arrayidx = getelementptr i32, ptr %p, i64 %idx.conv
ret ptr %arrayidx
}
@ -10,6 +10,7 @@
#include "AMDGPUUnitTests.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "gtest/gtest.h"

using namespace llvm;
@ -120,7 +120,7 @@ void ASTDumper::dump(NumberExprAST *num) {
/// [ [ 1, 2 ], [ 3, 4 ] ]
/// We print out such array with the dimensions spelled out at every level:
/// <2,2>[<2>[ 1, 2 ], <2>[ 3, 4 ] ]
void printLitHelper(ExprAST *litOrNum) {
static void printLitHelper(ExprAST *litOrNum) {
// Inside a literal expression we can have either a number or another literal
if (auto *num = llvm::dyn_cast<NumberExprAST>(litOrNum)) {
llvm::errs() << num->getValue();
@ -39,7 +39,8 @@ static cl::opt<enum Action>
cl::values(clEnumValN(DumpAST, "ast", "output the AST dump")));

/// Returns a Toy AST resulting from parsing the file or a nullptr on error.
std::unique_ptr<toy::ModuleAST> parseInputFile(llvm::StringRef filename) {
static std::unique_ptr<toy::ModuleAST>
parseInputFile(llvm::StringRef filename) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> fileOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(filename);
if (std::error_code ec = fileOrErr.getError()) {
@ -120,7 +120,7 @@ void ASTDumper::dump(NumberExprAST *num) {
/// [ [ 1, 2 ], [ 3, 4 ] ]
/// We print out such array with the dimensions spelled out at every level:
/// <2,2>[<2>[ 1, 2 ], <2>[ 3, 4 ] ]
void printLitHelper(ExprAST *litOrNum) {
static void printLitHelper(ExprAST *litOrNum) {
// Inside a literal expression we can have either a number or another literal
if (auto *num = llvm::dyn_cast<NumberExprAST>(litOrNum)) {
llvm::errs() << num->getValue();
Some files were not shown because too many files have changed in this diff.