From 590bb3e8e63af0fb46eadf510761bd00e264c018 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 24 Nov 2025 14:08:50 -0800 Subject: [PATCH] [AArch64] Improve host feature detection. (#160410) SVE depends on a combination of host support and operating system support. Sometimes those don't line up with detected host CPU name; make sure SVE is disabled when it isn't available. Implement this for both Windows and Linux. (We don't have a codepath for other operating systems. If someone wants to implement this, it should be possible to adapt fmv code from compiler-rt.) While I'm here, also add support for detecting other Windows CPU features. For Windows, declare constants ourselves so the code builds on older SDKs; we also do this in compiler-rt. --- llvm/lib/TargetParser/Host.cpp | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 3f9f69549f2d..cb793d60a286 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2277,20 +2277,81 @@ StringMap sys::getHostCPUFeatures() { uint32_t Sha2 = CAP_SHA1 | CAP_SHA2; Features["aes"] = (crypto & Aes) == Aes; Features["sha2"] = (crypto & Sha2) == Sha2; + + // Even if an underlying core supports SVE, it might not be available if + // it's disabled by the OS, or some other layer. Disable SVE if we don't + // detect support at runtime. + if (!Features.contains("sve")) + Features["sve"] = false; #endif return Features; } #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64) || \ defined(__arm64ec__) || defined(_M_ARM64EC)) +#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 +#endif +#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44 +#endif +#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 +#endif +#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46 +#endif +#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 +#endif +#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50 +#endif +#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55 +#endif +#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56 +#endif +#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57 +#endif +#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 +#endif +#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 +#endif StringMap sys::getHostCPUFeatures() { StringMap Features; // If we're asking the OS at runtime, believe what the OS says - Features["neon"] = - IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); Features["crc"] = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); + Features["lse"] = + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE); + Features["dotprod"] = + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE); + Features["jsconv"] = + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE); + Features["rcpc"] = + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE); + Features["sve"] = + IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE); + Features["sve2"] = + IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE); + Features["sve-aes"] = + IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE); + Features["sve-sha3"] = + IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE); + Features["sve-sm4"] = + IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE); + Features["f32mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE); + Features["f64mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE); + Features["i8mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE); // Avoid inferring "crypto" means more than the traditional AES + SHA2 bool TradCrypto =