[X86] Support "f16c" and "avx512fp16" for __builtin_cpu_supports (#78384)

This resolves issue #65320.
This also supports clarify sapphirerapids and cooperlake for
cpu_specific/dispatch.
This commit is contained in:
Freddy Ye 2024-01-18 09:22:04 +08:00 committed by GitHub
parent aa02002491
commit f3a4de395c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 16 additions and 10 deletions

View File

@ -82,6 +82,8 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx512bitalg");
(void)__builtin_cpu_supports("avx512bf16");
(void)__builtin_cpu_supports("avx512vp2intersect");
(void)__builtin_cpu_supports("f16c");
(void)__builtin_cpu_supports("avx512fp16");
}
void verifycpustrings(void) {

View File

@ -80,8 +80,8 @@ OutOfLineDefs::foo(int, int, int) {
// LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.S
// LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.R
// LINUX: define weak_odr ptr @_ZN13OutOfLineDefs3fooEiii.resolver()
// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.S
// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.O
// LINUX: call void @llvm.trap
// LINUX: define linkonce_odr noundef i32 @_ZN13OutOfLineDefs3fooEiii.O
@ -89,8 +89,8 @@ OutOfLineDefs::foo(int, int, int) {
// WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"
// WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"
// WINDOWS: define weak_odr dso_local i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z"(ptr %0, i32 %1, i32 %2, i32 %3)
// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"(ptr %0, i32 %1, i32 %2, i32 %3)
// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"(ptr %0, i32 %1, i32 %2, i32 %3)
// WINDOWS: call void @llvm.trap
// WINDOWS: define linkonce_odr dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"

View File

@ -148,7 +148,8 @@ enum ProcessorFeatures {
FEATURE_LZCNT,
FEATURE_MOVBE,
FEATURE_X86_64_BASELINE = 95,
FEATURE_AVX512FP16 = 94,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
@ -812,6 +813,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX5124FMAPS);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VP2INTERSECT);
if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512FP16);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.

View File

@ -122,6 +122,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
//
// We cannot just re-sort the list though because its order is dictated by the
// order of bits in CodeGenFunction::GetX86CpuSupportsMask.
// We cannot re-adjust the position of X86_FEATURE_COMPAT at the whole list.
#ifndef X86_FEATURE_COMPAT
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
#endif
@ -184,12 +185,12 @@ X86_FEATURE (AMX_TILE, "amx-tile")
X86_FEATURE (CLDEMOTE, "cldemote")
X86_FEATURE (CLFLUSHOPT, "clflushopt")
X86_FEATURE (CLWB, "clwb")
X86_FEATURE_COMPAT(F16C, "f16c", 38)
X86_FEATURE (CLZERO, "clzero")
X86_FEATURE (CMPXCHG16B, "cx16")
X86_FEATURE (CMPXCHG8B, "cx8")
X86_FEATURE (CRC32, "crc32")
X86_FEATURE (ENQCMD, "enqcmd")
X86_FEATURE (F16C, "f16c")
X86_FEATURE (FSGSBASE, "fsgsbase")
X86_FEATURE (FXSR, "fxsr")
X86_FEATURE (INVPCID, "invpcid")
@ -229,9 +230,9 @@ X86_FEATURE (XSAVE, "xsave")
X86_FEATURE (XSAVEC, "xsavec")
X86_FEATURE (XSAVEOPT, "xsaveopt")
X86_FEATURE (XSAVES, "xsaves")
X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 39)
X86_FEATURE (HRESET, "hreset")
X86_FEATURE (RAOINT, "raoint")
X86_FEATURE (AVX512FP16, "avx512fp16")
X86_FEATURE (AMX_FP16, "amx-fp16")
X86_FEATURE (CMPCCXADD, "cmpccxadd")
X86_FEATURE (AVXNECONVERT, "avxneconvert")

View File

@ -347,7 +347,7 @@ constexpr ProcInfo Processors[] = {
// Tigerlake microarchitecture based processors.
{ {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false },
// Sapphire Rapids microarchitecture based processors.
{ {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
{ {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Alderlake microarchitecture based processors.
{ {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Raptorlake microarchitecture based processors.
@ -369,12 +369,12 @@ constexpr ProcInfo Processors[] = {
// Grandridge microarchitecture based processors.
{ {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Granite Rapids microarchitecture based processors.
{ {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false },
{ {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512FP16, FeaturesGraniteRapids, 'n', false },
// Granite Rapids D microarchitecture based processors.
{ {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
{ {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
{ {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
{ {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
// Emerald Rapids microarchitecture based processors.
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Clearwaterforest microarchitecture based processors.
{ {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
// Knights Landing processor.