[Headers][doc] Add misc non-AVX2 intrinsic descriptions

Adds descriptions for adxintrin.h, bmi2intrin.h, clflushoptintrin.h,
clzerointrin.h, rdseedintrin.h, and xsavecintrin.h.

Differential revision: https://reviews.llvm.org/D155859
This commit is contained in:
Paul Robinson 2023-07-20 08:56:39 -07:00
parent 5876eee1ff
commit 69593aa5c0
6 changed files with 448 additions and 6 deletions

View File

@ -28,7 +28,31 @@
extern "C" {
#endif
/* Intrinsics that are available only if __ADX__ defined */
/* Intrinsics that are available only if __ADX__ is defined. */
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADCX instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
@ -37,6 +61,29 @@ __INLINE unsigned char
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADCX instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
@ -45,7 +92,31 @@ __INLINE unsigned char
}
#endif
/* Intrinsics that are also available if __ADX__ undefined */
/* Intrinsics that are also available if __ADX__ is undefined. */
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
@ -54,6 +125,29 @@ __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
@ -61,6 +155,30 @@ _addcarry_u64(unsigned char __cf, unsigned long long __x,
}
#endif
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 32-bit unsigned minuend.
/// \param __y
/// The 32-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
@ -69,6 +187,30 @@ __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 64-bit unsigned minuend.
/// \param __y
/// The 64-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {

View File

@ -17,24 +17,112 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
/// starting at bit number \a __Y.
///
/// \code{.operation}
/// i := __Y[7:0]
/// result := __X
/// IF i < 32
/// result[31:i] := 0
/// FI
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c BZHI instruction.
///
/// \param __X
/// The 32-bit source value to copy.
/// \param __Y
/// The lower 8 bits specify the bit number of the lowest bit to zero.
/// \returns The partially zeroed 32-bit value.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bzhi_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_bzhi_si(__X, __Y);
}
/// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
/// into the 32-bit result, according to the mask in the unsigned 32-bit
/// integer \a __Y. All other bits of the result are zero.
///
/// \code{.operation}
/// i := 0
/// result := 0
/// FOR m := 0 TO 31
/// IF __Y[m] == 1
/// result[m] := __X[i]
/// i := i + 1
/// ENDIF
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c PDEP instruction.
///
/// \param __X
/// The 32-bit source value to copy.
/// \param __Y
/// The 32-bit mask specifying where to deposit source bits.
/// \returns The 32-bit result.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pdep_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pdep_si(__X, __Y);
}
/// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
/// low-order bits of the 32-bit result, according to the mask in the
/// unsigned 32-bit integer \a __Y. All other bits of the result are zero.
///
/// \code{.operation}
/// i := 0
/// result := 0
/// FOR m := 0 TO 31
/// IF __Y[m] == 1
/// result[i] := __X[m]
/// i := i + 1
/// ENDIF
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c PEXT instruction.
///
/// \param __X
/// The 32-bit source value to copy.
/// \param __Y
/// The 32-bit mask specifying which source bits to extract.
/// \returns The 32-bit result.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pext_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pext_si(__X, __Y);
}
/// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
/// 64-bit product. Stores the upper 32 bits of the product in the
/// memory at \a __P and returns the lower 32 bits.
///
/// \code{.operation}
/// Store32(__P, (__X * __Y)[63:32])
/// result := (__X * __Y)[31:0]
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c MULX instruction.
///
/// \param __X
/// An unsigned 32-bit multiplicand.
/// \param __Y
/// An unsigned 32-bit multiplicand.
/// \param __P
/// A pointer to memory for storing the upper half of the product.
/// \returns The lower half of the product.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
{
@ -45,24 +133,112 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
#ifdef __x86_64__
/// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
/// starting at bit number \a __Y.
///
/// \code{.operation}
/// i := __Y[7:0]
/// result := __X
/// IF i < 64
/// result[63:i] := 0
/// FI
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c BZHI instruction.
///
/// \param __X
/// The 64-bit source value to copy.
/// \param __Y
/// The lower 8 bits specify the bit number of the lowest bit to zero.
/// \returns The partially zeroed 64-bit value.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bzhi_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_bzhi_di(__X, __Y);
}
/// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
/// into the 64-bit result, according to the mask in the unsigned 64-bit
/// integer \a __Y. All other bits of the result are zero.
///
/// \code{.operation}
/// i := 0
/// result := 0
/// FOR m := 0 TO 63
/// IF __Y[m] == 1
/// result[m] := __X[i]
/// i := i + 1
/// ENDIF
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c PDEP instruction.
///
/// \param __X
/// The 64-bit source value to copy.
/// \param __Y
/// The 64-bit mask specifying where to deposit source bits.
/// \returns The 64-bit result.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pdep_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pdep_di(__X, __Y);
}
/// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
/// low-order bits of the 64-bit result, according to the mask in the
/// unsigned 64-bit integer \a __Y. All other bits of the result are zero.
///
/// \code{.operation}
/// i := 0
/// result := 0
/// FOR m := 0 TO 63
/// IF __Y[m] == 1
/// result[i] := __X[m]
/// i := i + 1
/// ENDIF
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c PEXT instruction.
///
/// \param __X
/// The 64-bit source value to copy.
/// \param __Y
/// The 64-bit mask specifying which source bits to extract.
/// \returns The 64-bit result.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pext_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pext_di(__X, __Y);
}
/// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
/// 128-bit product. Stores the upper 64 bits of the product to the
/// memory addressed by \a __P and returns the lower 64 bits.
///
/// \code{.operation}
/// Store64(__P, (__X * __Y)[127:64])
/// result := (__X * __Y)[63:0]
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c MULX instruction.
///
/// \param __X
/// An unsigned 64-bit multiplicand.
/// \param __Y
/// An unsigned 64-bit multiplicand.
/// \param __P
/// A pointer to memory for storing the upper half of the product.
/// \returns The lower half of the product.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
unsigned long long *__P)

View File

@ -17,6 +17,15 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
/// Invalidates all levels of the cache hierarchy and flushes modified data to
/// memory for the cache line specified by the address \a __m.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c CLFLUSHOPT instruction.
///
/// \param __m
/// An address within the cache line to flush and invalidate.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clflushopt(void const * __m) {
__builtin_ia32_clflushopt(__m);

View File

@ -17,14 +17,16 @@
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("clzero")))
/// Loads the cache line address and zero's out the cacheline
/// Zeroes out the cache line for the address \a __line. This uses a
/// non-temporal store. Calling \c _mm_sfence() afterward might be needed
/// to enforce ordering.
///
/// \headerfile <clzerointrin.h>
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CLZERO </c> instruction.
/// This intrinsic corresponds to the \c CLZERO instruction.
///
/// \param __line
/// A pointer to a cacheline which needs to be zeroed out.
/// An address within the cache line to zero out.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clzero (void * __line)
{

View File

@ -17,12 +17,54 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))
/// Stores a hardware-generated 16-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
/// Store16(__p, HW_NRND_GEN.data)
/// result := 1
/// ELSE
/// Store16(__p, 0)
/// result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
/// Pointer to memory for storing the 16-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed16_step(unsigned short *__p)
{
return (int) __builtin_ia32_rdseed16_step(__p);
}
/// Stores a hardware-generated 32-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
/// Store32(__p, HW_NRND_GEN.data)
/// result := 1
/// ELSE
/// Store32(__p, 0)
/// result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
/// Pointer to memory for storing the 32-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed32_step(unsigned int *__p)
{
@ -30,6 +72,27 @@ _rdseed32_step(unsigned int *__p)
}
#ifdef __x86_64__
/// Stores a hardware-generated 64-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
/// Store64(__p, HW_NRND_GEN.data)
/// result := 1
/// ELSE
/// Store64(__p, 0)
/// result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
/// Pointer to memory for storing the 64-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed64_step(unsigned long long *__p)
{

View File

@ -17,12 +17,62 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsavec")))
/// Performs a full or partial save of processor state to the memory at
/// \a __p. The exact state saved depends on the 64-bit mask \a __m and
/// processor control register \c XCR0.
///
/// \code{.operation}
/// mask[62:0] := __m[62:0] AND XCR0[62:0]
/// FOR i := 0 TO 62
/// IF mask[i] == 1
/// CASE (i) OF
/// 0: save X87 FPU state
/// 1: save SSE state
/// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i]
/// FI
/// ENDFOR
/// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0])
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c XSAVEC instruction.
///
/// \param __p
/// Pointer to the save area; must be 64-byte aligned.
/// \param __m
/// A 64-bit mask indicating what state should be saved.
static __inline__ void __DEFAULT_FN_ATTRS
_xsavec(void *__p, unsigned long long __m) {
__builtin_ia32_xsavec(__p, __m);
}
#ifdef __x86_64__
/// Performs a full or partial save of processor state to the memory at
/// \a __p. The exact state saved depends on the 64-bit mask \a __m and
/// processor control register \c XCR0.
///
/// \code{.operation}
/// mask[62:0] := __m[62:0] AND XCR0[62:0]
/// FOR i := 0 TO 62
/// IF mask[i] == 1
/// CASE (i) OF
/// 0: save X87 FPU state
/// 1: save SSE state
/// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i]
/// FI
/// ENDFOR
/// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0])
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c XSAVEC64 instruction.
///
/// \param __p
/// Pointer to the save area; must be 64-byte aligned.
/// \param __m
/// A 64-bit mask indicating what state should be saved.
static __inline__ void __DEFAULT_FN_ATTRS
_xsavec64(void *__p, unsigned long long __m) {
__builtin_ia32_xsavec64(__p, __m);