[Headers][doc] Add misc non-AVX2 intrinsic descriptions

Adds descriptions for adxintrin.h, bmi2intrin.h, clflushoptintrin.h, clzerointrin.h, rdseedintrin.h, and xsavecintrin.h. Differential revision: https://reviews.llvm.org/D155859
2023-07-20 08:56:39 -07:00 · 2023-07-20 08:56:39 -07:00 · 69593aa5c0
commit 69593aa5c0
parent 5876eee1ff
6 changed files with 448 additions and 6 deletions
--- a/clang/lib/Headers/adxintrin.h
+++ b/clang/lib/Headers/adxintrin.h
@ -28,7 +28,31 @@
 extern "C" {
 #endif

-/* Intrinsics that are available only if __ADX__ defined */
+/* Intrinsics that are available only if __ADX__ is defined. */
+
+/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+///    by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
+///    at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store32(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADCX instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    A 32-bit unsigned addend.
+/// \param __y
+///    A 32-bit unsigned addend.
+/// \param __p
+///    Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char
    __attribute__((__always_inline__, __nodebug__, __target__("adx")))
    _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
@ -37,6 +61,29 @@ __INLINE unsigned char
 }

 #ifdef __x86_64__
+/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+///    by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
+///    at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store64(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADCX instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    A 64-bit unsigned addend.
+/// \param __y
+///    A 64-bit unsigned addend.
+/// \param __p
+///    Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char
    __attribute__((__always_inline__, __nodebug__, __target__("adx")))
    _addcarryx_u64(unsigned char __cf, unsigned long long __x,
@ -45,7 +92,31 @@ __INLINE unsigned char
 }
 #endif

-/* Intrinsics that are also available if __ADX__ undefined */
+/* Intrinsics that are also available if __ADX__ is undefined. */
+
+/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+///    by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
+///    at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store32(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADC instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    A 32-bit unsigned addend.
+/// \param __y
+///    A 32-bit unsigned addend.
+/// \param __p
+///    Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
                                                        unsigned int __x,
                                                        unsigned int __y,
@ -54,6 +125,29 @@ __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
 }

 #ifdef __x86_64__
+/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+///    by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
+///    at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store64(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADC instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    A 64-bit unsigned addend.
+/// \param __y
+///    A 64-bit unsigned addend.
+/// \param __p
+///    Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char __DEFAULT_FN_ATTRS
 _addcarry_u64(unsigned char __cf, unsigned long long __x,
              unsigned long long __y, unsigned long long *__p) {
@ -61,6 +155,30 @@ _addcarry_u64(unsigned char __cf, unsigned long long __x,
 }
 #endif

+/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
+///    flag \a __cf, and subtracts the result from unsigned 32-bit integer
+///    \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
+///    and returns the 8-bit carry-out (carry or overflow flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store32(__p, __x - (__y + temp))
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c SBB instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    The 32-bit unsigned minuend.
+/// \param __y
+///    The 32-bit unsigned subtrahend.
+/// \param __p
+///    Pointer to memory for storing the difference.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
                                                         unsigned int __x,
                                                         unsigned int __y,
@ -69,6 +187,30 @@ __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
 }

 #ifdef __x86_64__
+/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
+///    flag \a __cf, and subtracts the result from unsigned 64-bit integer
+///    \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
+///    and returns the 8-bit carry-out (carry or overflow flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store64(__p, __x - (__y + temp))
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADC instruction.
+///
+/// \param __cf
+///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+///    The 64-bit unsigned minuend.
+/// \param __y
+///    The 64-bit unsigned subtrahend.
+/// \param __p
+///    Pointer to memory for storing the difference.
+/// \returns The 8-bit unsigned carry-out value.
 __INLINE unsigned char __DEFAULT_FN_ATTRS
 _subborrow_u64(unsigned char __cf, unsigned long long __x,
               unsigned long long __y, unsigned long long *__p) {
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@ -17,24 +17,112 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))

+/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
+///    starting at bit number \a __Y.
+///
+/// \code{.operation}
+/// i := __Y[7:0]
+/// result := __X
+/// IF i < 32
+///   result[31:i] := 0
+/// FI
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c BZHI instruction.
+///
+/// \param __X
+///    The 32-bit source value to copy.
+/// \param __Y
+///    The lower 8 bits specify the bit number of the lowest bit to zero.
+/// \returns The partially zeroed 32-bit value.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _bzhi_u32(unsigned int __X, unsigned int __Y)
 {
  return __builtin_ia32_bzhi_si(__X, __Y);
 }

+/// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
+///    into the 32-bit result, according to the mask in the unsigned 32-bit
+///    integer \a __Y. All other bits of the result are zero.
+///
+/// \code{.operation}
+/// i := 0
+/// result := 0
+/// FOR m := 0 TO 31
+///   IF __Y[m] == 1
+///     result[m] := __X[i]
+///     i := i + 1
+///   ENDIF
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c PDEP instruction.
+///
+/// \param __X
+///    The 32-bit source value to copy.
+/// \param __Y
+///    The 32-bit mask specifying where to deposit source bits.
+/// \returns The 32-bit result.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _pdep_u32(unsigned int __X, unsigned int __Y)
 {
  return __builtin_ia32_pdep_si(__X, __Y);
 }

+/// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
+///    low-order bits of the 32-bit result, according to the mask in the
+///    unsigned 32-bit integer \a __Y. All other bits of the result are zero.
+///
+/// \code{.operation}
+/// i := 0
+/// result := 0
+/// FOR m := 0 TO 31
+///   IF __Y[m] == 1
+///     result[i] := __X[m]
+///     i := i + 1
+///   ENDIF
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c PEXT instruction.
+///
+/// \param __X
+///    The 32-bit source value to copy.
+/// \param __Y
+///    The 32-bit mask specifying which source bits to extract.
+/// \returns The 32-bit result.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _pext_u32(unsigned int __X, unsigned int __Y)
 {
  return __builtin_ia32_pext_si(__X, __Y);
 }

+/// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
+///    64-bit product. Stores the upper 32 bits of the product in the
+///    memory at \a __P and returns the lower 32 bits.
+///
+/// \code{.operation}
+/// Store32(__P, (__X * __Y)[63:32])
+/// result := (__X * __Y)[31:0]
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c MULX instruction.
+///
+/// \param __X
+///    An unsigned 32-bit multiplicand.
+/// \param __Y
+///    An unsigned 32-bit multiplicand.
+/// \param __P
+///    A pointer to memory for storing the upper half of the product.
+/// \returns The lower half of the product.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
 {
@ -45,24 +133,112 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)

 #ifdef  __x86_64__

+/// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
+///    starting at bit number \a __Y.
+///
+/// \code{.operation}
+/// i := __Y[7:0]
+/// result := __X
+/// IF i < 64
+///   result[63:i] := 0
+/// FI
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c BZHI instruction.
+///
+/// \param __X
+///    The 64-bit source value to copy.
+/// \param __Y
+///    The lower 8 bits specify the bit number of the lowest bit to zero.
+/// \returns The partially zeroed 64-bit value.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 {
  return __builtin_ia32_bzhi_di(__X, __Y);
 }

+/// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
+///    into the 64-bit result, according to the mask in the unsigned 64-bit
+///    integer \a __Y. All other bits of the result are zero.
+///
+/// \code{.operation}
+/// i := 0
+/// result := 0
+/// FOR m := 0 TO 63
+///   IF __Y[m] == 1
+///     result[m] := __X[i]
+///     i := i + 1
+///   ENDIF
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c PDEP instruction.
+///
+/// \param __X
+///    The 64-bit source value to copy.
+/// \param __Y
+///    The 64-bit mask specifying where to deposit source bits.
+/// \returns The 64-bit result.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _pdep_u64(unsigned long long __X, unsigned long long __Y)
 {
  return __builtin_ia32_pdep_di(__X, __Y);
 }

+/// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
+///    low-order bits of the 64-bit result, according to the mask in the
+///    unsigned 64-bit integer \a __Y. All other bits of the result are zero.
+///
+/// \code{.operation}
+/// i := 0
+/// result := 0
+/// FOR m := 0 TO 63
+///   IF __Y[m] == 1
+///     result[i] := __X[m]
+///     i := i + 1
+///   ENDIF
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c PEXT instruction.
+///
+/// \param __X
+///    The 64-bit source value to copy.
+/// \param __Y
+///    The 64-bit mask specifying which source bits to extract.
+/// \returns The 64-bit result.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _pext_u64(unsigned long long __X, unsigned long long __Y)
 {
  return __builtin_ia32_pext_di(__X, __Y);
 }

+/// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
+///    128-bit product. Stores the upper 64 bits of the product to the
+///    memory addressed by \a __P and returns the lower 64 bits.
+///
+/// \code{.operation}
+/// Store64(__P, (__X * __Y)[127:64])
+/// result := (__X * __Y)[63:0]
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c MULX instruction.
+///
+/// \param __X
+///    An unsigned 64-bit multiplicand.
+/// \param __Y
+///    An unsigned 64-bit multiplicand.
+/// \param __P
+///    A pointer to memory for storing the upper half of the product.
+/// \returns The lower half of the product.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 	   unsigned long long *__P)
--- a/clang/lib/Headers/clflushoptintrin.h
+++ b/clang/lib/Headers/clflushoptintrin.h
@ -17,6 +17,15 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__,  __target__("clflushopt")))

+/// Invalidates all levels of the cache hierarchy and flushes modified data to
+///    memory for the cache line specified by the address \a __m.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c CLFLUSHOPT instruction.
+///
+/// \param __m
+///    An address within the cache line to flush and invalidate.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_clflushopt(void const * __m) {
  __builtin_ia32_clflushopt(__m);
--- a/clang/lib/Headers/clzerointrin.h
+++ b/clang/lib/Headers/clzerointrin.h
@ -17,14 +17,16 @@
 #define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__,  __target__("clzero")))

-/// Loads the cache line address and zero's out the cacheline
+/// Zeroes out the cache line for the address \a __line. This uses a
+///    non-temporal store. Calling \c _mm_sfence() afterward might be needed
+///    to enforce ordering.
 ///
-/// \headerfile <clzerointrin.h>
+/// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CLZERO </c> instruction.
+/// This intrinsic corresponds to the \c CLZERO instruction.
 ///
 /// \param __line
-///    A pointer to a cacheline which needs to be zeroed out.
+///    An address within the cache line to zero out.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_clzero (void * __line)
 {
--- a/clang/lib/Headers/rdseedintrin.h
+++ b/clang/lib/Headers/rdseedintrin.h
@ -17,12 +17,54 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))

+/// Stores a hardware-generated 16-bit random value in the memory at \a __p.
+///
+///    The random number generator complies with NIST SP800-90B and SP800-90C.
+///
+/// \code{.operation}
+/// IF HW_NRND_GEN.ready == 1
+///   Store16(__p, HW_NRND_GEN.data)
+///   result := 1
+/// ELSE
+///   Store16(__p, 0)
+///   result := 0
+/// END
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c RDSEED instruction.
+///
+/// \param __p
+///    Pointer to memory for storing the 16-bit random number.
+/// \returns 1 if a random number was generated, 0 if not.
 static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed16_step(unsigned short *__p)
 {
  return (int) __builtin_ia32_rdseed16_step(__p);
 }

+/// Stores a hardware-generated 32-bit random value in the memory at \a __p.
+///
+///    The random number generator complies with NIST SP800-90B and SP800-90C.
+///
+/// \code{.operation}
+/// IF HW_NRND_GEN.ready == 1
+///   Store32(__p, HW_NRND_GEN.data)
+///   result := 1
+/// ELSE
+///   Store32(__p, 0)
+///   result := 0
+/// END
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c RDSEED instruction.
+///
+/// \param __p
+///    Pointer to memory for storing the 32-bit random number.
+/// \returns 1 if a random number was generated, 0 if not.
 static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed32_step(unsigned int *__p)
 {
@ -30,6 +72,27 @@ _rdseed32_step(unsigned int *__p)
 }

 #ifdef __x86_64__
+/// Stores a hardware-generated 64-bit random value in the memory at \a __p.
+///
+///    The random number generator complies with NIST SP800-90B and SP800-90C.
+///
+/// \code{.operation}
+/// IF HW_NRND_GEN.ready == 1
+///   Store64(__p, HW_NRND_GEN.data)
+///   result := 1
+/// ELSE
+///   Store64(__p, 0)
+///   result := 0
+/// END
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c RDSEED instruction.
+///
+/// \param __p
+///    Pointer to memory for storing the 64-bit random number.
+/// \returns 1 if a random number was generated, 0 if not.
 static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed64_step(unsigned long long *__p)
 {
--- a/clang/lib/Headers/xsavecintrin.h
+++ b/clang/lib/Headers/xsavecintrin.h
@ -17,12 +17,62 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__,  __target__("xsavec")))

+/// Performs a full or partial save of processor state to the memory at
+///    \a __p. The exact state saved depends on the 64-bit mask \a __m and
+///    processor control register \c XCR0.
+///
+/// \code{.operation}
+/// mask[62:0] := __m[62:0] AND XCR0[62:0]
+/// FOR i := 0 TO 62
+///   IF mask[i] == 1
+///     CASE (i) OF
+///     0: save X87 FPU state
+///     1: save SSE state
+///     DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i]
+///   FI
+/// ENDFOR
+/// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0])
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c XSAVEC instruction.
+///
+/// \param __p
+///    Pointer to the save area; must be 64-byte aligned.
+/// \param __m
+///    A 64-bit mask indicating what state should be saved.
 static __inline__ void __DEFAULT_FN_ATTRS
 _xsavec(void *__p, unsigned long long __m) {
  __builtin_ia32_xsavec(__p, __m);
 }

 #ifdef __x86_64__
+/// Performs a full or partial save of processor state to the memory at
+///    \a __p. The exact state saved depends on the 64-bit mask \a __m and
+///    processor control register \c XCR0.
+///
+/// \code{.operation}
+/// mask[62:0] := __m[62:0] AND XCR0[62:0]
+/// FOR i := 0 TO 62
+///   IF mask[i] == 1
+///     CASE (i) OF
+///     0: save X87 FPU state
+///     1: save SSE state
+///     DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i]
+///   FI
+/// ENDFOR
+/// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0])
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c XSAVEC64 instruction.
+///
+/// \param __p
+///    Pointer to the save area; must be 64-byte aligned.
+/// \param __m
+///    A 64-bit mask indicating what state should be saved.
 static __inline__ void __DEFAULT_FN_ATTRS
 _xsavec64(void *__p, unsigned long long __m) {
  __builtin_ia32_xsavec64(__p, __m);