DobieStation/ext/libdeflate/lib/x86/cpu_features.c
a dinosaur a491dce51a CISO reading support (#123)
This changes up the build system quite a bit.
2019-02-25 18:51:46 -05:00

140 lines
4 KiB
C

/*
* x86/cpu_features.c - feature detection for x86 processors
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "cpu_features.h"
#if X86_CPU_FEATURES_ENABLED
/*
 * Bitmask of detected CPU features.  It is defined as 0 here and is later
 * assigned by setup_cpu_features(), which stores the detected
 * X86_CPU_FEATURE_* bits OR'd with X86_CPU_FEATURES_KNOWN.
 */
volatile u32 _cpu_features = 0;
/*
 * Output constraint used for CPUID's ebx result in cpuid() below.
 *
 * Normally the result is bound directly to ebx ("=b").  On x86_32 PIC builds,
 * however, ebx is the PIC register and old GCC versions require us to save
 * and restore it by hand, so a general register ("=r") is requested instead.
 * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
 */
#if !defined(__i386__) || !defined(__PIC__)
#  define EBX_CONSTRAINT "=b"
#else
#  define EBX_CONSTRAINT "=r"
#endif
/*
 * Execute the CPUID instruction.
 *
 * 'leaf' is passed in eax and 'subleaf' in ecx.  The eax, ebx, ecx and edx
 * values produced by CPUID are stored to *a, *b, *c and *d respectively.
 *
 * Operand %1 is the location that receives the ebx result (its constraint is
 * EBX_CONSTRAINT).  The assembler directive ".ifnc %%ebx, %1" assembles its
 * body only when %1 is not spelled the same as %ebx:
 *   - if %1 is a different register, ebx is first copied into %1, and after
 *     CPUID the xchg moves CPUID's ebx result into %1 while putting the saved
 *     value back into ebx;
 *   - if %1 is ebx itself, both .ifnc bodies are skipped and only the bare
 *     "cpuid" is emitted.
 */
static inline void
cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
{
__asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
"cpuid \n"
".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
/* Outputs: %0 = eax, %1 = ebx result (see above), %2 = ecx, %3 = edx. */
: "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
/* Inputs: leaf in eax, subleaf in ecx. */
: "a" (leaf), "c" (subleaf));
}
/*
 * Read an extended control register.
 *
 * Executes XGETBV with 'index' in ecx and returns the edx:eax result as a
 * single 64-bit value (edx in the upper 32 bits, eax in the lower 32 bits).
 *
 * The caller must only invoke this after confirming that OSXSAVE is set;
 * XGETBV raises an invalid-opcode fault otherwise.
 */
static inline u64
read_xcr(u32 index)
{
	u32 hi, lo;

	/*
	 * Old versions of binutils do not recognize the "xgetbv" mnemonic, so
	 * the raw opcode bytes are listed instead.
	 *
	 * The asm must be 'volatile': without it the compiler is free to treat
	 * the statement as a pure function of 'index' and hoist or speculate
	 * it ahead of the caller's OSXSAVE check, which would fault on
	 * processors/OSes where XGETBV is not enabled.
	 */
	__asm__ __volatile__(".byte 0x0f, 0x01, 0xd0"
			     : "=d" (hi), "=a" (lo)
			     : "c" (index));

	return ((u64)hi << 32) | lo;
}
/* Discard any earlier definition of BIT() (if there is one) before defining ours. */
#undef BIT
#define BIT(pos)		(1UL << (pos))

/* Bits of XCR0 that setup_cpu_features() checks before trusting AVX/AVX-512. */
#define XCR0_BIT_SSE		BIT(1)
#define XCR0_BIT_AVX		BIT(2)
#define XCR0_BIT_OPMASK		BIT(5)
#define XCR0_BIT_ZMM_HI256	BIT(6)
#define XCR0_BIT_HI16_ZMM	BIT(7)

/* Nonzero iff bit number 'pos' is set in 'word'. */
#define IS_SET(word, pos)	((word) & BIT(pos))

/* True iff every bit present in 'bits' is also set in 'word'. */
#define IS_ALL_SET(word, bits)	(((word) & (bits)) == (bits))
/* Initialize _cpu_features with bits for interesting processor features. */
void setup_cpu_features(void)
{
u32 features = 0;
u32 dummy1, dummy2, dummy3, dummy4;
u32 max_function;
u32 features_1, features_2, features_3, features_4;
bool os_avx_support = false;
bool os_avx512_support = false;
/* Get maximum supported function */
cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
if (max_function < 1)
goto out;
/* Standard feature flags */
cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
if (IS_SET(features_1, 26))
features |= X86_CPU_FEATURE_SSE2;
if (IS_SET(features_2, 1))
features |= X86_CPU_FEATURE_PCLMULQDQ;
if (IS_SET(features_2, 27)) { /* OSXSAVE set? */
u64 xcr0 = read_xcr(0);
os_avx_support = IS_ALL_SET(xcr0,
XCR0_BIT_SSE |
XCR0_BIT_AVX);
os_avx512_support = IS_ALL_SET(xcr0,
XCR0_BIT_SSE |
XCR0_BIT_AVX |
XCR0_BIT_OPMASK |
XCR0_BIT_ZMM_HI256 |
XCR0_BIT_HI16_ZMM);
}
if (os_avx_support && IS_SET(features_2, 28))
features |= X86_CPU_FEATURE_AVX;
if (max_function < 7)
goto out;
/* Extended feature flags */
cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);
if (os_avx_support && IS_SET(features_3, 5))
features |= X86_CPU_FEATURE_AVX2;
if (IS_SET(features_3, 8))
features |= X86_CPU_FEATURE_BMI2;
if (os_avx512_support && IS_SET(features_3, 30))
features |= X86_CPU_FEATURE_AVX512BW;
out:
_cpu_features = features | X86_CPU_FEATURES_KNOWN;
}
#endif /* X86_CPU_FEATURES_ENABLED */