diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h new file mode 100644 index 00000000000000..eecc73736c5f44 --- /dev/null +++ b/Include/internal/pycore_cpuinfo.h @@ -0,0 +1,350 @@ +/* + * Interface for detecting the different CPUID flags in an opaque manner. + * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. + * + * If a module needs to support SIMD instructions, it should determine + * the compiler flags and the instruction sets required for the intrinsics + * to work. + * + * For the headers and expected CPUID bits needed by Intel intrinsics, see + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. + */ + +#ifndef Py_INTERNAL_CPUINFO_H +#define Py_INTERNAL_CPUINFO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +/* + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by leaves, subleaves, registers, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. + * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture. 
+ */ +// fmt: off +typedef enum py_cpuid_feature_mask { +/*[python input] +# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}} +data = { + (1, 0, 'ECX'): { + 'SSE3': 0, + 'PCLMULQDQ': 1, + 'SSSE3': 9, + 'FMA': 12, + 'SSE4_1': 19, + 'SSE4_2': 20, + 'POPCNT': 23, + 'XSAVE': 26, + 'OSXSAVE': 27, + 'AVX': 28, + }, + (1, 0, 'EDX'): { + 'CMOV': 15, + 'SSE': 25, + 'SSE2': 26, + }, + (7, 0, 'EBX'): { + 'AVX2': 5, + 'AVX512_F': 16, + 'AVX512_DQ': 17, + 'AVX512_IFMA': 21, + 'AVX512_PF': 26, + 'AVX512_ER': 27, + 'AVX512_CD': 28, + 'AVX512_BW': 30, + 'AVX512_VL': 31, + }, + (7, 0, 'ECX'): { + 'AVX512_VBMI': 1, + 'AVX512_VBMI2': 6, + 'AVX512_VNNI': 11, + 'AVX512_BITALG': 12, + 'AVX512_VPOPCNTDQ': 14, + }, + (7, 0, 'EDX'): { + 'AVX512_4VNNIW': 2, + 'AVX512_4FMAPS': 3, + 'AVX512_VP2INTERSECT': 8, + }, + (7, 1, 'EAX'): { + 'AVX_VNNI': 4, + 'AVX_IFMA': 23, + }, + (7, 1, 'EDX'): { + 'AVX_VNNI_INT8': 4, + 'AVX_NE_CONVERT': 5, + 'AVX_VNNI_INT16': 10, + }, +} + +def get_member_name(leaf, subleaf, registry, name): + node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}' + return f'Py_CPUID_MASK_{registry}_{node}_{name}' + +def get_member_mask(bit): + val = format(1 << bit, '008x') + return f'= 0x{val},' + +# BUG(picnixz): Clinic does not like when commented lines have empty lines. +# so we use '::' for now to indicate an empty line. +# :: +# The enumeration is rendered as follows: +# :: +# = 0x, // bit = BIT +# ^ ^ ^ ^ ^ ^ ^ +# :: +# where ^ indicates a column that is a multiple of 4, has +# exactly 8 characters and has at most 2 characters. + +INDENT = ' ' * 4 +# BUG(picnixz): Clinic does not like when '/' and '*' are put together. 
+COMMENT = '/' + '* ' + +def next_block(w): + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + +NAMESIZE = next_block(max( + len(get_member_name(*group, name)) + for group, values in data.items() + for name in values +)) +MASKSIZE = 8 + next_block(len('= 0x,')) + +for group, values in data.items(): + title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group) + print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='') + for name, bit in values.items(): + assert name, f"invalid entry in {group}" + key = get_member_name(*group, name) + assert 0 <= bit < 32, f"invalid bit value for {name!r}" + val = get_member_mask(bit) + + member_name = key.ljust(NAMESIZE) + member_mask = val.ljust(MASKSIZE) + + print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='') +[python start generated code]*/ + /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ + Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + 
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 + Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ + Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/ +} py_cpuid_feature_mask; +// fmt: on + +/* XSAVE state components (XCR0 control register) */ +typedef enum py_xsave_feature_mask { + Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +} py_xsave_feature_mask; + +typedef struct py_cpuid_features { + uint32_t maxleaf; + /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. 
*/ +#define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 + // --- Streaming SIMD Extensions ------------------------------------------ + _Py_CPUID_DECL_FLAG(sse); + _Py_CPUID_DECL_FLAG(sse2); + _Py_CPUID_DECL_FLAG(sse3); + _Py_CPUID_DECL_FLAG(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FLAG(sse41); // SSE4.1 + _Py_CPUID_DECL_FLAG(sse42); // SSE4.2 + + // --- Advanced Vector Extensions ----------------------------------------- + _Py_CPUID_DECL_FLAG(avx); + _Py_CPUID_DECL_FLAG(avx_ifma); + _Py_CPUID_DECL_FLAG(avx_ne_convert); + + _Py_CPUID_DECL_FLAG(avx_vnni); + _Py_CPUID_DECL_FLAG(avx_vnni_int8); + _Py_CPUID_DECL_FLAG(avx_vnni_int16); + + // --- Advanced Vector Extensions 2 --------------------------------------- + _Py_CPUID_DECL_FLAG(avx2); + + // --- Advanced Vector Extensions (512-bit) ------------------------------- + /* + * AVX-512 instruction set are grouped by the processor generation + * that implements them (see https://en.wikipedia.org/wiki/AVX-512). + * + * We do not include GFNI, VPCLMULQDQ and VAES instructions since + * they are not exactly AVX-512 per se, nor do we include BF16 or + * FP16 since they operate on bfloat16 and binary16 (half-float). + * + * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for + * the suffix meanings (for instance 'f' stands for 'Foundation'). 
+ */ + _Py_CPUID_DECL_FLAG(avx512_f); + _Py_CPUID_DECL_FLAG(avx512_cd); + + _Py_CPUID_DECL_FLAG(avx512_er); + _Py_CPUID_DECL_FLAG(avx512_pf); + + _Py_CPUID_DECL_FLAG(avx512_4fmaps); + _Py_CPUID_DECL_FLAG(avx512_4vnniw); + + _Py_CPUID_DECL_FLAG(avx512_vpopcntdq); + + _Py_CPUID_DECL_FLAG(avx512_vl); + _Py_CPUID_DECL_FLAG(avx512_dq); + _Py_CPUID_DECL_FLAG(avx512_bw); + + _Py_CPUID_DECL_FLAG(avx512_ifma); + _Py_CPUID_DECL_FLAG(avx512_vbmi); + + _Py_CPUID_DECL_FLAG(avx512_vnni); + + _Py_CPUID_DECL_FLAG(avx512_vbmi2); + _Py_CPUID_DECL_FLAG(avx512_bitalg); + + _Py_CPUID_DECL_FLAG(avx512_vp2intersect); + + // --- Instructions ------------------------------------------------------- + _Py_CPUID_DECL_FLAG(cmov); + _Py_CPUID_DECL_FLAG(fma); + _Py_CPUID_DECL_FLAG(popcnt); + _Py_CPUID_DECL_FLAG(pclmulqdq); + + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS + + // --- XCR0 register bits ------------------------------------------------- + _Py_CPUID_DECL_FLAG(xcr0_sse); + // On some Intel CPUs, it is possible for the CPU to support AVX2 + // instructions even though the underlying OS does not know about + // AVX. In particular, only (SSE) XMM registers will be saved and + // restored on context-switch, but not (AVX) YMM registers. + _Py_CPUID_DECL_FLAG(xcr0_avx); + _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); + _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); +#undef _Py_CPUID_DECL_FLAG + // Whenever a field is added or removed above, update the + // number of fields (40) and adjust the bitsize of 'ready' + // so that the size of this structure is a multiple of 8. + uint8_t ready; // set if the structure is ready for usage +} py_cpuid_features; + +/* + * Explicitly initialize all members to zero to guarantee that + * we never have an un-initialized attribute at runtime which + * could lead to an illegal instruction error. 
+ * + * This does not mark 'flags' as being ready yet. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(void) +_Py_cpuid_disable_features(py_cpuid_features *flags); + +/* + * Check whether the structure is ready and flags are inter-compatible, + * returning 1 on success and 0 otherwise. + * + * The caller should disable all CPUID detected features if the check + * fails to avoid encountering runtime illegal instruction errors. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_check_features(const py_cpuid_features *flags); + +/* + * Return 1 if all expected flags are set in 'actual', 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); + +/* + * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); + +/* + * Detect the available features on this machine, storing the result in 'flags'. + * + * Note: This function does not set any exception and thus never fails. 
+ */
+PyAPI_FUNC(void)
+_Py_cpuid_detect_features(py_cpuid_features *flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_INTERNAL_CPUINFO_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 67acf0fc520087..1b0b73254f0fb5 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -435,6 +435,7 @@ PYTHON_OBJS= \
 		Python/codegen.o \
 		Python/compile.o \
 		Python/context.o \
+		Python/cpuinfo.o \
 		Python/critical_section.o \
 		Python/crossinterp.o \
 		Python/dynamic_annotations.o \
@@ -1216,6 +1217,7 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_complexobject.h \
 		$(srcdir)/Include/internal/pycore_condvar.h \
 		$(srcdir)/Include/internal/pycore_context.h \
+		$(srcdir)/Include/internal/pycore_cpuinfo.h \
 		$(srcdir)/Include/internal/pycore_critical_section.h \
 		$(srcdir)/Include/internal/pycore_crossinterp.h \
 		$(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 9ebf58ae8a9bc4..c4c97ac7833ae4 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -224,6 +224,7 @@
+
@@ -584,6 +585,7 @@
+
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 6c76a6ab592a84..03c3a82d8f85e0 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -591,6 +591,9 @@
 Include\internal
+
+ Include\internal
+
 Include\internal
@@ -1298,6 +1301,9 @@
 Python
+
+ Python
+
 Python
diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c
new file mode 100644
index 00000000000000..7181cc019d4a1c
--- /dev/null
+++ b/Python/cpuinfo.c
@@ -0,0 +1,555 @@
+#include "pycore_cpuinfo.h"
+
+/* CPUID input and output registers are 32-bit unsigned integers */
+#define CPUID_REG                   uint32_t
+/* Evaluate to 1 if every bit of MASK is set in REG, and to 0 otherwise. */
+#define CHECK_REG(REG, MASK)        ((((REG) & (MASK)) == (MASK)) ? 1 : 0)
+#define CPUID_CHECK_REG(REG, FEAT)  CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT))
+#define XSAVE_CHECK_REG(REG, FEAT)  CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT))
+
+// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs.
+// In the future, we should carefully enable support for ARM NEON and POWER
+// as well as AMD.
+#if defined(__x86_64__) && defined(__GNUC__)
+#  include <cpuid.h>        // __cpuid_count()
+#  define HAS_CPUID_SUPPORT
+#  define HAS_XGETBV_SUPPORT
+#elif defined(_M_X64)
+#  include <immintrin.h>    // _xgetbv()
+#  define HAS_XGETBV_SUPPORT
+#  include <intrin.h>       // __cpuidex()
+#  define HAS_CPUID_SUPPORT
+#else
+#  undef HAS_CPUID_SUPPORT
+#  undef HAS_XGETBV_SUPPORT
+#endif
+
+// Below, we declare macros for guarding the detection of SSE, AVX/AVX2
+// and AVX-512 instructions. If the compiler does not even recognize the
+// corresponding flags or if we are not on a 64-bit platform we do not
+// even try to inspect the output of CPUID for those specific features.
+#ifdef HAS_CPUID_SUPPORT
+#if defined(Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \
+    // macros above should be sorted in alphabetical order
+#  define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD
+#endif
+
+#if defined(Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \
+    // macros above should be sorted in alphabetical order
+#  define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+#endif
+
+#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS)
+#  define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD
+#endif
+
+#if defined(Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \
+    || defined(Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \
+    // macros above should be sorted in alphabetical order
+#  define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD
+#endif
+#endif // HAS_CPUID_SUPPORT
+
+// On macOS, checking the XCR0 register is NOT a guaranteed way
+// to ensure the usability of AVX-512. As such, we disable the
+// entire set of AVX-512 instructions.
+//
+// See https://stackoverflow.com/a/72523150/9579194.
+#if defined(__APPLE__)
+#  undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD
+    // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be
+    // compiled on x86_64). However, since autoconf incorrectly assumes so
+    // when compiling a universal2 binary, we disable SIMD on such builds.
+#  if defined(__aarch64__) || defined(__arm64__)
+#    undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+#    undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD
+#  endif
+#endif
+
+// Below, we declare macros indicating how CPUID can be called at runtime,
+// so that we only call CPUID with specific inputs when needed.
+
+#if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \
+    || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD)
+/* Indicate that cpuid should be called once with EAX=1 and ECX=0. */
+#  define SHOULD_PARSE_CPUID_L1
+#endif
+
+#if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \
+    || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD)
+/* Indicate that cpuid should be called once with EAX=7 and ECX=0. */
+#  define SHOULD_PARSE_CPUID_L7
+#  define SHOULD_PARSE_CPUID_L7S0
+#endif
+
+#if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD)
+/* Indicate that cpuid should be called once with EAX=7 and ECX=1. */
+#  define SHOULD_PARSE_CPUID_L7
+#  define SHOULD_PARSE_CPUID_L7S1
+#endif
+
+/*
+ * Call __cpuid_count() or equivalent and get
+ * its EAX, EBX, ECX and EDX output registers.
+ *
+ * If CPUID is not supported, registers are set to 0.
+ */
+static inline void
+get_cpuid_info(uint32_t level /* input eax */,
+               uint32_t count /* input ecx */,
+               CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx)
+{
+    *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized
+#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__)
+    __cpuid_count(level, count, *eax, *ebx, *ecx, *edx);
+#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64)
+    int info[4] = {0};  // __cpuidex() is declared with 'int[4]' and 'int' inputs
+    __cpuidex(info, (int)level, (int)count);
+    *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3];
+#endif
+}
+
+static inline uint64_t
+get_xgetbv(uint32_t index)
+{
+    assert(index == 0); // only XCR0 is supported for now
+#if defined(HAS_XGETBV_SUPPORT) && defined(__x86_64__) && defined(__GNUC__)
+    uint32_t eax = 0, edx = 0;
+    __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+    return ((uint64_t)edx << 32) | eax;
+#elif defined(HAS_XGETBV_SUPPORT) && defined(_M_X64)
+    return (uint64_t)_xgetbv(index);
+#else
+    (void)index;
+    return 0;
+#endif
+}
+
+/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */
+static inline uint32_t
+detect_cpuid_maxleaf(void)
+{
+    CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0;
+    get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx);
+    return maxleaf;
+}
+
+/* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */
+static inline void
+detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx)
+{
+    // Keep the ordering and newlines as they are declared in the structure.
+#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD
+#ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS
+    flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
+    flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
+    flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS
+    flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
+    flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
+    flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2);
+#endif
+#endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD
+
+#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+#ifdef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
+    flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX);
+#endif
+#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+
+#ifdef HAS_CPUID_SUPPORT
+    flags->cmov = CPUID_CHECK_REG(edx, EDX_L1_CMOV);
+    flags->fma = CPUID_CHECK_REG(ecx, ECX_L1_FMA);
+    flags->popcnt = CPUID_CHECK_REG(ecx, ECX_L1_POPCNT);
+    flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ);
+
+    flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE);
+    flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE);
+#endif
+}
+
+/* Extended Feature Bits (LEAF=7, SUBLEAF=0). */
+static inline void
+detect_cpuid_extended_features_L7S0(py_cpuid_features *flags,
+                                    CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx)
+{
+    (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings
+    // Keep the ordering and newlines as they are declared in the structure.
+#ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD
+#ifdef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
+    flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2);
+#endif
+#endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD
+
+#ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS
+    flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS
+    flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS
+    flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS
+    flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS
+    flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS
+    flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS
+    flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS
+    flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS
+    flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS
+    flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS
+    flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
+    flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS
+    flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS
+    flags->avx512_vbmi2 = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS
+    flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS
+    flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT);
+#endif
+#endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD
+}
+
+/* Extended Feature Bits (LEAF=7, SUBLEAF=1). */
+static inline void
+detect_cpuid_extended_features_L7S1(py_cpuid_features *flags,
+                                    CPUID_REG eax,
+                                    CPUID_REG ebx,
+                                    CPUID_REG ecx,
+                                    CPUID_REG edx)
+{
+    (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings
+    // Keep the ordering and newlines as they are declared in the structure.
+#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+#ifdef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS
+    flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS
+    flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA);
+#endif
+
+#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS
+    flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS
+    flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8);
+#endif
+#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS
+    flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16);
+#endif
+#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD
+}
+
+static inline void
+detect_cpuid_xsave_state(py_cpuid_features *flags)
+{
+    // Keep the ordering and newlines as they are declared in the structure.
+#ifdef HAS_XGETBV_SUPPORT
+    uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0;
+    flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE);
+    flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX);
+    flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK);
+    flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256);
+    flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM);
+#endif
+}
+
+static inline void
+cpuid_features_finalize(py_cpuid_features *flags)
+{
+    assert(flags->ready == 0);
+
+    // Here, any flag that may depend on others should be correctly set
+    // at runtime to avoid illegal instruction errors.
+
+    flags->ready = 1;
+}
+
+static inline int
+cpuid_features_validate(const py_cpuid_features *flags)
+{
+    if (flags->ready != 1) {
+        return -1;  // 'flags' has not been finalized yet
+    }
+
+    // AVX-512/F is required to support any other AVX-512 instruction set
+    uint8_t avx512_require_f = (
+        // newlines are placed according to processor generations
+        flags->avx512_cd ||
+        flags->avx512_er || flags->avx512_pf ||
+        flags->avx512_4fmaps || flags->avx512_4vnniw ||
+        flags->avx512_vpopcntdq ||
+        flags->avx512_vl || flags->avx512_dq || flags->avx512_bw ||
+        flags->avx512_ifma || flags->avx512_vbmi ||
+        flags->avx512_vnni ||
+        flags->avx512_vbmi2 || flags->avx512_bitalg ||
+        flags->avx512_vp2intersect
+    );
+
+    if (!flags->avx512_f && avx512_require_f) {
+        return -1;  // an AVX-512 extension is reported without AVX-512/F
+    }
+
+    return 0;
+}
+
+int
+_Py_cpuid_check_features(const py_cpuid_features *flags)
+{
+    return cpuid_features_validate(flags) < 0 ? 0 : 1;
+}
+
+/*
+ * Apply a 1-parameter macro MACRO(FLAG) on all members
+ * of a 'py_cpuid_features' object ('ready' is omitted).
+ */ +#define CPUID_APPLY_MACRO(MACRO) \ + do { \ + MACRO(sse); \ + MACRO(sse2); \ + MACRO(sse3); \ + MACRO(ssse3); \ + MACRO(sse41); \ + MACRO(sse42); \ + \ + MACRO(avx); \ + MACRO(avx_ifma); \ + MACRO(avx_ne_convert); \ + \ + MACRO(avx_vnni); \ + MACRO(avx_vnni_int8); \ + MACRO(avx_vnni_int16); \ + \ + MACRO(avx2); \ + \ + MACRO(avx512_f); \ + MACRO(avx512_cd); \ + \ + MACRO(avx512_er); \ + MACRO(avx512_pf); \ + \ + MACRO(avx512_4fmaps); \ + MACRO(avx512_4vnniw); \ + \ + MACRO(avx512_vpopcntdq); \ + \ + MACRO(avx512_vl); \ + MACRO(avx512_dq); \ + MACRO(avx512_bw); \ + \ + MACRO(avx512_ifma); \ + MACRO(avx512_vbmi); \ + \ + MACRO(avx512_vnni); \ + \ + MACRO(avx512_vbmi2); \ + MACRO(avx512_bitalg); \ + \ + MACRO(avx512_vp2intersect); \ + \ + MACRO(cmov); \ + MACRO(fma); \ + MACRO(popcnt); \ + MACRO(pclmulqdq); \ + \ + MACRO(xsave); \ + MACRO(osxsave); \ + \ + MACRO(xcr0_sse); \ + MACRO(xcr0_avx); \ + MACRO(xcr0_avx512_opmask); \ + MACRO(xcr0_avx512_zmm_hi256); \ + MACRO(xcr0_avx512_hi16_zmm); \ + } while (0) + +void +_Py_cpuid_disable_features(py_cpuid_features *flags) +{ + flags->maxleaf = 0; +#define CPUID_DISABLE(FLAG) flags->FLAG = 0 + CPUID_APPLY_MACRO(CPUID_DISABLE); +#undef CPUID_DISABLE +} + +int +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf < expect->maxleaf) { + return 0; + } +#define CPUID_CHECK_FEATURE(FLAG) \ + do { \ + if (expect->FLAG && !actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_CHECK_FEATURE); +#undef CPUID_CHECK_FEATURE + return 1; +} + +int +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf != expect->maxleaf) { + return 0; + } +#define CPUID_MATCH_FEATURE(FLAG) \ + do { \ + if (expect->FLAG != actual->FLAG) { \ + return 0; \ + } \ + } while (0) + 
CPUID_APPLY_MACRO(CPUID_MATCH_FEATURE); +#undef CPUID_MATCH_FEATURE + return 1; +} + +#undef CPUID_APPLY_MACRO + +#ifdef SHOULD_PARSE_CPUID_L1 +static inline void +cpuid_detect_l1_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 1) { + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_features(flags, ecx, edx); + if (flags->osxsave) { + detect_cpuid_xsave_state(flags); + } + } +} +#else +#define cpuid_detect_l1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7S0 +static inline void +cpuid_detect_l7s0_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s0_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7S1 +static inline void +cpuid_detect_l7s1_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7 +static inline void +cpuid_detect_l7_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 7) { + cpuid_detect_l7s0_features(flags); + cpuid_detect_l7s1_features(flags); + } +} +#else +#define cpuid_detect_l7_features(FLAGS) +#endif + +void +_Py_cpuid_detect_features(py_cpuid_features *flags) +{ + if (flags->ready) { + return; + } + _Py_cpuid_disable_features(flags); +#ifndef HAS_CPUID_SUPPORT + flags->ready = 1; +#else + flags->maxleaf = detect_cpuid_maxleaf(); + cpuid_detect_l1_features(flags); + cpuid_detect_l7_features(flags); + cpuid_features_finalize(flags); + if (cpuid_features_validate(flags) < 0) { + _Py_cpuid_disable_features(flags); + } +#endif // !HAS_CPUID_SUPPORT +} diff --git a/configure b/configure index e59c7046305d46..db2203b706497a 100755 --- a/configure 
+++ b/configure @@ -30687,6 +30687,1423 @@ fi printf "%s\n" "$py_cv_module__blake2" >&6; } + + +# Detection of supported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. +if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + # SSE + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 +printf %s "checking whether C compiler accepts -msse... " >&6; } +if test ${ax_cv_check_cflags___msse+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse=yes +else $as_nop + ax_cv_check_cflags___msse=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } +if test "x$ax_cv_check_cflags___msse" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 +printf %s "checking whether C compiler accepts -msse2... 
" >&6; } +if test ${ax_cv_check_cflags___msse2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse2=yes +else $as_nop + ax_cv_check_cflags___msse2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } +if test "x$ax_cv_check_cflags___msse2" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 +printf %s "checking whether C compiler accepts -msse3... " >&6; } +if test ${ax_cv_check_cflags___msse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse3=yes +else $as_nop + ax_cv_check_cflags___msse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } +if test "x$ax_cv_check_cflags___msse3" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 +printf %s "checking whether C compiler accepts -mssse3... " >&6; } +if test ${ax_cv_check_cflags___mssse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mssse3=yes +else $as_nop + ax_cv_check_cflags___mssse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } +if test "x$ax_cv_check_cflags___mssse3" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 +printf %s "checking whether C compiler accepts -msse4.1... " >&6; } +if test ${ax_cv_check_cflags___msse4_1+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.1" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_1=yes +else $as_nop + ax_cv_check_cflags___msse4_1=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } +if test "x$ax_cv_check_cflags___msse4_1" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags___msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_2=yes +else $as_nop + ax_cv_check_cflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } +if test "x$ax_cv_check_cflags___msse4_2" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # AVX + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +printf %s "checking whether C compiler accepts -mavx... " >&6; } +if test ${ax_cv_check_cflags___mavx+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx=yes +else $as_nop + ax_cv_check_cflags___mavx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } +if test "x$ax_cv_check_cflags___mavx" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 +printf %s "checking whether C compiler accepts -mavxifma... " >&6; } +if test ${ax_cv_check_cflags___mavxifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxifma=yes +else $as_nop + ax_cv_check_cflags___mavxifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } +if test "x$ax_cv_check_cflags___mavxifma" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 +printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } +if test ${ax_cv_check_cflags___mavxneconvert+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxneconvert" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxneconvert=yes +else $as_nop + ax_cv_check_cflags___mavxneconvert=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags___mavxneconvert" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 +printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } +if test ${ax_cv_check_cflags___mavxvnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnni=yes +else $as_nop + ax_cv_check_cflags___mavxvnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } +if test "x$ax_cv_check_cflags___mavxvnni" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint8" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint8=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint16+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint16" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint16=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint16=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # AVX-2 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags___mavx2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx2=yes +else $as_nop + ax_cv_check_cflags___mavx2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } +if test "x$ax_cv_check_cflags___mavx2" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # AVX-512 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 +printf %s "checking whether C compiler accepts -mavx512f... " >&6; } +if test ${ax_cv_check_cflags___mavx512f+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512f" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512f=yes +else $as_nop + ax_cv_check_cflags___mavx512f=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } +if test "x$ax_cv_check_cflags___mavx512f" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 +printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } +if test ${ax_cv_check_cflags___mavx512cd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512cd" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512cd=yes +else $as_nop + ax_cv_check_cflags___mavx512cd=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } +if test "x$ax_cv_check_cflags___mavx512cd" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 +printf %s "checking whether C compiler accepts -mavx512er... " >&6; } +if test ${ax_cv_check_cflags___mavx512er+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512er" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512er=yes +else $as_nop + ax_cv_check_cflags___mavx512er=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } +if test "x$ax_cv_check_cflags___mavx512er" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 +printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } +if test ${ax_cv_check_cflags___mavx512pf+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512pf" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512pf=yes +else $as_nop + ax_cv_check_cflags___mavx512pf=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } +if test "x$ax_cv_check_cflags___mavx512pf" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 +printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } +if test ${ax_cv_check_cflags___mavx5124fmaps+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124fmaps" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124fmaps=yes +else $as_nop + ax_cv_check_cflags___mavx5124fmaps=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 +printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } +if test ${ax_cv_check_cflags___mavx5124vnniw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124vnniw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124vnniw=yes +else $as_nop + ax_cv_check_cflags___mavx5124vnniw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 +printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } +if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vpopcntdq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vpopcntdq=yes +else $as_nop + ax_cv_check_cflags___mavx512vpopcntdq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 +printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } +if test ${ax_cv_check_cflags___mavx512vl+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vl=yes +else $as_nop + ax_cv_check_cflags___mavx512vl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } +if test "x$ax_cv_check_cflags___mavx512vl" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 +printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } +if test ${ax_cv_check_cflags___mavx512dq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512dq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512dq=yes +else $as_nop + ax_cv_check_cflags___mavx512dq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } +if test "x$ax_cv_check_cflags___mavx512dq" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 +printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } +if test ${ax_cv_check_cflags___mavx512bw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bw=yes +else $as_nop + ax_cv_check_cflags___mavx512bw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } +if test "x$ax_cv_check_cflags___mavx512bw" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 +printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } +if test ${ax_cv_check_cflags___mavx512ifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512ifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512ifma=yes +else $as_nop + ax_cv_check_cflags___mavx512ifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags___mavx512ifma" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 +printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } +if test ${ax_cv_check_cflags___mavx512vnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vnni=yes +else $as_nop + ax_cv_check_cflags___mavx512vnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags___mavx512vnni" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi2=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 +printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } +if test ${ax_cv_check_cflags___mavx512bitalg+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bitalg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bitalg=yes +else $as_nop + ax_cv_check_cflags___mavx512bitalg=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 +printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } +if test ${ax_cv_check_cflags___mavx512vp2intersect+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vp2intersect" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ax_cv_check_cflags___mavx512vp2intersect=yes
+else $as_nop
+  ax_cv_check_cflags___mavx512vp2intersect=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+  CFLAGS=$ax_check_save_flags
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5
+printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; }
+if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes
+then :
+
+printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h
+
+else $as_nop
+  :
+fi
+
+
+
+
+fi
+
 LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)'
 case "$ac_sys_system" in
   Linux*)
diff --git a/configure.ac b/configure.ac
index 074e2ce3dd3024..1f114d0e42589d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7779,6 +7779,79 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes])
 PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes])
 PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes])
 
+dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, NORMALIZED_NAME)
+dnl
+dnl Check whether the C compiler accepts COMPILER_FLAG and, if it does, define
+dnl Py_CAN_COMPILE_SIMD_<NAME>_INSTRUCTIONS to 1, where <NAME> is the upper-cased
+dnl NORMALIZED_NAME when given and the upper-cased INSTRUCTION_SET_NAME otherwise.
+dnl Pass NORMALIZED_NAME when INSTRUCTION_SET_NAME cannot be used in a C macro
+dnl name, e.g. 'SSE4.1' is normalized to 'SSE4_1'.
+dnl
+dnl AX_CHECK_COMPILE_FLAG prints its own "checking whether C compiler accepts"
+dnl message and result, and caches the outcome in an ax_cv_check_cflags_* variable,
+dnl so the call must not be wrapped in another AC_MSG_CHECKING.
+AC_DEFUN([PY_SIMD_DETECT], [
+  AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3],
+    [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]],
+    [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])])
+  AX_CHECK_COMPILE_FLAG([$2],
+    [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])],
+    [], [])
+  AS_VAR_POPDEF([py_define])
+])
+
+# Detection of supported SIMD instruction sets for CPython. Since
+# we do not necessarily know which instruction sets will be used,
+# we disable SIMD support on some older Android platforms.
+#
+# See py_cpuid_features in pycore_cpuinfo.h for how to order fields
+# and where to put blank lines to separate processor generations for
+# AVX-512 instructions.
+if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then
+  # SSE
+  PY_SIMD_DETECT([SSE], [-msse])
+  PY_SIMD_DETECT([SSE2], [-msse2])
+  PY_SIMD_DETECT([SSE3], [-msse3])
+  PY_SIMD_DETECT([SSSE3], [-mssse3])
+  PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1])
+  PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2])
+  # AVX
+  PY_SIMD_DETECT([AVX], [-mavx])
+  PY_SIMD_DETECT([AVX_IFMA], [-mavxifma])
+  PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert])
+  #
+  PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni])
+  PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8])
+  PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16])
+  # AVX-2
+  PY_SIMD_DETECT([AVX2], [-mavx2])
+  # AVX-512
+  PY_SIMD_DETECT([AVX512_F], [-mavx512f])
+  PY_SIMD_DETECT([AVX512_CD], [-mavx512cd])
+  #
+  PY_SIMD_DETECT([AVX512_ER], [-mavx512er])
+  PY_SIMD_DETECT([AVX512_PF], [-mavx512pf])
+  #
+  PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps])
+  PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw])
+  #
+  PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq])
+  #
+  PY_SIMD_DETECT([AVX512_VL], [-mavx512vl])
+  PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq])
+  PY_SIMD_DETECT([AVX512_BW], [-mavx512bw])
+  #
+  PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma])
+  PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi])
+  #
+  PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni])
+  #
+  PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2])
+  PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg])
+  #
+  PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect])
+fi
+
 LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)'
 case "$ac_sys_system" in
   Linux*)
diff --git a/pyconfig.h.in
b/pyconfig.h.in index 1ca83fd2f2ca1b..3510d78849c00c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1687,6 +1687,93 @@ /* PEP 11 Support tier (1, 2, 3 or 0 for unsupported) */ #undef PY_SUPPORT_TIER +/* Define if '-mavx2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. 
*/ +#undef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG