/*
 * Compile-time ISA feature flags.
 *
 * Each CK_HAS_* macro below is derived purely from compiler-predefined
 * macros, i.e. from the target flags the translation unit was built with,
 * not from the CPU the binary eventually runs on.
 *
 * CK_HAS_AVX512 requires the F, BW and DQ subsets together.
 */
33 #if defined(__AVX512F__) && defined(__AVX512BW__) && defined(__AVX512DQ__)
34 #define CK_HAS_AVX512 1
/* AVX2 is only advertised when FMA is enabled as well. */
37 #if defined(__AVX2__) && defined(__FMA__)
38 #define CK_HAS_AVX2_FMA 1
/* AVX-512 VNNI is keyed on its own compiler macro, independently of the
 * CK_HAS_AVX512 test above. */
49 #if defined(__AVX512VNNI__)
50 #define CK_HAS_AVX512VNNI 1
/*
 * AArch64 section: SVE2 and NEON are probed inside the __aarch64__ guard.
 * The macros they define are on lines not visible in this chunk.
 * NOTE(review): ACLE spells the Advanced SIMD feature macro __ARM_NEON;
 * __ARM_FEATURE_NEON does not appear to be defined by GCC or Clang, so this
 * test may never be true -- confirm against the ACLE specification.
 */
54 #if defined(__aarch64__)
55 #if defined(__ARM_FEATURE_SVE2)
58 #if defined(__ARM_FEATURE_NEON)
/* PowerPC AltiVec. */
64 #if defined(__ALTIVEC__)
65 #define CK_HAS_ALTIVEC 1
/* RISC-V vector extension; the macro defined for it is not visible here. */
73 #if defined(__riscv_vector)
/* x86 / x86-64 only: guards the CPUID helper functions that follow. */
82 #if defined(__x86_64__) || defined(__i386__)
/*
 * Executes CPUID leaf 0 and captures the four result registers in locals.
 *
 * The return statement is on a line not visible in this chunk. Per the
 * Intel SDM, leaf 0 reports the highest supported basic leaf in EAX, which
 * is presumably the value returned -- confirm against the full source.
 *
 * eax is left uninitialized at declaration; it is written by the __cpuid
 * invocation before any visible read.
 */
85 static inline uint32_t ck_cpuid_max_leaf(
void) {
86 uint32_t eax, ecx = 0, ebx = 0, edx = 0;
87 __cpuid(0, eax, ebx, ecx, edx);
/*
 * Executes CPUID leaf 1 and stores the resulting EAX/EBX/ECX/EDX values
 * through the four output pointers.
 *
 * All four pointers are dereferenced unconditionally, so callers must pass
 * valid, non-NULL addresses.
 */
91 static inline void ck_cpuid_leaf1(uint32_t *eax, uint32_t *ebx,
92 uint32_t *ecx, uint32_t *edx) {
93 __cpuid(1, *eax, *ebx, *ecx, *edx);
/*
 * Queries CPUID leaf 7 with the sub-leaf index given in ecx_val.
 *
 * The remainder of the parameter list, the declaration of `info`, and the
 * code that publishes the results through the output pointers are on lines
 * not visible in this chunk -- presumably `info` is a 4-element register
 * array copied out to *eax..*edx; confirm against the full source.
 *
 * NOTE(review): this uses the (array, leaf, subleaf) form of __cpuidex,
 * while the sibling helpers use the 5-argument __cpuid form. Check that
 * both forms are available from the same header on every supported
 * compiler.
 */
96 static inline void ck_cpuid_leaf7(uint32_t ecx_val, uint32_t *eax,
97 uint32_t *ebx, uint32_t *ecx,
100 __cpuidex(info, 7, ecx_val);
/*
 * Returns non-zero when bits 26 and 27 of CPUID leaf 1 ECX are both set,
 * and 0 otherwise.
 *
 * Per the Intel SDM those bits are XSAVE (26) and OSXSAVE (27), i.e. the
 * processor supports XSAVE and the OS has enabled it.
 *
 * NOTE(review): despite the name, nothing visible here inspects the
 * tile-state components of XCR0 (via XGETBV) or requests AMX permission
 * from the OS. Confirm whether that check lives elsewhere or is missing.
 */
108 static inline int ck_os_has_xtile(
void) {
109 uint32_t eax, ebx, ecx, edx;
110 ck_cpuid_leaf1(&eax, &ebx, &ecx, &edx);
112 return (ecx & (1 << 27)) && (ecx & (1 << 26));
/*
 * Selects CK_VECTOR_WIDTH (in bits) and CK_HAS_BEST_VECTOR from the first
 * matching feature flag, in priority order:
 *   AMX (512) > AVX-512 (512) > AVX2+FMA (256) > AVX (256) > NEON (128).
 *
 * When none of these flags is defined, the trailing pair of defines sets
 * the width to 32 and CK_HAS_BEST_VECTOR to 0.
 *
 * CK_HAS_BEST_VECTOR is therefore always defined; consumers must test its
 * value with `#if CK_HAS_BEST_VECTOR`, because `#ifdef` is true even in the
 * fallback case.
 *
 * CK_HAS_AMX, CK_HAS_AVX and CK_HAS_NEON are defined outside the lines
 * visible in this chunk.
 */
122 #if defined(CK_HAS_AMX)
123 #define CK_VECTOR_WIDTH 512
124 #define CK_HAS_BEST_VECTOR 1
125 #elif defined(CK_HAS_AVX512)
126 #define CK_VECTOR_WIDTH 512
127 #define CK_HAS_BEST_VECTOR 1
128 #elif defined(CK_HAS_AVX2_FMA)
129 #define CK_VECTOR_WIDTH 256
130 #define CK_HAS_BEST_VECTOR 1
131 #elif defined(CK_HAS_AVX)
132 #define CK_VECTOR_WIDTH 256
133 #define CK_HAS_BEST_VECTOR 1
134 #elif defined(CK_HAS_NEON)
135 #define CK_VECTOR_WIDTH 128
136 #define CK_HAS_BEST_VECTOR 1
/* Fallback when no SIMD flag above matched. */
138 #define CK_VECTOR_WIDTH 32
139 #define CK_HAS_BEST_VECTOR 0
/*
 * CK_HAS_AI_ACCEL is 1 when any of CK_HAS_AMX, CK_HAS_AVX512VNNI or
 * CK_HAS_VNNI is defined, and 0 otherwise.
 *
 * The macro is always defined, so test it with `#if CK_HAS_AI_ACCEL`
 * rather than `#ifdef`.
 *
 * CK_HAS_VNNI is defined outside the lines visible in this chunk.
 */
143 #if defined(CK_HAS_AMX)
144 #define CK_HAS_AI_ACCEL 1
145 #elif defined(CK_HAS_AVX512VNNI)
146 #define CK_HAS_AI_ACCEL 1
147 #elif defined(CK_HAS_VNNI)
148 #define CK_HAS_AI_ACCEL 1
/* Fallback: no matching accelerator flag. */
150 #define CK_HAS_AI_ACCEL 0
/*
 * CK_GEMM_DISPATCH(...) forwards its arguments unchanged to the GEMM kernel
 * chosen at compile time, in priority order:
 *   gemm_amx > gemm_avx512 > gemm_avx2 > gemm_avx > gemm_ref.
 *
 * The choice is fixed when the translation unit is preprocessed; nothing
 * here performs run-time CPU detection.
 *
 * NOTE(review): there is no NEON or AltiVec branch, so builds with only
 * those flags resolve to gemm_ref -- confirm this is intentional.
 */
165 #if defined(CK_HAS_AMX)
166 #define CK_GEMM_DISPATCH(...) gemm_amx(__VA_ARGS__)
167 #elif defined(CK_HAS_AVX512)
168 #define CK_GEMM_DISPATCH(...) gemm_avx512(__VA_ARGS__)
169 #elif defined(CK_HAS_AVX2_FMA)
170 #define CK_GEMM_DISPATCH(...) gemm_avx2(__VA_ARGS__)
171 #elif defined(CK_HAS_AVX)
172 #define CK_GEMM_DISPATCH(...) gemm_avx(__VA_ARGS__)
/* Fallback: portable reference kernel. */
174 #define CK_GEMM_DISPATCH(...) gemm_ref(__VA_ARGS__)
/*
 * CK_GEMV_DISPATCH(...) forwards its arguments unchanged to the GEMV kernel
 * chosen at compile time, in priority order:
 *   gemv_amx > gemv_avx512 > gemv_avx2 > gemv_avx > gemv_ref.
 *
 * NOTE(review): as with the GEMM selector, there is no NEON or AltiVec
 * branch, so those builds resolve to gemv_ref -- confirm this is
 * intentional.
 */
180 #if defined(CK_HAS_AMX)
181 #define CK_GEMV_DISPATCH(...) gemv_amx(__VA_ARGS__)
182 #elif defined(CK_HAS_AVX512)
183 #define CK_GEMV_DISPATCH(...) gemv_avx512(__VA_ARGS__)
184 #elif defined(CK_HAS_AVX2_FMA)
185 #define CK_GEMV_DISPATCH(...) gemv_avx2(__VA_ARGS__)
186 #elif defined(CK_HAS_AVX)
187 #define CK_GEMV_DISPATCH(...) gemv_avx(__VA_ARGS__)
/* Fallback: portable reference kernel. */
189 #define CK_GEMV_DISPATCH(...) gemv_ref(__VA_ARGS__)
/*
 * CK_QGEMM_DISPATCH(...) forwards its arguments unchanged to the qgemm_*
 * kernel chosen at compile time, in priority order:
 *   qgemm_amx > qgemm_avx512vnni > qgemm_vnni > qgemm_avx2 > qgemm_ref.
 *
 * Unlike the GEMM/GEMV selectors, this ladder has no plain CK_HAS_AVX512 or
 * CK_HAS_AVX branch: an AVX-512 build without VNNI uses qgemm_avx2 when
 * CK_HAS_AVX2_FMA is defined, and qgemm_ref otherwise.
 */
196 #if defined(CK_HAS_AMX)
197 #define CK_QGEMM_DISPATCH(...) qgemm_amx(__VA_ARGS__)
198 #elif defined(CK_HAS_AVX512VNNI)
199 #define CK_QGEMM_DISPATCH(...) qgemm_avx512vnni(__VA_ARGS__)
200 #elif defined(CK_HAS_VNNI)
201 #define CK_QGEMM_DISPATCH(...) qgemm_vnni(__VA_ARGS__)
202 #elif defined(CK_HAS_AVX2_FMA)
203 #define CK_QGEMM_DISPATCH(...) qgemm_avx2(__VA_ARGS__)
/* Fallback: portable reference kernel. */
205 #define CK_QGEMM_DISPATCH(...) qgemm_ref(__VA_ARGS__)
232 .best_kernel =
"gemm_ref"
/*
 * Pick the human-readable capability name from the first matching feature
 * flag: AMX, AVX-512, AVX2+FMA, AVX, NEON, then AltiVec.
 *
 * Only the cap.name assignment of each branch is visible in this chunk;
 * the other fields each branch sets are on lines not shown here.
 */
235 #if defined(CK_HAS_AMX)
236 cap.
name =
"AMX (Intel Sapphire Rapids+)";
241 #elif defined(CK_HAS_AVX512)
242 cap.
name =
"AVX-512 (Intel Skylake-X+)";
247 #elif defined(CK_HAS_AVX2_FMA)
248 cap.
name =
"AVX2+FMA (Intel Haswell+)";
253 #elif defined(CK_HAS_AVX)
254 cap.
name =
"AVX (Intel Sandy Bridge+)";
259 #elif defined(CK_HAS_NEON)
260 cap.
name =
"NEON (ARM)";
265 #elif defined(CK_HAS_ALTIVEC)
266 cap.
name =
"AltiVec (PowerPC)";
static ck_capability_t ck_get_capabilities(void)
Get current platform capabilities.
CPU capability information structure.