46 #if defined(__AMX_INT8__) || defined(__AMX_TILE__)
48 #include <immintrin.h>
51 typedef struct __tile_config {
54 uint8_t reserved_0[14];
83 static void configure_tiles_gemm(
int M,
int N,
int K) {
84 __tile_config
config = {0};
94 config.rows[TILE_A] = tile_m;
95 config.colsb[TILE_A] = tile_k;
98 config.rows[TILE_B] = tile_k;
99 config.colsb[TILE_B] = tile_n * 4;
102 config.rows[TILE_C] = tile_m;
103 config.colsb[TILE_C] = tile_n * 4;
105 _tile_loadconfig(&
config);
111 static void release_tiles(
void) {
127 void gemm_amx_int8_core(
134 configure_tiles_gemm(M, N, K);
151 _tile_loadd(TILE_A, A + m * K + k, K);
155 _tile_loadd(TILE_B, B + k * N + n, N * 4);
158 _tile_dpbssd(TILE_C, TILE_A, TILE_B);
162 _tile_stored(TILE_C,
C + m * N + n, N * 4);
201 #if defined(__AVX512VNNI__) && defined(__AVX512VL__)
203 #elif defined(__AVX2__)
205 #elif defined(__AVX__)
219 unsigned int eax, ebx, ecx, edx;
222 __asm__ __volatile__(
224 :
"=a"(eax),
"=b"(ebx),
"=c"(ecx),
"=d"(edx)
231 bool has_amx_tile = (edx >> 24) & 1;
232 bool has_amx_int8 = (edx >> 25) & 1;
234 return has_amx_tile && has_amx_int8;
249 #if defined(__AVX512VNNI__) && defined(__AVX512VL__)
251 #elif defined(__AVX2__)
253 #elif defined(__AVX__)
Quantization block structures for weight-only quantization.
void gemv_q4_k_q8_k_avx2(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_vnni(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_amx(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_ref(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_avx(float *y, const void *W, const void *x_q8, int M, int K)
const CKBPEConfig * config