47 float val = fval + 12582912.f;
49 memcpy(&i, &val,
sizeof(
int));
50 return (i & 0x007fffff) - 0x00400000;
54 if (!x || !vy || k <= 0) {
57 assert(k %
QK_K == 0);
58 const int nb = k /
QK_K;
61 for (
int i = 0; i < nb; ++i) {
64 for (
int j = 0; j <
QK_K; ++j) {
65 float ax = fabsf(x[j]);
73 memset(y[i].qs, 0,
sizeof(y[i].qs));
74 memset(y[i].bsums, 0,
sizeof(y[i].bsums));
79 const float iscale = -127.0f / max;
80 for (
int j = 0; j <
QK_K; ++j) {
88 y[i].
qs[j] = (int8_t)v;
91 for (
int j = 0; j <
QK_K / 16; ++j) {
93 const int8_t *qs = &y[i].
qs[j * 16];
94 for (
int ii = 0; ii < 16; ++ii) {
97 y[i].
bsums[j] = (int16_t)sum;
100 y[i].
d = 1.0f / iscale;
108 #if defined(__SSE4_1__)
119 const int nb = k /
QK_K;
122 for (
int i = 0; i < nb; ++i) {
123 uint8_t sc[8], m_val[8];
138 for (
int j = 0; j <
QK_K; j += 64) {
139 const uint8_t *qs = &w[i].
qs[q_offset];
140 const int8_t *q8_lo = &x[i].
qs[j];
141 const int8_t *q8_hi = &x[i].
qs[j + 32];
144 int32_t sum_q4q8_lo = 0;
145 for (
int l = 0; l < 32; ++l) {
146 int q4_val = qs[l] & 0x0F;
147 sum_q4q8_lo += q4_val * q8_lo[l];
151 int32_t sum_q4q8_hi = 0;
152 for (
int l = 0; l < 32; ++l) {
153 int q4_val = qs[l] >> 4;
154 sum_q4q8_hi += q4_val * q8_hi[l];
158 int32_t bsum_lo = (int32_t)x[i].bsums[j / 16] +
159 (int32_t)x[i].
bsums[j / 16 + 1];
160 int32_t bsum_hi = (int32_t)x[i].bsums[(j + 32) / 16] +
161 (int32_t)x[i].bsums[(j + 32) / 16 + 1];
164 sumf += d * (float)sc[is] * (
float)sum_q4q8_lo;
165 sumf -= dmin * (float)m_val[is] * (
float)bsum_lo;
166 sumf += d * (float)sc[is + 1] * (
float)sum_q4q8_hi;
167 sumf -= dmin * (float)m_val[is + 1] * (
float)bsum_hi;
182 if (!y || !W || !x_q8 || M <= 0 || K <= 0) {
188 const int blocks_per_row = K /
QK_K;
190 for (
int row = 0; row < M; ++row) {
191 const block_q4_K *w_row = blocks + (size_t)row * (
size_t)blocks_per_row;
212 if (!y || !W || !x_q8 || M <= 0 || K <= 0) {
215 if (ith < 0 || nth <= 0 || ith >= nth) {
220 const int dr = (M + nth - 1) / nth;
221 const int r0 = dr * ith;
222 const int r1 = (r0 + dr < M) ? (r0 + dr) : M;
230 const int blocks_per_row = K /
QK_K;
233 for (
int row = r0; row < r1; ++row) {
234 const block_q4_K *w_row = blocks + (size_t)row * (
size_t)blocks_per_row;
244 #if defined(__AVX512VNNI__) && defined(__AVX512VL__)
247 #elif defined(__AVX2__)
249 #elif defined(__AVX__)
252 #elif defined(__SSE4_1__)
264 if (!Y || !W || !X_q8 || M <= 0 || N <= 0 || K <= 0) {
269 const int blocks_per_vec = K /
QK_K;
271 for (
int n = 0; n < N; ++n) {
272 const block_q8_K *x_row = X + (size_t)n * (
size_t)blocks_per_vec;
282 if (!Y || !W || !X_q8 || M <= 0 || N <= 0 || K <= 0) {
287 const int blocks_per_vec = K /
QK_K;
289 for (
int n = 0; n < N; ++n) {
290 const block_q8_K *x_row = X + (size_t)n * (
size_t)blocks_per_vec;
301 if (!A_q8 || !B || !
C) {
304 if (M <= 0 || N <= 0 || K <= 0) {
314 for (
int i = 0; i < M; ++i) {
315 float *row =
C + (size_t)i * (
size_t)N;
316 for (
int j = 0; j < N; ++j) {
Quantization block structures for weight-only quantization.
#define CK_FP16_TO_FP32(x)
static void unpack_q4_k_scales(const uint8_t *scales, uint8_t *sc, uint8_t *m)
Unpack Q4_K sub-block scales and mins.
void gemv_q4_k_q8_k_avx2(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_vnni(float *y, const void *W, const void *x_q8, int M, int K)
void quantize_row_q8_k(const float *x, void *vy, int k)
void gemm_nt_q4_k_q8_k(const void *A_q8, const void *B, const float *bias, float *C, int M, int N, int K)
void gemv_q4_k_q8_k_parallel(float *y, const void *W, const void *x_q8, int M, int K, int ith, int nth)
void gemm_q4_k_q8_k_ref(float *Y, const void *W, const void *X_q8, int M, int N, int K)
static int ck_nearest_int(float fval)
void gemv_q4_k_q8_k(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_ref(float *y, const void *W, const void *x_q8, int M, int K)
void gemm_q4_k_q8_k(float *Y, const void *W, const void *X_q8, int M, int N, int K)
void quantize_row_q8_k_sse(const float *x, void *vy, int k)
static float dot_q4_k_q8_k_ref(const block_q4_K *w, const block_q8_K *x, int k)
void quantize_row_q8_k_ref(const float *x, void *vy, int k)
void gemv_q4_k_q8_k_avx(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q4_k_q8_k_sse(float *y, const void *W, const void *x_q8, int M, int K)