57 const int blocks_per_row = K /
QK8_0;
59 #pragma omp parallel for schedule(static)
60 for (
int row = 0; row < M; row++) {
62 &w_blocks[row * blocks_per_row],
79 const int blocks_per_row = K /
QK5_0;
81 #pragma omp parallel for schedule(static)
82 for (
int row = 0; row < M; row++) {
84 &w_blocks[row * blocks_per_row],
103 const int blocks_per_row = K /
QK5_0;
110 #pragma omp parallel for schedule(static)
111 for (
int row = 0; row < M; row++) {
113 &w_blocks[row * blocks_per_row],
115 if (bias) y[row] += bias[row];
Block structures used for weight-only quantization.
void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x, const float *bias, int M, int K)
void vec_dot_q5_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q5_0 x Q8_0.
void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
void quantize_row_q8_0(const float *x, void *y, int k)
Quantize FP32 values to Q8_0 format (scalar reference implementation).
void vec_dot_q8_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q8_0 x Q8_0.