Go to the source code of this file.
|
| void | gemv_fused_q5_0_bias_parallel_omp (float *y, const void *W, const float *x, const float *bias, int M, int K) |
| |
| void | gemv_q5_0_q8_0_parallel_omp (float *y, const void *W, const void *x_q8, int M, int K) |
| |
| void | gemv_q8_0_q8_0_parallel_omp (float *y, const void *W, const void *x_q8, int M, int K) |
| |
◆ gemv_fused_q5_0_bias_parallel_omp()
void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x, const float *bias, int M, int K)
| |
Definition at line 96 of file gemv_omp.c.
103 const int blocks_per_row = K / QK5_0;
110 #pragma omp parallel for schedule(static)
111 for (int row = 0; row < M; row++) {
113 &w_blocks[row * blocks_per_row],
115 if (bias) y[row] += bias[row];
void vec_dot_q5_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q5_0 x Q8_0.
void quantize_row_q8_0(const float *x, void *y, int k)
Quantize FP32 to Q8_0 format (scalar reference)
References QK5_0, QK8_0, quantize_row_q8_0(), and vec_dot_q5_0_q8_0().
◆ gemv_q5_0_q8_0_parallel_omp()
void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
| |
Definition at line 72 of file gemv_omp.c.
79 const int blocks_per_row = K / QK5_0;
81 #pragma omp parallel for schedule(static)
82 for (int row = 0; row < M; row++) {
84 &w_blocks[row * blocks_per_row],
References QK5_0 and vec_dot_q5_0_q8_0().
◆ gemv_q8_0_q8_0_parallel_omp()
void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
| |
Definition at line 50 of file gemv_omp.c.
57 const int blocks_per_row = K / QK8_0;
59 #pragma omp parallel for schedule(static)
60 for (int row = 0; row < M; row++) {
62 &w_blocks[row * blocks_per_row],
void vec_dot_q8_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q8_0 x Q8_0.
References QK8_0 and vec_dot_q8_0_q8_0().