Go to the source code of this file.
Functions
void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x, const float *bias, int M, int K)
void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
void quantize_row_q8_0(const float *x, void *y, int k)
    Quantize FP32 to Q8_0 format (scalar reference).
void vec_dot_q5_0_q8_0(int n, float *s, const void *vx, const void *vy)
    Auto-dispatch quantized dot product Q5_0 x Q8_0.
void vec_dot_q8_0_q8_0(int n, float *s, const void *vx, const void *vy)
    Auto-dispatch quantized dot product Q8_0 x Q8_0.
void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x, const float *bias, int M, int K)
Definition at line 96 of file gemv_omp.c.
References QK5_0, QK8_0, quantize_row_q8_0(), and vec_dot_q5_0_q8_0().
void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
Definition at line 50 of file gemv_omp.c.
References QK8_0 and vec_dot_q8_0_q8_0().
void quantize_row_q8_0(const float *x, void *vy, int k)
Quantize FP32 to Q8_0 format (scalar reference)
Parameters:
    x  - Input FP32 values
    vy - Output Q8_0 blocks
    k  - Number of elements (must be a multiple of 32)
Definition at line 59 of file gemm_kernels_q8_0.c.
Referenced by gemv_fused_q5_0_bias_parallel_omp() and quantize_batch_q8_0().
void vec_dot_q5_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q5_0 x Q8_0.
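Dispatch priority: (the priority list is missing from this extract; see the function's definition in gemm_kernels_q5_0.c)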
Definition at line 1498 of file gemm_kernels_q5_0.c.
Referenced by gemm_nt_q5_0_q8_0(), gemv_fused_q5_0_bias_parallel_omp(), gemv_q5_0_q8_0(), gemv_q5_0_q8_0_parallel_omp(), and gemv_q5_0_q8_0_parallel_simd().
void vec_dot_q8_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q8_0 x Q8_0.
Definition at line 1013 of file gemm_kernels_q8_0.c.
Referenced by gemv_q8_0_q8_0(), gemv_q8_0_q8_0_parallel(), gemv_q8_0_q8_0_parallel_omp(), and gemv_q8_0_q8_0_parallel_simd().