← Back to C-Kernel-Engine Docs Doxygen Source Documentation
gemv_omp.h File Reference

Go to the source code of this file.

Functions

void gemv_fused_q5_0_bias_parallel_omp (float *y, const void *W, const float *x, const float *bias, int M, int K)
 
void gemv_q5_0_q8_0_parallel_omp (float *y, const void *W, const void *x_q8, int M, int K)
 
void gemv_q8_0_q8_0_parallel_omp (float *y, const void *W, const void *x_q8, int M, int K)
 

Function Documentation

◆ gemv_fused_q5_0_bias_parallel_omp()

void gemv_fused_q5_0_bias_parallel_omp ( float *  y,
const void *  W,
const float *  x,
const float *  bias,
int  M,
int  K 
)

Definition at line 96 of file gemv_omp.c.

101 {
102  const block_q5_0 *w_blocks = (const block_q5_0 *)W;
103  const int blocks_per_row = K / QK5_0;
104 
105  /* Quantize input ONCE (serial, fast — K=896 → 28 blocks = 952 bytes) */
106  block_q8_0 x_q8[K / QK8_0];
107  quantize_row_q8_0(x, (void *)x_q8, K);
108 
109  /* Parallel GEMV over output rows */
110  #pragma omp parallel for schedule(static)
111  for (int row = 0; row < M; row++) {
112  vec_dot_q5_0_q8_0(K, &y[row],
113  &w_blocks[row * blocks_per_row],
114  x_q8);
115  if (bias) y[row] += bias[row];
116  }
117 }
#define QK5_0
Definition: ckernel_quant.h:67
#define QK8_0
void vec_dot_q5_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q5_0 x Q8_0.
void quantize_row_q8_0(const float *x, void *y, int k)
Quantize FP32 to Q8_0 format (scalar reference)

References QK5_0, QK8_0, quantize_row_q8_0(), and vec_dot_q5_0_q8_0().

◆ gemv_q5_0_q8_0_parallel_omp()

void gemv_q5_0_q8_0_parallel_omp ( float *  y,
const void *  W,
const void *  x_q8,
int  M,
int  K 
)

Definition at line 72 of file gemv_omp.c.

76 {
77  const block_q5_0 *w_blocks = (const block_q5_0 *)W;
78  const block_q8_0 *x_blocks = (const block_q8_0 *)x_q8;
79  const int blocks_per_row = K / QK5_0;
80 
81  #pragma omp parallel for schedule(static)
82  for (int row = 0; row < M; row++) {
83  vec_dot_q5_0_q8_0(K, &y[row],
84  &w_blocks[row * blocks_per_row],
85  x_blocks);
86  }
87 }

References QK5_0 and vec_dot_q5_0_q8_0().

◆ gemv_q8_0_q8_0_parallel_omp()

void gemv_q8_0_q8_0_parallel_omp ( float *  y,
const void *  W,
const void *  x_q8,
int  M,
int  K 
)

Definition at line 50 of file gemv_omp.c.

54 {
55  const block_q8_0 *w_blocks = (const block_q8_0 *)W;
56  const block_q8_0 *x_blocks = (const block_q8_0 *)x_q8;
57  const int blocks_per_row = K / QK8_0;
58 
59  #pragma omp parallel for schedule(static)
60  for (int row = 0; row < M; row++) {
61  vec_dot_q8_0_q8_0(K, &y[row],
62  &w_blocks[row * blocks_per_row],
63  x_blocks);
64  }
65 }
void vec_dot_q8_0_q8_0(int n, float *s, const void *vx, const void *vy)
Auto-dispatch quantized dot product Q8_0 x Q8_0.

References QK8_0 and vec_dot_q8_0_q8_0().