← Back to C-Kernel-Engine Docs Doxygen Source Documentation
gemm_kernels_f16.c File Reference

GEMM kernels with FP16 (half-precision) weights. More...

#include <stdint.h>
#include <stddef.h>
#include "ckernel_quant.h"

Go to the source code of this file.

Macros

#define fp16_to_fp32(x)   ggml_fp16_to_fp32(x)
 
#define fp32_to_fp16(x)   ggml_fp32_to_fp16(x)
 

Functions

void convert_f16_to_f32 (float *dst, const uint16_t *src, size_t count)
 Convert FP16 tensor to FP32. More...
 
void convert_f32_to_f16 (uint16_t *dst, const float *src, size_t count)
 Convert FP32 tensor to FP16. More...
 
float dot_f16 (const uint16_t *w_f16, const float *x, int K)
 
void gemm_f16 (float *Y, const uint16_t *W, const float *X, int M, int N, int K)
 Auto-dispatch GEMM based on available SIMD. More...
 
void gemm_f16_backward (float *dX, const uint16_t *W, const float *dY, int M, int N, int K)
 Batched backward pass. More...
 
void gemm_f16_ref (float *Y, const uint16_t *W, const float *X, int M, int N, int K)
 Matrix-matrix multiply with FP16 weights (scalar reference) More...
 
void gemv_f16 (float *y, const uint16_t *W, const float *x, int M, int K)
 Auto-dispatch GEMV based on available SIMD. More...
 
void gemv_f16_backward (float *dX, const uint16_t *W, const float *dY, int M, int K)
 Auto-dispatch backward. More...
 
void gemv_f16_backward_ref (float *dX, const uint16_t *W, const float *dY, int M, int K)
 Backward pass: compute input gradient (scalar reference) More...
 
void gemv_f16_ref (float *y, const uint16_t *W, const float *x, int M, int K)
 Matrix-vector multiply with FP16 weights (scalar reference) More...
 

Detailed Description

GEMM kernels with FP16 (half-precision) weights.

CK-ENGINE KERNEL RULES:

  1. NO malloc/free - memory via bump allocator, pointers passed in
  2. NO OpenMP - parallelization at orchestrator/codegen layer
  3. API must define: inputs, outputs, workspace, and memory layouts
  4. Pure computation - deterministic, no side effects

After changes: make test && make llamacpp-parity-full

Implements matrix multiplication where:

  • Weights: FP16 (IEEE half-precision, used by vision encoders)
  • Activations: FP32
  • Output: FP32

Used for multimodal projection layers (mmproj-*.gguf files).

Definition in file gemm_kernels_f16.c.

Macro Definition Documentation

◆ fp16_to_fp32

#define fp16_to_fp32 (   x)    ggml_fp16_to_fp32(x)

Definition at line 36 of file gemm_kernels_f16.c.

◆ fp32_to_fp16

#define fp32_to_fp16 (   x)    ggml_fp32_to_fp16(x)

Definition at line 37 of file gemm_kernels_f16.c.

Function Documentation

◆ convert_f16_to_f32()

/**
 * Convert FP16 tensor to FP32.
 *
 * @param dst    Destination buffer, FP32 [count]
 * @param src    Source buffer, IEEE half-precision bit patterns [count]
 * @param count  Number of elements to convert
 */
void convert_f16_to_f32(float *dst, const uint16_t *src, size_t count)
{
#ifdef __AVX512F__
    /* Vector body: 16 halves -> 16 floats per iteration. */
    const size_t vec_end = count - (count % 16);
    size_t i = 0;

    for (; i < vec_end; i += 16) {
        const __m256i h = _mm256_loadu_si256((const __m256i *)(src + i));
        _mm512_storeu_ps(dst + i, _mm512_cvtph_ps(h));
    }

    /* Scalar tail for the remaining count % 16 elements. */
    for (; i < count; i++) {
        dst[i] = fp16_to_fp32(src[i]);
    }
#else
    for (size_t i = 0; i < count; i++) {
        dst[i] = fp16_to_fp32(src[i]);
    }
#endif
}
#define fp16_to_fp32(x)

References fp16_to_fp32.

◆ convert_f32_to_f16()

/**
 * Convert FP32 tensor to FP16.
 *
 * @param dst    Destination buffer, IEEE half-precision bit patterns [count]
 * @param src    Source buffer, FP32 [count]
 * @param count  Number of elements to convert
 */
void convert_f32_to_f16(uint16_t *dst, const float *src, size_t count)
{
#ifdef __AVX512F__
    const size_t vec_end = count - (count % 16);
    size_t i = 0;

    /* Rounding immediate 0 selects round-to-nearest-even. */
    for (; i < vec_end; i += 16) {
        const __m512 f = _mm512_loadu_ps(src + i);
        _mm256_storeu_si256((__m256i *)(dst + i), _mm512_cvtps_ph(f, 0));
    }

    /* Scalar tail for the remaining count % 16 elements. */
    for (; i < count; i++) {
        dst[i] = fp32_to_fp16(src[i]);
    }
#else
    for (size_t i = 0; i < count; i++) {
        dst[i] = fp32_to_fp16(src[i]);
    }
#endif
}
#define fp32_to_fp16(x)

References fp32_to_fp16.

◆ dot_f16()

/**
 * Dot product of an FP16 weight vector with an FP32 input vector.
 *
 * Implemented as a single-row GEMV so it inherits the SIMD dispatch.
 *
 * @param w_f16  Weight vector in FP16 [K]
 * @param x      Input vector in FP32 [K]
 * @param K      Vector length
 * @return       Scalar dot product in FP32
 */
float dot_f16(const uint16_t *w_f16, const float *x, int K)
{
    float out;

    gemv_f16(&out, w_f16, x, /*M=*/1, K);
    return out;
}
void gemv_f16(float *y, const uint16_t *W, const float *x, int M, int K)
Auto-dispatch GEMV based on available SIMD.

References gemv_f16().

◆ gemm_f16()

/**
 * Auto-dispatch GEMM based on available SIMD.
 *
 * @param Y  Output matrix [M x N]
 * @param W  Weight matrix in FP16 [M x K]
 * @param X  Input matrix [K x N]
 * @param M  Number of output rows
 * @param N  Batch size
 * @param K  Hidden dimension
 */
void gemm_f16(float *Y, const uint16_t *W, const float *X, int M, int N, int K)
{
#if defined(__AVX512F__)
    gemm_f16_avx512(Y, W, X, M, N, K);
#else
    /* Scalar reference fallback when AVX-512 is unavailable. */
    gemm_f16_ref(Y, W, X, M, N, K);
#endif
}
void gemm_f16_ref(float *Y, const uint16_t *W, const float *X, int M, int N, int K)
Matrix-matrix multiply with FP16 weights (scalar reference)

References gemm_f16_ref().

◆ gemm_f16_backward()

/**
 * Batched backward pass: input gradients for a whole batch.
 *
 * One backward GEMV per batch column; layout matches gemm_f16_ref
 * (dX columns of length K, dY columns of length M).
 *
 * @param dX  Gradient w.r.t. input [K x N]
 * @param W   Weight matrix in FP16 [M x K]
 * @param dY  Gradient w.r.t. output [M x N]
 * @param M   Number of output rows
 * @param N   Batch size
 * @param K   Hidden dimension
 */
void gemm_f16_backward(float *dX, const uint16_t *W, const float *dY, int M, int N, int K)
{
    for (int col = 0; col < N; col++) {
        gemv_f16_backward(dX + col * K, W, dY + col * M, M, K);
    }
}
void gemv_f16_backward(float *dX, const uint16_t *W, const float *dY, int M, int K)
Auto-dispatch backward.

References gemv_f16_backward().

◆ gemm_f16_ref()

/**
 * Matrix-matrix multiply with FP16 weights (scalar reference).
 *
 * Computed as N independent GEMVs, one per batch column.
 *
 * @param Y  Output matrix [M x N]
 * @param W  Weight matrix in FP16 [M x K]
 * @param X  Input matrix [K x N]
 * @param M  Number of output rows
 * @param N  Batch size
 * @param K  Hidden dimension
 */
void gemm_f16_ref(float *Y, const uint16_t *W, const float *X, int M, int N, int K)
{
    for (int col = 0; col < N; col++) {
        gemv_f16_ref(Y + col * M, W, X + col * K, M, K);
    }
}
void gemv_f16_ref(float *y, const uint16_t *W, const float *x, int M, int K)
Matrix-vector multiply with FP16 weights (scalar reference)

References gemv_f16_ref().

Referenced by gemm_f16().

◆ gemv_f16()

/**
 * Auto-dispatch GEMV based on available SIMD.
 *
 * @param y  Output vector [M]
 * @param W  Weight matrix in FP16 [M x K]
 * @param x  Input vector [K]
 * @param M  Number of output rows
 * @param K  Number of columns
 */
void gemv_f16(float *y, const uint16_t *W, const float *x, int M, int K)
{
#if defined(__AVX512F__)
    gemv_f16_avx512(y, W, x, M, K);
#else
    /* Scalar reference fallback when AVX-512 is unavailable. */
    gemv_f16_ref(y, W, x, M, K);
#endif
}

References gemv_f16_ref().

Referenced by dot_f16().

◆ gemv_f16_backward()

/**
 * Auto-dispatch backward GEMV based on available SIMD.
 *
 * @param dX  Output gradient w.r.t. input [K]
 * @param W   Weight matrix in FP16 [M x K]
 * @param dY  Gradient w.r.t. output [M]
 * @param M   Number of output rows
 * @param K   Number of columns (input dimension)
 */
void gemv_f16_backward(float *dX, const uint16_t *W, const float *dY, int M, int K)
{
#if defined(__AVX512F__)
    gemv_f16_backward_avx512(dX, W, dY, M, K);
#else
    /* Scalar reference fallback when AVX-512 is unavailable. */
    gemv_f16_backward_ref(dX, W, dY, M, K);
#endif
}
void gemv_f16_backward_ref(float *dX, const uint16_t *W, const float *dY, int M, int K)
Backward pass: compute input gradient (scalar reference)

References gemv_f16_backward_ref().

Referenced by gemm_f16_backward().

◆ gemv_f16_backward_ref()

/**
 * Backward pass: compute input gradient (scalar reference).
 *
 * Computes dX = W^T @ dY, decoding each FP16 weight on the fly.
 *
 * @param dX  Output gradient w.r.t. input [K]
 * @param W   Weight matrix in FP16 format [M x K]
 * @param dY  Gradient w.r.t. output [M]
 * @param M   Number of output rows
 * @param K   Number of columns (input dimension)
 */
void gemv_f16_backward_ref(float *dX, const uint16_t *W, const float *dY, int M, int K)
{
    int m, k;

    /* Start from a zeroed gradient accumulator. */
    for (k = 0; k < K; k++) {
        dX[k] = 0.0f;
    }

    /* Accumulate one weight row at a time: dX += W[m, :] * dY[m]. */
    for (m = 0; m < M; m++) {
        const float g = dY[m];
        const uint16_t *wp = W + m * K;

        for (k = 0; k < K; k++) {
            dX[k] += fp16_to_fp32(wp[k]) * g;
        }
    }
}

References fp16_to_fp32.

Referenced by gemv_f16_backward().

◆ gemv_f16_ref()

/**
 * Matrix-vector multiply with FP16 weights (scalar reference).
 *
 * Computes y = W @ x, decoding each FP16 weight on the fly.
 *
 * @param y  Output vector [M]
 * @param W  Weight matrix in FP16 [M x K]
 * @param x  Input vector [K]
 * @param M  Number of output rows
 * @param K  Number of columns
 */
void gemv_f16_ref(float *y, const uint16_t *W, const float *x, int M, int K)
{
    for (int m = 0; m < M; m++) {
        const uint16_t *wp = W + m * K;
        float acc = 0.0f;

        for (int k = 0; k < K; k++) {
            acc += fp16_to_fp32(wp[k]) * x[k];
        }

        y[m] = acc;
    }
}

References fp16_to_fp32.

Referenced by gemm_f16_ref(), and gemv_f16().