← Back to C-Kernel-Engine Docs Doxygen Source Documentation
fp16_convert.c File Reference

FP32 <-> FP16 SIMD conversion utilities. More...

#include <stdint.h>
#include <stddef.h>
#include <math.h>

Go to the source code of this file.

Functions

void ck_fma_f32_to_f16 (const float *a, const float *b, const float *c, uint16_t *dst, int n)
 FMA in FP32, store result as FP16: dst = a * b + c. More...
 
void ck_fp16_to_fp32_2d (const uint16_t *src, float *dst, int rows, int cols, int src_stride, int dst_stride)
 Convert 2D FP16 matrix to FP32 with strided access. More...
 
void ck_fp16_to_fp32_row (const uint16_t *src, float *dst, int n)
 Convert FP16 row to FP32 (auto-select best implementation) More...
 
static float ck_fp16_to_fp32_scalar (uint16_t h)
 
void ck_fp32_to_fp16_2d (const float *src, uint16_t *dst, int rows, int cols, int src_stride, int dst_stride)
 Convert 2D FP32 matrix to FP16 with strided access. More...
 
void ck_fp32_to_fp16_inplace (float *data, void *scratch, int n)
 Convert FP32 to FP16 in-place using scratch buffer. More...
 
void ck_fp32_to_fp16_row (const float *src, uint16_t *dst, int n)
 Convert FP32 row to FP16 (auto-select best implementation) More...
 
static uint16_t ck_fp32_to_fp16_scalar (float f)
 
void ck_scale_f32_to_f16 (const float *src, float scale, uint16_t *dst, int n)
 Scale FP32 array and store as FP16: dst = scale * src. More...
 

Detailed Description

FP32 <-> FP16 SIMD conversion utilities.

CK-ENGINE KERNEL RULES:

  1. NO malloc/free - memory via bump allocator, pointers passed in
  2. NO OpenMP - parallelization at orchestrator/codegen layer
  3. NO memcpy for layout - use strided access, not copies
  4. API must define: inputs, outputs, workspace, and memory layouts
  5. Pure computation - deterministic, no side effects

After changes: make test && make llamacpp-parity-full

These conversion functions use F16C hardware instructions (available on Intel Ivy Bridge and later, AMD Piledriver and later) for fast FP16/FP32 conversion. FP16 (IEEE 754 half-precision) provides 2x memory savings with ~0.1% precision loss for KV cache storage.

MEGA-FUSION BENEFIT:

FP16 KV cache doubles the context that fits in L3 cache:

  • FP32 KV: ~6K tokens in 6MB L3
  • FP16 KV: ~12K tokens in 6MB L3

This extends mega-fusion's "hot zone" for longer sequences.

Definition in file fp16_convert.c.

Function Documentation

◆ ck_fma_f32_to_f16()

void ck_fma_f32_to_f16 ( const float *  a,
const float *  b,
const float *  c,
uint16_t *  dst,
int  n 
)

FMA in FP32, store result as FP16: dst = a * b + c.

Parameters
a: First FP32 operand array
b: Second FP32 operand array
c: Third FP32 operand array
dst: Destination FP16 array
n: Number of elements

Definition at line 350 of file fp16_convert.c.

/*
 * Body of ck_fma_f32_to_f16: for each i in [0, n) computes a[i] * b[i] + c[i]
 * in FP32 and stores the result to dst[i] as a 16-bit FP16 value.
 * Returns without writing anything when any pointer is NULL or n <= 0.
 * The implementation is selected at compile time: AVX-512F (16 elements per
 * iteration), AVX + F16C (8 elements per iteration), or a scalar loop.
 *
 * NOTE(review): the vector paths convert with _MM_FROUND_TO_NEAREST_INT while
 * the tail/fallback loops go through ck_fp32_to_fp16_scalar(); confirm both
 * round identically so results do not depend on an element's position.
 */
351  {
352  if (!a || !b || !c || !dst || n <= 0) return;
353 
354 #if defined(__AVX512F__)
355  int i = 0;
/* Vector loop: runs while at least 16 elements remain. */
356  for (; i + 15 < n; i += 16) {
357  __m512 va = _mm512_loadu_ps(a + i);
358  __m512 vb = _mm512_loadu_ps(b + i);
359  __m512 vc = _mm512_loadu_ps(c + i);
360  __m512 vr = _mm512_fmadd_ps(va, vb, vc);
361  __m256i vh = _mm512_cvtps_ph(vr, _MM_FROUND_TO_NEAREST_INT);
362  _mm256_storeu_si256((__m256i*)(dst + i), vh);
363  }
/* Scalar tail for the remaining (< 16) elements. */
364  for (; i < n; i++) {
365  dst[i] = ck_fp32_to_fp16_scalar(a[i] * b[i] + c[i]);
366  }
367 #elif defined(__AVX__) && defined(__F16C__)
368  int i = 0;
/* Vector loop: runs while at least 8 elements remain. */
369  for (; i + 7 < n; i += 8) {
370  __m256 va = _mm256_loadu_ps(a + i);
371  __m256 vb = _mm256_loadu_ps(b + i);
372  __m256 vc = _mm256_loadu_ps(c + i);
373 #if defined(__FMA__)
374  __m256 vr = _mm256_fmadd_ps(va, vb, vc);
375 #else
/* Without __FMA__ the product and the sum are two separate operations. */
376  __m256 vr = _mm256_add_ps(_mm256_mul_ps(va, vb), vc);
377 #endif
378  __m128i vh = _mm256_cvtps_ph(vr, _MM_FROUND_TO_NEAREST_INT);
379  _mm_storeu_si128((__m128i*)(dst + i), vh);
380  }
/* Scalar tail for the remaining (< 8) elements. */
381  for (; i < n; i++) {
382  dst[i] = ck_fp32_to_fp16_scalar(a[i] * b[i] + c[i]);
383  }
384 #else
/* Portable fallback: every element goes through the scalar converter. */
385  for (int i = 0; i < n; i++) {
386  dst[i] = ck_fp32_to_fp16_scalar(a[i] * b[i] + c[i]);
387  }
388 #endif
389 }
static uint16_t ck_fp32_to_fp16_scalar(float f)
Definition: fp16_convert.c:66

References ck_fp32_to_fp16_scalar().

◆ ck_fp16_to_fp32_2d()

void ck_fp16_to_fp32_2d ( const uint16_t *  src,
float *  dst,
int  rows,
int  cols,
int  src_stride,
int  dst_stride 
)

Convert 2D FP16 matrix to FP32 with strided access.

Parameters
src: Source FP16 matrix [rows, src_stride]
dst: Destination FP32 matrix [rows, dst_stride]
rows: Number of rows
cols: Number of columns (actual data per row)
src_stride: Source stride (elements per row)
dst_stride: Destination stride (elements per row)

Definition at line 298 of file fp16_convert.c.

/*
 * Body of ck_fp16_to_fp32_2d: converts `cols` elements of each of `rows` rows
 * by calling ck_fp16_to_fp32_row() once per row.
 * Returns without writing anything when src or dst is NULL, or when rows or
 * cols is <= 0. The strides are not checked against cols here.
 */
300  {
301  if (!src || !dst || rows <= 0 || cols <= 0) return;
302 
303  for (int r = 0; r < rows; r++) {
/* Row r starts r * stride elements in; the offset is computed in size_t. */
304  ck_fp16_to_fp32_row(src + (size_t)r * src_stride,
305  dst + (size_t)r * dst_stride,
306  cols);
307  }
308 }
void ck_fp16_to_fp32_row(const uint16_t *src, float *dst, int n)
Convert FP16 row to FP32 (auto-select best implementation)
Definition: fp16_convert.c:250

References ck_fp16_to_fp32_row().

◆ ck_fp16_to_fp32_row()

void ck_fp16_to_fp32_row ( const uint16_t *  src,
float *  dst,
int  n 
)

Convert FP16 row to FP32 (auto-select best implementation)

Parameters
src: Source FP16 array
dst: Destination FP32 array (caller-allocated)
n: Number of elements

Definition at line 250 of file fp16_convert.c.

/*
 * Body of ck_fp16_to_fp32_row: converts n FP16 values from src into FP32
 * values in dst. Returns without writing anything when src or dst is NULL or
 * n <= 0. The implementation is chosen at compile time: the AVX-512 helper,
 * the AVX helper, or a per-element scalar loop.
 *
 * NOTE(review): this dispatch tests __AVX__ alone, whereas
 * ck_fma_f32_to_f16() and ck_scale_f32_to_f16() require
 * __AVX__ && __F16C__. ck_fp16_to_fp32_avx() is not shown here; confirm it
 * is available and correct when __AVX__ is defined without __F16C__.
 */
250  {
251  if (!src || !dst || n <= 0) return;
252 
253 #if defined(__AVX512F__)
254  ck_fp16_to_fp32_avx512(src, dst, n);
255 #elif defined(__AVX__)
256  ck_fp16_to_fp32_avx(src, dst, n);
257 #else
258  for (int i = 0; i < n; i++) {
259  dst[i] = ck_fp16_to_fp32_scalar(src[i]);
260  }
261 #endif
262 }
static float ck_fp16_to_fp32_scalar(uint16_t h)
Definition: fp16_convert.c:91

References ck_fp16_to_fp32_scalar().

Referenced by ck_fp16_to_fp32_2d().

◆ ck_fp16_to_fp32_scalar()

static float ck_fp16_to_fp32_scalar ( uint16_t  h)
inline static

Definition at line 91 of file fp16_convert.c.

/*
 * Body of ck_fp16_to_fp32_scalar: decodes the 16-bit pattern h as
 * 1 sign bit, 5 exponent bits and 10 mantissa bits, and builds the
 * corresponding 32-bit float bit pattern through a union.
 * Handles zero, subnormal, normal, and the all-ones exponent (Inf/NaN).
 */
91  {
/* Move the sign from bit 15 to bit 31. */
92  uint32_t sign = ((uint32_t)h & 0x8000) << 16;
93  int exp = (h >> 10) & 0x1F;
94  uint32_t mant = h & 0x3FF;
95 
96  if (exp == 0) {
97  if (mant == 0) {
98  /* Zero */
99  union { uint32_t u; float f; } u = { sign };
100  return u.f;
101  }
102  /* Denormalized number */
/* Shift the mantissa left until bit 10 is set, lowering exp once per shift. */
103  while (!(mant & 0x400)) {
104  mant <<= 1;
105  exp--;
106  }
/* Compensate by one, then drop bit 10 so only the 10 fraction bits remain. */
107  exp++;
108  mant &= 0x3FF;
109  } else if (exp == 31) {
110  /* Infinity or NaN */
/* Exponent field forced to all ones; mantissa bits are carried over (<< 13). */
111  union { uint32_t u; float f; } u = { sign | 0x7F800000 | (mant << 13) };
112  return u.f;
113  }
114 
/* Rebias the exponent by 112 (= 127 - 15) and widen the mantissa by 13 bits. */
115  union { uint32_t u; float f; } u = { sign | ((uint32_t)(exp + 112) << 23) | (mant << 13) };
116  return u.f;
117 }

Referenced by ck_fp16_to_fp32_row().

◆ ck_fp32_to_fp16_2d()

void ck_fp32_to_fp16_2d ( const float *  src,
uint16_t *  dst,
int  rows,
int  cols,
int  src_stride,
int  dst_stride 
)

Convert 2D FP32 matrix to FP16 with strided access.

Parameters
src: Source FP32 matrix [rows, src_stride]
dst: Destination FP16 matrix [rows, dst_stride]
rows: Number of rows
cols: Number of columns (actual data per row)
src_stride: Source stride (elements per row)
dst_stride: Destination stride (elements per row)

Definition at line 277 of file fp16_convert.c.

/*
 * Body of ck_fp32_to_fp16_2d: converts `cols` elements of each of `rows` rows
 * by calling ck_fp32_to_fp16_row() once per row.
 * Returns without writing anything when src or dst is NULL, or when rows or
 * cols is <= 0. The strides are not checked against cols here.
 */
279  {
280  if (!src || !dst || rows <= 0 || cols <= 0) return;
281 
282  for (int r = 0; r < rows; r++) {
/* Row r starts r * stride elements in; the offset is computed in size_t. */
283  ck_fp32_to_fp16_row(src + (size_t)r * src_stride,
284  dst + (size_t)r * dst_stride,
285  cols);
286  }
287 }
void ck_fp32_to_fp16_row(const float *src, uint16_t *dst, int n)
Convert FP32 row to FP16 (auto-select best implementation)
Definition: fp16_convert.c:230

References ck_fp32_to_fp16_row().

◆ ck_fp32_to_fp16_inplace()

void ck_fp32_to_fp16_inplace ( float *  data,
void *  scratch,
int  n 
)

Convert FP32 to FP16 in-place using scratch buffer.

Useful when you want to downcast in place but need FP32 for computation. Writes the n FP16 values to the start of scratch, then copies them back over the beginning of data.

Parameters
data: FP32 array to convert (on return, its first n * sizeof(uint16_t) bytes hold the FP16 values)
scratch: Temporary buffer, must be >= n * sizeof(uint16_t)
n: Number of elements
Note
After this call, data should be treated as uint16_t*

Definition at line 325 of file fp16_convert.c.

/*
 * Body of ck_fp32_to_fp16_inplace: converts the n floats in data to FP16 into
 * scratch (viewed as uint16_t[n]) via ck_fp32_to_fp16_row(), then copies those
 * n 16-bit values over the start of data's storage.
 * Returns without touching anything when data or scratch is NULL or n <= 0.
 */
325  {
326  if (!data || !scratch || n <= 0) return;
327 
328  uint16_t *tmp = (uint16_t*)scratch;
329  ck_fp32_to_fp16_row(data, tmp, n);
330 
331  /* Copy back (FP16 is half the size, so this is safe) */
/* data is reinterpreted as uint16_t; only its first n 16-bit slots are written. */
332  uint16_t *dst = (uint16_t*)data;
333  for (int i = 0; i < n; i++) {
334  dst[i] = tmp[i];
335  }
336 }

References ck_fp32_to_fp16_row().

◆ ck_fp32_to_fp16_row()

void ck_fp32_to_fp16_row ( const float *  src,
uint16_t *  dst,
int  n 
)

Convert FP32 row to FP16 (auto-select best implementation)

Parameters
src: Source FP32 array
dst: Destination FP16 array (caller-allocated)
n: Number of elements

Definition at line 230 of file fp16_convert.c.

/*
 * Body of ck_fp32_to_fp16_row: converts n FP32 values from src into FP16
 * values in dst. Returns without writing anything when src or dst is NULL or
 * n <= 0. The implementation is chosen at compile time: the AVX-512 helper,
 * the AVX helper, or a per-element scalar loop.
 *
 * NOTE(review): this dispatch tests __AVX__ alone, whereas
 * ck_fma_f32_to_f16() and ck_scale_f32_to_f16() require
 * __AVX__ && __F16C__. ck_fp32_to_fp16_avx() is not shown here; confirm it
 * is available and correct when __AVX__ is defined without __F16C__.
 */
230  {
231  if (!src || !dst || n <= 0) return;
232 
233 #if defined(__AVX512F__)
234  ck_fp32_to_fp16_avx512(src, dst, n);
235 #elif defined(__AVX__)
236  ck_fp32_to_fp16_avx(src, dst, n);
237 #else
238  for (int i = 0; i < n; i++) {
239  dst[i] = ck_fp32_to_fp16_scalar(src[i]);
240  }
241 #endif
242 }

References ck_fp32_to_fp16_scalar().

Referenced by ck_fp32_to_fp16_2d(), and ck_fp32_to_fp16_inplace().

◆ ck_fp32_to_fp16_scalar()

static uint16_t ck_fp32_to_fp16_scalar ( float  f)
inline static

Definition at line 66 of file fp16_convert.c.

/**
 * @brief Convert one FP32 value to an IEEE 754 binary16 (FP16) bit pattern.
 *
 * Rounds to nearest, ties to even, which is the rounding the vector paths in
 * this file request with _MM_FROUND_TO_NEAREST_INT, so scalar tail elements
 * and vectorised elements produce the same bits.
 *
 * Special cases:
 *  - NaN  -> quiet FP16 NaN (sign and the top 10 payload bits are kept)
 *  - Inf  -> FP16 infinity with the same sign
 *  - finite values too large for FP16 (including 65520 and above after
 *    rounding) -> infinity with the same sign
 *  - values below half of the smallest FP16 subnormal -> signed zero
 *  - values in the FP16 subnormal range -> correctly rounded subnormal
 *
 * @param f FP32 value to convert
 * @return 16-bit FP16 encoding of f
 */
static inline uint16_t ck_fp32_to_fp16_scalar(float f)
{
    union { float f; uint32_t u; } bits = { f };
    const uint32_t x = bits.u;

    const uint32_t sign   = (x >> 16) & 0x8000u;   /* FP32 bit 31 -> FP16 bit 15 */
    const uint32_t exp32  = (x >> 23) & 0xFFu;     /* raw biased FP32 exponent   */
    const uint32_t mant32 = x & 0x7FFFFFu;         /* 23-bit FP32 fraction       */

    /* Inf/NaN must be classified on the raw exponent field (all ones). */
    if (exp32 == 0xFFu) {
        if (mant32 != 0) {
            /* 0x7E00 = all-ones exponent plus the quiet bit. */
            return (uint16_t)(sign | 0x7E00u | (mant32 >> 13));
        }
        return (uint16_t)(sign | 0x7C00u);
    }

    /* Rebias from FP32 (127) to FP16 (15). */
    const int e = (int)exp32 - 127 + 15;

    if (e >= 31) {
        /* Finite but out of FP16 range: overflow to infinity. */
        return (uint16_t)(sign | 0x7C00u);
    }

    if (e <= 0) {
        /* Below 2^-25 nothing can round up to the smallest subnormal. */
        if (e < -10) {
            return (uint16_t)sign;
        }
        /*
         * Subnormal result: restore the implicit leading 1 and shift the
         * 24-bit significand down so that one unit equals 2^-24.
         * shift is in [14, 24].
         */
        const uint32_t sig     = mant32 | 0x800000u;
        const unsigned shift   = (unsigned)(14 - e);
        const uint32_t halfway = 1u << (shift - 1);
        const uint32_t rem     = sig & ((1u << shift) - 1u);
        uint32_t h             = sig >> shift;

        if (rem > halfway || (rem == halfway && (h & 1u))) {
            h++;   /* may carry into 0x0400, the smallest normal: still correct */
        }
        return (uint16_t)(sign | h);
    }

    /* Normal result: keep the top 10 fraction bits, round on the 13 dropped. */
    uint32_t h = ((uint32_t)e << 10) | (mant32 >> 13);
    const uint32_t rem = mant32 & 0x1FFFu;

    if (rem > 0x1000u || (rem == 0x1000u && (h & 1u))) {
        h++;   /* a carry propagates into the exponent; 0x7BFF + 1 == infinity */
    }
    return (uint16_t)(sign | h);
}

Referenced by ck_fma_f32_to_f16(), ck_fp32_to_fp16_row(), and ck_scale_f32_to_f16().

◆ ck_scale_f32_to_f16()

void ck_scale_f32_to_f16 ( const float *  src,
float  scale,
uint16_t *  dst,
int  n 
)

Scale FP32 array and store as FP16: dst = scale * src.

Parameters
src: Source FP32 array
scale: Scalar multiplier
dst: Destination FP16 array
n: Number of elements

Definition at line 398 of file fp16_convert.c.

/*
 * Body of ck_scale_f32_to_f16: for each i in [0, n) computes src[i] * scale
 * in FP32 and stores the result to dst[i] as a 16-bit FP16 value.
 * Returns without writing anything when src or dst is NULL or n <= 0.
 * The implementation is selected at compile time: AVX-512F (16 elements per
 * iteration), AVX + F16C (8 elements per iteration), or a scalar loop.
 *
 * NOTE(review): the vector paths convert with _MM_FROUND_TO_NEAREST_INT while
 * the tail/fallback loops go through ck_fp32_to_fp16_scalar(); confirm both
 * round identically so results do not depend on an element's position.
 */
398  {
399  if (!src || !dst || n <= 0) return;
400 
401 #if defined(__AVX512F__)
/* Broadcast the multiplier to all 16 lanes once, outside the loop. */
402  __m512 vs = _mm512_set1_ps(scale);
403  int i = 0;
/* Vector loop: runs while at least 16 elements remain. */
404  for (; i + 15 < n; i += 16) {
405  __m512 vx = _mm512_loadu_ps(src + i);
406  __m512 vr = _mm512_mul_ps(vx, vs);
407  __m256i vh = _mm512_cvtps_ph(vr, _MM_FROUND_TO_NEAREST_INT);
408  _mm256_storeu_si256((__m256i*)(dst + i), vh);
409  }
/* Scalar tail for the remaining (< 16) elements. */
410  for (; i < n; i++) {
411  dst[i] = ck_fp32_to_fp16_scalar(src[i] * scale);
412  }
413 #elif defined(__AVX__) && defined(__F16C__)
/* Broadcast the multiplier to all 8 lanes once, outside the loop. */
414  __m256 vs = _mm256_set1_ps(scale);
415  int i = 0;
/* Vector loop: runs while at least 8 elements remain. */
416  for (; i + 7 < n; i += 8) {
417  __m256 vx = _mm256_loadu_ps(src + i);
418  __m256 vr = _mm256_mul_ps(vx, vs);
419  __m128i vh = _mm256_cvtps_ph(vr, _MM_FROUND_TO_NEAREST_INT);
420  _mm_storeu_si128((__m128i*)(dst + i), vh);
421  }
/* Scalar tail for the remaining (< 8) elements. */
422  for (; i < n; i++) {
423  dst[i] = ck_fp32_to_fp16_scalar(src[i] * scale);
424  }
425 #else
/* Portable fallback: every element goes through the scalar converter. */
426  for (int i = 0; i < n; i++) {
427  dst[i] = ck_fp32_to_fp16_scalar(src[i] * scale);
428  }
429 #endif
430 }

References ck_fp32_to_fp16_scalar().