← Back to C-Kernel-Engine Docs Doxygen Source Documentation
bf16_utils.h File Reference
#include <stdint.h>
#include <stddef.h>

Go to the source code of this file.

Functions

static void bf16_tensor_to_float (const uint16_t *src, float *dst, size_t count)
 
static float bf16_to_float (uint16_t v)
 
static void float_tensor_to_bf16 (const float *src, uint16_t *dst, size_t count)
 
static uint16_t float_to_bf16 (float f)
 

Function Documentation

◆ bf16_tensor_to_float()

static void bf16_tensor_to_float ( const uint16_t *  src,
float *  dst,
size_t  count 
)
inline static

Definition at line 250 of file bf16_utils.h.

/**
 * Convert an array of BF16 values (stored as uint16_t) to float.
 *
 * @param src   Input array of `count` BF16 bit patterns.
 * @param dst   Output array receiving `count` floats.
 * @param count Number of elements to convert.
 *
 * When built with AVX-512F, elements are processed 16 at a time through
 * bf16_loadu_cvt_fp32() and stored with an unaligned 512-bit store; any
 * remaining elements (and all elements on other builds) go through the
 * scalar bf16_to_float().
 */
static inline void bf16_tensor_to_float(const uint16_t *src, float *dst, size_t count)
{
    size_t pos = 0;

#if defined(__AVX512F__)
    /* Bulk path: one 512-bit vector (16 floats) per iteration. */
    while (pos + 16 <= count) {
        _mm512_storeu_ps(dst + pos, bf16_loadu_cvt_fp32(src + pos));
        pos += 16;
    }
#endif

    /* Scalar path: the tail after the vector loop, or everything without AVX-512F. */
    while (pos < count) {
        dst[pos] = bf16_to_float(src[pos]);
        ++pos;
    }
}
static float bf16_to_float(uint16_t v)
Definition: bf16_utils.h:38

References bf16_to_float().

Referenced by backward_causal_softmax_head_major_bf16(), causal_softmax_head_major_bf16(), convert_bf16_tensor_to_buf(), geglu_forward_bf16(), gelu_backward_exact_bf16(), gelu_backward_fast_bf16(), gelu_fast_inplace_bf16(), layernorm_backward_kernel_bf16(), layernorm_forward_rolled_slice_bf16(), layernorm_forward_unrolled_slice_bf16(), mlp_token_parallel_bf16_fp32act(), rope_backward_bf16(), rope_forward_bf16(), sigmoid_backward_bf16(), sigmoid_forward_bf16(), and softmax_cross_entropy_loss_bf16().

◆ bf16_to_float()

◆ float_tensor_to_bf16()

static void float_tensor_to_bf16 ( const float *  src,
uint16_t *  dst,
size_t  count 
)
inline static

Definition at line 271 of file bf16_utils.h.

/**
 * Convert an array of floats to BF16 values (stored as uint16_t).
 *
 * @param src   Input array of `count` floats.
 * @param dst   Output array receiving `count` BF16 bit patterns.
 * @param count Number of elements to convert.
 *
 * When built with AVX-512F, elements are loaded 16 at a time with an
 * unaligned 512-bit load and written through fp32_cvt_storeu_bf16(); any
 * remaining elements (and all elements on other builds) go through the
 * scalar float_to_bf16().
 */
static inline void float_tensor_to_bf16(const float *src, uint16_t *dst, size_t count)
{
    size_t pos = 0;

#if defined(__AVX512F__)
    /* Bulk path: one 512-bit vector (16 floats) per iteration. */
    while (pos + 16 <= count) {
        fp32_cvt_storeu_bf16(dst + pos, _mm512_loadu_ps(src + pos));
        pos += 16;
    }
#endif

    /* Scalar path: the tail after the vector loop, or everything without AVX-512F. */
    while (pos < count) {
        dst[pos] = float_to_bf16(src[pos]);
        ++pos;
    }
}
static uint16_t float_to_bf16(float f)
Definition: bf16_utils.h:90

References float_to_bf16().

Referenced by backward_causal_softmax_head_major_bf16(), causal_softmax_head_major_bf16(), geglu_forward_bf16(), gelu_backward_exact_bf16(), gelu_backward_fast_bf16(), gelu_fast_inplace_bf16(), layernorm_backward_kernel_bf16(), layernorm_forward_rolled_slice_bf16(), layernorm_forward_unrolled_slice_bf16(), mlp_token_parallel_bf16_fp32act(), rope_backward_bf16(), rope_forward_bf16(), sigmoid_backward_bf16(), sigmoid_forward_bf16(), and softmax_cross_entropy_loss_bf16().

◆ float_to_bf16()

static uint16_t float_to_bf16 ( float  f)
inline static

Definition at line 90 of file bf16_utils.h.

/**
 * Convert a float to a BF16 bit pattern using round-to-nearest-even.
 *
 * BF16 keeps the upper 16 bits of the IEEE-754 binary32 encoding (sign,
 * 8 exponent bits, top 7 mantissa bits); the lower 16 bits are rounded away.
 *
 * @param f Value to convert.
 * @return  The 16-bit BF16 encoding of `f`. NaN inputs yield a quiet NaN with
 *          the input's sign and upper payload bits preserved.
 */
static inline uint16_t float_to_bf16(float f)
{
    union {
        uint32_t u;
        float f;
    } tmp;
    tmp.f = f;

    // NaN (exponent all ones, mantissa non-zero) must bypass the rounding add:
    // the bias could otherwise clear the mantissa (NaN -> Inf) or carry through
    // the exponent into the sign bit (NaN -> zero). Force the top mantissa bit
    // so the truncated result is guaranteed to remain a NaN.
    if ((tmp.u & 0x7FFFFFFFu) > 0x7F800000u) {
        return (uint16_t)((tmp.u >> 16) | 0x0040u);
    }

    // Bit 16 becomes the LSB of the BF16 result after truncation.
    uint32_t lsb = (tmp.u >> 16) & 1u;
    // Rounding bias: 0x7FFF normally, 0x8000 when the LSB is 1, so exact ties
    // round to the even result.
    tmp.u += 0x7FFFu + lsb;
    // Drop the lower 16 bits.
    return (uint16_t)(tmp.u >> 16);
}

Referenced by __attribute__(), adamw_update_bf16(), add_forward_2d_bf16(), add_forward_bf16(), add_inplace_bf16(), add_scaled_forward_bf16(), add_scaled_inplace_bf16(), embedding_backward_bf16(), embedding_forward_bf16(), float_tensor_to_bf16(), gemm_nn_bf16(), gemm_tn_bf16(), gradient_accumulate_bf16(), gradient_scale_bf16(), mlp_token_parallel_bf16(), patch2im_bf16(), relu_backward_bf16(), relu_forward_bf16(), relu_forward_inplace_bf16(), rmsnorm_backward_bf16(), rmsnorm_forward_bf16(), sgd_momentum_update_bf16(), swiglu_backward_bf16(), and swiglu_forward_bf16().