← Back to C-Kernel-Engine Docs Doxygen Source Documentation
softmax_kernels_bf16.c File Reference

Softmax kernels for BF16 tensors. More...

#include <stddef.h>
#include <stdint.h>
#include "bf16_utils.h"
#include "ckernel_engine.h"

Go to the source code of this file.

Functions

void backward_causal_softmax_head_major_bf16 (uint16_t *d_scores, const uint16_t *weights, int num_heads, int num_tokens, int aligned_context_window, float *scratch_d_scores, float *scratch_weights)
 
void causal_softmax_head_major_bf16 (uint16_t *scores, int num_heads, int num_tokens, int aligned_context_window, float *scratch)
 

Detailed Description

Softmax kernels for BF16 tensors.

CK-ENGINE KERNEL RULES:

  1. NO malloc/free - memory via bump allocator, pointers passed in
  2. NO OpenMP - parallelization at orchestrator/codegen layer
  3. API must define: inputs, outputs, workspace, and memory layouts
  4. Pure computation - deterministic, no side effects

After making changes, run: make test && make llamacpp-parity-full

Softmax: y[i] = exp(x[i] - max(x)) / sum_j(exp(x[j] - max(x)))

Definition in file softmax_kernels_bf16.c.

Function Documentation

◆ backward_causal_softmax_head_major_bf16()

void backward_causal_softmax_head_major_bf16 ( uint16_t *  d_scores,
const uint16_t *  weights,
int  num_heads,
int  num_tokens,
int  aligned_context_window,
float *  scratch_d_scores,
float *  scratch_weights 
)

Definition at line 53 of file softmax_kernels_bf16.c.

60 {
61  if (!d_scores || !weights || num_heads <= 0 || num_tokens <= 0 || aligned_context_window <= 0) return;
62  if (!scratch_d_scores || !scratch_weights) return;
63 
64  const size_t total = (size_t)num_heads *
65  (size_t)aligned_context_window *
66  (size_t)aligned_context_window;
67 
68  bf16_tensor_to_float(d_scores, scratch_d_scores, total);
69  bf16_tensor_to_float(weights, scratch_weights, total);
70  backward_causal_softmax_head_major(scratch_d_scores, scratch_weights, num_heads, num_tokens, aligned_context_window);
71  float_tensor_to_bf16(scratch_d_scores, d_scores, total);
72 }
static void float_tensor_to_bf16(const float *src, uint16_t *dst, size_t count)
Definition: bf16_utils.h:271
static void bf16_tensor_to_float(const uint16_t *src, float *dst, size_t count)
Definition: bf16_utils.h:250
void backward_causal_softmax_head_major(float *d_scores, const float *weights, int num_heads, int num_tokens, int aligned_context_window)

References backward_causal_softmax_head_major(), bf16_tensor_to_float(), and float_tensor_to_bf16().

◆ causal_softmax_head_major_bf16()

void causal_softmax_head_major_bf16 ( uint16_t *  scores,
int  num_heads,
int  num_tokens,
int  aligned_context_window,
float *  scratch 
)

Definition at line 31 of file softmax_kernels_bf16.c.

36 {
37  if (!scores || num_heads <= 0 || num_tokens <= 0 || aligned_context_window <= 0) return;
38  if (!scratch) return;
39 
40  const size_t total = (size_t)num_heads *
41  (size_t)aligned_context_window *
42  (size_t)aligned_context_window;
43 
44  bf16_tensor_to_float(scores, scratch, total);
45  causal_softmax_head_major(scratch, num_heads, num_tokens, aligned_context_window);
46  float_tensor_to_bf16(scratch, scores, total);
47 }
void causal_softmax_head_major(float *scores, int num_heads, int num_tokens, int aligned_context_window)

References bf16_tensor_to_float(), causal_softmax_head_major(), and float_tensor_to_bf16().