← Back to C-Kernel-Engine Docs Doxygen Source Documentation
sigmoid_kernels_bf16.c
Go to the documentation of this file.
1 /**
2  * @file sigmoid_kernels_bf16.c
3  * @brief Sigmoid activation kernels for BF16 tensors
4  *
5  * CK-ENGINE KERNEL RULES:
6  * =======================
7  * 1. NO malloc/free - memory via bump allocator, pointers passed in
8  * 2. NO OpenMP - parallelization at orchestrator/codegen layer
9  * 3. API must define: inputs, outputs, workspace, and memory layouts
10  * 4. Pure computation - deterministic, no side effects
11  *
12  * After changes: make test && make llamacpp-parity-full
13  *
14  * Sigmoid: y = 1 / (1 + exp(-x))
15  */
16 
17 #include <stddef.h>
18 #include <stdint.h>
19 
20 #include "bf16_utils.h"
21 #include "ckernel_engine.h"
22 
/*
 * Forward sigmoid over a BF16 tensor, computed through float scratch space.
 *
 * Pipeline:
 *   input (BF16) -> scratch_input (float) -> sigmoid_forward
 *                -> scratch_output (float) -> output (BF16)
 *
 * input, output:              n BF16 elements each
 * scratch_input, scratch_output: caller-provided buffers, each [n] floats
 *
 * The call is a no-op when n is 0 or when any of the pointers is NULL.
 */
void sigmoid_forward_bf16(const uint16_t *input,
                          uint16_t *output,
                          size_t n,
                          float *scratch_input,
                          float *scratch_output)
{
    const int have_tensors = (input != NULL) && (output != NULL);
    const int have_scratch = (scratch_input != NULL) && (scratch_output != NULL);

    if (n > 0 && have_tensors && have_scratch) {
        /* Widen to float, run the float kernel, then narrow the result. */
        bf16_tensor_to_float(input, scratch_input, n);
        sigmoid_forward(scratch_input, scratch_output, n);
        float_tensor_to_bf16(scratch_output, output, n);
    }
}
40 
/*
 * Backward sigmoid over BF16 tensors, computed through float scratch space.
 *
 * Pipeline:
 *   input    (BF16) -> scratch_input    (float)
 *   d_output (BF16) -> scratch_d_output (float)
 *   sigmoid_backward(scratch_input, scratch_d_output) -> scratch_d_input
 *   scratch_d_input (float) -> d_input (BF16)
 *
 * input, d_output, d_input: n BF16 elements each
 * scratch_input, scratch_d_output, scratch_d_input:
 *     caller-provided buffers, each [n] floats
 *
 * The call is a no-op when n is 0 or when any of the pointers is NULL.
 */
void sigmoid_backward_bf16(const uint16_t *input,
                           const uint16_t *d_output,
                           uint16_t *d_input,
                           size_t n,
                           float *scratch_input,
                           float *scratch_d_output,
                           float *scratch_d_input)
{
    const int have_tensors = (input != NULL) && (d_output != NULL) && (d_input != NULL);
    const int have_scratch = (scratch_input != NULL) &&
                             (scratch_d_output != NULL) &&
                             (scratch_d_input != NULL);

    if (n > 0 && have_tensors && have_scratch) {
        /* Widen both operands to float before calling the float kernel. */
        bf16_tensor_to_float(input, scratch_input, n);
        bf16_tensor_to_float(d_output, scratch_d_output, n);

        sigmoid_backward(scratch_input, scratch_d_output, scratch_d_input, n);

        /* Narrow the float gradient back to BF16 for the caller. */
        float_tensor_to_bf16(scratch_d_input, d_input, n);
    }
}
static void float_tensor_to_bf16(const float *src, uint16_t *dst, size_t count)
Definition: bf16_utils.h:271
static void bf16_tensor_to_float(const uint16_t *src, float *dst, size_t count)
Definition: bf16_utils.h:250
void sigmoid_backward(const float *input, const float *d_output, float *d_input, size_t n)
void sigmoid_forward(const float *input, float *output, size_t n)
void sigmoid_forward_bf16(const uint16_t *input, uint16_t *output, size_t n, float *scratch_input, float *scratch_output)
void sigmoid_backward_bf16(const uint16_t *input, const uint16_t *d_output, uint16_t *d_input, size_t n, float *scratch_input, float *scratch_d_output, float *scratch_d_input)