← Back to C-Kernel-Engine Docs Doxygen Source Documentation
vision_kernels_bf16.c
Go to the documentation of this file.
1 /**
2  * @file vision_kernels_bf16.c
3  * @brief Vision kernels for BF16 tensors (im2patch, etc.)
4  *
5  * CK-ENGINE KERNEL RULES:
6  * =======================
7  * 1. NO malloc/free - memory via bump allocator, pointers passed in
8  * 2. NO OpenMP - parallelization at orchestrator/codegen layer
9  * 3. API must define: inputs, outputs, workspace, and memory layouts
10  * 4. Pure computation - deterministic, no side effects
11  *
12  * After changes: make test && make llamacpp-parity-full
13  */
14 
15 #include <string.h>
16 #include <stddef.h>
17 #include <stdint.h>
18 
19 #include "bf16_utils.h"
20 #include "ckernel_engine.h"
21 
/**
 * @brief Split a planar BF16 image into non-overlapping P x P patches.
 *
 * Memory layouts (element = one uint16_t, copied verbatim, no conversion):
 *   - image:   C planes, each H rows of W elements
 *              (index = c * H * W + y * W + x).
 *   - patches: (H / P) * (W / P) patches ordered row-major by patch position;
 *              each patch holds C * P * P elements ordered channel, then
 *              patch row, then patch column.
 *
 * Rows/columns past the last full patch (H or W not a multiple of P) are
 * never read. If P exceeds H or W there are no full patches and nothing is
 * written.
 *
 * The call is a no-op when a pointer is NULL or any dimension is <= 0.
 *
 * @param image   Input image, C * H * W elements.
 * @param patches Output buffer, (H / P) * (W / P) * C * P * P elements.
 *                Must not overlap @p image (rows are moved with memcpy).
 * @param C       Number of channels (planes).
 * @param H       Image height in rows.
 * @param W       Image width in elements.
 * @param P       Patch edge length.
 */
void im2patch_bf16(const uint16_t *image,
                   uint16_t *patches,
                   int C,
                   int H,
                   int W,
                   int P)
{
    if (!image || !patches || C <= 0 || H <= 0 || W <= 0 || P <= 0) {
        return;
    }

    /* Every extent is known positive here, so widen once and do all index
     * math in size_t: products such as C * P * P or the flat patch index
     * must not be evaluated in int, where overflow is undefined. */
    const size_t channels = (size_t)C;
    const size_t width = (size_t)W;
    const size_t edge = (size_t)P;

    const size_t patches_down = (size_t)H / edge;
    const size_t patches_across = width / edge;

    const size_t image_plane = (size_t)H * width; /* elements per channel in image */
    const size_t patch_plane = edge * edge;       /* elements per channel in a patch */
    const size_t patch_dim = channels * patch_plane;
    const size_t row_bytes = edge * sizeof(uint16_t);

    for (size_t ph = 0; ph < patches_down; ++ph) {
        for (size_t pw = 0; pw < patches_across; ++pw) {
            /* Top-left element of this patch inside channel 0. */
            const uint16_t *src_corner = image + (ph * edge) * width + pw * edge;
            uint16_t *dst_patch = patches + (ph * patches_across + pw) * patch_dim;

            for (size_t c = 0; c < channels; ++c) {
                const uint16_t *src_plane = src_corner + c * image_plane;
                uint16_t *dst_plane = dst_patch + c * patch_plane;

                /* One contiguous run of P elements per patch row. */
                for (size_t py = 0; py < edge; ++py) {
                    memcpy(dst_plane + py * edge, src_plane + py * width, row_bytes);
                }
            }
        }
    }
}
56 
/**
 * @brief Scatter BF16 patch values back into a planar image.
 *
 * Memory layouts (element = one uint16_t holding a BF16 value):
 *   - d_patches: (H / P) * (W / P) patches ordered row-major by patch
 *                position; each patch holds C * P * P elements ordered
 *                channel, then patch row, then patch column.
 *   - d_image:   C planes, each H rows of W elements
 *                (index = c * H * W + y * W + x).
 *
 * All C * H * W elements of @p d_image are first set to zero bytes. Each
 * patch element is then added to its image element: both operands are
 * widened with bf16_to_float(), summed in float, and stored back with
 * float_to_bf16(). Patches are laid out on a stride of P in both directions,
 * so every covered image element receives exactly one such addition. Image
 * rows/columns past the last full patch stay zero.
 *
 * The call is a no-op (the image is not cleared) when a pointer is NULL or
 * any dimension is <= 0.
 *
 * @param d_patches Input patches, (H / P) * (W / P) * C * P * P elements.
 * @param d_image   Output image, C * H * W elements; overwritten.
 * @param C         Number of channels (planes).
 * @param H         Image height in rows.
 * @param W         Image width in elements.
 * @param P         Patch edge length.
 */
void patch2im_bf16(const uint16_t *d_patches,
                   uint16_t *d_image,
                   int C,
                   int H,
                   int W,
                   int P)
{
    if (!d_patches || !d_image || C <= 0 || H <= 0 || W <= 0 || P <= 0) {
        return;
    }

    /* Every extent is known positive here, so widen once and do all index
     * math in size_t: products such as C * P * P or the flat patch index
     * must not be evaluated in int, where overflow is undefined. */
    const size_t channels = (size_t)C;
    const size_t width = (size_t)W;
    const size_t edge = (size_t)P;

    const size_t patches_down = (size_t)H / edge;
    const size_t patches_across = width / edge;

    const size_t image_plane = (size_t)H * width; /* elements per channel in image */
    const size_t patch_plane = edge * edge;       /* elements per channel in a patch */
    const size_t patch_dim = channels * patch_plane;

    /* Clear the whole image so the accumulation below starts from zero and
     * uncovered border elements end up zero. */
    memset(d_image, 0, channels * image_plane * sizeof(uint16_t));

    for (size_t ph = 0; ph < patches_down; ++ph) {
        for (size_t pw = 0; pw < patches_across; ++pw) {
            /* Top-left element of this patch inside channel 0. */
            uint16_t *dst_corner = d_image + (ph * edge) * width + pw * edge;
            const uint16_t *src_patch = d_patches + (ph * patches_across + pw) * patch_dim;

            for (size_t c = 0; c < channels; ++c) {
                uint16_t *dst_plane = dst_corner + c * image_plane;
                const uint16_t *src_plane = src_patch + c * patch_plane;

                for (size_t py = 0; py < edge; ++py) {
                    uint16_t *dst_row = dst_plane + py * width;
                    const uint16_t *src_row = src_plane + py * edge;

                    for (size_t px = 0; px < edge; ++px) {
                        float acc = bf16_to_float(dst_row[px]) + bf16_to_float(src_row[px]);
                        dst_row[px] = float_to_bf16(acc);
                    }
                }
            }
        }
    }
}
96 
static uint16_t float_to_bf16(float f)
Definition: bf16_utils.h:90
static float bf16_to_float(uint16_t v)
Definition: bf16_utils.h:38
#define C(color)
Definition: show_config.c:39
void im2patch_bf16(const uint16_t *image, uint16_t *patches, int C, int H, int W, int P)
void patch2im_bf16(const uint16_t *d_patches, uint16_t *d_image, int C, int H, int W, int P)