← Back to C-Kernel-Engine Docs Doxygen Source Documentation
vision_kernels_bf16.c File Reference

Vision kernels for BF16 tensors (im2patch, etc.) More...

#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include "bf16_utils.h"
#include "ckernel_engine.h"

Go to the source code of this file.

Functions

void im2patch_bf16 (const uint16_t *image, uint16_t *patches, int C, int H, int W, int P)
 
void patch2im_bf16 (const uint16_t *d_patches, uint16_t *d_image, int C, int H, int W, int P)
 

Detailed Description

Vision kernels for BF16 tensors (im2patch, etc.)

CK-ENGINE KERNEL RULES:

  1. NO malloc/free - memory via bump allocator, pointers passed in
  2. NO OpenMP - parallelization at orchestrator/codegen layer
  3. API must define: inputs, outputs, workspace, and memory layouts
  4. Pure computation - deterministic, no side effects

After changes: make test && make llamacpp-parity-full

Definition in file vision_kernels_bf16.c.

Function Documentation

◆ im2patch_bf16()

void im2patch_bf16 ( const uint16_t *  image,
uint16_t *  patches,
int  C,
int  H,
int  W,
int  P 
)

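Copies each non-overlapping P x P tile of a channel-major (C, H, W) BF16 image into one contiguous patch of C * P * P elements; patches are written in row-major tile order. Rows and columns beyond the last whole tile (H % P, W % P) are not copied. The function returns without doing anything if either pointer is NULL or any dimension is not positive.

Definition at line 22 of file vision_kernels_bf16.c.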

28 { /* Gather non-overlapping P x P tiles of `image` into contiguous rows of `patches`. */
29  if (!image || !patches || C <= 0 || H <= 0 || W <= 0 || P <= 0) { /* reject NULL buffers and non-positive dimensions */
30  return; /* silent no-op: `patches` is left untouched */
31  }
32 
33  int num_patches_h = H / P; /* integer division: the trailing H % P rows are never read */
34  int num_patches_w = W / P; /* integer division: the trailing W % P columns are never read */
35  int patch_dim = C * P * P; /* elements per patch; NOTE(review): computed in int before the size_t casts below, so very large C/P could overflow - confirm caller limits */
36 
37  for (int ph = 0; ph < num_patches_h; ++ph) { /* tile row */
38  for (int pw = 0; pw < num_patches_w; ++pw) { /* tile column */
39  int patch_idx = ph * num_patches_w + pw; /* patches are ordered row-major over the tile grid */
40  uint16_t *dst_patch = patches + (size_t)patch_idx * (size_t)patch_dim; /* start of this patch's C*P*P block */
41 
42  for (int c = 0; c < C; ++c) { /* channel */
43  for (int py = 0; py < P; ++py) { /* row inside the tile */
44  int y = ph * P + py; /* image row for this tile row */
45  int x = pw * P; /* image column of the tile's left edge */
46 
47  const uint16_t *src_row = image + (size_t)c * (size_t)H * (size_t)W + (size_t)y * (size_t)W + (size_t)x; /* image is indexed as [c][y][x] with row stride W */
48  uint16_t *dst_row = dst_patch + (size_t)c * (size_t)P * (size_t)P + (size_t)py * (size_t)P; /* patch is indexed as [c][py][px] */
49 
50  memcpy(dst_row, src_row, (size_t)P * sizeof(uint16_t)); /* copy one P-element row verbatim; raw 16-bit values are not converted */
51  }
52  }
53  }
54  }
55 }
#define C(color)
Definition: show_config.c:39

References C.

◆ patch2im_bf16()

void patch2im_bf16 ( const uint16_t *  d_patches,
uint16_t *  d_image,
int  C,
int  H,
int  W,
int  P 
)

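Scatters patch values in d_patches (one contiguous block of C * P * P elements per tile, tiles in row-major order) back to their positions in the channel-major (C, H, W) buffer d_image. The whole of d_image is zeroed first, so elements outside the last whole tile (H % P rows, W % P columns) remain zero. The function returns without doing anything if either pointer is NULL or any dimension is not positive.

Definition at line 57 of file vision_kernels_bf16.c.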

63 { /* Scatter-add patch rows from `d_patches` back into the zero-initialised `d_image`. */
64  if (!d_patches || !d_image || C <= 0 || H <= 0 || W <= 0 || P <= 0) { /* reject NULL buffers and non-positive dimensions */
65  return; /* silent no-op: `d_image` is NOT zeroed on this path */
66  }
67 
68  int num_patches_h = H / P; /* integer division: the trailing H % P rows receive no patch data */
69  int num_patches_w = W / P; /* integer division: the trailing W % P columns receive no patch data */
70  int patch_dim = C * P * P; /* elements per patch; NOTE(review): computed in int before the size_t casts below, so very large C/P could overflow - confirm caller limits */
71 
72  memset(d_image, 0, (size_t)C * (size_t)H * (size_t)W * sizeof(uint16_t)); /* clear all C*H*W elements so uncovered border elements end up as zero bits */
73 
74  for (int ph = 0; ph < num_patches_h; ++ph) { /* tile row */
75  for (int pw = 0; pw < num_patches_w; ++pw) { /* tile column */
76  int patch_idx = ph * num_patches_w + pw; /* patches are ordered row-major over the tile grid */
77  const uint16_t *src_patch = d_patches + (size_t)patch_idx * (size_t)patch_dim; /* start of this patch's C*P*P block */
78 
79  for (int c = 0; c < C; ++c) { /* channel */
80  for (int py = 0; py < P; ++py) { /* row inside the tile */
81  int y = ph * P + py; /* image row for this tile row */
82  int x = pw * P; /* image column of the tile's left edge */
83 
84  uint16_t *dst_row = d_image + (size_t)c * (size_t)H * (size_t)W + (size_t)y * (size_t)W + (size_t)x; /* image is indexed as [c][y][x] with row stride W */
85  const uint16_t *src_row = src_patch + (size_t)c * (size_t)P * (size_t)P + (size_t)py * (size_t)P; /* patch is indexed as [c][py][px] */
86 
87  for (int px = 0; px < P; ++px) { /* tiles step by P, so each destination element is visited at most once */
88  float acc = bf16_to_float(dst_row[px]) + bf16_to_float(src_row[px]); /* add in float: current destination (zeroed above) plus the patch value */
89  dst_row[px] = float_to_bf16(acc); /* store via float_to_bf16; NOTE(review): the float round-trip may not be bit-identical to a raw copy (e.g. signed zero, NaN) - confirm against bf16_utils.h */
90  }
91  }
92  }
93  }
94  }
95 }
static uint16_t float_to_bf16(float f)
Definition: bf16_utils.h:90
static float bf16_to_float(uint16_t v)
Definition: bf16_utils.h:38

References bf16_to_float(), C, and float_to_bf16().