Vision kernels (im2patch, patch embedding, etc.)
More...
#include <string.h>
#include <stddef.h>
#include <stdint.h>
Go to the source code of this file.
|
| void | im2patch (const float *image, float *patches, int C, int H, int W, int P) |
| |
| void | patch2im (const float *d_patches, float *d_image, int C, int H, int W, int P) |
| |
Vision kernels (im2patch, patch embedding, etc.)
CK-ENGINE KERNEL RULES:
- NO malloc/free - memory via bump allocator, pointers passed in
- NO OpenMP - parallelization at orchestrator/codegen layer
- API must define: inputs, outputs, workspace, and memory layouts
- Pure computation - deterministic, no side effects
After changes: make test && make llamacpp-parity-full
Definition in file vision_kernels.c.
◆ im2patch()
void im2patch(const float *image, float *patches, int C, int H, int W, int P)
im2patch: Transforms an image into a sequence of flattened patches.
Image layout: [C, H, W] (row-major; W is the fastest-moving dimension).
Output layout: [num_patches, C * P * P], where num_patches = (H/P) * (W/P) and P = patch_size.
Definition at line 28 of file vision_kernels.c.
32 int num_patches_h = H / P;
33 int num_patches_w = W / P;
34 int patch_dim =
C * P * P;
37 for (
int ph = 0; ph < num_patches_h; ++ph) {
38 for (
int pw = 0; pw < num_patches_w; ++pw) {
40 int patch_idx = ph * num_patches_w + pw;
41 float *dst_patch = patches + (size_t)patch_idx * patch_dim;
44 for (
int c = 0; c <
C; ++c) {
45 for (
int py = 0; py < P; ++py) {
50 const float *src_row = image + (size_t)c * H * W + (
size_t)y * W + x;
53 float *dst_row = dst_patch + (size_t)c * P * P + (
size_t)py * P;
56 memcpy(dst_row, src_row, P *
sizeof(
float));
References C.
◆ patch2im()
void patch2im(const float *d_patches, float *d_image, int C, int H, int W, int P)
patch2im: Accumulates gradients from patches back into the image. (Backward pass)
d_patches: [num_patches, C * P * P] (input patch gradients). d_image: [C, H, W] (output; zeroed by this function first, then patch gradients are accumulated into it).
Definition at line 69 of file vision_kernels.c.
73 int num_patches_h = H / P;
74 int num_patches_w = W / P;
75 int patch_dim =
C * P * P;
78 memset(d_image, 0, (
size_t)
C * H * W *
sizeof(
float));
80 for (
int ph = 0; ph < num_patches_h; ++ph) {
81 for (
int pw = 0; pw < num_patches_w; ++pw) {
83 int patch_idx = ph * num_patches_w + pw;
84 const float *src_patch = d_patches + (size_t)patch_idx * patch_dim;
86 for (
int c = 0; c <
C; ++c) {
87 for (
int py = 0; py < P; ++py) {
91 float *dst_row = d_image + (size_t)c * H * W + (
size_t)y * W + x;
92 const float *src_row = src_patch + (size_t)c * P * P + (
size_t)py * P;
95 for (
int px = 0; px < P; ++px) {
96 dst_row[px] += src_row[px];
References C.