ckernel_model.h File Reference
#include "ckernel_ir.h"
#include <stddef.h>
#include <stdint.h>

Go to the source code of this file.

Data Structures

struct  CKLayerLayout
 
struct  TransformerModel
 

Functions

int ck_model_load_weights_flat (TransformerModel *m, const char *path)
 
void layout_transformer_from_ir (TransformerModel *m, const CKIRGraph *ir)
 

Function Documentation

◆ ck_model_load_weights_flat()

int ck_model_load_weights_flat ( TransformerModel *  m,
const char *  path 
)

Load weights from a single flat binary file into model->memory_base.

Expected layout in the file (float32, little-endian), in the same order as layout_transformer_from_ir assigns weight offsets:

1) Token embeddings [vocab_size × hidden_size]
2) Pos embeddings [context_window × hidden_size]
3) For each layer L = 0..num_layers-1:

  • LN1 gamma [hidden_size]
  • LN1 beta [hidden_size]
  • QKV weight [hidden_size × 3*hidden_size]
  • QKV bias [3*hidden_size]
  • Attn proj W [hidden_size × hidden_size]
  • Attn proj b [hidden_size]
  • FC1 weight [hidden_size × intermediate_size]
  • FC1 bias [intermediate_size]
  • FC2 weight [intermediate_size × hidden_size]
  • FC2 bias [hidden_size]

4) Final LN gamma [hidden_size]
5) Final LN beta [hidden_size]
6) LM head weight [vocab_size × hidden_size]

Activation buffers (embedded_input_offset, final_output_offset, logits_offset) are NOT populated by this loader.

Returns 0 on success, non-zero on failure.
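The loader assumes that layout_transformer_from_ir() has already assigned offsets and that memory_base points to a buffer of at least total_bytes bytes. A minimal calling sequence might look like the sketch below; the concrete dimensions and the "model.bin" path are illustrative placeholders, and config fields the layout pass does not read (e.g. head count) are omitted for brevity.

#include <stdio.h>
#include <stdlib.h>
#include "ckernel_model.h"

int main(void)
{
    TransformerModel m = {0};

    /* Illustrative GPT-2-small-like dimensions; adjust to the checkpoint. */
    m.cfg.num_layers        = 12;
    m.cfg.hidden_size       = 768;
    m.cfg.intermediate_size = 3072;
    m.cfg.vocab_size        = 50257;
    m.cfg.context_window    = 1024;

    /* Assign all offsets and compute the arena size (ir == NULL: trust cfg). */
    layout_transformer_from_ir(&m, NULL);

    /* Back the whole model with one flat allocation. */
    m.memory_base = (uint8_t *)malloc(m.total_bytes);
    if (!m.memory_base) {
        return 1;
    }

    /* Stream the float32 weights into place; "model.bin" is a placeholder. */
    if (ck_model_load_weights_flat(&m, "model.bin") != 0) {
        fprintf(stderr, "weight load failed\n");
        free(m.memory_base);
        free(m.layers);
        return 1;
    }

    /* ... run the forward pass ... */

    free(m.memory_base);
    free(m.layers);
    return 0;
}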

Definition at line 24 of file ckernel_model_load.c.

25 {
26  if (!m || !m->memory_base || !path) {
27  fprintf(stderr, "ck_model_load_weights_flat: invalid arguments\n");
28  return -1;
29  }
30 
31  FILE *f = fopen(path, "rb");
32  if (!f) {
33  fprintf(stderr, "ck_model_load_weights_flat: failed to open %s: %s\n",
34  path, strerror(errno));
35  return -1;
36  }
37  char magic[8];
38  if (fread(magic, 1, 8, f) == 8) {
39  if (memcmp(magic, "BUMPWGT2", 8) == 0) {
40  if (fseek(f, 128, SEEK_SET) != 0) {
41  fclose(f);
42  return -1;
43  }
44  } else if (memcmp(magic, "BUMPWGT3", 8) == 0) {
45  if (fseek(f, 128, SEEK_SET) != 0) {
46  fclose(f);
47  return -1;
48  }
49  uint32_t dtype_len = 0;
50  if (fread(&dtype_len, sizeof(uint32_t), 1, f) != 1) {
51  fclose(f);
52  return -1;
53  }
54  if (fseek(f, (long)dtype_len, SEEK_CUR) != 0) {
55  fclose(f);
56  return -1;
57  }
58  } else if (fseek(f, 0, SEEK_SET) != 0) {
59  fclose(f);
60  return -1;
61  }
62  } else if (fseek(f, 0, SEEK_SET) != 0) {
63  fclose(f);
64  return -1;
65  }
66 
67  const int L = m->cfg.num_layers;
68  const int H = m->cfg.hidden_size;
69  const int Hff = m->cfg.intermediate_size;
70  const int V = m->cfg.vocab_size;
71  const int T = m->cfg.context_window;
72 
73  if (L <= 0 || H <= 0 || Hff <= 0 || V <= 0 || T <= 0) {
74  fprintf(stderr, "ck_model_load_weights_flat: invalid model cfg (L=%d, H=%d, Hff=%d, V=%d, T=%d)\n",
75  L, H, Hff, V, T);
76  fclose(f);
77  return -1;
78  }
79 
80  uint8_t *base = m->memory_base;
81 
82  /* 1) Token embeddings [V × H] */
83  if (read_floats(f, (float *)(base + m->token_emb_offset),
84  (size_t)V * (size_t)H) != 0) {
85  fclose(f);
86  return -1;
87  }
88 
89  /* 2) Positional embeddings [T × H] */
90  if (read_floats(f, (float *)(base + m->pos_emb_offset),
91  (size_t)T * (size_t)H) != 0) {
92  fclose(f);
93  return -1;
94  }
95 
96  /* 3) Per-layer weights */
97  for (int layer = 0; layer < L; ++layer) {
98  CKLayerLayout *Lyt = &m->layers[layer];
99 
100  /* LN1 gamma [H] */
101  if (read_floats(f, (float *)(base + Lyt->ln1_weight_offset), (size_t)H) != 0) {
102  fclose(f);
103  return -1;
104  }
105 
106  /* LN1 beta [H] */
107  if (read_floats(f, (float *)(base + Lyt->ln1_bias_offset), (size_t)H) != 0) {
108  fclose(f);
109  return -1;
110  }
111 
112  /* QKV weight [H × 3H] */
113  if (read_floats(f, (float *)(base + Lyt->qkv_weight_offset),
114  (size_t)H * (size_t)(3 * H)) != 0) {
115  fclose(f);
116  return -1;
117  }
118 
119  /* QKV bias [3H] */
120  if (read_floats(f, (float *)(base + Lyt->qkv_bias_offset), (size_t)(3 * H)) != 0) {
121  fclose(f);
122  return -1;
123  }
124 
125  /* Attention proj weight [H × H] */
126  if (read_floats(f, (float *)(base + Lyt->attn_proj_weight_offset),
127  (size_t)H * (size_t)H) != 0) {
128  fclose(f);
129  return -1;
130  }
131 
132  /* Attention proj bias [H] */
133  if (read_floats(f, (float *)(base + Lyt->attn_proj_bias_offset), (size_t)H) != 0) {
134  fclose(f);
135  return -1;
136  }
137 
138  /* FC1 weight [H × Hff] */
139  if (read_floats(f, (float *)(base + Lyt->fc1_weight_offset),
140  (size_t)H * (size_t)Hff) != 0) {
141  fclose(f);
142  return -1;
143  }
144 
145  /* FC1 bias [Hff] */
146  if (read_floats(f, (float *)(base + Lyt->fc1_bias_offset), (size_t)Hff) != 0) {
147  fclose(f);
148  return -1;
149  }
150 
151  /* FC2 weight [Hff × H] */
152  if (read_floats(f, (float *)(base + Lyt->fc2_weight_offset),
153  (size_t)Hff * (size_t)H) != 0) {
154  fclose(f);
155  return -1;
156  }
157 
158  /* FC2 bias [H] */
159  if (read_floats(f, (float *)(base + Lyt->fc2_bias_offset), (size_t)H) != 0) {
160  fclose(f);
161  return -1;
162  }
163  }
164 
165  /* 4) Final LN gamma [H] */
166  if (read_floats(f, (float *)(base + m->final_ln_weight_offset), (size_t)H) != 0) {
167  fclose(f);
168  return -1;
169  }
170 
171  /* 5) Final LN beta [H] */
172  if (read_floats(f, (float *)(base + m->final_ln_bias_offset), (size_t)H) != 0) {
173  fclose(f);
174  return -1;
175  }
176 
177  /* 6) LM head weight [V × H] */
178  if (read_floats(f, (float *)(base + m->lm_head_weight_offset),
179  (size_t)V * (size_t)H) != 0) {
180  fclose(f);
181  return -1;
182  }
183 
184  fclose(f);
185  return 0;
186 }
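The listing above relies on a static helper, read_floats(), defined earlier in ckernel_model_load.c and not reproduced on this page. Its body is an assumption here; a minimal sketch consistent with how it is called (returns 0 only when all values were read) could be:

/* Hypothetical sketch of the helper: read `count` float32 values from
 * `f` into `dst`, returning 0 on success and -1 on a short read. */
static int read_floats(FILE *f, float *dst, size_t count)
{
    size_t got = fread(dst, sizeof(float), count, f);
    if (got != count) {
        fprintf(stderr, "read_floats: expected %zu floats, read %zu\n",
                count, got);
        return -1;
    }
    return 0;
}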

References CKLayerLayout::attn_proj_bias_offset, CKLayerLayout::attn_proj_weight_offset, TransformerModel::cfg, CKModelConfig::context_window, CKLayerLayout::fc1_bias_offset, CKLayerLayout::fc1_weight_offset, CKLayerLayout::fc2_bias_offset, CKLayerLayout::fc2_weight_offset, TransformerModel::final_ln_bias_offset, TransformerModel::final_ln_weight_offset, CKModelConfig::hidden_size, CKModelConfig::intermediate_size, TransformerModel::layers, TransformerModel::lm_head_weight_offset, CKLayerLayout::ln1_bias_offset, CKLayerLayout::ln1_weight_offset, TransformerModel::memory_base, CKModelConfig::num_layers, TransformerModel::pos_emb_offset, CKLayerLayout::qkv_bias_offset, CKLayerLayout::qkv_weight_offset, read_floats(), TransformerModel::token_emb_offset, and CKModelConfig::vocab_size.

◆ layout_transformer_from_ir()

void layout_transformer_from_ir ( TransformerModel *  m,
const CKIRGraph *  ir 
)

Compute a simple forward-only layout for TransformerModel based on:

  • CKModelConfig (dims, heads, vocab, context)
  • The IR graph structure (number of layers, op types)

This function:

  • Fills token/pos embedding offsets
  • Assigns per-layer weight offsets for LN, QKV, attention proj, MLP
  • Sets final LN / LM head / logits offsets
  • Populates total_bytes with the required byte capacity

Offsets are in bytes, counted from memory_base. The exact shapes and alignment strategy will evolve; this initial version favors correctness and clarity over tight packing.

Lays out the TransformerModel memory based on its cfg and (optionally) the IR.

If ir is non-NULL, its config is copied into m->cfg. If ir is NULL, the function trusts that m->cfg has already been populated.
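Offsets are assigned with a simple bump allocator: each region is placed at the current running offset (rounded up to a cache-line boundary) and the offset is advanced past it. The helpers bump_bytes(), align_up_bytes() and the CACHELINE_BYTES macro are internal to ckernel_model_layout.c; the sketch below only illustrates the pattern and assumes a 64-byte cache line rather than copying the actual definitions.

#include <stddef.h>

/* Sketch of the bump-allocation pattern assumed by the listing below.
 * CACHELINE_BYTES = 64 is an assumption, not the project's definition. */
#define CACHELINE_BYTES 64

/* Round n up to the next multiple of align (align must be a power of two). */
static size_t align_up_bytes(size_t n, size_t align)
{
    return (n + align - 1) & ~(align - 1);
}

/* Reserve `bytes` starting at the aligned current offset, advance the
 * running offset past the region, and return the region's start offset. */
static size_t bump_bytes(size_t *off, size_t bytes, size_t align)
{
    size_t start = align_up_bytes(*off, align);
    *off = start + bytes;
    return start;
}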

Definition at line 21 of file ckernel_model_layout.c.

22 {
23  if (!m) {
24  return;
25  }
26 
27  if (ir) {
28  /* If IR is provided, copy its config. Otherwise, trust m->cfg. */
29  m->cfg = ir->config;
30  }
31 
32  const int L = m->cfg.num_layers;
33  const int H = m->cfg.hidden_size;
34  const int Hff = m->cfg.intermediate_size;
35  const int V = m->cfg.vocab_size > 0 ? m->cfg.vocab_size : 1;
36  const int T = m->cfg.context_window > 0 ? m->cfg.context_window : 1;
37 
38  /* Allocate per-layer layout array. */
39  if (m->layers) {
40  /* caller responsible for freeing if re-layout is needed */
41  } else if (L > 0) {
42  m->layers = (CKLayerLayout *)calloc((size_t)L, sizeof(CKLayerLayout));
43  }
44 
45  size_t elem_bytes = m->elem_bytes ? m->elem_bytes : sizeof(float);
46  m->elem_bytes = elem_bytes;
47 
48  size_t offset = 0;
49 
50  /* Token embeddings: [V × H] */
51  m->token_emb_offset = bump_bytes(&offset,
52  (size_t)V * (size_t)H * elem_bytes,
53  CACHELINE_BYTES);
54 
55  /* Positional embeddings: [T × H] */
56  m->pos_emb_offset = bump_bytes(&offset,
57  (size_t)T * (size_t)H * elem_bytes,
58  CACHELINE_BYTES);
59 
60  /* Embedded input buffer: [T × H] */
61  m->embedded_input_offset = bump_bytes(&offset,
62  (size_t)T * (size_t)H * elem_bytes,
63  CACHELINE_BYTES);
64 
65  m->layers_start_offset = offset;
66 
67  /* Per-layer weights. This is a simple, linear layout:
68  * - LN1 gamma/beta [H]
69  * - QKV weight/bias [H × 3H], [3H]
70  * - Attention proj weight/bias [H × H], [H]
71  * - FC1 weight/bias [H × Hff], [Hff]
72  * - FC2 weight/bias [Hff × H], [H]
73  *
74  * Activations are not yet explicitly laid out here; this pass focuses
75  * on weights. A later planner can layer activations and gradients on top.
76  */
77  for (int layer = 0; layer < L; ++layer) {
78  CKLayerLayout *Lyt = &m->layers[layer];
79 
80  /* LN1 weights/bias */
81  Lyt->ln1_weight_offset = bump_bytes(&offset,
82  (size_t)H * elem_bytes,
83  CACHELINE_BYTES);
84 
85  Lyt->ln1_bias_offset = bump_bytes(&offset,
86  (size_t)H * elem_bytes,
87  CACHELINE_BYTES);
88 
89  /* QKV weight: [H × 3H] */
90  Lyt->qkv_weight_offset = bump_bytes(&offset,
91  (size_t)H * (size_t)(3 * H) * elem_bytes,
92  CACHELINE_BYTES);
93 
94  /* QKV bias: [3H] */
95  Lyt->qkv_bias_offset = bump_bytes(&offset,
96  (size_t)(3 * H) * elem_bytes,
97  CACHELINE_BYTES);
98 
99  /* Attention output projection: [H × H] + [H] */
100  Lyt->attn_proj_weight_offset = bump_bytes(&offset,
101  (size_t)H * (size_t)H * elem_bytes,
102  CACHELINE_BYTES);
103 
104  Lyt->attn_proj_bias_offset = bump_bytes(&offset,
105  (size_t)H * elem_bytes,
106  CACHELINE_BYTES);
107 
108  /* FC1: [H × Hff] + [Hff] */
109  Lyt->fc1_weight_offset = bump_bytes(&offset,
110  (size_t)H * (size_t)Hff * elem_bytes,
111  CACHELINE_BYTES);
112 
113  Lyt->fc1_bias_offset = bump_bytes(&offset,
114  (size_t)Hff * elem_bytes,
115  CACHELINE_BYTES);
116 
117  /* FC2: [Hff × H] + [H] */
118  Lyt->fc2_weight_offset = bump_bytes(&offset,
119  (size_t)Hff * (size_t)H * elem_bytes,
120  CACHELINE_BYTES);
121 
122  Lyt->fc2_bias_offset = bump_bytes(&offset,
123  (size_t)H * elem_bytes,
124  CACHELINE_BYTES);
125  }
126 
127  /* Final LayerNorm: gamma/beta [H], mean/rstd [T] if needed. */
128  m->final_ln_weight_offset = bump_bytes(&offset,
129  (size_t)H * elem_bytes,
130  CACHELINE_BYTES);
131 
132  m->final_ln_bias_offset = bump_bytes(&offset,
133  (size_t)H * elem_bytes,
134  CACHELINE_BYTES);
135 
136  /* Final normalized output: [T × H] */
137  m->final_output_offset = bump_bytes(&offset,
138  (size_t)T * (size_t)H * elem_bytes,
139  CACHELINE_BYTES);
140 
141  /* LM head weight: [V × H] (often tied to token_emb_offset in logic) */
142  m->lm_head_weight_offset = bump_bytes(&offset,
143  (size_t)V * (size_t)H * elem_bytes,
144  CACHELINE_BYTES);
145 
146  /* Logits buffer: [T × V] */
147  m->logits_offset = bump_bytes(&offset,
148  (size_t)T * (size_t)V * elem_bytes,
149  CACHELINE_BYTES);
150 
151  m->total_bytes = align_up_bytes(offset, CACHELINE_BYTES);
152  m->total_floats = m->total_bytes / elem_bytes;
153 }

References align_up_bytes(), CKLayerLayout::attn_proj_bias_offset, CKLayerLayout::attn_proj_weight_offset, bump_bytes(), CACHELINE_BYTES, TransformerModel::cfg, CKIRGraph::config, CKModelConfig::context_window, TransformerModel::elem_bytes, TransformerModel::embedded_input_offset, CKLayerLayout::fc1_bias_offset, CKLayerLayout::fc1_weight_offset, CKLayerLayout::fc2_bias_offset, CKLayerLayout::fc2_weight_offset, TransformerModel::final_ln_bias_offset, TransformerModel::final_ln_weight_offset, TransformerModel::final_output_offset, CKModelConfig::hidden_size, CKModelConfig::intermediate_size, TransformerModel::layers, TransformerModel::layers_start_offset, TransformerModel::lm_head_weight_offset, CKLayerLayout::ln1_bias_offset, CKLayerLayout::ln1_weight_offset, TransformerModel::logits_offset, CKModelConfig::num_layers, TransformerModel::pos_emb_offset, CKLayerLayout::qkv_bias_offset, CKLayerLayout::qkv_weight_offset, TransformerModel::token_emb_offset, TransformerModel::total_bytes, TransformerModel::total_floats, and CKModelConfig::vocab_size.