← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ckernel_model.h
Go to the documentation of this file.
1 #ifndef CKERNEL_MODEL_H
2 #define CKERNEL_MODEL_H
3 
4 #include "ckernel_ir.h"
5 
6 #include <stddef.h>
7 #include <stdint.h>
8 
9 /**
10  * Simplified forward-only model layout.
11  *
12  * This is the first step toward the full TransformerModel / TrulyOptimalLayer
13  * / GradientStorage design. It focuses on:
14  * - A single contiguous memory block for weights + activations
15  * - Per-layer weight offsets for the core decoder kernels
16  *
17  * Backward/optimizer offsets and KV cache layout will be layered on later.
18  */
19 
20 typedef struct {
21  /* Per-layer weight offsets, measured in bytes from memory_base. */
24 
27 
30 
33 
37 
38 typedef struct {
39  CKModelConfig cfg; /* parsed from HF config / IR config */
40 
41  /* Unified memory block (weights + activations). */
42  uint8_t *memory_base;
43  size_t total_bytes;
44  size_t total_floats; /* legacy: bytes / elem_bytes when elem_bytes==4 */
45  size_t elem_bytes;
46 
47  /* Global offsets (bytes) into memory_base. */
48  size_t token_emb_offset; /* [vocab_size × hidden_size] */
49  size_t pos_emb_offset; /* [context_window × hidden_size] */
50  size_t embedded_input_offset; /* [context_window × hidden_size] */
51  size_t layers_start_offset; /* start of first layer block */
52 
55  size_t final_output_offset; /* [context_window × hidden_size] */
56 
57  size_t lm_head_weight_offset; /* often tied to token_emb_offset */
58  size_t logits_offset; /* [context_window × vocab_size] */
59 
60  /* Per-layer layouts (length = cfg.num_layers). */
63 
64 /**
65  * Compute a simple forward-only layout for TransformerModel based on:
66  * - CKModelConfig (dims, heads, vocab, context)
67  * - The IR graph structure (number of layers, op types)
68  *
69  * This function:
70  * - Fills token/pos embedding offsets
71  * - Assigns per-layer weight offsets for LN, QKV, attention proj, MLP
72  * - Sets final LN / LM head / logits offsets
73  * - Populates total_bytes with the required byte capacity
74  *
75  * Offsets are in bytes counted from memory_base. The exact shapes
76  * and alignment strategy will evolve; this initial version focuses on
77  * correctness and clarity over tight packing.
78  */
79 /**
80  * Lay out the TransformerModel memory based on its cfg and (optionally) the IR.
81  *
82  * If `ir` is non-NULL, its config is copied into `m->cfg`. If `ir` is NULL,
83  * the function trusts that `m->cfg` has already been populated.
84  */
86 
87 /**
88  * Load weights from a single flat binary file into model->memory_base.
89  *
90  * Expected layout in the file (float32, little-endian), in the same order
91  * as layout_transformer_from_ir assigns weight offsets:
92  *
93  * 1) Token embeddings [vocab_size × hidden_size]
94  * 2) Pos embeddings [context_window × hidden_size]
95  * 3) For each layer L = 0..num_layers-1:
96  *      - LN1 gamma   [hidden_size]
97  *      - LN1 beta    [hidden_size]
98  *      - QKV weight  [hidden_size × 3*hidden_size]
99  *      - QKV bias    [3*hidden_size]
100  *      - Attn proj W [hidden_size × hidden_size]
101  *      - Attn proj b [hidden_size]
102  *      - FC1 weight  [hidden_size × intermediate_size]
103  *      - FC1 bias    [intermediate_size]
104  *      - FC2 weight  [intermediate_size × hidden_size]
105  *      - FC2 bias    [hidden_size]
106  * 4) Final LN gamma [hidden_size]
107  * 5) Final LN beta [hidden_size]
108  * 6) LM head weight [vocab_size × hidden_size]
109  *
110  * Activation buffers (embedded_input_offset, final_output_offset, logits_offset)
111  * are NOT populated by this loader.
112  *
113  * Returns 0 on success, non-zero on failure.
114  */
115 int ck_model_load_weights_flat(TransformerModel *m, const char *path);
116 
117 #endif /* CKERNEL_MODEL_H */
void layout_transformer_from_ir(TransformerModel *m, const CKIRGraph *ir)
int ck_model_load_weights_flat(TransformerModel *m, const char *path)
size_t qkv_bias_offset
Definition: ckernel_model.h:26
size_t fc2_weight_offset
Definition: ckernel_model.h:34
size_t fc1_bias_offset
Definition: ckernel_model.h:32
size_t fc1_weight_offset
Definition: ckernel_model.h:31
size_t ln1_weight_offset
Definition: ckernel_model.h:22
size_t qkv_weight_offset
Definition: ckernel_model.h:25
size_t attn_proj_bias_offset
Definition: ckernel_model.h:29
size_t ln1_bias_offset
Definition: ckernel_model.h:23
size_t fc2_bias_offset
Definition: ckernel_model.h:35
size_t attn_proj_weight_offset
Definition: ckernel_model.h:28
size_t lm_head_weight_offset
Definition: ckernel_model.h:57
size_t final_ln_weight_offset
Definition: ckernel_model.h:53
size_t embedded_input_offset
Definition: ckernel_model.h:50
size_t final_ln_bias_offset
Definition: ckernel_model.h:54
size_t token_emb_offset
Definition: ckernel_model.h:48
size_t layers_start_offset
Definition: ckernel_model.h:51
uint8_t * memory_base
Definition: ckernel_model.h:42
size_t final_output_offset
Definition: ckernel_model.h:55
CKLayerLayout * layers
Definition: ckernel_model.h:61
CKModelConfig cfg
Definition: ckernel_model.h:39