ckernel_model.h File Reference
#include "ckernel_ir.h"
#include <stddef.h>
#include <stdint.h>

Go to the source code of this file.

Data Structures

struct  CKLayerLayout
 
struct  TransformerModel
 

Functions

int ck_model_load_weights_flat (TransformerModel *m, const char *path)
 
void layout_transformer_from_ir (TransformerModel *m, const CKIRGraph *ir)
 

Function Documentation

◆ ck_model_load_weights_flat()

int ck_model_load_weights_flat ( TransformerModel *  m,
const char *  path 
)

Load weights from a single flat binary file into model->memory_base.

Expected layout in the file (float32, little-endian), in the same order as layout_transformer_from_ir assigns weight offsets:

1) Token embeddings [vocab_size × hidden_size]
2) Pos embeddings [context_window × hidden_size]
3) For each layer L = 0..num_layers-1:

  • LN1 gamma [hidden_size]
  • LN1 beta [hidden_size]
  • QKV weight [hidden_size × 3*hidden_size]
  • QKV bias [3*hidden_size]
  • Attn proj W [hidden_size × hidden_size]
  • Attn proj b [hidden_size]
  • FC1 weight [hidden_size × intermediate_size]
  • FC1 bias [intermediate_size]
  • FC2 weight [intermediate_size × hidden_size]
  • FC2 bias [hidden_size]

4) Final LN gamma [hidden_size]
5) Final LN beta [hidden_size]
6) LM head weight [vocab_size × hidden_size]

Activation buffers (embedded_input_offset, final_output_offset, logits_offset) are NOT populated by this loader.

Returns 0 on success, non-zero on failure.
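The loader assumes that layout_transformer_from_ir() has already assigned offsets and that memory_base points to a buffer of at least total_bytes bytes. A minimal calling sequence might look like the sketch below; the concrete dimensions and the "model.bin" path are illustrative placeholders, and config fields the layout pass does not read (e.g. head count) are omitted for brevity.

#include <stdio.h>
#include <stdlib.h>
#include "ckernel_model.h"

int main(void)
{
    TransformerModel m = {0};

    /* Illustrative GPT-2-small-like dimensions; adjust to the checkpoint. */
    m.cfg.num_layers        = 12;
    m.cfg.hidden_size       = 768;
    m.cfg.intermediate_size = 3072;
    m.cfg.vocab_size        = 50257;
    m.cfg.context_window    = 1024;

    /* Assign all offsets and compute the arena size (ir == NULL: trust cfg). */
    layout_transformer_from_ir(&m, NULL);

    /* Back the whole model with one flat allocation. */
    m.memory_base = (uint8_t *)malloc(m.total_bytes);
    if (!m.memory_base) {
        return 1;
    }

    /* Stream the float32 weights into place; "model.bin" is a placeholder. */
    if (ck_model_load_weights_flat(&m, "model.bin") != 0) {
        fprintf(stderr, "weight load failed\n");
        free(m.memory_base);
        free(m.layers);
        return 1;
    }

    /* ... run the forward pass ... */

    free(m.memory_base);
    free(m.layers);
    return 0;
}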

Definition at line 24 of file ckernel_model_load.c.

25 {
26  if (!m || !m->memory_base || !path) {
27  fprintf(stderr, "ck_model_load_weights_flat: invalid arguments\n");
28  return -1;
29  }
30 
31  FILE *f = fopen(path, "rb");
32  if (!f) {
33  fprintf(stderr, "ck_model_load_weights_flat: failed to open %s: %s\n",
34  path, strerror(errno));
35  return -1;
36  }
37  char magic[8];
38  if (fread(magic, 1, 8, f) == 8) {
39  if (memcmp(magic, "BUMPWGT2", 8) == 0) {
40  if (fseek(f, 128, SEEK_SET) != 0) {
41  fclose(f);
42  return -1;
43  }
44  } else if (memcmp(magic, "BUMPWGT3", 8) == 0) {
45  if (fseek(f, 128, SEEK_SET) != 0) {
46  fclose(f);
47  return -1;
48  }
49  uint32_t dtype_len = 0;
50  if (fread(&dtype_len, sizeof(uint32_t), 1, f) != 1) {
51  fclose(f);
52  return -1;
53  }
54  if (fseek(f, (long)dtype_len, SEEK_CUR) != 0) {
55  fclose(f);
56  return -1;
57  }
58  } else if (fseek(f, 0, SEEK_SET) != 0) {
59  fclose(f);
60  return -1;
61  }
62  } else if (fseek(f, 0, SEEK_SET) != 0) {
63  fclose(f);
64  return -1;
65  }
66 
67  const int L = m->cfg.num_layers;
68  const int H = m->cfg.hidden_size;
69  const int Hff = m->cfg.intermediate_size;
70  const int V = m->cfg.vocab_size;
71  const int T = m->cfg.context_window;
72 
73  if (L <= 0 || H <= 0 || Hff <= 0 || V <= 0 || T <= 0) {
74  fprintf(stderr, "ck_model_load_weights_flat: invalid model cfg (L=%d, H=%d, Hff=%d, V=%d, T=%d)\n",
75  L, H, Hff, V, T);
76  fclose(f);
77  return -1;
78  }
79 
80  uint8_t *base = m->memory_base;
81 
82  /* 1) Token embeddings [V × H] */
83  if (read_floats(f, (float *)(base + m->token_emb_offset),
84  (size_t)V * (size_t)H) != 0) {
85  fclose(f);
86  return -1;
87  }
88 
89  /* 2) Positional embeddings [T × H] */
90  if (read_floats(f, (float *)(base + m->pos_emb_offset),
91  (size_t)T * (size_t)H) != 0) {
92  fclose(f);
93  return -1;
94  }
95 
96  /* 3) Per-layer weights */
97  for (int layer = 0; layer < L; ++layer) {
98  CKLayerLayout *Lyt = &m->layers[layer];
99 
100  /* LN1 gamma [H] */
101  if (read_floats(f, (float *)(base + Lyt->ln1_weight_offset), (size_t)H) != 0) {
102  fclose(f);
103  return -1;
104  }
105 
106  /* LN1 beta [H] */
107  if (read_floats(f, (float *)(base + Lyt->ln1_bias_offset), (size_t)H) != 0) {
108  fclose(f);
109  return -1;
110  }
111 
112  /* QKV weight [H × 3H] */
113  if (read_floats(f, (float *)(base + Lyt->qkv_weight_offset),
114  (size_t)H * (size_t)(3 * H)) != 0) {
115  fclose(f);
116  return -1;
117  }
118 
119  /* QKV bias [3H] */
120  if (read_floats(f, (float *)(base + Lyt->qkv_bias_offset), (size_t)(3 * H)) != 0) {
121  fclose(f);
122  return -1;
123  }
124 
125  /* Attention proj weight [H × H] */
126  if (read_floats(f, (float *)(base + Lyt->attn_proj_weight_offset),
127  (size_t)H * (size_t)H) != 0) {
128  fclose(f);
129  return -1;
130  }
131 
132  /* Attention proj bias [H] */
133  if (read_floats(f, (float *)(base + Lyt->attn_proj_bias_offset), (size_t)H) != 0) {
134  fclose(f);
135  return -1;
136  }
137 
138  /* FC1 weight [H × Hff] */
139  if (read_floats(f, (float *)(base + Lyt->fc1_weight_offset),
140  (size_t)H * (size_t)Hff) != 0) {
141  fclose(f);
142  return -1;
143  }
144 
145  /* FC1 bias [Hff] */
146  if (read_floats(f, (float *)(base + Lyt->fc1_bias_offset), (size_t)Hff) != 0) {
147  fclose(f);
148  return -1;
149  }
150 
151  /* FC2 weight [Hff × H] */
152  if (read_floats(f, (float *)(base + Lyt->fc2_weight_offset),
153  (size_t)Hff * (size_t)H) != 0) {
154  fclose(f);
155  return -1;
156  }
157 
158  /* FC2 bias [H] */
159  if (read_floats(f, (float *)(base + Lyt->fc2_bias_offset), (size_t)H) != 0) {
160  fclose(f);
161  return -1;
162  }
163  }
164 
165  /* 4) Final LN gamma [H] */
166  if (read_floats(f, (float *)(base + m->final_ln_weight_offset), (size_t)H) != 0) {
167  fclose(f);
168  return -1;
169  }
170 
171  /* 5) Final LN beta [H] */
172  if (read_floats(f, (float *)(base + m->final_ln_bias_offset), (size_t)H) != 0) {
173  fclose(f);
174  return -1;
175  }
176 
177  /* 6) LM head weight [V × H] */
178  if (read_floats(f, (float *)(base + m->lm_head_weight_offset),
179  (size_t)V * (size_t)H) != 0) {
180  fclose(f);
181  return -1;
182  }
183 
184  fclose(f);
185  return 0;
186 }
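The listing above relies on a static helper, read_floats(), defined earlier in ckernel_model_load.c and not reproduced on this page. Its body is an assumption here; a minimal sketch consistent with how it is called (returns 0 only when all values were read) could be:

/* Hypothetical sketch of the helper: read `count` float32 values from
 * `f` into `dst`, returning 0 on success and -1 on a short read. */
static int read_floats(FILE *f, float *dst, size_t count)
{
    size_t got = fread(dst, sizeof(float), count, f);
    if (got != count) {
        fprintf(stderr, "read_floats: expected %zu floats, read %zu\n",
                count, got);
        return -1;
    }
    return 0;
}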

References CKLayerLayout::attn_proj_bias_offset, CKLayerLayout::attn_proj_weight_offset, TransformerModel::cfg, CKModelConfig::context_window, CKLayerLayout::fc1_bias_offset, CKLayerLayout::fc1_weight_offset, CKLayerLayout::fc2_bias_offset, CKLayerLayout::fc2_weight_offset, TransformerModel::final_ln_bias_offset, TransformerModel::final_ln_weight_offset, CKModelConfig::hidden_size, CKModelConfig::intermediate_size, TransformerModel::layers, TransformerModel::lm_head_weight_offset, CKLayerLayout::ln1_bias_offset, CKLayerLayout::ln1_weight_offset, TransformerModel::memory_base, CKModelConfig::num_layers, TransformerModel::pos_emb_offset, CKLayerLayout::qkv_bias_offset, CKLayerLayout::qkv_weight_offset, read_floats(), TransformerModel::token_emb_offset, and CKModelConfig::vocab_size.

◆ layout_transformer_from_ir()

void layout_transformer_from_ir ( TransformerModel *  m,
const CKIRGraph *  ir 
)

Compute a simple forward-only layout for TransformerModel based on:

  • CKModelConfig (dims, heads, vocab, context)
  • The IR graph structure (number of layers, op types)

This function:

  • Fills token/pos embedding offsets
  • Assigns per-layer weight offsets for LN, QKV, attention proj, MLP
  • Sets final LN / LM head / logits offsets
  • Populates total_bytes with the required byte capacity

Offsets are in bytes, counted from memory_base. The exact shapes and alignment strategy will evolve; this initial version favors correctness and clarity over tight packing.

Lays out the TransformerModel memory based on its cfg and (optionally) the IR.

If ir is non-NULL, its config is copied into m->cfg. If ir is NULL, the function trusts that m->cfg has already been populated.
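Offsets are assigned with a simple bump allocator: each region is placed at the current running offset (rounded up to a cache-line boundary) and the offset is advanced past it. The helpers bump_bytes(), align_up_bytes() and the CACHELINE_BYTES macro are internal to ckernel_model_layout.c; the sketch below only illustrates the pattern and assumes a 64-byte cache line rather than copying the actual definitions.

#include <stddef.h>

/* Sketch of the bump-allocation pattern assumed by the listing below.
 * CACHELINE_BYTES = 64 is an assumption, not the project's definition. */
#define CACHELINE_BYTES 64

/* Round n up to the next multiple of align (align must be a power of two). */
static size_t align_up_bytes(size_t n, size_t align)
{
    return (n + align - 1) & ~(align - 1);
}

/* Reserve `bytes` starting at the aligned current offset, advance the
 * running offset past the region, and return the region's start offset. */
static size_t bump_bytes(size_t *off, size_t bytes, size_t align)
{
    size_t start = align_up_bytes(*off, align);
    *off = start + bytes;
    return start;
}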

Definition at line 21 of file ckernel_model_layout.c.

22 {
23  if (!m) {
24  return;
25  }
26 
27  if (ir) {
28  /* If IR is provided, copy its config. Otherwise, trust m->cfg. */
29  m->cfg = ir->config;
30  }
31 
32  const int L = m->cfg.num_layers;
33  const int H = m->cfg.hidden_size;
34  const int Hff = m->cfg.intermediate_size;
35  const int V = m->cfg.vocab_size > 0 ? m->cfg.vocab_size : 1;
36  const int T = m->cfg.context_window > 0 ? m->cfg.context_window : 1;
37 
38  /* Allocate per-layer layout array. */
39  if (m->layers) {
40  /* caller responsible for freeing if re-layout is needed */
41  } else if (L > 0) {
42  m->layers = (CKLayerLayout *)calloc((size_t)L, sizeof(CKLayerLayout));
43  }
44 
45  size_t elem_bytes = m->elem_bytes ? m->elem_bytes : sizeof(float);
46  m->elem_bytes = elem_bytes;
47 
48  size_t offset = 0;
49 
50  /* Token embeddings: [V × H] */
51  m->token_emb_offset = bump_bytes(&offset,
52  (size_t)V * (size_t)H * elem_bytes,
53  CACHELINE_BYTES);
54 
55  /* Positional embeddings: [T × H] */
56  m->pos_emb_offset = bump_bytes(&offset,
57  (size_t)T * (size_t)H * elem_bytes,
58  CACHELINE_BYTES);
59 
60  /* Embedded input buffer: [T × H] */
61  m->embedded_input_offset = bump_bytes(&offset,
62  (size_t)T * (size_t)H * elem_bytes,
63  CACHELINE_BYTES);
64 
65  m->layers_start_offset = offset;
66 
67  /* Per-layer weights. This is a simple, linear layout:
68  * - LN1 gamma/beta [H]
69  * - QKV weight/bias [H × 3H], [3H]
70  * - Attention proj weight/bias [H × H], [H]
71  * - FC1 weight/bias [H × Hff], [Hff]
72  * - FC2 weight/bias [Hff × H], [H]
73  *
74  * Activations are not yet explicitly laid out here; this pass focuses
75  * on weights. A later planner can layer activations and gradients on top.
76  */
77  for (int layer = 0; layer < L; ++layer) {
78  CKLayerLayout *Lyt = &m->layers[layer];
79 
80  /* LN1 weights/bias */
81  Lyt->ln1_weight_offset = bump_bytes(&offset,
82  (size_t)H * elem_bytes,
83  CACHELINE_BYTES);
84 
85  Lyt->ln1_bias_offset = bump_bytes(&offset,
86  (size_t)H * elem_bytes,
87  CACHELINE_BYTES);
88 
89  /* QKV weight: [H × 3H] */
90  Lyt->qkv_weight_offset = bump_bytes(&offset,
91  (size_t)H * (size_t)(3 * H) * elem_bytes,
92  CACHELINE_BYTES);
93 
94  /* QKV bias: [3H] */
95  Lyt->qkv_bias_offset = bump_bytes(&offset,
96  (size_t)(3 * H) * elem_bytes,
97  CACHELINE_BYTES);
98 
99  /* Attention output projection: [H × H] + [H] */
100  Lyt->attn_proj_weight_offset = bump_bytes(&offset,
101  (size_t)H * (size_t)H * elem_bytes,
102  CACHELINE_BYTES);
103 
104  Lyt->attn_proj_bias_offset = bump_bytes(&offset,
105  (size_t)H * elem_bytes,
106  CACHELINE_BYTES);
107 
108  /* FC1: [H × Hff] + [Hff] */
109  Lyt->fc1_weight_offset = bump_bytes(&offset,
110  (size_t)H * (size_t)Hff * elem_bytes,
111  CACHELINE_BYTES);
112 
113  Lyt->fc1_bias_offset = bump_bytes(&offset,
114  (size_t)Hff * elem_bytes,
115  CACHELINE_BYTES);
116 
117  /* FC2: [Hff × H] + [H] */
118  Lyt->fc2_weight_offset = bump_bytes(&offset,
119  (size_t)Hff * (size_t)H * elem_bytes,
120  CACHELINE_BYTES);
121 
122  Lyt->fc2_bias_offset = bump_bytes(&offset,
123  (size_t)H * elem_bytes,
124  CACHELINE_BYTES);
125  }
126 
127  /* Final LayerNorm: gamma/beta [H], mean/rstd [T] if needed. */
128  m->final_ln_weight_offset = bump_bytes(&offset,
129  (size_t)H * elem_bytes,
130  CACHELINE_BYTES);
131 
132  m->final_ln_bias_offset = bump_bytes(&offset,
133  (size_t)H * elem_bytes,
134  CACHELINE_BYTES);
135 
136  /* Final normalized output: [T × H] */
137  m->final_output_offset = bump_bytes(&offset,
138  (size_t)T * (size_t)H * elem_bytes,
139  CACHELINE_BYTES);
140 
141  /* LM head weight: [V × H] (often tied to token_emb_offset in logic) */
142  m->lm_head_weight_offset = bump_bytes(&offset,
143  (size_t)V * (size_t)H * elem_bytes,
144  CACHELINE_BYTES);
145 
146  /* Logits buffer: [T × V] */
147  m->logits_offset = bump_bytes(&offset,
148  (size_t)T * (size_t)V * elem_bytes,
149  CACHELINE_BYTES);
150 
151  m->total_bytes = align_up_bytes(offset, CACHELINE_BYTES);
152  m->total_floats = m->total_bytes / elem_bytes;
153 }

References align_up_bytes(), CKLayerLayout::attn_proj_bias_offset, CKLayerLayout::attn_proj_weight_offset, bump_bytes(), CACHELINE_BYTES, TransformerModel::cfg, CKIRGraph::config, CKModelConfig::context_window, TransformerModel::elem_bytes, TransformerModel::embedded_input_offset, CKLayerLayout::fc1_bias_offset, CKLayerLayout::fc1_weight_offset, CKLayerLayout::fc2_bias_offset, CKLayerLayout::fc2_weight_offset, TransformerModel::final_ln_bias_offset, TransformerModel::final_ln_weight_offset, TransformerModel::final_output_offset, CKModelConfig::hidden_size, CKModelConfig::intermediate_size, TransformerModel::layers, TransformerModel::layers_start_offset, TransformerModel::lm_head_weight_offset, CKLayerLayout::ln1_bias_offset, CKLayerLayout::ln1_weight_offset, TransformerModel::logits_offset, CKModelConfig::num_layers, TransformerModel::pos_emb_offset, CKLayerLayout::qkv_bias_offset, CKLayerLayout::qkv_weight_offset, TransformerModel::token_emb_offset, TransformerModel::total_bytes, TransformerModel::total_floats, and CKModelConfig::vocab_size.