← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ckernel_model_layout.c File Reference
#include "ckernel_model.h"
#include <stdlib.h>
#include <string.h>

Go to the source code of this file.

Macros

#define CACHELINE_BYTES   64
 

Functions

static size_t align_up_bytes (size_t n, size_t align)
 
static size_t bump_bytes (size_t *off, size_t bytes, size_t align)
 
void layout_transformer_from_ir (TransformerModel *m, const CKIRGraph *ir)
 

Macro Definition Documentation

◆ CACHELINE_BYTES

#define CACHELINE_BYTES   64

Definition at line 6 of file ckernel_model_layout.c.

Function Documentation

◆ align_up_bytes()

/*
 * Round n up to the next multiple of align, in bytes.
 *
 * align == 0 means "no alignment requested" and returns n unchanged.
 * The original implementation used only the mask form, which silently
 * produces wrong results for non-power-of-two alignments; this version
 * keeps the fast mask path for powers of two and falls back to an exact
 * remainder-based round-up otherwise.
 */
static size_t align_up_bytes(size_t n, size_t align)
{
    if (align == 0) {
        return n;
    }
    /* Power-of-two alignment: classic mask idiom. */
    if ((align & (align - 1)) == 0) {
        return (n + align - 1) & ~(align - 1);
    }
    /* General case: round up to the next multiple of align. */
    size_t rem = n % align;
    return rem ? n + (align - rem) : n;
}

Referenced by bump_bytes(), and layout_transformer_from_ir().

◆ bump_bytes()

/*
 * Bump allocator step: reserve `bytes` bytes at the next `align`-aligned
 * position within the running offset `*off`.
 *
 * On return, *off points just past the reserved region; the function's
 * return value is the aligned start offset of that region.
 */
static size_t bump_bytes(size_t *off, size_t bytes, size_t align)
{
    const size_t region_start = align_up_bytes(*off, align);

    *off = region_start + bytes;

    return region_start;
}
static size_t align_up_bytes(size_t n, size_t align)
uint32_t start
Definition: utf8.c:214

References align_up_bytes(), and start.

Referenced by layout_transformer_from_ir().

◆ layout_transformer_from_ir()

void layout_transformer_from_ir ( TransformerModel *  m,
const CKIRGraph *  ir 
)

Compute a simple forward-only layout for TransformerModel based on:

  • CKModelConfig (dims, heads, vocab, context)
  • The IR graph structure (number of layers, op types)

This function:

  • Fills token/pos embedding offsets
  • Assigns per-layer weight offsets for LN, QKV, attention proj, MLP
  • Sets final LN / LM head / logits offsets
  • Populates total_bytes with the required byte capacity

Offsets are in bytes counted from memory_base. The exact shapes and alignment strategy will evolve; this initial version focuses on correctness and clarity over tight packing. Layout the TransformerModel memory based on its cfg and (optionally) the IR.

If ir is non-NULL, its config is copied into m->cfg. If ir is NULL, the function trusts that m->cfg has already been populated.

Definition at line 21 of file ckernel_model_layout.c.

22 {
23  if (!m) {
24  return;
25  }
26 
27  if (ir) {
28  /* If IR is provided, copy its config. Otherwise, trust m->cfg. */
29  m->cfg = ir->config;
30  }
31 
32  const int L = m->cfg.num_layers;
33  const int H = m->cfg.hidden_size;
34  const int Hff = m->cfg.intermediate_size;
35  const int V = m->cfg.vocab_size > 0 ? m->cfg.vocab_size : 1;
36  const int T = m->cfg.context_window > 0 ? m->cfg.context_window : 1;
37 
38  /* Allocate per-layer layout array. */
39  if (m->layers) {
40  /* caller responsible for freeing if re-layout is needed */
41  } else if (L > 0) {
42  m->layers = (CKLayerLayout *)calloc((size_t)L, sizeof(CKLayerLayout));
43  }
44 
45  size_t elem_bytes = m->elem_bytes ? m->elem_bytes : sizeof(float);
46  m->elem_bytes = elem_bytes;
47 
48  size_t offset = 0;
49 
50  /* Token embeddings: [V × H] */
51  m->token_emb_offset = bump_bytes(&offset,
52  (size_t)V * (size_t)H * elem_bytes,
54 
55  /* Positional embeddings: [T × H] */
56  m->pos_emb_offset = bump_bytes(&offset,
57  (size_t)T * (size_t)H * elem_bytes,
59 
60  /* Embedded input buffer: [T × H] */
61  m->embedded_input_offset = bump_bytes(&offset,
62  (size_t)T * (size_t)H * elem_bytes,
64 
65  m->layers_start_offset = offset;
66 
67  /* Per-layer weights. This is a simple, linear layout:
68  * - LN1 gamma/beta [H]
69  * - QKV weight/bias [H × 3H], [3H]
70  * - Attention proj weight/bias [H × H], [H]
71  * - FC1 weight/bias [H × Hff], [Hff]
72  * - FC2 weight/bias [Hff × H], [H]
73  *
74  * Activations are not yet explicitly laid out here; this pass focuses
75  * on weights. A later planner can layer activations and gradients on top.
76  */
77  for (int layer = 0; layer < L; ++layer) {
78  CKLayerLayout *Lyt = &m->layers[layer];
79 
80  /* LN1 weights/bias */
81  Lyt->ln1_weight_offset = bump_bytes(&offset,
82  (size_t)H * elem_bytes,
84 
85  Lyt->ln1_bias_offset = bump_bytes(&offset,
86  (size_t)H * elem_bytes,
88 
89  /* QKV weight: [H × 3H] */
90  Lyt->qkv_weight_offset = bump_bytes(&offset,
91  (size_t)H * (size_t)(3 * H) * elem_bytes,
93 
94  /* QKV bias: [3H] */
95  Lyt->qkv_bias_offset = bump_bytes(&offset,
96  (size_t)(3 * H) * elem_bytes,
98 
99  /* Attention output projection: [H × H] + [H] */
100  Lyt->attn_proj_weight_offset = bump_bytes(&offset,
101  (size_t)H * (size_t)H * elem_bytes,
103 
104  Lyt->attn_proj_bias_offset = bump_bytes(&offset,
105  (size_t)H * elem_bytes,
107 
108  /* FC1: [H × Hff] + [Hff] */
109  Lyt->fc1_weight_offset = bump_bytes(&offset,
110  (size_t)H * (size_t)Hff * elem_bytes,
112 
113  Lyt->fc1_bias_offset = bump_bytes(&offset,
114  (size_t)Hff * elem_bytes,
116 
117  /* FC2: [Hff × H] + [H] */
118  Lyt->fc2_weight_offset = bump_bytes(&offset,
119  (size_t)Hff * (size_t)H * elem_bytes,
121 
122  Lyt->fc2_bias_offset = bump_bytes(&offset,
123  (size_t)H * elem_bytes,
125  }
126 
127  /* Final LayerNorm: gamma/beta [H], mean/rstd [T] if needed. */
128  m->final_ln_weight_offset = bump_bytes(&offset,
129  (size_t)H * elem_bytes,
131 
132  m->final_ln_bias_offset = bump_bytes(&offset,
133  (size_t)H * elem_bytes,
135 
136  /* Final normalized output: [T × H] */
137  m->final_output_offset = bump_bytes(&offset,
138  (size_t)T * (size_t)H * elem_bytes,
140 
141  /* LM head weight: [V × H] (often tied to token_emb_offset in logic) */
142  m->lm_head_weight_offset = bump_bytes(&offset,
143  (size_t)V * (size_t)H * elem_bytes,
145 
146  /* Logits buffer: [T × V] */
147  m->logits_offset = bump_bytes(&offset,
148  (size_t)T * (size_t)V * elem_bytes,
150 
152  m->total_floats = m->total_bytes / elem_bytes;
153 }
static size_t bump_bytes(size_t *off, size_t bytes, size_t align)
#define CACHELINE_BYTES
CKModelConfig config
Definition: ckernel_ir.h:73
size_t qkv_bias_offset
Definition: ckernel_model.h:26
size_t fc2_weight_offset
Definition: ckernel_model.h:34
size_t fc1_bias_offset
Definition: ckernel_model.h:32
size_t fc1_weight_offset
Definition: ckernel_model.h:31
size_t ln1_weight_offset
Definition: ckernel_model.h:22
size_t qkv_weight_offset
Definition: ckernel_model.h:25
size_t attn_proj_bias_offset
Definition: ckernel_model.h:29
size_t ln1_bias_offset
Definition: ckernel_model.h:23
size_t fc2_bias_offset
Definition: ckernel_model.h:35
size_t attn_proj_weight_offset
Definition: ckernel_model.h:28
int context_window
Definition: ckernel_ir.h:30
int intermediate_size
Definition: ck_model_api.h:37
size_t lm_head_weight_offset
Definition: ckernel_model.h:57
size_t final_ln_weight_offset
Definition: ckernel_model.h:53
size_t embedded_input_offset
Definition: ckernel_model.h:50
size_t final_ln_bias_offset
Definition: ckernel_model.h:54
size_t token_emb_offset
Definition: ckernel_model.h:48
size_t layers_start_offset
Definition: ckernel_model.h:51
size_t final_output_offset
Definition: ckernel_model.h:55
CKLayerLayout * layers
Definition: ckernel_model.h:61
CKModelConfig cfg
Definition: ckernel_model.h:39

References align_up_bytes(), CKLayerLayout::attn_proj_bias_offset, CKLayerLayout::attn_proj_weight_offset, bump_bytes(), CACHELINE_BYTES, TransformerModel::cfg, CKIRGraph::config, CKModelConfig::context_window, TransformerModel::elem_bytes, TransformerModel::embedded_input_offset, CKLayerLayout::fc1_bias_offset, CKLayerLayout::fc1_weight_offset, CKLayerLayout::fc2_bias_offset, CKLayerLayout::fc2_weight_offset, TransformerModel::final_ln_bias_offset, TransformerModel::final_ln_weight_offset, TransformerModel::final_output_offset, CKModelConfig::hidden_size, CKModelConfig::intermediate_size, TransformerModel::layers, TransformerModel::layers_start_offset, TransformerModel::lm_head_weight_offset, CKLayerLayout::ln1_bias_offset, CKLayerLayout::ln1_weight_offset, TransformerModel::logits_offset, CKModelConfig::num_layers, TransformerModel::pos_emb_offset, CKLayerLayout::qkv_bias_offset, CKLayerLayout::qkv_weight_offset, TransformerModel::token_emb_offset, TransformerModel::total_bytes, TransformerModel::total_floats, and CKModelConfig::vocab_size.