Section-Based Memory Layout: Header / Body / Footer Pattern. More...
#include <stddef.h>
#include <stdint.h>
Go to the source code of this file.
Data Structures | |
| struct | CKFooterGradOffsets |
| struct | CKFooterOffsets |
| struct | CKHeaderOffsets |
| struct | CKLayerGradOffsets |
| struct | CKLayerOffsets |
| struct | CKLayerOptimizerOffsets |
| struct | CKModel |
| struct | CKSection |
| struct | CKSectionConfig |
Macros | |
| #define | CK_ALIGN_CACHE 64ULL /* CPU cache line (64 bytes) */ |
| #define | CK_ALIGN_HUGE_1GB (1ULL << 30) /* 1GB hugepage (NUMA optimal) */ |
| #define | CK_ALIGN_HUGE_2MB (2ULL << 20) /* 2MB hugepage */ |
| #define | CK_ALIGN_SIMD 64ULL /* AVX-512 register (64 bytes) */ |
| #define | CK_FOOTER(model, s) (&(model)->sections[s].footer) |
| #define | CK_HEADER(model, s) (&(model)->sections[s].header) |
| #define | CK_LAYER(model, s, l) (&(model)->sections[s].layers[l]) |
| #define | CK_LAYER_GRAD(model, s, l) (&(model)->sections[s].layer_grads[l]) |
| #define | CK_PTR(model, offset) ((float*)((char*)(model)->base + (offset))) |
| #define | CK_PTR_BF16(model, offset) ((uint16_t*)((char*)(model)->base + (offset))) |
Enumerations | |
| enum | CKFusionFlags { CK_FUSE_NONE = 0 , CK_FUSE_EMBED_NORM = 1 << 0 , CK_FUSE_NORM_QKV = 1 << 1 , CK_FUSE_QKV_ROPE = 1 << 2 , CK_FUSE_ATTN_PROJ = 1 << 3 , CK_FUSE_NORM_MLP = 1 << 4 , CK_FUSE_MLP_GATE_UP = 1 << 5 , CK_FUSE_MLP_ACT_DOWN = 1 << 6 , CK_FUSE_ADD_NORM = 1 << 7 , CK_FUSE_RESIDUAL_NORM = 1 << 7 } |
Functions | |
| int | ck_model_allocate (CKModel *model, int hugepage_mode) |
| void | ck_model_free (CKModel *model) |
| size_t | ck_model_plan (CKModel *model, const CKSectionConfig *configs, int num_sections, int training_enabled, uint32_t fusion_flags) |
| void | ck_section_config_init (CKSectionConfig *config, size_t simd_align) |
| size_t | ck_section_plan (CKSection *section, const CKSectionConfig *config, int training_enabled, size_t base_offset) |
Section-Based Memory Layout: Header / Body / Footer Pattern.
Multi-modal models have multiple SECTIONS. Each section is a complete encoder/decoder/vision/audio module with its own dimensions.
┌─────────────────────────────────────────────────────────────────┐
│                        SINGLE ALLOCATION                        │
├─────────────────────────────────────────────────────────────────┤
│  SECTION 0: Vision Encoder                                      │
│  ├── HEADER: patch_embed, pos_embed                             │
│  ├── BODY: layer[0..N] (weights + activations interleaved)      │
│  └── FOOTER: final_norm, bridge_to_text                         │
├─────────────────────────────────────────────────────────────────┤
│  SECTION 1: Text Decoder                                        │
│  ├── HEADER: token_embed, pos_embed                             │
│  ├── BODY: layer[0..N] (weights + activations interleaved)      │
│  └── FOOTER: final_norm, lm_head, logits                        │
├─────────────────────────────────────────────────────────────────┤
│  SECTION 2: Audio Encoder (optional)                            │
│  ├── HEADER: mel_embed                                          │
│  ├── BODY: layer[0..N]                                          │
│  └── FOOTER: bridge_to_text                                     │
├─────────────────────────────────────────────────────────────────┤
│  GRADIENTS (if training)                                        │
│  └── Same layout: section[0].grads, section[1].grads, ...       │
├─────────────────────────────────────────────────────────────────┤
│  OPTIMIZER STATE (if training with Adam)                        │
│  └── m[], v[] for each weight                                   │
└─────────────────────────────────────────────────────────────────┘
Within each layer, memory is laid out in the ORDER operations execute:
weight → activation → weight → activation → ...
This is critical for CPU cache efficiency. The CPU streams forward, prefetching the next cache line while processing the current one.
Definition in file ckernel_section_layout.h.
| #define CK_ALIGN_CACHE 64ULL /* CPU cache line (64 bytes) */ |
Definition at line 88 of file ckernel_section_layout.h.
| #define CK_ALIGN_HUGE_1GB (1ULL << 30) /* 1GB hugepage (NUMA optimal) */ |
Definition at line 91 of file ckernel_section_layout.h.
| #define CK_ALIGN_HUGE_2MB (2ULL << 20) /* 2MB hugepage */ |
Definition at line 90 of file ckernel_section_layout.h.
| #define CK_ALIGN_SIMD 64ULL /* AVX-512 register (64 bytes) */ |
Definition at line 89 of file ckernel_section_layout.h.
| #define CK_FOOTER(model, s) (&(model)->sections[s].footer) |
Definition at line 541 of file ckernel_section_layout.h.
| #define CK_HEADER(model, s) (&(model)->sections[s].header) |
Definition at line 537 of file ckernel_section_layout.h.
| #define CK_LAYER(model, s, l) (&(model)->sections[s].layers[l]) |
Definition at line 529 of file ckernel_section_layout.h.
| #define CK_LAYER_GRAD(model, s, l) (&(model)->sections[s].layer_grads[l]) |
Definition at line 533 of file ckernel_section_layout.h.
| #define CK_PTR(model, offset) ((float*)((char*)(model)->base + (offset))) |
Definition at line 522 of file ckernel_section_layout.h.
| #define CK_PTR_BF16(model, offset) ((uint16_t*)((char*)(model)->base + (offset))) |
Definition at line 525 of file ckernel_section_layout.h.
| enum CKFusionFlags |
Definition at line 459 of file ckernel_section_layout.h.
| int ck_model_allocate(CKModel *model, int hugepage_mode) |
Allocate the planned memory.
| hugepage_mode | 0=normal, 1=2MB hugepages, 2=1GB hugepages |
| void ck_model_free(CKModel *model) |
Free the model (single free, since single allocation).
| size_t ck_model_plan(CKModel *model, const CKSectionConfig *configs, int num_sections, int training_enabled, uint32_t fusion_flags) |
Plan memory layout for complete model. Returns total bytes needed.
| void ck_section_config_init(CKSectionConfig *config, size_t simd_align) |
Initialize section config with computed alignments.
| size_t ck_section_plan(CKSection *section, const CKSectionConfig *config, int training_enabled, size_t base_offset) |
Plan memory layout for a single section. Returns bytes needed for this section.