← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6.5/test_generated/ck-kernel-inference.h File Reference

AUTO-GENERATED: qwen2_0.5b_decode Memory Layout. More...

#include <stddef.h>
#include <stdint.h>

Go to the source code of this file.

Data Structures

struct  QWEN2_0_5B_DECODECanary
 
struct  QWEN2_0_5B_DECODEFooterOffsets
 
struct  QWEN2_0_5B_DECODEGlobalOffsets
 
struct  QWEN2_0_5B_DECODEHeaderOffsets
 
struct  QWEN2_0_5B_DECODELayerOffsets
 
struct  QWEN2_0_5B_DECODEModel
 

Macros

#define QWEN2_0_5B_DECODE_ACTIVATION_BYTES   3256169984ULL
 
#define QWEN2_0_5B_DECODE_CANARY_COUNT   566
 
#define QWEN2_0_5B_DECODE_CANARY_SIZE   64
 
#define QWEN2_0_5B_DECODE_CANARY_VALUE   0xDEADBEEFUL
 
#define QWEN2_0_5B_DECODE_DTYPE_BYTES   4
 
#define QWEN2_0_5B_DECODE_EMBED_DIM   896
 
#define QWEN2_0_5B_DECODE_HEAD_DIM   64
 
#define QWEN2_0_5B_DECODE_INTERMEDIATE   4864
 
#define QWEN2_0_5B_DECODE_LAYER(layer_id)    (&QWEN2_0_5B_DECODE_LAYERS[layer_id])
 
#define QWEN2_0_5B_DECODE_LAYER_STRIDE   0x0892CC00ULL
 
#define QWEN2_0_5B_DECODE_MAGIC   0x434B454EUL
 
#define QWEN2_0_5B_DECODE_MAX_SEQ_LEN   131072
 
#define QWEN2_0_5B_DECODE_NUM_HEADS   14
 
#define QWEN2_0_5B_DECODE_NUM_KV_HEADS   2
 
#define QWEN2_0_5B_DECODE_NUM_LAYERS   24
 
#define QWEN2_0_5B_DECODE_NUM_MERGES   0
 
#define QWEN2_0_5B_DECODE_PTR(model, offset)    ((float*)((char*)(model)->base + (offset)))
 
#define QWEN2_0_5B_DECODE_PTR_BF16(model, offset)    ((uint16_t*)((char*)(model)->base + (offset)))
 
#define QWEN2_0_5B_DECODE_TOTAL_BYTES   3573889600ULL
 
#define QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES   0
 
#define QWEN2_0_5B_DECODE_VOCAB_SIZE   151936
 
#define QWEN2_0_5B_DECODE_WEIGHT_BYTES   317683328ULL
 

Functions

void qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)
 
void qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)
 
int qwen2_0_5b_decode_model_allocate (QWEN2_0_5B_DECODEModel *model)
 
void qwen2_0_5b_decode_model_free (QWEN2_0_5B_DECODEModel *model)
 
int qwen2_0_5b_decode_verify_canaries (QWEN2_0_5B_DECODEModel *model)
 

Variables

static const QWEN2_0_5B_DECODECanary QWEN2_0_5B_DECODE_CANARIES []
 
static const QWEN2_0_5B_DECODEFooterOffsets QWEN2_0_5B_DECODE_FOOTER
 
static const QWEN2_0_5B_DECODEGlobalOffsets QWEN2_0_5B_DECODE_GLOBALS
 
static const QWEN2_0_5B_DECODEHeaderOffsets QWEN2_0_5B_DECODE_HEADER
 
static const QWEN2_0_5B_DECODELayerOffsets QWEN2_0_5B_DECODE_LAYERS [24]
 

Detailed Description

AUTO-GENERATED: qwen2_0.5b_decode Memory Layout.

Generated: 2026-01-12T04:06:36.660353 UTC. Total Memory: 3.57 GB.

DO NOT EDIT - Regenerate with build_ir_v3.py

Definition in file v6.5/test_generated/ck-kernel-inference.h.

Macro Definition Documentation

◆ QWEN2_0_5B_DECODE_ACTIVATION_BYTES

#define QWEN2_0_5B_DECODE_ACTIVATION_BYTES   3256169984ULL

Definition at line 39 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_COUNT

#define QWEN2_0_5B_DECODE_CANARY_COUNT   566

Definition at line 1259 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_SIZE

#define QWEN2_0_5B_DECODE_CANARY_SIZE   64

Definition at line 43 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_VALUE

#define QWEN2_0_5B_DECODE_CANARY_VALUE   0xDEADBEEFUL

Definition at line 42 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_DTYPE_BYTES

#define QWEN2_0_5B_DECODE_DTYPE_BYTES   4

Definition at line 35 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_EMBED_DIM

#define QWEN2_0_5B_DECODE_EMBED_DIM   896

Definition at line 25 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_HEAD_DIM

#define QWEN2_0_5B_DECODE_HEAD_DIM   64

Definition at line 28 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_INTERMEDIATE

#define QWEN2_0_5B_DECODE_INTERMEDIATE   4864

Definition at line 29 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_LAYER

#define QWEN2_0_5B_DECODE_LAYER(layer_id)    (&QWEN2_0_5B_DECODE_LAYERS[layer_id])

Definition at line 1280 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_LAYER_STRIDE

#define QWEN2_0_5B_DECODE_LAYER_STRIDE   0x0892CC00ULL

Definition at line 648 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_MAGIC

#define QWEN2_0_5B_DECODE_MAGIC   0x434B454EUL

Definition at line 41 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_MAX_SEQ_LEN

#define QWEN2_0_5B_DECODE_MAX_SEQ_LEN   131072

Definition at line 32 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_HEADS

#define QWEN2_0_5B_DECODE_NUM_HEADS   14

Definition at line 26 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_KV_HEADS

#define QWEN2_0_5B_DECODE_NUM_KV_HEADS   2

Definition at line 27 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_LAYERS

#define QWEN2_0_5B_DECODE_NUM_LAYERS   24

Definition at line 30 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_MERGES

#define QWEN2_0_5B_DECODE_NUM_MERGES   0

Definition at line 33 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_PTR

#define QWEN2_0_5B_DECODE_PTR(model, offset)    ((float*)((char*)(model)->base + (offset)))

Definition at line 1274 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_PTR_BF16

#define QWEN2_0_5B_DECODE_PTR_BF16(model, offset)    ((uint16_t*)((char*)(model)->base + (offset)))

Definition at line 1277 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_TOTAL_BYTES

#define QWEN2_0_5B_DECODE_TOTAL_BYTES   3573889600ULL

Definition at line 37 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES

#define QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES   0

Definition at line 34 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_VOCAB_SIZE

#define QWEN2_0_5B_DECODE_VOCAB_SIZE   151936

Definition at line 31 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_WEIGHT_BYTES

#define QWEN2_0_5B_DECODE_WEIGHT_BYTES   317683328ULL

Definition at line 38 of file v6.5/test_generated/ck-kernel-inference.h.

Function Documentation

◆ qwen2_0_5b_decode_decode()

void qwen2_0_5b_decode_decode ( QWEN2_0_5B_DECODEModel *  model,
const int *  token,
int  token_index 
)

Definition at line 8022 of file v6.5/test_generated/ck-kernel-inference.c.

8022  {
8023  qwen2_0_5b_decode_decode_token(model, token, token_index);
8024 }
const char * token
Definition: tokenizer.h:306
static void qwen2_0_5b_decode_decode_token(QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)

◆ qwen2_0_5b_decode_forward()

void qwen2_0_5b_decode_forward ( QWEN2_0_5B_DECODEModel *  model,
const int *  tokens,
int  num_tokens 
)

Definition at line 8013 of file v6.5/test_generated/ck-kernel-inference.c.

8017  {
8018  if (!model || !tokens || num_tokens <= 0) return;
8019  qwen2_0_5b_decode_forward_prefill_impl(model, tokens, num_tokens);
8020 }
static void qwen2_0_5b_decode_forward_prefill_impl(QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)

◆ qwen2_0_5b_decode_model_allocate()

int qwen2_0_5b_decode_model_allocate ( QWEN2_0_5B_DECODEModel *  model)

Definition at line 88 of file v6.5/test_generated/ck-kernel-inference.c.

88  {
89  size_t total = QWEN2_0_5B_DECODE_TOTAL_BYTES;
90 
91 #ifdef __linux__
92  model->base = mmap(NULL, total,
93  PROT_READ | PROT_WRITE,
94  MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
95  -1, 0);
96  if (model->base == MAP_FAILED) {
97  model->base = mmap(NULL, total,
98  PROT_READ | PROT_WRITE,
99  MAP_PRIVATE | MAP_ANONYMOUS,
100  -1, 0);
101  }
102  if (model->base == MAP_FAILED) {
103  perror("mmap failed");
104  return -1;
105  }
106 #else
107  model->base = aligned_alloc(64, total);
108  if (!model->base) {
109  perror("aligned_alloc failed");
110  return -1;
111  }
112 #endif
113 
114  model->total_bytes = total;
115 
116  /* Initialize magic header */
117  MagicHeader *header = (MagicHeader *)model->base;
118  header->magic = QWEN2_0_5B_DECODE_MAGIC;
119  header->version = 5;
120  header->total_bytes = QWEN2_0_5B_DECODE_TOTAL_BYTES;
121  header->weight_bytes = QWEN2_0_5B_DECODE_WEIGHT_BYTES;
122  header->activation_bytes = QWEN2_0_5B_DECODE_ACTIVATION_BYTES;
123  header->num_layers = QWEN2_0_5B_DECODE_NUM_LAYERS;
124  header->embed_dim = QWEN2_0_5B_DECODE_EMBED_DIM;
125  header->num_heads = QWEN2_0_5B_DECODE_NUM_HEADS;
126  header->vocab_size = QWEN2_0_5B_DECODE_VOCAB_SIZE;
127  header->max_seq_len = QWEN2_0_5B_DECODE_MAX_SEQ_LEN;
128  header->canary_count = QWEN2_0_5B_DECODE_CANARY_COUNT;
129 
130  /* Initialize canary guards */
131  for (int i = 0; i < QWEN2_0_5B_DECODE_CANARY_COUNT; i++) {
132  uint32_t *ptr = (uint32_t*)((char*)model->base + QWEN2_0_5B_DECODE_CANARIES[i].offset);
133  for (int j = 0; j < (QWEN2_0_5B_DECODE_CANARY_SIZE / 4); j++) {
134  ptr[j] = QWEN2_0_5B_DECODE_CANARY_VALUE;
135  }
136  }
137 
138  return 0;
139 }
#define QWEN2_0_5B_DECODE_TOTAL_BYTES
#define QWEN2_0_5B_DECODE_ACTIVATION_BYTES
#define QWEN2_0_5B_DECODE_MAX_SEQ_LEN
#define QWEN2_0_5B_DECODE_NUM_LAYERS
#define QWEN2_0_5B_DECODE_WEIGHT_BYTES
#define QWEN2_0_5B_DECODE_CANARY_VALUE
#define QWEN2_0_5B_DECODE_CANARY_SIZE
#define QWEN2_0_5B_DECODE_VOCAB_SIZE
static const QWEN2_0_5B_DECODECanary QWEN2_0_5B_DECODE_CANARIES[]

◆ qwen2_0_5b_decode_model_free()

void qwen2_0_5b_decode_model_free ( QWEN2_0_5B_DECODEModel *  model)

Definition at line 141 of file v6.5/test_generated/ck-kernel-inference.c.

141  {
142  if (!model || !model->base) return;
143 #ifdef __linux__
144  munmap(model->base, model->total_bytes);
145 #else
146  free(model->base);
147 #endif
148  model->base = NULL;
149  model->total_bytes = 0;
150 }

◆ qwen2_0_5b_decode_verify_canaries()

int qwen2_0_5b_decode_verify_canaries ( QWEN2_0_5B_DECODEModel *  model)

Definition at line 152 of file v6.5/test_generated/ck-kernel-inference.c.

152  {
153  int errors = 0;
154  uint32_t *ptr;
155 
156  for (int i = 0; i < QWEN2_0_5B_DECODE_CANARY_COUNT; i++) {
157  ptr = (uint32_t*)((char*)model->base + QWEN2_0_5B_DECODE_CANARIES[i].offset);
158  for (int j = 0; j < 4; j++) {
159  if (ptr[j] != QWEN2_0_5B_DECODE_CANARY_VALUE) {
160  fprintf(stderr, "CANARY CORRUPTION: %s at offset 0x%lX\n",
162  QWEN2_0_5B_DECODE_CANARIES[i].offset);
163  errors++;
164  break;
165  }
166  }
167  }
168 
169  return errors;
170 }

Variable Documentation

◆ QWEN2_0_5B_DECODE_CANARIES

const QWEN2_0_5B_DECODECanary QWEN2_0_5B_DECODE_CANARIES[]
static

◆ QWEN2_0_5B_DECODE_FOOTER

const QWEN2_0_5B_DECODEFooterOffsets QWEN2_0_5B_DECODE_FOOTER
static
Initial value:
= {
.final_ln_weight = 0xD2FBE080,
.final_output = 0xD2FBE300,
.lm_head_weight = 0x00000080,
.logits = 0xD2FBF340,
}

Definition at line 661 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by ck_model_get_logits(), qwen2_0_5b_decode_decode_token(), and qwen2_0_5b_decode_forward_prefill_impl().

◆ QWEN2_0_5B_DECODE_GLOBALS

const QWEN2_0_5B_DECODEGlobalOffsets QWEN2_0_5B_DECODE_GLOBALS
static
Initial value:
= {
.rope_cos_cache = 0xD30539C0,
.rope_sin_cache = 0xD4053A00,
}

Definition at line 677 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), qwen2_0_5b_decode_layer_9_prefill(), and qwen2_0_5b_decode_precompute_rope().

◆ QWEN2_0_5B_DECODE_HEADER

const QWEN2_0_5B_DECODEHeaderOffsets QWEN2_0_5B_DECODE_HEADER
static
Initial value:
= {
.token_emb = 0x00000080,
.vocab_offsets = 0x053760C0,
.vocab_strings = 0x0538AF40,
.vocab_merges = 0x0538AF80,
.embedded_input = 0x0538AFC0,
}

Definition at line 57 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by qwen2_0_5b_decode_decode_token(), qwen2_0_5b_decode_forward_prefill_impl(), qwen2_0_5b_decode_layer_0_decode(), and qwen2_0_5b_decode_layer_0_prefill().

◆ QWEN2_0_5B_DECODE_LAYERS

const QWEN2_0_5B_DECODELayerOffsets QWEN2_0_5B_DECODE_LAYERS[24]
static

Definition at line 93 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by qwen2_0_5b_decode_decode_token(), qwen2_0_5b_decode_forward_prefill_impl(), qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), and qwen2_0_5b_decode_layer_9_prefill().