AUTO-GENERATED: qwen2_0.5b_decode Memory Layout. More...

#include <stddef.h>
#include <stdint.h>

Data Structures
struct	QWEN2_0_5B_DECODECanary

struct	QWEN2_0_5B_DECODEFooterOffsets

struct	QWEN2_0_5B_DECODEGlobalOffsets

struct	QWEN2_0_5B_DECODEHeaderOffsets

struct	QWEN2_0_5B_DECODELayerOffsets

struct	QWEN2_0_5B_DECODEModel

Macros
#define	QWEN2_0_5B_DECODE_ACTIVATION_BYTES 3256169984ULL

#define	QWEN2_0_5B_DECODE_CANARY_COUNT 566

#define	QWEN2_0_5B_DECODE_CANARY_SIZE 64

#define	QWEN2_0_5B_DECODE_CANARY_VALUE 0xDEADBEEFUL

#define	QWEN2_0_5B_DECODE_DTYPE_BYTES 4

#define	QWEN2_0_5B_DECODE_EMBED_DIM 896

#define	QWEN2_0_5B_DECODE_HEAD_DIM 64

#define	QWEN2_0_5B_DECODE_INTERMEDIATE 4864

#define	QWEN2_0_5B_DECODE_LAYER(layer_id) (&QWEN2_0_5B_DECODE_LAYERS[layer_id])

#define	QWEN2_0_5B_DECODE_LAYER_STRIDE 0x0892CC00ULL

#define	QWEN2_0_5B_DECODE_MAGIC 0x434B454EUL

#define	QWEN2_0_5B_DECODE_MAX_SEQ_LEN 131072

#define	QWEN2_0_5B_DECODE_NUM_HEADS 14

#define	QWEN2_0_5B_DECODE_NUM_KV_HEADS 2

#define	QWEN2_0_5B_DECODE_NUM_LAYERS 24

#define	QWEN2_0_5B_DECODE_NUM_MERGES 0

#define	QWEN2_0_5B_DECODE_PTR(model, offset) ((float*)((char*)(model)->base + (offset)))

#define	QWEN2_0_5B_DECODE_PTR_BF16(model, offset) ((uint16_t*)((char*)(model)->base + (offset)))

#define	QWEN2_0_5B_DECODE_TOTAL_BYTES 3573889600ULL

#define	QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES 0

#define	QWEN2_0_5B_DECODE_VOCAB_SIZE 151936

#define	QWEN2_0_5B_DECODE_WEIGHT_BYTES 317683328ULL

Functions
void	qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)

void	qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)

int	qwen2_0_5b_decode_model_allocate (QWEN2_0_5B_DECODEModel *model)

void	qwen2_0_5b_decode_model_free (QWEN2_0_5B_DECODEModel *model)

int	qwen2_0_5b_decode_verify_canaries (QWEN2_0_5B_DECODEModel *model)

Variables
static const QWEN2_0_5B_DECODECanary	QWEN2_0_5B_DECODE_CANARIES []

static const QWEN2_0_5B_DECODEFooterOffsets	QWEN2_0_5B_DECODE_FOOTER

static const QWEN2_0_5B_DECODEGlobalOffsets	QWEN2_0_5B_DECODE_GLOBALS

static const QWEN2_0_5B_DECODEHeaderOffsets	QWEN2_0_5B_DECODE_HEADER

static const QWEN2_0_5B_DECODELayerOffsets	QWEN2_0_5B_DECODE_LAYERS [24]

Detailed Description

AUTO-GENERATED: qwen2_0.5b_decode Memory Layout.

Generated: 2026-01-12T04:06:36.660353 UTC Total Memory: 3.57 GB

DO NOT EDIT - Regenerate with build_ir_v3.py

Definition in file v6.5/test_generated/ck-kernel-inference.h.

Macro Definition Documentation

◆ QWEN2_0_5B_DECODE_ACTIVATION_BYTES

#define QWEN2_0_5B_DECODE_ACTIVATION_BYTES 3256169984ULL

Definition at line 39 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_COUNT

#define QWEN2_0_5B_DECODE_CANARY_COUNT 566

Definition at line 1259 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_SIZE

#define QWEN2_0_5B_DECODE_CANARY_SIZE 64

Definition at line 43 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_CANARY_VALUE

#define QWEN2_0_5B_DECODE_CANARY_VALUE 0xDEADBEEFUL

Definition at line 42 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_DTYPE_BYTES

#define QWEN2_0_5B_DECODE_DTYPE_BYTES 4

Definition at line 35 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_EMBED_DIM

#define QWEN2_0_5B_DECODE_EMBED_DIM 896

Definition at line 25 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_HEAD_DIM

#define QWEN2_0_5B_DECODE_HEAD_DIM 64

Definition at line 28 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_INTERMEDIATE

#define QWEN2_0_5B_DECODE_INTERMEDIATE 4864

Definition at line 29 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_LAYER

#define QWEN2_0_5B_DECODE_LAYER ( layer_id ) (&QWEN2_0_5B_DECODE_LAYERS[layer_id])

Definition at line 1280 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_LAYER_STRIDE

#define QWEN2_0_5B_DECODE_LAYER_STRIDE 0x0892CC00ULL

Definition at line 648 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_MAGIC

#define QWEN2_0_5B_DECODE_MAGIC 0x434B454EUL

Definition at line 41 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_MAX_SEQ_LEN

#define QWEN2_0_5B_DECODE_MAX_SEQ_LEN 131072

Definition at line 32 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_HEADS

#define QWEN2_0_5B_DECODE_NUM_HEADS 14

Definition at line 26 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_KV_HEADS

#define QWEN2_0_5B_DECODE_NUM_KV_HEADS 2

Definition at line 27 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_LAYERS

#define QWEN2_0_5B_DECODE_NUM_LAYERS 24

Definition at line 30 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_NUM_MERGES

#define QWEN2_0_5B_DECODE_NUM_MERGES 0

Definition at line 33 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_PTR

#define QWEN2_0_5B_DECODE_PTR	(	model,
		offset
	)	((float*)((char*)(model)->base + (offset)))

Definition at line 1274 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_PTR_BF16

#define QWEN2_0_5B_DECODE_PTR_BF16	(	model,
		offset
	)	((uint16_t*)((char*)(model)->base + (offset)))

Definition at line 1277 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_TOTAL_BYTES

#define QWEN2_0_5B_DECODE_TOTAL_BYTES 3573889600ULL

Definition at line 37 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES

#define QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES 0

Definition at line 34 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_VOCAB_SIZE

#define QWEN2_0_5B_DECODE_VOCAB_SIZE 151936

Definition at line 31 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_WEIGHT_BYTES

#define QWEN2_0_5B_DECODE_WEIGHT_BYTES 317683328ULL

Definition at line 38 of file v6.5/test_generated/ck-kernel-inference.h.

Function Documentation

◆ qwen2_0_5b_decode_decode()

void qwen2_0_5b_decode_decode	(	QWEN2_0_5B_DECODEModel *	model,
		const int *	token,
		int	token_index
	)

Definition at line 8022 of file v6.5/test_generated/ck-kernel-inference.c.

                                                                                                 {
     qwen2_0_5b_decode_decode_token(model, token, token_index);
 }

◆ qwen2_0_5b_decode_forward()

void qwen2_0_5b_decode_forward	(	QWEN2_0_5B_DECODEModel *	model,
		const int *	tokens,
		int	num_tokens
	)

Definition at line 8013 of file v6.5/test_generated/ck-kernel-inference.c.

   {
     if (!model || !tokens || num_tokens <= 0) return;
     qwen2_0_5b_decode_forward_prefill_impl(model, tokens, num_tokens);
 }

◆ qwen2_0_5b_decode_model_allocate()

int qwen2_0_5b_decode_model_allocate ( QWEN2_0_5B_DECODEModel * model )

Definition at line 88 of file v6.5/test_generated/ck-kernel-inference.c.

                                                                     {
     size_t total = QWEN2_0_5B_DECODE_TOTAL_BYTES;
  
 #ifdef __linux__
     model->base = mmap(NULL, total,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
                        -1, 0);
     if (model->base == MAP_FAILED) {
         model->base = mmap(NULL, total,
                            PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS,
                            -1, 0);
     }
     if (model->base == MAP_FAILED) {
         perror("mmap failed");
         return -1;
     }
 #else
     model->base = aligned_alloc(64, total);
     if (!model->base) {
         perror("aligned_alloc failed");
         return -1;
     }
 #endif
  
     model->total_bytes = total;
  
     /* Initialize magic header */
     MagicHeader *header = (MagicHeader *)model->base;
     header->magic = QWEN2_0_5B_DECODE_MAGIC;
     header->version = 5;
     header->total_bytes = QWEN2_0_5B_DECODE_TOTAL_BYTES;
     header->weight_bytes = QWEN2_0_5B_DECODE_WEIGHT_BYTES;
     header->activation_bytes = QWEN2_0_5B_DECODE_ACTIVATION_BYTES;
     header->num_layers = QWEN2_0_5B_DECODE_NUM_LAYERS;
     header->embed_dim = QWEN2_0_5B_DECODE_EMBED_DIM;
     header->num_heads = QWEN2_0_5B_DECODE_NUM_HEADS;
     header->vocab_size = QWEN2_0_5B_DECODE_VOCAB_SIZE;
     header->max_seq_len = QWEN2_0_5B_DECODE_MAX_SEQ_LEN;
     header->canary_count = QWEN2_0_5B_DECODE_CANARY_COUNT;
  
     /* Initialize canary guards */
     for (int i = 0; i < QWEN2_0_5B_DECODE_CANARY_COUNT; i++) {
         uint32_t *ptr = (uint32_t*)((char*)model->base + QWEN2_0_5B_DECODE_CANARIES[i].offset);
         for (int j = 0; j < (QWEN2_0_5B_DECODE_CANARY_SIZE / 4); j++) {
             ptr[j] = QWEN2_0_5B_DECODE_CANARY_VALUE;
         }
     }
  
     return 0;
 }

◆ qwen2_0_5b_decode_model_free()

void qwen2_0_5b_decode_model_free ( QWEN2_0_5B_DECODEModel * model )

Definition at line 141 of file v6.5/test_generated/ck-kernel-inference.c.

                                                                  {
     if (!model || !model->base) return;
 #ifdef __linux__
     munmap(model->base, model->total_bytes);
 #else
     free(model->base);
 #endif
     model->base = NULL;
     model->total_bytes = 0;
 }

◆ qwen2_0_5b_decode_verify_canaries()

int qwen2_0_5b_decode_verify_canaries ( QWEN2_0_5B_DECODEModel * model )

Definition at line 152 of file v6.5/test_generated/ck-kernel-inference.c.

                                                                      {
     int errors = 0;
     uint32_t *ptr;
  
     for (int i = 0; i < QWEN2_0_5B_DECODE_CANARY_COUNT; i++) {
         ptr = (uint32_t*)((char*)model->base + QWEN2_0_5B_DECODE_CANARIES[i].offset);
         for (int j = 0; j < 4; j++) {
             if (ptr[j] != QWEN2_0_5B_DECODE_CANARY_VALUE) {
                 fprintf(stderr, "CANARY CORRUPTION: %s at offset 0x%lX\n",
                         QWEN2_0_5B_DECODE_CANARIES[i].name,
                         QWEN2_0_5B_DECODE_CANARIES[i].offset);
                 errors++;
                 break;
             }
         }
     }
  
     return errors;
 }

Variable Documentation

◆ QWEN2_0_5B_DECODE_CANARIES

const QWEN2_0_5B_DECODECanary QWEN2_0_5B_DECODE_CANARIES[]

static

Definition at line 691 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by qwen2_0_5b_decode_model_allocate(), and qwen2_0_5b_decode_verify_canaries().

◆ QWEN2_0_5B_DECODE_FOOTER

const QWEN2_0_5B_DECODEFooterOffsets QWEN2_0_5B_DECODE_FOOTER

static

Initial value:

= {
    .final_ln_weight = 0xD2FBE080,
    .final_output = 0xD2FBE300,
    .lm_head_weight = 0x00000080,  
    .logits = 0xD2FBF340,
}

Definition at line 661 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by ck_model_get_logits(), qwen2_0_5b_decode_decode_token(), and qwen2_0_5b_decode_forward_prefill_impl().

◆ QWEN2_0_5B_DECODE_GLOBALS

const QWEN2_0_5B_DECODEGlobalOffsets QWEN2_0_5B_DECODE_GLOBALS

static

Initial value:

= {
    .rope_cos_cache = 0xD30539C0,
    .rope_sin_cache = 0xD4053A00,
}

Definition at line 677 of file v6.5/test_generated/ck-kernel-inference.h.

◆ QWEN2_0_5B_DECODE_HEADER

const QWEN2_0_5B_DECODEHeaderOffsets QWEN2_0_5B_DECODE_HEADER

static

Initial value:

= {
    .token_emb = 0x00000080,
    .vocab_offsets = 0x053760C0,
    .vocab_strings = 0x0538AF40,
    .vocab_merges = 0x0538AF80,
    .embedded_input = 0x0538AFC0,
}

Definition at line 57 of file v6.5/test_generated/ck-kernel-inference.h.

Referenced by qwen2_0_5b_decode_decode_token(), qwen2_0_5b_decode_forward_prefill_impl(), qwen2_0_5b_decode_layer_0_decode(), and qwen2_0_5b_decode_layer_0_prefill().

◆ QWEN2_0_5B_DECODE_LAYERS

const QWEN2_0_5B_DECODELayerOffsets QWEN2_0_5B_DECODE_LAYERS[24]

static

Definition at line 93 of file v6.5/test_generated/ck-kernel-inference.h.

Data Structures

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ QWEN2_0_5B_DECODE_ACTIVATION_BYTES

◆ QWEN2_0_5B_DECODE_CANARY_COUNT

◆ QWEN2_0_5B_DECODE_CANARY_SIZE

◆ QWEN2_0_5B_DECODE_CANARY_VALUE

◆ QWEN2_0_5B_DECODE_DTYPE_BYTES

◆ QWEN2_0_5B_DECODE_EMBED_DIM

◆ QWEN2_0_5B_DECODE_HEAD_DIM

◆ QWEN2_0_5B_DECODE_INTERMEDIATE

◆ QWEN2_0_5B_DECODE_LAYER

◆ QWEN2_0_5B_DECODE_LAYER_STRIDE

◆ QWEN2_0_5B_DECODE_MAGIC

◆ QWEN2_0_5B_DECODE_MAX_SEQ_LEN

◆ QWEN2_0_5B_DECODE_NUM_HEADS

◆ QWEN2_0_5B_DECODE_NUM_KV_HEADS

◆ QWEN2_0_5B_DECODE_NUM_LAYERS

◆ QWEN2_0_5B_DECODE_NUM_MERGES

◆ QWEN2_0_5B_DECODE_PTR

◆ QWEN2_0_5B_DECODE_PTR_BF16

◆ QWEN2_0_5B_DECODE_TOTAL_BYTES

◆ QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES

◆ QWEN2_0_5B_DECODE_VOCAB_SIZE

◆ QWEN2_0_5B_DECODE_WEIGHT_BYTES

Function Documentation

◆ qwen2_0_5b_decode_decode()

◆ qwen2_0_5b_decode_forward()

◆ qwen2_0_5b_decode_model_allocate()

◆ qwen2_0_5b_decode_model_free()

◆ qwen2_0_5b_decode_verify_canaries()

Variable Documentation

◆ QWEN2_0_5B_DECODE_CANARIES

◆ QWEN2_0_5B_DECODE_FOOTER

◆ QWEN2_0_5B_DECODE_GLOBALS

◆ QWEN2_0_5B_DECODE_HEADER

◆ QWEN2_0_5B_DECODE_LAYERS