AUTO-GENERATED: model Implementation (IR v6.5 - Explicit Unrolled) More...
#include "ck-kernel-inference.h"
#include "ckernel_engine.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
Go to the source code of this file.
Macros | |
| #define | _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Functions | |
| struct | __attribute__ ((packed)) |
| _Static_assert (sizeof(MagicHeader)==64, "MagicHeader must be 64 bytes") | |
| static int | model_align_elems (int elems, int elem_bytes, int align_bytes) |
| void | model_decode (MODELModel *model, const int *token, int token_index) |
| static void | model_decode_token (MODELModel *model, const int *token, int token_index) |
| void | model_forward (MODELModel *model, const int *tokens, int num_tokens) |
| static void | model_forward_prefill_impl (MODELModel *model, const int *tokens, int num_tokens) |
| static void | model_layer_0_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_0_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_10_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_10_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_11_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_11_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_12_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_12_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_13_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_13_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_14_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_14_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_15_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_15_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_16_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_16_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_17_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_17_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_18_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_18_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_19_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_19_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_1_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_1_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_20_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_20_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_21_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_21_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_22_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_22_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_23_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_23_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_2_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_2_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_3_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_3_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_4_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_4_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_5_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_5_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_6_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_6_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_7_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_7_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_8_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_8_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_9_decode (MODELModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | model_layer_9_prefill (MODELModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| int | model_model_allocate (MODELModel *model) |
| void | model_model_free (MODELModel *model) |
| void | model_precompute_rope (MODELModel *model) |
| static void | model_residual_add_token_major (const float *a, const float *b, float *out, int tokens, int aligned_embed_dim) |
| int | model_verify_canaries (MODELModel *model) |
Variables | |
| MagicHeader | |
AUTO-GENERATED: model Implementation (IR v6.5 - Explicit Unrolled)
Generated: 2026-01-12T04:06:43.069656 UTC Total Memory: 3.57 GB Mode: decode Layers: 24 (fully unrolled)
Per-layer quant types: Layer 0: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k Layer 1: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k Layer 2: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k ... (21 more layers)
DO NOT EDIT - Regenerate with build_ir_v6.5.py or codegen_v6.5.py
Definition in file v6.5/test_generated/int8_q4k_test.c.
| #define _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Definition at line 19 of file v6.5/test_generated/int8_q4k_test.c.
| struct __attribute__ | ( | (packed) | ) |
Definition at line 43 of file v6.5/test_generated/int8_q4k_test.c.
| _Static_assert | ( | sizeof(MagicHeader) | ==64,
| "MagicHeader must be 64 bytes" | |||
| ) |
|
static |
Definition at line 176 of file v6.5/test_generated/int8_q4k_test.c.
| void model_decode | ( | MODELModel * | model, |
| const int * | token, | ||
| int | token_index | ||
| ) |
Definition at line 8841 of file v6.5/test_generated/int8_q4k_test.c.
References model_decode_token(), and token.
|
static |
Definition at line 8750 of file v6.5/test_generated/int8_q4k_test.c.
References embedding_forward_q4_k(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, model_layer_0_decode(), model_layer_10_decode(), model_layer_11_decode(), model_layer_12_decode(), model_layer_13_decode(), model_layer_14_decode(), model_layer_15_decode(), model_layer_16_decode(), model_layer_17_decode(), model_layer_18_decode(), model_layer_19_decode(), model_layer_1_decode(), model_layer_20_decode(), model_layer_21_decode(), model_layer_22_decode(), model_layer_23_decode(), model_layer_2_decode(), model_layer_3_decode(), model_layer_4_decode(), model_layer_5_decode(), model_layer_6_decode(), model_layer_7_decode(), model_layer_8_decode(), model_layer_9_decode(), MODEL_VOCAB_SIZE, quantize_row_q8_k(), rmsnorm_forward(), and token.
Referenced by model_decode().
| void model_forward | ( | MODELModel * | model, |
| const int * | tokens, | ||
| int | num_tokens | ||
| ) |
Definition at line 8832 of file v6.5/test_generated/int8_q4k_test.c.
References model_forward_prefill_impl().
|
static |
Definition at line 4148 of file v6.5/test_generated/int8_q4k_test.c.
References CK_DT_Q8_K, ck_dtype_row_bytes(), embedding_forward_q4_k(), gemm_nt_q4_k_q8_k(), MODEL_EMBED_DIM, model_layer_0_prefill(), model_layer_10_prefill(), model_layer_11_prefill(), model_layer_12_prefill(), model_layer_13_prefill(), model_layer_14_prefill(), model_layer_15_prefill(), model_layer_16_prefill(), model_layer_17_prefill(), model_layer_18_prefill(), model_layer_19_prefill(), model_layer_1_prefill(), model_layer_20_prefill(), model_layer_21_prefill(), model_layer_22_prefill(), model_layer_23_prefill(), model_layer_2_prefill(), model_layer_3_prefill(), model_layer_4_prefill(), model_layer_5_prefill(), model_layer_6_prefill(), model_layer_7_prefill(), model_layer_8_prefill(), model_layer_9_prefill(), MODEL_NUM_LAYERS, MODEL_VOCAB_SIZE, quantize_row_q8_k(), and rmsnorm_forward().
Referenced by model_forward().
|
static |
Definition at line 4405 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 211 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6215 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1851 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6396 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2015 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6577 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2179 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6758 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2343 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6939 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2507 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 7120 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2671 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 7301 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2835 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 7482 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 2999 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 7663 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3163 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 7844 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3327 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 4586 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 375 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 8025 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3491 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 8206 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3655 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 8387 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3819 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 8568 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 3983 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 4767 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 539 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 4948 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 703 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 5129 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 867 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 5310 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1031 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 5491 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1195 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 5672 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1359 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 5853 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1523 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
|
static |
Definition at line 6034 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), quantize_row_q8_k(), rmsnorm_forward(), rope_forward(), and swiglu_forward().
Referenced by model_decode_token().
|
static |
Definition at line 1687 of file v6.5/test_generated/int8_q4k_test.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), MODEL_EMBED_DIM, MODEL_HEAD_DIM, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, model_residual_add_token_major(), rmsnorm_forward(), rope_forward_qk_strided(), and swiglu_forward().
Referenced by model_forward_prefill_impl().
| int model_model_allocate | ( | MODELModel * | model | ) |
Definition at line 88 of file v6.5/test_generated/int8_q4k_test.c.
References MagicHeader, MODEL_EMBED_DIM, MODEL_MAX_SEQ_LEN, MODEL_NUM_HEADS, MODEL_NUM_LAYERS, and MODEL_VOCAB_SIZE.
| void model_model_free | ( | MODELModel * | model | ) |
Definition at line 141 of file v6.5/test_generated/int8_q4k_test.c.
| void model_precompute_rope | ( | MODELModel * | model | ) |
Definition at line 186 of file v6.5/test_generated/int8_q4k_test.c.
References MODEL_HEAD_DIM, and MODEL_MAX_SEQ_LEN.
|
static |
Definition at line 43 of file v6.5/test_generated/int8_q4k_test.c.
Referenced by model_layer_0_decode(), model_layer_0_prefill(), model_layer_10_decode(), model_layer_10_prefill(), model_layer_11_decode(), model_layer_11_prefill(), model_layer_12_decode(), model_layer_12_prefill(), model_layer_13_decode(), model_layer_13_prefill(), model_layer_14_decode(), model_layer_14_prefill(), model_layer_15_decode(), model_layer_15_prefill(), model_layer_16_decode(), model_layer_16_prefill(), model_layer_17_decode(), model_layer_17_prefill(), model_layer_18_decode(), model_layer_18_prefill(), model_layer_19_decode(), model_layer_19_prefill(), model_layer_1_decode(), model_layer_1_prefill(), model_layer_20_decode(), model_layer_20_prefill(), model_layer_21_decode(), model_layer_21_prefill(), model_layer_22_decode(), model_layer_22_prefill(), model_layer_23_decode(), model_layer_23_prefill(), model_layer_2_decode(), model_layer_2_prefill(), model_layer_3_decode(), model_layer_3_prefill(), model_layer_4_decode(), model_layer_4_prefill(), model_layer_5_decode(), model_layer_5_prefill(), model_layer_6_decode(), model_layer_6_prefill(), model_layer_7_decode(), model_layer_7_prefill(), model_layer_8_decode(), model_layer_8_prefill(), model_layer_9_decode(), and model_layer_9_prefill().
| int model_verify_canaries | ( | MODELModel * | model | ) |
Definition at line 152 of file v6.5/test_generated/int8_q4k_test.c.
| MagicHeader |
Definition at line 80 of file v6.5/test_generated/int8_q4k_test.c.
Referenced by model_model_allocate().