AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6.5 - Explicit Unrolled) More...
#include "ck-kernel-inference.h"
#include "ckernel_engine.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
#include "ck_model_api.h"

Go to the source code of this file.
Macros | |
| #define | _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Functions | |
| struct | __attribute__ ((packed)) |
| _Static_assert (sizeof(MagicHeader)==64, "MagicHeader must be 64 bytes") | |
| void * | ck_model_create (void) |
| void | ck_model_decode (void *model, const int *token, int token_index) |
| void | ck_model_forward (void *model, const int *tokens, int num_tokens) |
| void | ck_model_free (void *model) |
| void * | ck_model_get_base (void *model) |
| const CKModelConfig * | ck_model_get_config (void) |
| float * | ck_model_get_logits (void *model) |
| size_t | ck_model_get_total_bytes (void *model) |
| void | ck_model_precompute_rope (void *model) |
| int | ck_model_verify_canaries (void *model) |
| static int | qwen2_0_5b_decode_align_elems (int elems, int elem_bytes, int align_bytes) |
| void | qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| static void | qwen2_0_5b_decode_decode_token (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| void | qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_forward_prefill_impl (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_layer_0_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_0_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_10_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_10_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_11_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_11_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_12_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_12_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_13_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_13_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_14_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_14_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_15_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_15_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_16_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_16_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_17_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_17_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_18_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_18_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_19_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_19_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_1_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_1_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_20_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_20_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_21_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_21_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_22_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_22_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_23_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_23_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_2_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_2_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_3_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_3_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_4_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_4_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_5_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_5_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_6_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_6_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_7_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_7_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_8_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_8_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_9_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_9_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| int | qwen2_0_5b_decode_model_allocate (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_model_free (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_precompute_rope (QWEN2_0_5B_DECODEModel *model) |
| static void | qwen2_0_5b_decode_residual_add_token_major (const float *a, const float *b, float *out, int tokens, int aligned_embed_dim) |
| int | qwen2_0_5b_decode_verify_canaries (QWEN2_0_5B_DECODEModel *model) |
Variables | |
| static CKModelConfig | g_model_config |
| MagicHeader | |
AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6.5 - Explicit Unrolled)
Generated: 2026-01-12T11:58:55.212793 UTC Total Memory: 3.57 GB Mode: decode Layers: 24 (fully unrolled)
Per-layer quant types: Layer 0: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k Layer 1: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k Layer 2: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k ... (21 more layers)
DO NOT EDIT - Regenerate with build_ir_v6.5.py or codegen_v6.5.py
Definition in file v6.5/test_generated/qwen2_int8.c.
| #define _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Definition at line 19 of file v6.5/test_generated/qwen2_int8.c.
struct __attribute__((packed))
Definition at line 43 of file v6.5/test_generated/qwen2_int8.c.
_Static_assert(sizeof(MagicHeader) == 64, "MagicHeader must be 64 bytes")
void *ck_model_create(void)
Create and allocate model memory. Returns opaque model pointer, or NULL on failure.
Definition at line 8873 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_model_allocate().
void ck_model_decode(void *model, const int *token, int token_index)
Decode single token at position token_index. Used for autoregressive generation.
Definition at line 8897 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_decode(), and token.
void ck_model_forward(void *model, const int *tokens, int num_tokens)
Forward pass (prefill) - process multiple tokens. Used for initial prompt processing.
Definition at line 8893 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_forward().
void ck_model_free(void *model)
Free model memory.
Definition at line 8883 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_model_free().
void *ck_model_get_base(void *model)
Get model base pointer (for weight loading).
Definition at line 8910 of file v6.5/test_generated/qwen2_int8.c.
const CKModelConfig *ck_model_get_config(void)
Get model configuration (dimensions, sizes, etc.) This is available before allocation.
Definition at line 8869 of file v6.5/test_generated/qwen2_int8.c.
References g_model_config.
float *ck_model_get_logits(void *model)
Get pointer to output logits buffer. Size is vocab_size floats.
Definition at line 8901 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODEFooterOffsets::logits, QWEN2_0_5B_DECODE_FOOTER, and QWEN2_0_5B_DECODE_PTR.
size_t ck_model_get_total_bytes(void *model)
Get total model size in bytes.
Definition at line 8914 of file v6.5/test_generated/qwen2_int8.c.
void ck_model_precompute_rope(void *model)
Precompute RoPE cos/sin caches. Call once after allocation, before inference.
Definition at line 8889 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_precompute_rope().
int ck_model_verify_canaries(void *model)
Verify memory canaries (debug). Returns number of corrupted canaries (0 = OK).
Definition at line 8906 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_verify_canaries().
static int qwen2_0_5b_decode_align_elems(int elems, int elem_bytes, int align_bytes)
Definition at line 176 of file v6.5/test_generated/qwen2_int8.c.
void qwen2_0_5b_decode_decode(QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)
Definition at line 8841 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_decode_token(), and token.
Referenced by ck_model_decode().
static void qwen2_0_5b_decode_decode_token(QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)
Definition at line 8750 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q4_k(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemv_q4_k_q8_k(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_9_decode(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), token, and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_decode().
void qwen2_0_5b_decode_forward(QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)
Definition at line 8832 of file v6.5/test_generated/qwen2_int8.c.
References qwen2_0_5b_decode_forward_prefill_impl().
Referenced by ck_model_forward().
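static void qwen2_0_5b_decode_forward_prefill_impl(QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)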
Definition at line 4148 of file v6.5/test_generated/qwen2_int8.c.
References CK_DT_Q8_K, ck_dtype_row_bytes(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q4_k(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemm_nt_q4_k_q8_k(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, quantize_row_q8_k(), QWEN2_0_5B_DECODE_DTYPE_BYTES, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_prefill(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_forward().
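static void qwen2_0_5b_decode_layer_0_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)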
Definition at line 4405 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_HEADER, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
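static void qwen2_0_5b_decode_layer_0_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)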
Definition at line 211 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_HEADER, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
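static void qwen2_0_5b_decode_layer_10_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)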
Definition at line 6215 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
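static void qwen2_0_5b_decode_layer_10_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)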
Definition at line 1851 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
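static void qwen2_0_5b_decode_layer_11_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)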
Definition at line 6396 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
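static void qwen2_0_5b_decode_layer_11_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)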
Definition at line 2015 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
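static void qwen2_0_5b_decode_layer_12_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)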
Definition at line 6577 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
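static void qwen2_0_5b_decode_layer_12_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)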
Definition at line 2179 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
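static void qwen2_0_5b_decode_layer_13_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)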
Definition at line 6758 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
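static void qwen2_0_5b_decode_layer_13_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)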
Definition at line 2343 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
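static void qwen2_0_5b_decode_layer_14_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)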
Definition at line 6939 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
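static void qwen2_0_5b_decode_layer_14_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.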
Definition at line 2507 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
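static void qwen2_0_5b_decode_layer_15_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.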
Definition at line 7120 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
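static void qwen2_0_5b_decode_layer_15_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.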
Definition at line 2671 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
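static void qwen2_0_5b_decode_layer_16_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.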
Definition at line 7301 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
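static void qwen2_0_5b_decode_layer_16_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.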
Definition at line 2835 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
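static void qwen2_0_5b_decode_layer_17_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.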
Definition at line 7482 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
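static void qwen2_0_5b_decode_layer_17_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.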
Definition at line 2999 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
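static void qwen2_0_5b_decode_layer_18_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.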
Definition at line 7663 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
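static void qwen2_0_5b_decode_layer_18_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.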
Definition at line 3163 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
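static void qwen2_0_5b_decode_layer_19_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.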
Definition at line 7844 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
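static void qwen2_0_5b_decode_layer_19_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.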
Definition at line 3327 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
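static void qwen2_0_5b_decode_layer_1_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.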
Definition at line 4586 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
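static void qwen2_0_5b_decode_layer_1_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.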
Definition at line 375 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
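static void qwen2_0_5b_decode_layer_20_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.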
Definition at line 8025 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
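static void qwen2_0_5b_decode_layer_20_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.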
Definition at line 3491 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
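static void qwen2_0_5b_decode_layer_21_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.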
Definition at line 8206 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
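static void qwen2_0_5b_decode_layer_21_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.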
Definition at line 3655 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
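static void qwen2_0_5b_decode_layer_22_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.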
Definition at line 8387 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
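static void qwen2_0_5b_decode_layer_22_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.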
Definition at line 3819 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
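static void qwen2_0_5b_decode_layer_23_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.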
Definition at line 8568 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
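static void qwen2_0_5b_decode_layer_23_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.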
Definition at line 3983 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
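static void qwen2_0_5b_decode_layer_2_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.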
Definition at line 4767 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
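static void qwen2_0_5b_decode_layer_2_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.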
Definition at line 539 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
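static void qwen2_0_5b_decode_layer_3_decode(...) — prototype lost in extraction; name reconstructed from listing order and definition line.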
Definition at line 4948 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
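static void qwen2_0_5b_decode_layer_3_prefill(...) — prototype lost in extraction; name reconstructed from listing order and definition line.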
Definition at line 703 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
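static void qwen2_0_5b_decode_layer_4_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)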
Definition at line 5129 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
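static void qwen2_0_5b_decode_layer_4_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)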
Definition at line 867 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
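static void qwen2_0_5b_decode_layer_5_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)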
Definition at line 5310 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
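static void qwen2_0_5b_decode_layer_5_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)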
Definition at line 1031 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
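static void qwen2_0_5b_decode_layer_6_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)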
Definition at line 5491 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
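static void qwen2_0_5b_decode_layer_6_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)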
Definition at line 1195 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
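static void qwen2_0_5b_decode_layer_7_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)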
Definition at line 5672 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
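static void qwen2_0_5b_decode_layer_7_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)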
Definition at line 1359 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
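static void qwen2_0_5b_decode_layer_8_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)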
Definition at line 5853 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
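static void qwen2_0_5b_decode_layer_8_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)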
Definition at line 1523 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
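static void qwen2_0_5b_decode_layer_9_decode(QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)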
Definition at line 6034 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_decode_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemv_q4_k_q8_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, quantize_row_q8_k(), QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
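static void qwen2_0_5b_decode_layer_9_prefill(QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)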
Definition at line 1687 of file v6.5/test_generated/qwen2_int8.c.
References attention_forward_causal_head_major_gqa_flash_strided(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk_strided(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
int qwen2_0_5b_decode_model_allocate(QWEN2_0_5B_DECODEModel *model)
Definition at line 88 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODEModel::base, MagicHeader, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_ACTIVATION_BYTES, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, QWEN2_0_5B_DECODE_CANARY_SIZE, QWEN2_0_5B_DECODE_CANARY_VALUE, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_MAGIC, QWEN2_0_5B_DECODE_MAX_SEQ_LEN, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_TOTAL_BYTES, QWEN2_0_5B_DECODE_VOCAB_SIZE, QWEN2_0_5B_DECODE_WEIGHT_BYTES, and QWEN2_0_5B_DECODEModel::total_bytes.
Referenced by ck_model_create().
void qwen2_0_5b_decode_model_free(QWEN2_0_5B_DECODEModel *model)
Definition at line 141 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODEModel::base, and QWEN2_0_5B_DECODEModel::total_bytes.
Referenced by ck_model_free().
void qwen2_0_5b_decode_precompute_rope(QWEN2_0_5B_DECODEModel *model)
Definition at line 186 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_MAX_SEQ_LEN, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, and QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache.
Referenced by ck_model_precompute_rope().
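static qwen2_0_5b_decode_residual_add_token_major() [return type and parameter list not preserved in this extract]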
Definition at line 43 of file v6.5/test_generated/qwen2_int8.c.
Referenced by qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), and qwen2_0_5b_decode_layer_9_prefill().
int qwen2_0_5b_decode_verify_canaries(QWEN2_0_5B_DECODEModel *model)
Definition at line 152 of file v6.5/test_generated/qwen2_int8.c.
References QWEN2_0_5B_DECODEModel::base, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, and QWEN2_0_5B_DECODE_CANARY_VALUE.
Referenced by ck_model_verify_canaries().
|
static |
Definition at line 8853 of file v6.5/test_generated/qwen2_int8.c.
Referenced by ck_model_get_config().
MagicHeader
Definition at line 80 of file v6.5/test_generated/qwen2_int8.c.
Referenced by qwen2_0_5b_decode_model_allocate().