AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6 - Explicit Unrolled) More...
#include "ck-kernel-inference.h"
#include "ckernel_engine.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
Go to the source code of this file.
Macros | |
| #define | _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Functions | |
| struct | __attribute__ ((packed)) |
| _Static_assert (sizeof(MagicHeader)==64, "MagicHeader must be 64 bytes") | |
| static int | qwen2_0_5b_decode_align_elems (int elems, int elem_bytes, int align_bytes) |
| void | qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| static void | qwen2_0_5b_decode_decode_token (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| void | qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_forward_prefill_impl (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_layer_0_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_0_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_10_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_10_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_11_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_11_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_12_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_12_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_13_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_13_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_14_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_14_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_15_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_15_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_16_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_16_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_17_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_17_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_18_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_18_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_19_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_19_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_1_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_1_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_20_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_20_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_21_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_21_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_22_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_22_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_23_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_23_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_2_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_2_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_3_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_3_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_4_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_4_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_5_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_5_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_6_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_6_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_7_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_7_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_8_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_8_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_9_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_9_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| int | qwen2_0_5b_decode_model_allocate (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_model_free (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_precompute_rope (QWEN2_0_5B_DECODEModel *model) |
| static void | qwen2_0_5b_decode_residual_add_token_major (const float *a, const float *b, float *out, int tokens, int aligned_embed_dim) |
| int | qwen2_0_5b_decode_verify_canaries (QWEN2_0_5B_DECODEModel *model) |
Variables | |
| MagicHeader | |
AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6 - Explicit Unrolled)
Generated: 2026-01-12T04:06:36.662558 UTC
Total Memory: 3.57 GB
Mode: decode
Layers: 24 (fully unrolled)
Per-layer quant types:
Layer 0: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k
Layer 1: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k
Layer 2: wq=q4_k wk=q4_k wv=q4_k wo=q4_k w1=q4_k w2=q4_k
... (21 more layers)
DO NOT EDIT - Regenerate with build_ir_v6.py or codegen_v6.py
Definition in file v6.6/test_generated/ck-kernel-inference.c.
| #define _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Definition at line 19 of file v6.6/test_generated/ck-kernel-inference.c.
| struct __attribute__ ((packed)) |
Definition at line 43 of file v6.6/test_generated/ck-kernel-inference.c.
| _Static_assert (sizeof(MagicHeader) == 64, "MagicHeader must be 64 bytes") |
| static int qwen2_0_5b_decode_align_elems (int elems, int elem_bytes, int align_bytes) |
Definition at line 176 of file v6.6/test_generated/ck-kernel-inference.c.
| void qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
Definition at line 8022 of file v6.6/test_generated/ck-kernel-inference.c.
References qwen2_0_5b_decode_decode_token(), and token.
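| static void qwen2_0_5b_decode_decode_token (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |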
Definition at line 7934 of file v6.6/test_generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q4_k(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemm_nt_q4_k(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_9_decode(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), token, and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_decode().
| void qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
Definition at line 8013 of file v6.6/test_generated/ck-kernel-inference.c.
References qwen2_0_5b_decode_forward_prefill_impl().
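| static void qwen2_0_5b_decode_forward_prefill_impl (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |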
Definition at line 4076 of file v6.6/test_generated/ck-kernel-inference.c.
References CK_DT_Q8_K, ck_dtype_row_bytes(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q4_k(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemm_nt_q4_k_q8_k(), kv_cache_repack_head_major_inplace(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, quantize_row_q8_k(), QWEN2_0_5B_DECODE_DTYPE_BYTES, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_prefill(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_forward().
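| static void qwen2_0_5b_decode_layer_0_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |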
Definition at line 4645 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_HEADER, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
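| static void qwen2_0_5b_decode_layer_0_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |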
Definition at line 211 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_HEADER, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
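| static void qwen2_0_5b_decode_layer_10_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |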
Definition at line 6015 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
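| static void qwen2_0_5b_decode_layer_10_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |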
Definition at line 1821 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
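| static void qwen2_0_5b_decode_layer_11_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |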
Definition at line 6152 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
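| static void qwen2_0_5b_decode_layer_11_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |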
Definition at line 1982 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
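| static void qwen2_0_5b_decode_layer_12_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |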
Definition at line 6289 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
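| static void qwen2_0_5b_decode_layer_12_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |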
Definition at line 2143 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
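| static void qwen2_0_5b_decode_layer_13_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |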
Definition at line 6426 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
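| static void qwen2_0_5b_decode_layer_13_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |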
Definition at line 2304 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
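| static void qwen2_0_5b_decode_layer_14_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |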
Definition at line 6563 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
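| static void qwen2_0_5b_decode_layer_14_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |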
Definition at line 2465 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
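| static void qwen2_0_5b_decode_layer_15_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |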
Definition at line 6700 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
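| static void qwen2_0_5b_decode_layer_15_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |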
Definition at line 2626 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
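static void qwen2_0_5b_decode_layer_16_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)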
Definition at line 6837 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
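static void qwen2_0_5b_decode_layer_16_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)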
Definition at line 2787 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
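static void qwen2_0_5b_decode_layer_17_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)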
Definition at line 6974 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
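static void qwen2_0_5b_decode_layer_17_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)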
Definition at line 2948 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
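static void qwen2_0_5b_decode_layer_18_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)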
Definition at line 7111 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
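static void qwen2_0_5b_decode_layer_18_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)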
Definition at line 3109 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
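static void qwen2_0_5b_decode_layer_19_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)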
Definition at line 7248 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
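static void qwen2_0_5b_decode_layer_19_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)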
Definition at line 3270 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
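static void qwen2_0_5b_decode_layer_1_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)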
Definition at line 4782 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
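static void qwen2_0_5b_decode_layer_1_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)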
Definition at line 372 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
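static void qwen2_0_5b_decode_layer_20_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)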
Definition at line 7385 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
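static void qwen2_0_5b_decode_layer_20_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)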
Definition at line 3431 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
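static void qwen2_0_5b_decode_layer_21_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)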
Definition at line 7522 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
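static void qwen2_0_5b_decode_layer_21_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)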
Definition at line 3592 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
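static void qwen2_0_5b_decode_layer_22_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)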
Definition at line 7659 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
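static void qwen2_0_5b_decode_layer_22_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)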
Definition at line 3753 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
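static void qwen2_0_5b_decode_layer_23_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)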
Definition at line 7796 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
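static void qwen2_0_5b_decode_layer_23_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)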
Definition at line 3914 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
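static void qwen2_0_5b_decode_layer_2_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)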
Definition at line 4919 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
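static void qwen2_0_5b_decode_layer_2_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)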
Definition at line 533 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
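static void qwen2_0_5b_decode_layer_3_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)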
Definition at line 5056 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
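static void qwen2_0_5b_decode_layer_3_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)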
Definition at line 694 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
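static void qwen2_0_5b_decode_layer_4_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)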
Definition at line 5193 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
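static void qwen2_0_5b_decode_layer_4_prefill (QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)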
Definition at line 855 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
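static void qwen2_0_5b_decode_layer_5_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)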
Definition at line 5330 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
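| static void qwen2_0_5b_decode_layer_5_prefill | ( QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |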
Definition at line 1016 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
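| static void qwen2_0_5b_decode_layer_6_decode | ( QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |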
Definition at line 5467 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
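| static void qwen2_0_5b_decode_layer_6_prefill | ( QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |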
Definition at line 1177 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
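| static void qwen2_0_5b_decode_layer_7_decode | ( QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |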
Definition at line 5604 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
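| static void qwen2_0_5b_decode_layer_7_prefill | ( QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |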
Definition at line 1338 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
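| static void qwen2_0_5b_decode_layer_8_decode | ( QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |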
Definition at line 5741 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
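| static void qwen2_0_5b_decode_layer_8_prefill | ( QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |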
Definition at line 1499 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
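| static void qwen2_0_5b_decode_layer_9_decode | ( QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |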
Definition at line 5878 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
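| static void qwen2_0_5b_decode_layer_9_prefill | ( QWEN2_0_5B_DECODEModel *model, int num_tokens, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window ) |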
Definition at line 1660 of file v6.6/test_generated/ck-kernel-inference.c.
References attention_forward_causal_head_major_gqa_flash(), CK_DT_Q4_K, ck_dtype_row_bytes(), gemm_nt_q4_k(), QWEN2_0_5B_DECODELayerOffsets::k, QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODELayerOffsets::q, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_forward_prefill_impl().
| int qwen2_0_5b_decode_model_allocate | ( | QWEN2_0_5B_DECODEModel * | model | ) |
Definition at line 88 of file v6.6/test_generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, MagicHeader, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_ACTIVATION_BYTES, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, QWEN2_0_5B_DECODE_CANARY_SIZE, QWEN2_0_5B_DECODE_CANARY_VALUE, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_MAGIC, QWEN2_0_5B_DECODE_MAX_SEQ_LEN, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_TOTAL_BYTES, QWEN2_0_5B_DECODE_VOCAB_SIZE, QWEN2_0_5B_DECODE_WEIGHT_BYTES, and QWEN2_0_5B_DECODEModel::total_bytes.
| void qwen2_0_5b_decode_model_free | ( | QWEN2_0_5B_DECODEModel * | model | ) |
Definition at line 141 of file v6.6/test_generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, and QWEN2_0_5B_DECODEModel::total_bytes.
| void qwen2_0_5b_decode_precompute_rope | ( | QWEN2_0_5B_DECODEModel * | model | ) |
| static qwen2_0_5b_decode_residual_add_token_major | ( … ) |
Definition at line 43 of file v6.6/test_generated/ck-kernel-inference.c.
Referenced by qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), and qwen2_0_5b_decode_layer_9_prefill().
| int qwen2_0_5b_decode_verify_canaries | ( | QWEN2_0_5B_DECODEModel * | model | ) |
Definition at line 152 of file v6.6/test_generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, and QWEN2_0_5B_DECODE_CANARY_VALUE.
| MagicHeader |
Definition at line 80 of file v6.6/test_generated/ck-kernel-inference.c.
Referenced by qwen2_0_5b_decode_model_allocate().