AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6 - Explicit Unrolled)
#include "ck-kernel-inference.h"
#include "ckernel_engine.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
Go to the source code of this file.
Macros | |
| #define | _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Functions | |
| struct | __attribute__ ((packed)) |
| _Static_assert (sizeof(MagicHeader)==64, "MagicHeader must be 64 bytes") | |
| static int | qwen2_0_5b_decode_align_elems (int elems, int elem_bytes, int align_bytes) |
| void | qwen2_0_5b_decode_decode (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| static void | qwen2_0_5b_decode_decode_token (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index) |
| void | qwen2_0_5b_decode_forward (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_forward_prefill_impl (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens) |
| static void | qwen2_0_5b_decode_layer_0_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_10_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_11_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_12_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_13_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_14_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_15_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_16_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_17_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_18_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_19_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_1_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_20_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_21_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_22_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_23_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_2_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_3_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_4_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_5_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_6_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_7_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_8_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| static void | qwen2_0_5b_decode_layer_9_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window) |
| int | qwen2_0_5b_decode_model_allocate (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_model_free (QWEN2_0_5B_DECODEModel *model) |
| void | qwen2_0_5b_decode_precompute_rope (QWEN2_0_5B_DECODEModel *model) |
| static void | qwen2_0_5b_decode_residual_add_token_major (const float *a, const float *b, float *out, int tokens, int aligned_embed_dim) |
| int | qwen2_0_5b_decode_verify_canaries (QWEN2_0_5B_DECODEModel *model) |
Variables | |
| MagicHeader | |
AUTO-GENERATED: qwen2_0.5b_decode Implementation (IR v6 - Explicit Unrolled)
Generated: 2026-01-15T19:23:13.600148 UTC
Total Memory: 3.65 GB
Mode: decode
Layers: 24 (fully unrolled)
Per-layer quant types:
  Layer 0: wq=q5_0 wk=q5_0 wv=q8_0 wo=q5_0 w1=q5_0 w2=q6_k
  Layer 1: wq=q5_0 wk=q5_0 wv=q8_0 wo=q5_0 w1=q5_0 w2=q6_k
  Layer 2: wq=q5_0 wk=q5_0 wv=q5_0 wo=q5_0 w1=q5_0 w2=q4_k
  ... (21 more layers)
═══════════════════════════════════════════════════════════════════════════ MANIFEST VALIDATION (from weights_manifest.json) ═══════════════════════════════════════════════════════════════════════════
| Layer | WQ | WK | WV | WO | W1 | W2 | BQ | BK | BV | BO |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 1 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 2 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 3 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 4 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 5 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 6 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 7 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 8 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 9 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 10 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 11 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 12 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 13 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 14 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 15 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 16 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 17 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 18 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 19 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 20 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 21 | q5_0 | q5_0 | q8_0 | q5_0 | q5_0 | q6_k | ✓ | ✓ | ✓ | ○ |
| 22 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
| 23 | q5_0 | q5_0 | q5_0 | q5_0 | q5_0 | q4_k | ✓ | ✓ | ✓ | ○ |
Total manifest entries: 269
Attention biases: PRESENT (Qwen2-style)
═══════════════════════════════════════════════════════════════════════════
DO NOT EDIT - Regenerate with build_ir_v6.py or codegen_v6.py
Definition in file v6.6/generated/ck-kernel-inference.c.
| #define _GNU_SOURCE /* For MAP_ANONYMOUS, MAP_HUGETLB */ |
Definition at line 55 of file v6.6/generated/ck-kernel-inference.c.
| struct __attribute__ | ( | (packed) | ) |
Definition at line 79 of file v6.6/generated/ck-kernel-inference.c.
| _Static_assert | ( | sizeof(MagicHeader) | == 64, |
| "MagicHeader must be 64 bytes" | |||
| ) |
static int qwen2_0_5b_decode_align_elems (int elems, int elem_bytes, int align_bytes)
Definition at line 212 of file v6.6/generated/ck-kernel-inference.c.
| void qwen2_0_5b_decode_decode | ( | QWEN2_0_5B_DECODEModel * | model, |
| const int * | token, | ||
| int | token_index | ||
| ) |
Definition at line 4311 of file v6.6/generated/ck-kernel-inference.c.
References qwen2_0_5b_decode_decode_token(), and token.
static void qwen2_0_5b_decode_decode_token (QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)
Definition at line 4223 of file v6.6/generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q8_0(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemm_nt_q8_0(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_9_decode(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), token, and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_decode().
| void qwen2_0_5b_decode_forward | ( | QWEN2_0_5B_DECODEModel * | model, |
| const int * | tokens, | ||
| int | num_tokens | ||
| ) |
Definition at line 4302 of file v6.6/generated/ck-kernel-inference.c.
References qwen2_0_5b_decode_forward_prefill_impl().
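static void qwen2_0_5b_decode_forward_prefill_impl (QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)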
Definition at line 248 of file v6.6/generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, embedding_forward_q8_0(), QWEN2_0_5B_DECODEFooterOffsets::final_ln_weight, QWEN2_0_5B_DECODEFooterOffsets::final_output, gemm_nt_q8_0(), kv_cache_repack_head_major_inplace(), QWEN2_0_5B_DECODEFooterOffsets::lm_head_weight, QWEN2_0_5B_DECODEFooterOffsets::logits, QWEN2_0_5B_DECODE_DTYPE_BYTES, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_FOOTER, QWEN2_0_5B_DECODE_HEADER, qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_prefill(), QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_PTR, QWEN2_0_5B_DECODE_VOCAB_SIZE, rmsnorm_forward(), and QWEN2_0_5B_DECODEHeaderOffsets::token_emb.
Referenced by qwen2_0_5b_decode_forward().
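static void qwen2_0_5b_decode_layer_0_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)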
Definition at line 814 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, QWEN2_0_5B_DECODEHeaderOffsets::embedded_input, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_HEADER, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
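static void qwen2_0_5b_decode_layer_10_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)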
Definition at line 2234 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
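static void qwen2_0_5b_decode_layer_11_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)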
Definition at line 2376 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
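static void qwen2_0_5b_decode_layer_12_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)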
Definition at line 2518 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
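static void qwen2_0_5b_decode_layer_13_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)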
Definition at line 2660 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
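static void qwen2_0_5b_decode_layer_14_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)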
Definition at line 2802 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
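static void qwen2_0_5b_decode_layer_15_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)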
Definition at line 2944 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
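static void qwen2_0_5b_decode_layer_16_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)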
Definition at line 3086 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
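static void qwen2_0_5b_decode_layer_17_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)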
Definition at line 3228 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
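static void qwen2_0_5b_decode_layer_18_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)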
Definition at line 3370 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
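static void qwen2_0_5b_decode_layer_19_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)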
Definition at line 3512 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
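static void qwen2_0_5b_decode_layer_1_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)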
Definition at line 956 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
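static void qwen2_0_5b_decode_layer_20_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)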
Definition at line 3654 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
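static void qwen2_0_5b_decode_layer_21_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)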
Definition at line 3796 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
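static void qwen2_0_5b_decode_layer_22_decode (QWEN2_0_5B_DECODEModel *model, int token_index, int aligned_embed_dim, int aligned_head_dim, int aligned_intermediate_dim, int aligned_context_window)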
Definition at line 3938 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 4080 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1098 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1240 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1382 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1524 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q4_k(), gemm_nt_q5_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1666 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1808 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 1950 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
static
Definition at line 2092 of file v6.6/generated/ck-kernel-inference.c.
References attention_forward_decode_head_major_gqa_regular(), QWEN2_0_5B_DECODELayerOffsets::bk, QWEN2_0_5B_DECODELayerOffsets::bq, QWEN2_0_5B_DECODELayerOffsets::bv, gemm_nt_q5_0(), gemm_nt_q6_k(), gemm_nt_q8_0(), QWEN2_0_5B_DECODELayerOffsets::k, kv_cache_write_head_major(), QWEN2_0_5B_DECODELayerOffsets::ln1_gamma, QWEN2_0_5B_DECODELayerOffsets::ln1_out, QWEN2_0_5B_DECODELayerOffsets::ln2_gamma, QWEN2_0_5B_DECODELayerOffsets::ln2_out, QWEN2_0_5B_DECODELayerOffsets::mlp_out, QWEN2_0_5B_DECODELayerOffsets::output, QWEN2_0_5B_DECODELayerOffsets::proj_scratch, QWEN2_0_5B_DECODELayerOffsets::proj_tmp, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_GLOBALS, QWEN2_0_5B_DECODE_HEAD_DIM, QWEN2_0_5B_DECODE_LAYERS, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_KV_HEADS, QWEN2_0_5B_DECODE_PTR, qwen2_0_5b_decode_residual_add_token_major(), QWEN2_0_5B_DECODELayerOffsets::residual1, rmsnorm_forward(), QWEN2_0_5B_DECODEGlobalOffsets::rope_cos_cache, rope_forward_qk(), QWEN2_0_5B_DECODEGlobalOffsets::rope_sin_cache, swiglu_forward(), QWEN2_0_5B_DECODELayerOffsets::v, QWEN2_0_5B_DECODELayerOffsets::w1, QWEN2_0_5B_DECODELayerOffsets::w2, QWEN2_0_5B_DECODELayerOffsets::wk, QWEN2_0_5B_DECODELayerOffsets::wo, QWEN2_0_5B_DECODELayerOffsets::wq, and QWEN2_0_5B_DECODELayerOffsets::wv.
Referenced by qwen2_0_5b_decode_decode_token().
int qwen2_0_5b_decode_model_allocate(QWEN2_0_5B_DECODEModel *model)
Definition at line 124 of file v6.6/generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, MagicHeader, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_ACTIVATION_BYTES, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, QWEN2_0_5B_DECODE_CANARY_SIZE, QWEN2_0_5B_DECODE_CANARY_VALUE, QWEN2_0_5B_DECODE_EMBED_DIM, QWEN2_0_5B_DECODE_MAGIC, QWEN2_0_5B_DECODE_MAX_SEQ_LEN, QWEN2_0_5B_DECODE_NUM_HEADS, QWEN2_0_5B_DECODE_NUM_LAYERS, QWEN2_0_5B_DECODE_TOTAL_BYTES, QWEN2_0_5B_DECODE_VOCAB_SIZE, QWEN2_0_5B_DECODE_WEIGHT_BYTES, and QWEN2_0_5B_DECODEModel::total_bytes.
void qwen2_0_5b_decode_model_free(QWEN2_0_5B_DECODEModel *model)
Definition at line 177 of file v6.6/generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, and QWEN2_0_5B_DECODEModel::total_bytes.
void qwen2_0_5b_decode_precompute_rope(QWEN2_0_5B_DECODEModel *model)
static
Definition at line 79 of file v6.6/generated/ck-kernel-inference.c.
Referenced by qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_8_decode(), and qwen2_0_5b_decode_layer_9_decode().
int qwen2_0_5b_decode_verify_canaries(QWEN2_0_5B_DECODEModel *model)
Definition at line 188 of file v6.6/generated/ck-kernel-inference.c.
References QWEN2_0_5B_DECODEModel::base, QWEN2_0_5B_DECODECanary::offset, QWEN2_0_5B_DECODE_CANARIES, QWEN2_0_5B_DECODE_CANARY_COUNT, and QWEN2_0_5B_DECODE_CANARY_VALUE.
MagicHeader
Definition at line 116 of file v6.6/generated/ck-kernel-inference.c.
Referenced by qwen2_0_5b_decode_model_allocate().