← Back to C-Kernel-Engine Docs | Doxygen Source Documentation
ckernel_kernel_specs.c File Reference

Go to the source code of this file.

Variables

static const CKPlanBinding ck_decoder_backward_bindings_0 []
 
static const CKPlanBinding ck_decoder_backward_bindings_1 []
 
static const CKPlanBinding ck_decoder_backward_bindings_10 []
 
static const CKPlanBinding ck_decoder_backward_bindings_2 []
 
static const CKPlanBinding ck_decoder_backward_bindings_3 []
 
static const CKPlanBinding ck_decoder_backward_bindings_4 []
 
static const CKPlanBinding ck_decoder_backward_bindings_5 []
 
static const CKPlanBinding ck_decoder_backward_bindings_6 []
 
static const CKPlanBinding ck_decoder_backward_bindings_7 []
 
static const CKPlanBinding ck_decoder_backward_bindings_8 []
 
static const CKPlanBinding ck_decoder_backward_bindings_9 []
 
const CKPlanStep ck_decoder_backward_plan []
 
const size_t ck_decoder_backward_plan_count = sizeof(ck_decoder_backward_plan) / sizeof(ck_decoder_backward_plan[0])
 
const CKPlanStepV2 ck_decoder_backward_plan_v2 []
 
const size_t ck_decoder_backward_plan_v2_count = sizeof(ck_decoder_backward_plan_v2) / sizeof(ck_decoder_backward_plan_v2[0])
 
const size_t ck_decoder_buffer_count = sizeof(ck_decoder_buffers) / sizeof(ck_decoder_buffers[0])
 
const CKBufferSpec ck_decoder_buffers []
 
static const CKPlanBinding ck_decoder_forward_bindings_0 []
 
static const CKPlanBinding ck_decoder_forward_bindings_1 []
 
static const CKPlanBinding ck_decoder_forward_bindings_10 []
 
static const CKPlanBinding ck_decoder_forward_bindings_2 []
 
static const CKPlanBinding ck_decoder_forward_bindings_3 []
 
static const CKPlanBinding ck_decoder_forward_bindings_4 []
 
static const CKPlanBinding ck_decoder_forward_bindings_5 []
 
static const CKPlanBinding ck_decoder_forward_bindings_6 []
 
static const CKPlanBinding ck_decoder_forward_bindings_7 []
 
static const CKPlanBinding ck_decoder_forward_bindings_8 []
 
static const CKPlanBinding ck_decoder_forward_bindings_9 []
 
const CKPlanStep ck_decoder_forward_plan []
 
const size_t ck_decoder_forward_plan_count = sizeof(ck_decoder_forward_plan) / sizeof(ck_decoder_forward_plan[0])
 
const CKPlanStepV2 ck_decoder_forward_plan_v2 []
 
const size_t ck_decoder_forward_plan_v2_count = sizeof(ck_decoder_forward_plan_v2) / sizeof(ck_decoder_forward_plan_v2[0])
 
const size_t ck_kernel_spec_count = sizeof(ck_kernel_specs) / sizeof(ck_kernel_specs[0])
 
const CKKernelSpec ck_kernel_specs []
 

Variable Documentation

◆ ck_decoder_backward_bindings_0

const CKPlanBinding ck_decoder_backward_bindings_0[]
static
Initial value:
= {
{"d_out", "d_output"},
{"d_a", "d_residual1"},
{"d_b", "d_mlp_out"},
}

Definition at line 212 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_1

const CKPlanBinding ck_decoder_backward_bindings_1[]
static
Initial value:
= {
{"d_out", "d_mlp_out"},
{"swiglu_out", "swiglu_out"},
{"w2", "w2"},
{"d_input", "d_swiglu_out"},
{"d_w2", "d_w2"},
{"d_b2", "d_b2"},
}

Definition at line 218 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_10

const CKPlanBinding ck_decoder_backward_bindings_10[]
static
Initial value:
= {
{"d_out", "d_ln1_out"},
{"input", "input"},
{"gamma", "ln1_gamma"},
{"rstd", "ln1_rstd"},
{"d_input", "d_input"},
{"d_gamma", "d_ln1_gamma"},
}

Definition at line 304 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_2

const CKPlanBinding ck_decoder_backward_bindings_2[]
static
Initial value:
= {
{"fc1_out", "fc1_out"},
{"d_out", "d_swiglu_out"},
{"d_input", "d_fc1_out"},
}

Definition at line 227 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_3

const CKPlanBinding ck_decoder_backward_bindings_3[]
static
Initial value:
= {
{"d_out", "d_fc1_out"},
{"input", "ln2_out"},
{"w1", "w1"},
{"d_input", "d_ln2_out"},
{"d_w1", "d_w1"},
{"d_b1", "d_b1"},
}

Definition at line 233 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_4

const CKPlanBinding ck_decoder_backward_bindings_4[]
static
Initial value:
= {
{"d_out", "d_ln2_out"},
{"input", "residual1"},
{"gamma", "ln2_gamma"},
{"rstd", "ln2_rstd"},
{"d_input", "d_residual1"},
{"d_gamma", "d_ln2_gamma"},
}

Definition at line 242 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_5

const CKPlanBinding ck_decoder_backward_bindings_5[]
static
Initial value:
= {
{"d_out", "d_residual1"},
{"d_a", "d_input"},
{"d_b", "d_proj_tmp"},
}

Definition at line 251 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_6

const CKPlanBinding ck_decoder_backward_bindings_6[]
static
Initial value:
= {
{"d_out", "d_proj_tmp"},
{"attn_out", "attn_out"},
{"wo", "wo"},
{"d_attn_out", "d_attn_out"},
{"d_wo", "d_wo"},
{"d_bo", "d_bo"},
}

Definition at line 257 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_7

const CKPlanBinding ck_decoder_backward_bindings_7[]
static
Initial value:
= {
{"d_out", "d_attn_out"},
{"q", "q"},
{"k", "k"},
{"v", "v"},
{"scores", "scores"},
{"d_q", "d_q"},
{"d_k", "d_k"},
{"d_v", "d_v"},
{"d_scores", "d_scores"},
}

Definition at line 266 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_8

const CKPlanBinding ck_decoder_backward_bindings_8[]
static
Initial value:
= {
{"d_q_out", "d_q"},
{"d_k_out", "d_k"},
{"d_q", "d_q"},
{"d_k", "d_k"},
{"cos_cache", "rope_cos_cache"},
{"sin_cache", "rope_sin_cache"},
}

Definition at line 278 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_bindings_9

const CKPlanBinding ck_decoder_backward_bindings_9[]
static
Initial value:
= {
{"d_q", "d_q"},
{"d_k", "d_k"},
{"d_v", "d_v"},
{"input", "ln1_out"},
{"wq", "wq"},
{"wk", "wk"},
{"wv", "wv"},
{"d_input", "d_ln1_out"},
{"d_wq", "d_wq"},
{"d_bq", "d_bq"},
{"d_wk", "d_wk"},
{"d_bk", "d_bk"},
{"d_wv", "d_wv"},
{"d_bv", "d_bv"},
}

Definition at line 287 of file ckernel_kernel_specs.c.

◆ ck_decoder_backward_plan

const CKPlanStep ck_decoder_backward_plan[]
Initial value:
= {
{"residual_add", NULL},
{"mlp_down", NULL},
{"swiglu", NULL},
{"mlp_up", NULL},
{"rmsnorm", NULL},
{"residual_add", NULL},
{"attn_proj", NULL},
{"attention", NULL},
{"rope", "rope_theta>0"},
{"qkv_project", NULL},
{"rmsnorm", NULL},
}

Definition at line 115 of file ckernel_kernel_specs.c.

Referenced by emit_kernel_manifest().

◆ ck_decoder_backward_plan_count

const size_t ck_decoder_backward_plan_count = sizeof(ck_decoder_backward_plan) / sizeof(ck_decoder_backward_plan[0])

Definition at line 129 of file ckernel_kernel_specs.c.

Referenced by emit_kernel_manifest().

◆ ck_decoder_backward_plan_v2

const CKPlanStepV2 ck_decoder_backward_plan_v2[]
Initial value:
= {
{"residual_add", NULL, ck_decoder_backward_bindings_0, 3},
{"mlp_down", NULL, ck_decoder_backward_bindings_1, 6},
{"swiglu", NULL, ck_decoder_backward_bindings_2, 3},
{"mlp_up", NULL, ck_decoder_backward_bindings_3, 6},
{"rmsnorm", NULL, ck_decoder_backward_bindings_4, 6},
{"residual_add", NULL, ck_decoder_backward_bindings_5, 3},
{"attn_proj", NULL, ck_decoder_backward_bindings_6, 6},
{"attention", NULL, ck_decoder_backward_bindings_7, 9},
{"rope", "rope_theta>0", ck_decoder_backward_bindings_8, 6},
{"qkv_project", NULL, ck_decoder_backward_bindings_9, 14},
{"rmsnorm", NULL, ck_decoder_backward_bindings_10, 6},
}

Binding tables referenced in the initializer above:
static const CKPlanBinding ck_decoder_backward_bindings_9[]
static const CKPlanBinding ck_decoder_backward_bindings_2[]
static const CKPlanBinding ck_decoder_backward_bindings_6[]
static const CKPlanBinding ck_decoder_backward_bindings_0[]
static const CKPlanBinding ck_decoder_backward_bindings_10[]
static const CKPlanBinding ck_decoder_backward_bindings_7[]
static const CKPlanBinding ck_decoder_backward_bindings_5[]
static const CKPlanBinding ck_decoder_backward_bindings_8[]
static const CKPlanBinding ck_decoder_backward_bindings_1[]
static const CKPlanBinding ck_decoder_backward_bindings_3[]
static const CKPlanBinding ck_decoder_backward_bindings_4[]

Definition at line 329 of file ckernel_kernel_specs.c.

Referenced by ck_ir_v2_build_decoder_backward().

◆ ck_decoder_backward_plan_v2_count

const size_t ck_decoder_backward_plan_v2_count = sizeof(ck_decoder_backward_plan_v2) / sizeof(ck_decoder_backward_plan_v2[0])

Definition at line 343 of file ckernel_kernel_specs.c.

Referenced by ck_ir_v2_build_decoder_backward().

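◆ ck_decoder_buffer_count

const size_t ck_decoder_buffer_count = sizeof(ck_decoder_buffers) / sizeof(ck_decoder_buffers[0])

◆ ck_decoder_buffers

const CKBufferSpec ck_decoder_buffers[]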

◆ ck_decoder_forward_bindings_0

const CKPlanBinding ck_decoder_forward_bindings_0[]
static
Initial value:
= {
{"input", "input"},
{"gamma", "ln1_gamma"},
{"out", "ln1_out"},
{"rstd", "ln1_rstd"},
}

Definition at line 131 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_1

const CKPlanBinding ck_decoder_forward_bindings_1[]
static
Initial value:
= {
{"input", "ln1_out"},
{"wq", "wq"},
{"bq", "bq"},
{"wk", "wk"},
{"bk", "bk"},
{"wv", "wv"},
{"bv", "bv"},
{"q", "q"},
{"k", "k"},
{"v", "v"},
}

Definition at line 138 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_10

const CKPlanBinding ck_decoder_forward_bindings_10[]
static
Initial value:
= {
{"a", "residual1"},
{"b", "mlp_out"},
{"out", "output"},
}

Definition at line 206 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_2

const CKPlanBinding ck_decoder_forward_bindings_2[]
static
Initial value:
= {
{"q", "q"},
{"k", "k"},
{"cos_cache", "rope_cos_cache"},
{"sin_cache", "rope_sin_cache"},
}

Definition at line 151 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_3

const CKPlanBinding ck_decoder_forward_bindings_3[]
static
Initial value:
= {
{"q", "q"},
{"k", "k"},
{"v", "v"},
{"scores", "scores"},
{"attn_out", "attn_out"},
}

Definition at line 158 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_4

const CKPlanBinding ck_decoder_forward_bindings_4[]
static
Initial value:
= {
{"attn_out", "attn_out"},
{"wo", "wo"},
{"bo", "bo"},
{"proj_tmp", "proj_tmp"},
{"proj_scratch", "proj_scratch"},
}

Definition at line 166 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_5

const CKPlanBinding ck_decoder_forward_bindings_5[]
static
Initial value:
= {
{"a", "input"},
{"b", "proj_tmp"},
{"out", "residual1"},
}

Definition at line 174 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_6

const CKPlanBinding ck_decoder_forward_bindings_6[]
static
Initial value:
= {
{"input", "residual1"},
{"gamma", "ln2_gamma"},
{"out", "ln2_out"},
{"rstd", "ln2_rstd"},
}

Definition at line 180 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_7

const CKPlanBinding ck_decoder_forward_bindings_7[]
static
Initial value:
= {
{"input", "ln2_out"},
{"w1", "w1"},
{"b1", "b1"},
{"fc1_out", "fc1_out"},
}

Definition at line 187 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_8

const CKPlanBinding ck_decoder_forward_bindings_8[]
static
Initial value:
= {
{"fc1_out", "fc1_out"},
{"swiglu_out", "swiglu_out"},
}

Definition at line 194 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_bindings_9

const CKPlanBinding ck_decoder_forward_bindings_9[]
static
Initial value:
= {
{"swiglu_out", "swiglu_out"},
{"w2", "w2"},
{"b2", "b2"},
{"mlp_out", "mlp_out"},
}

Definition at line 199 of file ckernel_kernel_specs.c.

◆ ck_decoder_forward_plan

const CKPlanStep ck_decoder_forward_plan[]
Initial value:
= {
{"rmsnorm", NULL},
{"qkv_project", NULL},
{"rope", "rope_theta>0"},
{"attention", NULL},
{"attn_proj", NULL},
{"residual_add", NULL},
{"rmsnorm", NULL},
{"mlp_up", NULL},
{"swiglu", NULL},
{"mlp_down", NULL},
{"residual_add", NULL},
}

Definition at line 99 of file ckernel_kernel_specs.c.

Referenced by emit_kernel_manifest().

◆ ck_decoder_forward_plan_count

const size_t ck_decoder_forward_plan_count = sizeof(ck_decoder_forward_plan) / sizeof(ck_decoder_forward_plan[0])

Definition at line 113 of file ckernel_kernel_specs.c.

Referenced by emit_kernel_manifest().

◆ ck_decoder_forward_plan_v2

const CKPlanStepV2 ck_decoder_forward_plan_v2[]
Initial value:
= {
{"rmsnorm", NULL, ck_decoder_forward_bindings_0, 4},
{"qkv_project", NULL, ck_decoder_forward_bindings_1, 10},
{"rope", "rope_theta>0", ck_decoder_forward_bindings_2, 4},
{"attention", NULL, ck_decoder_forward_bindings_3, 5},
{"attn_proj", NULL, ck_decoder_forward_bindings_4, 5},
{"residual_add", NULL, ck_decoder_forward_bindings_5, 3},
{"rmsnorm", NULL, ck_decoder_forward_bindings_6, 4},
{"mlp_up", NULL, ck_decoder_forward_bindings_7, 4},
{"swiglu", NULL, ck_decoder_forward_bindings_8, 2},
{"mlp_down", NULL, ck_decoder_forward_bindings_9, 4},
{"residual_add", NULL, ck_decoder_forward_bindings_10, 3},
}

Binding tables referenced in the initializer above:
static const CKPlanBinding ck_decoder_forward_bindings_7[]
static const CKPlanBinding ck_decoder_forward_bindings_9[]
static const CKPlanBinding ck_decoder_forward_bindings_1[]
static const CKPlanBinding ck_decoder_forward_bindings_6[]
static const CKPlanBinding ck_decoder_forward_bindings_0[]
static const CKPlanBinding ck_decoder_forward_bindings_8[]
static const CKPlanBinding ck_decoder_forward_bindings_3[]
static const CKPlanBinding ck_decoder_forward_bindings_4[]
static const CKPlanBinding ck_decoder_forward_bindings_5[]
static const CKPlanBinding ck_decoder_forward_bindings_10[]
static const CKPlanBinding ck_decoder_forward_bindings_2[]

Definition at line 313 of file ckernel_kernel_specs.c.

Referenced by ck_ir_v2_build_decoder().

◆ ck_decoder_forward_plan_v2_count

const size_t ck_decoder_forward_plan_v2_count = sizeof(ck_decoder_forward_plan_v2) / sizeof(ck_decoder_forward_plan_v2[0])

Definition at line 327 of file ckernel_kernel_specs.c.

Referenced by ck_ir_v2_build_decoder().

◆ ck_kernel_spec_count

const size_t ck_kernel_spec_count = sizeof(ck_kernel_specs) / sizeof(ck_kernel_specs[0])

◆ ck_kernel_specs

const CKKernelSpec ck_kernel_specs[]
Initial value:
= {
{"attention", { "attention_forward_causal_head_major_gqa", "attention_forward_causal_head_major_gqa_bf16", NULL, NULL, NULL }, { "attention_backward_causal_head_major_gqa", "attention_backward_causal_head_major_gqa_bf16", NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32) | CK_DT_MASK(CK_DT_BF16), CK_DT_FP32, { "src/kernels/attention_kernels.c", "src/kernels/softmax_kernels.c", NULL, NULL, NULL, NULL, NULL, NULL }},
{"attn_proj", { "ck_attention_project_head_major", NULL, NULL, NULL, NULL }, { "ck_attention_project_head_major_backward", NULL, NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32), CK_DT_FP32, { "src/ckernel_orchestration.c", "src/kernels/gemm_kernels.c", "src/kernels/mlp_kernels.c", "src/kernels/gelu_kernels.c", NULL, NULL, NULL, NULL }},
{"mlp_down", { "gemm_blocked_serial", NULL, NULL, NULL, NULL }, { "fc2_backward_kernel", NULL, NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32), CK_DT_FP32, { "src/kernels/gemm_kernels.c", "src/kernels/mlp_kernels.c", "src/kernels/gelu_kernels.c", NULL, NULL, NULL, NULL, NULL }},
{"mlp_up", { "gemm_blocked_serial", NULL, NULL, NULL, NULL }, { "fc1_backward_kernel", NULL, NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32), CK_DT_FP32, { "src/kernels/gemm_kernels.c", "src/kernels/mlp_kernels.c", "src/kernels/gelu_kernels.c", NULL, NULL, NULL, NULL, NULL }},
{"qkv_project", { "ck_qkv_project_head_major", NULL, NULL, NULL, NULL }, { "ck_qkv_project_head_major_backward", NULL, NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32), CK_DT_FP32, { "src/ckernel_orchestration.c", "src/kernels/gemm_kernels.c", "src/kernels/mlp_kernels.c", "src/kernels/gelu_kernels.c", NULL, NULL, NULL, NULL }},
{"residual_add", { "ck_residual_add_token_major", NULL, NULL, NULL, NULL }, { "ck_residual_add_backward", NULL, NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32), CK_DT_FP32, { "src/ckernel_orchestration.c", NULL, NULL, NULL, NULL, NULL, NULL, NULL }},
{"rmsnorm", { "rmsnorm_forward", "rmsnorm_forward_bf16", NULL, "rmsnorm_forward_int8", "rmsnorm_forward_int4" }, { "rmsnorm_backward", "rmsnorm_backward_bf16", NULL, "rmsnorm_backward_int8", "rmsnorm_backward_int4" }, CK_DT_MASK(CK_DT_FP32) | CK_DT_MASK(CK_DT_BF16) | CK_DT_MASK(CK_DT_INT8) | CK_DT_MASK(CK_DT_INT4), CK_DT_FP32, { "src/kernels/rmsnorm_kernels.c", "src/kernels/rmsnorm_kernels_bf16.c", "src/kernels/rmsnorm_kernels_int8.c", "src/kernels/rmsnorm_kernels_int4.c", NULL, NULL, NULL, NULL }},
{"rope", { "rope_forward_qk", "rope_forward_qk_bf16", NULL, NULL, NULL }, { "rope_backward_qk", "rope_backward_qk_bf16", NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32) | CK_DT_MASK(CK_DT_BF16), CK_DT_FP32, { "src/kernels/rope_kernels.c", "src/kernels/rope_kernels_bf16.c", NULL, NULL, NULL, NULL, NULL, NULL }},
{"swiglu", { "swiglu_forward", "swiglu_forward_bf16", NULL, NULL, NULL }, { "swiglu_backward", "swiglu_backward_bf16", NULL, NULL, NULL }, CK_DT_MASK(CK_DT_FP32) | CK_DT_MASK(CK_DT_BF16), CK_DT_FP32, { "src/kernels/swiglu_kernels.c", "src/kernels/swiglu_kernels_bf16.c", "src/kernels/sigmoid_kernels.c", NULL, NULL, NULL, NULL, NULL }},
}

Symbols referenced in the initializer above:
#define CK_DT_MASK(dt) (Definition: ckernel_dtype.h:53)
@ CK_DT_FP32 (Definition: ckernel_dtype.h:29)
@ CK_DT_INT4 (Definition: ckernel_dtype.h:35)
@ CK_DT_BF16 (Definition: ckernel_dtype.h:30)
@ CK_DT_INT8 (Definition: ckernel_dtype.h:34)

Definition at line 85 of file ckernel_kernel_specs.c.

Referenced by ck_find_kernel_spec(), and ck_ir_v2_find_kernel_spec().