#include <stdint.h>
#include <stddef.h>
Go to the source code of this file.
Macros | |
| #define | CK_DT_MASK(dt) (1u << (uint32_t)(dt)) |
Typedefs | |
| typedef uint32_t | CKDataTypeMask |
Enumerations | |
| enum | CKDataType { CK_DT_FP32 = 0 , CK_DT_BF16 , CK_DT_FP16 , CK_DT_INT8 , CK_DT_INT4 , CK_DT_Q4_0 , CK_DT_Q4_1 , CK_DT_Q4_K , CK_DT_Q6_K , CK_DT_Q8_0 , CK_DT_Q8_K , CK_DT_Q5_0 , CK_DT_Q5_1 , CK_DT_Q5_K , CK_DT_COUNT } |
| Supported data types in C-Kernel-Engine. More... | |
Functions | |
| static size_t | ck_dtype_block_bytes (CKDataType dt) |
| Get bytes per block for quantized types. More... | |
| static size_t | ck_dtype_block_size (CKDataType dt) |
| Get the number of elements per quantization block. More... | |
| static size_t | ck_dtype_bytes (CKDataType dt) |
| Get bytes per element for non-quantized types. More... | |
| static int | ck_dtype_is_quantized (CKDataType dt) |
| Check if a data type is block-quantized (GGML-style) More... | |
| static size_t | ck_dtype_row_bytes (CKDataType dt, size_t n_elements) |
| Calculate total bytes for n_elements of given dtype. More... | |
| static int | ck_dtype_supported (CKDataTypeMask mask, CKDataType dt) |
| #define CK_DT_MASK(dt) (1u << (uint32_t)(dt)) |
Definition at line 53 of file ckernel_dtype.h.
| typedef uint32_t CKDataTypeMask |
Definition at line 51 of file ckernel_dtype.h.
| enum CKDataType |
Supported data types in C-Kernel-Engine.
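Standard types: CK_DT_FP32, CK_DT_BF16, CK_DT_FP16.
Simple quantization: CK_DT_INT8, CK_DT_INT4.
GGML-compatible quantization (block-based with scales): CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q6_K, CK_DT_Q8_0, CK_DT_Q8_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K.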
| Enumerator | |
|---|---|
| CK_DT_FP32 | |
| CK_DT_BF16 | |
| CK_DT_FP16 | |
| CK_DT_INT8 | |
| CK_DT_INT4 | |
| CK_DT_Q4_0 | |
| CK_DT_Q4_1 | |
| CK_DT_Q4_K | |
| CK_DT_Q6_K | |
| CK_DT_Q8_0 | |
| CK_DT_Q8_K | |
| CK_DT_Q5_0 | |
| CK_DT_Q5_1 | |
| CK_DT_Q5_K | |
| CK_DT_COUNT | |
Definition at line 27 of file ckernel_dtype.h.
| static size_t ck_dtype_block_bytes (CKDataType dt) | inline static |
Get bytes per block for quantized types.
Definition at line 109 of file ckernel_dtype.h.
References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, CK_DT_Q8_K, and ck_dtype_bytes().
Referenced by ck_dtype_row_bytes().
| static size_t ck_dtype_block_size (CKDataType dt) | inline static |
Get the number of elements per quantization block.
Definition at line 87 of file ckernel_dtype.h.
References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, and CK_DT_Q8_K.
Referenced by ck_dtype_row_bytes().
| static size_t ck_dtype_bytes (CKDataType dt) | inline static |
Get bytes per element for non-quantized types.
Definition at line 68 of file ckernel_dtype.h.
References CK_DT_BF16, CK_DT_FP16, CK_DT_FP32, CK_DT_INT4, and CK_DT_INT8.
Referenced by ck_dtype_block_bytes(), and ck_dtype_row_bytes().
| static int ck_dtype_is_quantized (CKDataType dt) | inline static |
Check if a data type is block-quantized (GGML-style)
Definition at line 58 of file ckernel_dtype.h.
References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, and CK_DT_Q8_K.
Referenced by ck_dtype_row_bytes().
| static size_t ck_dtype_row_bytes (CKDataType dt, size_t n_elements) | inline static |
Calculate total bytes for n_elements of given dtype.
Definition at line 138 of file ckernel_dtype.h.
References ck_dtype_block_bytes(), ck_dtype_block_size(), ck_dtype_bytes(), and ck_dtype_is_quantized().
Referenced by buffer_bytes(), build_plan(), ck_qkv_project_head_major_q4_k(), ck_qkv_project_head_major_quant(), ck_qkv_project_head_major_token_q4_k(), ck_qkv_project_head_major_token_q4_k_q8_k(), ck_qkv_project_head_major_token_quant(), embedding_forward_q4_k(), embedding_forward_q6_k(), embedding_forward_q8_0(), fused_mlp_swiglu_prefill_w1w2_quant(), fused_mlp_swiglu_prefill_w1w2_quant_scratch_size(), fused_rmsnorm_qkv_prefill_head_major_quant(), fused_rmsnorm_qkv_prefill_head_major_quant_scratch_size(), mega_fused_outproj_mlp_prefill(), mega_fused_outproj_mlp_prefill_scratch_size(), model_forward_prefill_impl(), model_layer_0_decode(), model_layer_0_prefill(), model_layer_10_decode(), model_layer_10_prefill(), model_layer_11_decode(), model_layer_11_prefill(), model_layer_12_decode(), model_layer_12_prefill(), model_layer_13_decode(), model_layer_13_prefill(), model_layer_14_decode(), model_layer_14_prefill(), model_layer_15_decode(), model_layer_15_prefill(), model_layer_16_decode(), model_layer_16_prefill(), model_layer_17_decode(), model_layer_17_prefill(), model_layer_18_decode(), model_layer_18_prefill(), model_layer_19_decode(), model_layer_19_prefill(), model_layer_1_decode(), model_layer_1_prefill(), model_layer_20_decode(), model_layer_20_prefill(), model_layer_21_decode(), model_layer_21_prefill(), model_layer_22_decode(), model_layer_22_prefill(), model_layer_23_decode(), model_layer_23_prefill(), model_layer_2_decode(), model_layer_2_prefill(), model_layer_3_decode(), model_layer_3_prefill(), model_layer_4_decode(), model_layer_4_prefill(), model_layer_5_decode(), model_layer_5_prefill(), model_layer_6_decode(), model_layer_6_prefill(), model_layer_7_decode(), model_layer_7_prefill(), model_layer_8_decode(), model_layer_8_prefill(), model_layer_9_decode(), model_layer_9_prefill(), out_proj_head_major_q5_0_q8_0(), out_proj_head_major_q8_0_q8_0(), quantize_attn_out_head_major_q8_0(), qwen2_0_5b_decode_forward_prefill_impl(), 
qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), and qwen2_0_5b_decode_layer_9_prefill().
|
inline static |