← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ckernel_dtype.h File Reference
#include <stdint.h>
#include <stddef.h>

Go to the source code of this file.

Macros

#define CK_DT_MASK(dt)   (1u << (uint32_t)(dt))
 

Typedefs

typedef uint32_t CKDataTypeMask
 

Enumerations

enum  CKDataType {
  CK_DT_FP32 = 0 , CK_DT_BF16 , CK_DT_FP16 , CK_DT_INT8 ,
  CK_DT_INT4 , CK_DT_Q4_0 , CK_DT_Q4_1 , CK_DT_Q4_K ,
  CK_DT_Q6_K , CK_DT_Q8_0 , CK_DT_Q8_K , CK_DT_Q5_0 ,
  CK_DT_Q5_1 , CK_DT_Q5_K , CK_DT_COUNT
}
 Supported data types in C-Kernel-Engine. More...
 

Functions

static size_t ck_dtype_block_bytes (CKDataType dt)
 Get bytes per block for quantized types. More...
 
static size_t ck_dtype_block_size (CKDataType dt)
 Get the number of elements per quantization block. More...
 
static size_t ck_dtype_bytes (CKDataType dt)
 Get bytes per element for non-quantized types. More...
 
static int ck_dtype_is_quantized (CKDataType dt)
 Check if a data type is block-quantized (GGML-style) More...
 
static size_t ck_dtype_row_bytes (CKDataType dt, size_t n_elements)
 Calculate total bytes for n_elements of given dtype. More...
 
static int ck_dtype_supported (CKDataTypeMask mask, CKDataType dt)
 

Macro Definition Documentation

◆ CK_DT_MASK

#define CK_DT_MASK (   dt)    (1u << (uint32_t)(dt))

Definition at line 53 of file ckernel_dtype.h.

Typedef Documentation

◆ CKDataTypeMask

typedef uint32_t CKDataTypeMask

Definition at line 51 of file ckernel_dtype.h.

Enumeration Type Documentation

◆ CKDataType

enum CKDataType

Supported data types in C-Kernel-Engine.

Standard types:

  • CK_DT_FP32: 32-bit float (baseline, full precision)
  • CK_DT_BF16: Brain Float 16 (1+8+7 bits)
  • CK_DT_FP16: IEEE Half Precision (1+5+10 bits)

Simple quantization:

  • CK_DT_INT8: 8-bit signed integer
  • CK_DT_INT4: 4-bit signed integer (simple, no scales)

GGML-compatible quantization (block-based with scales):

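  • CK_DT_Q4_0: 4-bit, 32 weights/block, 1 FP16 scale
  • CK_DT_Q4_1: 4-bit, 32 weights/block, 1 FP16 scale + 1 FP16 min
  • CK_DT_Q4_K: 4-bit k-quant, 256 weights/block, nested scales (Q4_K_M)
  • CK_DT_Q6_K: 6-bit k-quant, 256 weights/block, per-16 scales
  • CK_DT_Q8_0: 8-bit, 32 weights/block, 1 FP16 scale
  • CK_DT_Q8_K: 8-bit k-quant, 256 weights/block, FP32 scale + bsums
  • CK_DT_Q5_0: 5-bit, 32 weights/block, 1 FP16 scale
  • CK_DT_Q5_1: 5-bit, 32 weights/block, 1 FP16 scale + 1 FP16 min
  • CK_DT_Q5_K: 5-bit k-quant, 256 weights/block, nested scales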
Enumerator
CK_DT_FP32 
CK_DT_BF16 
CK_DT_FP16 
CK_DT_INT8 
CK_DT_INT4 
CK_DT_Q4_0 
CK_DT_Q4_1 
CK_DT_Q4_K 
CK_DT_Q6_K 
CK_DT_Q8_0 
CK_DT_Q8_K 
CK_DT_Q5_0 
CK_DT_Q5_1 
CK_DT_Q5_K 
CK_DT_COUNT 

Definition at line 27 of file ckernel_dtype.h.

27  {
28  /* Standard floating-point types */
29  CK_DT_FP32 = 0, /* 4 bytes per element */
30  CK_DT_BF16, /* 2 bytes per element */
31  CK_DT_FP16, /* 2 bytes per element */
32 
33  /* Simple integer types (legacy) */
34  CK_DT_INT8, /* 1 byte per element */
35  CK_DT_INT4, /* 0.5 bytes per element (packed) */
36 
37  /* GGML-compatible block quantization */
38  CK_DT_Q4_0, /* 4.5 bits/weight (18 bytes per 32 weights) */
39  CK_DT_Q4_1, /* 5.0 bits/weight (20 bytes per 32 weights) */
40  CK_DT_Q4_K, /* 4.5 bits/weight (144 bytes per 256 weights) - Q4_K_M */
41  CK_DT_Q6_K, /* 6.5625 bits/weight (210 bytes per 256 weights) */
42  CK_DT_Q8_0, /* 8.5 bits/weight (34 bytes per 32 weights) */
43  CK_DT_Q8_K, /* 9.125 bits/weight (292 bytes per 256 weights) */
44  CK_DT_Q5_0, /* 5.5 bits/weight (22 bytes per 32 weights) */
45  CK_DT_Q5_1, /* 6.0 bits/weight (24 bytes per 32 weights) */
46  CK_DT_Q5_K, /* 5.5 bits/weight (176 bytes per 256 weights) - Q5_K super-block */
47 
48  CK_DT_COUNT
49 } CKDataType;
CKDataType
Supported data types in C-Kernel-Engine.
Definition: ckernel_dtype.h:27
@ CK_DT_Q4_K
Definition: ckernel_dtype.h:40
@ CK_DT_COUNT
Definition: ckernel_dtype.h:48
@ CK_DT_Q4_0
Definition: ckernel_dtype.h:38
@ CK_DT_Q8_0
Definition: ckernel_dtype.h:42
@ CK_DT_Q8_K
Definition: ckernel_dtype.h:43
@ CK_DT_Q5_0
Definition: ckernel_dtype.h:44
@ CK_DT_Q5_K
Definition: ckernel_dtype.h:46
@ CK_DT_FP32
Definition: ckernel_dtype.h:29
@ CK_DT_FP16
Definition: ckernel_dtype.h:31
@ CK_DT_Q6_K
Definition: ckernel_dtype.h:41
@ CK_DT_Q4_1
Definition: ckernel_dtype.h:39
@ CK_DT_INT4
Definition: ckernel_dtype.h:35
@ CK_DT_BF16
Definition: ckernel_dtype.h:30
@ CK_DT_INT8
Definition: ckernel_dtype.h:34
@ CK_DT_Q5_1
Definition: ckernel_dtype.h:45

Function Documentation

◆ ck_dtype_block_bytes()

static size_t ck_dtype_block_bytes ( CKDataType  dt)
inline static

Get bytes per block for quantized types.

Definition at line 109 of file ckernel_dtype.h.

110 {
111  switch (dt) {
112  case CK_DT_Q4_0:
113  return 18; /* 2 (scale) + 16 (32 x 4-bit) */
114  case CK_DT_Q4_1:
115  return 20; /* 2 (scale) + 2 (min) + 16 (32 x 4-bit) */
116  case CK_DT_Q5_0:
117  return 22; /* 2 (scale) + 4 (high bit) + 16 (low 4-bit) */
118  case CK_DT_Q5_1:
119  return 24; /* 2 (scale) + 2 (min) + 4 (high bit) + 16 (low 4-bit) */
120  case CK_DT_Q4_K:
121  return 144; /* 2 + 2 + 12 + 128 */
122  case CK_DT_Q5_K:
123  return 176; /* 2 + 2 + 12 + 32 + 128 */
124  case CK_DT_Q6_K:
125  return 210; /* 2 + 16 + 128 + 64 */
126  case CK_DT_Q8_0:
127  return 34; /* 2 (scale) + 32 (32 x 8-bit) */
128  case CK_DT_Q8_K:
129  return 292; /* 4 (scale) + 256 (int8) + 32 (bsums) */
130  default:
131  return ck_dtype_bytes(dt);
132  }
133 }
static size_t ck_dtype_bytes(CKDataType dt)
Get bytes per element for non-quantized types.
Definition: ckernel_dtype.h:68

References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, CK_DT_Q8_K, and ck_dtype_bytes().

Referenced by ck_dtype_row_bytes().

◆ ck_dtype_block_size()

static size_t ck_dtype_block_size ( CKDataType  dt)
inline static

Get the number of elements per quantization block.

Definition at line 87 of file ckernel_dtype.h.

88 {
89  switch (dt) {
90  case CK_DT_Q4_0:
91  case CK_DT_Q4_1:
92  case CK_DT_Q5_0:
93  case CK_DT_Q5_1:
94  case CK_DT_Q8_0:
95  return 32;
96  case CK_DT_Q4_K:
97  case CK_DT_Q5_K:
98  case CK_DT_Q6_K:
99  case CK_DT_Q8_K:
100  return 256;
101  default:
102  return 1; /* Non-quantized types: 1 element per "block" */
103  }
104 }

References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, and CK_DT_Q8_K.

Referenced by ck_dtype_row_bytes().

◆ ck_dtype_bytes()

static size_t ck_dtype_bytes ( CKDataType  dt)
inline static

Get bytes per element for non-quantized types.

Note
For quantized types, use ck_dtype_block_bytes() and ck_dtype_block_size()

Definition at line 68 of file ckernel_dtype.h.

69 {
70  switch (dt) {
71  case CK_DT_BF16:
72  case CK_DT_FP16:
73  return 2;
74  case CK_DT_INT8:
75  return 1;
76  case CK_DT_INT4:
77  return 1; /* Note: actually 0.5, but stored as pairs */
78  case CK_DT_FP32:
79  default:
80  return 4;
81  }
82 }

References CK_DT_BF16, CK_DT_FP16, CK_DT_FP32, CK_DT_INT4, and CK_DT_INT8.

Referenced by ck_dtype_block_bytes(), and ck_dtype_row_bytes().

◆ ck_dtype_is_quantized()

static int ck_dtype_is_quantized ( CKDataType  dt)
inline static

Check if a data type is block-quantized (GGML-style)

Definition at line 58 of file ckernel_dtype.h.

59 {
60  return dt == CK_DT_Q4_0 || dt == CK_DT_Q4_1 || dt == CK_DT_Q5_0 || dt == CK_DT_Q5_1 ||
61  dt == CK_DT_Q5_K || dt == CK_DT_Q4_K || dt == CK_DT_Q6_K || dt == CK_DT_Q8_0 || dt == CK_DT_Q8_K;
62 }

References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q5_K, CK_DT_Q6_K, CK_DT_Q8_0, and CK_DT_Q8_K.

Referenced by ck_dtype_row_bytes().

◆ ck_dtype_row_bytes()

static size_t ck_dtype_row_bytes ( CKDataType  dt,
size_t  n_elements 
)
inline static

Calculate total bytes for n_elements of given dtype.

Definition at line 138 of file ckernel_dtype.h.

139 {
140  if (ck_dtype_is_quantized(dt)) {
141  size_t block_size = ck_dtype_block_size(dt);
142  size_t n_blocks = (n_elements + block_size - 1) / block_size;
143  return n_blocks * ck_dtype_block_bytes(dt);
144  }
145  return n_elements * ck_dtype_bytes(dt);
146 }
static size_t ck_dtype_block_size(CKDataType dt)
Get the number of elements per quantization block.
Definition: ckernel_dtype.h:87
static size_t ck_dtype_block_bytes(CKDataType dt)
Get bytes per block for quantized types.
static int ck_dtype_is_quantized(CKDataType dt)
Check if a data type is block-quantized (GGML-style)
Definition: ckernel_dtype.h:58

References ck_dtype_block_bytes(), ck_dtype_block_size(), ck_dtype_bytes(), and ck_dtype_is_quantized().

Referenced by buffer_bytes(), build_plan(), ck_qkv_project_head_major_q4_k(), ck_qkv_project_head_major_quant(), ck_qkv_project_head_major_token_q4_k(), ck_qkv_project_head_major_token_q4_k_q8_k(), ck_qkv_project_head_major_token_quant(), embedding_forward_q4_k(), embedding_forward_q6_k(), embedding_forward_q8_0(), fused_mlp_swiglu_prefill_w1w2_quant(), fused_mlp_swiglu_prefill_w1w2_quant_scratch_size(), fused_rmsnorm_qkv_prefill_head_major_quant(), fused_rmsnorm_qkv_prefill_head_major_quant_scratch_size(), mega_fused_outproj_mlp_prefill(), mega_fused_outproj_mlp_prefill_scratch_size(), model_forward_prefill_impl(), model_layer_0_decode(), model_layer_0_prefill(), model_layer_10_decode(), model_layer_10_prefill(), model_layer_11_decode(), model_layer_11_prefill(), model_layer_12_decode(), model_layer_12_prefill(), model_layer_13_decode(), model_layer_13_prefill(), model_layer_14_decode(), model_layer_14_prefill(), model_layer_15_decode(), model_layer_15_prefill(), model_layer_16_decode(), model_layer_16_prefill(), model_layer_17_decode(), model_layer_17_prefill(), model_layer_18_decode(), model_layer_18_prefill(), model_layer_19_decode(), model_layer_19_prefill(), model_layer_1_decode(), model_layer_1_prefill(), model_layer_20_decode(), model_layer_20_prefill(), model_layer_21_decode(), model_layer_21_prefill(), model_layer_22_decode(), model_layer_22_prefill(), model_layer_23_decode(), model_layer_23_prefill(), model_layer_2_decode(), model_layer_2_prefill(), model_layer_3_decode(), model_layer_3_prefill(), model_layer_4_decode(), model_layer_4_prefill(), model_layer_5_decode(), model_layer_5_prefill(), model_layer_6_decode(), model_layer_6_prefill(), model_layer_7_decode(), model_layer_7_prefill(), model_layer_8_decode(), model_layer_8_prefill(), model_layer_9_decode(), model_layer_9_prefill(), out_proj_head_major_q5_0_q8_0(), out_proj_head_major_q8_0_q8_0(), quantize_attn_out_head_major_q8_0(), qwen2_0_5b_decode_forward_prefill_impl(), 
qwen2_0_5b_decode_layer_0_decode(), qwen2_0_5b_decode_layer_0_prefill(), qwen2_0_5b_decode_layer_10_decode(), qwen2_0_5b_decode_layer_10_prefill(), qwen2_0_5b_decode_layer_11_decode(), qwen2_0_5b_decode_layer_11_prefill(), qwen2_0_5b_decode_layer_12_decode(), qwen2_0_5b_decode_layer_12_prefill(), qwen2_0_5b_decode_layer_13_decode(), qwen2_0_5b_decode_layer_13_prefill(), qwen2_0_5b_decode_layer_14_decode(), qwen2_0_5b_decode_layer_14_prefill(), qwen2_0_5b_decode_layer_15_decode(), qwen2_0_5b_decode_layer_15_prefill(), qwen2_0_5b_decode_layer_16_decode(), qwen2_0_5b_decode_layer_16_prefill(), qwen2_0_5b_decode_layer_17_decode(), qwen2_0_5b_decode_layer_17_prefill(), qwen2_0_5b_decode_layer_18_decode(), qwen2_0_5b_decode_layer_18_prefill(), qwen2_0_5b_decode_layer_19_decode(), qwen2_0_5b_decode_layer_19_prefill(), qwen2_0_5b_decode_layer_1_decode(), qwen2_0_5b_decode_layer_1_prefill(), qwen2_0_5b_decode_layer_20_decode(), qwen2_0_5b_decode_layer_20_prefill(), qwen2_0_5b_decode_layer_21_decode(), qwen2_0_5b_decode_layer_21_prefill(), qwen2_0_5b_decode_layer_22_decode(), qwen2_0_5b_decode_layer_22_prefill(), qwen2_0_5b_decode_layer_23_decode(), qwen2_0_5b_decode_layer_23_prefill(), qwen2_0_5b_decode_layer_2_decode(), qwen2_0_5b_decode_layer_2_prefill(), qwen2_0_5b_decode_layer_3_decode(), qwen2_0_5b_decode_layer_3_prefill(), qwen2_0_5b_decode_layer_4_decode(), qwen2_0_5b_decode_layer_4_prefill(), qwen2_0_5b_decode_layer_5_decode(), qwen2_0_5b_decode_layer_5_prefill(), qwen2_0_5b_decode_layer_6_decode(), qwen2_0_5b_decode_layer_6_prefill(), qwen2_0_5b_decode_layer_7_decode(), qwen2_0_5b_decode_layer_7_prefill(), qwen2_0_5b_decode_layer_8_decode(), qwen2_0_5b_decode_layer_8_prefill(), qwen2_0_5b_decode_layer_9_decode(), and qwen2_0_5b_decode_layer_9_prefill().

◆ ck_dtype_supported()

static int ck_dtype_supported ( CKDataTypeMask  mask,
CKDataType  dt 
)
inline static

Check whether the bit for dt is set in mask; returns non-zero if it is, 0 otherwise.

Definition at line 148 of file ckernel_dtype.h.

149 {
150  return (mask & CK_DT_MASK(dt)) != 0;
151 }
#define CK_DT_MASK(dt)
Definition: ckernel_dtype.h:53
int32_t int32_t int32_t int32_t int32_t mask
Definition: tokenizer.h:233

References CK_DT_MASK, and mask.