← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ckernel_dtype.h
Go to the documentation of this file.
1 #ifndef CKERNEL_DTYPE_H
2 #define CKERNEL_DTYPE_H
3 
4 #include <stdint.h>
5 #include <stddef.h>
6 
/**
 * @brief Supported data types in C-Kernel-Engine
 *
 * Standard types:
 * - CK_DT_FP32: 32-bit float (baseline, full precision)
 * - CK_DT_BF16: Brain Float 16 (1+8+7 bits)
 * - CK_DT_FP16: IEEE Half Precision (1+5+10 bits)
 *
 * Simple quantization:
 * - CK_DT_INT8: 8-bit signed integer
 * - CK_DT_INT4: 4-bit signed integer (simple, no scales)
 *
 * GGML-compatible quantization (block-based with scales):
 * - CK_DT_Q4_0: 4-bit, 32 weights/block, 1 FP16 scale
 * - CK_DT_Q4_1: 4-bit, 32 weights/block, 2-byte scale + 2-byte min
 * - CK_DT_Q4_K: 4-bit k-quant, 256 weights/block, nested scales (Q4_K_M)
 * - CK_DT_Q6_K: 6-bit k-quant, 256 weights/block, per-16 scales
 * - CK_DT_Q8_0: 8-bit, 32 weights/block, 1 FP16 scale
 * - CK_DT_Q8_K: 8-bit k-quant, 256 weights/block, FP32 scale + bsums
 * - CK_DT_Q5_0: 5-bit, 32 weights/block, 1 FP16 scale
 * - CK_DT_Q5_1: 5-bit, 32 weights/block, 2-byte scale + 2-byte min
 * - CK_DT_Q5_K: 5-bit k-quant, 256 weights/block (super-block)
 *
 * The enumerator values double as bit positions for CK_DT_MASK(), so the
 * order below must not change; add new types immediately before CK_DT_COUNT.
 */
typedef enum {
    /* Standard floating-point types */
    CK_DT_FP32 = 0, /* 4 bytes per element */
    CK_DT_BF16,     /* 2 bytes per element */
    CK_DT_FP16,     /* 2 bytes per element */

    /* Simple integer types (legacy) */
    CK_DT_INT8,     /* 1 byte per element */
    CK_DT_INT4,     /* 0.5 bytes per element (packed) */

    /* GGML-compatible block quantization */
    CK_DT_Q4_0,     /* 4.5 bits/weight (18 bytes per 32 weights) */
    CK_DT_Q4_1,     /* 5.0 bits/weight (20 bytes per 32 weights) */
    CK_DT_Q4_K,     /* 4.5 bits/weight (144 bytes per 256 weights) - Q4_K_M */
    CK_DT_Q6_K,     /* 6.5625 bits/weight (210 bytes per 256 weights) */
    CK_DT_Q8_0,     /* 8.5 bits/weight (34 bytes per 32 weights) */
    CK_DT_Q8_K,     /* 9.125 bits/weight (292 bytes per 256 weights) */
    CK_DT_Q5_0,     /* 5.5 bits/weight (22 bytes per 32 weights) */
    CK_DT_Q5_1,     /* 6.0 bits/weight (24 bytes per 32 weights) */
    CK_DT_Q5_K,     /* 5.5 bits/weight (176 bytes per 256 weights) - Q5_K super-block */

    CK_DT_COUNT     /* Number of data types, not a type itself; keep last */
} CKDataType;
47 
50 
/** Bit set of data types: one bit per data-type enumerator value. */
typedef uint32_t CKDataTypeMask;

/**
 * Single-bit mask for data type @p dt (bit position == enumerator value).
 * @p dt must be in the range 0..31 for the shift to be well defined.
 */
#define CK_DT_MASK(dt) (1u << (uint32_t)(dt))
54 
55 /**
56  * @brief Check if a data type is block-quantized (GGML-style)
57  */
58 static inline int ck_dtype_is_quantized(CKDataType dt)
59 {
60  return dt == CK_DT_Q4_0 || dt == CK_DT_Q4_1 || dt == CK_DT_Q5_0 || dt == CK_DT_Q5_1 ||
61  dt == CK_DT_Q5_K || dt == CK_DT_Q4_K || dt == CK_DT_Q6_K || dt == CK_DT_Q8_0 || dt == CK_DT_Q8_K;
62 }
63 
64 /**
65  * @brief Get bytes per element for non-quantized types
66  * @note For quantized types, use ck_dtype_block_bytes() and ck_dtype_block_size()
67  */
68 static inline size_t ck_dtype_bytes(CKDataType dt)
69 {
70  switch (dt) {
71  case CK_DT_BF16:
72  case CK_DT_FP16:
73  return 2;
74  case CK_DT_INT8:
75  return 1;
76  case CK_DT_INT4:
77  return 1; /* Note: actually 0.5, but stored as pairs */
78  case CK_DT_FP32:
79  default:
80  return 4;
81  }
82 }
83 
84 /**
85  * @brief Get the number of elements per quantization block
86  */
87 static inline size_t ck_dtype_block_size(CKDataType dt)
88 {
89  switch (dt) {
90  case CK_DT_Q4_0:
91  case CK_DT_Q4_1:
92  case CK_DT_Q5_0:
93  case CK_DT_Q5_1:
94  case CK_DT_Q8_0:
95  return 32;
96  case CK_DT_Q4_K:
97  case CK_DT_Q5_K:
98  case CK_DT_Q6_K:
99  case CK_DT_Q8_K:
100  return 256;
101  default:
102  return 1; /* Non-quantized types: 1 element per "block" */
103  }
104 }
105 
106 /**
107  * @brief Get bytes per block for quantized types
108  */
109 static inline size_t ck_dtype_block_bytes(CKDataType dt)
110 {
111  switch (dt) {
112  case CK_DT_Q4_0:
113  return 18; /* 2 (scale) + 16 (32 x 4-bit) */
114  case CK_DT_Q4_1:
115  return 20; /* 2 (scale) + 2 (min) + 16 (32 x 4-bit) */
116  case CK_DT_Q5_0:
117  return 22; /* 2 (scale) + 4 (high bit) + 16 (low 4-bit) */
118  case CK_DT_Q5_1:
119  return 24; /* 2 (scale) + 2 (min) + 4 (high bit) + 16 (low 4-bit) */
120  case CK_DT_Q4_K:
121  return 144; /* 2 + 2 + 12 + 128 */
122  case CK_DT_Q5_K:
123  return 176; /* 2 + 2 + 12 + 32 + 128 */
124  case CK_DT_Q6_K:
125  return 210; /* 2 + 16 + 128 + 64 */
126  case CK_DT_Q8_0:
127  return 34; /* 2 (scale) + 32 (32 x 8-bit) */
128  case CK_DT_Q8_K:
129  return 292; /* 4 (scale) + 256 (int8) + 32 (bsums) */
130  default:
131  return ck_dtype_bytes(dt);
132  }
133 }
134 
135 /**
136  * @brief Calculate total bytes for n_elements of given dtype
137  */
138 static inline size_t ck_dtype_row_bytes(CKDataType dt, size_t n_elements)
139 {
140  if (ck_dtype_is_quantized(dt)) {
141  size_t block_size = ck_dtype_block_size(dt);
142  size_t n_blocks = (n_elements + block_size - 1) / block_size;
143  return n_blocks * ck_dtype_block_bytes(dt);
144  }
145  return n_elements * ck_dtype_bytes(dt);
146 }
147 
149 {
150  return (mask & CK_DT_MASK(dt)) != 0;
151 }
152 
153 #endif /* CKERNEL_DTYPE_H */
static int ck_dtype_supported(CKDataTypeMask mask, CKDataType dt)
#define CK_DT_MASK(dt)
Definition: ckernel_dtype.h:53
static size_t ck_dtype_block_size(CKDataType dt)
Get the number of elements per quantization block.
Definition: ckernel_dtype.h:87
static size_t ck_dtype_block_bytes(CKDataType dt)
Get bytes per block for quantized types.
uint32_t CKDataTypeMask
Definition: ckernel_dtype.h:51
CKDataType
Supported data types in C-Kernel-Engine.
Definition: ckernel_dtype.h:27
@ CK_DT_Q4_K
Definition: ckernel_dtype.h:40
@ CK_DT_COUNT
Definition: ckernel_dtype.h:48
@ CK_DT_Q4_0
Definition: ckernel_dtype.h:38
@ CK_DT_Q8_0
Definition: ckernel_dtype.h:42
@ CK_DT_Q8_K
Definition: ckernel_dtype.h:43
@ CK_DT_Q5_0
Definition: ckernel_dtype.h:44
@ CK_DT_Q5_K
Definition: ckernel_dtype.h:46
@ CK_DT_FP32
Definition: ckernel_dtype.h:29
@ CK_DT_FP16
Definition: ckernel_dtype.h:31
@ CK_DT_Q6_K
Definition: ckernel_dtype.h:41
@ CK_DT_Q4_1
Definition: ckernel_dtype.h:39
@ CK_DT_INT4
Definition: ckernel_dtype.h:35
@ CK_DT_BF16
Definition: ckernel_dtype.h:30
@ CK_DT_INT8
Definition: ckernel_dtype.h:34
@ CK_DT_Q5_1
Definition: ckernel_dtype.h:45
static int ck_dtype_is_quantized(CKDataType dt)
Check if a data type is block-quantized (GGML-style)
Definition: ckernel_dtype.h:58
static size_t ck_dtype_bytes(CKDataType dt)
Get bytes per element for non-quantized types.
Definition: ckernel_dtype.h:68
static size_t ck_dtype_row_bytes(CKDataType dt, size_t n_elements)
Calculate total bytes for n_elements of given dtype.
int32_t int32_t int32_t int32_t int32_t mask
Definition: tokenizer.h:233