Dequantization kernels for GGML-compatible formats. More...
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <immintrin.h>
#include "ckernel_quant.h"
#include "ckernel_dtype.h"
Go to the source code of this file.
Functions | |
| void | dequant_q4_0_block (const block_q4_0 *block, float *output) |
| Dequantize a single Q4_0 block to FP32. More... | |
| void | dequant_q4_0_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q4_0 row (multiple blocks) More... | |
| void | dequant_q4_1_block (const block_q4_1 *block, float *output) |
| Dequantize a single Q4_1 block to FP32. More... | |
| void | dequant_q4_1_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q4_1 row (multiple blocks) More... | |
| void | dequant_q4_k_block (const block_q4_K *block, float *output) |
| Dequantize a single Q4_K block to FP32. More... | |
| void | dequant_q4_k_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q4_K row (multiple blocks) More... | |
| void | dequant_q5_0_block (const block_q5_0 *block, float *output) |
| Dequantize a single Q5_0 block to FP32. More... | |
| void | dequant_q5_0_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q5_0 row (multiple blocks) More... | |
| void | dequant_q5_1_block (const block_q5_1 *block, float *output) |
| Dequantize a single Q5_1 block to FP32. More... | |
| void | dequant_q5_1_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q5_1 row (multiple blocks) More... | |
| void | dequant_q6_k_block (const block_q6_K *block, float *output) |
| Dequantize a single Q6_K block to FP32. More... | |
| void | dequant_q6_k_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q6_K row (multiple blocks) More... | |
| void | dequant_q8_0_block (const block_q8_0 *block, float *output) |
| Dequantize a single Q8_0 block to FP32. More... | |
| void | dequant_q8_0_row (const void *src, float *dst, size_t n_elements) |
| Dequantize Q8_0 row (multiple blocks) More... | |
| void | dequant_row (CKDataType dtype, const void *src, float *dst, size_t n_elements) |
| Dequantize a row of quantized data to FP32. More... | |
Dequantization kernels for GGML-compatible formats.
After changes: make test && make llamacpp-parity-full
Implements dequantization from Q4_0, Q4_1, Q5_0, Q5_1, Q4_K, Q6_K, and Q8_0 to FP32. These kernels are used as building blocks for quantized GEMM/GEMV.
Key optimization: Dequantize into registers, use immediately in FMA, never write intermediate FP32 values to memory.
Definition in file dequant_kernels.c.
| void dequant_q4_0_block | ( | const block_q4_0 * | block, |
| float * | output | ||
| ) |
Dequantize a single Q4_0 block to FP32.
| block | Pointer to Q4_0 block (18 bytes) |
| output | Output FP32 array (32 floats) |
Definition at line 38 of file dequant_kernels.c.
References block_q4_0::d, GGML_FP16_TO_FP32, QK4_0, and block_q4_0::qs.
Referenced by dequant_q4_0_row().
| void dequant_q4_0_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q4_0 row (multiple blocks)
| src | Q4_0 data |
| dst | FP32 output |
| n_elements | Number of elements to dequantize |
Definition at line 61 of file dequant_kernels.c.
References dequant_q4_0_block(), and QK4_0.
Referenced by ck_test_dequant_q4_0(), and dequant_row().
| void dequant_q4_1_block | ( | const block_q4_1 * | block, |
| float * | output | ||
| ) |
Dequantize a single Q4_1 block to FP32.
| block | Pointer to Q4_1 block (20 bytes) |
| output | Output FP32 array (32 floats) |
Definition at line 117 of file dequant_kernels.c.
References block_q4_1::d, GGML_FP16_TO_FP32, block_q4_1::m, QK4_1, and block_q4_1::qs.
Referenced by dequant_q4_1_row().
| void dequant_q4_1_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q4_1 row (multiple blocks)
Definition at line 139 of file dequant_kernels.c.
References dequant_q4_1_block(), and QK4_1.
Referenced by dequant_row().
| void dequant_q4_k_block | ( | const block_q4_K * | block, |
| float * | output | ||
| ) |
Dequantize a single Q4_K block to FP32.
This matches llama.cpp's dequantize_row_q4_K exactly.
Definition at line 334 of file dequant_kernels.c.
References block_q4_K::d, block_q4_K::dmin, GGML_FP16_TO_FP32, block_q4_K::qs, block_q4_K::scales, and unpack_q4_k_scales().
Referenced by dequant_q4_k_row().
| void dequant_q4_k_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q4_K row (multiple blocks)
Definition at line 370 of file dequant_kernels.c.
References dequant_q4_k_block(), and QK_K.
Referenced by ck_test_dequant_q4_k(), dequant_row(), and embedding_forward_q4_k().
| void dequant_q5_0_block | ( | const block_q5_0 * | block, |
| float * | output | ||
| ) |
Dequantize a single Q5_0 block to FP32.
| block | Pointer to Q5_0 block (22 bytes) |
| output | Output FP32 array (32 floats) |
Definition at line 161 of file dequant_kernels.c.
References block_q5_0::d, GGML_FP16_TO_FP32, block_q5_0::qh, QK5_0, and block_q5_0::qs.
Referenced by dequant_q5_0_row().
| void dequant_q5_0_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q5_0 row (multiple blocks)
Definition at line 196 of file dequant_kernels.c.
References dequant_q5_0_block(), and QK5_0.
Referenced by dequant_row().
| void dequant_q5_1_block | ( | const block_q5_1 * | block, |
| float * | output | ||
| ) |
Dequantize a single Q5_1 block to FP32.
| block | Pointer to Q5_1 block (24 bytes) |
| output | Output FP32 array (32 floats) |
Definition at line 218 of file dequant_kernels.c.
References block_q5_1::d, GGML_FP16_TO_FP32, block_q5_1::m, block_q5_1::qh, QK5_1, and block_q5_1::qs.
Referenced by dequant_q5_1_row().
| void dequant_q5_1_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q5_1 row (multiple blocks)
Definition at line 255 of file dequant_kernels.c.
References dequant_q5_1_block(), and QK5_1.
Referenced by ck_test_dequant_q5_1(), and dequant_row().
| void dequant_q6_k_block | ( | const block_q6_K * | block, |
| float * | output | ||
| ) |
Dequantize a single Q6_K block to FP32.
Definition at line 389 of file dequant_kernels.c.
References block_q6_K::d, GGML_FP16_TO_FP32, block_q6_K::qh, QK_K, block_q6_K::ql, and block_q6_K::scales.
Referenced by dequant_q6_k_row().
| void dequant_q6_k_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q6_K row (multiple blocks)
Definition at line 420 of file dequant_kernels.c.
References dequant_q6_k_block(), and QK_K.
Referenced by ck_test_dequant_q6_k(), ck_test_gemv_q6_k(), dequant_row(), and embedding_forward_q6_k().
| void dequant_q8_0_block | ( | const block_q8_0 * | block, |
| float * | output | ||
| ) |
Dequantize a single Q8_0 block to FP32.
Definition at line 274 of file dequant_kernels.c.
References block_q8_0::d, GGML_FP16_TO_FP32, QK8_0, and block_q8_0::qs.
Referenced by dequant_q8_0_row().
| void dequant_q8_0_row | ( | const void * | src, |
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize Q8_0 row (multiple blocks)
Definition at line 286 of file dequant_kernels.c.
References dequant_q8_0_block(), and QK8_0.
Referenced by dequant_row(), and embedding_forward_q8_0().
| void dequant_row | ( | CKDataType | dtype, |
| const void * | src, | ||
| float * | dst, | ||
| size_t | n_elements | ||
| ) |
Dequantize a row of quantized data to FP32.
| dtype | Data type (must be quantized type) |
| src | Source quantized data |
| dst | Destination FP32 buffer |
| n_elements | Number of elements |
Definition at line 512 of file dequant_kernels.c.
References CK_DT_Q4_0, CK_DT_Q4_1, CK_DT_Q4_K, CK_DT_Q5_0, CK_DT_Q5_1, CK_DT_Q6_K, CK_DT_Q8_0, dequant_q4_0_row(), dequant_q4_1_row(), dequant_q4_k_row(), dequant_q5_0_row(), dequant_q5_1_row(), dequant_q6_k_row(), and dequant_q8_0_row().