Top-K selection kernels for MoE router dispatch.
More...
#include <stdint.h>
#include <stddef.h>
#include <float.h>
#include <math.h>
Go to the source code of this file.
|
| int | argmax_f32 (const float *scores, int n) |
| | Find index of maximum value. More...
|
| |
| void | topk_batched_f32 (const float *scores, int num_tokens, int n_experts, int k, int *indices, float *weights) |
| | Batched top-K selection for multiple tokens. More...
|
| |
| void | topk_f32 (const float *scores, int n, int k, int *indices, float *values) |
| | Find top-K indices and values from a score vector. More...
|
| |
| void | topk_softmax_f32 (const float *scores, int n, int k, int *indices, float *weights) |
| | Find top-K indices with softmax-normalized weights. More...
|
| |
Top-K selection kernels for MoE router dispatch.
CK-ENGINE KERNEL RULES:
- NO malloc/free - memory via bump allocator, pointers passed in
- NO OpenMP - parallelization at orchestrator/codegen layer
- API must define: inputs, outputs, workspace, and memory layouts
- Pure computation - deterministic, no side effects
After changes: make test && make llamacpp-parity-full
Provides efficient top-K selection from a score vector. Used in Mixture-of-Experts models to select which experts process each token.
Operations:
- topk_f32: Find top-K indices and values from N scores
- topk_softmax_f32: Top-K with softmax normalization of selected scores
Definition in file topk_kernels.c.
◆ argmax_f32()
| int argmax_f32 |
( |
const float * |
scores, |
|
|
int |
n |
|
) |
| |
Find index of maximum value.
- Parameters
-
| scores | Input scores [n] |
| n | Number of scores |
- Returns
- Index of maximum value
Definition at line 226 of file topk_kernels.c.
228 if (!scores || n <= 0) {
233 float max_val = scores[0];
238 __m512 vmax = _mm512_set1_ps(-FLT_MAX);
239 __m512i vidx = _mm512_setzero_si512();
240 __m512i vcur_max_idx = _mm512_setzero_si512();
243 for (; i + 16 <= n; i += 16) {
244 __m512 v = _mm512_loadu_ps(&scores[i]);
245 __m512i cur_idx = _mm512_add_epi32(
246 _mm512_set1_epi32(i),
247 _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
250 __mmask16 gt_mask = _mm512_cmp_ps_mask(v, vmax, _CMP_GT_OQ);
251 vmax = _mm512_mask_blend_ps(gt_mask, vmax, v);
252 vcur_max_idx = _mm512_mask_blend_epi32(gt_mask, vcur_max_idx, cur_idx);
258 _mm512_storeu_ps(vals, vmax);
259 _mm512_storeu_si512(idxs, vcur_max_idx);
263 for (
int j = 1; j < 16; j++) {
264 if (vals[j] > max_val) {
272 if (scores[i] > max_val) {
283 for (
int i = 1; i < n; i++) {
284 if (scores[i] > max_val) {
◆ topk_batched_f32()
| void topk_batched_f32 |
( |
const float * |
scores, |
|
|
int |
num_tokens, |
|
|
int |
n_experts, |
|
|
int |
k, |
|
|
int * |
indices, |
|
|
float * |
weights |
|
) |
| |
Batched top-K selection for multiple tokens.
- Parameters
-
| scores | Input scores [num_tokens, n_experts] |
| num_tokens | Number of tokens |
| n_experts | Number of experts |
| k | Number of experts to select per token |
| indices | Output: selected expert indices [num_tokens, k] |
| weights | Output: routing weights [num_tokens, k] (can be NULL for no softmax) |
Definition at line 191 of file topk_kernels.c.
198 if (!scores || !indices || num_tokens <= 0 || n_experts <= 0 || k <= 0) {
202 for (
int t = 0; t < num_tokens; t++) {
203 const float *token_scores = scores + t * n_experts;
204 int *token_indices = indices + t * k;
207 float *token_weights = weights + t * k;
210 topk_f32(token_scores, n_experts, k, token_indices, NULL);
void topk_f32(const float *scores, int n, int k, int *indices, float *values)
Find top-K indices and values from a score vector.
void topk_softmax_f32(const float *scores, int n, int k, int *indices, float *weights)
Find top-K indices with softmax-normalized weights.
References topk_f32(), and topk_softmax_f32().
◆ topk_f32()
| void topk_f32 |
( |
const float * |
scores, |
|
|
int |
n, |
|
|
int |
k, |
|
|
int * |
indices, |
|
|
float * |
values |
|
) |
| |
Find top-K indices and values from a score vector.
- Parameters
-
| scores | Input scores [n] |
| n | Number of scores (e.g., number of experts) |
| k | Number of top scores to select |
| indices | Output: indices of top-K scores [k], sorted descending by value |
| values | Output: top-K score values [k], sorted descending (can be NULL) |
Definition at line 49 of file topk_kernels.c.
55 if (!scores || !indices || n <= 0 || k <= 0) {
65 float local_values[k];
66 for (
int i = 0; i < k; i++) {
68 local_values[i] = scores[i];
73 for (
int i = 1; i < k; i++) {
74 if (local_values[i] < local_values[min_idx]) {
80 for (
int i = k; i < n; i++) {
81 if (scores[i] > local_values[min_idx]) {
84 local_values[min_idx] = scores[i];
88 for (
int j = 1; j < k; j++) {
89 if (local_values[j] < local_values[min_idx]) {
97 for (
int i = 1; i < k; i++) {
98 float val = local_values[i];
101 while (j >= 0 && local_values[j] < val) {
102 local_values[j + 1] = local_values[j];
103 indices[j + 1] = indices[j];
106 local_values[j + 1] = val;
107 indices[j + 1] = idx;
112 for (
int i = 0; i < k; i++) {
113 values[i] = local_values[i];
Referenced by topk_batched_f32(), and topk_softmax_f32().
◆ topk_softmax_f32()
| void topk_softmax_f32 |
( |
const float * |
scores, |
|
|
int |
n, |
|
|
int |
k, |
|
|
int * |
indices, |
|
|
float * |
weights |
|
) |
| |
Find top-K indices with softmax-normalized weights.
- Parameters
-
| scores | Input scores [n] (router logits) |
| n | Number of scores |
| k | Number of top scores to select |
| indices | Output: indices of top-K scores [k] |
| weights | Output: softmax-normalized weights for selected [k], sum to 1.0 |
Definition at line 134 of file topk_kernels.c.
140 if (!scores || !indices || !weights || n <= 0 || k <= 0) {
150 topk_f32(scores, n, k, indices, values);
154 float max_val = values[0];
155 for (
int i = 1; i < k; i++) {
156 if (values[i] > max_val) {
163 for (
int i = 0; i < k; i++) {
164 weights[i] = expf(values[i] - max_val);
169 float inv_sum = 1.0f / sum;
170 for (
int i = 0; i < k; i++) {
171 weights[i] *= inv_sum;
References topk_f32().
Referenced by topk_batched_f32().