← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6.5_simple.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <omp.h>
#include "ckernel_engine.h"

Go to the source code of this file.

Macros

#define _GNU_SOURCE
 
#define ALIGN_CONTEXT   32768
 
#define ALIGN_EMBED   896
 
#define ALIGN_HEAD   64
 
#define MODEL_EMBED_DIM   896
 
#define MODEL_HEAD_DIM   64
 
#define MODEL_INTERMEDIATE   4864
 
#define MODEL_INTERMEDIATE_SIZE   4864
 
#define MODEL_MAX_SEQ_LEN   32768
 
#define MODEL_NUM_HEADS   14
 
#define MODEL_NUM_KV_HEADS   2
 
#define MODEL_NUM_LAYERS   24
 
#define MODEL_VOCAB_SIZE   128256
 

Functions

static void apply_rope (float *x, int seq_len, int head_dim)
 
static void gemm_nt (const float *input, const float *weight, float *output, int rows, int cols, int common)
 
int main (int argc, char **argv)
 
static void residual_add (float *residual, float *addend, int n)
 
static void silu (float *x, int n)
 
static void simple_attention (const float *q, const float *k, const float *v, float *output, int num_heads, int num_kv_heads, int seq_len, int head_dim)
 
static void simple_embedding (const int32_t *tokens, int num_tokens, const float *weight, float *output, int vocab_size, int embed_dim)
 
static void simple_rmsnorm (const float *input, const float *gamma, float *output, int tokens, int d_model, float eps)
 
static void softmax (float *x, int n)
 
void v6_prefill (const float *embed_weight, const int32_t *tokens, int num_tokens, float *logits)
 

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 12 of file v6.5_simple.c.

◆ ALIGN_CONTEXT

#define ALIGN_CONTEXT   32768

Definition at line 37 of file v6.5_simple.c.

◆ ALIGN_EMBED

#define ALIGN_EMBED   896

Definition at line 34 of file v6.5_simple.c.

◆ ALIGN_HEAD

#define ALIGN_HEAD   64

Definition at line 35 of file v6.5_simple.c.

◆ MODEL_EMBED_DIM

#define MODEL_EMBED_DIM   896

Definition at line 24 of file v6.5_simple.c.

◆ MODEL_HEAD_DIM

#define MODEL_HEAD_DIM   64

Definition at line 28 of file v6.5_simple.c.

◆ MODEL_INTERMEDIATE

#define MODEL_INTERMEDIATE   4864

Definition at line 36 of file v6.5_simple.c.

◆ MODEL_INTERMEDIATE_SIZE

#define MODEL_INTERMEDIATE_SIZE   4864

Definition at line 29 of file v6.5_simple.c.

◆ MODEL_MAX_SEQ_LEN

#define MODEL_MAX_SEQ_LEN   32768

Definition at line 31 of file v6.5_simple.c.

◆ MODEL_NUM_HEADS

#define MODEL_NUM_HEADS   14

Definition at line 26 of file v6.5_simple.c.

◆ MODEL_NUM_KV_HEADS

#define MODEL_NUM_KV_HEADS   2

Definition at line 27 of file v6.5_simple.c.

◆ MODEL_NUM_LAYERS

#define MODEL_NUM_LAYERS   24

Definition at line 25 of file v6.5_simple.c.

◆ MODEL_VOCAB_SIZE

#define MODEL_VOCAB_SIZE   128256

Definition at line 30 of file v6.5_simple.c.

Function Documentation

◆ apply_rope()

/*
 * apply_rope - rotary position embedding (placeholder).
 *
 * Deliberately the identity transform for now: the simplified v6 pipeline
 * defers real RoPE to a later revision. All parameters are consumed only
 * to silence unused-parameter warnings; the buffer is left untouched.
 */
static void apply_rope(float *x, int seq_len, int head_dim) {
    (void)x;
    (void)seq_len;
    (void)head_dim;
}

Referenced by v6_prefill().

◆ gemm_nt()

/*
 * gemm_nt - naive matrix multiply with a transposed right operand.
 *
 * Computes output = input * weight^T where
 *   input  : rows   x common (row-major)
 *   weight : cols   x common (row-major; consumed transposed)
 *   output : rows   x cols   (row-major, fully overwritten)
 *
 * Triple loop, no blocking or vectorization — intentionally the simplest
 * correct reference kernel.
 */
static void gemm_nt(const float *input, const float *weight, float *output,
                    int rows, int cols, int common) {
    for (int i = 0; i < rows; i++) {
        const float *a_row = input + i * common;
        float *out_row = output + i * cols;
        for (int j = 0; j < cols; j++) {
            const float *b_row = weight + j * common;
            float acc = 0.0f;
            for (int p = 0; p < common; p++) {
                acc += a_row[p] * b_row[p];
            }
            out_row[j] = acc;
        }
    }
}

Referenced by v6_prefill().

◆ main()

/*
 * main - CLI entry point for the simplified v6 driver.
 *
 * Parses <weights.bin> plus optional -p/--prompt and -t/--tokens flags,
 * then prints placeholder output (real inference is pending).
 *
 * Bug fix: the previous revision did `argv[++i]` for -p/-t without
 * checking that an argument follows. With "-p" as the last argument this
 * read argv[argc] (guaranteed NULL) and later passed NULL to printf("%s")
 * or atoi() — undefined behavior. Missing option arguments are now
 * reported as errors.
 */
int main(int argc, char **argv) {
    printf("=== V6 Simple CLI ===\n");
    printf("Generic kernel implementation\n");
    printf("OMP parallelization for prefill\n\n");

    if (argc < 2) {
        printf("Usage: %s <weights.bin> [options]\n", argv[0]);
        printf("\nOptions:\n");
        printf(" -p, --prompt <text> Input prompt\n");
        printf(" -t, --tokens <n> Max tokens (default: 50)\n");
        printf(" -h, --help Show help\n");
        return 1;
    }

    const char *weights_path = argv[1];
    const char *prompt = "Hello";
    int max_tokens = 50;

    for (int i = 2; i < argc; i++) {
        if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--prompt") == 0) {
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: %s requires an argument\n", argv[i]);
                return 1;
            }
            prompt = argv[++i];
        } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--tokens") == 0) {
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: %s requires an argument\n", argv[i]);
                return 1;
            }
            max_tokens = atoi(argv[++i]);
        } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
            printf("Usage: %s <weights.bin> [options]\n", argv[0]);
            return 0;
        }
    }

    (void)weights_path;  /* unused until weight loading is implemented */

    printf("Model: Qwen2 0.5B (generic kernels)\n");
    printf("Prompt: %s\n", prompt);
    printf("Max tokens: %d\n", max_tokens);
    printf("\n[Note: This is a simplified v6 implementation using generic kernels]\n");
    printf("[Real weights loading and inference would require full implementation]\n");

    /* Placeholder for actual inference */
    printf("\nAssistant: (v6 placeholder - full implementation pending)\n");

    return 0;
}

◆ residual_add()

/*
 * residual_add - in-place elementwise sum: residual[i] += addend[i].
 *
 * Standard transformer residual connection over n floats; `addend` is
 * read-only in practice but left non-const to match the existing callers.
 */
static void residual_add(float *residual, float *addend, int n) {
    float *dst = residual;
    const float *src = addend;
    float *const end = residual + n;
    while (dst != end) {
        *dst++ += *src++;
    }
}

Referenced by v6_prefill().

◆ silu()

static void silu ( float *  x,
int  n 
)
static

Definition at line 159 of file v6.5_simple.c.

159  {
160  for (int i = 0; i < n; i++) {
161  x[i] = x[i] / (1.0f + expf(-x[i]));
162  }
163 }

Referenced by v6_prefill().

◆ simple_attention()

/*
 * simple_attention - single-query (last-token) attention with GQA.
 *
 * q      : num_heads * head_dim         (one query token)
 * k, v   : seq_len x num_kv_heads x head_dim (token-major cache)
 * output : num_heads * head_dim (overwritten)
 *
 * The query is the final token of the sequence, so under a causal mask it
 * may attend to every cached position — no masking is required.
 *
 * Fixes vs previous revision:
 *  - The "causal mask" was inverted: it set scores[t] = -1e9 for every
 *    t < seq_len-1, i.e. it masked everything EXCEPT the last key, the
 *    opposite of its own comment. Removed — the single last-token query
 *    legitimately sees all keys.
 *  - GQA was computed (`repeat`) but never applied: every query head read
 *    KV head 0. Heads are now mapped kv_h = h / (num_heads/num_kv_heads)
 *    and K/V rows are indexed with the token stride num_kv_heads*head_dim.
 *  - scores[] was a 128 KB MODEL_MAX_SEQ_LEN stack array; it is now a
 *    seq_len-sized heap buffer.
 */
static void simple_attention(const float *q, const float *k, const float *v,
                             float *output, int num_heads, int num_kv_heads,
                             int seq_len, int head_dim) {
    const int repeat = num_heads / num_kv_heads;     /* GQA group size */
    const int kv_stride = num_kv_heads * head_dim;   /* per-token K/V row */
    const float inv_sqrt_d = 1.0f / sqrtf((float)head_dim);

    float *scores = malloc((size_t)seq_len * sizeof *scores);
    if (!scores) {
        return;  /* nothing sensible to do without scratch */
    }

    for (int h = 0; h < num_heads; h++) {
        const float *q_head = q + h * head_dim;
        const int kv_h = h / repeat;  /* shared KV head for this query head */
        float *out_head = output + h * head_dim;

        /* Scaled dot-product scores against every cached key. */
        for (int t = 0; t < seq_len; t++) {
            const float *k_row = k + t * kv_stride + kv_h * head_dim;
            float s = 0.0f;
            for (int d = 0; d < head_dim; d++) {
                s += q_head[d] * k_row[d];
            }
            scores[t] = s * inv_sqrt_d;
        }

        softmax(scores, seq_len);

        /* Probability-weighted sum of values. */
        for (int d = 0; d < head_dim; d++) {
            float acc = 0.0f;
            for (int t = 0; t < seq_len; t++) {
                acc += scores[t] * v[t * kv_stride + kv_h * head_dim + d];
            }
            out_head[d] = acc;
        }
    }

    free(scores);
}
int32_t float * score
Definition: tokenizer.h:327
static void softmax(float *x, int n)
Definition: v6.5_simple.c:62
#define MODEL_MAX_SEQ_LEN
Definition: v6.5_simple.c:31

References MODEL_MAX_SEQ_LEN, score, and softmax().

Referenced by v6_prefill().

◆ simple_embedding()

/*
 * simple_embedding - gather embedding rows for a token sequence.
 *
 * For each of num_tokens ids, copies the corresponding embed_dim-float row
 * of `weight` into `output`; out-of-range ids (negative or >= vocab_size)
 * produce a zero row instead.
 */
static void simple_embedding(const int32_t *tokens, int num_tokens,
                             const float *weight, float *output,
                             int vocab_size, int embed_dim) {
    const size_t row_bytes = (size_t)embed_dim * sizeof(float);
    for (int i = 0; i < num_tokens; i++) {
        float *dst = output + (size_t)i * embed_dim;
        const int32_t id = tokens[i];
        if (id < 0 || id >= vocab_size) {
            memset(dst, 0, row_bytes);
        } else {
            memcpy(dst, weight + (size_t)id * embed_dim, row_bytes);
        }
    }
}
int vocab_size
Definition: true_bpe.h:185

References vocab_size.

Referenced by v6_prefill().

◆ simple_rmsnorm()

static void simple_rmsnorm ( const float *  input,
const float *  gamma,
float *  output,
int  tokens,
int  d_model,
float  eps 
)
static

Definition at line 40 of file v6.5_simple.c.

41  {
42  for (int t = 0; t < tokens; t++) {
43  const float *in_row = input + t * d_model;
44  float *out_row = output + t * d_model;
45 
46  /* Compute variance */
47  float variance = 0.0f;
48  for (int i = 0; i < d_model; i++) {
49  variance += in_row[i] * in_row[i];
50  }
51  variance /= d_model;
52 
53  /* Normalize */
54  float scale = 1.0f / sqrtf(variance + eps);
55  for (int i = 0; i < d_model; i++) {
56  out_row[i] = in_row[i] * gamma[i] * scale;
57  }
58  }
59 }

Referenced by v6_prefill().

◆ softmax()

static void softmax ( float *  x,
int  n 
)
static

Definition at line 62 of file v6.5_simple.c.

62  {
63  float max_val = x[0];
64  for (int i = 1; i < n; i++) {
65  if (x[i] > max_val) max_val = x[i];
66  }
67 
68  float sum = 0.0f;
69  for (int i = 0; i < n; i++) {
70  x[i] = expf(x[i] - max_val);
71  sum += x[i];
72  }
73 
74  for (int i = 0; i < n; i++) {
75  x[i] /= sum;
76  }
77 }

Referenced by simple_attention().

◆ v6_prefill()

void v6_prefill ( const float *  embed_weight,
const int32_t *  tokens,
int  num_tokens,
float *  logits 
)

Definition at line 181 of file v6.5_simple.c.

182  {
183  if (!embed_weight || !tokens || num_tokens <= 0) return;
184 
185  /* Allocate buffers */
186  const int embed_dim = ALIGN_EMBED;
187  const int intermediate = MODEL_INTERMEDIATE;
188  const int num_layers = MODEL_NUM_LAYERS;
189  const int num_heads = MODEL_NUM_HEADS;
190  const int num_kv_heads = MODEL_NUM_KV_HEADS;
191  const int head_dim = MODEL_HEAD_DIM;
192 
193  /* Per-token hidden states: (num_tokens) x (num_layers + 1) x embed_dim */
194  float *hidden = malloc(num_tokens * (num_layers + 1) * embed_dim * sizeof(float));
195  if (!hidden) {
196  fprintf(stderr, "Failed to allocate hidden states\n");
197  return;
198  }
199 
200  /* Temporary buffers per layer */
201  float *q = malloc(num_heads * head_dim * sizeof(float));
202  float *k = malloc(num_kv_heads * head_dim * sizeof(float));
203  float *v = malloc(num_kv_heads * head_dim * sizeof(float));
204  float *attn = malloc(num_heads * head_dim * sizeof(float));
205  float *mlp = malloc(intermediate * sizeof(float));
206 
207  if (!q || !k || !v || !attn || !mlp) {
208  fprintf(stderr, "Failed to allocate temp buffers\n");
209  free(hidden);
210  free(q);
211  free(k);
212  free(v);
213  free(attn);
214  free(mlp);
215  return;
216  }
217 
218  /* Dummy layer weights (in real impl, these come from mapped memory) */
219  const float *ln1_gamma = NULL; /* Would come from weights */
220  const float *ln2_gamma = NULL;
221  const float *wq = NULL, *wk = NULL, *wv = NULL, *wo = NULL;
222  const float *w1 = NULL, *w2 = NULL;
223 
224  /* OMP parallel for over tokens */
225  #pragma omp parallel for schedule(dynamic, 1)
226  for (int t = 0; t < num_tokens; t++) {
227  float *h = hidden + t * (num_layers + 1) * embed_dim;
228 
229  /* Embedding lookup */
230  simple_embedding(tokens + t, 1, embed_weight, h, MODEL_VOCAB_SIZE, embed_dim);
231 
232  /* Process through layers */
233  for (int layer = 0; layer < num_layers; layer++) {
234  float *layer_in = h;
235  float *layer_out = h + embed_dim;
236 
237  /* RMSNorm */
238  simple_rmsnorm(layer_in, ln1_gamma, layer_in, 1, embed_dim, 1e-6f);
239 
240  /* QKV projection */
241  gemm_nt(layer_in, wq, q, 1, num_heads * head_dim, embed_dim);
242  gemm_nt(layer_in, wk, k, 1, num_kv_heads * head_dim, embed_dim);
243  gemm_nt(layer_in, wv, v, 1, num_kv_heads * head_dim, embed_dim);
244 
245  /* RoPE */
246  apply_rope(q, 1, head_dim);
247  apply_rope(k, 1, head_dim);
248 
249  /* Attention */
250  simple_attention(q, k, v, attn, num_heads, num_kv_heads, 1, head_dim);
251 
252  /* Output projection */
253  gemm_nt(attn, wo, layer_out, 1, embed_dim, num_heads * head_dim);
254 
255  /* Residual */
256  residual_add(layer_in, layer_out, embed_dim);
257 
258  /* RMSNorm before MLP */
259  simple_rmsnorm(layer_in, ln2_gamma, layer_in, 1, embed_dim, 1e-6f);
260 
261  /* MLP */
262  gemm_nt(layer_in, w1, mlp, 1, 2 * intermediate, embed_dim);
263  silu(mlp, 2 * intermediate);
264  gemm_nt(mlp, w2, layer_out, 1, embed_dim, intermediate);
265 
266  /* Residual */
267  residual_add(layer_in, layer_out, embed_dim);
268  }
269 
270  /* Copy to output area */
271  memcpy(hidden + t * (num_layers + 1) * embed_dim +
272  num_layers * embed_dim, h, embed_dim * sizeof(float));
273  }
274 
275  /* Final RMSNorm over all tokens */
276  float *final_out = malloc(num_tokens * embed_dim * sizeof(float));
277  if (final_out) {
278  simple_rmsnorm(hidden + num_layers * embed_dim, ln1_gamma, final_out,
279  num_tokens, embed_dim, 1e-6f);
280 
281  /* LM head */
282  gemm_nt(final_out, embed_weight, logits, num_tokens, MODEL_VOCAB_SIZE, embed_dim);
283 
284  free(final_out);
285  }
286 
287  free(hidden);
288  free(q);
289  free(k);
290  free(v);
291  free(attn);
292  free(mlp);
293 }
static void residual_add(float *residual, float *addend, int n)
Definition: v6.5_simple.c:166
#define MODEL_VOCAB_SIZE
Definition: v6.5_simple.c:30
static void simple_embedding(const int32_t *tokens, int num_tokens, const float *weight, float *output, int vocab_size, int embed_dim)
Definition: v6.5_simple.c:130
#define MODEL_INTERMEDIATE
Definition: v6.5_simple.c:36
static void simple_attention(const float *q, const float *k, const float *v, float *output, int num_heads, int num_kv_heads, int seq_len, int head_dim)
Definition: v6.5_simple.c:80
#define MODEL_NUM_LAYERS
Definition: v6.5_simple.c:25
static void silu(float *x, int n)
Definition: v6.5_simple.c:159
static void gemm_nt(const float *input, const float *weight, float *output, int rows, int cols, int common)
Definition: v6.5_simple.c:145
#define MODEL_HEAD_DIM
Definition: v6.5_simple.c:28
#define MODEL_NUM_KV_HEADS
Definition: v6.5_simple.c:27
static void simple_rmsnorm(const float *input, const float *gamma, float *output, int tokens, int d_model, float eps)
Definition: v6.5_simple.c:40
#define MODEL_NUM_HEADS
Definition: v6.5_simple.c:26
static void apply_rope(float *x, int seq_len, int head_dim)
Definition: v6.5_simple.c:173
#define ALIGN_EMBED
Definition: v6.5_simple.c:34

References ALIGN_EMBED, apply_rope(), gemm_nt(), MODEL_HEAD_DIM, MODEL_INTERMEDIATE, MODEL_NUM_HEADS, MODEL_NUM_KV_HEADS, MODEL_NUM_LAYERS, MODEL_VOCAB_SIZE, residual_add(), silu(), simple_attention(), simple_embedding(), and simple_rmsnorm().