← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6_inference.c File Reference

C-Kernel-Engine v6 Inference. More...

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include "ckernel_engine.h"
#include "ck_tokenizer.h"
#include "ck-kernel-inference.h"
#include "ck-kernel-prefill.h"

Go to the source code of this file.

Macros

#define _GNU_SOURCE
 
#define ANSI_BOLD   "\033[1m"
 
#define ANSI_CYAN   "\033[0;36m"
 
#define ANSI_DIM   "\033[2m"
 
#define ANSI_GREEN   "\033[0;32m"
 
#define ANSI_RESET   "\033[0m"
 
#define ANSI_YELLOW   "\033[0;33m"
 
#define CK_VERSION   "6.0.0"
 

Functions

static int load_manifest (const char *path, ManifestEntry **entries, int *num_entries)
 
static int load_weights (QWEN2_DECODEModel *model, const char *bump_path, const char *manifest_path)
 
int main (int argc, char **argv)
 
static void print_banner (void)
 
static int run_inference (const char *bump_path, const char *manifest_path, const char *tokenizer_path, const char *prompt, int max_tokens, float temperature, int topk)
 
static int sample_topk (float *probs, int vocab_size, int topk)
 

Detailed Description

C-Kernel-Engine v6 Inference.

Loads weights from a BUMP file and runs real inference using the generated model code with proper BPE tokenization.

Definition in file v6_inference.c.

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 9 of file v6_inference.c.

◆ ANSI_BOLD

#define ANSI_BOLD   "\033[1m"

Definition at line 28 of file v6_inference.c.

◆ ANSI_CYAN

#define ANSI_CYAN   "\033[0;36m"

Definition at line 32 of file v6_inference.c.

◆ ANSI_DIM

#define ANSI_DIM   "\033[2m"

Definition at line 29 of file v6_inference.c.

◆ ANSI_GREEN

#define ANSI_GREEN   "\033[0;32m"

Definition at line 30 of file v6_inference.c.

◆ ANSI_RESET

#define ANSI_RESET   "\033[0m"

Definition at line 27 of file v6_inference.c.

◆ ANSI_YELLOW

#define ANSI_YELLOW   "\033[0;33m"

Definition at line 31 of file v6_inference.c.

◆ CK_VERSION

#define CK_VERSION   "6.0.0"

Definition at line 24 of file v6_inference.c.

Function Documentation

◆ load_manifest()

static int load_manifest ( const char *  path,
ManifestEntry **  entries,
int *  num_entries 
)
static

Definition at line 63 of file v6_inference.c.

63  {
64  FILE *f = fopen(path, "r");
65  if (!f) {
66  fprintf(stderr, "Failed to open manifest: %s\n", path);
67  return -1;
68  }
69 
70  /* Read entire file */
71  fseek(f, 0, SEEK_END);
72  long size = ftell(f);
73  fseek(f, 0, SEEK_SET);
74 
75  char *content = malloc(size + 1);
76  fread(content, 1, size, f);
77  content[size] = '\0';
78  fclose(f);
79 
80  /* Count entries */
81  int count = 0;
82  char *p = content;
83  while ((p = strstr(p, "\"name\":")) != NULL) {
84  count++;
85  p += 7;
86  }
87 
88  *entries = malloc(count * sizeof(ManifestEntry));
89  *num_entries = count;
90 
91  /* Parse entries (simplified) */
92  p = content;
93  int idx = 0;
94  while ((p = strstr(p, "\"name\":")) != NULL && idx < count) {
95  char *start = strchr(p, '"') + 1;
96  char *end = strchr(start, '"');
97  size_t len = end - start;
98  if (len >= sizeof((*entries)[idx].name)) len = sizeof((*entries)[idx].name) - 1;
99  strncpy((*entries)[idx].name, start, len);
100  (*entries)[idx].name[len] = '\0';
101 
102  /* Find dtype */
103  char *dtype_p = strstr(p, "\"dtype\":");
104  if (dtype_p) {
105  char *d_start = strchr(dtype_p, '"') + 1;
106  char *d_end = strchr(d_start, '"');
107  size_t d_len = d_end - d_start;
108  if (d_len >= sizeof((*entries)[idx].dtype)) d_len = sizeof((*entries)[idx].dtype) - 1;
109  strncpy((*entries)[idx].dtype, d_start, d_len);
110  (*entries)[idx].dtype[d_len] = '\0';
111  }
112 
113  /* Find file_offset */
114  char *fo_p = strstr(p, "\"file_offset\":");
115  if (fo_p) {
116  sscanf(fo_p + 14, "%zu", &(*entries)[idx].file_offset);
117  }
118 
119  /* Find size */
120  char *size_p = strstr(p, "\"size\":");
121  if (size_p) {
122  sscanf(size_p + 7, "%zu", &(*entries)[idx].size);
123  }
124 
125  /* Find runtime_offset */
126  char *ro_p = strstr(p, "\"runtime_offset\":");
127  if (ro_p) {
128  sscanf(ro_p + 17, "%zu", &(*entries)[idx].runtime_offset);
129  }
130 
131  idx++;
132  p++;
133  }
134 
135  free(content);
136  return 0;
137 }
uint32_t end
Definition: utf8.c:215
uint32_t start
Definition: utf8.c:214

References end, and start.

Referenced by load_weights().

◆ load_weights()

static int load_weights ( QWEN2_DECODEModel *  model,
const char *  bump_path,
const char *  manifest_path 
)
static

Definition at line 140 of file v6_inference.c.

141  {
142  printf("[INFO] Loading weights from: %s\n", bump_path);
143 
144  /* Open BUMP file */
145  int fd = open(bump_path, O_RDONLY);
146  if (fd < 0) {
147  fprintf(stderr, "Failed to open BUMP file: %s\n", bump_path);
148  return -1;
149  }
150 
151  /* Get file size */
152  off_t file_size = lseek(fd, 0, SEEK_END);
153  lseek(fd, 0, SEEK_SET);
154  printf("[INFO] BUMP file size: %ld bytes\n", (long)file_size);
155 
156  /* Load manifest */
157  ManifestEntry *entries = NULL;
158  int num_entries = 0;
159  if (load_manifest(manifest_path, &entries, &num_entries) != 0) {
160  close(fd);
161  return -1;
162  }
163  printf("[INFO] Manifest entries: %d\n", num_entries);
164 
165  /* Load weights from BUMP using mmap for efficiency */
166  void *bump_base = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
167  if (bump_base == MAP_FAILED) {
168  fprintf(stderr, "Failed to mmap BUMP file\n");
169  close(fd);
170  free(entries);
171  return -1;
172  }
173  close(fd);
174 
175  /* Copy each weight entry */
176  for (int i = 0; i < num_entries; i++) {
177  ManifestEntry *e = &entries[i];
178 
179  if (e->runtime_offset + e->size > model->total_bytes) {
180  fprintf(stderr, "Warning: Entry %s exceeds model memory\n", e->name);
181  continue;
182  }
183 
184  memcpy((char *)model->base + e->runtime_offset,
185  (char *)bump_base + e->file_offset,
186  e->size);
187  }
188 
189  munmap(bump_base, file_size);
190  free(entries);
191 
192  printf("[INFO] Weights loaded successfully\n");
193  return 0;
194 }
static int load_manifest(const char *path, ManifestEntry **entries, int *num_entries)
Definition: v6_inference.c:63

References load_manifest().

Referenced by run_inference().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 338 of file v6_inference.c.

/**
 * Command-line entry point: parse options, validate the input files and run
 * inference.
 *
 * The weights file may be given either with -m/--model or as the first
 * non-option argument, matching the usage line. Arguments that are not
 * recognised (including an option given without its value) are reported on
 * stderr and ignored.
 *
 * @return 0 after printing help, 1 when the weights file is missing from the
 *         command line or a required file does not exist, otherwise the
 *         result of run_inference().
 */
int main(int argc, char **argv) {
    const char *bump_path = NULL;
    const char *manifest_path = NULL;
    const char *tokenizer_path = NULL;
    const char *prompt = "Hello";
    int max_tokens = 100;
    float temperature = 0.7f;
    int topk = 40;

    /* Parse arguments */
    for (int i = 1; i < argc; i++) {
        const char *arg = argv[i];
        const int has_value = (i + 1 < argc);

        if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) {
            print_banner();
            printf("Usage: %s <weights.bump> [options]\n", argv[0]);
            printf("\nOptions:\n");
            printf(" -m, --model <file> Weights BUMP file\n");
            printf(" --manifest <file> Weights manifest JSON (default: generated/weights_manifest.json)\n");
            printf(" -t, --tokenizer <file> Tokenizer JSON file\n");
            printf(" -p, --prompt <text> Prompt (default: Hello)\n");
            printf(" -n, --tokens <n> Max tokens (default: 100)\n");
            printf(" --temp <float> Temperature (default: 0.7)\n");
            printf(" --top-k <n> Top-k (default: 40)\n");
            printf(" -h, --help Show help\n");
            return 0;
        }
        else if ((strcmp(arg, "-m") == 0 || strcmp(arg, "--model") == 0) && has_value) {
            bump_path = argv[++i];
        }
        else if (strcmp(arg, "--manifest") == 0 && has_value) {
            manifest_path = argv[++i];
        }
        else if ((strcmp(arg, "-t") == 0 || strcmp(arg, "--tokenizer") == 0) && has_value) {
            tokenizer_path = argv[++i];
        }
        else if ((strcmp(arg, "-p") == 0 || strcmp(arg, "--prompt") == 0) && has_value) {
            prompt = argv[++i];
        }
        else if ((strcmp(arg, "-n") == 0 || strcmp(arg, "--tokens") == 0) && has_value) {
            max_tokens = atoi(argv[++i]);
        }
        else if (strcmp(arg, "--temp") == 0 && has_value) {
            temperature = (float)atof(argv[++i]);
        }
        else if (strcmp(arg, "--top-k") == 0 && has_value) {
            topk = atoi(argv[++i]);
        }
        else if (arg[0] != '-' && !bump_path) {
            /* Positional weights file, as advertised by the usage line. */
            bump_path = arg;
        }
        else {
            fprintf(stderr, "Warning: ignoring unrecognized or incomplete argument: %s\n", arg);
        }
    }

    if (!bump_path) {
        print_banner();
        fprintf(stderr, "Error: No weights file specified\n");
        fprintf(stderr, "Usage: %s <weights.bump> [options]\n", argv[0]);
        return 1;
    }

    /* Default paths */
    if (!manifest_path) {
        manifest_path = "generated/weights_manifest.json";
    }
    if (!tokenizer_path) {
        tokenizer_path = "generated/tokenizer.json";
    }

    if (access(bump_path, F_OK) != 0) {
        fprintf(stderr, "Error: Weights file not found: %s\n", bump_path);
        return 1;
    }

    if (access(tokenizer_path, F_OK) != 0) {
        fprintf(stderr, "Error: Tokenizer not found: %s\n", tokenizer_path);
        return 1;
    }

    return run_inference(bump_path, manifest_path, tokenizer_path,
                         prompt, max_tokens, temperature, topk);
}
static int run_inference(const char *bump_path, const char *manifest_path, const char *tokenizer_path, const char *prompt, int max_tokens, float temperature, int topk)
Definition: v6_inference.c:197
static void print_banner(void)
Definition: v6_inference.c:323

References print_banner(), and run_inference().

◆ print_banner()

static void print_banner ( void  )
static

Definition at line 323 of file v6_inference.c.

323  {
324  printf(ANSI_CYAN);
325  printf("\n");
326  printf(" ____ _ _ _ _ _ _ _ _____ _ _ \n");
327  printf(" | _ \\| | | (_) | | (_) | | | |/ ___| | | |\n");
328  printf(" | |_) | |_ _ ___| |_ __ _| |_ ___ _____| | | |\\ `--.| |__ __| |\n");
329  printf(" | _ <| | | | |/ __| | |/ _` | __| \\ \\ / / _ \\ | | | |`--. \\ '_ \\ / _` |\n");
330  printf(" | |_) | | |_| | (__| | | (_| | |_| |\\ V / __/ |_| |/\\__/ / | | | (_| |\n");
331  printf(" |____/|_|\\__,_|\\___|_|_|\\__,_|\\__|_| \\_/ \\___|\\___|\\___/\\____/|_| |_|\\__,_|\n");
332  printf(ANSI_RESET);
333  printf(ANSI_DIM);
334  printf(" v%s - Native C Inference\n", CK_VERSION);
335  printf("\n");
336 }
#define ANSI_DIM
Definition: v6_inference.c:29
#define ANSI_CYAN
Definition: v6_inference.c:32
#define CK_VERSION
Definition: v6_inference.c:24
#define ANSI_RESET
Definition: v6_inference.c:27

References ANSI_CYAN, ANSI_DIM, ANSI_RESET, and CK_VERSION.

Referenced by main().

◆ run_inference()

static int run_inference ( const char *  bump_path,
const char *  manifest_path,
const char *  tokenizer_path,
const char *  prompt,
int  max_tokens,
float  temperature,
int  topk 
)
static

Definition at line 197 of file v6_inference.c.

199  {
200  printf(ANSI_CYAN);
201  printf("\n C-Kernel-Engine v6 Inference\n");
202  printf(ANSI_RESET);
203  printf("\n");
204 
205  /* Load tokenizer */
206  printf("[INFO] Loading tokenizer: %s\n", tokenizer_path);
207  CKTokenizer tokenizer;
208  if (ck_tokenizer_init(&tokenizer) != 0) {
209  fprintf(stderr, "Failed to init tokenizer\n");
210  return -1;
211  }
212 
213  if (ck_tokenizer_load(&tokenizer, tokenizer_path) != 0) {
214  fprintf(stderr, "Failed to load tokenizer: %s\n", tokenizer_path);
215  ck_tokenizer_free(&tokenizer);
216  return -1;
217  }
218  printf("[INFO] Tokenizer loaded: %d tokens\n", ck_tokenizer_vocab_size(&tokenizer));
219 
220  /* Allocate model */
221  QWEN2_DECODEModel model;
222  printf("[INFO] Allocating model (%zu bytes)...\n", (size_t)QWEN2_DECODE_TOTAL_BYTES);
223 
224  if (qwen2_decode_model_allocate(&model) != 0) {
225  fprintf(stderr, "Failed to allocate model\n");
226  ck_tokenizer_free(&tokenizer);
227  return -1;
228  }
229  printf("[INFO] Model allocated at %p\n", model.base);
230 
231  /* Load weights */
232  if (load_weights(&model, bump_path, manifest_path) != 0) {
233  qwen2_decode_model_free(&model);
234  ck_tokenizer_free(&tokenizer);
235  return -1;
236  }
237 
238  /* Tokenize prompt */
239  int32_t tokens[512];
240  int num_tokens = ck_tokenizer_encode(&tokenizer, prompt, -1, tokens, 512);
241  printf("[INFO] Tokenized prompt: %d tokens\n", num_tokens);
242 
243  if (num_tokens <= 0) {
244  fprintf(stderr, "Failed to tokenize prompt\n");
245  qwen2_decode_model_free(&model);
246  ck_tokenizer_free(&tokenizer);
247  return -1;
248  }
249 
250  /* Print tokenized input */
251  printf("[INFO] Input tokens: ");
252  for (int i = 0; i < num_tokens; i++) {
253  const char *tok_str = ck_tokenizer_id_to_token(&tokenizer, tokens[i]);
254  if (tok_str) {
255  printf("%d(%s) ", tokens[i], tok_str);
256  } else {
257  printf("%d(?) ", tokens[i]);
258  }
259  }
260  printf("\n");
261 
262  /* Run prefill */
263  printf("[INFO] Running prefill for %d tokens...\n", num_tokens);
264  qwen2_decode_forward(&model, tokens, num_tokens);
265 
266  /* Get logits for first token */
267  float *logits = (float *)((char *)model.base + QWEN2_DECODE_FOOTER.logits);
268 
269  /* Generate tokens */
270  printf("\n[INFO] Generating %d tokens...\n", max_tokens);
271  printf(ANSI_YELLOW);
272 
273  int eos_token = 151645; /* Qwen EOS */
274  int32_t token = tokens[num_tokens - 1];
275 
276  /* Buffer for decoded output */
277  char output_buffer[4096];
278  int output_pos = 0;
279  output_buffer[0] = '\0';
280 
281  for (int i = 0; i < max_tokens; i++) {
282  /* Run decode for this token */
283  qwen2_decode_decode(&model, &token, 0);
284 
285  /* Get logits */
286  logits = (float *)((char *)model.base + QWEN2_DECODE_FOOTER.logits);
287 
288  /* Sample next token */
289  int next_token = sample_topk(logits, QWEN2_DECODE_VOCAB_SIZE, topk);
290 
291  /* Get token string and append to output */
292  const char *tok_str = ck_tokenizer_id_to_token(&tokenizer, next_token);
293  if (tok_str) {
294  printf("%s", tok_str);
295  fflush(stdout);
296 
297  /* Add to output buffer */
298  size_t len = strlen(tok_str);
299  if (output_pos + len < sizeof(output_buffer) - 1) {
300  strcpy(output_buffer + output_pos, tok_str);
301  output_pos += len;
302  }
303  } else if (next_token == eos_token) {
304  printf(ANSI_RESET);
305  printf("\n[INFO] EOS token received\n");
306  break;
307  }
308 
309  token = next_token;
310  }
311 
312  printf(ANSI_RESET);
313  printf("\n\n[INFO] Inference complete\n");
314 
315  /* Cleanup */
316  qwen2_decode_model_free(&model);
317  ck_tokenizer_free(&tokenizer);
318 
319  return 0;
320 }
int ck_tokenizer_init(CKTokenizer *tok)
Definition: ck_tokenizer.c:148
const char * ck_tokenizer_id_to_token(const CKTokenizer *tok, int32_t id)
Definition: ck_tokenizer.c:239
int ck_tokenizer_load(CKTokenizer *tok, const char *path)
Definition: ck_tokenizer.c:432
int ck_tokenizer_encode(const CKTokenizer *tok, const char *text, int text_len, int32_t *ids, int max_ids)
Definition: ck_tokenizer.c:638
void ck_tokenizer_free(CKTokenizer *tok)
Definition: ck_tokenizer.c:183
static int ck_tokenizer_vocab_size(const CKTokenizer *tok)
Definition: ck_tokenizer.h:196
const char * token
Definition: tokenizer.h:306
static int load_weights(QWEN2_DECODEModel *model, const char *bump_path, const char *manifest_path)
Definition: v6_inference.c:140
static int sample_topk(float *probs, int vocab_size, int topk)
Definition: v6_inference.c:48
#define ANSI_YELLOW
Definition: v6_inference.c:31

References ANSI_CYAN, ANSI_RESET, ANSI_YELLOW, ck_tokenizer_encode(), ck_tokenizer_free(), ck_tokenizer_id_to_token(), ck_tokenizer_init(), ck_tokenizer_load(), ck_tokenizer_vocab_size(), load_weights(), sample_topk(), and token.

Referenced by main().

◆ sample_topk()

static int sample_topk ( float *  probs,
int  vocab_size,
int  topk 
)
static

Definition at line 48 of file v6_inference.c.

/**
 * Pick the next token id from a score vector.
 *
 * Selection is deterministic: the index of the largest score over the whole
 * vocabulary is returned, with ties resolved in favour of the lowest index.
 * The best candidate of any top-k set is always the global maximum, so the
 * result does not depend on @p topk; the parameter is kept so callers do not
 * change. No random sampling is performed.
 *
 * The search deliberately covers indices [0, vocab_size). Restricting it to
 * the last @p topk indices would select by token id rather than by score and
 * would read out of bounds for topk <= 0.
 *
 * @param probs       Scores, one per vocabulary entry (read only).
 * @param vocab_size  Number of elements in @p probs.
 * @param topk        Accepted for interface compatibility; any value,
 *                    including zero or negative, is safe.
 * @return Index of the highest score, or -1 if @p probs is NULL or
 *         @p vocab_size is not positive.
 */
static int sample_topk(float *probs, int vocab_size, int topk) {
    (void)topk;

    if (probs == NULL || vocab_size <= 0) {
        return -1;
    }

    int best_idx = 0;
    float best_val = probs[0];

    for (int i = 1; i < vocab_size; i++) {
        if (probs[i] > best_val) {
            best_val = probs[i];
            best_idx = i;
        }
    }
    return best_idx;
}
int vocab_size
Definition: true_bpe.h:185

References start, and vocab_size.

Referenced by run_inference().