← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6.6_inference.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include "ckernel_engine.h"
#include "tokenizer/true_bpe.h"
#include "ck-kernel-inference.h"
#include "ck-kernel-prefill.h"

Go to the source code of this file.

Macros

#define _GNU_SOURCE
 
#define ANSI_BOLD   "\033[1m"
 
#define ANSI_CYAN   "\033[0;36m"
 
#define ANSI_DIM   "\033[2m"
 
#define ANSI_GREEN   "\033[0;32m"
 
#define ANSI_RESET   "\033[0m"
 
#define ANSI_YELLOW   "\033[0;33m"
 
#define CK_VERSION   "6.0.0"
 

Functions

static int load_manifest (const char *path, ManifestEntry **entries, int *num_entries)
 
static int load_weights (QWEN2_DECODEModel *model, const char *bump_path, const char *manifest_path)
 
int main (int argc, char **argv)
 
static void print_banner (void)
 
static int run_inference (const char *bump_path, const char *manifest_path, const char *tokenizer_path, const char *prompt, int max_tokens, float temperature, int topk)
 
static int sample_topk (float *probs, int vocab_size, int topk)
 

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 9 of file v6.6_inference.c.

◆ ANSI_BOLD

#define ANSI_BOLD   "\033[1m"

Definition at line 28 of file v6.6_inference.c.

◆ ANSI_CYAN

#define ANSI_CYAN   "\033[0;36m"

Definition at line 32 of file v6.6_inference.c.

◆ ANSI_DIM

#define ANSI_DIM   "\033[2m"

Definition at line 29 of file v6.6_inference.c.

◆ ANSI_GREEN

#define ANSI_GREEN   "\033[0;32m"

Definition at line 30 of file v6.6_inference.c.

◆ ANSI_RESET

#define ANSI_RESET   "\033[0m"

Definition at line 27 of file v6.6_inference.c.

◆ ANSI_YELLOW

#define ANSI_YELLOW   "\033[0;33m"

Definition at line 31 of file v6.6_inference.c.

◆ CK_VERSION

#define CK_VERSION   "6.0.0"

Definition at line 24 of file v6.6_inference.c.

Function Documentation

◆ load_manifest()

static int load_manifest ( const char *  path,
ManifestEntry **  entries,
int *  num_entries 
)
static

Definition at line 63 of file v6.6_inference.c.

63  {
64  FILE *f = fopen(path, "r");
65  if (!f) {
66  fprintf(stderr, "Failed to open manifest: %s\n", path);
67  return -1;
68  }
69 
70  /* Read entire file */
71  fseek(f, 0, SEEK_END);
72  long size = ftell(f);
73  fseek(f, 0, SEEK_SET);
74 
75  char *content = malloc(size + 1);
76  fread(content, 1, size, f);
77  content[size] = '\0';
78  fclose(f);
79 
80  /* Count entries */
81  int count = 0;
82  char *p = content;
83  while ((p = strstr(p, "\"name\":")) != NULL) {
84  count++;
85  p += 7;
86  }
87 
88  *entries = malloc(count * sizeof(ManifestEntry));
89  *num_entries = count;
90 
91  /* Parse entries (simplified) */
92  p = content;
93  int idx = 0;
94  while ((p = strstr(p, "\"name\":")) != NULL && idx < count) {
95  char *start = strchr(p, '"') + 1;
96  char *end = strchr(start, '"');
97  size_t len = end - start;
98  if (len >= sizeof((*entries)[idx].name)) len = sizeof((*entries)[idx].name) - 1;
99  strncpy((*entries)[idx].name, start, len);
100  (*entries)[idx].name[len] = '\0';
101 
102  /* Find dtype */
103  char *dtype_p = strstr(p, "\"dtype\":");
104  if (dtype_p) {
105  char *d_start = strchr(dtype_p, '"') + 1;
106  char *d_end = strchr(d_start, '"');
107  size_t d_len = d_end - d_start;
108  if (d_len >= sizeof((*entries)[idx].dtype)) d_len = sizeof((*entries)[idx].dtype) - 1;
109  strncpy((*entries)[idx].dtype, d_start, d_len);
110  (*entries)[idx].dtype[d_len] = '\0';
111  }
112 
113  /* Find file_offset */
114  char *fo_p = strstr(p, "\"file_offset\":");
115  if (fo_p) {
116  sscanf(fo_p + 14, "%zu", &(*entries)[idx].file_offset);
117  }
118 
119  /* Find size */
120  char *size_p = strstr(p, "\"size\":");
121  if (size_p) {
122  sscanf(size_p + 7, "%zu", &(*entries)[idx].size);
123  }
124 
125  /* Find runtime_offset */
126  char *ro_p = strstr(p, "\"runtime_offset\":");
127  if (ro_p) {
128  sscanf(ro_p + 17, "%zu", &(*entries)[idx].runtime_offset);
129  }
130 
131  idx++;
132  p++;
133  }
134 
135  free(content);
136  return 0;
137 }
uint32_t end
Definition: utf8.c:215
uint32_t start
Definition: utf8.c:214

References end, and start.

Referenced by load_weights().

◆ load_weights()

static int load_weights ( QWEN2_DECODEModel *  model,
const char *  bump_path,
const char *  manifest_path 
)
static

Definition at line 140 of file v6.6_inference.c.

141  {
142  printf("[INFO] Loading weights from: %s\n", bump_path);
143 
144  /* Open BUMP file */
145  int fd = open(bump_path, O_RDONLY);
146  if (fd < 0) {
147  fprintf(stderr, "Failed to open BUMP file: %s\n", bump_path);
148  return -1;
149  }
150 
151  /* Get file size */
152  off_t file_size = lseek(fd, 0, SEEK_END);
153  lseek(fd, 0, SEEK_SET);
154  printf("[INFO] BUMP file size: %ld bytes\n", (long)file_size);
155 
156  /* Load manifest */
157  ManifestEntry *entries = NULL;
158  int num_entries = 0;
159  if (load_manifest(manifest_path, &entries, &num_entries) != 0) {
160  close(fd);
161  return -1;
162  }
163  printf("[INFO] Manifest entries: %d\n", num_entries);
164 
165  /* Load weights from BUMP using mmap for efficiency */
166  void *bump_base = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
167  if (bump_base == MAP_FAILED) {
168  fprintf(stderr, "Failed to mmap BUMP file\n");
169  close(fd);
170  free(entries);
171  return -1;
172  }
173  close(fd);
174 
175  /* Copy each weight entry */
176  for (int i = 0; i < num_entries; i++) {
177  ManifestEntry *e = &entries[i];
178 
179  if (e->runtime_offset + e->size > model->total_bytes) {
180  fprintf(stderr, "Warning: Entry %s exceeds model memory\n", e->name);
181  continue;
182  }
183 
184  memcpy((char *)model->base + e->runtime_offset,
185  (char *)bump_base + e->file_offset,
186  e->size);
187  }
188 
189  munmap(bump_base, file_size);
190  free(entries);
191 
192  printf("[INFO] Weights loaded successfully\n");
193  return 0;
194 }
static int load_manifest(const char *path, ManifestEntry **entries, int *num_entries)

References load_manifest().

Referenced by run_inference().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 356 of file v6.6_inference.c.

/* Parse a base-10 integer option value. Returns 0 on success, -1 when the
 * text is empty, has trailing characters, or does not fit in an int. */
static int cli_parse_int(const char *text, int *out)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || (long)(int)value != value) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/* Parse a floating-point option value. Returns 0 on success, -1 when the
 * text is empty or has trailing characters. */
static int cli_parse_float(const char *text, float *out)
{
    char *end = NULL;
    float value = strtof(text, &end);
    if (end == text || *end != '\0') {
        return -1;
    }
    *out = value;
    return 0;
}

/**
 * Command-line entry point: parse options and run inference.
 *
 * The weights file may be given with -m/--model or as the first positional
 * argument, matching the usage text. Every other option takes exactly one
 * value. Unknown options are reported and ignored; a missing or malformed
 * option value is an error.
 *
 * @return 0 after printing help, 1 on a command-line error or when the
 *         weights file does not exist, otherwise the result of
 *         run_inference().
 */
int main(int argc, char **argv)
{
    const char *bump_path = NULL;
    const char *manifest_path = NULL;
    const char *tokenizer_path = NULL;
    const char *prompt = "Hello";
    int max_tokens = 100;
    float temperature = 0.7f;
    int topk = 40;

    /* Parse arguments */
    for (int i = 1; i < argc; i++) {
        const char *arg = argv[i];

        if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) {
            print_banner();
            printf("Usage: %s <weights.bump> [options]\n", argv[0]);
            printf("\nOptions:\n");
            printf(" -m, --model <file> Weights BUMP file\n");
            printf(" -t, --tokenizer <file> (unused) tokenizer is loaded from weights\n");
            printf(" -p, --prompt <text> Prompt (default: Hello)\n");
            printf(" -n, --tokens <n> Max tokens (default: 100)\n");
            printf(" --temp <float> Temperature (default: 0.7)\n");
            printf(" --top-k <n> Top-k (default: 40)\n");
            printf(" -h, --help Show help\n");
            return 0;
        }

        /* Positional argument: the weights file advertised by the usage line. */
        if (arg[0] != '-') {
            if (!bump_path) {
                bump_path = arg;
            } else {
                fprintf(stderr, "Warning: ignoring extra argument: %s\n", arg);
            }
            continue;
        }

        const int is_model = strcmp(arg, "-m") == 0 || strcmp(arg, "--model") == 0;
        const int is_tokenizer = strcmp(arg, "-t") == 0 || strcmp(arg, "--tokenizer") == 0;
        const int is_prompt = strcmp(arg, "-p") == 0 || strcmp(arg, "--prompt") == 0;
        const int is_tokens = strcmp(arg, "-n") == 0 || strcmp(arg, "--tokens") == 0;
        const int is_temp = strcmp(arg, "--temp") == 0;
        const int is_topk = strcmp(arg, "--top-k") == 0;

        if (!(is_model || is_tokenizer || is_prompt || is_tokens || is_temp || is_topk)) {
            fprintf(stderr, "Warning: ignoring unknown option: %s\n", arg);
            continue;
        }

        /* Every recognised option from here on consumes one value. */
        if (i + 1 >= argc) {
            fprintf(stderr, "Error: option %s requires a value\n", arg);
            return 1;
        }
        const char *value = argv[++i];

        if (is_model) {
            bump_path = value;
        } else if (is_tokenizer) {
            tokenizer_path = value;
        } else if (is_prompt) {
            prompt = value;
        } else if (is_tokens) {
            if (cli_parse_int(value, &max_tokens) != 0) {
                fprintf(stderr, "Error: invalid value for %s: %s\n", arg, value);
                return 1;
            }
        } else if (is_temp) {
            if (cli_parse_float(value, &temperature) != 0) {
                fprintf(stderr, "Error: invalid value for %s: %s\n", arg, value);
                return 1;
            }
        } else {
            if (cli_parse_int(value, &topk) != 0) {
                fprintf(stderr, "Error: invalid value for %s: %s\n", arg, value);
                return 1;
            }
        }
    }

    if (!bump_path) {
        print_banner();
        fprintf(stderr, "Error: No weights file specified\n");
        fprintf(stderr, "Usage: %s <weights.bump> [options]\n", argv[0]);
        return 1;
    }

    /* Default paths: no option sets the manifest path, so this default always applies. */
    if (!manifest_path) {
        manifest_path = "generated/weights_manifest.json";
    }
    if (access(bump_path, F_OK) != 0) {
        fprintf(stderr, "Error: Weights file not found: %s\n", bump_path);
        return 1;
    }

    return run_inference(bump_path, manifest_path, tokenizer_path,
                         prompt, max_tokens, temperature, topk);
}
static int run_inference(const char *bump_path, const char *manifest_path, const char *tokenizer_path, const char *prompt, int max_tokens, float temperature, int topk)
static void print_banner(void)

References print_banner(), and run_inference().

◆ print_banner()

static void print_banner ( void  )
static

Definition at line 341 of file v6.6_inference.c.

341  {
342  printf(ANSI_CYAN);
343  printf("\n");
344  printf(" ____ _ _ _ _ _ _ _ _____ _ _ \n");
345  printf(" | _ \\| | | (_) | | (_) | | | |/ ___| | | |\n");
346  printf(" | |_) | |_ _ ___| |_ __ _| |_ ___ _____| | | |\\ `--.| |__ __| |\n");
347  printf(" | _ <| | | | |/ __| | |/ _` | __| \\ \\ / / _ \\ | | | |`--. \\ '_ \\ / _` |\n");
348  printf(" | |_) | | |_| | (__| | | (_| | |_| |\\ V / __/ |_| |/\\__/ / | | | (_| |\n");
349  printf(" |____/|_|\\__,_|\\___|_|_|\\__,_|\\__|_| \\_/ \\___|\\___|\\___/\\____/|_| |_|\\__,_|\n");
350  printf(ANSI_RESET);
351  printf(ANSI_DIM);
352  printf(" v%s - Native C Inference\n", CK_VERSION);
353  printf("\n");
354 }
#define ANSI_DIM
#define ANSI_CYAN
#define CK_VERSION
#define ANSI_RESET

References ANSI_CYAN, ANSI_DIM, ANSI_RESET, and CK_VERSION.

Referenced by main().

◆ run_inference()

static int run_inference ( const char *  bump_path,
const char *  manifest_path,
const char *  tokenizer_path,
const char *  prompt,
int  max_tokens,
float  temperature,
int  topk 
)
static

Definition at line 197 of file v6.6_inference.c.

199  {
200  printf(ANSI_CYAN);
201  printf("\n C-Kernel-Engine v6 Inference\n");
202  printf(ANSI_RESET);
203  printf("\n");
204 
205  (void)tokenizer_path;
206 
207  CKTrueBPE *tokenizer = ck_true_bpe_create();
208  if (!tokenizer) {
209  fprintf(stderr, "Failed to init tokenizer\n");
210  return -1;
211  }
212 
213  /* Allocate model */
214  QWEN2_DECODEModel model;
215  printf("[INFO] Allocating model (%zu bytes)...\n", (size_t)QWEN2_DECODE_TOTAL_BYTES);
216 
217  if (qwen2_decode_model_allocate(&model) != 0) {
218  fprintf(stderr, "Failed to allocate model\n");
219  ck_true_bpe_free(tokenizer);
220  return -1;
221  }
222  printf("[INFO] Model allocated at %p\n", model.base);
223 
224  /* Load weights */
225  if (load_weights(&model, bump_path, manifest_path) != 0) {
226  qwen2_decode_model_free(&model);
227  ck_tokenizer_free(&tokenizer);
228  return -1;
229  }
230 
231  /* Tokenize prompt */
232  const int vocab_size = QWEN2_DECODE_VOCAB_SIZE;
233  const int num_merges = QWEN2_DECODE_NUM_MERGES;
234  const int vocab_bytes = QWEN2_DECODE_TOTAL_VOCAB_BYTES;
235  if (vocab_size <= 0 || vocab_bytes <= 0) {
236  fprintf(stderr, "Tokenizer data missing in model\n");
237  qwen2_decode_model_free(&model);
238  ck_true_bpe_free(tokenizer);
239  return -1;
240  }
241 
242  const int32_t *vocab_offsets = (const int32_t *)QWEN2_DECODE_PTR(&model, QWEN2_DECODE_HEADER.vocab_offsets);
243  const char *vocab_strings = (const char *)((char *)model.base + QWEN2_DECODE_HEADER.vocab_strings);
244  const int32_t *vocab_merges = num_merges > 0
245  ? (const int32_t *)((char *)model.base + QWEN2_DECODE_HEADER.vocab_merges)
246  : NULL;
247 
248  if (ck_true_bpe_load_binary(tokenizer, vocab_size, vocab_offsets, vocab_strings, num_merges, vocab_merges) != 0) {
249  fprintf(stderr, "Failed to load tokenizer from model\n");
250  qwen2_decode_model_free(&model);
251  ck_true_bpe_free(tokenizer);
252  return -1;
253  }
254 
255  printf("[INFO] Tokenizer loaded: %d tokens\n", vocab_size);
256 
257  int32_t tokens[512];
258  int num_tokens = ck_true_bpe_encode(tokenizer, prompt, -1, tokens, 512);
259  printf("[INFO] Tokenized prompt: %d tokens\n", num_tokens);
260 
261  if (num_tokens <= 0) {
262  fprintf(stderr, "Failed to tokenize prompt\n");
263  qwen2_decode_model_free(&model);
264  ck_true_bpe_free(tokenizer);
265  return -1;
266  }
267 
268  /* Print tokenized input */
269  printf("[INFO] Input tokens: ");
270  for (int i = 0; i < num_tokens; i++) {
271  const char *tok_str = ck_true_bpe_id_to_token(tokenizer, tokens[i]);
272  if (tok_str) {
273  printf("%d(%s) ", tokens[i], tok_str);
274  } else {
275  printf("%d(?) ", tokens[i]);
276  }
277  }
278  printf("\n");
279 
280  /* Run prefill */
281  printf("[INFO] Running prefill for %d tokens...\n", num_tokens);
282  qwen2_decode_forward(&model, tokens, num_tokens);
283 
284  /* Get logits for first token */
285  float *logits = (float *)((char *)model.base + QWEN2_DECODE_FOOTER.logits);
286 
287  /* Generate tokens */
288  printf("\n[INFO] Generating %d tokens...\n", max_tokens);
289  printf(ANSI_YELLOW);
290 
291  int eos_token = 151645; /* Qwen EOS */
292  int32_t token = tokens[num_tokens - 1];
293 
294  /* Buffer for decoded output */
295  char output_buffer[4096];
296  int output_pos = 0;
297  output_buffer[0] = '\0';
298 
299  for (int i = 0; i < max_tokens; i++) {
300  /* Run decode for this token */
301  qwen2_decode_decode(&model, &token, 0);
302 
303  /* Get logits */
304  logits = (float *)((char *)model.base + QWEN2_DECODE_FOOTER.logits);
305 
306  /* Sample next token */
307  int next_token = sample_topk(logits, QWEN2_DECODE_VOCAB_SIZE, topk);
308 
309  /* Get token string and append to output */
310  const char *tok_str = ck_true_bpe_id_to_token(tokenizer, next_token);
311  if (tok_str) {
312  printf("%s", tok_str);
313  fflush(stdout);
314 
315  /* Add to output buffer */
316  size_t len = strlen(tok_str);
317  if (output_pos + len < sizeof(output_buffer) - 1) {
318  strcpy(output_buffer + output_pos, tok_str);
319  output_pos += len;
320  }
321  } else if (next_token == eos_token) {
322  printf(ANSI_RESET);
323  printf("\n[INFO] EOS token received\n");
324  break;
325  }
326 
327  token = next_token;
328  }
329 
330  printf(ANSI_RESET);
331  printf("\n\n[INFO] Inference complete\n");
332 
333  /* Cleanup */
334  qwen2_decode_model_free(&model);
335  ck_true_bpe_free(tokenizer);
336 
337  return 0;
338 }
void ck_tokenizer_free(CKTokenizer *tok)
Definition: ck_tokenizer.c:183
const char * token
Definition: tokenizer.h:306
int ck_true_bpe_encode(CKTrueBPE *bpe, const char *text, int text_len, int32_t *ids, int max_ids)
Definition: true_bpe.c:1338
void ck_true_bpe_free(CKTrueBPE *bpe)
Definition: true_bpe.c:405
CKTrueBPE * ck_true_bpe_create(void)
Definition: true_bpe.c:342
int ck_true_bpe_load_binary(CKTrueBPE *bpe, int vocab_size, const int32_t *offsets, const char *strings, int num_merges, const int32_t *merges)
Definition: true_bpe.c:606
const char * ck_true_bpe_id_to_token(const CKTrueBPE *bpe, int32_t id)
Definition: true_bpe.c:645
int const int32_t const char int num_merges
Definition: true_bpe.h:188
int vocab_size
Definition: true_bpe.h:185
static int load_weights(QWEN2_DECODEModel *model, const char *bump_path, const char *manifest_path)
static int sample_topk(float *probs, int vocab_size, int topk)
#define ANSI_YELLOW

References ANSI_CYAN, ANSI_RESET, ANSI_YELLOW, ck_tokenizer_free(), ck_true_bpe_create(), ck_true_bpe_encode(), ck_true_bpe_free(), ck_true_bpe_id_to_token(), ck_true_bpe_load_binary(), load_weights(), num_merges, sample_topk(), token, and vocab_size.

Referenced by main().

◆ sample_topk()

static int sample_topk ( float *  probs,
int  vocab_size,
int  topk 
)
static

Definition at line 48 of file v6.6_inference.c.

/**
 * Pick the next token id from a score vector.
 *
 * The selection is deterministic: it returns the index of the largest value
 * in `probs`. The highest-scoring entry belongs to the top-k set for every
 * k >= 1, so this is the top-1 member of that set regardless of `topk`.
 * Ties resolve to the lowest index.
 *
 * The candidate set is the whole vocabulary. Restricting the scan to the
 * last `topk` indices would limit the choice by token id rather than by
 * score, and would read past the end of `probs` when `topk` <= 0.
 *
 * @param probs       Array of `vocab_size` scores; not modified.
 * @param vocab_size  Number of elements in `probs`.
 * @param topk        Requested candidate-set size. It does not affect a
 *                    deterministic pick; any value is accepted.
 * @return Index of the largest score, or -1 when `probs` is NULL or
 *         `vocab_size` is not positive.
 */
static int sample_topk(float *probs, int vocab_size, int topk)
{
    (void)topk;

    if (probs == NULL || vocab_size <= 0) {
        return -1;
    }

    int best_idx = 0;
    float best_val = probs[0];

    for (int i = 1; i < vocab_size; i++) {
        if (probs[i] > best_val) {
            best_val = probs[i];
            best_idx = i;
        }
    }
    return best_idx;
}

References start, and vocab_size.

Referenced by run_inference().