← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6.6/test_generic_api.c
Go to the documentation of this file.
1 /**
2  * @file test_generic_api.c
3  * @brief Generic test/benchmark harness using ck_model_* API
4  *
5  * This file works with ANY model - just link with different inference.c
6  *
7  * Build:
8  * gcc test_generic_api.c inference.c kernels/*.c -o test_bench -lm -lpthread
9  *
10  * Run:
11  * ./test_bench --weights weights.bump --benchmark 100
12  */
13 
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
23 
24 #include "ck_model_api.h"
25 
26 /* ============================================================================
27  * TIMING UTILITIES
28  * ============================================================================ */
29 
/**
 * Current time of day expressed in milliseconds.
 *
 * Reads the clock with gettimeofday() and folds the seconds and
 * microseconds fields into a single double.
 *
 * @return Milliseconds as reported by gettimeofday().
 */
static double get_time_ms(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_ms = now.tv_sec * 1000.0;
    const double frac_ms = now.tv_usec / 1000.0;
    return whole_ms + frac_ms;
}
35 
36 /* ============================================================================
37  * WEIGHT LOADING
38  * ============================================================================ */
39 
40 static int load_weights_from_bump(void *model, const char *bump_path) {
41  int fd = open(bump_path, O_RDONLY);
42  if (fd < 0) {
43  fprintf(stderr, "[ERROR] Cannot open: %s\n", bump_path);
44  return -1;
45  }
46 
47  struct stat st;
48  if (fstat(fd, &st) < 0) {
49  close(fd);
50  return -1;
51  }
52 
53  size_t file_size = st.st_size;
54  size_t model_bytes = ck_model_get_total_bytes(model);
55  void *base = ck_model_get_base(model);
56 
57  /* Map file */
58  void *mapped = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
59  close(fd);
60 
61  if (mapped == MAP_FAILED) {
62  fprintf(stderr, "[ERROR] mmap failed\n");
63  return -1;
64  }
65 
66  /* Copy weights (skip 64-byte header in BUMP file) */
67  size_t header_size = 64;
68  size_t weight_bytes = ck_model_get_config()->weight_bytes;
69 
70  if (file_size < header_size + weight_bytes) {
71  fprintf(stderr, "[ERROR] BUMP file too small: %zu < %zu\n",
72  file_size, header_size + weight_bytes);
73  munmap(mapped, file_size);
74  return -1;
75  }
76 
77  /* Copy weights to model base (after header) */
78  memcpy((char*)base + header_size, (char*)mapped + header_size, weight_bytes);
79 
80  munmap(mapped, file_size);
81  printf("[INFO] Loaded %zu bytes from %s\n", weight_bytes, bump_path);
82  return 0;
83 }
84 
85 /* ============================================================================
86  * SAMPLING
87  * ============================================================================ */
88 
/**
 * Greedy sampling: pick the index of the largest logit.
 *
 * Ties resolve to the lowest index because a later entry only wins when it
 * is strictly greater than the current best.
 *
 * @param logits      Array of at least vocab_size scores.
 * @param vocab_size  Number of entries to scan.
 * @return Index of the maximum entry (0 when there is a single entry).
 */
static int sample_argmax(const float *logits, int vocab_size) {
    int winner = 0;

    for (int idx = 1; idx < vocab_size; ++idx) {
        if (logits[idx] > logits[winner]) {
            winner = idx;
        }
    }

    return winner;
}
100 
101 /* ============================================================================
102  * BENCHMARK
103  * ============================================================================ */
104 
105 static void run_benchmark(void *model, int num_tokens) {
106  const CKModelConfig *cfg = ck_model_get_config();
107 
108  printf("\n");
109  printf("============================================\n");
110  printf(" BENCHMARK: %s\n", cfg->model_name);
111  printf("============================================\n");
112  printf(" Layers: %d\n", cfg->num_layers);
113  printf(" Embed dim: %d\n", cfg->embed_dim);
114  printf(" Heads: %d (KV: %d)\n", cfg->num_heads, cfg->num_kv_heads);
115  printf(" Vocab: %d\n", cfg->vocab_size);
116  printf(" Tokens: %d\n", num_tokens);
117  printf("============================================\n\n");
118 
119  /* Warmup */
120  printf("[WARMUP] Running 3 warmup iterations...\n");
121  int token = 1;
122  for (int i = 0; i < 3; i++) {
123  ck_model_decode(model, &token, i);
124  }
125 
126  /* Benchmark decode */
127  printf("[BENCH] Running %d decode iterations...\n", num_tokens);
128 
129  double start = get_time_ms();
130  for (int i = 0; i < num_tokens; i++) {
131  ck_model_decode(model, &token, i + 3); /* Offset by warmup */
132  }
133  double end = get_time_ms();
134 
135  double elapsed_ms = end - start;
136  double tokens_per_sec = num_tokens / (elapsed_ms / 1000.0);
137  double ms_per_token = elapsed_ms / num_tokens;
138 
139  printf("\n");
140  printf("============================================\n");
141  printf(" RESULTS\n");
142  printf("============================================\n");
143  printf(" Total time: %.2f ms\n", elapsed_ms);
144  printf(" Tokens/sec: %.2f\n", tokens_per_sec);
145  printf(" ms/token: %.2f\n", ms_per_token);
146  printf("============================================\n");
147 
148  /* Verify canaries */
149  int errors = ck_model_verify_canaries(model);
150  if (errors > 0) {
151  printf("[WARN] %d canary corruptions detected!\n", errors);
152  } else {
153  printf("[OK] Memory canaries intact\n");
154  }
155 }
156 
157 /* ============================================================================
158  * SIMPLE GENERATION TEST
159  * ============================================================================ */
160 
161 static void run_generation_test(void *model, int num_tokens) {
162  const CKModelConfig *cfg = ck_model_get_config();
163 
164  printf("\n[TEST] Generation test (%d tokens)...\n", num_tokens);
165 
166  /* Start with token 1 (usually <s> or similar) */
167  int token = 1;
168 
169  printf("[GEN] Token IDs: ");
170  for (int i = 0; i < num_tokens; i++) {
171  ck_model_decode(model, &token, i);
172  float *logits = ck_model_get_logits(model);
173  token = sample_argmax(logits, cfg->vocab_size);
174  printf("%d ", token);
175  fflush(stdout);
176  }
177  printf("\n");
178 }
179 
180 /* ============================================================================
181  * MAIN
182  * ============================================================================ */
183 
/**
 * Print the command-line help text to stdout.
 *
 * @param prog  Program name shown on the "Usage:" line.
 */
static void print_usage(const char *prog) {
    static const char *const help_lines[] = {
        "\n",
        "Options:\n",
        " --weights <path> Path to weights.bump file\n",
        " --benchmark <n> Run benchmark with n tokens (default: 100)\n",
        " --generate <n> Run generation test with n tokens\n",
        " --info Print model info and exit\n",
        " --help Show this help\n",
    };
    const size_t line_count = sizeof help_lines / sizeof help_lines[0];

    printf("Usage: %s [options]\n", prog);
    for (size_t k = 0; k < line_count; ++k) {
        fputs(help_lines[k], stdout);
    }
}
194 
195 int main(int argc, char **argv) {
196  const char *weights_path = NULL;
197  int benchmark_tokens = 0;
198  int generate_tokens = 0;
199  int info_only = 0;
200 
201  /* Parse args */
202  for (int i = 1; i < argc; i++) {
203  if (strcmp(argv[i], "--weights") == 0 && i + 1 < argc) {
204  weights_path = argv[++i];
205  } else if (strcmp(argv[i], "--benchmark") == 0 && i + 1 < argc) {
206  benchmark_tokens = atoi(argv[++i]);
207  } else if (strcmp(argv[i], "--generate") == 0 && i + 1 < argc) {
208  generate_tokens = atoi(argv[++i]);
209  } else if (strcmp(argv[i], "--info") == 0) {
210  info_only = 1;
211  } else if (strcmp(argv[i], "--help") == 0) {
212  print_usage(argv[0]);
213  return 0;
214  }
215  }
216 
217  /* Print model info */
218  const CKModelConfig *cfg = ck_model_get_config();
219  printf("\n");
220  printf("============================================\n");
221  printf(" CK-Engine Generic Test Harness\n");
222  printf("============================================\n");
223  printf(" Model: %s\n", cfg->model_name);
224  printf(" Family: %s\n", cfg->model_family);
225  printf(" Layers: %d\n", cfg->num_layers);
226  printf(" Embed: %d\n", cfg->embed_dim);
227  printf(" Heads: %d / %d (Q/KV)\n", cfg->num_heads, cfg->num_kv_heads);
228  printf(" Intermediate:%d\n", cfg->intermediate_size);
229  printf(" Vocab: %d\n", cfg->vocab_size);
230  printf(" Max seq: %d\n", cfg->max_seq_len);
231  printf(" Total mem: %.2f GB\n", cfg->total_bytes / 1e9);
232  printf(" Weight mem: %.2f GB\n", cfg->weight_bytes / 1e9);
233  printf("============================================\n");
234 
235  if (info_only) {
236  return 0;
237  }
238 
239  if (!weights_path) {
240  fprintf(stderr, "[ERROR] --weights required\n");
241  print_usage(argv[0]);
242  return 1;
243  }
244 
245  /* Create model */
246  printf("\n[INIT] Creating model...\n");
247  void *model = ck_model_create();
248  if (!model) {
249  fprintf(stderr, "[ERROR] Failed to create model\n");
250  return 1;
251  }
252  printf("[INIT] Model created (%.2f GB allocated)\n", cfg->total_bytes / 1e9);
253 
254  /* Load weights */
255  printf("[INIT] Loading weights from %s...\n", weights_path);
256  if (load_weights_from_bump(model, weights_path) != 0) {
257  fprintf(stderr, "[ERROR] Failed to load weights\n");
258  ck_model_free(model);
259  return 1;
260  }
261 
262  /* Precompute RoPE */
263  printf("[INIT] Precomputing RoPE...\n");
265 
266  /* Run tests */
267  if (benchmark_tokens > 0) {
268  run_benchmark(model, benchmark_tokens);
269  }
270 
271  if (generate_tokens > 0) {
272  run_generation_test(model, generate_tokens);
273  }
274 
275  if (benchmark_tokens == 0 && generate_tokens == 0) {
276  /* Default: quick benchmark */
277  run_benchmark(model, 100);
278  }
279 
280  /* Cleanup */
281  printf("\n[CLEANUP] Freeing model...\n");
282  ck_model_free(model);
283  printf("[DONE]\n");
284 
285  return 0;
286 }
Generic Model API - Model-agnostic interface for CK-Engine.
const CKModelConfig * ck_model_get_config(void)
void * ck_model_create(void)
void ck_model_precompute_rope(void *model)
int ck_model_verify_canaries(void *model)
void ck_model_free(void *model)
void * ck_model_get_base(void *model)
void ck_model_decode(void *model, const int *token, int token_index)
size_t ck_model_get_total_bytes(void *model)
float * ck_model_get_logits(void *model)
const char * model_family
Definition: ck_model_api.h:45
int intermediate_size
Definition: ck_model_api.h:37
const char * model_name
Definition: ck_model_api.h:44
size_t total_bytes
Definition: ck_model_api.h:41
size_t weight_bytes
Definition: ck_model_api.h:42
const char * token
Definition: tokenizer.h:306
int vocab_size
Definition: true_bpe.h:185
uint32_t end
Definition: utf8.c:215
uint32_t start
Definition: utf8.c:214
int main(int argc, char **argv)
static int load_weights_from_bump(void *model, const char *bump_path)
static void run_benchmark(void *model, int num_tokens)
static void run_generation_test(void *model, int num_tokens)
static int sample_argmax(const float *logits, int vocab_size)
static double get_time_ms(void)
static void print_usage(const char *prog)