32 gettimeofday(&tv, NULL);
33 return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0;
41 int fd = open(bump_path, O_RDONLY);
43 fprintf(stderr,
"[ERROR] Cannot open: %s\n", bump_path);
48 if (fstat(fd, &st) < 0) {
53 size_t file_size = st.st_size;
58 void *mapped = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
61 if (mapped == MAP_FAILED) {
62 fprintf(stderr,
"[ERROR] mmap failed\n");
67 size_t header_size = 64;
70 if (file_size < header_size + weight_bytes) {
71 fprintf(stderr,
"[ERROR] BUMP file too small: %zu < %zu\n",
72 file_size, header_size + weight_bytes);
73 munmap(mapped, file_size);
78 memcpy((
char*)base + header_size, (
char*)mapped + header_size, weight_bytes);
80 munmap(mapped, file_size);
81 printf(
"[INFO] Loaded %zu bytes from %s\n", weight_bytes, bump_path);
91 float best_val = logits[0];
93 if (logits[i] > best_val) {
109 printf(
"============================================\n");
111 printf(
"============================================\n");
113 printf(
" Embed dim: %d\n", cfg->
embed_dim);
116 printf(
" Tokens: %d\n", num_tokens);
117 printf(
"============================================\n\n");
120 printf(
"[WARMUP] Running 3 warmup iterations...\n");
122 for (
int i = 0; i < 3; i++) {
127 printf(
"[BENCH] Running %d decode iterations...\n", num_tokens);
130 for (
int i = 0; i < num_tokens; i++) {
136 double tokens_per_sec = num_tokens / (elapsed_ms / 1000.0);
137 double ms_per_token = elapsed_ms / num_tokens;
140 printf(
"============================================\n");
141 printf(
" RESULTS\n");
142 printf(
"============================================\n");
143 printf(
" Total time: %.2f ms\n", elapsed_ms);
144 printf(
" Tokens/sec: %.2f\n", tokens_per_sec);
145 printf(
" ms/token: %.2f\n", ms_per_token);
146 printf(
"============================================\n");
151 printf(
"[WARN] %d canary corruptions detected!\n", errors);
153 printf(
"[OK] Memory canaries intact\n");
164 printf(
"\n[TEST] Generation test (%d tokens)...\n", num_tokens);
169 printf(
"[GEN] Token IDs: ");
170 for (
int i = 0; i < num_tokens; i++) {
174 printf(
"%d ",
token);
185 printf(
"Usage: %s [options]\n", prog);
187 printf(
"Options:\n");
188 printf(
" --weights <path> Path to weights.bump file\n");
189 printf(
" --benchmark <n> Run benchmark with n tokens (default: 100)\n");
190 printf(
" --generate <n> Run generation test with n tokens\n");
191 printf(
" --info Print model info and exit\n");
192 printf(
" --help Show this help\n");
195 int main(
int argc,
char **argv) {
196 const char *weights_path = NULL;
197 int benchmark_tokens = 0;
198 int generate_tokens = 0;
202 for (
int i = 1; i < argc; i++) {
203 if (strcmp(argv[i],
"--weights") == 0 && i + 1 < argc) {
204 weights_path = argv[++i];
205 }
else if (strcmp(argv[i],
"--benchmark") == 0 && i + 1 < argc) {
206 benchmark_tokens = atoi(argv[++i]);
207 }
else if (strcmp(argv[i],
"--generate") == 0 && i + 1 < argc) {
208 generate_tokens = atoi(argv[++i]);
209 }
else if (strcmp(argv[i],
"--info") == 0) {
211 }
else if (strcmp(argv[i],
"--help") == 0) {
220 printf(
"============================================\n");
221 printf(
" CK-Engine Generic Test Harness\n");
222 printf(
"============================================\n");
231 printf(
" Total mem: %.2f GB\n", cfg->
total_bytes / 1e9);
232 printf(
" Weight mem: %.2f GB\n", cfg->
weight_bytes / 1e9);
233 printf(
"============================================\n");
240 fprintf(stderr,
"[ERROR] --weights required\n");
246 printf(
"\n[INIT] Creating model...\n");
249 fprintf(stderr,
"[ERROR] Failed to create model\n");
252 printf(
"[INIT] Model created (%.2f GB allocated)\n", cfg->
total_bytes / 1e9);
255 printf(
"[INIT] Loading weights from %s...\n", weights_path);
257 fprintf(stderr,
"[ERROR] Failed to load weights\n");
263 printf(
"[INIT] Precomputing RoPE...\n");
267 if (benchmark_tokens > 0) {
271 if (generate_tokens > 0) {
275 if (benchmark_tokens == 0 && generate_tokens == 0) {
281 printf(
"\n[CLEANUP] Freeing model...\n");
Generic Model API - Model-agnostic interface for CK-Engine.
const CKModelConfig * ck_model_get_config(void)
void * ck_model_create(void)
void ck_model_precompute_rope(void *model)
int ck_model_verify_canaries(void *model)
void ck_model_free(void *model)
void * ck_model_get_base(void *model)
void ck_model_decode(void *model, const int *token, int token_index)
size_t ck_model_get_total_bytes(void *model)
float * ck_model_get_logits(void *model)
const char * model_family
int main(int argc, char **argv)
static int load_weights_from_bump(void *model, const char *bump_path)
static void run_benchmark(void *model, int num_tokens)
static void run_generation_test(void *model, int num_tokens)
static int sample_argmax(const float *logits, int vocab_size)
static double get_time_ms(void)
static void print_usage(const char *prog)