39 #define CK_VERSION "6.0.0"
40 #define CK_BUILD_DATE __DATE__
43 #define ANSI_RESET "\033[0m"
44 #define ANSI_BOLD "\033[1m"
45 #define ANSI_DIM "\033[2m"
46 #define ANSI_GREEN "\033[0;32m"
47 #define ANSI_YELLOW "\033[0;33m"
48 #define ANSI_BLUE "\033[0;34m"
49 #define ANSI_CYAN "\033[0;36m"
53 const char *model_path;
70 printf(
" C-Kernel-Engine\n");
71 printf(
" ------------------------------\n");
83 printf(
" -m, --model <file> Model file (GGUF or BUMP)\n");
84 printf(
" -p, --prompt <text> Prompt (use @file.txt to read from file)\n");
85 printf(
" -t, --tokens <n> Max tokens to generate (default: 100)\n");
86 printf(
" -t, --temp <float> Temperature (default: 0.7)\n");
87 printf(
" --top-p <float> Top-p sampling (default: 0.9)\n");
88 printf(
" --top-k <n> Top-k sampling (default: 40)\n");
89 printf(
" --seed <n> Random seed (default: random)\n");
90 printf(
" --threads <n> Number of threads (default: auto)\n");
91 printf(
" --no-kv-cache Disable KV cache\n");
92 printf(
" --ignore-eos Ignore EOS token\n");
93 printf(
" --verbose Verbose output\n");
94 printf(
" -v, --version Show version\n");
95 printf(
" -h, --help Show this help\n");
98 printf(
" %s -m model.gguf -p \"Hello\"\n", prog);
99 printf(
" %s -m model.gguf -p @prompt.txt -t 50 --temp 0.8\n", prog);
100 printf(
" %s -m model.gguf --seed 42 --verbose\n", prog);
128 for (
int i = 1; i < argc; i++) {
129 if (strcmp(argv[i],
"-h") == 0 || strcmp(argv[i],
"--help") == 0) {
134 if (strcmp(argv[i],
"-v") == 0 || strcmp(argv[i],
"--version") == 0) {
138 if ((strcmp(argv[i],
"-m") == 0 || strcmp(argv[i],
"--model") == 0) && i + 1 < argc) {
139 args.model_path = argv[++i];
141 else if ((strcmp(argv[i],
"-p") == 0 || strcmp(argv[i],
"--prompt") == 0) && i + 1 < argc) {
142 args.prompt = argv[++i];
144 else if ((strcmp(argv[i],
"-t") == 0) && i + 1 < argc) {
145 args.max_tokens = atoi(argv[++i]);
147 else if (strcmp(argv[i],
"--temp") == 0 && i + 1 < argc) {
148 args.temperature = atof(argv[++i]);
150 else if (strcmp(argv[i],
"--top-p") == 0 && i + 1 < argc) {
151 args.top_p = atof(argv[++i]);
153 else if (strcmp(argv[i],
"--top-k") == 0 && i + 1 < argc) {
154 args.top_k = atoi(argv[++i]);
156 else if (strcmp(argv[i],
"--seed") == 0 && i + 1 < argc) {
157 args.seed = atoi(argv[++i]);
159 else if (strcmp(argv[i],
"--threads") == 0 && i + 1 < argc) {
160 args.threads = atoi(argv[++i]);
162 else if (strcmp(argv[i],
"--no-kv-cache") == 0) {
165 else if (strcmp(argv[i],
"--ignore-eos") == 0) {
168 else if (strcmp(argv[i],
"--verbose") == 0 || strcmp(argv[i],
"-v") == 0) {
172 fprintf(stderr,
"Unknown option: %s\n", argv[i]);
173 fprintf(stderr,
"Use --help for usage\n");
183 if (path[0] !=
'@')
return (
char *)path;
185 FILE *f = fopen(path + 1,
"r");
187 fprintf(stderr,
"Cannot open prompt file: %s\n", path + 1);
191 char *content = malloc(4096);
192 size_t len = fread(content, 1, 4095, f);
197 while (len > 0 && (content[len-1] ==
'\n' || content[len-1] ==
'\r')) {
198 content[--len] =
'\0';
206 static int32_t tokens[1024];
207 *num_tokens = strlen(
text);
208 for (
int i = 0; i < *num_tokens && i < 1024; i++) {
209 tokens[i] = (int32_t)
text[i];
216 float max_val = logits[0];
218 if (logits[i] > max_val) max_val = logits[i];
223 logits[i] = expf((logits[i] - max_val) / temp);
231 float best_val = logits[
start];
232 int best_idx =
start;
234 if (logits[i] > best_val) {
235 best_val = logits[i];
245 static time_t last_time = 0;
246 time_t now = time(NULL);
248 if (now - last_time >= 1) {
255 int main(
int argc,
char **argv) {
264 if (!args.model_path) {
271 if (access(args.model_path, F_OK) != 0) {
281 args.seed = (int)time(NULL);
289 printf(
" Tokens: %d\n", args.max_tokens);
290 printf(
" Temp: %.2f\n", args.temperature);
291 printf(
" Top-p: %.2f\n", args.top_p);
292 printf(
" Top-k: %d\n", args.top_k);
293 printf(
" Seed: %d\n", args.seed);
294 printf(
" Threads: %d\n", args.threads > 0 ? args.threads : 4);
303 printf(
"<Model loading would happen here>\n");
304 printf(
"<Inference would run here>\n");
310 printf(
"Full model loading and inference requires:\n");
311 printf(
" 1. Generated model code from IR\n");
312 printf(
" 2. All kernel implementations compiled\n");
313 printf(
" 3. Weight loading from GGUF/BUMP\n");
317 if (prompt != args.prompt) {
static int32_t * tokenize(const char *text, int *num_tokens)
static void print_progress(int token_id, float token_per_sec)
static int sample_token(float *logits, int vocab_size, float temp, int top_k)
static void print_version(void)
int main(int argc, char **argv)
static void print_help(const char *prog)
static CLIArgs parse_args(int argc, char **argv)
static void print_banner(void)
static char * read_prompt_file(const char *path)