← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ck_cli_v6.6.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <signal.h>
#include <dlfcn.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <dirent.h>
#include <sys/stat.h>
#include "tokenizer/true_bpe.h"
#include "ck_features.h"

Go to the source code of this file.

Macros

#define _GNU_SOURCE
 
#define CK_CLI_DEFAULT_MAX_TOKENS   256
 
#define CK_CLI_EOS_MAX   8
 
#define CK_CLI_HISTORY_FILE   ".ck_cli_history"
 
#define CK_CLI_MAX_CONTEXT   32768
 
#define CK_CLI_OUTPUT_BUF_SIZE   4096
 
#define CK_CLI_VERSION   "6.6.0"
 
#define EOS_PATTERN_BUF_SIZE   64
 
#define EOS_PENDING_MAX   8
 
#define SAMPLE_NEXT_TOKEN()
 

Typedefs

typedef int(* decode_t) (int32_t token, float *logits_out)
 
typedef int(* embed_t) (const int32_t *tokens, int num_tokens)
 
typedef int(* forward_t) (float *logits_out)
 
typedef void(* free_t) (void)
 
typedef int(* get_int_t) (void)
 
typedef float *(* get_logits_t) (void)
 
typedef void *(* get_ptr_t) (void)
 
typedef int(* init_t) (const char *weights_path)
 
typedef int(* kv_enable_t) (int capacity)
 
typedef void(* kv_reset_t) (void)
 
typedef int(* sample_argmax_t) (void)
 

Enumerations

enum  ChatTemplateType {
  CHAT_TEMPLATE_NONE = 0 , CHAT_TEMPLATE_QWEN , CHAT_TEMPLATE_LLAMA , CHAT_TEMPLATE_CHATML ,
  CHAT_TEMPLATE_MISTRAL
}
 

Functions

static char * apply_chat_template (const ChatTemplate *tmpl, const char *system, const char *user)
 
static int decode_bpe_token (const char *token, char *out, int max)
 
static ChatTemplateType detect_chat_template (const char *model_name)
 
static bool eos_is_potential_prefix (const char *token)
 
static void eos_pattern_init (ChatTemplateType tmpl)
 
static bool eos_pattern_process (const char *token_text, char *out_buf, size_t *out_len, void(*output_fn)(char *, size_t *, const char *), ChatTemplateType tmpl)
 
static void eos_pattern_reset (void)
 
static bool find_model_in_cache (const char *model_name, char *lib_out, char *weights_out, size_t out_size)
 
static const char * get_cache_dir (void)
 
static void handle_sigint (int sig)
 
static bool is_eos_token (const CLIOptions *opt, int token)
 
static void list_available_models (void)
 
static bool load_eos_from_vocab_json (const char *weights_path, CLIOptions *opt)
 
static bool load_model_api (const char *lib_path, ModelAPI *api)
 
int main (int argc, char **argv)
 
static void output_append (char *buf, size_t *len, const char *text)
 
static void output_flush (char *buf, size_t *len)
 
static void output_token (char *buf, size_t *len, const char *token)
 
static bool parse_args (int argc, char **argv, CLIOptions *opt)
 
static bool parse_eos_ids (const char *arg, CLIOptions *opt)
 
static void print_banner (void)
 
static void print_help (const char *prog)
 
static bool process_repl_command (const char *line, CLIOptions *opt, ModelAPI *api)
 
static bool resolve_symbol (void *handle, const char *name, void **out_ptr, bool required)
 
static int run_prompt (ModelAPI *api, CKTrueBPE *tokenizer, CLIOptions *opt, const char *input)
 
static int sample_top_p (float *logits, int vocab_size, float temperature, float top_p)
 

Variables

static int g_decode_count = 0
 
static double g_decode_time_ms = 0.0
 
static EOSPatternState g_eos_state = {0}
 
static volatile sig_atomic_t g_exit_requested = 0
 
static volatile sig_atomic_t g_generation_active = 0
 
static double g_prefill_time_ms = 0.0
 
static int g_prompt_tokens = 0
 
static const ChatTemplate g_templates []
 

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 17 of file ck_cli_v6.6.c.

◆ CK_CLI_DEFAULT_MAX_TOKENS

#define CK_CLI_DEFAULT_MAX_TOKENS   256

Definition at line 41 of file ck_cli_v6.6.c.

◆ CK_CLI_EOS_MAX

#define CK_CLI_EOS_MAX   8

Definition at line 42 of file ck_cli_v6.6.c.

◆ CK_CLI_HISTORY_FILE

#define CK_CLI_HISTORY_FILE   ".ck_cli_history"

Definition at line 45 of file ck_cli_v6.6.c.

◆ CK_CLI_MAX_CONTEXT

#define CK_CLI_MAX_CONTEXT   32768

Definition at line 44 of file ck_cli_v6.6.c.

◆ CK_CLI_OUTPUT_BUF_SIZE

#define CK_CLI_OUTPUT_BUF_SIZE   4096

Definition at line 43 of file ck_cli_v6.6.c.

◆ CK_CLI_VERSION

#define CK_CLI_VERSION   "6.6.0"

Definition at line 40 of file ck_cli_v6.6.c.

◆ EOS_PATTERN_BUF_SIZE

#define EOS_PATTERN_BUF_SIZE   64

Text-based EOS pattern detection with pending output buffering.

When special tokens like <|im_end|> are tokenized as regular text (e.g., !, im, _end, !), we need to detect the pattern in the output and avoid outputting the partial pattern tokens.

This is a workaround for tokenizers that don't properly encode special tokens.

Definition at line 637 of file ck_cli_v6.6.c.

◆ EOS_PENDING_MAX

#define EOS_PENDING_MAX   8

Definition at line 638 of file ck_cli_v6.6.c.

◆ SAMPLE_NEXT_TOKEN

/*
 * SAMPLE_NEXT_TOKEN() - pick the next token id and store it in `next_token`.
 *
 * Expands inside a scope that provides `api`, `opt`, `vocab_size` and an
 * assignable `next_token`.
 *
 * Preferred path: fetch the logits buffer, locate the row of the last active
 * token (row index = active_tokens - 1, row pitch = logits stride), copy that
 * row and hand the copy to sample_top_p() so the buffer returned by the model
 * is not passed to the sampler directly.
 *
 * Fallback path: when logits are unavailable, or the scratch copy cannot be
 * allocated, use api->sample() if the model exports it; otherwise
 * next_token is set to -1.
 */
#define SAMPLE_NEXT_TOKEN() \
    do { \
        float *snt_copy = NULL; \
        if (api->get_logits && vocab_size > 0) { \
            float *snt_logits = api->get_logits(); \
            if (snt_logits) { \
                int snt_stride = api->get_logits_stride ? api->get_logits_stride() : vocab_size; \
                int snt_active = api->get_active_tokens ? api->get_active_tokens() : 1; \
                const float *snt_last = snt_logits; \
                if (snt_stride > 0) { \
                    if (snt_active < 1) snt_active = 1; \
                    snt_last = snt_logits + (size_t)(snt_active - 1) * (size_t)snt_stride; \
                } \
                snt_copy = malloc((size_t)vocab_size * sizeof *snt_copy); \
                if (snt_copy) { \
                    memcpy(snt_copy, snt_last, (size_t)vocab_size * sizeof *snt_copy); \
                } \
            } \
        } \
        if (snt_copy) { \
            next_token = sample_top_p(snt_copy, vocab_size, opt->temperature, opt->top_p); \
            free(snt_copy); \
        } else if (api->sample) { \
            next_token = api->sample(); \
        } else { \
            next_token = -1; \
        } \
    } while (0)
static int sample_top_p(float *logits, int vocab_size, float temperature, float top_p)
Definition: ck_cli_v6.6.c:335
int vocab_size
Definition: true_bpe.h:185

Typedef Documentation

◆ decode_t

typedef int(* decode_t) (int32_t token, float *logits_out)

Definition at line 74 of file ck_cli_v6.6.c.

◆ embed_t

typedef int(* embed_t) (const int32_t *tokens, int num_tokens)

Definition at line 70 of file ck_cli_v6.6.c.

◆ forward_t

typedef int(* forward_t) (float *logits_out)

Definition at line 71 of file ck_cli_v6.6.c.

◆ free_t

typedef void(* free_t) (void)

Definition at line 79 of file ck_cli_v6.6.c.

◆ get_int_t

typedef int(* get_int_t) (void)

Definition at line 77 of file ck_cli_v6.6.c.

◆ get_logits_t

typedef float*(* get_logits_t) (void)

Definition at line 76 of file ck_cli_v6.6.c.

◆ get_ptr_t

typedef void*(* get_ptr_t) (void)

Definition at line 78 of file ck_cli_v6.6.c.

◆ init_t

typedef int(* init_t) (const char *weights_path)

Definition at line 69 of file ck_cli_v6.6.c.

◆ kv_enable_t

typedef int(* kv_enable_t) (int capacity)

Definition at line 72 of file ck_cli_v6.6.c.

◆ kv_reset_t

typedef void(* kv_reset_t) (void)

Definition at line 73 of file ck_cli_v6.6.c.

◆ sample_argmax_t

typedef int(* sample_argmax_t) (void)

Definition at line 75 of file ck_cli_v6.6.c.

Enumeration Type Documentation

◆ ChatTemplateType

Enumerator
CHAT_TEMPLATE_NONE 
CHAT_TEMPLATE_QWEN 
CHAT_TEMPLATE_LLAMA 
CHAT_TEMPLATE_CHATML 
CHAT_TEMPLATE_MISTRAL 

Definition at line 107 of file ck_cli_v6.6.c.

107  {
108  CHAT_TEMPLATE_NONE = 0,
ChatTemplateType
Definition: ck_cli_v6.6.c:107
@ CHAT_TEMPLATE_LLAMA
Definition: ck_cli_v6.6.c:110
@ CHAT_TEMPLATE_MISTRAL
Definition: ck_cli_v6.6.c:112
@ CHAT_TEMPLATE_QWEN
Definition: ck_cli_v6.6.c:109
@ CHAT_TEMPLATE_CHATML
Definition: ck_cli_v6.6.c:111
@ CHAT_TEMPLATE_NONE
Definition: ck_cli_v6.6.c:108

Function Documentation

◆ apply_chat_template()

static char* apply_chat_template ( const ChatTemplate *  tmpl,
const char *  system,
const char *  user 
)
static

Definition at line 590 of file ck_cli_v6.6.c.

590  {
591  size_t needed = 0;
592  if (system && *system) {
593  needed += strlen(tmpl->system_prefix) + strlen(system) + strlen(tmpl->system_suffix);
594  }
595  needed += strlen(tmpl->user_prefix) + strlen(user) + strlen(tmpl->user_suffix);
596  needed += strlen(tmpl->assistant_prefix);
597  needed += 1; /* null terminator */
598 
599  char *result = (char *)malloc(needed);
600  if (!result) return NULL;
601 
602  result[0] = '\0';
603  if (system && *system) {
604  strcat(result, tmpl->system_prefix);
605  strcat(result, system);
606  strcat(result, tmpl->system_suffix);
607  }
608  strcat(result, tmpl->user_prefix);
609  strcat(result, user);
610  strcat(result, tmpl->user_suffix);
611  strcat(result, tmpl->assistant_prefix);
612 
613  return result;
614 }

Referenced by run_prompt().

◆ decode_bpe_token()

static int decode_bpe_token ( const char *  token,
char *  out,
int  max 
)
static

Decode GPT-2 byte-level BPE representation back to actual bytes.

GPT-2's tokenizer maps certain bytes to Unicode code points:

  • Bytes 0x00-0x20 → U+0100-U+0120 (Ā Ć ċ ... Ġ)
  • Bytes 0x7F-0xA0 → U+017F-U+01A0
  • Printable ASCII (0x21-0x7E) stays as-is

This function reverses that mapping.

Parameters
token — Input BPE token string (UTF-8)
out — Output buffer for decoded bytes
max — Size of output buffer
Returns
Number of bytes written (not including NUL)

Definition at line 428 of file ck_cli_v6.6.c.

/**
 * Convert a vocabulary token string into the bytes that should be printed.
 *
 * The token is read as UTF-8, one code point at a time:
 *   - U+0100..U+0120 and U+017F..U+01A0 are emitted as the single byte
 *     (codepoint - 0x100);
 *   - code points below 0x80 are emitted unchanged;
 *   - U+2581 (the SentencePiece space marker) is emitted as ' ';
 *   - every other code point is passed through as its original UTF-8 bytes;
 *   - a byte that does not start a valid UTF-8 sequence is copied as-is.
 *
 * NOTE(review): the reference GPT-2 bytes_to_unicode table maps bytes
 * 0x7F-0xA0 to U+0121-U+0142 (and 0xAD to U+0143), which differs from the
 * U+017F..U+01A0 range handled here - confirm against the vocab format the
 * engine actually ships before changing it.
 *
 * @param token  Input token string (UTF-8); NULL yields an empty result.
 * @param out    Output buffer; always NUL-terminated when non-NULL and max > 0.
 * @param max    Size of `out` in bytes, including room for the terminator.
 * @return Number of bytes written, not counting the terminator.
 */
static int decode_bpe_token(const char *token, char *out, int max) {
    if (!out || max <= 0) {
        return 0;
    }
    if (!token) {
        out[0] = '\0';
        return 0;
    }

    const unsigned char *src = (const unsigned char *)token;
    const int limit = max - 1; /* keep one byte for the terminator */
    int out_len = 0;

    while (*src && out_len < limit) {
        unsigned int codepoint;
        int bytes;

        /* Decode one UTF-8 sequence. The continuation-byte checks are
         * evaluated left to right, so a NUL terminator stops the test before
         * any byte beyond it is read. */
        if ((src[0] & 0x80) == 0) {
            codepoint = src[0];
            bytes = 1;
        } else if ((src[0] & 0xE0) == 0xC0 && (src[1] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x1Fu) << 6) | (src[1] & 0x3Fu);
            bytes = 2;
        } else if ((src[0] & 0xF0) == 0xE0 && (src[1] & 0xC0) == 0x80 &&
                   (src[2] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x0Fu) << 12) | ((src[1] & 0x3Fu) << 6) | (src[2] & 0x3Fu);
            bytes = 3;
        } else if ((src[0] & 0xF8) == 0xF0 && (src[1] & 0xC0) == 0x80 &&
                   (src[2] & 0xC0) == 0x80 && (src[3] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x07u) << 18) | ((src[1] & 0x3Fu) << 12) |
                        ((src[2] & 0x3Fu) << 6) | (src[3] & 0x3Fu);
            bytes = 4;
        } else {
            /* Not a valid sequence start: copy the raw byte and resync. */
            out[out_len++] = (char)*src;
            src++;
            continue;
        }

        if ((codepoint >= 0x100 && codepoint <= 0x120) ||
            (codepoint >= 0x17F && codepoint <= 0x1A0)) {
            /* Byte-level marker: byte value = codepoint - 0x100. */
            out[out_len++] = (char)(codepoint - 0x100);
        } else if (codepoint < 0x80) {
            out[out_len++] = (char)codepoint;
        } else if (codepoint == 0x2581) {
            out[out_len++] = ' ';
        } else {
            /* Pass the character through unchanged, but only if the whole
             * sequence fits: writing part of it would leave invalid UTF-8
             * at the end of the buffer. */
            if (bytes > limit - out_len) {
                break;
            }
            memcpy(out + out_len, src, (size_t)bytes);
            out_len += bytes;
        }

        src += bytes;
    }

    out[out_len] = '\0';
    return out_len;
}
const char * token
Definition: tokenizer.h:306
const int32_t int int * out_len
Definition: tokenizer.h:445

References out_len, and token.

Referenced by output_token().

◆ detect_chat_template()

static ChatTemplateType detect_chat_template ( const char *  model_name)
static

Definition at line 575 of file ck_cli_v6.6.c.

575  {
576  if (!model_name) return CHAT_TEMPLATE_CHATML;
577 
578  /* Lowercase comparison */
579  char lower[256];
580  strncpy(lower, model_name, sizeof(lower) - 1);
581  for (char *p = lower; *p; p++) *p = (*p >= 'A' && *p <= 'Z') ? *p + 32 : *p;
582 
583  if (strstr(lower, "qwen")) return CHAT_TEMPLATE_QWEN;
584  if (strstr(lower, "llama")) return CHAT_TEMPLATE_LLAMA;
585  if (strstr(lower, "mistral")) return CHAT_TEMPLATE_MISTRAL;
586 
587  return CHAT_TEMPLATE_CHATML; /* Default */
588 }

References CHAT_TEMPLATE_CHATML, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, and CHAT_TEMPLATE_QWEN.

Referenced by parse_args().

◆ eos_is_potential_prefix()

static bool eos_is_potential_prefix ( const char *  token)
static

Check if token might be start of EOS pattern.

Definition at line 684 of file ck_cli_v6.6.c.

684  {
685  if (!token || !g_eos_state.partial_prefix) return false;
686 
687  /* Check if current accumulated buffer + token could start the pattern */
688  size_t tlen = strlen(token);
689  size_t plen = g_eos_state.pattern_len;
690  size_t target_len = g_eos_state.target_pattern ? strlen(g_eos_state.target_pattern) : 0;
691 
692  /* If buffer + token contains partial match of target, it's a potential prefix */
693  if (target_len == 0) return false;
694 
695  /* Build temp buffer */
696  char temp[EOS_PATTERN_BUF_SIZE];
697  if (plen + tlen >= EOS_PATTERN_BUF_SIZE) return false;
698  memcpy(temp, g_eos_state.pattern_buf, plen);
699  memcpy(temp + plen, token, tlen);
700  temp[plen + tlen] = '\0';
701 
702  /* Check if temp is a prefix of target or contains start of target */
703  const char *target = g_eos_state.target_pattern;
704  size_t temp_len = plen + tlen;
705 
706  /* Look for any suffix of temp that is a prefix of target */
707  for (size_t i = 0; i < temp_len; i++) {
708  size_t remaining = temp_len - i;
709  if (remaining > target_len) remaining = target_len;
710  if (strncmp(temp + i, target, remaining) == 0) {
711  return true;
712  }
713  }
714 
715  return false;
716 }
#define EOS_PATTERN_BUF_SIZE
Definition: ck_cli_v6.6.c:637
static EOSPatternState g_eos_state
Definition: ck_cli_v6.6.c:649

References EOS_PATTERN_BUF_SIZE, g_eos_state, and token.

Referenced by eos_pattern_process().

◆ eos_pattern_init()

static void eos_pattern_init ( ChatTemplateType  tmpl)
static

Definition at line 663 of file ck_cli_v6.6.c.

663  {
665  switch (tmpl) {
666  case CHAT_TEMPLATE_QWEN:
668  g_eos_state.target_pattern = "im_end";
669  g_eos_state.partial_prefix = "im";
670  break;
671  case CHAT_TEMPLATE_LLAMA:
673  g_eos_state.target_pattern = "</s>";
674  g_eos_state.partial_prefix = "</";
675  break;
676  default:
677  break;
678  }
679 }
static void eos_pattern_reset(void)
Definition: ck_cli_v6.6.c:651

References CHAT_TEMPLATE_CHATML, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, CHAT_TEMPLATE_QWEN, eos_pattern_reset(), and g_eos_state.

Referenced by run_prompt().

◆ eos_pattern_process()

static bool eos_pattern_process ( const char *  token_text,
char *  out_buf,
size_t *  out_len,
void(*)(char *, size_t *, const char *)  output_fn,
ChatTemplateType  tmpl 
)
static

Process a token for EOS pattern detection.

Parameters
token_text — The token text to process
out_buf — Output buffer for safe-to-output text
out_len — Current length of output buffer
tmpl — Chat template type
Returns
true if EOS pattern detected, false otherwise

Definition at line 727 of file ck_cli_v6.6.c.

729  {
730  if (!token_text || !g_eos_state.target_pattern) {
731  /* No pattern to match - output directly */
732  if (token_text && output_fn) output_fn(out_buf, out_len, token_text);
733  return false;
734  }
735 
736  /* Append to pattern buffer */
737  size_t tlen = strlen(token_text);
738  if (g_eos_state.pattern_len + (int)tlen < EOS_PATTERN_BUF_SIZE - 1) {
739  memcpy(g_eos_state.pattern_buf + g_eos_state.pattern_len, token_text, tlen);
740  g_eos_state.pattern_len += (int)tlen;
741  g_eos_state.pattern_buf[g_eos_state.pattern_len] = '\0';
742  }
743 
744  /* Check if pattern is complete */
745  if (strstr(g_eos_state.pattern_buf, g_eos_state.target_pattern)) {
746  /* EOS detected - don't output pending tokens */
748  return true;
749  }
750 
751  /* Check if this could still be part of the pattern */
752  if (eos_is_potential_prefix(token_text)) {
753  /* Hold this token - might be part of EOS */
754  if (g_eos_state.pending_count < EOS_PENDING_MAX) {
755  g_eos_state.pending[g_eos_state.pending_count] = strdup(token_text);
756  g_eos_state.pending_count++;
757  }
758  return false;
759  }
760 
761  /* Not part of pattern - flush pending tokens and this one */
762  for (int i = 0; i < g_eos_state.pending_count; i++) {
763  if (output_fn) output_fn(out_buf, out_len, g_eos_state.pending[i]);
764  free(g_eos_state.pending[i]);
765  g_eos_state.pending[i] = NULL;
766  }
767  g_eos_state.pending_count = 0;
768  g_eos_state.pattern_len = 0;
769  g_eos_state.pattern_buf[0] = '\0';
770 
771  if (output_fn) output_fn(out_buf, out_len, token_text);
772  return false;
773 }
static bool eos_is_potential_prefix(const char *token)
Definition: ck_cli_v6.6.c:684
#define EOS_PENDING_MAX
Definition: ck_cli_v6.6.c:638

References eos_is_potential_prefix(), EOS_PATTERN_BUF_SIZE, eos_pattern_reset(), EOS_PENDING_MAX, g_eos_state, and out_len.

Referenced by run_prompt().

◆ eos_pattern_reset()

static void eos_pattern_reset ( void  )
static

Definition at line 651 of file ck_cli_v6.6.c.

651  {
652  g_eos_state.pattern_len = 0;
653  g_eos_state.pattern_buf[0] = '\0';
654  for (int i = 0; i < g_eos_state.pending_count; i++) {
655  free(g_eos_state.pending[i]);
656  g_eos_state.pending[i] = NULL;
657  }
658  g_eos_state.pending_count = 0;
659  g_eos_state.target_pattern = NULL;
660  g_eos_state.partial_prefix = NULL;
661 }

References g_eos_state.

Referenced by eos_pattern_init(), and eos_pattern_process().

◆ find_model_in_cache()

static bool find_model_in_cache ( const char *  model_name,
char *  lib_out,
char *  weights_out,
size_t  out_size 
)
static

Definition at line 206 of file ck_cli_v6.6.c.

/**
 * Locate a cached model whose directory name contains `model_name`.
 *
 * Scans the directory returned by get_cache_dir(), skipping entries that
 * start with '.', and accepts the first entry that contains both
 * "ck-kernel-inference.so" and "weights.bump".
 *
 * @param model_name   Substring to look for in the directory names.
 * @param lib_out      Receives the path of the shared library.
 * @param weights_out  Receives the path of the weights file.
 * @param out_size     Capacity in bytes of EACH output buffer.
 * @return true when a match was found and both paths were written in full
 *         (NUL-terminated); false otherwise. Entries whose paths would not
 *         fit are skipped rather than returned truncated.
 */
static bool find_model_in_cache(const char *model_name, char *lib_out, char *weights_out, size_t out_size) {
    if (!model_name || !lib_out || !weights_out || out_size == 0) {
        return false;
    }

    const char *cache_dir = get_cache_dir();
    DIR *dir = opendir(cache_dir);
    if (!dir) {
        return false;
    }

    bool found = false;
    struct dirent *entry;
    while (!found && (entry = readdir(dir)) != NULL) {
        if (entry->d_name[0] == '.') {
            continue;
        }
        /* Check if directory name contains model_name */
        if (strstr(entry->d_name, model_name) == NULL) {
            continue;
        }

        char so_path[4096];
        char bump_path[4096];
        const int so_len = snprintf(so_path, sizeof(so_path), "%s/%s/ck-kernel-inference.so",
                                    cache_dir, entry->d_name);
        const int bump_len = snprintf(bump_path, sizeof(bump_path), "%s/%s/weights.bump",
                                      cache_dir, entry->d_name);
        if (so_len < 0 || (size_t)so_len >= sizeof(so_path) ||
            bump_len < 0 || (size_t)bump_len >= sizeof(bump_path)) {
            continue; /* path did not fit; a truncated path would be wrong */
        }

        /* Check for required files */
        struct stat st;
        if (stat(so_path, &st) != 0 || stat(bump_path, &st) != 0) {
            continue;
        }

        /* Only report success if the caller's buffers can hold the full,
         * terminated paths. */
        if ((size_t)so_len >= out_size || (size_t)bump_len >= out_size) {
            continue;
        }
        memcpy(lib_out, so_path, (size_t)so_len + 1);
        memcpy(weights_out, bump_path, (size_t)bump_len + 1);
        found = true;
    }

    closedir(dir);
    return found;
}
static const char * get_cache_dir(void)
Definition: ck_cli_v6.6.c:198

References get_cache_dir().

Referenced by parse_args().

◆ get_cache_dir()

static const char* get_cache_dir ( void  )
static

Definition at line 198 of file ck_cli_v6.6.c.

/**
 * Return the directory that holds cached models:
 * "$HOME/.cache/ck-engine-v6.6/models".
 *
 * When HOME is unset or empty, "/tmp" is used as the base so the result never
 * degenerates into a path directly under the filesystem root.
 *
 * @return Pointer to a static buffer that is rewritten on every call; the
 *         caller must not free it.
 */
static const char *get_cache_dir(void) {
    static char cache_path[4096];

    const char *home = getenv("HOME");
    if (!home || !*home) {
        home = "/tmp";
    }

    snprintf(cache_path, sizeof(cache_path), "%s/.cache/ck-engine-v6.6/models", home);
    return cache_path;
}

Referenced by find_model_in_cache(), and list_available_models().

◆ handle_sigint()

static void handle_sigint ( int  sig)
static

Definition at line 56 of file ck_cli_v6.6.c.

56  {
57  (void)sig;
58  if (g_generation_active) {
59  g_generation_active = 0; /* Stop generation but don't exit */
60  } else {
61  g_exit_requested = 1;
62  }
63 }
static volatile sig_atomic_t g_generation_active
Definition: ck_cli_v6.6.c:48
static volatile sig_atomic_t g_exit_requested
Definition: ck_cli_v6.6.c:47

References g_exit_requested, and g_generation_active.

Referenced by main().

◆ is_eos_token()

static bool is_eos_token ( const CLIOptions *  opt,
int  token 
)
static

Definition at line 620 of file ck_cli_v6.6.c.

620  {
621  if (!opt || opt->ignore_eos) return false;
622  for (int i = 0; i < opt->eos_count; i++) {
623  if (opt->eos_ids[i] == token) return true;
624  }
625  return false;
626 }

References token.

Referenced by run_prompt().

◆ list_available_models()

static void list_available_models ( void  )
static

Definition at line 298 of file ck_cli_v6.6.c.

/**
 * Print the names of the cached models.
 *
 * A sub-directory of get_cache_dir() is listed when it contains
 * "ck-kernel-inference.so"; entries starting with '.' are ignored. If the
 * cache directory cannot be opened, a message is written to stderr instead.
 */
static void list_available_models(void) {
    const char *root = get_cache_dir();

    DIR *handle = opendir(root);
    if (handle == NULL) {
        fprintf(stderr, "No models found in %s\n", root);
        return;
    }

    printf("Available models in %s:\n", root);

    int shown = 0;
    for (struct dirent *item = readdir(handle); item != NULL; item = readdir(handle)) {
        if (item->d_name[0] == '.') {
            continue;
        }

        char subdir[4096];
        snprintf(subdir, sizeof(subdir), "%s/%s", root, item->d_name);

        char lib_file[4096];
        snprintf(lib_file, sizeof(lib_file), "%s/ck-kernel-inference.so", subdir);

        /* Only directories that actually ship the inference library count. */
        struct stat info;
        if (stat(lib_file, &info) != 0) {
            continue;
        }

        printf(" - %s\n", item->d_name);
        shown++;
    }
    closedir(handle);

    if (shown == 0) {
        printf(" (none found)\n");
    }
}

References get_cache_dir().

Referenced by parse_args().

◆ load_eos_from_vocab_json()

static bool load_eos_from_vocab_json ( const char *  weights_path,
CLIOptions *  opt 
)
static

Definition at line 242 of file ck_cli_v6.6.c.

242  {
243  if (!weights_path || !opt) return false;
244 
245  /* Construct vocab.json path from weights path */
246  char vocab_path[4096];
247  const char *slash = strrchr(weights_path, '/');
248  if (!slash) return false;
249 
250  size_t dir_len = (size_t)(slash - weights_path);
251  if (dir_len + 12 >= sizeof(vocab_path)) return false;
252 
253  memcpy(vocab_path, weights_path, dir_len);
254  vocab_path[dir_len] = '\0';
255  strcat(vocab_path, "/vocab.json");
256 
257  FILE *f = fopen(vocab_path, "r");
258  if (!f) return false;
259 
260  /* Simple JSON parsing for special_tokens */
261  char buf[8192];
262  size_t n = fread(buf, 1, sizeof(buf) - 1, f);
263  fclose(f);
264  buf[n] = '\0';
265 
266  /* Look for "special_tokens" section */
267  const char *st = strstr(buf, "\"special_tokens\"");
268  if (!st) return false;
269 
270  /* Extract eos token */
271  const char *eos = strstr(st, "\"eos\"");
272  if (eos) {
273  const char *colon = strchr(eos, ':');
274  if (colon) {
275  int eos_id = atoi(colon + 1);
276  if (eos_id > 0) {
277  opt->eos_ids[0] = eos_id;
278  opt->eos_count = 1;
279  }
280  }
281  }
282 
283  /* Extract bos token (often used as im_end for chat) */
284  const char *bos = strstr(st, "\"bos\"");
285  if (bos) {
286  const char *colon = strchr(bos, ':');
287  if (colon) {
288  int bos_id = atoi(colon + 1);
289  if (bos_id > 0 && bos_id != opt->eos_ids[0]) {
290  opt->eos_ids[opt->eos_count++] = bos_id;
291  }
292  }
293  }
294 
295  return opt->eos_count > 0;
296 }
int32_t int32_t int32_t eos
Definition: tokenizer.h:231
int32_t int32_t bos
Definition: tokenizer.h:230

References bos, and eos.

Referenced by parse_args().

◆ load_model_api()

static bool load_model_api ( const char *  lib_path,
ModelAPI *  api 
)
static

Definition at line 536 of file ck_cli_v6.6.c.

536  {
537  if (!lib_path || !api) return false;
538  memset(api, 0, sizeof(*api));
539  api->handle = dlopen(lib_path, RTLD_NOW);
540  if (!api->handle) {
541  fprintf(stderr, "Error: dlopen failed: %s\n", dlerror());
542  return false;
543  }
544 
545  if (!resolve_symbol(api->handle, "ck_model_init", (void **)&api->init, true)) return false;
546  if (!resolve_symbol(api->handle, "ck_model_embed_tokens", (void **)&api->embed, true)) return false;
547  if (!resolve_symbol(api->handle, "ck_model_forward", (void **)&api->forward, true)) return false;
548  if (!resolve_symbol(api->handle, "ck_model_decode", (void **)&api->decode, true)) return false;
549  resolve_symbol(api->handle, "ck_model_sample_argmax", (void **)&api->sample, false); /* Optional - we can sample from logits */
550  resolve_symbol(api->handle, "ck_model_get_logits", (void **)&api->get_logits, false);
551  resolve_symbol(api->handle, "ck_model_get_logits_stride", (void **)&api->get_logits_stride, false);
552  resolve_symbol(api->handle, "ck_model_kv_cache_enable", (void **)&api->kv_enable, false);
553  resolve_symbol(api->handle, "ck_model_kv_cache_reset", (void **)&api->kv_reset, false);
554  resolve_symbol(api->handle, "ck_model_get_context_window", (void **)&api->get_context, false);
555  resolve_symbol(api->handle, "ck_model_get_vocab_size", (void **)&api->get_vocab_size, false);
556  resolve_symbol(api->handle, "ck_model_get_num_merges", (void **)&api->get_num_merges, false);
557  resolve_symbol(api->handle, "ck_model_get_vocab_strings_size", (void **)&api->get_vocab_bytes, false);
558  resolve_symbol(api->handle, "ck_model_get_active_tokens", (void **)&api->get_active_tokens, false);
559  resolve_symbol(api->handle, "ck_model_get_vocab_offsets", (void **)&api->get_offsets, false);
560  resolve_symbol(api->handle, "ck_model_get_vocab_strings", (void **)&api->get_strings, false);
561  resolve_symbol(api->handle, "ck_model_get_vocab_merges", (void **)&api->get_merges, false);
562  resolve_symbol(api->handle, "ck_model_free", (void **)&api->free_fn, false);
563 
564  if (!api->get_vocab_size || !api->get_offsets || !api->get_strings) {
565  fprintf(stderr, "Error: vocab accessors missing from model\n");
566  return false;
567  }
568  return true;
569 }
static bool resolve_symbol(void *handle, const char *name, void **out_ptr, bool required)
Definition: ck_cli_v6.6.c:526

References resolve_symbol().

Referenced by main().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 1197 of file ck_cli_v6.6.c.

/*
 * Program entry point: parse options, load and initialise the model library,
 * build the tokenizer from the vocab data exported by the model, then either
 * run a single prompt (opt.prompt_once) or an interactive REPL.
 * Returns 0 on normal exit and 1 on any start-up failure.
 *
 * NOTE(review): the early `return 1` paths after load_model_api() succeeds do
 * not call api.free_fn or dlclose, and the path after ck_true_bpe_create()
 * fails does not release the initialised model.
 * NOTE(review): this rendered listing omits source lines 1245 and 1294.
 */
1197  {
/* Ctrl+C is routed to handle_sigint; rand() is seeded from the wall clock. */
1198  signal(SIGINT, handle_sigint);
1199  srand((unsigned int)time(NULL));
1200 
1201  CLIOptions opt;
1202  if (!parse_args(argc, argv, &opt)) {
1203  return 1;
1204  }
1205 
1206  print_banner();
1207  printf("Loading: %s\n", opt.lib_path);
1208 
1209  ModelAPI api;
1210  if (!load_model_api(opt.lib_path, &api)) {
1211  return 1;
1212  }
1213 
1214  printf("Initializing model...\n");
1215  if (api.init(opt.weights_path) != 0) {
1216  fprintf(stderr, "Error: ck_model_init failed\n");
1217  return 1;
1218  }
1219 
/* Context length: the command-line override wins; otherwise ask the model.
 * The return value of kv_enable() is not checked. */
1220  int ctx = opt.context_override;
1221  if (ctx <= 0 && api.get_context) ctx = api.get_context();
1222  if (api.kv_enable && ctx > 0) {
1223  api.kv_enable(ctx);
1224  }
1225 
1226  CKTrueBPE *tokenizer = ck_true_bpe_create();
1227  if (!tokenizer) {
1228  fprintf(stderr, "[Tokenizer] failed to create\n");
1229  return 1;
1230  }
1231 
/* Pull the vocab tables out of the model. get_offsets / get_strings are
 * called unconditionally because load_model_api() fails when they are
 * missing. */
1232  int vocab_size = api.get_vocab_size ? api.get_vocab_size() : 0;
1233  int vocab_bytes = api.get_vocab_bytes ? api.get_vocab_bytes() : 0;
1234  int num_merges = api.get_num_merges ? api.get_num_merges() : 0;
1235  const int32_t *offsets = (const int32_t *)api.get_offsets();
1236  const char *strings = (const char *)api.get_strings();
1237  const int32_t *merges = api.get_merges ? (const int32_t *)api.get_merges() : NULL;
1238 
1239  if (vocab_size <= 0 || vocab_bytes <= 0 || !offsets || !strings) {
1240  fprintf(stderr, "[Tokenizer] missing vocab data in model\n");
1241  ck_true_bpe_free(tokenizer);
1242  return 1;
1243  }
1244 
/* NOTE(review): source line 1245 is absent from this listing; presumably it
 * opens the failure check on ck_true_bpe_load_binary() that the next three
 * lines belong to - confirm against the source file. */
1246  fprintf(stderr, "[Tokenizer] failed to load vocab\n");
1247  ck_true_bpe_free(tokenizer);
1248  return 1;
1249  }
1250 
1251  /* Register special tokens for pre-BPE matching.
1252  * This is done in the CLI (orchestrator), NOT the generated model code.
1253  * The generated model code stays "dumb" - just inference.
1254  * Model-specific token handling is the CLI's responsibility.
1255  */
1256  {
1257  /* Common special tokens across model families */
1258  static const char *special_tokens[] = {
1259  /* Qwen/ChatML */
1260  "<|im_start|>", "<|im_end|>", "<|endoftext|>",
1261  /* Llama 3 */
1262  "<|eot_id|>", "<|begin_of_text|>", "<|end_of_text|>",
1263  "<|start_header_id|>", "<|end_header_id|>",
1264  /* Generic */
1265  "</s>", "<s>", "<pad>", "<unk>",
1266  NULL
1267  };
1268  int registered = 0;
/* A candidate is registered only if looking it up and mapping the id back
 * yields the identical string. */
1269  for (int i = 0; special_tokens[i] != NULL; i++) {
1270  int32_t id = ck_true_bpe_lookup(tokenizer, special_tokens[i]);
1271  /* Verify it's actually this token (not unk) via round-trip */
1272  const char *check = ck_true_bpe_id_to_token(tokenizer, id);
1273  if (check && strcmp(check, special_tokens[i]) == 0) {
1274  ck_true_bpe_add_special_token(tokenizer, special_tokens[i], id);
1275  registered++;
1276  if (opt.verbose) {
1277  printf("[Tokenizer] Registered special: %s -> %d\n", special_tokens[i], id);
1278  }
1279  }
1280  }
1281  if (opt.verbose) {
1282  printf("[Tokenizer] Registered %d special tokens for pre-BPE matching\n", registered);
1283  }
1284  }
1285 
1286  printf("Ready! Vocab: %d, Context: %d, Template: %s\n",
1287  vocab_size, ctx,
1288  opt.no_chat_template ? "none" :
1289  opt.chat_template == CHAT_TEMPLATE_QWEN ? "qwen" :
1290  opt.chat_template == CHAT_TEMPLATE_LLAMA ? "llama" :
1291  opt.chat_template == CHAT_TEMPLATE_MISTRAL ? "mistral" : "chatml");
1292 
1293  /* Print CPU capability info */
/* NOTE(review): source line 1294, which must declare `cap`, is absent from
 * this listing; presumably it is initialised from ck_get_capabilities() -
 * confirm against the source file. */
1295  printf("[Hardware] %s | Vector: %d-bit | FMA: %s | AI Accel: %s | Kernel: %s\n",
1296  cap.name, cap.width, cap.has_fma ? "Yes" : "No",
1297  cap.has_ai_accel ? "Yes" : "No", cap.best_kernel);
1298 
1299  printf("Type /help for commands, Ctrl+C to stop generation\n\n");
1300 
/* From here on stdout is fully buffered with a 1 MiB buffer, which is why
 * the prompts below are followed by an explicit fflush(). */
1301  setvbuf(stdout, NULL, _IOFBF, 1 << 20);
1302 
1303  if (opt.prompt_once) {
1304  run_prompt(&api, tokenizer, &opt, opt.prompt_once);
1305  } else {
1306  /* REPL */
1307 #ifdef HAVE_READLINE
1308  char *home = getenv("HOME");
1309  char history_path[4096];
1310  if (home) {
1311  snprintf(history_path, sizeof(history_path), "%s/%s", home, CK_CLI_HISTORY_FILE);
1312  read_history(history_path);
1313  }
1314 #endif
1315 
/* Loop until g_exit_requested is set or input ends. With readline, `line` is
 * heap memory freed on every path through the loop body; without it, `line`
 * points into the stack buffer line_buf. */
1316  while (!g_exit_requested) {
1317 #ifdef HAVE_READLINE
1318  char *line = readline("\033[1;32mYou:\033[0m ");
1319  if (!line) break;
1320  if (*line) add_history(line);
1321 #else
1322  printf("\033[1;32mYou:\033[0m ");
1323  fflush(stdout);
1324  char line_buf[4096];
1325  if (!fgets(line_buf, sizeof(line_buf), stdin)) {
1326  if (feof(stdin) || g_exit_requested) break;
1327  if (errno == EINTR) break;
1328  continue;
1329  }
1330  /* Remove trailing newline */
1331  size_t len = strlen(line_buf);
1332  if (len > 0 && line_buf[len-1] == '\n') line_buf[len-1] = '\0';
1333  char *line = line_buf;
1334 #endif
1335 
/* Empty input: prompt again. */
1336  if (line[0] == '\0') {
1337 #ifdef HAVE_READLINE
1338  free(line);
1339 #endif
1340  continue;
1341  }
1342 
/* Lines starting with '/' are REPL commands, not prompts; the return value
 * of process_repl_command() is ignored here. */
1343  if (line[0] == '/') {
1344  process_repl_command(line, &opt, &api);
1345 #ifdef HAVE_READLINE
1346  free(line);
1347 #endif
1348  continue;
1349  }
1350 
1351  printf("\033[1;34mAssistant:\033[0m ");
1352  fflush(stdout);
1353  run_prompt(&api, tokenizer, &opt, line);
1354 
1355 #ifdef HAVE_READLINE
1356  free(line);
1357 #endif
1358  }
1359 
1360 #ifdef HAVE_READLINE
1361  if (home) {
1362  write_history(history_path);
1363  }
1364 #endif
1365  }
1366 
/* Normal shutdown: free the tokenizer, call the model's free hook if it
 * exports one, then close the library handle. */
1367  ck_true_bpe_free(tokenizer);
1368  if (api.free_fn) api.free_fn();
1369  if (api.handle) dlclose(api.handle);
1370 
1371  printf("\nGoodbye!\n");
1372  return 0;
1373 }
static void handle_sigint(int sig)
Definition: ck_cli_v6.6.c:56
#define CK_CLI_HISTORY_FILE
Definition: ck_cli_v6.6.c:45
static bool process_repl_command(const char *line, CLIOptions *opt, ModelAPI *api)
Definition: ck_cli_v6.6.c:1132
static void print_banner(void)
Definition: ck_cli_v6.6.c:982
static bool parse_args(int argc, char **argv, CLIOptions *opt)
Definition: ck_cli_v6.6.c:1022
static int run_prompt(ModelAPI *api, CKTrueBPE *tokenizer, CLIOptions *opt, const char *input)
Definition: ck_cli_v6.6.c:794
static bool load_model_api(const char *lib_path, ModelAPI *api)
Definition: ck_cli_v6.6.c:536
static ck_capability_t ck_get_capabilities(void)
Get current platform capabilities.
Definition: ck_features.h:226
CPU capability information structure.
Definition: ck_features.h:215
const char * best_kernel
Definition: ck_features.h:220
const char * name
Definition: ck_features.h:216
void ck_true_bpe_free(CKTrueBPE *bpe)
Definition: true_bpe.c:405
CKTrueBPE * ck_true_bpe_create(void)
Definition: true_bpe.c:342
int ck_true_bpe_add_special_token(CKTrueBPE *bpe, const char *token, int32_t id)
Definition: true_bpe.c:565
int ck_true_bpe_load_binary(CKTrueBPE *bpe, int vocab_size, const int32_t *offsets, const char *strings, int num_merges, const int32_t *merges)
Definition: true_bpe.c:606
int32_t ck_true_bpe_lookup(const CKTrueBPE *bpe, const char *token)
Definition: true_bpe.c:638
const char * ck_true_bpe_id_to_token(const CKTrueBPE *bpe, int32_t id)
Definition: true_bpe.c:645
int const int32_t const char int num_merges
Definition: true_bpe.h:188
int const int32_t const char * strings
Definition: true_bpe.h:187
int const int32_t const char int const int32_t * merges
Definition: true_bpe.h:189
int const int32_t * offsets
Definition: true_bpe.h:186

References ck_capability_t::best_kernel, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, CHAT_TEMPLATE_QWEN, CK_CLI_HISTORY_FILE, ck_get_capabilities(), ck_true_bpe_add_special_token(), ck_true_bpe_create(), ck_true_bpe_free(), ck_true_bpe_id_to_token(), ck_true_bpe_load_binary(), ck_true_bpe_lookup(), g_exit_requested, handle_sigint(), ck_capability_t::has_ai_accel, ck_capability_t::has_fma, load_model_api(), merges, ck_capability_t::name, num_merges, offsets, parse_args(), print_banner(), process_repl_command(), run_prompt(), strings, vocab_size, and ck_capability_t::width.

◆ output_append()

static void output_append ( char *  buf,
size_t *  len,
const char *  text 
)
static

Definition at line 497 of file ck_cli_v6.6.c.

497  {
498  if (!text || !*text) return;
499  size_t n = strlen(text);
500  if (*len + n >= CK_CLI_OUTPUT_BUF_SIZE) {
501  output_flush(buf, len);
502  }
503  if (n >= CK_CLI_OUTPUT_BUF_SIZE) {
504  fwrite(text, 1, n, stdout);
505  return;
506  }
507  memcpy(buf + *len, text, n);
508  *len += n;
509 }
#define CK_CLI_OUTPUT_BUF_SIZE
Definition: ck_cli_v6.6.c:43
static void output_flush(char *buf, size_t *len)
Definition: ck_cli_v6.6.c:491
const char * text
Definition: tokenizer.h:563

References CK_CLI_OUTPUT_BUF_SIZE, output_flush(), and text.

Referenced by output_token().

◆ output_flush()

static void output_flush ( char *  buf,
size_t *  len 
)
static

Definition at line 491 of file ck_cli_v6.6.c.

/*
 * Write the `*len` pending bytes of `buf` to stdout and mark the buffer
 * empty. Does nothing when the buffer is already empty.
 */
static void output_flush(char *buf, size_t *len) {
    const size_t pending = *len;

    if (pending > 0) {
        fwrite(buf, 1, pending, stdout);
        *len = 0;
    }
}

Referenced by output_append(), and run_prompt().

◆ output_token()

static void output_token ( char *  buf,
size_t *  len,
const char *  token 
)
static

Definition at line 511 of file ck_cli_v6.6.c.

/*
 * Decode one byte-level BPE token string and queue the decoded text for
 * output. NULL/empty tokens and tokens that decode to nothing (a
 * non-positive count from decode_bpe_token) are dropped.
 */
static void output_token(char *buf, size_t *len, const char *token) {
    char decoded[1024];

    if (token == NULL || token[0] == '\0') {
        return;
    }

    /* output_append measures `decoded` with strlen -- presumably
     * decode_bpe_token NUL-terminates its output; confirm in its definition. */
    if (decode_bpe_token(token, decoded, sizeof(decoded)) > 0) {
        output_append(buf, len, decoded);
    }
}
static void output_append(char *buf, size_t *len, const char *text)
Definition: ck_cli_v6.6.c:497
static int decode_bpe_token(const char *token, char *out, int max)
Definition: ck_cli_v6.6.c:428

References decode_bpe_token(), output_append(), and token.

Referenced by run_prompt().

◆ parse_args()

static bool parse_args ( int  argc,
char **  argv,
CLIOptions *  opt 
)
static

Definition at line 1022 of file ck_cli_v6.6.c.

1022  {
1023  if (!opt) return false;
1024  memset(opt, 0, sizeof(*opt));
1025  opt->max_tokens = CK_CLI_DEFAULT_MAX_TOKENS;
1026  opt->temperature = 0.0f; /* Greedy by default */
1027  opt->top_p = 0.9f;
1028  opt->stream = true; /* Stream by default */
1029  opt->timing = true; /* Show timing by default */
1030  /* Default EOS tokens for Qwen/ChatML */
1031  opt->eos_ids[0] = 151643; /* <|im_end|> */
1032  opt->eos_ids[1] = 151645; /* <|endoftext|> */
1033  opt->eos_ids[2] = 151644; /* <|im_sep|> */
1034  opt->eos_count = 3;
1035 
1036  for (int i = 1; i < argc; i++) {
1037  const char *arg = argv[i];
1038 
1039  if (!strcmp(arg, "--help") || !strcmp(arg, "-h")) {
1040  print_help(argv[0]);
1041  return false;
1042  } else if (!strcmp(arg, "--list")) {
1044  return false;
1045  } else if ((!strcmp(arg, "--model") || !strcmp(arg, "-m")) && i + 1 < argc) {
1046  opt->model_name = argv[++i];
1047  } else if (!strcmp(arg, "--lib") && i + 1 < argc) {
1048  opt->lib_path = argv[++i];
1049  } else if (!strcmp(arg, "--weights") && i + 1 < argc) {
1050  opt->weights_path = argv[++i];
1051  } else if ((!strcmp(arg, "--prompt") || !strcmp(arg, "-p")) && i + 1 < argc) {
1052  opt->prompt_once = argv[++i];
1053  } else if ((!strcmp(arg, "--system") || !strcmp(arg, "-S")) && i + 1 < argc) {
1054  opt->system_prompt = argv[++i];
1055  } else if ((!strcmp(arg, "--max-tokens") || !strcmp(arg, "-n")) && i + 1 < argc) {
1056  opt->max_tokens = atoi(argv[++i]);
1057  } else if ((!strcmp(arg, "--context") || !strcmp(arg, "-c")) && i + 1 < argc) {
1058  opt->context_override = atoi(argv[++i]);
1059  } else if ((!strcmp(arg, "--temperature") || !strcmp(arg, "-T")) && i + 1 < argc) {
1060  opt->temperature = (float)atof(argv[++i]);
1061  } else if (!strcmp(arg, "--top-p") && i + 1 < argc) {
1062  opt->top_p = (float)atof(argv[++i]);
1063  } else if (!strcmp(arg, "--stream") || !strcmp(arg, "-s")) {
1064  opt->stream = true;
1065  } else if (!strcmp(arg, "--no-stream")) {
1066  opt->stream = false;
1067  } else if (!strcmp(arg, "--timing") || !strcmp(arg, "-t")) {
1068  opt->timing = true;
1069  } else if (!strcmp(arg, "--no-timing")) {
1070  opt->timing = false;
1071  } else if (!strcmp(arg, "--no-chat-template")) {
1072  opt->no_chat_template = true;
1073  } else if (!strcmp(arg, "--eos") && i + 1 < argc) {
1074  parse_eos_ids(argv[++i], opt);
1075  } else if (!strcmp(arg, "--ignore-eos")) {
1076  opt->ignore_eos = true;
1077  } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
1078  opt->verbose = true;
1079  } else if (arg[0] != '-') {
1080  if (!opt->lib_path) opt->lib_path = arg;
1081  else if (!opt->weights_path) opt->weights_path = arg;
1082  else {
1083  fprintf(stderr, "Unknown argument: %s\n", arg);
1084  return false;
1085  }
1086  } else {
1087  fprintf(stderr, "Unknown option: %s\n", arg);
1088  return false;
1089  }
1090  }
1091 
1092  /* Auto-discover model if --model specified */
1093  if (opt->model_name && (!opt->lib_path || !opt->weights_path)) {
1094  static char lib_buf[4096], weights_buf[4096];
1095  if (find_model_in_cache(opt->model_name, lib_buf, weights_buf, sizeof(lib_buf))) {
1096  opt->lib_path = lib_buf;
1097  opt->weights_path = weights_buf;
1098  } else {
1099  fprintf(stderr, "Error: model '%s' not found in cache\n", opt->model_name);
1100  fprintf(stderr, "Run with --list to see available models\n");
1101  return false;
1102  }
1103  }
1104 
1105  if (!opt->lib_path || !opt->weights_path) {
1106  print_help(argv[0]);
1107  return false;
1108  }
1109 
1110  /* Auto-detect chat template from model name/path */
1111  const char *name_for_template = opt->model_name ? opt->model_name : opt->lib_path;
1112  opt->chat_template = detect_chat_template(name_for_template);
1113 
1114  /* Load EOS tokens from vocab.json if available */
1115  if (load_eos_from_vocab_json(opt->weights_path, opt)) {
1116  if (opt->verbose) {
1117  printf("[DEBUG] Loaded %d EOS tokens: ", opt->eos_count);
1118  for (int i = 0; i < opt->eos_count; i++) {
1119  printf("%d ", opt->eos_ids[i]);
1120  }
1121  printf("\n");
1122  }
1123  }
1124 
1125  return true;
1126 }
static bool parse_eos_ids(const char *arg, CLIOptions *opt)
Definition: ck_cli_v6.6.c:775
static bool load_eos_from_vocab_json(const char *weights_path, CLIOptions *opt)
Definition: ck_cli_v6.6.c:242
static bool find_model_in_cache(const char *model_name, char *lib_out, char *weights_out, size_t out_size)
Definition: ck_cli_v6.6.c:206
static void print_help(const char *prog)
Definition: ck_cli_v6.6.c:989
static ChatTemplateType detect_chat_template(const char *model_name)
Definition: ck_cli_v6.6.c:575
static void list_available_models(void)
Definition: ck_cli_v6.6.c:298
#define CK_CLI_DEFAULT_MAX_TOKENS
Definition: ck_cli_v6.6.c:41

References CK_CLI_DEFAULT_MAX_TOKENS, detect_chat_template(), find_model_in_cache(), list_available_models(), load_eos_from_vocab_json(), parse_eos_ids(), and print_help().

Referenced by main().

◆ parse_eos_ids()

static bool parse_eos_ids ( const char *  arg,
CLIOptions *  opt 
)
static

Definition at line 775 of file ck_cli_v6.6.c.

775  {
776  if (!arg || !opt) return false;
777  opt->eos_count = 0;
778  const char *p = arg;
779  while (*p && opt->eos_count < CK_CLI_EOS_MAX) {
780  char *end = NULL;
781  long v = strtol(p, &end, 10);
782  if (end == p) break;
783  opt->eos_ids[opt->eos_count++] = (int)v;
784  p = end;
785  if (*p == ',') p++;
786  }
787  return opt->eos_count > 0;
788 }
#define CK_CLI_EOS_MAX
Definition: ck_cli_v6.6.c:42
uint32_t end
Definition: utf8.c:215

References CK_CLI_EOS_MAX, and end.

Referenced by parse_args().

◆ print_banner()

static void print_banner ( void  )
static

Definition at line 982 of file ck_cli_v6.6.c.

982  {
983  printf("\n");
984  printf(" \033[1;36mC-Kernel-Engine v%s\033[0m\n", CK_CLI_VERSION);
985  printf(" Native inference CLI with true-BPE tokenization\n");
986  printf("\n");
987 }
#define CK_CLI_VERSION
Definition: ck_cli_v6.6.c:40

References CK_CLI_VERSION.

Referenced by main(), and print_help().

◆ print_help()

static void print_help ( const char *  prog)
static

Definition at line 989 of file ck_cli_v6.6.c.

989  {
990  print_banner();
991  fprintf(stderr, "Usage:\n");
992  fprintf(stderr, " %s --model <name> Auto-discover model from cache\n", prog);
993  fprintf(stderr, " %s <libmodel.so> <weights.bump> Direct paths\n", prog);
994  fprintf(stderr, " %s --lib <.so> --weights <.bump> Named arguments\n", prog);
995  fprintf(stderr, "\nOptions:\n");
996  fprintf(stderr, " --model, -m NAME Model name (searches in cache)\n");
997  fprintf(stderr, " --lib PATH Path to compiled model .so\n");
998  fprintf(stderr, " --weights PATH Path to weights .bump file\n");
999  fprintf(stderr, " --prompt, -p TEXT Run single prompt (non-interactive)\n");
1000  fprintf(stderr, " --system, -S TEXT System prompt\n");
1001  fprintf(stderr, " --max-tokens, -n N Max tokens to generate (default: %d)\n", CK_CLI_DEFAULT_MAX_TOKENS);
1002  fprintf(stderr, " --context, -c N Override context/KV cache size\n");
1003  fprintf(stderr, " --temperature, -T F Sampling temperature (default: 0.0 = greedy)\n");
1004  fprintf(stderr, " --top-p F Nucleus sampling top-p (default: 0.9)\n");
1005  fprintf(stderr, " --stream, -s Stream tokens as generated\n");
1006  fprintf(stderr, " --timing, -t Show timing breakdown\n");
1007  fprintf(stderr, " --no-chat-template Disable chat template formatting\n");
1008  fprintf(stderr, " --eos IDS Comma-separated EOS token IDs\n");
1009  fprintf(stderr, " --ignore-eos Do not stop on EOS tokens\n");
1010  fprintf(stderr, " --list List available models\n");
1011  fprintf(stderr, " --verbose, -v Verbose output\n");
1012  fprintf(stderr, " --help, -h Show this help\n");
1013  fprintf(stderr, "\nREPL Commands:\n");
1014  fprintf(stderr, " /exit, /quit Exit the REPL\n");
1015  fprintf(stderr, " /reset Reset KV cache\n");
1016  fprintf(stderr, " /timing Toggle timing display\n");
1017  fprintf(stderr, " /temp <value> Set temperature\n");
1018  fprintf(stderr, " /system <text> Set system prompt\n");
1019  fprintf(stderr, " /help Show help\n");
1020 }

References CK_CLI_DEFAULT_MAX_TOKENS, and print_banner().

Referenced by parse_args().

◆ process_repl_command()

static bool process_repl_command ( const char *  line,
CLIOptions *  opt,
ModelAPI *  api 
)
static

Definition at line 1132 of file ck_cli_v6.6.c.

1132  {
1133  if (!line || line[0] != '/') return false;
1134 
1135  if (!strncmp(line, "/exit", 5) || !strncmp(line, "/quit", 5)) {
1136  g_exit_requested = 1;
1137  return true;
1138  }
1139  if (!strncmp(line, "/help", 5)) {
1140  printf("REPL Commands:\n");
1141  printf(" /exit, /quit Exit\n");
1142  printf(" /reset Reset KV cache\n");
1143  printf(" /timing Toggle timing display\n");
1144  printf(" /temp <value> Set temperature (0 = greedy)\n");
1145  printf(" /top-p <value> Set top-p\n");
1146  printf(" /system <text> Set system prompt\n");
1147  printf(" /clear Clear system prompt\n");
1148  printf(" /verbose Toggle verbose mode\n");
1149  return true;
1150  }
1151  if (!strncmp(line, "/reset", 6)) {
1152  if (api->kv_reset) {
1153  api->kv_reset();
1154  printf("[KV cache reset]\n");
1155  }
1156  return true;
1157  }
1158  if (!strncmp(line, "/timing", 7)) {
1159  opt->timing = !opt->timing;
1160  printf("[Timing %s]\n", opt->timing ? "enabled" : "disabled");
1161  return true;
1162  }
1163  if (!strncmp(line, "/verbose", 8)) {
1164  opt->verbose = !opt->verbose;
1165  printf("[Verbose %s]\n", opt->verbose ? "enabled" : "disabled");
1166  return true;
1167  }
1168  if (!strncmp(line, "/temp ", 6)) {
1169  opt->temperature = (float)atof(line + 6);
1170  printf("[Temperature set to %.2f]\n", opt->temperature);
1171  return true;
1172  }
1173  if (!strncmp(line, "/top-p ", 7)) {
1174  opt->top_p = (float)atof(line + 7);
1175  printf("[Top-p set to %.2f]\n", opt->top_p);
1176  return true;
1177  }
1178  if (!strncmp(line, "/system ", 8)) {
1179  opt->system_prompt = strdup(line + 8);
1180  printf("[System prompt set]\n");
1181  return true;
1182  }
1183  if (!strncmp(line, "/clear", 6)) {
1184  opt->system_prompt = NULL;
1185  printf("[System prompt cleared]\n");
1186  return true;
1187  }
1188 
1189  printf("Unknown command: %s\n", line);
1190  return true;
1191 }

References g_exit_requested.

Referenced by main().

◆ resolve_symbol()

static bool resolve_symbol ( void *  handle,
const char *  name,
void **  out_ptr,
bool  required 
)
static

Definition at line 526 of file ck_cli_v6.6.c.

/*
 * Look up `name` in the shared object `handle` with dlsym().
 *
 * When the symbol is absent and `required` is true, an error is printed to
 * stderr, `*out_ptr` is left untouched and false is returned. Otherwise the
 * lookup result (possibly NULL for an optional symbol) is stored through
 * `out_ptr` when that is non-NULL, and true is returned.
 */
static bool resolve_symbol(void *handle, const char *name, void **out_ptr, bool required) {
    void *const address = dlsym(handle, name);
    const bool missing_required = required && address == NULL;

    if (missing_required) {
        fprintf(stderr, "Error: missing symbol %s\n", name);
        return false;
    }

    if (out_ptr != NULL) {
        *out_ptr = address;
    }
    return true;
}

Referenced by load_model_api().

◆ run_prompt()

static int run_prompt ( ModelAPI *  api,
CKTrueBPE *  tokenizer,
CLIOptions *  opt,
const char *  input 
)
static

Definition at line 794 of file ck_cli_v6.6.c.

/*
 * run_prompt: generate a reply for one user input.
 *
 * Steps, in order: wrap `input` (and opt->system_prompt) with the selected
 * chat template, tokenize it, reset the KV cache when the model exposes
 * kv_reset, run prefill through api->embed + api->forward (timed), then loop
 * up to max_tokens times emitting token text to stdout and calling
 * api->decode (timed) for the next step. Optionally prints a timing summary.
 *
 * Returns -1 for NULL arguments, a pending exit request, or a failure while
 * formatting, allocating, tokenizing, embedding or running the forward pass.
 * Returns 0 otherwise -- including when api->decode fails mid-generation,
 * which only ends the loop.
 *
 * NOTE(review): this listing skips original source lines 896, 904, 953 and
 * 957. Presumably 896/953 invoke SAMPLE_NEXT_TOKEN() and 904/957 set/clear
 * g_generation_active (both are listed under "References") -- confirm
 * against the real source file.
 */
794  {
795  if (!api || !tokenizer || !opt || !input) return -1;
796  if (g_exit_requested) return -1;
797 
/* Context size: explicit override, else the model's value, else 4096; capped at CK_CLI_MAX_CONTEXT. */
798  int ctx = opt->context_override;
799  if (ctx <= 0 && api->get_context) ctx = api->get_context();
800  if (ctx <= 0) ctx = 4096;
801  if (ctx > CK_CLI_MAX_CONTEXT) ctx = CK_CLI_MAX_CONTEXT;
802 
803  int max_tokens = opt->max_tokens > 0 ? opt->max_tokens : CK_CLI_DEFAULT_MAX_TOKENS;
804 
805  /* Apply chat template if enabled */
806  const ChatTemplate *tmpl = &g_templates[opt->no_chat_template ? CHAT_TEMPLATE_NONE : opt->chat_template];
807  char *formatted = apply_chat_template(tmpl, opt->system_prompt, input);
808  if (!formatted) {
809  fprintf(stderr, "Error: failed to format prompt\n");
810  return -1;
811  }
812 
813  if (opt->verbose) {
814  printf("[DEBUG] Formatted prompt:\n%s\n", formatted);
815  }
816 
/* Token buffer sized for a full context; freed on every exit path below. */
817  int32_t *ids = (int32_t *)malloc((size_t)ctx * sizeof(int32_t));
818  if (!ids) {
819  fprintf(stderr, "Error: failed to allocate token buffer\n");
820  free(formatted);
821  return -1;
822  }
823 
824  int n = ck_true_bpe_encode(tokenizer, formatted, -1, ids, ctx);
825  free(formatted);
826 
827  if (n <= 0) {
828  fprintf(stderr, "[Tokenizer] failed to encode prompt\n");
829  free(ids);
830  return -1;
831  }
/* Keep room in the context for max_tokens generated tokens.
 * NOTE(review): when max_tokens >= ctx this makes n <= 0, and that value is
 * then passed to api->embed -- confirm whether a lower bound is needed. */
832  if (n > ctx - max_tokens) {
833  n = ctx - max_tokens;
834  if (opt->verbose) {
835  printf("[DEBUG] Truncated prompt to %d tokens\n", n);
836  }
837  }
838 
/* Reset the per-prompt timing counters reported at the end. */
839  g_prefill_time_ms = 0.0;
840  g_decode_time_ms = 0.0;
841  g_decode_count = 0;
842  g_prompt_tokens = n;
843 
844  if (api->kv_reset) api->kv_reset();
845 
846  if (api->embed(ids, n) != 0) {
847  fprintf(stderr, "[Model] embed failed\n");
848  free(ids);
849  return -1;
850  }
851 
/* Prefill: only the forward pass is timed, not the embed call above. */
852  struct timespec t0, t1;
853  clock_gettime(CLOCK_MONOTONIC, &t0);
854  if (api->forward(NULL) != 0) {
855  fprintf(stderr, "[Model] forward failed\n");
856  free(ids);
857  return -1;
858  }
859  clock_gettime(CLOCK_MONOTONIC, &t1);
860  g_prefill_time_ms = (t1.tv_sec - t0.tv_sec) * 1000.0 +
861  (t1.tv_nsec - t0.tv_nsec) / 1000000.0;
862 
863  /* Get vocab size for sampling */
864  int vocab_size = api->get_vocab_size ? api->get_vocab_size() : 0;
865 
/* SAMPLE_NEXT_TOKEN sets `next_token`: it samples with sample_top_p from a
 * private copy of the last active row of logits when logits and a vocab size
 * are available, falls back to api->sample(), and yields -1 when neither
 * exists.
 * NOTE(review): the malloc result inside the macro is handed to memcpy
 * without a NULL check. */
866  /* Helper: sample next token from logits */
867  #define SAMPLE_NEXT_TOKEN() do { \
868  if (api->get_logits && vocab_size > 0) { \
869  float *logits = api->get_logits(); \
870  if (logits) { \
871  int stride = api->get_logits_stride ? api->get_logits_stride() : vocab_size; \
872  int active = api->get_active_tokens ? api->get_active_tokens() : 1; \
873  float *last_logits = logits; \
874  if (stride > 0) { \
875  if (active < 1) active = 1; \
876  last_logits = logits + (size_t)(active - 1) * (size_t)stride; \
877  } \
878  float *logits_copy = (float *)malloc(vocab_size * sizeof(float)); \
879  memcpy(logits_copy, last_logits, vocab_size * sizeof(float)); \
880  next_token = sample_top_p(logits_copy, vocab_size, opt->temperature, opt->top_p); \
881  free(logits_copy); \
882  } else if (api->sample) { \
883  next_token = api->sample(); \
884  } else { \
885  next_token = -1; \
886  } \
887  } else if (api->sample) { \
888  next_token = api->sample(); \
889  } else { \
890  next_token = -1; \
891  } \
892  } while(0)
893 
894  /* Sample first token */
895  int next_token;
/* NOTE(review): source line 896 is absent from this listing; as shown,
 * next_token would be read uninitialized by the loop below. */
897 
898  char out_buf[CK_CLI_OUTPUT_BUF_SIZE];
899  size_t out_len = 0;
900 
901  /* Initialize EOS pattern detection for this prompt */
902  eos_pattern_init(opt->chat_template);
903 
905 
/* Generation loop: stops on max_tokens, an exit request, cleared
 * g_generation_active, a negative token, an EOS id, an EOS text pattern, or a
 * decode failure. */
906  for (int generated = 0; generated < max_tokens && !g_exit_requested && g_generation_active; generated++) {
907  if (next_token < 0) break;
908 
909  if (opt->verbose) {
910  const char *tok_str = ck_true_bpe_id_to_token(tokenizer, next_token);
911  fprintf(stderr, "[DEBUG] Token %d: %d (%s)\n", generated, next_token, tok_str ? tok_str : "NULL");
912  }
913 
914  if (is_eos_token(opt, next_token)) {
915  if (opt->verbose) {
916  fprintf(stderr, "[DEBUG] EOS detected (token ID), stopping\n");
917  }
918  break;
919  }
920 
921  const char *word = ck_true_bpe_id_to_token(tokenizer, next_token);
922 
/* With --ignore-eos the short-circuit skips eos_pattern_process entirely;
 * output_token is only reachable through that call in this listing. */
923  /* Process token through EOS pattern detection (buffers potential EOS tokens) */
924  if (!opt->ignore_eos &&
925  eos_pattern_process(word, out_buf, &out_len, output_token, opt->chat_template)) {
926  if (opt->verbose) {
927  fprintf(stderr, "[DEBUG] EOS detected (text pattern), stopping\n");
928  }
929  break;
930  }
931 
/* Streaming flushes after every token; otherwise flush once the buffer is more than half full. */
932  if (opt->stream) {
933  output_flush(out_buf, &out_len);
934  fflush(stdout);
935  } else if (out_len > (CK_CLI_OUTPUT_BUF_SIZE / 2)) {
936  output_flush(out_buf, &out_len);
937  fflush(stdout);
938  }
939 
/* Skip the decode step after the last token that will be emitted. */
940  if (generated + 1 >= max_tokens) break;
941 
942  clock_gettime(CLOCK_MONOTONIC, &t0);
943  if (api->decode(next_token, NULL) != 0) {
944  fprintf(stderr, "\n[Model] decode failed\n");
945  break;
946  }
947  clock_gettime(CLOCK_MONOTONIC, &t1);
948  g_decode_time_ms += (t1.tv_sec - t0.tv_sec) * 1000.0 +
949  (t1.tv_nsec - t0.tv_nsec) / 1000000.0;
950  g_decode_count++;
951 
952  /* Sample next token */
954  }
955 
956  #undef SAMPLE_NEXT_TOKEN
958  output_flush(out_buf, &out_len);
959  printf("\n");
960 
961  if (opt->timing) {
962  double total_ms = g_prefill_time_ms + g_decode_time_ms;
/* NOTE(review): total_ms is not used in the code shown, and a zero
 * g_prefill_time_ms makes prefill_rate a floating-point division by zero. */
963  double prefill_rate = g_prompt_tokens / (g_prefill_time_ms / 1000.0);
964  double decode_rate = g_decode_count > 0 ? g_decode_count / (g_decode_time_ms / 1000.0) : 0.0;
965  double avg_decode = g_decode_count > 0 ? g_decode_time_ms / g_decode_count : 0.0;
966 
967  printf("\033[90m"); /* Gray text */
968  printf("prompt: %3d tok / %7.1f ms (%5.1f tok/s) | ", g_prompt_tokens, g_prefill_time_ms, prefill_rate);
969  printf("decode: %3d tok / %7.1f ms (%5.1f tok/s, %5.1f ms/tok)\033[0m\n",
970  g_decode_count, g_decode_time_ms, decode_rate, avg_decode);
971  }
972  fflush(stdout);
973 
974  free(ids);
975  return 0;
976 }
static double g_decode_time_ms
Definition: ck_cli_v6.6.c:52
static char * apply_chat_template(const ChatTemplate *tmpl, const char *system, const char *user)
Definition: ck_cli_v6.6.c:590
static double g_prefill_time_ms
Definition: ck_cli_v6.6.c:51
static int g_decode_count
Definition: ck_cli_v6.6.c:53
static bool is_eos_token(const CLIOptions *opt, int token)
Definition: ck_cli_v6.6.c:620
static void eos_pattern_init(ChatTemplateType tmpl)
Definition: ck_cli_v6.6.c:663
#define CK_CLI_MAX_CONTEXT
Definition: ck_cli_v6.6.c:44
static const ChatTemplate g_templates[]
Definition: ck_cli_v6.6.c:125
#define SAMPLE_NEXT_TOKEN()
static void output_token(char *buf, size_t *len, const char *token)
Definition: ck_cli_v6.6.c:511
static bool eos_pattern_process(const char *token_text, char *out_buf, size_t *out_len, void(*output_fn)(char *, size_t *, const char *), ChatTemplateType tmpl)
Definition: ck_cli_v6.6.c:727
static int g_prompt_tokens
Definition: ck_cli_v6.6.c:54
const int32_t * ids
Definition: tokenizer.h:443
int ck_true_bpe_encode(CKTrueBPE *bpe, const char *text, int text_len, int32_t *ids, int max_ids)
Definition: true_bpe.c:1338

References apply_chat_template(), CHAT_TEMPLATE_NONE, CK_CLI_DEFAULT_MAX_TOKENS, CK_CLI_MAX_CONTEXT, CK_CLI_OUTPUT_BUF_SIZE, ck_true_bpe_encode(), ck_true_bpe_id_to_token(), eos_pattern_init(), eos_pattern_process(), g_decode_count, g_decode_time_ms, g_exit_requested, g_generation_active, g_prefill_time_ms, g_prompt_tokens, g_templates, ids, is_eos_token(), out_len, output_flush(), output_token(), SAMPLE_NEXT_TOKEN, and vocab_size.

Referenced by main().

◆ sample_top_p()

static int sample_top_p ( float *  logits,
int  vocab_size,
float  temperature,
float  top_p 
)
static

Definition at line 335 of file ck_cli_v6.6.c.

335  {
336  if (temperature <= 0.0f || top_p <= 0.0f) {
337  /* Argmax */
338  int best = 0;
339  float best_val = logits[0];
340  for (int i = 1; i < vocab_size; i++) {
341  if (logits[i] > best_val) {
342  best_val = logits[i];
343  best = i;
344  }
345  }
346  return best;
347  }
348 
349  /* Apply temperature */
350  float max_logit = logits[0];
351  for (int i = 1; i < vocab_size; i++) {
352  if (logits[i] > max_logit) max_logit = logits[i];
353  }
354 
355  float sum = 0.0f;
356  for (int i = 0; i < vocab_size; i++) {
357  logits[i] = expf((logits[i] - max_logit) / temperature);
358  sum += logits[i];
359  }
360 
361  /* Normalize to probabilities */
362  for (int i = 0; i < vocab_size; i++) {
363  logits[i] /= sum;
364  }
365 
366  /* Sort indices by probability (simple selection for top-p) */
367  /* For efficiency, we'll do nucleus sampling with cumulative sum */
368  float cumsum = 0.0f;
369  float threshold = (float)rand() / (float)RAND_MAX * top_p;
370 
371  /* Find nucleus tokens and sample */
372  int *indices = (int *)malloc(vocab_size * sizeof(int));
373  float *probs = (float *)malloc(vocab_size * sizeof(float));
374  for (int i = 0; i < vocab_size; i++) {
375  indices[i] = i;
376  probs[i] = logits[i];
377  }
378 
379  /* Simple sort (for small vocab, bubble sort is fine; for large, use qsort) */
380  for (int i = 0; i < vocab_size - 1; i++) {
381  for (int j = i + 1; j < vocab_size; j++) {
382  if (probs[j] > probs[i]) {
383  float tmp_p = probs[i]; probs[i] = probs[j]; probs[j] = tmp_p;
384  int tmp_i = indices[i]; indices[i] = indices[j]; indices[j] = tmp_i;
385  }
386  }
387  cumsum += probs[i];
388  if (cumsum >= top_p) break;
389  }
390 
391  /* Sample from nucleus */
392  float r = (float)rand() / (float)RAND_MAX * cumsum;
393  float acc = 0.0f;
394  int result = indices[0];
395  for (int i = 0; cumsum > 0 && i < vocab_size; i++) {
396  acc += probs[i];
397  if (acc >= r) {
398  result = indices[i];
399  break;
400  }
401  if (acc >= cumsum) break;
402  }
403 
404  free(indices);
405  free(probs);
406  return result;
407 }

References vocab_size.

Variable Documentation

◆ g_decode_count

int g_decode_count = 0
static

Definition at line 53 of file ck_cli_v6.6.c.

Referenced by run_prompt().

◆ g_decode_time_ms

double g_decode_time_ms = 0.0
static

Definition at line 52 of file ck_cli_v6.6.c.

Referenced by run_prompt().

◆ g_eos_state

EOSPatternState g_eos_state = {0}
static

◆ g_exit_requested

volatile sig_atomic_t g_exit_requested = 0
static

Definition at line 47 of file ck_cli_v6.6.c.

Referenced by handle_sigint(), main(), process_repl_command(), and run_prompt().

◆ g_generation_active

volatile sig_atomic_t g_generation_active = 0
static

Definition at line 48 of file ck_cli_v6.6.c.

Referenced by handle_sigint(), and run_prompt().

◆ g_prefill_time_ms

double g_prefill_time_ms = 0.0
static

Definition at line 51 of file ck_cli_v6.6.c.

Referenced by run_prompt().

◆ g_prompt_tokens

int g_prompt_tokens = 0
static

Definition at line 54 of file ck_cli_v6.6.c.

Referenced by run_prompt().

◆ g_templates

const ChatTemplate g_templates[]
static

Definition at line 125 of file ck_cli_v6.6.c.

Referenced by run_prompt().