← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ck_cli_v6.5.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <signal.h>
#include <dlfcn.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <dirent.h>
#include <sys/stat.h>
#include "tokenizer/true_bpe.h"
#include "ck_features.h"

Go to the source code of this file.

Macros

#define _GNU_SOURCE
 
#define CK_CLI_DEFAULT_MAX_TOKENS   256
 
#define CK_CLI_EOS_MAX   8
 
#define CK_CLI_HISTORY_FILE   ".ck_cli_history"
 
#define CK_CLI_MAX_CONTEXT   32768
 
#define CK_CLI_OUTPUT_BUF_SIZE   4096
 
#define CK_CLI_VERSION   "6.5.0"
 
#define EOS_PATTERN_BUF_SIZE   64
 
#define EOS_PENDING_MAX   8
 

Typedefs

typedef int(* decode_t) (int32_t token, float *logits_out)
 
typedef int(* embed_t) (const int32_t *tokens, int num_tokens)
 
typedef int(* forward_t) (float *logits_out)
 
typedef void(* free_t) (void)
 
typedef int(* get_int_t) (void)
 
typedef float *(* get_logits_t) (void)
 
typedef void *(* get_ptr_t) (void)
 
typedef int(* init_t) (const char *weights_path)
 
typedef int(* kv_enable_t) (int capacity)
 
typedef void(* kv_reset_t) (void)
 
typedef int(* sample_argmax_t) (void)
 

Enumerations

enum  ChatTemplateType {
  CHAT_TEMPLATE_NONE = 0 , CHAT_TEMPLATE_QWEN , CHAT_TEMPLATE_LLAMA , CHAT_TEMPLATE_CHATML ,
  CHAT_TEMPLATE_MISTRAL
}
 

Functions

static char * apply_chat_template (const ChatTemplate *tmpl, const char *system, const char *user)
 
static int decode_bpe_token (const char *token, char *out, int max)
 
static ChatTemplateType detect_chat_template (const char *model_name)
 
static bool eos_is_potential_prefix (const char *token)
 
static void eos_pattern_init (ChatTemplateType tmpl)
 
static bool eos_pattern_process (const char *token_text, char *out_buf, size_t *out_len, void(*output_fn)(char *, size_t *, const char *), ChatTemplateType tmpl)
 
static void eos_pattern_reset (void)
 
static bool find_model_in_cache (const char *model_name, char *lib_out, char *weights_out, size_t out_size)
 
static const char * get_cache_dir (void)
 
static void handle_sigint (int sig)
 
static bool is_eos_token (const CLIOptions *opt, int token)
 
static void list_available_models (void)
 
static bool load_eos_from_vocab_json (const char *weights_path, CLIOptions *opt)
 
static bool load_model_api (const char *lib_path, ModelAPI *api)
 
int main (int argc, char **argv)
 
static void output_append (char *buf, size_t *len, const char *text)
 
static void output_flush (char *buf, size_t *len)
 
static void output_token (char *buf, size_t *len, const char *token)
 
static bool parse_args (int argc, char **argv, CLIOptions *opt)
 
static bool parse_eos_ids (const char *arg, CLIOptions *opt)
 
static void print_banner (void)
 
static void print_help (const char *prog)
 
static bool process_repl_command (const char *line, CLIOptions *opt, ModelAPI *api)
 
static bool resolve_symbol (void *handle, const char *name, void **out_ptr, bool required)
 
static int run_prompt (ModelAPI *api, CKTrueBPE *tokenizer, CLIOptions *opt, const char *input)
 
static int sample_top_p (float *logits, int vocab_size, float temperature, float top_p)
 

Variables

static int g_decode_count = 0
 
static double g_decode_time_ms = 0.0
 
static EOSPatternState g_eos_state = {0}
 
static volatile sig_atomic_t g_exit_requested = 0
 
static volatile sig_atomic_t g_generation_active = 0
 
static double g_prefill_time_ms = 0.0
 
static int g_prompt_tokens = 0
 
static const ChatTemplate g_templates []
 

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 17 of file ck_cli_v6.5.c.

◆ CK_CLI_DEFAULT_MAX_TOKENS

#define CK_CLI_DEFAULT_MAX_TOKENS   256

Definition at line 41 of file ck_cli_v6.5.c.

◆ CK_CLI_EOS_MAX

#define CK_CLI_EOS_MAX   8

Definition at line 42 of file ck_cli_v6.5.c.

◆ CK_CLI_HISTORY_FILE

#define CK_CLI_HISTORY_FILE   ".ck_cli_history"

Definition at line 45 of file ck_cli_v6.5.c.

◆ CK_CLI_MAX_CONTEXT

#define CK_CLI_MAX_CONTEXT   32768

Definition at line 44 of file ck_cli_v6.5.c.

◆ CK_CLI_OUTPUT_BUF_SIZE

#define CK_CLI_OUTPUT_BUF_SIZE   4096

Definition at line 43 of file ck_cli_v6.5.c.

◆ CK_CLI_VERSION

#define CK_CLI_VERSION   "6.5.0"

Definition at line 40 of file ck_cli_v6.5.c.

◆ EOS_PATTERN_BUF_SIZE

#define EOS_PATTERN_BUF_SIZE   64

Text-based EOS pattern detection with pending output buffering.

When special tokens like <|im_end|> are tokenized as regular text (e.g., !, im, _end, !), we need to detect the pattern in the output and avoid outputting the partial pattern tokens.

This is a workaround for tokenizers that don't properly encode special tokens.

Definition at line 635 of file ck_cli_v6.5.c.

◆ EOS_PENDING_MAX

#define EOS_PENDING_MAX   8

Definition at line 636 of file ck_cli_v6.5.c.

Typedef Documentation

◆ decode_t

typedef int(* decode_t) (int32_t token, float *logits_out)

Definition at line 74 of file ck_cli_v6.5.c.

◆ embed_t

typedef int(* embed_t) (const int32_t *tokens, int num_tokens)

Definition at line 70 of file ck_cli_v6.5.c.

◆ forward_t

typedef int(* forward_t) (float *logits_out)

Definition at line 71 of file ck_cli_v6.5.c.

◆ free_t

typedef void(* free_t) (void)

Definition at line 79 of file ck_cli_v6.5.c.

◆ get_int_t

typedef int(* get_int_t) (void)

Definition at line 77 of file ck_cli_v6.5.c.

◆ get_logits_t

typedef float*(* get_logits_t) (void)

Definition at line 76 of file ck_cli_v6.5.c.

◆ get_ptr_t

typedef void*(* get_ptr_t) (void)

Definition at line 78 of file ck_cli_v6.5.c.

◆ init_t

typedef int(* init_t) (const char *weights_path)

Definition at line 69 of file ck_cli_v6.5.c.

◆ kv_enable_t

typedef int(* kv_enable_t) (int capacity)

Definition at line 72 of file ck_cli_v6.5.c.

◆ kv_reset_t

typedef void(* kv_reset_t) (void)

Definition at line 73 of file ck_cli_v6.5.c.

◆ sample_argmax_t

typedef int(* sample_argmax_t) (void)

Definition at line 75 of file ck_cli_v6.5.c.

Enumeration Type Documentation

◆ ChatTemplateType

Enumerator
CHAT_TEMPLATE_NONE 
CHAT_TEMPLATE_QWEN 
CHAT_TEMPLATE_LLAMA 
CHAT_TEMPLATE_CHATML 
CHAT_TEMPLATE_MISTRAL 

Definition at line 106 of file ck_cli_v6.5.c.

106  {
107  CHAT_TEMPLATE_NONE = 0,
ChatTemplateType
Definition: ck_cli_v6.5.c:106
@ CHAT_TEMPLATE_LLAMA
Definition: ck_cli_v6.5.c:109
@ CHAT_TEMPLATE_MISTRAL
Definition: ck_cli_v6.5.c:111
@ CHAT_TEMPLATE_QWEN
Definition: ck_cli_v6.5.c:108
@ CHAT_TEMPLATE_CHATML
Definition: ck_cli_v6.5.c:110
@ CHAT_TEMPLATE_NONE
Definition: ck_cli_v6.5.c:107

Function Documentation

◆ apply_chat_template()

static char* apply_chat_template ( const ChatTemplate *  tmpl,
const char *  system,
const char *  user 
)
static

Definition at line 588 of file ck_cli_v6.5.c.

588  {
589  size_t needed = 0;
590  if (system && *system) {
591  needed += strlen(tmpl->system_prefix) + strlen(system) + strlen(tmpl->system_suffix);
592  }
593  needed += strlen(tmpl->user_prefix) + strlen(user) + strlen(tmpl->user_suffix);
594  needed += strlen(tmpl->assistant_prefix);
595  needed += 1; /* null terminator */
596 
597  char *result = (char *)malloc(needed);
598  if (!result) return NULL;
599 
600  result[0] = '\0';
601  if (system && *system) {
602  strcat(result, tmpl->system_prefix);
603  strcat(result, system);
604  strcat(result, tmpl->system_suffix);
605  }
606  strcat(result, tmpl->user_prefix);
607  strcat(result, user);
608  strcat(result, tmpl->user_suffix);
609  strcat(result, tmpl->assistant_prefix);
610 
611  return result;
612 }

Referenced by run_prompt().

◆ decode_bpe_token()

static int decode_bpe_token ( const char *  token,
char *  out,
int  max 
)
static

Decode GPT-2 byte-level BPE representation back to actual bytes.

GPT-2's tokenizer maps certain bytes to Unicode code points:

  • Bytes 0x00-0x20 → U+0100-U+0120 (Ā Ć ċ ... Ġ)
  • Bytes 0x7F-0xA0 → U+017F-U+01A0
  • Printable ASCII (0x21-0x7E) stays as-is

This function reverses that mapping.

Parameters
token — Input BPE token string (UTF-8)
out — Output buffer for decoded bytes
max — Size of output buffer
Returns
Number of bytes written (not including NUL)

Definition at line 427 of file ck_cli_v6.5.c.

/**
 * Decode a byte-level BPE token string back to raw bytes.
 *
 * The token is read as UTF-8 and each code point is translated:
 *   - U+0100..U+0120 and U+017F..U+01A0 become the byte (codepoint - 0x100)
 *   - code points below 0x80 are copied unchanged
 *   - U+2581 (SentencePiece space marker) becomes a single space
 *   - any other code point is copied through as its original UTF-8 bytes
 * A byte that does not start a well-formed UTF-8 sequence is copied as-is.
 *
 * NOTE(review): GPT-2's reference bytes_to_unicode table places bytes
 * 0x7F..0xA0 at U+0121..U+0142 and byte 0xAD at U+0143, not at
 * U+017F..U+01A0. The range is kept as originally written because changing
 * it alters the output for vocabularies that are not byte-level; confirm
 * against the vocabulary actually loaded.
 *
 * @param token Input token string (UTF-8, NUL-terminated); NULL yields "".
 * @param out   Output buffer; always NUL-terminated when non-NULL and max > 0.
 * @param max   Size of `out` in bytes, including room for the terminator.
 * @return Number of bytes written, not counting the terminating NUL.
 *         A decoded 0x00 byte is counted but also ends the C string.
 */
static int decode_bpe_token(const char *token, char *out, int max) {
    if (!out || max <= 0) return 0;
    out[0] = '\0';
    if (!token) return 0;

    const unsigned char *src = (const unsigned char *)token;
    const int limit = max - 1; /* keep one byte for the terminator */
    int out_len = 0;

    while (*src && out_len < limit) {
        unsigned int codepoint;
        int bytes;

        /* Decode one UTF-8 sequence. The continuation-byte tests short-circuit
         * on the terminating NUL, so src is never read past the end. */
        if ((src[0] & 0x80) == 0) {
            codepoint = src[0];
            bytes = 1;
        } else if ((src[0] & 0xE0) == 0xC0 && (src[1] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x1Fu) << 6) | (src[1] & 0x3Fu);
            bytes = 2;
        } else if ((src[0] & 0xF0) == 0xE0 && (src[1] & 0xC0) == 0x80 &&
                   (src[2] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x0Fu) << 12) | ((src[1] & 0x3Fu) << 6) |
                        (src[2] & 0x3Fu);
            bytes = 3;
        } else if ((src[0] & 0xF8) == 0xF0 && (src[1] & 0xC0) == 0x80 &&
                   (src[2] & 0xC0) == 0x80 && (src[3] & 0xC0) == 0x80) {
            codepoint = ((src[0] & 0x07u) << 18) | ((src[1] & 0x3Fu) << 12) |
                        ((src[2] & 0x3Fu) << 6) | (src[3] & 0x3Fu);
            bytes = 4;
        } else {
            /* Invalid UTF-8: pass the byte through untouched. */
            out[out_len++] = (char)*src;
            src++;
            continue;
        }

        if ((codepoint >= 0x100 && codepoint <= 0x120) ||
            (codepoint >= 0x17F && codepoint <= 0x1A0)) {
            /* Remapped byte: byte value = codepoint - 0x100. */
            out[out_len++] = (char)(codepoint - 0x100);
        } else if (codepoint < 0x80) {
            out[out_len++] = (char)codepoint;
        } else if (codepoint == 0x2581) {
            out[out_len++] = ' ';
        } else {
            /* Copy the character whole or not at all: a partial copy would
             * leave a torn UTF-8 sequence at the end of the buffer. */
            if (bytes > limit - out_len) break;
            memcpy(out + out_len, src, (size_t)bytes);
            out_len += bytes;
        }

        src += bytes;
    }

    out[out_len] = '\0';
    return out_len;
}
const char * token
Definition: tokenizer.h:306
const int32_t int int * out_len
Definition: tokenizer.h:445

References out_len, and token.

Referenced by output_token().

◆ detect_chat_template()

static ChatTemplateType detect_chat_template ( const char *  model_name)
static

Definition at line 573 of file ck_cli_v6.5.c.

573  {
574  if (!model_name) return CHAT_TEMPLATE_CHATML;
575 
576  /* Lowercase comparison */
577  char lower[256];
578  strncpy(lower, model_name, sizeof(lower) - 1);
579  for (char *p = lower; *p; p++) *p = (*p >= 'A' && *p <= 'Z') ? *p + 32 : *p;
580 
581  if (strstr(lower, "qwen")) return CHAT_TEMPLATE_QWEN;
582  if (strstr(lower, "llama")) return CHAT_TEMPLATE_LLAMA;
583  if (strstr(lower, "mistral")) return CHAT_TEMPLATE_MISTRAL;
584 
585  return CHAT_TEMPLATE_CHATML; /* Default */
586 }

References CHAT_TEMPLATE_CHATML, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, and CHAT_TEMPLATE_QWEN.

Referenced by parse_args().

◆ eos_is_potential_prefix()

static bool eos_is_potential_prefix ( const char *  token)
static

Check if token might be start of EOS pattern.

Definition at line 682 of file ck_cli_v6.5.c.

682  {
683  if (!token || !g_eos_state.partial_prefix) return false;
684 
685  /* Check if current accumulated buffer + token could start the pattern */
686  size_t tlen = strlen(token);
687  size_t plen = g_eos_state.pattern_len;
688  size_t target_len = g_eos_state.target_pattern ? strlen(g_eos_state.target_pattern) : 0;
689 
690  /* If buffer + token contains partial match of target, it's a potential prefix */
691  if (target_len == 0) return false;
692 
693  /* Build temp buffer */
694  char temp[EOS_PATTERN_BUF_SIZE];
695  if (plen + tlen >= EOS_PATTERN_BUF_SIZE) return false;
696  memcpy(temp, g_eos_state.pattern_buf, plen);
697  memcpy(temp + plen, token, tlen);
698  temp[plen + tlen] = '\0';
699 
700  /* Check if temp is a prefix of target or contains start of target */
701  const char *target = g_eos_state.target_pattern;
702  size_t temp_len = plen + tlen;
703 
704  /* Look for any suffix of temp that is a prefix of target */
705  for (size_t i = 0; i < temp_len; i++) {
706  size_t remaining = temp_len - i;
707  if (remaining > target_len) remaining = target_len;
708  if (strncmp(temp + i, target, remaining) == 0) {
709  return true;
710  }
711  }
712 
713  return false;
714 }
#define EOS_PATTERN_BUF_SIZE
Definition: ck_cli_v6.5.c:635
static EOSPatternState g_eos_state
Definition: ck_cli_v6.5.c:647

References EOS_PATTERN_BUF_SIZE, g_eos_state, and token.

Referenced by eos_pattern_process().

◆ eos_pattern_init()

static void eos_pattern_init ( ChatTemplateType  tmpl)
static

Definition at line 661 of file ck_cli_v6.5.c.

661  {
663  switch (tmpl) {
664  case CHAT_TEMPLATE_QWEN:
666  g_eos_state.target_pattern = "im_end";
667  g_eos_state.partial_prefix = "im";
668  break;
669  case CHAT_TEMPLATE_LLAMA:
671  g_eos_state.target_pattern = "</s>";
672  g_eos_state.partial_prefix = "</";
673  break;
674  default:
675  break;
676  }
677 }
static void eos_pattern_reset(void)
Definition: ck_cli_v6.5.c:649

References CHAT_TEMPLATE_CHATML, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, CHAT_TEMPLATE_QWEN, eos_pattern_reset(), and g_eos_state.

Referenced by run_prompt().

◆ eos_pattern_process()

static bool eos_pattern_process ( const char *  token_text,
char *  out_buf,
size_t *  out_len,
void(*)(char *, size_t *, const char *)  output_fn,
ChatTemplateType  tmpl 
)
static

Process a token for EOS pattern detection.

Parameters
token_text — The token text to process
out_buf — Output buffer for safe-to-output text
out_len — Current length of output buffer
output_fn — Callback used to emit text; invoked as output_fn(out_buf, out_len, text)
tmpl — Chat template type
Returns
true if EOS pattern detected, false otherwise

Definition at line 725 of file ck_cli_v6.5.c.

727  {
728  if (!token_text || !g_eos_state.target_pattern) {
729  /* No pattern to match - output directly */
730  if (token_text && output_fn) output_fn(out_buf, out_len, token_text);
731  return false;
732  }
733 
734  /* Append to pattern buffer */
735  size_t tlen = strlen(token_text);
736  if (g_eos_state.pattern_len + (int)tlen < EOS_PATTERN_BUF_SIZE - 1) {
737  memcpy(g_eos_state.pattern_buf + g_eos_state.pattern_len, token_text, tlen);
738  g_eos_state.pattern_len += (int)tlen;
739  g_eos_state.pattern_buf[g_eos_state.pattern_len] = '\0';
740  }
741 
742  /* Check if pattern is complete */
743  if (strstr(g_eos_state.pattern_buf, g_eos_state.target_pattern)) {
744  /* EOS detected - don't output pending tokens */
746  return true;
747  }
748 
749  /* Check if this could still be part of the pattern */
750  if (eos_is_potential_prefix(token_text)) {
751  /* Hold this token - might be part of EOS */
752  if (g_eos_state.pending_count < EOS_PENDING_MAX) {
753  g_eos_state.pending[g_eos_state.pending_count] = strdup(token_text);
754  g_eos_state.pending_count++;
755  }
756  return false;
757  }
758 
759  /* Not part of pattern - flush pending tokens and this one */
760  for (int i = 0; i < g_eos_state.pending_count; i++) {
761  if (output_fn) output_fn(out_buf, out_len, g_eos_state.pending[i]);
762  free(g_eos_state.pending[i]);
763  g_eos_state.pending[i] = NULL;
764  }
765  g_eos_state.pending_count = 0;
766  g_eos_state.pattern_len = 0;
767  g_eos_state.pattern_buf[0] = '\0';
768 
769  if (output_fn) output_fn(out_buf, out_len, token_text);
770  return false;
771 }
static bool eos_is_potential_prefix(const char *token)
Definition: ck_cli_v6.5.c:682
#define EOS_PENDING_MAX
Definition: ck_cli_v6.5.c:636

References eos_is_potential_prefix(), EOS_PATTERN_BUF_SIZE, eos_pattern_reset(), EOS_PENDING_MAX, g_eos_state, and out_len.

Referenced by run_prompt().

◆ eos_pattern_reset()

static void eos_pattern_reset ( void  )
static

Definition at line 649 of file ck_cli_v6.5.c.

649  {
650  g_eos_state.pattern_len = 0;
651  g_eos_state.pattern_buf[0] = '\0';
652  for (int i = 0; i < g_eos_state.pending_count; i++) {
653  free(g_eos_state.pending[i]);
654  g_eos_state.pending[i] = NULL;
655  }
656  g_eos_state.pending_count = 0;
657  g_eos_state.target_pattern = NULL;
658  g_eos_state.partial_prefix = NULL;
659 }

References g_eos_state.

Referenced by eos_pattern_init(), and eos_pattern_process().

◆ find_model_in_cache()

static bool find_model_in_cache ( const char *  model_name,
char *  lib_out,
char *  weights_out,
size_t  out_size 
)
static

Definition at line 205 of file ck_cli_v6.5.c.

/**
 * Locate a cached model whose directory name contains `model_name`.
 *
 * Scans the directory returned by get_cache_dir(), skipping entries whose
 * name starts with '.'. The first entry that contains `model_name` and holds
 * both "ck-kernel-inference.so" and "weights.bump" wins.
 *
 * @param model_name  Substring to look for in directory names.
 * @param lib_out     Receives the path of the shared library.
 * @param weights_out Receives the path of the weights file.
 * @param out_size    Capacity in bytes of each output buffer.
 * @return true when a model was found and both outputs hold complete,
 *         NUL-terminated paths; false otherwise (outputs untouched). A
 *         candidate whose paths do not fit is skipped rather than truncated.
 */
static bool find_model_in_cache(const char *model_name, char *lib_out, char *weights_out, size_t out_size) {
    if (!model_name || !lib_out || !weights_out || out_size == 0) return false;

    const char *cache_dir = get_cache_dir();
    DIR *dir = opendir(cache_dir);
    if (!dir) return false;

    bool found = false;
    struct dirent *entry;
    while (!found && (entry = readdir(dir)) != NULL) {
        if (entry->d_name[0] == '.') continue;

        /* Check if directory name contains model_name */
        if (strstr(entry->d_name, model_name) == NULL) continue;

        char so_path[4096], bump_path[4096];
        int so_len = snprintf(so_path, sizeof(so_path), "%s/%s/ck-kernel-inference.so",
                              cache_dir, entry->d_name);
        int bump_len = snprintf(bump_path, sizeof(bump_path), "%s/%s/weights.bump",
                                cache_dir, entry->d_name);

        /* A truncated path can never name the real file; skip the entry. */
        if (so_len < 0 || (size_t)so_len >= sizeof(so_path)) continue;
        if (bump_len < 0 || (size_t)bump_len >= sizeof(bump_path)) continue;
        if ((size_t)so_len >= out_size || (size_t)bump_len >= out_size) continue;

        struct stat st;
        if (stat(so_path, &st) == 0 && stat(bump_path, &st) == 0) {
            /* Lengths were checked above, so the terminator fits too. */
            memcpy(lib_out, so_path, (size_t)so_len + 1);
            memcpy(weights_out, bump_path, (size_t)bump_len + 1);
            found = true;
        }
    }

    closedir(dir);
    return found;
}
static const char * get_cache_dir(void)
Definition: ck_cli_v6.5.c:197

References get_cache_dir().

Referenced by parse_args().

◆ get_cache_dir()

static const char* get_cache_dir ( void  )
static

Definition at line 197 of file ck_cli_v6.5.c.

/**
 * Return the directory that holds cached models.
 *
 * The path is "<HOME>/.cache/ck-engine-v6.5/models", with "/tmp" standing in
 * for HOME when the environment variable is not set.
 *
 * @return Pointer to a static buffer that is rewritten on every call.
 */
static const char *get_cache_dir(void) {
    static char cache_path[4096];
    const char *base = getenv("HOME");

    snprintf(cache_path, sizeof(cache_path), "%s/.cache/ck-engine-v6.5/models",
             base != NULL ? base : "/tmp");
    return cache_path;
}

Referenced by find_model_in_cache(), and list_available_models().

◆ handle_sigint()

static void handle_sigint ( int  sig)
static

Definition at line 56 of file ck_cli_v6.5.c.

56  {
57  (void)sig;
58  if (g_generation_active) {
59  g_generation_active = 0; /* Stop generation but don't exit */
60  } else {
61  g_exit_requested = 1;
62  }
63 }
static volatile sig_atomic_t g_generation_active
Definition: ck_cli_v6.5.c:48
static volatile sig_atomic_t g_exit_requested
Definition: ck_cli_v6.5.c:47

References g_exit_requested, and g_generation_active.

Referenced by main().

◆ is_eos_token()

static bool is_eos_token ( const CLIOptions *  opt,
int  token 
)
static

Definition at line 618 of file ck_cli_v6.5.c.

618  {
619  if (!opt || opt->ignore_eos) return false;
620  for (int i = 0; i < opt->eos_count; i++) {
621  if (opt->eos_ids[i] == token) return true;
622  }
623  return false;
624 }

References token.

Referenced by run_prompt().

◆ list_available_models()

static void list_available_models ( void  )
static

Definition at line 297 of file ck_cli_v6.5.c.

/**
 * Print the name of every cached model directory to stdout.
 *
 * A directory counts as a model when it contains "ck-kernel-inference.so".
 * Entries whose name starts with '.' are ignored. If the cache directory
 * cannot be opened, a message is written to stderr instead.
 */
static void list_available_models(void) {
    const char *cache_dir = get_cache_dir();

    DIR *dir = opendir(cache_dir);
    if (dir == NULL) {
        fprintf(stderr, "No models found in %s\n", cache_dir);
        return;
    }

    printf("Available models in %s:\n", cache_dir);

    bool any_listed = false;
    for (struct dirent *entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
        if (entry->d_name[0] == '.') {
            continue;
        }

        /* Path of the shared library inside this candidate directory. */
        char so_path[4096];
        snprintf(so_path, sizeof(so_path), "%s/%s/ck-kernel-inference.so",
                 cache_dir, entry->d_name);

        struct stat st;
        if (stat(so_path, &st) != 0) {
            continue;
        }

        printf(" - %s\n", entry->d_name);
        any_listed = true;
    }
    closedir(dir);

    if (!any_listed) {
        printf(" (none found)\n");
    }
}

References get_cache_dir().

Referenced by parse_args().

◆ load_eos_from_vocab_json()

static bool load_eos_from_vocab_json ( const char *  weights_path,
CLIOptions *  opt 
)
static

Definition at line 241 of file ck_cli_v6.5.c.

241  {
242  if (!weights_path || !opt) return false;
243 
244  /* Construct vocab.json path from weights path */
245  char vocab_path[4096];
246  const char *slash = strrchr(weights_path, '/');
247  if (!slash) return false;
248 
249  size_t dir_len = (size_t)(slash - weights_path);
250  if (dir_len + 12 >= sizeof(vocab_path)) return false;
251 
252  memcpy(vocab_path, weights_path, dir_len);
253  vocab_path[dir_len] = '\0';
254  strcat(vocab_path, "/vocab.json");
255 
256  FILE *f = fopen(vocab_path, "r");
257  if (!f) return false;
258 
259  /* Simple JSON parsing for special_tokens */
260  char buf[8192];
261  size_t n = fread(buf, 1, sizeof(buf) - 1, f);
262  fclose(f);
263  buf[n] = '\0';
264 
265  /* Look for "special_tokens" section */
266  const char *st = strstr(buf, "\"special_tokens\"");
267  if (!st) return false;
268 
269  /* Extract eos token */
270  const char *eos = strstr(st, "\"eos\"");
271  if (eos) {
272  const char *colon = strchr(eos, ':');
273  if (colon) {
274  int eos_id = atoi(colon + 1);
275  if (eos_id > 0) {
276  opt->eos_ids[0] = eos_id;
277  opt->eos_count = 1;
278  }
279  }
280  }
281 
282  /* Extract bos token (often used as im_end for chat) */
283  const char *bos = strstr(st, "\"bos\"");
284  if (bos) {
285  const char *colon = strchr(bos, ':');
286  if (colon) {
287  int bos_id = atoi(colon + 1);
288  if (bos_id > 0 && bos_id != opt->eos_ids[0]) {
289  opt->eos_ids[opt->eos_count++] = bos_id;
290  }
291  }
292  }
293 
294  return opt->eos_count > 0;
295 }
int32_t int32_t int32_t eos
Definition: tokenizer.h:231
int32_t int32_t bos
Definition: tokenizer.h:230

References bos, and eos.

Referenced by parse_args().

◆ load_model_api()

static bool load_model_api ( const char *  lib_path,
ModelAPI *  api 
)
static

Definition at line 535 of file ck_cli_v6.5.c.

535  {
536  if (!lib_path || !api) return false;
537  memset(api, 0, sizeof(*api));
538  api->handle = dlopen(lib_path, RTLD_NOW);
539  if (!api->handle) {
540  fprintf(stderr, "Error: dlopen failed: %s\n", dlerror());
541  return false;
542  }
543 
544  if (!resolve_symbol(api->handle, "ck_model_init", (void **)&api->init, true)) return false;
545  if (!resolve_symbol(api->handle, "ck_model_embed_tokens", (void **)&api->embed, true)) return false;
546  if (!resolve_symbol(api->handle, "ck_model_forward", (void **)&api->forward, true)) return false;
547  if (!resolve_symbol(api->handle, "ck_model_decode", (void **)&api->decode, true)) return false;
548  if (!resolve_symbol(api->handle, "ck_model_sample_argmax", (void **)&api->sample, true)) return false;
549  resolve_symbol(api->handle, "ck_model_get_logits", (void **)&api->get_logits, false);
550  resolve_symbol(api->handle, "ck_model_kv_cache_enable", (void **)&api->kv_enable, false);
551  resolve_symbol(api->handle, "ck_model_kv_cache_reset", (void **)&api->kv_reset, false);
552  resolve_symbol(api->handle, "ck_model_get_context_window", (void **)&api->get_context, false);
553  resolve_symbol(api->handle, "ck_model_get_vocab_size", (void **)&api->get_vocab_size, false);
554  resolve_symbol(api->handle, "ck_model_get_num_merges", (void **)&api->get_num_merges, false);
555  resolve_symbol(api->handle, "ck_model_get_vocab_strings_size", (void **)&api->get_vocab_bytes, false);
556  resolve_symbol(api->handle, "ck_model_get_active_tokens", (void **)&api->get_active_tokens, false);
557  resolve_symbol(api->handle, "ck_model_get_vocab_offsets", (void **)&api->get_offsets, false);
558  resolve_symbol(api->handle, "ck_model_get_vocab_strings", (void **)&api->get_strings, false);
559  resolve_symbol(api->handle, "ck_model_get_vocab_merges", (void **)&api->get_merges, false);
560  resolve_symbol(api->handle, "ck_model_free", (void **)&api->free_fn, false);
561 
562  if (!api->get_vocab_size || !api->get_vocab_bytes || !api->get_offsets || !api->get_strings) {
563  fprintf(stderr, "Error: vocab accessors missing from model\n");
564  return false;
565  }
566  return true;
567 }
static bool resolve_symbol(void *handle, const char *name, void **out_ptr, bool required)
Definition: ck_cli_v6.5.c:525

References resolve_symbol().

Referenced by main().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 1196 of file ck_cli_v6.5.c.

1196  {
1197  signal(SIGINT, handle_sigint);
1198  srand((unsigned int)time(NULL));
1199 
1200  CLIOptions opt;
1201  if (!parse_args(argc, argv, &opt)) {
1202  return 1;
1203  }
1204 
1205  print_banner();
1206  printf("Loading: %s\n", opt.lib_path);
1207 
1208  ModelAPI api;
1209  if (!load_model_api(opt.lib_path, &api)) {
1210  return 1;
1211  }
1212 
1213  printf("Initializing model...\n");
1214  if (api.init(opt.weights_path) != 0) {
1215  fprintf(stderr, "Error: ck_model_init failed\n");
1216  return 1;
1217  }
1218 
1219  int ctx = opt.context_override;
1220  if (ctx <= 0 && api.get_context) ctx = api.get_context();
1221  if (api.kv_enable && ctx > 0) {
1222  api.kv_enable(ctx);
1223  }
1224 
1225  CKTrueBPE *tokenizer = ck_true_bpe_create();
1226  if (!tokenizer) {
1227  fprintf(stderr, "[Tokenizer] failed to create\n");
1228  return 1;
1229  }
1230 
1231  int vocab_size = api.get_vocab_size ? api.get_vocab_size() : 0;
1232  int vocab_bytes = api.get_vocab_bytes ? api.get_vocab_bytes() : 0;
1233  int num_merges = api.get_num_merges ? api.get_num_merges() : 0;
1234  const int32_t *offsets = (const int32_t *)api.get_offsets();
1235  const char *strings = (const char *)api.get_strings();
1236  const int32_t *merges = api.get_merges ? (const int32_t *)api.get_merges() : NULL;
1237 
1238  if (vocab_size <= 0 || vocab_bytes <= 0 || !offsets || !strings) {
1239  fprintf(stderr, "[Tokenizer] missing vocab data in model\n");
1240  ck_true_bpe_free(tokenizer);
1241  return 1;
1242  }
1243 
1245  fprintf(stderr, "[Tokenizer] failed to load vocab\n");
1246  ck_true_bpe_free(tokenizer);
1247  return 1;
1248  }
1249 
1250  printf("Ready! Vocab: %d, Context: %d, Template: %s\n",
1251  vocab_size, ctx,
1252  opt.no_chat_template ? "none" :
1253  opt.chat_template == CHAT_TEMPLATE_QWEN ? "qwen" :
1254  opt.chat_template == CHAT_TEMPLATE_LLAMA ? "llama" :
1255  opt.chat_template == CHAT_TEMPLATE_MISTRAL ? "mistral" : "chatml");
1256 
1257  /* Print CPU capability info */
1259  printf("[Hardware] %s | Vector: %d-bit | FMA: %s | AI Accel: %s | Kernel: %s\n",
1260  cap.name, cap.width, cap.has_fma ? "Yes" : "No",
1261  cap.has_ai_accel ? "Yes" : "No", cap.best_kernel);
1262 
1263  printf("Type /help for commands, Ctrl+C to stop generation\n\n");
1264 
1265  setvbuf(stdout, NULL, _IOFBF, 1 << 20);
1266 
1267  if (opt.prompt_once) {
1268  run_prompt(&api, tokenizer, &opt, opt.prompt_once);
1269  } else {
1270  /* REPL */
1271 #ifdef HAVE_READLINE
1272  char *home = getenv("HOME");
1273  char history_path[4096];
1274  if (home) {
1275  snprintf(history_path, sizeof(history_path), "%s/%s", home, CK_CLI_HISTORY_FILE);
1276  read_history(history_path);
1277  }
1278 #endif
1279 
1280  while (!g_exit_requested) {
1281 #ifdef HAVE_READLINE
1282  char *line = readline("\033[1;32mYou:\033[0m ");
1283  if (!line) break;
1284  if (*line) add_history(line);
1285 #else
1286  printf("\033[1;32mYou:\033[0m ");
1287  fflush(stdout);
1288  char line_buf[4096];
1289  if (!fgets(line_buf, sizeof(line_buf), stdin)) {
1290  if (feof(stdin) || g_exit_requested) break;
1291  if (errno == EINTR) break;
1292  continue;
1293  }
1294  /* Remove trailing newline */
1295  size_t len = strlen(line_buf);
1296  if (len > 0 && line_buf[len-1] == '\n') line_buf[len-1] = '\0';
1297  char *line = line_buf;
1298 #endif
1299 
1300  if (line[0] == '\0') {
1301 #ifdef HAVE_READLINE
1302  free(line);
1303 #endif
1304  continue;
1305  }
1306 
1307  if (line[0] == '/') {
1308  process_repl_command(line, &opt, &api);
1309 #ifdef HAVE_READLINE
1310  free(line);
1311 #endif
1312  continue;
1313  }
1314 
1315  printf("\033[1;34mAssistant:\033[0m ");
1316  fflush(stdout);
1317  run_prompt(&api, tokenizer, &opt, line);
1318 
1319 #ifdef HAVE_READLINE
1320  free(line);
1321 #endif
1322  }
1323 
1324 #ifdef HAVE_READLINE
1325  if (home) {
1326  write_history(history_path);
1327  }
1328 #endif
1329  }
1330 
1331  ck_true_bpe_free(tokenizer);
1332  if (api.free_fn) api.free_fn();
1333  if (api.handle) dlclose(api.handle);
1334 
1335  printf("\nGoodbye!\n");
1336  return 0;
1337 }
static void handle_sigint(int sig)
Definition: ck_cli_v6.5.c:56
#define CK_CLI_HISTORY_FILE
Definition: ck_cli_v6.5.c:45
static bool process_repl_command(const char *line, CLIOptions *opt, ModelAPI *api)
Definition: ck_cli_v6.5.c:1131
static void print_banner(void)
Definition: ck_cli_v6.5.c:981
static bool parse_args(int argc, char **argv, CLIOptions *opt)
Definition: ck_cli_v6.5.c:1021
static int run_prompt(ModelAPI *api, CKTrueBPE *tokenizer, CLIOptions *opt, const char *input)
Definition: ck_cli_v6.5.c:792
static bool load_model_api(const char *lib_path, ModelAPI *api)
Definition: ck_cli_v6.5.c:535
static ck_capability_t ck_get_capabilities(void)
Get current platform capabilities.
Definition: ck_features.h:226
CPU capability information structure.
Definition: ck_features.h:215
const char * best_kernel
Definition: ck_features.h:220
const char * name
Definition: ck_features.h:216
void ck_true_bpe_free(CKTrueBPE *bpe)
Definition: true_bpe.c:405
CKTrueBPE * ck_true_bpe_create(void)
Definition: true_bpe.c:342
int ck_true_bpe_load_binary(CKTrueBPE *bpe, int vocab_size, const int32_t *offsets, const char *strings, int num_merges, const int32_t *merges)
Definition: true_bpe.c:606
int const int32_t const char int num_merges
Definition: true_bpe.h:188
int const int32_t const char * strings
Definition: true_bpe.h:187
int const int32_t const char int const int32_t * merges
Definition: true_bpe.h:189
int vocab_size
Definition: true_bpe.h:185
int const int32_t * offsets
Definition: true_bpe.h:186

References ck_capability_t::best_kernel, CHAT_TEMPLATE_LLAMA, CHAT_TEMPLATE_MISTRAL, CHAT_TEMPLATE_QWEN, CK_CLI_HISTORY_FILE, ck_get_capabilities(), ck_true_bpe_create(), ck_true_bpe_free(), ck_true_bpe_load_binary(), g_exit_requested, handle_sigint(), ck_capability_t::has_ai_accel, ck_capability_t::has_fma, load_model_api(), merges, ck_capability_t::name, num_merges, offsets, parse_args(), print_banner(), process_repl_command(), run_prompt(), strings, vocab_size, and ck_capability_t::width.

◆ output_append()

static void output_append ( char *  buf,
size_t *  len,
const char *  text 
)
static

Definition at line 496 of file ck_cli_v6.5.c.

496  {
497  if (!text || !*text) return;
498  size_t n = strlen(text);
499  if (*len + n >= CK_CLI_OUTPUT_BUF_SIZE) {
500  output_flush(buf, len);
501  }
502  if (n >= CK_CLI_OUTPUT_BUF_SIZE) {
503  fwrite(text, 1, n, stdout);
504  return;
505  }
506  memcpy(buf + *len, text, n);
507  *len += n;
508 }
#define CK_CLI_OUTPUT_BUF_SIZE
Definition: ck_cli_v6.5.c:43
static void output_flush(char *buf, size_t *len)
Definition: ck_cli_v6.5.c:490
const char * text
Definition: tokenizer.h:563

References CK_CLI_OUTPUT_BUF_SIZE, output_flush(), and text.

Referenced by output_token().

◆ output_flush()

static void output_flush ( char *  buf,
size_t *  len 
)
static

Definition at line 490 of file ck_cli_v6.5.c.

/**
 * Write the queued output bytes to stdout and mark the buffer empty.
 *
 * @param buf Buffer holding the queued bytes.
 * @param len Number of queued bytes; set to 0 after writing. Nothing is
 *            written when it is already 0.
 */
static void output_flush(char *buf, size_t *len) {
    if (*len > 0) {
        fwrite(buf, 1, *len, stdout);
        *len = 0;
    }
}

Referenced by output_append(), and run_prompt().

◆ output_token()

static void output_token ( char *  buf,
size_t *  len,
const char *  token 
)
static

Definition at line 510 of file ck_cli_v6.5.c.

/*
 * Decode one vocabulary token from its byte-level BPE spelling and queue
 * the resulting bytes in the output buffer.
 *
 * NULL/empty tokens, and tokens for which decode_bpe_token() reports no
 * output (return value <= 0), are dropped.
 *
 * NOTE(review): the decoded bytes are handed to output_append() as a C
 * string, so this relies on decode_bpe_token() NUL-terminating `bytes` -
 * confirm against its implementation.
 */
static void output_token(char *buf, size_t *len, const char *token) {
    char bytes[1024];

    if (token == NULL || token[0] == '\0') {
        return;
    }
    if (decode_bpe_token(token, bytes, sizeof(bytes)) <= 0) {
        return;
    }
    output_append(buf, len, bytes);
}
static void output_append(char *buf, size_t *len, const char *text)
Definition: ck_cli_v6.5.c:496
static int decode_bpe_token(const char *token, char *out, int max)
Definition: ck_cli_v6.5.c:427

References decode_bpe_token(), output_append(), and token.

Referenced by run_prompt().

◆ parse_args()

static bool parse_args ( int  argc,
char **  argv,
CLIOptions *  opt 
)
static

Definition at line 1021 of file ck_cli_v6.5.c.

1021  {
1022  if (!opt) return false;
1023  memset(opt, 0, sizeof(*opt));
1024  opt->max_tokens = CK_CLI_DEFAULT_MAX_TOKENS;
1025  opt->temperature = 0.0f; /* Greedy by default */
1026  opt->top_p = 0.9f;
1027  opt->stream = true; /* Stream by default */
1028  opt->timing = true; /* Show timing by default */
1029  /* Default EOS tokens for Qwen/ChatML */
1030  opt->eos_ids[0] = 151643; /* <|im_end|> */
1031  opt->eos_ids[1] = 151645; /* <|endoftext|> */
1032  opt->eos_ids[2] = 151644; /* <|im_sep|> */
1033  opt->eos_count = 3;
1034 
1035  for (int i = 1; i < argc; i++) {
1036  const char *arg = argv[i];
1037 
1038  if (!strcmp(arg, "--help") || !strcmp(arg, "-h")) {
1039  print_help(argv[0]);
1040  return false;
1041  } else if (!strcmp(arg, "--list")) {
1043  return false;
1044  } else if ((!strcmp(arg, "--model") || !strcmp(arg, "-m")) && i + 1 < argc) {
1045  opt->model_name = argv[++i];
1046  } else if (!strcmp(arg, "--lib") && i + 1 < argc) {
1047  opt->lib_path = argv[++i];
1048  } else if (!strcmp(arg, "--weights") && i + 1 < argc) {
1049  opt->weights_path = argv[++i];
1050  } else if ((!strcmp(arg, "--prompt") || !strcmp(arg, "-p")) && i + 1 < argc) {
1051  opt->prompt_once = argv[++i];
1052  } else if ((!strcmp(arg, "--system") || !strcmp(arg, "-S")) && i + 1 < argc) {
1053  opt->system_prompt = argv[++i];
1054  } else if ((!strcmp(arg, "--max-tokens") || !strcmp(arg, "-n")) && i + 1 < argc) {
1055  opt->max_tokens = atoi(argv[++i]);
1056  } else if ((!strcmp(arg, "--context") || !strcmp(arg, "-c")) && i + 1 < argc) {
1057  opt->context_override = atoi(argv[++i]);
1058  } else if ((!strcmp(arg, "--temperature") || !strcmp(arg, "-T")) && i + 1 < argc) {
1059  opt->temperature = (float)atof(argv[++i]);
1060  } else if (!strcmp(arg, "--top-p") && i + 1 < argc) {
1061  opt->top_p = (float)atof(argv[++i]);
1062  } else if (!strcmp(arg, "--stream") || !strcmp(arg, "-s")) {
1063  opt->stream = true;
1064  } else if (!strcmp(arg, "--no-stream")) {
1065  opt->stream = false;
1066  } else if (!strcmp(arg, "--timing") || !strcmp(arg, "-t")) {
1067  opt->timing = true;
1068  } else if (!strcmp(arg, "--no-timing")) {
1069  opt->timing = false;
1070  } else if (!strcmp(arg, "--no-chat-template")) {
1071  opt->no_chat_template = true;
1072  } else if (!strcmp(arg, "--eos") && i + 1 < argc) {
1073  parse_eos_ids(argv[++i], opt);
1074  } else if (!strcmp(arg, "--ignore-eos")) {
1075  opt->ignore_eos = true;
1076  } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
1077  opt->verbose = true;
1078  } else if (arg[0] != '-') {
1079  if (!opt->lib_path) opt->lib_path = arg;
1080  else if (!opt->weights_path) opt->weights_path = arg;
1081  else {
1082  fprintf(stderr, "Unknown argument: %s\n", arg);
1083  return false;
1084  }
1085  } else {
1086  fprintf(stderr, "Unknown option: %s\n", arg);
1087  return false;
1088  }
1089  }
1090 
1091  /* Auto-discover model if --model specified */
1092  if (opt->model_name && (!opt->lib_path || !opt->weights_path)) {
1093  static char lib_buf[4096], weights_buf[4096];
1094  if (find_model_in_cache(opt->model_name, lib_buf, weights_buf, sizeof(lib_buf))) {
1095  opt->lib_path = lib_buf;
1096  opt->weights_path = weights_buf;
1097  } else {
1098  fprintf(stderr, "Error: model '%s' not found in cache\n", opt->model_name);
1099  fprintf(stderr, "Run with --list to see available models\n");
1100  return false;
1101  }
1102  }
1103 
1104  if (!opt->lib_path || !opt->weights_path) {
1105  print_help(argv[0]);
1106  return false;
1107  }
1108 
1109  /* Auto-detect chat template from model name/path */
1110  const char *name_for_template = opt->model_name ? opt->model_name : opt->lib_path;
1111  opt->chat_template = detect_chat_template(name_for_template);
1112 
1113  /* Load EOS tokens from vocab.json if available */
1114  if (load_eos_from_vocab_json(opt->weights_path, opt)) {
1115  if (opt->verbose) {
1116  printf("[DEBUG] Loaded %d EOS tokens: ", opt->eos_count);
1117  for (int i = 0; i < opt->eos_count; i++) {
1118  printf("%d ", opt->eos_ids[i]);
1119  }
1120  printf("\n");
1121  }
1122  }
1123 
1124  return true;
1125 }
static bool parse_eos_ids(const char *arg, CLIOptions *opt)
Definition: ck_cli_v6.5.c:773
static bool load_eos_from_vocab_json(const char *weights_path, CLIOptions *opt)
Definition: ck_cli_v6.5.c:241
static bool find_model_in_cache(const char *model_name, char *lib_out, char *weights_out, size_t out_size)
Definition: ck_cli_v6.5.c:205
static void print_help(const char *prog)
Definition: ck_cli_v6.5.c:988
static ChatTemplateType detect_chat_template(const char *model_name)
Definition: ck_cli_v6.5.c:573
static void list_available_models(void)
Definition: ck_cli_v6.5.c:297
#define CK_CLI_DEFAULT_MAX_TOKENS
Definition: ck_cli_v6.5.c:41

References CK_CLI_DEFAULT_MAX_TOKENS, detect_chat_template(), find_model_in_cache(), list_available_models(), load_eos_from_vocab_json(), parse_eos_ids(), and print_help().

Referenced by main().

◆ parse_eos_ids()

static bool parse_eos_ids ( const char *  arg,
CLIOptions *  opt 
)
static

Definition at line 773 of file ck_cli_v6.5.c.

773  {
774  if (!arg || !opt) return false;
775  opt->eos_count = 0;
776  const char *p = arg;
777  while (*p && opt->eos_count < CK_CLI_EOS_MAX) {
778  char *end = NULL;
779  long v = strtol(p, &end, 10);
780  if (end == p) break;
781  opt->eos_ids[opt->eos_count++] = (int)v;
782  p = end;
783  if (*p == ',') p++;
784  }
785  return opt->eos_count > 0;
786 }
#define CK_CLI_EOS_MAX
Definition: ck_cli_v6.5.c:42
uint32_t end
Definition: utf8.c:215

References CK_CLI_EOS_MAX, and end.

Referenced by parse_args().

◆ print_banner()

static void print_banner ( void  )
static

Definition at line 981 of file ck_cli_v6.5.c.

981  {
982  printf("\n");
983  printf(" \033[1;36mC-Kernel-Engine v%s\033[0m\n", CK_CLI_VERSION);
984  printf(" Native inference CLI with true-BPE tokenization\n");
985  printf("\n");
986 }
#define CK_CLI_VERSION
Definition: ck_cli_v6.5.c:40

References CK_CLI_VERSION.

Referenced by main(), and print_help().

◆ print_help()

static void print_help ( const char *  prog)
static

Definition at line 988 of file ck_cli_v6.5.c.

988  {
989  print_banner();
990  fprintf(stderr, "Usage:\n");
991  fprintf(stderr, " %s --model <name> Auto-discover model from cache\n", prog);
992  fprintf(stderr, " %s <libmodel.so> <weights.bump> Direct paths\n", prog);
993  fprintf(stderr, " %s --lib <.so> --weights <.bump> Named arguments\n", prog);
994  fprintf(stderr, "\nOptions:\n");
995  fprintf(stderr, " --model, -m NAME Model name (searches in cache)\n");
996  fprintf(stderr, " --lib PATH Path to compiled model .so\n");
997  fprintf(stderr, " --weights PATH Path to weights .bump file\n");
998  fprintf(stderr, " --prompt, -p TEXT Run single prompt (non-interactive)\n");
999  fprintf(stderr, " --system, -S TEXT System prompt\n");
1000  fprintf(stderr, " --max-tokens, -n N Max tokens to generate (default: %d)\n", CK_CLI_DEFAULT_MAX_TOKENS);
1001  fprintf(stderr, " --context, -c N Override context/KV cache size\n");
1002  fprintf(stderr, " --temperature, -T F Sampling temperature (default: 0.0 = greedy)\n");
1003  fprintf(stderr, " --top-p F Nucleus sampling top-p (default: 0.9)\n");
1004  fprintf(stderr, " --stream, -s Stream tokens as generated\n");
1005  fprintf(stderr, " --timing, -t Show timing breakdown\n");
1006  fprintf(stderr, " --no-chat-template Disable chat template formatting\n");
1007  fprintf(stderr, " --eos IDS Comma-separated EOS token IDs\n");
1008  fprintf(stderr, " --ignore-eos Do not stop on EOS tokens\n");
1009  fprintf(stderr, " --list List available models\n");
1010  fprintf(stderr, " --verbose, -v Verbose output\n");
1011  fprintf(stderr, " --help, -h Show this help\n");
1012  fprintf(stderr, "\nREPL Commands:\n");
1013  fprintf(stderr, " /exit, /quit Exit the REPL\n");
1014  fprintf(stderr, " /reset Reset KV cache\n");
1015  fprintf(stderr, " /timing Toggle timing display\n");
1016  fprintf(stderr, " /temp <value> Set temperature\n");
1017  fprintf(stderr, " /system <text> Set system prompt\n");
1018  fprintf(stderr, " /help Show help\n");
1019 }

References CK_CLI_DEFAULT_MAX_TOKENS, and print_banner().

Referenced by parse_args().

◆ process_repl_command()

static bool process_repl_command ( const char *  line,
CLIOptions *  opt,
ModelAPI *  api 
)
static

Definition at line 1131 of file ck_cli_v6.5.c.

1131  {
1132  if (!line || line[0] != '/') return false;
1133 
1134  if (!strncmp(line, "/exit", 5) || !strncmp(line, "/quit", 5)) {
1135  g_exit_requested = 1;
1136  return true;
1137  }
1138  if (!strncmp(line, "/help", 5)) {
1139  printf("REPL Commands:\n");
1140  printf(" /exit, /quit Exit\n");
1141  printf(" /reset Reset KV cache\n");
1142  printf(" /timing Toggle timing display\n");
1143  printf(" /temp <value> Set temperature (0 = greedy)\n");
1144  printf(" /top-p <value> Set top-p\n");
1145  printf(" /system <text> Set system prompt\n");
1146  printf(" /clear Clear system prompt\n");
1147  printf(" /verbose Toggle verbose mode\n");
1148  return true;
1149  }
1150  if (!strncmp(line, "/reset", 6)) {
1151  if (api->kv_reset) {
1152  api->kv_reset();
1153  printf("[KV cache reset]\n");
1154  }
1155  return true;
1156  }
1157  if (!strncmp(line, "/timing", 7)) {
1158  opt->timing = !opt->timing;
1159  printf("[Timing %s]\n", opt->timing ? "enabled" : "disabled");
1160  return true;
1161  }
1162  if (!strncmp(line, "/verbose", 8)) {
1163  opt->verbose = !opt->verbose;
1164  printf("[Verbose %s]\n", opt->verbose ? "enabled" : "disabled");
1165  return true;
1166  }
1167  if (!strncmp(line, "/temp ", 6)) {
1168  opt->temperature = (float)atof(line + 6);
1169  printf("[Temperature set to %.2f]\n", opt->temperature);
1170  return true;
1171  }
1172  if (!strncmp(line, "/top-p ", 7)) {
1173  opt->top_p = (float)atof(line + 7);
1174  printf("[Top-p set to %.2f]\n", opt->top_p);
1175  return true;
1176  }
1177  if (!strncmp(line, "/system ", 8)) {
1178  opt->system_prompt = strdup(line + 8);
1179  printf("[System prompt set]\n");
1180  return true;
1181  }
1182  if (!strncmp(line, "/clear", 6)) {
1183  opt->system_prompt = NULL;
1184  printf("[System prompt cleared]\n");
1185  return true;
1186  }
1187 
1188  printf("Unknown command: %s\n", line);
1189  return true;
1190 }

References g_exit_requested.

Referenced by main().

◆ resolve_symbol()

static bool resolve_symbol ( void *  handle,
const char *  name,
void **  out_ptr,
bool  required 
)
static

Definition at line 525 of file ck_cli_v6.5.c.

/*
 * Look up `name` in the shared object `handle` with dlsym().
 *
 * When `out_ptr` is non-NULL it always receives the lookup result, which
 * is NULL if the symbol was not found, so a caller never keeps a stale or
 * uninitialised pointer after a failed lookup.
 *
 * Returns false only when the symbol is missing and `required` is true
 * (an error is printed to stderr); a missing optional symbol returns
 * true. A NULL `name` is treated as a missing symbol.
 */
static bool resolve_symbol(void *handle, const char *name, void **out_ptr, bool required) {
    void *sym = name ? dlsym(handle, name) : NULL;

    if (out_ptr) *out_ptr = sym;

    if (!sym && required) {
        fprintf(stderr, "Error: missing symbol %s\n", name ? name : "(null)");
        return false;
    }
    return true;
}

Referenced by load_model_api().

◆ run_prompt()

static int run_prompt ( ModelAPI *  api,
CKTrueBPE *  tokenizer,
CLIOptions *  opt,
const char *  input 
)
static

Definition at line 792 of file ck_cli_v6.5.c.

792  {
793  if (!api || !tokenizer || !opt || !input) return -1;
794  if (g_exit_requested) return -1;
795 
796  int ctx = opt->context_override;
797  if (ctx <= 0 && api->get_context) ctx = api->get_context();
798  if (ctx <= 0) ctx = 4096;
799  if (ctx > CK_CLI_MAX_CONTEXT) ctx = CK_CLI_MAX_CONTEXT;
800 
801  int max_tokens = opt->max_tokens > 0 ? opt->max_tokens : CK_CLI_DEFAULT_MAX_TOKENS;
802 
803  /* Apply chat template if enabled */
804  const ChatTemplate *tmpl = &g_templates[opt->no_chat_template ? CHAT_TEMPLATE_NONE : opt->chat_template];
805  char *formatted = apply_chat_template(tmpl, opt->system_prompt, input);
806  if (!formatted) {
807  fprintf(stderr, "Error: failed to format prompt\n");
808  return -1;
809  }
810 
811  if (opt->verbose) {
812  printf("[DEBUG] Formatted prompt:\n%s\n", formatted);
813  }
814 
815  int32_t *ids = (int32_t *)malloc((size_t)ctx * sizeof(int32_t));
816  if (!ids) {
817  fprintf(stderr, "Error: failed to allocate token buffer\n");
818  free(formatted);
819  return -1;
820  }
821 
822  int n = ck_true_bpe_encode(tokenizer, formatted, -1, ids, ctx);
823  free(formatted);
824 
825  if (n <= 0) {
826  fprintf(stderr, "[Tokenizer] failed to encode prompt\n");
827  free(ids);
828  return -1;
829  }
830  if (n > ctx - max_tokens) {
831  n = ctx - max_tokens;
832  if (opt->verbose) {
833  printf("[DEBUG] Truncated prompt to %d tokens\n", n);
834  }
835  }
836 
837  g_prefill_time_ms = 0.0;
838  g_decode_time_ms = 0.0;
839  g_decode_count = 0;
840  g_prompt_tokens = n;
841 
842  if (api->kv_reset) api->kv_reset();
843 
844  if (api->embed(ids, n) != 0) {
845  fprintf(stderr, "[Model] embed failed\n");
846  free(ids);
847  return -1;
848  }
849 
850  struct timespec t0, t1;
851  clock_gettime(CLOCK_MONOTONIC, &t0);
852  if (api->forward(NULL) != 0) {
853  fprintf(stderr, "[Model] forward failed\n");
854  free(ids);
855  return -1;
856  }
857  clock_gettime(CLOCK_MONOTONIC, &t1);
858  g_prefill_time_ms = (t1.tv_sec - t0.tv_sec) * 1000.0 +
859  (t1.tv_nsec - t0.tv_nsec) / 1000000.0;
860 
861  /* Get vocab size for sampling */
862  int vocab_size = api->get_vocab_size ? api->get_vocab_size() : 0;
863 
864  /* Sample first token */
865  int next_token;
866  if (opt->temperature > 0.0f && api->get_logits && vocab_size > 0) {
867  float *logits = api->get_logits();
868  if (logits) {
869  /* Get logits for last position */
870  int active = api->get_active_tokens ? api->get_active_tokens() : 1;
871  float *last_logits = logits + (size_t)(active - 1) * vocab_size;
872  /* Make a copy since sampling modifies in place */
873  float *logits_copy = (float *)malloc(vocab_size * sizeof(float));
874  memcpy(logits_copy, last_logits, vocab_size * sizeof(float));
875  next_token = sample_top_p(logits_copy, vocab_size, opt->temperature, opt->top_p);
876  free(logits_copy);
877  } else {
878  next_token = api->sample();
879  }
880  } else {
881  next_token = api->sample();
882  }
883 
884  char out_buf[CK_CLI_OUTPUT_BUF_SIZE];
885  size_t out_len = 0;
886 
887  /* Initialize EOS pattern detection for this prompt */
888  eos_pattern_init(opt->chat_template);
889 
891 
892  for (int generated = 0; generated < max_tokens && !g_exit_requested && g_generation_active; generated++) {
893  if (next_token < 0) break;
894 
895  if (opt->verbose) {
896  const char *tok_str = ck_true_bpe_id_to_token(tokenizer, next_token);
897  fprintf(stderr, "[DEBUG] Token %d: %d (%s)\n", generated, next_token, tok_str ? tok_str : "NULL");
898  }
899 
900  if (is_eos_token(opt, next_token)) {
901  if (opt->verbose) {
902  fprintf(stderr, "[DEBUG] EOS detected (token ID), stopping\n");
903  }
904  break;
905  }
906 
907  const char *word = ck_true_bpe_id_to_token(tokenizer, next_token);
908 
909  /* Process token through EOS pattern detection (buffers potential EOS tokens) */
910  if (!opt->ignore_eos &&
911  eos_pattern_process(word, out_buf, &out_len, output_token, opt->chat_template)) {
912  if (opt->verbose) {
913  fprintf(stderr, "[DEBUG] EOS detected (text pattern), stopping\n");
914  }
915  break;
916  }
917 
918  if (opt->stream) {
919  output_flush(out_buf, &out_len);
920  fflush(stdout);
921  } else if (out_len > (CK_CLI_OUTPUT_BUF_SIZE / 2)) {
922  output_flush(out_buf, &out_len);
923  fflush(stdout);
924  }
925 
926  if (generated + 1 >= max_tokens) break;
927 
928  clock_gettime(CLOCK_MONOTONIC, &t0);
929  if (api->decode(next_token, NULL) != 0) {
930  fprintf(stderr, "\n[Model] decode failed\n");
931  break;
932  }
933  clock_gettime(CLOCK_MONOTONIC, &t1);
934  g_decode_time_ms += (t1.tv_sec - t0.tv_sec) * 1000.0 +
935  (t1.tv_nsec - t0.tv_nsec) / 1000000.0;
936  g_decode_count++;
937 
938  /* Sample next token */
939  if (opt->temperature > 0.0f && api->get_logits && vocab_size > 0) {
940  float *logits = api->get_logits();
941  if (logits) {
942  int active = api->get_active_tokens ? api->get_active_tokens() : 1;
943  float *last_logits = logits + (size_t)(active - 1) * vocab_size;
944  float *logits_copy = (float *)malloc(vocab_size * sizeof(float));
945  memcpy(logits_copy, last_logits, vocab_size * sizeof(float));
946  next_token = sample_top_p(logits_copy, vocab_size, opt->temperature, opt->top_p);
947  free(logits_copy);
948  } else {
949  next_token = api->sample();
950  }
951  } else {
952  next_token = api->sample();
953  }
954  }
955 
957  output_flush(out_buf, &out_len);
958  printf("\n");
959 
960  if (opt->timing) {
961  double total_ms = g_prefill_time_ms + g_decode_time_ms;
962  double prefill_rate = g_prompt_tokens / (g_prefill_time_ms / 1000.0);
963  double decode_rate = g_decode_count > 0 ? g_decode_count / (g_decode_time_ms / 1000.0) : 0.0;
964  double avg_decode = g_decode_count > 0 ? g_decode_time_ms / g_decode_count : 0.0;
965 
966  printf("\033[90m"); /* Gray text */
967  printf("prompt: %3d tok / %7.1f ms (%5.1f tok/s) | ", g_prompt_tokens, g_prefill_time_ms, prefill_rate);
968  printf("decode: %3d tok / %7.1f ms (%5.1f tok/s, %5.1f ms/tok)\033[0m\n",
969  g_decode_count, g_decode_time_ms, decode_rate, avg_decode);
970  }
971  fflush(stdout);
972 
973  free(ids);
974  return 0;
975 }
static double g_decode_time_ms
Definition: ck_cli_v6.5.c:52
static char * apply_chat_template(const ChatTemplate *tmpl, const char *system, const char *user)
Definition: ck_cli_v6.5.c:588
static int sample_top_p(float *logits, int vocab_size, float temperature, float top_p)
Definition: ck_cli_v6.5.c:334
static double g_prefill_time_ms
Definition: ck_cli_v6.5.c:51
static int g_decode_count
Definition: ck_cli_v6.5.c:53
static bool is_eos_token(const CLIOptions *opt, int token)
Definition: ck_cli_v6.5.c:618
static void eos_pattern_init(ChatTemplateType tmpl)
Definition: ck_cli_v6.5.c:661
#define CK_CLI_MAX_CONTEXT
Definition: ck_cli_v6.5.c:44
static const ChatTemplate g_templates[]
Definition: ck_cli_v6.5.c:124
static void output_token(char *buf, size_t *len, const char *token)
Definition: ck_cli_v6.5.c:510
static bool eos_pattern_process(const char *token_text, char *out_buf, size_t *out_len, void(*output_fn)(char *, size_t *, const char *), ChatTemplateType tmpl)
Definition: ck_cli_v6.5.c:725
static int g_prompt_tokens
Definition: ck_cli_v6.5.c:54
const int32_t * ids
Definition: tokenizer.h:443
int ck_true_bpe_encode(CKTrueBPE *bpe, const char *text, int text_len, int32_t *ids, int max_ids)
Definition: true_bpe.c:1338
const char * ck_true_bpe_id_to_token(const CKTrueBPE *bpe, int32_t id)
Definition: true_bpe.c:645

References apply_chat_template(), CHAT_TEMPLATE_NONE, CK_CLI_DEFAULT_MAX_TOKENS, CK_CLI_MAX_CONTEXT, CK_CLI_OUTPUT_BUF_SIZE, ck_true_bpe_encode(), ck_true_bpe_id_to_token(), eos_pattern_init(), eos_pattern_process(), g_decode_count, g_decode_time_ms, g_exit_requested, g_generation_active, g_prefill_time_ms, g_prompt_tokens, g_templates, ids, is_eos_token(), out_len, output_flush(), output_token(), sample_top_p(), and vocab_size.

Referenced by main().

◆ sample_top_p()

static int sample_top_p ( float *  logits,
int  vocab_size,
float  temperature,
float  top_p 
)
static

Definition at line 334 of file ck_cli_v6.5.c.

334  {
335  if (temperature <= 0.0f || top_p <= 0.0f) {
336  /* Argmax */
337  int best = 0;
338  float best_val = logits[0];
339  for (int i = 1; i < vocab_size; i++) {
340  if (logits[i] > best_val) {
341  best_val = logits[i];
342  best = i;
343  }
344  }
345  return best;
346  }
347 
348  /* Apply temperature */
349  float max_logit = logits[0];
350  for (int i = 1; i < vocab_size; i++) {
351  if (logits[i] > max_logit) max_logit = logits[i];
352  }
353 
354  float sum = 0.0f;
355  for (int i = 0; i < vocab_size; i++) {
356  logits[i] = expf((logits[i] - max_logit) / temperature);
357  sum += logits[i];
358  }
359 
360  /* Normalize to probabilities */
361  for (int i = 0; i < vocab_size; i++) {
362  logits[i] /= sum;
363  }
364 
365  /* Sort indices by probability (simple selection for top-p) */
366  /* For efficiency, we'll do nucleus sampling with cumulative sum */
367  float cumsum = 0.0f;
368  float threshold = (float)rand() / (float)RAND_MAX * top_p;
369 
370  /* Find nucleus tokens and sample */
371  int *indices = (int *)malloc(vocab_size * sizeof(int));
372  float *probs = (float *)malloc(vocab_size * sizeof(float));
373  for (int i = 0; i < vocab_size; i++) {
374  indices[i] = i;
375  probs[i] = logits[i];
376  }
377 
378  /* Simple sort (for small vocab, bubble sort is fine; for large, use qsort) */
379  for (int i = 0; i < vocab_size - 1; i++) {
380  for (int j = i + 1; j < vocab_size; j++) {
381  if (probs[j] > probs[i]) {
382  float tmp_p = probs[i]; probs[i] = probs[j]; probs[j] = tmp_p;
383  int tmp_i = indices[i]; indices[i] = indices[j]; indices[j] = tmp_i;
384  }
385  }
386  cumsum += probs[i];
387  if (cumsum >= top_p) break;
388  }
389 
390  /* Sample from nucleus */
391  float r = (float)rand() / (float)RAND_MAX * cumsum;
392  float acc = 0.0f;
393  int result = indices[0];
394  for (int i = 0; cumsum > 0 && i < vocab_size; i++) {
395  acc += probs[i];
396  if (acc >= r) {
397  result = indices[i];
398  break;
399  }
400  if (acc >= cumsum) break;
401  }
402 
403  free(indices);
404  free(probs);
405  return result;
406 }

References vocab_size.

Referenced by run_prompt().

Variable Documentation

◆ g_decode_count

int g_decode_count = 0
static

Definition at line 53 of file ck_cli_v6.5.c.

Referenced by run_prompt().

◆ g_decode_time_ms

double g_decode_time_ms = 0.0
static

Definition at line 52 of file ck_cli_v6.5.c.

Referenced by run_prompt().

◆ g_eos_state

EOSPatternState g_eos_state = {0}
static

◆ g_exit_requested

volatile sig_atomic_t g_exit_requested = 0
static

Definition at line 47 of file ck_cli_v6.5.c.

Referenced by handle_sigint(), main(), process_repl_command(), and run_prompt().

◆ g_generation_active

volatile sig_atomic_t g_generation_active = 0
static

Definition at line 48 of file ck_cli_v6.5.c.

Referenced by handle_sigint(), and run_prompt().

◆ g_prefill_time_ms

double g_prefill_time_ms = 0.0
static

Definition at line 51 of file ck_cli_v6.5.c.

Referenced by run_prompt().

◆ g_prompt_tokens

int g_prompt_tokens = 0
static

Definition at line 54 of file ck_cli_v6.5.c.

Referenced by run_prompt().

◆ g_templates

const ChatTemplate g_templates[]
static

Definition at line 124 of file ck_cli_v6.5.c.

Referenced by run_prompt().