← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ck_cli_v5.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <dlfcn.h>
#include <pthread.h>
#include <unistd.h>
#include "tokenizer/tokenizer.h"

Go to the source code of this file.

Typedefs

typedef int(* decode_t) (int32_t token, float *logits_out)
 
typedef int(* embed_t) (const int32_t *tokens, int num_tokens)
 
typedef int(* forward_t) (float *logits_out)
 
typedef int(* get_int_t) (void)
 
typedef void *(* get_ptr_t) (void)
 
typedef int(* init_t) (const char *weights_path)
 
typedef int(* kv_enable_t) (int capacity)
 
typedef int(* sample_argmax_t) (void)
 
typedef void(* void_func_t) (void)
 

Functions

void * engine_thread_func (void *arg)
 
int main (int argc, char **argv)
 

Typedef Documentation

◆ decode_t

typedef int(* decode_t) (int32_t token, float *logits_out)

Definition at line 17 of file ck_cli_v5.c.

◆ embed_t

typedef int(* embed_t) (const int32_t *tokens, int num_tokens)

Definition at line 14 of file ck_cli_v5.c.

◆ forward_t

typedef int(* forward_t) (float *logits_out)

Definition at line 15 of file ck_cli_v5.c.

◆ get_int_t

typedef int(* get_int_t) (void)

Definition at line 19 of file ck_cli_v5.c.

◆ get_ptr_t

typedef void*(* get_ptr_t) (void)

Definition at line 20 of file ck_cli_v5.c.

◆ init_t

typedef int(* init_t) (const char *weights_path)

Definition at line 13 of file ck_cli_v5.c.

◆ kv_enable_t

typedef int(* kv_enable_t) (int capacity)

Definition at line 16 of file ck_cli_v5.c.

◆ sample_argmax_t

typedef int(* sample_argmax_t) (void)

Definition at line 18 of file ck_cli_v5.c.

◆ void_func_t

typedef void(* void_func_t) (void)

Definition at line 21 of file ck_cli_v5.c.

Function Documentation

◆ engine_thread_func()

void* engine_thread_func ( void *  arg)

Definition at line 53 of file ck_cli_v5.c.

53  {
54  SharedState *s = (SharedState *)arg;
55 
56  printf("[Engine] Thread started. Initializing model...\n");
57  if (s->init(s->weights_path) != 0) {
58  fprintf(stderr, "[Engine] Failed to init model\n");
59  return NULL;
60  }
61  s->kv_enable(s->get_context());
62 
63  while (1) {
64  pthread_mutex_lock(&s->mutex);
65  while (!s->task_ready && !s->quit) {
66  pthread_cond_wait(&s->cond_task, &s->mutex);
67  }
68  if (s->quit) {
69  pthread_mutex_unlock(&s->mutex);
70  break;
71  }
72 
73  // Start Task
74  int n_prompt = s->n_prompt;
75  int32_t prompt[1024];
76  memcpy(prompt, s->prompt_tokens, n_prompt * sizeof(int32_t));
77  int max_gen = s->max_gen;
78  s->task_ready = false;
79  pthread_mutex_unlock(&s->mutex);
80 
81  // 1. Prefill
82  s->embed(prompt, n_prompt);
83  s->forward(NULL);
84  int32_t next_token = s->sample();
85 
86  // 2. Feedback first token
87  pthread_mutex_lock(&s->mutex);
88  s->last_token = next_token;
89  s->token_ready = true;
90  pthread_cond_signal(&s->cond_done);
91  pthread_mutex_unlock(&s->mutex);
92 
93  // 3. Decode Loop
94  for (int i = 0; i < max_gen; i++) {
95  if (s->decode(next_token, NULL) != 0) break;
96  next_token = s->sample();
97 
98  pthread_mutex_lock(&s->mutex);
99  s->last_token = next_token;
100  s->token_ready = true;
101  pthread_cond_signal(&s->cond_done);
102  pthread_mutex_unlock(&s->mutex);
103 
104  if (next_token == 151643 || next_token == 151645) break;
105  }
106  }
107  return NULL;
108 }

Referenced by main().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 110 of file ck_cli_v5.c.

110  {
111  if (argc < 3) {
112  printf("Usage: %s <libmodel.so> <weights.bump>\n", argv[0]);
113  return 1;
114  }
115 
116  SharedState state = {0};
117  pthread_mutex_init(&state.mutex, NULL);
118  pthread_cond_init(&state.cond_task, NULL);
119  pthread_cond_init(&state.cond_done, NULL);
120  state.weights_path = argv[2];
121 
122  void *handle = dlopen(argv[1], RTLD_NOW);
123  if (!handle) { fprintf(stderr, "%s\n", dlerror()); return 1; }
124 
125  state.init = dlsym(handle, "ck_model_init");
126  state.embed = dlsym(handle, "ck_model_embed_tokens");
127  state.forward = dlsym(handle, "ck_model_forward");
128  state.kv_enable = dlsym(handle, "ck_model_kv_cache_enable");
129  state.decode = dlsym(handle, "ck_model_decode");
130  state.sample = dlsym(handle, "ck_model_sample_argmax");
131  state.get_context = dlsym(handle, "ck_model_get_context_window");
132  get_ptr_t get_offsets = dlsym(handle, "ck_model_get_vocab_offsets");
133  get_ptr_t get_strings = dlsym(handle, "ck_model_get_vocab_strings");
134  get_int_t get_vocab_size = dlsym(handle, "ck_model_get_vocab_size");
135  get_int_t get_num_merges = dlsym(handle, "ck_model_get_num_merges");
136 
137  // Start Engine
138  pthread_t engine_thread;
139  pthread_create(&engine_thread, NULL, engine_thread_func, &state);
140 
141  // Wait for engine to init (simple wait for this demo)
142  sleep(1);
143 
144  // Tokenizer setup
146  ck_tokenizer_load_binary(tokenizer, get_vocab_size(), get_offsets(), get_strings(), get_num_merges(), NULL);
147 
148  char input[1024];
149  while (1) {
150  printf("\nYou: ");
151  if (!fgets(input, sizeof(input), stdin)) break;
152  if (strncmp(input, "/exit", 5) == 0) break;
153 
154  // Tokenize
155  int32_t ids[1024];
156  int n = ck_tokenizer_encode(tokenizer, input, -1, ids, 1024);
157 
158  // Submit Task
159  pthread_mutex_lock(&state.mutex);
160  memcpy(state.prompt_tokens, ids, n * sizeof(int32_t));
161  state.n_prompt = n;
162  state.max_gen = 100;
163  state.task_ready = true;
164  state.token_ready = false;
165  pthread_cond_signal(&state.cond_task);
166  pthread_mutex_unlock(&state.mutex);
167 
168  printf("Assistant: ");
169  fflush(stdout);
170 
171  // UI Detokenizer Loop
172  while (1) {
173  pthread_mutex_lock(&state.mutex);
174  while (!state.token_ready && !state.quit) {
175  pthread_cond_wait(&state.cond_done, &state.mutex);
176  }
177  int32_t tok = state.last_token;
178  state.token_ready = false;
179  pthread_mutex_unlock(&state.mutex);
180 
181  if (tok == 151643 || tok == 151645) break;
182 
183  const char *word = ck_tokenizer_id_to_token(tokenizer, tok);
184  if (word) {
185  if ((unsigned char)word[0] == 0xC4 && (unsigned char)word[1] == 0xA0) {
186  printf(" %s", word + 2);
187  } else {
188  printf("%s", word);
189  }
190  fflush(stdout);
191  }
192  }
193  printf("\n");
194  }
195 
196  pthread_mutex_lock(&state.mutex);
197  state.quit = true;
198  pthread_cond_signal(&state.cond_task);
199  pthread_mutex_unlock(&state.mutex);
200  pthread_join(engine_thread, NULL);
201 
202  return 0;
203 }
void *(* get_ptr_t)(void)
Definition: ck_cli_v5.c:20
int(* get_int_t)(void)
Definition: ck_cli_v5.c:19
void * engine_thread_func(void *arg)
Definition: ck_cli_v5.c:53
const char * ck_tokenizer_id_to_token(const CKTokenizer *tok, int32_t id)
Definition: ck_tokenizer.c:239
int ck_tokenizer_encode(const CKTokenizer *tok, const char *text, int text_len, int32_t *ids, int max_ids)
Definition: ck_tokenizer.c:638
CKTokenizer * ck_tokenizer_create(CKTokenizerType type)
Definition: tokenizer.c:34
const int32_t * ids
Definition: tokenizer.h:443
@ CK_TOKENIZER_BPE
Definition: tokenizer.h:54
int ck_tokenizer_load_binary(CKTokenizer *tok, int vocab_size, const int32_t *offsets, const char *strings, int num_merges, const int32_t *merges)

References CK_TOKENIZER_BPE, ck_tokenizer_create(), ck_tokenizer_encode(), ck_tokenizer_id_to_token(), ck_tokenizer_load_binary(), engine_thread_func(), and ids.