10 int main(
int argc,
char **argv) {
11 printf(
"=== C-Kernel-Engine Tokenizer Test ===\n\n");
16 fprintf(stderr,
"Failed to create tokenizer\n");
20 printf(
"Tokenizer type: %s\n\n", ck_tokenizer_type_name(tok));
23 printf(
"Adding test vocabulary...\n");
49 const char *test_strings[] = {
53 "hello world testing tokenizer",
57 printf(
"=== Encoding Tests ===\n\n");
59 for (
int i = 0; test_strings[i] != NULL; i++) {
60 const char *
text = test_strings[i];
70 printf(
"Input: \"%s\"\n",
text);
71 printf(
"Tokens [%d]: ",
num_ids);
73 for (
int j = 0; j <
num_ids; j++) {
77 printf(
"(%s)",
token);
79 if (j <
num_ids - 1) printf(
", ");
86 printf(
"Decoded: \"%s\"\n\n", decoded);
90 printf(
"=== Lookup Tests ===\n\n");
98 printf(
"\n=== Test Complete ===\n");
int32_t ck_tokenizer_lookup(const CKTokenizer *tok, const char *token, int len)
int ck_tokenizer_decode(const CKTokenizer *tok, const int32_t *ids, int num_ids, char *text, int max_len)
const char * ck_tokenizer_id_to_token(const CKTokenizer *tok, int32_t id)
int ck_tokenizer_encode(const CKTokenizer *tok, const char *text, int text_len, int32_t *ids, int max_ids)
int32_t ck_tokenizer_add_token(CKTokenizer *tok, const char *token, int len)
void ck_tokenizer_free(CKTokenizer *tok)
static int ck_tokenizer_vocab_size(const CKTokenizer *tok)
int main(int argc, char **argv)
static CKTokenizer * ck_tokenizer_create_bpe(void)
const int32_t int num_ids
int ck_tokenizer_add_special_token(CKTokenizer *tok, const char *name, int32_t id)
const char int int32_t int max_ids