← Back to C-Kernel-Engine Docs Doxygen Source Documentation
CKTokenizer Struct Reference

#include <ck_tokenizer.h>

Data Fields

bool add_bos
 
bool add_eos
 
int32_t bos_id
 
int32_t * byte_token_id
 
CKTokenizerConfig config
 
char * encode_buffer
 
size_t encode_buffer_size
 
int32_t eos_id
 
char ** id_to_token
 
int32_t mask_id
 
int * merge_hash
 
int merge_hash_size
 
int32_t * merge_pairs
 
size_t merge_pairs_size
 
int32_t * merge_result
 
size_t merge_result_size
 
CKMergeRule * merges
 
int num_merges
 
int32_t num_merges
 
int32_t pad_id
 
CKMemPool pool
 
CKTokenizerMemPool pool
 
float * scores
 
size_t scores_size
 
uint8_t * types
 
size_t types_size
 
int32_t unk_id
 
CKTokenizerHashTable * vocab
 
size_t vocab_capacity
 
CKVocabEntry ** vocab_hash
 
int vocab_hash_size
 
int vocab_size
 
size_t vocab_size
 
CKTrie * vocab_trie
 

Detailed Description

Definition at line 76 of file ck_tokenizer.h.

Field Documentation

◆ add_bos

bool CKTokenizer::add_bos

Definition at line 103 of file ck_tokenizer.h.

Referenced by ck_tokenizer_encode().

◆ add_eos

bool CKTokenizer::add_eos

Definition at line 104 of file ck_tokenizer.h.

Referenced by ck_tokenizer_encode().

◆ bos_id

◆ byte_token_id

int32_t* CKTokenizer::byte_token_id

◆ config

◆ encode_buffer

char* CKTokenizer::encode_buffer

Definition at line 137 of file tokenizer.h.

◆ encode_buffer_size

size_t CKTokenizer::encode_buffer_size

Definition at line 138 of file tokenizer.h.

◆ eos_id

◆ id_to_token

◆ mask_id

int32_t CKTokenizer::mask_id

Definition at line 124 of file tokenizer.h.

Referenced by ck_tokenizer_create(), and ck_tokenizer_set_special_ids().

◆ merge_hash

int* CKTokenizer::merge_hash

◆ merge_hash_size

int CKTokenizer::merge_hash_size

◆ merge_pairs

int32_t* CKTokenizer::merge_pairs

Definition at line 130 of file tokenizer.h.

◆ merge_pairs_size

size_t CKTokenizer::merge_pairs_size

Definition at line 131 of file tokenizer.h.

◆ merge_result

int32_t* CKTokenizer::merge_result

Definition at line 132 of file tokenizer.h.

◆ merge_result_size

size_t CKTokenizer::merge_result_size

Definition at line 133 of file tokenizer.h.

◆ merges

CKMergeRule* CKTokenizer::merges

◆ num_merges [1/2]

int CKTokenizer::num_merges

Definition at line 90 of file ck_tokenizer.h.

Referenced by ck_tokenizer_add_merge(), ck_tokenizer_encode(), and ck_tokenizer_load().

◆ num_merges [2/2]

int32_t CKTokenizer::num_merges

Definition at line 134 of file tokenizer.h.

◆ pad_id

◆ pool [1/2]

◆ pool [2/2]

CKTokenizerMemPool CKTokenizer::pool

Definition at line 127 of file tokenizer.h.

◆ scores

◆ scores_size

size_t CKTokenizer::scores_size

◆ types

◆ types_size

size_t CKTokenizer::types_size

◆ unk_id

◆ vocab

◆ vocab_capacity

size_t CKTokenizer::vocab_capacity

Definition at line 108 of file tokenizer.h.

Referenced by ck_tokenizer_add_token(), and ck_tokenizer_create().

◆ vocab_hash

◆ vocab_hash_size

int CKTokenizer::vocab_hash_size

◆ vocab_size [1/2]

◆ vocab_size [2/2]

size_t CKTokenizer::vocab_size

Definition at line 107 of file tokenizer.h.

◆ vocab_trie


The documentation for this struct was generated from the following files: