#include <tokenizer.h>
| Data Fields | |
| --- | --- |
| bool | add_bos |
| bool | add_eos |
| bool | add_space_prefix |
| bool | lowercase |
| bool | space_prefix_detected |
| CKSpacePrefixStyle | space_prefix_style |
| CKSpmMode | spm_mode |
| bool | treat_whitespace_as_suffix |
| CKTokenizerType | type |
| float | unk_score |
| bool | use_trie |
CKTokenizerConfig is defined at line 73 of file tokenizer.h.
| bool CKTokenizerConfig::add_bos |
Definition at line 75 of file tokenizer.h.
Referenced by ck_tokenizer_create(), ck_tokenizer_encode(), ck_tokenizer_set_add_bos_eos(), and main().
| bool CKTokenizerConfig::add_eos |
Definition at line 76 of file tokenizer.h.
Referenced by ck_tokenizer_create(), ck_tokenizer_encode(), ck_tokenizer_set_add_bos_eos(), and main().
| bool CKTokenizerConfig::add_space_prefix |
Definition at line 77 of file tokenizer.h.
Referenced by ck_tokenizer_create(), ck_tokenizer_encode_spm_impl(), ck_tokenizer_encode_spm_llama_impl(), and ck_tokenizer_set_add_space_prefix().
| bool CKTokenizerConfig::lowercase |
Definition at line 78 of file tokenizer.h.
| bool CKTokenizerConfig::space_prefix_detected |
Definition at line 83 of file tokenizer.h.
Referenced by ck_tokenizer_detect_space_prefix_style() and ck_tokenizer_set_space_prefix_style().
| CKSpacePrefixStyle CKTokenizerConfig::space_prefix_style |
Definition at line 82 of file tokenizer.h.
Referenced by ck_tokenizer_detect_space_prefix_style() and ck_tokenizer_set_space_prefix_style().
| CKSpmMode CKTokenizerConfig::spm_mode |
Definition at line 84 of file tokenizer.h.
Referenced by ck_tokenizer_create(), ck_tokenizer_encode(), and ck_tokenizer_set_spm_mode().
| bool CKTokenizerConfig::treat_whitespace_as_suffix |
Definition at line 79 of file tokenizer.h.
| CKTokenizerType CKTokenizerConfig::type |
Definition at line 74 of file tokenizer.h.
Referenced by ck_tokenizer_create() and ck_tokenizer_encode().
| float CKTokenizerConfig::unk_score |
Definition at line 80 of file tokenizer.h.
Referenced by ck_tokenizer_create().
| bool CKTokenizerConfig::use_trie |
Definition at line 81 of file tokenizer.h.
Referenced by ck_tokenizer_set_use_trie() and find_longest_match().