#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

Go to the source code of this file.
Data Structures | |
| struct | CKTokenizerHashEntry |
| struct | CKTokenizerHashTable |
Macros | |
| #define | CK_TOKENIZER_HT_BUCKETS_LARGE 65536 |
| #define | CK_TOKENIZER_HT_BUCKETS_MEDIUM 8192 |
| #define | CK_TOKENIZER_HT_BUCKETS_SMALL 1024 |
| #define | CK_TOKENIZER_HT_BUCKETS_XL 262144 |
Typedefs | |
| typedef int(* | CKTokenizerHashCallback) (const char *key, void *value, void *user_data) |
Functions | |
| void | ck_tokenizer_hash_table_clear (CKTokenizerHashTable *table, bool free_values) |
| bool | ck_tokenizer_hash_table_contains (CKTokenizerHashTable *table, const char *key) |
| size_t | ck_tokenizer_hash_table_count (CKTokenizerHashTable *table) |
| CKTokenizerHashTable * | ck_tokenizer_hash_table_create (size_t bucket_count) |
| int | ck_tokenizer_hash_table_delete (CKTokenizerHashTable *table, const char *key, bool free_value) |
| void | ck_tokenizer_hash_table_free (CKTokenizerHashTable *table, bool free_values) |
| int | ck_tokenizer_hash_table_insert (CKTokenizerHashTable *table, const char *key, void *value) |
| int | ck_tokenizer_hash_table_iterate (CKTokenizerHashTable *table, CKTokenizerHashCallback callback, void *user_data) |
| size_t | ck_tokenizer_hash_table_keys (CKTokenizerHashTable *table, const char **out_keys, size_t max_keys) |
| void * | ck_tokenizer_hash_table_lookup (CKTokenizerHashTable *table, const char *key) |
| #define CK_TOKENIZER_HT_BUCKETS_LARGE 65536 |
Definition at line 142 of file hash_table.h.
| #define CK_TOKENIZER_HT_BUCKETS_MEDIUM 8192 |
Definition at line 141 of file hash_table.h.
| #define CK_TOKENIZER_HT_BUCKETS_SMALL 1024 |
Definition at line 140 of file hash_table.h.
| #define CK_TOKENIZER_HT_BUCKETS_XL 262144 |
Definition at line 143 of file hash_table.h.
| typedef int(* CKTokenizerHashCallback) (const char *key, void *value, void *user_data) |
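Callback type accepted by ck_tokenizer_hash_table_iterate(); it is called with an entry's key and value plus the caller-supplied user_data.
Note: the description and parameter list that follow are attached to this typedef in hash_table.h, but they describe ck_tokenizer_hash_table_iterate() (table, callback, user_data) rather than the callback's own parameters.
Iterate over all entries.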
| table | Hash table |
| callback | Function to call for each entry |
| user_data | User-provided data for callback |
Definition at line 112 of file hash_table.h.
| void ck_tokenizer_hash_table_clear (CKTokenizerHashTable *table, bool free_values) |
Clear all entries (but keep bucket array).
| table | Hash table |
| free_values | If true, free all value pointers |
Definition at line 312 of file hash_table.c.
References CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_reset().
| bool ck_tokenizer_hash_table_contains (CKTokenizerHashTable *table, const char *key) |
Check if key exists.
| table | Hash table |
| key | Key to check |
Definition at line 268 of file hash_table.c.
References ck_tokenizer_hash_table_lookup().
| size_t ck_tokenizer_hash_table_count (CKTokenizerHashTable *table) |
Get the number of entries.
| table | Hash table |
Definition at line 264 of file hash_table.c.
References CKTokenizerHashTable::count.
| CKTokenizerHashTable *ck_tokenizer_hash_table_create (size_t bucket_count) |
Create a hash table.
| bucket_count | Number of buckets (0 = auto-size) |
Definition at line 80 of file hash_table.c.
References CK_TOKENIZER_HT_BUCKETS_SMALL, CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashTable::load_factor, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_create(), and ck_true_bpe_create().
| int ck_tokenizer_hash_table_delete (CKTokenizerHashTable *table, const char *key, bool free_value) |
Delete a key.
| table | Hash table |
| key | Key to delete |
| free_value | If true, free the value pointer |
Definition at line 235 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
| void ck_tokenizer_hash_table_free (CKTokenizerHashTable *table, bool free_values) |
Free a hash table.
| table | Hash table to free |
| free_values | If true, also free all value pointers |
Definition at line 140 of file hash_table.c.
References CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_create(), ck_tokenizer_free(), ck_true_bpe_create(), and ck_true_bpe_free().
| int ck_tokenizer_hash_table_insert (CKTokenizerHashTable *table, const char *key, void *value) |
Insert a key-value pair.
| table | Hash table |
| key | Key string |
| value | Value pointer |
Definition at line 158 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
Referenced by ck_tokenizer_add_token(), and ck_true_bpe_add_token().
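| int ck_tokenizer_hash_table_iterate (CKTokenizerHashTable *table, CKTokenizerHashCallback callback, void *user_data) |
Iterate over all entries.
| table | Hash table |
| callback | Function to call for each entry |
| user_data | User-provided data for callback |
Definition at line 272 of file hash_table.c.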
References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
| size_t ck_tokenizer_hash_table_keys (CKTokenizerHashTable *table, const char **out_keys, size_t max_keys) |
Get all keys as an array.
| table | Hash table |
| out_keys | Output array for keys (must be pre-allocated) |
| max_keys | Maximum keys to write |
Definition at line 293 of file hash_table.c.
References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
| void *ck_tokenizer_hash_table_lookup (CKTokenizerHashTable *table, const char *key) |
Look up a key.
| table | Hash table |
| key | Key to look up |
Definition at line 198 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
Referenced by ck_tokenizer_add_special_token(), ck_tokenizer_add_token(), ck_tokenizer_hash_table_contains(), ck_tokenizer_lookup(), ck_tokenizer_lookup_exact(), ck_true_bpe_add_merge_by_tokens(), ck_true_bpe_add_token(), ck_true_bpe_lookup(), find_longest_match_hash(), spm_count_unknown_run(), and spm_find_candidates_at_pos().