#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <stdio.h>
#include "tokenizer/hash_table.h"
#include "tokenizer/murmurhash3.h"
Go to the source code of this file.
Macros
| #define | CK_TOKENIZER_HASH_SEED 0x9747b28c |
| #define CK_TOKENIZER_HASH_SEED 0x9747b28c |
Definition at line 16 of file hash_table.c.
| uint32_t ck_tokenizer_hash | ( | const char * | key, |
| size_t | len | ||
| ) |
Definition at line 18 of file hash_table.c.
References ck_murmurhash3(), and CK_TOKENIZER_HASH_SEED.
| uint32_t ck_tokenizer_hash_str | ( | const char * | key | ) |
Definition at line 22 of file hash_table.c.
References ck_murmurhash3_str(), and CK_TOKENIZER_HASH_SEED.
Referenced by ck_tokenizer_hash_table_delete(), ck_tokenizer_hash_table_insert(), ck_tokenizer_hash_table_lookup(), and ck_tokenizer_hash_table_lookup_avx().
| void ck_tokenizer_hash_table_clear | ( | CKTokenizerHashTable * | table, |
| bool | free_values | ||
| ) |
Clear all entries (but keep bucket array).
| table | Hash table |
| free_values | If true, free all value pointers |
Definition at line 312 of file hash_table.c.
References CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_reset().
| bool ck_tokenizer_hash_table_contains | ( | CKTokenizerHashTable * | table, |
| const char * | key | ||
| ) |
Check if key exists.
| table | Hash table |
| key | Key to check |
Definition at line 268 of file hash_table.c.
References ck_tokenizer_hash_table_lookup().
| size_t ck_tokenizer_hash_table_count | ( | CKTokenizerHashTable * | table | ) |
Get the number of entries.
| table | Hash table |
Definition at line 264 of file hash_table.c.
References CKTokenizerHashTable::count.
| CKTokenizerHashTable* ck_tokenizer_hash_table_create | ( | size_t | bucket_count | ) |
Create a hash table.
| bucket_count | Number of buckets (0 = auto-size) |
Definition at line 80 of file hash_table.c.
References CK_TOKENIZER_HT_BUCKETS_SMALL, CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashTable::load_factor, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_create(), and ck_true_bpe_create().
| int ck_tokenizer_hash_table_delete | ( | CKTokenizerHashTable * | table, |
| const char * | key, | ||
| bool | free_value | ||
| ) |
Delete a key.
| table | Hash table |
| key | Key to delete |
| free_value | If true, free the value pointer |
Definition at line 235 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
| void ck_tokenizer_hash_table_free | ( | CKTokenizerHashTable * | table, |
| bool | free_values | ||
| ) |
Free a hash table.
| table | Hash table to free |
| free_values | If true, also free all value pointers |
Definition at line 140 of file hash_table.c.
References CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
Referenced by ck_tokenizer_create(), ck_tokenizer_free(), ck_true_bpe_create(), and ck_true_bpe_free().
| int ck_tokenizer_hash_table_insert | ( | CKTokenizerHashTable * | table, |
| const char * | key, | ||
| void * | value | ||
| ) |
Insert a key-value pair.
| table | Hash table |
| key | Key string |
| value | Value pointer |
Definition at line 158 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
Referenced by ck_tokenizer_add_token(), and ck_true_bpe_add_token().
| int ck_tokenizer_hash_table_iterate | ( | CKTokenizerHashTable * | table, |
| CKTokenizerHashCallback | callback, | ||
| void * | user_data | ||
| ) |
Definition at line 272 of file hash_table.c.
References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
| size_t ck_tokenizer_hash_table_keys | ( | CKTokenizerHashTable * | table, |
| const char ** | out_keys, | ||
| size_t | max_keys | ||
| ) |
Get all keys as an array.
| table | Hash table |
| out_keys | Output array for keys (must be pre-allocated) |
| max_keys | Maximum keys to write |
Definition at line 293 of file hash_table.c.
References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.
| void* ck_tokenizer_hash_table_lookup | ( | CKTokenizerHashTable * | table, |
| const char * | key | ||
| ) |
Look up a key.
| table | Hash table |
| key | Key to look up |
Definition at line 198 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
Referenced by ck_tokenizer_add_special_token(), ck_tokenizer_add_token(), ck_tokenizer_hash_table_contains(), ck_tokenizer_lookup(), ck_tokenizer_lookup_exact(), ck_true_bpe_add_merge_by_tokens(), ck_true_bpe_add_token(), ck_true_bpe_lookup(), find_longest_match_hash(), spm_count_unknown_run(), and spm_find_candidates_at_pos().
| void* ck_tokenizer_hash_table_lookup_avx | ( | CKTokenizerHashTable * | table, |
| const char * | key | ||
| ) |
Definition at line 217 of file hash_table.c.
References ck_tokenizer_hash_str(), CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, simd_strcmp(), CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.
|
static |
Definition at line 103 of file hash_table.c.
References CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashEntry::value.
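| static | free_entry() — signature not captured in this extract; name inferred from the cross-references below |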
Definition at line 131 of file hash_table.c.
References CKTokenizerHashEntry::key, and CKTokenizerHashEntry::value.
Referenced by ck_tokenizer_hash_table_clear(), ck_tokenizer_hash_table_delete(), and ck_tokenizer_hash_table_free().
|
inline static |