← Back to C-Kernel-Engine Docs Doxygen Source Documentation
hash_table.h File Reference
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

Go to the source code of this file.

Data Structures

struct  CKTokenizerHashEntry
 
struct  CKTokenizerHashTable
 

Macros

#define CK_TOKENIZER_HT_BUCKETS_LARGE   65536
 
#define CK_TOKENIZER_HT_BUCKETS_MEDIUM   8192
 
#define CK_TOKENIZER_HT_BUCKETS_SMALL   1024
 
#define CK_TOKENIZER_HT_BUCKETS_XL   262144
 

Typedefs

typedef int(* CKTokenizerHashCallback) (const char *key, void *value, void *user_data)
 

Functions

void ck_tokenizer_hash_table_clear (CKTokenizerHashTable *table, bool free_values)
 
bool ck_tokenizer_hash_table_contains (CKTokenizerHashTable *table, const char *key)
 
size_t ck_tokenizer_hash_table_count (CKTokenizerHashTable *table)
 
CKTokenizerHashTable * ck_tokenizer_hash_table_create (size_t bucket_count)
 
int ck_tokenizer_hash_table_delete (CKTokenizerHashTable *table, const char *key, bool free_value)
 
void ck_tokenizer_hash_table_free (CKTokenizerHashTable *table, bool free_values)
 
int ck_tokenizer_hash_table_insert (CKTokenizerHashTable *table, const char *key, void *value)
 
int ck_tokenizer_hash_table_iterate (CKTokenizerHashTable *table, CKTokenizerHashCallback callback, void *user_data)
 
size_t ck_tokenizer_hash_table_keys (CKTokenizerHashTable *table, const char **out_keys, size_t max_keys)
 
void * ck_tokenizer_hash_table_lookup (CKTokenizerHashTable *table, const char *key)
 

Macro Definition Documentation

◆ CK_TOKENIZER_HT_BUCKETS_LARGE

#define CK_TOKENIZER_HT_BUCKETS_LARGE   65536

Definition at line 142 of file hash_table.h.

◆ CK_TOKENIZER_HT_BUCKETS_MEDIUM

#define CK_TOKENIZER_HT_BUCKETS_MEDIUM   8192

Definition at line 141 of file hash_table.h.

◆ CK_TOKENIZER_HT_BUCKETS_SMALL

#define CK_TOKENIZER_HT_BUCKETS_SMALL   1024

Definition at line 140 of file hash_table.h.

◆ CK_TOKENIZER_HT_BUCKETS_XL

#define CK_TOKENIZER_HT_BUCKETS_XL   262144

Definition at line 143 of file hash_table.h.

Typedef Documentation

◆ CKTokenizerHashCallback

typedef int(* CKTokenizerHashCallback) (const char *key, void *value, void *user_data)

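Iterate over all entries.

Note: this description documents ck_tokenizer_hash_table_iterate(); the typedef itself is the per-entry callback, which that function calls with each entry's key, its value, and the caller's user_data.

Parameters
table — Hash table
callback — Function to call for each entry
user_data — User-provided data for callback
Returns
0 if all entries processed, non-zero to stop (iteration ends at the first non-zero callback result, which is returned to the caller)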

Definition at line 112 of file hash_table.h.

Function Documentation

◆ ck_tokenizer_hash_table_clear()

void ck_tokenizer_hash_table_clear ( CKTokenizerHashTable *  table,
bool  free_values 
)

Clear all entries (but keep bucket array).

Parameters
table — Hash table
free_values — If true, free all value pointers

Definition at line 312 of file hash_table.c.

312  {
313  if (!table) {
314  return;
315  }
316 
317  for (size_t i = 0; i < table->size; i++) {
318  CKTokenizerHashEntry *entry = table->entries[i];
319  while (entry) {
320  CKTokenizerHashEntry *next = entry->next;
321  free_entry(entry, free_values);
322  entry = next;
323  }
324  table->entries[i] = NULL;
325  }
326 
327  table->count = 0;
328 }
static void free_entry(CKTokenizerHashEntry *entry, bool free_value)
Definition: hash_table.c:131
struct CKTokenizerHashEntry * next
Definition: hash_table.h:25
CKTokenizerHashEntry ** entries
Definition: hash_table.h:30

References CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.

Referenced by ck_tokenizer_reset().

◆ ck_tokenizer_hash_table_contains()

bool ck_tokenizer_hash_table_contains ( CKTokenizerHashTable *  table,
const char *  key 
)

Check if key exists.

Parameters
table — Hash table
key — Key to check
Returns
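true if key exists with a non-NULL value (implemented as ck_tokenizer_hash_table_lookup(table, key) != NULL, so a key stored with a NULL value reports false)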

Definition at line 268 of file hash_table.c.

268  {
269  return ck_tokenizer_hash_table_lookup(table, key) != NULL;
270 }
void * ck_tokenizer_hash_table_lookup(CKTokenizerHashTable *table, const char *key)
Definition: hash_table.c:198

References ck_tokenizer_hash_table_lookup().

◆ ck_tokenizer_hash_table_count()

size_t ck_tokenizer_hash_table_count ( CKTokenizerHashTable *  table)

Get the number of entries.

Parameters
table — Hash table (a NULL table yields 0)
Returns
Number of entries

Definition at line 264 of file hash_table.c.

264  {
265  return table ? table->count : 0;
266 }

References CKTokenizerHashTable::count.

◆ ck_tokenizer_hash_table_create()

CKTokenizerHashTable* ck_tokenizer_hash_table_create ( size_t  bucket_count)

Create a hash table.

Parameters
bucket_count — Number of buckets (0 = auto-size; the implementation substitutes CK_TOKENIZER_HT_BUCKETS_SMALL)
Returns
Newly allocated hash table, or NULL on error

Definition at line 80 of file hash_table.c.

80  {
81  if (bucket_count == 0) {
82  bucket_count = CK_TOKENIZER_HT_BUCKETS_SMALL;
83  }
84 
86  if (!table) {
87  return NULL;
88  }
89 
90  table->entries = (CKTokenizerHashEntry **)calloc(bucket_count, sizeof(CKTokenizerHashEntry *));
91  if (!table->entries) {
92  free(table);
93  return NULL;
94  }
95 
96  table->size = bucket_count;
97  table->count = 0;
98  table->load_factor = 0.75f;
99 
100  return table;
101 }
#define CK_TOKENIZER_HT_BUCKETS_SMALL
Definition: hash_table.h:140

References CK_TOKENIZER_HT_BUCKETS_SMALL, CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashTable::load_factor, and CKTokenizerHashTable::size.

Referenced by ck_tokenizer_create(), and ck_true_bpe_create().

◆ ck_tokenizer_hash_table_delete()

int ck_tokenizer_hash_table_delete ( CKTokenizerHashTable *  table,
const char *  key,
bool  free_value 
)

Delete a key.

Parameters
table — Hash table
key — Key to delete
free_value — If true, free the value pointer
Returns
0 if found and deleted, -1 if not found

Definition at line 235 of file hash_table.c.

237  {
238  if (!table || !key) {
239  return -1;
240  }
241 
242  uint32_t bucket = ck_tokenizer_hash_str(key) % table->size;
243  CKTokenizerHashEntry *entry = table->entries[bucket];
244  CKTokenizerHashEntry *prev = NULL;
245 
246  while (entry) {
247  if (strcmp(entry->key, key) == 0) {
248  if (prev) {
249  prev->next = entry->next;
250  } else {
251  table->entries[bucket] = entry->next;
252  }
253  free_entry(entry, free_value);
254  table->count--;
255  return 0;
256  }
257  prev = entry;
258  entry = entry->next;
259  }
260 
261  return -1;
262 }
uint32_t ck_tokenizer_hash_str(const char *key)
Definition: hash_table.c:22

References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.

◆ ck_tokenizer_hash_table_free()

void ck_tokenizer_hash_table_free ( CKTokenizerHashTable *  table,
bool  free_values 
)

Free a hash table.

Parameters
table — Hash table to free
free_values — If true, also free all value pointers

Definition at line 140 of file hash_table.c.

140  {
141  if (!table) {
142  return;
143  }
144 
145  for (size_t i = 0; i < table->size; i++) {
146  CKTokenizerHashEntry *entry = table->entries[i];
147  while (entry) {
148  CKTokenizerHashEntry *next = entry->next;
149  free_entry(entry, free_values);
150  entry = next;
151  }
152  }
153 
154  free(table->entries);
155  free(table);
156 }

References CKTokenizerHashTable::entries, free_entry(), CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.

Referenced by ck_tokenizer_create(), ck_tokenizer_free(), ck_true_bpe_create(), and ck_true_bpe_free().

◆ ck_tokenizer_hash_table_insert()

int ck_tokenizer_hash_table_insert ( CKTokenizerHashTable *  table,
const char *  key,
void *  value 
)

Insert a key-value pair.

Parameters
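table — Hash table
key — Key string (duplicated with strdup when a new entry is created)
value — Value pointer (stored as-is; if the key already exists, the stored pointer is replaced and the previous value is not freed)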
Returns
0 on success, -1 on error

Definition at line 158 of file hash_table.c.

160  {
161  if (!table || !key) {
162  return -1;
163  }
164 
165  uint32_t bucket = ck_tokenizer_hash_str(key) % table->size;
166  CKTokenizerHashEntry *entry = table->entries[bucket];
167 
168  /* Check if key already exists */
169  while (entry) {
170  if (strcmp(entry->key, key) == 0) {
171  /* Update existing entry - just replace value pointer */
172  entry->value = value;
173  return 0;
174  }
175  entry = entry->next;
176  }
177 
178  /* Create new entry; the value pointer is stored as-is (caller manages its memory) */
179  CKTokenizerHashEntry *new_entry = (CKTokenizerHashEntry *)malloc(sizeof(CKTokenizerHashEntry));
180  if (!new_entry) {
181  return -1;
182  }
183 
184  new_entry->key = strdup(key);
185  if (!new_entry->key) {
186  free(new_entry);
187  return -1;
188  }
189 
190  new_entry->value = value;
191  new_entry->next = table->entries[bucket];
192  table->entries[bucket] = new_entry;
193  table->count++;
194 
195  return 0;
196 }

References ck_tokenizer_hash_str(), CKTokenizerHashTable::count, CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.

Referenced by ck_tokenizer_add_token(), and ck_true_bpe_add_token().

◆ ck_tokenizer_hash_table_iterate()

int ck_tokenizer_hash_table_iterate ( CKTokenizerHashTable *  table,
CKTokenizerHashCallback  callback,
void *  user_data 
)

Definition at line 272 of file hash_table.c.

274  {
275  if (!table || !callback) {
276  return -1;
277  }
278 
279  for (size_t i = 0; i < table->size; i++) {
280  CKTokenizerHashEntry *entry = table->entries[i];
281  while (entry) {
282  int ret = callback(entry->key, entry->value, user_data);
283  if (ret != 0) {
284  return ret;
285  }
286  entry = entry->next;
287  }
288  }
289 
290  return 0;
291 }

References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.

◆ ck_tokenizer_hash_table_keys()

size_t ck_tokenizer_hash_table_keys ( CKTokenizerHashTable *  table,
const char **  out_keys,
size_t  max_keys 
)

Get all keys as an array.

Parameters
table — Hash table
out_keys — Output array for keys (must be pre-allocated); receives pointers to the table's own key strings, not copies
max_keys — Maximum keys to write
Returns
Number of keys written

Definition at line 293 of file hash_table.c.

295  {
296  if (!table || !out_keys) {
297  return 0;
298  }
299 
300  size_t written = 0;
301  for (size_t i = 0; i < table->size && written < max_keys; i++) {
302  CKTokenizerHashEntry *entry = table->entries[i];
303  while (entry && written < max_keys) {
304  out_keys[written++] = entry->key;
305  entry = entry->next;
306  }
307  }
308 
309  return written;
310 }

References CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, and CKTokenizerHashTable::size.

◆ ck_tokenizer_hash_table_lookup()

void* ck_tokenizer_hash_table_lookup ( CKTokenizerHashTable *  table,
const char *  key 
)

Look up a key.

Parameters
table — Hash table
key — Key to look up
Returns
Value pointer, or NULL if not found

Definition at line 198 of file hash_table.c.

198  {
199  if (!table || !key) {
200  return NULL;
201  }
202 
203  uint32_t bucket = ck_tokenizer_hash_str(key) % table->size;
204  CKTokenizerHashEntry *entry = table->entries[bucket];
205 
206  while (entry) {
207  if (strcmp(entry->key, key) == 0) {
208  return entry->value;
209  }
210  entry = entry->next;
211  }
212 
213  return NULL;
214 }

References ck_tokenizer_hash_str(), CKTokenizerHashTable::entries, CKTokenizerHashEntry::key, CKTokenizerHashEntry::next, CKTokenizerHashTable::size, and CKTokenizerHashEntry::value.

Referenced by ck_tokenizer_add_special_token(), ck_tokenizer_add_token(), ck_tokenizer_hash_table_contains(), ck_tokenizer_lookup(), ck_tokenizer_lookup_exact(), ck_true_bpe_add_merge_by_tokens(), ck_true_bpe_add_token(), ck_true_bpe_lookup(), find_longest_match_hash(), spm_count_unknown_run(), and spm_find_candidates_at_pos().