← Back to C-Kernel-Engine Docs Doxygen Source Documentation
hash_table.h
Go to the documentation of this file.
1 /*
2  * Hash Table - Optimized with AVX-512
3  *
4  * Ported from HPC_Embeddings with SIMD-optimized string comparison.
5  * Uses MurmurHash3 for hashing and supports AVX-512 for fast lookups.
6  *
7  * By Anthony Shivakumar
8  */
9 
10 #ifndef CK_TOKENIZER_HASH_TABLE_H
11 #define CK_TOKENIZER_HASH_TABLE_H
12 
13 #include <stddef.h>
14 #include <stdint.h>
15 #include <stdbool.h>
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 
21 /* Hash table entry */
22 typedef struct CKTokenizerHashEntry {
23  char *key; /* Null-terminated key string */
24  void *value; /* Associated value */
25  struct CKTokenizerHashEntry *next; /* Chain for collisions */
26 } CKTokenizerHashEntry;
27 
28 /* Hash table structure */
29 typedef struct {
30  CKTokenizerHashEntry **entries; /* Bucket array */
31  size_t size; /* Number of buckets */
32  size_t count; /* Number of entries */
33  float load_factor; /* Max load factor before resize */
34 } CKTokenizerHashTable;
35 
36 /**
37  * Create a hash table.
38  *
39  * @param bucket_count Number of buckets (0 = auto-size)
40  * @return Newly allocated hash table, or NULL on error
41  */
42 CKTokenizerHashTable *ck_tokenizer_hash_table_create(size_t bucket_count);
43 
44 /**
45  * Free a hash table.
46  *
47  * @param table Hash table to free
48  * @param free_values If true, also free all value pointers
49  */
50 void ck_tokenizer_hash_table_free(CKTokenizerHashTable *table, bool free_values);
51 
52 /**
53  * Insert a key-value pair.
54  *
55  * @param table Hash table
56  * @param key Key string
57  * @param value Value pointer
58  * @return 0 on success, -1 on error
59  */
60 int ck_tokenizer_hash_table_insert(CKTokenizerHashTable *table,
61  const char *key,
62  void *value);
63 
64 /**
65  * Look up a key.
66  *
67  * @param table Hash table
68  * @param key Key to look up
69  * @return Value pointer, or NULL if not found
70  */
71 void *ck_tokenizer_hash_table_lookup(CKTokenizerHashTable *table,
72  const char *key);
73 
74 /**
75  * Delete a key.
76  *
77  * @param table Hash table
78  * @param key Key to delete
79  * @param free_value If true, free the value pointer
80  * @return 0 if found and deleted, -1 if not found
81  */
82 int ck_tokenizer_hash_table_delete(CKTokenizerHashTable *table,
83  const char *key,
84  bool free_value);
85 
86 /**
87  * Get the number of entries.
88  *
89  * @param table Hash table
90  * @return Number of entries
91  */
92 size_t ck_tokenizer_hash_table_count(CKTokenizerHashTable *table);
93 
94 /**
95  * Check if key exists.
96  *
97  * @param table Hash table
98  * @param key Key to check
99  * @return true if key exists
100  */
101 bool ck_tokenizer_hash_table_contains(CKTokenizerHashTable *table,
102  const char *key);
103 
104 /**
105  * Iterate over all entries.
106  *
107  * @param table Hash table
108  * @param callback Function to call for each entry
109  * @param user_data User-provided data for callback
110  * @return 0 if all entries were processed, non-zero if iteration was stopped early
111  */
112 typedef int (*CKTokenizerHashCallback)(const char *key, void *value, void *user_data);
113 
114 int ck_tokenizer_hash_table_iterate(CKTokenizerHashTable *table,
115  CKTokenizerHashCallback callback,
116  void *user_data);
117 
118 /**
119  * Get all keys as an array.
120  *
121  * @param table Hash table
122  * @param out_keys Output array for keys (must be pre-allocated with room for max_keys entries)
123  * @param max_keys Maximum keys to write
124  * @return Number of keys written
125  */
126 size_t ck_tokenizer_hash_table_keys(CKTokenizerHashTable *table,
127  const char **out_keys,
128  size_t max_keys);
129 
130 /**
131  * Clear all entries (but keep bucket array).
132  *
133  * @param table Hash table
134  * @param free_values If true, free all value pointers
135  */
136 void ck_tokenizer_hash_table_clear(CKTokenizerHashTable *table,
137  bool free_values);
138 
139 /* Pre-defined bucket counts (powers of two: 2^10, 2^13, 2^16, 2^18) */
140 #define CK_TOKENIZER_HT_BUCKETS_SMALL 1024
141 #define CK_TOKENIZER_HT_BUCKETS_MEDIUM 8192
142 #define CK_TOKENIZER_HT_BUCKETS_LARGE 65536
143 #define CK_TOKENIZER_HT_BUCKETS_XL 262144
144 
145 #ifdef __cplusplus
146 }
147 #endif
148 
149 #endif /* CK_TOKENIZER_HASH_TABLE_H */
bool ck_tokenizer_hash_table_contains(CKTokenizerHashTable *table, const char *key)
Definition: hash_table.c:268
size_t ck_tokenizer_hash_table_count(CKTokenizerHashTable *table)
Definition: hash_table.c:264
size_t ck_tokenizer_hash_table_keys(CKTokenizerHashTable *table, const char **out_keys, size_t max_keys)
Definition: hash_table.c:293
void ck_tokenizer_hash_table_free(CKTokenizerHashTable *table, bool free_values)
Definition: hash_table.c:140
int(* CKTokenizerHashCallback)(const char *key, void *value, void *user_data)
Definition: hash_table.h:112
CKTokenizerHashTable * ck_tokenizer_hash_table_create(size_t bucket_count)
Definition: hash_table.c:80
int ck_tokenizer_hash_table_iterate(CKTokenizerHashTable *table, CKTokenizerHashCallback callback, void *user_data)
Definition: hash_table.c:272
int ck_tokenizer_hash_table_insert(CKTokenizerHashTable *table, const char *key, void *value)
Definition: hash_table.c:158
void * ck_tokenizer_hash_table_lookup(CKTokenizerHashTable *table, const char *key)
Definition: hash_table.c:198
int ck_tokenizer_hash_table_delete(CKTokenizerHashTable *table, const char *key, bool free_value)
Definition: hash_table.c:235
void ck_tokenizer_hash_table_clear(CKTokenizerHashTable *table, bool free_values)
Definition: hash_table.c:312
struct CKTokenizerHashEntry * next
Definition: hash_table.h:25
CKTokenizerHashEntry ** entries
Definition: hash_table.h:30