← Back to C-Kernel-Engine Docs Doxygen Source Documentation
src/data_structures/tries/trie.h File Reference
#include <stdint.h>
#include <stdbool.h>

Go to the source code of this file.

Data Structures

struct  CKTrie
 
struct  CKTrieNode
 

Functions

void ck_trie_clear (CKTrie *trie)
 
CKTrieck_trie_create (size_t max_nodes)
 
int32_t ck_trie_find_longest (const CKTrie *trie, const char *text, size_t text_len, size_t start_pos, size_t *match_len)
 
void ck_trie_free (CKTrie *trie)
 
bool ck_trie_has_prefix (const CKTrie *trie, const char *text, size_t text_len, size_t pos)
 
int ck_trie_insert (CKTrie *trie, const char *token, int32_t token_id, bool is_special, int32_t priority)
 
size_t ck_trie_node_count (const CKTrie *trie)
 

Function Documentation

◆ ck_trie_clear()

void ck_trie_clear ( CKTrie trie)

Definition at line 80 of file trie.c.

80  {
81  if (!trie) return;
82 
83  /* Free all nodes except root */
84  CKTrieNode **queue = (CKTrieNode **)malloc(trie->node_count * sizeof(CKTrieNode *));
85  if (!queue) return;
86 
87  size_t head = 0, tail = 0;
88  queue[tail++] = trie->root;
89 
90  while (head < tail) {
91  CKTrieNode *node = queue[head++];
92  for (int i = 0; i < 256; i++) {
93  if (node->children[i]) {
94  queue[tail++] = node->children[i];
95  }
96  }
97  /* Clear children pointers but don't free root */
98  if (node != trie->root) {
99  free(node);
100  } else {
101  memset(node->children, 0, sizeof(node->children));
102  node->token_id = -1;
103  }
104  }
105 
106  free(queue);
107  trie->node_count = 1;
108 }
struct CKTrieNode * children[256]

◆ ck_trie_create()

CKTrie* ck_trie_create ( size_t  max_nodes)

Definition at line 29 of file trie.c.

29  {
30  CKTrie *trie = (CKTrie *)malloc(sizeof(CKTrie));
31  if (!trie) {
32  return NULL;
33  }
34 
35  if (max_nodes == 0) {
36  max_nodes = DEFAULT_MAX_NODES;
37  }
38 
39  trie->root = create_node();
40  if (!trie->root) {
41  free(trie);
42  return NULL;
43  }
44 
45  trie->max_nodes = max_nodes;
46  trie->node_count = 1; /* Root node */
47 
48  return trie;
49 }
#define DEFAULT_MAX_NODES
Definition: trie.c:16
static CKTrieNode * create_node(void)
Definition: trie.c:19

◆ ck_trie_find_longest()

int32_t ck_trie_find_longest ( const CKTrie trie,
const char *  text,
size_t  text_len,
size_t  start_pos,
size_t *  match_len 
)

Definition at line 142 of file trie.c.

143  {
144  if (!trie || !text || start_pos >= text_len || !match_len) {
145  *match_len = 0;
146  return -1;
147  }
148 
149  CKTrieNode *node = trie->root;
150  CKTrieNode *last_token_node = NULL;
151  size_t last_token_len = 0;
152  size_t pos = start_pos;
153 
154  /* Traverse as long as we have matching children */
155  while (pos < text_len && node) {
156  unsigned char c = (unsigned char)text[pos];
157 
158  if (!node->children[c]) {
159  break;
160  }
161 
162  node = node->children[c];
163  pos++;
164 
165  /* Record if this is a valid token end */
166  if (node->token_id >= 0) {
167  last_token_node = node;
168  last_token_len = pos - start_pos;
169  }
170  }
171 
172  *match_len = last_token_len;
173 
174  if (last_token_node) {
175  return last_token_node->token_id;
176  }
177 
178  return -1;
179 }
const char * text
Definition: tokenizer.h:563
const char int text_len
Definition: true_bpe.h:262

◆ ck_trie_free()

void ck_trie_free ( CKTrie trie)

Definition at line 51 of file trie.c.

51  {
52  if (!trie) return;
53 
54  /* Free all nodes using BFS */
55  CKTrieNode **queue = (CKTrieNode **)malloc(trie->node_count * sizeof(CKTrieNode *));
56  if (!queue) {
57  /* Fallback to simple free */
58  free(trie->root);
59  free(trie);
60  return;
61  }
62 
63  size_t head = 0, tail = 0;
64  queue[tail++] = trie->root;
65 
66  while (head < tail) {
67  CKTrieNode *node = queue[head++];
68  for (int i = 0; i < 256; i++) {
69  if (node->children[i]) {
70  queue[tail++] = node->children[i];
71  }
72  }
73  free(node);
74  }
75 
76  free(queue);
77  free(trie);
78 }

◆ ck_trie_has_prefix()

bool ck_trie_has_prefix ( const CKTrie trie,
const char *  text,
size_t  text_len,
size_t  pos 
)

Definition at line 181 of file trie.c.

181  {
182  if (!trie || !text || pos >= text_len) return false;
183 
184  CKTrieNode *node = trie->root;
185 
186  while (pos < text_len && node) {
187  unsigned char c = (unsigned char)text[pos];
188  node = node->children[c];
189  pos++;
190  }
191 
192  return node != NULL;
193 }

◆ ck_trie_insert()

int ck_trie_insert ( CKTrie trie,
const char *  token,
int32_t  token_id,
bool  is_special,
int32_t  priority 
)

Definition at line 110 of file trie.c.

110  {
111  if (!trie || !token) return -1;
112 
113  CKTrieNode *node = trie->root;
114  const unsigned char *p = (const unsigned char *)token;
115 
116  while (*p) {
117  unsigned char c = *p++;
118 
119  if (!node->children[c]) {
120  if (trie->node_count >= trie->max_nodes) {
121  return -1; /* Out of nodes */
122  }
123 
124  CKTrieNode *new_node = create_node();
125  if (!new_node) return -1;
126 
127  node->children[c] = new_node;
128  trie->node_count++;
129  }
130 
131  node = node->children[c];
132  }
133 
134  /* Mark end of token */
135  node->token_id = token_id;
136  node->is_special = is_special;
137  node->priority = priority;
138 
139  return 0;
140 }
const char * token
Definition: tokenizer.h:306
int32_t int32_t int32_t int32_t priority
Definition: true_bpe.h:115

◆ ck_trie_node_count()

size_t ck_trie_node_count ( const CKTrie trie)

Definition at line 195 of file trie.c.

195  {
196  return trie ? trie->node_count : 0;
197 }