← Back to C-Kernel-Engine Docs Doxygen Source Documentation
v6.6/generated/ck-kernel-inference.h
Go to the documentation of this file.
1 /**
2  * @file ck-kernel-inference.h
3  * @brief AUTO-GENERATED: qwen2_0.5b_decode Memory Layout
4  *
5  * Generated: 2026-01-15T19:23:13.597715 UTC
6  * Total Memory: 3.65 GB
7  *
8  * DO NOT EDIT - Regenerate with build_ir_v3.py
9  */
10 
11 #ifndef GENERATED_QWEN2_0_5B_DECODE_H
12 #define GENERATED_QWEN2_0_5B_DECODE_H
13 
14 #include <stddef.h>
15 #include <stdint.h>
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 
21 /* ============================================================================
22  * MODEL CONFIGURATION
23  * ============================================================================ */
24 
25 #define QWEN2_0_5B_DECODE_EMBED_DIM 896 /* equals NUM_HEADS * HEAD_DIM (14 * 64) */
26 #define QWEN2_0_5B_DECODE_NUM_HEADS 14 /* leading dimension of wq, bq, q and wo in the layer offset struct */
27 #define QWEN2_0_5B_DECODE_NUM_KV_HEADS 2 /* leading dimension of wk, bk, wv, bv, k and v in the layer offset struct */
28 #define QWEN2_0_5B_DECODE_HEAD_DIM 64
29 #define QWEN2_0_5B_DECODE_INTERMEDIATE 4864 /* w2 is [896, 4864]; w1 is [9728, 896], i.e. 2 * INTERMEDIATE rows */
30 #define QWEN2_0_5B_DECODE_NUM_LAYERS 24 /* the layer offset table has entries [0]..[23] */
31 #define QWEN2_0_5B_DECODE_VOCAB_SIZE 151936
32 #define QWEN2_0_5B_DECODE_MAX_SEQ_LEN 131072 /* sizes the per-layer k/v buffers ([2, 131072, 64]) and the RoPE caches ([131072, 32]) */
33 #define QWEN2_0_5B_DECODE_NUM_MERGES 0
34 #define QWEN2_0_5B_DECODE_TOTAL_VOCAB_BYTES 0
35 #define QWEN2_0_5B_DECODE_DTYPE_BYTES 4 /* NOTE(review): matches activation buffers (layer 0 ln1_out spans 0xE00 = 896 * 4 bytes up to its end canary) but NOT weight regions: token_emb spans 0x089F1400 bytes for 151936 * 896 elements - presumably quantized storage; confirm before using this to size weights */
36 
37 #define QWEN2_0_5B_DECODE_TOTAL_BYTES 3651931008ULL /* 3.65 GB (decimal). NOTE(review): exceeds WEIGHT_BYTES + ACTIVATION_BYTES by 36288 bytes (567 * 64) - presumably canary regions and/or padding; confirm against the generator */
38 #define QWEN2_0_5B_DECODE_WEIGHT_BYTES 395811776ULL
39 #define QWEN2_0_5B_DECODE_ACTIVATION_BYTES 3256082944ULL
40 
41 #define QWEN2_0_5B_DECODE_MAGIC 0x434B454EUL /* bytes 0x43 0x4B 0x45 0x4E are ASCII "CKEN" (most significant byte first) */
42 #define QWEN2_0_5B_DECODE_CANARY_VALUE 0xDEADBEEFUL /* presumably the fill pattern written into canary regions - TODO confirm against the runtime */
43 #define QWEN2_0_5B_DECODE_CANARY_SIZE 64 /* bytes; e.g. the "header_start" canary at 0x40 precedes token_emb at 0x80 by exactly this amount */
44 
45 /* ============================================================================
46  * HEADER OFFSETS
47  * ============================================================================ */
48 
49 typedef struct {
50  size_t token_emb; /* [151936, 896] */
51  size_t vocab_offsets; /* [151936] */
52  size_t vocab_strings; /* [0] */
53  size_t vocab_merges; /* [0, 3] */
54  size_t embedded_input; /* [1, 896] */
56 
58  .token_emb = 0x00000080,
59  .vocab_offsets = 0x089F14C0,
60  .vocab_strings = 0x08A85B00,
61  .vocab_merges = 0x08BFAA80,
62  .embedded_input = 0x08DB6340,
63 };
64 
65 /* ============================================================================
66  * LAYER OFFSETS
67  * ============================================================================ */
68 
69 typedef struct {
70  size_t ln1_gamma; /* [896] */
71  size_t ln1_out; /* [1, 896] */
72  size_t wq; /* [14, 64, 896] */
73  size_t bq; /* [14, 64] */
74  size_t wk; /* [2, 64, 896] */
75  size_t bk; /* [2, 64] */
76  size_t wv; /* [2, 64, 896] */
77  size_t bv; /* [2, 64] */
78  size_t q; /* [14, 1, 64] */
79  size_t k; /* [2, 131072, 64] */
80  size_t v; /* [2, 131072, 64] */
81  size_t wo; /* [14, 896, 64] */
82  size_t proj_tmp; /* [1, 896] */
83  size_t proj_scratch; /* [1, 896] */
84  size_t residual1; /* [1, 896] */
85  size_t ln2_gamma; /* [896] */
86  size_t ln2_out; /* [1, 896] */
87  size_t w1; /* [9728, 896] */
88  size_t w2; /* [896, 4864] */
89  size_t mlp_out; /* [1, 896] */
90  size_t output; /* [1, 896] */
92 
94  [0] = {
95  .ln1_gamma = 0x08DB7200,
96  .ln1_out = 0x08DB8040,
97  .wq = 0x08DB8E80,
98  .bq = 0x08E3FAC0,
99  .wk = 0x08E40900,
100  .bk = 0x08E53D40,
101  .wv = 0x08E53F80,
102  .bv = 0x08E71BC0,
103  .q = 0x08E71E00,
104  .k = 0x08E72C40,
105  .v = 0x0CE72C80,
106  .wo = 0x10E72CC0,
107  .proj_tmp = 0x10EF9900,
108  .proj_scratch = 0x10EFA740,
109  .residual1 = 0x10EFB580,
110  .ln2_gamma = 0x10EFC3C0,
111  .ln2_out = 0x10EFD200,
112  .w1 = 0x10EFE040,
113  .w2 = 0x114B5080,
114  .mlp_out = 0x1181DDC0,
115  .output = 0x1181EC00,
116  },
117  [1] = {
118  .ln1_gamma = 0x1181FAC0,
119  .ln1_out = 0x11820900,
120  .wq = 0x11821740,
121  .bq = 0x118A8380,
122  .wk = 0x118A91C0,
123  .bk = 0x118BC600,
124  .wv = 0x118BC840,
125  .bv = 0x118DA480,
126  .q = 0x118DA6C0,
127  .k = 0x118DB500,
128  .v = 0x158DB540,
129  .wo = 0x198DB580,
130  .proj_tmp = 0x199621C0,
131  .proj_scratch = 0x19963000,
132  .residual1 = 0x19963E40,
133  .ln2_gamma = 0x19964C80,
134  .ln2_out = 0x19965AC0,
135  .w1 = 0x19966900,
136  .w2 = 0x19F1D940,
137  .mlp_out = 0x1A286680,
138  .output = 0x1A2874C0,
139  },
140  [2] = {
141  .ln1_gamma = 0x1A288380,
142  .ln1_out = 0x1A2891C0,
143  .wq = 0x1A28A000,
144  .bq = 0x1A310C40,
145  .wk = 0x1A311A80,
146  .bk = 0x1A324EC0,
147  .wv = 0x1A325100,
148  .bv = 0x1A338540,
149  .q = 0x1A338780,
150  .k = 0x1A3395C0,
151  .v = 0x1E339600,
152  .wo = 0x22339640,
153  .proj_tmp = 0x223C0280,
154  .proj_scratch = 0x223C10C0,
155  .residual1 = 0x223C1F00,
156  .ln2_gamma = 0x223C2D40,
157  .ln2_out = 0x223C3B80,
158  .w1 = 0x223C49C0,
159  .w2 = 0x2297BA00,
160  .mlp_out = 0x22BD2240,
161  .output = 0x22BD3080,
162  },
163  [3] = {
164  .ln1_gamma = 0x22BD3F40,
165  .ln1_out = 0x22BD4D80,
166  .wq = 0x22BD5BC0,
167  .bq = 0x22C5C800,
168  .wk = 0x22C5D640,
169  .bk = 0x22C70A80,
170  .wv = 0x22C70CC0,
171  .bv = 0x22C8E900,
172  .q = 0x22C8EB40,
173  .k = 0x22C8F980,
174  .v = 0x26C8F9C0,
175  .wo = 0x2AC8FA00,
176  .proj_tmp = 0x2AD16640,
177  .proj_scratch = 0x2AD17480,
178  .residual1 = 0x2AD182C0,
179  .ln2_gamma = 0x2AD19100,
180  .ln2_out = 0x2AD19F40,
181  .w1 = 0x2AD1AD80,
182  .w2 = 0x2B2D1DC0,
183  .mlp_out = 0x2B63AB00,
184  .output = 0x2B63B940,
185  },
186  [4] = {
187  .ln1_gamma = 0x2B63C800,
188  .ln1_out = 0x2B63D640,
189  .wq = 0x2B63E480,
190  .bq = 0x2B6C50C0,
191  .wk = 0x2B6C5F00,
192  .bk = 0x2B6D9340,
193  .wv = 0x2B6D9580,
194  .bv = 0x2B6EC9C0,
195  .q = 0x2B6ECC00,
196  .k = 0x2B6EDA40,
197  .v = 0x2F6EDA80,
198  .wo = 0x336EDAC0,
199  .proj_tmp = 0x33774700,
200  .proj_scratch = 0x33775540,
201  .residual1 = 0x33776380,
202  .ln2_gamma = 0x337771C0,
203  .ln2_out = 0x33778000,
204  .w1 = 0x33778E40,
205  .w2 = 0x33D2FE80,
206  .mlp_out = 0x33F866C0,
207  .output = 0x33F87500,
208  },
209  [5] = {
210  .ln1_gamma = 0x33F883C0,
211  .ln1_out = 0x33F89200,
212  .wq = 0x33F8A040,
213  .bq = 0x34010C80,
214  .wk = 0x34011AC0,
215  .bk = 0x34024F00,
216  .wv = 0x34025140,
217  .bv = 0x34038580,
218  .q = 0x340387C0,
219  .k = 0x34039600,
220  .v = 0x38039640,
221  .wo = 0x3C039680,
222  .proj_tmp = 0x3C0C02C0,
223  .proj_scratch = 0x3C0C1100,
224  .residual1 = 0x3C0C1F40,
225  .ln2_gamma = 0x3C0C2D80,
226  .ln2_out = 0x3C0C3BC0,
227  .w1 = 0x3C0C4A00,
228  .w2 = 0x3C67BA40,
229  .mlp_out = 0x3C8D2280,
230  .output = 0x3C8D30C0,
231  },
232  [6] = {
233  .ln1_gamma = 0x3C8D3F80,
234  .ln1_out = 0x3C8D4DC0,
235  .wq = 0x3C8D5C00,
236  .bq = 0x3C95C840,
237  .wk = 0x3C95D680,
238  .bk = 0x3C970AC0,
239  .wv = 0x3C970D00,
240  .bv = 0x3C98E940,
241  .q = 0x3C98EB80,
242  .k = 0x3C98F9C0,
243  .v = 0x4098FA00,
244  .wo = 0x4498FA40,
245  .proj_tmp = 0x44A16680,
246  .proj_scratch = 0x44A174C0,
247  .residual1 = 0x44A18300,
248  .ln2_gamma = 0x44A19140,
249  .ln2_out = 0x44A19F80,
250  .w1 = 0x44A1ADC0,
251  .w2 = 0x44FD1E00,
252  .mlp_out = 0x4533AB40,
253  .output = 0x4533B980,
254  },
255  [7] = {
256  .ln1_gamma = 0x4533C840,
257  .ln1_out = 0x4533D680,
258  .wq = 0x4533E4C0,
259  .bq = 0x453C5100,
260  .wk = 0x453C5F40,
261  .bk = 0x453D9380,
262  .wv = 0x453D95C0,
263  .bv = 0x453F7200,
264  .q = 0x453F7440,
265  .k = 0x453F8280,
266  .v = 0x493F82C0,
267  .wo = 0x4D3F8300,
268  .proj_tmp = 0x4D47EF40,
269  .proj_scratch = 0x4D47FD80,
270  .residual1 = 0x4D480BC0,
271  .ln2_gamma = 0x4D481A00,
272  .ln2_out = 0x4D482840,
273  .w1 = 0x4D483680,
274  .w2 = 0x4DA3A6C0,
275  .mlp_out = 0x4DDA3400,
276  .output = 0x4DDA4240,
277  },
278  [8] = {
279  .ln1_gamma = 0x4DDA5100,
280  .ln1_out = 0x4DDA5F40,
281  .wq = 0x4DDA6D80,
282  .bq = 0x4DE2D9C0,
283  .wk = 0x4DE2E800,
284  .bk = 0x4DE41C40,
285  .wv = 0x4DE41E80,
286  .bv = 0x4DE5FAC0,
287  .q = 0x4DE5FD00,
288  .k = 0x4DE60B40,
289  .v = 0x51E60B80,
290  .wo = 0x55E60BC0,
291  .proj_tmp = 0x55EE7800,
292  .proj_scratch = 0x55EE8640,
293  .residual1 = 0x55EE9480,
294  .ln2_gamma = 0x55EEA2C0,
295  .ln2_out = 0x55EEB100,
296  .w1 = 0x55EEBF40,
297  .w2 = 0x564A2F80,
298  .mlp_out = 0x5680BCC0,
299  .output = 0x5680CB00,
300  },
301  [9] = {
302  .ln1_gamma = 0x5680D9C0,
303  .ln1_out = 0x5680E800,
304  .wq = 0x5680F640,
305  .bq = 0x56896280,
306  .wk = 0x568970C0,
307  .bk = 0x568AA500,
308  .wv = 0x568AA740,
309  .bv = 0x568C8380,
310  .q = 0x568C85C0,
311  .k = 0x568C9400,
312  .v = 0x5A8C9440,
313  .wo = 0x5E8C9480,
314  .proj_tmp = 0x5E9500C0,
315  .proj_scratch = 0x5E950F00,
316  .residual1 = 0x5E951D40,
317  .ln2_gamma = 0x5E952B80,
318  .ln2_out = 0x5E9539C0,
319  .w1 = 0x5E954800,
320  .w2 = 0x5EF0B840,
321  .mlp_out = 0x5F274580,
322  .output = 0x5F2753C0,
323  },
324  [10] = {
325  .ln1_gamma = 0x5F276280,
326  .ln1_out = 0x5F2770C0,
327  .wq = 0x5F277F00,
328  .bq = 0x5F2FEB40,
329  .wk = 0x5F2FF980,
330  .bk = 0x5F312DC0,
331  .wv = 0x5F313000,
332  .bv = 0x5F330C40,
333  .q = 0x5F330E80,
334  .k = 0x5F331CC0,
335  .v = 0x63331D00,
336  .wo = 0x67331D40,
337  .proj_tmp = 0x673B8980,
338  .proj_scratch = 0x673B97C0,
339  .residual1 = 0x673BA600,
340  .ln2_gamma = 0x673BB440,
341  .ln2_out = 0x673BC280,
342  .w1 = 0x673BD0C0,
343  .w2 = 0x67974100,
344  .mlp_out = 0x67CDCE40,
345  .output = 0x67CDDC80,
346  },
347  [11] = {
348  .ln1_gamma = 0x67CDEB40,
349  .ln1_out = 0x67CDF980,
350  .wq = 0x67CE07C0,
351  .bq = 0x67D67400,
352  .wk = 0x67D68240,
353  .bk = 0x67D7B680,
354  .wv = 0x67D7B8C0,
355  .bv = 0x67D8ED00,
356  .q = 0x67D8EF40,
357  .k = 0x67D8FD80,
358  .v = 0x6BD8FDC0,
359  .wo = 0x6FD8FE00,
360  .proj_tmp = 0x6FE16A40,
361  .proj_scratch = 0x6FE17880,
362  .residual1 = 0x6FE186C0,
363  .ln2_gamma = 0x6FE19500,
364  .ln2_out = 0x6FE1A340,
365  .w1 = 0x6FE1B180,
366  .w2 = 0x703D21C0,
367  .mlp_out = 0x70628A00,
368  .output = 0x70629840,
369  },
370  [12] = {
371  .ln1_gamma = 0x7062A700,
372  .ln1_out = 0x7062B540,
373  .wq = 0x7062C380,
374  .bq = 0x706B2FC0,
375  .wk = 0x706B3E00,
376  .bk = 0x706C7240,
377  .wv = 0x706C7480,
378  .bv = 0x706DA8C0,
379  .q = 0x706DAB00,
380  .k = 0x706DB940,
381  .v = 0x746DB980,
382  .wo = 0x786DB9C0,
383  .proj_tmp = 0x78762600,
384  .proj_scratch = 0x78763440,
385  .residual1 = 0x78764280,
386  .ln2_gamma = 0x787650C0,
387  .ln2_out = 0x78765F00,
388  .w1 = 0x78766D40,
389  .w2 = 0x78D1DD80,
390  .mlp_out = 0x78F745C0,
391  .output = 0x78F75400,
392  },
393  [13] = {
394  .ln1_gamma = 0x78F762C0,
395  .ln1_out = 0x78F77100,
396  .wq = 0x78F77F40,
397  .bq = 0x78FFEB80,
398  .wk = 0x78FFF9C0,
399  .bk = 0x79012E00,
400  .wv = 0x79013040,
401  .bv = 0x79030C80,
402  .q = 0x79030EC0,
403  .k = 0x79031D00,
404  .v = 0x7D031D40,
405  .wo = 0x81031D80,
406  .proj_tmp = 0x810B89C0,
407  .proj_scratch = 0x810B9800,
408  .residual1 = 0x810BA640,
409  .ln2_gamma = 0x810BB480,
410  .ln2_out = 0x810BC2C0,
411  .w1 = 0x810BD100,
412  .w2 = 0x81674140,
413  .mlp_out = 0x819DCE80,
414  .output = 0x819DDCC0,
415  },
416  [14] = {
417  .ln1_gamma = 0x819DEB80,
418  .ln1_out = 0x819DF9C0,
419  .wq = 0x819E0800,
420  .bq = 0x81A67440,
421  .wk = 0x81A68280,
422  .bk = 0x81A7B6C0,
423  .wv = 0x81A7B900,
424  .bv = 0x81A8ED40,
425  .q = 0x81A8EF80,
426  .k = 0x81A8FDC0,
427  .v = 0x85A8FE00,
428  .wo = 0x89A8FE40,
429  .proj_tmp = 0x89B16A80,
430  .proj_scratch = 0x89B178C0,
431  .residual1 = 0x89B18700,
432  .ln2_gamma = 0x89B19540,
433  .ln2_out = 0x89B1A380,
434  .w1 = 0x89B1B1C0,
435  .w2 = 0x8A0D2200,
436  .mlp_out = 0x8A328A40,
437  .output = 0x8A329880,
438  },
439  [15] = {
440  .ln1_gamma = 0x8A32A740,
441  .ln1_out = 0x8A32B580,
442  .wq = 0x8A32C3C0,
443  .bq = 0x8A3B3000,
444  .wk = 0x8A3B3E40,
445  .bk = 0x8A3C7280,
446  .wv = 0x8A3C74C0,
447  .bv = 0x8A3DA900,
448  .q = 0x8A3DAB40,
449  .k = 0x8A3DB980,
450  .v = 0x8E3DB9C0,
451  .wo = 0x923DBA00,
452  .proj_tmp = 0x92462640,
453  .proj_scratch = 0x92463480,
454  .residual1 = 0x924642C0,
455  .ln2_gamma = 0x92465100,
456  .ln2_out = 0x92465F40,
457  .w1 = 0x92466D80,
458  .w2 = 0x92A1DDC0,
459  .mlp_out = 0x92C74600,
460  .output = 0x92C75440,
461  },
462  [16] = {
463  .ln1_gamma = 0x92C76300,
464  .ln1_out = 0x92C77140,
465  .wq = 0x92C77F80,
466  .bq = 0x92CFEBC0,
467  .wk = 0x92CFFA00,
468  .bk = 0x92D12E40,
469  .wv = 0x92D13080,
470  .bv = 0x92D30CC0,
471  .q = 0x92D30F00,
472  .k = 0x92D31D40,
473  .v = 0x96D31D80,
474  .wo = 0x9AD31DC0,
475  .proj_tmp = 0x9ADB8A00,
476  .proj_scratch = 0x9ADB9840,
477  .residual1 = 0x9ADBA680,
478  .ln2_gamma = 0x9ADBB4C0,
479  .ln2_out = 0x9ADBC300,
480  .w1 = 0x9ADBD140,
481  .w2 = 0x9B374180,
482  .mlp_out = 0x9B6DCEC0,
483  .output = 0x9B6DDD00,
484  },
485  [17] = {
486  .ln1_gamma = 0x9B6DEBC0,
487  .ln1_out = 0x9B6DFA00,
488  .wq = 0x9B6E0840,
489  .bq = 0x9B767480,
490  .wk = 0x9B7682C0,
491  .bk = 0x9B77B700,
492  .wv = 0x9B77B940,
493  .bv = 0x9B78ED80,
494  .q = 0x9B78EFC0,
495  .k = 0x9B78FE00,
496  .v = 0x9F78FE40,
497  .wo = 0xA378FE80,
498  .proj_tmp = 0xA3816AC0,
499  .proj_scratch = 0xA3817900,
500  .residual1 = 0xA3818740,
501  .ln2_gamma = 0xA3819580,
502  .ln2_out = 0xA381A3C0,
503  .w1 = 0xA381B200,
504  .w2 = 0xA3DD2240,
505  .mlp_out = 0xA4028A80,
506  .output = 0xA40298C0,
507  },
508  [18] = {
509  .ln1_gamma = 0xA402A780,
510  .ln1_out = 0xA402B5C0,
511  .wq = 0xA402C400,
512  .bq = 0xA40B3040,
513  .wk = 0xA40B3E80,
514  .bk = 0xA40C72C0,
515  .wv = 0xA40C7500,
516  .bv = 0xA40DA940,
517  .q = 0xA40DAB80,
518  .k = 0xA40DB9C0,
519  .v = 0xA80DBA00,
520  .wo = 0xAC0DBA40,
521  .proj_tmp = 0xAC162680,
522  .proj_scratch = 0xAC1634C0,
523  .residual1 = 0xAC164300,
524  .ln2_gamma = 0xAC165140,
525  .ln2_out = 0xAC165F80,
526  .w1 = 0xAC166DC0,
527  .w2 = 0xAC71DE00,
528  .mlp_out = 0xAC974640,
529  .output = 0xAC975480,
530  },
531  [19] = {
532  .ln1_gamma = 0xAC976340,
533  .ln1_out = 0xAC977180,
534  .wq = 0xAC977FC0,
535  .bq = 0xAC9FEC00,
536  .wk = 0xAC9FFA40,
537  .bk = 0xACA12E80,
538  .wv = 0xACA130C0,
539  .bv = 0xACA30D00,
540  .q = 0xACA30F40,
541  .k = 0xACA31D80,
542  .v = 0xB0A31DC0,
543  .wo = 0xB4A31E00,
544  .proj_tmp = 0xB4AB8A40,
545  .proj_scratch = 0xB4AB9880,
546  .residual1 = 0xB4ABA6C0,
547  .ln2_gamma = 0xB4ABB500,
548  .ln2_out = 0xB4ABC340,
549  .w1 = 0xB4ABD180,
550  .w2 = 0xB50741C0,
551  .mlp_out = 0xB53DCF00,
552  .output = 0xB53DDD40,
553  },
554  [20] = {
555  .ln1_gamma = 0xB53DEC00,
556  .ln1_out = 0xB53DFA40,
557  .wq = 0xB53E0880,
558  .bq = 0xB54674C0,
559  .wk = 0xB5468300,
560  .bk = 0xB547B740,
561  .wv = 0xB547B980,
562  .bv = 0xB548EDC0,
563  .q = 0xB548F000,
564  .k = 0xB548FE40,
565  .v = 0xB948FE80,
566  .wo = 0xBD48FEC0,
567  .proj_tmp = 0xBD516B00,
568  .proj_scratch = 0xBD517940,
569  .residual1 = 0xBD518780,
570  .ln2_gamma = 0xBD5195C0,
571  .ln2_out = 0xBD51A400,
572  .w1 = 0xBD51B240,
573  .w2 = 0xBDAD2280,
574  .mlp_out = 0xBDD28AC0,
575  .output = 0xBDD29900,
576  },
577  [21] = {
578  .ln1_gamma = 0xBDD2A7C0,
579  .ln1_out = 0xBDD2B600,
580  .wq = 0xBDD2C440,
581  .bq = 0xBDDB3080,
582  .wk = 0xBDDB3EC0,
583  .bk = 0xBDDC7300,
584  .wv = 0xBDDC7540,
585  .bv = 0xBDDE5180,
586  .q = 0xBDDE53C0,
587  .k = 0xBDDE6200,
588  .v = 0xC1DE6240,
589  .wo = 0xC5DE6280,
590  .proj_tmp = 0xC5E6CEC0,
591  .proj_scratch = 0xC5E6DD00,
592  .residual1 = 0xC5E6EB40,
593  .ln2_gamma = 0xC5E6F980,
594  .ln2_out = 0xC5E707C0,
595  .w1 = 0xC5E71600,
596  .w2 = 0xC6428640,
597  .mlp_out = 0xC6791380,
598  .output = 0xC67921C0,
599  },
600  [22] = {
601  .ln1_gamma = 0xC6793080,
602  .ln1_out = 0xC6793EC0,
603  .wq = 0xC6794D00,
604  .bq = 0xC681B940,
605  .wk = 0xC681C780,
606  .bk = 0xC682FBC0,
607  .wv = 0xC682FE00,
608  .bv = 0xC6843240,
609  .q = 0xC6843480,
610  .k = 0xC68442C0,
611  .v = 0xCA844300,
612  .wo = 0xCE844340,
613  .proj_tmp = 0xCE8CAF80,
614  .proj_scratch = 0xCE8CBDC0,
615  .residual1 = 0xCE8CCC00,
616  .ln2_gamma = 0xCE8CDA40,
617  .ln2_out = 0xCE8CE880,
618  .w1 = 0xCE8CF6C0,
619  .w2 = 0xCEE86700,
620  .mlp_out = 0xCF0DCF40,
621  .output = 0xCF0DDD80,
622  },
623  [23] = {
624  .ln1_gamma = 0xCF0DEC40,
625  .ln1_out = 0xCF0DFA80,
626  .wq = 0xCF0E08C0,
627  .bq = 0xCF167500,
628  .wk = 0xCF168340,
629  .bk = 0xCF17B780,
630  .wv = 0xCF17B9C0,
631  .bv = 0xCF18EE00,
632  .q = 0xCF18F040,
633  .k = 0xCF18FE80,
634  .v = 0xD318FEC0,
635  .wo = 0xD718FF00,
636  .proj_tmp = 0xD7216B40,
637  .proj_scratch = 0xD7217980,
638  .residual1 = 0xD72187C0,
639  .ln2_gamma = 0xD7219600,
640  .ln2_out = 0xD721A440,
641  .w1 = 0xD721B280,
642  .w2 = 0xD77D22C0,
643  .mlp_out = 0xD7A28B00,
644  .output = 0xD7A29940,
645  },
646 };
647 
648 #define QWEN2_0_5B_DECODE_LAYER_STRIDE 0x08A688C0ULL /* NOTE(review): equals layer[1].ln1_gamma - layer[0].ln1_gamma (0x1181FAC0 - 0x08DB7200) and also holds for layer 1 -> 2, but the table is NOT uniformly strided: layer[3].ln1_gamma - layer[2].ln1_gamma = 0x22BD3F40 - 0x1A288380 = 0x0894BBC0. Do not compute layer offsets as base + i * LAYER_STRIDE; read the per-layer offset table instead. */
649 
650 /* ============================================================================
651  * PER-LAYER DTYPE ARRAYS (mixed quantization)
652  * ============================================================================ */
653 
654 #include "ckernel_dtype.h" /* For CKDataType */
655 
658 };
659 
662 };
663 
666 };
667 
670 };
671 
674 };
675 
678 };
679 
680 /* ============================================================================
681  * FOOTER OFFSETS
682  * ============================================================================ */
683 
684 typedef struct {
685  size_t final_ln_weight; /* [896] */
686  size_t final_output; /* [1, 896] */
687  size_t lm_head_weight; /* [151936, 896] */
688  size_t logits; /* [1, 151936] */
690 
692  .final_ln_weight = 0xD7A2A800,
693  .final_output = 0xD7A2B640,
694  .lm_head_weight = 0x00000080, /* TIED to token_emb */
695  .logits = 0xD7A2C480,
696 };
697 
698 /* ============================================================================
699  * GLOBAL OFFSETS
700  * ============================================================================ */
701 
702 typedef struct {
703  size_t rope_cos_cache; /* [131072, 32] */
704  size_t rope_sin_cache; /* [131072, 32] */
706 
708  .rope_cos_cache = 0xD7AC0B00,
709  .rope_sin_cache = 0xD8AC0B40,
710 };
711 
712 /* ============================================================================
713  * CANARY OFFSETS
714  * ============================================================================ */
715 
716 typedef struct {
717  size_t offset;
718  const char *name;
720 
722  {0x00000040, "header_start"},
723  {0x089F1480, "token_emb_end"},
724  {0x08A85AC0, "vocab_offsets_end"},
725  {0x08BFAA40, "vocab_strings_end"},
726  {0x08DB6300, "vocab_merges_end"},
727  {0x08DB7140, "embedded_input_end"},
728  {0x08DB7180, "header_end"},
729  {0x08DB71C0, "layer_0_start"},
730  {0x08DB8000, "layer.0.ln1_gamma_end"},
731  {0x08DB8E40, "layer.0.ln1_out_end"},
732  {0x08E3FA80, "layer.0.wq_end"},
733  {0x08E408C0, "layer.0.bq_end"},
734  {0x08E53D00, "layer.0.wk_end"},
735  {0x08E53F40, "layer.0.bk_end"},
736  {0x08E71B80, "layer.0.wv_end"},
737  {0x08E71DC0, "layer.0.bv_end"},
738  {0x08E72C00, "layer.0.q_end"},
739  {0x0CE72C40, "layer.0.k_end"},
740  {0x10E72C80, "layer.0.v_end"},
741  {0x10EF98C0, "layer.0.wo_end"},
742  {0x10EFA700, "layer.0.proj_tmp_end"},
743  {0x10EFB540, "layer.0.proj_scratch_end"},
744  {0x10EFC380, "layer.0.residual1_end"},
745  {0x10EFD1C0, "layer.0.ln2_gamma_end"},
746  {0x10EFE000, "layer.0.ln2_out_end"},
747  {0x114B5040, "layer.0.w1_end"},
748  {0x1181DD80, "layer.0.w2_end"},
749  {0x1181EBC0, "layer.0.mlp_out_end"},
750  {0x1181FA00, "layer.0.output_end"},
751  {0x1181FA40, "layer_0_end"},
752  {0x1181FA80, "layer_1_start"},
753  {0x118208C0, "layer.1.ln1_gamma_end"},
754  {0x11821700, "layer.1.ln1_out_end"},
755  {0x118A8340, "layer.1.wq_end"},
756  {0x118A9180, "layer.1.bq_end"},
757  {0x118BC5C0, "layer.1.wk_end"},
758  {0x118BC800, "layer.1.bk_end"},
759  {0x118DA440, "layer.1.wv_end"},
760  {0x118DA680, "layer.1.bv_end"},
761  {0x118DB4C0, "layer.1.q_end"},
762  {0x158DB500, "layer.1.k_end"},
763  {0x198DB540, "layer.1.v_end"},
764  {0x19962180, "layer.1.wo_end"},
765  {0x19962FC0, "layer.1.proj_tmp_end"},
766  {0x19963E00, "layer.1.proj_scratch_end"},
767  {0x19964C40, "layer.1.residual1_end"},
768  {0x19965A80, "layer.1.ln2_gamma_end"},
769  {0x199668C0, "layer.1.ln2_out_end"},
770  {0x19F1D900, "layer.1.w1_end"},
771  {0x1A286640, "layer.1.w2_end"},
772  {0x1A287480, "layer.1.mlp_out_end"},
773  {0x1A2882C0, "layer.1.output_end"},
774  {0x1A288300, "layer_1_end"},
775  {0x1A288340, "layer_2_start"},
776  {0x1A289180, "layer.2.ln1_gamma_end"},
777  {0x1A289FC0, "layer.2.ln1_out_end"},
778  {0x1A310C00, "layer.2.wq_end"},
779  {0x1A311A40, "layer.2.bq_end"},
780  {0x1A324E80, "layer.2.wk_end"},
781  {0x1A3250C0, "layer.2.bk_end"},
782  {0x1A338500, "layer.2.wv_end"},
783  {0x1A338740, "layer.2.bv_end"},
784  {0x1A339580, "layer.2.q_end"},
785  {0x1E3395C0, "layer.2.k_end"},
786  {0x22339600, "layer.2.v_end"},
787  {0x223C0240, "layer.2.wo_end"},
788  {0x223C1080, "layer.2.proj_tmp_end"},
789  {0x223C1EC0, "layer.2.proj_scratch_end"},
790  {0x223C2D00, "layer.2.residual1_end"},
791  {0x223C3B40, "layer.2.ln2_gamma_end"},
792  {0x223C4980, "layer.2.ln2_out_end"},
793  {0x2297B9C0, "layer.2.w1_end"},
794  {0x22BD2200, "layer.2.w2_end"},
795  {0x22BD3040, "layer.2.mlp_out_end"},
796  {0x22BD3E80, "layer.2.output_end"},
797  {0x22BD3EC0, "layer_2_end"},
798  {0x22BD3F00, "layer_3_start"},
799  {0x22BD4D40, "layer.3.ln1_gamma_end"},
800  {0x22BD5B80, "layer.3.ln1_out_end"},
801  {0x22C5C7C0, "layer.3.wq_end"},
802  {0x22C5D600, "layer.3.bq_end"},
803  {0x22C70A40, "layer.3.wk_end"},
804  {0x22C70C80, "layer.3.bk_end"},
805  {0x22C8E8C0, "layer.3.wv_end"},
806  {0x22C8EB00, "layer.3.bv_end"},
807  {0x22C8F940, "layer.3.q_end"},
808  {0x26C8F980, "layer.3.k_end"},
809  {0x2AC8F9C0, "layer.3.v_end"},
810  {0x2AD16600, "layer.3.wo_end"},
811  {0x2AD17440, "layer.3.proj_tmp_end"},
812  {0x2AD18280, "layer.3.proj_scratch_end"},
813  {0x2AD190C0, "layer.3.residual1_end"},
814  {0x2AD19F00, "layer.3.ln2_gamma_end"},
815  {0x2AD1AD40, "layer.3.ln2_out_end"},
816  {0x2B2D1D80, "layer.3.w1_end"},
817  {0x2B63AAC0, "layer.3.w2_end"},
818  {0x2B63B900, "layer.3.mlp_out_end"},
819  {0x2B63C740, "layer.3.output_end"},
820  {0x2B63C780, "layer_3_end"},
821  {0x2B63C7C0, "layer_4_start"},
822  {0x2B63D600, "layer.4.ln1_gamma_end"},
823  {0x2B63E440, "layer.4.ln1_out_end"},
824  {0x2B6C5080, "layer.4.wq_end"},
825  {0x2B6C5EC0, "layer.4.bq_end"},
826  {0x2B6D9300, "layer.4.wk_end"},
827  {0x2B6D9540, "layer.4.bk_end"},
828  {0x2B6EC980, "layer.4.wv_end"},
829  {0x2B6ECBC0, "layer.4.bv_end"},
830  {0x2B6EDA00, "layer.4.q_end"},
831  {0x2F6EDA40, "layer.4.k_end"},
832  {0x336EDA80, "layer.4.v_end"},
833  {0x337746C0, "layer.4.wo_end"},
834  {0x33775500, "layer.4.proj_tmp_end"},
835  {0x33776340, "layer.4.proj_scratch_end"},
836  {0x33777180, "layer.4.residual1_end"},
837  {0x33777FC0, "layer.4.ln2_gamma_end"},
838  {0x33778E00, "layer.4.ln2_out_end"},
839  {0x33D2FE40, "layer.4.w1_end"},
840  {0x33F86680, "layer.4.w2_end"},
841  {0x33F874C0, "layer.4.mlp_out_end"},
842  {0x33F88300, "layer.4.output_end"},
843  {0x33F88340, "layer_4_end"},
844  {0x33F88380, "layer_5_start"},
845  {0x33F891C0, "layer.5.ln1_gamma_end"},
846  {0x33F8A000, "layer.5.ln1_out_end"},
847  {0x34010C40, "layer.5.wq_end"},
848  {0x34011A80, "layer.5.bq_end"},
849  {0x34024EC0, "layer.5.wk_end"},
850  {0x34025100, "layer.5.bk_end"},
851  {0x34038540, "layer.5.wv_end"},
852  {0x34038780, "layer.5.bv_end"},
853  {0x340395C0, "layer.5.q_end"},
854  {0x38039600, "layer.5.k_end"},
855  {0x3C039640, "layer.5.v_end"},
856  {0x3C0C0280, "layer.5.wo_end"},
857  {0x3C0C10C0, "layer.5.proj_tmp_end"},
858  {0x3C0C1F00, "layer.5.proj_scratch_end"},
859  {0x3C0C2D40, "layer.5.residual1_end"},
860  {0x3C0C3B80, "layer.5.ln2_gamma_end"},
861  {0x3C0C49C0, "layer.5.ln2_out_end"},
862  {0x3C67BA00, "layer.5.w1_end"},
863  {0x3C8D2240, "layer.5.w2_end"},
864  {0x3C8D3080, "layer.5.mlp_out_end"},
865  {0x3C8D3EC0, "layer.5.output_end"},
866  {0x3C8D3F00, "layer_5_end"},
867  {0x3C8D3F40, "layer_6_start"},
868  {0x3C8D4D80, "layer.6.ln1_gamma_end"},
869  {0x3C8D5BC0, "layer.6.ln1_out_end"},
870  {0x3C95C800, "layer.6.wq_end"},
871  {0x3C95D640, "layer.6.bq_end"},
872  {0x3C970A80, "layer.6.wk_end"},
873  {0x3C970CC0, "layer.6.bk_end"},
874  {0x3C98E900, "layer.6.wv_end"},
875  {0x3C98EB40, "layer.6.bv_end"},
876  {0x3C98F980, "layer.6.q_end"},
877  {0x4098F9C0, "layer.6.k_end"},
878  {0x4498FA00, "layer.6.v_end"},
879  {0x44A16640, "layer.6.wo_end"},
880  {0x44A17480, "layer.6.proj_tmp_end"},
881  {0x44A182C0, "layer.6.proj_scratch_end"},
882  {0x44A19100, "layer.6.residual1_end"},
883  {0x44A19F40, "layer.6.ln2_gamma_end"},
884  {0x44A1AD80, "layer.6.ln2_out_end"},
885  {0x44FD1DC0, "layer.6.w1_end"},
886  {0x4533AB00, "layer.6.w2_end"},
887  {0x4533B940, "layer.6.mlp_out_end"},
888  {0x4533C780, "layer.6.output_end"},
889  {0x4533C7C0, "layer_6_end"},
890  {0x4533C800, "layer_7_start"},
891  {0x4533D640, "layer.7.ln1_gamma_end"},
892  {0x4533E480, "layer.7.ln1_out_end"},
893  {0x453C50C0, "layer.7.wq_end"},
894  {0x453C5F00, "layer.7.bq_end"},
895  {0x453D9340, "layer.7.wk_end"},
896  {0x453D9580, "layer.7.bk_end"},
897  {0x453F71C0, "layer.7.wv_end"},
898  {0x453F7400, "layer.7.bv_end"},
899  {0x453F8240, "layer.7.q_end"},
900  {0x493F8280, "layer.7.k_end"},
901  {0x4D3F82C0, "layer.7.v_end"},
902  {0x4D47EF00, "layer.7.wo_end"},
903  {0x4D47FD40, "layer.7.proj_tmp_end"},
904  {0x4D480B80, "layer.7.proj_scratch_end"},
905  {0x4D4819C0, "layer.7.residual1_end"},
906  {0x4D482800, "layer.7.ln2_gamma_end"},
907  {0x4D483640, "layer.7.ln2_out_end"},
908  {0x4DA3A680, "layer.7.w1_end"},
909  {0x4DDA33C0, "layer.7.w2_end"},
910  {0x4DDA4200, "layer.7.mlp_out_end"},
911  {0x4DDA5040, "layer.7.output_end"},
912  {0x4DDA5080, "layer_7_end"},
913  {0x4DDA50C0, "layer_8_start"},
914  {0x4DDA5F00, "layer.8.ln1_gamma_end"},
915  {0x4DDA6D40, "layer.8.ln1_out_end"},
916  {0x4DE2D980, "layer.8.wq_end"},
917  {0x4DE2E7C0, "layer.8.bq_end"},
918  {0x4DE41C00, "layer.8.wk_end"},
919  {0x4DE41E40, "layer.8.bk_end"},
920  {0x4DE5FA80, "layer.8.wv_end"},
921  {0x4DE5FCC0, "layer.8.bv_end"},
922  {0x4DE60B00, "layer.8.q_end"},
923  {0x51E60B40, "layer.8.k_end"},
924  {0x55E60B80, "layer.8.v_end"},
925  {0x55EE77C0, "layer.8.wo_end"},
926  {0x55EE8600, "layer.8.proj_tmp_end"},
927  {0x55EE9440, "layer.8.proj_scratch_end"},
928  {0x55EEA280, "layer.8.residual1_end"},
929  {0x55EEB0C0, "layer.8.ln2_gamma_end"},
930  {0x55EEBF00, "layer.8.ln2_out_end"},
931  {0x564A2F40, "layer.8.w1_end"},
932  {0x5680BC80, "layer.8.w2_end"},
933  {0x5680CAC0, "layer.8.mlp_out_end"},
934  {0x5680D900, "layer.8.output_end"},
935  {0x5680D940, "layer_8_end"},
936  {0x5680D980, "layer_9_start"},
937  {0x5680E7C0, "layer.9.ln1_gamma_end"},
938  {0x5680F600, "layer.9.ln1_out_end"},
939  {0x56896240, "layer.9.wq_end"},
940  {0x56897080, "layer.9.bq_end"},
941  {0x568AA4C0, "layer.9.wk_end"},
942  {0x568AA700, "layer.9.bk_end"},
943  {0x568C8340, "layer.9.wv_end"},
944  {0x568C8580, "layer.9.bv_end"},
945  {0x568C93C0, "layer.9.q_end"},
946  {0x5A8C9400, "layer.9.k_end"},
947  {0x5E8C9440, "layer.9.v_end"},
948  {0x5E950080, "layer.9.wo_end"},
949  {0x5E950EC0, "layer.9.proj_tmp_end"},
950  {0x5E951D00, "layer.9.proj_scratch_end"},
951  {0x5E952B40, "layer.9.residual1_end"},
952  {0x5E953980, "layer.9.ln2_gamma_end"},
953  {0x5E9547C0, "layer.9.ln2_out_end"},
954  {0x5EF0B800, "layer.9.w1_end"},
955  {0x5F274540, "layer.9.w2_end"},
956  {0x5F275380, "layer.9.mlp_out_end"},
957  {0x5F2761C0, "layer.9.output_end"},
958  {0x5F276200, "layer_9_end"},
959  {0x5F276240, "layer_10_start"},
960  {0x5F277080, "layer.10.ln1_gamma_end"},
961  {0x5F277EC0, "layer.10.ln1_out_end"},
962  {0x5F2FEB00, "layer.10.wq_end"},
963  {0x5F2FF940, "layer.10.bq_end"},
964  {0x5F312D80, "layer.10.wk_end"},
965  {0x5F312FC0, "layer.10.bk_end"},
966  {0x5F330C00, "layer.10.wv_end"},
967  {0x5F330E40, "layer.10.bv_end"},
968  {0x5F331C80, "layer.10.q_end"},
969  {0x63331CC0, "layer.10.k_end"},
970  {0x67331D00, "layer.10.v_end"},
971  {0x673B8940, "layer.10.wo_end"},
972  {0x673B9780, "layer.10.proj_tmp_end"},
973  {0x673BA5C0, "layer.10.proj_scratch_end"},
974  {0x673BB400, "layer.10.residual1_end"},
975  {0x673BC240, "layer.10.ln2_gamma_end"},
976  {0x673BD080, "layer.10.ln2_out_end"},
977  {0x679740C0, "layer.10.w1_end"},
978  {0x67CDCE00, "layer.10.w2_end"},
979  {0x67CDDC40, "layer.10.mlp_out_end"},
980  {0x67CDEA80, "layer.10.output_end"},
981  {0x67CDEAC0, "layer_10_end"},
982  {0x67CDEB00, "layer_11_start"},
983  {0x67CDF940, "layer.11.ln1_gamma_end"},
984  {0x67CE0780, "layer.11.ln1_out_end"},
985  {0x67D673C0, "layer.11.wq_end"},
986  {0x67D68200, "layer.11.bq_end"},
987  {0x67D7B640, "layer.11.wk_end"},
988  {0x67D7B880, "layer.11.bk_end"},
989  {0x67D8ECC0, "layer.11.wv_end"},
990  {0x67D8EF00, "layer.11.bv_end"},
991  {0x67D8FD40, "layer.11.q_end"},
992  {0x6BD8FD80, "layer.11.k_end"},
993  {0x6FD8FDC0, "layer.11.v_end"},
994  {0x6FE16A00, "layer.11.wo_end"},
995  {0x6FE17840, "layer.11.proj_tmp_end"},
996  {0x6FE18680, "layer.11.proj_scratch_end"},
997  {0x6FE194C0, "layer.11.residual1_end"},
998  {0x6FE1A300, "layer.11.ln2_gamma_end"},
999  {0x6FE1B140, "layer.11.ln2_out_end"},
1000  {0x703D2180, "layer.11.w1_end"},
1001  {0x706289C0, "layer.11.w2_end"},
1002  {0x70629800, "layer.11.mlp_out_end"},
1003  {0x7062A640, "layer.11.output_end"},
1004  {0x7062A680, "layer_11_end"},
1005  {0x7062A6C0, "layer_12_start"},
1006  {0x7062B500, "layer.12.ln1_gamma_end"},
1007  {0x7062C340, "layer.12.ln1_out_end"},
1008  {0x706B2F80, "layer.12.wq_end"},
1009  {0x706B3DC0, "layer.12.bq_end"},
1010  {0x706C7200, "layer.12.wk_end"},
1011  {0x706C7440, "layer.12.bk_end"},
1012  {0x706DA880, "layer.12.wv_end"},
1013  {0x706DAAC0, "layer.12.bv_end"},
1014  {0x706DB900, "layer.12.q_end"},
1015  {0x746DB940, "layer.12.k_end"},
1016  {0x786DB980, "layer.12.v_end"},
1017  {0x787625C0, "layer.12.wo_end"},
1018  {0x78763400, "layer.12.proj_tmp_end"},
1019  {0x78764240, "layer.12.proj_scratch_end"},
1020  {0x78765080, "layer.12.residual1_end"},
1021  {0x78765EC0, "layer.12.ln2_gamma_end"},
1022  {0x78766D00, "layer.12.ln2_out_end"},
1023  {0x78D1DD40, "layer.12.w1_end"},
1024  {0x78F74580, "layer.12.w2_end"},
1025  {0x78F753C0, "layer.12.mlp_out_end"},
1026  {0x78F76200, "layer.12.output_end"},
1027  {0x78F76240, "layer_12_end"},
1028  {0x78F76280, "layer_13_start"},
1029  {0x78F770C0, "layer.13.ln1_gamma_end"},
1030  {0x78F77F00, "layer.13.ln1_out_end"},
1031  {0x78FFEB40, "layer.13.wq_end"},
1032  {0x78FFF980, "layer.13.bq_end"},
1033  {0x79012DC0, "layer.13.wk_end"},
1034  {0x79013000, "layer.13.bk_end"},
1035  {0x79030C40, "layer.13.wv_end"},
1036  {0x79030E80, "layer.13.bv_end"},
1037  {0x79031CC0, "layer.13.q_end"},
1038  {0x7D031D00, "layer.13.k_end"},
1039  {0x81031D40, "layer.13.v_end"},
1040  {0x810B8980, "layer.13.wo_end"},
1041  {0x810B97C0, "layer.13.proj_tmp_end"},
1042  {0x810BA600, "layer.13.proj_scratch_end"},
1043  {0x810BB440, "layer.13.residual1_end"},
1044  {0x810BC280, "layer.13.ln2_gamma_end"},
1045  {0x810BD0C0, "layer.13.ln2_out_end"},
1046  {0x81674100, "layer.13.w1_end"},
1047  {0x819DCE40, "layer.13.w2_end"},
1048  {0x819DDC80, "layer.13.mlp_out_end"},
1049  {0x819DEAC0, "layer.13.output_end"},
1050  {0x819DEB00, "layer_13_end"},
1051  {0x819DEB40, "layer_14_start"},
1052  {0x819DF980, "layer.14.ln1_gamma_end"},
1053  {0x819E07C0, "layer.14.ln1_out_end"},
1054  {0x81A67400, "layer.14.wq_end"},
1055  {0x81A68240, "layer.14.bq_end"},
1056  {0x81A7B680, "layer.14.wk_end"},
1057  {0x81A7B8C0, "layer.14.bk_end"},
1058  {0x81A8ED00, "layer.14.wv_end"},
1059  {0x81A8EF40, "layer.14.bv_end"},
1060  {0x81A8FD80, "layer.14.q_end"},
1061  {0x85A8FDC0, "layer.14.k_end"},
1062  {0x89A8FE00, "layer.14.v_end"},
1063  {0x89B16A40, "layer.14.wo_end"},
1064  {0x89B17880, "layer.14.proj_tmp_end"},
1065  {0x89B186C0, "layer.14.proj_scratch_end"},
1066  {0x89B19500, "layer.14.residual1_end"},
1067  {0x89B1A340, "layer.14.ln2_gamma_end"},
1068  {0x89B1B180, "layer.14.ln2_out_end"},
1069  {0x8A0D21C0, "layer.14.w1_end"},
1070  {0x8A328A00, "layer.14.w2_end"},
1071  {0x8A329840, "layer.14.mlp_out_end"},
1072  {0x8A32A680, "layer.14.output_end"},
1073  {0x8A32A6C0, "layer_14_end"},
1074  {0x8A32A700, "layer_15_start"},
1075  {0x8A32B540, "layer.15.ln1_gamma_end"},
1076  {0x8A32C380, "layer.15.ln1_out_end"},
1077  {0x8A3B2FC0, "layer.15.wq_end"},
1078  {0x8A3B3E00, "layer.15.bq_end"},
1079  {0x8A3C7240, "layer.15.wk_end"},
1080  {0x8A3C7480, "layer.15.bk_end"},
1081  {0x8A3DA8C0, "layer.15.wv_end"},
1082  {0x8A3DAB00, "layer.15.bv_end"},
1083  {0x8A3DB940, "layer.15.q_end"},
1084  {0x8E3DB980, "layer.15.k_end"},
1085  {0x923DB9C0, "layer.15.v_end"},
1086  {0x92462600, "layer.15.wo_end"},
1087  {0x92463440, "layer.15.proj_tmp_end"},
1088  {0x92464280, "layer.15.proj_scratch_end"},
1089  {0x924650C0, "layer.15.residual1_end"},
1090  {0x92465F00, "layer.15.ln2_gamma_end"},
1091  {0x92466D40, "layer.15.ln2_out_end"},
1092  {0x92A1DD80, "layer.15.w1_end"},
1093  {0x92C745C0, "layer.15.w2_end"},
1094  {0x92C75400, "layer.15.mlp_out_end"},
1095  {0x92C76240, "layer.15.output_end"},
1096  {0x92C76280, "layer_15_end"},
1097  {0x92C762C0, "layer_16_start"},
1098  {0x92C77100, "layer.16.ln1_gamma_end"},
1099  {0x92C77F40, "layer.16.ln1_out_end"},
1100  {0x92CFEB80, "layer.16.wq_end"},
1101  {0x92CFF9C0, "layer.16.bq_end"},
1102  {0x92D12E00, "layer.16.wk_end"},
1103  {0x92D13040, "layer.16.bk_end"},
1104  {0x92D30C80, "layer.16.wv_end"},
1105  {0x92D30EC0, "layer.16.bv_end"},
1106  {0x92D31D00, "layer.16.q_end"},
1107  {0x96D31D40, "layer.16.k_end"},
1108  {0x9AD31D80, "layer.16.v_end"},
1109  {0x9ADB89C0, "layer.16.wo_end"},
1110  {0x9ADB9800, "layer.16.proj_tmp_end"},
1111  {0x9ADBA640, "layer.16.proj_scratch_end"},
1112  {0x9ADBB480, "layer.16.residual1_end"},
1113  {0x9ADBC2C0, "layer.16.ln2_gamma_end"},
1114  {0x9ADBD100, "layer.16.ln2_out_end"},
1115  {0x9B374140, "layer.16.w1_end"},
1116  {0x9B6DCE80, "layer.16.w2_end"},
1117  {0x9B6DDCC0, "layer.16.mlp_out_end"},
1118  {0x9B6DEB00, "layer.16.output_end"},
1119  {0x9B6DEB40, "layer_16_end"},
1120  {0x9B6DEB80, "layer_17_start"},
1121  {0x9B6DF9C0, "layer.17.ln1_gamma_end"},
1122  {0x9B6E0800, "layer.17.ln1_out_end"},
1123  {0x9B767440, "layer.17.wq_end"},
1124  {0x9B768280, "layer.17.bq_end"},
1125  {0x9B77B6C0, "layer.17.wk_end"},
1126  {0x9B77B900, "layer.17.bk_end"},
1127  {0x9B78ED40, "layer.17.wv_end"},
1128  {0x9B78EF80, "layer.17.bv_end"},
1129  {0x9B78FDC0, "layer.17.q_end"},
1130  {0x9F78FE00, "layer.17.k_end"},
1131  {0xA378FE40, "layer.17.v_end"},
1132  {0xA3816A80, "layer.17.wo_end"},
1133  {0xA38178C0, "layer.17.proj_tmp_end"},
1134  {0xA3818700, "layer.17.proj_scratch_end"},
1135  {0xA3819540, "layer.17.residual1_end"},
1136  {0xA381A380, "layer.17.ln2_gamma_end"},
1137  {0xA381B1C0, "layer.17.ln2_out_end"},
1138  {0xA3DD2200, "layer.17.w1_end"},
1139  {0xA4028A40, "layer.17.w2_end"},
1140  {0xA4029880, "layer.17.mlp_out_end"},
1141  {0xA402A6C0, "layer.17.output_end"},
1142  {0xA402A700, "layer_17_end"},
1143  {0xA402A740, "layer_18_start"},
1144  {0xA402B580, "layer.18.ln1_gamma_end"},
1145  {0xA402C3C0, "layer.18.ln1_out_end"},
1146  {0xA40B3000, "layer.18.wq_end"},
1147  {0xA40B3E40, "layer.18.bq_end"},
1148  {0xA40C7280, "layer.18.wk_end"},
1149  {0xA40C74C0, "layer.18.bk_end"},
1150  {0xA40DA900, "layer.18.wv_end"},
1151  {0xA40DAB40, "layer.18.bv_end"},
1152  {0xA40DB980, "layer.18.q_end"},
1153  {0xA80DB9C0, "layer.18.k_end"},
1154  {0xAC0DBA00, "layer.18.v_end"},
1155  {0xAC162640, "layer.18.wo_end"},
1156  {0xAC163480, "layer.18.proj_tmp_end"},
1157  {0xAC1642C0, "layer.18.proj_scratch_end"},
1158  {0xAC165100, "layer.18.residual1_end"},
1159  {0xAC165F40, "layer.18.ln2_gamma_end"},
1160  {0xAC166D80, "layer.18.ln2_out_end"},
1161  {0xAC71DDC0, "layer.18.w1_end"},
1162  {0xAC974600, "layer.18.w2_end"},
1163  {0xAC975440, "layer.18.mlp_out_end"},
1164  {0xAC976280, "layer.18.output_end"},
1165  {0xAC9762C0, "layer_18_end"},
1166  {0xAC976300, "layer_19_start"},
1167  {0xAC977140, "layer.19.ln1_gamma_end"},
1168  {0xAC977F80, "layer.19.ln1_out_end"},
1169  {0xAC9FEBC0, "layer.19.wq_end"},
1170  {0xAC9FFA00, "layer.19.bq_end"},
1171  {0xACA12E40, "layer.19.wk_end"},
1172  {0xACA13080, "layer.19.bk_end"},
1173  {0xACA30CC0, "layer.19.wv_end"},
1174  {0xACA30F00, "layer.19.bv_end"},
1175  {0xACA31D40, "layer.19.q_end"},
1176  {0xB0A31D80, "layer.19.k_end"},
1177  {0xB4A31DC0, "layer.19.v_end"},
1178  {0xB4AB8A00, "layer.19.wo_end"},
1179  {0xB4AB9840, "layer.19.proj_tmp_end"},
1180  {0xB4ABA680, "layer.19.proj_scratch_end"},
1181  {0xB4ABB4C0, "layer.19.residual1_end"},
1182  {0xB4ABC300, "layer.19.ln2_gamma_end"},
1183  {0xB4ABD140, "layer.19.ln2_out_end"},
1184  {0xB5074180, "layer.19.w1_end"},
1185  {0xB53DCEC0, "layer.19.w2_end"},
1186  {0xB53DDD00, "layer.19.mlp_out_end"},
1187  {0xB53DEB40, "layer.19.output_end"},
1188  {0xB53DEB80, "layer_19_end"},
1189  {0xB53DEBC0, "layer_20_start"},
1190  {0xB53DFA00, "layer.20.ln1_gamma_end"},
1191  {0xB53E0840, "layer.20.ln1_out_end"},
1192  {0xB5467480, "layer.20.wq_end"},
1193  {0xB54682C0, "layer.20.bq_end"},
1194  {0xB547B700, "layer.20.wk_end"},
1195  {0xB547B940, "layer.20.bk_end"},
1196  {0xB548ED80, "layer.20.wv_end"},
1197  {0xB548EFC0, "layer.20.bv_end"},
1198  {0xB548FE00, "layer.20.q_end"},
1199  {0xB948FE40, "layer.20.k_end"},
1200  {0xBD48FE80, "layer.20.v_end"},
1201  {0xBD516AC0, "layer.20.wo_end"},
1202  {0xBD517900, "layer.20.proj_tmp_end"},
1203  {0xBD518740, "layer.20.proj_scratch_end"},
1204  {0xBD519580, "layer.20.residual1_end"},
1205  {0xBD51A3C0, "layer.20.ln2_gamma_end"},
1206  {0xBD51B200, "layer.20.ln2_out_end"},
1207  {0xBDAD2240, "layer.20.w1_end"},
1208  {0xBDD28A80, "layer.20.w2_end"},
1209  {0xBDD298C0, "layer.20.mlp_out_end"},
1210  {0xBDD2A700, "layer.20.output_end"},
1211  {0xBDD2A740, "layer_20_end"},
1212  {0xBDD2A780, "layer_21_start"},
1213  {0xBDD2B5C0, "layer.21.ln1_gamma_end"},
1214  {0xBDD2C400, "layer.21.ln1_out_end"},
1215  {0xBDDB3040, "layer.21.wq_end"},
1216  {0xBDDB3E80, "layer.21.bq_end"},
1217  {0xBDDC72C0, "layer.21.wk_end"},
1218  {0xBDDC7500, "layer.21.bk_end"},
1219  {0xBDDE5140, "layer.21.wv_end"},
1220  {0xBDDE5380, "layer.21.bv_end"},
1221  {0xBDDE61C0, "layer.21.q_end"},
1222  {0xC1DE6200, "layer.21.k_end"},
1223  {0xC5DE6240, "layer.21.v_end"},
1224  {0xC5E6CE80, "layer.21.wo_end"},
1225  {0xC5E6DCC0, "layer.21.proj_tmp_end"},
1226  {0xC5E6EB00, "layer.21.proj_scratch_end"},
1227  {0xC5E6F940, "layer.21.residual1_end"},
1228  {0xC5E70780, "layer.21.ln2_gamma_end"},
1229  {0xC5E715C0, "layer.21.ln2_out_end"},
1230  {0xC6428600, "layer.21.w1_end"},
1231  {0xC6791340, "layer.21.w2_end"},
1232  {0xC6792180, "layer.21.mlp_out_end"},
1233  {0xC6792FC0, "layer.21.output_end"},
1234  {0xC6793000, "layer_21_end"},
1235  {0xC6793040, "layer_22_start"},
1236  {0xC6793E80, "layer.22.ln1_gamma_end"},
1237  {0xC6794CC0, "layer.22.ln1_out_end"},
1238  {0xC681B900, "layer.22.wq_end"},
1239  {0xC681C740, "layer.22.bq_end"},
1240  {0xC682FB80, "layer.22.wk_end"},
1241  {0xC682FDC0, "layer.22.bk_end"},
1242  {0xC6843200, "layer.22.wv_end"},
1243  {0xC6843440, "layer.22.bv_end"},
1244  {0xC6844280, "layer.22.q_end"},
1245  {0xCA8442C0, "layer.22.k_end"},
1246  {0xCE844300, "layer.22.v_end"},
1247  {0xCE8CAF40, "layer.22.wo_end"},
1248  {0xCE8CBD80, "layer.22.proj_tmp_end"},
1249  {0xCE8CCBC0, "layer.22.proj_scratch_end"},
1250  {0xCE8CDA00, "layer.22.residual1_end"},
1251  {0xCE8CE840, "layer.22.ln2_gamma_end"},
1252  {0xCE8CF680, "layer.22.ln2_out_end"},
1253  {0xCEE866C0, "layer.22.w1_end"},
1254  {0xCF0DCF00, "layer.22.w2_end"},
1255  {0xCF0DDD40, "layer.22.mlp_out_end"},
1256  {0xCF0DEB80, "layer.22.output_end"},
1257  {0xCF0DEBC0, "layer_22_end"},
1258  {0xCF0DEC00, "layer_23_start"},
1259  {0xCF0DFA40, "layer.23.ln1_gamma_end"},
1260  {0xCF0E0880, "layer.23.ln1_out_end"},
1261  {0xCF1674C0, "layer.23.wq_end"},
1262  {0xCF168300, "layer.23.bq_end"},
1263  {0xCF17B740, "layer.23.wk_end"},
1264  {0xCF17B980, "layer.23.bk_end"},
1265  {0xCF18EDC0, "layer.23.wv_end"},
1266  {0xCF18F000, "layer.23.bv_end"},
1267  {0xCF18FE40, "layer.23.q_end"},
1268  {0xD318FE80, "layer.23.k_end"},
1269  {0xD718FEC0, "layer.23.v_end"},
1270  {0xD7216B00, "layer.23.wo_end"},
1271  {0xD7217940, "layer.23.proj_tmp_end"},
1272  {0xD7218780, "layer.23.proj_scratch_end"},
1273  {0xD72195C0, "layer.23.residual1_end"},
1274  {0xD721A400, "layer.23.ln2_gamma_end"},
1275  {0xD721B240, "layer.23.ln2_out_end"},
1276  {0xD77D2280, "layer.23.w1_end"},
1277  {0xD7A28AC0, "layer.23.w2_end"},
1278  {0xD7A29900, "layer.23.mlp_out_end"},
1279  {0xD7A2A740, "layer.23.output_end"},
1280  {0xD7A2A780, "layer_23_end"},
1281  {0xD7A2A7C0, "footer_start"},
1282  {0xD7A2B600, "final_ln_weight_end"},
1283  {0xD7A2C440, "final_output_end"},
1284  {0xD7AC0A80, "logits_end"},
1285  {0xD7AC0AC0, "footer_end"},
1286  {0xD8AC0B00, "rope_cos_cache_end"},
1287  {0xD9AC0B40, "rope_sin_cache_end"},
1288 };
1289 #define QWEN2_0_5B_DECODE_CANARY_COUNT 566 /* NOTE(review): presumably the number of entries in QWEN2_0_5B_DECODE_CANARIES[]; generated constant, confirm against the table */
1290 
1291 /* ============================================================================
1292  * MODEL STRUCT
1293  * ============================================================================ */
1294 
/**
 * Handle for one model instance.
 *
 * The accessor macros in this header compute addresses as
 * (char *)base + offset, so every offset is relative to `base`.
 */
typedef struct {
    void *base;         /* start of the memory block the offsets are added to */
    size_t total_bytes; /* size of that block in bytes; NOTE(review): presumably
                           QWEN2_0_5B_DECODE_TOTAL_BYTES once allocated - confirm */
} QWEN2_0_5B_DECODEModel;
1299 
1300 /* ============================================================================
1301  * ACCESSOR MACROS
1302  * ============================================================================ */
1303 
/* Yields a float* to the location `offset` bytes past (model)->base.
 * `model` must be a pointer expression (it is dereferenced with ->); the
 * addition is done on a char*, so `offset` is a byte count, not an element
 * index. The macro itself performs no bounds or alignment check. */
1304 #define QWEN2_0_5B_DECODE_PTR(model, offset) \
1305  ((float*)((char*)(model)->base + (offset)))
1306 
/* Same address computation as the float accessor ((char*)(model)->base plus
 * a byte `offset`), but the result is typed uint16_t*.
 * NOTE(review): the name suggests the elements are bfloat16 values held as
 * raw 16-bit words - confirm against the kernels that consume it. */
1307 #define QWEN2_0_5B_DECODE_PTR_BF16(model, offset) \
1308  ((uint16_t*)((char*)(model)->base + (offset)))
1309 
/* Yields the address of element `layer_id` of QWEN2_0_5B_DECODE_LAYERS.
 * The index is not range-checked here; the table is declared with 24
 * elements, so callers must pass 0..23. */
1310 #define QWEN2_0_5B_DECODE_LAYER(layer_id) \
1311  (&QWEN2_0_5B_DECODE_LAYERS[layer_id])
1312 
1313 /* ============================================================================
1314  * API
1315  * ============================================================================ */
1316 
1320 void qwen2_0_5b_decode_forward(QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens);
1321 void qwen2_0_5b_decode_decode(QWEN2_0_5B_DECODEModel *model, const int *token, int token_index);
1322 
1323 #ifdef __cplusplus
1324 }
1325 #endif
1326 
1327 #endif /* GENERATED_QWEN2_0_5B_DECODE_H */
CKDataType
Supported data types in C-Kernel-Engine.
Definition: ckernel_dtype.h:27
@ CK_DT_Q4_K
Definition: ckernel_dtype.h:40
@ CK_DT_Q8_0
Definition: ckernel_dtype.h:42
@ CK_DT_Q5_0
Definition: ckernel_dtype.h:44
@ CK_DT_Q6_K
Definition: ckernel_dtype.h:41
const char * token
Definition: tokenizer.h:306
static const CKDataType QWEN2_0_5B_DECODE_LAYER_W2_DTYPE[]
int qwen2_0_5b_decode_verify_canaries(QWEN2_0_5B_DECODEModel *model)
static const CKDataType QWEN2_0_5B_DECODE_LAYER_WO_DTYPE[]
static const CKDataType QWEN2_0_5B_DECODE_LAYER_WK_DTYPE[]
static const QWEN2_0_5B_DECODEFooterOffsets QWEN2_0_5B_DECODE_FOOTER
int qwen2_0_5b_decode_model_allocate(QWEN2_0_5B_DECODEModel *model)
static const QWEN2_0_5B_DECODELayerOffsets QWEN2_0_5B_DECODE_LAYERS[24]
static const CKDataType QWEN2_0_5B_DECODE_LAYER_WV_DTYPE[]
void qwen2_0_5b_decode_decode(QWEN2_0_5B_DECODEModel *model, const int *token, int token_index)
static const CKDataType QWEN2_0_5B_DECODE_LAYER_WQ_DTYPE[]
static const CKDataType QWEN2_0_5B_DECODE_LAYER_W1_DTYPE[]
static const QWEN2_0_5B_DECODEGlobalOffsets QWEN2_0_5B_DECODE_GLOBALS
void qwen2_0_5b_decode_forward(QWEN2_0_5B_DECODEModel *model, const int *tokens, int num_tokens)
void qwen2_0_5b_decode_model_free(QWEN2_0_5B_DECODEModel *model)
static const QWEN2_0_5B_DECODEHeaderOffsets QWEN2_0_5B_DECODE_HEADER
static const QWEN2_0_5B_DECODECanary QWEN2_0_5B_DECODE_CANARIES[]