← Back to C-Kernel-Engine Docs Doxygen Source Documentation
ck_threadpool.h
Go to the documentation of this file.
1 /**
2  * @file ck_threadpool.h
3  * @brief Persistent pthread thread pool for CK-Engine inference
4  *
5  * Design goals:
6  * - Sub-microsecond dispatch latency (spin-wait barriers)
7  * - Zero allocation after init (all memory pre-allocated)
8  * - Cache-line aligned atomics to avoid false sharing
9  * - Hybrid polling: spin CK_THREADPOOL_SPIN_COUNT rounds, then fall back to condvar wait
10  * - Thread 0 = main thread (does serial ops + its share of parallel work)
11  *
12  * Usage:
13  * ck_threadpool_t *pool = ck_threadpool_create(4); // 4 threads total
14  *
15  * // In decode loop:
16  * ck_threadpool_dispatch(pool, my_work_fn, args);
17  * // my_work_fn called on all threads with (ith, nth, args)
18  *
19  * // Between batches:
20  * ck_threadpool_pause(pool); // workers sleep (0% CPU)
21  * ck_threadpool_resume(pool); // wake workers
22  *
23  * ck_threadpool_destroy(pool);
24  *
25  * Architecture:
26  * STARTUP: Main creates N-1 worker pthreads, all spin on atomic counter
27  * DISPATCH: Main writes work desc, bumps counter, all threads execute
28  * BARRIER: Atomic counter + spin-wait with _mm_pause()
29  * PAUSE: Workers sleep on pthread_cond_t (0% CPU between batches)
30  */
31 
32 #ifndef CK_THREADPOOL_H
33 #define CK_THREADPOOL_H
34 
35 #include <stdint.h>
36 #include <stdatomic.h>
37 #include <pthread.h>
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 /* ============================================================================
44  * Configuration
45  * ============================================================================ */
46 
47 /** Maximum threads supported (main + workers) */
48 #define CK_THREADPOOL_MAX_THREADS 64
49 
50 /** Number of spin iterations before falling back to condvar wait */
51 #define CK_THREADPOOL_SPIN_COUNT 1024
52 
53 /** Cache line size in bytes, used for alignment (x86-64) */
54 #define CK_CACHE_LINE 64
55 
56 /* ============================================================================
57  * Types
58  * ============================================================================ */
59 
60 /**
61  * Work function signature.
62  * Called on ALL threads (including main thread 0).
63  *
64  * @param ith Thread index (0 = main thread)
65  * @param nth Total number of threads
66  * @param args Opaque argument pointer (set via dispatch)
67  */
68 typedef void (*ck_work_fn_t)(int ith, int nth, void *args);
69 
70 /**
71  * Thread pool state (opaque).
72  *
73  * All atomics are cache-line aligned to prevent false sharing.
74  * Workers spin on n_dispatch, checking for new work or shutdown.
75  */
76 typedef struct ck_threadpool ck_threadpool_t;
77 
78 /* ============================================================================
79  * Lifecycle
80  * ============================================================================ */
81 
82 /**
83  * Create a thread pool with `n_threads` total threads.
84  * Thread 0 is the calling (main) thread; n_threads-1 workers are spawned.
85  *
86  * @param n_threads Total thread count (including main). Must be >= 1,
87  * or 0 to auto-detect (physical cores).
88  * @return Pool handle, or NULL on failure.
89  */
90 ck_threadpool_t *ck_threadpool_create(int n_threads);
91 
92 /**
93  * Destroy the thread pool. Signals all workers to exit and joins them.
94  * Safe to call with NULL.
95  */
96 void ck_threadpool_destroy(ck_threadpool_t *pool);
97 
98 /* ============================================================================
99  * Dispatch & Synchronization
100  * ============================================================================ */
101 
102 /**
103  * Dispatch work to all threads and wait for completion.
104  *
105  * 1. Sets the work function and args
106  * 2. Bumps the dispatch counter (wakes workers)
107  * 3. Main thread (ith=0) executes its share
108  * 4. Waits for all threads to complete via barrier
109  *
110  * This is a blocking call — returns when ALL threads have finished.
111  *
112  * @param pool Thread pool
113  * @param fn Work function (called on each thread)
114  * @param args Argument passed to fn
115  */
116 void ck_threadpool_dispatch(ck_threadpool_t *pool, ck_work_fn_t fn, void *args);
117 
118 /**
119  * Barrier synchronization within a dispatched work function.
120  *
121  * ALL threads must call this at the same point. Threads spin-wait
122  * until all have arrived, then proceed.
123  *
124  * Must only be called from within a work function (during dispatch).
125  *
126  * @param pool Thread pool
127  */
128 void ck_threadpool_barrier(ck_threadpool_t *pool);
129 
130 /* ============================================================================
131  * Power Management
132  * ============================================================================ */
133 
134 /**
135  * Pause workers — they sleep on condvar (0% CPU).
136  * Call between batches or during interactive waiting.
137  * Workers wake on next dispatch or resume.
138  */
139 void ck_threadpool_pause(ck_threadpool_t *pool);
140 
141 /**
142  * Resume workers — transition from sleep to spin-wait.
143  * Call before starting a new batch of work.
144  */
145 void ck_threadpool_resume(ck_threadpool_t *pool);
146 
147 /* ============================================================================
148  * Queries
149  * ============================================================================ */
150 
151 /** Get total thread count (including main thread) */
152 int ck_threadpool_n_threads(const ck_threadpool_t *pool);
153 
154 /** Get thread index for current thread (0 = main, -1 if not in pool) */
155 int ck_threadpool_thread_id(const ck_threadpool_t *pool);
156 
157 /* ============================================================================
158  * Global Thread Pool (convenience)
159  * ============================================================================ */
160 
161 /**
162  * Get or create the global thread pool.
163  * Thread-safe (uses pthread_once internally).
164  * Uses ck_get_num_threads() for auto-detection.
165  *
166  * @return Global pool, never NULL after successful first call.
167  */
168 ck_threadpool_t *ck_threadpool_global(void);
169 
170 /**
171  * Destroy the global thread pool.
172  * Called during engine shutdown.
173  */
174 void ck_threadpool_global_destroy(void);
175 
176 #ifdef __cplusplus
177 }
178 #endif
179 
180 #endif /* CK_THREADPOOL_H */
void ck_threadpool_pause(ck_threadpool_t *pool)
void ck_threadpool_resume(ck_threadpool_t *pool)
void ck_threadpool_global_destroy(void)
ck_threadpool_t * ck_threadpool_create(int n_threads)
void ck_threadpool_destroy(ck_threadpool_t *pool)
void ck_threadpool_barrier(ck_threadpool_t *pool)
void(* ck_work_fn_t)(int ith, int nth, void *args)
Definition: ck_threadpool.h:68
void ck_threadpool_dispatch(ck_threadpool_t *pool, ck_work_fn_t fn, void *args)
int ck_threadpool_thread_id(const ck_threadpool_t *pool)
int ck_threadpool_n_threads(const ck_threadpool_t *pool)
ck_threadpool_t * ck_threadpool_global(void)