← Back to C-Kernel-Engine Docs Doxygen Source Documentation
show_config.c
Go to the documentation of this file.
1 /*
2  * show_config.c - Display system configuration for C-Kernel-Engine
3  *
4  * Main program for `make show_config` that displays comprehensive
5  * hardware topology and recommendations for distributed training.
6  */
7 
8 #define _GNU_SOURCE
9 #include "system_topology.h"
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 
16 // ═══════════════════════════════════════════════════════════════════════════════
17 // ANSI Color Codes
18 // ═══════════════════════════════════════════════════════════════════════════════
19 
20 #define RESET "\033[0m"
21 #define BOLD "\033[1m"
22 #define DIM "\033[2m"
23 
24 #define RED "\033[31m"
25 #define GREEN "\033[32m"
26 #define YELLOW "\033[33m"
27 #define BLUE "\033[34m"
28 #define MAGENTA "\033[35m"
29 #define CYAN "\033[36m"
30 #define WHITE "\033[37m"
31 
32 #define BG_RED "\033[41m"
33 #define BG_GREEN "\033[42m"
34 #define BG_YELLOW "\033[43m"
35 
// Non-zero when ANSI escape sequences should be emitted; starts enabled.
static int use_colors = 1;

// Expands to `color` when colors are enabled, otherwise to the empty string.
// The argument is parenthesized so that conditional expressions can be passed
// safely, e.g. C(ok ? GREEN : YELLOW).
#define C(color) (use_colors ? (color) : "")
40 
41 // ═══════════════════════════════════════════════════════════════════════════════
42 // Helper Functions
43 // ═══════════════════════════════════════════════════════════════════════════════
44 
/*
 * Render a size given in megabytes as a short human-readable string.
 *
 *   size_mb   size in MB
 *   buf       destination buffer
 *   buf_size  capacity of buf in bytes
 *
 * Values below 1024 MB are printed as whole "MB"; values below 1024*1024 MB
 * as "GB" with one decimal; anything larger as "TB" with one decimal.
 * Returns buf.
 */
static const char* format_size(uint64_t size_mb, char *buf, size_t buf_size) {
    const uint64_t mb_per_gb = 1024;
    const uint64_t mb_per_tb = 1024 * 1024;

    if (size_mb < mb_per_gb) {
        snprintf(buf, buf_size, "%lu MB", (unsigned long)size_mb);
        return buf;
    }

    if (size_mb < mb_per_tb) {
        snprintf(buf, buf_size, "%.1f GB", size_mb / 1024.0);
        return buf;
    }

    snprintf(buf, buf_size, "%.1f TB", size_mb / (1024.0 * 1024.0));
    return buf;
}
55 
/*
 * Render a bandwidth given in GB/s as a short human-readable string.
 *
 *   bw_gbs    bandwidth in GB/s
 *   buf       destination buffer
 *   buf_size  capacity of buf in bytes
 *
 * Values of at least 1.0 are printed as "GB/s" with one decimal; everything
 * else is scaled by 1024 and printed as whole "MB/s". Returns buf.
 */
static const char* format_bandwidth(float bw_gbs, char *buf, size_t buf_size) {
    // Written as a negated ">=" so that NaN takes the MB/s path.
    if (!(bw_gbs >= 1.0f)) {
        snprintf(buf, buf_size, "%.0f MB/s", bw_gbs * 1024);
        return buf;
    }

    snprintf(buf, buf_size, "%.1f GB/s", bw_gbs);
    return buf;
}
64 
65 static void print_header(const char *title) {
66  printf("\n%s", C(BOLD));
67  printf("═══════════════════════════════════════════════════════════════════════════════\n");
68  printf(" %s\n", title);
69  printf("═══════════════════════════════════════════════════════════════════════════════%s\n",
70  C(RESET));
71 }
72 
73 static void print_section(const char *title) {
74  printf("\n%s %s%s\n", C(CYAN), title, C(RESET));
75  printf(" ────────────────────────────────────────────────────────────────────────────\n");
76 }
77 
/*
 * Print one line of a tree listing.
 *
 *   level    number of vertical-bar prefixes to emit before the branch
 *   is_last  non-zero selects the closing corner, zero the tee connector
 *   fmt,...  printf-style format and arguments for the item text
 *
 * A newline is appended after the formatted text.
 */
static void print_tree_item(int level, int is_last, const char *fmt, ...) {
    const char *connector = "├";
    if (is_last) {
        connector = "└";
    }

    for (int remaining = level; remaining > 0; remaining--) {
        fputs(" │ ", stdout);
    }
    printf(" %s── ", connector);

    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    putchar('\n');
}
90 
91 static void print_warning(const char *msg) {
92  printf(" %s⚠️ %s%s\n", C(YELLOW), msg, C(RESET));
93 }
94 
95 static void print_ok(const char *msg) {
96  printf(" %s✓ %s%s\n", C(GREEN), msg, C(RESET));
97 }
98 
99 // ═══════════════════════════════════════════════════════════════════════════════
100 // Print Functions
101 // ═══════════════════════════════════════════════════════════════════════════════
102 
103 void topology_print_cpu(const CPUInfo *cpu) {
104  print_section("CPU");
105 
106  printf(" %s%s%s\n", C(BOLD), cpu->model_name, C(RESET));
107 
108  // Build SIMD string with detailed AVX-512 sub-features
109  char simd_buf[256] = "";
110  if (cpu->has_avx512f) {
111  strcat(simd_buf, "AVX-512");
112  // Add AVX-512 sub-features in parentheses
113  char sub_features[64] = "";
114  if (cpu->has_avx512_bf16) strcat(sub_features, "BF16, ");
115  if (cpu->has_avx512bw) strcat(sub_features, "BW, ");
116  if (cpu->has_avx512vl) strcat(sub_features, "VL, ");
117  if (sub_features[0]) {
118  // Remove trailing ", "
119  sub_features[strlen(sub_features) - 2] = '\0';
120  strcat(simd_buf, " (");
121  strcat(simd_buf, sub_features);
122  strcat(simd_buf, ")");
123  }
124  strcat(simd_buf, " ");
125  } else if (cpu->has_avx2) {
126  strcat(simd_buf, "AVX2 ");
127  } else if (cpu->has_avx) {
128  strcat(simd_buf, "AVX ");
129  } else if (cpu->has_sse4_2) {
130  strcat(simd_buf, "SSE4.2 ");
131  }
132 
133  if (cpu->has_vnni) strcat(simd_buf, "VNNI ");
134 
135  // AMX details
136  if (cpu->has_amx) {
137  strcat(simd_buf, "AMX");
138  char amx_features[32] = "";
139  if (cpu->has_amx_bf16) strcat(amx_features, "BF16,");
140  if (cpu->has_amx_int8) strcat(amx_features, "INT8,");
141  if (amx_features[0]) {
142  amx_features[strlen(amx_features) - 1] = '\0'; // Remove trailing comma
143  strcat(simd_buf, "(");
144  strcat(simd_buf, amx_features);
145  strcat(simd_buf, ") ");
146  } else {
147  strcat(simd_buf, " ");
148  }
149  }
150 
151  print_tree_item(0, 0, "Sockets: %d", cpu->sockets);
152  print_tree_item(0, 0, "Cores: %d physical, %d logical %s",
153  cpu->physical_cores, cpu->logical_cores,
154  cpu->threads_per_core > 1 ? "(HT/SMT enabled)" : "");
155  print_tree_item(0, 0, "Frequency: %.0f MHz (max: %.0f MHz)",
156  cpu->base_freq_mhz, cpu->max_freq_mhz);
157  print_tree_item(0, 0, "SIMD: %s%s%s",
158  C(cpu->has_avx512f ? GREEN : (cpu->has_avx2 ? GREEN : YELLOW)),
159  simd_buf[0] ? simd_buf : "Basic",
160  C(RESET));
161  print_tree_item(0, 1, "PCIe: Gen %d, ~%d lanes from CPU",
162  cpu->pcie_generation, cpu->pcie_lanes_total);
163 
164  if (!cpu->has_avx2) {
165  print_warning("No AVX2 support - kernel performance will be limited");
166  }
167 }
168 
169 void topology_print_cache(const CacheTopology *cache, int logical_cores) {
170  print_section("CACHE HIERARCHY");
171 
172  // Show data source
173  printf(" %sSource: /sys/devices/system/cpu/cpu0/cache/%s\n\n",
174  C(DIM), C(RESET));
175 
176  for (int i = 0; i < cache->num_levels; i++) {
177  const CacheInfo *c = &cache->levels[i];
178  int is_last = (i == cache->num_levels - 1);
179 
180  // Calculate number of instances (how many of this cache exist)
181  int instances = 1;
182  if (c->shared_by_cores > 0 && logical_cores > 0) {
183  instances = logical_cores / c->shared_by_cores;
184  if (instances < 1) instances = 1;
185  }
186 
187  // Calculate total size across all instances
188  int total_kb = c->size_kb * instances;
189 
190  // Format size nicely (KB or MB)
191  char size_str[32];
192  if (total_kb >= 1024) {
193  snprintf(size_str, sizeof(size_str), "%d MiB", total_kb / 1024);
194  } else {
195  snprintf(size_str, sizeof(size_str), "%d KiB", total_kb);
196  }
197 
198  // Format instance info like lscpu
199  char instance_str[32] = "";
200  if (instances > 1) {
201  snprintf(instance_str, sizeof(instance_str), " (%d instances)", instances);
202  } else {
203  snprintf(instance_str, sizeof(instance_str), " (%d instance)", instances);
204  }
205 
206  print_tree_item(0, is_last, "L%d%c: %s%s",
207  c->level,
208  c->type[0] == 'D' ? 'd' : (c->type[0] == 'I' ? 'i' : ' '),
209  size_str, instance_str);
210  }
211 }
212 
213 void topology_print_numa(const NUMATopology *numa, int sockets) {
214  print_section("NUMA TOPOLOGY");
215 
216  // Show source
217  printf(" %sSource: /sys/devices/system/node/%s\n", C(DIM), C(RESET));
218 
219  // Single NUMA node - UMA system
220  if (numa->num_nodes <= 1) {
221  printf("\n %s✓ Single NUMA node (Uniform Memory Access)%s\n", C(GREEN), C(RESET));
222  printf(" %s All memory is local - no NUMA penalties%s\n", C(DIM), C(RESET));
223  printf("\n %sNote: Sub-NUMA Clustering (SNC) / NUMA-Per-Socket (NPS) not detected.%s\n",
224  C(DIM), C(RESET));
225  printf(" %s On Xeon/EPYC, check BIOS settings or run: numactl --hardware%s\n",
226  C(DIM), C(RESET));
227  return;
228  }
229 
230  // Detect potential Sub-NUMA Clustering (SNC) or NUMA-Per-Socket (NPS)
231  // If num_nodes > sockets, SNC/NPS is likely enabled
232  // SNC divides each socket's memory channels into groups, one per sub-NUMA node
233  if (sockets > 0 && numa->num_nodes > sockets) {
234  int nodes_per_socket = numa->num_nodes / sockets;
235  printf("\n %s⚠ Sub-NUMA detected: %d NUMA nodes on %d socket(s) = SNC%d or NPS%d%s\n",
236  C(YELLOW), numa->num_nodes, sockets, nodes_per_socket, nodes_per_socket, C(RESET));
237  printf(" %s Intel: Sub-NUMA Clustering (SNC) | AMD: NUMA-Per-Socket (NPS)%s\n",
238  C(DIM), C(RESET));
239  printf(" %s Each sub-node has its own memory channels for lower latency%s\n",
240  C(DIM), C(RESET));
241  } else if (sockets > 1) {
242  // Multi-socket without SNC
243  printf("\n %sMulti-socket system: %d sockets, %d NUMA nodes%s\n",
244  C(CYAN), sockets, numa->num_nodes, C(RESET));
245  printf(" %s SNC/NPS not enabled - each socket is one NUMA node%s\n",
246  C(DIM), C(RESET));
247  printf(" %s 💡 Enable SNC in BIOS to partition channels for lower latency%s\n",
248  C(DIM), C(RESET));
249  }
250 
251  printf("\n");
252  char size_buf[32];
253 
254  for (int i = 0; i < numa->num_nodes; i++) {
255  const NUMANode *n = &numa->nodes[i];
256  int is_last = (i == numa->num_nodes - 1);
257 
258  format_size(n->memory_total_mb, size_buf, sizeof(size_buf));
259  print_tree_item(0, is_last, "Node %d: %s, CPUs %d-%d",
260  n->node_id, size_buf,
261  n->cpu_list[0],
262  n->cpu_list[n->num_cpus - 1]);
263  }
264 
265  // Print distance matrix if more than 2 nodes
266  if (numa->num_nodes >= 2 && numa->distances[0][1] > 0) {
267  printf("\n NUMA Distances (10=local, higher=remote):\n");
268  printf(" ");
269  for (int i = 0; i < numa->num_nodes; i++) {
270  printf(" N%d ", i);
271  }
272  printf("\n");
273  for (int i = 0; i < numa->num_nodes; i++) {
274  printf(" N%d", i);
275  for (int j = 0; j < numa->num_nodes; j++) {
276  int dist = numa->distances[i][j];
277  if (dist == 10) {
278  printf(" %s%2d%s ", C(GREEN), dist, C(RESET));
279  } else {
280  printf(" %s%2d%s ", C(YELLOW), dist, C(RESET));
281  }
282  }
283  printf("\n");
284  }
285  }
286 
287  // Tip for accurate per-node bandwidth
288  printf("\n %s💡 Per-node bandwidth: numactl --cpunodebind=0 --membind=0 ./build/show_config%s\n",
289  C(CYAN), C(RESET));
290 }
291 
293  print_section("MEMORY");
294 
295  // Show data sources
296  printf(" %sSource: /proc/meminfo, dmidecode (if root), STREAM benchmark%s\n\n",
297  C(DIM), C(RESET));
298 
299  char total_buf[32], avail_buf[32], theo_bw_buf[32], meas_bw_buf[32];
300  format_size(mem->total_mb, total_buf, sizeof(total_buf));
301  format_size(mem->available_mb, avail_buf, sizeof(avail_buf));
302  format_bandwidth(mem->theoretical_bandwidth_gbs, theo_bw_buf, sizeof(theo_bw_buf));
303  format_bandwidth(mem->measured_bandwidth_gbs, meas_bw_buf, sizeof(meas_bw_buf));
304 
305  printf(" %sTotal: %s%s (%s available)\n",
306  C(BOLD), total_buf, C(RESET), avail_buf);
307 
308  if (mem->memory_type[0]) {
309  print_tree_item(0, 0, "Type: %s @ %d MT/s", mem->memory_type, mem->memory_speed_mhz);
310  }
311 
312  print_tree_item(0, 0, "Configuration: %s", mem->channel_config);
313 
314  if (mem->num_slots > 0) {
315  print_tree_item(0, 0, "Slots: %d/%d populated",
316  mem->slots_populated, mem->num_slots);
317  }
318 
319  // Show bandwidth measurements with explanation
320  printf("\n %sBandwidth Analysis:%s\n", C(CYAN), C(RESET));
321 
322  // Theoretical bandwidth calculation
323  if (mem->memory_speed_mhz > 0 && mem->channels_populated > 0) {
324  printf(" %s├── Theoretical: %d MT/s × 8 bytes × %d channel(s) = %s%s\n",
326  theo_bw_buf, C(RESET));
327 
328  // Show SNC relationship for multi-channel configs
329  if (mem->channels_populated >= 4) {
330  printf(" %s│ └── SNC potential: %d ch ÷ 2 = SNC2 (%d ch/node), ÷ 4 = SNC4 (%d ch/node)%s\n",
331  C(DIM), mem->channels_populated,
332  mem->channels_populated / 2,
333  mem->channels_populated / 4,
334  C(RESET));
335  } else if (mem->channels_populated >= 2) {
336  printf(" %s│ └── SNC potential: %d ch ÷ 2 = SNC2 (%d ch/node)%s\n",
337  C(DIM), mem->channels_populated,
338  mem->channels_populated / 2,
339  C(RESET));
340  }
341  } else {
342  printf(" %s├── Theoretical: %s (estimated)%s\n",
343  C(DIM), theo_bw_buf, C(RESET));
344  }
345 
346  // Measured bandwidth with methodology
347  if (mem->measured_bandwidth_gbs > 0) {
348  float efficiency = (mem->theoretical_bandwidth_gbs > 0) ?
349  (mem->measured_bandwidth_gbs / mem->theoretical_bandwidth_gbs * 100.0f) : 0;
350 
351  printf(" %s├── Measured: %s%s%s (%.0f%% efficiency)%s\n",
352  C(DIM),
353  C(mem->measured_bandwidth_gbs > 40 ? GREEN : YELLOW),
354  meas_bw_buf, C(RESET), efficiency, C(RESET));
355  printf(" %s│ Method: STREAM Triad (c[i] = a[i] + s*b[i])%s\n",
356  C(DIM), C(RESET));
357  printf(" %s│ Buffer: 256 MB × 3 arrays, 3 iterations%s\n",
358  C(DIM), C(RESET));
359  printf(" %s│ NUMA node: %d (memory allocated on this node)%s\n",
360  C(DIM), mem->bw_test_numa_node, C(RESET));
361  printf(" %s│ Threads: %d (OMP_NUM_THREADS)%s\n",
362  C(DIM), mem->bw_test_num_threads, C(RESET));
363  printf(" %s└── Formula: (256MB × 3 × 3) / time = GB/s%s\n",
364  C(DIM), C(RESET));
365  }
366 
367  // Show DIMM details if available
368  if (mem->num_slots > 0 && mem->slots[0].locator[0]) {
369  printf("\n DIMM Layout:\n");
370  for (int i = 0; i < mem->num_slots; i++) {
371  const MemorySlot *s = &mem->slots[i];
372  if (s->populated) {
373  char dimm_size[32];
374  format_size(s->size_mb, dimm_size, sizeof(dimm_size));
375  printf(" %s[%s]%s %s: %s%s @ %d MT/s%s\n",
376  C(GREEN), s->locator, C(RESET),
377  s->type, C(BOLD), dimm_size, s->speed_mhz, C(RESET));
378  } else {
379  printf(" %s[%s]%s EMPTY\n",
380  C(DIM), s->locator, C(RESET));
381  }
382  }
383  }
384 
385  if (mem->channels_populated == 1 && mem->num_slots > 1) {
386  print_warning("Single-channel mode - bandwidth reduced by ~50%%");
387  }
388 
389  if (mem->num_slots > 0 && mem->slots_populated < mem->num_slots) {
390  printf(" %s💡 Tip:%s Add %d more DIMM(s) for better bandwidth\n",
391  C(CYAN), C(RESET), mem->num_slots - mem->slots_populated);
392  }
393 }
394 
396  print_section("PCIe DEVICES");
397 
398  int gpu_count = 0, nic_count = 0, nvme_count = 0;
399  char bw_buf[32];
400 
401  for (int i = 0; i < pcie->num_devices; i++) {
402  const PCIeDevice *d = &pcie->devices[i];
403 
404  // Skip bridges and other infrastructure devices
405  if (d->link_width == 0) continue;
406  if (strstr(d->device_name, "bridge") ||
407  strstr(d->device_name, "Bridge") ||
408  strstr(d->device_name, "Host") ||
409  strstr(d->device_name, "PCI")) continue;
410 
411  const char *type_icon = " ";
412  const char *type_color = "";
413  if (d->is_gpu) {
414  type_icon = "🎮 ";
415  type_color = GREEN;
416  gpu_count++;
417  } else if (d->is_nic) {
418  type_icon = "🌐 ";
419  type_color = CYAN;
420  nic_count++;
421  } else if (d->is_nvme) {
422  type_icon = "💾 ";
423  type_color = MAGENTA;
424  nvme_count++;
425  }
426 
427  format_bandwidth(d->bandwidth_gbs, bw_buf, sizeof(bw_buf));
428 
429  // Truncate long device names
430  char name[48];
431  strncpy(name, d->device_name, sizeof(name) - 1);
432  name[sizeof(name) - 1] = '\0';
433  if (strlen(d->device_name) > 45) {
434  strcpy(name + 42, "...");
435  }
436 
437  printf(" %s%s%s%-45s%s x%d Gen%d %s%s%s",
438  type_icon, C(type_color), C(BOLD), name, C(RESET),
439  d->link_width, d->link_speed,
440  C(DIM), bw_buf, C(RESET));
441 
442  // Show if not running at max capability
443  if (d->link_width < d->link_width_max || d->link_speed < d->link_speed_max) {
444  printf(" %s(capable: x%d Gen%d)%s",
446  }
447  printf("\n");
448  }
449 
450  if (gpu_count == 0 && nic_count == 0 && nvme_count == 0) {
451  printf(" %sNo significant PCIe devices detected%s\n", C(DIM), C(RESET));
452  }
453 
454  printf("\n Summary: %d GPU(s), %d NIC(s), %d NVMe(s)\n",
455  gpu_count, nic_count, nvme_count);
456 }
457 
459  print_section("NETWORK INTERFACES");
460 
461  if (net->num_interfaces == 0) {
462  printf(" %sNo network interfaces detected%s\n", C(DIM), C(RESET));
463  return;
464  }
465 
466  char bw_buf[32];
467 
468  for (int i = 0; i < net->num_interfaces; i++) {
469  const NetworkInterface *n = &net->interfaces[i];
470 
471  const char *status_icon = n->is_up && n->has_link ? "✓" : "✗";
472  const char *status_color = n->is_up && n->has_link ? GREEN : RED;
473 
474  float bw_gbs = n->speed_mbps / 8000.0f;
475  format_bandwidth(bw_gbs, bw_buf, sizeof(bw_buf));
476 
477  printf(" %s%s%s %s%-10s%s ",
478  C(status_color), status_icon, C(RESET),
479  C(BOLD), n->name, C(RESET));
480 
481  if (n->has_link) {
482  // Color code speed
483  const char *speed_color = "";
484  if (n->speed_mbps >= 100000) speed_color = GREEN; // 100 GbE+
485  else if (n->speed_mbps >= 10000) speed_color = GREEN; // 10 GbE
486  else if (n->speed_mbps >= 1000) speed_color = YELLOW; // 1 GbE
487  else speed_color = RED; // < 1 GbE
488 
489  printf("%s%6lu Mbps%s (%s) ",
490  C(speed_color), (unsigned long)n->speed_mbps, C(RESET), bw_buf);
491  } else {
492  printf("%sno link %s ", C(RED), C(RESET));
493  }
494 
495  if (n->driver[0]) {
496  printf("%s[%s]%s ", C(DIM), n->driver, C(RESET));
497  }
498 
499  if (n->supports_rdma) {
500  printf("%s[RDMA]%s ", C(GREEN), C(RESET));
501  }
502  if (n->is_infiniband) {
503  printf("%s[IB]%s ", C(MAGENTA), C(RESET));
504  }
505 
506  printf("\n");
507  }
508 
509  // Network capability summary
510  printf("\n For Distributed Training:\n");
511 
512  if (net->max_bandwidth_gbs >= 12.5f) {
513  print_ok("100 GbE+ available - excellent for distributed training");
514  } else if (net->max_bandwidth_gbs >= 1.25f) {
515  printf(" %s✓%s 10 GbE available - good for small clusters\n",
516  C(GREEN), C(RESET));
517  } else if (net->max_bandwidth_gbs >= 0.125f) {
518  print_warning("Only 1 GbE - significant bottleneck for distributed training");
519  } else {
520  print_warning("Very slow network - distributed training not recommended");
521  }
522 
523  if (net->has_rdma) {
524  print_ok("RDMA capable NIC detected - low-latency gradient sync possible");
525  }
526 }
527 
529  print_section("THREAD AFFINITY (OpenMP)");
530 
531  print_tree_item(0, 0, "OMP_NUM_THREADS: %d", aff->omp_num_threads);
532  print_tree_item(0, 0, "OMP_PROC_BIND: %s%s%s",
533  aff->affinity_set ? C(GREEN) : C(YELLOW),
534  aff->omp_proc_bind, C(RESET));
535  print_tree_item(0, 1, "OMP_PLACES: %s", aff->omp_places);
536 
537  if (!aff->affinity_set) {
538  printf("\n");
539  print_warning("Thread affinity not configured");
540  printf(" %s💡 Recommendation:%s export OMP_PROC_BIND=close OMP_PLACES=cores\n",
541  C(CYAN), C(RESET));
542  }
543 }
544 
546  if (recs->num_recommendations == 0) {
547  print_section("RECOMMENDATIONS");
548  print_ok("No significant issues detected!");
549  return;
550  }
551 
552  print_section("RECOMMENDATIONS");
553 
554  for (int i = 0; i < recs->num_recommendations; i++) {
555  const Recommendation *r = &recs->recommendations[i];
556 
557  const char *priority_icon = "";
558  const char *priority_color = "";
559  switch (r->priority) {
561  priority_icon = "🔴";
562  priority_color = RED;
563  break;
564  case REC_PRIORITY_HIGH:
565  priority_icon = "🟠";
566  priority_color = RED;
567  break;
568  case REC_PRIORITY_MEDIUM:
569  priority_icon = "🟡";
570  priority_color = YELLOW;
571  break;
572  case REC_PRIORITY_LOW:
573  priority_icon = "🟢";
574  priority_color = GREEN;
575  break;
576  }
577 
578  printf("\n %s %s%s%s\n", priority_icon, C(priority_color), r->title, C(RESET));
579  printf(" %s\n", r->description);
580  printf(" %s→ %s%s\n", C(CYAN), r->action, C(RESET));
581  }
582 }
583 
585  print_section("DISTRIBUTED TRAINING POTENTIAL");
586 
587  char mem_bw_buf[32], net_bw_buf[32];
588  format_bandwidth(topo->memory.theoretical_bandwidth_gbs, mem_bw_buf, sizeof(mem_bw_buf));
589  format_bandwidth(topo->network.max_bandwidth_gbs, net_bw_buf, sizeof(net_bw_buf));
590 
591  printf(" Single Node Capacity:\n");
592  print_tree_item(0, 0, "Compute: %d cores @ %s",
593  topo->cpu.physical_cores,
594  topo->cpu.has_avx512f ? "AVX-512" :
595  (topo->cpu.has_avx2 ? "AVX2" : "AVX"));
596  print_tree_item(0, 0, "Memory: %lu GB @ %s",
597  (unsigned long)(topo->memory.total_mb / 1024), mem_bw_buf);
598  print_tree_item(0, 1, "Network: %s", net_bw_buf);
599 
600  // Estimate sync times for various model sizes
601  printf("\n Estimated Gradient Sync Time (single allreduce):\n");
602 
603  uint64_t model_sizes[] = {100, 500, 1000, 7000}; // MB
604  const char *model_names[] = {"100 MB (BERT-base)", "500 MB (GPT-2)",
605  "1 GB (ResNet-50 batch)", "7 GB (LLaMA-7B)"};
606 
607  for (int i = 0; i < 4; i++) {
608  float sync_time = topology_estimate_network_training_time(
609  &topo->network, model_sizes[i]);
610 
611  const char *time_color = "";
612  if (sync_time < 0.1f) time_color = GREEN;
613  else if (sync_time < 1.0f) time_color = YELLOW;
614  else time_color = RED;
615 
616  printf(" %-25s %s%8.2f sec%s\n",
617  model_names[i], C(time_color), sync_time, C(RESET));
618  }
619 
620  // Multi-node projection
621  printf("\n Multi-Node Projection (assuming identical nodes):\n");
622  int nodes[] = {2, 4, 8, 16};
623  for (int i = 0; i < 4; i++) {
624  int n = nodes[i];
625  uint64_t total_mem = topo->memory.total_mb * n;
626  int total_cores = topo->cpu.physical_cores * n;
627 
628  char total_mem_buf[32];
629  format_size(total_mem, total_mem_buf, sizeof(total_mem_buf));
630 
631  printf(" %2d nodes: %4d cores, %s memory\n",
632  n, total_cores, total_mem_buf);
633  }
634 
635  // Ring-allreduce topology diagram
636  printf("\n Ring-AllReduce Topology (4 nodes):\n");
637  printf(" %s┌─────────┐ ┌─────────┐%s\n", C(CYAN), C(RESET));
638  printf(" %s│ Node 0 │────→│ Node 1 │%s\n", C(CYAN), C(RESET));
639  printf(" %s│ Worker │ │ Worker │%s\n", C(CYAN), C(RESET));
640  printf(" %s└────↑────┘ └────│────┘%s\n", C(CYAN), C(RESET));
641  printf(" %s │ │ %s\n", C(CYAN), C(RESET));
642  printf(" %s │ ↓ %s\n", C(CYAN), C(RESET));
643  printf(" %s┌────│────┐ ┌────↓────┐%s\n", C(CYAN), C(RESET));
644  printf(" %s│ Node 3 │←────│ Node 2 │%s\n", C(CYAN), C(RESET));
645  printf(" %s│ Worker │ │ Worker │%s\n", C(CYAN), C(RESET));
646  printf(" %s└─────────┘ └─────────┘%s\n", C(CYAN), C(RESET));
647 }
648 
650  print_header("C-Kernel-Engine System Configuration");
651 
652  printf(" %sHostname:%s %s\n", C(DIM), C(RESET), topo->hostname);
653  printf(" %sKernel:%s %s\n", C(DIM), C(RESET), topo->kernel_version);
654  if (!topo->has_root_access) {
655  printf(" %sNote:%s Running without root - some info may be unavailable\n",
656  C(YELLOW), C(RESET));
657  }
658 
659  topology_print_cpu(&topo->cpu);
661  topology_print_numa(&topo->numa, topo->cpu.sockets);
663  topology_print_pcie(&topo->pcie);
666 
667  RecommendationList recs;
670 
672 
673  printf("\n");
674 }
675 
676 // ═══════════════════════════════════════════════════════════════════════════════
677 // Main
678 // ═══════════════════════════════════════════════════════════════════════════════
679 
680 int main(int argc, char *argv[]) {
681  // Check for --no-color flag
682  for (int i = 1; i < argc; i++) {
683  if (strcmp(argv[i], "--no-color") == 0) {
684  use_colors = 0;
685  }
686  if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) {
687  printf("Usage: %s [OPTIONS]\n", argv[0]);
688  printf("\nDisplay system hardware configuration for C-Kernel-Engine\n");
689  printf("\nOptions:\n");
690  printf(" --no-color Disable colored output\n");
691  printf(" --help, -h Show this help message\n");
692  return 0;
693  }
694  }
695 
696  // Check if stdout is a terminal
697  if (!isatty(1)) {
698  use_colors = 0;
699  }
700 
701  SystemTopology topo;
702  if (topology_discover(&topo) < 0) {
703  fprintf(stderr, "Error: Failed to discover system topology\n");
704  return 1;
705  }
706 
707  topology_print_summary(&topo);
708 
709  return 0;
710 }
int main(int argc, char *argv[])
Definition: show_config.c:680
static void print_ok(const char *msg)
Definition: show_config.c:95
#define BOLD
Definition: show_config.c:21
void topology_print_memory(const MemoryInfo *mem)
Definition: show_config.c:292
static const char * format_bandwidth(float bw_gbs, char *buf, size_t buf_size)
Definition: show_config.c:56
#define C(color)
Definition: show_config.c:39
void topology_print_network(const NetworkTopology *net)
Definition: show_config.c:458
void topology_print_pcie(const PCIeTopology *pcie)
Definition: show_config.c:395
void topology_print_distributed_potential(const SystemTopology *topo)
Definition: show_config.c:584
static void print_tree_item(int level, int is_last, const char *fmt,...)
Definition: show_config.c:78
void topology_print_numa(const NUMATopology *numa, int sockets)
Definition: show_config.c:213
static void print_section(const char *title)
Definition: show_config.c:73
void topology_print_cpu(const CPUInfo *cpu)
Definition: show_config.c:103
#define MAGENTA
Definition: show_config.c:28
static int use_colors
Definition: show_config.c:37
#define RED
Definition: show_config.c:24
static void print_warning(const char *msg)
Definition: show_config.c:91
void topology_print_affinity(const AffinityInfo *aff)
Definition: show_config.c:528
#define RESET
Definition: show_config.c:20
#define YELLOW
Definition: show_config.c:26
#define DIM
Definition: show_config.c:22
#define GREEN
Definition: show_config.c:25
#define CYAN
Definition: show_config.c:29
static const char * format_size(uint64_t size_mb, char *buf, size_t buf_size)
Definition: show_config.c:45
void topology_print_recommendations(const RecommendationList *recs)
Definition: show_config.c:545
void topology_print_summary(const SystemTopology *topo)
Definition: show_config.c:649
static void print_header(const char *title)
Definition: show_config.c:65
void topology_print_cache(const CacheTopology *cache, int logical_cores)
Definition: show_config.c:169
char omp_places[64]
char omp_proc_bind[32]
int logical_cores
bool has_avx512bw
char model_name[256]
int pcie_generation
bool has_vnni
bool has_avx512_bf16
float max_freq_mhz
int has_avx2
Definition: cpu_features.h:27
bool has_amx_int8
int has_avx
Definition: cpu_features.h:26
int physical_cores
int has_avx512f
Definition: cpu_features.h:28
bool has_amx_bf16
int pcie_lanes_total
int threads_per_core
bool has_amx
bool has_avx512vl
float base_freq_mhz
bool has_sse4_2
char type[16]
int shared_by_cores
CacheInfo levels[4]
float measured_bandwidth_gbs
int channels_populated
uint64_t total_mb
float theoretical_bandwidth_gbs
uint64_t available_mb
char memory_type[32]
char channel_config[64]
int bw_test_num_threads
MemorySlot slots[16]
char type[32]
uint64_t size_mb
char locator[64]
int cpu_list[256]
uint64_t memory_total_mb
int distances[8][8]
NUMANode nodes[8]
NetworkInterface interfaces[8]
char device_name[256]
float bandwidth_gbs
PCIeDevice devices[32]
Recommendation recommendations[32]
RecommendationPriority priority
char description[512]
CacheTopology cache
NUMATopology numa
char hostname[256]
PCIeTopology pcie
NetworkTopology network
MemoryInfo memory
char kernel_version[128]
AffinityInfo affinity
int topology_discover(SystemTopology *topo)
@ REC_PRIORITY_MEDIUM
@ REC_PRIORITY_CRITICAL
@ REC_PRIORITY_HIGH
@ REC_PRIORITY_LOW
float topology_estimate_network_training_time(const NetworkTopology *net, uint64_t model_size_mb)
int topology_generate_recommendations(const SystemTopology *topo, RecommendationList *recs)