← Back to C-Kernel-Engine Docs Doxygen Source Documentation
system_topology.h File Reference
#include <stdint.h>
#include <stdbool.h>

Go to the source code of this file.

Data Structures

struct  AffinityInfo
 
struct  CacheInfo
 
struct  CacheTopology
 
struct  CPUInfo
 
struct  MemoryInfo
 
struct  MemorySlot
 
struct  NetworkInterface
 
struct  NetworkTopology
 
struct  NUMANode
 
struct  NUMATopology
 
struct  PCIeDevice
 
struct  PCIeTopology
 
struct  Recommendation
 
struct  RecommendationList
 
struct  SystemTopology
 

Macros

#define MAX_CACHE_LEVELS   4
 
#define MAX_CPUS   256
 
#define MAX_MEMORY_SLOTS   16
 
#define MAX_NICS   8
 
#define MAX_NUMA_NODES   8
 
#define MAX_PCIE_DEVICES   32
 
#define MAX_RECOMMENDATIONS   32
 
#define MAX_STR_LEN   256
 

Enumerations

enum  RecommendationCategory {
  REC_CATEGORY_MEMORY , REC_CATEGORY_CPU , REC_CATEGORY_NETWORK , REC_CATEGORY_AFFINITY ,
  REC_CATEGORY_PCIE
}
 
enum  RecommendationPriority { REC_PRIORITY_LOW , REC_PRIORITY_MEDIUM , REC_PRIORITY_HIGH , REC_PRIORITY_CRITICAL }
 

Functions

int topology_discover (SystemTopology *topo)
 
int topology_discover_affinity (AffinityInfo *aff)
 
int topology_discover_cache (CacheTopology *cache)
 
int topology_discover_cpu (CPUInfo *cpu)
 
int topology_discover_memory (MemoryInfo *mem)
 
int topology_discover_network (NetworkTopology *net)
 
int topology_discover_numa (NUMATopology *numa)
 
int topology_discover_pcie (PCIeTopology *pcie)
 
float topology_estimate_memory_bandwidth (const MemoryInfo *mem)
 
float topology_estimate_network_training_time (const NetworkTopology *net, uint64_t model_size_mb)
 
int topology_generate_recommendations (const SystemTopology *topo, RecommendationList *recs)
 
void topology_print_affinity (const AffinityInfo *aff)
 
void topology_print_cache (const CacheTopology *cache, int logical_cores)
 
void topology_print_cpu (const CPUInfo *cpu)
 
void topology_print_distributed_potential (const SystemTopology *topo)
 
void topology_print_memory (const MemoryInfo *mem)
 
void topology_print_network (const NetworkTopology *net)
 
void topology_print_numa (const NUMATopology *numa, int sockets)
 
void topology_print_pcie (const PCIeTopology *pcie)
 
void topology_print_recommendations (const RecommendationList *recs)
 
void topology_print_summary (const SystemTopology *topo)
 

Macro Definition Documentation

◆ MAX_CACHE_LEVELS

#define MAX_CACHE_LEVELS   4

Definition at line 24 of file system_topology.h.

◆ MAX_CPUS

#define MAX_CPUS   256

Definition at line 23 of file system_topology.h.

◆ MAX_MEMORY_SLOTS

#define MAX_MEMORY_SLOTS   16

Definition at line 27 of file system_topology.h.

◆ MAX_NICS

#define MAX_NICS   8

Definition at line 25 of file system_topology.h.

◆ MAX_NUMA_NODES

#define MAX_NUMA_NODES   8

Definition at line 22 of file system_topology.h.

◆ MAX_PCIE_DEVICES

#define MAX_PCIE_DEVICES   32

Definition at line 26 of file system_topology.h.

◆ MAX_RECOMMENDATIONS

#define MAX_RECOMMENDATIONS   32

Definition at line 280 of file system_topology.h.

◆ MAX_STR_LEN

#define MAX_STR_LEN   256

Definition at line 28 of file system_topology.h.

Enumeration Type Documentation

◆ RecommendationCategory

Enumerator
REC_CATEGORY_MEMORY 
REC_CATEGORY_CPU 
REC_CATEGORY_NETWORK 
REC_CATEGORY_AFFINITY 
REC_CATEGORY_PCIE 

Definition at line 272 of file system_topology.h.

272  {
RecommendationCategory
@ REC_CATEGORY_AFFINITY
@ REC_CATEGORY_CPU
@ REC_CATEGORY_MEMORY
@ REC_CATEGORY_NETWORK
@ REC_CATEGORY_PCIE

◆ RecommendationPriority

Enumerator
REC_PRIORITY_LOW 
REC_PRIORITY_MEDIUM 
REC_PRIORITY_HIGH 
REC_PRIORITY_CRITICAL 

Definition at line 265 of file system_topology.h.

265  {
RecommendationPriority
@ REC_PRIORITY_MEDIUM
@ REC_PRIORITY_CRITICAL
@ REC_PRIORITY_HIGH
@ REC_PRIORITY_LOW

Function Documentation

◆ topology_discover()

int topology_discover ( SystemTopology * topo)

Definition at line 979 of file system_topology.c.

979  {
980  memset(topo, 0, sizeof(*topo));
981 
982  // Get hostname and kernel version
983  gethostname(topo->hostname, sizeof(topo->hostname));
984 
985  struct utsname uts;
986  if (uname(&uts) == 0) {
987  snprintf(topo->kernel_version, sizeof(topo->kernel_version),
988  "%s %s", uts.sysname, uts.release);
989  }
990 
991  // Check for root access
992  topo->has_root_access = (geteuid() == 0);
993 
994  // Run all discovery functions
995  topology_discover_cpu(&topo->cpu);
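996  topology_discover_cache(&topo->cache);
997  topology_discover_numa(&topo->numa);
998  topology_discover_memory(&topo->memory);
999  topology_discover_pcie(&topo->pcie);
1000  topology_discover_network(&topo->network);
1001  topology_discover_affinity(&topo->affinity);
1002 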
1003  return 0;
1004 }
CacheTopology cache
NUMATopology numa
char hostname[256]
PCIeTopology pcie
NetworkTopology network
MemoryInfo memory
char kernel_version[128]
AffinityInfo affinity
int topology_discover_memory(MemoryInfo *mem)
int topology_discover_pcie(PCIeTopology *pcie)
int topology_discover_cpu(CPUInfo *cpu)
int topology_discover_cache(CacheTopology *cache)
int topology_discover_network(NetworkTopology *net)
int topology_discover_affinity(AffinityInfo *aff)
int topology_discover_numa(NUMATopology *numa)

References SystemTopology::affinity, SystemTopology::cache, SystemTopology::cpu, SystemTopology::has_root_access, SystemTopology::hostname, SystemTopology::kernel_version, SystemTopology::memory, SystemTopology::network, SystemTopology::numa, SystemTopology::pcie, topology_discover_affinity(), topology_discover_cache(), topology_discover_cpu(), topology_discover_memory(), topology_discover_network(), topology_discover_numa(), and topology_discover_pcie().

Referenced by main().

◆ topology_discover_affinity()

int topology_discover_affinity ( AffinityInfo * aff)

Definition at line 936 of file system_topology.c.

936  {
937  memset(aff, 0, sizeof(*aff));
938 
939  // OpenMP settings
940  const char *omp_threads = getenv("OMP_NUM_THREADS");
941  if (omp_threads) {
942  aff->omp_num_threads = atoi(omp_threads);
943  } else {
944  aff->omp_num_threads = sysconf(_SC_NPROCESSORS_ONLN);
945  }
946 
947  const char *omp_bind = getenv("OMP_PROC_BIND");
948  if (omp_bind) {
949  strncpy(aff->omp_proc_bind, omp_bind, sizeof(aff->omp_proc_bind) - 1);
950  aff->affinity_set = true;
951  } else {
952  strcpy(aff->omp_proc_bind, "not set");
953  }
954 
955  const char *omp_places = getenv("OMP_PLACES");
956  if (omp_places) {
957  strncpy(aff->omp_places, omp_places, sizeof(aff->omp_places) - 1);
958  } else {
959  strcpy(aff->omp_places, "not set");
960  }
961 
962  // Current process affinity
963  cpu_set_t mask;
964  if (sched_getaffinity(0, sizeof(mask), &mask) == 0) {
965  for (int i = 0; i < MAX_CPUS && aff->num_affinity_cpus < MAX_CPUS; i++) {
966  if (CPU_ISSET(i, &mask)) {
967  aff->affinity_cpus[aff->num_affinity_cpus++] = i;
968  }
969  }
970  }
971 
972  return 0;
973 }
int affinity_cpus[256]
char omp_places[64]
char omp_proc_bind[32]
#define MAX_CPUS
int32_t int32_t int32_t int32_t int32_t mask
Definition: tokenizer.h:233

References AffinityInfo::affinity_cpus, AffinityInfo::affinity_set, mask, MAX_CPUS, AffinityInfo::num_affinity_cpus, AffinityInfo::omp_num_threads, AffinityInfo::omp_places, and AffinityInfo::omp_proc_bind.

Referenced by topology_discover().

◆ topology_discover_cache()

int topology_discover_cache ( CacheTopology * cache)

Definition at line 246 of file system_topology.c.

246  {
247  memset(cache, 0, sizeof(*cache));
248 
249  const char *base = "/sys/devices/system/cpu/cpu0/cache";
250  DIR *dir = opendir(base);
251  if (!dir) return -1;
252 
253  struct dirent *entry;
254  while ((entry = readdir(dir)) != NULL) {
255  if (strncmp(entry->d_name, "index", 5) != 0) continue;
256 
257  char path[512];
258  CacheInfo *ci = &cache->levels[cache->num_levels];
259 
260  snprintf(path, sizeof(path), "%s/%s/level", base, entry->d_name);
261  ci->level = read_file_int(path);
262 
263  snprintf(path, sizeof(path), "%s/%s/type", base, entry->d_name);
264  read_file_string(path, ci->type, sizeof(ci->type));
265 
266  snprintf(path, sizeof(path), "%s/%s/size", base, entry->d_name);
267  char size_str[32];
268  if (read_file_string(path, size_str, sizeof(size_str)) == 0) {
269  ci->size_kb = atoi(size_str); // Usually in KB with 'K' suffix
270  }
271 
272  snprintf(path, sizeof(path), "%s/%s/coherency_line_size", base, entry->d_name);
273  ci->line_size_bytes = read_file_int(path);
274 
275  snprintf(path, sizeof(path), "%s/%s/ways_of_associativity", base, entry->d_name);
276  ci->ways_of_associativity = read_file_int(path);
277 
278  snprintf(path, sizeof(path), "%s/%s/shared_cpu_map", base, entry->d_name);
279  char cpu_map[256];
280  if (read_file_string(path, cpu_map, sizeof(cpu_map)) == 0) {
281  ci->shared_by_cores = count_set_bits(cpu_map);
282  }
283 
284  if (ci->level == 3) {
285  cache->l3_total_kb = ci->size_kb; // Will be multiplied if multiple
286  }
287 
288  cache->num_levels++;
289  if (cache->num_levels >= MAX_CACHE_LEVELS) break;
290  }
291  closedir(dir);
292 
293  // Sort by level (L1 → L2 → L3) and type (Data → Instruction → Unified)
294  qsort(cache->levels, cache->num_levels, sizeof(CacheInfo), cache_compare);
295 
296  return 0;
297 }
char type[16]
int line_size_bytes
int ways_of_associativity
int shared_by_cores
CacheInfo levels[4]
static int cache_compare(const void *a, const void *b)
static int count_set_bits(const char *hex_mask)
static int read_file_int(const char *path)
static int read_file_string(const char *path, char *buf, size_t buf_size)
#define MAX_CACHE_LEVELS

References cache_compare(), count_set_bits(), CacheTopology::l3_total_kb, CacheInfo::level, CacheTopology::levels, CacheInfo::line_size_bytes, MAX_CACHE_LEVELS, CacheTopology::num_levels, read_file_int(), read_file_string(), CacheInfo::shared_by_cores, CacheInfo::size_kb, CacheInfo::type, and CacheInfo::ways_of_associativity.

Referenced by topology_discover().

◆ topology_discover_cpu()

int topology_discover_cpu ( CPUInfo * cpu)

Definition at line 112 of file system_topology.c.

112  {
113  memset(cpu, 0, sizeof(*cpu));
114 
115  FILE *f = fopen("/proc/cpuinfo", "r");
116  if (!f) return -1;
117 
118  char line[4096]; // Flags line can be very long on modern CPUs (AVX-512 + AMX)
119  int processor_count = 0;
120  int physical_id_max = -1;
121  int core_id_max = -1;
122 
123  while (fgets(line, sizeof(line), f)) {
124  char *colon = strchr(line, ':');
125  if (!colon) continue;
126 
127  char *key = line;
128  char *value = colon + 1;
129  *colon = '\0';
130  trim_string(key);
131  trim_string(value);
132 
133  if (strcmp(key, "processor") == 0) {
134  processor_count++;
135  } else if (strcmp(key, "model name") == 0 && cpu->model_name[0] == '\0') {
136  strncpy(cpu->model_name, value, sizeof(cpu->model_name) - 1);
137  } else if (strcmp(key, "vendor_id") == 0 && cpu->vendor[0] == '\0') {
138  strncpy(cpu->vendor, value, sizeof(cpu->vendor) - 1);
139  } else if (strcmp(key, "cpu family") == 0 && cpu->family == 0) {
140  cpu->family = atoi(value);
141  } else if (strcmp(key, "model") == 0 && cpu->model == 0) {
142  cpu->model = atoi(value);
143  } else if (strcmp(key, "stepping") == 0 && cpu->stepping == 0) {
144  cpu->stepping = atoi(value);
145  } else if (strcmp(key, "cpu MHz") == 0 && cpu->base_freq_mhz == 0) {
146  cpu->base_freq_mhz = atof(value);
147  } else if (strcmp(key, "physical id") == 0) {
148  int id = atoi(value);
149  if (id > physical_id_max) physical_id_max = id;
150  } else if (strcmp(key, "core id") == 0) {
151  int id = atoi(value);
152  if (id > core_id_max) core_id_max = id;
153  } else if (strcmp(key, "flags") == 0) {
154  // Use word-boundary-aware matching for CPU flags
155  cpu->has_sse4_2 = has_cpu_flag(value, "sse4_2");
156  cpu->has_avx = has_cpu_flag(value, "avx");
157  cpu->has_avx2 = has_cpu_flag(value, "avx2");
158  cpu->has_avx512f = has_cpu_flag(value, "avx512f");
159  cpu->has_avx512bw = has_cpu_flag(value, "avx512bw");
160  cpu->has_avx512vl = has_cpu_flag(value, "avx512vl");
161  cpu->has_avx512_bf16 = has_cpu_flag(value, "avx512_bf16");
162  cpu->has_amx_tile = has_cpu_flag(value, "amx_tile");
163  cpu->has_amx_int8 = has_cpu_flag(value, "amx_int8");
164  cpu->has_amx_bf16 = has_cpu_flag(value, "amx_bf16");
165  cpu->has_amx = cpu->has_amx_tile || cpu->has_amx_int8 || cpu->has_amx_bf16;
166  cpu->has_vnni = has_cpu_flag(value, "avx512_vnni") || has_cpu_flag(value, "avx_vnni");
167  }
168  }
169  fclose(f);
170 
171  cpu->logical_cores = processor_count;
172  cpu->sockets = physical_id_max + 1;
173  if (cpu->sockets < 1) cpu->sockets = 1;
174 
175  // Read from /sys for more accurate core count
176  int cores_per_socket = read_file_int("/sys/devices/system/cpu/cpu0/topology/core_cpus_list");
177  if (cores_per_socket < 0) {
178  // Fallback: estimate from logical cores and sockets
179  cpu->physical_cores = cpu->logical_cores / 2; // Assume HT
180  cpu->cores_per_socket = cpu->physical_cores / cpu->sockets;
181  } else {
182  // Count unique core IDs
183  char path[256];
184  int unique_cores = 0;
185  int seen_cores[MAX_CPUS] = {0};
186 
187  for (int i = 0; i < cpu->logical_cores && i < MAX_CPUS; i++) {
188  snprintf(path, sizeof(path),
189  "/sys/devices/system/cpu/cpu%d/topology/core_id", i);
190  int core_id = read_file_int(path);
191  if (core_id >= 0 && core_id < MAX_CPUS && !seen_cores[core_id]) {
192  seen_cores[core_id] = 1;
193  unique_cores++;
194  }
195  }
196  cpu->physical_cores = unique_cores > 0 ? unique_cores : cpu->logical_cores / 2;
197  cpu->cores_per_socket = cpu->physical_cores / cpu->sockets;
198  }
199 
200  cpu->threads_per_core = cpu->logical_cores / cpu->physical_cores;
201 
202  // Try to get max frequency
203  int max_freq = read_file_int("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq");
204  if (max_freq > 0) {
205  cpu->max_freq_mhz = max_freq / 1000.0f;
206  }
207 
208  // Estimate PCIe lanes based on CPU model
209  if (strstr(cpu->model_name, "Xeon") || strstr(cpu->model_name, "EPYC")) {
210  cpu->pcie_lanes_total = 64; // Server CPUs typically have more
211  cpu->pcie_generation = 4;
212  } else if (strstr(cpu->model_name, "i9") || strstr(cpu->model_name, "i7")) {
213  cpu->pcie_lanes_total = 20;
214  cpu->pcie_generation = cpu->has_avx512f ? 4 : 3;
215  } else {
216  cpu->pcie_lanes_total = 16;
217  cpu->pcie_generation = 3;
218  }
219 
220  return 0;
221 }
int logical_cores
bool has_avx512bw
char model_name[256]
int pcie_generation
bool has_vnni
bool has_avx512_bf16
float max_freq_mhz
char vendor[64]
int has_avx2
Definition: cpu_features.h:27
int cores_per_socket
bool has_amx_int8
int has_avx
Definition: cpu_features.h:26
int physical_cores
int has_avx512f
Definition: cpu_features.h:28
bool has_amx_bf16
int pcie_lanes_total
int threads_per_core
bool has_amx
bool has_avx512vl
float base_freq_mhz
bool has_sse4_2
bool has_amx_tile
static void trim_string(char *str)
static int has_cpu_flag(const char *flags, const char *flag)
int32_t id
Definition: tokenizer.h:315

References CPUInfo::base_freq_mhz, CPUInfo::cores_per_socket, CPUInfo::family, CPUInfo::has_amx, CPUInfo::has_amx_bf16, CPUInfo::has_amx_int8, CPUInfo::has_amx_tile, CPUInfo::has_avx, CPUInfo::has_avx2, CPUInfo::has_avx512_bf16, CPUInfo::has_avx512bw, CPUInfo::has_avx512f, CPUInfo::has_avx512vl, has_cpu_flag(), CPUInfo::has_sse4_2, CPUInfo::has_vnni, id, CPUInfo::logical_cores, MAX_CPUS, CPUInfo::max_freq_mhz, CPUInfo::model, CPUInfo::model_name, CPUInfo::pcie_generation, CPUInfo::pcie_lanes_total, CPUInfo::physical_cores, read_file_int(), CPUInfo::sockets, CPUInfo::stepping, CPUInfo::threads_per_core, trim_string(), and CPUInfo::vendor.

Referenced by topology_discover().

◆ topology_discover_memory()

int topology_discover_memory ( MemoryInfo * mem)

Definition at line 553 of file system_topology.c.

553  {
554  memset(mem, 0, sizeof(*mem));
555 
556  // Basic memory info from /proc/meminfo
557  FILE *f = fopen("/proc/meminfo", "r");
558  if (f) {
559  char line[256];
560  while (fgets(line, sizeof(line), f)) {
561  uint64_t val;
562  if (sscanf(line, "MemTotal: %lu kB", &val) == 1) {
563  mem->total_mb = val / 1024;
564  } else if (sscanf(line, "MemAvailable: %lu kB", &val) == 1) {
565  mem->available_mb = val / 1024;
566  } else if (sscanf(line, "Cached: %lu kB", &val) == 1) {
567  mem->cached_mb = val / 1024;
568  }
569  }
570  fclose(f);
571  }
572 
573  // Try to get DIMM info via dmidecode (requires root)
574  char output[8192];
575  if (run_command("dmidecode -t memory 2>/dev/null", output, sizeof(output)) == 0 &&
576  strlen(output) > 100) {
577 
578  char *line = strtok(output, "\n");
579  MemorySlot *current_slot = NULL;
580 
581  while (line) {
582  trim_string(line);
583 
584  if (strstr(line, "Memory Device")) {
585  if (mem->num_slots < MAX_MEMORY_SLOTS) {
586  current_slot = &mem->slots[mem->num_slots++];
587  memset(current_slot, 0, sizeof(*current_slot));
588  current_slot->slot_number = mem->num_slots;
589  }
590  } else if (current_slot) {
591  uint64_t val;
592  int ival;
593  char str[64];
594 
595  if (sscanf(line, "Size: %lu MB", &val) == 1) {
596  current_slot->size_mb = val;
597  current_slot->populated = true;
598  mem->slots_populated++;
599  } else if (sscanf(line, "Size: %lu GB", &val) == 1) {
600  current_slot->size_mb = val * 1024;
601  current_slot->populated = true;
602  mem->slots_populated++;
603  } else if (strstr(line, "Size: No Module")) {
604  current_slot->populated = false;
605  } else if (sscanf(line, "Speed: %d MT/s", &ival) == 1 ||
606  sscanf(line, "Speed: %d MHz", &ival) == 1) {
607  current_slot->speed_mhz = ival;
608  if (mem->memory_speed_mhz == 0) mem->memory_speed_mhz = ival;
609  } else if (sscanf(line, "Type: %63s", str) == 1) {
610  strncpy(current_slot->type, str, sizeof(current_slot->type) - 1);
611  if (mem->memory_type[0] == '\0') {
612  strncpy(mem->memory_type, str, sizeof(mem->memory_type) - 1);
613  }
614  } else if (sscanf(line, "Locator: %63s", str) == 1) {
615  strncpy(current_slot->locator, str, sizeof(current_slot->locator) - 1);
616  } else if (sscanf(line, "Rank: %d", &ival) == 1) {
617  current_slot->rank = ival;
618  } else if (sscanf(line, "Data Width: %d bits", &ival) == 1) {
619  current_slot->data_width_bits = ival;
620  }
621  }
622 
623  line = strtok(NULL, "\n");
624  }
625  }
626 
627  // Estimate channel configuration
628  if (mem->slots_populated > 0) {
629  if (mem->slots_populated == 1) {
630  strcpy(mem->channel_config, "Single-channel");
631  mem->num_channels = 1;
632  mem->channels_populated = 1;
633  } else if (mem->slots_populated == 2) {
634  strcpy(mem->channel_config, "Dual-channel");
635  mem->num_channels = 2;
636  mem->channels_populated = 2;
637  } else if (mem->slots_populated == 4) {
638  strcpy(mem->channel_config, "Quad-channel");
639  mem->num_channels = 4;
640  mem->channels_populated = 4;
641  } else if (mem->slots_populated >= 6) {
642  strcpy(mem->channel_config, "Hexa-channel or more");
643  mem->num_channels = 6;
645  } else {
646  snprintf(mem->channel_config, sizeof(mem->channel_config),
647  "%d DIMMs", mem->slots_populated);
648  mem->num_channels = mem->slots_populated;
650  }
651 
652  // Estimate bandwidth
653  // DDR4: speed * 8 bytes * channels
654  // DDR5: speed * 8 bytes * channels (but DDR5 has 2 channels per DIMM)
655  float bytes_per_transfer = 8.0f;
656  if (strstr(mem->memory_type, "DDR5")) {
657  mem->theoretical_bandwidth_gbs =
658  (mem->memory_speed_mhz * bytes_per_transfer * mem->channels_populated * 2) / 1000.0f;
659  } else {
660  mem->theoretical_bandwidth_gbs =
661  (mem->memory_speed_mhz * bytes_per_transfer * mem->channels_populated) / 1000.0f;
662  }
663  }
664 
665  // Always measure actual bandwidth (quick ~0.5s test)
666  // Use extended version to get NUMA node and thread count for transparency
667  mem->measured_bandwidth_gbs = topology_measure_memory_bandwidth_ex(
668  &mem->bw_test_numa_node,
669  &mem->bw_test_num_threads
670  );
671 
672  // If dmidecode didn't give us channel info, estimate from measured bandwidth
673  if (mem->slots_populated == 0 && mem->measured_bandwidth_gbs > 0) {
674  // Try to detect memory speed from /sys or assume DDR4-3200
675  if (mem->memory_speed_mhz == 0) {
676  // Common defaults: DDR4-2666, DDR4-3200, DDR5-4800
677  // Assume DDR4-3200 as a reasonable default
678  mem->memory_speed_mhz = 3200;
679  strcpy(mem->memory_type, "DDR4");
680  }
681 
682  // Estimate channels from measured bandwidth
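683  mem->estimated_channels = topology_estimate_channels_from_bandwidth(
684  mem->measured_bandwidth_gbs, mem->memory_speed_mhz, mem->memory_type);
685 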
686  // Update channel config string
687  switch (mem->estimated_channels) {
688  case 1:
689  strcpy(mem->channel_config, "Single-channel (estimated)");
690  break;
691  case 2:
692  strcpy(mem->channel_config, "Dual-channel (estimated)");
693  break;
694  case 4:
695  strcpy(mem->channel_config, "Quad-channel (estimated)");
696  break;
697  case 6:
698  strcpy(mem->channel_config, "Hexa-channel (estimated)");
699  break;
700  case 8:
701  strcpy(mem->channel_config, "Octa-channel (estimated)");
702  break;
703  default:
704  snprintf(mem->channel_config, sizeof(mem->channel_config),
705  "%d-channel (estimated)", mem->estimated_channels);
706  }
707 
709  mem->num_channels = mem->estimated_channels;
710 
711  // Recalculate theoretical based on estimated channels
712  mem->theoretical_bandwidth_gbs =
713  (mem->memory_speed_mhz * 8.0f * mem->estimated_channels) / 1000.0f;
714  }
715 
716  return 0;
717 }
float measured_bandwidth_gbs
int channels_populated
uint64_t total_mb
float theoretical_bandwidth_gbs
int estimated_channels
uint64_t available_mb
char memory_type[32]
char channel_config[64]
int bw_test_num_threads
uint64_t cached_mb
MemorySlot slots[16]
char type[32]
uint64_t size_mb
char locator[64]
int topology_estimate_channels_from_bandwidth(float measured_bw_gbs, int memory_speed_mhz, const char *memory_type)
static int run_command(const char *cmd, char *output, size_t output_size)
float topology_measure_memory_bandwidth_ex(int *numa_node_out, int *num_threads_out)
#define MAX_MEMORY_SLOTS

References MemoryInfo::available_mb, MemoryInfo::bw_test_num_threads, MemoryInfo::bw_test_numa_node, MemoryInfo::cached_mb, MemoryInfo::channel_config, MemoryInfo::channels_populated, MemorySlot::data_width_bits, MemoryInfo::estimated_channels, MemorySlot::locator, MAX_MEMORY_SLOTS, MemoryInfo::measured_bandwidth_gbs, MemoryInfo::memory_speed_mhz, MemoryInfo::memory_type, MemoryInfo::num_channels, MemoryInfo::num_slots, MemorySlot::populated, MemorySlot::rank, run_command(), MemorySlot::size_mb, MemorySlot::slot_number, MemoryInfo::slots, MemoryInfo::slots_populated, MemorySlot::speed_mhz, MemoryInfo::theoretical_bandwidth_gbs, topology_estimate_channels_from_bandwidth(), topology_measure_memory_bandwidth_ex(), MemoryInfo::total_mb, trim_string(), and MemorySlot::type.

Referenced by topology_discover().

◆ topology_discover_network()

int topology_discover_network ( NetworkTopology * net)

Definition at line 832 of file system_topology.c.

832  {
833  memset(net, 0, sizeof(*net));
834 
835  const char *base = "/sys/class/net";
836  DIR *dir = opendir(base);
837  if (!dir) return -1;
838 
839  struct dirent *entry;
840  while ((entry = readdir(dir)) != NULL) {
841  if (entry->d_name[0] == '.') continue;
842  if (strcmp(entry->d_name, "lo") == 0) continue; // Skip loopback
843 
844  if (net->num_interfaces >= MAX_NICS) break;
845  NetworkInterface *nic = &net->interfaces[net->num_interfaces];
846  memset(nic, 0, sizeof(*nic));
847 
848  strncpy(nic->name, entry->d_name, sizeof(nic->name) - 1);
849 
850  char path[512];
851 
852  // Check if interface is up
853  snprintf(path, sizeof(path), "%s/%s/operstate", base, entry->d_name);
854  char state[32];
855  if (read_file_string(path, state, sizeof(state)) == 0) {
856  nic->is_up = (strcmp(state, "up") == 0);
857  }
858 
859  // Get speed
860  snprintf(path, sizeof(path), "%s/%s/speed", base, entry->d_name);
861  int speed = read_file_int(path);
862  if (speed > 0) {
863  nic->speed_mbps = speed;
864  nic->has_link = true;
865  }
866 
867  // Get MTU
868  snprintf(path, sizeof(path), "%s/%s/mtu", base, entry->d_name);
869  nic->mtu = read_file_int(path);
870 
871  // Get MAC address
872  snprintf(path, sizeof(path), "%s/%s/address", base, entry->d_name);
873  read_file_string(path, nic->mac_address, sizeof(nic->mac_address));
874 
875  // Get driver
876  snprintf(path, sizeof(path), "%s/%s/device/driver", base, entry->d_name);
877  char driver_link[512];
878  ssize_t len = readlink(path, driver_link, sizeof(driver_link) - 1);
879  if (len > 0) {
880  driver_link[len] = '\0';
881  char *driver_name = strrchr(driver_link, '/');
882  if (driver_name) {
883  strncpy(nic->driver, driver_name + 1, sizeof(nic->driver) - 1);
884  }
885  }
886 
887  // Check for InfiniBand
888  snprintf(path, sizeof(path), "/sys/class/infiniband/%s", entry->d_name);
889  if (access(path, F_OK) == 0) {
890  nic->is_infiniband = true;
891  nic->supports_rdma = true;
892  }
893 
894  // Check for RoCE capability
895  if (strstr(nic->driver, "mlx") || strstr(nic->driver, "bnxt") ||
896  strstr(nic->driver, "qed")) {
897  nic->supports_roce = true;
898  nic->supports_rdma = true;
899  }
900 
901  // Get PCI address
902  snprintf(path, sizeof(path), "%s/%s/device", base, entry->d_name);
903  char pci_link[512];
904  len = readlink(path, pci_link, sizeof(pci_link) - 1);
905  if (len > 0) {
906  pci_link[len] = '\0';
907  char *pci = strrchr(pci_link, '/');
908  if (pci) {
909  strncpy(nic->pci_address, pci + 1, sizeof(nic->pci_address) - 1);
910  }
911  }
912 
913  // Calculate bandwidth
914  float bandwidth = nic->speed_mbps / 8000.0f; // Mbps to GB/s
915 
916  if (bandwidth > net->max_bandwidth_gbs) {
917  net->max_bandwidth_gbs = bandwidth;
918  net->best_interface_idx = net->num_interfaces;
919  }
920 
921  if (nic->supports_rdma) {
922  net->has_rdma = true;
923  }
924 
925  net->num_interfaces++;
926  }
927  closedir(dir);
928 
929  return 0;
930 }
NetworkInterface interfaces[8]
#define MAX_NICS

References NetworkTopology::best_interface_idx, NetworkInterface::driver, NetworkInterface::has_link, NetworkTopology::has_rdma, NetworkTopology::interfaces, NetworkInterface::is_infiniband, NetworkInterface::is_up, NetworkInterface::mac_address, NetworkTopology::max_bandwidth_gbs, MAX_NICS, NetworkInterface::mtu, NetworkInterface::name, NetworkTopology::num_interfaces, NetworkInterface::pci_address, read_file_int(), read_file_string(), NetworkInterface::speed_mbps, NetworkInterface::supports_rdma, and NetworkInterface::supports_roce.

Referenced by topology_discover().

◆ topology_discover_numa()

int topology_discover_numa ( NUMATopology * numa)

Definition at line 303 of file system_topology.c.

303  {
304  memset(numa, 0, sizeof(*numa));
305 
306  const char *base = "/sys/devices/system/node";
307  DIR *dir = opendir(base);
308  if (!dir) {
309  // No NUMA, single node system
310  numa->num_nodes = 1;
311  numa->nodes[0].node_id = 0;
312 
313  struct sysinfo si;
314  if (sysinfo(&si) == 0) {
315  numa->nodes[0].memory_total_mb = si.totalram / (1024 * 1024);
316  numa->nodes[0].memory_free_mb = si.freeram / (1024 * 1024);
317  }
318  return 0;
319  }
320 
321  struct dirent *entry;
322  while ((entry = readdir(dir)) != NULL) {
323  if (strncmp(entry->d_name, "node", 4) != 0) continue;
324  if (!isdigit(entry->d_name[4])) continue;
325 
326  int node_id = atoi(entry->d_name + 4);
327  if (node_id >= MAX_NUMA_NODES) continue;
328 
329  NUMANode *node = &numa->nodes[numa->num_nodes];
330  node->node_id = node_id;
331 
332  char path[512];
333 
334  // Memory info
335  snprintf(path, sizeof(path), "%s/%s/meminfo", base, entry->d_name);
336  FILE *f = fopen(path, "r");
337  if (f) {
338  char line[256];
339  while (fgets(line, sizeof(line), f)) {
340  uint64_t val;
341  if (sscanf(line, "Node %*d MemTotal: %lu kB", &val) == 1) {
342  node->memory_total_mb = val / 1024;
343  } else if (sscanf(line, "Node %*d MemFree: %lu kB", &val) == 1) {
344  node->memory_free_mb = val / 1024;
345  }
346  }
347  fclose(f);
348  }
349 
350  // CPU list
351  snprintf(path, sizeof(path), "%s/%s/cpulist", base, entry->d_name);
352  char cpulist[512];
353  if (read_file_string(path, cpulist, sizeof(cpulist)) == 0) {
354  // Parse CPU list (e.g., "0-7,16-23")
355  char *saveptr;
356  char *token = strtok_r(cpulist, ",", &saveptr);
357  while (token && node->num_cpus < MAX_CPUS) {
358  int start, end;
359  if (sscanf(token, "%d-%d", &start, &end) == 2) {
360  for (int i = start; i <= end && node->num_cpus < MAX_CPUS; i++) {
361  node->cpu_list[node->num_cpus++] = i;
362  }
363  } else if (sscanf(token, "%d", &start) == 1) {
364  node->cpu_list[node->num_cpus++] = start;
365  }
366  token = strtok_r(NULL, ",", &saveptr);
367  }
368  }
369 
370  numa->num_nodes++;
371  }
372  closedir(dir);
373 
374  // Read NUMA distances
375  char path[512];
376  snprintf(path, sizeof(path), "%s/node0/distance", base);
377  char dist_str[256];
378  if (read_file_string(path, dist_str, sizeof(dist_str)) == 0) {
379  char *saveptr;
380  char *token = strtok_r(dist_str, " ", &saveptr);
381  int col = 0;
382  while (token && col < numa->num_nodes) {
383  numa->distances[0][col++] = atoi(token);
384  token = strtok_r(NULL, " ", &saveptr);
385  }
386  }
387 
388  return 0;
389 }
int cpu_list[256]
uint64_t memory_free_mb
uint64_t memory_total_mb
int distances[8][8]
NUMANode nodes[8]
#define MAX_NUMA_NODES
const char * token
Definition: tokenizer.h:306
uint32_t end
Definition: utf8.c:215
uint32_t start
Definition: utf8.c:214

References NUMANode::cpu_list, NUMATopology::distances, end, MAX_CPUS, MAX_NUMA_NODES, NUMANode::memory_free_mb, NUMANode::memory_total_mb, NUMANode::node_id, NUMATopology::nodes, NUMANode::num_cpus, NUMATopology::num_nodes, read_file_string(), start, and token.

Referenced by topology_discover().

◆ topology_discover_pcie()

int topology_discover_pcie ( PCIeTopology * pcie)

Definition at line 730 of file system_topology.c.

730  {
731  memset(pcie, 0, sizeof(*pcie));
732 
733  char output[32768];
734  if (run_command("lspci -vvv 2>/dev/null", output, sizeof(output)) != 0) {
735  return -1;
736  }
737 
738  PCIeDevice *current = NULL;
739  char *line = strtok(output, "\n");
740 
741  while (line) {
742  // New device line: "00:1f.0 ISA bridge: Intel..."
743  if (strlen(line) > 0 && isxdigit(line[0]) && line[2] == ':') {
744  if (pcie->num_devices < MAX_PCIE_DEVICES) {
745  current = &pcie->devices[pcie->num_devices++];
746  memset(current, 0, sizeof(*current));
747 
748  // Parse BDF
749  sscanf(line, "%x:%x.%x", &current->bus, &current->device, &current->function);
750 
751  // Get device name (after the type)
752  char *name_start = strchr(line, ':');
753  if (name_start) {
754  name_start = strchr(name_start + 1, ':');
755  if (name_start) {
756  name_start++;
757  while (*name_start == ' ') name_start++;
758  strncpy(current->device_name, name_start,
759  sizeof(current->device_name) - 1);
760  }
761  }
762 
763  // Check device type
764  current->is_gpu = (strstr(line, "VGA") != NULL ||
765  strstr(line, "3D controller") != NULL ||
766  strstr(line, "Display") != NULL);
767  current->is_nic = (strstr(line, "Ethernet") != NULL ||
768  strstr(line, "Network") != NULL ||
769  strstr(line, "InfiniBand") != NULL);
770  current->is_nvme = (strstr(line, "Non-Volatile memory") != NULL);
771  }
772  } else if (current) {
773  // Parse LnkCap and LnkSta for PCIe info
774  if (strstr(line, "LnkCap:")) {
775  char *speed = strstr(line, "Speed ");
776  char *width = strstr(line, "Width x");
777  if (speed) {
778  float gts;
779  if (sscanf(speed, "Speed %fGT/s", &gts) == 1) {
780  if (gts >= 64) current->link_speed_max = 6;
781  else if (gts >= 32) current->link_speed_max = 5;
782  else if (gts >= 16) current->link_speed_max = 4;
783  else if (gts >= 8) current->link_speed_max = 3;
784  else if (gts >= 5) current->link_speed_max = 2;
785  else current->link_speed_max = 1;
786  }
787  }
788  if (width) {
789  sscanf(width, "Width x%d", &current->link_width_max);
790  }
791  } else if (strstr(line, "LnkSta:")) {
792  char *speed = strstr(line, "Speed ");
793  char *width = strstr(line, "Width x");
794  if (speed) {
795  float gts;
796  if (sscanf(speed, "Speed %fGT/s", &gts) == 1) {
797  if (gts >= 64) current->link_speed = 6;
798  else if (gts >= 32) current->link_speed = 5;
799  else if (gts >= 16) current->link_speed = 4;
800  else if (gts >= 8) current->link_speed = 3;
801  else if (gts >= 5) current->link_speed = 2;
802  else current->link_speed = 1;
803  }
804  }
805  if (width) {
806  sscanf(width, "Width x%d", &current->link_width);
807  }
808  }
809  }
810 
811  line = strtok(NULL, "\n");
812  }
813 
814  // Calculate bandwidths and summary
815  for (int i = 0; i < pcie->num_devices; i++) {
816  PCIeDevice *d = &pcie->devices[i];
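817  d->bandwidth_gbs = pcie_bandwidth_gbs(d->link_speed, d->link_width);
818  d->bandwidth_max_gbs = pcie_bandwidth_gbs(d->link_speed_max, d->link_width_max);
819 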
820  if (d->link_width > 0) {
821  pcie->total_lanes_used += d->link_width;
822  }
823  }
824 
825  return 0;
826 }
char device_name[256]
float bandwidth_max_gbs
float bandwidth_gbs
PCIeDevice devices[32]
static float pcie_bandwidth_gbs(int gen, int width)
#define MAX_PCIE_DEVICES

References PCIeDevice::bandwidth_gbs, PCIeDevice::bandwidth_max_gbs, PCIeDevice::bus, PCIeDevice::device, PCIeDevice::device_name, PCIeTopology::devices, PCIeDevice::function, PCIeDevice::is_gpu, PCIeDevice::is_nic, PCIeDevice::is_nvme, PCIeDevice::link_speed, PCIeDevice::link_speed_max, PCIeDevice::link_width, PCIeDevice::link_width_max, MAX_PCIE_DEVICES, PCIeTopology::num_devices, pcie_bandwidth_gbs(), run_command(), and PCIeTopology::total_lanes_used.

Referenced by topology_discover().

◆ topology_estimate_memory_bandwidth()

float topology_estimate_memory_bandwidth ( const MemoryInfo * mem)

Definition at line 1116 of file system_topology.c.

1116  {
1117  return mem->theoretical_bandwidth_gbs;
1118 }

References MemoryInfo::theoretical_bandwidth_gbs.

◆ topology_estimate_network_training_time()

float topology_estimate_network_training_time ( const NetworkTopology * net,
uint64_t  model_size_mb 
)

Definition at line 1120 of file system_topology.c.

1121  {
1122  if (net->max_bandwidth_gbs <= 0) return -1;
1123 
1124  // Time to transfer model_size_mb in seconds
1125  // Account for protocol overhead (~10%)
1126  float effective_bw = net->max_bandwidth_gbs * 0.9f * 1024; // Convert to MB/s
1127  return model_size_mb / effective_bw;
1128 }

References NetworkTopology::max_bandwidth_gbs.

Referenced by topology_print_distributed_potential().

◆ topology_generate_recommendations()

int topology_generate_recommendations ( const SystemTopology * topo,
RecommendationList * recs 
)

Definition at line 1010 of file system_topology.c.

1011  {
1012  memset(recs, 0, sizeof(*recs));
1013 
1014  // Memory recommendations
1015  if (topo->memory.slots_populated > 0 &&
1016  topo->memory.slots_populated < topo->memory.num_slots) {
1017 
1018  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1021  strcpy(r->title, "Memory Slots Available");
1022  snprintf(r->description, sizeof(r->description),
1023  "Only %d of %d memory slots populated. Adding more DIMMs "
1024  "could increase memory bandwidth.",
1025  topo->memory.slots_populated, topo->memory.num_slots);
1026  snprintf(r->action, sizeof(r->action),
1027  "Consider adding %d more matching DIMMs for better bandwidth",
1028  topo->memory.num_slots - topo->memory.slots_populated);
1029  }
1030 
1031  // Single-channel warning
1032  if (topo->memory.channels_populated == 1 && topo->memory.num_slots > 1) {
1033  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1036  strcpy(r->title, "Single-Channel Memory");
1037  strcpy(r->description,
1038  "Running in single-channel mode significantly reduces memory bandwidth. "
1039  "This will impact training performance.");
1040  strcpy(r->action,
1041  "Add a second DIMM in the correct slot to enable dual-channel mode");
1042  }
1043 
1044  // Affinity recommendations
1045  if (!topo->affinity.affinity_set) {
1046  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1049  strcpy(r->title, "OpenMP Affinity Not Set");
1050  strcpy(r->description,
1051  "OpenMP thread affinity is not configured. Threads may migrate "
1052  "between cores causing cache misses and NUMA penalties.");
1053  strcpy(r->action,
1054  "export OMP_PROC_BIND=close OMP_PLACES=cores");
1055  }
1056 
1057  // Network recommendations
1058  if (topo->network.max_bandwidth_gbs < 1.0f) { // Less than 10 GbE
1059  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1062  strcpy(r->title, "Slow Network for Distributed Training");
1063  snprintf(r->description, sizeof(r->description),
1064  "Maximum network bandwidth is %.2f GB/s. This will be a "
1065  "significant bottleneck for distributed training.",
1066  topo->network.max_bandwidth_gbs);
1067  strcpy(r->action,
1068  "Consider upgrading to 10GbE+ or InfiniBand for distributed training");
1069  }
1070 
1071  // RDMA recommendation
1072  if (!topo->network.has_rdma && topo->network.num_interfaces > 0) {
1073  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1076  strcpy(r->title, "No RDMA-Capable NICs");
1077  strcpy(r->description,
1078  "No RDMA-capable network adapters detected. RDMA enables direct "
1079  "memory access between nodes, reducing latency for gradient sync.");
1080  strcpy(r->action,
1081  "Consider Mellanox ConnectX or Intel E810 for RDMA support");
1082  }
1083 
1084  // SIMD recommendations
1085  if (!topo->cpu.has_avx2) {
1086  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1089  strcpy(r->title, "Limited SIMD Support");
1090  strcpy(r->description,
1091  "CPU does not support AVX2. Kernel performance will be limited.");
1092  strcpy(r->action, "AVX2+ CPUs provide significantly better performance");
1093  }
1094 
1095  // NUMA warning for multi-socket
1096  if (topo->numa.num_nodes > 1) {
1097  Recommendation *r = &recs->recommendations[recs->num_recommendations++];
1100  strcpy(r->title, "Multi-NUMA System Detected");
1101  snprintf(r->description, sizeof(r->description),
1102  "System has %d NUMA nodes. Cross-node memory access is slower. "
1103  "Ensure data locality for best performance.",
1104  topo->numa.num_nodes);
1105  strcpy(r->action,
1106  "Use numactl --localalloc or NUMA-aware memory allocation");
1107  }
1108 
1109  return 0;
1110 }
Recommendation recommendations[32]
RecommendationPriority priority
RecommendationCategory category
char description[512]

References Recommendation::action, SystemTopology::affinity, AffinityInfo::affinity_set, Recommendation::category, MemoryInfo::channels_populated, SystemTopology::cpu, Recommendation::description, CPUInfo::has_avx2, NetworkTopology::has_rdma, NetworkTopology::max_bandwidth_gbs, SystemTopology::memory, SystemTopology::network, NetworkTopology::num_interfaces, NUMATopology::num_nodes, RecommendationList::num_recommendations, MemoryInfo::num_slots, SystemTopology::numa, Recommendation::priority, REC_CATEGORY_AFFINITY, REC_CATEGORY_CPU, REC_CATEGORY_MEMORY, REC_CATEGORY_NETWORK, REC_PRIORITY_HIGH, REC_PRIORITY_LOW, REC_PRIORITY_MEDIUM, RecommendationList::recommendations, MemoryInfo::slots_populated, and Recommendation::title.

Referenced by topology_print_summary().

◆ topology_print_affinity()

void topology_print_affinity ( const AffinityInfo * aff)

Definition at line 528 of file show_config.c.

528  {
529  print_section("THREAD AFFINITY (OpenMP)");
530 
531  print_tree_item(0, 0, "OMP_NUM_THREADS: %d", aff->omp_num_threads);
532  print_tree_item(0, 0, "OMP_PROC_BIND: %s%s%s",
533  aff->affinity_set ? C(GREEN) : C(YELLOW),
534  aff->omp_proc_bind, C(RESET));
535  print_tree_item(0, 1, "OMP_PLACES: %s", aff->omp_places);
536 
537  if (!aff->affinity_set) {
538  printf("\n");
539  print_warning("Thread affinity not configured");
540  printf(" %s💡 Recommendation:%s export OMP_PROC_BIND=close OMP_PLACES=cores\n",
541  C(CYAN), C(RESET));
542  }
543 }
#define C(color)
Definition: show_config.c:39
static void print_tree_item(int level, int is_last, const char *fmt,...)
Definition: show_config.c:78
static void print_section(const char *title)
Definition: show_config.c:73
static void print_warning(const char *msg)
Definition: show_config.c:91
#define RESET
Definition: show_config.c:20
#define YELLOW
Definition: show_config.c:26
#define GREEN
Definition: show_config.c:25
#define CYAN
Definition: show_config.c:29

References AffinityInfo::affinity_set, C, CYAN, GREEN, AffinityInfo::omp_num_threads, AffinityInfo::omp_places, AffinityInfo::omp_proc_bind, print_section(), print_tree_item(), print_warning(), RESET, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_cache()

void topology_print_cache ( const CacheTopology * cache,
int  logical_cores 
)

Definition at line 169 of file show_config.c.

169  {
170  print_section("CACHE HIERARCHY");
171 
172  // Show data source
173  printf(" %sSource: /sys/devices/system/cpu/cpu0/cache/%s\n\n",
174  C(DIM), C(RESET));
175 
176  for (int i = 0; i < cache->num_levels; i++) {
177  const CacheInfo *c = &cache->levels[i];
178  int is_last = (i == cache->num_levels - 1);
179 
180  // Calculate number of instances (how many of this cache exist)
181  int instances = 1;
182  if (c->shared_by_cores > 0 && logical_cores > 0) {
183  instances = logical_cores / c->shared_by_cores;
184  if (instances < 1) instances = 1;
185  }
186 
187  // Calculate total size across all instances
188  int total_kb = c->size_kb * instances;
189 
190  // Format size nicely (KB or MB)
191  char size_str[32];
192  if (total_kb >= 1024) {
193  snprintf(size_str, sizeof(size_str), "%d MiB", total_kb / 1024);
194  } else {
195  snprintf(size_str, sizeof(size_str), "%d KiB", total_kb);
196  }
197 
198  // Format instance info like lscpu
199  char instance_str[32] = "";
200  if (instances > 1) {
201  snprintf(instance_str, sizeof(instance_str), " (%d instances)", instances);
202  } else {
203  snprintf(instance_str, sizeof(instance_str), " (%d instance)", instances);
204  }
205 
206  print_tree_item(0, is_last, "L%d%c: %s%s",
207  c->level,
208  c->type[0] == 'D' ? 'd' : (c->type[0] == 'I' ? 'i' : ' '),
209  size_str, instance_str);
210  }
211 }
#define DIM
Definition: show_config.c:22

References C, DIM, CacheInfo::level, CacheTopology::levels, CacheTopology::num_levels, print_section(), print_tree_item(), RESET, CacheInfo::shared_by_cores, CacheInfo::size_kb, and CacheInfo::type.

Referenced by topology_print_summary().

◆ topology_print_cpu()

void topology_print_cpu ( const CPUInfo * cpu)

Definition at line 103 of file show_config.c.

103  {
104  print_section("CPU");
105 
106  printf(" %s%s%s\n", C(BOLD), cpu->model_name, C(RESET));
107 
108  // Build SIMD string with detailed AVX-512 sub-features
109  char simd_buf[256] = "";
110  if (cpu->has_avx512f) {
111  strcat(simd_buf, "AVX-512");
112  // Add AVX-512 sub-features in parentheses
113  char sub_features[64] = "";
114  if (cpu->has_avx512_bf16) strcat(sub_features, "BF16, ");
115  if (cpu->has_avx512bw) strcat(sub_features, "BW, ");
116  if (cpu->has_avx512vl) strcat(sub_features, "VL, ");
117  if (sub_features[0]) {
118  // Remove trailing ", "
119  sub_features[strlen(sub_features) - 2] = '\0';
120  strcat(simd_buf, " (");
121  strcat(simd_buf, sub_features);
122  strcat(simd_buf, ")");
123  }
124  strcat(simd_buf, " ");
125  } else if (cpu->has_avx2) {
126  strcat(simd_buf, "AVX2 ");
127  } else if (cpu->has_avx) {
128  strcat(simd_buf, "AVX ");
129  } else if (cpu->has_sse4_2) {
130  strcat(simd_buf, "SSE4.2 ");
131  }
132 
133  if (cpu->has_vnni) strcat(simd_buf, "VNNI ");
134 
135  // AMX details
136  if (cpu->has_amx) {
137  strcat(simd_buf, "AMX");
138  char amx_features[32] = "";
139  if (cpu->has_amx_bf16) strcat(amx_features, "BF16,");
140  if (cpu->has_amx_int8) strcat(amx_features, "INT8,");
141  if (amx_features[0]) {
142  amx_features[strlen(amx_features) - 1] = '\0'; // Remove trailing comma
143  strcat(simd_buf, "(");
144  strcat(simd_buf, amx_features);
145  strcat(simd_buf, ") ");
146  } else {
147  strcat(simd_buf, " ");
148  }
149  }
150 
151  print_tree_item(0, 0, "Sockets: %d", cpu->sockets);
152  print_tree_item(0, 0, "Cores: %d physical, %d logical %s",
153  cpu->physical_cores, cpu->logical_cores,
154  cpu->threads_per_core > 1 ? "(HT/SMT enabled)" : "");
155  print_tree_item(0, 0, "Frequency: %.0f MHz (max: %.0f MHz)",
156  cpu->base_freq_mhz, cpu->max_freq_mhz);
157  print_tree_item(0, 0, "SIMD: %s%s%s",
158  C(cpu->has_avx512f ? GREEN : (cpu->has_avx2 ? GREEN : YELLOW)),
159  simd_buf[0] ? simd_buf : "Basic",
160  C(RESET));
161  print_tree_item(0, 1, "PCIe: Gen %d, ~%d lanes from CPU",
162  cpu->pcie_generation, cpu->pcie_lanes_total);
163 
164  if (!cpu->has_avx2) {
165  print_warning("No AVX2 support - kernel performance will be limited");
166  }
167 }
#define BOLD
Definition: show_config.c:21

References CPUInfo::base_freq_mhz, BOLD, C, GREEN, CPUInfo::has_amx, CPUInfo::has_amx_bf16, CPUInfo::has_amx_int8, CPUInfo::has_avx, CPUInfo::has_avx2, CPUInfo::has_avx512_bf16, CPUInfo::has_avx512bw, CPUInfo::has_avx512f, CPUInfo::has_avx512vl, CPUInfo::has_sse4_2, CPUInfo::has_vnni, CPUInfo::logical_cores, CPUInfo::max_freq_mhz, CPUInfo::model_name, CPUInfo::pcie_generation, CPUInfo::pcie_lanes_total, CPUInfo::physical_cores, print_section(), print_tree_item(), print_warning(), RESET, CPUInfo::sockets, CPUInfo::threads_per_core, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_distributed_potential()

void topology_print_distributed_potential ( const SystemTopology * topo)

Definition at line 584 of file show_config.c.

584  {
585  print_section("DISTRIBUTED TRAINING POTENTIAL");
586 
587  char mem_bw_buf[32], net_bw_buf[32];
588  format_bandwidth(topo->memory.theoretical_bandwidth_gbs, mem_bw_buf, sizeof(mem_bw_buf));
589  format_bandwidth(topo->network.max_bandwidth_gbs, net_bw_buf, sizeof(net_bw_buf));
590 
591  printf(" Single Node Capacity:\n");
592  print_tree_item(0, 0, "Compute: %d cores @ %s",
593  topo->cpu.physical_cores,
594  topo->cpu.has_avx512f ? "AVX-512" :
595  (topo->cpu.has_avx2 ? "AVX2" : "AVX"));
596  print_tree_item(0, 0, "Memory: %lu GB @ %s",
597  (unsigned long)(topo->memory.total_mb / 1024), mem_bw_buf);
598  print_tree_item(0, 1, "Network: %s", net_bw_buf);
599 
600  // Estimate sync times for various model sizes
601  printf("\n Estimated Gradient Sync Time (single allreduce):\n");
602 
603  uint64_t model_sizes[] = {100, 500, 1000, 7000}; // MB
604  const char *model_names[] = {"100 MB (BERT-base)", "500 MB (GPT-2)",
605  "1 GB (ResNet-50 batch)", "7 GB (LLaMA-7B)"};
606 
607  for (int i = 0; i < 4; i++) {
608  float sync_time = topology_estimate_network_training_time(
609  &topo->network, model_sizes[i]);
610 
611  const char *time_color = "";
612  if (sync_time < 0.1f) time_color = GREEN;
613  else if (sync_time < 1.0f) time_color = YELLOW;
614  else time_color = RED;
615 
616  printf(" %-25s %s%8.2f sec%s\n",
617  model_names[i], C(time_color), sync_time, C(RESET));
618  }
619 
620  // Multi-node projection
621  printf("\n Multi-Node Projection (assuming identical nodes):\n");
622  int nodes[] = {2, 4, 8, 16};
623  for (int i = 0; i < 4; i++) {
624  int n = nodes[i];
625  uint64_t total_mem = topo->memory.total_mb * n;
626  int total_cores = topo->cpu.physical_cores * n;
627 
628  char total_mem_buf[32];
629  format_size(total_mem, total_mem_buf, sizeof(total_mem_buf));
630 
631  printf(" %2d nodes: %4d cores, %s memory\n",
632  n, total_cores, total_mem_buf);
633  }
634 
635  // Ring-allreduce topology diagram
636  printf("\n Ring-AllReduce Topology (4 nodes):\n");
637  printf(" %s┌─────────┐ ┌─────────┐%s\n", C(CYAN), C(RESET));
638  printf(" %s│ Node 0 │────→│ Node 1 │%s\n", C(CYAN), C(RESET));
639  printf(" %s│ Worker │ │ Worker │%s\n", C(CYAN), C(RESET));
640  printf(" %s└────↑────┘ └────│────┘%s\n", C(CYAN), C(RESET));
641  printf(" %s │ │ %s\n", C(CYAN), C(RESET));
642  printf(" %s │ ↓ %s\n", C(CYAN), C(RESET));
643  printf(" %s┌────│────┐ ┌────↓────┐%s\n", C(CYAN), C(RESET));
644  printf(" %s│ Node 3 │←────│ Node 2 │%s\n", C(CYAN), C(RESET));
645  printf(" %s│ Worker │ │ Worker │%s\n", C(CYAN), C(RESET));
646  printf(" %s└─────────┘ └─────────┘%s\n", C(CYAN), C(RESET));
647 }
static const char * format_bandwidth(float bw_gbs, char *buf, size_t buf_size)
Definition: show_config.c:56
#define RED
Definition: show_config.c:24
static const char * format_size(uint64_t size_mb, char *buf, size_t buf_size)
Definition: show_config.c:45
float topology_estimate_network_training_time(const NetworkTopology *net, uint64_t model_size_mb)

References C, SystemTopology::cpu, CYAN, format_bandwidth(), format_size(), GREEN, CPUInfo::has_avx2, CPUInfo::has_avx512f, NetworkTopology::max_bandwidth_gbs, SystemTopology::memory, SystemTopology::network, CPUInfo::physical_cores, print_section(), print_tree_item(), RED, RESET, MemoryInfo::theoretical_bandwidth_gbs, topology_estimate_network_training_time(), MemoryInfo::total_mb, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_memory()

void topology_print_memory ( const MemoryInfo * mem)

Definition at line 292 of file show_config.c.

292  {
293  print_section("MEMORY");
294 
295  // Show data sources
296  printf(" %sSource: /proc/meminfo, dmidecode (if root), STREAM benchmark%s\n\n",
297  C(DIM), C(RESET));
298 
299  char total_buf[32], avail_buf[32], theo_bw_buf[32], meas_bw_buf[32];
300  format_size(mem->total_mb, total_buf, sizeof(total_buf));
301  format_size(mem->available_mb, avail_buf, sizeof(avail_buf));
302  format_bandwidth(mem->theoretical_bandwidth_gbs, theo_bw_buf, sizeof(theo_bw_buf));
303  format_bandwidth(mem->measured_bandwidth_gbs, meas_bw_buf, sizeof(meas_bw_buf));
304 
305  printf(" %sTotal: %s%s (%s available)\n",
306  C(BOLD), total_buf, C(RESET), avail_buf);
307 
308  if (mem->memory_type[0]) {
309  print_tree_item(0, 0, "Type: %s @ %d MT/s", mem->memory_type, mem->memory_speed_mhz);
310  }
311 
312  print_tree_item(0, 0, "Configuration: %s", mem->channel_config);
313 
314  if (mem->num_slots > 0) {
315  print_tree_item(0, 0, "Slots: %d/%d populated",
316  mem->slots_populated, mem->num_slots);
317  }
318 
319  // Show bandwidth measurements with explanation
320  printf("\n %sBandwidth Analysis:%s\n", C(CYAN), C(RESET));
321 
322  // Theoretical bandwidth calculation
323  if (mem->memory_speed_mhz > 0 && mem->channels_populated > 0) {
324  printf(" %s├── Theoretical: %d MT/s × 8 bytes × %d channel(s) = %s%s\n",
326  theo_bw_buf, C(RESET));
327 
328  // Show SNC relationship for multi-channel configs
329  if (mem->channels_populated >= 4) {
330  printf(" %s│ └── SNC potential: %d ch ÷ 2 = SNC2 (%d ch/node), ÷ 4 = SNC4 (%d ch/node)%s\n",
331  C(DIM), mem->channels_populated,
332  mem->channels_populated / 2,
333  mem->channels_populated / 4,
334  C(RESET));
335  } else if (mem->channels_populated >= 2) {
336  printf(" %s│ └── SNC potential: %d ch ÷ 2 = SNC2 (%d ch/node)%s\n",
337  C(DIM), mem->channels_populated,
338  mem->channels_populated / 2,
339  C(RESET));
340  }
341  } else {
342  printf(" %s├── Theoretical: %s (estimated)%s\n",
343  C(DIM), theo_bw_buf, C(RESET));
344  }
345 
346  // Measured bandwidth with methodology
347  if (mem->measured_bandwidth_gbs > 0) {
348  float efficiency = (mem->theoretical_bandwidth_gbs > 0) ?
349  (mem->measured_bandwidth_gbs / mem->theoretical_bandwidth_gbs * 100.0f) : 0;
350 
351  printf(" %s├── Measured: %s%s%s (%.0f%% efficiency)%s\n",
352  C(DIM),
353  C(mem->measured_bandwidth_gbs > 40 ? GREEN : YELLOW),
354  meas_bw_buf, C(RESET), efficiency, C(RESET));
355  printf(" %s│ Method: STREAM Triad (c[i] = a[i] + s*b[i])%s\n",
356  C(DIM), C(RESET));
357  printf(" %s│ Buffer: 256 MB × 3 arrays, 3 iterations%s\n",
358  C(DIM), C(RESET));
359  printf(" %s│ NUMA node: %d (memory allocated on this node)%s\n",
360  C(DIM), mem->bw_test_numa_node, C(RESET));
361  printf(" %s│ Threads: %d (OMP_NUM_THREADS)%s\n",
362  C(DIM), mem->bw_test_num_threads, C(RESET));
363  printf(" %s└── Formula: (256MB × 3 × 3) / time = GB/s%s\n",
364  C(DIM), C(RESET));
365  }
366 
367  // Show DIMM details if available
368  if (mem->num_slots > 0 && mem->slots[0].locator[0]) {
369  printf("\n DIMM Layout:\n");
370  for (int i = 0; i < mem->num_slots; i++) {
371  const MemorySlot *s = &mem->slots[i];
372  if (s->populated) {
373  char dimm_size[32];
374  format_size(s->size_mb, dimm_size, sizeof(dimm_size));
375  printf(" %s[%s]%s %s: %s%s @ %d MT/s%s\n",
376  C(GREEN), s->locator, C(RESET),
377  s->type, C(BOLD), dimm_size, s->speed_mhz, C(RESET));
378  } else {
379  printf(" %s[%s]%s EMPTY\n",
380  C(DIM), s->locator, C(RESET));
381  }
382  }
383  }
384 
385  if (mem->channels_populated == 1 && mem->num_slots > 1) {
386  print_warning("Single-channel mode - bandwidth reduced by ~50%%");
387  }
388 
389  if (mem->num_slots > 0 && mem->slots_populated < mem->num_slots) {
390  printf(" %s💡 Tip:%s Add %d more DIMM(s) for better bandwidth\n",
391  C(CYAN), C(RESET), mem->num_slots - mem->slots_populated);
392  }
393 }

References MemoryInfo::available_mb, BOLD, MemoryInfo::bw_test_num_threads, MemoryInfo::bw_test_numa_node, C, MemoryInfo::channel_config, MemoryInfo::channels_populated, CYAN, DIM, format_bandwidth(), format_size(), GREEN, MemorySlot::locator, MemoryInfo::measured_bandwidth_gbs, MemoryInfo::memory_speed_mhz, MemoryInfo::memory_type, MemoryInfo::num_slots, MemorySlot::populated, print_section(), print_tree_item(), print_warning(), RESET, MemorySlot::size_mb, MemoryInfo::slots, MemoryInfo::slots_populated, MemorySlot::speed_mhz, MemoryInfo::theoretical_bandwidth_gbs, MemoryInfo::total_mb, MemorySlot::type, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_network()

void topology_print_network ( const NetworkTopology * net)

Definition at line 458 of file show_config.c.

458  {
459  print_section("NETWORK INTERFACES");
460 
461  if (net->num_interfaces == 0) {
462  printf(" %sNo network interfaces detected%s\n", C(DIM), C(RESET));
463  return;
464  }
465 
466  char bw_buf[32];
467 
468  for (int i = 0; i < net->num_interfaces; i++) {
469  const NetworkInterface *n = &net->interfaces[i];
470 
471  const char *status_icon = n->is_up && n->has_link ? "✓" : "✗";
472  const char *status_color = n->is_up && n->has_link ? GREEN : RED;
473 
474  float bw_gbs = n->speed_mbps / 8000.0f;
475  format_bandwidth(bw_gbs, bw_buf, sizeof(bw_buf));
476 
477  printf(" %s%s%s %s%-10s%s ",
478  C(status_color), status_icon, C(RESET),
479  C(BOLD), n->name, C(RESET));
480 
481  if (n->has_link) {
482  // Color code speed
483  const char *speed_color = "";
484  if (n->speed_mbps >= 100000) speed_color = GREEN; // 100 GbE+
485  else if (n->speed_mbps >= 10000) speed_color = GREEN; // 10 GbE
486  else if (n->speed_mbps >= 1000) speed_color = YELLOW; // 1 GbE
487  else speed_color = RED; // < 1 GbE
488 
489  printf("%s%6lu Mbps%s (%s) ",
490  C(speed_color), (unsigned long)n->speed_mbps, C(RESET), bw_buf);
491  } else {
492  printf("%sno link %s ", C(RED), C(RESET));
493  }
494 
495  if (n->driver[0]) {
496  printf("%s[%s]%s ", C(DIM), n->driver, C(RESET));
497  }
498 
499  if (n->supports_rdma) {
500  printf("%s[RDMA]%s ", C(GREEN), C(RESET));
501  }
502  if (n->is_infiniband) {
503  printf("%s[IB]%s ", C(MAGENTA), C(RESET));
504  }
505 
506  printf("\n");
507  }
508 
509  // Network capability summary
510  printf("\n For Distributed Training:\n");
511 
512  if (net->max_bandwidth_gbs >= 12.5f) {
513  print_ok("100 GbE+ available - excellent for distributed training");
514  } else if (net->max_bandwidth_gbs >= 1.25f) {
515  printf(" %s✓%s 10 GbE available - good for small clusters\n",
516  C(GREEN), C(RESET));
517  } else if (net->max_bandwidth_gbs >= 0.125f) {
518  print_warning("Only 1 GbE - significant bottleneck for distributed training");
519  } else {
520  print_warning("Very slow network - distributed training not recommended");
521  }
522 
523  if (net->has_rdma) {
524  print_ok("RDMA capable NIC detected - low-latency gradient sync possible");
525  }
526 }
static void print_ok(const char *msg)
Definition: show_config.c:95
#define MAGENTA
Definition: show_config.c:28

References BOLD, C, DIM, NetworkInterface::driver, format_bandwidth(), GREEN, NetworkInterface::has_link, NetworkTopology::has_rdma, NetworkTopology::interfaces, NetworkInterface::is_infiniband, NetworkInterface::is_up, MAGENTA, NetworkTopology::max_bandwidth_gbs, NetworkInterface::name, NetworkTopology::num_interfaces, print_ok(), print_section(), print_warning(), RED, RESET, NetworkInterface::speed_mbps, NetworkInterface::supports_rdma, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_numa()

void topology_print_numa ( const NUMATopology * numa,
int  sockets 
)

Definition at line 213 of file show_config.c.

213  {
214  print_section("NUMA TOPOLOGY");
215 
216  // Show source
217  printf(" %sSource: /sys/devices/system/node/%s\n", C(DIM), C(RESET));
218 
219  // Single NUMA node - UMA system
220  if (numa->num_nodes <= 1) {
221  printf("\n %s✓ Single NUMA node (Uniform Memory Access)%s\n", C(GREEN), C(RESET));
222  printf(" %s All memory is local - no NUMA penalties%s\n", C(DIM), C(RESET));
223  printf("\n %sNote: Sub-NUMA Clustering (SNC) / NUMA-Per-Socket (NPS) not detected.%s\n",
224  C(DIM), C(RESET));
225  printf(" %s On Xeon/EPYC, check BIOS settings or run: numactl --hardware%s\n",
226  C(DIM), C(RESET));
227  return;
228  }
229 
230  // Detect potential Sub-NUMA Clustering (SNC) or NUMA-Per-Socket (NPS)
231  // If num_nodes > sockets, SNC/NPS is likely enabled
232  // SNC divides each socket's memory channels into groups, one per sub-NUMA node
233  if (sockets > 0 && numa->num_nodes > sockets) {
234  int nodes_per_socket = numa->num_nodes / sockets;
235  printf("\n %s⚠ Sub-NUMA detected: %d NUMA nodes on %d socket(s) = SNC%d or NPS%d%s\n",
236  C(YELLOW), numa->num_nodes, sockets, nodes_per_socket, nodes_per_socket, C(RESET));
237  printf(" %s Intel: Sub-NUMA Clustering (SNC) | AMD: NUMA-Per-Socket (NPS)%s\n",
238  C(DIM), C(RESET));
239  printf(" %s Each sub-node has its own memory channels for lower latency%s\n",
240  C(DIM), C(RESET));
241  } else if (sockets > 1) {
242  // Multi-socket without SNC
243  printf("\n %sMulti-socket system: %d sockets, %d NUMA nodes%s\n",
244  C(CYAN), sockets, numa->num_nodes, C(RESET));
245  printf(" %s SNC/NPS not enabled - each socket is one NUMA node%s\n",
246  C(DIM), C(RESET));
247  printf(" %s 💡 Enable SNC in BIOS to partition channels for lower latency%s\n",
248  C(DIM), C(RESET));
249  }
250 
251  printf("\n");
252  char size_buf[32];
253 
254  for (int i = 0; i < numa->num_nodes; i++) {
255  const NUMANode *n = &numa->nodes[i];
256  int is_last = (i == numa->num_nodes - 1);
257 
258  format_size(n->memory_total_mb, size_buf, sizeof(size_buf));
259  print_tree_item(0, is_last, "Node %d: %s, CPUs %d-%d",
260  n->node_id, size_buf,
261  n->cpu_list[0],
262  n->cpu_list[n->num_cpus - 1]);
263  }
264 
265  // Print distance matrix if more than 2 nodes
266  if (numa->num_nodes >= 2 && numa->distances[0][1] > 0) {
267  printf("\n NUMA Distances (10=local, higher=remote):\n");
268  printf(" ");
269  for (int i = 0; i < numa->num_nodes; i++) {
270  printf(" N%d ", i);
271  }
272  printf("\n");
273  for (int i = 0; i < numa->num_nodes; i++) {
274  printf(" N%d", i);
275  for (int j = 0; j < numa->num_nodes; j++) {
276  int dist = numa->distances[i][j];
277  if (dist == 10) {
278  printf(" %s%2d%s ", C(GREEN), dist, C(RESET));
279  } else {
280  printf(" %s%2d%s ", C(YELLOW), dist, C(RESET));
281  }
282  }
283  printf("\n");
284  }
285  }
286 
287  // Tip for accurate per-node bandwidth
288  printf("\n %s💡 Per-node bandwidth: numactl --cpunodebind=0 --membind=0 ./build/show_config%s\n",
289  C(CYAN), C(RESET));
290 }

References C, NUMANode::cpu_list, CYAN, DIM, NUMATopology::distances, format_size(), GREEN, NUMANode::memory_total_mb, NUMANode::node_id, NUMATopology::nodes, NUMANode::num_cpus, NUMATopology::num_nodes, print_section(), print_tree_item(), RESET, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_pcie()

void topology_print_pcie ( const PCIeTopology * pcie)

Definition at line 395 of file show_config.c.

395  {
396  print_section("PCIe DEVICES");
397 
398  int gpu_count = 0, nic_count = 0, nvme_count = 0;
399  char bw_buf[32];
400 
401  for (int i = 0; i < pcie->num_devices; i++) {
402  const PCIeDevice *d = &pcie->devices[i];
403 
404  // Skip bridges and other infrastructure devices
405  if (d->link_width == 0) continue;
406  if (strstr(d->device_name, "bridge") ||
407  strstr(d->device_name, "Bridge") ||
408  strstr(d->device_name, "Host") ||
409  strstr(d->device_name, "PCI")) continue;
410 
411  const char *type_icon = " ";
412  const char *type_color = "";
413  if (d->is_gpu) {
414  type_icon = "🎮 ";
415  type_color = GREEN;
416  gpu_count++;
417  } else if (d->is_nic) {
418  type_icon = "🌐 ";
419  type_color = CYAN;
420  nic_count++;
421  } else if (d->is_nvme) {
422  type_icon = "💾 ";
423  type_color = MAGENTA;
424  nvme_count++;
425  }
426 
427  format_bandwidth(d->bandwidth_gbs, bw_buf, sizeof(bw_buf));
428 
429  // Truncate long device names
430  char name[48];
431  strncpy(name, d->device_name, sizeof(name) - 1);
432  name[sizeof(name) - 1] = '\0';
433  if (strlen(d->device_name) > 45) {
434  strcpy(name + 42, "...");
435  }
436 
437  printf(" %s%s%s%-45s%s x%d Gen%d %s%s%s",
438  type_icon, C(type_color), C(BOLD), name, C(RESET),
439  d->link_width, d->link_speed,
440  C(DIM), bw_buf, C(RESET));
441 
442  // Show if not running at max capability
443  if (d->link_width < d->link_width_max || d->link_speed < d->link_speed_max) {
444  printf(" %s(capable: x%d Gen%d)%s",
446  }
447  printf("\n");
448  }
449 
450  if (gpu_count == 0 && nic_count == 0 && nvme_count == 0) {
451  printf(" %sNo significant PCIe devices detected%s\n", C(DIM), C(RESET));
452  }
453 
454  printf("\n Summary: %d GPU(s), %d NIC(s), %d NVMe(s)\n",
455  gpu_count, nic_count, nvme_count);
456 }

References PCIeDevice::bandwidth_gbs, BOLD, C, CYAN, PCIeDevice::device_name, PCIeTopology::devices, DIM, format_bandwidth(), GREEN, PCIeDevice::is_gpu, PCIeDevice::is_nic, PCIeDevice::is_nvme, PCIeDevice::link_speed, PCIeDevice::link_speed_max, PCIeDevice::link_width, PCIeDevice::link_width_max, MAGENTA, PCIeTopology::num_devices, print_section(), RESET, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_recommendations()

void topology_print_recommendations ( const RecommendationList * recs)

Definition at line 545 of file show_config.c.

545  {
546  if (recs->num_recommendations == 0) {
547  print_section("RECOMMENDATIONS");
548  print_ok("No significant issues detected!");
549  return;
550  }
551 
552  print_section("RECOMMENDATIONS");
553 
554  for (int i = 0; i < recs->num_recommendations; i++) {
555  const Recommendation *r = &recs->recommendations[i];
556 
557  const char *priority_icon = "";
558  const char *priority_color = "";
559  switch (r->priority) {
561  priority_icon = "🔴";
562  priority_color = RED;
563  break;
564  case REC_PRIORITY_HIGH:
565  priority_icon = "🟠";
566  priority_color = RED;
567  break;
568  case REC_PRIORITY_MEDIUM:
569  priority_icon = "🟡";
570  priority_color = YELLOW;
571  break;
572  case REC_PRIORITY_LOW:
573  priority_icon = "🟢";
574  priority_color = GREEN;
575  break;
576  }
577 
578  printf("\n %s %s%s%s\n", priority_icon, C(priority_color), r->title, C(RESET));
579  printf(" %s\n", r->description);
580  printf(" %s→ %s%s\n", C(CYAN), r->action, C(RESET));
581  }
582 }

References Recommendation::action, C, CYAN, Recommendation::description, GREEN, RecommendationList::num_recommendations, print_ok(), print_section(), Recommendation::priority, REC_PRIORITY_CRITICAL, REC_PRIORITY_HIGH, REC_PRIORITY_LOW, REC_PRIORITY_MEDIUM, RecommendationList::recommendations, RED, RESET, Recommendation::title, and YELLOW.

Referenced by topology_print_summary().

◆ topology_print_summary()

void topology_print_summary ( const SystemTopology * topo)

Definition at line 649 of file show_config.c.

649  {
650  print_header("C-Kernel-Engine System Configuration");
651 
652  printf(" %sHostname:%s %s\n", C(DIM), C(RESET), topo->hostname);
653  printf(" %sKernel:%s %s\n", C(DIM), C(RESET), topo->kernel_version);
654  if (!topo->has_root_access) {
655  printf(" %sNote:%s Running without root - some info may be unavailable\n",
656  C(YELLOW), C(RESET));
657  }
658 
659  topology_print_cpu(&topo->cpu);
661  topology_print_numa(&topo->numa, topo->cpu.sockets);
663  topology_print_pcie(&topo->pcie);
666 
667  RecommendationList recs;
670 
672 
673  printf("\n");
674 }
void topology_print_memory(const MemoryInfo *mem)
Definition: show_config.c:292
void topology_print_network(const NetworkTopology *net)
Definition: show_config.c:458
void topology_print_pcie(const PCIeTopology *pcie)
Definition: show_config.c:395
void topology_print_distributed_potential(const SystemTopology *topo)
Definition: show_config.c:584
void topology_print_numa(const NUMATopology *numa, int sockets)
Definition: show_config.c:213
void topology_print_cpu(const CPUInfo *cpu)
Definition: show_config.c:103
void topology_print_affinity(const AffinityInfo *aff)
Definition: show_config.c:528
void topology_print_recommendations(const RecommendationList *recs)
Definition: show_config.c:545
static void print_header(const char *title)
Definition: show_config.c:65
void topology_print_cache(const CacheTopology *cache, int logical_cores)
Definition: show_config.c:169
int topology_generate_recommendations(const SystemTopology *topo, RecommendationList *recs)

References SystemTopology::affinity, C, SystemTopology::cache, SystemTopology::cpu, DIM, SystemTopology::has_root_access, SystemTopology::hostname, SystemTopology::kernel_version, CPUInfo::logical_cores, SystemTopology::memory, SystemTopology::network, SystemTopology::numa, SystemTopology::pcie, print_header(), RESET, CPUInfo::sockets, topology_generate_recommendations(), topology_print_affinity(), topology_print_cache(), topology_print_cpu(), topology_print_distributed_potential(), topology_print_memory(), topology_print_network(), topology_print_numa(), topology_print_pcie(), topology_print_recommendations(), and YELLOW.

Referenced by main().