GEMM Microkernel - High-Performance Register-Blocked Matrix Multiplication. More...
#include "ckernel_engine.h"
#include "cpu_features.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
Go to the source code of this file.
Macros | |
| #define | GEMM_BACKEND "Native" |
| #define | KC (get_gemm_params()->KC) |
| #define | MC (get_gemm_params()->MC) |
| #define | MR (MR_FIXED) |
| #define | MR_FIXED 4 |
| #define | NC (get_gemm_params()->NC) |
| #define | NR (NR_FIXED) |
| #define | NR_FIXED 4 |
| #define | PACK_THRESHOLD 256 |
Functions | |
| const char * | gemm_get_backend (void) |
| static void | gemm_init_threads (void) |
| void | gemm_microkernel (const float *A, const float *B, float *C, int M, int N, int K, int B_transposed) |
| void | gemm_microkernel_blocked (const float *A, const float *B, float *C, int M, int N, int K) |
| void | gemm_microkernel_blocked_bt (const float *A, const float *B, float *C, int M, int N, int K) |
| static void | gemm_microkernel_edge (int m, int n, int K, const float *A, int lda, const float *B, int ldb, float *C, int ldc, int first_k) |
| void | gemm_microkernel_packed (const float *A, const float *B, float *C, int M, int N, int K) |
| static void | gemm_microkernel_sequential (const float *A, const float *B, float *C, int M, int N, int K) |
| static void | pack_a_panel (const float *A, int lda, float *Ap, int mc, int kc, int mr) |
| static void | pack_b_panel (const float *B, int ldb, float *Bp, int kc, int nc, int nr) |
Variables | |
| static int | g_threads_initialized = 0 |
GEMM Microkernel - High-Performance Register-Blocked Matrix Multiplication.
After changes: make test && make llamacpp-parity-full
This file implements optimized GEMM microkernels with multiple backends (Intel MKL, Intel oneDNN, or the native kernels), selected at build time.
Build with:
  make USE_MKL=1      # Use Intel MKL
  make USE_ONEDNN=1   # Use Intel oneDNN
  make                # Use native kernels
Layout: C[M,N] = A[M,K] @ B[K,N] (row-major)
Definition in file gemm_microkernel.c.
| #define GEMM_BACKEND "Native" |
Definition at line 45 of file gemm_microkernel.c.
| #define KC (get_gemm_params()->KC) |
Definition at line 230 of file gemm_microkernel.c.
| #define MC (get_gemm_params()->MC) |
Definition at line 228 of file gemm_microkernel.c.
| #define MR (MR_FIXED) |
Definition at line 226 of file gemm_microkernel.c.
| #define MR_FIXED 4 |
Definition at line 221 of file gemm_microkernel.c.
| #define NC (get_gemm_params()->NC) |
Definition at line 229 of file gemm_microkernel.c.
| #define NR (NR_FIXED) |
Definition at line 227 of file gemm_microkernel.c.
| #define NR_FIXED 4 |
Definition at line 222 of file gemm_microkernel.c.
| #define PACK_THRESHOLD 256 |
Definition at line 1132 of file gemm_microkernel.c.
| const char* gemm_get_backend | ( | void | ) |
| static void gemm_init_threads | ( | void | ) |
Definition at line 915 of file gemm_microkernel.c.
References g_threads_initialized, get_cpu_info(), and CPUInfo::num_cores.
Referenced by gemm_microkernel_blocked().
| void gemm_microkernel | ( | const float * | A, |
| const float * | B, | ||
| float * | C, | ||
| int | M, | ||
| int | N, | ||
| int | K, | ||
| int | B_transposed | ||
| ) |
Definition at line 1134 of file gemm_microkernel.c.
References C, gemm_microkernel_blocked(), gemm_microkernel_blocked_bt(), gemm_microkernel_packed(), and PACK_THRESHOLD.
Referenced by gemm_blocked_serial().
| void gemm_microkernel_blocked | ( | const float * | A, |
| const float * | B, | ||
| float * | C, | ||
| int | M, | ||
| int | N, | ||
| int | K | ||
| ) |
Definition at line 934 of file gemm_microkernel.c.
References C, gemm_init_threads(), gemm_microkernel_edge(), gemm_microkernel_sequential(), KC, MR, and NR.
Referenced by gemm_microkernel(), and gemm_microkernel_packed().
| void gemm_microkernel_blocked_bt | ( | const float * | A, |
| const float * | B, | ||
| float * | C, | ||
| int | M, | ||
| int | N, | ||
| int | K | ||
| ) |
Definition at line 1058 of file gemm_microkernel.c.
References C, KC, MC, MR, NC, and NR.
Referenced by gemm_microkernel().
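| static void gemm_microkernel_edge | ( | int | m, |
| int | n, | ||
| int | K, | ||
| const float * | A, | ||
| int | lda, | ||
| const float * | B, | ||
| int | ldb, | ||
| float * | C, | ||
| int | ldc, | ||
| int | first_k | ||
| ) |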
Definition at line 737 of file gemm_microkernel.c.
References C.
Referenced by gemm_microkernel_blocked(), and gemm_microkernel_sequential().
| void gemm_microkernel_packed | ( | const float * | A, |
| const float * | B, | ||
| float * | C, | ||
| int | M, | ||
| int | N, | ||
| int | K | ||
| ) |
Definition at line 840 of file gemm_microkernel.c.
References C, and gemm_microkernel_blocked().
Referenced by gemm_microkernel().
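| static void gemm_microkernel_sequential | ( | const float * | A, |
| const float * | B, | ||
| float * | C, | ||
| int | M, | ||
| int | N, | ||
| int | K | ||
| ) |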
Definition at line 862 of file gemm_microkernel.c.
References C, gemm_microkernel_edge(), KC, MR, and NR.
Referenced by gemm_microkernel_blocked().
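| static void pack_a_panel | ( | const float * | A, |
| int | lda, | ||
| float * | Ap, | ||
| int | mc, | ||
| int | kc, | ||
| int | mr | ||
| ) |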
Definition at line 761 of file gemm_microkernel.c.
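| static void pack_b_panel | ( | const float * | B, |
| int | ldb, | ||
| float * | Bp, | ||
| int | kc, | ||
| int | nc, | ||
| int | nr | ||
| ) |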
Definition at line 795 of file gemm_microkernel.c.
| static int g_threads_initialized = 0 |
Definition at line 912 of file gemm_microkernel.c.
Referenced by gemm_init_threads().