← Back to C-Kernel-Engine Docs Doxygen Source Documentation
backend_native.c
Go to the documentation of this file.
1 /**
2  * Native GEMM backend that directly reuses the C-Transformer GEMM kernel.
3  *
4  * Layout assumptions (identical to C-Transformer/main.c):
5  * - A: [M x K], row-major, A(i,k) = A[i*K + k]
6  * - B: [N x K], row-major, B(j,k) = B[j*K + k]
7  * - C: [M x N], row-major, C(i,j) = C[i*N + j]
8  *
9  * This is a straight copy of gemm_blocked_serial with a thin wrapper to match
10  * the CKMathBackend.sgemm signature. It is intentionally minimal.
11  */
12 
13 #include "ckernel_engine.h"
14 
/**
 * Single-precision GEMM matching the CKMathBackend.sgemm signature.
 *
 * Computes, for 0 <= i < M and 0 <= j < N,
 *
 *     C(i,j) = (bias ? bias[j] : 0) + sum_{k < K} A(i,k) * B(j,k)
 *
 * i.e. C = A * B^T with an optional per-column bias added to every row.
 *
 * All three matrices are row-major and are addressed through their leading
 * dimensions, so padded buffers are handled correctly. The dense layouts
 * correspond to lda = K, ldb = K, ldc = N.
 *
 * @param M    number of rows of A and C
 * @param N    number of rows of B and columns of C
 * @param K    number of columns of A and B (reduction length)
 * @param A    input matrix; row i starts at A + i * lda
 * @param lda  elements between consecutive rows of A
 * @param B    input matrix; row j starts at B + j * ldb
 * @param ldb  elements between consecutive rows of B
 * @param bias optional vector of N values; NULL means no bias
 * @param C    output matrix; row i starts at C + i * ldc, and each of the
 *             M x N addressed elements is overwritten
 * @param ldc  elements between consecutive rows of C
 *
 * Nothing is written when M or N is not positive. When K is not positive the
 * reduction is empty and each output is the bias value (or 0.0f).
 */
static void ckernel_sgemm_native(int M, int N, int K,
                                 const float *A, int lda,
                                 const float *B, int ldb,
                                 const float *bias,
                                 float *C, int ldc)
{
    for (int i = 0; i < M; ++i) {
        /* Row bases use the caller's strides, not K / N. */
        const float *a_row = A + (size_t)i * (size_t)lda;
        float *c_row = C + (size_t)i * (size_t)ldc;

        for (int j = 0; j < N; ++j) {
            const float *b_row = B + (size_t)j * (size_t)ldb;

            /* The accumulator starts at the bias, then adds products in k order. */
            float sum = bias ? bias[j] : 0.0f;
            for (int k = 0; k < K; ++k) {
                sum += a_row[k] * b_row[k];
            }
            c_row[j] = sum;
        }
    }
}
38 
40 {
41  CKMathBackend b;
43  return b;
44 }
CKMathBackend ckernel_backend_native(void)
static void ckernel_sgemm_native(int M, int N, int K, const float *A, int lda, const float *B, int ldb, const float *bias, float *C, int ldc)
#define C(color)
Definition: show_config.c:39
void(* sgemm)(int M, int N, int K, const float *A, int lda, const float *B, int ldb, const float *bias, float *C, int ldc)