← Back to C-Kernel-Engine Docs Doxygen Source Documentation
gemv_omp.h
Go to the documentation of this file.
1 #pragma once
2 
3 /*
4  * OpenMP-parallel GEMV variants (same signatures as serial counterparts).
5  *
6  * WARNING: These use "#pragma omp parallel for", which has high fork/join
7  * overhead (~50-200us per call). During inference this makes them SLOWER
8  * than the serial versions: measured 1.9x slower on real workloads.
9  *
10  * Do NOT use for inference. These exist for:
11  * - Correctness reference (numerically identical to serial)
12  * - Future conversion to a pthread thread pool (persistent threads,
13  * ~2-5us dispatch overhead instead of 50-200us fork/join)
14  *
15  * The serial kernels in gemm_kernels_q8_0.c / gemm_kernels_q5_0.c are
16  * faster for all current use cases.
17  */
18 
19 void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K);
20 void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K);
21 void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x,
22  const float *bias, int M, int K);
void gemv_fused_q5_0_bias_parallel_omp(float *y, const void *W, const float *x, const float *bias, int M, int K)
Definition: gemv_omp.c:96
void gemv_q5_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
Definition: gemv_omp.c:72
void gemv_q8_0_q8_0_parallel_omp(float *y, const void *W, const void *x_q8, int M, int K)
Definition: gemv_omp.c:50