LayerNorm forward/backward kernels with SIMD (SSE/AVX/AVX512).
#include <math.h>
Go to the source code of this file.
Functions | |
| void | layernorm_backward_kernel (const float *d_output, const float *input, const float *gamma, const float *mean, const float *rstd, float *d_input, float *d_gamma, float *d_beta, int tokens, int d_model, int aligned_embed_dim) |
| void | layernorm_forward_rolled_slice (const float *__restrict input_slice_base, const float *__restrict gamma, const float *__restrict beta, float *__restrict output_slice_base, float *__restrict mean_cache_slice, float *__restrict rstd_cache_slice, int num_tokens_in_slice, int d_model, int aligned_embed_dim, float eps) |
| void | layernorm_forward_unrolled_slice (const float *__restrict input_slice_base, const float *__restrict gamma, const float *__restrict beta, float *__restrict output_slice_base, float *__restrict mean_cache_slice, float *__restrict rstd_cache_slice, int num_tokens_in_slice, int d_model, float eps) |
| static void | layernorm_forward_unrolled_slice_scalar (const float *__restrict input_slice_base, const float *__restrict gamma, const float *__restrict beta, float *__restrict output_slice_base, float *__restrict mean_cache_slice, float *__restrict rstd_cache_slice, int num_tokens_in_slice, int d_model, float eps) |
| void | layernorm_naive_serial (const float *input, const float *gamma, const float *beta, float *output, float *mean_cache, float *rstd_cache, int tokens, int d_model, int aligned_embed_dim, float eps) |
| void | layernorm_naive_serial_matched_precision (const float *input, const float *gamma, const float *beta, float *output, float *mean_cache, float *rstd_cache, int tokens, int d_model, float eps) |
| static void | zero_layernorm_padding (float *out_ptr, int d_model, int aligned_embed_dim) |
LayerNorm forward/backward kernels with SIMD (SSE/AVX/AVX512)
After changes: make test && make llamacpp-parity-full
LayerNorm: y = gamma * (x - mean) / sqrt(var + eps) + beta
Definition in file layernorm_kernels.c.
| void layernorm_backward_kernel | ( | const float * | d_output, |
| const float * | input, | ||
| const float * | gamma, | ||
| const float * | mean, | ||
| const float * | rstd, | ||
| float * | d_input, | ||
| float * | d_gamma, | ||
| float * | d_beta, | ||
| int | tokens, | ||
| int | d_model, | ||
| int | aligned_embed_dim | ||
| ) |
| void layernorm_forward_rolled_slice | ( | const float *__restrict | input_slice_base, |
| const float *__restrict | gamma, | ||
| const float *__restrict | beta, | ||
| float *__restrict | output_slice_base, | ||
| float *__restrict | mean_cache_slice, | ||
| float *__restrict | rstd_cache_slice, | ||
| int | num_tokens_in_slice, | ||
| int | d_model, | ||
| int | aligned_embed_dim, | ||
| float | eps | ||
| ) |
Definition at line 274 of file layernorm_kernels.c.
References layernorm_naive_serial().
Referenced by layernorm_forward_rolled_slice_bf16().
| void layernorm_forward_unrolled_slice | ( | const float *__restrict | input_slice_base, |
| const float *__restrict | gamma, | ||
| const float *__restrict | beta, | ||
| float *__restrict | output_slice_base, | ||
| float *__restrict | mean_cache_slice, | ||
| float *__restrict | rstd_cache_slice, | ||
| int | num_tokens_in_slice, | ||
| int | d_model, | ||
| float | eps | ||
| ) |
Definition at line 598 of file layernorm_kernels.c.
References layernorm_forward_unrolled_slice_scalar().
Referenced by layernorm_forward_unrolled_slice_bf16().
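| static void layernorm_forward_unrolled_slice_scalar | ( | const float *__restrict | input_slice_base, |
| const float *__restrict | gamma, | ||
| const float *__restrict | beta, | ||
| float *__restrict | output_slice_base, | ||
| float *__restrict | mean_cache_slice, | ||
| float *__restrict | rstd_cache_slice, | ||
| int | num_tokens_in_slice, | ||
| int | d_model, | ||
| float | eps | ||
| ) | static |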
Definition at line 582 of file layernorm_kernels.c.
References layernorm_naive_serial_matched_precision().
Referenced by layernorm_forward_unrolled_slice().
| void layernorm_naive_serial | ( | const float * | input, |
| const float * | gamma, | ||
| const float * | beta, | ||
| float * | output, | ||
| float * | mean_cache, | ||
| float * | rstd_cache, | ||
| int | tokens, | ||
| int | d_model, | ||
| int | aligned_embed_dim, | ||
| float | eps | ||
| ) |
| void layernorm_naive_serial_matched_precision | ( | const float * | input, |
| const float * | gamma, | ||
| const float * | beta, | ||
| float * | output, | ||
| float * | mean_cache, | ||
| float * | rstd_cache, | ||
| int | tokens, | ||
| int | d_model, | ||
| float | eps | ||
| ) |
Definition at line 624 of file layernorm_kernels.c.
Referenced by layernorm_forward_unrolled_slice_scalar().
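| static void zero_layernorm_padding | ( | float * | out_ptr, |
| int | d_model, | ||
| int | aligned_embed_dim | ||
| ) | inline static |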
Definition at line 22 of file layernorm_kernels.c.