GELU activation kernels for BF16 tensors.
Go to the source code of this file.
Functions | |
| void | gelu_backward_exact_bf16 (const uint16_t *input, const uint16_t *d_output, uint16_t *d_input, size_t n, float *scratch_input, float *scratch_d_output, float *scratch_d_input) |
| void | gelu_backward_fast_bf16 (const uint16_t *input, const uint16_t *d_output, uint16_t *d_input, size_t n, float *scratch_input, float *scratch_d_output, float *scratch_d_input) |
| void | gelu_fast_inplace_bf16 (uint16_t *data, size_t n, float *scratch) |
GELU activation kernels for BF16 tensors.
After changes: make test && make llamacpp-parity-full
Exact GELU: y = x * 0.5 * (1 + erf(x / sqrt(2))). The `_fast` variants compute a faster approximation of this function.
Definition in file gelu_kernels_bf16.c.
| void gelu_backward_exact_bf16 | ( | const uint16_t * | input, |
| const uint16_t * | d_output, | ||
| uint16_t * | d_input, | ||
| size_t | n, | ||
| float * | scratch_input, | ||
| float * | scratch_d_output, | ||
| float * | scratch_d_input | ||
| ) |
Definition at line 46 of file gelu_kernels_bf16.c.
References bf16_tensor_to_float(), float_tensor_to_bf16(), and gelu_backward_scalar().
| void gelu_backward_fast_bf16 | ( | const uint16_t * | input, |
| const uint16_t * | d_output, | ||
| uint16_t * | d_input, | ||
| size_t | n, | ||
| float * | scratch_input, | ||
| float * | scratch_d_output, | ||
| float * | scratch_d_input | ||
| ) |
Definition at line 69 of file gelu_kernels_bf16.c.
References bf16_tensor_to_float(), float_tensor_to_bf16(), and gelu_backward_fast().
| void gelu_fast_inplace_bf16 | ( | uint16_t * | data, |
| size_t | n, | ||
| float * | scratch | ||
| ) |
Definition at line 31 of file gelu_kernels_bf16.c.
References bf16_tensor_to_float(), float_tensor_to_bf16(), and gelu_exact_inplace(). NOTE(review): despite the `_fast` suffix in its name, this function is documented as calling gelu_exact_inplace() — confirm whether the name or the referenced helper is intended.