RoPE (Rotary Position Embedding) kernels for BF16.
Go to the source code of this file.
Functions:
  void rope_backward_bf16(const uint16_t *d_out, uint16_t *d_x, const float *cos_cache, const float *sin_cache, int num_heads, int num_tokens, int head_dim, int aligned_head_dim, int pos_offset, float *scratch_d_out, float *scratch_d_x)
  void rope_backward_qk_bf16(const uint16_t *d_q_out, const uint16_t *d_k_out, uint16_t *d_q, uint16_t *d_k, const float *cos_cache, const float *sin_cache, int num_heads, int num_kv_heads, int num_tokens, int head_dim, int aligned_head_dim, int pos_offset, float *scratch_dq_out, float *scratch_dq, float *scratch_dk_out, float *scratch_dk)
  void rope_forward_bf16(uint16_t *x, const float *cos_cache, const float *sin_cache, int num_heads, int num_tokens, int head_dim, int aligned_head_dim, int pos_offset, float *scratch)
  void rope_forward_qk_bf16(uint16_t *q, uint16_t *k, const float *cos_cache, const float *sin_cache, int num_heads, int num_kv_heads, int num_tokens, int head_dim, int aligned_head_dim, int pos_offset, float *scratch_q, float *scratch_k)
RoPE (Rotary Position Embedding) kernels for BF16.
After changing this file, run: make test && make llamacpp-parity-full
Definition in file rope_kernels_bf16.c.
void rope_backward_bf16(
    const uint16_t *d_out,
    uint16_t *d_x,
    const float *cos_cache,
    const float *sin_cache,
    int num_heads,
    int num_tokens,
    int head_dim,
    int aligned_head_dim,
    int pos_offset,
    float *scratch_d_out,
    float *scratch_d_x
)
Definition at line 52 of file rope_kernels_bf16.c.
References bf16_tensor_to_float(), float_tensor_to_bf16(), and rope_backward().
Referenced by rope_backward_qk_bf16().
void rope_backward_qk_bf16(
    const uint16_t *d_q_out,
    const uint16_t *d_k_out,
    uint16_t *d_q,
    uint16_t *d_k,
    const float *cos_cache,
    const float *sin_cache,
    int num_heads,
    int num_kv_heads,
    int num_tokens,
    int head_dim,
    int aligned_head_dim,
    int pos_offset,
    float *scratch_dq_out,
    float *scratch_dq,
    float *scratch_dk_out,
    float *scratch_dk
)
Definition at line 103 of file rope_kernels_bf16.c.
References rope_backward_bf16().
void rope_forward_bf16(
    uint16_t *x,
    const float *cos_cache,
    const float *sin_cache,
    int num_heads,
    int num_tokens,
    int head_dim,
    int aligned_head_dim,
    int pos_offset,
    float *scratch
)
Definition at line 28 of file rope_kernels_bf16.c.
References bf16_tensor_to_float(), float_tensor_to_bf16(), and rope_forward().
Referenced by rope_forward_qk_bf16().
void rope_forward_qk_bf16(
    uint16_t *q,
    uint16_t *k,
    const float *cos_cache,
    const float *sin_cache,
    int num_heads,
    int num_kv_heads,
    int num_tokens,
    int head_dim,
    int aligned_head_dim,
    int pos_offset,
    float *scratch_q,
    float *scratch_k
)
Definition at line 79 of file rope_kernels_bf16.c.
References rope_forward_bf16().