Token/position embedding lookup kernels.
Go to the source code of this file.
Functions

- void embedding_backward(const int32_t *token_ids, int token_count, const float *d_output, float *d_token_embeddings, float *d_pos_embeddings, int vocab_size, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
- void embedding_forward(const int32_t *token_ids, int token_count, int vocab_size, const float *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
- void embedding_forward_q4_k(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
- void embedding_forward_q6_k(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
- void embedding_forward_q8_0(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Token/position embedding lookup kernels.
After making changes, run: `make test && make llamacpp-parity-full`
Embedding: out[t] = token_embed[token_id[t]] + pos_embed[t]
Definition in file embedding_kernels.c.
void embedding_backward(const int32_t *token_ids, int token_count, const float *d_output, float *d_token_embeddings, float *d_pos_embeddings, int vocab_size, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Definition at line 241 of file embedding_kernels.c.
References vocab_size.
void embedding_forward(const int32_t *token_ids, int token_count, int vocab_size, const float *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Definition at line 22 of file embedding_kernels.c.
References vocab_size.
void embedding_forward_q4_k(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Definition at line 76 of file embedding_kernels.c.
References CK_DT_Q4_K, ck_dtype_row_bytes(), dequant_q4_k_row(), and vocab_size.
Referenced by model_decode_token(), model_forward_prefill_impl(), qwen2_0_5b_decode_decode_token(), and qwen2_0_5b_decode_forward_prefill_impl().
void embedding_forward_q6_k(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Definition at line 186 of file embedding_kernels.c.
References CK_DT_Q6_K, ck_dtype_row_bytes(), dequant_q6_k_row(), and vocab_size.
void embedding_forward_q8_0(const int32_t *token_ids, int token_count, int vocab_size, const void *token_embeddings, const float *pos_embeddings, float *output, int embed_dim, int aligned_embed_dim, int context_window, int add_pos)
Definition at line 131 of file embedding_kernels.c.
References CK_DT_Q8_0, ck_dtype_row_bytes(), dequant_q8_0_row(), and vocab_size.
Referenced by qwen2_0_5b_decode_decode_token(), and qwen2_0_5b_decode_forward_prefill_impl().