Mojo struct
PagedKVCacheCollection
struct PagedKVCacheCollection[dtype_: DType, kv_params_: KVCacheStaticParams, page_size: Int]
Fields
- blocks (`NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_))), _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()]`)
- cache_lengths (`NDBuffer[DType.uint32, 1, MutableAnyOrigin]`)
- lookup_table (`NDBuffer[DType.uint32, 2, MutableAnyOrigin]`)
- max_seq_length (`UInt32`)
- max_cache_length (`UInt32`)
- kv_cache_dynamic_shape (`IndexList[4]`)
- kv_cache_dynamic_strides (`IndexList[4]`)
Implemented traits
AnyType, Copyable, ImplicitlyCopyable, KVCollectionT, Movable, UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = True
__del__is_trivial
alias __del__is_trivial = True
__moveinit__is_trivial
alias __moveinit__is_trivial = True
blocks_shape
alias blocks_shape = DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))
blocks_stride
alias blocks_stride = _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()
blocks_type
alias blocks_type = NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_))), _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()]
CacheType
alias CacheType = PagedKVCache[dtype_, kv_params_, page_size]
dtype
alias dtype = dtype_
kv_params
alias kv_params = kv_params_
name_str
alias name_str = "paged"
Methods
__init__
__init__(out self, blocks: NDBuffer[dtype_, 6, MutableAnyOrigin], cache_lengths: NDBuffer[DType.uint32, 1, MutableAnyOrigin], lookup_table: NDBuffer[DType.uint32, 2, MutableAnyOrigin], max_seq_length: UInt32, max_cache_length: UInt32)
get_key_cache
get_key_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]
Returns:
PagedKVCache
get_value_cache
get_value_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]
Returns:
PagedKVCache
cache_length
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!