Skip to main content

Mojo struct

PagedKVCacheCollection

struct PagedKVCacheCollection[dtype_: DType, kv_params_: KVCacheStaticParams, page_size: Int]

Fields

  • blocks (NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_))), _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()])
  • cache_lengths (NDBuffer[DType.uint32, 1, MutableAnyOrigin])
  • lookup_table (NDBuffer[DType.uint32, 2, MutableAnyOrigin])
  • max_seq_length (UInt32)
  • max_cache_length (UInt32)
  • kv_cache_dynamic_shape (IndexList[4])
  • kv_cache_dynamic_strides (IndexList[4])

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, KVCollectionT, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = True

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = True

blocks_shape

alias blocks_shape = DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))

blocks_stride

alias blocks_stride = _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()

blocks_type

alias blocks_type = NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_))), _strides_from_shape[DimList(Dim(), Dim(2 if kv_params_.is_mla.__bool__().__invert__() else 1), Dim(), Dim(page_size), Dim(Int(kv_params_)), Dim(Int(kv_params_)))]()]

CacheType

alias CacheType = PagedKVCache[dtype_, kv_params_, page_size]

dtype

alias dtype = dtype_

kv_params

alias kv_params = kv_params_

name_str

alias name_str = "paged"

Methods

__init__

__init__(out self, blocks: NDBuffer[dtype_, 6, MutableAnyOrigin], cache_lengths: NDBuffer[DType.uint32, 1, MutableAnyOrigin], lookup_table: NDBuffer[DType.uint32, 2, MutableAnyOrigin], max_seq_length: UInt32, max_cache_length: UInt32)

get_key_cache

get_key_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]

Returns:

PagedKVCache

get_value_cache

get_value_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]

Returns:

PagedKVCache

cache_length

cache_length(self, bs_idx: Int) -> Int

Returns:

Int

Was this page helpful?