Skip to main content

Mojo struct

PagedKVCacheCollection

struct PagedKVCacheCollection[dtype_: DType, kv_params_: KVCacheStaticParams, page_size: Int]

Fields

  • blocks (NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size)), _strides_from_shape[DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size))]()]) — the same type as the blocks_type alias below
  • cache_lengths (NDBuffer[DType.uint32, 1, MutableAnyOrigin])
  • lookup_table (NDBuffer[DType.uint32, 2, MutableAnyOrigin])
  • max_seq_length (UInt32)
  • max_cache_length (UInt32)
  • kv_cache_dynamic_shape (IndexList[4])
  • kv_cache_dynamic_strides (IndexList[4])

Implemented traits

AnyType, ExplicitlyCopyable, ImplicitlyCopyable, KVCollectionT, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = False

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = False

blocks_shape

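alias blocks_shape = DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size))

Note: Dim(-31337) is how an unknown (dynamic) dimension, i.e. Dim(), is rendered here. Only the last three dimensions (page_size, kv_params_.num_heads, kv_params_.head_size) are known statically; the first three are determined at runtime.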

blocks_stride

alias blocks_stride = _strides_from_shape[DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size))]()

blocks_type

alias blocks_type = NDBuffer[dtype_, 6, MutableAnyOrigin, DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size)), _strides_from_shape[DimList(Dim(-31337), Dim(-31337), Dim(-31337), Dim(page_size), Dim(kv_params_.num_heads), Dim(kv_params_.head_size))]()]

CacheType

alias CacheType = PagedKVCache[dtype_, kv_params_, page_size]

dtype

alias dtype = dtype_

kv_params

alias kv_params = kv_params_

name_str

alias name_str = "paged"

Methods

__init__

__init__(out self, blocks: NDBuffer[dtype_, 6, MutableAnyOrigin], cache_lengths: NDBuffer[DType.uint32, 1, MutableAnyOrigin], lookup_table: NDBuffer[DType.uint32, 2, MutableAnyOrigin], max_seq_length: UInt32, max_cache_length: UInt32)

__copyinit__

__copyinit__(out self, other: Self)

__moveinit__

__moveinit__(out self, var other: Self)

get_key_cache

get_key_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]

Returns:

PagedKVCache[dtype_, kv_params_, page_size] (the CacheType alias)

get_value_cache

get_value_cache(self, layer_idx: Int) -> PagedKVCache[dtype_, kv_params_, page_size]

Returns:

PagedKVCache[dtype_, kv_params_, page_size] (the CacheType alias)

cache_length

cache_length(self, bs_idx: Int) -> Int

Returns:

Int

Was this page helpful?