Skip to main content

Mojo struct

PagedKVCacheCollection

struct PagedKVCacheCollection[dtype_: DType, kv_params_: KVCacheStaticParams, page_size: Int]

Fields

  • blocks (LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size].blocks_layout, MutAnyOrigin])
  • cache_lengths (LayoutTensor[DType.uint32, Layout(IntTuple(-1)), ImmutAnyOrigin])
  • lookup_table (LayoutTensor[DType.uint32, Layout.row_major[2](), ImmutAnyOrigin])
  • max_seq_length (UInt32)
  • max_cache_length (UInt32)
  • kv_cache_dynamic_shape (IndexList[4])
  • kv_cache_dynamic_strides (IndexList[4])

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, KVCollectionT, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

comptime __copyinit__is_trivial = True

__del__is_trivial

comptime __del__is_trivial = True

__moveinit__is_trivial

comptime __moveinit__is_trivial = True

blocks_layout

comptime blocks_layout = Layout.row_major(PagedKVCacheCollection[dtype_, kv_params_, page_size].blocks_shape)

blocks_shape

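comptime blocks_shape = IntTuple(-1, 2 if (PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params.is_mla.__bool__() ^ True) else 1, -1, page_size, Int(PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params), Int(PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params))

Note: `is_mla.__bool__() ^ True` is the lowered form of `not is_mla`, so the second dimension is 1 when `kv_params.is_mla` is true and 2 otherwise. The last two dimensions are both rendered as `Int(...kv_params)`; the field accesses on `kv_params` appear to have been dropped by the documentation generator, so the two dimensions are presumably distinct fields of `KVCacheStaticParams` (confirm against the struct's source).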

blocks_type

comptime blocks_type = LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size].blocks_layout, MutAnyOrigin]

cache_lengths_type

comptime cache_lengths_type = LayoutTensor[DType.uint32, Layout(IntTuple(-1)), ImmutAnyOrigin]

CacheType

comptime CacheType = PagedKVCache[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params, page_size]

dtype

comptime dtype = dtype_

kv_params

comptime kv_params = kv_params_

lookup_table_type

comptime lookup_table_type = LayoutTensor[DType.uint32, Layout.row_major[2](), ImmutAnyOrigin]

name_str

comptime name_str = "paged"

Methods

__init__

__init__(out self, blocks: LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, Layout.row_major[6](), MutAnyOrigin], cache_lengths: LayoutTensor[DType.uint32, Layout(IntTuple(-1)), ImmutAnyOrigin], lookup_table: LayoutTensor[DType.uint32, Layout.row_major[2](), ImmutAnyOrigin], max_seq_length: UInt32, max_cache_length: UInt32)

get_key_cache

get_key_cache(self, layer_idx: Int) -> PagedKVCache[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params, page_size]

Returns:

PagedKVCache

get_value_cache

get_value_cache(self, layer_idx: Int) -> PagedKVCache[PagedKVCacheCollection[dtype_, kv_params_, page_size].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size].kv_params, page_size]

Returns:

PagedKVCache

cache_length

cache_length(self, bs_idx: Int) -> Int

Returns:

Int

Was this page helpful?