Mojo struct
PagedKVCacheCollection
struct PagedKVCacheCollection[dtype_: DType, kv_params_: KVCacheStaticParams, page_size: Int, scale_dtype_: DType = DType.invalid]
Fields
- scales (OptionalReg[LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scale_dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scales_layout, MutAnyOrigin]])
- kv_cache_scales_dynamic_shape (IndexList[4])
- kv_cache_scales_dynamic_strides (IndexList[4])
- blocks (PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].blocks_type)
- cache_lengths (PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].cache_lengths_type)
- lookup_table (PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].lookup_table_type)
- max_seq_length (UInt32)
- max_cache_length (UInt32)
- kv_cache_dynamic_shape (IndexList[4])
- kv_cache_dynamic_strides (IndexList[4])
Implemented traits
AnyType,
Copyable,
ImplicitlyCopyable,
ImplicitlyDestructible,
KVCollectionT,
Movable
comptime members
__copyinit__is_trivial
comptime __copyinit__is_trivial = True
__del__is_trivial
comptime __del__is_trivial = True
__moveinit__is_trivial
comptime __moveinit__is_trivial = True
blocks_layout
comptime blocks_layout = Layout.row_major(PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].blocks_shape)
blocks_shape
comptime blocks_shape = IntTuple(-1, 2 if (xor kv_params_.is_mla._mlir_value, True) else 1, -1, page_size, Int.__init__[UInt](PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].kv_params.num_heads), Int.__init__[UInt](PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].kv_params.head_size))
blocks_type
comptime blocks_type = LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].blocks_layout, MutAnyOrigin]
cache_lengths_type
comptime cache_lengths_type = LayoutTensor[DType.uint32, Layout(IntTuple(-1)), ImmutAnyOrigin]
CacheType
comptime CacheType = PagedKVCache[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].kv_params, page_size, PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scale_dtype]
dtype
comptime dtype = dtype_
head_dim_granularity
comptime head_dim_granularity = ceildiv(Int.__init__[UInt](PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].kv_params.head_size), 1)
kv_params
comptime kv_params = kv_params_
lookup_table_type
comptime lookup_table_type = LayoutTensor[DType.uint32, Layout.row_major[2](), ImmutAnyOrigin]
name_str
comptime name_str = "paged"
scale_dtype
comptime scale_dtype = scale_dtype_
scales_layout
comptime scales_layout = Layout.row_major(PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scales_shape)
scales_shape
comptime scales_shape = IntTuple(-1, 2 if (xor kv_params_.is_mla._mlir_value, True) else 1, -1, page_size, Int.__init__[UInt](PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].kv_params.num_heads), PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].head_dim_granularity)
scales_type
comptime scales_type = LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scale_dtype, PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scales_layout, MutAnyOrigin]
Methods
__init__
__init__(out self, blocks: LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].dtype, Layout.row_major[6](), MutAnyOrigin], cache_lengths: LayoutTensor[DType.uint32, Layout(IntTuple(-1)), ImmutAnyOrigin], lookup_table: LayoutTensor[DType.uint32, Layout.row_major[2](), ImmutAnyOrigin], max_seq_length: UInt32, max_cache_length: UInt32, scales: OptionalReg[LayoutTensor[PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].scale_dtype, Layout.row_major[6](), MutAnyOrigin]] = None)
get_key_cache
get_key_cache(self, layer_idx: Int) -> PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].CacheType
Returns:
PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].CacheType
get_value_cache
get_value_cache(self, layer_idx: Int) -> PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].CacheType
Returns:
PagedKVCacheCollection[dtype_, kv_params_, page_size, scale_dtype_].CacheType
cache_length
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!