Skip to main content

Mojo function

generic_flare_mla_decompress_k_cache_ragged_paged

```mojo
generic_flare_mla_decompress_k_cache_ragged_paged[
    target: StringSlice[StaticConstantOrigin],
    dtype: DType
](
    buffer_row_offsets_1d: LayoutTensor[DType.uint32, buffer_row_offsets_1d.layout, buffer_row_offsets_1d.origin, element_layout=buffer_row_offsets_1d.element_layout, layout_int_type=buffer_row_offsets_1d.layout_int_type, linear_idx_type=buffer_row_offsets_1d.linear_idx_type, masked=buffer_row_offsets_1d.masked, alignment=buffer_row_offsets_1d.alignment],
    cache_offsets_1d: LayoutTensor[DType.uint32, cache_offsets_1d.layout, cache_offsets_1d.origin, element_layout=cache_offsets_1d.element_layout, layout_int_type=cache_offsets_1d.layout_int_type, linear_idx_type=cache_offsets_1d.linear_idx_type, masked=cache_offsets_1d.masked, alignment=cache_offsets_1d.alignment],
    buffer_length: Int32,
    weight: LayoutTensor[dtype, weight.layout, weight.origin, element_layout=weight.element_layout, layout_int_type=weight.layout_int_type, linear_idx_type=weight.linear_idx_type, masked=weight.masked, alignment=weight.alignment],
    kv_collection: PagedKVCacheCollection[kv_collection.dtype_, kv_collection.kv_params_, kv_collection.page_size, kv_collection.scale_dtype_, kv_collection.quantization_granularity_],
    layer_idx: UInt32,
    k_latent_buffer: LayoutTensor[dtype, k_latent_buffer.layout, k_latent_buffer.origin, element_layout=k_latent_buffer.element_layout, layout_int_type=k_latent_buffer.layout_int_type, linear_idx_type=k_latent_buffer.linear_idx_type, masked=k_latent_buffer.masked, alignment=k_latent_buffer.alignment],
    k_buffer: LayoutTensor[dtype, k_buffer.layout, k_buffer.origin, element_layout=k_buffer.element_layout, layout_int_type=k_buffer.layout_int_type, linear_idx_type=k_buffer.linear_idx_type, masked=k_buffer.masked, alignment=k_buffer.alignment],
    context: DeviceContextPtr
)
```

Was this page helpful?