Mojo struct

QRegisterBuffer

struct QRegisterBuffer[mut: Bool, dtype: DType, layout: Layout, address_space: AddressSpace, alignment: Int, origin: Origin[mut], masked: Bool, layout_int_type: DType, linear_idx_type: DType, //, mma_shape: IndexList[3], k_group_size: Int, WM: Int, WN: Int, BN: Int, BK: Int, depth: Int, thread_layout: Layout]

Fields

gmem_tensor (LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment]):
mma_tile (LayoutTensor[dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)]):

Implemented traits

AnyType, UnknownDestructibility

Aliases

`__del__is_trivial`

alias __del__is_trivial = LayoutTensor[dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)].__del__is_trivial if LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment].__del__is_trivial else LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment].__del__is_trivial

`GlobalTensorType`

alias GlobalTensorType = LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment]

`MMA_K`

alias MMA_K = mma_shape.__getitem__[3, DType.int64, Int](2)

`MMA_M`

alias MMA_M = mma_shape.__getitem__[3, DType.int64, Int](0)

`num_k_tiles`

alias num_k_tiles = ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))

`num_mmas`

alias num_mmas = ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0))

`num_tiles`

alias num_tiles = (depth // BK)

`RegisterTileType`

alias RegisterTileType = LayoutTensor[dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)]

`simd_width`

alias simd_width = simd_width_of[dtype]()

`TiledIteratorType`

alias TiledIteratorType = LayoutTensorIter[dtype, LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))), simd_width_of[dtype]()]()[0], MutableAnyOrigin, address_space=AddressSpace(5), axis=OptionalReg[Int](0), layout_int_type=_get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), linear_idx_type=_get_index_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), masked=_tile_is_masked[Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), (ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * 
k_group_size))), simd_width_of[dtype]()]()]

Methods

`__init__`

__init__(out self, tensor: LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment])

`load_from_dram`

load_from_dram(mut self)

`get_mma_tile`

get_mma_tile[tile_idx: Int, k_idx: Int](self) -> LayoutTensor[dtype, LayoutTensor._compute_tile_layout[True, dtype, LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() // (depth // BK)), 0]()[0], MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major(((ceildiv(WM, 
mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() // (depth // BK)), 0]()[0], AddressSpace(5)), _get_index_type(LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() // (depth // BK)), 0]()[0], AddressSpace(5)), False, align_of[dtype](), (LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), 
IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (Layout.row_major(((ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))) * (depth // BK)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() // (depth // BK)), 0]()[0].shape[0].value[ComptimeOrigin]() // ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))), 0]()[0], MutableAnyOrigin, address_space=AddressSpace(5)]

Returns:

LayoutTensor

Fields

Implemented traits

Aliases

__del__is_trivial

GlobalTensorType

MMA_K

MMA_M

num_k_tiles

num_mmas

num_tiles

RegisterTileType

simd_width

TiledIteratorType

Methods

__init__

load_from_dram

get_mma_tile