Mojo struct
QRegisterBufferRDNA
struct QRegisterBufferRDNA[dtype: DType, mma_shape: IndexList[3], k_group_size: Int, WM: Int, WN: Int, BN: Int, BK: Int, depth: Int, thread_layout: Layout]
RDNA-specific Q register buffer for Wave32 WMMA attention.
Fields
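- reg_tile (QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].RegisterTileType): The register tile stored by this buffer; its type is a LayoutTensor with layout reg_tile_layout in AddressSpace.LOCAL.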
Implemented traits
AnyType,
ImplicitlyDestructible,
RegisterBuffer,
RegisterMMABuffer
comptime members
__del__is_trivial
comptime __del__is_trivial = True
mma_dtype
comptime mma_dtype = dtype
MMA_K
comptime MMA_K = mma_shape.__getitem__[Int](2)
MMA_M
comptime MMA_M = mma_shape.__getitem__[Int](0)
mma_tile_layout
comptime mma_tile_layout = LayoutTensor._compute_tile_layout[tile_size=(LayoutTensor._compute_tile_layout[tile_size=(QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout.shape[0].value() // QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_tiles), axis=0]()[0].shape[0].value() // QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_k_tiles), axis=0]()[0]
MMATileType
comptime MMATileType = LayoutTensor[dtype, LayoutTensor._compute_tile_layout[tile_size=(LayoutTensor._compute_tile_layout[tile_size=(QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout.shape[0].value() // QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_tiles), axis=0]()[0].shape[0].value() // QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_k_tiles), axis=0]()[0], MutAnyOrigin, address_space=AddressSpace.LOCAL]
num_k_tiles
comptime num_k_tiles = ceildiv(BK, (QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].MMA_K * k_group_size))
num_mmas
comptime num_mmas = ceildiv(WM, QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].MMA_M)
num_tiles
comptime num_tiles = (depth // BK)
rdna_frag_size
comptime rdna_frag_size = RDNA_AB_FRAG_SIZE
reg_dtype
comptime reg_dtype = dtype
reg_tile_layout
comptime reg_tile_layout = Layout.row_major(((QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_mmas * QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_k_tiles) * QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_tiles), 16)
RegisterTileType
comptime RegisterTileType = LayoutTensor[dtype, QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL]
simd_width
comptime simd_width = simd_width_of[dtype]()
TiledIteratorType
comptime TiledIteratorType = LayoutTensorIter[dtype, LayoutTensor._compute_tile_layout[(QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_mmas * QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_k_tiles), 16]()[0], MutAnyOrigin, address_space=AddressSpace.LOCAL, axis=0, layout_int_type=_get_layout_type(QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout, AddressSpace.LOCAL), linear_idx_type=_get_index_type(QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout, AddressSpace.LOCAL), masked=_tile_is_masked[QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].reg_tile_layout, (QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_mmas * QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].num_k_tiles), 16]()]
Methods
__init__
__init__(out self, tensor: LayoutTensor[dtype, tensor.layout, tensor.origin, address_space=tensor.address_space, element_layout=tensor.element_layout, layout_int_type=tensor.layout_int_type, linear_idx_type=tensor.linear_idx_type, masked=tensor.masked, alignment=tensor.alignment])
get_dtype
get_iter
get_iter(self) -> QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].TiledIteratorType
Returns:
QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].TiledIteratorType
get_mma_tile
get_mma_tile[tile_idx: Int, k_idx: Int](self) -> QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].MMATileType
Returns:
QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].MMATileType
get_reg_tile
get_reg_tile[stage: Int = 0](self) -> QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].RegisterTileType
Returns:
QRegisterBufferRDNA[dtype, mma_shape, k_group_size, WM, WN, BN, BK, depth, thread_layout].RegisterTileType
zero
zero(self)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!