Mojo struct
STMatrixLayout
@register_passable(trivial)
struct STMatrixLayout[BM: Int, BN: Int, *, num_threads: Int, accum_type_size: Int]
Layout for using `st_matrix` to write the final accumulator to shared memory (smem).
Implemented traits
AnyType,
Copyable,
ImplicitlyCopyable,
Movable,
UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = True
__del__is_trivial
alias __del__is_trivial = True
__moveinit__is_trivial
alias __moveinit__is_trivial = True
bits
alias bits = (64 * accum_type_size)
bits_per_byte
alias bits_per_byte = 8
element_layout
alias element_layout = Layout.row_major(1, 2)
elements_per_repeat
alias elements_per_repeat = 4
frag_simdwidth
alias frag_simdwidth = 2
frag_size
alias frag_size = ((BN * 2) // 4)
num_m_tiles
alias num_m_tiles = (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))
num_m_tiles_total
alias num_m_tiles_total = ceildiv((2 * BM), 128)
num_row_blocks_per_mma
alias num_row_blocks_per_mma = 2
num_warpgroups
alias num_warpgroups = ceildiv(num_threads, 128)
repeat
alias repeat = (BN // 8)
row_of_frags_layout
alias row_of_frags_layout = Layout.row_major((ceildiv((2 * BM), 128) // ceildiv(num_threads, 128)), ((BN * 2) // 4))
TensorType
alias TensorType[dtype: DType] = LayoutTensor[dtype, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]
Parameters
- dtype (DType): The element data type forwarded to `LayoutTensor`.
thread_cols
alias thread_cols = 4
vec_local_layout
alias vec_local_layout = Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]()))
Methods
__init__
__init__() -> Self
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!