Skip to main content

Mojo struct

STMatrixLayout

@register_passable(trivial) struct STMatrixLayout[BM: Int, BN: Int, *, num_threads: Int, accum_type_size: Int]

Layout used with `st_matrix` to write the final accumulator to shared memory (smem).

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = True

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = True

bits

alias bits = (64 * accum_type_size)

bits_per_byte

alias bits_per_byte = 8

element_layout

alias element_layout = Layout.row_major(1, 2)

elements_per_repeat

alias elements_per_repeat = 4

frag_simdwidth

alias frag_simdwidth = 2

frag_size

alias frag_size = ((BN * 2) // 4)

num_m_tiles

alias num_m_tiles = (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))

num_m_tiles_total

alias num_m_tiles_total = ceildiv((2 * BM), 128)

num_row_blocks_per_mma

alias num_row_blocks_per_mma = 2

num_warpgroups

alias num_warpgroups = ceildiv(num_threads, 128)

repeat

alias repeat = (BN // 8)

row_of_frags_layout

alias row_of_frags_layout = Layout.row_major((ceildiv((2 * BM), 128) // ceildiv(num_threads, 128)), ((BN * 2) // 4))

TensorType

alias TensorType[dtype: DType] = LayoutTensor[dtype, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]

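Parameters (of TensorType)

dtype (DType): The data type passed as the first parameter of the resulting LayoutTensor.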

thread_cols

alias thread_cols = 4

vec_local_layout

alias vec_local_layout = Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]()))

Methods

__init__

__init__() -> Self

Was this page helpful?