Skip to main content

Mojo struct

STMatrixOffsets

struct STMatrixOffsets[BM: Int, BN: Int, *, num_threads: Int, accum_dtype_size: Int, curr_repeat: Int, cumulative_repeat: Int, m_mma: Int]

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, ImplicitlyDestructible, Movable, RegisterPassable, TrivialRegisterPassable

comptime members

b32_per_repeat

comptime b32_per_repeat = ((STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].STLayout.elements_per_repeat * accum_dtype_size) // 4)

local_frag_size_b32

comptime local_frag_size_b32 = (curr_repeat * STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].b32_per_repeat)

ptr_offset

comptime ptr_offset = (STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].b32_per_repeat * ((STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].STLayout.repeat * m_mma) + cumulative_repeat))

STLayout

comptime STLayout = STMatrixLayout[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size]

tmem_col_offset

comptime tmem_col_offset = ((cumulative_repeat * STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].STLayout.frag_simdwidth) * 4)

tmem_offset

comptime tmem_offset = ((STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].tmem_row_offset << 16) + STMatrixOffsets[BM, BN, num_threads=num_threads, accum_dtype_size=accum_dtype_size, curr_repeat=curr_repeat, cumulative_repeat=cumulative_repeat, m_mma=m_mma].tmem_col_offset)

tmem_row_offset

comptime tmem_row_offset = (16 * m_mma)

Methods

__init__

__init__() -> Self

Was this page helpful?