Mojo struct
TMemAccumulator
@register_passable(trivial)
struct TMemAccumulator[dtype_: DType, MMA_M: Int, MMA_N: Int, num_m_mmas: Int, num_n_mmas: Int, num_softmax_threads: Int]
Fields
- tmem_addr (UInt32)
Implemented traits
AccumulatorTile, AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = UInt32.__copyinit__is_trivial
__del__is_trivial
alias __del__is_trivial = UInt32.__del__is_trivial
__moveinit__is_trivial
alias __moveinit__is_trivial = UInt32.__moveinit__is_trivial
dtype
alias dtype = dtype_
element_layout
alias element_layout = Layout.row_major(1, 2)
frag_size
alias frag_size = ((MMA_M * MMA_N) // num_softmax_threads)
layout_t
alias layout_t = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads]
rows_of_frags_layout
alias rows_of_frags_layout = Layout.row_major((num_m_mmas * num_n_mmas), ((MMA_M * MMA_N) // num_softmax_threads))
vec_output_layout
alias vec_output_layout = Layout.__init__(IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, num_m_mmas), IntTuple[__origin_of()]((((MMA_M * MMA_N) // num_softmax_threads) // 4), num_n_mmas), Tuple[]()), IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, ((MMA_M * MMA_N) // num_softmax_threads)), IntTuple[__origin_of()](4, (((MMA_M * MMA_N) // num_softmax_threads) * num_m_mmas)), Tuple[]()))
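The derived aliases encode how the accumulator tile is split across the softmax threads. The sketch below walks through that arithmetic with assumed, illustrative parameter values (they are not taken from any particular kernel):

```mojo
from layout import Layout

# Assumed, illustrative parameter values; a concrete kernel supplies its own.
alias MMA_M = 64
alias MMA_N = 32
alias num_m_mmas = 1
alias num_n_mmas = 4
alias num_softmax_threads = 128

# frag_size: accumulator elements each softmax thread holds per MMA tile.
alias frag_size = (MMA_M * MMA_N) // num_softmax_threads  # 16

# rows_of_frags_layout: one row per MMA tile, frag_size elements per row.
alias rows_of_frags_layout = Layout.row_major(
    num_m_mmas * num_n_mmas, frag_size
)  # 4 x 16
```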
Methods
__init__
__init__(tmem_addr: UInt32) -> Self
__getitem__
__getitem__(self, i: UInt32) -> Self
check_constraints
static check_constraints()
offset
rows_of_frags
static rows_of_frags(src: LayoutTensor[dtype_, Layout.__init__(IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, num_m_mmas), IntTuple[__origin_of()]((((MMA_M * MMA_N) // num_softmax_threads) // 4), num_n_mmas), Tuple[]()), IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, ((MMA_M * MMA_N) // num_softmax_threads)), IntTuple[__origin_of()](4, (((MMA_M * MMA_N) // num_softmax_threads) * num_m_mmas)), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]) -> LayoutTensor[dtype_, Layout.row_major((num_m_mmas * num_n_mmas), ((MMA_M * MMA_N) // num_softmax_threads)), MutableAnyOrigin, address_space=AddressSpace(5)]
Returns: A LayoutTensor presenting the fragments as (num_m_mmas * num_n_mmas) rows of frag_size elements each.
allocate_register_tile
static allocate_register_tile() -> LayoutTensor[dtype_, Layout.__init__(IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, num_m_mmas), IntTuple[__origin_of()]((((MMA_M * MMA_N) // num_softmax_threads) // 4), num_n_mmas), Tuple[]()), IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, ((MMA_M * MMA_N) // num_softmax_threads)), IntTuple[__origin_of()](4, (((MMA_M * MMA_N) // num_softmax_threads) * num_m_mmas)), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]
Returns: A thread-local register LayoutTensor with vec_output_layout and element_layout, suitable as the source for copy_from or the destination for copy_to.
copy_from
copy_from(self, src: LayoutTensor[dtype_, Layout.__init__(IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, num_m_mmas), IntTuple[__origin_of()]((((MMA_M * MMA_N) // num_softmax_threads) // 4), num_n_mmas), Tuple[]()), IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, ((MMA_M * MMA_N) // num_softmax_threads)), IntTuple[__origin_of()](4, (((MMA_M * MMA_N) // num_softmax_threads) * num_m_mmas)), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
copy_to
copy_to(self, dst: LayoutTensor[dtype_, Layout.__init__(IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, num_m_mmas), IntTuple[__origin_of()]((((MMA_M * MMA_N) // num_softmax_threads) // 4), num_n_mmas), Tuple[]()), IntTuple.__init__[__origin_of()](IntTuple[__origin_of()](2, ((MMA_M * MMA_N) // num_softmax_threads)), IntTuple[__origin_of()](4, (((MMA_M * MMA_N) // num_softmax_threads) * num_m_mmas)), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
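Taken together, the methods describe a tensor-memory-to-registers round trip. The following is a minimal, hedged sketch of that sequence; the parameter values, the omitted import of TMemAccumulator, and the origin of tmem_addr are all assumptions, and code like this only makes sense inside an SM100-class kernel whose MMA pipeline has already allocated the tensor-memory accumulator:

```mojo
# Minimal sketch only. TMemAccumulator's import path is omitted on purpose,
# the parameter values are illustrative, and `tmem_addr` is assumed to come
# from the surrounding kernel's tensor-memory allocation.
alias Accum = TMemAccumulator[
    DType.float32,
    MMA_M=64,
    MMA_N=32,
    num_m_mmas=1,
    num_n_mmas=4,
    num_softmax_threads=128,
]

fn rescale_accumulator(tmem_addr: UInt32):
    var acc = Accum(tmem_addr)  # wrap an existing tensor-memory address

    # Thread-local register tile with vec_output_layout / element_layout.
    var regs = Accum.allocate_register_tile()

    acc.copy_to(regs)  # tensor memory -> registers

    # Access the fragments as (num_m_mmas * num_n_mmas) rows of frag_size.
    var frags = Accum.rows_of_frags(regs)
    _ = frags  # e.g. apply per-row scaling here

    acc.copy_from(regs)  # registers -> tensor memory
```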