Skip to main content

Mojo struct

TMemAccumulator

@register_passable(trivial) struct TMemAccumulator[dtype_: DType, MMA_M: Int, MMA_N: Int, num_m_mmas: Int, num_n_mmas: Int, num_softmax_threads: Int]

Fields

  • tmem_addr (UInt32):

Implemented traits

AccumulatorTile, AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

comptime __copyinit__is_trivial = True

__del__is_trivial

comptime __del__is_trivial = True

__moveinit__is_trivial

comptime __moveinit__is_trivial = True

dtype

comptime dtype = dtype_

element_layout

comptime element_layout = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].element_layout

frag_size

comptime frag_size = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].frag_size

layout_t

comptime layout_t = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads]

rows_of_frags_layout

comptime rows_of_frags_layout = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].rows_of_frags_layout

vec_output_layout

comptime vec_output_layout = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].vec_output_layout

Methods

__init__

__init__(tmem_addr: UInt32) -> Self

__getitem__

__getitem__(self, i: UInt32) -> Self

check_constraints

static check_constraints()

offset

offset[m_mma: Int, n_mma: Int](self) -> UInt32

Returns:

UInt32

rows_of_frags

static rows_of_frags(src: LayoutTensor[TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].dtype, TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].vec_output_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL, element_layout=RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].element_layout]) -> LayoutTensor[TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].dtype, TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].rows_of_frags_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL]

Returns:

LayoutTensor

allocate_register_tile

static allocate_register_tile() -> LayoutTensor[TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].dtype, TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].vec_output_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL, element_layout=RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].element_layout]

Returns:

LayoutTensor

copy_from

copy_from(self, src: LayoutTensor[TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].dtype, TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].vec_output_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL, element_layout=RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].element_layout])

copy_to

copy_to(self, dst: LayoutTensor[TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].dtype, TMemAccumulator[dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].vec_output_layout, MutAnyOrigin, address_space=AddressSpace.LOCAL, element_layout=RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads].element_layout])

Was this page helpful?