Skip to main content

Mojo trait

AsyncTensorAccumulator

Implemented traits

AnyType, UnknownDestructibility

Aliases

a_t

alias a_t

ab_t

alias ab_t

accum_t

alias accum_t

b_t

alias b_t

c_t

alias c_t

operand_t

alias operand_t

Methods

__init__

__init__(smem: UnsafePointer[SharedMemBarrier, address_space=AddressSpace(3), alignment=8]) -> _Self

mma_descriptors

static mma_descriptors[dtype_a: DType, dtype_b: DType](p_a: UnsafePointer[SIMD[dtype_a, 1], address_space=AddressSpace(3)], p_b: UnsafePointer[SIMD[dtype_b, 1], address_space=AddressSpace(3)]) -> _Self.ab_t

mma

mma(self: _Self, a: _Self.a_t, b: _Self.b_t, c: _Self.c_t, c_scale: SIMD[uint32, 1], wg_idx: SIMD[uint32, 1] = 0)

wait_group

wait_group[wgmma_left_in_flight: Int = 0](mut self: _Self)

Was this page helpful?