Mojo struct
SM100TensorAccumulatorSS
struct SM100TensorAccumulatorSS[operand_type: DType, accum_dtype: DType, MMA_M: Int, MMA_N: Int, BK: Int, *, mma_kind: UMMAKind = UMMAKind.KIND_F16, swizzle_a: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, swizzle_b: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, transpose_b: Bool = True, cta_group: Int = 1, num_stages: Int = 1]
Implemented traits
AnyType, Copyable, ImplicitlyCopyable, ImplicitlyDestructible, Movable, RegisterPassable, TrivialRegisterPassable
comptime members
a_layout
comptime a_layout = tile_layout_k_major[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t, align_up(MMA_M, 8), SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].padded_BK, swizzle_a]()
accum_t
comptime accum_t = accum_dtype
AType
comptime AType = MMASmemDescriptorPair
b_layout
comptime b_layout = tile_layout_k_major[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t, MMA_N, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].padded_BK, swizzle_b]() if transpose_b else tile_layout_mn_major[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t, MMA_N, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].padded_BK, swizzle_b]()
BType
comptime BType = MMASmemDescriptorPair
CType
comptime CType = TMemTile[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].accum_t, MMA_M, MMA_N]
idesc
comptime idesc = UMMAInsDescriptor.create[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].accum_t, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t, Index[Int, Int, dtype=DType.uint32](VariadicPack(MMA_M, MMA_N)), transpose_b=transpose_b]()
MMA_K
comptime MMA_K = 16 if operand_type.is_half_float() else 32
num_k_blocks
comptime num_k_blocks = (SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].padded_BK // SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].MMA_K)
num_k_blocks_per_stage
comptime num_k_blocks_per_stage = (SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].num_k_blocks // num_stages)
num_k_mmas
comptime num_k_mmas = ceildiv(BK, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].MMA_K)
operand_size
comptime operand_size = size_of[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t]()
operand_t
comptime operand_t = operand_type
padded_BK
comptime padded_BK = align_up(BK, SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].swizzle_granularity)
swizzle_granularity
comptime swizzle_granularity = (max(swizzle_a.bytes(), swizzle_b.bytes()) // size_of[SM100TensorAccumulatorSS[operand_type, accum_dtype, MMA_M, MMA_N, BK, mma_kind=mma_kind, swizzle_a=swizzle_a, swizzle_b=swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages].operand_t]())
Methods
mma
static mma[*, stage_idx: Int = 0](a: MMASmemDescriptorPair, b: MMASmemDescriptorPair, c: UInt32, *, c_scale: UInt32, elect: Int32)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!