Mojo struct
SM100TensorAccumulatorTS
@register_passable("trivial")
struct SM100TensorAccumulatorTS[operand_type: DType, accum_type: DType, MMA_M: Int, MMA_N: Int, BK: Int, swizzle_b: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, *, transpose_b: Bool = True, cta_group: Int = 1, num_stages: Int = 1, padded_BK: Int = BK]
Implemented traits
AnyType,
Copyable,
ImplicitlyCopyable,
Movable,
UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = True
__del__is_trivial
alias __del__is_trivial = True
__moveinit__is_trivial
alias __moveinit__is_trivial = True
accum_t
alias accum_t = accum_type
AType
alias AType = TMemTile[operand_type, MMA_M, BK]
b_layout
alias b_layout = tile_layout_k_major[SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].operand_t, MMA_N, BK, swizzle_b]() if transpose_b else tile_layout_mn_major[SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].operand_t, MMA_N, BK, swizzle_b]()
BType
alias BType = MMASmemDescriptorPair
CType
alias CType = TMemTile[SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].accum_t, MMA_M, MMA_N]
idesc
alias idesc = UMMAInsDescriptor.create[UMMAKind.KIND_F16, SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].accum_t, SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].operand_t, SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].operand_t, Index[dtype=DType.uint32](MMA_M, MMA_N), transpose_b=transpose_b]()
MMA_K
alias MMA_K = 16
num_k_blocks
alias num_k_blocks = (padded_BK // 16)
num_k_blocks_per_stage
alias num_k_blocks_per_stage = (SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].num_k_blocks // num_stages)
num_k_mmas
alias num_k_mmas = (BK // 16)
operand_size
alias operand_size = size_of[operand_type]()
operand_t
alias operand_t = operand_type
swizzle_granularity
alias swizzle_granularity = (swizzle_b.bytes() // SM100TensorAccumulatorTS[operand_type, accum_type, MMA_M, MMA_N, BK, swizzle_b, transpose_b=transpose_b, cta_group=cta_group, num_stages=num_stages, padded_BK=padded_BK].operand_size)
Methods
descriptor_a
mma
static mma[*, stage_idx: Int = 0](a: UInt32, b: MMASmemDescriptorPair, c: UInt32, *, c_scale: UInt32, elect: Int32)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!