Skip to main content

Mojo struct

SM100TensorAccumulatorSS

@register_passable(trivial)
struct SM100TensorAccumulatorSS[operand_type: DType, accum_type: DType, MMA_M: Int, MMA_N: Int, BK: Int, *, swizzle_a: TensorMapSwizzle = 3, swizzle_b: TensorMapSwizzle = 3, transpose_b: Bool = True, cta_group: Int = 1, num_stages: Int = 1]

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = True

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = True

a_layout

alias a_layout = tile_layout_k_major[operand_type, align_up(MMA_M, 8), align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())), swizzle_a]()

accum_t

alias accum_t = accum_type

AType

alias AType = MMASmemDescriptor

b_layout

alias b_layout = tile_layout_k_major[operand_type, MMA_N, align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())), swizzle_b]() if transpose_b else tile_layout_mn_major[operand_type, MMA_N, align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())), swizzle_b]()

BType

alias BType = MMASmemDescriptor

CType

alias CType = TMemTile[accum_type, MMA_M, MMA_N]

idesc

alias idesc = UMMAInsDescriptor.create[UMMAKind(2), accum_type, operand_type, operand_type, Index[dtype=DType.uint32](MMA_M, MMA_N), transpose_b=transpose_b]()

MMA_K

alias MMA_K = 16

num_k_blocks

alias num_k_blocks = (align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())) // 16)

num_k_blocks_per_stage

alias num_k_blocks_per_stage = ((align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())) // 16) // num_stages)

num_k_mmas

alias num_k_mmas = (BK // 16)

operand_t

alias operand_t = operand_type

padded_BK

alias padded_BK = align_up(BK, (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of()))

swizzle_granularity

alias swizzle_granularity = (max(swizzle_a.bytes(), swizzle_b.bytes()) // operand_type.size_of())

Methods

mma

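static mma[*, c_scale: UInt32, stage_idx: Int = 0](a: MMASmemDescriptor, b: MMASmemDescriptor, c: UInt32)

Overload in which c_scale is passed as a compile-time parameter.

static mma[*, stage_idx: Int = 0](a: MMASmemDescriptor, b: MMASmemDescriptor, c: UInt32, c_scale: UInt32)

Overload in which c_scale is passed as a runtime argument.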

Was this page helpful?