Skip to main content

Mojo struct

B200MatmulSmem

struct B200MatmulSmem[a_type: DType, b_type: DType, c_type: DType, transpose_b: Bool, *, config: MatmulConfig[a_type, b_type, c_type, transpose_b]]

Fields

  • a_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].AType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].a_smem_size]):
  • b_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].b_smem_size]):
  • c_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].CType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].c_smem_size]):
  • tma_mma_mbars (InlineArray[SharedMemBarrier, (Int(B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].num_group_pipeline_stages) * 2)]):
  • accum_mbars (InlineArray[SharedMemBarrier, (Int(config.num_accum_pipeline_stages) * 2)]):
  • clc_mbars_full (InlineArray[SharedMemBarrier, Int(config.num_clc_pipeline_stages)]):
  • clc_mbars_empty (InlineArray[SharedMemBarrier, Int(config.num_clc_pipeline_stages)]):
  • clc_throttle_mbars (InlineArray[SharedMemBarrier, (Int(config.num_clc_pipeline_stages) * 2)]):
  • clc_response (InlineArray[UInt128, Int(config.num_clc_pipeline_stages)]):
  • tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
  • tmem_addr (InlineArray[UInt32, 1]):

Implemented traits

AnyType, UnknownDestructibility

Aliases

__del__is_trivial

comptime __del__is_trivial = False

a_smem_size

comptime a_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BM * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BK) * Int(config.num_pipeline_stages))

AType

comptime AType = Scalar[a_type]

b_smem_size

comptime b_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BN * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BK) * Int(config.num_pipeline_stages))

BK

comptime BK = config.block_tile_shape.__getitem__[3, DType.int64, Int](2)

BM

comptime BM = config.block_tile_shape.__getitem__[3, DType.int64, Int](0)

BN

comptime BN = config.block_tile_shape.__getitem__[3, DType.int64, Int](1)

BType

comptime BType = Scalar[b_type]

c_smem_size

comptime c_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].OutputM * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].OutputN) * Int(config.num_output_stages))

CType

comptime CType = Scalar[c_type]

num_group_pipeline_stages

comptime num_group_pipeline_stages = (config.num_pipeline_stages // config.k_group_size)

OutputM

comptime OutputM = config.output_tile_shape.__getitem__[2, DType.int64, Int](0)

OutputN

comptime OutputN = config.output_tile_shape.__getitem__[2, DType.int64, Int](1)

Was this page helpful?