Mojo struct
B200MatmulSmem
struct B200MatmulSmem[a_type: DType, b_type: DType, c_type: DType, transpose_b: Bool, *, config: MatmulConfig[a_type, b_type, c_type, transpose_b]]
Fields
- a_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].AType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].a_smem_size]):
- b_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].b_smem_size]):
- c_smem (InlineArray[B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].CType, B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].c_smem_size]):
- tma_mma_mbars (InlineArray[SharedMemBarrier, (Int(B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].num_group_pipeline_stages) * 2)]):
- accum_mbars (InlineArray[SharedMemBarrier, (Int(config) * 2)]):
- clc_mbars_full (InlineArray[SharedMemBarrier, Int(config)]):
- clc_mbars_empty (InlineArray[SharedMemBarrier, Int(config)]):
- clc_throttle_mbars (InlineArray[SharedMemBarrier, (Int(config) * 2)]):
- clc_response (InlineArray[UInt128, Int(config)]):
- tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
- tmem_addr (InlineArray[UInt32, 1]):
Implemented traits
AnyType,
UnknownDestructibility
Aliases
__del__is_trivial
comptime __del__is_trivial = False
a_smem_size
comptime a_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BM * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BK) * Int(config))
AType
comptime AType = Scalar[a_type]
b_smem_size
comptime b_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BN * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].BK) * Int(config))
BK
comptime BK = config.block_tile_shape.__getitem__[3, DType.int64, Int](2)
BM
comptime BM = config.block_tile_shape.__getitem__[3, DType.int64, Int](0)
BN
comptime BN = config.block_tile_shape.__getitem__[3, DType.int64, Int](1)
BType
comptime BType = Scalar[b_type]
c_smem_size
comptime c_smem_size = ((B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].OutputM * B200MatmulSmem[a_type, b_type, c_type, transpose_b, config=config].OutputN) * Int(config))
CType
comptime CType = Scalar[c_type]
num_group_pipeline_stages
comptime num_group_pipeline_stages = (config // config)
OutputM
comptime OutputM = config.output_tile_shape.__getitem__[2, DType.int64, Int](0)
OutputN
comptime OutputN = config.output_tile_shape.__getitem__[2, DType.int64, Int](1)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!