Mojo struct
B200BlockScaledMatmulSmem
struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]
Fields
- a_smem (`InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].AType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size]`)
- b_smem (`InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size]`)
- c_smem (`InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].CType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size]`)
- sfa_smem (`InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].AScalesType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size]`)
- sfb_smem (`InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BScalesType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size]`)
- tma_mma_mbars (`InlineArray[SharedMemBarrier, (Int(B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages) * 2)]`)
- accum_mbars (`InlineArray[SharedMemBarrier, (Int(config) * 2)]`)
- clc_mbars_full (`InlineArray[SharedMemBarrier, Int(config)]`)
- clc_mbars_empty (`InlineArray[SharedMemBarrier, Int(config)]`)
- clc_throttle_mbars (`InlineArray[SharedMemBarrier, (Int(config) * 2)]`)
- clc_response (`InlineArray[UInt128, Int(config)]`)
- tmem_dealloc_mbar (`InlineArray[SharedMemBarrier, 1]`)
- tmem_addr (`InlineArray[UInt32, 1]`)
Implemented traits
AnyType, UnknownDestructibility
comptime members
__del__is_trivial
comptime __del__is_trivial = True
a_smem_size
comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * Int(config))
AScalesType
comptime AScalesType = Scalar[sfa_dtype]
AType
comptime AType = Scalar[a_type]
b_smem_size
comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * Int(config))
BK
comptime BK = config.block_tile_shape.__getitem__[3, DType.int64, Int](2)
BM
comptime BM = config.block_tile_shape.__getitem__[3, DType.int64, Int](0)
BN
comptime BN = config.block_tile_shape.__getitem__[3, DType.int64, Int](1)
BScalesType
comptime BScalesType = Scalar[sfb_dtype]
BType
comptime BType = Scalar[b_type]
c_smem_size
comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputN) * Int(config))
CType
comptime CType = Scalar[c_type]
MMA_K
comptime MMA_K = config.mma_shape.__getitem__[3, DType.int64, Int](2)
MMA_M
comptime MMA_M = config.mma_shape.__getitem__[3, DType.int64, Int](0)
MMA_N
comptime MMA_N = config.mma_shape.__getitem__[3, DType.int64, Int](1)
num_group_pipeline_stages
comptime num_group_pipeline_stages = (config // config)
OutputM
comptime OutputM = config.output_tile_shape.__getitem__[2, DType.int64, Int](0)
OutputN
comptime OutputN = config.output_tile_shape.__getitem__[2, DType.int64, Int](1)
sf_block_atom_size
comptime sf_block_atom_size = (((load_from_mem SF_ATOM_M.__getitem__[Int, Int, 0]()) * (load_from_mem SF_ATOM_M.__getitem__[Int, Int, 1]())) * 4)
sfa_smem_size
comptime sfa_smem_size = (((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE) * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sf_block_atom_size) * Int(config))
sfb_smem_size
comptime sfb_smem_size = (((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N // SF_MN_GROUP_SIZE) * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sf_block_atom_size) * Int(config))
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!