Skip to main content

Mojo struct

B200BlockScaledMatmulSmem

struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]

Fields

  • a_smem (InlineArray[Scalar[a_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size]):
  • b_smem (InlineArray[Scalar[b_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size]):
  • c_smem (InlineArray[Scalar[c_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size]):
  • sfa_smem (InlineArray[Scalar[sfa_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size]):
  • sfb_smem (InlineArray[Scalar[sfb_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size]):
  • tma_mma_mbars (InlineArray[SharedMemBarrier, (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages * 2)]):
  • accum_mbars (InlineArray[SharedMemBarrier, (config.num_accum_pipeline_stages * 2)]):
  • tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
  • tmem_addr (InlineArray[UInt32, 1]):

Implemented traits

AnyType, ImplicitlyDestructible

comptime members

a_smem_size

comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

AScalesType

comptime AScalesType = Scalar[sfa_dtype]

AType

comptime AType = Scalar[a_type]

b_smem_size

comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

BK

comptime BK = config.block_tile_shape[2]

BM

comptime BM = config.block_tile_shape[0]

BN

comptime BN = config.block_tile_shape[1]

BScalesType

comptime BScalesType = Scalar[sfb_dtype]

BType

comptime BType = Scalar[b_type]

c_smem_size

comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputN) * config.num_output_stages)

CType

comptime CType = Scalar[c_type]

MMA_K

comptime MMA_K = config.mma_shape[2]

MMA_M

comptime MMA_M = config.mma_shape[0]

MMA_N

comptime MMA_N = config.mma_shape[1]

num_group_pipeline_stages

comptime num_group_pipeline_stages = (config.num_pipeline_stages // config.k_group_size)

OutputM

comptime OutputM = config.output_tile_shape[0]

OutputN

comptime OutputN = config.output_tile_shape[1]

sfa_smem_size

comptime sfa_smem_size = (((config.num_sf_k_tiles * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)

sfb_smem_size

comptime sfb_smem_size = (((config.num_sf_k_tiles * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)

Was this page helpful?