Mojo struct

B200BlockScaledMatmulSmem

struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]

Fields​

  • ​a_smem (InlineArray[Scalar[a_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size]):
  • ​b_smem (InlineArray[Scalar[b_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size]):
  • ​c_smem (InlineArray[Scalar[c_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size]):
  • ​sfa_smem (InlineArray[Scalar[sfa_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size]):
  • ​sfb_smem (InlineArray[Scalar[sfb_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size]):
  • ​tma_mma_mbars (InlineArray[SharedMemBarrier, (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages * 2)]):
  • ​accum_mbars (InlineArray[SharedMemBarrier, (config.num_accum_pipeline_stages * 2)]):
  • ​clc_mbars_full (InlineArray[SharedMemBarrier, config.num_clc_pipeline_stages]):
  • ​clc_mbars_empty (InlineArray[SharedMemBarrier, config.num_clc_pipeline_stages]):
  • ​clc_throttle_mbars (InlineArray[SharedMemBarrier, (config.num_clc_pipeline_stages * 2)]):
  • ​clc_response (InlineArray[UInt128, config.num_clc_pipeline_stages]):
  • ​tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
  • ​tmem_addr (InlineArray[UInt32, 1]):

Implemented traits​

AnyType, ImplicitlyDestructible

comptime members​

a_smem_size​

comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

AScalesType​

comptime AScalesType = Scalar[sfa_dtype]

AType​

comptime AType = Scalar[a_type]

b_smem_size​

comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

BK​

comptime BK = config.block_tile_shape[2]

BM​

comptime BM = config.block_tile_shape[0]

BN​

comptime BN = config.block_tile_shape[1]

BScalesType​

comptime BScalesType = Scalar[sfb_dtype]

BType​

comptime BType = Scalar[b_type]

c_smem_size​

comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].output_m * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].output_n) * config.num_output_stages)

CType​

comptime CType = Scalar[c_type]

MMA_K​

comptime MMA_K = config.mma_shape[2]

MMA_M​

comptime MMA_M = config.mma_shape[0]

MMA_N​

comptime MMA_N = config.mma_shape[1]

num_group_pipeline_stages​

comptime num_group_pipeline_stages = (config.num_pipeline_stages // config.k_group_size)

output_m​

comptime output_m = config.output_tile_shape[0]

output_n​

comptime output_n = config.output_tile_shape[1]

sfa_smem_size​

comptime sfa_smem_size = (((config.num_sf_k_tiles * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)

sfb_smem_size​

comptime sfb_smem_size = (((config.num_sf_k_tiles * (align_up(B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N, SF_MN_GROUP_SIZE) // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)