Mojo struct

B200BlockScaledMatmulSmem

struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]

Fields

a_smem (InlineArray[Scalar[a_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size]):
b_smem (InlineArray[Scalar[b_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size]):
c_smem (InlineArray[Scalar[c_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size]):
sfa_smem (InlineArray[Scalar[sfa_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size]):
sfb_smem (InlineArray[Scalar[sfb_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size]):
tma_mma_mbars (InlineArray[SharedMemBarrier, (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages * 2)]):
accum_mbars (InlineArray[SharedMemBarrier, (config.num_accum_pipeline_stages * 2)]):
tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
tmem_addr (InlineArray[UInt32, 1]):

Implemented traits

AnyType, ImplicitlyDestructible

`comptime` members

`a_smem_size`

comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

`AScalesType`

comptime AScalesType = Scalar[sfa_dtype]

`AType`

comptime AType = Scalar[a_type]

`b_smem_size`

comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config.num_pipeline_stages)

`BK`

comptime BK = config.block_tile_shape[2]

`BM`

comptime BM = config.block_tile_shape[0]

`BN`

comptime BN = config.block_tile_shape[1]

`BScalesType`

comptime BScalesType = Scalar[sfb_dtype]

`BType`

comptime BType = Scalar[b_type]

`c_smem_size`

comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputN) * config.num_output_stages)

`CType`

comptime CType = Scalar[c_type]

`MMA_K`

comptime MMA_K = config.mma_shape[2]

`MMA_M`

comptime MMA_M = config.mma_shape[0]

`MMA_N`

comptime MMA_N = config.mma_shape[1]

`num_group_pipeline_stages`

comptime num_group_pipeline_stages = (config.num_pipeline_stages // config.k_group_size)

`OutputM`

comptime OutputM = config.output_tile_shape[0]

`OutputN`

comptime OutputN = config.output_tile_shape[1]

`sfa_smem_size`

comptime sfa_smem_size = (((config.num_sf_k_tiles * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)

`sfb_smem_size`

comptime sfb_smem_size = (((config.num_sf_k_tiles * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config.num_pipeline_stages)

Fields​

Implemented traits​

comptime members​

a_smem_size​

AScalesType​

AType​

b_smem_size​

BK​

BM​

BN​

BScalesType​

BType​

c_smem_size​

CType​

MMA_K​

MMA_M​

MMA_N​

num_group_pipeline_stages​

OutputM​

OutputN​

sfa_smem_size​

sfb_smem_size​