Mojo struct

B200BlockScaledMatmulSmem

struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]

Fields

a_smem (InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].AType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size]):
b_smem (InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size]):
c_smem (InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].CType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size]):
sfa_smem (InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].AScalesType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size]):
sfb_smem (InlineArray[B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BScalesType, B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size]):
tma_mma_mbars (InlineArray[SharedMemBarrier, (Int.__init__[Int](B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages) * 2)]):
accum_mbars (InlineArray[SharedMemBarrier, (Int.__init__[Int](config.num_accum_pipeline_stages) * 2)]):
clc_mbars_full (InlineArray[SharedMemBarrier, Int.__init__[Int](config.num_clc_pipeline_stages)]):
clc_mbars_empty (InlineArray[SharedMemBarrier, Int.__init__[Int](config.num_clc_pipeline_stages)]):
clc_throttle_mbars (InlineArray[SharedMemBarrier, (Int.__init__[Int](config.num_clc_pipeline_stages) * 2)]):
clc_response (InlineArray[UInt128, Int.__init__[Int](config.num_clc_pipeline_stages)]):
tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1]):
tmem_addr (InlineArray[UInt32, 1]):

Implemented traits

AnyType, ImplicitlyDestructible

`comptime` members

`__del__is_trivial`

comptime __del__is_trivial = True

`a_smem_size`

comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * Int.__init__[Int](config.num_pipeline_stages))

`AScalesType`

comptime AScalesType = Scalar[sfa_dtype]

`AType`

comptime AType = Scalar[a_type]

`b_smem_size`

comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * Int.__init__[Int](config.num_pipeline_stages))

`BK`

comptime BK = config.block_tile_shape.__getitem__[Int](2)

`BM`

comptime BM = config.block_tile_shape.__getitem__[Int](0)

`BN`

comptime BN = config.block_tile_shape.__getitem__[Int](1)

`BScalesType`

comptime BScalesType = Scalar[sfb_dtype]

`BType`

comptime BType = Scalar[b_type]

`c_smem_size`

comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].OutputN) * Int.__init__[Int](config.num_output_stages))

`CType`

comptime CType = Scalar[c_type]

`MMA_K`

comptime MMA_K = config.mma_shape.__getitem__[Int](2)

`MMA_M`

comptime MMA_M = config.mma_shape.__getitem__[Int](0)

`MMA_N`

comptime MMA_N = config.mma_shape.__getitem__[Int](1)

`num_group_pipeline_stages`

comptime num_group_pipeline_stages = (config // config)

`OutputM`

comptime OutputM = config.output_tile_shape.__getitem__[Int](0)

`OutputN`

comptime OutputN = config.output_tile_shape.__getitem__[Int](1)

`sfa_smem_size`

comptime sfa_smem_size = (((config * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * Int.__init__[Int](config.num_pipeline_stages))

`sfb_smem_size`

comptime sfb_smem_size = (((config * (align_up(B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N, SF_MN_GROUP_SIZE) // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * Int.__init__[Int](config.num_pipeline_stages))

Fields

Implemented traits

comptime members

__del__is_trivial

a_smem_size

AScalesType

AType

b_smem_size

BK

BM

BN

BScalesType

BType

c_smem_size

CType

MMA_K

MMA_M

MMA_N

num_group_pipeline_stages

OutputM

OutputN

sfa_smem_size

sfb_smem_size