Mojo struct
B200BlockScaledMatmulSmem
struct B200BlockScaledMatmulSmem[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool, *, config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b]]
Fields
- a_smem (InlineArray[Scalar[a_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].a_smem_size])
- b_smem (InlineArray[Scalar[b_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].b_smem_size])
- c_smem (InlineArray[Scalar[c_type], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].c_smem_size])
- sfa_smem (InlineArray[Scalar[sfa_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfa_smem_size])
- sfb_smem (InlineArray[Scalar[sfb_dtype], B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].sfb_smem_size])
- tma_mma_mbars (InlineArray[SharedMemBarrier, (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].num_group_pipeline_stages * 2)])
- accum_mbars (InlineArray[SharedMemBarrier, (config * 2)])
- clc_mbars_full (InlineArray[SharedMemBarrier, config.num_clc_pipeline_stages])
- clc_mbars_empty (InlineArray[SharedMemBarrier, config.num_clc_pipeline_stages])
- clc_throttle_mbars (InlineArray[SharedMemBarrier, (config * 2)])
- clc_response (InlineArray[UInt128, config.num_clc_pipeline_stages])
- tmem_dealloc_mbar (InlineArray[SharedMemBarrier, 1])
- tmem_addr (InlineArray[UInt32, 1])
Implemented traits
AnyType,
ImplicitlyDestructible
comptime members
a_smem_size
comptime a_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config)
AScalesType
comptime AScalesType = Scalar[sfa_dtype]
AType
comptime AType = Scalar[a_type]
b_smem_size
comptime b_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BN * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BK) * config)
BK
comptime BK = config.block_tile_shape[2]
BM
comptime BM = config.block_tile_shape[0]
BN
comptime BN = config.block_tile_shape[1]
BScalesType
comptime BScalesType = Scalar[sfb_dtype]
BType
comptime BType = Scalar[b_type]
c_smem_size
comptime c_smem_size = ((B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].output_m * B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].output_n) * config)
CType
comptime CType = Scalar[c_type]
MMA_K
comptime MMA_K = config.mma_shape[2]
MMA_M
comptime MMA_M = config.mma_shape[0]
MMA_N
comptime MMA_N = config.mma_shape[1]
num_group_pipeline_stages
comptime num_group_pipeline_stages = (config // config)
output_m
comptime output_m = config.output_tile_shape[0]
output_n
comptime output_n = config.output_tile_shape[1]
sfa_smem_size
comptime sfa_smem_size = (((config * (B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].BM // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config)
sfb_smem_size
comptime sfb_smem_size = (((config * (align_up(B200BlockScaledMatmulSmem[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b, config=config].MMA_N, SF_MN_GROUP_SIZE) // SF_MN_GROUP_SIZE)) * BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].sf_block_atom_size) * config)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!