Mojo struct
BlockScaledMatmulConfig
struct BlockScaledMatmulConfig[a_type: DType, b_type: DType, c_type: DType, sfa_dtype: DType, sfb_dtype: DType, transpose_b: Bool = True]
Static configuration of GPU matmul.
Fields
- cta_group (Int)
- mma_shape (IndexList[3])
- cluster_shape (IndexList[3])
- AB_swapped (Bool)
- block_swizzle_size (Int)
- raster_order (RasterOrder)
- register_based_epilogue (Bool)
- block_tile_shape (IndexList[3])
- num_split_k (Int)
- num_pipeline_stages (Int)
- num_clc_pipeline_stages (Int)
- num_accum_pipeline_stages (Int)
- num_output_stages (Int)
- output_tile_shape (IndexList[2])
- a_swizzle (TensorMapSwizzle)
- b_swizzle (TensorMapSwizzle)
- c_swizzle (TensorMapSwizzle)
- k_group_size (Int)
- scaling_kind (UMMAKind)
- vec_sf_size (Int)
- num_sf_k_tiles (Int)
- is_small_bn (Bool)
- gemm_kind (GEMMKind)
Implemented traits
AnyType,
Copyable,
Equatable,
Hashable,
ImplicitlyCopyable,
ImplicitlyDestructible,
Movable,
RegisterPassable,
TrivialRegisterPassable,
Writable
comptime members
accum_type
comptime accum_type = get_accum_type[a_type]()
sf_block_atom_size
comptime sf_block_atom_size = (((load_from_mem SF_ATOM_M.__getitem_param__[0]()) * (load_from_mem SF_ATOM_M.__getitem_param__[1]())) * 4)
Methods
__init__
__init__(*, scaling_kind: UMMAKind, cta_group: Int = 2, mma_shape: IndexList[3] = get_mma_shape[a_type, BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b].accum_type](), cluster_shape: IndexList[3] = Index[Int, Int, Int](2, 1, 1), AB_swapped: Bool = False, num_split_k: Int = 1, block_swizzle_size: Int = 0, raster_order: RasterOrder = RasterOrder.AlongM, k_group_size: Int = 1, num_pipeline_stages: Optional[Int] = None, num_accum_pipeline_stages: Int = 2, num_clc_pipeline_stages: Int = 2, is_gmm: Bool = False, is_small_bn: Bool = False, register_based_epilogue: Bool = True, gemm_kind: GEMMKind = GEMMKind.GEMM) -> Self
swap_AB_type
swap_AB_type(self) -> BlockScaledMatmulConfig[b_type, a_type, c_type, sfb_dtype, sfa_dtype, transpose_b]
Returns:
BlockScaledMatmulConfig[b_type, a_type, c_type, sfb_dtype, sfa_dtype, transpose_b]
write_to
write_to[W: Writer](self, mut writer: W)
write_repr_to
write_repr_to(self, mut writer: T)
get_kernal_name
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!