Mojo struct
MatmulConfig
struct MatmulConfig[a_type: DType, b_type: DType, c_type: DType, transpose_b: Bool = True]
Static configuration of GPU matmul.
Fields
- cta_group (Int)
- mma_shape (IndexList[3])
- cluster_shape (IndexList[3])
- AB_swapped (Bool)
- block_swizzle_size (Int)
- raster_order (RasterOrder)
- register_based_epilogue (Bool)
- block_tile_shape (IndexList[3])
- num_split_k (Int)
- num_pipeline_stages (Int)
- num_clc_pipeline_stages (Int)
- num_accum_pipeline_stages (Int)
- num_output_stages (Int)
- output_tile_shape (IndexList[2])
- a_swizzle (TensorMapSwizzle)
- b_swizzle (TensorMapSwizzle)
- c_swizzle (TensorMapSwizzle)
- k_group_size (Int)
Implemented traits
AnyType,
Copyable,
Equatable,
Hashable,
ImplicitlyCopyable,
ImplicitlyDestructible,
Movable,
RegisterPassable,
TrivialRegisterPassable,
Writable
comptime members
accum_type
comptime accum_type = get_accum_type[a_type]()
Methods
__init__
__init__(*, cta_group: Int = 2, mma_shape: IndexList[3] = get_mma_shape[a_type, MatmulConfig[a_type, b_type, c_type, transpose_b].accum_type](), cluster_shape: IndexList[3] = Index[Int, Int, Int](2, 1, 1), AB_swapped: Bool = False, num_split_k: Int = 1, block_swizzle_size: Int = 0, raster_order: RasterOrder = RasterOrder.AlongM, k_group_size: Int = 1, num_pipeline_stages: Optional[Int] = None, num_accum_pipeline_stages: Int = 2, num_clc_pipeline_stages: Int = 2, register_based_epilogue: Bool = True, extra_smem_per_stage: Int = 0) -> Self
swap_AB_type
write_to
write_to[W: Writer](self, mut writer: W)
write_repr_to
write_repr_to(self, mut writer: T)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!