Mojo struct

MatmulConfig

@register_passable(trivial) struct MatmulConfig[a_type: DType, b_type: DType, c_type: DType, transpose_b: Bool = False, mma_shape: IndexList[3] = get_mma_shape[::DType,::DType,::Int]()]

Static configuration of GPU matmul.

Fields

block_tile_shape (IndexList[3]):
warp_tile_shape (IndexList[3]):
num_pipeline_stages (UInt):
num_k_partitions (UInt):
k_group_size (UInt):
num_warp_k_partitions (UInt):
cluster_shape (IndexList[3]):
num_consumer (UInt):
partitioned_multicast (Bool):
scheduler_hint (IndexList[3]):

Implemented traits

AnyType, Copyable, Movable, Stringable, UnknownDestructibility, Writable

Aliases

`ACCUM_PRECISION`

alias ACCUM_PRECISION = 1

`accum_type`

alias accum_type = get_accum_type[::DType,::DType]()

`OUTPUT_PRECISION`

alias OUTPUT_PRECISION = 2

`split_k_reduction_scheme`

alias split_k_reduction_scheme = env_get_int[::StringSlice[::Bool,::Origin[$0]],::Int]()

`split_k_reduction_type`

alias split_k_reduction_type = c_type if (env_get_int[::StringSlice[::Bool,::Origin[$0]],::Int]() == 2) else get_accum_type[::DType,::DType]()

Methods

`__init__`

__init__(block_tile_shape: IndexList[3] = Index(128, 128, 32), warp_tile_shape: IndexList[3] = Index(64, 64, 32), cluster_shape: IndexList[3] = Index(1, 1, 1), num_pipeline_stages: UInt = UInt(4), num_k_partitions: UInt = UInt(1), k_group_size: UInt = UInt(1), num_warp_k_partitions: UInt = UInt(1), num_consumer: UInt = UInt(1), partitioned_multicast: Bool = False, scheduler_hint: IndexList[3] = Index(2, 2, 2), pdl_level: PDLLevel = PDLLevel()) -> Self

`__eq__`

__eq__(self, rhs: MatmulConfig[a_type, b_type, c_type, transpose_b, mma_shape]) -> Bool

`num_warps_m`

num_warps_m(self) -> UInt

`num_warps_n`

num_warps_n(self) -> UInt

`num_threads`

num_threads(self) -> UInt

`shared_mem_usage`

shared_mem_usage(self) -> Int

`grid_dim`

grid_dim(self, m: UInt, n: UInt) -> IndexList[3]

`block_dim`

block_dim(self) -> IndexList[3]

`work_space_size`

work_space_size(self, M: UInt, N: UInt) -> UInt

`pdl_level`

pdl_level(self) -> PDLLevel

`__str__`

__str__(self) -> String

`write_to`

write_to[W: Writer](self, mut writer: W)

`__repr__`

__repr__(self) -> String

`__hash__`

__hash__[H: Hasher](self, mut hasher: H)

Updates hasher with the underlying bytes.

Parameters:

H (Hasher): The hasher type.

Args:

hasher (H): The hasher instance.

Fields

Implemented traits

Aliases

ACCUM_PRECISION

accum_type

OUTPUT_PRECISION

split_k_reduction_scheme

split_k_reduction_type

Methods

__init__

__eq__

num_warps_m

num_warps_n

num_threads

shared_mem_usage

grid_dim

block_dim

work_space_size

pdl_level

__str__

write_to

__repr__

__hash__