Mojo struct
QRegisterBuffer
struct QRegisterBuffer[mut: Bool, dtype: DType, layout: Layout, address_space: AddressSpace, alignment: Int, origin: Origin[mut], masked: Bool, layout_int_type: DType, linear_idx_type: DType, //, mma_shape: IndexList[3], k_group_size: Int, WM: Int, WN: Int, BN: Int, BK: Int, depth: Int, thread_layout: Layout]
Fields
- gmem_tensor (`LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment]`): same type as the `GlobalTensorType` alias.
- mma_tile (`LayoutTensor[dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)]`): same type as the `RegisterTileType` alias.
Implemented traits
AnyType, UnknownDestructibility
Aliases​
__del__is_trivial
​
alias __del__is_trivial = LayoutTensor[dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)].__del__is_trivial if LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment].__del__is_trivial else LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment].__del__is_trivial
GlobalTensorType
​
alias GlobalTensorType = LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment]
MMA_K
​
alias MMA_K = mma_shape.__getitem__[3, DType.int64, Int](2)
MMA_M
​
alias MMA_M = mma_shape.__getitem__[3, DType.int64, Int](0)
num_k_tiles
​
alias num_k_tiles = ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))
num_mmas
​
alias num_mmas = ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0))
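num_tiles
Note: the expression below is the compiler-expanded form of the floor division `depth // BK`; it evaluates to 0 when `BK == 0`.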
alias num_tiles = 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)
RegisterTileType
​
alias RegisterTileType = LayoutTensor[dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, address_space=AddressSpace(5)]
simd_width
​
alias simd_width = simd_width_of[dtype]()
TiledIteratorType
​
alias TiledIteratorType = LayoutTensorIter[dtype, LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0))), simd_width_of[dtype]()]()[0], MutableAnyOrigin, address_space=AddressSpace(5), axis=OptionalReg[Int]({:@stdlib::@builtin::@int::@Int {0}, 0}), layout_int_type=_get_layout_type(Layout.row_major((ceildiv(BK, 
(mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), linear_idx_type=_get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), masked=_tile_is_masked[Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0))), simd_width_of[dtype]()]()]
Methods​
__init__
​
__init__(out self, tensor: LayoutTensor[dtype, layout, origin, address_space=address_space, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment])
load_from_dram
​
load_from_dram(mut self)
get_mma_tile
​
get_mma_tile[tile_idx: Int, k_idx: Int](self) -> LayoutTensor[dtype, LayoutTensor._compute_tile_layout[True, dtype, LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 
((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) 
else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 
0]()[0], MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else 
BK._mlir_value) == 0) else ((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s 
depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else 
BK._mlir_value)._mlir_value), 0]()[0], AddressSpace(5)), _get_index_type(LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else ((div_s Layout.row_major((ceildiv(BK, 
(mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & 
((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 0]()[0], AddressSpace(5)), False, 
align_of[dtype](), 0 if (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) == 0) else ((div_s LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) 
else ((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 
0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 
0]()[0].shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) == 0) else ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))._mlir_value) + -1) if ((((rem_s LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) 
else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else ((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) 
else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else 
BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 0]()[0].shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) == 0) else ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))._mlir_value) == 0) ^ True) & ((LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, align_of[dtype](), 0 if 
(0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else ((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) 
else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK 
== 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 0]()[0].shape[0].value[ComptimeOrigin]() < 0) ^ (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) < 0))) else (div_s LayoutTensor._compute_tile_layout[True, dtype, Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), MutableAnyOrigin, AddressSpace(5), Layout.__init__(IntTuple[__origin_of()](1), IntTuple[__origin_of()](1)), _get_layout_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), _get_index_type(Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()), AddressSpace(5)), False, 
align_of[dtype](), 0 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else ((div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) + -1) if ((((rem_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s 
depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value) == 0) ^ True) & ((Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]() < 0) ^ (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) < 0))) else (div_s Layout.row_major((ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) * ceildiv(WM, mma_shape.__getitem__[3, DType.int64, Int](0)) * 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)), simd_width_of[dtype]()).shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else 
BK._mlir_value) == 0) else 0 if (BK == 0) else ((div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) + -1) if ((((rem_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value) == 0) ^ True) & ((BK < 0) ^ (depth < 0))) else (div_s depth._mlir_value, 1 if (BK == 0) else BK._mlir_value)._mlir_value), 0]()[0].shape[0].value[ComptimeOrigin]()._mlir_value, 1 if (ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size)) == 0) else ceildiv(BK, (mma_shape.__getitem__[3, DType.int64, Int](2) * k_group_size))._mlir_value), 0]()[0], MutableAnyOrigin, address_space=AddressSpace(5)]
Returns:
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!