Mojo struct
TMALoadOp
struct TMALoadOp[a_type: DType, b_type: DType, block_tile_shape: IndexList[3], cluster_shape: IndexList[3], a_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, b_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B]
Fields
- a_tma_ptr (LegacyUnsafePointer[TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]]): Pointer to the TMATensorTile descriptor for the A operand.
- b_tma_ptr (LegacyUnsafePointer[TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]]): Pointer to the TMATensorTile descriptor for the B operand.
Implemented traits
AnyType,
Copyable,
DevicePassable,
ImplicitlyCopyable,
LoadOp,
UnknownDestructibility
Aliases
__copyinit__is_trivial
comptime __copyinit__is_trivial = True
__del__is_trivial
comptime __del__is_trivial = True
a_tma_desc_layout
comptime a_tma_desc_layout = _tma_desc_tile_layout[a_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=a_swizzle]()
a_tma_layout
comptime a_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2))
a_tma_type
comptime a_tma_type = TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]
args_type
comptime args_type = TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
b_tma_desc_layout
comptime b_tma_desc_layout = _tma_desc_tile_layout[b_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=b_swizzle]()
b_tma_layout
comptime b_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2))
b_tma_type
comptime b_tma_type = TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
device_type
comptime device_type = TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle]
Methods
__init__
__init__(out self, args: TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout])
get_type_name
static get_type_name() -> String
Gets this type's name, for use in error messages when handing arguments to kernels.
Returns: This type's name as a String.
get_device_type_name
static get_device_type_name() -> String
Gets device_type's name, for use in error messages when handing arguments to kernels.
Returns: The device_type's name as a String.
to_kernel_args
static to_kernel_args(a: LayoutTensor[a_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b: LayoutTensor[b_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], ctx: DeviceContext) -> TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
Returns: A TMALoadOpArgs value built from the given A and B tensors and the device context.
__call__
__call__(self, a_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], m: UInt32, n: UInt32, k: UInt32, ref [3] mbar: SharedMemBarrier)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!