Mojo struct
TMALoadOp
struct TMALoadOp[a_type: DType, b_type: DType, block_tile_shape: IndexList[3], cluster_shape: IndexList[3], a_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, b_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B]
Fields
- a_tma_ptr (LegacyUnsafePointer[TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]]): Pointer to the TMATensorTile descriptor for the A operand.
- b_tma_ptr (LegacyUnsafePointer[TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]]): Pointer to the TMATensorTile descriptor for the B operand.
Implemented traits
AnyType,
Copyable,
DevicePassable,
ImplicitlyCopyable,
LoadOp,
UnknownDestructibility
Aliases
__copyinit__is_trivial
comptime __copyinit__is_trivial = True
__del__is_trivial
comptime __del__is_trivial = True
a_tma_desc_layout
comptime a_tma_desc_layout = _tma_desc_tile_layout[a_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=a_swizzle]()
a_tma_layout
comptime a_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2))
a_tma_type
comptime a_tma_type = TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]
args_type
comptime args_type = TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
b_tma_desc_layout
comptime b_tma_desc_layout = _tma_desc_tile_layout[b_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=b_swizzle]()
b_tma_layout
comptime b_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2))
b_tma_type
comptime b_tma_type = TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
device_type
comptime device_type = TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle]
Methods
__init__
__init__(out self, args: TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout])
get_type_name
static get_type_name() -> String
Gets this type's name, for use in error messages when handing arguments to kernels.
Returns: This type's name as a String.
get_device_type_name
static get_device_type_name() -> String
Gets device_type's name, for use in error messages when handing arguments to kernels.
Returns: The device_type's name as a String.
to_kernel_args
static to_kernel_args(a: LayoutTensor[a_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b: LayoutTensor[b_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], ctx: DeviceContext) -> TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]
Returns: A TMALoadOpArgs value built from the given A and B tensors and the device context.
__call__
__call__(self, a_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], m: UInt32, n: UInt32, k: UInt32, ref [3] mbar: SharedMemBarrier)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!