
Mojo struct

TMALoadOp

struct TMALoadOp[a_type: DType, b_type: DType, block_tile_shape: IndexList[3], cluster_shape: IndexList[3], a_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, b_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B]

Fields

  • a_tma_ptr (LegacyUnsafePointer[TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]]):
  • b_tma_ptr (LegacyUnsafePointer[TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]]):

Implemented traits

AnyType, Copyable, DevicePassable, ImplicitlyCopyable, LoadOp, UnknownDestructibility

Aliases

__copyinit__is_trivial

comptime __copyinit__is_trivial = True

__del__is_trivial

comptime __del__is_trivial = True

a_tma_desc_layout

comptime a_tma_desc_layout = _tma_desc_tile_layout[a_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=a_swizzle]()

a_tma_layout

comptime a_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](0) // cluster_shape.__getitem__[3, DType.int64, Int](0)), block_tile_shape.__getitem__[3, DType.int64, Int](2))

a_tma_type

comptime a_tma_type = TMATensorTile[a_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout]

args_type

comptime args_type = TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]

b_tma_desc_layout

comptime b_tma_desc_layout = _tma_desc_tile_layout[b_type, 2, Index((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2)), swizzle_mode=b_swizzle]()

b_tma_layout

comptime b_tma_layout = Layout.row_major((block_tile_shape.__getitem__[3, DType.int64, Int](1) // cluster_shape.__getitem__[3, DType.int64, Int](1)), block_tile_shape.__getitem__[3, DType.int64, Int](2))

b_tma_type

comptime b_tma_type = TMATensorTile[b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]

device_type

comptime device_type = TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle]

Methods

__init__

__init__(out self, args: TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout])

get_type_name

static get_type_name() -> String

Gets this type's name, for use in error messages when handing arguments to kernels.

Returns:

String

get_device_type_name

static get_device_type_name() -> String

Gets device_type's name, for use in error messages when handing arguments to kernels.

Returns:

String

to_kernel_args

static to_kernel_args(a: LayoutTensor[a_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b: LayoutTensor[b_type, layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], ctx: DeviceContext) -> TMALoadOpArgs[a_type, b_type, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].a_tma_desc_layout, TMALoadOp[a_type, b_type, block_tile_shape, cluster_shape, a_swizzle, b_swizzle].b_tma_desc_layout]

Returns:

TMALoadOpArgs

__call__

__call__(self, a_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], b_smem_tile: LayoutTensor[dtype, layout, origin, address_space=AddressSpace.SHARED, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment], m: UInt32, n: UInt32, k: UInt32, ref [3] mbar: SharedMemBarrier)
