Mojo struct
TMemTile
@register_passable(trivial)
struct TMemTile[dtype_: DType, BM: Int, BN: Int]
Fields
- tmem_addr (UInt32)
Implemented traits
AnyType,
Copyable,
ImplicitlyCopyable,
Movable,
UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = True
__del__is_trivial
alias __del__is_trivial = True
__moveinit__is_trivial
alias __moveinit__is_trivial = True
dtype
alias dtype = dtype_
dtype_size
alias dtype_size = dtype_.size_of()
num_m_tiles
alias num_m_tiles = (BM // 64)
Methods
__init__
__init__(tmem_addr: UInt32) -> Self
__getitem__
__getitem__(self, i: UInt32) -> Self
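offset
(The signature for this method is missing from this page; see the struct's source for its parameters and return type.)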
allocate_register_tile
static allocate_register_tile[*, num_threads: Int]() -> LayoutTensor[dtype_, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]
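Returns: LayoutTensor — a tensor of dtype_ in AddressSpace(5) with the layout and element_layout=Layout.row_major(1, 2) given in the signature above.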
store_async
store_async[*, num_threads: Int](self, src: LayoutTensor[dtype_, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
store_async[src_type: DType](self, src: LayoutTensor[src_type, Layout.row_major(BN), MutableAnyOrigin, address_space=AddressSpace(5)])
store
store[*, num_threads: Int](self, src: LayoutTensor[dtype_, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
store[src_type: DType](self, src: LayoutTensor[src_type, Layout.row_major(BN), MutableAnyOrigin, address_space=AddressSpace(5)])
load_async_st_matrix
load_async_st_matrix[*, num_threads: Int](self) -> LayoutTensor[dtype_, Layout(IntTuple(IntTuple(2, (ceildiv((2 * BM), 128) // ceildiv(num_threads, 128))), IntTuple((BN // 8)), Tuple[]()), IntTuple(IntTuple(2, ((BN * 2) // 4)), IntTuple(4), Tuple[]())), MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]
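Returns: LayoutTensor — a tensor of dtype_ in AddressSpace(5) with the layout and element_layout=Layout.row_major(1, 2) given in the signature above (the same type that allocate_register_tile returns).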
load_async
load_async(self) -> LayoutTensor[dtype_, Layout.row_major(BN), MutableAnyOrigin, address_space=AddressSpace(5)]
Returns: LayoutTensor — a tensor of dtype_ with layout Layout.row_major(BN) in AddressSpace(5).
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!