Mojo struct
GlobalMemoryManager
struct GlobalMemoryManager[dtype: DType, BM: UInt32, BN: UInt32, BK: UInt32, depth: UInt32, num_heads: UInt32, group: UInt32, token_gen: Bool, q_depth: UInt32 = depth, output_depth: UInt32 = depth]
Fields
- q_offset (`UInt32`)
- q_runtime_layout (`RuntimeLayout[Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth)), IntTuple(Int.__init__[UInt32]((num_heads * q_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth)), element_type=DType.int32, linear_idx_type=DType.int32]`)
- output_offset (`UInt32`)
- output_runtime_layout (`RuntimeLayout[Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth)), IntTuple(Int.__init__[UInt32]((num_heads * output_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth)), element_type=DType.int32, linear_idx_type=DType.int32]`)
Implemented traits
AnyType,
UnknownDestructibility
Aliases
__del__is_trivial
alias __del__is_trivial = True
kv_gmem_layout
alias kv_gmem_layout = Layout(IntTuple(Int.__init__[UInt32](BN), Int.__init__[UInt32](depth)), IntTuple(Int.__init__[UInt32](((num_heads // group) * depth)), 1))
kv_num_heads
alias kv_num_heads = (num_heads // group)
output_gmem_layout
alias output_gmem_layout = Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth)), IntTuple(Int.__init__[UInt32]((num_heads * output_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth))
q_gmem_layout
alias q_gmem_layout = Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth)), IntTuple(Int.__init__[UInt32]((num_heads * q_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth))
Methods
__init__
__init__(out self, q_tile_idx: UInt32, kv_head_idx: UInt32, seq_len: Int, q_offset: UInt32, output_offset: UInt32)
get_q_tensor
get_q_tensor[qtype: DType](self, ptr: UnsafePointer[Scalar[qtype]]) -> LayoutTensor[qtype, Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth)), IntTuple(Int.__init__[UInt32]((num_heads * q_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](q_depth)), MutableAnyOrigin, layout_int_type=DType.int32, linear_idx_type=DType.int32, masked=True]
Returns: `LayoutTensor` with element type `qtype` and the `q_gmem_layout` layout (`masked=True`).
get_output_tensor
get_output_tensor[out_type: DType](self, ptr: UnsafePointer[Scalar[out_type]]) -> LayoutTensor[out_type, Layout(IntTuple(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth)), IntTuple(Int.__init__[UInt32]((num_heads * output_depth)), 1)) if (token_gen ^ True) else Layout.row_major(Int.__init__[UInt32](BM), Int.__init__[UInt32](output_depth)), MutableAnyOrigin, layout_int_type=DType.int32, linear_idx_type=DType.int32, masked=True]
Returns: `LayoutTensor` with element type `out_type` and the `output_gmem_layout` layout (`masked=True`).
get_kv_tensor
get_kv_tensor[kvtype: DType, //](self, ptr: UnsafePointer[Scalar[kvtype], address_space=address_space, mut=mut, origin=origin], kv_tile_num_rows: UInt32) -> LayoutTensor[kvtype, Layout(IntTuple(Int.__init__[UInt32](BN), Int.__init__[UInt32](depth)), IntTuple(Int.__init__[UInt32](((num_heads // group) * depth)), 1)), origin, address_space=address_space, masked=True]
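Returns: `LayoutTensor` with element type `kvtype` and the `kv_gmem_layout` layout (`masked=True`), carrying the origin and address space of `ptr`.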
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!