Mojo struct
AmdWarpBlockScatterGather
struct AmdWarpBlockScatterGather[SmemType: DType, thread_layout: Layout, smem_layout: Layout, simd_width: Int, is_a: Bool, warp_rows: Int, warp_cols: Int, swizzle: OptionalReg[Swizzle] = OptionalReg[Swizzle](None)]
Transports data from global memory -> registers -> shared memory. The work is divided by warp tile: each warp is responsible for moving one warp block of shared memory (smem).
Fields
- fragment (`LayoutTensor[SmemType, _compute_distribute_layout[smem_layout, thread_layout]()[1], MutableAnyOrigin, address_space=AddressSpace(5), layout_int_type=_get_layout_type(smem_layout, AddressSpace(5)), linear_idx_type=_get_index_type(smem_layout, AddressSpace(5)), masked=_distribute_is_masked[smem_layout, thread_layout]() if is_nvidia_gpu() else False]`)
Implemented traits
AnyType, UnknownDestructibility
Aliases
__del__is_trivial
alias __del__is_trivial = LayoutTensor[SmemType, _compute_distribute_layout[smem_layout, thread_layout]()[1], MutableAnyOrigin, address_space=AddressSpace(5), layout_int_type=_get_layout_type(smem_layout, AddressSpace(5)), linear_idx_type=_get_index_type(smem_layout, AddressSpace(5)), masked=_distribute_is_masked[smem_layout, thread_layout]() if is_nvidia_gpu() else False].__del__is_trivial
LoadFragmentType
alias LoadFragmentType = LayoutTensor[SmemType, _compute_distribute_layout[smem_layout, thread_layout]()[1], MutableAnyOrigin, address_space=AddressSpace(5), layout_int_type=_get_layout_type(smem_layout, AddressSpace(5)), linear_idx_type=_get_index_type(smem_layout, AddressSpace(5)), masked=_distribute_is_masked[smem_layout, thread_layout]() if is_nvidia_gpu() else False]
SmemTensorType
alias SmemTensorType = LayoutTensor[SmemType, smem_layout, MutableAnyOrigin, address_space=AddressSpace(5)]
Methods
__init__
__init__(out self)
load_compute_tile
load_compute_tile[GmemType: DType, GmemLayout: Layout](mut self, mut cache_manager: RingBuffer[pipeline_stages, a_tile_layout, b_tile_layout, TileTypeA, TileTypeB, WM, WN, WK, warps_per_block_m, warps_per_block_n], mut phase: Int, gmem_tile: LayoutTensor[GmemType, GmemLayout, MutableAnyOrigin, address_space=AddressSpace(1)], stage: Int, tile_idx: Int)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!