Mojo struct
AmdWarpBlockScatterGather
struct AmdWarpBlockScatterGather[SmemType: DType, thread_layout: Layout, warp_tile_layout: Layout, simd_width: Int, is_a: Bool, warp_rows: Int, warp_cols: Int, swizzle: OptionalReg[Swizzle] = None]
Transports data from global memory -> registers -> shared memory. It does this per warp tile: each warp is responsible for moving one warp block of shared memory (smem).
Fields
- fragment (LayoutTensor[SmemType, Layout.row_major(((warp_tile_layout.size() // thread_layout.size()) // simd_width), simd_width), MutableAnyOrigin, address_space=AddressSpace(5)]):
Implemented traits
AnyType, UnknownDestructibility
Aliases
__del__is_trivial
alias __del__is_trivial = True
elements_loaded_per_thread
alias elements_loaded_per_thread = (warp_tile_layout.size() // thread_layout.size())
LoadFragmentType
alias LoadFragmentType = LayoutTensor[SmemType, Layout.row_major(((warp_tile_layout.size() // thread_layout.size()) // simd_width), simd_width), MutableAnyOrigin, address_space=AddressSpace(5)]
simd_loads_per_thread
alias simd_loads_per_thread = ((warp_tile_layout.size() // thread_layout.size()) // simd_width)
total_participating_threads
alias total_participating_threads = thread_layout.size()
Methods
__init__
__init__(out self)
load_compute_tile
load_compute_tile[GmemType: DType, GmemLayout: Layout](mut self, mut cache_manager: RingBuffer[SmemBufferTypeA, SmemBufferTypeB, consumer_warps], mut phase: Int, gmem_tile: LayoutTensor[GmemType, GmemLayout, MutableAnyOrigin, address_space=AddressSpace(1)], stage: Int, tile_idx: Int)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!