Skip to main content

Mojo struct

TileScheduler

@register_passable(trivial) struct TileScheduler[num_stages: Int, block_tile_shape: IndexList[3], cluster_shape: IndexList[3, element_type=DType.uint32] = Index[dtype=DType.uint32](1, 1, 1), rasterize_order: RasterOrder = RasterOrder(1), block_swizzle_size: Int = 8, num_split_k: Int = 1]

Fields

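  • locks_ptr (UnsafePointer[Int32])
  • scheduler (TileScheduler[num_stages, cluster_shape, rasterize_order, block_swizzle_size]): An instance of the type named by the UnderlyingScheduler alias below. Note that its parameter list differs from this struct's (it takes no block_tile_shape or num_split_k), so it is presumably a separate TileScheduler struct defined in another module.
  • total_k_tiles (UInt32)
  • k_tiles_per_split (UInt32)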

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = True

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = True

BK

alias BK = block_tile_shape.__getitem__[3, DType.int64, Int](2)

BM

alias BM = block_tile_shape.__getitem__[3, DType.int64, Int](0)

BN

alias BN = block_tile_shape.__getitem__[3, DType.int64, Int](1)

UnderlyingScheduler

alias UnderlyingScheduler = TileScheduler[num_stages, cluster_shape, rasterize_order, block_swizzle_size]

Methods

__init__

__init__(cluster_dim: StaticTuple[Int32, 3], mnk: StaticTuple[UInt32, 3], clc_response_ptr: UnsafePointer[UInt128, address_space=AddressSpace(3)], full_mbar_ptr: UnsafePointer[SharedMemBarrier, address_space=AddressSpace(3)], empty_mbar_ptr: UnsafePointer[SharedMemBarrier, address_space=AddressSpace(3)], locks_ptr: UnsafePointer[Int8]) -> Self

convert_to_splitk_work_info

convert_to_splitk_work_info(self, work_info: WorkInfo) -> WorkInfo

Returns:

WorkInfo

initial_work_info

initial_work_info(self) -> WorkInfo

Returns:

WorkInfo

advance_to_next_work

advance_to_next_work(self, mut clc_state: PipelineState[num_stages]) -> PipelineState[num_stages]

Returns:

PipelineState[num_stages]

fetch_next_work

fetch_next_work(self, work_info: WorkInfo, consumer_state: PipelineState[num_stages]) -> WorkInfo

Returns:

WorkInfo

is_last_split

is_last_split(self, work_tile_info: WorkInfo) -> Bool

Returns:

Bool

output_tile_index

output_tile_index(self, work_info: WorkInfo) -> UInt32

Returns:

UInt32

store_to_workspace

store_to_workspace[accum_type: DType, size: Int, workspace_layout: Layout, /, *, repeat: Int, do_reduction: Bool = False, write_back: Bool = False](self, mut reg_tile_upper: SIMD[accum_type, size], mut reg_tile_lower: SIMD[accum_type, size], tmem_addr: UInt32, reduction_workspace: LayoutTensor[accum_type, workspace_layout, origin], epilogue_thread_idx: UInt, reduction_tile_idx: UInt32)

reduction

reduction[accum_type: DType, size: Int, workspace_layout: Layout, /, *, repeat: Int](self, mut reg_tile_upper: SIMD[accum_type, size], mut reg_tile_lower: SIMD[accum_type, size], reduction_workspace: LayoutTensor[accum_type, workspace_layout, origin], tmem_addr: UInt32, epilogue_thread_idx: UInt, work_info: WorkInfo) -> Bool

Returns:

Bool

wait_eq

static wait_eq(lock_ptr: UnsafePointer[Int32], barrier_id: Int32, barrier_group_thread_idx: Int, lock_idx: UInt32, val: UInt32)

wait_lt

static wait_lt(lock_ptr: UnsafePointer[Int32], barrier_id: Int32, barrier_group_thread_idx: Int, lock_idx: UInt32, count: UInt32)

arrive_set

static arrive_set(lock_ptr: UnsafePointer[Int32], barrier_id: Int32, barrier_group_thread_idx: Int, lock_idx: UInt32, val: UInt32)

Was this page helpful?