Skip to main content

Mojo function

multi_stage_store_C_split_k

multi_stage_store_C_split_k[c_type: DType, c_smem_layout: Layout, c_layout: Layout, c_desc_layout: Layout, reduction_layout: Layout, num_accum_pipeline_stages: UInt, /, *, input_type: DType, accum_type: DType, block_tile_shape: IndexList[3], mma_shape: IndexList[3], stage_stride_cols: UInt, c_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, cta_group: Int = 1, num_output_warps: UInt = 4, max_tmem_cols: UInt = 512, elementwise_compute_lambda_fn: OptionalReg[elementwise_compute_lambda_type] = None, register_based_epilogue: Bool = True, transpose_c: Bool = False](scheduler: TileScheduler[num_stages, reduction_tile_shape, cluster_shape, rasterize_order, block_swizzle_size, num_split_k], reduction_tensor: LayoutTensor[accum_type, reduction_layout, MutAnyOrigin], c_iter: LayoutTensorIter[c_type, c_smem_layout, MutAnyOrigin, address_space=AddressSpace.SHARED, alignment=128], c_tma_op: TMATensorTile[c_type, c_layout, c_desc_layout], mma_output_pipeline: ProducerConsumerPipeline[num_accum_pipeline_stages], tmem_addr: UInt32, work_info: WorkInfo, elect_one_warp: Bool, M: UInt32, N: UInt32)

Was this page helpful?