Mojo function

multi_stage_store_C

multi_stage_store_C[c_type: DType, c_tile_rank: Int, c_tile_shape: IndexList[c_tile_rank], c_tensor_layout: Layout, c_desc_shape: IndexList[c_tile_rank], num_accum_pipeline_stages: Int, /, *, c_smem_layout: Layout, accum_type: DType, block_tile_shape: IndexList[3], mma_shape: IndexList[3], stage_stride_cols: Int, c_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B, cta_group: Int = 1, num_output_warps: Int = 4, elementwise_lambda_fn: Optional[elementwise_epilogue_type] = None, transpose_c: Bool = False](c_smem_base: UnsafePointer[Scalar[c_type], MutAnyOrigin, address_space=AddressSpace.SHARED], c_tma_op: TMATensorTile[c_type, c_tile_rank, c_tile_shape, c_desc_shape], c: LayoutTensor[c_type, c_tensor_layout, MutAnyOrigin], accum_pipeline_consumer_state: PipelineState[num_accum_pipeline_stages], accum_full_mbar: UnsafePointer[SharedMemBarrier, MutAnyOrigin, address_space=AddressSpace.SHARED], accum_empty_mbar: UnsafePointer[SharedMemBarrier, MutAnyOrigin, address_space=AddressSpace.SHARED], tmem_addr: UInt32, work_tile_coord: Tuple[UInt, UInt], group_end_idx: UInt32, elect_one_warp: Bool, M: UInt32, N: UInt32)