Mojo function

copy_accum_to_gmem

copy_accum_to_gmem[
    c_type: DType,
    c_tile_rank: Int,
    c_tile_shape: IndexList[c_tile_rank],
    c_desc_shape: IndexList[c_tile_rank],
    num_accum_pipeline_stages: Int,
    c_tensor_layout: Layout,
    /,
    *,
    c_smem_layout: Layout,
    repeat: Int,
    accum_type: DType,
    cta_group: Int,
    epilogue_dtype: DType,
    block_tile_shape: IndexList[3],
    mma_shape: IndexList[3],
    num_output_warps: Int,
    c_swizzle: TensorMapSwizzle = TensorMapSwizzle.SWIZZLE_128B,
    elementwise_compute_lambda_fn: Optional[elementwise_compute_lambda_type] = None,
    register_based_epilogue: Bool = True,
    transpose_c: Bool = False,
    scale_c_coord: Bool = True
](
    c_smem_base: UnsafePointer[Scalar[c_type], MutAnyOrigin, address_space=AddressSpace.SHARED],
    c_tma_op: TMATensorTile[c_type, c_tile_rank, c_tile_shape, c_desc_shape],
    c: LayoutTensor[c_type, c_tensor_layout, MutAnyOrigin],
    mma_output_pipeline: ProducerConsumerPipeline[num_accum_pipeline_stages],
    mma_output_stage: UInt32,
    tmem_offset: UInt32,
    c_coord: Tuple[UInt32, UInt32],
    c_shape: Tuple[UInt32, UInt32],
    expert_scale: Float32,
    group_end_idx: UInt32
)