Mojo function

shared_memory_epilogue

```mojo
shared_memory_epilogue[
    MMA_M: UInt,
    data_paths: UInt,
    num_stages: UInt,
    stage: UInt,
    stageN: UInt,
    c_type: DType,
    shared_n: UInt,
    simd_size: UInt,
    c_smem_upper_layout: Layout,
    c_smem_lower_layout: Layout,
    swizzle: Swizzle,
    compute_lambda_fn: fn[dtype: DType, width: Int, *, alignment: Int = 1](IndexList[2], SIMD[dtype, width]) capturing -> SIMD[dtype, width],
    num_output_warps: UInt
](
    M: UInt32,
    N: UInt32,
    c_col: UInt,
    c_row: UInt,
    c_smem_warp_tile_upper: LayoutTensor[c_type, c_smem_upper_layout, MutableAnyOrigin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    c_smem_warp_tile_lower: LayoutTensor[c_type, c_smem_lower_layout, MutableAnyOrigin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment]
)
```