Mojo function

load_AB_SFA_SFB

load_AB_SFA_SFB[
    a_type: DType,
    b_type: DType,
    sfa_dtype: DType,
    sfb_dtype: DType,
    a_rank: Int,
    a_tile_shape: IndexList[a_rank],
    a_desc_shape: IndexList[a_rank],
    b_rank: Int,
    b_tile_shape: IndexList[b_rank],
    b_desc_shape: IndexList[b_rank],
    sfa_rank: Int,
    sfa_tile_shape: IndexList[sfa_rank],
    sfa_desc_shape: IndexList[sfa_rank],
    sfb_rank: Int,
    sfb_tile_shape: IndexList[sfb_rank],
    sfb_desc_shape: IndexList[sfb_rank],
    a_dim0: Int,
    a_dim1: Int,
    a_num_tiles: Int,
    a_swizzle_bytes: Int,
    b_dim0: Int,
    b_dim1: Int,
    b_num_tiles: Int,
    b_swizzle_bytes: Int,
    num_pipeline_stages: Int,
    /,
    *,
    block_tile_shape: IndexList[3],
    mma_shape: IndexList[3],
    num_sf_k_tiles: Int,
    cta_group: Int = 1,
    k_group_size: UInt = 1
](
    a_tma_op: TMATensorTile[a_type, a_rank, a_tile_shape, a_desc_shape],
    b_tma_op: TMATensorTile[b_type, b_rank, b_tile_shape, b_desc_shape],
    sfa_tma_op: TMATensorTile[sfa_dtype, sfa_rank, sfa_tile_shape, sfa_desc_shape],
    sfb_tma_op: TMATensorTile[sfb_dtype, sfb_rank, sfb_tile_shape, sfb_desc_shape],
    a_smem_tiles: SMemTileArray2D[a_type, a_dim0, a_dim1, a_num_tiles, a_swizzle_bytes],
    b_smem_tiles: SMemTileArray2D[b_type, b_dim0, b_dim1, b_num_tiles, b_swizzle_bytes],
    sfa_smem_tiles: SMemTileArrayWithLayout[sfa_dtype, sfa_smem_tiles.tile_layout, sfa_smem_tiles.num_tiles, sfa_smem_tiles.alignment],
    sfb_smem_tiles: SMemTileArrayWithLayout[sfb_dtype, sfb_smem_tiles.tile_layout, sfb_smem_tiles.num_tiles, sfb_smem_tiles.alignment],
    load_mma_pipeline: ProducerConsumerPipeline[num_pipeline_stages],
    peer_cta_coord: Tuple[UInt, UInt, UInt],
    work_tile_coord: Tuple[UInt, UInt, UInt],
    a_multicast_mask: UInt16,
    b_multicast_mask: UInt16,
    iter_idx: UInt32,
    elect_one_cta: Bool
)