Skip to main content

Mojo function

blackwell_block_scaled_matmul_tma_umma_warp_specialized

blackwell_block_scaled_matmul_tma_umma_warp_specialized[
    c_type: DType,
    c_layout: Layout,
    a_type: DType,
    a_layout: Layout,
    a_offsets_layout: Layout,
    a_scale_offsets_layout: Layout,
    b_type: DType,
    b_layout: Layout,
    expert_ids_layout: Layout,
    sfa_dtype: DType,
    sfa_layout: Layout,
    sfb_dtype: DType,
    _sfb_layout: Layout,
    expert_scale_layout: Layout,
    transpose_b: Bool,
    *,
    config: BlockScaledMatmulConfig[a_type, b_type, c_type, sfa_dtype, sfb_dtype, transpose_b],
    elementwise_compute_lambda_fn: Optional[elementwise_compute_lambda_type] = None,
    register_based_epilogue: Bool = True,
    pdl_level: PDLLevel = PDLLevel(),
    max_profiled_tiles_per_SM: Optional[UInt32] = None
](
    c_device: LayoutTensor[c_type, c_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    a_device: LayoutTensor[a_type, a_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    a_offsets: LayoutTensor[DType.uint32, a_offsets_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    a_scale_offsets: LayoutTensor[DType.uint32, a_scale_offsets_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    _b_device: LayoutTensor[b_type, b_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    expert_ids: LayoutTensor[DType.int32, expert_ids_layout, origin, address_space=address_space, element_layout=element_layout, layout_int_type=layout_int_type, linear_idx_type=linear_idx_type, masked=masked, alignment=alignment],
    a_scales: LayoutTensor[sfa_dtype, sfa_layout, MutAnyOrigin],
    _b_scales: LayoutTensor[sfb_dtype, _sfb_layout, MutAnyOrigin],
    expert_scales: LayoutTensor[DType.float32, expert_scale_layout, MutAnyOrigin],
    num_active_experts: Int,
    ctx: DeviceContext
)

Was this page helpful?