Mojo function
blackwell_matmul_tma_umma_warp_specialized
blackwell_matmul_tma_umma_warp_specialized[
    transpose_b: Bool,
    *,
    config: MatmulConfig[config.a_type, config.b_type, config.c_type, transpose_b],
    elementwise_compute_lambda_fn: Optional[elementwise_compute_lambda_type] = None,
    register_based_epilogue: Bool = True,
    pdl_level: PDLLevel = PDLLevel(),
    max_profiled_tiles_per_SM: Optional[UInt32] = None
](
    c_device: TileTensor[c_device.dtype, c_device.LayoutType, c_device.origin, address_space=c_device.address_space, linear_idx_type=c_device.linear_idx_type, element_shape_types=c_device.element_shape_types],
    a_device: TileTensor[a_device.dtype, a_device.LayoutType, a_device.origin, address_space=a_device.address_space, linear_idx_type=a_device.linear_idx_type, element_shape_types=a_device.element_shape_types],
    b_device: TileTensor[b_device.dtype, b_device.LayoutType, b_device.origin, address_space=b_device.address_space, linear_idx_type=b_device.linear_idx_type, element_shape_types=b_device.element_shape_types],
    ctx: DeviceContext
)
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!