Mojo function

quantize_fp8_kernel

```mojo
quantize_fp8_kernel[
    out_type: DType,
    scales_type: DType,
    in_type: DType,
    input_fn: fn[width: Int, alignment: Int](row: Int, col: Int) capturing -> SIMD[in_type, width],
    num_threads: Int,
    group_size: Int,
    simd_width: Int
](
    output: NDBuffer[out_type, 2, MutAnyOrigin],
    scales: NDBuffer[scales_type, 2, MutAnyOrigin],
    scale_ub: Scalar[scales_type]
)
```