Mojo function

quantize_dynamic_scaled_fp4

quantize_dynamic_scaled_fp4[
    out_dtype: DType,
    scales_dtype: DType,
    in_dtype: DType,
    output_layout: Layout,
    scales_layout: Layout,
    input_layout: Layout,
    //,
    *,
    SF_VECTOR_SIZE: Int = 16,
    num_max_threads: Int = 512
](
    ctx: DeviceContext,
    output: LayoutTensor[out_dtype, output_layout, MutAnyOrigin],
    scales: LayoutTensor[scales_dtype, scales_layout, MutAnyOrigin],
    input: LayoutTensor[in_dtype, input_layout, MutAnyOrigin],
    num_cols: Int,
    num_cols_padded: Int,
    tensor_sf: Float32 = 1
)