Skip to main content

Mojo function

grouped_matmul_dynamic_scaled_fp8

```mojo
grouped_matmul_dynamic_scaled_fp8[
    c_type: DType,
    a_type: DType,
    b_type: DType,
    a_scales_type: DType,
    b_scales_type: DType,
    //,
    input_scale_granularity: StringSlice[StaticConstantOrigin],
    weight_scale_granularity: StringSlice[StaticConstantOrigin],
    transpose_b: Bool = False,
    target: StringSlice[StaticConstantOrigin] = "cpu",
](
    c: NDBuffer[c_type, 2, origin, shape],
    a: NDBuffer[a_type, 2, origin, shape],
    b: NDBuffer[b_type, 3, origin, shape],
    a_scales: NDBuffer[a_scales_type, 2, origin, shape],
    b_scales: NDBuffer[b_scales_type, 3, origin, shape],
    a_offsets: NDBuffer[DType.uint32, 1, origin, shape],
    expert_ids: NDBuffer[DType.int32, 1, origin, shape],
    max_num_tokens_per_expert: Int,
    num_active_experts: Int,
    ctx: DeviceContext,
)
```

Was this page helpful?