Skip to main content

Mojo function

mla_sm100_prefill_fp8

```mojo
mla_sm100_prefill_fp8[
    output_type: DType,
    q_type: DType,
    KVType: MHAOperand,
    KRopeType: MHAOperand,
    MaskType: MHAMask,
    ScoreModType: ScoreModTrait,
    MaxPromptLenType: OptionallyStaticInt,
    //,
    config: MHAConfig[config.dtype],
    group: Int,
    q_depth: Int,
    cache_depth: Int,
    use_score_mod: Bool,
    _ndbuffer_mha_operand: Bool,
](
    output: LayoutTensor[
        output_type,
        output.layout,
        output.origin,
        element_layout=output.element_layout,
        layout_int_type=output.layout_int_type,
        linear_idx_type=output.linear_idx_type,
        masked=output.masked,
        alignment=output.alignment,
    ],
    q: LayoutTensor[
        q_type,
        q.layout,
        q.origin,
        element_layout=q.element_layout,
        layout_int_type=q.layout_int_type,
        linear_idx_type=q.linear_idx_type,
        masked=q.masked,
        alignment=q.alignment,
    ],
    k: KVType,
    v: KVType,
    k_rope: KRopeType,
    mask_functor: MaskType,
    score_mod_functor: ScoreModType,
    valid_length: LayoutTensor[
        DType.uint32,
        valid_length.layout,
        valid_length.origin,
        element_layout=valid_length.element_layout,
        layout_int_type=valid_length.layout_int_type,
        linear_idx_type=valid_length.linear_idx_type,
        masked=valid_length.masked,
        alignment=valid_length.alignment,
    ],
    max_prompt_len: MaxPromptLenType,
    scale: Float32,
    batch_size: Int,
    ctx: DeviceContext,
)
```

Was this page helpful?