Skip to main content
Log in

Mojo function

mla_prefill

mla_prefill[
    q_type: DType,
    k_t: MHAOperand,
    v_t: MHAOperand,
    k_rope_t: MHAOperand,
    output_type: DType,
    softmax_type: DType,
    mask_t: MHAMask,
    score_mod_t: ScoreModTrait,
    config: MHAConfig,
    group: Int = 128,
    q_depth: Int = 192,
    cache_depth: Int = 576,
    use_score_mod: Bool = False,
    write_softmax_info: Bool = False,
    use_cascade_attention: Bool = False,
    _ndbuffer_mha_operand: Bool = False
](
    q_ptr: UnsafePointer[SIMD[q_type, 1]],
    k: k_t,
    v: v_t,
    k_rope: k_rope_t,
    output_ptr: UnsafePointer[SIMD[output_type, 1]],
    softmax_info_ptr: UnsafePointer[SIMD[softmax_type, 1]],
    prev_output_ptr: UnsafePointer[SIMD[output_type, 1]],
    prev_softmax_info_ptr: UnsafePointer[SIMD[softmax_type, 1]],
    scale: SIMD[float32, 1],
    batch_size: Int,
    seq_len_arg: Int,
    valid_length: NDBuffer[uint32, 1, MutableAnyOrigin],
    cache_offsets: OptionalReg[NDBuffer[uint32, 1, MutableAnyOrigin]],
    mask: mask_t,
    score_mod: score_mod_t
)

Was this page helpful?