Skip to main content

Mojo function

mha

mha[
    q_type: DType,
    k_t: MHAOperand,
    v_t: MHAOperand,
    output_type: DType,
    mask_t: MHAMask,
    valid_length_layout: Layout,
    config: MHAConfig[config.dtype],
    group: Int = 1,
    ragged: Bool = False,
    is_shared_kv: Bool = False,
    sink: Bool = False,
    _use_valid_length: Bool = False,
    _is_cache_length_accurate: Bool = False,
    _padded_ndbuffer: Bool = False
](
    q_ptr: UnsafePointer[Scalar[q_type], MutAnyOrigin],
    k: k_t,
    v: v_t,
    output_ptr: UnsafePointer[Scalar[output_type], MutAnyOrigin],
    scale: Float32,
    batch_size: Int,
    seq_len_arg: Int,
    num_keys_arg: Int,
    valid_length: LayoutTensor[DType.uint32, valid_length_layout, ImmutAnyOrigin],
    kv_input_row_offsets: OptionalReg[LayoutTensor[DType.uint32, Layout.row_major(VariadicList(-1)), ImmutAnyOrigin]],
    sink_weights: OptionalReg[LayoutTensor[q_type, Layout.row_major(VariadicList(-1)), ImmutAnyOrigin]],
    mask: mask_t
)

Was this page helpful?