Skip to main content

Mojo struct

MHAAttentionConfig

struct MHAAttentionConfig[token_gen: Bool, config: MHAConfig[config.dtype], group: Int]

Implemented traits

AnyType, AttentionConfig, Copyable, ImplicitlyCopyable, ImplicitlyDestructible, Movable

comptime members

depth_padded

comptime depth_padded = False

double_buffer

comptime double_buffer = not token_gen

double_buffer_k_only

comptime double_buffer_k_only = token_gen and (config.block_n() <= 64)

full_kv

comptime full_kv = True

shared_kv

comptime shared_kv = token_gen and (config > 256)

Methods

q_head_idx

static q_head_idx() -> Int

Returns:

Int

q_tile_idx

static q_tile_idx() -> Int

Returns:

Int

kv_head_idx

static kv_head_idx() -> Int

Returns:

Int

get_mma_shape

static get_mma_shape() -> IndexList[3]

Returns:

IndexList[3]

get_q_offset

static get_q_offset[q_depth: Int]() -> UInt32

Returns:

UInt32

get_output_offset

static get_output_offset[output_depth: Int]() -> UInt32

Returns:

UInt32

Was this page helpful?