Mojo struct

MHAAttentionConfig

struct MHAAttentionConfig[dtype: DType, //, token_gen: Bool, config: MHAConfig[dtype], group: Int]

Implemented traits

AnyType, AttentionConfig, Copyable, ImplicitlyCopyable, ImplicitlyDestructible, Movable

`comptime` members

`__copyinit__is_trivial`

comptime __copyinit__is_trivial = True

`__del__is_trivial`

comptime __del__is_trivial = True

`__moveinit__is_trivial`

comptime __moveinit__is_trivial = True

`depth_padded`

comptime depth_padded = False if MHAAttentionConfig[token_gen, config, group].USE_EXPERIMENTAL_CDNA4_MHA_KERNEL else True

`double_buffer`

comptime double_buffer = True if MHAAttentionConfig[token_gen, config, group].USE_EXPERIMENTAL_CDNA4_MHA_KERNEL else False

`full_kv`

comptime full_kv = True if MHAAttentionConfig[token_gen, config, group].USE_EXPERIMENTAL_CDNA4_MHA_KERNEL else False

`shared_kv`

comptime shared_kv = False if MHAAttentionConfig[token_gen, config, group].USE_EXPERIMENTAL_CDNA4_MHA_KERNEL else True

`USE_EXPERIMENTAL_CDNA4_MHA_KERNEL`

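comptime USE_EXPERIMENTAL_CDNA4_MHA_KERNEL = token_gen.__invert__() if (env_get_bool["USE_EXPERIMENTAL_CDNA4_MHA_KERNEL", False]() if _cdna_4_or_newer() else _cdna_4_or_newer()) else (env_get_bool["USE_EXPERIMENTAL_CDNA4_MHA_KERNEL", False]() if _cdna_4_or_newer() else _cdna_4_or_newer())

In other words, this is true only when `_cdna_4_or_newer()` is true, `env_get_bool["USE_EXPERIMENTAL_CDNA4_MHA_KERNEL", False]()` is true, and `token_gen` is false.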

Methods

`q_head_idx`

static q_head_idx() -> UInt

Returns:

UInt

`q_tile_idx`

static q_tile_idx() -> UInt

Returns:

UInt

`kv_head_idx`

static kv_head_idx() -> UInt

Returns:

UInt

`get_mma_shape`

static get_mma_shape() -> IndexList[3]

Returns:

IndexList[3]

`get_q_offset`

static get_q_offset[q_depth: Scalar[DType.uint]]() -> UInt32

Returns:

UInt32

`get_output_offset`

static get_output_offset[output_depth: Scalar[DType.uint]]() -> UInt32

Returns:

UInt32

Implemented traits

comptime members

__copyinit__is_trivial

__del__is_trivial

__moveinit__is_trivial

depth_padded

double_buffer

full_kv

shared_kv

USE_EXPERIMENTAL_CDNA4_MHA_KERNEL

Methods

q_head_idx

q_tile_idx

kv_head_idx

get_mma_shape

get_q_offset

get_output_offset

Implemented traits

`comptime` members

`__copyinit__is_trivial`

`__del__is_trivial`

`__moveinit__is_trivial`

`depth_padded`

`double_buffer`

`full_kv`

`shared_kv`

`USE_EXPERIMENTAL_CDNA4_MHA_KERNEL`

Methods

`q_head_idx`

`q_tile_idx`

`kv_head_idx`

`get_mma_shape`

`get_q_offset`

`get_output_offset`