For the complete documentation index, see llms.txt. Markdown versions of all pages are available by appending .md to any URL (e.g. /max/get-started.md).

Mojo function

q_tma

```mojo
def q_tma[
    dtype: DType, //,
    swizzle_mode: TensorMapSwizzle,
    *,
    BM: Int,
    depth: Int,
    q_num_heads: Int,
    group: Int,
    decoding: Bool,
    fuse_gqa: Bool = False,
    num_qk_stages: Int = Int(1)
](
    ctx: DeviceContext,
    ptr: UnsafePointer[Scalar[dtype]],
    rows: Int
) -> TMATensorTile[
    dtype,
    Int(4) if decoding or fuse_gqa else Int(3),
    _padded_shape[
        Int(4) if decoding or fuse_gqa else Int(3),
        dtype,
        q_smem_shape[dtype, swizzle_mode, BM=BM, group=group, depth=depth, decoding=decoding, fuse_gqa=fuse_gqa, num_qk_stages=num_qk_stages](),
        swizzle_mode
    ](),
    _ragged_shape[
        Int(4) if decoding or fuse_gqa else Int(3),
        dtype,
        q_smem_shape[dtype, swizzle_mode, BM=BM, group=group, depth=depth, decoding=decoding, fuse_gqa=fuse_gqa, num_qk_stages=num_qk_stages](),
        swizzle_mode
    ]()
]
```

Returns:

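A `TMATensorTile` with element type `dtype`. Its second parameter is `4` when `decoding` or `fuse_gqa` is true and `3` otherwise, and its two shape parameters are computed from `q_smem_shape[...]()` by `_padded_shape` and `_ragged_shape` respectively. The full type is:

```mojo
TMATensorTile[
    dtype,
    Int(4) if decoding or fuse_gqa else Int(3),
    _padded_shape[
        Int(4) if decoding or fuse_gqa else Int(3),
        dtype,
        q_smem_shape[dtype, swizzle_mode, BM=BM, group=group, depth=depth, decoding=decoding, fuse_gqa=fuse_gqa, num_qk_stages=num_qk_stages](),
        swizzle_mode
    ](),
    _ragged_shape[
        Int(4) if decoding or fuse_gqa else Int(3),
        dtype,
        q_smem_shape[dtype, swizzle_mode, BM=BM, group=group, depth=depth, decoding=decoding, fuse_gqa=fuse_gqa, num_qk_stages=num_qk_stages](),
        swizzle_mode
    ]()
]
```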