Skip to main content

Mojo module

dispatch

Aliases

DISPATCH_HIT

comptime DISPATCH_HIT = 1

DISPATCH_MISS

comptime DISPATCH_MISS = 0

llama_405b_fp8_list

comptime llama_405b_fp8_list = List[TuningConfigSM90](TuningConfigSM90(64, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(64, 128, 128), 8, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(128, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(256, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(512, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(1024, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(MAX_M, 16384, 2048, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(8, (H100.sm_count // 8))), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(64, 2304, 16384, IndexList[3, DType.int64](64, 48, 32, Tuple[]()), Index(64, 48, 128), 8, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(128, 2304, 16384, 
IndexList[3, DType.int64](64, 48, 32, Tuple[]()), Index(64, 48, 128), 8, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(256, 2304, 16384, IndexList[3, DType.int64](64, 96, 32, Tuple[]()), Index(64, 96, 128), 4, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(512, 2304, 16384, IndexList[3, DType.int64](64, 144, 32, Tuple[]()), Index(128, 144, 128), 4, Index(1, 1, 1), 2, False, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(1024, 2304, 16384, IndexList[3, DType.int64](64, 144, 32, Tuple[]()), Index(128, 144, 128), 4, Index(1, 1, 1), 2, False, OptionalReg[IndexList[2]](Index(H100.sm_count, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(2048, 2304, 16384, IndexList[3, DType.int64](64, 144, 32, Tuple[]()), Index(128, 144, 128), 4, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(16, 8)), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(MAX_M, 2304, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(64, 13312, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(64, 128, 128), 8, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(128, 13312, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, 
OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(256, 13312, 16384, IndexList[3, DType.int64](64, 208, 32, Tuple[]()), Index(128, 208, 128), 4, Index(1, 2, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(512, 13312, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(1024, 13312, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(MAX_M, 13312, 16384, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(8, (H100.sm_count // 8))), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(64, 16384, 6656, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(64, 128, 128), 8, Index(1, 1, 1), 1, False, OptionalReg[IndexList[2]](Index(128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(1024, 16384, 6656, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(MAX_M, 16384, 6656, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 4, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(8, (H100.sm_count // 8))), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), Tuple[]())

llama_405b_fp8_table

comptime llama_405b_fp8_table = Table[TuningConfigSM90](llama_405b_fp8_list, "llama_405b_fp8")

llama_8b_fp8_list

comptime llama_8b_fp8_list = List[TuningConfigSM90](TuningConfigSM90(128, -1, -1, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(64, 128, 128), 8, Index(1, 1, 1), 1, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(1024, -1, -1, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 6, Index(1, 1, 1), 2, True, OptionalReg[IndexList[2]](None), MatmulSchedule.NONE, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), TuningConfigSM90(MAX_M, -1, -1, IndexList[3, DType.int64](64, 128, 32, Tuple[]()), Index(128, 128, 128), 6, Index(2, 1, 1), 2, True, OptionalReg[IndexList[2]](Index(8, (H100.sm_count // 8))), MatmulSchedule.TILE2D, OptionalReg[Int](None), OptionalReg[RasterOrder](None)), Tuple[]())

llama_8b_fp8_table

comptime llama_8b_fp8_table = Table[TuningConfigSM90](llama_8b_fp8_list, "llama_8b_fp8")

logger

comptime logger = Logger[DEFAULT_LEVEL](stdout, "", False)

MAX_M

comptime MAX_M = Int.MAX

Functions

Was this page helpful?