Skip to main content

Mojo module

dispatch

comptime values

DISPATCH_HIT

comptime DISPATCH_HIT = 1

DISPATCH_MISS

comptime DISPATCH_MISS = 0

llama_405b_fp8_list

comptime llama_405b_fp8_list = List(TuningConfigSM90(64, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 2304, 16384, IndexList(64, 48, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 48, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 2304, 16384, IndexList(64, 48, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 48, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 2304, 16384, IndexList(64, 96, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 96, 128), 4, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(2048, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](16, 8)), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 2304, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 13312, 16384, IndexList(64, 208, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 208, 128), 4, Index[Int, Int, Int](1, 2, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=Tuple())

llama_405b_fp8_table

comptime llama_405b_fp8_table = Table(llama_405b_fp8_list, String("llama_405b_fp8"))

llama_8b_fp8_list

comptime llama_8b_fp8_list = List(TuningConfigSM90(128, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 6, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 6, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=Tuple())

llama_8b_fp8_table

comptime llama_8b_fp8_table = Table(llama_8b_fp8_list, String("llama_8b_fp8"))

logger

comptime logger = Logger(stdout, prefix=String(""), source_location=False)

MAX_M

comptime MAX_M = Int.MAX

Functions

Was this page helpful?