Mojo module
dispatch
comptime values
DISPATCH_HIT
comptime DISPATCH_HIT = 1
DISPATCH_MISS
comptime DISPATCH_MISS = 0
llama_405b_fp8_list
comptime llama_405b_fp8_list = List(TuningConfigSM90(64, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 16384, 2048, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 2304, 16384, IndexList(64, 48, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 48, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 2304, 16384, IndexList(64, 48, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 48, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 2304, 16384, IndexList(64, 96, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 96, 128), 4, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, False, OptionalReg(Index[Int, Int](H100, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(2048, 2304, 16384, IndexList(64, 144, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 144, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](16, 8)), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 2304, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(128, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(256, 13312, 16384, IndexList(64, 208, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 208, 128), 4, Index[Int, Int, Int](1, 2, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(512, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 13312, 16384, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(64, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, False, OptionalReg(Index[Int, Int](128, 1)), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, 16384, 6656, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 4, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=Tuple())
llama_405b_fp8_table
comptime llama_405b_fp8_table = Table(llama_405b_fp8_list, String("llama_405b_fp8"))
llama_8b_fp8_list
comptime llama_8b_fp8_list = List(TuningConfigSM90(128, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](64, 128, 128), 8, Index[Int, Int, Int](1, 1, 1), 1, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(1024, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 6, Index[Int, Int, Int](1, 1, 1), 2, True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, -1, -1, IndexList(64, 128, 32, __list_literal__=Tuple()), Index[Int, Int, Int](128, 128, 128), 6, Index[Int, Int, Int](2, 1, 1), 2, True, OptionalReg(Index[Int, Int](8, (H100 // 8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=Tuple())
llama_8b_fp8_table
comptime llama_8b_fp8_table = Table(llama_8b_fp8_list, String("llama_8b_fp8"))
logger
comptime logger = Logger(stdout, prefix=String(""), source_location=False)
MAX_M
comptime MAX_M = Int.MAX
Functions
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!