For the complete documentation index, see llms.txt. Markdown versions of all pages are available by appending .md to any URL (e.g. /max/get-started.md).
Mojo module
dispatch
comptime values
DISPATCH_HIT
comptime DISPATCH_HIT = 1
DISPATCH_MISS
comptime DISPATCH_MISS = 0
llama_405b_fp8_list
comptime llama_405b_fp8_list = List(TuningConfigSM90(Int(64), Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(128), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](Int(128), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(128), Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(256), Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(512), Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), 
StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(1024), Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, Int(16384), Int(2048), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](Int(8), (GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)) // Int(8)))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(64), Int(2304), Int(16384), IndexList(Int(64), Int(48), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(48), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(128), Int(2304), Int(16384), IndexList(Int(64), Int(48), Int(32), 
__list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(48), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(256), Int(2304), Int(16384), IndexList(Int(64), Int(96), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(96), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(512), Int(2304), Int(16384), IndexList(Int(64), Int(144), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(144), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), False, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(1024), Int(2304), Int(16384), IndexList(Int(64), Int(144), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(144), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), False, OptionalReg(Index[Int, Int](GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), 
Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(2048), Int(2304), Int(16384), IndexList(Int(64), Int(144), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(144), Int(128)), Int(4), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](Int(16), Int(8))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, Int(2304), Int(16384), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(64), Int(13312), Int(16384), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(128), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](Int(128), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(128), Int(13312), Int(16384), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(256), Int(13312), Int(16384), IndexList(Int(64), Int(208), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(208), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(2), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(512), Int(13312), Int(16384), IndexList(Int(64), Int(128), 
Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(1024), Int(13312), Int(16384), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, Int(13312), Int(16384), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](Int(8), (GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)) // Int(8)))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(64), Int(16384), Int(6656), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(128), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), False, OptionalReg(Index[Int, Int](Int(128), Int(1))), MatmulSchedule.DS_SCHEDULER, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(1024), Int(16384), Int(6656), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(4), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, Int(16384), Int(6656), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), 
Int(4), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](Int(8), (GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)) // Int(8)))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=NoneType(None))
llama_405b_fp8_table
comptime llama_405b_fp8_table = Table(llama_405b_fp8_list, String("llama_405b_fp8"))
llama_8b_fp8_list
comptime llama_8b_fp8_list = List(TuningConfigSM90(Int(128), Int(-1), Int(-1), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(64), Int(128), Int(128)), Int(8), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(1), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(Int(1024), Int(-1), Int(-1), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(6), Index[Int, Int, Int](Int(1), Int(1), Int(1)), Int(2), True, OptionalReg(None), MatmulSchedule.NONE, OptionalReg(None), OptionalReg(None)), TuningConfigSM90(MAX_M, Int(-1), Int(-1), IndexList(Int(64), Int(128), Int(32), __list_literal__=NoneType(None)), Index[Int, Int, Int](Int(128), Int(128), Int(128)), Int(6), Index[Int, Int, Int](Int(2), Int(1), Int(1)), Int(2), True, OptionalReg(Index[Int, Int](Int(8), (GPUInfo.from_family(AcceleratorArchitectureFamily(Int(32), Int(2048), Int(233472), Int(65536), Int(1024)), StringSlice("H100"), Vendor(Int8(2)), StringSlice("cuda"), StringSlice("hopper"), SIMD(9), StringSlice("sm_90a"), Int(132)) // Int(8)))), MatmulSchedule.TILE2D, OptionalReg(None), OptionalReg(None)), __list_literal__=NoneType(None))
llama_8b_fp8_table
comptime llama_8b_fp8_table = Table(llama_8b_fp8_list, String("llama_8b_fp8"))
logger
comptime logger = Logger(stdout, prefix=String(""), source_location=False)
MAX_M
comptime MAX_M = Int.MAX
Functions
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!