Skip to main content

Mojo function

warp_specialized_matmul

warp_specialized_matmul[
    in_type: DType,
    out_type: DType,
    a_layout: Layout,
    b_layout: Layout,
    c_layout: Layout,
    BM: Int,
    BN: Int,
    BK: Int,
    WM: Int,
    WN: Int,
    WK: Int,
    a_producer_warps: Int = 1,
    b_producer_warps: Int = 1,
    consumer_warps: Int = 1,
    pipeline_stages: Int = 1
](
    a: LayoutTensor[in_type, a_layout, MutAnyOrigin, address_space=AddressSpace.GLOBAL],
    b: LayoutTensor[in_type, b_layout, MutAnyOrigin, address_space=AddressSpace.GLOBAL],
    c: LayoutTensor[out_type, c_layout, MutAnyOrigin, address_space=AddressSpace.GLOBAL]
)

warp_specialized_matmul[
    M: Int,
    N: Int,
    K: Int,
    BM: Int,
    BN: Int,
    BK: Int,
    WM: Int,
    WN: Int,
    WK: Int,
    a_producer_warps: Int,
    b_producer_warps: Int,
    consumer_warps: Int,
    pipeline_stages: Int = 1
](
    a_device_tensor: LayoutTensor[DType.bfloat16, Layout.row_major(M, K), origin],
    b_device_tensor: LayoutTensor[DType.bfloat16, Layout.row_major(N, K), origin],
    c_device_tensor: LayoutTensor[DType.float32, Layout.row_major(M, N), origin],
    ctx: DeviceContext
)

Was this page helpful?