Mojo struct

ConvDirectNHWC

struct ConvDirectNHWC[input_mut: Bool, filter_mut: Bool, conv_attr_rank: Int, //, input_layout: Layout, filter_layout: Layout, output_layout: Layout, input_origin: Origin[input_mut], filter_origin: Origin[filter_mut], output_origin: MutOrigin, input_type: DType, filter_type: DType, output_type: DType, filter_packed: Bool, conv_attr: ConvInfoStatic[conv_attr_rank], elementwise_epilogue: OptionalReg[fn[rank: Int](coords: IndexList[rank], f_size: Int) capturing -> None] = None]

Implements the outer loops for direct convolution. N, HO, and WO are collapsed into a single dimension, n_ho_wo, and then n_ho_wo, C, and F are tiled. The tile factors for C and F are chosen by a heuristic that prioritizes C; n_ho_wo is tiled by the micro kernel's height.

If n_ho_wo is large enough to spill out of the LLC, it may also need to be tiled as the outermost loop with a factor that fits in the LLC.

Assumes F is divisible by at least simd_size.
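To make the loop structure concrete, here is a minimal, hypothetical sketch of the outer loop nest in Mojo. The names c_tile, f_tile, and ukernel_h are illustrative assumptions, not fields of this struct, and the real implementation dispatches to a micro kernel (and handles partitioning, padding, and residuals) inside the innermost loop:

# Hypothetical sketch of the outer loop nest described above; not the
# actual implementation. c_tile, f_tile, and ukernel_h are illustrative.
fn outer_loops_sketch(
    N: Int, HO: Int, WO: Int, C: Int, F: Int,
    c_tile: Int, f_tile: Int, ukernel_h: Int,
):
    # N, HO, and WO collapsed into one flat dimension.
    var n_ho_wo = N * HO * WO
    # C is tiled first; the heuristic prioritizes its tile factor.
    for c in range(0, C, c_tile):
        # F is tiled next; F is assumed divisible by simd_size.
        for f in range(0, F, f_tile):
            # n_ho_wo is tiled by the micro kernel's height.
            for i in range(0, n_ho_wo, ukernel_h):
                # A micro kernel would update a (ukernel_h x f_tile)
                # output tile here, accumulating over the current c tile.
                pass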

Fields

  • output (LayoutTensor[output_type, output_layout, output_origin])
  • input (LayoutTensor[input_type, input_layout, input_origin])
  • filter (LayoutTensor[filter_type, filter_layout, filter_origin])
  • conv_shape (ConvShape[conv_attr_rank])
  • partition (ConvPartition)
  • cf_tile_size (IndexList[2])

Implemented traits

AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility

Aliases

__copyinit__is_trivial

alias __copyinit__is_trivial = True

__del__is_trivial

alias __del__is_trivial = True

__moveinit__is_trivial

alias __moveinit__is_trivial = True

packed_and_fully_static

alias packed_and_fully_static = filter_packed and filter_layout.shape.all_known() and output_layout.shape.all_known[1, output_layout.rank()]() and input_layout.shape.all_known[1, input_layout.rank()]() and conv_attr.all_known[conv_attr_rank]()
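In other words, this alias is True only when the filter is packed and the filter shape, the output and input shapes (beyond the batch dimension), and the static convolution attributes are all known at compile time.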

Methods

run

static run(output: LayoutTensor[output_type, output_layout, output_origin], input: LayoutTensor[input_type, input_layout, input_origin], filter: LayoutTensor[filter_type, filter_layout, filter_origin], conv_shape: ConvShape[conv_attr_rank])

is_new_c_accum

is_new_c_accum(self, c_idx: Int) -> Bool

Returns:

Bool

update_output_tile_no_padding

update_output_tile_no_padding[micro_kernel_height: Int, micro_kernel_width: Int, c_fully_cached: Bool, has_residual: Bool, last_c_tile: Bool](self, n: Int, f_tile_offset: Int, f_tile_size: Int, c_tile_offset: Int, c_tile_size: Int, output_flat_coord: Int)

output_space_flat_loop

output_space_flat_loop[micro_kernel_f_size: Int, has_residual: Bool, last_c_tile: Bool](self, n: Int, f_tile_offset: Int, f_tile_size: Int, c_tile_offset: Int, c_tile_size: Int)

output_space_loop

output_space_loop[micro_kernel_height: Int, micro_kernel_width: Int, has_residual: Bool, last_c_tile: Bool](self, n: Int, f_tile_offset: Int, f_tile_size: Int, c_tile_offset: Int, c_tile_size: Int)

output_space_loop_1d

output_space_loop_1d[micro_kernel_height: Int, micro_kernel_width: Int, has_residual: Bool, last_c_tile: Bool, output_dt: DType, input_dt: DType, filter_dt: DType](self, output: LegacyUnsafePointer[Scalar[output_dt]], input: LegacyUnsafePointer[Scalar[input_dt]], filter: LegacyUnsafePointer[Scalar[filter_dt]], n: Int, first_c_tile_in_group: Bool, c_tile_size: Int, f_tile_offset: Int, f_tile_size: Int, left_pad_impact_end: Int, right_pad_impact_start: Int)

output_space_loop_2d

output_space_loop_2d[micro_kernel_height: Int, micro_kernel_width: Int, has_residual: Bool, last_c_tile: Bool, output_dt: DType, input_dt: DType, filter_dt: DType](self, output: LegacyUnsafePointer[Scalar[output_dt]], input: LegacyUnsafePointer[Scalar[input_dt]], filter: LegacyUnsafePointer[Scalar[filter_dt]], n: Int, first_c_tile_in_group: Bool, c_tile_size: Int, f_tile_offset: Int, f_tile_size: Int, left_pad_impact_end: Int, right_pad_impact_start: Int)

output_space_loop_3d

output_space_loop_3d[micro_kernel_height: Int, micro_kernel_width: Int, has_residual: Bool, last_c_tile: Bool, output_dt: DType, input_dt: DType, filter_dt: DType](self, output: LegacyUnsafePointer[Scalar[output_dt]], input: LegacyUnsafePointer[Scalar[input_dt]], filter: LegacyUnsafePointer[Scalar[filter_dt]], n: Int, first_c_tile_in_group: Bool, c_tile_size: Int, f_tile_offset: Int, f_tile_size: Int, left_pad_impact_end: Int, right_pad_impact_start: Int)
