device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp Source File#
device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:91
Definition convolution_backward_data_specialization.hpp:8
auto get_bwd_weight_gemm_sizes(const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths)
Definition split_k_utils.hpp:55
ConvolutionBackwardWeightSpecialization
Definition convolution_backward_weight_specialization.hpp:13
@ Filter1x1Stride1Pad0
Definition convolution_backward_weight_specialization.hpp:15
constexpr bool is_GNWC_GKXC_GNWK()
Definition device_grouped_conv_utils.hpp:23
__global__ void kernel_batched_gemm_xdlops_bwd_weight(const FloatA *__restrict__ p_a_grid, const FloatB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CElementwiseOperation c_element_op, const index_t batch_count, const AGridDesc_B_K0_M_K1 a_b_k0_m_k1_grid_desc, const BGridDesc_B_K0_N_K1 b_b_k0_n_k1_grid_desc, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const Block2CTileMap block_2_ctile_map, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:50
constexpr bool is_GNDHWC_GKZYXC_GNDHWK()
Definition device_grouped_conv_utils.hpp:88
constexpr bool is_NHWGC_GKYXC_NHWGK()
Definition device_grouped_conv_utils.hpp:40
ck::index_t get_best_occupancy_k_batch_value(int max_occupancy, ck::index_t grid_size)
Definition split_k_utils.hpp:30
constexpr bool is_NDHWGC_GKZYXC_NDHWGK()
Definition device_grouped_conv_utils.hpp:80
std::string getConvBackwardWeightSpecializationString(const ConvolutionBackwardWeightSpecialization &s)
Definition convolution_backward_weight_specialization.hpp:21
ck::index_t calculate_mn_grid_size(ck::index_t gemmM, ck::index_t gemmN)
Definition split_k_utils.hpp:84
constexpr bool is_GNHWC_GKYXC_GNHWK()
Definition device_grouped_conv_utils.hpp:48
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
__global__ void kernel_batched_elementwise(const InGridDescTuple in_grid_desc_tuple, const OutGridDescTuple out_grid_desc_tuple, const InDataTypePointerTuple p_in_global_tuple, const OutDataTypePointerTuple p_out_global_tuple, const Block2TileMap block_2_tile_map, const ElementwiseOperation elementwise_op, const index_t batch_count, const std::array< index_t, NumInputs > input_batch_strides, const std::array< index_t, NumOutputs > output_batch_strides)
Definition gridwise_elementwise_2d.hpp:221
__host__ __device__ constexpr auto concat_tuple(const Tuple< X... > &tx, const Tuple< Y... > &ty)
Definition tuple_helper.hpp:52
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
__host__ __device__ constexpr auto make_right_pad_transform(const LowLength &low_length, const RightPadLength &right_pad, integral_constant< bool, SkipIsValidCheck >=integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:37
typename tuple_element< I, TTuple >::type tuple_element_t
Definition utility/tuple.hpp:208
__device__ uint32_t amd_wave_read_first_lane(uint32_t value)
Definition amd_wave_read_first_lane.hpp:100
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
__host__ __device__ constexpr auto generate_sequence_v2(F &&f, Number< N >)
Definition sequence_helper.hpp:25
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
auto accumulate_n(ForwardIterator first, Size count, T init, BinaryOperation op) -> decltype(std::accumulate(first, std::next(first, count), init, op))
Definition library/utility/numeric.hpp:11
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
Definition ck/stream_config.hpp:10
Definition block_to_ctile_map.hpp:261
Definition gridwise_elementwise_2d.hpp:278
Definition gridwise_gemm_xdlops_bwd_weight.hpp:254
__host__ static __device__ constexpr bool CheckValidity(const AGridDesc_K0_M_K1 &a_b_k0_m_k1_grid_desc, const BGridDesc_K0_N_K1 &b_b_k0_n_k1_grid_desc, const CGridDesc_M_N &c_m_n_grid_desc, const Block2CTileMap &block_2_ctile_map)
Definition gridwise_gemm_xdlops_bwd_weight.hpp:544
__host__ static __device__ constexpr auto MakeCBlockClusterAdaptor(const CGridDesc_M_N &c_m_n_grid_desc, index_t M01, index_t N01, index_t KBatch)
Definition gridwise_gemm_xdlops_bwd_weight.hpp:625
__host__ static __device__ constexpr auto MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(const CGridDesc_M_N &c_m_n_grid_desc)
Definition gridwise_gemm_xdlops_bwd_weight.hpp:608
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:117
Definition functional2.hpp:33
Definition tensor_operation/operator_transform/transform_conv_bwd_weight_to_gemm.hpp:24
Definition split_k_arg.hpp:11
Definition device_base.hpp:197
void * p_workspace_
Definition device_base.hpp:204
BaseArgument()=default
BaseInvoker()=default
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:558
int GetMaxOccupancy()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:560
int max_occupancy_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:603
ActiveWorkgroupsPerCU()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:585
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:607
std::array< ck::index_t, NDimSpatial > input_spatial_lengths_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:777
InElementwiseOperation b_element_op_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:769
BGridDesc_K0_N_K1 b_grid_desc_kbatch_k0_n_k1_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:755
const index_t Conv_G_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:773
std::size_t GetWorkspaceSizeBytes() const
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:744
CGridDesc_M_N ce_grid_desc_m_n_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:756
long_index_t c_space_size_bytes
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:783
const BDataType * p_b_grid_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:750
const std::array< ck::index_t, NDimSpatial > & input_right_pads_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:782
index_t N01_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:766
ComputePtrOffsetOfStridedBatch< I1, I1, NumDTensor > compute_ptr_offset_of_batch_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:763
Argument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< const void *, NumDTensor > &p_ds, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, const ck::index_t M01, const ck::index_t N01, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:608
std::array< ck::index_t, NDimSpatial > output_spatial_lengths_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:779
OutElementwiseOperation a_element_op_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:768
const std::array< ck::index_t, NDimSpatial > & input_left_pads_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:781
AGridDesc_K0_M_K1 a_grid_desc_kbatch_k0_m_k1_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:754
ck::tensor_operation::device::DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle::Argument::p_ds_grid_
DsGridPointerTuple p_ds_grid_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:751
const index_t Conv_K_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:775
const ADataType * p_a_grid_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:749
EDataType * p_e_grid_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:752
index_t M01_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:765
DsGridDesc_M_N ds_grid_descs_tuple_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:757
std::array< ck::index_t, NDimSpatial > filter_spatial_lengths_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:778
const std::array< ck::index_t, NDimSpatial > & conv_filter_strides_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:780
Block2CTileMap block_2_ctile_map_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:759
const index_t Conv_N_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:774
WeiElementwiseOperation cde_element_op_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:770
Block2TileMapElementwise elementwise_block_2_ctile_map_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:760
const index_t Conv_C_
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:776
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:788
INVOKER_RUN_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:929
float RunImp(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:810
void ShowInfo(const Argument &arg)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:791
DeviceOp::Argument Argument
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:789
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:171
InDataType BDataType
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:178
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:174
decltype(concat_tuple(Tuple< const AccDataType * >{}, DsGridPointerTuple{})) CDDataTypes
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:524
static constexpr auto BBlockLdsN1PerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:223
static constexpr auto conv_to_gemm_transformer
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:199
decltype(MakeDsGridDescriptor_M_N< NDimSpatial >({}, {})) DsGridDesc_M_N
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:521
static constexpr index_t WorkspaceInOutScalarPerVector
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:208
static constexpr auto NXdlPerWave32
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:175
static constexpr auto I5
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:195
static void InitElementwiseBatchStrides(const ComputePtrOffsetOfBatch &compute_ptr_offset_of_batch_, std::array< index_t, NumDTensor+I1 > &input_batch_strides, std::array< index_t, I1 > &output_batch_strides)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:508
static constexpr index_t ClusterLengthNPerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:528
static constexpr auto I0
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:190
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1051
static auto MakeArgument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< const void *, NumDTensor > &p_ds, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1056
static auto MakeDsGridDescriptor_M_N(const std::array< std::array< index_t, NDim+3 >, NumDTensor > &ds_g_k_c_xs_lengths, const std::array< std::array< index_t, NDim+3 >, NumDTensor > &ds_g_k_c_xs_strides)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:383
static constexpr auto K1Number
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:197
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< const void *, NumDTensor > &p_ds, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_k_c_xs_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k) override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1104
static constexpr auto ABlockLdsM0PerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:219
static constexpr auto ElePerBank
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:215
remove_cvref_t< decltype(ABCGridDescs{}[I0])> AGridDesc_K0_M_K1
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:304
static auto MakeInvoker()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1102
BlockToCTileMap_M00_N0_M01Adapt< MPerBlock, NPerBlock > Block2TileMapElementwise
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:530
decltype(GetDsGridPointerTuple()) DsGridPointerTuple
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:523
std::string GetTypeString() const override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1155
remove_cvref_t< decltype(ABCGridDescs{}[I1])> BGridDesc_K0_N_K1
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:305
decltype(concat_tuple(Tuple< CGridDesc_M_N >{}, DsGridDesc_M_N{})) CDGridDesc_M_N
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:522
static constexpr auto ABlockLdsM1Padding
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:220
static constexpr bool IsValidCompilationParameter()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:936
static constexpr auto ABlockLdsM1PerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:218
static constexpr auto I2
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:192
InDataType ABDataType
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:188
GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight< BlockSize, ADataType, BDataType, AccDataType, AccDataType, InMemoryDataOperationEnum::AtomicAdd, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, element_wise::PassThrough, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, K1, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, ABlockLdsAddExtraM, ABlockLdsM1PerBlock, ABlockLdsM0PerBlock, ABlockLdsM1Padding, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, BBlockLdsAddExtraN, BBlockLdsN1PerBlock, BBlockLdsN0PerBlock, BBlockLdsN1Padding, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, WorkspaceInOutScalarPerVector, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, true, true, 1, PipelineVersion::v1, ComputeTypeA, ComputeTypeB > GridwiseGemmBase
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:309
static constexpr index_t MaxScalarPerVectorFP32
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:207
static constexpr auto MakeElementwiseInputSequence()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:365
static constexpr auto I1
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:191
static constexpr auto I4
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:194
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1150
static constexpr index_t NumDTensor
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:181
OutElementwiseOperation AElementwiseOperation
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:183
decltype(GridwiseGemm64::MakeCBlockClusterAdaptor(CGridDesc_M_N{}, 1, 1, 1)) Block2CTileMap
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:554
size_t GetWorkSpaceSize(const BaseArgument *p_arg) const override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1183
static constexpr auto GetDsGridPointerTuple()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:372
static constexpr index_t ClusterLengthMPerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:526
decltype(GridwiseGemm64::MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(CGridDesc_M_N{})) CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:551
WeiElementwiseOperation CDEElementwiseOperation
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:185
DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle DeviceOp
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:172
WeiDataType EDataType
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:179
OutDataType ADataType
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:177
static auto GetABCGridDesc()
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:228
InElementwiseOperation BElementwiseOperation
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:184
static bool IsSupportedArgument(const Argument &arg)
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:942
static constexpr auto BBlockLdsN1Padding
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:225
GridwiseElementwise< CDGridDesc_M_N, Tuple< EGridDesc_M_N >, CDDataTypes, Tuple< EDataType * >, Block2TileMapElementwise, CDEElementwiseOperation, BlockSize, MPerBlock, NPerBlock, MPerBlock/ClusterLengthMPerBlock, NPerBlock/ClusterLengthNPerBlock, Sequence< 0, 1 >, decltype(MakeElementwiseInputSequence()), Sequence< CBlockTransferScalarPerVector_NWaveNPerXdl >, I1, I1 > GridwiseElementwise
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:532
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:363
remove_cvref_t< decltype(ABCGridDescs{}[I2])> CGridDesc_M_N
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:306
static constexpr auto BBlockLdsN0PerBlock
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:224
decltype(GetABCGridDesc< NDimSpatial >()) ABCGridDescs
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:302
CGridDesc_M_N EGridDesc_M_N
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:525
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:362
void SetWorkSpacePointer(BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:1196
static constexpr auto BankLength
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:214
static constexpr auto I3
Definition device_grouped_conv_bwd_weight_multiple_d_xdl_cshuffle.hpp:193
Definition device_grouped_conv_bwd_weight_multiple_d.hpp:31
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340