DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize > Struct Template Reference

DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize > Struct Template Reference
ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize > Struct Template Reference

#include <device_batchnorm_forward_impl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >:
ck::tensor_operation::device::DeviceBatchNormFwd< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim > ck::tensor_operation::device::DeviceBatchNormFwd< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim > ck::tensor_operation::device::BaseOperator ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using XYGridDesc_M_K = decltype(MakeXY2dDescriptor({1}, {1}, 1, 1))
using ScaleBiasMeanVarGridDesc_M = decltype(MakeScaleBiasMeanVar1dDescriptor({1}, {1}))
using XYGridDesc_M_K = decltype(MakeXY2dDescriptor({1}, {1}, 1, 1))
using ScaleBiasMeanVarGridDesc_M = decltype(MakeScaleBiasMeanVar1dDescriptor({1}, {1}))

Public Member Functions

size_t GetWorkSpaceSize (const BaseArgument *pArg) const override
void SetWorkSpacePointer (BaseArgument *pArg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
bool IsSupportedArgument (const BaseArgument *pArg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, Rank > xyLengths, const std::array< index_t, Rank > xStrides, const std::array< index_t, Rank > yStrides, const std::array< int, NumBatchNormReduceDim > reduceDims, const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleBiasMeanVarLengths, const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleStrides, const std::array< index_t, Rank - NumBatchNormReduceDim > bnBiasStrides, const std::array< index_t, Rank - NumBatchNormReduceDim > bnMeanVarStrides, const void *p_x, const void *p_scale, const void *p_bias, double epsilon, const YElementwiseOp y_elementwise_op, void *p_y, void *resultSaveMean, void *resultSaveInvVariance, double averageFactor, void *resultRunningMean, void *resultRunningVariance) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
size_t GetWorkSpaceSize (const BaseArgument *pArg) const override
void SetWorkSpacePointer (BaseArgument *pArg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
bool IsSupportedArgument (const BaseArgument *pArg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, Rank > xyLengths, const std::array< index_t, Rank > xStrides, const std::array< index_t, Rank > yStrides, const std::array< int, NumBatchNormReduceDim > reduceDims, const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleBiasMeanVarLengths, const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleStrides, const std::array< index_t, Rank - NumBatchNormReduceDim > bnBiasStrides, const std::array< index_t, Rank - NumBatchNormReduceDim > bnMeanVarStrides, const void *p_x, const void *p_scale, const void *p_bias, double epsilon, const YElementwiseOp y_elementwise_op, void *p_y, void *resultSaveMean, void *resultSaveInvVariance, double averageFactor, void *resultRunningMean, void *resultRunningVariance) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual ~BaseOperator ()

Static Public Member Functions

static auto MakeXY2dDescriptor (const std::array< index_t, Rank > &xyLengths, const std::array< index_t, Rank > &xyStrides, int blkGroupSize, int numBlockTileIteration)
static auto MakeMeanVarCountOutputMG2dDescriptor (int invariantLength, int blkGroupSize)
static auto MakeMeanVarCountInputMK2dDescriptor (int invariantLength, int blkGroupSize)
static auto MakeScaleBiasMeanVar1dDescriptor (const std::array< index_t, NumInvariantDim > &lengths, const std::array< index_t, NumInvariantDim > &strides)
static auto MakeXY2dDescriptor (const std::array< index_t, Rank > &xyLengths, const std::array< index_t, Rank > &xyStrides, int blkGroupSize, int numBlockTileIteration)
static auto MakeMeanVarCountOutputMG2dDescriptor (int invariantLength, int blkGroupSize)
static auto MakeMeanVarCountInputMK2dDescriptor (int invariantLength, int blkGroupSize)
static auto MakeScaleBiasMeanVar1dDescriptor (const std::array< index_t, NumInvariantDim > &lengths, const std::array< index_t, NumInvariantDim > &strides)

Static Public Attributes

static constexpr index_t NumInvariantDim = Rank - NumBatchNormReduceDim
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ ScaleBiasMeanVarGridDesc_M [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
using ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::ScaleBiasMeanVarGridDesc_M = decltype(MakeScaleBiasMeanVar1dDescriptor({1}, {1}))

◆ ScaleBiasMeanVarGridDesc_M [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
using ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::ScaleBiasMeanVarGridDesc_M = decltype(MakeScaleBiasMeanVar1dDescriptor({1}, {1}))

◆ XYGridDesc_M_K [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
using ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::XYGridDesc_M_K = decltype(MakeXY2dDescriptor({1}, {1}, 1, 1))

◆ XYGridDesc_M_K [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
using ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::XYGridDesc_M_K = decltype(MakeXY2dDescriptor({1}, {1}, 1, 1))

Member Function Documentation

◆ GetTypeString() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::string ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ GetTypeString() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::string ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ GetWorkSpaceSize() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
size_t ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::GetWorkSpaceSize ( const BaseArgument * pArg) const
inline override virtual

◆ GetWorkSpaceSize() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
size_t ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::GetWorkSpaceSize ( const BaseArgument * pArg) const
inline override virtual

◆ IsSupportedArgument() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
bool ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::IsSupportedArgument ( const BaseArgument * pArg)
inline override virtual

◆ IsSupportedArgument() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
bool ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::IsSupportedArgument ( const BaseArgument * pArg)
inline override virtual

◆ MakeArgumentPointer() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeArgumentPointer ( const std::array< index_t, Rank > xyLengths,
const std::array< index_t, Rank > xStrides,
const std::array< index_t, Rank > yStrides,
const std::array< int, NumBatchNormReduceDim > reduceDims,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleBiasMeanVarLengths,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleStrides,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnBiasStrides,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnMeanVarStrides,
const void * p_x,
const void * p_scale,
const void * p_bias,
double epsilon,
const YElementwiseOp y_elementwise_op,
void * p_y,
void * resultSaveMean,
void * resultSaveInvVariance,
double averageFactor,
void * resultRunningMean,
void * resultRunningVariance )
inline override virtual

◆ MakeArgumentPointer() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeArgumentPointer ( const std::array< index_t, Rank > xyLengths,
const std::array< index_t, Rank > xStrides,
const std::array< index_t, Rank > yStrides,
const std::array< int, NumBatchNormReduceDim > reduceDims,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleBiasMeanVarLengths,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnScaleStrides,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnBiasStrides,
const std::array< index_t, Rank - NumBatchNormReduceDim > bnMeanVarStrides,
const void * p_x,
const void * p_scale,
const void * p_bias,
double epsilon,
const YElementwiseOp y_elementwise_op,
void * p_y,
void * resultSaveMean,
void * resultSaveInvVariance,
double averageFactor,
void * resultRunningMean,
void * resultRunningVariance )
inline override virtual

◆ MakeInvokerPointer() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeInvokerPointer() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeMeanVarCountInputMK2dDescriptor() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeMeanVarCountInputMK2dDescriptor ( int invariantLength,
int blkGroupSize )
inline static

◆ MakeMeanVarCountInputMK2dDescriptor() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeMeanVarCountInputMK2dDescriptor ( int invariantLength,
int blkGroupSize )
inline static

◆ MakeMeanVarCountOutputMG2dDescriptor() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeMeanVarCountOutputMG2dDescriptor ( int invariantLength,
int blkGroupSize )
inline static

◆ MakeMeanVarCountOutputMG2dDescriptor() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeMeanVarCountOutputMG2dDescriptor ( int invariantLength,
int blkGroupSize )
inline static

◆ MakeScaleBiasMeanVar1dDescriptor() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeScaleBiasMeanVar1dDescriptor ( const std::array< index_t, NumInvariantDim > & lengths,
const std::array< index_t, NumInvariantDim > & strides )
inline static

◆ MakeScaleBiasMeanVar1dDescriptor() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeScaleBiasMeanVar1dDescriptor ( const std::array< index_t, NumInvariantDim > & lengths,
const std::array< index_t, NumInvariantDim > & strides )
inline static

◆ MakeXY2dDescriptor() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeXY2dDescriptor ( const std::array< index_t, Rank > & xyLengths,
const std::array< index_t, Rank > & xyStrides,
int blkGroupSize,
int numBlockTileIteration )
inline static

◆ MakeXY2dDescriptor() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
auto ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::MakeXY2dDescriptor ( const std::array< index_t, Rank > & xyLengths,
const std::array< index_t, Rank > & xyStrides,
int blkGroupSize,
int numBlockTileIteration )
inline static

◆ SetWorkSpacePointer() [1/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
void ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::SetWorkSpacePointer ( BaseArgument * pArg,
void * p_workspace,
const StreamConfig & = StreamConfig{} ) const
inline override virtual

◆ SetWorkSpacePointer() [2/2]

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
void ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::SetWorkSpacePointer ( BaseArgument * pArg,
void * p_workspace,
const StreamConfig & = StreamConfig{} ) const
inline override virtual

Member Data Documentation

◆ K_BlockTileSize

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ NumInvariantDim

template<typename XDataType, typename YDataType, typename AccDataType, typename ScaleDataType, typename BiasDataType, typename MeanVarDataType, typename YElementwiseOp, index_t Rank, index_t NumBatchNormReduceDim, bool UseMultiblockInK, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcYDstVectorDim, index_t XSrcVectorSize, index_t YDstVectorSize, index_t ScaleSrcVectorSize, index_t BiasSrcVectorSize, index_t MeanVarSrcDstVectorSize>
constexpr index_t ck::tensor_operation::device::DeviceBatchNormFwdImpl< XDataType, YDataType, AccDataType, ScaleDataType, BiasDataType, MeanVarDataType, YElementwiseOp, Rank, NumBatchNormReduceDim, UseMultiblockInK, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcYDstVectorDim, XSrcVectorSize, YDstVectorSize, ScaleSrcVectorSize, BiasSrcVectorSize, MeanVarSrcDstVectorSize >::NumInvariantDim = Rank - NumBatchNormReduceDim
static constexpr

The documentation for this struct was generated from the following files: