ComputePtrOffsetOfStridedBatch Struct Reference

ComputePtrOffsetOfStridedBatch Struct Reference#

Composable Kernel: ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch Struct Reference
ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch Struct Reference

#include <device_batched_gemm_multiple_d_dl.hpp>

Public Member Functions

 ComputePtrOffsetOfStridedBatch (index_t BatchStrideA, index_t BatchStrideB, std::array< ck::index_t, NumDTensor > BatchStrideDs, index_t BatchStrideE)
__host__ __device__ constexpr long_index_t GetAPtrOffset (index_t g_idx) const
__host__ __device__ constexpr long_index_t GetBPtrOffset (index_t g_idx) const
__host__ __device__ constexpr auto GetDsPtrOffset (index_t g_idx) const
__host__ __device__ constexpr long_index_t GetEPtrOffset (index_t g_idx) const

Constructor & Destructor Documentation

◆ ComputePtrOffsetOfStridedBatch()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch::ComputePtrOffsetOfStridedBatch ( index_t BatchStrideA,
index_t BatchStrideB,
std::array< ck::index_t, NumDTensor > BatchStrideDs,
index_t BatchStrideE )
inline

Member Function Documentation

◆ GetAPtrOffset()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch::GetAPtrOffset ( index_t g_idx) const
inlineconstexpr

◆ GetBPtrOffset()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch::GetBPtrOffset ( index_t g_idx) const
inlineconstexpr

◆ GetDsPtrOffset()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
__host__ __device__ constexpr auto ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch::GetDsPtrOffset ( index_t g_idx) const
inlineconstexpr

◆ GetEPtrOffset()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::ComputePtrOffsetOfStridedBatch::GetEPtrOffset ( index_t g_idx) const
inlineconstexpr

The documentation for this struct was generated from the following file: