#include <threadwise_tensor_slice_transfer_v3r1_gather.hpp>
|
| __device__ constexpr | ThreadwiseTensorSliceTransfer_v3r1_gather (const SrcDesc &src_desc, const Index &src_slice_origin, const SrcElementwiseOperation &src_element_op, const DstDesc &dst_desc, const Index &dst_slice_origin, const DstElementwiseOperation &dst_element_op, const StaticallyIndexedArray< IndexType, gather_num > &gather_offsets) |
| __device__ void | SetSrcSliceOrigin (const SrcDesc &src_desc, const Index &src_slice_origin_idx) |
| __device__ void | SetDstSliceOrigin (const DstDesc &dst_desc, const Index &dst_slice_origin_idx) |
| template<typename SrcBuffer, index_t ThreadScratchId = 0> |
| __device__ void | RunRead (const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename SeqIdx, index_t ThreadScratchId = 0> |
| __device__ constexpr auto | GetSrcThreadScratchIdx (Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<index_t ThreadScratchId> |
| __device__ void | TransferDataFromSrcThreadScratchToDstThreadScratch (Number< ThreadScratchId > thread_scratch_id) |
| template<typename DstBuffer, index_t ThreadScratchId = 0> |
| __device__ void | RunWrite (const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| __device__ void | MoveSrcSliceWindow (const SrcDesc &src_desc, const Index &src_slice_origin_step_idx) |
| __device__ void | MoveDstSliceWindow (const DstDesc &dst_desc, const Index &dst_slice_origin_step_idx) |
◆ DstCoord
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| using ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::DstCoord = decltype(make_tensor_coordinate(DstDesc{}, Index{})) |
◆ DstCoordStep
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| using ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::DstCoordStep = decltype(make_tensor_coordinate_step(DstDesc{}, Index{})) |
◆ Index
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| using ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::Index = MultiIndex<nDim> |
◆ SrcCoord
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| using ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SrcCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{})) |
◆ SrcCoordStep
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| using ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SrcCoordStep = decltype(make_tensor_coordinate_step(SrcDesc{}, Index{})) |
◆ ThreadwiseTensorSliceTransfer_v3r1_gather()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::ThreadwiseTensorSliceTransfer_v3r1_gather |
( |
const SrcDesc & | src_desc, |
|
|
const Index & | src_slice_origin, |
|
|
const SrcElementwiseOperation & | src_element_op, |
|
|
const DstDesc & | dst_desc, |
|
|
const Index & | dst_slice_origin, |
|
|
const DstElementwiseOperation & | dst_element_op, |
|
|
const StaticallyIndexedArray< IndexType, gather_num > & | gather_offsets ) |
|
inline constexpr |
◆ GetDstCoordinateResetStep()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetDstCoordinateResetStep |
( |
| ) |
|
|
inline static constexpr |
◆ GetDstThreadScratchDescriptor()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetDstThreadScratchDescriptor |
( |
| ) |
|
|
inline static constexpr |
◆ GetSrcCoordinateResetStep()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetSrcCoordinateResetStep |
( |
| ) |
|
|
inline static constexpr |
◆ GetSrcOOBThreadScratchDescriptor()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetSrcOOBThreadScratchDescriptor |
( |
| ) |
|
|
inline static constexpr |
◆ GetSrcThreadScratchDescriptor()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetSrcThreadScratchDescriptor |
( |
| ) |
|
|
inline static constexpr |
◆ GetSrcThreadScratchIdx()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
template<typename SeqIdx,
index_t ThreadScratchId = 0>
| __device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetSrcThreadScratchIdx |
( |
Number< ThreadScratchId > | thread_scratch_id = Number<ThreadScratchId>{} | ) |
|
|
inline constexpr |
◆ MoveDstSliceWindow()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::MoveDstSliceWindow |
( |
const DstDesc & | dst_desc, |
|
|
const Index & | dst_slice_origin_step_idx ) |
|
inline |
◆ MoveSrcSliceWindow()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::MoveSrcSliceWindow |
( |
const SrcDesc & | src_desc, |
|
|
const Index & | src_slice_origin_step_idx ) |
|
inline |
◆ RunRead()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
template<typename SrcBuffer,
index_t ThreadScratchId = 0>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::RunRead |
( |
const SrcDesc & | src_desc, |
|
|
const SrcBuffer & | src_buf, |
|
|
Number< ThreadScratchId > | thread_scratch_id = Number<ThreadScratchId>{} ) |
|
inline |
◆ RunWrite()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
template<typename DstBuffer,
index_t ThreadScratchId = 0>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::RunWrite |
( |
const DstDesc & | dst_desc, |
|
|
DstBuffer & | dst_buf, |
|
|
Number< ThreadScratchId > | thread_scratch_id = Number<ThreadScratchId>{} ) |
|
inline |
◆ SetDstSliceOrigin()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SetDstSliceOrigin |
( |
const DstDesc & | dst_desc, |
|
|
const Index & | dst_slice_origin_idx ) |
|
inline |
◆ SetSrcSliceOrigin()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SetSrcSliceOrigin |
( |
const SrcDesc & | src_desc, |
|
|
const Index & | src_slice_origin_idx ) |
|
inline |
◆ TransferDataFromSrcThreadScratchToDstThreadScratch()
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
template<index_t ThreadScratchId>
| __device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::TransferDataFromSrcThreadScratchToDstThreadScratch |
( |
Number< ThreadScratchId > | thread_scratch_id | ) |
|
|
inline |
◆ DstScalarPerVector
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::DstScalarPerVector = Number<DstScalarPerVector_ / PackedSize>{} |
|
static constexpr |
◆ gather_num
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| index_t ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::gather_num = SliceLengths{}.At(Number<GatherDim>{}) |
|
static constexpr |
◆ I0
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I0 = Number<0>{} |
|
static constexpr |
◆ I1
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I1 = Number<1>{} |
|
static constexpr |
◆ I10
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I10 = Number<10>{} |
|
static constexpr |
◆ I12
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I12 = Number<12>{} |
|
static constexpr |
◆ I13
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I13 = Number<13>{} |
|
static constexpr |
◆ I14
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I14 = Number<14>{} |
|
static constexpr |
◆ I16
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I16 = Number<16>{} |
|
static constexpr |
◆ I2
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I2 = Number<2>{} |
|
static constexpr |
◆ I3
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I3 = Number<3>{} |
|
static constexpr |
◆ I4
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I4 = Number<4>{} |
|
static constexpr |
◆ I5
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I5 = Number<5>{} |
|
static constexpr |
◆ I6
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I6 = Number<6>{} |
|
static constexpr |
◆ I7
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I7 = Number<7>{} |
|
static constexpr |
◆ I8
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I8 = Number<8>{} |
|
static constexpr |
◆ nDim
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| index_t ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::nDim = SliceLengths::Size() |
|
static constexpr |
◆ PackedSize
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| index_t ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::PackedSize |
|
static constexpr |
Initial value: = []() {
    if constexpr(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>)
        return 2;
    else
        return 1;
}()
constexpr bool is_same_v
Definition type.hpp:283
Definition data_type.hpp:187
◆ SrcScalarPerVector
template<typename SliceLengths, typename SrcElementwiseOperation, typename DstElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder,
index_t SrcVectorDim,
index_t DstVectorDim,
index_t SrcScalarPerVector_,
index_t DstScalarPerVector_,
index_t SrcScalarStrideInVector,
index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, typename IndexType,
index_t GatherDim = 1,
index_t NumThreadScratch = 1>
| auto ck::ThreadwiseTensorSliceTransfer_v3r1_gather< SliceLengths, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector_, DstScalarPerVector_, SrcScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SrcScalarPerVector = Number<SrcScalarPerVector_ / PackedSize>{} |
|
static constexpr |
The documentation for this struct was generated from the following file: