20 typename AScaleDataType,
22 typename BScaleDataType,
25 typename AMmaTileDesc,
26 typename BMmaTileDesc,
27 index_t ABlockTransferSrcScalarPerVector,
28 index_t BBlockTransferSrcScalarPerVector,
41template <
index_t ThreadBlockSize,
44 typename AScaleDataType,
46 typename BScaleDataType,
49 typename AMmaTileDesc,
50 typename BMmaTileDesc,
51 index_t ABlockTransferSrcScalarPerVector,
52 index_t BBlockTransferSrcScalarPerVector,
72 ABlockTransferSrcScalarPerVector,
73 BBlockTransferSrcScalarPerVector,
89 ABlockTransferSrcScalarPerVector,
90 BBlockTransferSrcScalarPerVector,
109 ABlockTransferSrcScalarPerVector,
110 BBlockTransferSrcScalarPerVector,
163 KPerBlock / ScaleBlockSize;
177 "A scale pack data type too large!");
179 "B scale pack data type too large!");
193 template <
bool HasMainLoop,
197 typename ABlockTransfer,
198 typename AGridBuffer,
199 typename ABlockBuffer,
200 typename ABlockTransferStep,
203 typename BBlockTransfer,
204 typename BGridBuffer,
205 typename BBlockBuffer,
206 typename BBlockTransferStep,
207 typename CThreadBuffer,
208 typename AScaleGridBuffer,
209 typename AScaleGridDesc,
210 typename AScaleThreadTransfer,
211 typename BScaleGridBuffer,
212 typename BScaleGridDesc,
213 typename BScaleThreadTransfer>
216 const AGridDesc& a_grid_desc,
217 const ABlockDesc& a_block_desc,
218 ABlockTransfer& a_blockwise_copy,
219 const AGridBuffer& a_grid_buf,
220 ABlockBuffer& a_block_buf,
221 const ABlockTransferStep& a_block_copy_step,
223 const BGridDesc& b_grid_desc,
224 const BBlockDesc& b_block_desc,
225 BBlockTransfer& b_blockwise_copy,
226 const BGridBuffer& b_grid_buf,
227 BBlockBuffer& b_block_buf,
228 const BBlockTransferStep& b_block_copy_step,
230 CThreadBuffer& c_thread_buf,
232 const AScaleGridDesc& a_scale_grid_desc,
233 AScaleThreadTransfer& a_scale_thread_copy,
234 const AScaleGridBuffer& a_scale_grid_buf,
235 const BScaleGridDesc& b_scale_grid_desc,
236 BScaleThreadTransfer& b_scale_thread_copy,
237 const BScaleGridBuffer& b_scale_grid_buf,
252 a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
253 b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
255 a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
256 b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
261 a_scale_thread_copy.Run(a_scale_grid_desc,
267 a_scale_thread_copy.MoveSrcSliceWindow(a_scale_grid_desc,
270 a_scale_thread_copy.MoveSrcSliceWindow(
275 a_scale_thread_copy.MoveSrcSliceWindow(
282 b_scale_thread_copy.Run(b_scale_grid_desc,
288 b_scale_thread_copy.MoveSrcSliceWindow(b_scale_grid_desc,
291 b_scale_thread_copy.MoveSrcSliceWindow(
297 b_scale_thread_copy.MoveSrcSliceWindow(
302 a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
303 b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
306 c_thread_buf.Clear();
309 if constexpr(HasMainLoop)
316 a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
317 b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
319 a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
320 b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
330 constexpr auto a_k_step_chunk =
353 constexpr auto b_k_step_chunk =
377 constexpr index_t a_scale_offset =
379 constexpr index_t b_scale_offset =
383 "Must have at least one scale per Xdlops "
391 a_scale_thread_vec.template AsType<AScaleDataType>()(s) =
396 b_scale_thread_vec.template AsType<BScaleDataType>()(s) =
403 constexpr auto kxdl = ikxdl + k0 *
KXdlPack;
409 a_thread_vec.template AsType<ComputeTypeA>()(ik) =
412 b_thread_vec.template AsType<ComputeTypeB>()(ik) =
417 using mfma_input_type_a =
422 using mfma_input_type_b =
427 using mfma_scale_input_type_a =
430 using mfma_scale_input_type_b =
440 a_thread_vec.template AsType<mfma_input_type_a>(),
442 .template AsType<mfma_scale_input_type_a>(),
443 b_thread_vec.template AsType<mfma_input_type_b>(),
445 .template AsType<mfma_scale_input_type_b>(),
446 c_thread_buf.GetVectorTypeReference(
458 a_scale_thread_copy.Run(a_scale_grid_desc,
464 a_scale_thread_copy.MoveSrcSliceWindow(a_scale_grid_desc,
467 a_scale_thread_copy.MoveSrcSliceWindow(
472 a_scale_thread_copy.MoveSrcSliceWindow(
479 b_scale_thread_copy.Run(b_scale_grid_desc,
485 b_scale_thread_copy.MoveSrcSliceWindow(b_scale_grid_desc,
488 b_scale_thread_copy.MoveSrcSliceWindow(
494 b_scale_thread_copy.MoveSrcSliceWindow(
499 a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
500 b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
503 }
while(i < (num_loop - 1));
516 constexpr auto a_k_step_chunk =
539 constexpr auto b_k_step_chunk =
563 constexpr index_t a_scale_offset =
565 constexpr index_t b_scale_offset =
569 "Must have at least one scale per Xdlops "
577 a_scale_thread_vec.template AsType<AScaleDataType>()(s) =
582 b_scale_thread_vec.template AsType<BScaleDataType>()(s) =
589 constexpr auto kxdl = ikxdl + k0 *
KXdlPack;
595 a_thread_vec.template AsType<ComputeTypeA>()(ik) =
598 b_thread_vec.template AsType<ComputeTypeB>()(ik) =
603 using mfma_input_type_a =
608 using mfma_input_type_b =
613 using mfma_scale_input_type_a =
616 using mfma_scale_input_type_b =
626 a_thread_vec.template AsType<mfma_input_type_a>(),
628 .template AsType<mfma_scale_input_type_a>(),
629 b_thread_vec.template AsType<mfma_input_type_b>(),
631 .template AsType<mfma_scale_input_type_b>(),
646 Number<KRepeat / KXdlPack>{},
653 Number<KRepeat / KXdlPack>{},
657 using Base::a_thread_copy_;
658 using Base::a_thread_desc_;
659 using Base::b_thread_copy_;
660 using Base::b_thread_desc_;
661 using Base::c_thread_desc_;
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_static_buffer(Number< N >)
Definition static_buffer.hpp:186
int32_t index_t
Definition ck.hpp:299
integral_constant< index_t, N > Number
Definition number.hpp:12
TailNumber
Definition blkgemmpipe_scheduler.hpp:31
@ Even
Definition blkgemmpipe_scheduler.hpp:34
@ Odd
Definition blkgemmpipe_scheduler.hpp:33
@ Full
Definition blkgemmpipe_scheduler.hpp:49
BlockGemmPipelineScheduler
Definition blkgemmpipe_scheduler.hpp:25
@ Intrawave
Definition blkgemmpipe_scheduler.hpp:26
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__device__ void block_sync_lds()
Definition synchronization.hpp:16
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetWaveIdx static __device__ auto GetWaveIdx()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:118
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AccType float AccType
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:36
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::HotLoopInstList ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, A_K1, B_K1, A_K1, B_K1, MRepeat, NRepeat, MPerXDL, NPerXDL, xdlops_gemm.KPerXdlops,(packed_size_v< ComputeTypeA > > 1||packed_size_v< ComputeTypeB > > 1)> HotLoopInstList
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:88
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeA ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_M_N &c_grid_desc_m_n)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:344
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AMmaKStride static constexpr index_t AMmaKStride
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:68
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MWaves static constexpr index_t MWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:49
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 __host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:220
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:269
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_block_desc_n0_n1_n2_n3_k static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_n3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:382
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:297
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Tuple5 decltype(CalculateAThreadOriginDataIndex()) Tuple5
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:184
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I0 static constexpr auto I0
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:41
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I1 static constexpr auto I1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KRepeat static constexpr index_t KRepeat
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:79
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateCThreadOriginDataIndex static __device__ auto CalculateCThreadOriginDataIndex(Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:154
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MXdlPack static constexpr index_t MXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:84
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 __host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:283
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_G_M_N &c_grid_desc_g_m_n)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:361
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::xdlops_gemm static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_block_desc_m0_m1_m2_m3_k static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_m3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:381
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KThreadChunk static constexpr index_t KThreadChunk
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:74
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KXdlPack static constexpr index_t KXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:86
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:234
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadBuffer __host__ __device__ constexpr auto & GetCThreadBuffer()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:116
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeB BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::APackedSize static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockwiseGemmXdlops_mx_pipeline_base __host__ __device__ BlockwiseGemmXdlops_mx_pipeline_base(Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex())
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:204
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BMmaKStride static constexpr index_t BMmaKStride
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:69
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NXdlPack static constexpr index_t NXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:85
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 __host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:327
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NWaves static constexpr index_t NWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:50
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::WaveSize static constexpr index_t WaveSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:51
ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BPackedSize static constexpr index_t BPackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:39
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AccType typename Base::AccType AccType
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:153
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_thread_copy_ BThreadCopy b_thread_copy_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:424
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockHasHotloop static __host__ constexpr bool BlockHasHotloop(index_t num_loop)
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:183
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::PrefillStages static constexpr index_t PrefillStages
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:159
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::mx_scale_t e8m0_bexp_t mx_scale_t
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:173
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I1 static constexpr auto I1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MXdlPack static constexpr index_t MXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:84
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MWaves static constexpr index_t MWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:49
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ScalesPerXdlopsRun static constexpr auto ScalesPerXdlopsRun
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:166
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_scale_thread_desc static constexpr auto a_scale_thread_desc
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:644
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_block_desc_m0_m1_m2_m3_k static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_m3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:381
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I0 static constexpr auto I0
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:41
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Run __device__ void Run(const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, const AScaleGridDesc &a_scale_grid_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const BScaleGridDesc &b_scale_grid_desc, BScaleThreadTransfer &b_scale_thread_copy, const BScaleGridBuffer &b_scale_grid_buf, index_t num_loop) const
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:214
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KThreadChunk static constexpr index_t KThreadChunk
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:74
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::scale_pack_size_a static constexpr auto scale_pack_size_a
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:174
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Base
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:102
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_thread_copy_ AThreadCopy a_thread_copy_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:423
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_block_desc_n0_n1_n2_n3_k static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_n3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:382
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockLoopTailNum static __host__ constexpr TailNumber BlockLoopTailNum(index_t num_loop)
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:188
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_scale_thread_desc static constexpr auto b_scale_thread_desc
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:651
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ScalesPerXdlopsRunPerThread static constexpr auto ScalesPerXdlopsRunPerThread
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:170
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_thread_desc_ static constexpr auto b_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:392
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::c_thread_desc_ static constexpr auto c_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:396
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KXdlPack static constexpr index_t KXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:86
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeB typename Base::ComputeTypeB ComputeTypeB
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:156
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NXdlPack static constexpr index_t NXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:85
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_scale_thread_vec_size static constexpr auto a_scale_thread_vec_size
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:180
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Tuple5 typename Base::Tuple5 Tuple5
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:154
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KRepeat static constexpr index_t KRepeat
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:79
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::xdlops_gemm static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_thread_desc_ static constexpr auto a_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:388
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GlobalBufferNum static constexpr index_t GlobalBufferNum
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:160
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::APackedSize static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeA typename Base::ComputeTypeA ComputeTypeA
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:155
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_scale_thread_vec_size static constexpr auto b_scale_thread_vec_size
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:181
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ScalesPerKBlockSize static constexpr auto ScalesPerKBlockSize
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:162
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BPackedSize static constexpr index_t BPackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:39
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::scale_pack_size_b static constexpr auto scale_pack_size_b
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:175
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::PrefetchStages static constexpr index_t PrefetchStages
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:158
ck::BlockwiseGemmXdlops_pipeline_mx_moe_nbs_v1< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NWaves static constexpr index_t NWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:50
Definition blockwise_gemm_pipeline_xdlops_mx_moe_nbs_v1.hpp:38
Unsigned representation of a conventional biased Float32 exponent.
Definition utility/e8m0.hpp:26
Definition functional2.hpp:33
Definition dtype_vector.hpp:10