BlockFmhaV3PipelineDefaultPolicy Struct Reference
#include <block_fmha_fwd_v3_pipeline_default_policy.hpp>
Static Public Member Functions | |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentQ () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetAlignmentK () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetAlignmentV () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentO () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackK () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemVPackK () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeKDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeVDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeQRegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeKRegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakePRegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeVRegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetQKBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetPVBlockGemm () |
| template<typename Problem, ck_tile::index_t IBuf = 0> | |
| static CK_TILE_DEVICE constexpr auto | MakeKLdsStoreBlockDescriptor (ck_tile::number< IBuf >=ck_tile::number< 0 >{}) |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeKLdsLoadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetSingleSmemElementSpaceSize () |
| template<typename Problem, ck_tile::index_t IBuf = 0> | |
| static CK_TILE_DEVICE constexpr auto | MakeVLdsStoreBlockDescriptor (ck_tile::number< IBuf >=ck_tile::number< 0 >{}) |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeVLdsLoadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr ck_tile::index_t | GetSmemSizeKV () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr ck_tile::index_t | GetSmemSize () |
Static Public Attributes | |
| static constexpr ck_tile::index_t | NumWarpPerGroup = 4 |
| static constexpr ck_tile::index_t | NumThreadPerWarpGroup |
| static constexpr ck_tile::index_t | kKLdsPadInBytes = 4 * 4 |
| static constexpr ck_tile::index_t | kVLdsPadInBytes = 4 * 16 |
Member Function Documentation
◆ GetAlignmentK()
|
inline static constexpr |
◆ GetAlignmentO()
|
inline static constexpr |
◆ GetAlignmentQ()
|
inline static constexpr |
◆ GetAlignmentV()
|
inline static constexpr |
◆ GetPVBlockGemm()
|
inline static constexpr |
NOTICE: in order to use load_tile_transpose() later for V tiles, we have to pass WGAttrNumAccessEnum::Double instead of WGAttrNumAccessEnum::Single
◆ GetQKBlockGemm()
|
inline static constexpr |
NOTICE: in order to use load_tile_transpose() later for V tile, we cannot use WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution here
NOTICE: in order to use load_tile_transpose() later for V tile, we cannot use WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution here
◆ GetSingleSmemElementSpaceSize()
|
inline static constexpr |
◆ GetSmemKPackK()
|
inline static constexpr |
◆ GetSmemSize()
|
inline static constexpr |
◆ GetSmemSizeKV()
|
inline static constexpr |
TODO: override GetSingleSmemElementSpaceSize() to align with MakeKLdsBlockDescriptor() & MakeVLdsBlockDescriptor()
◆ GetSmemVPackK()
|
inline static constexpr |
◆ MakeKDramTileDistribution()
|
inline static constexpr |
◆ MakeKLdsLoadBlockDescriptor()
|
inline static constexpr |
◆ MakeKLdsStoreBlockDescriptor()
|
inline static constexpr |
◆ MakeKRegTileDistribution()
|
inline static constexpr |
◆ MakePRegTileDistribution()
|
inline static constexpr |
◆ MakeQRegTileDistribution()
|
inline static constexpr |
◆ MakeVDramTileDistribution()
|
inline static constexpr |
◆ MakeVLdsLoadBlockDescriptor()
|
inline static constexpr |
FIXME: rename kNPerBlock & kKPerBlock, since kN1 is the contiguous dimension
◆ MakeVLdsStoreBlockDescriptor()
|
inline static constexpr |
FIXME: rename kNPerBlock & kKPerBlock, since kN1 is the contiguous dimension
◆ MakeVRegTileDistribution()
|
inline static constexpr |
Member Data Documentation
◆ kKLdsPadInBytes
|
static constexpr |
◆ kVLdsPadInBytes
|
static constexpr |
◆ NumThreadPerWarpGroup
|
static constexpr |
◆ NumWarpPerGroup
|
static constexpr |
The documentation for this struct was generated from the following file: