json_dump.hpp File Reference

json_dump.hpp File Reference

Composable Kernel: json_dump.hpp File Reference
json_dump.hpp File Reference

Go to the source code of this file.

Classes

struct  has_warp_tile_members< T, typename >
struct  has_warp_tile_members< T, std::void_t< decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)> >

Macros

#define START_JSON_DUMP_FILE(file_name)
#define END_JSON_DUMP_FILE()
#define ADD_KEY_VALUE(key, value)
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)

Functions

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename CDataType, typename GemmConfig, template< typename > typename DTypeTraits>
void dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
void dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
template<typename ALayout, typename BLayout, typename CLayout>
void dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
void dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
void dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
void dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
void dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
template<typename DataType, template< typename > typename DTypeTraits>
void dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
void dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
void dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
void dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
void dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
void dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
void dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
void dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
void dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
void dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
void dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
void dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")

Macro Definition Documentation

◆ ADD_KEY_VALUE

#define ADD_KEY_VALUE(key, value)

◆ ADD_PERF_TO_JSON

#define ADD_PERF_TO_JSON(_time, tflops, gbytes)

◆ END_JSON_DUMP_FILE

#define END_JSON_DUMP_FILE()
Value:
std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;

◆ START_JSON_DUMP_FILE

#define START_JSON_DUMP_FILE(file_name)

Function Documentation

◆ dump_add_rmsnorm2d_rdquant_fwd_json()

void dump_add_rmsnorm2d_rdquant_fwd_json ( const std::string & json_filename,
const std::string & input_data_type,
const std::string & quantized_data_type,
int m,
int n,
int stride,
float epsilon,
float ave_time,
float tflops,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "add_rmsnorm2d_rdquant_fwd" )

◆ dump_batched_gemm_json_results()

void dump_batched_gemm_json_results ( const std::string & json_filename,
const std::string & op_name,
int M,
int N,
int K,
int stride_A,
int stride_B,
int stride_C,
int batch_stride_A,
int batch_stride_B,
int batch_stride_C,
int batch_count,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "batched_gemm_basic" )

◆ dump_batched_transpose_json()

void dump_batched_transpose_json ( const std::string & json_filename,
int N,
int C,
int H,
int W,
const std::string & layout_in,
const std::string & layout_out,
const std::string & prec,
float ave_time,
float tflops,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "batched_transpose" )

◆ dump_elementwise_json_results()

void dump_elementwise_json_results ( const std::string & json_filename,
const std::string & prec,
int grid_size,
int block_size,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "elementwise" )

◆ dump_flatmm_json_results()

void dump_flatmm_json_results ( const std::string & json_filename,
const std::string & datatype,
int M,
int N,
int K,
int stride_A,
int stride_B,
int stride_C,
int kbatch,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "flatmm_basic" )

◆ dump_fmha_bwd_json_results()

void dump_fmha_bwd_json_results ( const std::string & json_filename,
const std::string & data_type,
const std::string & mode,
const std::string & i_perm,
const std::string & o_perm,
int batch,
int nhead,
int nhead_k,
int seqlen_q,
int seqlen_k,
int hdim_q,
int hdim_v,
float scale,
const std::string & bias,
bool use_dbias,
float p_drop,
bool s_randval,
bool deterministic,
const std::string & mask,
int mask_left,
int mask_right,
int workspace_size,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "fmha_bwd" )

◆ dump_fmha_fwd_json_results()

void dump_fmha_fwd_json_results ( const std::string & json_filename,
const std::string & prec,
const std::string & mode,
const std::string & io_layout,
int batch,
int nhead,
int nhead_k,
int seqlen_qs,
int seqlen_ks,
int seqlen_kpads,
int hdim_q,
int hdim_v,
float scale_s,
float p_drop,
bool lse,
bool squant,
const std::string & bias,
const std::string & vlayout,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "fmha_fwd" )

◆ dump_fused_moe_json()

void dump_fused_moe_json ( const std::string & json_filename,
const std::string & api_str,
const std::string & prec_str,
int tokens,
bool is_local_token,
int local_tokens,
int experts,
int topk,
int hidden_size,
int intermediate_size,
int stride,
int block_m,
int activation,
bool gate_only,
bool fused_quant,
bool pass,
float ave_time,
float tflops,
float tb_per_sec,
const std::string & kernel_name = "fused_moe" )

◆ dump_gemm_json_results()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename CDataType, typename GemmConfig, template< typename > typename DTypeTraits>
void dump_gemm_json_results ( const std::string & json_filename,
int M,
int N,
int K,
int stride_A,
int stride_B,
int stride_C,
bool persistent,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "gemm_basic" )

◆ dump_gemm_multi_d_fp16_json_results()

void dump_gemm_multi_d_fp16_json_results ( const std::string & json_filename,
const std::string & op_name,
int M,
int N,
int K,
int StrideA,
int StrideB,
int StrideD0,
int StrideD1,
int StrideE,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "gemm_multi_d_fp16" )

◆ dump_grouped_gemm_json_results()

template<typename ALayout, typename BLayout, typename CLayout>
void dump_grouped_gemm_json_results ( const std::string & json_filename,
const std::string & op_name,
int group_count,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "grouped_gemm" )

◆ dump_layernorm2d_fwd_json_results()

void dump_layernorm2d_fwd_json_results ( const std::string & json_filename,
const std::string & prec_i,
const std::string & prec_o,
const std::string & prec_sm,
const std::string & prec_sy,
int m,
int n,
int x_stride,
int xr_stride,
int y_stride,
int yr_stride,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "layernorm2d_fwd" )

◆ dump_moe_smoothquant_json()

void dump_moe_smoothquant_json ( const std::string & json_filename,
const std::string & prec_i,
const std::string & prec_o,
int tokens,
int hidden_size,
int stride,
int experts,
int topk,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "moe_smoothquant" )

◆ dump_moe_sorting_json()

void dump_moe_sorting_json ( const std::string & json_filename,
const std::string & index_prec,
const std::string & weight_prec,
const std::string & workspace_size,
int dispatch_policy,
int tokens,
int num_experts,
int topk,
float ave_time,
float tflops,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "moe_sorting" )

◆ dump_permute_json_results()

void dump_permute_json_results ( const std::string & json_filename,
const std::string & data_type,
bool pass,
float ave_time,
float tflop,
float gb_per_sec,
const std::string & kernel_name = "permute" )

◆ dump_reduce_json_results()

template<typename DataType, template< typename > typename DTypeTraits>
void dump_reduce_json_results ( const std::string & json_filename,
int N,
int C,
int H,
int W,
bool pass,
float ave_time,
float tflops,
float gb_per_sec,
const std::string & kernel_name = "reduce" )

◆ dump_rmsnorm2d_fwd_json()

void dump_rmsnorm2d_fwd_json ( const std::string & json_filename,
const std::string & prec_str,
int m,
int n,
int x_stride,
int xr_stride,
int y_stride,
int yr_stride,
int use_model_sensitive_rmsnorm,
float ave_time,
float tflops,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "rmsnorm2d_fwd" )

◆ dump_smoothquant_json()

void dump_smoothquant_json ( const std::string & json_filename,
const std::string & prec_str,
int m,
int n,
int x_stride,
int y_stride,
float ave_time,
float tflops,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "smoothquant" )

◆ dump_topk_softmax_json()

void dump_topk_softmax_json ( const std::string & json_filename,
const std::string & input_prec,
const std::string & weight_prec,
int tokens,
int experts,
int topk,
int stride_input,
int stride_output,
float ave_time,
float tflop,
float gb_per_sec,
bool pass,
const std::string & kernel_name = "topk_softmax" )