moe_flatmm_pipeline_agmem_bgmem_creg.hpp Source File
moe_flatmm_pipeline_agmem_bgmem_creg.hpp
Go to the documentation of this file.
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_DEVICE constexpr auto make_tile_scatter_gather(const TensorView_ &tensor_view, const WindowLengths_ &window_lengths, const multi_index< TensorView_::get_num_of_dimension()> &origin, const StaticTileDistribution_ &tile_distribution, const StaticPageIndexArray_ &page_idx, number< HsGatherDim >={}, number< NumCoord >={})
Definition tile_scatter_gather.hpp:906
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:15
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:65
static constexpr index_t Bload_num_perK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:105
static constexpr index_t kNPerBlock
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:40
static constexpr index_t KIterPerWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:73
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, number< IsGateUpMode > is_gate_up_mode, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:994
static constexpr index_t BlockSize
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:36
static constexpr index_t Aload_rep
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:104
static constexpr auto idxN
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:62
static constexpr bool DoubleSmemBuffer
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:130
static constexpr auto config
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:28
static constexpr index_t NWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:69
static constexpr bool UsePersistentKernel
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:56
static constexpr index_t DsReadPreload
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:34
remove_cvref_t< decltype(PipelinePolicy::template GetBlockFlatmm< Problem >())> BlockFlatmm
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:25
static constexpr index_t flatNPerWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:44
static constexpr index_t KPerBlockPerIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:79
static constexpr index_t m_preload
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:84
static constexpr index_t MWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:68
static constexpr index_t dswrite_rep
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:102
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:66
static constexpr index_t mfma_per_wg
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:94
static CK_TILE_HOST_DEVICE constexpr auto GetADramTileDistribution()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:437
static constexpr index_t WaveSize
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:37
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:19
static constexpr index_t AK1
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:82
static constexpr index_t GetVectorSizeC()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:48
static constexpr auto idxK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:63
static constexpr index_t mfma_perM_perK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:114
static constexpr index_t KFlatPerBlockPerIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:75
static constexpr index_t kKPerBlock
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:41
static constexpr bool kPadK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:52
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM(index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:140
static constexpr bool kPadM
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:50
remove_cvref_t< typename Problem::ADataType > ADataType
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:16
static constexpr index_t HalfMIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:111
static constexpr int MXFP4PackedSize
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:81
static constexpr index_t GetVectorSizeA()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:46
static constexpr index_t dsread_num_perK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:100
static constexpr index_t kLdsAlignmentInBytes
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:54
static constexpr index_t NIterPerWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:72
static constexpr index_t DsWritePreIssue
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:33
static constexpr index_t flatKPerWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:43
static constexpr index_t Aload_num_perK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:103
static constexpr index_t dswrite_mIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:115
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:417
static constexpr index_t dswrite_num_perK
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:101
static constexpr index_t BK1
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:83
static constexpr index_t kMPerBlock
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:39
static constexpr index_t NumWaveGroups
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:55
static constexpr auto I1
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:59
static constexpr bool HasHotLoop
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:88
static constexpr auto idxM
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:61
static constexpr auto TailNum
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:89
static constexpr index_t dsread_per_wg
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:96
static constexpr index_t MIterPerWarp
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:71
remove_cvref_t< typename Problem::CLayout > CLayout
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:23
static CK_TILE_HOST_DEVICE constexpr auto TransposeC()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:132
static constexpr bool kPadN
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:51
static CK_TILE_HOST const std::string GetName()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:118
static constexpr auto I2
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:60
static constexpr index_t MPerBlockPerIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:78
static constexpr index_t NFlatPerBlockPerIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:76
remove_cvref_t< typename Problem::BDataType > BDataType
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:17
static constexpr index_t dswrite_kIter
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:116
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:222
remove_cvref_t< typename Problem::BLayout > BLayout
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:22
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:64
static constexpr index_t GetVectorSizeB()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:47
CK_TILE_HOST_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, number< IsGateUpMode >, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:446
static constexpr index_t Bload_rep
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:112
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:134
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:365
static constexpr auto I0
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:58
remove_cvref_t< typename Problem::CDataType > CDataType
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:18
remove_cvref_t< typename Problem::ALayout > ALayout
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:21
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition moe_flatmm_pipeline_agmem_bgmem_creg.hpp:31
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43