GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference

GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference

Composable Kernel: ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference
ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference

#include <gridwise_welford_second_half_layernorm2d.hpp>

Public Types

using ThreadClusterLengths_M_N = Sequence<MThreadClusterSize, NThreadClusterSize>
using ThreadBufferDimAccessOrder = Sequence<0, 1>
using ThreadClusterArrangeOrder = Sequence<0, 1>
using ThreadBufferLengths_M_N = Sequence<MThreadSliceSize, NThreadSliceSize>
using ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1>
using ThreadBufferLengths_N = Sequence<NThreadSliceSize>
using ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1)
using ThreadWelfordDstDesc_M
using ThreadwiseWelford
using BlockwiseWelford

Static Public Member Functions

static __device__ void Run (const EMeanVarDataType *__restrict__ p_e_grid, const EMeanVarDataType *__restrict__ p_in_welford_mean_grid, const EMeanVarDataType *__restrict__ p_in_welford_var_grid, const int32_t *__restrict__ p_in_welford_count_grid, const GammaDataType *__restrict__ p_gamma_grid, const BetaDataType *__restrict__ p_beta_grid, HDataType *__restrict__ p_h_grid, const EHGridDesc_M_N &e_grid_desc_m_n, const EHGridDesc_M_N &h_grid_desc_m_n, const MeanVarGridDesc_M_NBlock &mean_var_grid_desc_m_nblock, const CountGridDesc_M_NBlock &count_grid_desc_m_nblock, const GammaBetaGridDesc_N &gamma_grid_desc_n, const GammaBetaGridDesc_N &beta_grid_desc_n, index_t numMeanVarCountBlockTileIteration_N, index_t NBlockClusterLength, ComputeDataType epsilon, HElementwiseOperation h_element_op)

Static Public Attributes

static constexpr auto thread_cluster_desc_m_n
static constexpr auto thread_buffer_desc_m_n
static constexpr auto thread_buffer_desc_m_1
static constexpr auto thread_buffer_desc_n
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t N_BlockTileSize = NThreadClusterSize * NThreadSliceSize

Member Typedef Documentation

◆ BlockwiseWelford

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::BlockwiseWelford
Initial value:
BlockwiseWelford<ComputeDataType,
                 BlockSize,
                 ThreadClusterLengths_M_N,
                 ThreadClusterArrangeOrder>
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, false > BlockwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:82
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_welford_first_half.hpp:68
Sequence< MThreadClusterSize, NThreadClusterSize > ThreadClusterLengths_M_N
Definition gridwise_welford_second_half_layernorm2d.hpp:51

◆ ThreadBufferDimAccessOrder

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferDimAccessOrder = Sequence<0, 1>

◆ ThreadBufferLengths_M_1

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1>

◆ ThreadBufferLengths_M_N

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_M_N = Sequence<MThreadSliceSize, NThreadSliceSize>

◆ ThreadBufferLengths_N

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_N = Sequence<NThreadSliceSize>

◆ ThreadClusterArrangeOrder

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadClusterArrangeOrder = Sequence<0, 1>

◆ ThreadClusterLengths_M_N

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadClusterLengths_M_N = Sequence<MThreadClusterSize, NThreadClusterSize>

◆ ThreadWelfordDstDesc_M

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadWelfordDstDesc_M
Initial value:
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{})))
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ ThreadWelfordSrcDesc_M_1

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1)

◆ ThreadwiseWelford

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadwiseWelford

Member Function Documentation

◆ Run()

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
__device__ void ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::Run ( const EMeanVarDataType *__restrict__ p_e_grid,
const EMeanVarDataType *__restrict__ p_in_welford_mean_grid,
const EMeanVarDataType *__restrict__ p_in_welford_var_grid,
const int32_t *__restrict__ p_in_welford_count_grid,
const GammaDataType *__restrict__ p_gamma_grid,
const BetaDataType *__restrict__ p_beta_grid,
HDataType *__restrict__ p_h_grid,
const EHGridDesc_M_N & e_grid_desc_m_n,
const EHGridDesc_M_N & h_grid_desc_m_n,
const MeanVarGridDesc_M_NBlock & mean_var_grid_desc_m_nblock,
const CountGridDesc_M_NBlock & count_grid_desc_m_nblock,
const GammaBetaGridDesc_N & gamma_grid_desc_n,
const GammaBetaGridDesc_N & beta_grid_desc_n,
index_t numMeanVarCountBlockTileIteration_N,
index_t NBlockClusterLength,
ComputeDataType epsilon,
HElementwiseOperation h_element_op )
inline static

Member Data Documentation

◆ I0

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::I1 = Number<1>{}
static constexpr

◆ M_BlockTileSize

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
index_t ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ N_BlockTileSize

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
index_t ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::N_BlockTileSize = NThreadClusterSize * NThreadSliceSize
static constexpr

◆ thread_buffer_desc_m_1

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::thread_buffer_desc_m_1
static constexpr

◆ thread_buffer_desc_m_n

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::thread_buffer_desc_m_n
static constexpr

◆ thread_buffer_desc_n

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::thread_buffer_desc_n
static constexpr

◆ thread_cluster_desc_m_n

template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
auto ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::thread_cluster_desc_m_n
static constexpr
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_N{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< 0, 1 > ThreadClusterArrangeOrder
Definition gridwise_welford_second_half_layernorm2d.hpp:53

The documentation for this struct was generated from the following file: