DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, > Struct Template Reference
#include <device_batched_gemm_multiple_d_dl.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >:
Classes | |
| struct | ComputePtrOffsetOfStridedBatch |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | DeviceOp = DeviceBatchedGemmMultipleD_Dl |
| using | AGridDesc_K0_M_K1 = decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) |
| using | BGridDesc_K0_N_K1 = decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) |
| using | DsGridDesc_M_N = decltype(MakeDsGridDescriptor_M_N({}, {}, {})) |
| using | EGridDesc_M_N = decltype(MakeEGridDescriptor_M_N<ELayout>(1, 1, 1)) |
| using | GridwiseGemm |
| using | AGridDesc_K0_M0_M1_K1 |
| using | BGridDesc_K0_N0_N1_K1 |
| using | DsGridDesc_M0_M10_M11_N0_N10_N11 |
| using | EGridDesc_M0_M10_M11_N0_N10_N11 |
| using | DefaultBlock2CTileMap |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, index_t M, index_t N, index_t K, index_t Batch, index_t StrideA, index_t StrideB, const std::array< ck::index_t, NumDTensor > &StrideDs, index_t StrideE, index_t BatchStrideA, index_t BatchStrideB, const std::array< ck::index_t, NumDTensor > &BatchStrideDs, index_t BatchStrideE, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CDEElementwiseOperation cde_element_op) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static auto | MakeAGridDescriptor_K0_M_K1 (index_t M, index_t K, index_t StrideA) |
| static auto | MakeBGridDescriptor_K0_N_K1 (index_t K, index_t N, index_t StrideB) |
| template<typename ELay> | |
| static auto | MakeEGridDescriptor_M_N (index_t M, index_t N, index_t StrideE) |
| static auto | MakeDsGridDescriptor_M_N (const std::array< index_t, NumDTensor > &MRaws, const std::array< index_t, NumDTensor > &NRaws, const std::array< index_t, NumDTensor > &DsStride) |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &arg) |
| static auto | MakeArgument (const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_e, index_t M, index_t N, index_t K, index_t Batch, index_t StrideA, index_t StrideB, std::array< ck::index_t, NumDTensor > StrideDs, index_t StrideE, index_t BatchStrideA, index_t BatchStrideB, const std::array< ck::index_t, NumDTensor > &BatchStrideDs, index_t BatchStrideE, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CDEElementwiseOperation cde_element_op) |
| static auto | MakeInvoker () |
Static Public Attributes | |
| static constexpr index_t | NumDTensor = DsDataType::Size() |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | K1Number = Number<K1>{} |
| Static Public Attributes inherited from ck::tensor_operation::device::DeviceBatchedGemmMultiD< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation > | |
| static constexpr index_t | NumDTensor = DsDataType::Size() |
Member Typedef Documentation
◆ AGridDesc_K0_M0_M1_K1
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::AGridDesc_K0_M0_M1_K1 |
Initial value:
__host__ static __device__ constexpr auto MakeAGridDescriptor_K0_M0_M1_K1(const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1)
Definition gridwise_gemm_dl_multiple_d.hpp:158
decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) AGridDesc_K0_M_K1
Definition device_batched_gemm_multiple_d_dl.hpp:324
◆ AGridDesc_K0_M_K1
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::AGridDesc_K0_M_K1 = decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) |
◆ BGridDesc_K0_N0_N1_K1
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::BGridDesc_K0_N0_N1_K1 |
Initial value:
__host__ static __device__ constexpr auto MakeBGridDescriptor_K0_N0_N1_K1(const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1)
Definition gridwise_gemm_dl_multiple_d.hpp:178
decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) BGridDesc_K0_N_K1
Definition device_batched_gemm_multiple_d_dl.hpp:325
◆ BGridDesc_K0_N_K1
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::BGridDesc_K0_N_K1 = decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) |
◆ DefaultBlock2CTileMap
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::DefaultBlock2CTileMap |
Initial value:
__host__ static __device__ constexpr auto MakeDefaultBlock2CTileMap(const EGridDesc_M_N &c_grid_desc_m_n)
Definition gridwise_gemm_dl_multiple_d.hpp:242
decltype(MakeEGridDescriptor_M_N< ELayout >(1, 1, 1)) EGridDesc_M_N
Definition device_batched_gemm_multiple_d_dl.hpp:327
◆ DeviceOp
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::DeviceOp = DeviceBatchedGemmMultipleD_Dl |
◆ DsGridDesc_M0_M10_M11_N0_N10_N11
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::DsGridDesc_M0_M10_M11_N0_N10_N11 |
Initial value:
__host__ static __device__ constexpr auto MakeDsGridDescriptor_M0_M10_M11_N0_N10_N11(const DsGridDesc_M_N &ds_grid_desc_m_n)
Definition gridwise_gemm_dl_multiple_d.hpp:234
decltype(MakeDsGridDescriptor_M_N({}, {}, {})) DsGridDesc_M_N
Definition device_batched_gemm_multiple_d_dl.hpp:326
◆ DsGridDesc_M_N
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::DsGridDesc_M_N = decltype(MakeDsGridDescriptor_M_N({}, {}, {})) |
◆ EGridDesc_M0_M10_M11_N0_N10_N11
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::EGridDesc_M0_M10_M11_N0_N10_N11 |
Initial value:
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_M10_M11_N0_N10_N11(const CGridDesc_M_N_ &c_grid_desc_m_n)
Definition gridwise_gemm_dl_multiple_d.hpp:200
◆ EGridDesc_M_N
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::EGridDesc_M_N = decltype(MakeEGridDescriptor_M_N<ELayout>(1, 1, 1)) |
◆ GridwiseGemm
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
| using ck::tensor_operation::device::DeviceBatchedGemmMultipleD_Dl< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, M1PerThread, N1PerThread, KPerThread, M1N1ThreadClusterM1Xs, M1N1ThreadClusterN1Xs, ABlockTransferThreadSliceLengths_K0_M0_M1_K1, ABlockTransferThreadClusterLengths_K0_M0_M1_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, ABlockTransferSrcVectorTensorContiguousDimOrder, ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, BBlockTransferThreadSliceLengths_K0_N0_N1_K1, BBlockTransferThreadClusterLengths_K0_N0_N1_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, BBlockTransferSrcVectorTensorContiguousDimOrder, BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, >::GridwiseGemm |
Member Function Documentation
◆ GetTypeString()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ IsSupportedArgument() [2/2]
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestaticconstexpr |
◆ MakeAGridDescriptor_K0_M_K1()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ MakeArgument()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ MakeArgumentPointer()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlineoverridevirtual |
◆ MakeBGridDescriptor_K0_N_K1()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ MakeDsGridDescriptor_M_N()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ MakeEGridDescriptor_M_N()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
template<typename ELay>
|
inlinestatic |
◆ MakeInvoker()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlinestatic |
◆ MakeInvokerPointer()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
inlineoverridevirtual |
Member Data Documentation
◆ I0
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ I1
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ I2
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ I3
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ I4
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ I5
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ K1Number
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
◆ NumDTensor
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t M1PerThread, index_t N1PerThread, index_t KPerThread, typename M1N1ThreadClusterM1Xs, typename M1N1ThreadClusterN1Xs, typename ABlockTransferThreadSliceLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterLengths_K0_M0_M1_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, typename ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1, typename ABlockTransferSrcVectorTensorContiguousDimOrder, typename ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1, typename BBlockTransferThreadSliceLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterLengths_K0_N0_N1_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1, typename BBlockTransferSrcVectorTensorContiguousDimOrder, typename BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1, typename CThreadTransferSrcDstAccessOrder, index_t CThreadTransferSrcDstVectorDim, index_t CThreadTransferDstScalarPerVector, enable_if_t< is_same_v< AElementwiseOperation, ck::tensor_operation::element_wise::PassThrough > &&is_same_v< BElementwiseOperation, ck::tensor_operation::element_wise::PassThrough >, bool > = false>
|
staticconstexpr |
The documentation for this struct was generated from the following file: