device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp File Reference
device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp File Reference
#include <iostream>
#include <numeric>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_bwd_weight_to_gemm.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_bwd_weight_to_gemm_v2.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_conv_v3.hpp"
#include <ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp>
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp"
#include "ck/tensor_operation/gpu/device/impl/split_k_utils.hpp"
#include "ck/tensor_operation/gpu/device/impl/split_k_arg.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/flush_cache.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | ck |
| namespace | ck::tensor_operation |
| namespace | ck::tensor_operation::device |
Functions | |
| template<typename GridwiseGemm, typename AGridDesc_AK0_M_K1, typename BGridDesc_BK0_N_K1, typename CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock, typename ComputePtrOffsetOfBatch, bool HasMainKBlockLoop, InMemoryDataOperationEnum CGlobalMemoryDataOperation, index_t MinimumOccupancy = 1, TailNumber TailNum = TailNumber::Full> | |
| __global__ void | ck::tensor_operation::device::kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3 (typename GridwiseGemm::Argument karg, const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1, const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const index_t num_k_per_block) |
| template<typename GridwiseGemm, typename AGridDesc_AK0_M_K1, typename BGridDesc_BK0_N_K1, typename CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock, typename ComputePtrOffsetOfBatch, bool HasMainKBlockLoop, InMemoryDataOperationEnum CGlobalMemoryDataOperation, index_t MinimumOccupancy = 1, TailNumber TailNum = TailNumber::Full> | |
| __global__ void | ck::tensor_operation::device::kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3_2lds (typename GridwiseGemm::Argument karg, const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1, const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const index_t num_k_per_block) |