blockwise_softmax.hpp Source File
blockwise_softmax.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
decltype(make_naive_tensor_descriptor_packed( make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0)))) ThreadSliceDesc_M
Definition blockwise_softmax.hpp:38
PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadMap_M_K, reduce::Add, false > BlockwiseSumReduce
Definition blockwise_softmax.hpp:78
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
__host__ __device__ void Run(CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf)
Definition blockwise_softmax.hpp:88
StaticBuffer< AddressSpaceEnum::Vgpr, AccDataType, MRepeat, true > BufferType
Definition blockwise_softmax.hpp:85
PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadMap_M_K, reduce::Max, false > BlockwiseMaxReduce
Definition blockwise_softmax.hpp:71
typename conditional< IgnoreNaN, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Add, false, detail::AccumulateWithNanIgnore< reduce::Add, AccDataType > >, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Add, false > >::type ThreadwiseSumReduce
Definition blockwise_softmax.hpp:55
typename conditional< IgnoreNaN, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Max, false, detail::AccumulateWithNanIgnore< reduce::Max, AccDataType > >, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Max, false > >::type ThreadwiseMaxReduce
Definition blockwise_softmax.hpp:41
Definition reduction_functions_blockwise.hpp:101
static __device__ void Reduce(BufferType &work_buffer, AccDataType &in_out_value)
Definition reduction_functions_blockwise.hpp:116
Definition static_buffer.hpp:16
Definition reduction_functions_threadwise.hpp:23
Definition utility/functional.hpp:100
Definition reduction_functions_accumulate.hpp:17
Definition reduction_operator.hpp:37
Definition reduction_operator.hpp:163
Definition functional2.hpp:33