tensor_partition.hpp File Reference
tensor_partition.hpp File Reference
#include "tensor_utils.hpp"
#include "layout_utils.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_description/cluster_descriptor.hpp"

Go to the source code of this file.
Functions | |
| template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc, typename ProjectionTuple> | |
| __host__ __device__ constexpr auto | make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_layout, const index_t thread_id, const ProjectionTuple &projection) |
| Create a local partition for a thread (currently only packed partitions are supported). | |
| template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc> | |
| __host__ __device__ constexpr auto | make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_lengths, const index_t thread_id) |
| Create a local partition for a thread (currently only packed partitions are supported). | |
| template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs, typename ProjectionTuple> | |
| __host__ __device__ constexpr auto | make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs, const ProjectionTuple &projection) |
| Create a local tile for a thread block (currently only packed tiles are supported). | |
| template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs> | |
| __host__ __device__ constexpr auto | make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs) |
| Create a local tile for a thread block (currently only packed tiles are supported). | |
Function Documentation
◆ make_local_partition() [1/2]
template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc, typename ProjectionTuple>
|
constexpr |
Create a local partition for a thread (currently only packed partitions are supported).
- Parameters
-
tensor: Tensor to partition.
thread_layout: Layout of threads (must not be transformed).
thread_id: Thread index represented as an integer.
projection: Projection used to remove selected dimensions from partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
- Returns
- Partition tensor.
◆ make_local_partition() [2/2]
template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc>
|
constexpr |
◆ make_local_tile() [1/2]
template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs>
|
constexpr |
Create a local tile for a thread block (currently only packed tiles are supported).
- Note
- Currently, for the best performance, use a 2D tile shape.
- Parameters
-
tensor: Tensor to partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.
- Returns
- Tile tensor.
◆ make_local_tile() [2/2]
template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs, typename ProjectionTuple>
|
constexpr |
Create a local tile for a thread block (currently only packed tiles are supported).
- Note
- Currently, for the best performance, use a 2D tile_shape.
- Parameters
-
tensor: Tensor to partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.
projection: Projection used to remove selected dimensions from partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
- Returns
- Tile tensor.