ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k > Struct Reference

ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k > Struct Reference

Composable Kernel: ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k > Struct Reference
ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k > Struct Reference

#include <xdlops_gemm.hpp>

Public Member Functions

template<index_t MPerXdlops, index_t NPerXdlops, class FloatA, class FloatB, class FloatC>
__device__ void run (const FloatA &a, const FloatB &b, FloatC &reg_c) const

Static Public Attributes

static constexpr index_t group_size = 4
static constexpr index_t num_groups_per_blk = 4
static constexpr index_t num_regs_per_blk = 16
static constexpr index_t num_threads_per_blk = 32
static constexpr index_t wave_size = 64
static constexpr index_t num_input_blks = 2
static constexpr index_t num_output_blks = 1
static constexpr index_t m_per_blk = 32
static constexpr index_t n_per_blk = 32
static constexpr index_t k_per_blk = 4
static constexpr bool is_k_reduction = true

Member Function Documentation

◆ run()

template<index_t MPerXdlops, index_t NPerXdlops, class FloatA, class FloatB, class FloatC>
__device__ void ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::run ( const FloatA & a,
const FloatB & b,
FloatC & reg_c ) const
inline

Member Data Documentation

◆ group_size

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::group_size = 4
static constexpr

◆ is_k_reduction

bool ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::is_k_reduction = true
static constexpr

◆ k_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::k_per_blk = 4
static constexpr

◆ m_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::m_per_blk = 32
static constexpr

◆ n_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::n_per_blk = 32
static constexpr

◆ num_groups_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::num_groups_per_blk = 4
static constexpr

◆ num_input_blks

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::num_input_blks = 2
static constexpr

◆ num_output_blks

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::num_output_blks = 1
static constexpr

◆ num_regs_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::num_regs_per_blk = 16
static constexpr

◆ num_threads_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::num_threads_per_blk = 32
static constexpr

◆ wave_size

index_t ck::mfma_type< MfmaInstr::mfma_f32_32x32x8bf16_1k >::wave_size = 64
static constexpr

The documentation for this struct was generated from the following file: