9 template <index_t MPerWave, index_t NPerWave>
17 template <class FloatC, index_t abid = 0>
18 __device__ static void
22 reg_c.template AsType<float4_t>()(Number<0>{}) = __builtin_amdgcn_smfmac_f32_16x16x32_f16(
23 reg_a, reg_b, reg_c.template AsType<float4_t>()[Number<0>{}], reg_idx, 0, abid);
33 template <index_t MPerWave, index_t NPerWave>
39 template <class FloatC, index_t abid = 0>
40 __device__ static void
44 reg_c.template AsType<float4_t>()(Number<0>{}) = __builtin_amdgcn_smfmac_f32_16x16x32_bf16(
45 reg_a, reg_b, reg_c.template AsType<float4_t>()[Number<0>{}], reg_idx, 0, abid);
55 template <index_t MPerWave, index_t NPerWave>
61 template <class FloatC, index_t abid = 0>
62 __device__ static void
66 reg_c.template AsType<float16_t>()(Number<0>{}) = __builtin_amdgcn_smfmac_f32_32x32x16_f16(
67 reg_a, reg_b, reg_c.template AsType<float16_t>()[Number<0>{}], reg_idx, 0, abid);
77 template <index_t MPerWave, index_t NPerWave>
83 template <class FloatC, index_t abid = 0>
84 __device__ static void
88 reg_c.template AsType<float16_t>()(Number<0>{}) = __builtin_amdgcn_smfmac_f32_32x32x16_bf16(
89 reg_a, reg_b, reg_c.template AsType<float16_t>()[Number<0>{}], reg_idx, 0, abid);
int32_t index_t
Definition ck.hpp:299
typename vector_type< bhalf_t, 8 >::type bhalf8_t
Definition dtype_vector.hpp:2162
integral_constant< index_t, N > Number
Definition number.hpp:12
typename vector_type< half_t, 8 >::type half8_t
Definition dtype_vector.hpp:2155
constexpr detail::ignore_t ignore
Definition utility/ignore.hpp:20
typename vector_type< bhalf_t, 4 >::type bhalf4_t
Definition dtype_vector.hpp:2161
typename vector_type< half_t, 4 >::type half4_t
Definition dtype_vector.hpp:2154
static __device__ void Run(const bhalf4_t &reg_a, const bhalf8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition amd_smfmac.hpp:41
Definition amd_smfmac.hpp:34
static __device__ void Run(const half4_t &reg_a, const half8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition amd_smfmac.hpp:19
Definition amd_smfmac.hpp:10
static __device__ void Run(const bhalf4_t &reg_a, const bhalf8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition amd_smfmac.hpp:85
Definition amd_smfmac.hpp:78
static __device__ void Run(const half4_t &reg_a, const half8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition amd_smfmac.hpp:63
Definition amd_smfmac.hpp:56