// Returns the reduction result of the expression passed in.
//
// NOTE(review): this listing prefixes most lines with the original file line
// number. The gaps in that numbering show that brace lines and several
// conditions are not visible here, so the comments below describe only the
// statements that can actually be seen.
41 template <is::reductionExpression Expression>
42 VCTR_FORCEDINLINE
static constexpr auto reduce (
const Expression& e)
// Number of elements, used as the bound of the scalar loop at the end.
44 const auto s = e.size();
// Runtime-only guard. Presumably the accelerated paths below sit inside this
// branch so that constant evaluation falls through to the scalar loop; the
// braces delimiting the branch are not visible - TODO confirm.
49 if (! std::is_constant_evaluated())
// Reduction supplied by the expression itself. The condition selecting this
// path is not visible in this listing.
53 return e.reduceVectorOp();
// Neon path. The condition selecting it is not visible in this listing.
58 return reduceNeon (e);
// Instruction set checks read from Config::supportedCPUInstructionSets. The
// statements executed for the fma and sse4_1 checks are not visible here;
// presumably they return reduceFMA (e) and reduceSSE (e) - TODO confirm.
65 if (Config::supportedCPUInstructionSets.fma)
70 if (Config::supportedCPUInstructionSets.avx2)
71 return reduceAVX2 (e);
77 if (Config::supportedCPUInstructionSets.sse4_1)
// Scalar fallback: a one element array seeded with the init value of the
// reduction is updated element by element and then passed to finalizeReduction.
82 std::array<ValueType<Expression>, 1> v = { Expression::reductionResultInitValue };
83 for (
size_t i = 0; i < s; ++i)
84 e.reduceElementWise (v[0], i);
86 return e.finalizeReduction (v);
// Reduces the expression using AVX registers, compiled for the avx2 target.
//
// NOTE(review): the VType and RType aliases, the declaration of the index i,
// the header of the tail loop and all braces are missing from this listing.
// Presumably the aliases match the ones visible in reduceFMA - TODO confirm.
91 template <is::reductionExpression Expression>
92 VCTR_TARGET (
"avx2") static auto reduceAVX2 (const Expression& e)
// inc is the number of elements held by one register of type RType.
98 constexpr auto inc = RType::numElements;
99 const auto n = e.size();
// Upper bound of the register wise loop, obtained from previousMultipleOf<inc> (n).
100 const auto nSIMD = detail::previousMultipleOf<inc> (n);
102 e.prepareAVXEvaluation();
// Every lane of the accumulator register starts at the init value of the reduction.
104 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
// Register wise part: consumes inc elements per iteration up to nSIMD.
107 for (; i < nSIMD; i += inc)
108 e.reduceAVXRegisterWise (avxValue, i);
// One slot per register lane plus one extra slot at the back. The lanes are
// stored to the front; the back slot is seeded with the init value and is the
// accumulator handed to reduceElementWise.
110 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
111 avxValue.storeAligned (results.data());
112 results.back() = Expression::reductionResultInitValue;
// Element wise step on the back slot. Presumably this is the body of a loop
// over the remaining indices from nSIMD to n; the loop header is not visible.
115 e.reduceElementWise (results.back(), i);
// All partial results are combined by the expression.
117 return e.finalizeReduction (results);
// Reduces the expression using AVX registers, compiled for the fma target.
// Only available when Config::archX64 holds.
//
// NOTE(review): the declaration of the index i, the header of the tail loop
// and all braces are missing from this listing.
120 template <is::reductionExpression Expression>
121 VCTR_TARGET (
"fma") static auto reduceFMA (const Expression& e)
122 requires Config::archX64
// VType is the value type of the expression, RType the matching AVX register wrapper.
124 using VType = ValueType<Expression>;
125 using RType = AVXRegister<VType>;
// inc is the number of elements held by one register of type RType.
127 constexpr auto inc = RType::numElements;
128 const auto n = e.size();
// Upper bound of the register wise loop, obtained from previousMultipleOf<inc> (n).
129 const auto nSIMD = detail::previousMultipleOf<inc> (n);
131 e.prepareAVXEvaluation();
// Every lane of the accumulator register starts at the init value of the reduction.
133 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
// Register wise part: consumes inc elements per iteration up to nSIMD.
136 for (; i < nSIMD; i += inc)
137 e.reduceAVXRegisterWise (avxValue, i);
// One slot per register lane plus one extra slot at the back. The lanes are
// stored to the front; the back slot is seeded with the init value and is the
// accumulator handed to reduceElementWise.
139 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
140 avxValue.storeAligned (results.data());
141 results.back() = Expression::reductionResultInitValue;
// Element wise step on the back slot. Presumably this is the body of a loop
// over the remaining indices from nSIMD to n; the loop header is not visible.
144 e.reduceElementWise (results.back(), i);
// All partial results are combined by the expression.
146 return e.finalizeReduction (results);
// Reduces the expression using SSE registers, compiled for the sse4.1 target.
// Only available when Config::archX64 holds.
//
// NOTE(review): the declaration of the index i, the header of the tail loop
// and all braces are missing from this listing.
149 template <is::reductionExpression Expression>
150 VCTR_TARGET (
"sse4.1") static auto reduceSSE (const Expression& e)
151 requires Config::archX64
// VType is the value type of the expression, RType the matching SSE register wrapper.
153 using VType = ValueType<Expression>;
154 using RType = SSERegister<VType>;
// inc is the number of elements held by one register of type RType.
156 constexpr auto inc = RType::numElements;
157 const auto n = e.size();
// Upper bound of the register wise loop, obtained from previousMultipleOf<inc> (n).
158 const auto nSIMD = detail::previousMultipleOf<inc> (n);
160 e.prepareSSEEvaluation();
// Every lane of the accumulator register starts at the init value of the reduction.
162 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
// Register wise part: consumes inc elements per iteration up to nSIMD.
165 for (; i < nSIMD; i += inc)
166 e.reduceSSERegisterWise (sseValue, i);
// One slot per register lane plus one extra slot at the back. The lanes are
// stored to the front; the back slot is seeded with the init value and is the
// accumulator handed to reduceElementWise.
168 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
169 sseValue.storeAligned (results.data());
170 results.back() = Expression::reductionResultInitValue;
// Element wise step on the back slot. Presumably this is the body of a loop
// over the remaining indices from nSIMD to n; the loop header is not visible.
173 e.reduceElementWise (results.back(), i);
// All partial results are combined by the expression.
175 return e.finalizeReduction (results);
// Reduces the expression using Neon registers.
// Only available when Config::archARM holds.
//
// NOTE(review): the declaration of the index i, the header of the tail loop
// and all braces are missing from this listing.
178 template <is::reductionExpression Expression>
179 static auto reduceNeon (
const Expression& e)
180 requires Config::archARM
// VType is the value type of the expression, RType the matching Neon register wrapper.
182 using VType = ValueType<Expression>;
183 using RType = NeonRegister<VType>;
// inc is the number of elements held by one register of type RType.
185 constexpr auto inc = RType::numElements;
186 const auto n = e.size();
// Upper bound of the register wise loop, obtained from previousMultipleOf<inc> (n).
187 const auto nSIMD = detail::previousMultipleOf<inc> (n);
189 e.prepareNeonEvaluation();
// Every lane of the accumulator register starts at the init value of the reduction.
191 auto neonValue = RType::broadcast (Expression::reductionResultInitValue);
// Register wise part: consumes inc elements per iteration up to nSIMD.
194 for (; i < nSIMD; i += inc)
195 e.reduceNeonRegisterWise (neonValue, i);
// One slot per register lane plus one extra slot at the back. The lanes are
// written to the front with store, whereas the x64 variants in this listing
// call storeAligned; the back slot is seeded with the init value and is the
// accumulator handed to reduceElementWise.
197 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
198 neonValue.store (results.data());
199 results.back() = Expression::reductionResultInitValue;
// Element wise step on the back slot. Presumably this is the body of a loop
// over the remaining indices from nSIMD to n; the loop header is not visible.
202 e.reduceElementWise (results.back(), i);
// All partial results are combined by the expression.
204 return e.finalizeReduction (results);
A collection of static functions used to evaluate reduction expressions.
Definition: ReductionExpression.h:33
static VCTR_FORCEDINLINE constexpr auto reduce(const Expression &e)
Returns the reduction result of the expression passed in.
Definition: ReductionExpression.h:42
Constrains a type to have a member function reduceAVXRegisterWise (AVXRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:128
Constrains a type to have a member function reduceNeonRegisterWise (NeonRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:124
Constrains a type to have a member function reduceSSERegisterWise (SSERegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:132
Constrains a type to have a member function reduceVectorOp() const that returns a DstType value.
Definition: ContainerAndExpressionConcepts.h:120
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:83
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28