// Returns the reduction result of the expression passed in.
//
// NOTE(review): this listing carries embedded source line numbers with gaps (e.g. 44 -> 49,
// 53 -> 58), so braces and some conditions are not visible here. The comments below describe
// only the statements that are visible; nesting is presumed, not confirmed.
41 template <is::reductionExpression Expression>
42 VCTR_FORCEDINLINE
static constexpr auto reduce (
const Expression& e)
// Number of elements, used as the bound of the element-wise loop at the end.
44 const auto s = e.size();
// Runtime-only section: presumably guards the specialised paths below (closing brace not visible).
49 if (! std::is_constant_evaluated())
// Path that delegates the whole reduction to the expression's own reduceVectorOp().
// The condition selecting this path is not visible in this listing.
53 return e.reduceVectorOp();
// Path that delegates to reduceNeon; the condition selecting it is not visible in this listing.
58 return reduceNeon (e);
// AVX check followed by an AVX2 check; reduceAVX2 is returned on the AVX2 branch.
// The branch taken when only AVX is supported is not visible here.
65 if (Config::supportsAVX)
70 if (Config::supportsAVX2)
71 return reduceAVX2 (e);
// Tests whether any instruction set other than the fallback is supported.
// The statement guarded by this condition is not visible in this listing.
77 if (Config::highestSupportedCPUInstructionSet != CPUInstructionSet::fallback)
// Element-wise path: a single accumulator, seeded with the expression's initial reduction value.
82 std::array<ValueType<Expression>, 1> v = { Expression::reductionResultInitValue };
// Fold every element index in [0, s) into the accumulator.
83 for (
size_t i = 0; i < s; ++i)
84 e.reduceElementWise (v[0], i);
// Let the expression turn the accumulator array into the final result.
86 return e.finalizeReduction (v);
// Reduces the expression using AVX registers, compiled with the "avx2" target attribute.
//
// NOTE(review): VType and RType are used below but their declarations are not visible in this
// listing (lines 93-97 are missing) - presumably the same aliases as in reduceAVX; confirm.
91 template <is::reductionExpression Expression>
92 VCTR_TARGET (
"avx2") static auto reduceAVX2 (const Expression& e)
// inc: elements per register. nSIMD: previousMultipleOf<inc> (n), the bound of the register-wise loop.
98 constexpr auto inc = RType::numElements;
99 const auto n = e.size();
100 const auto nSIMD = detail::previousMultipleOf<inc> (n);
// Register accumulator, every lane seeded with the expression's initial reduction value.
102 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
// Register-wise pass, advancing inc elements per iteration.
// i is declared before this loop on a line not visible here.
105 for (; i < nSIMD; i += inc)
106 e.reduceAVXRegisterWise (avxValue, i);
// One slot per register lane plus one extra trailing slot, aligned to Config::maxSIMDRegisterSize
// so that the aligned store below is valid.
108 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
109 avxValue.storeAligned (results.data());
// The extra last slot is a scalar accumulator, seeded with the initial reduction value.
110 results.back() = Expression::reductionResultInitValue;
// Folds element i into the last slot; presumably executed for the remaining indices up to n -
// the loop header is not visible in this listing.
113 e.reduceElementWise (results.back(), i);
// Let the expression combine all numElements + 1 partial results into the final value.
115 return e.finalizeReduction (results);
// Reduces the expression using AVX registers, compiled with the "avx" target attribute.
// Only available when Config::archX64 holds.
//
// NOTE(review): this listing has gaps in its embedded line numbers; braces, the declaration of i
// and the header of the tail loop are not visible here.
118 template <is::reductionExpression Expression>
119 VCTR_TARGET (
"avx") static auto reduceAVX (const Expression& e)
120 requires Config::archX64
// VType: value type of the expression. RType: the AVX register type for that value type.
122 using VType = ValueType<Expression>;
123 using RType = AVXRegister<VType>;
// inc: elements per register. nSIMD: previousMultipleOf<inc> (n), the bound of the register-wise loop.
125 constexpr auto inc = RType::numElements;
126 const auto n = e.size();
127 const auto nSIMD = detail::previousMultipleOf<inc> (n);
// Register accumulator, every lane seeded with the expression's initial reduction value.
129 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
// Register-wise pass, advancing inc elements per iteration.
132 for (; i < nSIMD; i += inc)
133 e.reduceAVXRegisterWise (avxValue, i);
// One slot per register lane plus one extra trailing slot, aligned to Config::maxSIMDRegisterSize
// so that the aligned store below is valid.
135 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
136 avxValue.storeAligned (results.data());
// The extra last slot is a scalar accumulator, seeded with the initial reduction value.
137 results.back() = Expression::reductionResultInitValue;
// Folds element i into the last slot; presumably executed for the remaining indices up to n -
// the loop header is not visible in this listing.
140 e.reduceElementWise (results.back(), i);
// Let the expression combine all numElements + 1 partial results into the final value.
142 return e.finalizeReduction (results);
// Reduces the expression using SSE registers, compiled with the "sse4.1" target attribute.
// Only available when Config::archX64 holds.
//
// NOTE(review): this listing has gaps in its embedded line numbers; braces, the declaration of i
// and the header of the tail loop are not visible here.
145 template <is::reductionExpression Expression>
146 VCTR_TARGET (
"sse4.1") static auto reduceSSE (const Expression& e)
147 requires Config::archX64
// VType: value type of the expression. RType: the SSE register type for that value type.
149 using VType = ValueType<Expression>;
150 using RType = SSERegister<VType>;
// inc: elements per register. nSIMD: previousMultipleOf<inc> (n), the bound of the register-wise loop.
152 constexpr auto inc = RType::numElements;
153 const auto n = e.size();
154 const auto nSIMD = detail::previousMultipleOf<inc> (n);
// Register accumulator, every lane seeded with the expression's initial reduction value.
156 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
// Register-wise pass, advancing inc elements per iteration.
159 for (; i < nSIMD; i += inc)
160 e.reduceSSERegisterWise (sseValue, i);
// One slot per register lane plus one extra trailing slot, aligned to Config::maxSIMDRegisterSize
// so that the aligned store below is valid.
162 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
163 sseValue.storeAligned (results.data());
// The extra last slot is a scalar accumulator, seeded with the initial reduction value.
164 results.back() = Expression::reductionResultInitValue;
// Folds element i into the last slot; presumably executed for the remaining indices up to n -
// the loop header is not visible in this listing.
167 e.reduceElementWise (results.back(), i);
// Let the expression combine all numElements + 1 partial results into the final value.
169 return e.finalizeReduction (results);
// Reduces the expression using Neon registers. Only available when Config::archARM holds.
//
// NOTE(review): this listing has gaps in its embedded line numbers; braces, the declaration of i
// and the header of the tail loop are not visible here.
172 template <is::reductionExpression Expression>
173 static auto reduceNeon (
const Expression& e)
174 requires Config::archARM
// VType: value type of the expression. RType: the Neon register type for that value type.
176 using VType = ValueType<Expression>;
177 using RType = NeonRegister<VType>;
// inc: elements per register. nSIMD: previousMultipleOf<inc> (n), the bound of the register-wise loop.
179 constexpr auto inc = RType::numElements;
180 const auto n = e.size();
181 const auto nSIMD = detail::previousMultipleOf<inc> (n);
// Register accumulator, every lane seeded with the expression's initial reduction value.
// NOTE(review): the local is named sseValue although it holds a NeonRegister<VType>.
183 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
// Register-wise pass, advancing inc elements per iteration.
186 for (; i < nSIMD; i += inc)
187 e.reduceNeonRegisterWise (sseValue, i);
// One slot per register lane plus one extra trailing slot. Unlike the AVX/SSE variants, the
// register is written with store() rather than storeAligned().
189 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
190 sseValue.store (results.data());
// The extra last slot is a scalar accumulator, seeded with the initial reduction value.
191 results.back() = Expression::reductionResultInitValue;
// Folds element i into the last slot; presumably executed for the remaining indices up to n -
// the loop header is not visible in this listing.
194 e.reduceElementWise (results.back(), i);
// Let the expression combine all numElements + 1 partial results into the final value.
196 return e.finalizeReduction (results);
A collection of static functions used to evaluate reduction expressions.
Definition: ReductionExpression.h:33
static VCTR_FORCEDINLINE constexpr auto reduce(const Expression &e)
Returns the reduction result of the expression passed in.
Definition: ReductionExpression.h:42
Constrains a type to have a member function reduceAVXRegisterWise (AVXRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:110
Constrains a type to have a member function reduceNeonRegisterWise (NeonRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:106
Constrains a type to have a member function reduceSSERegisterWise (SSERegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:114
Constrains a type to have a member function reduceVectorOp() const that returns a DstType value.
Definition: ContainerAndExpressionConcepts.h:102
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:79
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type; if it is a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28