VCTR
Loading...
Searching...
No Matches
ReductionExpression.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23// This concept is placed here because it depends on concepts found in both ContainerAndExpressionConcepts.h
24// and Traits.h. Placing it in ContainerAndExpressionConcepts.h would have caused cyclic inclusion issues.
25namespace vctr::is
26{
28template <class T, class SourceType>
30}
31
32namespace vctr
33{
34
42{
43public:
44 //==============================================================================
45
// Returns the reduction result of the expression passed in.
//
// Tries a specialised evaluation path first (vector op, Neon, FMA/AVX2, SSE) and otherwise
// reduces element by element.
//
// NOTE(review): this listing has dropped several source lines, among them the conditions that
// open the brace blocks following the is_constant_evaluated check. Presumably these are
// compile-time checks for the matching expression capability - confirm against the original header.
50 template <is::reductionExpression Expression>
51 VCTR_FORCEDINLINE static constexpr auto reduce (const Expression& e)
52 {
53 const auto s = e.size();
54
55 // Reducing an empty vector can lead to undefined edge cases
56 VCTR_ASSERT (s > 0);
57
// The specialised paths are only considered when this call is not being constant evaluated.
58 if (! std::is_constant_evaluated())
59 {
// Path 1: let the expression compute the result through its own reduceVectorOp().
61 {
62 return e.reduceVectorOp();
63 }
64
// Path 2: Neon implementation.
66 {
67 return reduceNeon (e);
68 }
69
// Path 3: AVX register based implementations. Each one is additionally gated on a runtime
// CPU feature flag; if the flag is false, no return happens here and control moves on.
71 {
73 {
74 if (Config::supportedCPUInstructionSets.fma)
75 return reduceFMA (e);
76 }
77 else
78 {
79 if (Config::supportedCPUInstructionSets.avx2)
80 return reduceAVX2 (e);
81 }
82 }
83
// Path 4: SSE implementation, gated on the runtime sse4_1 flag.
85 {
86 if (Config::supportedCPUInstructionSets.sse4_1)
87 return reduceSSE (e);
88 }
89 }
90
// Element-wise fallback: accumulate into a single-element array seeded with
// Expression::reductionResultInitValue, then pass that array to finalizeReduction.
91 std::array<ValueType<Expression>, 1> v = { Expression::reductionResultInitValue };
92 for (size_t i = 0; i < s; ++i)
93 e.reduceElementWise (v[0], i);
94
95 return e.finalizeReduction (v);
96 }
97
98private:
99 //==============================================================================
// Reduces the expression using AVXRegister sized steps, then finishes the remaining
// elements one by one. Compiled with VCTR_TARGET ("avx2") and constrained to Config::archX64.
// Returns whatever e.finalizeReduction yields for the collected partial results.
100 template <is::reductionExpression Expression>
101 VCTR_TARGET ("avx2") static auto reduceAVX2 (const Expression& e)
102 requires Config::archX64
103 {
104 using VType = ValueType<Expression>;
105 using RType = AVXRegister<VType>;
106
107 constexpr auto inc = RType::numElements;
108 const auto n = e.size();
// Upper bound for the register-wise loop.
// NOTE(review): presumably the largest multiple of inc that is <= n - confirm in detail::previousMultipleOf.
109 const auto nSIMD = detail::previousMultipleOf<inc> (n);
110
111 e.prepareAVXEvaluation();
112
// Register accumulator, seeded with the reduction's init value via broadcast.
113 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
114
// i is declared outside the loops so the element-wise tail loop continues where this one stops.
115 size_t i = 0;
116 for (; i < nSIMD; i += inc)
117 e.reduceAVXRegisterWise (avxValue, i);
118
// One slot more than the register has elements: the register accumulator is stored from the
// start of the array and the last slot serves as the scalar accumulator for the tail.
// NOTE(review): assumes storeAligned writes exactly RType::numElements values and needs the
// alignment requested here - confirm against AVXRegister.
119 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
120 avxValue.storeAligned (results.data());
121 results.back() = Expression::reductionResultInitValue;
122
// Remaining elements that did not fit a full register step.
123 for (; i < n; ++i)
124 e.reduceElementWise (results.back(), i);
125
126 return e.finalizeReduction (results);
127 }
128
// Reduces the expression using AVXRegister sized steps, then finishes the remaining
// elements one by one. Compiled with VCTR_TARGET ("fma") and constrained to Config::archX64.
// The body is identical to reduceAVX2; only the target attribute differs.
// Returns whatever e.finalizeReduction yields for the collected partial results.
129 template <is::reductionExpression Expression>
130 VCTR_TARGET ("fma") static auto reduceFMA (const Expression& e)
131 requires Config::archX64
132 {
133 using VType = ValueType<Expression>;
134 using RType = AVXRegister<VType>;
135
136 constexpr auto inc = RType::numElements;
137 const auto n = e.size();
// Upper bound for the register-wise loop.
// NOTE(review): presumably the largest multiple of inc that is <= n - confirm in detail::previousMultipleOf.
138 const auto nSIMD = detail::previousMultipleOf<inc> (n);
139
140 e.prepareAVXEvaluation();
141
// Register accumulator, seeded with the reduction's init value via broadcast.
142 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
143
// i is declared outside the loops so the element-wise tail loop continues where this one stops.
144 size_t i = 0;
145 for (; i < nSIMD; i += inc)
146 e.reduceAVXRegisterWise (avxValue, i);
147
// One slot more than the register has elements: the register accumulator is stored from the
// start of the array and the last slot serves as the scalar accumulator for the tail.
// NOTE(review): assumes storeAligned writes exactly RType::numElements values and needs the
// alignment requested here - confirm against AVXRegister.
148 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
149 avxValue.storeAligned (results.data());
150 results.back() = Expression::reductionResultInitValue;
151
// Remaining elements that did not fit a full register step.
152 for (; i < n; ++i)
153 e.reduceElementWise (results.back(), i);
154
155 return e.finalizeReduction (results);
156 }
157
// Reduces the expression using SSERegister sized steps, then finishes the remaining
// elements one by one. Compiled with VCTR_TARGET ("sse4.1") and constrained to Config::archX64.
// Returns whatever e.finalizeReduction yields for the collected partial results.
158 template <is::reductionExpression Expression>
159 VCTR_TARGET ("sse4.1") static auto reduceSSE (const Expression& e)
160 requires Config::archX64
161 {
162 using VType = ValueType<Expression>;
163 using RType = SSERegister<VType>;
164
165 constexpr auto inc = RType::numElements;
166 const auto n = e.size();
// Upper bound for the register-wise loop.
// NOTE(review): presumably the largest multiple of inc that is <= n - confirm in detail::previousMultipleOf.
167 const auto nSIMD = detail::previousMultipleOf<inc> (n);
168
169 e.prepareSSEEvaluation();
170
// Register accumulator, seeded with the reduction's init value via broadcast.
171 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
172
// i is declared outside the loops so the element-wise tail loop continues where this one stops.
173 size_t i = 0;
174 for (; i < nSIMD; i += inc)
175 e.reduceSSERegisterWise (sseValue, i);
176
// One slot more than the register has elements: the register accumulator is stored from the
// start of the array and the last slot serves as the scalar accumulator for the tail.
// NOTE(review): assumes storeAligned writes exactly RType::numElements values and needs the
// alignment requested here - confirm against SSERegister.
177 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
178 sseValue.storeAligned (results.data());
179 results.back() = Expression::reductionResultInitValue;
180
// Remaining elements that did not fit a full register step.
181 for (; i < n; ++i)
182 e.reduceElementWise (results.back(), i);
183
184 return e.finalizeReduction (results);
185 }
186
// Reduces the expression using NeonRegister sized steps, then finishes the remaining
// elements one by one. Constrained to Config::archARM; unlike the x64 variants it carries
// no VCTR_TARGET attribute and uses store instead of storeAligned.
// Returns whatever e.finalizeReduction yields for the collected partial results.
187 template <is::reductionExpression Expression>
188 static auto reduceNeon (const Expression& e)
189 requires Config::archARM
190 {
191 using VType = ValueType<Expression>;
192 using RType = NeonRegister<VType>;
193
194 constexpr auto inc = RType::numElements;
195 const auto n = e.size();
// Upper bound for the register-wise loop.
// NOTE(review): presumably the largest multiple of inc that is <= n - confirm in detail::previousMultipleOf.
196 const auto nSIMD = detail::previousMultipleOf<inc> (n);
197
198 e.prepareNeonEvaluation();
199
// Register accumulator, seeded with the reduction's init value via broadcast.
200 auto neonValue = RType::broadcast (Expression::reductionResultInitValue);
201
// i is declared outside the loops so the element-wise tail loop continues where this one stops.
202 size_t i = 0;
203 for (; i < nSIMD; i += inc)
204 e.reduceNeonRegisterWise (neonValue, i);
205
// One slot more than the register has elements: the register accumulator is stored from the
// start of the array and the last slot serves as the scalar accumulator for the tail.
// NOTE(review): assumes store writes exactly RType::numElements values - confirm against NeonRegister.
206 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
207 neonValue.store (results.data());
208 results.back() = Expression::reductionResultInitValue;
209
// Remaining elements that did not fit a full register step.
210 for (; i < n; ++i)
211 e.reduceElementWise (results.back(), i);
212
213 return e.finalizeReduction (results);
214 }
215};
216
217} // namespace vctr
A collection of static functions used to evaluate reduction expressions.
Definition: ReductionExpression.h:42
static VCTR_FORCEDINLINE constexpr auto reduce(const Expression &e)
Returns the reduction result of the expression passed in.
Definition: ReductionExpression.h:51
Constrains a type to have a member function reduceAVXRegisterWise (AVXRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:128
Constrains a type to have a member function reduceNeonRegisterWise (NeonRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:124
Constrains a type to have a member function reduceSSERegisterWise (SSERegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:132
Constrains a type to have a member function reduceVectorOp() const that returns a DstType value.
Definition: ContainerAndExpressionConcepts.h:120
Constrains a type to be an expression chain builder.
Definition: ContainerAndExpressionConcepts.h:226
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:83
Constrains T to be an expression chain builder that results in a reduction expression when a source o...
Definition: ReductionExpression.h:29
Definition: ContainerAndExpressionConcepts.h:387
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
Definition: Config.h:298