VCTR
Loading...
Searching...
No Matches
ReductionExpression.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr
24{
25
33{
34public:
35 //==============================================================================
36
/** Returns the reduction result of the expression passed in.

    When not constant evaluated, one of the specialised strategies below may
    return early (reduceVectorOp, reduceNeon, reduceFMA, reduceAVX2, reduceSSE).
    If none of them returns, and always during constant evaluation, the
    expression is reduced element by element into a single-element array which
    is then passed to e.finalizeReduction().

    NOTE(review): the lines carrying the conditions that guard the individual
    strategy blocks (listing lines 51, 56, 61, 63 and 75) are missing from this
    listing; the notes below mark each gap and must be confirmed against the
    original header.

    @param e The reduction expression to evaluate. e.size() is asserted to be
             greater than 0.
    @return  The value returned by the selected strategy, or the result of
             e.finalizeReduction (v) for the element-wise fallback.
 */
41 template <is::reductionExpression Expression>
42 VCTR_FORCEDINLINE static constexpr auto reduce (const Expression& e)
43 {
44 const auto s = e.size();
45
46 // Reducing an empty vector can lead to undefined edge cases
47 VCTR_ASSERT (s > 0);
48
// The specialised strategies are only considered outside of constant evaluation.
49 if (! std::is_constant_evaluated())
50 {
// NOTE(review): guard line 51 missing here - presumably a compile-time check
// that Expression provides reduceVectorOp(); confirm.
52 {
53 return e.reduceVectorOp();
54 }
55
// NOTE(review): guard line 56 missing here - presumably selects the Neon path
// on ARM builds; confirm.
57 {
58 return reduceNeon (e);
59 }
60
// NOTE(review): guard lines 61 and 63 missing here - the outer one presumably
// selects the AVX path, the inner one chooses between the FMA and the AVX2
// variant at compile time; confirm.
62 {
64 {
// Only taken if the fma flag is set; otherwise execution falls through to the
// checks further below.
65 if (Config::supportedCPUInstructionSets.fma)
66 return reduceFMA (e);
67 }
68 else
69 {
// Only taken if the avx2 flag is set; otherwise execution falls through.
70 if (Config::supportedCPUInstructionSets.avx2)
71 return reduceAVX2 (e);
72 }
73 }
74
// NOTE(review): guard line 75 missing here - presumably selects the SSE path;
// confirm.
76 {
// Only taken if the sse4_1 flag is set; otherwise the element-wise fallback
// below is used.
77 if (Config::supportedCPUInstructionSets.sse4_1)
78 return reduceSSE (e);
79 }
80 }
81
// Element-wise fallback: start from the expression's init value, fold every
// element into v[0] and let the expression finalize the single partial result.
82 std::array<ValueType<Expression>, 1> v = { Expression::reductionResultInitValue };
83 for (size_t i = 0; i < s; ++i)
84 e.reduceElementWise (v[0], i);
85
86 return e.finalizeReduction (v);
87 }
88
89private:
90 //==============================================================================
91 template <is::reductionExpression Expression>
92 VCTR_TARGET ("avx2") static auto reduceAVX2 (const Expression& e)
93 requires Config::archX64
94 {
95 using VType = ValueType<Expression>;
96 using RType = AVXRegister<VType>;
97
98 constexpr auto inc = RType::numElements;
99 const auto n = e.size();
100 const auto nSIMD = detail::previousMultipleOf<inc> (n);
101
102 e.prepareAVXEvaluation();
103
104 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
105
106 size_t i = 0;
107 for (; i < nSIMD; i += inc)
108 e.reduceAVXRegisterWise (avxValue, i);
109
110 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
111 avxValue.storeAligned (results.data());
112 results.back() = Expression::reductionResultInitValue;
113
114 for (; i < n; ++i)
115 e.reduceElementWise (results.back(), i);
116
117 return e.finalizeReduction (results);
118 }
119
120 template <is::reductionExpression Expression>
121 VCTR_TARGET ("fma") static auto reduceFMA (const Expression& e)
122 requires Config::archX64
123 {
124 using VType = ValueType<Expression>;
125 using RType = AVXRegister<VType>;
126
127 constexpr auto inc = RType::numElements;
128 const auto n = e.size();
129 const auto nSIMD = detail::previousMultipleOf<inc> (n);
130
131 e.prepareAVXEvaluation();
132
133 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
134
135 size_t i = 0;
136 for (; i < nSIMD; i += inc)
137 e.reduceAVXRegisterWise (avxValue, i);
138
139 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
140 avxValue.storeAligned (results.data());
141 results.back() = Expression::reductionResultInitValue;
142
143 for (; i < n; ++i)
144 e.reduceElementWise (results.back(), i);
145
146 return e.finalizeReduction (results);
147 }
148
149 template <is::reductionExpression Expression>
150 VCTR_TARGET ("sse4.1") static auto reduceSSE (const Expression& e)
151 requires Config::archX64
152 {
153 using VType = ValueType<Expression>;
154 using RType = SSERegister<VType>;
155
156 constexpr auto inc = RType::numElements;
157 const auto n = e.size();
158 const auto nSIMD = detail::previousMultipleOf<inc> (n);
159
160 e.prepareSSEEvaluation();
161
162 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
163
164 size_t i = 0;
165 for (; i < nSIMD; i += inc)
166 e.reduceSSERegisterWise (sseValue, i);
167
168 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
169 sseValue.storeAligned (results.data());
170 results.back() = Expression::reductionResultInitValue;
171
172 for (; i < n; ++i)
173 e.reduceElementWise (results.back(), i);
174
175 return e.finalizeReduction (results);
176 }
177
178 template <is::reductionExpression Expression>
179 static auto reduceNeon (const Expression& e)
180 requires Config::archARM
181 {
182 using VType = ValueType<Expression>;
183 using RType = NeonRegister<VType>;
184
185 constexpr auto inc = RType::numElements;
186 const auto n = e.size();
187 const auto nSIMD = detail::previousMultipleOf<inc> (n);
188
189 e.prepareNeonEvaluation();
190
191 auto neonValue = RType::broadcast (Expression::reductionResultInitValue);
192
193 size_t i = 0;
194 for (; i < nSIMD; i += inc)
195 e.reduceNeonRegisterWise (neonValue, i);
196
197 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
198 neonValue.store (results.data());
199 results.back() = Expression::reductionResultInitValue;
200
201 for (; i < n; ++i)
202 e.reduceElementWise (results.back(), i);
203
204 return e.finalizeReduction (results);
205 }
206};
207
208} // namespace vctr
A collection of static functions used to evaluate reduction expressions.
Definition: ReductionExpression.h:33
static VCTR_FORCEDINLINE constexpr auto reduce(const Expression &e)
Returns the reduction result of the expression passed in.
Definition: ReductionExpression.h:42
Constrains a type to have a member function reduceAVXRegisterWise (AVXRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:128
Constrains a type to have a member function reduceNeonRegisterWise (NeonRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:124
Constrains a type to have a member function reduceSSERegisterWise (SSERegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:132
Constrains a type to have a member function reduceVectorOp() const that returns a DstType value.
Definition: ContainerAndExpressionConcepts.h:120
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:83
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
Definition: Config.h:298