VCTR
Loading...
Searching...
No Matches
ReductionExpression.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr
24{
25
33{
34public:
35 //==============================================================================
36
/** Returns the reduction result of the expression passed in.

    The expression must not be empty, which is checked via VCTR_ASSERT. Unless the call is
    constant evaluated, one of the accelerated paths (reduceVectorOp, reduceNeon, reduceAVX,
    reduceAVX2 or reduceSSE) may return the result early. Otherwise every element is folded
    into a single accumulator via reduceElementWise and handed to finalizeReduction.

    NOTE(review): the lines that select between the accelerated paths (original lines 51, 56,
    61, 63 and 75) are not present in this listing - confirm the conditions against the
    actual header before relying on the branch structure shown here.
 */
41 template <is::reductionExpression Expression>
42 VCTR_FORCEDINLINE static constexpr auto reduce (const Expression& e)
43 {
44 const auto s = e.size();
45
46 // Reducing an empty vector can lead to undefined edge cases
47 VCTR_ASSERT (s > 0);
48
// The accelerated paths are only considered when not evaluating at compile time.
49 if (! std::is_constant_evaluated())
50 {
52 {
53 return e.reduceVectorOp();
54 }
55
57 {
58 return reduceNeon (e);
59 }
60
62 {
64 {
65 if (Config::supportsAVX)
66 return reduceAVX (e);
67 }
68 else
69 {
70 if (Config::supportsAVX2)
71 return reduceAVX2 (e);
72 }
73 }
74
76 {
77 if (Config::highestSupportedCPUInstructionSet != CPUInstructionSet::fallback)
78 return reduceSSE (e);
79 }
80 }
81
// Scalar fallback: reached during constant evaluation or when none of the branches above
// returned. A single-element array serves as the accumulator passed to finalizeReduction.
82 std::array<ValueType<Expression>, 1> v = { Expression::reductionResultInitValue };
83 for (size_t i = 0; i < s; ++i)
84 e.reduceElementWise (v[0], i);
85
86 return e.finalizeReduction (v);
87 }
88
89private:
90 //==============================================================================
91 template <is::reductionExpression Expression>
92 VCTR_TARGET ("avx2") static auto reduceAVX2 (const Expression& e)
93 requires Config::archX64
94 {
95 using VType = ValueType<Expression>;
96 using RType = AVXRegister<VType>;
97
98 constexpr auto inc = RType::numElements;
99 const auto n = e.size();
100 const auto nSIMD = detail::previousMultipleOf<inc> (n);
101
102 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
103
104 size_t i = 0;
105 for (; i < nSIMD; i += inc)
106 e.reduceAVXRegisterWise (avxValue, i);
107
108 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
109 avxValue.storeAligned (results.data());
110 results.back() = Expression::reductionResultInitValue;
111
112 for (; i < n; ++i)
113 e.reduceElementWise (results.back(), i);
114
115 return e.finalizeReduction (results);
116 }
117
118 template <is::reductionExpression Expression>
119 VCTR_TARGET ("avx") static auto reduceAVX (const Expression& e)
120 requires Config::archX64
121 {
122 using VType = ValueType<Expression>;
123 using RType = AVXRegister<VType>;
124
125 constexpr auto inc = RType::numElements;
126 const auto n = e.size();
127 const auto nSIMD = detail::previousMultipleOf<inc> (n);
128
129 auto avxValue = RType::broadcast (Expression::reductionResultInitValue);
130
131 size_t i = 0;
132 for (; i < nSIMD; i += inc)
133 e.reduceAVXRegisterWise (avxValue, i);
134
135 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
136 avxValue.storeAligned (results.data());
137 results.back() = Expression::reductionResultInitValue;
138
139 for (; i < n; ++i)
140 e.reduceElementWise (results.back(), i);
141
142 return e.finalizeReduction (results);
143 }
144
145 template <is::reductionExpression Expression>
146 VCTR_TARGET ("sse4.1") static auto reduceSSE (const Expression& e)
147 requires Config::archX64
148 {
149 using VType = ValueType<Expression>;
150 using RType = SSERegister<VType>;
151
152 constexpr auto inc = RType::numElements;
153 const auto n = e.size();
154 const auto nSIMD = detail::previousMultipleOf<inc> (n);
155
156 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
157
158 size_t i = 0;
159 for (; i < nSIMD; i += inc)
160 e.reduceSSERegisterWise (sseValue, i);
161
162 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
163 sseValue.storeAligned (results.data());
164 results.back() = Expression::reductionResultInitValue;
165
166 for (; i < n; ++i)
167 e.reduceElementWise (results.back(), i);
168
169 return e.finalizeReduction (results);
170 }
171
172 template <is::reductionExpression Expression>
173 static auto reduceNeon (const Expression& e)
174 requires Config::archARM
175 {
176 using VType = ValueType<Expression>;
177 using RType = NeonRegister<VType>;
178
179 constexpr auto inc = RType::numElements;
180 const auto n = e.size();
181 const auto nSIMD = detail::previousMultipleOf<inc> (n);
182
183 auto sseValue = RType::broadcast (Expression::reductionResultInitValue);
184
185 size_t i = 0;
186 for (; i < nSIMD; i += inc)
187 e.reduceNeonRegisterWise (sseValue, i);
188
189 alignas (Config::maxSIMDRegisterSize) std::array<VType, RType::numElements + 1> results;
190 sseValue.store (results.data());
191 results.back() = Expression::reductionResultInitValue;
192
193 for (; i < n; ++i)
194 e.reduceElementWise (results.back(), i);
195
196 return e.finalizeReduction (results);
197 }
198};
199
200} // namespace vctr
A collection of static functions used to evaluate reduction expressions.
Definition: ReductionExpression.h:33
static VCTR_FORCEDINLINE constexpr auto reduce(const Expression &e)
Returns the reduction result of the expression passed in.
Definition: ReductionExpression.h:42
Constrains a type to have a member function reduceAVXRegisterWise (AVXRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:110
Constrains a type to have a member function reduceNeonRegisterWise (NeonRegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:106
Constrains a type to have a member function reduceSSERegisterWise (SSERegister<SrcDstType>&,...
Definition: ContainerAndExpressionConcepts.h:114
Constrains a type to have a member function reduceVectorOp() const that returns a DstType value.
Definition: ContainerAndExpressionConcepts.h:102
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:79
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
Definition: Config.h:290