VCTR
Loading...
Searching...
No Matches
Max.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr::expressions
24{
25
// Reduction expression that folds all source elements into their maximum
// (the element-wise path below combines values with std::max).
//
// NOTE(review): this listing jumps from source line 27 to 29, so the line that
// declares the class itself is not visible here. The index at the end of the
// listing places a definition at Max.h:29 and refers to expressions::Max;
// confirm the exact declaration against the original header.
26template <size_t extent, class SrcType>
27requires std::totally_ordered<ValueType<SrcType>>
29{
30public:
    // The reduction result has the same type as the source elements.
31 using value_type = ValueType<SrcType>;
32
    // Project macro, not defined in this listing. Presumably it supplies the
    // members shared by unary expressions, including the `src` object and
    // size() used below -- TODO confirm against the macro definition.
33 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (Max, src)
34
    // Seed value for the reduction: the lowest value std::numeric_limits reports
    // for value_type, so it does not mask any finite source element under std::max.
35 static constexpr value_type reductionResultInitValue = std::numeric_limits<value_type>::lowest();
36
    // Scalar path: updates result with the larger of result and src[i].
37 VCTR_FORCEDINLINE constexpr void reduceElementWise (value_type& result, size_t i) const
38 {
39 result = std::max (result, src[i]);
40 }
41
42 //==============================================================================
    // Whole-vector path: hands src.data() and size() to Expression::Accelerate::max
    // and returns its result.
    // NOTE(review): source line 44 is missing from this listing. Two
    // reduceVectorOp overloads with identical signatures follow each other here,
    // so each presumably carried a requires-clause selecting its backend -- confirm.
43 VCTR_FORCEDINLINE value_type reduceVectorOp() const
45 {
46 return Expression::Accelerate::max (src.data(), size());
47 };
48
    // Whole-vector path: hands src.data() and the size converted with sizeToInt
    // to Expression::IPP::max and returns its result.
    // NOTE(review): source line 50 is missing from this listing (see note above).
49 VCTR_FORCEDINLINE value_type reduceVectorOp() const
51 {
52 return Expression::IPP::max (src.data(), sizeToInt (size()));
53 };
54
55 //==============================================================================
    // Project macro, not defined in this listing; by its name it forwards the
    // SIMD evaluation preparation calls to the source -- TODO confirm.
56 VCTR_FORWARD_PREPARE_SIMD_EVALUATION_UNARY_EXPRESSION_MEMBER_FUNCTIONS
57
    // Neon path: combines result with the register returned by src.getNeon (i)
    // using Expression::Neon::max.
    // NOTE(review): source line 59 is missing from this listing; the sibling
    // AVX overloads carry a requires-clause at that position -- confirm.
58 VCTR_FORCEDINLINE void reduceNeonRegisterWise (NeonRegister<value_type>& result, size_t i) const
60 {
61 result = Expression::Neon::max (result, src.getNeon (i));
62 }
63
    // AVX path for real floating point value types: combines result with the
    // register returned by src.getAVX (i) using Expression::AVX::max.
    // Only available on x64 builds when SrcType provides getAVX.
64 VCTR_FORCEDINLINE VCTR_TARGET ("fma") void reduceAVXRegisterWise (AVXRegister<value_type>& result, size_t i) const
65 requires Config::archX64 && has::getAVX<SrcType> && is::realFloatNumber<value_type>
66 {
67 result = Expression::AVX::max (result, src.getAVX (i));
68 }
69
    // AVX path for 32 bit integer value types; same operation as the floating
    // point overload above but tagged with the "avx2" target.
70 VCTR_FORCEDINLINE VCTR_TARGET ("avx2") void reduceAVXRegisterWise (AVXRegister<value_type>& result, size_t i) const
71 requires Config::archX64 && has::getAVX<SrcType> && is::int32Number<value_type>
72 {
73 result = Expression::AVX::max (result, src.getAVX (i));
74 }
75
    // SSE path: combines result with the register returned by src.getSSE (i)
    // using Expression::SSE::max.
    // NOTE(review): source line 77 is missing from this listing; presumably the
    // requires-clause of this overload -- confirm.
76 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void reduceSSERegisterWise (SSERegister<value_type>& result, size_t i) const
78 {
79 result = Expression::SSE::max (result, src.getSSE (i));
80 }
81
82 //==============================================================================
    // Collapses n partial maxima into the final result: the single entry when
    // n == 1, otherwise the largest entry found by std::max_element.
83 template <size_t n>
84 VCTR_FORCEDINLINE static constexpr value_type finalizeReduction (const std::array<value_type, n>& maxima)
85 {
86 if constexpr (n == 1)
87 return maxima[0];
88
89 return *std::max_element (maxima.begin(), maxima.end());
90 }
91};
92
// Reduction expression that folds all source elements into the maximum of
// their absolute values.
//
// NOTE(review): this listing jumps from source line 93 to 96, so the lines that
// constrain and declare the class are not visible here. The index at the end
// of the listing places a definition at Max.h:96 and refers to
// expressions::MaxAbs; confirm the exact declaration against the original header.
93template <size_t extent, class SrcType>
96{
97public:
    // Result type is the real type behind the source value type (the listing's
    // index describes RealType as the real value_type for std::complex, and T otherwise).
98 using value_type = RealType<ValueType<SrcType>>;
99
    // Project macro, not defined in this listing. Presumably it supplies the
    // members shared by unary expressions, including the `src` object and
    // size() used below -- TODO confirm against the macro definition.
100 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (MaxAbs, src)
101
    // Seed value for the reduction: absolute values are never negative, so 0
    // cannot exceed any of them.
102 static constexpr value_type reductionResultInitValue = 0;
103
    // Scalar path: updates result with the larger of result and std::abs (src[i]).
    // NOTE(review): source line 105 is missing from this listing. Two
    // reduceElementWise overloads with identical signatures follow each other
    // here, so each presumably carried a requires-clause (this one looks like
    // the variant for types that can be negative) -- confirm.
104 VCTR_FORCEDINLINE constexpr void reduceElementWise (value_type& result, size_t i) const
106 {
107 result = std::max (result, std::abs (src[i]));
108 }
109
    // Scalar path without std::abs: updates result with the larger of result and src[i].
    // NOTE(review): source line 111 is missing from this listing; presumably the
    // constraint restricting this overload to unsigned value types -- confirm.
110 VCTR_FORCEDINLINE constexpr void reduceElementWise (value_type& result, size_t i) const
112 {
113 result = std::max (result, src[i]);
114 }
115
116 //==============================================================================
    // Whole-vector path: hands src.data() and the size converted with sizeToInt
    // to Expression::IPP::maxAbs and returns its result.
    // NOTE(review): source line 118 is missing from this listing; presumably the
    // requires-clause of this overload -- confirm.
117 VCTR_FORCEDINLINE value_type reduceVectorOp() const
119 {
120 return Expression::IPP::maxAbs (src.data(), sizeToInt (size()));
121 };
122
123 //==============================================================================
    // Project macro, not defined in this listing; by its name it forwards the
    // SIMD evaluation preparation calls to the source -- TODO confirm.
124 VCTR_FORWARD_PREPARE_SIMD_EVALUATION_UNARY_EXPRESSION_MEMBER_FUNCTIONS
125
    // Neon path for real floating point and int32_t value types: takes
    // Expression::Neon::abs of the register returned by src.getNeon (i) and
    // combines it with result using Expression::Neon::max.
126 VCTR_FORCEDINLINE void reduceNeonRegisterWise (NeonRegister<value_type>& result, size_t i) const
127 requires Config::archARM && has::getNeon<SrcType> && (is::realFloatNumber<value_type> || std::same_as<int32_t, value_type>)
128 {
129 result = Expression::Neon::max (result, Expression::Neon::abs (src.getNeon (i)));
130 }
131
    // Neon path for uint32_t: unsigned values are already non-negative, so the
    // register is combined with result directly.
132 VCTR_FORCEDINLINE void reduceNeonRegisterWise (NeonRegister<value_type>& result, size_t i) const
133 requires Config::archARM && has::getNeon<SrcType> && std::same_as<uint32_t, value_type>
134 {
135 result = Expression::Neon::max (result, src.getNeon (i));
136 }
137
    // AVX path for real floating point value types. A register filled with -0.0
    // is built once (function-local static) and passed together with the source
    // register to Expression::AVX::bitwiseAndNot; the outcome is combined with
    // result using Expression::AVX::max.
    // NOTE(review): presumably bitwiseAndNot clears the sign bit of every lane,
    // i.e. computes the absolute value -- confirm operand order against its definition.
138 VCTR_FORCEDINLINE VCTR_TARGET ("fma") void reduceAVXRegisterWise (AVXRegister<value_type>& result, size_t i) const
139 requires Config::archX64 && has::getAVX<SrcType> && is::realFloatNumber<value_type>
140 {
141 static const auto avxSignBit = Expression::AVX::broadcast (typename Expression::CommonElement::Type (-0.0));
142
143 result = Expression::AVX::max (result, Expression::AVX::bitwiseAndNot (src.getAVX (i), avxSignBit));
144 }
145
    // AVX path for int32_t: takes Expression::AVX::abs of the source register
    // before combining it with result.
146 VCTR_FORCEDINLINE VCTR_TARGET ("avx2") void reduceAVXRegisterWise (AVXRegister<value_type>& result, size_t i) const
147 requires Config::archX64 && has::getAVX<SrcType> && std::same_as<int32_t, value_type>
148 {
149 result = Expression::AVX::max (result, Expression::AVX::abs (src.getAVX (i)));
150 }
151
    // AVX path for uint32_t: no abs step, the source register is combined with
    // result directly.
152 VCTR_FORCEDINLINE VCTR_TARGET ("avx2") void reduceAVXRegisterWise (AVXRegister<value_type>& result, size_t i) const
153 requires Config::archX64 && has::getAVX<SrcType> && std::same_as<uint32_t, value_type>
154 {
155 result = Expression::AVX::max (result, src.getAVX (i));
156 }
157
    // SSE path for real floating point value types; mirrors the AVX floating
    // point overload using a -0.0 register and Expression::SSE::bitwiseAndNot.
    // NOTE(review): presumably this clears the sign bit of every lane -- confirm
    // operand order against the definition of bitwiseAndNot.
158 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void reduceSSERegisterWise (SSERegister<value_type>& result, size_t i) const
159 requires Config::archX64 && has::getSSE<SrcType> && is::realFloatNumber<value_type>
160 {
161 static const auto sseSignBit = Expression::SSE::broadcast (typename Expression::CommonElement::Type (-0.0));
162
163 result = Expression::SSE::max (result, Expression::SSE::bitwiseAndNot (src.getSSE (i), sseSignBit));
164 }
165
    // SSE path for int32_t: takes Expression::SSE::abs of the source register
    // before combining it with result.
166 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void reduceSSERegisterWise (SSERegister<value_type>& result, size_t i) const
167 requires Config::archX64 && has::getSSE<SrcType> && std::same_as<int32_t, value_type>
168 {
169 result = Expression::SSE::max (result, Expression::SSE::abs (src.getSSE (i)));
170 }
171
    // SSE path for uint32_t: no abs step, the source register is combined with
    // result directly.
172 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void reduceSSERegisterWise (SSERegister<value_type>& result, size_t i) const
173 requires Config::archX64 && has::getSSE<SrcType> && std::same_as<uint32_t, value_type>
174 {
175 result = Expression::SSE::max (result, src.getSSE (i));
176 }
177
178 //==============================================================================
    // Collapses n partial maxima into the final result: the single entry when
    // n == 1, otherwise the largest entry found by std::max_element.
179 template <size_t n>
180 VCTR_FORCEDINLINE static constexpr value_type finalizeReduction (const std::array<value_type, n>& maxima)
181 {
182 if constexpr (n == 1)
183 return maxima[0];
184
185 return *std::max_element (maxima.begin(), maxima.end());
186 }
187};
188
189} // namespace vctr::expressions
190
// Public namespace of the library.
// NOTE(review): source lines 194-198 and 200-204 are missing from this listing,
// which leaves the namespace body empty here. The index at the end of the
// listing places `constexpr ExpressionChainBuilder< expressions::Max > max` at
// Max.h:198 and `constexpr ExpressionChainBuilder< expressions::MaxAbs > maxAbs`
// at Max.h:204 -- restore both declarations from the original header.
191namespace vctr
192{
193

199

205
206} // namespace vctr
Definition: Max.h:96
Definition: Max.h:29
Constrains a type to have a member function getAVX (size_t) const.
Definition: ContainerAndExpressionConcepts.h:92
Constrains a type to have a member function getNeon (size_t) const.
Definition: ContainerAndExpressionConcepts.h:84
Constrains a type to have a member function getSSE (size_t) const.
Definition: ContainerAndExpressionConcepts.h:100
Constrains a type to represent a real valued 32 bit integer number.
Definition: NumericTypeConcepts.h:57
Constrains a type to represent a real valued or std::complex number type.
Definition: NumericTypeConcepts.h:49
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:83
Constrains a type to represent a real valued signed number (e.g.
Definition: NumericTypeConcepts.h:65
A combined concept to check if Apple Accelerate is a suitable option for a floating point vector reduction operation.
Definition: ContainerAndExpressionConcepts.h:280
A combined concept to check if Intel IPP is a suitable option for a floating point vector reduction operation.
Definition: ContainerAndExpressionConcepts.h:304
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:198
constexpr ExpressionChainBuilder< expressions::MaxAbs > maxAbs
Computes the maximum value of the absolute value of the source values.
Definition: Max.h:204
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
typename detail::RealType< std::remove_cvref_t< T > >::Type RealType
If T is any instance of std::complex, this will be the real value_type, otherwise this will be T.
Definition: Traits.h:211
int sizeToInt(size_t size)
Casts the size_t argument to an int.
Definition: PlatformVectorOpsHelpers.h:27
Definition: AVXRegister.h:28
An expression chain builder is an object which supplies various operator<< overloads which build chai...
Definition: ExpressionChainBuilder.h:157
The base class to every expression template.
Definition: ExpressionTemplate.h:37
Definition: NeonRegister.h:28
Definition: SSERegister.h:28