23namespace vctr::expressions
26template <
size_t extent,
class SrcType>
27requires std::totally_ordered<ValueType<SrcType>>
33 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (
Max, src)
// Lowest finite value representable by value_type (std::numeric_limits::lowest).
// Presumably the seed a max reduction starts from, since no element can compare
// below it — TODO confirm against the code that consumes this constant.
35 static constexpr value_type reductionResultInitValue = std::numeric_limits<value_type>::lowest();
// Element-wise reduction step: replaces `result` with the larger of its current
// value and src[i] (std::max).
//
// result: running maximum, updated in place.
// i:      index of the source element to fold in.
37 VCTR_FORCEDINLINE
constexpr void reduceElementWise (value_type& result,
size_t i)
const
39 result = std::max (result, src[i]);
// Whole-vector reduction: returns what Expression::Accelerate::max yields for the
// source's data pointer and size().
// NOTE(review): the constraint that selects this overload over the IPP one is not
// visible in this listing — confirm against the original header.
43 VCTR_FORCEDINLINE value_type reduceVectorOp()
const
46 return Expression::Accelerate::max (src.data(), size());
// Whole-vector reduction: returns what Expression::IPP::max yields for the
// source's data pointer; the element count is converted to int with sizeToInt
// before the call.
// NOTE(review): the constraint that selects this overload over the Accelerate
// one is not visible in this listing — confirm against the original header.
49 VCTR_FORCEDINLINE value_type reduceVectorOp()
const
52 return Expression::IPP::max (src.data(),
sizeToInt (size()));
56 VCTR_FORWARD_PREPARE_SIMD_EVALUATION_UNARY_EXPRESSION_MEMBER_FUNCTIONS
61 result = Expression::Neon::max (result, src.getNeon (i));
// AVX register-wise reduction step, built with VCTR_TARGET ("fma"): combines
// `result` with src.getAVX (i) through Expression::AVX::max and stores the
// outcome back into `result`.
//
// result: running maximum register, updated in place.
// i:      index forwarded to src.getAVX.
64 VCTR_FORCEDINLINE VCTR_TARGET (
"fma")
void reduceAVXRegisterWise (
AVXRegister<value_type>& result,
size_t i)
const
67 result = Expression::AVX::max (result, src.getAVX (i));
// AVX register-wise reduction step, built with VCTR_TARGET ("avx2"): combines
// `result` with src.getAVX (i) through Expression::AVX::max and stores the
// outcome back into `result`.
// NOTE(review): the body matches the "fma" overload; presumably a constraint not
// shown in this listing separates the two — confirm.
70 VCTR_FORCEDINLINE VCTR_TARGET (
"avx2")
void reduceAVXRegisterWise (
AVXRegister<value_type>& result,
size_t i)
const
73 result = Expression::AVX::max (result, src.getAVX (i));
// SSE register-wise reduction step, built with VCTR_TARGET ("sse4.1"): combines
// `result` with src.getSSE (i) through Expression::SSE::max and stores the
// outcome back into `result`.
//
// result: running maximum register, updated in place.
// i:      index forwarded to src.getSSE.
76 VCTR_FORCEDINLINE VCTR_TARGET (
"sse4.1")
void reduceSSERegisterWise (
SSERegister<value_type>& result,
size_t i)
const
79 result = Expression::SSE::max (result, src.getSSE (i));
// Collapses an array of partial maxima into one value: returns the largest
// entry of `maxima`, located with std::max_element.
// Requires a non-empty array: for n == 0 std::max_element returns end(), and
// dereferencing it is undefined behaviour.
// NOTE(review): `n` is presumably a template parameter declared on a line that
// is not part of this listing.
84 VCTR_FORCEDINLINE
static constexpr value_type finalizeReduction (
const std::array<value_type, n>& maxima)
89 return *std::max_element (maxima.begin(), maxima.end());
93template <
size_t extent,
class SrcType>
100 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (
MaxAbs, src)
// Initial value 0. Presumably the seed of the max-abs reduction, as an absolute
// value is never below zero — TODO confirm against the code that consumes this
// constant.
102 static constexpr value_type reductionResultInitValue = 0;
// Element-wise reduction step: replaces `result` with the larger of its current
// value and std::abs (src[i]).
//
// result: running maximum of absolute values, updated in place.
// i:      index of the source element to fold in.
104 VCTR_FORCEDINLINE
constexpr void reduceElementWise (value_type& result,
size_t i)
const
107 result = std::max (result, std::abs (src[i]));
// Element-wise reduction step without std::abs: replaces `result` with the
// larger of its current value and src[i].
// NOTE(review): presumably chosen for value types that cannot be negative, where
// taking the absolute value is a no-op; the selecting constraint is not visible
// in this listing — confirm.
110 VCTR_FORCEDINLINE
constexpr void reduceElementWise (value_type& result,
size_t i)
const
113 result = std::max (result, src[i]);
// Whole-vector reduction: returns what Expression::IPP::maxAbs yields for the
// source's data pointer; the element count is converted to int with sizeToInt
// before the call.
117 VCTR_FORCEDINLINE value_type reduceVectorOp()
const
120 return Expression::IPP::maxAbs (src.data(),
sizeToInt (size()));
124 VCTR_FORWARD_PREPARE_SIMD_EVALUATION_UNARY_EXPRESSION_MEMBER_FUNCTIONS
129 result = Expression::Neon::max (result, Expression::Neon::abs (src.getNeon (i)));
135 result = Expression::Neon::max (result, src.getNeon (i));
// AVX register-wise reduction step, built with VCTR_TARGET ("fma").
//
// result: running maximum register, updated in place.
// i:      index forwarded to src.getAVX.
138 VCTR_FORCEDINLINE VCTR_TARGET (
"fma")
void reduceAVXRegisterWise (
AVXRegister<value_type>& result,
size_t i)
const
// Function-local static: -0.0 converted to the expression's common element type
// and passed through Expression::AVX::broadcast. For IEEE 754 floating point
// types -0.0 has only the sign bit set.
141 static const auto avxSignBit = Expression::AVX::broadcast (
typename Expression::CommonElement::Type (-0.0));
// NOTE(review): bitwiseAndNot presumably clears the sign bit of every lane, i.e.
// produces the absolute value, before it goes into max — confirm the argument
// order expected by the AVX helper.
143 result = Expression::AVX::max (result, Expression::AVX::bitwiseAndNot (src.getAVX (i), avxSignBit));
// AVX register-wise reduction step, built with VCTR_TARGET ("avx2"): applies
// Expression::AVX::abs to src.getAVX (i), combines it with `result` through
// Expression::AVX::max and stores the outcome back into `result`.
// NOTE(review): the constraint that separates this overload from its siblings is
// not visible in this listing — confirm.
146 VCTR_FORCEDINLINE VCTR_TARGET (
"avx2")
void reduceAVXRegisterWise (
AVXRegister<value_type>& result,
size_t i)
const
149 result = Expression::AVX::max (result, Expression::AVX::abs (src.getAVX (i)));
// AVX register-wise reduction step, built with VCTR_TARGET ("avx2"), without an
// abs call: combines `result` with src.getAVX (i) through Expression::AVX::max
// and stores the outcome back into `result`.
// NOTE(review): presumably chosen for value types that cannot be negative; the
// selecting constraint is not visible in this listing — confirm.
152 VCTR_FORCEDINLINE VCTR_TARGET (
"avx2")
void reduceAVXRegisterWise (
AVXRegister<value_type>& result,
size_t i)
const
155 result = Expression::AVX::max (result, src.getAVX (i));
// SSE register-wise reduction step, built with VCTR_TARGET ("sse4.1").
//
// result: running maximum register, updated in place.
// i:      index forwarded to src.getSSE.
158 VCTR_FORCEDINLINE VCTR_TARGET (
"sse4.1")
void reduceSSERegisterWise (
SSERegister<value_type>& result,
size_t i)
const
// Function-local static: -0.0 converted to the expression's common element type
// and passed through Expression::SSE::broadcast. For IEEE 754 floating point
// types -0.0 has only the sign bit set.
161 static const auto sseSignBit = Expression::SSE::broadcast (
typename Expression::CommonElement::Type (-0.0));
// NOTE(review): bitwiseAndNot presumably clears the sign bit of every lane, i.e.
// produces the absolute value, before it goes into max — confirm the argument
// order expected by the SSE helper.
163 result = Expression::SSE::max (result, Expression::SSE::bitwiseAndNot (src.getSSE (i), sseSignBit));
// SSE register-wise reduction step, built with VCTR_TARGET ("sse4.1"): applies
// Expression::SSE::abs to src.getSSE (i), combines it with `result` through
// Expression::SSE::max and stores the outcome back into `result`.
// NOTE(review): the constraint that separates this overload from its siblings is
// not visible in this listing — confirm.
166 VCTR_FORCEDINLINE VCTR_TARGET (
"sse4.1")
void reduceSSERegisterWise (
SSERegister<value_type>& result,
size_t i)
const
169 result = Expression::SSE::max (result, Expression::SSE::abs (src.getSSE (i)));
// SSE register-wise reduction step, built with VCTR_TARGET ("sse4.1"), without an
// abs call: combines `result` with src.getSSE (i) through Expression::SSE::max
// and stores the outcome back into `result`.
// NOTE(review): presumably chosen for value types that cannot be negative; the
// selecting constraint is not visible in this listing — confirm.
172 VCTR_FORCEDINLINE VCTR_TARGET (
"sse4.1")
void reduceSSERegisterWise (
SSERegister<value_type>& result,
size_t i)
const
175 result = Expression::SSE::max (result, src.getSSE (i));
// Collapses an array of partial maxima into one value.
// NOTE(review): `n` is presumably a template parameter declared on a line that
// is not part of this listing.
180 VCTR_FORCEDINLINE
static constexpr value_type finalizeReduction (
const std::array<value_type, n>& maxima)
// Compile-time special case for a single-entry array.
// NOTE(review): the statement executed in this branch is missing from the
// listing — check the original header.
182 if constexpr (n == 1)
// General case: the largest entry of `maxima`, located with std::max_element.
// Requires a non-empty array, since dereferencing end() is undefined behaviour.
185 return *std::max_element (maxima.begin(), maxima.end());
Constrains a type to have a member function getAVX (size_t) const.
Definition: ContainerAndExpressionConcepts.h:92
Constrains a type to have a member function getNeon (size_t) const.
Definition: ContainerAndExpressionConcepts.h:84
Constrains a type to have a member function getSSE (size_t) const.
Definition: ContainerAndExpressionConcepts.h:100
Constrains a type to represent a real valued 32 bit integer number.
Definition: NumericTypeConcepts.h:57
Constrains a type to represent a real valued or std::complex number type.
Definition: NumericTypeConcepts.h:49
Constrains a type to represent a real valued floating point number.
Definition: NumericTypeConcepts.h:83
Constrains a type to represent a real valued signed number (e.g.
Definition: NumericTypeConcepts.h:65
A combined concept to check if Apple Accelerate is a suitable option for a floating point vector redu...
Definition: ContainerAndExpressionConcepts.h:280
A combined concept to check if Intel IPP is a suitable option for a floating point vector reduction o...
Definition: ContainerAndExpressionConcepts.h:304
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:198
constexpr ExpressionChainBuilder< expressions::MaxAbs > maxAbs
Computes the maximum value of the absolute value of the source values.
Definition: Max.h:204
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
typename detail::RealType< std::remove_cvref_t< T > >::Type RealType
If T is any instance of std::complex, this will be the real value_type, otherwise this will be T.
Definition: Traits.h:211
int sizeToInt(size_t size)
Casts the size_t argument to an int.
Definition: PlatformVectorOpsHelpers.h:27
Definition: AVXRegister.h:28
An expression chain builder is an object which supplies various operator<< overloads which build chai...
Definition: ExpressionChainBuilder.h:157
The base class to every expression template.
Definition: ExpressionTemplate.h:37
Definition: NeonRegister.h:28
Definition: SSERegister.h:28