23namespace vctr::expressions
// Primary template of the constants holder used by the fast log2 approximation.
// It is intentionally empty: the actual constants are supplied by specializations
// (only the float specialization is visible in this excerpt).
29template <std::
floating_po
int T>
30struct FastLog2Constants {};
// Constants for the single precision fast log2 approximation. They are combined as
//   y - b - c * mx - d / (e + mx)
// where y is the integer-converted bit pattern of the input scaled by a, and mx is the
// float obtained from the masked input bits (see operator[] of the expression).
33struct FastLog2Constants<float>
// Scale factor applied to the input's bit pattern after converting it to float; the value equals 2^-23.
35 static constexpr float a { 1.1920928955078125e-7f };
// Constant offset subtracted from the scaled bit pattern.
36 static constexpr float b { 124.22551499f };
// Coefficient of the linear term c * mx.
37 static constexpr float c { 1.498030302f };
// Numerator of the rational term d / (e + mx).
38 static constexpr float d { 1.72587999f };
// Offset added to mx in the denominator of the rational term.
39 static constexpr float e { 0.3520887068f };
// Mask selecting the low 23 bits of the input's bit pattern (named "mantissa" at the use site).
40 static constexpr int32_t c_0x007fffff { 0x007fffff };
// Bit pattern OR-ed onto the masked bits before reinterpreting them as float
// (0x3f000000 is 0.5f, assuming IEEE-754 binary32).
41 static constexpr int32_t c_0x3f000000 { 0x3f000000 };
// Expression template computing a fast approximation of log2 for every source element.
// NOTE(review): the class declaration line itself is not visible in this excerpt.
54template <
size_t extent,
class SrcType>
// Only sources whose value type is exactly float are accepted.
55requires std::same_as<float, ValueType<SrcType>>
// Shorthand for the approximation constants matching value_type.
// NOTE(review): value_type is declared outside this excerpt - presumably by the macro below; confirm.
61 using Constants = detail::FastLog2Constants<value_type>;
63 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (
FastLog2, src)
// Scalar evaluation: returns the log2 approximation for element i.
// NOTE(review): the definition of x is not visible in this excerpt - presumably the
// i-th element of src; confirm against the full file.
65 VCTR_FORCEDINLINE value_type operator[] (
size_t i)
const
// View the bits of the float input as a 32 bit integer.
69 auto xBitsInterpretedAsInt = bitCast<int32_t> (x);
// Keep only the low 23 bits of the bit pattern.
70 auto mantissa = (xBitsInterpretedAsInt & Constants::c_0x007fffff);
// Combine the masked bits with 0x3f000000 and view the result as a float again.
71 auto mx = bitCast<float> (mantissa | Constants::c_0x3f000000);
// Numeric conversion (not a bit cast) of the raw bit pattern to float, scaled by a.
73 auto y =
static_cast<float> (xBitsInterpretedAsInt) * Constants::a;
// Rational term d / (e + mx) and linear term c * mx of the approximation.
75 auto dv = Constants::d / (Constants::e + mx);
76 auto ml = Constants::c * mx;
// Final result: y - b - c * mx - d / (e + mx).
78 return y - Constants::b - ml - dv;
// Prepares the AVX code path: forwards the preparation call to the source and then
// broadcasts every approximation constant into the avx member of the corresponding
// constant holder (c_a ... c_0x3f000000, declared outside this excerpt), so that getAVX
// can use them without broadcasting again.
83 VCTR_FORCEDINLINE VCTR_TARGET (
"avx")
void prepareAVXEvaluation()
const
86 src.prepareAVXEvaluation();
// Floating point constants.
88 c_a.avx = Expression::AVX::broadcast (Constants::a);
89 c_b.avx = Expression::AVX::broadcast (Constants::b);
90 c_c.avx = Expression::AVX::broadcast (Constants::c);
91 c_d.avx = Expression::AVX::broadcast (Constants::d);
92 c_e.avx = Expression::AVX::broadcast (Constants::e);
// Integer bit masks, broadcast through the integer register type.
94 c_0x007fffff.avx = IntTypes::AVXSrc::broadcast (Constants::c_0x007fffff);
95 c_0x3f000000.avx = IntTypes::AVXSrc::broadcast (Constants::c_0x3f000000);
// AVX evaluation: same arithmetic as the scalar operator[], applied to a whole AVX register.
// NOTE(review): the function signature is not visible in this excerpt - presumably
// getAVX (size_t i) const; confirm against the full file.
// Only available on x64, if the source offers getAVX, and if all element types of the
// expression are the same real floating point type.
99 requires (archX64 &&
has::getAVX<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
101 auto x = src.getAVX (i);
// Reinterpret the float lanes as integers and isolate the bits selected by the 0x007fffff mask.
// NOTE(review): the *Legacy bitwise variants are presumably used because plain AVX has no
// 256 bit integer logic instructions - confirm against the register wrapper.
103 auto xBitsInterpretedAsInt = Expression::AVX::reinterpretAsInt (x);
104 auto mantissa = IntTypes::AVXSrc::bitwiseAndLegacy (xBitsInterpretedAsInt, c_0x007fffff.avx);
// OR in 0x3f000000 and view the lanes as floats again.
105 auto mx = IntTypes::AVXSrc::reinterpretAsFp (IntTypes::AVXSrc::bitwiseOrLegacy (mantissa, c_0x3f000000.avx));
// Numeric conversion of the raw bit patterns to float, scaled by a.
107 auto y = Expression::AVX::mul (IntTypes::AVXSrc::convertToFp (xBitsInterpretedAsInt), c_a.avx);
// Rational term d / (e + mx) and linear term c * mx.
109 auto dv = Expression::AVX::div (c_d.avx, Expression::AVX::add (c_e.avx, mx));
110 auto ml = Expression::AVX::mul (c_c.avx, mx);
// ((y - b) - ml) - dv
112 return Expression::AVX::sub (Expression::AVX::sub (Expression::AVX::sub (y, c_b.avx), ml), dv);
// Prepares the SSE code path: forwards the preparation call to the source and then
// broadcasts every approximation constant into the sse member of the corresponding
// constant holder (declared outside this excerpt), so that getSSE can use them directly.
117 VCTR_FORCEDINLINE VCTR_TARGET (
"sse4.1")
void prepareSSEEvaluation()
const
120 src.prepareSSEEvaluation();
// Floating point constants.
122 c_a.sse = Expression::SSE::broadcast (Constants::a);
123 c_b.sse = Expression::SSE::broadcast (Constants::b);
124 c_c.sse = Expression::SSE::broadcast (Constants::c);
125 c_d.sse = Expression::SSE::broadcast (Constants::d);
126 c_e.sse = Expression::SSE::broadcast (Constants::e);
// Integer bit masks, broadcast through the integer register type.
128 c_0x007fffff.sse = IntTypes::SSESrc::broadcast (Constants::c_0x007fffff);
129 c_0x3f000000.sse = IntTypes::SSESrc::broadcast (Constants::c_0x3f000000);
// SSE evaluation: same arithmetic as the scalar operator[], applied to a whole SSE register.
// NOTE(review): the function signature is not visible in this excerpt - presumably
// getSSE (size_t i) const; confirm against the full file.
// Only available on x64, if the source offers getSSE, and if all element types of the
// expression are the same real floating point type.
133 requires (archX64 &&
has::getSSE<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
135 auto x = src.getSSE (i);
// Reinterpret the float lanes as integers and isolate the bits selected by the 0x007fffff mask.
137 auto xBitsInterpretedAsInt = Expression::SSE::reinterpretAsInt (x);
138 auto mantissa = IntTypes::SSESrc::bitwiseAnd (xBitsInterpretedAsInt, c_0x007fffff.sse);
// OR in 0x3f000000 and view the lanes as floats again.
139 auto mx = IntTypes::SSESrc::reinterpretAsFp (IntTypes::SSESrc::bitwiseOr (mantissa, c_0x3f000000.sse));
// Numeric conversion of the raw bit patterns to float, scaled by a.
141 auto y = Expression::SSE::mul (IntTypes::SSESrc::convertToFp (xBitsInterpretedAsInt), c_a.sse);
// Rational term d / (e + mx) and linear term c * mx.
143 auto dv = Expression::SSE::div (c_d.sse, Expression::SSE::add (c_e.sse, mx));
144 auto ml = Expression::SSE::mul (c_c.sse, mx);
// ((y - b) - ml) - dv
146 return Expression::SSE::sub (Expression::SSE::sub (Expression::SSE::sub (y, c_b.sse), ml), dv);
// Prepares the Neon code path: forwards the preparation call to the source and then
// broadcasts every approximation constant into the neon member of the corresponding
// constant holder (declared outside this excerpt), so that getNeon can use them directly.
151 void prepareNeonEvaluation()
const
154 src.prepareNeonEvaluation();
// Floating point constants.
156 c_a.neon = Expression::Neon::broadcast (Constants::a);
157 c_b.neon = Expression::Neon::broadcast (Constants::b);
158 c_c.neon = Expression::Neon::broadcast (Constants::c);
159 c_d.neon = Expression::Neon::broadcast (Constants::d);
160 c_e.neon = Expression::Neon::broadcast (Constants::e);
// Integer bit masks, broadcast through the integer register type.
162 c_0x007fffff.neon = IntTypes::NeonSrc::broadcast (Constants::c_0x007fffff);
163 c_0x3f000000.neon = IntTypes::NeonSrc::broadcast (Constants::c_0x3f000000);
// Neon evaluation: same arithmetic as the scalar operator[], applied to a whole Neon register.
// NOTE(review): the function signature is not visible in this excerpt - presumably
// getNeon (size_t i) const; confirm against the full file.
// Only available on ARM, if the source offers getNeon, and if all element types of the
// expression are the same real floating point type.
167 requires (archARM &&
has::getNeon<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
169 auto x = src.getNeon (i);
// Reinterpret the float lanes as integers and isolate the bits selected by the 0x007fffff mask.
171 auto xBitsInterpretedAsInt = Expression::Neon::reinterpretAsInt (x);
172 auto mantissa = IntTypes::NeonSrc::bitwiseAnd (xBitsInterpretedAsInt, c_0x007fffff.neon);
// OR in 0x3f000000 and view the lanes as floats again.
173 auto mx = IntTypes::NeonSrc::reinterpretAsFp (IntTypes::NeonSrc::bitwiseOr (mantissa, c_0x3f000000.neon));
// Numeric conversion of the raw bit patterns to float, scaled by a.
175 auto y = Expression::Neon::mul (IntTypes::NeonSrc::convertToFp (xBitsInterpretedAsInt), c_a.neon);
// Rational term d / (e + mx) and linear term c * mx.
177 auto dv = Expression::Neon::div (c_d.neon, Expression::Neon::add (c_e.neon, mx));
178 auto ml = Expression::Neon::mul (c_c.neon, mx);
// ((y - b) - ml) - dv
180 return Expression::Neon::sub (Expression::Neon::sub (Expression::Neon::sub (y, c_b.neon), ml), dv);
Calculates a fast approximation for the log2 function.
Definition: FastLog.h:57
Constrains a type to have a member function getAVX (size_t) const.
Definition: ContainerAndExpressionConcepts.h:92
Constrains a type to have a member function getNeon (size_t) const.
Definition: ContainerAndExpressionConcepts.h:84
Constrains a type to have a member function getSSE (size_t) const.
Definition: ContainerAndExpressionConcepts.h:100
Constrains a type to have a member function prepareAVXEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:88
Constrains a type to have a member function prepareNeonEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:80
Constrains a type to have a member function prepareSSEEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:96
constexpr ExpressionChainBuilder< expressions::FastLog2 > fastLog2
A fast approximation of the log2 function.
Definition: FastLog.h:213
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type; if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
An expression chain builder is an object which supplies various operator<< overloads which build chai...
Definition: ExpressionChainBuilder.h:157
The base class to every expression template.
Definition: ExpressionTemplate.h:37
Definition: NeonRegister.h:28
Definition: SSERegister.h:28
Helper template to define a union of all supported SIMD types.
Definition: ExpressionTemplate.h:123