VCTR
Loading...
Searching...
No Matches
FastLog.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2025- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr::expressions
24{
25
26namespace detail
27{
28
// Primary template for the constant set used by the fast log2 approximation.
// It has no members; values are provided by explicit specialisations
// (this file specialises it for float).
29template <std::floating_point T>
30struct FastLog2Constants {};
31
// Single-precision constants for the fast log2 approximation.
// The evaluators in this file combine them as
//     y * a - b - c * mx - d / (e + mx)
// where y is the input's bit pattern converted to float and mx is the float
// obtained from the input's mantissa bits ORed with c_0x3f000000.
32template <>
33struct FastLog2Constants<float>
34{
// Scale applied to the integer-interpreted bit pattern; numerically equal to 2^-23.
35 static constexpr float a { 1.1920928955078125e-7f };
// Constant offset subtracted from the scaled bit pattern.
36 static constexpr float b { 124.22551499f };
// Factor of the term that is linear in mx.
37 static constexpr float c { 1.498030302f };
// Numerator of the rational term d / (e + mx).
38 static constexpr float d { 1.72587999f };
// Offset added to mx in the denominator of the rational term.
39 static constexpr float e { 0.3520887068f };
40 static constexpr int32_t c_0x007fffff { 0x007fffff }; // Masks the mantissa bits
// ORed onto the masked mantissa. 0x3f000000 is the IEEE-754 binary32 bit pattern of 0.5f,
// so (assuming float is binary32) the combined bits read back as a float in [0.5, 1).
41 static constexpr int32_t c_0x3f000000 { 0x3f000000 };
42 // clang-format on
43};
44}
45
46//==============================================================================
54template <size_t extent, class SrcType>
55requires std::same_as<float, ValueType<SrcType>>
57{
58public:
59 using value_type = ValueType<SrcType>;
60
61 using Constants = detail::FastLog2Constants<value_type>;
62
63 VCTR_COMMON_UNARY_EXPRESSION_MEMBERS (FastLog2, src)
64
// Scalar evaluation: returns the fast log2 approximation of element i of src.
// No special handling of zero, negative, infinite or NaN inputs is visible here;
// the bit pattern is processed as-is.
65 VCTR_FORCEDINLINE value_type operator[] (size_t i) const
66 {
67 auto x = src[i];
68
// Reinterpret the float's bits as a signed 32 bit integer, keep only the mantissa bits
// and OR in c_0x3f000000 so that mx is the mantissa read back as a float.
69 auto xBitsInterpretedAsInt = bitCast<int32_t> (x);
70 auto mantissa = (xBitsInterpretedAsInt & Constants::c_0x007fffff);
71 auto mx = bitCast<float> (mantissa | Constants::c_0x3f000000);
72
// Numeric conversion (not a bit cast) of the whole bit pattern, scaled by a.
73 auto y = static_cast<float> (xBitsInterpretedAsInt) * Constants::a;
74
// Mantissa dependent correction terms: a rational one (dv) and a linear one (ml).
75 auto dv = Constants::d / (Constants::e + mx);
76 auto ml = Constants::c * mx;
77
78 return y - Constants::b - ml - dv;
79 }
80
81 //==============================================================================
82 // AVX Implementation
// Prepares AVX evaluation: forwards the call to src, then broadcasts every constant
// into the .avx member of its register union so that getAVX can use them.
// The function is const and writes to members; c_0x007fffff and c_0x3f000000 are declared
// mutable in this class. c_a ... c_e are not declared in the visible part of this listing
// (presumably mutable float register unions - TODO confirm).
83 VCTR_FORCEDINLINE VCTR_TARGET ("avx") void prepareAVXEvaluation() const
84 requires (has::prepareAVXEvaluation<SrcType> && Expression::CommonElement::isRealFloat)
85 {
86 src.prepareAVXEvaluation();
87
// Floating point constants.
88 c_a.avx = Expression::AVX::broadcast (Constants::a);
89 c_b.avx = Expression::AVX::broadcast (Constants::b);
90 c_c.avx = Expression::AVX::broadcast (Constants::c);
91 c_d.avx = Expression::AVX::broadcast (Constants::d);
92 c_e.avx = Expression::AVX::broadcast (Constants::e);
93
// Integer bit masks, broadcast through the int32_t register type.
94 c_0x007fffff.avx = IntTypes::AVXSrc::broadcast (Constants::c_0x007fffff);
95 c_0x3f000000.avx = IntTypes::AVXSrc::broadcast (Constants::c_0x3f000000);
96 }
97
// AVX evaluation: same sequence of operations as the scalar operator[], applied to the
// register returned by src.getAVX (i). Uses the constants stored by prepareAVXEvaluation.
// NOTE(review): this function is tagged VCTR_TARGET ("fma") while prepareAVXEvaluation uses
// "avx", and no fused multiply-add call is visible in the body - confirm the target is intended.
98 VCTR_FORCEDINLINE VCTR_TARGET ("fma") AVXRegister<value_type> getAVX (size_t i) const
99 requires (archX64 && has::getAVX<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
100 {
101 auto x = src.getAVX (i);
102
// Mask the mantissa bits and OR in c_0x3f000000, then view the result as floats again.
// The *Legacy variants are presumably the ones that do not need AVX2 integer
// instructions - TODO confirm against AVXRegister.
103 auto xBitsInterpretedAsInt = Expression::AVX::reinterpretAsInt (x);
104 auto mantissa = IntTypes::AVXSrc::bitwiseAndLegacy (xBitsInterpretedAsInt, c_0x007fffff.avx);
105 auto mx = IntTypes::AVXSrc::reinterpretAsFp (IntTypes::AVXSrc::bitwiseOrLegacy (mantissa, c_0x3f000000.avx));
106
// Whole bit pattern converted to floating point and scaled by a.
107 auto y = Expression::AVX::mul (IntTypes::AVXSrc::convertToFp (xBitsInterpretedAsInt), c_a.avx);
108
109 auto dv = Expression::AVX::div (c_d.avx, Expression::AVX::add (c_e.avx, mx));
110 auto ml = Expression::AVX::mul (c_c.avx, mx);
111
// ((y - b) - ml) - dv, the same left to right order as the scalar return expression.
112 return Expression::AVX::sub (Expression::AVX::sub (Expression::AVX::sub (y, c_b.avx), ml), dv);
113 }
114
115 //==============================================================================
116 // SSE Implementation
// Prepares SSE evaluation: forwards the call to src, then broadcasts every constant
// into the .sse member of its register union so that getSSE can use them.
// Const member function; the written members must therefore be mutable (visible for
// c_0x007fffff and c_0x3f000000, presumably also true for c_a ... c_e - TODO confirm).
117 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void prepareSSEEvaluation() const
118 requires (has::prepareSSEEvaluation<SrcType> && Expression::CommonElement::isRealFloat)
119 {
120 src.prepareSSEEvaluation();
121
// Floating point constants.
122 c_a.sse = Expression::SSE::broadcast (Constants::a);
123 c_b.sse = Expression::SSE::broadcast (Constants::b);
124 c_c.sse = Expression::SSE::broadcast (Constants::c);
125 c_d.sse = Expression::SSE::broadcast (Constants::d);
126 c_e.sse = Expression::SSE::broadcast (Constants::e);
127
// Integer bit masks, broadcast through the int32_t register type.
128 c_0x007fffff.sse = IntTypes::SSESrc::broadcast (Constants::c_0x007fffff);
129 c_0x3f000000.sse = IntTypes::SSESrc::broadcast (Constants::c_0x3f000000);
130 }
131
// SSE evaluation: same sequence of operations as the scalar operator[], applied to the
// register returned by src.getSSE (i). Uses the constants stored by prepareSSEEvaluation.
132 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") SSERegister<value_type> getSSE (size_t i) const
133 requires (archX64 && has::getSSE<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
134 {
135 auto x = src.getSSE (i);
136
// Mask the mantissa bits and OR in c_0x3f000000, then view the result as floats again.
137 auto xBitsInterpretedAsInt = Expression::SSE::reinterpretAsInt (x);
138 auto mantissa = IntTypes::SSESrc::bitwiseAnd (xBitsInterpretedAsInt, c_0x007fffff.sse);
139 auto mx = IntTypes::SSESrc::reinterpretAsFp (IntTypes::SSESrc::bitwiseOr (mantissa, c_0x3f000000.sse));
140
// Whole bit pattern converted to floating point and scaled by a.
141 auto y = Expression::SSE::mul (IntTypes::SSESrc::convertToFp (xBitsInterpretedAsInt), c_a.sse);
142
143 auto dv = Expression::SSE::div (c_d.sse, Expression::SSE::add (c_e.sse, mx));
144 auto ml = Expression::SSE::mul (c_c.sse, mx);
145
// ((y - b) - ml) - dv, the same left to right order as the scalar return expression.
146 return Expression::SSE::sub (Expression::SSE::sub (Expression::SSE::sub (y, c_b.sse), ml), dv);
147 }
148
149 //==============================================================================
150 // Neon Implementation
// Prepares Neon evaluation: forwards the call to src, then broadcasts every constant
// into the .neon member of its register union so that getNeon can use them.
// Only available when archARM is true (see the requires clause). Unlike the AVX and SSE
// counterparts it carries no VCTR_FORCEDINLINE / VCTR_TARGET annotation.
151 void prepareNeonEvaluation() const
152 requires (archARM && has::prepareNeonEvaluation<SrcType> && Expression::CommonElement::isRealFloat)
153 {
154 src.prepareNeonEvaluation();
155
// Floating point constants.
156 c_a.neon = Expression::Neon::broadcast (Constants::a);
157 c_b.neon = Expression::Neon::broadcast (Constants::b);
158 c_c.neon = Expression::Neon::broadcast (Constants::c);
159 c_d.neon = Expression::Neon::broadcast (Constants::d);
160 c_e.neon = Expression::Neon::broadcast (Constants::e);
161
// Integer bit masks, broadcast through the int32_t register type.
162 c_0x007fffff.neon = IntTypes::NeonSrc::broadcast (Constants::c_0x007fffff);
163 c_0x3f000000.neon = IntTypes::NeonSrc::broadcast (Constants::c_0x3f000000);
164 }
165
// Neon evaluation: same sequence of operations as the scalar operator[], applied to the
// register returned by src.getNeon (i). Uses the constants stored by prepareNeonEvaluation.
166 NeonRegister<value_type> getNeon (size_t i) const
167 requires (archARM && has::getNeon<SrcType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
168 {
169 auto x = src.getNeon (i);
170
// Mask the mantissa bits and OR in c_0x3f000000, then view the result as floats again.
171 auto xBitsInterpretedAsInt = Expression::Neon::reinterpretAsInt (x);
172 auto mantissa = IntTypes::NeonSrc::bitwiseAnd (xBitsInterpretedAsInt, c_0x007fffff.neon);
173 auto mx = IntTypes::NeonSrc::reinterpretAsFp (IntTypes::NeonSrc::bitwiseOr (mantissa, c_0x3f000000.neon));
174
// Whole bit pattern converted to floating point and scaled by a.
175 auto y = Expression::Neon::mul (IntTypes::NeonSrc::convertToFp (xBitsInterpretedAsInt), c_a.neon);
176
177 auto dv = Expression::Neon::div (c_d.neon, Expression::Neon::add (c_e.neon, mx));
178 auto ml = Expression::Neon::mul (c_c.neon, mx);
179
// ((y - b) - ml) - dv, the same left to right order as the scalar return expression.
180 return Expression::Neon::sub (Expression::Neon::sub (Expression::Neon::sub (y, c_b.neon), ml), dv);
181 }
182
183private:
// Names the int32_t SIMD register type for each instruction set. The evaluators use these
// aliases for the integer side of the computation (masking, OR, int to float conversion),
// and the struct is passed as template argument to SIMDRegisterUnion for the mask members
// (presumably SIMDRegisterUnion looks up exactly these alias names - TODO confirm).
184 struct IntTypes
185 {
186 using NeonSrc = NeonRegister<int32_t>;
187 using AVXSrc = AVXRegister<int32_t>;
188 using SSESrc = SSERegister<int32_t>;
189 };
190
// SIMD copies of the two integer bit masks. They are filled by the const
// prepare*Evaluation functions, hence mutable, and read by getAVX / getSSE / getNeon.
196 mutable SIMDRegisterUnion<IntTypes> c_0x007fffff;
197 mutable SIMDRegisterUnion<IntTypes> c_0x3f000000;
198};
199
200} // namespace vctr::expressions
201
202namespace vctr
203{
204
214
215} // namespace vctr
Calculates a fast approximation for the log2 function.
Definition: FastLog.h:57
Constrains a type to have a member function getAVX (size_t) const.
Definition: ContainerAndExpressionConcepts.h:92
Constrains a type to have a member function getNeon (size_t) const.
Definition: ContainerAndExpressionConcepts.h:84
Constrains a type to have a member function getSSE (size_t) const.
Definition: ContainerAndExpressionConcepts.h:100
Constrains a type to have a member function prepareAVXEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:88
Constrains a type to have a member function prepareNeonEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:80
Constrains a type to have a member function prepareSSEEvaluation() const.
Definition: ContainerAndExpressionConcepts.h:96
constexpr ExpressionChainBuilder< expressions::FastLog2 > fastLog2
A fast approximation of the log2 function (e.g.
Definition: FastLog.h:213
The main namespace of the VCTR project.
Definition: Array.h:24
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
An expression chain builder is an object which supplies various operator<< overloads which build chai...
Definition: ExpressionChainBuilder.h:157
The base class to every expression template.
Definition: ExpressionTemplate.h:37
Definition: NeonRegister.h:28
Definition: SSERegister.h:28
Helper template to define a union of all supported SIMD types.
Definition: ExpressionTemplate.h:123