VCTR
Loading...
Searching...
No Matches
MultiplySubtract.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr::expressions
24{
25
26//==============================================================================
28template <size_t extent, class SrcAType, class SrcBType, class SrcCType>
29requires are::same<ValueType<SrcAType>, ValueType<SrcBType>, ValueType<SrcCType>>
31{
32public:
33 using value_type = ValueType<SrcAType>;
34
35private:
37
// Result types of invoking getStorageInfo() on each of the three source types.
38 using SrcAStorageInfoType = std::invoke_result_t<decltype (&std::remove_cvref_t<SrcAType>::getStorageInfo), SrcAType>;
39 using SrcBStorageInfoType = std::invoke_result_t<decltype (&std::remove_cvref_t<SrcBType>::getStorageInfo), SrcBType>;
40 using SrcCStorageInfoType = std::invoke_result_t<decltype (&std::remove_cvref_t<SrcCType>::getStorageInfo), SrcCType>;
41
// The three operands, stored exactly as the template arguments spell them.
// multiplySubtract() passes its deduced forwarding-reference types here, so an
// lvalue argument is held by reference and an rvalue argument by value.
42 SrcAType srcA;
43 SrcBType srcB;
44 SrcCType srcC;
45
// Storage info combined from the three sources. It is declared after srcA, srcB
// and srcC because the constructor initialises it from those members.
46 const CombinedStorageInfo<std::remove_cvref_t<SrcAStorageInfoType>, std::remove_cvref_t<SrcBStorageInfoType>, std::remove_cvref_t<SrcCStorageInfoType>> storageInfo;
47
48public:
49 template <class SrcA, class SrcB, class SrcC>
50 constexpr MultiplySubtractVectors (SrcA&& a, SrcB&& b, SrcC&& c)
51 : srcA (std::forward<SrcA> (a)),
52 srcB (std::forward<SrcB> (b)),
53 srcC (std::forward<SrcC> (c)),
54 storageInfo (srcA.getStorageInfo(), srcB.getStorageInfo(), srcC.getStorageInfo())
55 {}
56
57 constexpr const auto& getStorageInfo() const { return storageInfo; }
58
59 constexpr size_t size() const { return srcA.size(); }
60
61 constexpr bool isNotAliased (const void*) const
62 {
63 return false;
64 }
65
66 VCTR_FORCEDINLINE constexpr auto operator[] (size_t i) const
67 {
68 return srcC[i] - srcA[i] * srcB[i];
69 }
70
71 //==============================================================================
72 void prepareNeonEvaluation() const
73 requires ::vctr::has::prepareNeonEvaluation<SrcAType> && ::vctr::has::prepareNeonEvaluation<SrcBType> && ::vctr::has::prepareNeonEvaluation<SrcCType>
74 {
75 srcA.prepareNeonEvaluation();
76 srcB.prepareNeonEvaluation();
77 srcC.prepareNeonEvaluation();
78 }
79
80 VCTR_FORCEDINLINE VCTR_TARGET ("avx") void prepareAVXEvaluation() const
81 requires ::vctr::has::prepareAVXEvaluation<SrcAType> && ::vctr::has::prepareAVXEvaluation<SrcBType> && ::vctr::has::prepareAVXEvaluation<SrcCType>
82 {
83 srcA.prepareAVXEvaluation();
84 srcB.prepareAVXEvaluation();
85 srcC.prepareAVXEvaluation();
86 }
87
88 VCTR_FORCEDINLINE VCTR_TARGET ("sse4.1") void prepareSSEEvaluation() const
89 requires ::vctr::has::prepareSSEEvaluation<SrcAType> && ::vctr::has::prepareSSEEvaluation<SrcBType> && ::vctr::has::prepareSSEEvaluation<SrcCType>
90 {
91 srcA.prepareSSEEvaluation();
92 srcB.prepareSSEEvaluation();
93 srcC.prepareSSEEvaluation();
94 }
95
96 // AVX Implementation
97 VCTR_FORCEDINLINE VCTR_TARGET ("fma") AVXRegister<value_type> getAVX (size_t i) const
98 requires (archX64 && has::getAVX<SrcAType> && has::getAVX<SrcBType> && Expression::allElementTypesSame && Expression::CommonElement::isRealFloat)
99 {
100 return Expression::AVX::fms (srcA.getAVX (i), srcB.getAVX (i), srcC.getAVX (i));
101 }
102
103 //==============================================================================
104 // NEON Implementation
105 NeonRegister<value_type> getNeon (size_t i) const
107 {
108 return Expression::Neon::fms (srcA.getNeon (i), srcB.getNeon (i), srcC.getNeon (i));
109 }
110};
111
112} // namespace vctr::expressions
113
114namespace vctr
115{
116
124template <class SrcAType, class SrcBType, class SrcCType>
128constexpr auto multiplySubtract (SrcAType&& a, SrcBType&& b, SrcCType&& c)
129{
130 assertCommonSize (a, b, c);
131 constexpr auto extent = getCommonExtent<SrcAType, SrcBType, SrcCType>();
132
133 return expressions::MultiplySubtractVectors<extent, SrcAType, SrcBType, SrcCType> (std::forward<SrcAType> (a), std::forward<SrcBType> (b), std::forward<SrcCType> (c));
134}
135} // namespace vctr
Multiply-subtracts three vector-like types (computes c - a * b element-wise).
Definition: MultiplySubtract.h:31
Constrains a type to have a member function getAVX (size_t) const.
Definition: ContainerAndExpressionConcepts.h:92
Constrains a type to have a member function getNeon (size_t) const.
Definition: ContainerAndExpressionConcepts.h:84
Constrains a type to either be an expression template or any derived instance of VctrBase.
Definition: ContainerAndExpressionConcepts.h:230
constexpr auto multiplySubtract(SrcAType &&a, SrcBType &&b, SrcCType &&c)
Computes the multiply-subtract operation c - (a * b).
Definition: MultiplySubtract.h:128
The main namespace of the VCTR project.
Definition: Array.h:24
constexpr void assertCommonSize(const A &a, const B &b)
Ensures that both sources have the same size.
Definition: Traits.h:256
typename detail::ValueType< std::remove_cvref_t< T > >::Type ValueType
If T is an expression template, it equals its return type, if it's a type that defines value_type as ...
Definition: Traits.h:201
Definition: AVXRegister.h:28
Definition: SIMDHelpers.h:222
Supplies some handy typedefs and traits needed when implementing expression templates.
Definition: ExpressionTemplate.h:41
static constexpr auto allElementTypesSame
Indicates if all source element types and the return types are same.
Definition: ExpressionTemplate.h:80
The base class to every expression template.
Definition: ExpressionTemplate.h:37
Definition: NeonRegister.h:28