CbmRoot
L1/vectors/P4_F32vec4.h
Go to the documentation of this file.
1 #ifndef L1Algo_F32vec4P4_H
2 #define L1Algo_F32vec4P4_H
3 
4 #include "vec_arithmetic.h"
5 #include "xmmintrin.h"
6 #include <cmath>
7 #include <iostream>
8 
9 /**********************************
10  *
11  * Vector of four single floats
12  *
13  **********************************/
14 
15 //#pragma pack(push,16)/* Must ensure class & union 16-B aligned */
16 
17 //typedef __m128 VectorFloat __attribute__ ((aligned(16)));
18 
/* The scalar 1.0f, held in a union so that the same storage is also
 * declared as an int (member `i`).  Only `f` is initialised here. */
const union {
  float f;  // floating-point member, initialised to 1.0f
  int i;    // integer member sharing storage with `f`
} __f_one = {1.f};
23 
/* Bit-pattern constants for the F32vec4 helpers.
 *
 * Each constant is written through the int[4] member and is meant to be
 * consumed as a packed __m128 through member `m` (see the _f32vec4_* macros).
 *
 *   abs_mask : every bit except the sign bit  -> AND clears the sign
 *   sgn_mask : only the sign bit              -> AND extracts the sign
 *   zero     : four lanes of +0.0f
 *   one      : four lanes of 1.0f (0x3f800000 is the IEEE-754 single
 *              precision encoding of 1.0f)
 *   true     : all bits set   (lane mask "true")
 *   false    : all bits clear (lane mask "false")
 */
const union {
  int i[4];
  __m128 m;
} __f32vec4_abs_mask_cheat = {{0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}},
  __f32vec4_sgn_mask_cheat = {{static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000)}},
  __f32vec4_zero_cheat     = {{0, 0, 0, 0}},
  // Required by _f32vec4_one / sgn(); written as a literal bit pattern so the
  // constant does not depend on any other object being initialised first.
  __f32vec4_one_cheat      = {{0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000}},
  __f32vec4_true_cheat     = {{static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF)}},
  __f32vec4_false_cheat    = {{0x00000000, 0x00000000, 0x00000000, 0x00000000}};
39 
/* Accessors that present the bit-pattern unions as F32vec4 values.
 * They expand lazily, so F32vec4 only has to be complete at the point of use,
 * not here. */
#define _f32vec4_abs_mask (static_cast<F32vec4>(__f32vec4_abs_mask_cheat.m))
#define _f32vec4_sgn_mask (static_cast<F32vec4>(__f32vec4_sgn_mask_cheat.m))
#define _f32vec4_zero (static_cast<F32vec4>(__f32vec4_zero_cheat.m))
#define _f32vec4_one (static_cast<F32vec4>(__f32vec4_one_cheat.m))
#define _f32vec4_true (static_cast<F32vec4>(__f32vec4_true_cheat.m))
#define _f32vec4_false (static_cast<F32vec4>(__f32vec4_false_cheat.m))
46 
47 class F32vec4 {
48 public:
49  __m128 v;
50 
51  float& operator[](int i) { return (reinterpret_cast<float*>(&v))[i]; }
52  float operator[](int i) const {
53  return (reinterpret_cast<const float*>(&v))[i];
54  }
55 
56  F32vec4() : v(_mm_set_ps1(0)) {}
57  F32vec4(const __m128& a) : v(a) {}
58  F32vec4(const float& a) : v(_mm_set_ps1(a)) {}
59 
60  F32vec4(const float& f0, const float& f1, const float& f2, const float& f3)
61  : v(_mm_set_ps(f3, f2, f1, f0)) {}
62 
63  /* Conversion function */
64  operator __m128() const { return v; } /* Convert to __m128 */
65 
66  /* Arithmetic Operators */
67  friend F32vec4 operator+(const F32vec4& a, const F32vec4& b) {
68  return _mm_add_ps(a, b);
69  }
70  friend F32vec4 operator-(const F32vec4& a, const F32vec4& b) {
71  return _mm_sub_ps(a, b);
72  }
73  friend F32vec4 operator*(const F32vec4& a, const F32vec4& b) {
74  return _mm_mul_ps(a, b);
75  }
76  friend F32vec4 operator/(const F32vec4& a, const F32vec4& b) {
77  return _mm_div_ps(a, b);
78  }
79 
80  /* Functions */
81  friend F32vec4 min(const F32vec4& a, const F32vec4& b) {
82  return _mm_min_ps(a, b);
83  }
84  friend F32vec4 max(const F32vec4& a, const F32vec4& b) {
85  return _mm_max_ps(a, b);
86  }
87 
88  /* Square Root */
89  friend F32vec4 sqrt(const F32vec4& a) { return _mm_sqrt_ps(a); }
90 
91  /* Reciprocal( inverse) Square Root */
92  friend F32vec4 rsqrt(const F32vec4& a) { return _mm_rsqrt_ps(a); }
93 
94  /* Reciprocal (inversion) */
95  // friend F32vec4 rcp ( const F32vec4 &a ){ return _mm_rcp_ps (a); }
96  /* Reciprocal (inversion) */
97  //friend F32vec4 rcp ( const F32vec4 &a ){ return 1. / a; }
98  /* NewtonRaphson Reciprocal
99  [2 * rcpps(x) - (x * rcpps(x) * rcpps(x))] */
100  friend F32vec4 rcp(const F32vec4& a) {
101  F32vec4 Ra0 = _mm_rcp_ps(a);
102  return _mm_sub_ps(_mm_add_ps(Ra0, Ra0),
103  _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0));
104  }
105 
106 
107  /* Absolute value */
108  friend F32vec4 fabs(const F32vec4& a) {
109  return _mm_and_ps(a, _f32vec4_abs_mask);
110  }
111 
112  /* Sign */
113  friend F32vec4 sgn(const F32vec4& a) {
114  return _mm_or_ps(_mm_and_ps(a, _f32vec4_sgn_mask), _f32vec4_one);
115  }
116  friend F32vec4 asgnb(const F32vec4& a, const F32vec4& b) {
117  return _mm_or_ps(_mm_and_ps(b, _f32vec4_sgn_mask), a);
118  }
119 
120  /* Logical */
121 
122  friend F32vec4 operator&(const F32vec4& a,
123  const F32vec4& b) { // mask returned
124  return _mm_and_ps(a, b);
125  }
126  friend F32vec4 operator|(const F32vec4& a,
127  const F32vec4& b) { // mask returned
128  return _mm_or_ps(a, b);
129  }
130  friend F32vec4 operator^(const F32vec4& a,
131  const F32vec4& b) { // mask returned
132  return _mm_xor_ps(a, b);
133  }
134  friend F32vec4 operator!(const F32vec4& a) { // mask returned
135  return _mm_xor_ps(a, _f32vec4_true);
136  }
137  // friend F32vec4 operator||( const F32vec4 &a, const F32vec4 &b ){ // mask returned
138  // return _mm_or_ps(a, b);
139  // }
140 
141  /* Comparison */
142 
143  friend F32vec4 operator<(const F32vec4& a,
144  const F32vec4& b) { // mask returned
145  return _mm_cmplt_ps(a, b);
146  }
147  friend F32vec4 operator<=(const F32vec4& a,
148  const F32vec4& b) { // mask returned
149  return _mm_cmple_ps(a, b);
150  }
151  friend F32vec4 operator>(const F32vec4& a,
152  const F32vec4& b) { // mask returned
153  return _mm_cmpgt_ps(a, b);
154  }
155  friend F32vec4 operator>=(const F32vec4& a,
156  const F32vec4& b) { // mask returned
157  return _mm_cmpge_ps(a, b);
158  }
159  friend F32vec4 operator==(const F32vec4& a,
160  const F32vec4& b) { // mask returned
161  return _mm_cmpeq_ps(a, b);
162  }
163 
164 #define if3(a, b, c) ((a) & (b)) | ((!(a)) & (c)) // analog (a) ? b : c
165 
166 #define NotEmpty(a) bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3])
167 #define Empty(a) !(bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3]))
168  // bool NotEmpty(const F32vec4 &a) { return a[0]||a[1]||a[2]||a[3]; }
169  // bool Empty(const F32vec4 &a) { return !(a[0]||a[1]||a[2]||a[3]); } // optimize
170  friend F32vec4 bool2int(const F32vec4& a) { // mask returned
171  return if3(a, 1, 0);
172  }
173 
174  /* Define all operators for consistensy */
175 
177 
178  /* Non intrinsic functions */
179 
180 #define _f1(A, F) F32vec4(F(A[0]), F(A[1]), F(A[2]), F(A[3]))
181 
182  friend F32vec4 exp(const F32vec4& a) { return _f1(a, exp); }
183  friend F32vec4 log(const F32vec4& a) { return _f1(a, log); }
184  friend F32vec4 sin(const F32vec4& a) { return _f1(a, sin); }
185  friend F32vec4 cos(const F32vec4& a) { return _f1(a, cos); }
186  friend F32vec4 acos(const F32vec4& a) { return _f1(a, acos); }
187 
188 #undef _f1
189 
190  friend F32vec4 atan2(const F32vec4& y, const F32vec4& x) {
191  const F32vec4 pi(3.1415926535897932);
192  const F32vec4 pi_2 = pi / 2;
193  const F32vec4 zero(0);
194 
195  const F32vec4& xZero = F32vec4(x == zero);
196  const F32vec4& yZero = F32vec4(y == zero);
197  const F32vec4& xNeg = F32vec4(x < zero);
198  const F32vec4& yNeg = F32vec4(y < zero);
199 
200  const F32vec4& absX = fabs(x);
201  const F32vec4& absY = fabs(y);
202 
203  F32vec4 a = absY / absX;
204  const F32vec4 pi_4 = pi / 4;
205  const F32vec4& gt_tan_3pi_8 = F32vec4(a > F32vec4(2.414213562373095));
206  const F32vec4& gt_tan_pi_8 =
207  F32vec4(a > F32vec4(0.4142135623730950)) & F32vec4(!gt_tan_3pi_8);
208  const F32vec4 minusOne(-1);
209  F32vec4 b(zero);
210  b = (pi_2 & gt_tan_3pi_8) + (F32vec4(!gt_tan_3pi_8) & b);
211  b = (pi_4 & gt_tan_pi_8) + (F32vec4(!gt_tan_pi_8) & b);
212  a = (gt_tan_3pi_8 & (minusOne / a)) + (F32vec4(!gt_tan_3pi_8) & a);
213  a = (gt_tan_pi_8 & ((absY - absX) / (absY + absX)))
214  + (F32vec4(!gt_tan_pi_8) & a);
215  const F32vec4& a2 = a * a;
216  b +=
217  (((8.05374449538e-2 * a2 - 1.38776856032E-1) * a2 + 1.99777106478E-1) * a2
218  - 3.33329491539E-1)
219  * a2 * a
220  + a;
221  F32vec4 xyNeg = F32vec4(xNeg ^ yNeg);
222  b = (xyNeg & (-b)) + (F32vec4(!xyNeg) & b);
223  xyNeg = F32vec4(xNeg & !yNeg);
224  b = (xyNeg & (b + pi)) + (F32vec4(!xyNeg) & b);
225  xyNeg = F32vec4(xNeg & yNeg);
226  b = (xyNeg & (b - pi)) + (F32vec4(!xyNeg) & b);
227  xyNeg = F32vec4(xZero & yZero);
228  b = (xyNeg & zero) + (F32vec4(!xyNeg) & b);
229  xyNeg = F32vec4(xZero & yNeg);
230  b = (xyNeg & (-pi_2)) + (F32vec4(!xyNeg) & b);
231  return b;
232  }
233 
234  friend std::ostream& operator<<(std::ostream& strm, const F32vec4& a) {
235  strm << "[" << a[0] << " " << a[1] << " " << a[2] << " " << a[3] << "]";
236  return strm;
237  }
238 
239  friend std::istream& operator>>(std::istream& strm, F32vec4& a) {
240  float tmp;
241  strm >> tmp;
242  a = tmp;
243  return strm;
244  }
245 
246 } __attribute__((aligned(16)));
247 
248 
249 typedef F32vec4 fvec;
250 typedef float fscal;
251 const int fvecLen = 4;
252 //#define fvec_true _f32vec4_true
253 //#define fvec_false _f32vec4_false
254 #define _fvecalignment __attribute__((aligned(16)))
255 
256 
257 #include "std_alloc.h"
258 
259 
260 #endif
F32vec4::exp
friend F32vec4 exp(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:182
F32vec4::max
friend F32vec4 max(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:84
fscal
float fscal
Definition: L1/vectors/P4_F32vec4.h:250
_f1
#define _f1(A, F)
Definition: L1/vectors/P4_F32vec4.h:132
f
float f
Definition: L1/vectors/P4_F32vec4.h:24
F32vec4::rsqrt
friend F32vec4 rsqrt(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:92
F32vec4::fabs
friend F32vec4 fabs(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:108
F32vec4
Definition: L1/vectors/P4_F32vec4.h:47
vec_arithmetic.h
F32vec4::atan2
friend F32vec4 atan2(const F32vec4 &y, const F32vec4 &x)
Definition: L1/vectors/P4_F32vec4.h:190
F32vec4::operator/
friend F32vec4 operator/(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:76
F32vec4::log
friend F32vec4 log(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:183
fvec
F32vec4 fvec
Definition: L1/vectors/P4_F32vec4.h:249
_f32vec4_one
#define _f32vec4_one
Definition: L1/vectors/P4_F32vec4.h:43
F32vec4::operator^
friend F32vec4 operator^(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:130
i
int i
Definition: L1/vectors/P4_F32vec4.h:25
F32vec4::operator!
friend F32vec4 operator!(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:134
__f32vec4_true_cheat
const union @12 __f32vec4_true_cheat
F32vec4::operator|
friend F32vec4 operator|(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:126
F32vec4::operator>=
friend F32vec4 operator>=(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:155
F32vec4::sqrt
friend F32vec4 sqrt(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:89
F32vec4::v
__m128 v
Definition: L1/vectors/P4_F32vec4.h:49
std_alloc.h
_f32vec4_abs_mask
#define _f32vec4_abs_mask
Definition: L1/vectors/P4_F32vec4.h:40
F32vec4::operator>>
friend std::istream & operator>>(std::istream &strm, F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:239
__f32vec4_sgn_mask_cheat
const union @12 __f32vec4_sgn_mask_cheat
_f32vec4_true
#define _f32vec4_true
Definition: L1/vectors/P4_F32vec4.h:44
F32vec4::operator<
friend F32vec4 operator<(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:143
F32vec4::F32vec4
F32vec4(const float &a)
Definition: L1/vectors/P4_F32vec4.h:58
F32vec4::cos
friend F32vec4 cos(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:185
fvecLen
const int fvecLen
Definition: L1/vectors/P4_F32vec4.h:251
__attribute__
class F32vec4 __attribute__((aligned(16)))
__f32vec4_zero_cheat
const union @12 __f32vec4_zero_cheat
F32vec4::operator+
friend F32vec4 operator+(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:67
F32vec4::operator-
friend F32vec4 operator-(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:70
F32vec4::asgnb
friend F32vec4 asgnb(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:116
__f_one
const union @11 __f_one
F32vec4::operator[]
float & operator[](int i)
Definition: L1/vectors/P4_F32vec4.h:51
F32vec4::operator==
friend F32vec4 operator==(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:159
__f32vec4_false_cheat
const union @12 __f32vec4_false_cheat
__f32vec4_abs_mask_cheat
const union @12 __f32vec4_abs_mask_cheat
F32vec4::rcp
friend F32vec4 rcp(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:100
F32vec4::operator<<
friend std::ostream & operator<<(std::ostream &strm, const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:234
x
Double_t x
Definition: CbmMvdSensorDigiToHitTask.cxx:68
F32vec4::operator&
friend F32vec4 operator&(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:122
m
__m128 m
Definition: L1/vectors/P4_F32vec4.h:26
y
Double_t y
Definition: CbmMvdSensorDigiToHitTask.cxx:68
F32vec4::sgn
friend F32vec4 sgn(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:113
F32vec4::F32vec4
F32vec4(const __m128 &a)
Definition: L1/vectors/P4_F32vec4.h:57
F32vec4::min
friend F32vec4 min(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:81
F32vec4::F32vec4
F32vec4()
Definition: L1/vectors/P4_F32vec4.h:56
_f32vec4_sgn_mask
#define _f32vec4_sgn_mask
Definition: L1/vectors/P4_F32vec4.h:41
F32vec4::bool2int
friend F32vec4 bool2int(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:170
F32vec4::acos
friend F32vec4 acos(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:186
F32vec4::if3
friend F32vec4 if3(const F32vec4 &a, const F32vec4 &b, const F32vec4 &c)
Definition: PSEUDO_F32vec4.h:108
F32vec4::sin
friend F32vec4 sin(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:184
F32vec4::operator<=
friend F32vec4 operator<=(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:147
F32vec4::vec_arithmetic
vec_arithmetic(F32vec4, float)
F32vec4::operator>
friend F32vec4 operator>(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:151
__f32vec4_one_cheat
const union @12 __f32vec4_one_cheat
F32vec4::operator*
friend F32vec4 operator*(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:73