CbmRoot
littrack/parallel/vectors/P4_F32vec4.h
Go to the documentation of this file.
1 #ifndef LIT_F32VEC4P4_H
2 #define LIT_F32VEC4P4_H
3 
4 
5 #include "vec_arithmetic.h"
6 #include "xmmintrin.h"
7 #include <cmath>
8 #include <iostream>
9 
10 /**********************************
11  *
12  * Vector of four single floats
13  *
14  **********************************/
15 
16 //#pragma pack(push,16)/* Must ensure class & union 16-B aligned */
17 
18 //typedef __m128 VectorFloat __attribute__ ((aligned(16)));
19 
/*
 * Scalar 1.0f together with its raw 32-bit pattern. The int view (`i`) is
 * used below to fill every lane of __f32vec4_one_cheat.
 */
const union {
  float f;
  int i;
} __f_one = {1.0f};

/*
 * Constant 128-bit patterns. Each object is written through the int[4]
 * member and read back through the __m128 member (union type punning, which
 * this file relies on throughout).
 *
 *   abs_mask : every bit except the sign bit of each lane
 *   sgn_mask : only the sign bit of each lane
 *   zero     : all bits clear
 *   one      : 1.0f in every lane (required by _f32vec4_one / sgn())
 *   true     : all bits set   (comparison "true" mask)
 *   false    : all bits clear (comparison "false" mask)
 */
const union {
  int i[4];
  __m128 m;
} __f32vec4_abs_mask_cheat = {{0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}},
  __f32vec4_sgn_mask_cheat = {{static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000),
                               static_cast<int>(0x80000000)}},
  __f32vec4_zero_cheat     = {{0, 0, 0, 0}},
  /* Restored: _f32vec4_one expands to this object, so it must exist. */
  __f32vec4_one_cheat      = {{__f_one.i, __f_one.i, __f_one.i, __f_one.i}},
  __f32vec4_true_cheat     = {{static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF),
                               static_cast<int>(0xFFFFFFFF)}},
  __f32vec4_false_cheat    = {{0x00000000, 0x00000000, 0x00000000, 0x00000000}};

/* F32vec4 views of the constants above (usable once F32vec4 is complete). */
#define _f32vec4_abs_mask (static_cast<F32vec4>(__f32vec4_abs_mask_cheat.m))
#define _f32vec4_sgn_mask (static_cast<F32vec4>(__f32vec4_sgn_mask_cheat.m))
#define _f32vec4_zero (static_cast<F32vec4>(__f32vec4_zero_cheat.m))
#define _f32vec4_one (static_cast<F32vec4>(__f32vec4_one_cheat.m))
#define _f32vec4_true (static_cast<F32vec4>(__f32vec4_true_cheat.m))
#define _f32vec4_false (static_cast<F32vec4>(__f32vec4_false_cheat.m))

/*
 * Thin value wrapper around one SSE register holding four single-precision
 * floats. Arithmetic, logical and comparison operators act on all four lanes
 * at once; logical and comparison operators return bit masks, not booleans.
 * The type converts implicitly from a scalar float (broadcast to all lanes)
 * and to/from the raw __m128.
 */
class F32vec4 {
public:
  __m128 v; /* the four packed lanes */

  /* Lane access by index 0..3; no bounds checking is performed. */
  float& operator[](int i) { return reinterpret_cast<float*>(&v)[i]; }
  float operator[](int i) const {
    return reinterpret_cast<const float*>(&v)[i];
  }

  /* All lanes zero. */
  F32vec4() : v(_mm_setzero_ps()) {}
  /* Wrap an existing register value. */
  F32vec4(const __m128& a) : v(a) {}
  /* Broadcast one scalar to all four lanes. */
  F32vec4(const float& a) : v(_mm_set1_ps(a)) {}

  /* Lane i receives fi; _mm_set_ps takes its arguments highest lane first. */
  F32vec4(const float& f0, const float& f1, const float& f2, const float& f3)
    : v(_mm_set_ps(f3, f2, f1, f0)) {}

  /* Conversion function */
  operator __m128() const { return v; } /* Convert to __m128 */

  /* Arithmetic Operators (lane-wise) */
  friend F32vec4 operator+(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_add_ps(a.v, b.v));
  }
  friend F32vec4 operator-(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_sub_ps(a.v, b.v));
  }
  friend F32vec4 operator*(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_mul_ps(a.v, b.v));
  }
  friend F32vec4 operator/(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_div_ps(a.v, b.v));
  }

  /* Lane-wise minimum / maximum */
  friend F32vec4 min(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_min_ps(a.v, b.v));
  }
  friend F32vec4 max(const F32vec4& a, const F32vec4& b) {
    return F32vec4(_mm_max_ps(a.v, b.v));
  }

  /* Square Root */
  friend F32vec4 sqrt(const F32vec4& a) { return F32vec4(_mm_sqrt_ps(a.v)); }

  /* Reciprocal (inverse) Square Root, via the _mm_rsqrt_ps estimate */
  friend F32vec4 rsqrt(const F32vec4& a) { return F32vec4(_mm_rsqrt_ps(a.v)); }

  /* Reciprocal (inversion), via the _mm_rcp_ps estimate */
  friend F32vec4 rcp(const F32vec4& a) { return F32vec4(_mm_rcp_ps(a.v)); }
  //friend F32vec4 rcp ( const F32vec4 &a ){ return 1./a; }

  /* Absolute value: clears the sign bit of every lane */
  friend F32vec4 fabs(const F32vec4& a) {
    return F32vec4(_mm_and_ps(a.v, _f32vec4_abs_mask));
  }

  /*
   * Sign: 1.0f carrying the sign bit of each lane of a, i.e. +1 or -1.
   * Only the sign bit is inspected, so +0 gives +1 and -0 gives -1.
   */
  friend F32vec4 sgn(const F32vec4& a) {
    const __m128 signBits = _mm_and_ps(a.v, _f32vec4_sgn_mask);
    return F32vec4(_mm_or_ps(signBits, _f32vec4_one));
  }

  /*
   * ORs the sign bit of b into a. The sign bit of a is never cleared, so a
   * lane of a that is already negative stays negative whatever b holds.
   */
  friend F32vec4 asgnb(const F32vec4& a, const F32vec4& b) {
    const __m128 signBits = _mm_and_ps(b.v, _f32vec4_sgn_mask);
    return F32vec4(_mm_or_ps(signBits, a.v));
  }

  /* Logical (bitwise on the raw lane patterns) */

  friend F32vec4 operator&(const F32vec4& a,
                           const F32vec4& b) { // mask returned
    return F32vec4(_mm_and_ps(a.v, b.v));
  }
  friend F32vec4 operator|(const F32vec4& a,
                           const F32vec4& b) { // mask returned
    return F32vec4(_mm_or_ps(a.v, b.v));
  }
  friend F32vec4 operator^(const F32vec4& a,
                           const F32vec4& b) { // mask returned
    return F32vec4(_mm_xor_ps(a.v, b.v));
  }
  /* Bitwise complement of every lane (XOR with the all-ones mask). */
  friend F32vec4 operator!(const F32vec4& a) { // mask returned
    return F32vec4(_mm_xor_ps(a.v, _f32vec4_true));
  }
  //friend F32vec4 operator||( const F32vec4& a, const F32vec4& b ) { // mask returned
  //  return _mm_or_ps(a, b);
  //}

  /* Comparison */

  /* Lane mask: all bits set where a < b, all bits clear elsewhere. */
  friend F32vec4 operator<(const F32vec4& a,
                           const F32vec4& b) { // mask returned
    return F32vec4(_mm_cmplt_ps(a.v, b.v));
  }

  /*
   * Non intrinsic functions: each lane is passed separately to the scalar
   * function of the same name and the four results are repacked.
   */

  friend F32vec4 exp(const F32vec4& a) {
    return F32vec4(exp(a[0]), exp(a[1]), exp(a[2]), exp(a[3]));
  }
  friend F32vec4 log(const F32vec4& a) {
    return F32vec4(log(a[0]), log(a[1]), log(a[2]), log(a[3]));
  }
  friend F32vec4 sin(const F32vec4& a) {
    return F32vec4(sin(a[0]), sin(a[1]), sin(a[2]), sin(a[3]));
  }
  friend F32vec4 cos(const F32vec4& a) {
    return F32vec4(cos(a[0]), cos(a[1]), cos(a[2]), cos(a[3]));
  }

  /* Define all operators for consistency */
  /* NOTE(review): the generated symbol index lists a member
   * `vec_arithmetic(F32vec4, float)`; the corresponding macro invocation
   * (presumably supplied by vec_arithmetic.h) does not appear in this
   * listing. Confirm against the repository header before relying on the
   * compound-assignment / mixed scalar operators. */

  /* Writes the four lanes separated by single spaces. */
  friend std::ostream& operator<<(std::ostream& strm, const F32vec4& a) {
    for (int lane = 0; lane < 4; ++lane) {
      if (lane != 0) { strm << " "; }
      strm << a[lane];
    }
    return strm;
  }

  /*
   * Reads ONE scalar and broadcasts it to all four lanes (note: this is not
   * the inverse of operator<<, which writes four values).
   * The temporary is zero-initialised because extraction leaves its target
   * untouched when the stream is already in a failed state; without the
   * initialiser an indeterminate value would be broadcast in that case.
   */
  friend std::istream& operator>>(std::istream& strm, F32vec4& a) {
    float tmp = 0.f;
    strm >> tmp;
    a = F32vec4(tmp);
    return strm;
  }

} __attribute__((aligned(16)));
166 
167 
168 typedef F32vec4 fvec;
169 typedef float fscal;
170 const int fvecLen = 4;
171 //#define fvec_true _f32vec4_true
172 //#define fvec_false _f32vec4_false
173 #define _fvecalignment __attribute__((aligned(16)))
174 
175 #endif
F32vec4::exp
friend F32vec4 exp(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:142
__f32vec4_sgn_mask_cheat
const union @16 __f32vec4_sgn_mask_cheat
F32vec4::max
friend F32vec4 max(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:83
F32vec4::rsqrt
friend F32vec4 rsqrt(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:91
F32vec4::fabs
friend F32vec4 fabs(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:98
f
float f
Definition: littrack/parallel/vectors/P4_F32vec4.h:25
F32vec4
Definition: L1/vectors/P4_F32vec4.h:47
vec_arithmetic.h
__f32vec4_zero_cheat
const union @16 __f32vec4_zero_cheat
F32vec4::operator/
friend F32vec4 operator/(const F32vec4 &a, const F32vec4 &b)
Definition: L1/vectors/P4_F32vec4.h:76
F32vec4::log
friend F32vec4 log(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:143
__f_one
const union @15 __f_one
F32vec4::operator^
friend F32vec4 operator^(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:120
F32vec4::operator!
friend F32vec4 operator!(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:124
_f32vec4_one
#define _f32vec4_one
Definition: littrack/parallel/vectors/P4_F32vec4.h:44
F32vec4::operator|
friend F32vec4 operator|(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:116
_f32vec4_sgn_mask
#define _f32vec4_sgn_mask
Definition: littrack/parallel/vectors/P4_F32vec4.h:42
F32vec4::sqrt
friend F32vec4 sqrt(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:88
_f32vec4_true
#define _f32vec4_true
Definition: littrack/parallel/vectors/P4_F32vec4.h:45
F32vec4::v
__m128 v
Definition: L1/vectors/P4_F32vec4.h:49
_f1
#define _f1(A, F)
Definition: littrack/parallel/vectors/P4_F32vec4.h:91
F32vec4::operator>>
friend std::istream & operator>>(std::istream &strm, F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:158
F32vec4::operator[]
float operator[](int i) const
Definition: littrack/parallel/vectors/P4_F32vec4.h:53
F32vec4::operator<
friend F32vec4 operator<(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:133
F32vec4::cos
friend F32vec4 cos(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:145
m
__m128 m
Definition: littrack/parallel/vectors/P4_F32vec4.h:27
fvecLen
const int fvecLen
Definition: littrack/parallel/vectors/P4_F32vec4.h:170
F32vec4::operator+
friend F32vec4 operator+(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:66
F32vec4::operator-
friend F32vec4 operator-(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:69
F32vec4::asgnb
friend F32vec4 asgnb(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:106
fscal
float fscal
Definition: littrack/parallel/vectors/P4_F32vec4.h:169
_f32vec4_abs_mask
#define _f32vec4_abs_mask
Definition: littrack/parallel/vectors/P4_F32vec4.h:41
F32vec4::operator[]
float & operator[](int i)
Definition: littrack/parallel/vectors/P4_F32vec4.h:52
i
int i
Definition: littrack/parallel/vectors/P4_F32vec4.h:26
F32vec4::rcp
friend F32vec4 rcp(const F32vec4 &a)
Definition: L1/vectors/P4_F32vec4.h:100
__f32vec4_true_cheat
const union @16 __f32vec4_true_cheat
F32vec4::operator<<
friend std::ostream & operator<<(std::ostream &strm, const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:153
__attribute__
class F32vec4 __attribute__((aligned(16)))
F32vec4::operator&
friend F32vec4 operator&(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:112
F32vec4::sgn
friend F32vec4 sgn(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:103
__f32vec4_one_cheat
const union @16 __f32vec4_one_cheat
F32vec4::F32vec4
F32vec4(const __m128 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:56
__f32vec4_abs_mask_cheat
const union @16 __f32vec4_abs_mask_cheat
F32vec4::min
friend F32vec4 min(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:80
F32vec4::F32vec4
F32vec4()
Definition: littrack/parallel/vectors/P4_F32vec4.h:55
F32vec4::sin
friend F32vec4 sin(const F32vec4 &a)
Definition: littrack/parallel/vectors/P4_F32vec4.h:144
F32vec4::vec_arithmetic
vec_arithmetic(F32vec4, float)
F32vec4::F32vec4
F32vec4(const float &f0, const float &f1, const float &f2, const float &f3)
Definition: littrack/parallel/vectors/P4_F32vec4.h:59
F32vec4::operator*
friend F32vec4 operator*(const F32vec4 &a, const F32vec4 &b)
Definition: littrack/parallel/vectors/P4_F32vec4.h:72
fvec
F32vec4 fvec
Definition: littrack/parallel/vectors/P4_F32vec4.h:168
__f32vec4_false_cheat
const union @16 __f32vec4_false_cheat