GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_asin_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_asin_32f
25  *
26  * \b Overview
27  *
28  * Computes arcsine of input vector and stores results in output vector.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_32f_asin_32f(float* bVector, const float* aVector, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li aVector: The input vector of floats.
37  * \li num_points: The number of data points.
38  *
39  * \b Outputs
40  * \li bVector: The vector where results will be stored.
41  *
42  * \b Example
43  * \code
44  * Calculate common angles around the top half of the unit circle.
45  * int N = 10;
46  * unsigned int alignment = volk_get_alignment();
47  * float* in = (float*)volk_malloc(sizeof(float)*N, alignment);
48  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
49  *
50  * in[0] = 0;
51  * in[1] = 0.5;
52  * in[2] = std::sqrt(2.f)/2.f;
53  * in[3] = std::sqrt(3.f)/2.f;
54  * in[4] = in[5] = 1;
55  * for(unsigned int ii = 6; ii < N; ++ii){
56  * in[ii] = - in[N-ii-1];
57  * }
58  *
59  * volk_32f_asin_32f(out, in, N);
60  *
61  * for(unsigned int ii = 0; ii < N; ++ii){
62  * printf("asin(%1.3f) = %1.3f\n", in[ii], out[ii]);
63  * }
64  *
65  * volk_free(in);
66  * volk_free(out);
67  * \endcode
68  */
69 
70 #include <stdio.h>
71 #include <math.h>
72 #include <inttypes.h>
73 
74 /* This is the number of terms of Taylor series to evaluate, increase this for more accuracy*/
75 #define ASIN_TERMS 2
76 
77 #ifndef INCLUDED_volk_32f_asin_32f_a_H
78 #define INCLUDED_volk_32f_asin_32f_a_H
79 
80 #ifdef LV_HAVE_SSE4_1
81 #include <smmintrin.h>
82 
83 static inline void
84 volk_32f_asin_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num_points)
85 {
86  float* bPtr = bVector;
87  const float* aPtr = aVector;
88 
89  unsigned int number = 0;
90  unsigned int quarterPoints = num_points / 4;
91  int i, j;
92 
93  __m128 aVal, pio2, x, y, z, arcsine;
94  __m128 fzeroes, fones, ftwos, ffours, condition;
95 
96  pio2 = _mm_set1_ps(3.14159265358979323846/2);
97  fzeroes = _mm_setzero_ps();
98  fones = _mm_set1_ps(1.0);
99  ftwos = _mm_set1_ps(2.0);
100  ffours = _mm_set1_ps(4.0);
101 
102  for(;number < quarterPoints; number++){
103  aVal = _mm_load_ps(aPtr);
104  aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), _mm_sub_ps(fones, aVal))));
105  z = aVal;
106  condition = _mm_cmplt_ps(z, fzeroes);
107  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
108  x = z;
109  condition = _mm_cmplt_ps(z, fones);
110  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
111 
112  for(i = 0; i < 2; i++){
113  x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
114  }
115  x = _mm_div_ps(fones, x);
116  y = fzeroes;
117  for(j = ASIN_TERMS - 1; j >=0 ; j--){
118  y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
119  }
120 
121  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
122  condition = _mm_cmpgt_ps(z, fones);
123 
124  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
125  arcsine = y;
126  condition = _mm_cmplt_ps(aVal, fzeroes);
127  arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), condition));
128 
129  _mm_store_ps(bPtr, arcsine);
130  aPtr += 4;
131  bPtr += 4;
132  }
133 
134  number = quarterPoints * 4;
135  for(;number < num_points; number++){
136  *bPtr++ = asin(*aPtr++);
137  }
138 }
139 
140 #endif /* LV_HAVE_SSE4_1 for aligned */
141 
142 #endif /* INCLUDED_volk_32f_asin_32f_a_H */
143 
144 #ifndef INCLUDED_volk_32f_asin_32f_u_H
145 #define INCLUDED_volk_32f_asin_32f_u_H
146 
147 #ifdef LV_HAVE_SSE4_1
148 #include <smmintrin.h>
149 
150 static inline void
151 volk_32f_asin_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int num_points)
152 {
153  float* bPtr = bVector;
154  const float* aPtr = aVector;
155 
156  unsigned int number = 0;
157  unsigned int quarterPoints = num_points / 4;
158  int i, j;
159 
160  __m128 aVal, pio2, x, y, z, arcsine;
161  __m128 fzeroes, fones, ftwos, ffours, condition;
162 
163  pio2 = _mm_set1_ps(3.14159265358979323846/2);
164  fzeroes = _mm_setzero_ps();
165  fones = _mm_set1_ps(1.0);
166  ftwos = _mm_set1_ps(2.0);
167  ffours = _mm_set1_ps(4.0);
168 
169  for(;number < quarterPoints; number++){
170  aVal = _mm_loadu_ps(aPtr);
171  aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), _mm_sub_ps(fones, aVal))));
172  z = aVal;
173  condition = _mm_cmplt_ps(z, fzeroes);
174  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
175  x = z;
176  condition = _mm_cmplt_ps(z, fones);
177  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
178 
179  for(i = 0; i < 2; i++){
180  x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
181  }
182  x = _mm_div_ps(fones, x);
183  y = fzeroes;
184  for(j = ASIN_TERMS - 1; j >=0 ; j--){
185  y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
186  }
187 
188  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
189  condition = _mm_cmpgt_ps(z, fones);
190 
191  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
192  arcsine = y;
193  condition = _mm_cmplt_ps(aVal, fzeroes);
194  arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), condition));
195 
196  _mm_storeu_ps(bPtr, arcsine);
197  aPtr += 4;
198  bPtr += 4;
199  }
200 
201  number = quarterPoints * 4;
202  for(;number < num_points; number++){
203  *bPtr++ = asin(*aPtr++);
204  }
205 }
206 
207 #endif /* LV_HAVE_SSE4_1 for unaligned */
208 
209 #ifdef LV_HAVE_GENERIC
210 
211 static inline void
212 volk_32f_asin_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points)
213 {
214  float* bPtr = bVector;
215  const float* aPtr = aVector;
216  unsigned int number = 0;
217 
218  for(number = 0; number < num_points; number++){
219  *bPtr++ = asin(*aPtr++);
220  }
221 }
222 #endif /* LV_HAVE_GENERIC */
223 
224 #endif /* INCLUDED_volk_32f_asin_32f_u_H */
#define ASIN_TERMS
Definition: volk_32f_asin_32f.h:75