GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_expfast_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_expfast_32f
25  *
26  * \b Overview
27  *
28  * Computes exp of input vector and stores results in output
29  * vector. This uses a fast exp approximation with a maximum 7% error.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_expfast_32f(float* bVector, const float* aVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li aVector: Input vector of floats.
38  * \li num_points: The number of data points.
39  *
40  * \b Outputs
41  * \li bVector: The output vector.
42  *
43  * \b Example
44  * \code
45  * int N = 10;
46  * unsigned int alignment = volk_get_alignment();
47  * float* in = (float*)volk_malloc(sizeof(float)*N, alignment);
48  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
49  *
50  * for(unsigned int ii = 0; ii < N; ++ii){
51  * in[ii] = std::log((float)ii);
52  * }
53  *
54  * volk_32f_expfast_32f(out, in, N);
55  *
56  * for(unsigned int ii = 0; ii < N; ++ii){
57  * printf("out(%u) = %f\n", ii, out[ii]);
58  * }
59  *
60  * volk_free(in);
61  * volk_free(out);
62  * \endcode
63  */
64 
65 #include <stdio.h>
66 #include <math.h>
67 #include <inttypes.h>
68 
69 #define Mln2 0.6931471805f /* natural logarithm of 2 */
70 #define A 8388608.0f /* 2^23 */
71 #define B 1065353216.0f /* 127 * 2^23 (0x3F800000) */
72 #define C 60801.0f /* subtracted from B to form the additive term used by the SIMD kernels */
73 
74 
75 #ifndef INCLUDED_volk_32f_expfast_32f_a_H
76 #define INCLUDED_volk_32f_expfast_32f_a_H
77 
78 #ifdef LV_HAVE_AVX
79 
80 #include <immintrin.h>
81 
82 static inline void
83  volk_32f_expfast_32f_a_avx(float* bVector, const float* aVector, unsigned int num_points)
84 {
85  float* bPtr = bVector;
86  const float* aPtr = aVector;
87 
88  unsigned int number = 0;
89  const unsigned int eighthPoints = num_points / 8;
90 
91  __m256 aVal, bVal, a, b;
92  __m256i exp;
93  a = _mm256_set1_ps(A/Mln2);
94  b = _mm256_set1_ps(B-C);
95 
96  for(;number < eighthPoints; number++){
97  aVal = _mm256_load_ps(aPtr);
98  exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
99  bVal = _mm256_castsi256_ps(exp);
100 
101  _mm256_store_ps(bPtr, bVal);
102  aPtr += 8;
103  bPtr += 8;
104  }
105 
106  number = eighthPoints * 8;
107  for(;number < num_points; number++){
108  *bPtr++ = expf(*aPtr++);
109  }
110 }
111 
112 #endif /* LV_HAVE_AVX for aligned */
113 
114 #ifdef LV_HAVE_SSE4_1
115 #include <smmintrin.h>
116 
117 static inline void
118 volk_32f_expfast_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num_points)
119 {
120  float* bPtr = bVector;
121  const float* aPtr = aVector;
122 
123  unsigned int number = 0;
124  const unsigned int quarterPoints = num_points / 4;
125 
126  __m128 aVal, bVal, a, b;
127  __m128i exp;
128  a = _mm_set1_ps(A/Mln2);
129  b = _mm_set1_ps(B-C);
130 
131  for(;number < quarterPoints; number++){
132  aVal = _mm_load_ps(aPtr);
133  exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
134  bVal = _mm_castsi128_ps(exp);
135 
136  _mm_store_ps(bPtr, bVal);
137  aPtr += 4;
138  bPtr += 4;
139  }
140 
141  number = quarterPoints * 4;
142  for(;number < num_points; number++){
143  *bPtr++ = expf(*aPtr++);
144  }
145 }
146 
147 #endif /* LV_HAVE_SSE4_1 for aligned */
148 
149 #endif /* INCLUDED_volk_32f_expfast_32f_a_H */
150 
151 
152 
153 #ifndef INCLUDED_volk_32f_expfast_32f_u_H
154 #define INCLUDED_volk_32f_expfast_32f_u_H
155 
156 #ifdef LV_HAVE_AVX
157 #include <immintrin.h>
158 
159 static inline void
160 volk_32f_expfast_32f_u_avx(float* bVector, const float* aVector, unsigned int num_points)
161 {
162  float* bPtr = bVector;
163  const float* aPtr = aVector;
164 
165  unsigned int number = 0;
166  const unsigned int eighthPoints = num_points / 8;
167 
168  __m256 aVal, bVal, a, b;
169  __m256i exp;
170  a = _mm256_set1_ps(A/Mln2);
171  b = _mm256_set1_ps(B-C);
172 
173  for(;number < eighthPoints; number++){
174  aVal = _mm256_loadu_ps(aPtr);
175  exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
176  bVal = _mm256_castsi256_ps(exp);
177 
178  _mm256_storeu_ps(bPtr, bVal);
179  aPtr += 8;
180  bPtr += 8;
181  }
182 
183  number = eighthPoints * 8;
184  for(;number < num_points; number++){
185  *bPtr++ = expf(*aPtr++);
186  }
187 }
188 
189 #endif /* LV_HAVE_AVX for unaligned */
190 
191 
192 #ifdef LV_HAVE_SSE4_1
193 #include <smmintrin.h>
194 
195 static inline void
196 volk_32f_expfast_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int num_points)
197 {
198  float* bPtr = bVector;
199  const float* aPtr = aVector;
200 
201  unsigned int number = 0;
202  const unsigned int quarterPoints = num_points / 4;
203 
204  __m128 aVal, bVal, a, b;
205  __m128i exp;
206  a = _mm_set1_ps(A/Mln2);
207  b = _mm_set1_ps(B-C);
208 
209  for(;number < quarterPoints; number++){
210  aVal = _mm_loadu_ps(aPtr);
211  exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
212  bVal = _mm_castsi128_ps(exp);
213 
214  _mm_storeu_ps(bPtr, bVal);
215  aPtr += 4;
216  bPtr += 4;
217  }
218 
219  number = quarterPoints * 4;
220  for(;number < num_points; number++){
221  *bPtr++ = expf(*aPtr++);
222  }
223 }
224 
225 #endif /* LV_HAVE_SSE4_1 for unaligned */
226 
227 
228 #ifdef LV_HAVE_GENERIC
229 
230 static inline void
231 volk_32f_expfast_32f_generic(float* bVector, const float* aVector, unsigned int num_points)
232 {
233  float* bPtr = bVector;
234  const float* aPtr = aVector;
235  unsigned int number = 0;
236 
237  for(number = 0; number < num_points; number++){
238  *bPtr++ = expf(*aPtr++);
239  }
240 }
241 #endif /* LV_HAVE_GENERIC */
242 
243 #endif /* INCLUDED_volk_32f_expfast_32f_u_H */
#define B
Definition: volk_32f_expfast_32f.h:71
#define C
Definition: volk_32f_expfast_32f.h:72
#define Mln2
Definition: volk_32f_expfast_32f.h:69
#define A
Definition: volk_32f_expfast_32f.h:70