GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_s32f_convert_32i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_s32f_convert_32i
25  *
26  * \b Overview
27  *
28  * Multiplies each float in the input vector by a scaling factor and
29  * converts the result to a 32-bit integer.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_s32f_convert_32i(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li inputVector: the input vector of floats.
38  * \li scalar: The value multiplied against each point in the input buffer.
39  * \li num_points: The number of data points.
40  *
41  * \b Outputs
42  * \li outputVector: The output vector.
43  *
44  * \b Example
45  * Convert floats from [-1,1] to integers, using a scale of 5 so that the smallest input step (0.2) maps to an integer step of 1
46  * \code
47  * int N = 10;
48  * unsigned int alignment = volk_get_alignment();
49  * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
50  * int32_t* out = (int32_t*)volk_malloc(sizeof(int32_t)*N, alignment);
51  *
52  * for(unsigned int ii = 0; ii < N; ++ii){
53  * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
54  * }
55  *
56  * // Normalize by the smallest delta (0.2 in this example)
57  * float scale = 5.f;
58  *
59  * volk_32f_s32f_convert_32i(out, increasing, scale, N);
60  *
61  * for(unsigned int ii = 0; ii < N; ++ii){
62  * printf("out[%u] = %i\n", ii, out[ii]);
63  * }
64  *
65  * volk_free(increasing);
66  * volk_free(out);
67  * \endcode
68  */
69 
70 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
71 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
72 
73 #include <inttypes.h>
74 #include <stdio.h>
75 
76 #ifdef LV_HAVE_SSE2
77 #include <emmintrin.h>
78 
/*
 * Multiplies every input float by `scalar` and converts the product to a
 * saturated int32_t using SSE2. Neither buffer needs to be aligned.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * Conversion uses cvtps2dq / cvtss2si, so products are rounded according to
 * the current MXCSR rounding mode (round-to-nearest-even by default), both in
 * the vector body and in the tail, so a result never depends on the index of
 * the element. Products >= 2^31 saturate to INT32_MAX; products below -2^31
 * and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector,
                                 const float scalar, unsigned int num_points)
{
  const unsigned int quarterPoints = num_points / 4;

  /* INT32_MAX is not representable as a float (it rounds up to 2^31), so the
   * positive limit is handled by comparing against 2^31 itself. */
  const float two_pow_31 = 2147483648.0f;
  const int32_t int32_max = 2147483647;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vTwoPow31 = _mm_set_ps1(two_pow_31);
  unsigned int number;

  for(number = 0; number < quarterPoints; number++){
    const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar);
    /* All-ones in every lane whose product is too large for int32_t. */
    const __m128i tooBig = _mm_castps_si128(_mm_cmpge_ps(scaled, vTwoPow31));
    /* Out-of-range lanes (and NaN) convert to 0x80000000, which is already
     * the correct result for negative overflow. */
    const __m128i converted = _mm_cvtps_epi32(scaled);

    /* 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF: flip positive overflow to INT32_MAX. */
    _mm_storeu_si128((__m128i*)outputVectorPtr, _mm_xor_si128(converted, tooBig));

    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Tail: use the scalar form of the same conversion instruction so the
   * remaining elements round and saturate exactly like the vector body. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    outputVector[number] = (r >= two_pow_31) ? int32_max
                                             : (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
  }
}
120 
121 #endif /* LV_HAVE_SSE2 */
122 
123 
124 #ifdef LV_HAVE_SSE
125 #include <xmmintrin.h>
126 
/*
 * Multiplies every input float by `scalar` and converts the product to a
 * saturated int32_t. Only the multiplication is vectorised (SSE has no packed
 * float-to-int conversion); the conversion itself is scalar. Neither buffer
 * needs to be aligned.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * In-range products are truncated toward zero. Products >= 2^31 saturate to
 * INT32_MAX; products below -2^31 and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int quarterPoints = num_points / 4;

  /* INT32_MAX is not representable as a float (it rounds up to 2^31), and
   * casting 2^31 to int32_t is undefined, so range checks are made against
   * the exactly representable powers of two before any cast happens. */
  const float upper_bound = 2147483648.0f;
  const float lower_bound = -2147483648.0f;
  const int32_t int32_max = 2147483647;
  const int32_t int32_min = -2147483647 - 1;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* Filled with an unaligned store, so no alignment attribute is required. */
  float scaled[4];
  unsigned int number;
  unsigned int lane;

  for(number = 0; number < quarterPoints; number++){
    _mm_storeu_ps(scaled, _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar));
    inputVectorPtr += 4;

    for(lane = 0; lane < 4; lane++){
      const float r = scaled[lane];
      if(r >= upper_bound)
        *outputVectorPtr++ = int32_max;
      else if(r >= lower_bound)
        *outputVectorPtr++ = (int32_t)(r);
      else
        *outputVectorPtr++ = int32_min; /* below range, or NaN */
    }
  }

  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upper_bound)
      outputVector[number] = int32_max;
    else if(r >= lower_bound)
      outputVector[number] = (int32_t)(r);
    else
      outputVector[number] = int32_min; /* below range, or NaN */
  }
}
172 
173 #endif /* LV_HAVE_SSE */
174 
175 
176 #ifdef LV_HAVE_GENERIC
177 
/*
 * Portable implementation: multiplies every input float by `scalar` and
 * converts the product to a saturated int32_t.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * In-range products are truncated toward zero. Products >= 2^31 saturate to
 * INT32_MAX; products below -2^31 and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_generic(int32_t* outputVector, const float* inputVector,
                                  const float scalar, unsigned int num_points)
{
  /* INT32_MAX is not representable as a float (it rounds up to 2^31), and
   * casting 2^31 to int32_t is undefined, so range checks are made against
   * the exactly representable powers of two before any cast happens. */
  const float upper_bound = 2147483648.0f;
  const float lower_bound = -2147483648.0f;
  const int32_t int32_max = 2147483647;
  const int32_t int32_min = -2147483647 - 1;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upper_bound)
      outputVector[number] = int32_max;
    else if(r >= lower_bound)
      outputVector[number] = (int32_t)(r);
    else
      outputVector[number] = int32_min; /* below range, or NaN */
  }
}
198 
199 #endif /* LV_HAVE_GENERIC */
200 
201 
202 
203 #endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
204 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
205 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
206 
207 #include <volk/volk_common.h>
208 #include <inttypes.h>
209 #include <stdio.h>
210 
211 #ifdef LV_HAVE_AVX
212 #include <immintrin.h>
213 
/*
 * Multiplies every input float by `scalar` and converts the product to a
 * saturated int32_t using AVX. The vector loop uses aligned 256-bit loads and
 * stores, so both buffers must be 32-byte aligned.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * Conversion uses vcvtps2dq / cvtss2si, so products are rounded according to
 * the current MXCSR rounding mode (round-to-nearest-even by default), both in
 * the vector body and in the tail, so a result never depends on the index of
 * the element. Products >= 2^31 saturate to INT32_MAX; products below -2^31
 * and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int eighthPoints = num_points / 8;

  /* INT32_MAX is not representable as a float (it rounds up to 2^31), so the
   * positive limit is handled by comparing against 2^31 itself. */
  const float two_pow_31 = 2147483648.0f;
  const int32_t int32_max = 2147483647;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  const __m256 vScalar = _mm256_set1_ps(scalar);
  const __m256 vTwoPow31 = _mm256_set1_ps(two_pow_31);
  unsigned int number;

  for(number = 0; number < eighthPoints; number++){
    const __m256 scaled = _mm256_mul_ps(_mm256_load_ps(inputVectorPtr), vScalar);
    /* All-ones in every lane whose product is too large for int32_t. */
    const __m256 tooBig = _mm256_cmp_ps(scaled, vTwoPow31, _CMP_GE_OQ);
    /* Out-of-range lanes (and NaN) convert to 0x80000000, which is already
     * the correct result for negative overflow. */
    const __m256i converted = _mm256_cvtps_epi32(scaled);
    /* 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF: flip positive overflow to
     * INT32_MAX. The XOR is done in the float domain because 256-bit integer
     * logic is not part of AVX. */
    const __m256i saturated =
      _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(converted), tooBig));

    _mm256_store_si256((__m256i*)outputVectorPtr, saturated);

    inputVectorPtr += 8;
    outputVectorPtr += 8;
  }

  /* Tail: use the scalar form of the same conversion instruction so the
   * remaining elements round and saturate exactly like the vector body. */
  for(number = eighthPoints * 8; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    outputVector[number] = (r >= two_pow_31) ? int32_max
                                             : (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
  }
}
255 
256 #endif /* LV_HAVE_AVX */
257 
258 
259 #ifdef LV_HAVE_SSE2
260 #include <emmintrin.h>
261 
/*
 * Multiplies every input float by `scalar` and converts the product to a
 * saturated int32_t using SSE2. The vector loop uses aligned 128-bit loads
 * and stores, so both buffers must be 16-byte aligned.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * Conversion uses cvtps2dq / cvtss2si, so products are rounded according to
 * the current MXCSR rounding mode (round-to-nearest-even by default), both in
 * the vector body and in the tail, so a result never depends on the index of
 * the element. Products >= 2^31 saturate to INT32_MAX; products below -2^31
 * and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector,
                                 const float scalar, unsigned int num_points)
{
  const unsigned int quarterPoints = num_points / 4;

  /* INT32_MAX is not representable as a float (it rounds up to 2^31), so the
   * positive limit is handled by comparing against 2^31 itself. */
  const float two_pow_31 = 2147483648.0f;
  const int32_t int32_max = 2147483647;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vTwoPow31 = _mm_set_ps1(two_pow_31);
  unsigned int number;

  for(number = 0; number < quarterPoints; number++){
    const __m128 scaled = _mm_mul_ps(_mm_load_ps(inputVectorPtr), vScalar);
    /* All-ones in every lane whose product is too large for int32_t. */
    const __m128i tooBig = _mm_castps_si128(_mm_cmpge_ps(scaled, vTwoPow31));
    /* Out-of-range lanes (and NaN) convert to 0x80000000, which is already
     * the correct result for negative overflow. */
    const __m128i converted = _mm_cvtps_epi32(scaled);

    /* 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF: flip positive overflow to INT32_MAX. */
    _mm_store_si128((__m128i*)outputVectorPtr, _mm_xor_si128(converted, tooBig));

    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Tail: use the scalar form of the same conversion instruction so the
   * remaining elements round and saturate exactly like the vector body. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    outputVector[number] = (r >= two_pow_31) ? int32_max
                                             : (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
  }
}
303 
304 #endif /* LV_HAVE_SSE2 */
305 
306 
307 #ifdef LV_HAVE_SSE
308 #include <xmmintrin.h>
309 
/*
 * Multiplies every input float by `scalar` and converts the product to a
 * saturated int32_t. Only the multiplication is vectorised (SSE has no packed
 * float-to-int conversion); the conversion itself is scalar. The vector loop
 * uses aligned 128-bit loads, so inputVector must be 16-byte aligned.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * In-range products are truncated toward zero. Products >= 2^31 saturate to
 * INT32_MAX; products below -2^31 and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int quarterPoints = num_points / 4;

  /* INT32_MAX is not representable as a float (it rounds up to 2^31), and
   * casting 2^31 to int32_t is undefined, so range checks are made against
   * the exactly representable powers of two before any cast happens. */
  const float upper_bound = 2147483648.0f;
  const float lower_bound = -2147483648.0f;
  const int32_t int32_max = 2147483647;
  const int32_t int32_min = -2147483647 - 1;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* Filled with an unaligned store, so no alignment attribute is required. */
  float scaled[4];
  unsigned int number;
  unsigned int lane;

  for(number = 0; number < quarterPoints; number++){
    _mm_storeu_ps(scaled, _mm_mul_ps(_mm_load_ps(inputVectorPtr), vScalar));
    inputVectorPtr += 4;

    for(lane = 0; lane < 4; lane++){
      const float r = scaled[lane];
      if(r >= upper_bound)
        *outputVectorPtr++ = int32_max;
      else if(r >= lower_bound)
        *outputVectorPtr++ = (int32_t)(r);
      else
        *outputVectorPtr++ = int32_min; /* below range, or NaN */
    }
  }

  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upper_bound)
      outputVector[number] = int32_max;
    else if(r >= lower_bound)
      outputVector[number] = (int32_t)(r);
    else
      outputVector[number] = int32_min; /* below range, or NaN */
  }
}
355 
356 #endif /* LV_HAVE_SSE */
357 
358 
359 #ifdef LV_HAVE_GENERIC
360 
/*
 * Portable implementation (aligned-kernel variant): multiplies every input
 * float by `scalar` and converts the product to a saturated int32_t. The code
 * itself places no alignment requirement on either buffer.
 *
 * outputVector: receives num_points converted integers.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to every input before conversion.
 * num_points:   number of elements to process.
 *
 * In-range products are truncated toward zero. Products >= 2^31 saturate to
 * INT32_MAX; products below -2^31 and NaN produce INT32_MIN.
 */
static inline void
volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector,
                                    const float scalar, unsigned int num_points)
{
  /* INT32_MAX is not representable as a float (it rounds up to 2^31), and
   * casting 2^31 to int32_t is undefined, so range checks are made against
   * the exactly representable powers of two before any cast happens. */
  const float upper_bound = 2147483648.0f;
  const float lower_bound = -2147483648.0f;
  const int32_t int32_max = 2147483647;
  const int32_t int32_min = -2147483647 - 1;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upper_bound)
      outputVector[number] = int32_max;
    else if(r >= lower_bound)
      outputVector[number] = (int32_t)(r);
    else
      outputVector[number] = int32_min; /* below range, or NaN */
  }
}
381 
382 #endif /* LV_HAVE_GENERIC */
383 
384 #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
signed int int32_t
Definition: stdint.h:77