GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_s32f_convert_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_s32f_convert_16i
25  *
26  * \b Overview
27  *
28  * Converts a floating point number to a 16-bit short after applying a
29  * scaling factor.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_s32f_convert_16i(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li inputVector: the input vector of floats.
38  * \li scalar: The value multiplied against each point in the input buffer.
39  * \li num_points: The number of data points.
40  *
41  * \b Outputs
42  * \li outputVector: The output vector.
43  *
44  * \b Example
45  * Convert floats from [-1,1] to 16-bit integers with a scale of 5 to maintain smallest delta
 * \code
46  * int N = 10;
47  * unsigned int alignment = volk_get_alignment();
48  * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
49  * int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
50  *
51  * for(unsigned int ii = 0; ii < N; ++ii){
52  * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
53  * }
54  *
55  * // Normalize by the smallest delta (0.2 in this example)
56  * float scale = 5.f;
57  *
58  * volk_32f_s32f_convert_16i(out, increasing, scale, N);
59  *
60  * for(unsigned int ii = 0; ii < N; ++ii){
61  * printf("out[%u] = %i\n", ii, out[ii]);
62  * }
63  *
64  * volk_free(increasing);
65  * volk_free(out);
66  * \endcode
67  */
68 
69 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
70 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
71 
72 #include <inttypes.h>
73 #include <stdio.h>
74 #include <math.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void
80 volk_32f_s32f_convert_16i_u_avx(int16_t* outputVector, const float* inputVector,
81  const float scalar, unsigned int num_points)
82 {
83  unsigned int number = 0;
84 
85  const unsigned int eighthPoints = num_points / 8;
86 
87  const float* inputVectorPtr = (const float*)inputVector;
88  int16_t* outputVectorPtr = outputVector;
89 
90  float min_val = -32768;
91  float max_val = 32767;
92  float r;
93 
94  __m256 vScalar = _mm256_set1_ps(scalar);
95  __m256 inputVal, ret;
96  __m256i intInputVal;
97  __m128i intInputVal1, intInputVal2;
98  __m256 vmin_val = _mm256_set1_ps(min_val);
99  __m256 vmax_val = _mm256_set1_ps(max_val);
100 
101  for(;number < eighthPoints; number++){
102  inputVal = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
103 
104  // Scale and clip
105  ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
106 
107  intInputVal = _mm256_cvtps_epi32(ret);
108 
109  intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
110  intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
111 
112  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
113 
114  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
115  outputVectorPtr += 8;
116  }
117 
118  number = eighthPoints * 8;
119  for(; number < num_points; number++){
120  r = inputVector[number] * scalar;
121  if(r > max_val)
122  r = max_val;
123  else if(r < min_val)
124  r = min_val;
125  outputVector[number] = (int16_t)rintf(r);
126  }
127 }
128 #endif /* LV_HAVE_AVX */
129 
130 
131 #ifdef LV_HAVE_SSE2
132 #include <emmintrin.h>
133 
134 static inline void
135 volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector,
136  const float scalar, unsigned int num_points)
137 {
138  unsigned int number = 0;
139 
140  const unsigned int eighthPoints = num_points / 8;
141 
142  const float* inputVectorPtr = (const float*)inputVector;
143  int16_t* outputVectorPtr = outputVector;
144 
145  float min_val = -32768;
146  float max_val = 32767;
147  float r;
148 
149  __m128 vScalar = _mm_set_ps1(scalar);
150  __m128 inputVal1, inputVal2;
151  __m128i intInputVal1, intInputVal2;
152  __m128 ret1, ret2;
153  __m128 vmin_val = _mm_set_ps1(min_val);
154  __m128 vmax_val = _mm_set_ps1(max_val);
155 
156  for(;number < eighthPoints; number++){
157  inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
158  inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
159 
160  // Scale and clip
161  ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
162  ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
163 
164  intInputVal1 = _mm_cvtps_epi32(ret1);
165  intInputVal2 = _mm_cvtps_epi32(ret2);
166 
167  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
168 
169  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
170  outputVectorPtr += 8;
171  }
172 
173  number = eighthPoints * 8;
174  for(; number < num_points; number++){
175  r = inputVector[number] * scalar;
176  if(r > max_val)
177  r = max_val;
178  else if(r < min_val)
179  r = min_val;
180  outputVector[number] = (int16_t)rintf(r);
181  }
182 }
183 #endif /* LV_HAVE_SSE2 */
184 
185 
186 #ifdef LV_HAVE_SSE
187 #include <xmmintrin.h>
188 
189 static inline void
190 volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector,
191  const float scalar, unsigned int num_points)
192 {
193  unsigned int number = 0;
194 
195  const unsigned int quarterPoints = num_points / 4;
196 
197  const float* inputVectorPtr = (const float*)inputVector;
198  int16_t* outputVectorPtr = outputVector;
199 
200  float min_val = -32768;
201  float max_val = 32767;
202  float r;
203 
204  __m128 vScalar = _mm_set_ps1(scalar);
205  __m128 ret;
206  __m128 vmin_val = _mm_set_ps1(min_val);
207  __m128 vmax_val = _mm_set_ps1(max_val);
208 
209  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
210 
211  for(;number < quarterPoints; number++){
212  ret = _mm_loadu_ps(inputVectorPtr);
213  inputVectorPtr += 4;
214 
215  // Scale and clip
216  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
217 
218  _mm_store_ps(outputFloatBuffer, ret);
219  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
220  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
221  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
222  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
223  }
224 
225  number = quarterPoints * 4;
226  for(; number < num_points; number++){
227  r = inputVector[number] * scalar;
228  if(r > max_val)
229  r = max_val;
230  else if(r < min_val)
231  r = min_val;
232  outputVector[number] = (int16_t)rintf(r);
233  }
234 }
235 #endif /* LV_HAVE_SSE */
236 
237 
238 #ifdef LV_HAVE_GENERIC
239 
240 static inline void
241 volk_32f_s32f_convert_16i_generic(int16_t* outputVector, const float* inputVector,
242  const float scalar, unsigned int num_points)
243 {
244  int16_t* outputVectorPtr = outputVector;
245  const float* inputVectorPtr = inputVector;
246  unsigned int number = 0;
247  float min_val = -32768;
248  float max_val = 32767;
249  float r;
250 
251  for(number = 0; number < num_points; number++){
252  r = *inputVectorPtr++ * scalar;
253  if(r > max_val)
254  r = max_val;
255  else if(r < min_val)
256  r = min_val;
257  *outputVectorPtr++ = (int16_t)rintf(r);
258  }
259 }
260 #endif /* LV_HAVE_GENERIC */
261 
262 
263 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */
264 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
265 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
266 
267 #include <volk/volk_common.h>
268 #include <inttypes.h>
269 #include <stdio.h>
270 #include <math.h>
271 
272 #ifdef LV_HAVE_AVX
273 #include <immintrin.h>
274 
275 static inline void
276 volk_32f_s32f_convert_16i_a_avx(int16_t* outputVector, const float* inputVector,
277  const float scalar, unsigned int num_points)
278 {
279  unsigned int number = 0;
280 
281  const unsigned int eighthPoints = num_points / 8;
282 
283  const float* inputVectorPtr = (const float*)inputVector;
284  int16_t* outputVectorPtr = outputVector;
285 
286  float min_val = -32768;
287  float max_val = 32767;
288  float r;
289 
290  __m256 vScalar = _mm256_set1_ps(scalar);
291  __m256 inputVal, ret;
292  __m256i intInputVal;
293  __m128i intInputVal1, intInputVal2;
294  __m256 vmin_val = _mm256_set1_ps(min_val);
295  __m256 vmax_val = _mm256_set1_ps(max_val);
296 
297  for(;number < eighthPoints; number++){
298  inputVal = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
299 
300  // Scale and clip
301  ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
302 
303  intInputVal = _mm256_cvtps_epi32(ret);
304 
305  intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
306  intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
307 
308  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
309 
310  _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
311  outputVectorPtr += 8;
312  }
313 
314  number = eighthPoints * 8;
315  for(; number < num_points; number++){
316  r = inputVector[number] * scalar;
317  if(r > max_val)
318  r = max_val;
319  else if(r < min_val)
320  r = min_val;
321  outputVector[number] = (int16_t)rintf(r);
322  }
323 }
324 #endif /* LV_HAVE_AVX */
325 
326 #ifdef LV_HAVE_SSE2
327 #include <emmintrin.h>
328 
329 static inline void
330 volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector,
331  const float scalar, unsigned int num_points)
332 {
333  unsigned int number = 0;
334 
335  const unsigned int eighthPoints = num_points / 8;
336 
337  const float* inputVectorPtr = (const float*)inputVector;
338  int16_t* outputVectorPtr = outputVector;
339 
340  float min_val = -32768;
341  float max_val = 32767;
342  float r;
343 
344  __m128 vScalar = _mm_set_ps1(scalar);
345  __m128 inputVal1, inputVal2;
346  __m128i intInputVal1, intInputVal2;
347  __m128 ret1, ret2;
348  __m128 vmin_val = _mm_set_ps1(min_val);
349  __m128 vmax_val = _mm_set_ps1(max_val);
350 
351  for(;number < eighthPoints; number++){
352  inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
353  inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
354 
355  // Scale and clip
356  ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
357  ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
358 
359  intInputVal1 = _mm_cvtps_epi32(ret1);
360  intInputVal2 = _mm_cvtps_epi32(ret2);
361 
362  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
363 
364  _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
365  outputVectorPtr += 8;
366  }
367 
368  number = eighthPoints * 8;
369  for(; number < num_points; number++){
370  r = inputVector[number] * scalar;
371  if(r > max_val)
372  r = max_val;
373  else if(r < min_val)
374  r = min_val;
375  outputVector[number] = (int16_t)rintf(r);
376  }
377 }
378 #endif /* LV_HAVE_SSE2 */
379 
380 
381 #ifdef LV_HAVE_SSE
382 #include <xmmintrin.h>
383 
384 static inline void
385 volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector,
386  const float scalar, unsigned int num_points)
387 {
388  unsigned int number = 0;
389 
390  const unsigned int quarterPoints = num_points / 4;
391 
392  const float* inputVectorPtr = (const float*)inputVector;
393  int16_t* outputVectorPtr = outputVector;
394 
395  float min_val = -32768;
396  float max_val = 32767;
397  float r;
398 
399  __m128 vScalar = _mm_set_ps1(scalar);
400  __m128 ret;
401  __m128 vmin_val = _mm_set_ps1(min_val);
402  __m128 vmax_val = _mm_set_ps1(max_val);
403 
404  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
405 
406  for(;number < quarterPoints; number++){
407  ret = _mm_load_ps(inputVectorPtr);
408  inputVectorPtr += 4;
409 
410  // Scale and clip
411  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
412 
413  _mm_store_ps(outputFloatBuffer, ret);
414  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
415  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
416  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
417  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
418  }
419 
420  number = quarterPoints * 4;
421  for(; number < num_points; number++){
422  r = inputVector[number] * scalar;
423  if(r > max_val)
424  r = max_val;
425  else if(r < min_val)
426  r = min_val;
427  outputVector[number] = (int16_t)rintf(r);
428  }
429 }
430 #endif /* LV_HAVE_SSE */
431 
432 
433 #ifdef LV_HAVE_GENERIC
434 
435 static inline void
436 volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector,
437  const float scalar, unsigned int num_points)
438 {
439  int16_t* outputVectorPtr = outputVector;
440  const float* inputVectorPtr = inputVector;
441  unsigned int number = 0;
442  float min_val = -32768;
443  float max_val = 32767;
444  float r;
445 
446  for(number = 0; number < num_points; number++){
447  r = *inputVectorPtr++ * scalar;
448  if(r < min_val)
449  r = min_val;
450  else if(r > max_val)
451  r = max_val;
452  *outputVectorPtr++ = (int16_t)rintf(r);
453  }
454 }
455 #endif /* LV_HAVE_GENERIC */
456 
457 #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
static float rintf(float x)
Definition: volk/cmake/msvc/config.h:30