GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_sqrt_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_sqrt_32f
25  *
26  * \b Overview
27  *
28  * Computes the square root of the input vector and stores the results
29  * in the output vector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_sqrt_32f(float* cVector, const float* aVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li aVector: The input vector of floats.
38  * \li num_points: The number of data points.
39  *
40  * \b Outputs
41  * \li cVector: The output vector.
42  *
43  * \b Example
44  * \code
45  unsigned int N = 10;
46  unsigned int alignment = volk_get_alignment();
47  float* in = (float*)volk_malloc(sizeof(float)*N, alignment);
48  float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
49 
50  for(unsigned int ii = 0; ii < N; ++ii){
51  in[ii] = (float)(ii*ii);
52  }
53 
54  volk_32f_sqrt_32f(out, in, N);
55 
56  for(unsigned int ii = 0; ii < N; ++ii){
57  printf("out(%u) = %f\n", ii, out[ii]);
58  }
59 
60  volk_free(in);
61  volk_free(out);
62  * \endcode
63  */
64 
65 #ifndef INCLUDED_volk_32f_sqrt_32f_a_H
66 #define INCLUDED_volk_32f_sqrt_32f_a_H
67 
68 #include <inttypes.h>
69 #include <stdio.h>
70 #include <math.h>
71 
72 #ifdef LV_HAVE_SSE
73 #include <xmmintrin.h>
74 
75 static inline void
76 volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points)
77 {
78  unsigned int number = 0;
79  const unsigned int quarterPoints = num_points / 4;
80 
81  float* cPtr = cVector;
82  const float* aPtr = aVector;
83 
84  __m128 aVal, cVal;
85  for(;number < quarterPoints; number++) {
86  aVal = _mm_load_ps(aPtr);
87 
88  cVal = _mm_sqrt_ps(aVal);
89 
90  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
91 
92  aPtr += 4;
93  cPtr += 4;
94  }
95 
96  number = quarterPoints * 4;
97  for(;number < num_points; number++) {
98  *cPtr++ = sqrtf(*aPtr++);
99  }
100 }
101 
102 #endif /* LV_HAVE_SSE */
103 
104 
105 #ifdef LV_HAVE_NEON
106 #include <arm_neon.h>
107 
/*!
 * \brief NEON kernel: element-wise square root of a float vector.
 *
 * Groups of four floats are handled with NEON intrinsics; the remaining
 * (num_points % 4) elements are finished one at a time with sqrtf().
 *
 * On AArch64 the vector path uses vsqrtq_f32, a true square root. On
 * 32-bit ARM no vector square root intrinsic is used; the result is formed
 * as vrecpeq_f32(vrsqrteq_f32(x)), i.e. the reciprocal estimate of the
 * reciprocal-square-root estimate. Both are estimate instructions, so on
 * that path the vectorized elements are approximations while the scalar
 * tail elements come from sqrtf().
 *
 * \param cVector    Output buffer that receives num_points results.
 * \param aVector    Input buffer of num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_sqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;
  const unsigned int quarter_points = num_points / 4;
  float32x4_t in_vec, out_vec;

  for(number = 0; number < quarter_points; number++) {
    in_vec = vld1q_f32(aPtr);
#if defined(__aarch64__)
    /* AArch64 provides a real vector square root; prefer it for accuracy. */
    out_vec = vsqrtq_f32(in_vec);
#else
    /* ARMv7 NEON: approximate sqrt(x) as 1 / (1 / sqrt(x)) using estimates. */
    out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
#endif
    vst1q_f32(cPtr, out_vec);
    aPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail for the elements that do not fill a whole 4-wide vector. */
  for(number = quarter_points * 4; number < num_points; number++) {
    *cPtr++ = sqrtf(*aPtr++);
  }
}
130 
131 #endif /* LV_HAVE_NEON */
132 
133 
134 #ifdef LV_HAVE_GENERIC
135 
136 static inline void
137 volk_32f_sqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points)
138 {
139  float* cPtr = cVector;
140  const float* aPtr = aVector;
141  unsigned int number = 0;
142 
143  for(number = 0; number < num_points; number++) {
144  *cPtr++ = sqrtf(*aPtr++);
145  }
146 }
147 
148 #endif /* LV_HAVE_GENERIC */
149 
150 
151 #ifdef LV_HAVE_ORC
152 
/*!
 * Kernel implemented outside this header (presumably the ORC-compiled
 * routine; confirm against the build). The parameter names mirror the
 * wrapper below, which passes its arguments through in this order.
 */
extern void
volk_32f_sqrt_32f_a_orc_impl(float* cVector, const float* aVector, unsigned int num_points);

/*!
 * \brief ORC entry point: forwards directly to volk_32f_sqrt_32f_a_orc_impl.
 *
 * This wrapper does no work of its own; all three arguments are handed to
 * the external implementation unchanged.
 *
 * \param cVector    Output buffer, passed through as the first argument.
 * \param aVector    Input buffer, passed through as the second argument.
 * \param num_points Element count, passed through as the third argument.
 */
static inline void
volk_32f_sqrt_32f_u_orc(float* cVector, const float* aVector, unsigned int num_points)
{
  volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
}
161 
162 #endif /* LV_HAVE_ORC */
163 
164 #endif /* INCLUDED_volk_32f_sqrt_32f_a_H */