GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_16ic_s32f_deinterleave_32f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_16ic_s32f_deinterleave_32f_x2
25  *
26  * \b Overview
27  *
28  * Deinterleaves the complex 16 bit vector into I & Q vector data and
29  * returns the result as two vectors of floats that have been scaled.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_16ic_s32f_deinterleave_32f_x2(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector of 16-bit shorts.
38  * \li scalar: The value by which each sample of the input complex vector is divided.
39  * \li num_points: The number of complex data values to be deinterleaved.
40  *
41  * \b Outputs
42  * \li iBuffer: The floating point I buffer output data.
43  * \li qBuffer: The floating point Q buffer output data.
44  *
45  * \b Example
46  * \code
47  * int N = 10000;
48  *
49  * volk_16ic_s32f_deinterleave_32f_x2();
50  *
51  * volk_free(x);
52  * volk_free(t);
53  * \endcode
54  */
55 
56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
57 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
58 
59 #include <volk/volk_common.h>
60 #include <inttypes.h>
61 #include <stdio.h>
62 
63 #ifdef LV_HAVE_SSE
64 #include <xmmintrin.h>
65 
66 static inline
67 void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
68  const float scalar, unsigned int num_points)
69 {
70  float* iBufferPtr = iBuffer;
71  float* qBufferPtr = qBuffer;
72 
73  uint64_t number = 0;
74  const uint64_t quarterPoints = num_points / 4;
75  __m128 cplxValue1, cplxValue2, iValue, qValue;
76 
77  __m128 invScalar = _mm_set_ps1(1.0/scalar);
78  int16_t* complexVectorPtr = (int16_t*)complexVector;
79 
80  __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
81 
82  for(;number < quarterPoints; number++){
83 
84  floatBuffer[0] = (float)(complexVectorPtr[0]);
85  floatBuffer[1] = (float)(complexVectorPtr[1]);
86  floatBuffer[2] = (float)(complexVectorPtr[2]);
87  floatBuffer[3] = (float)(complexVectorPtr[3]);
88 
89  floatBuffer[4] = (float)(complexVectorPtr[4]);
90  floatBuffer[5] = (float)(complexVectorPtr[5]);
91  floatBuffer[6] = (float)(complexVectorPtr[6]);
92  floatBuffer[7] = (float)(complexVectorPtr[7]);
93 
94  cplxValue1 = _mm_load_ps(&floatBuffer[0]);
95  cplxValue2 = _mm_load_ps(&floatBuffer[4]);
96 
97  complexVectorPtr += 8;
98 
99  cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
100  cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
101 
102  // Arrange in i1i2i3i4 format
103  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
104  // Arrange in q1q2q3q4 format
105  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
106 
107  _mm_store_ps(iBufferPtr, iValue);
108  _mm_store_ps(qBufferPtr, qValue);
109 
110  iBufferPtr += 4;
111  qBufferPtr += 4;
112  }
113 
114  number = quarterPoints * 4;
115  complexVectorPtr = (int16_t*)&complexVector[number];
116  for(; number < num_points; number++){
117  *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
118  *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
119  }
120 }
121 #endif /* LV_HAVE_SSE */
122 
123 #ifdef LV_HAVE_GENERIC
124 
125 static inline void
126 volk_16ic_s32f_deinterleave_32f_x2_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
127  const float scalar, unsigned int num_points)
128 {
129  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
130  float* iBufferPtr = iBuffer;
131  float* qBufferPtr = qBuffer;
132  unsigned int number;
133  for(number = 0; number < num_points; number++){
134  *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
135  *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
136  }
137 }
138 #endif /* LV_HAVE_GENERIC */
139 
140 #ifdef LV_HAVE_NEON
141 
142 static inline void
143 volk_16ic_s32f_deinterleave_32f_x2_neon(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
144  const float scalar, unsigned int num_points)
145 {
146  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
147  float* iBufferPtr = iBuffer;
148  float* qBufferPtr = qBuffer;
149  unsigned int eighth_points = num_points / 4;
150  unsigned int number;
151  float iScalar = 1.f/scalar;
152  float32x4_t invScalar;
153  invScalar = vld1q_dup_f32(&iScalar);
154 
155  int16x4x2_t complexInput_s16;
156  int32x4x2_t complexInput_s32;
157  float32x4x2_t complexFloat;
158 
159  for(number = 0; number < eighth_points; number++){
160  complexInput_s16 = vld2_s16(complexVectorPtr);
161  complexInput_s32.val[0] = vmovl_s16(complexInput_s16.val[0]);
162  complexInput_s32.val[1] = vmovl_s16(complexInput_s16.val[1]);
163  complexFloat.val[0] = vcvtq_f32_s32(complexInput_s32.val[0]);
164  complexFloat.val[1] = vcvtq_f32_s32(complexInput_s32.val[1]);
165  complexFloat.val[0] = vmulq_f32(complexFloat.val[0], invScalar);
166  complexFloat.val[1] = vmulq_f32(complexFloat.val[1], invScalar);
167  vst1q_f32(iBufferPtr, complexFloat.val[0]);
168  vst1q_f32(qBufferPtr, complexFloat.val[1]);
169  complexVectorPtr += 8;
170  iBufferPtr += 4;
171  qBufferPtr += 4;
172  }
173 
174  for(number = eighth_points*4; number < num_points; number++){
175  *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
176  *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
177  }
178 }
179 #endif /* LV_HAVE_NEON */
180 
181 #ifdef LV_HAVE_ORC
182 extern void
183 volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
184  const float scalar, unsigned int num_points);
185 
186 static inline void
187 volk_16ic_s32f_deinterleave_32f_x2_u_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
188  const float scalar, unsigned int num_points)
189 {
190  volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
191 }
192 #endif /* LV_HAVE_ORC */
193 
194 
195 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H */
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
unsigned __int64 uint64_t
Definition: stdint.h:90
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27