70 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
71 #define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
77 #include <immintrin.h>
80 volk_32fc_deinterleave_imag_32f_a_avx(
float* qBuffer,
const lv_32fc_t* complexVector,
81 unsigned int num_points)
83 unsigned int number = 0;
84 const unsigned int eighthPoints = num_points / 8;
85 const float* complexVectorPtr = (
const float*)complexVector;
86 float* qBufferPtr = qBuffer;
88 __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
89 for(;number < eighthPoints; number++){
91 cplxValue1 = _mm256_load_ps(complexVectorPtr);
92 complexVectorPtr += 8;
94 cplxValue2 = _mm256_load_ps(complexVectorPtr);
95 complexVectorPtr += 8;
97 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
98 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
101 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
103 _mm256_store_ps(qBufferPtr, qValue);
108 number = eighthPoints * 8;
109 for(; number < num_points; number++){
111 *qBufferPtr++ = *complexVectorPtr++;
117 #include <xmmintrin.h>
120 volk_32fc_deinterleave_imag_32f_a_sse(
float* qBuffer,
const lv_32fc_t* complexVector,
121 unsigned int num_points)
123 unsigned int number = 0;
124 const unsigned int quarterPoints = num_points / 4;
126 const float* complexVectorPtr = (
const float*)complexVector;
127 float* qBufferPtr = qBuffer;
129 __m128 cplxValue1, cplxValue2, iValue;
130 for(;number < quarterPoints; number++){
132 cplxValue1 = _mm_load_ps(complexVectorPtr);
133 complexVectorPtr += 4;
135 cplxValue2 = _mm_load_ps(complexVectorPtr);
136 complexVectorPtr += 4;
139 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
141 _mm_store_ps(qBufferPtr, iValue);
146 number = quarterPoints * 4;
147 for(; number < num_points; number++){
149 *qBufferPtr++ = *complexVectorPtr++;
155 #include <arm_neon.h>
/*
 * Extracts the imaginary (Q) component of each complex sample using NEON.
 *
 * qBuffer       - output; receives num_points floats (the imaginary parts).
 * complexVector - input; num_points complex samples, read as interleaved
 *                 float pairs (real, imaginary).
 * num_points    - number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
                                     const lv_32fc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    /* Each iteration consumes 4 complex samples (8 floats) and emits 4 floats. */
    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 de-interleaves on load: val[0] holds the even-indexed
         * floats (real parts), val[1] the odd-indexed ones (imaginary). */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;

        /* Advance the output; without this every iteration would overwrite
         * the same four floats. */
        qBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        complexVectorPtr++;                  /* skip the real part */
        *qBufferPtr++ = *complexVectorPtr++; /* copy the imaginary part */
    }
}
181 #ifdef LV_HAVE_GENERIC
184 volk_32fc_deinterleave_imag_32f_generic(
float* qBuffer,
const lv_32fc_t* complexVector,
185 unsigned int num_points)
187 unsigned int number = 0;
188 const float* complexVectorPtr = (
float*)complexVector;
189 float* qBufferPtr = qBuffer;
190 for(number = 0; number < num_points; number++){
192 *qBufferPtr++ = *complexVectorPtr++;
float complex lv_32fc_t
Definition: volk_complex.h:56