68 #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
69 #define INCLUDED_volk_32fc_conjugate_32fc_u_H
77 #include <immintrin.h>
/*
 * AVX kernel, unaligned variant: complex-conjugates num_points samples.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of complex samples to process
 *
 * NOTE(review): this listing is a lossy extract. The return type, braces,
 * the declarations of x, a and c, the per-iteration pointer advance and the
 * body of the tail loop are not visible here. Presumably a/c walk
 * aVector/cVector -- confirm against the full file.
 */
80 volk_32fc_conjugate_32fc_u_avx(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
82 unsigned int number = 0;
/* A 256-bit register holds 8 floats, i.e. 4 interleaved re/im pairs. */
83 const unsigned int quarterPoints = num_points / 4;
/* Mask with -0.f (only the sign bit set) in lanes 1,3,5,7 and +0 in the
 * even lanes; with re/im interleaving the odd lanes are the imaginary parts. */
89 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
/* Vector loop: one iteration per group of four complex samples. */
91 for(;number < quarterPoints; number++){
/* Unaligned load of 8 floats starting at a. */
93 x = _mm256_loadu_ps((
float*)a);
/* XOR with the mask flips the sign of the odd lanes only. */
95 x = _mm256_xor_ps(x, conjugator);
/* Unaligned store of the 8 result floats to c. */
97 _mm256_storeu_ps((
float*)c,x);
/* Resume at the first sample index not covered by the vector loop. */
103 number = quarterPoints * 4;
/* Scalar tail for the remaining num_points % 4 samples (body not visible
 * in this extract). */
105 for(;number < num_points; number++) {
112 #include <pmmintrin.h>
/*
 * SSE kernel, unaligned variant: complex-conjugates num_points samples.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of complex samples to process
 *
 * NOTE(review): this listing is a lossy extract. The return type, braces,
 * the declarations of x, a and c, the per-iteration pointer advance and the
 * body of the trailing if are not visible here. Presumably a/c walk
 * aVector/cVector -- confirm against the full file.
 */
115 volk_32fc_conjugate_32fc_u_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
117 unsigned int number = 0;
/* A 128-bit register holds 4 floats, i.e. 2 interleaved re/im pairs. */
118 const unsigned int halfPoints = num_points / 2;
/* Mask with -0.f (only the sign bit set) in lanes 1 and 3, +0 in lanes
 * 0 and 2; with re/im interleaving the odd lanes are the imaginary parts. */
124 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
/* Vector loop: one iteration per pair of complex samples. */
126 for(;number < halfPoints; number++){
/* Unaligned load of 4 floats starting at a. */
128 x = _mm_loadu_ps((
float*)a);
/* XOR with the mask flips the sign of the odd lanes only. */
130 x = _mm_xor_ps(x, conjugator);
/* Unaligned store of the 4 result floats to c. */
132 _mm_storeu_ps((
float*)c,x);
/* An odd num_points leaves one sample after the vector loop; the handling
 * inside this branch is not visible in this extract. */
138 if((num_points % 2) != 0) {
144 #ifdef LV_HAVE_GENERIC
/*
 * Generic (non-SIMD) kernel of the unaligned section; iterates once per
 * complex sample.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of loop iterations / samples
 *
 * NOTE(review): the return type, braces and the loop body are not visible
 * in this extract; presumably each iteration writes the conjugate of one
 * input sample to the output -- confirm against the full file.
 */
147 volk_32fc_conjugate_32fc_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
151 unsigned int number = 0;
/* The loop re-initialises number, so the initialiser above is redundant. */
153 for(number = 0; number < num_points; number++){
161 #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
162 #define INCLUDED_volk_32fc_conjugate_32fc_a_H
170 #include <immintrin.h>
/*
 * AVX kernel, aligned variant: complex-conjugates num_points samples.
 * Uses _mm256_load_ps/_mm256_store_ps, which require 32-byte aligned
 * addresses.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of complex samples to process
 *
 * NOTE(review): this listing is a lossy extract. The return type, braces,
 * the declarations of x, a and c, the per-iteration pointer advance and the
 * body of the tail loop are not visible here. Presumably a/c walk
 * aVector/cVector -- confirm against the full file.
 */
173 volk_32fc_conjugate_32fc_a_avx(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
175 unsigned int number = 0;
/* A 256-bit register holds 8 floats, i.e. 4 interleaved re/im pairs. */
176 const unsigned int quarterPoints = num_points / 4;
/* Mask with -0.f (only the sign bit set) in lanes 1,3,5,7 and +0 in the
 * even lanes; with re/im interleaving the odd lanes are the imaginary parts. */
182 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
/* Vector loop: one iteration per group of four complex samples. */
184 for(;number < quarterPoints; number++){
/* Aligned load of 8 floats starting at a. */
186 x = _mm256_load_ps((
float*)a);
/* XOR with the mask flips the sign of the odd lanes only. */
188 x = _mm256_xor_ps(x, conjugator);
/* Aligned store of the 8 result floats to c. */
190 _mm256_store_ps((
float*)c,x);
/* Resume at the first sample index not covered by the vector loop. */
196 number = quarterPoints * 4;
/* Scalar tail for the remaining num_points % 4 samples (body not visible
 * in this extract). */
198 for(;number < num_points; number++) {
205 #include <pmmintrin.h>
/*
 * SSE kernel, aligned variant: complex-conjugates num_points samples.
 * Uses _mm_load_ps/_mm_store_ps, which require 16-byte aligned addresses.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of complex samples to process
 *
 * NOTE(review): this listing is a lossy extract. The return type, braces,
 * the declarations of x, a and c, the per-iteration pointer advance and the
 * body of the trailing if are not visible here. Presumably a/c walk
 * aVector/cVector -- confirm against the full file.
 */
208 volk_32fc_conjugate_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
210 unsigned int number = 0;
/* A 128-bit register holds 4 floats, i.e. 2 interleaved re/im pairs. */
211 const unsigned int halfPoints = num_points / 2;
/* Mask with -0.f (only the sign bit set) in lanes 1 and 3, +0 in lanes
 * 0 and 2; with re/im interleaving the odd lanes are the imaginary parts. */
217 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
/* Vector loop: one iteration per pair of complex samples. */
219 for(;number < halfPoints; number++){
/* Aligned load of 4 floats starting at a. */
221 x = _mm_load_ps((
float*)a);
/* XOR with the mask flips the sign of the odd lanes only. */
223 x = _mm_xor_ps(x, conjugator);
/* Aligned store of the 4 result floats to c. */
225 _mm_store_ps((
float*)c,x);
/* An odd num_points leaves one sample after the vector loop; the handling
 * inside this branch is not visible in this extract. */
231 if((num_points % 2) != 0) {
238 #include <arm_neon.h>
/*
 * ARM NEON kernel: complex-conjugates num_points samples, four per
 * vector iteration.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of complex samples to process
 *
 * NOTE(review): this listing is a lossy extract. The return type, braces,
 * the declarations of number, x, a and c, the per-iteration pointer advance
 * and the body of the tail loop are not visible here. Presumably a/c walk
 * aVector/cVector -- confirm against the full file.
 */
241 volk_32fc_conjugate_32fc_a_neon(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
/* vld2q_f32 consumes 8 floats (4 re/im pairs) per call. */
244 const unsigned int quarterPoints = num_points / 4;
/* Vector loop: one iteration per group of four complex samples. */
250 for(number=0; number < quarterPoints; number++){
/* Prefetch hint for the address a+4 (presumably the next group of four
 * samples -- depends on a's type, which is not visible here). */
251 __builtin_prefetch(a+4);
/* De-interleaving load: even-indexed floats go to x.val[0], odd-indexed
 * floats (the imaginary parts of re/im pairs) go to x.val[1]. */
252 x = vld2q_f32((
float*)a);
/* Negate the four odd-indexed (imaginary) values. */
255 x.val[1] = vnegq_f32( x.val[1]);
/* Re-interleave val[0]/val[1] and store 8 floats to c. */
257 vst2q_f32((
float*)c,x);
/* Scalar tail for the remaining num_points % 4 samples (body not visible
 * in this extract). */
263 for(number=quarterPoints*4; number < num_points; number++){
270 #ifdef LV_HAVE_GENERIC
/*
 * Generic (non-SIMD) kernel of the aligned section; iterates once per
 * complex sample.
 *
 * cVector    - destination pointer (non-const)
 * aVector    - source pointer (const)
 * num_points - number of loop iterations / samples
 *
 * NOTE(review): the return type, braces and the loop body are not visible
 * in this extract; presumably each iteration writes the conjugate of one
 * input sample to the output -- confirm against the full file.
 */
273 volk_32fc_conjugate_32fc_a_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points)
277 unsigned int number = 0;
/* The loop re-initialises number, so the initialiser above is redundant. */
279 for(number = 0; number < num_points; number++){
#define lv_conj(x)
Definition: volk_complex.h:80
float complex lv_32fc_t
Definition: volk_complex.h:56