71 #ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
72 #define INCLUDED_volk_32f_x2_subtract_32f_a_H
78 #include <xmmintrin.h>
/*!
 * \brief Subtracts bVector from aVector element by element using SSE.
 *
 * Computes cVector[i] = aVector[i] - bVector[i] for i in [0, num_points).
 * Four floats are processed per iteration with aligned SSE loads and stores
 * (_mm_load_ps / _mm_store_ps), so all three buffers must be 16-byte
 * aligned. The remaining num_points % 4 elements are handled by a scalar
 * tail loop.
 *
 * \param cVector    Output buffer receiving the differences.
 * \param aVector    Input buffer holding the minuends.
 * \param bVector    Input buffer holding the subtrahends.
 * \param num_points Number of floats to process from each buffer.
 */
static inline void
volk_32f_x2_subtract_32f_a_sse(float* cVector,
                               const float* aVector,
                               const float* bVector,
                               unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m128 aVal, bVal, cVal;
  for(; number < quarterPoints; number++){
    aVal = _mm_load_ps(aPtr);
    bVal = _mm_load_ps(bPtr);

    cVal = _mm_sub_ps(aVal, bVal);

    _mm_store_ps(cPtr, cVal);

    /* Step to the next group of four floats; without this every iteration
     * would reprocess the first four elements and the tail loop below
     * would start from the wrong position. */
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the pointers already sit at element quarterPoints * 4. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    *cPtr++ = (*aPtr++) - (*bPtr++);
  }
}
114 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Subtracts bVector from aVector element by element (portable C).
 *
 * Computes cVector[i] = aVector[i] - bVector[i] for i in [0, num_points).
 * Uses no SIMD intrinsics; elements are processed one at a time.
 *
 * \param cVector    Output buffer receiving the differences.
 * \param aVector    Input buffer holding the minuends.
 * \param bVector    Input buffer holding the subtrahends.
 * \param num_points Number of floats to process from each buffer.
 */
static inline void
volk_32f_x2_subtract_32f_generic(float* cVector,
                                 const float* aVector,
                                 const float* bVector,
                                 unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) - (*bPtr++);
  }
}
133 #include <arm_neon.h>
/*!
 * \brief Subtracts bVector from aVector element by element using NEON.
 *
 * Computes cVector[i] = aVector[i] - bVector[i] for i in [0, num_points).
 * Four floats are processed per iteration with vld1q_f32 / vsubq_f32 /
 * vst1q_f32; the remaining num_points % 4 elements are handled by a scalar
 * tail loop.
 *
 * \param cVector    Output buffer receiving the differences.
 * \param aVector    Input buffer holding the minuends.
 * \param bVector    Input buffer holding the subtrahends.
 * \param num_points Number of floats to process from each buffer.
 */
static inline void
volk_32f_x2_subtract_32f_neon(float* cVector,
                              const float* aVector,
                              const float* bVector,
                              unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;
  unsigned int quarter_points = num_points / 4;

  float32x4_t a_vec, b_vec, c_vec;

  for(number = 0; number < quarter_points; number++){
    a_vec = vld1q_f32(aPtr);
    b_vec = vld1q_f32(bPtr);
    c_vec = vsubq_f32(a_vec, b_vec);
    vst1q_f32(cPtr, c_vec);

    /* Step to the next group of four floats; without this every iteration
     * would reprocess the first four elements and the tail loop below
     * would start from the wrong position. */
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the pointers already sit at element quarter_points * 4. */
  for(number = quarter_points * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) - (*bPtr++);
  }
}
/*!
 * \brief Prototype of the ORC-backed subtraction routine.
 *
 * Only the declaration appears here; the definition is provided elsewhere.
 *
 * \param cVector    Output buffer.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of floats to process.
 */
extern void
volk_32f_x2_subtract_32f_a_orc_impl(float* cVector,
                                    const float* aVector,
                                    const float* bVector,
                                    unsigned int num_points);

/*!
 * \brief ORC entry point: forwards all arguments unchanged to
 *        volk_32f_x2_subtract_32f_a_orc_impl().
 *
 * NOTE(review): this "u" wrapper delegates to the "a" implementation; any
 * alignment requirement of that routine is not visible here - confirm.
 *
 * \param cVector    Output buffer.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of floats to process.
 */
static inline void
volk_32f_x2_subtract_32f_u_orc(float* cVector,
                               const float* aVector,
                               const float* bVector,
                               unsigned int num_points)
{
  volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}