71 #ifndef INCLUDED_volk_32f_x2_min_32f_a_H
72 #define INCLUDED_volk_32f_x2_min_32f_a_H
78 #include <xmmintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (SSE, aligned buffers).
 *
 * For every index i in [0, num_points), stores the smaller of aVector[i]
 * and bVector[i] into cVector[i]. Groups of four floats are handled with
 * SSE; the remaining num_points % 4 elements are handled by scalar code.
 *
 * The vector path uses the aligned load/store intrinsics (_mm_load_ps /
 * _mm_store_ps), so all three buffers must be 16-byte aligned.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_a_sse(float* cVector,
                          const float* aVector,
                          const float* bVector,
                          unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);
        bVal = _mm_load_ps(bPtr);

        cVal = _mm_min_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Step to the next group of four floats; without this every
         * iteration would reprocess elements 0..3 and the scalar tail
         * below would start from the wrong position. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the pointers now sit at element quarterPoints * 4. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        /* b is stored whenever a < b is false (equal values or NaN). */
        *cPtr++ = (a < b ? a : b);
    }
}
116 #include <arm_neon.h>
/*!
 * \brief Element-wise minimum of two float vectors (NEON).
 *
 * For every index i in [0, num_points), stores the smaller of aVector[i]
 * and bVector[i] into cVector[i]. Groups of four floats are handled with
 * NEON (vld1q_f32 / vminq_f32 / vst1q_f32); the remaining
 * num_points % 4 elements are handled by scalar code.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_neon(float* cVector,
                         const float* aVector,
                         const float* bVector,
                         unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;

    float32x4_t a_vec, b_vec, c_vec;
    for (number = 0; number < quarter_points; number++) {
        a_vec = vld1q_f32(aPtr);
        b_vec = vld1q_f32(bPtr);

        c_vec = vminq_f32(a_vec, b_vec);

        vst1q_f32(cPtr, c_vec);

        /* Step to the next group of four floats; without this every
         * iteration would reprocess elements 0..3 and the scalar tail
         * below would start from the wrong position. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the pointers now sit at element quarter_points * 4. */
    for (number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        /* b is stored whenever a < b is false (equal values or NaN). */
        *cPtr++ = (a < b ? a : b);
    }
}
150 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise minimum of two float vectors (portable C).
 *
 * For every index i in [0, num_points), stores the smaller of aVector[i]
 * and bVector[i] into cVector[i]. Both inputs for an element are read
 * before the corresponding output element is written.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_generic(float* cVector,
                            const float* aVector,
                            const float* bVector,
                            unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        /* b is stored whenever a < b is false (equal values or NaN). */
        *cPtr++ = (a < b ? a : b);
    }
}
/*!
 * \brief Implementation of the element-wise minimum that is defined
 *        outside this header.
 *
 * NOTE(review): no return type was present on this declaration; void is
 * assumed because the only caller here discards any result. Confirm
 * against the actual definition.
 */
extern void
volk_32f_x2_min_32f_a_orc_impl(float* cVector,
                               const float* aVector,
                               const float* bVector,
                               unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards all arguments unchanged to
 *        volk_32f_x2_min_32f_a_orc_impl().
 *
 * \param cVector    Output buffer passed through to the implementation.
 * \param aVector    First input buffer passed through to the implementation.
 * \param bVector    Second input buffer passed through to the implementation.
 * \param num_points Element count passed through to the implementation.
 */
static inline void
volk_32f_x2_min_32f_u_orc(float* cVector,
                          const float* aVector,
                          const float* bVector,
                          unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}