70 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
71 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
77 #include <emmintrin.h>
80 volk_32f_s32f_convert_32i_u_sse2(
int32_t* outputVector,
const float* inputVector,
81 const float scalar,
unsigned int num_points)
83 unsigned int number = 0;
85 const unsigned int quarterPoints = num_points / 4;
87 const float* inputVectorPtr = (
const float*)inputVector;
88 int32_t* outputVectorPtr = outputVector;
90 float min_val = -2147483647;
91 float max_val = 2147483647;
94 __m128 vScalar = _mm_set_ps1(scalar);
97 __m128 vmin_val = _mm_set_ps1(min_val);
98 __m128 vmax_val = _mm_set_ps1(max_val);
100 for(;number < quarterPoints; number++){
101 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
103 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
104 intInputVal1 = _mm_cvtps_epi32(inputVal1);
106 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
107 outputVectorPtr += 4;
110 number = quarterPoints * 4;
111 for(; number < num_points; number++){
112 r = inputVector[number] * scalar;
117 outputVector[number] = (
int32_t)(r);
125 #include <xmmintrin.h>
128 volk_32f_s32f_convert_32i_u_sse(
int32_t* outputVector,
const float* inputVector,
129 const float scalar,
unsigned int num_points)
131 unsigned int number = 0;
133 const unsigned int quarterPoints = num_points / 4;
135 const float* inputVectorPtr = (
const float*)inputVector;
136 int32_t* outputVectorPtr = outputVector;
138 float min_val = -2147483647;
139 float max_val = 2147483647;
142 __m128 vScalar = _mm_set_ps1(scalar);
144 __m128 vmin_val = _mm_set_ps1(min_val);
145 __m128 vmax_val = _mm_set_ps1(max_val);
149 for(;number < quarterPoints; number++){
150 ret = _mm_loadu_ps(inputVectorPtr);
153 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
155 _mm_store_ps(outputFloatBuffer, ret);
156 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[0]);
157 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[1]);
158 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[2]);
159 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[3]);
162 number = quarterPoints * 4;
163 for(; number < num_points; number++){
164 r = inputVector[number] * scalar;
169 outputVector[number] = (
int32_t)(r);
176 #ifdef LV_HAVE_GENERIC
179 volk_32f_s32f_convert_32i_generic(
int32_t* outputVector,
const float* inputVector,
180 const float scalar,
unsigned int num_points)
182 int32_t* outputVectorPtr = outputVector;
183 const float* inputVectorPtr = inputVector;
184 unsigned int number = 0;
185 float min_val = -2147483647;
186 float max_val = 2147483647;
189 for(number = 0; number < num_points; number++){
190 r = *inputVectorPtr++ * scalar;
195 *outputVectorPtr++ = (
int32_t)(r);
204 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
205 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
212 #include <immintrin.h>
215 volk_32f_s32f_convert_32i_a_avx(
int32_t* outputVector,
const float* inputVector,
216 const float scalar,
unsigned int num_points)
218 unsigned int number = 0;
220 const unsigned int eighthPoints = num_points / 8;
222 const float* inputVectorPtr = (
const float*)inputVector;
223 int32_t* outputVectorPtr = outputVector;
225 float min_val = -2147483647;
226 float max_val = 2147483647;
229 __m256 vScalar = _mm256_set1_ps(scalar);
231 __m256i intInputVal1;
232 __m256 vmin_val = _mm256_set1_ps(min_val);
233 __m256 vmax_val = _mm256_set1_ps(max_val);
235 for(;number < eighthPoints; number++){
236 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
238 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
239 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
241 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
242 outputVectorPtr += 8;
245 number = eighthPoints * 8;
246 for(; number < num_points; number++){
247 r = inputVector[number] * scalar;
252 outputVector[number] = (
int32_t)(r);
260 #include <emmintrin.h>
263 volk_32f_s32f_convert_32i_a_sse2(
int32_t* outputVector,
const float* inputVector,
264 const float scalar,
unsigned int num_points)
266 unsigned int number = 0;
268 const unsigned int quarterPoints = num_points / 4;
270 const float* inputVectorPtr = (
const float*)inputVector;
271 int32_t* outputVectorPtr = outputVector;
273 float min_val = -2147483647;
274 float max_val = 2147483647;
277 __m128 vScalar = _mm_set_ps1(scalar);
279 __m128i intInputVal1;
280 __m128 vmin_val = _mm_set_ps1(min_val);
281 __m128 vmax_val = _mm_set_ps1(max_val);
283 for(;number < quarterPoints; number++){
284 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
286 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
287 intInputVal1 = _mm_cvtps_epi32(inputVal1);
289 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
290 outputVectorPtr += 4;
293 number = quarterPoints * 4;
294 for(; number < num_points; number++){
295 r = inputVector[number] * scalar;
300 outputVector[number] = (
int32_t)(r);
308 #include <xmmintrin.h>
311 volk_32f_s32f_convert_32i_a_sse(
int32_t* outputVector,
const float* inputVector,
312 const float scalar,
unsigned int num_points)
314 unsigned int number = 0;
316 const unsigned int quarterPoints = num_points / 4;
318 const float* inputVectorPtr = (
const float*)inputVector;
319 int32_t* outputVectorPtr = outputVector;
321 float min_val = -2147483647;
322 float max_val = 2147483647;
325 __m128 vScalar = _mm_set_ps1(scalar);
327 __m128 vmin_val = _mm_set_ps1(min_val);
328 __m128 vmax_val = _mm_set_ps1(max_val);
332 for(;number < quarterPoints; number++){
333 ret = _mm_load_ps(inputVectorPtr);
336 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
338 _mm_store_ps(outputFloatBuffer, ret);
339 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[0]);
340 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[1]);
341 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[2]);
342 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[3]);
345 number = quarterPoints * 4;
346 for(; number < num_points; number++){
347 r = inputVector[number] * scalar;
352 outputVector[number] = (
int32_t)(r);
359 #ifdef LV_HAVE_GENERIC
362 volk_32f_s32f_convert_32i_a_generic(
int32_t* outputVector,
const float* inputVector,
363 const float scalar,
unsigned int num_points)
365 int32_t* outputVectorPtr = outputVector;
366 const float* inputVectorPtr = inputVector;
367 unsigned int number = 0;
368 float min_val = -2147483647;
369 float max_val = 2147483647;
372 for(number = 0; number < num_points; number++){
373 r = *inputVectorPtr++ * scalar;
378 *outputVectorPtr++ = (
int32_t)(r);
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
signed int int32_t
Definition: stdint.h:77