1 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE2, unaligned loads/stores)
  \param outputVector Receives num_points converted int32_t values
  \param inputVector The num_points floats to be scaled and converted
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Every element is converted with the SSE float-to-int conversion, which
  rounds according to the current MXCSR rounding mode (round-to-nearest-even
  unless the caller changed it). The leftover (num_points % 4) elements use the
  scalar-lane form of the same instructions so that the result of an element
  does not depend on its position in the buffer.
*/
static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;
  const __m128 vScalar = _mm_set_ps1(scalar);
  __m128 inputVal1;
  __m128i intInputVal1;

  /* Main body: four values per iteration. */
  for(; number < quarterPoints; number++){
    inputVal1 = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));

    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    /* Advance the write position; without this every store lands on outputVector[0..3]. */
    outputVectorPtr += 4;
  }

  /* Tail: same multiply + rounding conversion, one lane at a time. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = _mm_cvtss_si32(_mm_mul_ss(_mm_load_ss(&inputVector[number]), vScalar));
  }
}
45 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE, unaligned loads)
  \param outputVector Receives num_points converted int32_t values
  \param inputVector The num_points floats to be scaled and converted
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  The multiplication is vectorised; the float-to-int step is a plain C cast,
  i.e. the product is truncated toward zero. Products outside the int32_t
  range are not clamped (such a cast is undefined in C).
*/
static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;
  const __m128 vScalar = _mm_set_ps1(scalar);
  __m128 ret;
  /* Scratch space for the four scaled products; written with an unaligned
     store so the array needs no special alignment. */
  float outputFloatBuffer[4];

  for(; number < quarterPoints; number++){
    ret = _mm_loadu_ps(inputVectorPtr);
    /* Advance the read position; without this the same four inputs are re-read. */
    inputVectorPtr += 4;

    ret = _mm_mul_ps(ret, vScalar);

    _mm_storeu_ps(outputFloatBuffer, ret);
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
  }

  /* Tail: the remaining (num_points % 4) values, one at a time. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (int32_t)(inputVector[number] * scalar);
  }
}
86 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (portable C)
  \param outputVector Receives num_points converted int32_t values
  \param inputVector The num_points floats to be scaled and converted
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  The conversion is a plain C cast, i.e. the product is truncated toward zero.
  Products outside the int32_t range are not clamped (such a cast is
  undefined in C).
*/
static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = (int32_t)(*inputVectorPtr++ * scalar);
  }
}