1 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
/*!
  \brief Multiplies each input float by a scalar and converts the product to a
         saturated, rounded 16-bit integer (SSE2, unaligned loads/stores).

  Every element goes through the same pipeline, whether it is handled by the
  8-wide vector loop or by the scalar tail:
    1. multiply by \p scalar,
    2. clamp to [-32768, 32767] while still in floating point,
    3. convert to integer with the current SSE rounding mode.

  Clamping before the conversion matters: a product beyond the int32 range
  would otherwise be turned into INT_MIN by the conversion instruction and then
  pack to -32768 even when the product was positive.

  \param outputVector destination buffer, at least \p num_points int16_t values
  \param inputVector  source buffer, at least \p num_points floats
  \param scalar       factor applied to every input value before conversion
  \param num_points   number of elements to convert
*/
static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector,
                                                    const float* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int16_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMin = _mm_set_ps1(-32768.0f);
  const __m128 vMax = _mm_set_ps1(32767.0f);
  __m128 inputVal1, inputVal2;
  __m128i intInputVal1, intInputVal2;

  for(; number < eighthPoints; number++){
    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;

    /* Scale, then saturate in the float domain so the int conversion below
       can never overflow. */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vMax), vMin);
    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vMax), vMin);

    intInputVal1 = _mm_cvtps_epi32(inputVal1);
    intInputVal2 = _mm_cvtps_epi32(inputVal2);

    /* Narrow 2 x 4 int32 lanes into 8 int16 lanes. */
    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    /* Advance past the 8 values just written; without this every iteration
       would overwrite the first 8 outputs. */
    outputVectorPtr += 8;
  }

  /* Tail: up to 7 leftover points, converted with the scalar forms of the
     same instructions so results do not depend on element position. */
  for(number = eighthPoints * 8; number < num_points; number++){
    __m128 val = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
    val = _mm_max_ss(_mm_min_ss(val, vMax), vMin);
    outputVector[number] = (int16_t)_mm_cvtss_si32(val);
  }
}
49 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a
         saturated 16-bit integer (SSE, unaligned loads).

  SSE has no packed float-to-int16 conversion, so each group of four products
  is spilled to a small stack buffer and narrowed with ordinary C casts, which
  truncate toward zero. Products are clamped to [-32768, 32767] first, because
  casting an out-of-range float to int16_t is undefined behaviour in C.

  \param outputVector destination buffer, at least \p num_points int16_t values
  \param inputVector  source buffer, at least \p num_points floats
  \param scalar       factor applied to every input value before conversion
  \param num_points   number of elements to convert
*/
static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int16_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMin = _mm_set_ps1(-32768.0f);
  const __m128 vMax = _mm_set_ps1(32767.0f);
  __m128 ret;
  float outputFloatBuffer[4];

  for(; number < quarterPoints; number++){
    ret = _mm_loadu_ps(inputVectorPtr);
    /* Step to the next group of four inputs; without this the same four
       floats would be converted over and over. */
    inputVectorPtr += 4;

    ret = _mm_mul_ps(ret, vScalar);
    ret = _mm_max_ps(_mm_min_ps(ret, vMax), vMin);

    /* Unaligned store: the scratch buffer carries no alignment guarantee. */
    _mm_storeu_ps(outputFloatBuffer, ret);
    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
  }

  /* Tail: up to 3 leftover points, clamped with the scalar forms of the same
     instructions so they saturate exactly like the vector body. */
  for(number = quarterPoints * 4; number < num_points; number++){
    float clamped;
    __m128 val = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
    val = _mm_max_ss(_mm_min_ss(val, vMax), vMin);
    _mm_store_ss(&clamped, val);
    outputVector[number] = (int16_t)clamped;
  }
}
90 #ifdef LV_HAVE_GENERIC
/*!
  \brief Plain C implementation: multiplies each input float by a scalar and
         stores the product cast to a 16-bit integer.

  The conversion is an ordinary C cast, so the fractional part of the product
  is discarded (truncation toward zero).

  NOTE(review): the product is not range-checked before the cast. Values
  outside the int16_t range are not clamped here; confirm that callers keep
  inputs in range or that saturation is not required for this variant.

  \param outputVector destination buffer; one int16_t is written per point
  \param inputVector  source buffer; one float is read per point
  \param scalar       factor applied to every input value before conversion
  \param num_points   number of elements to convert
*/
99 static inline void volk_32f_s32f_convert_16i_u_generic(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
/* Walking pointers over the output and input buffers. */
100 int16_t* outputVectorPtr = outputVector;
101 const float* inputVectorPtr = inputVector;
102 unsigned int number = 0;
/* One element per iteration, num_points iterations in total. */
104 for(number = 0; number < num_points; number++){
/* Scale, cast to int16_t, then advance both pointers by one element. */
105 *outputVectorPtr++ = ((
int16_t)(*inputVectorPtr++ * scalar));