64 #ifndef INCLUDED_volk_64f_convert_32f_u_H
65 #define INCLUDED_volk_64f_convert_32f_u_H
71 #include <emmintrin.h>
/*
 * Converts num_points double-precision values read from inputVector into
 * single-precision values written to outputVector, using SSE2 intrinsics
 * with unaligned loads and stores.
 *
 * outputVector  destination; the function writes indices 0..num_points-1.
 * inputVector   source; the function reads indices 0..num_points-1.
 * num_points    number of elements to convert (0 is a no-op).
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;

    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* Vector loop: consume four doubles and produce four floats per pass. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves two floats in the low half of its result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack both low halves into one register holding four floats. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_storeu_ps(outputVectorPtr, ret);
        /* Advance the destination so the next group does not overwrite
         * the four floats that were just stored. */
        outputVectorPtr += 4;
    }

    /* Scalar tail: the up-to-three elements left after the vector loop. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
104 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar conversion: casts each of the num_points doubles read
 * from inputVector to float and writes it to outputVector in order.
 *
 * outputVector  destination; the function writes indices 0..num_points-1.
 * inputVector   source; the function reads indices 0..num_points-1.
 * num_points    number of elements to convert (0 is a no-op).
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
121 #ifndef INCLUDED_volk_64f_convert_32f_a_H
122 #define INCLUDED_volk_64f_convert_32f_a_H
128 #include <emmintrin.h>
/*
 * Converts num_points double-precision values read from inputVector into
 * single-precision values written to outputVector, using SSE2 intrinsics
 * with aligned loads (_mm_load_pd) and aligned stores (_mm_store_ps).
 *
 * outputVector  destination; the function writes indices 0..num_points-1.
 *               Must be 16-byte aligned, as _mm_store_ps requires.
 * inputVector   source; the function reads indices 0..num_points-1.
 *               Must be 16-byte aligned, as _mm_load_pd requires.
 * num_points    number of elements to convert (0 is a no-op).
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;

    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* Vector loop: consume four doubles and produce four floats per pass. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves two floats in the low half of its result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack both low halves into one register holding four floats. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_store_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail: the up-to-three elements left after the vector loop. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
161 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar conversion: casts each of the num_points doubles read
 * from inputVector to float and writes it to outputVector in order.
 * The body uses plain pointer dereferences only.
 *
 * outputVector  destination; the function writes indices 0..num_points-1.
 * inputVector   source; the function reads indices 0..num_points-1.
 * num_points    number of elements to convert (0 is a no-op).
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}