1 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
2 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
/*!
  \brief Multiplies each input float by a scalar and converts the product to a saturated 8-bit integer (SSE2)
  \param outputVector Destination for the 8-bit results. The vector loop writes it with aligned
         128-bit stores, so it must be 16-byte aligned.
  \param inputVector Source floats. The vector loop reads it with aligned 128-bit loads, so it
         must be 16-byte aligned.
  \param scalar Factor applied to every input point before conversion
  \param num_points Number of points to convert

  Products are clamped to [-128, 127] and then converted with the current SSE rounding mode
  (round-to-nearest-even unless the caller changed MXCSR). The clamp's comparisons are false
  for NaN, so a NaN product comes out as 127.
*/
static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vMax = _mm_set_ps1(127.0f);
    const __m128 vMin = _mm_set_ps1(-128.0f);
    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;

    for (; number < sixteenthPoints; number++) {
        inputVal1 = _mm_mul_ps(_mm_load_ps(inputVectorPtr), vScalar);
        inputVal2 = _mm_mul_ps(_mm_load_ps(inputVectorPtr + 4), vScalar);
        inputVal3 = _mm_mul_ps(_mm_load_ps(inputVectorPtr + 8), vScalar);
        inputVal4 = _mm_mul_ps(_mm_load_ps(inputVectorPtr + 12), vScalar);
        inputVectorPtr += 16;

        /* Clamp while still in float: _mm_cvtps_epi32 yields 0x80000000 for anything
           outside the int32 range, which the packs below would turn into -128 even for
           huge positive products. */
        inputVal1 = _mm_max_ps(_mm_min_ps(inputVal1, vMax), vMin);
        inputVal2 = _mm_max_ps(_mm_min_ps(inputVal2, vMax), vMin);
        inputVal3 = _mm_max_ps(_mm_min_ps(inputVal3, vMax), vMin);
        inputVal4 = _mm_max_ps(_mm_min_ps(inputVal4, vMax), vMin);

        intInputVal1 = _mm_cvtps_epi32(inputVal1);
        intInputVal2 = _mm_cvtps_epi32(inputVal2);
        intInputVal3 = _mm_cvtps_epi32(inputVal3);
        intInputVal4 = _mm_cvtps_epi32(inputVal4);

        /* Narrow 4 x (4 x int32) -> 2 x (8 x int16) -> 16 x int8. */
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    /* Leftover points (fewer than 16): use the scalar forms of the same clamp and the
       same rounding conversion so a point's result does not depend on its position.
       A plain (int8_t) cast here would truncate instead of round and is undefined for
       out-of-range products. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        __m128 scaled = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
        scaled = _mm_max_ss(_mm_min_ss(scaled, vMax), vMin);
        outputVector[number] = (int8_t)_mm_cvtss_si32(scaled);
    }
}
56 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a saturated 8-bit integer (SSE)
  \param outputVector Destination for the 8-bit results; written one byte at a time
  \param inputVector Source floats. The vector loop reads it with aligned 128-bit loads, so it
         must be 16-byte aligned.
  \param scalar Factor applied to every input point before conversion
  \param num_points Number of points to convert

  Products are clamped to [-128, 127] and then truncated toward zero by the integer cast.
  The clamp's comparisons are false for NaN, so a NaN product comes out as 127.
  NOTE(review): the SSE2 kernel in this file converts with _mm_cvtps_epi32, which rounds to
  nearest instead of truncating - confirm which rounding callers expect.
*/
static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector,
                                                  const float* inputVector,
                                                  const float scalar,
                                                  unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vMax = _mm_set_ps1(127.0f);
    const __m128 vMin = _mm_set_ps1(-128.0f);
    __m128 ret;
    /* Scratch for the four scaled lanes; filled with an unaligned store so it needs no
       alignment attribute. */
    float outputFloatBuffer[4];

    for (; number < quarterPoints; number++) {
        ret = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4; /* step to the next group of four inputs */

        ret = _mm_mul_ps(ret, vScalar);
        /* Saturate before the casts below: converting an out-of-range float to int8_t
           is undefined behaviour. */
        ret = _mm_max_ps(_mm_min_ps(ret, vMax), vMin);

        _mm_storeu_ps(outputFloatBuffer, ret);
        *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
        *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
        *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
        *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
    }

    /* Leftover points (fewer than 4): same clamp, written so NaN also lands on 127 like
       the vector min/max above. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        float scaled = inputVector[number] * scalar;
        scaled = (scaled < 127.0f) ? scaled : 127.0f;
        scaled = (scaled > -128.0f) ? scaled : -128.0f;
        outputVector[number] = (int8_t)scaled;
    }
}
96 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by a scalar and converts the product to a saturated 8-bit integer (portable C)
  \param outputVector Destination for the 8-bit results
  \param inputVector Source floats
  \param scalar Factor applied to every input point before conversion
  \param num_points Number of points to convert

  Products are clamped to [-128, 127] and then truncated toward zero by the integer cast.
  A NaN product fails the "less than max" test and therefore comes out as 127.
  NOTE(review): the SSE2 kernel in this file converts with _mm_cvtps_epi32, which rounds to
  nearest instead of truncating - confirm which rounding callers expect.
*/
static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector,
                                                      const float* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    /* Literal limits instead of INT8_MAX/INT8_MIN so the header does not depend on the
       <stdint.h> limit macros being visible to every includer. */
    const float maxVal = 127.0f;
    const float minVal = -128.0f;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        float scaled = inputVector[number] * scalar;

        /* Saturate first: converting an out-of-range (or NaN) float to int8_t is
           undefined behaviour. The negated comparison also routes NaN to maxVal. */
        if (!(scaled < maxVal)) {
            scaled = maxVal;
        } else if (scaled < minVal) {
            scaled = minVal;
        }

        outputVector[number] = (int8_t)scaled;
    }
}