56 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
57 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
63 #include <xmmintrin.h>
/*!
 * \brief First-difference of a float stream with a single wrap into [-bound, bound] (SSE).
 *
 * Computes outputVector[0] = inputVector[0] - *saveValue and, for i >= 1,
 * outputVector[i] = inputVector[i] - inputVector[i-1]. A difference greater
 * than +bound has 2*bound subtracted; a difference less than -bound has
 * 2*bound added. Before returning, *saveValue is set to
 * inputVector[num_points-1] so the next call continues from the last sample.
 *
 * The vector loop uses _mm_load_ps / _mm_store_ps at offsets that are multiples
 * of four floats from the start of each buffer, so both buffers must be
 * 16-byte aligned.
 *
 * \param outputVector destination buffer, at least num_points floats
 * \param inputVector  source buffer, at least num_points floats
 * \param bound        wrap threshold; the correction applied is +/- 2*bound
 * \param saveValue    in: sample preceding inputVector[0]; out: last input sample
 * \param num_points   number of samples to process; 0 is a no-op
 */
static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector,
                                                         const float* inputVector,
                                                         const float bound,
                                                         float* saveValue,
                                                         unsigned int num_points)
{
  /* Nothing to do; also keeps (num_points - 1) below from wrapping around. */
  if (num_points < 1) {
    return;
  }

  unsigned int number = 1;
  unsigned int j = 0;
  /* Block 0 (elements 0..3) is handled by scalar code, so the vector loop
   * starts at block 1 and covers elements 4 .. 4*quarterPoints-1. */
  const unsigned int quarterPoints = (num_points - 1) / 4;

  float* outPtr = outputVector;
  const float* inPtr = inputVector;

  const __m128 upperBound = _mm_set_ps1(bound);
  const __m128 lowerBound = _mm_set_ps1(-bound);
  __m128 next3old1;
  __m128 next4;
  __m128 boundAdjust;
  const __m128 posBoundAdjust = _mm_set_ps1(-2 * bound); /* added when above +bound */
  const __m128 negBoundAdjust = _mm_set_ps1(2 * bound);  /* added when below -bound */

  /* First sample: its predecessor is the value carried in through saveValue. */
  *outPtr = *inPtr - *saveValue;
  if (*outPtr > bound) {
    *outPtr -= 2 * bound;
  }
  if (*outPtr < -bound) {
    *outPtr += 2 * bound;
  }
  inPtr++;
  outPtr++;

  /* Remaining samples of the first block (or of the whole input if shorter). */
  for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
    *outPtr = *(inPtr) - *(inPtr - 1);
    if (*outPtr > bound) {
      *outPtr -= 2 * bound;
    }
    if (*outPtr < -bound) {
      *outPtr += 2 * bound;
    }
    inPtr++;
    outPtr++;
  }

  /* Four differences per iteration. */
  for (; number < quarterPoints; number++) {
    /* inPtr-1 is one float before an aligned address, hence the unaligned load. */
    next3old1 = _mm_loadu_ps(inPtr - 1);
    next4 = _mm_load_ps(inPtr);
    inPtr += 4;

    /* current - previous */
    next3old1 = _mm_sub_ps(next4, next3old1);

    /* The compares yield all-ones lanes where the condition holds; AND-ing
     * with the adjustment selects -2*bound / +2*bound for those lanes and
     * 0 elsewhere. */
    boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
    boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
    next4 = _mm_cmplt_ps(next3old1, lowerBound);
    next4 = _mm_and_ps(next4, negBoundAdjust);
    boundAdjust = _mm_or_ps(next4, boundAdjust);

    /* Apply the per-lane correction and store. */
    next3old1 = _mm_add_ps(next3old1, boundAdjust);
    _mm_store_ps(outPtr, next3old1);
    outPtr += 4;
  }

  /* Scalar tail: everything after the last element the code above produced. */
  for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints)); number < num_points;
       number++) {
    *outPtr = *(inPtr) - *(inPtr - 1);
    if (*outPtr > bound) {
      *outPtr -= 2 * bound;
    }
    if (*outPtr < -bound) {
      *outPtr += 2 * bound;
    }
    inPtr++;
    outPtr++;
  }

  /* Carry the last input sample over to the next call. */
  *saveValue = inputVector[num_points - 1];
}
129 #ifdef LV_HAVE_GENERIC
/*!
 * \brief First-difference of a float stream with a single wrap into [-bound, bound] (portable C).
 *
 * Computes outputVector[0] = inputVector[0] - *saveValue and, for i >= 1,
 * outputVector[i] = inputVector[i] - inputVector[i-1]. A difference greater
 * than +bound has 2*bound subtracted; a result less than -bound then has
 * 2*bound added. Before returning, *saveValue is set to
 * inputVector[num_points-1] so the next call continues from the last sample.
 *
 * \param outputVector destination buffer, at least num_points floats
 * \param inputVector  source buffer, at least num_points floats
 * \param bound        wrap threshold; the correction applied is +/- 2*bound
 * \param saveValue    in: sample preceding inputVector[0]; out: last input sample
 * \param num_points   number of samples to process; 0 is a no-op
 */
static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector,
                                                           const float* inputVector,
                                                           const float bound,
                                                           float* saveValue,
                                                           unsigned int num_points)
{
  /* Nothing to do; also avoids indexing inputVector[num_points-1] with a
   * wrapped-around index below. */
  if (num_points < 1) {
    return;
  }

  unsigned int number = 0;

  /* First sample: its predecessor is the value carried in through saveValue. */
  outputVector[0] = inputVector[0] - *saveValue;
  if (outputVector[0] > bound) {
    outputVector[0] -= 2 * bound;
  }
  if (outputVector[0] < -bound) {
    outputVector[0] += 2 * bound;
  }

  /* Every later sample is differenced against its immediate predecessor. */
  for (number = 1; number < num_points; number++) {
    outputVector[number] = inputVector[number] - inputVector[number - 1];
    if (outputVector[number] > bound) {
      outputVector[number] -= 2 * bound;
    }
    if (outputVector[number] < -bound) {
      outputVector[number] += 2 * bound;
    }
  }

  /* Carry the last input sample over to the next call. */
  *saveValue = inputVector[num_points - 1];
}