53 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
54 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
60 #include <smmintrin.h>
63 volk_8ic_deinterleave_real_16i_a_sse4_1(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
64 unsigned int num_points)
66 unsigned int number = 0;
69 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
70 __m128i complexVal, outputVal;
72 unsigned int eighthPoints = num_points / 8;
74 for(number = 0; number < eighthPoints; number++){
75 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
77 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
79 outputVal = _mm_cvtepi8_epi16(complexVal);
80 outputVal = _mm_slli_epi16(outputVal, 7);
82 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
86 number = eighthPoints * 8;
87 for(; number < num_points; number++){
88 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 128;
96 #include <immintrin.h>
99 volk_8ic_deinterleave_real_16i_a_avx(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
100 unsigned int num_points)
102 unsigned int number = 0;
103 const int8_t* complexVectorPtr = (
int8_t*)complexVector;
105 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
106 __m256i complexVal, outputVal;
107 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
109 unsigned int sixteenthPoints = num_points / 16;
111 for(number = 0; number < sixteenthPoints; number++){
112 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
114 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
115 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
117 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
118 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
120 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
121 outputVal1 = _mm_slli_epi16(outputVal1, 7);
122 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
123 outputVal0 = _mm_slli_epi16(outputVal0, 7);
125 __m256i dummy = _mm256_setzero_si256();
126 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
127 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
128 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
133 number = sixteenthPoints * 16;
134 for(; number < num_points; number++){
135 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 128;
142 #ifdef LV_HAVE_GENERIC
145 volk_8ic_deinterleave_real_16i_generic(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
146 unsigned int num_points)
148 unsigned int number = 0;
149 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
151 for(number = 0; number < num_points; number++){
152 *iBufferPtr++ = ((
int16_t)(*complexVectorPtr++)) * 128;
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52