53 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
54 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
60 #include <tmmintrin.h>
63 volk_8ic_deinterleave_real_8i_a_ssse3(
int8_t* iBuffer,
const lv_8sc_t* complexVector,
64 unsigned int num_points)
66 unsigned int number = 0;
68 int8_t* iBufferPtr = iBuffer;
69 __m128i moveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
70 __m128i moveMask2 = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
71 __m128i complexVal1, complexVal2, outputVal;
73 unsigned int sixteenthPoints = num_points / 16;
75 for(number = 0; number < sixteenthPoints; number++){
76 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
77 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
79 complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
80 complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
82 outputVal = _mm_or_si128(complexVal1, complexVal2);
84 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
88 number = sixteenthPoints * 16;
89 for(; number < num_points; number++){
90 *iBufferPtr++ = *complexVectorPtr++;
98 #include <immintrin.h>
101 volk_8ic_deinterleave_real_8i_a_avx(
int8_t* iBuffer,
const lv_8sc_t* complexVector,
102 unsigned int num_points)
104 unsigned int number = 0;
105 const int8_t* complexVectorPtr = (
int8_t*)complexVector;
106 int8_t* iBufferPtr = iBuffer;
107 __m128i moveMaskL = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
108 __m128i moveMaskH = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
109 __m256i complexVal1, complexVal2, outputVal;
110 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1, outputVal2;
112 unsigned int thirtysecondPoints = num_points / 32;
114 for(number = 0; number < thirtysecondPoints; number++){
116 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
117 complexVectorPtr += 32;
118 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
119 complexVectorPtr += 32;
121 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
122 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
123 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
124 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
126 complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
127 complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
128 outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
131 complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
132 complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
133 outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
135 __m256i dummy = _mm256_setzero_si256();
136 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
137 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
140 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
144 number = thirtysecondPoints * 32;
145 for(; number < num_points; number++){
146 *iBufferPtr++ = *complexVectorPtr++;
153 #ifdef LV_HAVE_GENERIC
156 volk_8ic_deinterleave_real_8i_generic(
int8_t* iBuffer,
const lv_8sc_t* complexVector,
157 unsigned int num_points)
159 unsigned int number = 0;
160 const int8_t* complexVectorPtr = (
int8_t*)complexVector;
161 int8_t* iBufferPtr = iBuffer;
162 for(number = 0; number < num_points; number++){
163 *iBufferPtr++ = *complexVectorPtr++;
171 #include <arm_neon.h>
/**
 * Copies the real component of each 8-bit complex sample into iBuffer
 * (ARM NEON implementation).
 *
 * Assumes each lv_8sc_t is stored as a (real, imaginary) pair of bytes, so
 * the real parts are the even-indexed bytes of the input stream.
 *
 * @param iBuffer        Output buffer; receives num_points real bytes.
 * @param complexVector  Input buffer of num_points interleaved samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void
volk_8ic_deinterleave_real_8i_neon(int8_t* iBuffer,
                                   const lv_8sc_t* complexVector,
                                   unsigned int num_points)
{
  unsigned int number;
  const unsigned int sixteenth_points = num_points / 16;

  int8x16x2_t input_vector;
  for (number = 0; number < sixteenth_points; ++number) {
    /* vld2q_s8 de-interleaves on load: val[0] receives the even bytes and
     * val[1] the odd bytes of the 32 bytes read. */
    input_vector = vld2q_s8((const int8_t*)complexVector);
    vst1q_s8(iBuffer, input_vector.val[0]);
    iBuffer += 16;       /* 16 real bytes written */
    complexVector += 16; /* 16 complex samples consumed */
  }

  /* Scalar tail for the remaining (num_points % 16) samples. */
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;
  for (number = sixteenth_points * 16; number < num_points; number++) {
    *iBufferPtr++ = *complexVectorPtr++; /* keep the real byte */
    complexVectorPtr++;                  /* skip the imaginary byte */
  }
}
signed char int8_t
Definition: stdint.h:75
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52