GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_64u_byteswap.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_64u_byteswap
25  *
26  * \b Overview
27  *
28  * Byteswaps (in-place) an aligned vector of uint64_t values.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_64u_byteswap(uint64_t* intsToSwap, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li intsToSwap: The vector of data to byte swap
37  * \li num_points: The number of data points
38  *
39  * \b Outputs
40  * \li intsToSwap: returns as an in-place calculation.
41  *
42  * \b Example
43  * \code
44  * int N = 10;
45  * unsigned int alignment = volk_get_alignment();
46  *
47  * uint64_t bitstring[] = {0x0, 0x1, 0xf, 0xffffffffffffffff,
48  * 0x5a5a5a5a5a5a5a5a, 0xa5a5a5a5a5a5a5a5, 0x2a2a2a2a2a2a2a2a,
49  * 0xffffffff, 0x32, 0x64};
50  * uint64_t hamming_distance = 0;
51  *
52  * printf("byteswap vector =\n");
53  * for(unsigned int ii=0; ii<N; ++ii){
54  * printf(" %.16" PRIx64 "\n", bitstring[ii]);
55  * }
56  *
57  * volk_64u_byteswap(bitstring, N);
58  *
59  * printf("byteswapped vector =\n");
60  * for(unsigned int ii=0; ii<N; ++ii){
61  * printf(" %.16" PRIx64 "\n", bitstring[ii]);
62  * }
63  * \endcode
64  */
65 
66 #ifndef INCLUDED_volk_64u_byteswap_u_H
67 #define INCLUDED_volk_64u_byteswap_u_H
68 
69 #include <inttypes.h>
70 #include <stdio.h>
71 
72 #ifdef LV_HAVE_SSE2
73 #include <emmintrin.h>
74 
/*!
 * \brief Byteswaps each 64-bit value of a vector in place using SSE2
 *        with unaligned loads and stores.
 *
 * \param intsToSwap  Vector of 64-bit values; every element is overwritten
 *                    with its byte-reversed value.
 * \param num_points  Number of 64-bit elements in intsToSwap.
 */
static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int num_points){
  uint64_t* inputPtr = intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    // Load two 64-bit values (four 32-bit lanes); the pointer is advanced
    // only after the store because the operation is in-place.
    const __m128i input = _mm_loadu_si128((const __m128i*)inputPtr);

    // Reverse the bytes inside every 32-bit lane: the outer bytes need no
    // mask after a 24-bit shift, the two inner bytes are shifted by 8 and masked.
    const __m128i byte1 = _mm_slli_epi32(input, 24);
    const __m128i byte4 = _mm_srli_epi32(input, 24);
    const __m128i byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    const __m128i byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    __m128i output = _mm_or_si128(_mm_or_si128(byte1, byte4),
                                  _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit halves of each 64-bit value to complete the
    // 64-bit byte reversal.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 2;
  }

  // Byteswap the remaining point when num_points is odd. The value is
  // accessed as a uint64_t (its real type) rather than through a uint32_t
  // pointer, which would violate the strict-aliasing rules.
  for(number = halfPoints * 2; number < num_points; number++){
    const uint64_t value = *inputPtr;
    *inputPtr++ = ((value >> 56) & 0x00000000000000ffULL) |
                  ((value >> 40) & 0x000000000000ff00ULL) |
                  ((value >> 24) & 0x0000000000ff0000ULL) |
                  ((value >>  8) & 0x00000000ff000000ULL) |
                  ((value <<  8) & 0x000000ff00000000ULL) |
                  ((value << 24) & 0x0000ff0000000000ULL) |
                  ((value << 40) & 0x00ff000000000000ULL) |
                  ((value << 56) & 0xff00000000000000ULL);
  }
}
120 #endif /* LV_HAVE_SSE2 */
121 
122 
123 
124 #ifdef LV_HAVE_GENERIC
125 
/*!
 * \brief Byteswaps each 64-bit value of a vector in place (portable C).
 *
 * \param intsToSwap  Vector of 64-bit values; every element is overwritten
 *                    with its byte-reversed value.
 * \param num_points  Number of 64-bit elements in intsToSwap.
 */
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Operate on the value as a uint64_t (its real type) instead of as two
    // uint32_t halves reached through a casted pointer; the result is the
    // same full 8-byte reversal without a strict-aliasing violation.
    const uint64_t value = intsToSwap[point];
    intsToSwap[point] = ((value >> 56) & 0x00000000000000ffULL) |
                        ((value >> 40) & 0x000000000000ff00ULL) |
                        ((value >> 24) & 0x0000000000ff0000ULL) |
                        ((value >>  8) & 0x00000000ff000000ULL) |
                        ((value <<  8) & 0x000000ff00000000ULL) |
                        ((value << 24) & 0x0000ff0000000000ULL) |
                        ((value << 40) & 0x00ff000000000000ULL) |
                        ((value << 56) & 0xff00000000000000ULL);
  }
}
141 #endif /* LV_HAVE_GENERIC */
142 
143 
144 
145 #ifdef LV_HAVE_NEON
146 #include <arm_neon.h>
147 
/*!
 * \brief Byteswaps each 64-bit value of a vector in place using NEON
 *        table lookups.
 *
 * \param intsToSwap  Vector of 64-bit values; every element is overwritten
 *                    with its byte-reversed value.
 * \param num_points  Number of 64-bit elements in intsToSwap.
 */
static inline void volk_64u_byteswap_neon(uint64_t* intsToSwap, unsigned int num_points){
  uint64_t* inputPtr = intsToSwap;
  unsigned int number = 0;
  // Each vector iteration consumes 32 bytes, i.e. four 64-bit points.
  const unsigned int quarterPoints = num_points / 4;

  uint8x8x4_t input_table;
  uint8x8_t lookup0, lookup1, lookup2, lookup3;
  uint8x8_t swapped0, swapped1, swapped2, swapped3;

  /* These magic numbers are byte indices into the 32-byte table produced
     by vld4_u8. That load de-interleaves with a stride of four, so memory
     byte b sits at table index (b % 4) * 8 + (b / 4). The indices for the
     k-th 64-bit value select its bytes 8k+7 down to 8k, and are packed
     with the first index in the least significant byte; for example for
     lookup0:
       uint8_t chars[8] = {25, 17, 9, 1, 24, 16, 8, 0};
       for(ii=0; ii < 8; ++ii) {
         index += ((uint64_t)(*(chars+ii))) << (ii*8);
       }
  */
  lookup0 = vcreate_u8(0x0008101801091119ULL);
  lookup1 = vcreate_u8(0x020A121A030B131BULL);
  lookup2 = vcreate_u8(0x040C141C050D151DULL);
  lookup3 = vcreate_u8(0x060E161E070F171FULL);

  for(number = 0; number < quarterPoints; ++number){
    // All 32 bytes are loaded before any store, so working in place is safe.
    input_table = vld4_u8((const uint8_t*) inputPtr);
    swapped0 = vtbl4_u8(input_table, lookup0);
    swapped1 = vtbl4_u8(input_table, lookup1);
    swapped2 = vtbl4_u8(input_table, lookup2);
    swapped3 = vtbl4_u8(input_table, lookup3);
    vst1_u8((uint8_t*) inputPtr, swapped0);
    vst1_u8((uint8_t*) (inputPtr+1), swapped1);
    vst1_u8((uint8_t*) (inputPtr+2), swapped2);
    vst1_u8((uint8_t*) (inputPtr+3), swapped3);

    // Advance past the four 64-bit values (32 bytes) just processed.
    // Advancing by only 16 bytes would re-swap two of them on the next
    // pass and leave the end of the vector untouched.
    inputPtr += 4;
  }

  // Byteswap the up-to-three points that do not fill a whole vector pass.
  for(number = quarterPoints * 4; number < num_points; ++number){
    const uint64_t value = *inputPtr;
    *inputPtr++ = ((value >> 56) & 0x00000000000000ffULL) |
                  ((value >> 40) & 0x000000000000ff00ULL) |
                  ((value >> 24) & 0x0000000000ff0000ULL) |
                  ((value >>  8) & 0x00000000ff000000ULL) |
                  ((value <<  8) & 0x000000ff00000000ULL) |
                  ((value << 24) & 0x0000ff0000000000ULL) |
                  ((value << 40) & 0x00ff000000000000ULL) |
                  ((value << 56) & 0xff00000000000000ULL);
  }
}
196 #endif /* LV_HAVE_NEON */
197 
198 
199 #endif /* INCLUDED_volk_64u_byteswap_u_H */
200 #ifndef INCLUDED_volk_64u_byteswap_a_H
201 #define INCLUDED_volk_64u_byteswap_a_H
202 
203 #include <inttypes.h>
204 #include <stdio.h>
205 
206 
207 #ifdef LV_HAVE_SSE2
208 #include <emmintrin.h>
209 
/*!
 * \brief Byteswaps each 64-bit value of a vector in place using SSE2
 *        with aligned loads and stores.
 *
 * \param intsToSwap  Vector of 64-bit values; every element is overwritten
 *                    with its byte-reversed value. Must be 16-byte aligned,
 *                    as required by _mm_load_si128 / _mm_store_si128.
 * \param num_points  Number of 64-bit elements in intsToSwap.
 */
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
  uint64_t* inputPtr = intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    // Load two 64-bit values (four 32-bit lanes); the pointer is advanced
    // only after the store because the operation is in-place.
    const __m128i input = _mm_load_si128((const __m128i*)inputPtr);

    // Reverse the bytes inside every 32-bit lane: the outer bytes need no
    // mask after a 24-bit shift, the two inner bytes are shifted by 8 and masked.
    const __m128i byte1 = _mm_slli_epi32(input, 24);
    const __m128i byte4 = _mm_srli_epi32(input, 24);
    const __m128i byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    const __m128i byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    __m128i output = _mm_or_si128(_mm_or_si128(byte1, byte4),
                                  _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit halves of each 64-bit value to complete the
    // 64-bit byte reversal.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 2;
  }

  // Byteswap the remaining point when num_points is odd. The value is
  // accessed as a uint64_t (its real type) rather than through a uint32_t
  // pointer, which would violate the strict-aliasing rules.
  for(number = halfPoints * 2; number < num_points; number++){
    const uint64_t value = *inputPtr;
    *inputPtr++ = ((value >> 56) & 0x00000000000000ffULL) |
                  ((value >> 40) & 0x000000000000ff00ULL) |
                  ((value >> 24) & 0x0000000000ff0000ULL) |
                  ((value >>  8) & 0x00000000ff000000ULL) |
                  ((value <<  8) & 0x000000ff00000000ULL) |
                  ((value << 24) & 0x0000ff0000000000ULL) |
                  ((value << 40) & 0x00ff000000000000ULL) |
                  ((value << 56) & 0xff00000000000000ULL);
  }
}
255 #endif /* LV_HAVE_SSE2 */
256 
257 
258 #ifdef LV_HAVE_GENERIC
259 
/*!
 * \brief Byteswaps each 64-bit value of a vector in place (portable C,
 *        aligned-kernel entry point).
 *
 * \param intsToSwap  Vector of 64-bit values; every element is overwritten
 *                    with its byte-reversed value.
 * \param num_points  Number of 64-bit elements in intsToSwap.
 */
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Operate on the value as a uint64_t (its real type) instead of as two
    // uint32_t halves reached through a casted pointer; the result is the
    // same full 8-byte reversal without a strict-aliasing violation.
    const uint64_t value = intsToSwap[point];
    intsToSwap[point] = ((value >> 56) & 0x00000000000000ffULL) |
                        ((value >> 40) & 0x000000000000ff00ULL) |
                        ((value >> 24) & 0x0000000000ff0000ULL) |
                        ((value >>  8) & 0x00000000ff000000ULL) |
                        ((value <<  8) & 0x000000ff00000000ULL) |
                        ((value << 24) & 0x0000ff0000000000ULL) |
                        ((value << 40) & 0x00ff000000000000ULL) |
                        ((value << 56) & 0xff00000000000000ULL);
  }
}
275 #endif /* LV_HAVE_GENERIC */
276 
277 
278 
279 
280 #endif /* INCLUDED_volk_64u_byteswap_a_H */
unsigned char uint8_t
Definition: stdint.h:78
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90