GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_16u_byteswap.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_16u_byteswap
25  *
26  * \b Overview
27  *
28  * Byteswaps (in-place) an aligned vector of uint16_t's.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_16u_byteswap(uint16_t* intsToSwap, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li intsToSwap: The vector of data to byte swap.
37  * \li num_points: The number of data points.
38  *
39  * \b Outputs
40  * \li intsToSwap: returns as an in-place calculation.
41  *
42  * \b Example
43  * \code
44  * int N = 10000;
45  *
46  * <FIXME>
47  *
48  * volk_16u_byteswap(x, N);
49  *
50  * \endcode
51  */
52 
53 #ifndef INCLUDED_volk_16u_byteswap_u_H
54 #define INCLUDED_volk_16u_byteswap_u_H
55 
56 #include <inttypes.h>
57 #include <stdio.h>
58 
59 #ifdef LV_HAVE_SSE2
60 #include <emmintrin.h>
61 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (SSE2, unaligned access).
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points){
  uint16_t* cursor = intsToSwap;
  const unsigned int vectorBlocks = num_points / 8;
  unsigned int leftover = num_points - vectorBlocks * 8;
  unsigned int block;

  // Eight 16-bit lanes per 128-bit register.
  for(block = 0; block < vectorBlocks; block++){
    // Unaligned load; the pointer is advanced only after the in-place store.
    const __m128i lanes = _mm_loadu_si128((__m128i*)cursor);
    // Within each lane: low byte moves up, high byte moves down, then merge.
    const __m128i lowToHigh = _mm_slli_epi16(lanes, 8);
    const __m128i highToLow = _mm_srli_epi16(lanes, 8);
    _mm_storeu_si128((__m128i*)cursor, _mm_or_si128(lowToHigh, highToLow));
    cursor += 8;
  }

  // Scalar swap for the values that did not fill a whole register.
  while(leftover-- > 0){
    const uint16_t word = *cursor;
    *cursor++ = (uint16_t)((word >> 8) | (word << 8));
  }
}
90 #endif /* LV_HAVE_SSE2 */
91 
92 #ifdef LV_HAVE_GENERIC
93 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (portable C).
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int idx;
  for(idx = 0; idx < num_points; idx++){
    const uint16_t word = intsToSwap[idx];
    // The cast truncates the promoted int back to 16 bits, dropping the
    // bits that the left shift pushed above bit 15.
    intsToSwap[idx] = (uint16_t)((word << 8) | (word >> 8));
  }
}
104 #endif /* LV_HAVE_GENERIC */
105 
106 #endif /* INCLUDED_volk_16u_byteswap_u_H */
107 #ifndef INCLUDED_volk_16u_byteswap_a_H
108 #define INCLUDED_volk_16u_byteswap_a_H
109 
110 #include <inttypes.h>
111 #include <stdio.h>
112 
113 #ifdef LV_HAVE_SSE2
114 #include <emmintrin.h>
115 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (SSE2, aligned access).
 *
 * Uses _mm_load_si128 / _mm_store_si128, so intsToSwap must be 16-byte
 * aligned whenever num_points >= 8.
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){
  uint16_t* cursor = intsToSwap;
  const unsigned int vectorBlocks = num_points / 8;
  unsigned int leftover = num_points - vectorBlocks * 8;
  unsigned int block;

  // Eight 16-bit lanes per 128-bit register.
  for(block = 0; block < vectorBlocks; block++){
    __m128i* const slot = (__m128i*)cursor;
    const __m128i lanes = _mm_load_si128(slot);
    // Within each lane: low byte moves up, high byte moves down, then merge.
    const __m128i lowToHigh = _mm_slli_epi16(lanes, 8);
    const __m128i highToLow = _mm_srli_epi16(lanes, 8);
    _mm_store_si128(slot, _mm_or_si128(lowToHigh, highToLow));
    cursor += 8;
  }

  // Scalar swap for the values that did not fill a whole register.
  while(leftover-- > 0){
    const uint16_t word = *cursor;
    *cursor++ = (uint16_t)((word >> 8) | (word << 8));
  }
}
145 #endif /* LV_HAVE_SSE2 */
146 
147 #ifdef LV_HAVE_NEON
148 #include <arm_neon.h>
149 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (NEON).
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int number;
  const unsigned int eighth_points = num_points / 8;
  uint16_t* inputPtr = intsToSwap;

  for(number = 0; number < eighth_points; number++) {
    const uint16x8_t input = vld1q_u16(inputPtr);
    // Reverse the two bytes inside every 16-bit lane. The result depends
    // only on the loaded vector, so no uninitialized register is read.
    const uint16x8_t swapped =
      vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(input)));
    vst1q_u16(inputPtr, swapped);
    inputPtr += 8;
  }

  // Scalar swap for the values that did not fill a whole register.
  for(number = eighth_points * 8; number < num_points; number++){
    const uint16_t word = *inputPtr;
    *inputPtr = (uint16_t)(((word >> 8) & 0xff) | ((word << 8) & 0xff00));
    inputPtr++;
  }
}
171 #endif /* LV_HAVE_NEON */
172 
173 #ifdef LV_HAVE_NEON
174 #include <arm_neon.h>
175 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (NEON, table lookup).
 *
 * Processes 16 values (32 bytes) per iteration and finishes the remainder
 * with a scalar loop.
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, unsigned int num_points){
  /* Byte indices into the 32-byte table produced by vld4_u8, packed
     little-endian into a 64-bit constant (first index in the lowest byte).
     vld4_u8 de-interleaves with stride 4, so source byte k sits at table
     position (k % 4) * 8 + k / 4. The output byte order 1,0,3,2,5,4,7,6
     therefore needs the indices {8, 0, 24, 16, 9, 1, 25, 17}; each
     following group of four values adds 2 to every index. */
  const uint8x8_t pickWords0to3   = vcreate_u8(0x1119010910180008ULL);
  const uint8x8_t pickWords4to7   = vcreate_u8(0x131B030B121A020AULL);
  const uint8x8_t pickWords8to11  = vcreate_u8(0x151D050D141C040CULL);
  const uint8x8_t pickWords12to15 = vcreate_u8(0x171F070F161E060EULL);

  uint16_t* cursor = intsToSwap;
  const unsigned int blocks = num_points / 16;
  unsigned int leftover = num_points - blocks * 16;
  unsigned int block;

  for(block = 0; block < blocks; ++block){
    uint8_t* const bytes = (uint8_t*)cursor;
    // All four lookups read the table taken before any store happens.
    const uint8x8x4_t table = vld4_u8(bytes);
    const uint8x8_t out0 = vtbl4_u8(table, pickWords0to3);
    const uint8x8_t out1 = vtbl4_u8(table, pickWords4to7);
    const uint8x8_t out2 = vtbl4_u8(table, pickWords8to11);
    const uint8x8_t out3 = vtbl4_u8(table, pickWords12to15);
    vst1_u8(bytes,      out0);
    vst1_u8(bytes + 8,  out1);
    vst1_u8(bytes + 16, out2);
    vst1_u8(bytes + 24, out3);

    cursor += 16;
  }

  // Scalar swap for the values that did not fill a whole 16-value block.
  while(leftover-- > 0){
    const uint16_t word = *cursor;
    *cursor++ = (uint16_t)((word >> 8) | (word << 8));
  }
}
219 #endif /* LV_HAVE_NEON */
220 
221 #ifdef LV_HAVE_GENERIC
222 
/*!
 * \brief Byte-swaps a vector of 16-bit values in place (portable C).
 *
 * \param intsToSwap buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points number of 16-bit values in the buffer.
 */
static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){
  uint16_t* cursor = intsToSwap;
  unsigned int remaining = num_points;

  while(remaining-- > 0){
    const unsigned int highByte = (*cursor >> 8) & 0xff;
    const unsigned int lowByte = *cursor & 0xff;
    // Old low byte becomes the new high byte and vice versa.
    *cursor++ = (uint16_t)((lowByte << 8) | highByte);
  }
}
233 #endif /* LV_HAVE_GENERIC */
234 
235 #ifdef LV_HAVE_ORC
236 
/* Defined outside this header; only declared here. */
extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap,
                                         unsigned int num_points);

/*!
 * \brief Byte-swap entry point that forwards to volk_16u_byteswap_a_orc_impl.
 *
 * \param intsToSwap buffer of 16-bit values, passed through unchanged.
 * \param num_points number of 16-bit values, passed through unchanged.
 */
static inline void
volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points)
{
  volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
}
241 #endif /* LV_HAVE_ORC */
242 
243 
244 #endif /* INCLUDED_volk_16u_byteswap_a_H */
unsigned short uint16_t
Definition: stdint.h:79
unsigned char uint8_t
Definition: stdint.h:78