block_util.hpp
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
 *
 * Copyright (c) 2021 Kohei Yoshida
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 ************************************************************************/

#ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_SOA_BLOCK_UTIL_HPP
#define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_SOA_BLOCK_UTIL_HPP

#include "mdds/global.hpp"
#include "../types.hpp"

#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__AVX2__)
#include <immintrin.h>
#endif

namespace mdds { namespace mtv { namespace soa { namespace detail {

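// Primary template: deliberately unusable. Callers select a loop-unrolling /
// SIMD strategy via the lu_factor_t template argument, and instantiating an
// unsupported factor fails at compile time via the static_assert below. Each
// specialization that follows performs the same operation: shift all block
// position values at or after start_block_index by delta.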
template<typename Blks, lu_factor_t F>
struct adjust_block_positions
{
    void operator()(Blks& /*block_store*/, int64_t /*start_block_index*/, int64_t /*delta*/) const
    {
        static_assert(invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::none>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < n; ++i)
            block_store.positions[i] += delta;
    }
};

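// The unrolled variants below all follow the same pattern: split the
// [start_block_index, n) range into a prefix whose length is a multiple of
// the unrolling factor (handled by the unrolled loop) and a short tail of
// `rem` elements (handled by a plain scalar loop). Since the factors are
// powers of two, a bitwise AND computes the remainder cheaply, e.g. for lu4
// `len & 3` equals `len % 4`.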
template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::lu4>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 4.
        int64_t len = n - start_block_index;
        int64_t rem = len & 3; // % 4
        len -= rem;
        len += start_block_index;
#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 4)
        {
            block_store.positions[i + 0] += delta;
            block_store.positions[i + 1] += delta;
            block_store.positions[i + 2] += delta;
            block_store.positions[i + 3] += delta;
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::lu8>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 8.
        int64_t len = n - start_block_index;
        int64_t rem = len & 7; // % 8
        len -= rem;
        len += start_block_index;
#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 8)
        {
            block_store.positions[i + 0] += delta;
            block_store.positions[i + 1] += delta;
            block_store.positions[i + 2] += delta;
            block_store.positions[i + 3] += delta;
            block_store.positions[i + 4] += delta;
            block_store.positions[i + 5] += delta;
            block_store.positions[i + 6] += delta;
            block_store.positions[i + 7] += delta;
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::lu16>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 16.
        int64_t len = n - start_block_index;
        int64_t rem = len & 15; // % 16
        len -= rem;
        len += start_block_index;
#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 16)
        {
            block_store.positions[i + 0] += delta;
            block_store.positions[i + 1] += delta;
            block_store.positions[i + 2] += delta;
            block_store.positions[i + 3] += delta;
            block_store.positions[i + 4] += delta;
            block_store.positions[i + 5] += delta;
            block_store.positions[i + 6] += delta;
            block_store.positions[i + 7] += delta;
            block_store.positions[i + 8] += delta;
            block_store.positions[i + 9] += delta;
            block_store.positions[i + 10] += delta;
            block_store.positions[i + 11] += delta;
            block_store.positions[i + 12] += delta;
            block_store.positions[i + 13] += delta;
            block_store.positions[i + 14] += delta;
            block_store.positions[i + 15] += delta;
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::lu32>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 32.
        int64_t len = n - start_block_index;
        int64_t rem = len & 31; // % 32
        len -= rem;
        len += start_block_index;
#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 32)
        {
            block_store.positions[i + 0] += delta;
            block_store.positions[i + 1] += delta;
            block_store.positions[i + 2] += delta;
            block_store.positions[i + 3] += delta;
            block_store.positions[i + 4] += delta;
            block_store.positions[i + 5] += delta;
            block_store.positions[i + 6] += delta;
            block_store.positions[i + 7] += delta;
            block_store.positions[i + 8] += delta;
            block_store.positions[i + 9] += delta;
            block_store.positions[i + 10] += delta;
            block_store.positions[i + 11] += delta;
            block_store.positions[i + 12] += delta;
            block_store.positions[i + 13] += delta;
            block_store.positions[i + 14] += delta;
            block_store.positions[i + 15] += delta;
            block_store.positions[i + 16] += delta;
            block_store.positions[i + 17] += delta;
            block_store.positions[i + 18] += delta;
            block_store.positions[i + 19] += delta;
            block_store.positions[i + 20] += delta;
            block_store.positions[i + 21] += delta;
            block_store.positions[i + 22] += delta;
            block_store.positions[i + 23] += delta;
            block_store.positions[i + 24] += delta;
            block_store.positions[i + 25] += delta;
            block_store.positions[i + 26] += delta;
            block_store.positions[i + 27] += delta;
            block_store.positions[i + 28] += delta;
            block_store.positions[i + 29] += delta;
            block_store.positions[i + 30] += delta;
            block_store.positions[i + 31] += delta;
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

#if defined(__SSE2__)

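// SSE2 variants: a 128-bit __m128i register holds two 64-bit position
// values, so each vector operation updates a pair of positions. The delta is
// broadcast into both lanes with _mm_set_epi64x, and the unaligned
// _mm_loadu_si128 / _mm_storeu_si128 pair is used because the vector
// elements are not guaranteed to be 16-byte aligned.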
template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::sse2_x64>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 2.
        int64_t len = n - start_block_index;
        bool odd = len & 1;
        if (odd)
            len -= 1;

        len += start_block_index;

        __m128i right = _mm_set_epi64x(delta, delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 2)
        {
            __m128i* dst = (__m128i*)&block_store.positions[i];
            _mm_storeu_si128(dst, _mm_add_epi64(_mm_loadu_si128(dst), right));
        }

        if (odd)
            block_store.positions[len] += delta;
    }
};

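// The _lu4, _lu8 and _lu16 variants below combine the SSE2 pairwise add with
// additional manual unrolling: 4, 8 or 16 vector operations per iteration,
// covering 8, 16 or 32 positions respectively, with a scalar loop for the
// remainder.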
template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::sse2_x64_lu4>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 8.
        int64_t len = n - start_block_index;
        int64_t rem = len & 7; // % 8
        len -= rem;
        len += start_block_index;

        __m128i right = _mm_set_epi64x(delta, delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 8)
        {
            __m128i* dst0 = (__m128i*)&block_store.positions[i];
            _mm_storeu_si128(dst0, _mm_add_epi64(_mm_loadu_si128(dst0), right));

            __m128i* dst2 = (__m128i*)&block_store.positions[i + 2];
            _mm_storeu_si128(dst2, _mm_add_epi64(_mm_loadu_si128(dst2), right));

            __m128i* dst4 = (__m128i*)&block_store.positions[i + 4];
            _mm_storeu_si128(dst4, _mm_add_epi64(_mm_loadu_si128(dst4), right));

            __m128i* dst6 = (__m128i*)&block_store.positions[i + 6];
            _mm_storeu_si128(dst6, _mm_add_epi64(_mm_loadu_si128(dst6), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::sse2_x64_lu8>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 16.
        int64_t len = n - start_block_index;
        int64_t rem = len & 15; // % 16
        len -= rem;
        len += start_block_index;

        __m128i right = _mm_set_epi64x(delta, delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 16)
        {
            __m128i* dst0 = (__m128i*)&block_store.positions[i];
            _mm_storeu_si128(dst0, _mm_add_epi64(_mm_loadu_si128(dst0), right));

            __m128i* dst2 = (__m128i*)&block_store.positions[i + 2];
            _mm_storeu_si128(dst2, _mm_add_epi64(_mm_loadu_si128(dst2), right));

            __m128i* dst4 = (__m128i*)&block_store.positions[i + 4];
            _mm_storeu_si128(dst4, _mm_add_epi64(_mm_loadu_si128(dst4), right));

            __m128i* dst6 = (__m128i*)&block_store.positions[i + 6];
            _mm_storeu_si128(dst6, _mm_add_epi64(_mm_loadu_si128(dst6), right));

            __m128i* dst8 = (__m128i*)&block_store.positions[i + 8];
            _mm_storeu_si128(dst8, _mm_add_epi64(_mm_loadu_si128(dst8), right));

            __m128i* dst10 = (__m128i*)&block_store.positions[i + 10];
            _mm_storeu_si128(dst10, _mm_add_epi64(_mm_loadu_si128(dst10), right));

            __m128i* dst12 = (__m128i*)&block_store.positions[i + 12];
            _mm_storeu_si128(dst12, _mm_add_epi64(_mm_loadu_si128(dst12), right));

            __m128i* dst14 = (__m128i*)&block_store.positions[i + 14];
            _mm_storeu_si128(dst14, _mm_add_epi64(_mm_loadu_si128(dst14), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::sse2_x64_lu16>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 32.
        int64_t len = n - start_block_index;
        int64_t rem = len & 31; // % 32
        len -= rem;
        len += start_block_index;

        __m128i right = _mm_set_epi64x(delta, delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 32)
        {
            __m128i* dst0 = (__m128i*)&block_store.positions[i];
            _mm_storeu_si128(dst0, _mm_add_epi64(_mm_loadu_si128(dst0), right));

            __m128i* dst2 = (__m128i*)&block_store.positions[i + 2];
            _mm_storeu_si128(dst2, _mm_add_epi64(_mm_loadu_si128(dst2), right));

            __m128i* dst4 = (__m128i*)&block_store.positions[i + 4];
            _mm_storeu_si128(dst4, _mm_add_epi64(_mm_loadu_si128(dst4), right));

            __m128i* dst6 = (__m128i*)&block_store.positions[i + 6];
            _mm_storeu_si128(dst6, _mm_add_epi64(_mm_loadu_si128(dst6), right));

            __m128i* dst8 = (__m128i*)&block_store.positions[i + 8];
            _mm_storeu_si128(dst8, _mm_add_epi64(_mm_loadu_si128(dst8), right));

            __m128i* dst10 = (__m128i*)&block_store.positions[i + 10];
            _mm_storeu_si128(dst10, _mm_add_epi64(_mm_loadu_si128(dst10), right));

            __m128i* dst12 = (__m128i*)&block_store.positions[i + 12];
            _mm_storeu_si128(dst12, _mm_add_epi64(_mm_loadu_si128(dst12), right));

            __m128i* dst14 = (__m128i*)&block_store.positions[i + 14];
            _mm_storeu_si128(dst14, _mm_add_epi64(_mm_loadu_si128(dst14), right));

            __m128i* dst16 = (__m128i*)&block_store.positions[i + 16];
            _mm_storeu_si128(dst16, _mm_add_epi64(_mm_loadu_si128(dst16), right));

            __m128i* dst18 = (__m128i*)&block_store.positions[i + 18];
            _mm_storeu_si128(dst18, _mm_add_epi64(_mm_loadu_si128(dst18), right));

            __m128i* dst20 = (__m128i*)&block_store.positions[i + 20];
            _mm_storeu_si128(dst20, _mm_add_epi64(_mm_loadu_si128(dst20), right));

            __m128i* dst22 = (__m128i*)&block_store.positions[i + 22];
            _mm_storeu_si128(dst22, _mm_add_epi64(_mm_loadu_si128(dst22), right));

            __m128i* dst24 = (__m128i*)&block_store.positions[i + 24];
            _mm_storeu_si128(dst24, _mm_add_epi64(_mm_loadu_si128(dst24), right));

            __m128i* dst26 = (__m128i*)&block_store.positions[i + 26];
            _mm_storeu_si128(dst26, _mm_add_epi64(_mm_loadu_si128(dst26), right));

            __m128i* dst28 = (__m128i*)&block_store.positions[i + 28];
            _mm_storeu_si128(dst28, _mm_add_epi64(_mm_loadu_si128(dst28), right));

            __m128i* dst30 = (__m128i*)&block_store.positions[i + 30];
            _mm_storeu_si128(dst30, _mm_add_epi64(_mm_loadu_si128(dst30), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

#endif // __SSE2__

#if defined(__AVX2__)

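// AVX2 variants: a 256-bit __m256i register holds four 64-bit position
// values. _mm256_set1_epi64x broadcasts the delta into all four lanes, and
// the unaligned _mm256_loadu_si256 / _mm256_storeu_si256 pair is used for
// the same alignment reason as in the SSE2 code above.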
template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::avx2_x64>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 4.
        int64_t len = n - start_block_index;
        int64_t rem = len & 3; // % 4
        len -= rem;
        len += start_block_index;

        __m256i right = _mm256_set1_epi64x(delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 4)
        {
            __m256i* dst = (__m256i*)&block_store.positions[i];
            _mm256_storeu_si256(dst, _mm256_add_epi64(_mm256_loadu_si256(dst), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::avx2_x64_lu4>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 16.
        int64_t len = n - start_block_index;
        int64_t rem = len & 15; // % 16
        len -= rem;
        len += start_block_index;

        __m256i right = _mm256_set1_epi64x(delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 16)
        {
            __m256i* dst = (__m256i*)&block_store.positions[i];
            _mm256_storeu_si256(dst, _mm256_add_epi64(_mm256_loadu_si256(dst), right));

            __m256i* dst4 = (__m256i*)&block_store.positions[i + 4];
            _mm256_storeu_si256(dst4, _mm256_add_epi64(_mm256_loadu_si256(dst4), right));

            __m256i* dst8 = (__m256i*)&block_store.positions[i + 8];
            _mm256_storeu_si256(dst8, _mm256_add_epi64(_mm256_loadu_si256(dst8), right));

            __m256i* dst12 = (__m256i*)&block_store.positions[i + 12];
            _mm256_storeu_si256(dst12, _mm256_add_epi64(_mm256_loadu_si256(dst12), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

template<typename Blks>
struct adjust_block_positions<Blks, lu_factor_t::avx2_x64_lu8>
{
    void operator()(Blks& block_store, int64_t start_block_index, int64_t delta) const
    {
        static_assert(
            sizeof(typename decltype(block_store.positions)::value_type) == 8,
            "This code works only when the position values are 64-bit wide.");

        int64_t n = block_store.positions.size();

        if (start_block_index >= n)
            return;

        // Ensure that the section length is divisible by 32.
        int64_t len = n - start_block_index;
        int64_t rem = len & 31; // % 32
        len -= rem;
        len += start_block_index;

        __m256i right = _mm256_set1_epi64x(delta);

#if MDDS_USE_OPENMP
#pragma omp parallel for
#endif
        for (int64_t i = start_block_index; i < len; i += 32)
        {
            __m256i* dst = (__m256i*)&block_store.positions[i];
            _mm256_storeu_si256(dst, _mm256_add_epi64(_mm256_loadu_si256(dst), right));

            __m256i* dst4 = (__m256i*)&block_store.positions[i + 4];
            _mm256_storeu_si256(dst4, _mm256_add_epi64(_mm256_loadu_si256(dst4), right));

            __m256i* dst8 = (__m256i*)&block_store.positions[i + 8];
            _mm256_storeu_si256(dst8, _mm256_add_epi64(_mm256_loadu_si256(dst8), right));

            __m256i* dst12 = (__m256i*)&block_store.positions[i + 12];
            _mm256_storeu_si256(dst12, _mm256_add_epi64(_mm256_loadu_si256(dst12), right));

            __m256i* dst16 = (__m256i*)&block_store.positions[i + 16];
            _mm256_storeu_si256(dst16, _mm256_add_epi64(_mm256_loadu_si256(dst16), right));

            __m256i* dst20 = (__m256i*)&block_store.positions[i + 20];
            _mm256_storeu_si256(dst20, _mm256_add_epi64(_mm256_loadu_si256(dst20), right));

            __m256i* dst24 = (__m256i*)&block_store.positions[i + 24];
            _mm256_storeu_si256(dst24, _mm256_add_epi64(_mm256_loadu_si256(dst24), right));

            __m256i* dst28 = (__m256i*)&block_store.positions[i + 28];
            _mm256_storeu_si256(dst28, _mm256_add_epi64(_mm256_loadu_si256(dst28), right));
        }

        rem += len;
        for (int64_t i = len; i < rem; ++i)
            block_store.positions[i] += delta;
    }
};

#endif // __AVX2__

}}}} // namespace mdds::mtv::soa::detail
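
// Usage sketch (illustrative only; this functor lives in a detail namespace
// and is normally invoked internally by multi_type_vector). Assuming a
// hypothetical SoA block store `store` whose `positions` member is a
// std::vector of 64-bit values, shifting every block position from index 2
// onward by 10 with the 8x unrolled variant would look like:
//
//     using mdds::mtv::lu_factor_t;
//     mdds::mtv::soa::detail::adjust_block_positions<decltype(store), lu_factor_t::lu8>{}(store, 2, 10);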

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */