BitMagic-C++
bmutil.h
Go to the documentation of this file.
1#ifndef BMUTIL__H__INCLUDED__
2#define BMUTIL__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmutil.h
22 \brief Bit manipulation primitives (internal)
23*/
24
25
26#include "bmdef.h"
27#include "bmconst.h"
28
29#if defined(__arm64__) || defined(__arm__)
30//#include "sse2neon.h"
31#else
32 #if defined(_M_AMD64) || defined(_M_X64)
33 #include <intrin.h>
34 #elif defined(__x86_64__)
35 #include <x86intrin.h>
36 #endif
37#endif
38
39#ifdef __GNUG__
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wconversion"
42#endif
43
44#ifdef _MSC_VER
45#pragma warning( push )
46#pragma warning( disable : 4146)
47#endif
48
49
50namespace bm
51{
52
53 /**
54 bit-block array wrapped into union for correct interpretation of
55 32-bit vs 64-bit access vs SIMD
56 @internal
57 */
59 {
61 {
64
65#if defined(BMAVX512OPT)
67#endif
68#if defined(BMAVX2OPT)
70#endif
71#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
73#endif
74 } b_;
75
76 operator bm::word_t*() { return &(b_.w32[0]); }
77 operator const bm::word_t*() const { return &(b_.w32[0]); }
78 explicit operator bm::id64_t*() { return &b_.w64[0]; }
79 explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
80#ifdef BMAVX512OPT
81 explicit operator __m512i*() { return &b_.w512[0]; }
82 explicit operator const __m512i*() const { return &b_.w512[0]; }
83#endif
84#ifdef BMAVX2OPT
85 explicit operator __m256i*() { return &b_.w256[0]; }
86 explicit operator const __m256i*() const { return &b_.w256[0]; }
87#endif
88#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
89 explicit operator __m128i*() { return &b_.w128[0]; }
90 explicit operator const __m128i*() const { return &b_.w128[0]; }
91#endif
92
93 const bm::word_t* begin() const { return (b_.w32 + 0); }
94 bm::word_t* begin() { return (b_.w32 + 0); }
95 const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
96 bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
97 };
98
99/**
100 Get minimum of 2 values
101*/
102template<typename T>
104{
105 return v1 < v2 ? v1 : v2;
106}
107
108/**
109 \brief ad-hoc conditional expressions
110 \internal
111*/
112template <bool b> struct conditional
113{
114 static bool test() { return true; }
115};
116template <> struct conditional<false>
117{
118 static bool test() { return false; }
119};
120
121
122/**
123 Fast loop-less function to find LOG2
124*/
125template<typename T>
127{
128 unsigned int l = 0;
129
130 if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
131 if (x >= 1<<8) { x = (T)(x >> 8); l |= 8; }
132 if (x >= 1<<4) { x = (T)(x >> 4); l |= 4; }
133 if (x >= 1<<2) { x = (T)(x >> 2); l |= 2; }
134 if (x >= 1<<1) l |=1;
135 return (T)l;
136}
137
138template<>
141{
142 unsigned int l = 0;
143 if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; }
144 if (x >= 1<<4) { x = (bm::gap_word_t)(x >> 4); l |= 4; }
145 if (x >= 1<<2) { x = (bm::gap_word_t)(x >> 2); l |= 2; }
146 if (x >= 1<<1) l |=1;
147 return (bm::gap_word_t)l;
148}
149
150/**
151 Mini auto-pointer for internal memory management
152 @internal
153*/
154template<class T>
156{
157public:
158 ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
159 ~ptr_guard() { delete ptr_; }
160private:
161 ptr_guard(const ptr_guard<T>& p);
162 ptr_guard& operator=(const ptr_guard<T>& p);
163private:
164 T* ptr_;
165};
166
167/**
168 Portable LZCNT with (uses minimal LUT)
169 @ingroup bitfunc
170 @internal
171*/
173unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
174{
175 unsigned n =
176 (x >= (1U << 16)) ?
177 ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
178 :
179 ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
180 return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
181}
182
183/**
184 Portable TZCNT with (uses 37-LUT)
185 @ingroup bitfunc
186 @internal
187*/
190{
191 // (v & -v) isolates the last set bit
192 return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
193}
194
195/**
196 Lookup table based integer LOG2
197*/
198template<typename T>
200{
201 unsigned l = 0;
202 if (x & 0xffff0000)
203 {
204 l += 16; x >>= 16;
205 }
206
207 if (x & 0xff00)
208 {
209 l += 8; x >>= 8;
210 }
211 return l + T(bm::first_bit_table<true>::_idx[x]);
212}
213
214/**
215 Lookup table based short integer LOG2
216*/
217template<>
227
228
229// if we are running on x86 CPU we can use inline ASM
230
231#if defined(BM_x86) && !(defined(__arm__) || defined(__aarch64__))
232#ifdef __GNUG__
233
235unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
236{
237 unsigned r;
238 asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
239 return r;
240}
241
243unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
244{
245 unsigned r;
246 asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
247 return r;
248}
249
250#endif // __GNUG__
251
252#ifdef _MSC_VER
253
254#if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
255
257unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
258{
259 unsigned long r;
260 _BitScanReverse(&r, value);
261 return r;
262}
263
265unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
266{
267 unsigned long r;
268 _BitScanForward(&r, value);
269 return r;
270}
271
272#else
273
275unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
276{
277 __asm bsr eax, value
278}
279
281unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
282{
283 __asm bsf eax, value
284}
285
286#endif
287
288#endif // _MSC_VER
289
290#endif // BM_x86
291
292
293// From:
294// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
295//
296template<typename T>
298{
299 return
300 DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
301}
302
304unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
305{
306 BM_ASSERT(w);
307#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
308 return (unsigned) (31 - __builtin_clz(w));
309#else
310# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
311 return bm::bsr_asm32(w);
312# else
314# endif
315#endif
316}
317
319unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
320{
321 BM_ASSERT(w);
322#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
323 return (unsigned) __builtin_ctz(w);
324#else
325# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
326 return bm::bsf_asm32(w);
327# else
328 return bit_scan_fwd(w);
329# endif
330#endif
331}
332
333
335unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
336{
337#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
338 return _blsr_u64(w);
339#else
340 return w & (w - 1);
341#endif
342}
343
345unsigned long long bmi_blsi_u64(unsigned long long w)
346{
347#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
348 return _blsi_u64(w);
349#else
350 return w & (-w);
351#endif
352}
353
354/// 32-bit bit-scan reverse
355inline
357{
358 BM_ASSERT(w);
359#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
360 return (unsigned)_lzcnt_u32(w);
361#else
362 #if defined(BM_USE_GCC_BUILD) || defined(__GNUG__)
363 return (unsigned) __builtin_clz(w);
364 #else
365 return bm::count_leading_zeros(w); // portable
366 #endif
367#endif
368}
369
370
371/// 64-bit bit-scan reverse
372inline
374{
375 BM_ASSERT(w);
376#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
377 return (unsigned)_lzcnt_u64(w);
378#else
379 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
380 return (unsigned) __builtin_clzll(w);
381 #else
382 unsigned z;
383 unsigned w1 = unsigned(w >> 32);
384 if (!w1)
385 {
386 z = 32;
387 w1 = unsigned(w);
388 z += 31 - bm::bit_scan_reverse32(w1);
389 }
390 else
391 {
392 z = 31 - bm::bit_scan_reverse32(w1);
393 }
394 return z;
395 #endif
396#endif
397}
398
399/// 32-bit bit-scan fwd
400inline
402{
403 BM_ASSERT(w);
404
405#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
406 return (unsigned)_tzcnt_u32(w);
407#else
408 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
409 return (unsigned) __builtin_ctz(w);
410 #else
411 return bm::bit_scan_forward32(w);
412 #endif
413#endif
414}
415
416
417/// 64-bit bit-scan fwd
418inline
420{
421 BM_ASSERT(w);
422
423#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
424 return (unsigned)_tzcnt_u64(w);
425#else
426 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
427 return (unsigned) __builtin_ctzll(w);
428 #else
429 unsigned z;
430 unsigned w1 = unsigned(w);
431 if (!w1)
432 {
433 z = 32;
434 w1 = unsigned(w >> 32);
435 z += bm::bit_scan_forward32(w1);
436 }
437 else
438 {
440 }
441 return z;
442 #endif
443#endif
444}
445
446
447
448/*!
449 Returns BSR value
450 @ingroup bitfunc
451*/
452template <class T>
454{
455 BM_ASSERT(value);
456
457 if (bm::conditional<sizeof(T)==8>::test())
458 {
459 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
460 return (unsigned) (63 - __builtin_clzll(value));
461 #else
462 bm::id64_t v8 = value;
463 v8 >>= 32;
464 unsigned v = (unsigned)v8;
465 if (v)
466 {
468 return v + 32;
469 }
470 #endif
471 }
472 return bm::bit_scan_reverse32((unsigned)value);
473}
474
475/*! \brief and functor
476 \internal
477 */
479{
480 static
481 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
482 { return v1 & v2; }
483};
484/*! \brief xor functor
485 \internal
486 */
488{
489 static
490 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
491 { return v1 ^ v2; }
492};
493/*! \brief or functor
494 \internal
495 */
497{
498 static
499 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
500 { return v1 | v2; }
501};
502/*! \brief sub functor
503 \internal
504 */
506{
507 static
508 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
509 { return v1 & ~v2; }
510};
511
513unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
514{
515 BM_ASSERT(nbit < 32);
516 unsigned m = (~0u << nbit);
518 return m;
519}
520
522unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
523{
524 BM_ASSERT(nbit < 32);
525 unsigned m = ~0u >> (31 - nbit);
527 return m;
528}
529
530/// XOR swap two variables
531///
532/// @internal
533template<typename W>
535{
536 BM_ASSERT(&x != &y);
537 x ^= y; y ^= x; x ^= y;
538}
539
540
541#ifdef __GNUG__
542#pragma GCC diagnostic pop
543#endif
544#ifdef _MSC_VER
545#pragma warning( pop )
546#endif
547
548/**
549 Сompute mask of bytes presense in 64-bit word
550
551 @param w - [in] input 64-bit word
552 @return mask with 8 bits
553 @internal
554 */
555inline
556unsigned compute_h64_mask(unsigned long long w) BMNOEXCEPT
557{
558 unsigned h_mask = 0;
559 for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
560 {
561 if ((unsigned char) w)
562 h_mask |= (1u<<i);
563 } // for
564 return h_mask;
565}
566
567/**
568 Returns true if INT64 contains 0 octet
569 */
572{
573 return (v - 0x0101010101010101ULL) & ~(v) & 0x8080808080808080ULL;
574}
575
576
577/*!
578 Returns bit count
579 @ingroup bitfunc
580*/
583{
584#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
585 return bm::id_t(_mm_popcnt_u32(w));
586#else
587 #if defined(BM_USE_GCC_BUILD)
588 return (bm::id_t)__builtin_popcount(w);
589 #else
590 return
591 bm::bit_count_table<true>::_count[(unsigned char)(w)] +
592 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
593 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
594 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
595 #endif
596#endif
597}
598
599
600/*!
601 Function calculates number of 1 bits in 64-bit word.
602 @ingroup bitfunc
603*/
606{
607#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
608 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
609 return unsigned(_mm_popcnt_u64(x));
610 #else // 32-bit
611 return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
612 #endif
613#else
614 #if defined(BM_USE_GCC_BUILD) || defined(__arm64__)
615 return (unsigned)__builtin_popcountll(x);
616 #else
617 #if (defined(__arm__)) // 32-bit
618 return bm::word_bitcount(x >> 32) + bm::word_bitcount((unsigned)x);
619 #else
620 x = x - ((x >> 1) & 0x5555555555555555);
621 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
622 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
623 x = x + (x >> 8);
624 x = x + (x >> 16);
625 x = x + (x >> 32);
626 return x & 0xFF;
627 #endif
628 #endif
629#endif
630}
631
632/**
633 Check pointer alignment
634 @internal
635 */
636template< typename T >
638{
639#if defined (BM_ALLOC_ALIGN)
640 return !(reinterpret_cast<unsigned int*>(p) % BM_ALLOC_ALIGN);
641#else
642 (void)p;
643 return true;
644#endif
645}
646
647
648} // bm
649
650#endif
#define BM_ALLOC_ALIGN
Definition bmalloc.h:32
Constants, lookup tables and typedefs.
Definitions(internal).
#define BMNOEXCEPT
Definition bmdef.h:82
#define BMFORCEINLINE
Definition bmdef.h:213
#define BM_ASSERT
Definition bmdef.h:139
#define BMNOEXCEPT2
Definition bmdef.h:85
#define BM_VECT_ALIGN
Definition bmdef.h:320
Mini auto-pointer for internal memory management.
Definition bmutil.h:156
ptr_guard(T *p) BMNOEXCEPT
Definition bmutil.h:158
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition bmutil.h:582
BMFORCEINLINE unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
Portable LZCNT with (uses minimal LUT).
Definition bmutil.h:173
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
Definition bmutil.h:605
unsigned bit_scan_reverse(T value) BMNOEXCEPT
Definition bmutil.h:453
BMFORCEINLINE unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
Portable TZCNT with (uses 37-LUT).
Definition bmutil.h:189
Definition bm.h:78
unsigned int word_t
Definition bmconst.h:39
unsigned count_leading_zeros_u32(unsigned w) BMNOEXCEPT
32-bit bit-scan reverse
Definition bmutil.h:356
BMFORCEINLINE unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
Definition bmutil.h:319
BMFORCEINLINE T ilog2(T x) BMNOEXCEPT
Fast loop-less function to find LOG2.
Definition bmutil.h:126
BMFORCEINLINE bm::gap_word_t ilog2_LUT< bm::gap_word_t >(bm::gap_word_t x) BMNOEXCEPT
Lookup table based short integer LOG2.
Definition bmutil.h:218
unsigned compute_h64_mask(unsigned long long w) BMNOEXCEPT
Сompute mask of bytes presense in 64-bit word.
Definition bmutil.h:556
BMFORCEINLINE unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
Definition bmutil.h:522
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
Definition bmutil.h:297
BMFORCEINLINE unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
Definition bmutil.h:304
BMFORCEINLINE T ilog2_LUT(T x) BMNOEXCEPT
Lookup table based integer LOG2.
Definition bmutil.h:199
BMFORCEINLINE bool has_zero_byte_u64(bm::id64_t v) BMNOEXCEPT
Returns true if INT64 contains 0 octet.
Definition bmutil.h:571
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition bmutil.h:534
BMFORCEINLINE T min_value(T v1, T v2) BMNOEXCEPT
Get minimum of 2 values.
Definition bmutil.h:103
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan reverse
Definition bmutil.h:373
const unsigned set_block_size
Definition bmconst.h:55
unsigned long long int id64_t
Definition bmconst.h:35
unsigned int id_t
Definition bmconst.h:38
BMFORCEINLINE unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
Definition bmutil.h:513
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
Definition bmutil.h:335
unsigned short gap_word_t
Definition bmconst.h:78
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan fwd
Definition bmutil.h:419
bool is_aligned(T *p) BMNOEXCEPT
Check pointer alignment.
Definition bmutil.h:637
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition bmutil.h:345
unsigned count_trailing_zeros_u32(unsigned w) BMNOEXCEPT
32-bit bit-scan fwd
Definition bmutil.h:401
static const unsigned _multiply[32]
Definition bmconst.h:263
and functor
Definition bmutil.h:479
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
Definition bmutil.h:481
bit-block array wrapped into union for correct interpretation of 32-bit vs 64-bit access vs SIMD
Definition bmutil.h:59
union bm::bit_block_t::bunion_t b_
const bm::word_t * begin() const
Definition bmutil.h:93
bm::word_t * end()
Definition bmutil.h:96
const bm::word_t * end() const
Definition bmutil.h:95
bm::word_t * begin()
Definition bmutil.h:94
static const unsigned char _count[256]
Definition bmconst.h:310
static const unsigned _right[32]
Definition bmconst.h:366
static const unsigned _left[32]
Definition bmconst.h:365
static bool test()
Definition bmutil.h:118
ad-hoc conditional expressions
Definition bmutil.h:113
static bool test()
Definition bmutil.h:114
static const signed char _idx[256]
Definition bmconst.h:278
static unsigned char const _lut[16]
Definition bmconst.h:332
or functor
Definition bmutil.h:497
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
Definition bmutil.h:499
sub functor
Definition bmutil.h:506
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
Definition bmutil.h:508
static unsigned char const _lut[37]
Definition bmconst.h:347
xor functor
Definition bmutil.h:488
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
Definition bmutil.h:490
bm::word_t BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR
Definition bmutil.h:62