|
BitMagic-C++
|
Processor specific optimizations for AVX2 instructions (internals). More...

Functions | |
| bm::id_t | bm::avx2_bit_count (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end) |
| AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts
using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016). | |
| bm::id_t | bm::avx2_bit_block_count (const bm::word_t *const block, bm::id64_t digest) |
| Calculate population count based on digest. | |
| bm::id_t | bm::avx2_bit_count_and (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| AND bit count for two aligned bit-blocks. | |
| bm::id_t | bm::avx2_bit_count_xor (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| XOR bit count for two aligned bit-blocks. | |
| bm::id_t | bm::avx2_bit_count_sub (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| AND NOT bit count for two aligned bit-blocks. | |
| void | bm::avx2_xor_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
| XOR array elements to specified mask dst = *src ^ mask. | |
| void | bm::avx2_andnot_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
| Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask. | |
| unsigned | bm::avx2_and_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND array elements against another array dst &= *src. | |
| bool | bm::avx2_and_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND block digest stride dst &= *src. | |
| bool | bm::avx2_and_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| AND block digest stride 2 way dst = *src1 & *src2. | |
| bool | bm::avx2_and_or_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| AND-OR block digest stride 2 way dst |= *src1 & *src2. | |
| bool | bm::avx2_and_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
| AND block digest stride. | |
| bool | bm::avx2_and_digest_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| AND block digest stride. | |
| unsigned | bm::avx2_and_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
| AND array elements against another array (unaligned) dst &= *src. | |
| bool | bm::avx2_or_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| OR array elements against another array dst |= *src. | |
| bool | bm::avx2_or_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
| OR array elements against another unaligned array dst |= *src. | |
| bool | bm::avx2_or_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| OR 2 arrays and copy to the destination dst = *src1 | *src2. | |
| bool | bm::avx2_or_block_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| OR array elements against another 2 arrays dst |= *src1 | *src2. | |
| bool | bm::avx2_or_block_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
| OR array elements against another 4 arrays dst |= *src1 | *src2 | *src3 | *src4. | |
| unsigned | bm::avx2_xor_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| XOR block against another dst ^= *src. | |
| unsigned | bm::avx2_xor_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| 3 operand XOR dst = *src1 ^ *src2. | |
| unsigned | bm::avx2_sub_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND-NOT (SUB) array elements against another array dst &= ~*src. | |
| bool | bm::avx2_sub_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| SUB (AND NOT) block digest stride dst &= ~*src. | |
| bool | bm::avx2_sub_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2 | |
| bool | bm::avx2_sub_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
| SUB block digest stride. | |
| bool | bm::avx2_sub_digest_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| SUB block digest stride. | |
| BMFORCEINLINE void | bm::avx2_set_block (__m256i *BMRESTRICT dst, bm::word_t value) |
| AVX2 block memset dst = value. | |
| void | bm::avx2_copy_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy dst = *src. | |
| void | bm::avx2_copy_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy (unaligned SRC) dst = *src. | |
| void | bm::avx2_stream_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy dst = *src. | |
| void | bm::avx2_stream_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy (unaligned SRC) dst = *src. | |
| void | bm::avx2_invert_block (__m256i *BMRESTRICT dst) |
| Invert bit-block dst = ~*dst (equivalent to XOR of *dst with an all-ones mask). | |
| bool | bm::avx2_is_all_zero (const __m256i *BMRESTRICT block) |
| check if block is all zero bits | |
| bool | bm::avx2_is_digest_zero (const __m256i *BMRESTRICT block) |
| check if digest stride is all zero bits | |
| void | bm::avx2_block_set_digest (__m256i *dst, unsigned value) |
| set digest stride to 0xFF.. or 0x0 value | |
| bool | bm::avx2_is_all_one (const __m256i *BMRESTRICT block) |
| check if block is all one bits | |
| BMFORCEINLINE bool | bm::avx2_test_all_one_wave (const void *ptr) |
| check if wave of pointers is all 0xFFF | |
| BMFORCEINLINE bool | bm::avx2_test_all_zero_wave (const void *ptr) |
| check if wave of pointers is all NULL | |
| BMFORCEINLINE bool | bm::avx2_test_all_zero_wave2 (const void *ptr0, const void *ptr1) |
| check if 2 wave of pointers are all NULL | |
| BMFORCEINLINE bool | bm::avx2_test_all_eq_wave2 (const void *ptr0, const void *ptr1) |
| check if 2 wave of pointers are all the same (NULL or FULL) | |
| bool | bm::avx2_shift_l1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
| block shift left by 1 | |
| bool | bm::avx2_shift_r1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
| block shift right by 1 | |
| bool | bm::avx2_shift_r1_and (__m256i *BMRESTRICT block, bm::word_t co1, const __m256i *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest) |
| fused block shift right by 1 plus AND | |
| unsigned | bm::avx2_bit_block_calc_change (const __m256i *BMRESTRICT block, unsigned size) |
| void | bm::avx2_bit_block_calc_xor_change (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT xor_block, unsigned size, unsigned *BMRESTRICT gcount, unsigned *BMRESTRICT bcount) |
| void | bm::avx2_bit_block_calc_change_bc (const __m256i *BMRESTRICT block, unsigned *gcount, unsigned *bcount) |
| bool | bm::avx2_bit_find_first_diff (const __m256i *BMRESTRICT block1, const __m256i *BMRESTRICT block2, unsigned *pos) |
| Find first bit which is different between two bit-blocks. | |
| bool | bm::avx2_bit_find_first (const __m256i *BMRESTRICT block, unsigned off, unsigned *pos) |
| Find first bit set. | |
| int | bm::avx2_cmpge_u32 (__m256i vect8, unsigned value) |
| Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. | |
| int | bm::avx2_cmpge_u16 (__m256i vect16, unsigned short value) |
| Experimental (test) function to do SIMD vector search in sorted, growing array. | |
| template<bool RET_TEST = false> | |
| unsigned | bm::avx2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
| Hybrid binary search, starts as binary, then switches to scan. | |
| unsigned | bm::avx2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos) |
| Hybrid binary search, starts as binary, then switches to scan. | |
| unsigned | bm::avx2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) |
| lower bound (greater or equal) linear scan in an array sorted in ascending order | |
| unsigned | bm::avx2_bit_to_gap (gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) |
| Convert bit block to GAP block. | |
| void | bm::avx2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) |
| Build partial XOR product of 2 bit-blocks using digest mask. | |
| void | bm::avx2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT |
| Build partial XOR product of 2 bit-blocks using digest mask. | |
Processor specific optimizations for AVX2 instructions (internals).
|
inline |
AND array elements against another array (unaligned) dst &= *src.
Definition at line 777 of file bmavx2.h.
References BMRESTRICT.
Referenced by bm::decoder::get_32_AND().
|
inline |
AND array elements against another array dst &= *src.
Definition at line 496 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
AND block digest stride dst &= *src.
Definition at line 543 of file bmavx2.h.
References BMRESTRICT.
|
inline |
AND block digest stride 2 way dst = *src1 & *src2.
Definition at line 573 of file bmavx2.h.
References BMRESTRICT.
|
inline |
|
inline |
|
inline |
AND-OR block digest stride 2 way dst |= *src1 & *src2.
Definition at line 604 of file bmavx2.h.
References BMRESTRICT.
|
inline |
Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.
Definition at line 472 of file bmavx2.h.
References BMRESTRICT.
|
inline |
AVX2 calculate number of bit changes from 0 to 1
Definition at line 2083 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
AVX2 calculate number of bit changes from 0 to 1 and bitcount
Definition at line 2251 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, BMRESTRICT, and set_block_size.
|
inline |
AVX2 calculate number of bit changes from 0 to 1 from a XOR product
Definition at line 2154 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
Calculate population count based on digest.
Definition at line 232 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, bmi_blsi_u64(), bmi_bslr_u64(), BMRESTRICT, and set_block_digest_wave_size.
|
inline |
Build partial XOR product of 2 bit-blocks using digest mask.
| target_block | - target := block ^ xor_block |
| block | - arg1 |
| xor_block | - arg2 |
| digest | - mask for each block wave to XOR (1) or just copy (0) |
Definition at line 3341 of file bmavx2.h.
References block_waves, and set_block_digest_wave_size.
|
inline |
Build partial XOR product of 2 bit-blocks using digest mask.
| target_block | - target ^= xor_block |
| xor_block | - arg1 |
| digest | - mask for each block wave to XOR (1) |
Definition at line 3392 of file bmavx2.h.
References bmi_blsi_u64(), bmi_bslr_u64(), BMNOEXCEPT, and set_block_digest_wave_size.
|
inline |
AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016).
Definition at line 156 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, BM_CSA256, and BMRESTRICT.
|
inline |
AND bit count for two aligned bit-blocks.
Definition at line 290 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
AND NOT bit count for two aligned bit-blocks.
Definition at line 413 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
XOR bit count for two aligned bit-blocks.
Definition at line 368 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
Find first bit set.
Definition at line 2394 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BMRESTRICT, and set_block_size.
|
inline |
Find first bit which is different between two bit-blocks.
Definition at line 2336 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BMRESTRICT, and set_block_size.
|
inline |
Convert bit block to GAP block.
Definition at line 3227 of file bmavx2.h.
References BM_ASSERT, BMRESTRICT, and set_block_size.
|
inline |
|
inline |
|
inline |
|
inline |
AVX2 block copy dst = *src.
Definition at line 1503 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
AVX2 block copy (unaligned SRC) dst = *src.
Definition at line 1545 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
| unsigned bm::avx2_gap_bfind | ( | const unsigned short *BMRESTRICT | buf, |
| unsigned | pos, | ||
| unsigned *BMRESTRICT | is_set ) |
Hybrid binary search, starts as binary, then switches to scan.
NOTE: AVX code uses _mm256_subs_epu16 - saturated subtraction, which yields A-B=0 if A < B (not a negative value).
| buf | - GAP buffer pointer. |
| pos | - index of the element. |
| is_set | - output. GAP value (0 or 1). |
Definition at line 2939 of file bmavx2.h.
References BM_ASSERT, BMRESTRICT, and gap_max_bits.
Referenced by avx2_gap_test().
|
inline |
Hybrid binary search, starts as binary, then switches to scan.
Definition at line 3057 of file bmavx2.h.
References avx2_gap_bfind(), and BMRESTRICT.
|
inline |
Invert bit-block dst = ~*dst (equivalent to XOR of *dst with an all-ones mask).
Definition at line 1677 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
check if block is all one bits
Definition at line 1767 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
check if block is all zero bits
Definition at line 1708 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
check if digest stride is all zero bits
Definition at line 1738 of file bmavx2.h.
References BMRESTRICT.
|
inline |
lower bound (greater or equal) linear scan in an array sorted in ascending order
Definition at line 3068 of file bmavx2.h.
References BMRESTRICT.
|
inline |
OR array elements against another unaligned array dst |= *src.
Definition at line 888 of file bmavx2.h.
References BMRESTRICT.
Referenced by bm::decoder::get_32_OR().
|
inline |
OR array elements against another array dst |= *src.
Definition at line 835 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
OR 2 arrays and copy to the destination dst = *src1 | *src2.
Definition at line 941 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
OR array elements against another 2 arrays dst |= *src1 | *src2.
Definition at line 987 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
OR array elements against another 4 arrays dst |= *src1 | *src2 | *src3 | *src4.
Definition at line 1039 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
| BMFORCEINLINE void bm::avx2_set_block | ( | __m256i *BMRESTRICT | dst, |
| bm::word_t | value ) |
AVX2 block memset dst = value.
Definition at line 1477 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
|
inline |
|
inline |
fused block shift right by 1 plus AND
Definition at line 1959 of file bmavx2.h.
References BM_ASSERT, BMRESTRICT, and set_block_digest_wave_size.
|
inline |
AVX2 block copy dst = *src.
Definition at line 1589 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
AVX2 block copy (unaligned SRC) dst = *src.
Definition at line 1631 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
AND-NOT (SUB) array elements against another array dst &= ~*src.
Definition at line 1204 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
SUB (AND NOT) block digest stride dst &= ~*src.
Definition at line 1250 of file bmavx2.h.
References BMRESTRICT.
|
inline |
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2
Definition at line 1280 of file bmavx2.h.
References BMRESTRICT.
|
inline |
|
inline |
| BMFORCEINLINE bool bm::avx2_test_all_eq_wave2 | ( | const void * | ptr0, |
| const void * | ptr1 ) |
check if 2 wave of pointers are all the same (NULL or FULL)
Definition at line 1829 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_or().
| BMFORCEINLINE bool bm::avx2_test_all_one_wave | ( | const void * | ptr | ) |
| BMFORCEINLINE bool bm::avx2_test_all_zero_wave | ( | const void * | ptr | ) |
check if wave of pointers is all NULL
Definition at line 1805 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_and(), bm::bvector< Alloc >::combine_operation_sub(), find_not_null_ptr(), for_each_bit(), and for_each_nzblock().
| BMFORCEINLINE bool bm::avx2_test_all_zero_wave2 | ( | const void * | ptr0, |
| const void * | ptr1 ) |
check if 2 wave of pointers are all NULL
Definition at line 1816 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_xor().
|
inline |
XOR array elements to specified mask dst = *src ^ mask.
Definition at line 447 of file bmavx2.h.
References BMRESTRICT.
|
inline |
XOR block against another dst ^= *src.
Definition at line 1108 of file bmavx2.h.
References BMRESTRICT, and set_block_size.
|
inline |
3 operand XOR dst = *src1 ^ *src2.
Definition at line 1154 of file bmavx2.h.
References BMRESTRICT, and set_block_size.