BitMagic-C++
SSE4.2 functions (internal)

Processor specific optimizations for SSE4.2 instructions (internals). More...

Functions

bm::id_t bm::sse4_bit_count (const __m128i *block, const __m128i *block_end) BMNOEXCEPT
bm::id_t bm::sse42_bit_count_digest (const bm::word_t *BMRESTRICT block, bm::id64_t digest) BMNOEXCEPT
bool bm::sse4_is_all_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all zero bits
BMFORCEINLINE bool bm::sse4_is_digest_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if digest stride is all zero bits
BMFORCEINLINE void bm::sse4_block_set_digest (__m128i *dst, unsigned value) BMNOEXCEPT
 set digest stride to 0xFF.. or 0x0 value
unsigned bm::sse4_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND of two blocks: dst &= *src.
BMFORCEINLINE bool bm::sse4_and_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND block digest stride dst &= *src.
BMFORCEINLINE bool bm::sse4_and_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND block digest stride dst = *src1 & *src2.
bool bm::sse4_and_or_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND-OR block digest stride dst |= *src1 & *src2.
bool bm::sse4_and_digest_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND block digest stride.
bool bm::sse4_and_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 AND block digest stride.
BMFORCEINLINE bool bm::sse4_sub_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SUB (AND NOT) block digest stride dst &= ~*src.
BMFORCEINLINE bool bm::sse4_sub_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2
bool bm::sse4_sub_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 SUB block digest stride.
bool bm::sse4_sub_digest_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 SUB block digest stride.
bool bm::sse4_is_all_one (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all ONE bits
BMFORCEINLINE bool bm::sse42_test_all_one_wave (const void *ptr) BMNOEXCEPT
 check if SSE wave is all 0xFFFF...FFF
BMFORCEINLINE bool bm::sse42_test_all_zero_wave (const void *ptr) BMNOEXCEPT
 check if wave of pointers is all NULL
BMFORCEINLINE bool bm::sse42_test_all_zero_wave2 (const void *ptr0, const void *ptr1) BMNOEXCEPT
 check if 2 waves of pointers are all NULL
BMFORCEINLINE bool bm::sse42_test_all_eq_wave2 (const void *ptr0, const void *ptr1) BMNOEXCEPT
 check if 2 waves of pointers are the same (null or FULL)
unsigned bm::sse42_bit_block_calc_change (const __m128i *BMRESTRICT block, unsigned size) BMNOEXCEPT
void bm::sse42_bit_block_calc_xor_change (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT xor_block, unsigned size, unsigned *BMRESTRICT gc, unsigned *BMRESTRICT bc) BMNOEXCEPT
void bm::sse42_bit_block_calc_change_bc (const __m128i *BMRESTRICT block, unsigned *gc, unsigned *bc) BMNOEXCEPT
bool bm::sse42_bit_find_first_diff (const __m128i *BMRESTRICT block1, const __m128i *BMRESTRICT block2, unsigned *pos) BMNOEXCEPT
 Find first bit which is different between two bit-blocks.
bool bm::sse42_bit_find_first (const __m128i *BMRESTRICT block, unsigned off, unsigned *pos) BMNOEXCEPT
 Find first non-zero bit.
unsigned bm::sse4_gap_find (const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size) BMNOEXCEPT
unsigned bm::sse42_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) BMNOEXCEPT
 Hybrid binary search, starts as binary, then switches to linear scan.
unsigned bm::sse42_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos) BMNOEXCEPT
 Hybrid binary search to test GAP value, starts as binary, then switches to scan.
int bm::sse42_cmpge_u32 (__m128i vect4, unsigned value) BMNOEXCEPT
 Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.
bool bm::sse42_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift left by 1
bool bm::sse42_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift right by 1
bool bm::sse42_shift_r1_and (__m128i *block, bm::word_t co1, const __m128i *BMRESTRICT mask_block, bm::id64_t *digest) BMNOEXCEPT
 block shift right by 1 plus AND
void bm::sse42_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask.
void bm::sse42_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask.
bool bm::sse2_sub_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 SUB block digest stride.
bool bm::sse2_sub_digest_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 SUB block digest stride.

Detailed Description

Processor specific optimizations for SSE4.2 instructions (internals).

Function Documentation

◆ sse2_sub_digest_3way()

bool bm::sse2_sub_digest_3way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )
inline

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 911 of file bmsse2.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse2_sub_digest_5way()

bool bm::sse2_sub_digest_5way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2,
const __m128i *BMRESTRICT src3,
const __m128i *BMRESTRICT src4 )
inline

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 828 of file bmsse2.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse42_bit_block_calc_change()

unsigned bm::sse42_bit_block_calc_change ( const __m128i *BMRESTRICT block,
unsigned size )
inline

SSE4.2 calculate number of bit changes from 0 to 1

Definition at line 948 of file bmsse4.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BMNOEXCEPT, and BMRESTRICT.

◆ sse42_bit_block_calc_change_bc()

void bm::sse42_bit_block_calc_change_bc ( const __m128i *BMRESTRICT block,
unsigned * gc,
unsigned * bc )
inline

SSE4.2 calculate number of bit changes from 0 to 1

Definition at line 1143 of file bmsse4.h.

References BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse42_bit_block_calc_xor_change()

void bm::sse42_bit_block_calc_xor_change ( const __m128i *BMRESTRICT block,
const __m128i *BMRESTRICT xor_block,
unsigned size,
unsigned *BMRESTRICT gc,
unsigned *BMRESTRICT bc )
inline

SSE4.2 calculate number of bit changes from 0 to 1 of a XOR product

Definition at line 1025 of file bmsse4.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BMNOEXCEPT, and BMRESTRICT.

◆ sse42_bit_block_xor()

void bm::sse42_bit_block_xor ( bm::word_t * target_block,
const bm::word_t * block,
const bm::word_t * xor_block,
bm::id64_t digest )
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target := block ^ xor_block
block- arg1
xor_block- arg2
digest- mask for each block wave to XOR (1) or just copy (0)

Definition at line 1988 of file bmsse4.h.

References block_waves, BMNOEXCEPT, and set_block_digest_wave_size.

◆ sse42_bit_block_xor_2way()

void bm::sse42_bit_block_xor_2way ( bm::word_t * target_block,
const bm::word_t * xor_block,
bm::id64_t digest )
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target ^= xor_block
xor_block- arg1
digest- mask for each block wave to XOR (if 1)

Definition at line 2058 of file bmsse4.h.

References bmi_blsi_u64(), bmi_bslr_u64(), BMNOEXCEPT, and set_block_digest_wave_size.

◆ sse42_bit_count_digest()

bm::id_t bm::sse42_bit_count_digest ( const bm::word_t *BMRESTRICT block,
bm::id64_t digest )
inline

SSE4.2 optimized bitcounting, uses digest for positioning

Definition at line 127 of file bmsse4.h.

References BM_ASSERT, bmi_blsi_u64(), bmi_bslr_u64(), BMNOEXCEPT, BMRESTRICT, and set_block_digest_wave_size.

◆ sse42_bit_find_first()

bool bm::sse42_bit_find_first ( const __m128i *BMRESTRICT block,
unsigned off,
unsigned * pos )
inline

Find first non-zero bit.

Definition at line 1274 of file bmsse4.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BM_BSF32, BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse42_bit_find_first_diff()

bool bm::sse42_bit_find_first_diff ( const __m128i *BMRESTRICT block1,
const __m128i *BMRESTRICT block2,
unsigned * pos )
inline

Find first bit which is different between two bit-blocks.

Definition at line 1218 of file bmsse4.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BM_BSF32, BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse42_cmpge_u32()

int bm::sse42_cmpge_u32 ( __m128i vect4,
unsigned value )
inline

Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.

Definition at line 1527 of file bmsse4.h.

References BM_BSF32, and BMNOEXCEPT.

◆ sse42_gap_bfind()

unsigned bm::sse42_gap_bfind ( const unsigned short *BMRESTRICT buf,
unsigned pos,
unsigned *BMRESTRICT is_set )
inline

Hybrid binary search, starts as binary, then switches to linear scan.

Parameters
buf- GAP buffer pointer.
pos- index of the element.
is_set- output. GAP value (0 or 1).
Returns
GAP index.

Definition at line 1390 of file bmsse4.h.

References BM_ASSERT, BMNOEXCEPT, BMRESTRICT, and sse4_gap_find().

◆ sse42_gap_test()

unsigned bm::sse42_gap_test ( const unsigned short *BMRESTRICT buf,
unsigned pos )
inline

Hybrid binary search to test GAP value, starts as binary, then switches to scan.

Returns
test result

Definition at line 1461 of file bmsse4.h.

References BM_ASSERT, BMNOEXCEPT, BMRESTRICT, and sse4_gap_find().

◆ sse42_shift_l1()

bool bm::sse42_shift_l1 ( __m128i * block,
unsigned * empty_acc,
unsigned co1 )
inline

block shift left by 1

Definition at line 1773 of file bmsse4.h.

References BMNOEXCEPT, and set_block_size.

◆ sse42_shift_r1()

bool bm::sse42_shift_r1 ( __m128i * block,
unsigned * empty_acc,
unsigned co1 )
inline

block shift right by 1

Definition at line 1823 of file bmsse4.h.

References BMNOEXCEPT, and set_block_size.

◆ sse42_shift_r1_and()

bool bm::sse42_shift_r1_and ( __m128i * block,
bm::word_t co1,
const __m128i *BMRESTRICT mask_block,
bm::id64_t * digest )
inline

block shift right by 1 plus AND

Returns
carry over flag

Definition at line 1873 of file bmsse4.h.

References BM_ASSERT, BMNOEXCEPT, BMRESTRICT, id_max, and set_block_digest_wave_size.

◆ sse42_test_all_eq_wave2()

BMFORCEINLINE bool bm::sse42_test_all_eq_wave2 ( const void * ptr0,
const void * ptr1 )

check if 2 waves of pointers are the same (null or FULL)

Definition at line 934 of file bmsse4.h.

References BMNOEXCEPT.

Referenced by bm::bvector< Alloc >::combine_operation_or().

◆ sse42_test_all_one_wave()

BMFORCEINLINE bool bm::sse42_test_all_one_wave ( const void * ptr)

check if SSE wave is all 0xFFFF...FFF

Definition at line 899 of file bmsse4.h.

References BMNOEXCEPT.

◆ sse42_test_all_zero_wave()

BMFORCEINLINE bool bm::sse42_test_all_zero_wave ( const void * ptr)

check if wave of pointers is all NULL

Definition at line 910 of file bmsse4.h.

References BMNOEXCEPT.

Referenced by bm::bvector< Alloc >::combine_operation_and(), bm::bvector< Alloc >::combine_operation_sub(), for_each_bit(), and for_each_nzblock().

◆ sse42_test_all_zero_wave2()

BMFORCEINLINE bool bm::sse42_test_all_zero_wave2 ( const void * ptr0,
const void * ptr1 )

check if 2 waves of pointers are all NULL

Definition at line 921 of file bmsse4.h.

References BMNOEXCEPT.

Referenced by bm::bvector< Alloc >::combine_operation_xor().

◆ sse4_and_block()

unsigned bm::sse4_and_block ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src )
inline

AND of two blocks: dst &= *src.

Returns
0 if no bits were set

Definition at line 294 of file bmsse4.h.

References BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse4_and_digest()

BMFORCEINLINE bool bm::sse4_and_digest ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src )

AND block digest stride dst &= *src.

Returns
true if stride is all zero

Definition at line 341 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_and_digest_2way()

BMFORCEINLINE bool bm::sse4_and_digest_2way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )

AND block digest stride dst = *src1 & *src2.

Returns
true if stride is all zero

Definition at line 389 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_and_digest_3way()

bool bm::sse4_and_digest_3way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )
inline

AND block digest stride.

Returns
true if stride is all zero

Definition at line 491 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_and_digest_5way()

bool bm::sse4_and_digest_5way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2,
const __m128i *BMRESTRICT src3,
const __m128i *BMRESTRICT src4 )
inline

AND block digest stride.

Returns
true if stride is all zero

Definition at line 552 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_and_or_digest_2way()

bool bm::sse4_and_or_digest_2way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )
inline

AND-OR block digest stride dst |= *src1 & *src2.

Returns
true if stride is all zero

Definition at line 438 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_bit_count()

bm::id_t bm::sse4_bit_count ( const __m128i * block,
const __m128i * block_end )
inline

SSE4.2 optimized bitcounting.

Definition at line 93 of file bmsse4.h.

References BMNOEXCEPT.

◆ sse4_block_set_digest()

BMFORCEINLINE void bm::sse4_block_set_digest ( __m128i * dst,
unsigned value )

set digest stride to 0xFF.. or 0x0 value

Definition at line 276 of file bmsse4.h.

References BMNOEXCEPT.

◆ sse4_gap_find()

unsigned bm::sse4_gap_find ( const bm::gap_word_t *BMRESTRICT pbuf,
const bm::gap_word_t pos,
const unsigned size )
inline

SSE4.2 check for one to two (variable len) 128 bit SSE lines for gap search results (8 elements)

Definition at line 1340 of file bmsse4.h.

References BM_ASSERT, BMNOEXCEPT, and BMRESTRICT.

Referenced by sse42_gap_bfind(), and sse42_gap_test().

◆ sse4_is_all_one()

bool bm::sse4_is_all_one ( const __m128i *BMRESTRICT block)
inline

check if block is all ONE bits

Definition at line 874 of file bmsse4.h.

References BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse4_is_all_zero()

bool bm::sse4_is_all_zero ( const __m128i *BMRESTRICT block)
inline

check if block is all zero bits

Definition at line 232 of file bmsse4.h.

References BMNOEXCEPT, BMRESTRICT, and set_block_size.

◆ sse4_is_digest_zero()

BMFORCEINLINE bool bm::sse4_is_digest_zero ( const __m128i *BMRESTRICT block)

check if digest stride is all zero bits

Definition at line 257 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_sub_digest()

BMFORCEINLINE bool bm::sse4_sub_digest ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src )

SUB (AND NOT) block digest stride dst &= ~*src.

Returns
true if stride is all zero

Definition at line 636 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_sub_digest_2way()

BMFORCEINLINE bool bm::sse4_sub_digest_2way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )

2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2

Returns
true if stride is all zero

Definition at line 685 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_sub_digest_3way()

bool bm::sse4_sub_digest_3way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2 )
inline

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 814 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ sse4_sub_digest_5way()

bool bm::sse4_sub_digest_5way ( __m128i *BMRESTRICT dst,
const __m128i *BMRESTRICT src1,
const __m128i *BMRESTRICT src2,
const __m128i *BMRESTRICT src3,
const __m128i *BMRESTRICT src4 )
inline

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 732 of file bmsse4.h.

References BMNOEXCEPT, and BMRESTRICT.