BitMagic-C++
bmsse2.h File Reference

Compute functions for SSE2 SIMD instruction set (internal). More...

#include <mmintrin.h>
#include <emmintrin.h>
#include "bmdef.h"
#include "bmutil.h"
#include "bmsse_util.h"
Include dependency graph for bmsse2.h:

Go to the source code of this file.

Namespaces

namespace  bm

Macros

#define VECT_XOR_ARR_2_MASK(dst, src, src_end, mask)
#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask)
#define VECT_BITCOUNT(first, last)
#define VECT_BITCOUNT_AND(first, last, mask)
#define VECT_BITCOUNT_OR(first, last, mask)
#define VECT_BITCOUNT_XOR(first, last, mask)
#define VECT_BITCOUNT_SUB(first, last, mask)
#define VECT_INVERT_BLOCK(first)
#define VECT_AND_BLOCK(dst, src)
#define VECT_AND_DIGEST(dst, src)
#define VECT_AND_OR_DIGEST_2WAY(dst, src1, src2)
#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4)
#define VECT_AND_DIGEST_3WAY(dst, src1, src2)
#define VECT_AND_DIGEST_2WAY(dst, src1, src2)
#define VECT_OR_BLOCK(dst, src)
#define VECT_OR_BLOCK_2WAY(dst, src1, src2)
#define VECT_OR_BLOCK_3WAY(dst, src1, src2)
#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4)
#define VECT_SUB_BLOCK(dst, src)
#define VECT_SUB_DIGEST(dst, src)
#define VECT_SUB_DIGEST_2WAY(dst, src1, src2)
#define VECT_SUB_DIGEST_5WAY(dst, src1, src2, src3, src4)
#define VECT_SUB_DIGEST_3WAY(dst, src1, src2)
#define VECT_XOR_BLOCK(dst, src)
#define VECT_XOR_BLOCK_2WAY(dst, src1, src2)
#define VECT_COPY_BLOCK(dst, src)
#define VECT_COPY_BLOCK_UNALIGN(dst, src)
#define VECT_STREAM_BLOCK(dst, src)
#define VECT_STREAM_BLOCK_UNALIGN(dst, src)
#define VECT_SET_BLOCK(dst, value)
#define VECT_IS_ZERO_BLOCK(dst)
#define VECT_IS_ONE_BLOCK(dst)
#define VECT_IS_DIGEST_ZERO(start)
#define VECT_BLOCK_SET_DIGEST(dst, val)
#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to)
#define VECT_SHIFT_R1(b, acc, co)
#define VECT_BIT_FIND_FIRST(src, off, pos)
#define VECT_BIT_FIND_DIFF(src1, src2, pos)
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d)
#define VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d)
#define VECT_GAP_BFIND(buf, pos, is_set)
#define VECT_GAP_TEST(buf, pos)

Functions

bm::id_t bm::sse2_bit_count (const __m128i *block, const __m128i *block_end)
template<class Func>
bm::id_t bm::sse2_bit_count_op (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, const __m128i *BMRESTRICT mask_block, Func sse2_func)
bool bm::sse2_is_all_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all zero bits
bool bm::sse2_is_all_one (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all ONE bits
BMFORCEINLINE bool bm::sse2_is_digest_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if digest stride is all zero bits
BMFORCEINLINE void bm::sse2_block_set_digest (__m128i *dst, unsigned value) BMNOEXCEPT
 Set digest stride to all-ones (0xFF...) or all-zeros (0x0) value.
void bm::sse2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask.
void bm::sse2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask.
BMFORCEINLINE bool bm::sse2_and_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND block digest stride *dst &= *src.
BMFORCEINLINE bool bm::sse2_and_or_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND-OR block digest stride *dst |= *src1 & *src2.
bool bm::sse2_and_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 AND block digest stride.
bool bm::sse2_and_digest_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND block digest stride.
BMFORCEINLINE bool bm::sse2_and_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND block digest stride *dst = *src1 & *src2.
BMFORCEINLINE bool bm::sse2_sub_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SUB (AND NOT) block digest stride *dst &= ~*src.
BMFORCEINLINE bool bm::sse2_sub_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 2-operand SUB (AND NOT) block digest stride *dst = *src1 & ~*src2.
bool bm::sse2_sub_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 SUB block digest stride.
bool bm::sse2_sub_digest_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 SUB block digest stride.
bool bm::sse2_bit_find_first (const __m128i *BMRESTRICT block, unsigned off, unsigned *pos) BMNOEXCEPT
 Find first non-zero bit.
bool bm::sse2_bit_find_first_diff (const __m128i *BMRESTRICT block1, const __m128i *BMRESTRICT block2, unsigned *pos) BMNOEXCEPT
 Find first bit which is different between two bit-blocks.
bool bm::sse2_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift right by 1
bool bm::sse2_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift left by 1
bm::id_t bm::sse2_bit_block_calc_count_change (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, unsigned *BMRESTRICT bit_count)
unsigned bm::sse2_gap_find (const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, unsigned size)
unsigned bm::sse2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
 Hybrid binary search, starts as binary, then switches to linear scan.
unsigned bm::sse2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan.

Detailed Description

Compute functions for SSE2 SIMD instruction set (internal).

Definition in file bmsse2.h.

Macro Definition Documentation

◆ VECT_AND_BLOCK

#define VECT_AND_BLOCK ( dst,
src )
Value:
sse2_and_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1541 of file bmsse2.h.

◆ VECT_AND_DIGEST

#define VECT_AND_DIGEST ( dst,
src )
Value:
sse2_and_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1544 of file bmsse2.h.

◆ VECT_AND_DIGEST_2WAY

#define VECT_AND_DIGEST_2WAY ( dst,
src1,
src2 )
Value:
sse2_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1556 of file bmsse2.h.

◆ VECT_AND_DIGEST_3WAY

#define VECT_AND_DIGEST_3WAY ( dst,
src1,
src2 )
Value:
sse2_and_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1553 of file bmsse2.h.

◆ VECT_AND_DIGEST_5WAY

#define VECT_AND_DIGEST_5WAY ( dst,
src1,
src2,
src3,
src4 )
Value:
sse2_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

Definition at line 1550 of file bmsse2.h.

◆ VECT_AND_OR_DIGEST_2WAY

#define VECT_AND_OR_DIGEST_2WAY ( dst,
src1,
src2 )
Value:
sse2_and_or_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1547 of file bmsse2.h.

◆ VECT_ANDNOT_ARR_2_MASK

#define VECT_ANDNOT_ARR_2_MASK ( dst,
src,
src_end,
mask )
Value:
sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)
Referenced type: bm::word_t is declared as unsigned int word_t
(definition at bmconst.h:39).

Definition at line 1520 of file bmsse2.h.

◆ VECT_BIT_BLOCK_XOR

#define VECT_BIT_BLOCK_XOR ( t,
src,
src_xor,
d )
Value:
sse2_bit_block_xor(t, src, src_xor, d)

Definition at line 1632 of file bmsse2.h.

◆ VECT_BIT_BLOCK_XOR_2WAY

#define VECT_BIT_BLOCK_XOR_2WAY ( t,
src_xor,
d )
Value:
sse2_bit_block_xor_2way(t, src_xor, d)

Definition at line 1635 of file bmsse2.h.

◆ VECT_BIT_FIND_DIFF

#define VECT_BIT_FIND_DIFF ( src1,
src2,
pos )
Value:
sse2_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)

Definition at line 1629 of file bmsse2.h.

◆ VECT_BIT_FIND_FIRST

#define VECT_BIT_FIND_FIRST ( src,
off,
pos )
Value:
sse2_bit_find_first((__m128i*) src, off, pos)

Definition at line 1626 of file bmsse2.h.

◆ VECT_BITCOUNT

#define VECT_BITCOUNT ( first,
last )
Value:
sse2_bit_count((__m128i*) (first), (__m128i*) (last))

Definition at line 1523 of file bmsse2.h.

◆ VECT_BITCOUNT_AND

#define VECT_BITCOUNT_AND ( first,
last,
mask )
Value:
sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)

Definition at line 1526 of file bmsse2.h.

◆ VECT_BITCOUNT_OR

#define VECT_BITCOUNT_OR ( first,
last,
mask )
Value:
sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)

Definition at line 1529 of file bmsse2.h.

◆ VECT_BITCOUNT_SUB

#define VECT_BITCOUNT_SUB ( first,
last,
mask )
Value:
sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)

Definition at line 1535 of file bmsse2.h.

◆ VECT_BITCOUNT_XOR

#define VECT_BITCOUNT_XOR ( first,
last,
mask )
Value:
sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)

Definition at line 1532 of file bmsse2.h.

◆ VECT_BLOCK_SET_DIGEST

#define VECT_BLOCK_SET_DIGEST ( dst,
val )
Value:
sse2_block_set_digest((__m128i*)dst, val)

Definition at line 1616 of file bmsse2.h.

◆ VECT_COPY_BLOCK

#define VECT_COPY_BLOCK ( dst,
src )
Value:
sse2_copy_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1592 of file bmsse2.h.

◆ VECT_COPY_BLOCK_UNALIGN

#define VECT_COPY_BLOCK_UNALIGN ( dst,
src )
Value:
sse2_copy_block_unalign((__m128i*) dst, (__m128i*) (src))

Definition at line 1595 of file bmsse2.h.

◆ VECT_GAP_BFIND

#define VECT_GAP_BFIND ( buf,
pos,
is_set )
Value:
sse2_gap_bfind(buf, pos, is_set)

Definition at line 1638 of file bmsse2.h.

◆ VECT_GAP_TEST

#define VECT_GAP_TEST ( buf,
pos )
Value:
sse2_gap_test(buf, pos)

Definition at line 1641 of file bmsse2.h.

◆ VECT_INVERT_BLOCK

#define VECT_INVERT_BLOCK ( first)
Value:
sse2_invert_block((__m128i*)first);

Definition at line 1538 of file bmsse2.h.

◆ VECT_IS_DIGEST_ZERO

#define VECT_IS_DIGEST_ZERO ( start)
Value:
sse2_is_digest_zero((__m128i*)start)

Definition at line 1613 of file bmsse2.h.

◆ VECT_IS_ONE_BLOCK

#define VECT_IS_ONE_BLOCK ( dst)
Value:
sse2_is_all_one((__m128i*) dst)

Definition at line 1610 of file bmsse2.h.

◆ VECT_IS_ZERO_BLOCK

#define VECT_IS_ZERO_BLOCK ( dst)
Value:
sse2_is_all_zero((__m128i*) dst)

Definition at line 1607 of file bmsse2.h.

◆ VECT_LOWER_BOUND_SCAN_U32

#define VECT_LOWER_BOUND_SCAN_U32 ( arr,
target,
from,
to )
Value:
sse2_lower_bound_scan_u32(arr, target, from, to)

Definition at line 1619 of file bmsse2.h.

◆ VECT_OR_BLOCK

#define VECT_OR_BLOCK ( dst,
src )
Value:
sse2_or_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1559 of file bmsse2.h.

◆ VECT_OR_BLOCK_2WAY

#define VECT_OR_BLOCK_2WAY ( dst,
src1,
src2 )
Value:
sse2_or_block_2way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

Definition at line 1562 of file bmsse2.h.

◆ VECT_OR_BLOCK_3WAY

#define VECT_OR_BLOCK_3WAY ( dst,
src1,
src2 )
Value:
sse2_or_block_3way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

Definition at line 1565 of file bmsse2.h.

◆ VECT_OR_BLOCK_5WAY

#define VECT_OR_BLOCK_5WAY ( dst,
src1,
src2,
src3,
src4 )
Value:
sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))

Definition at line 1568 of file bmsse2.h.

◆ VECT_SET_BLOCK

#define VECT_SET_BLOCK ( dst,
value )
Value:
sse2_set_block((__m128i*) dst, value)

Definition at line 1604 of file bmsse2.h.

◆ VECT_SHIFT_R1

#define VECT_SHIFT_R1 ( b,
acc,
co )
Value:
sse2_shift_r1((__m128i*)b, acc, co)

Definition at line 1622 of file bmsse2.h.

◆ VECT_STREAM_BLOCK

#define VECT_STREAM_BLOCK ( dst,
src )
Value:
sse2_stream_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1598 of file bmsse2.h.

◆ VECT_STREAM_BLOCK_UNALIGN

#define VECT_STREAM_BLOCK_UNALIGN ( dst,
src )
Value:
sse2_stream_block_unalign((__m128i*) dst, (__m128i*) (src))

Definition at line 1601 of file bmsse2.h.

◆ VECT_SUB_BLOCK

#define VECT_SUB_BLOCK ( dst,
src )
Value:
sse2_sub_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1571 of file bmsse2.h.

◆ VECT_SUB_DIGEST

#define VECT_SUB_DIGEST ( dst,
src )
Value:
sse2_sub_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1574 of file bmsse2.h.

◆ VECT_SUB_DIGEST_2WAY

#define VECT_SUB_DIGEST_2WAY ( dst,
src1,
src2 )
Value:
sse2_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1577 of file bmsse2.h.

◆ VECT_SUB_DIGEST_3WAY

#define VECT_SUB_DIGEST_3WAY ( dst,
src1,
src2 )
Value:
sse2_sub_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1583 of file bmsse2.h.

◆ VECT_SUB_DIGEST_5WAY

#define VECT_SUB_DIGEST_5WAY ( dst,
src1,
src2,
src3,
src4 )
Value:
sse2_sub_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

Definition at line 1580 of file bmsse2.h.

◆ VECT_XOR_ARR_2_MASK

#define VECT_XOR_ARR_2_MASK ( dst,
src,
src_end,
mask )
Value:
sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

Definition at line 1517 of file bmsse2.h.

◆ VECT_XOR_BLOCK

#define VECT_XOR_BLOCK ( dst,
src )
Value:
sse2_xor_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1586 of file bmsse2.h.

◆ VECT_XOR_BLOCK_2WAY

#define VECT_XOR_BLOCK_2WAY ( dst,
src1,
src2 )
Value:
sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1589 of file bmsse2.h.