1#ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2#define BMSPARSEVEC_ALGO__H__INCLUDED__
24#ifndef BM__H__INCLUDED__
27# error missing include (bm.h or bm64.h)
41# pragma warning( disable: 4146 )
69 unsigned sv_planes = svect.slices();
71 BM_ASSERT(sv_planes > high_bit && high_bit > 0);
73 typename SV::bvector_type bv_acc;
77 for (i = high_bit+1; i < sv_planes; ++i)
79 if (
typename SV::bvector_type* bv = svect.slice(i))
87 for (i = high_bit;
true; --i)
89 typename SV::bvector_type* bv = svect.get_create_slice(i);
109 if (low_bit == 0)
return;
112 unsigned sv_planes = svect.slices();
113 typename SV::bvector_type bv_acc1;
117 for (i = low_bit+1; i < sv_planes; ++i)
119 if (
typename SV::bvector_type* bv = svect.slice(i))
124 typename SV::bvector_type bv_acc2;
125 typename SV::bvector_type* bv_low_plane = svect.get_create_slice(low_bit);
127 for (i = low_bit-1;
true; --i)
129 if (
typename SV::bvector_type* bv = svect.slice(i))
144 bv_acc1.bit_xor(bv_acc2);
145 bv_low_plane->bit_or(bv_acc1);
172 typename SV::size_type& midx,
180 unsigned planes1 = (unsigned)sv1.get_bmatrix().rows();
183 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
184 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
196 if (bv_null1 && bv_null2)
198 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
199 if (f && (midx < mismatch))
201 found = f; mismatch = midx;
208 typename SV::bvector_type bv_tmp;
209 bv_tmp.resize(sv2.size());
213 bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
214 if (f && (midx < mismatch))
216 found = f; mismatch = midx;
221 typename SV::bvector_type bv_tmp;
222 bv_tmp.resize(sv1.size());
225 bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
226 if (f && (midx < mismatch))
228 found = f; mismatch = midx;
235 for (
unsigned i = 0; mismatch && (i < planes1); ++i)
237 typename SV::bvector_type_const_ptr bv1 = sv1.get_slice(i);
240 typename SV::bvector_type_const_ptr bv2 = sv2.get_slice(i);
248 bool f = bv2->find(midx);
249 if (f && (midx < mismatch))
251 found = f; sv_idx = 2; mismatch = midx;
258 bool f = bv1->find(midx);
259 if (f && (midx < mismatch))
261 found = f; sv_idx = 1; mismatch = midx;
267 bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
268 if (f && (midx < mismatch))
270 found = f; mismatch = midx;
273 sv_idx = (bv1->test(mismatch)) ? 1 : 2;
290 found = sv1.find_rank(midx + 1, mismatch);
293 found = sv2.find_rank(midx + 1, mismatch);
304 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
305 if (f && (midx < mismatch))
307 found = f; mismatch = midx;
347template<
typename SV1,
typename SV2>
355 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
357 allocator_pool_type pool;
359 mp_guard_bv.assign_if_not_set(pool, bv);
373 unsigned planes = sv1.effective_slices();;
374 if (planes < sv2.slices())
375 planes = sv2.slices();
377 for (
unsigned i = 0; i < planes; ++i)
379 typename SV1::bvector_type_const_ptr bv1 = sv1.get_slice(i);
380 typename SV2::bvector_type_const_ptr bv2 = sv2.get_slice(i);
400 mp_guard.assign_if_not_set(pool, bv_xor);
402 bv_xor.
bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
409 typename SV1::size_type sz1 = sv1.size();
410 typename SV2::size_type sz2 = sv2.size();
420 bv.set_range(sz1, sz2-1);
426 typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
427 typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
434 if (bv_null1 && bv_null2)
438 mp_guard.assign_if_not_set(pool, bv_or);
452 if (bv_null1 && bv_null2)
456 mp_guard.assign_if_not_set(pool, bv_xor);
465 mp_guard.assign_if_not_set(pool, bv_null);
469 bv_null.
resize(sv1.size());
474 bv_null.
resize(sv2.size());
513 void construct(
const SV& sv,
unsigned s_factor);
553 unsigned s_factor_ = 0;
556 bool idx_unique_ =
true;
557 size_t min_key_len_ = 0;
580template<
typename SV,
unsigned S_FACTOR>
595 bm::heap_vector<value_type, typename bvector_type::allocator_type, true>
618 template<
class Opt = bm::agg_run_options<> >
653 {
agg_pipe_.set_search_count_limit(limit); }
670 void add(
const typename SV::value_type* str);
686 {
return agg_pipe_.get_bv_res_vector(); }
690 {
return agg_pipe_.get_bv_count_vector(); }
722 void bind(
const SV& sv,
bool sorted);
754 template<typename BII>
893 template<class TPipe>
989 template<typename It>
996 mp_guard.assign_if_not_set(pool_, bv_out);
1000 bool any_zero =
false;
1001 for (; start < end; ++start)
1004 any_zero |= (v == 0);
1030 bool null_correct =
true);
1046 template<
bool BOUND>
1082 unsigned prefix_len);
1096 unsigned octet_start,
1103 template <
bool BOUND>
1112 unsigned from,
unsigned total_planes);
1119 unsigned from,
unsigned total_planes);
1125 if constexpr (std::is_signed<value_type>::value)
1133 value_vect_.resize_no_copy(effective_str_max_ * 2);
1134 remap_value_vect_.resize_no_copy(effective_str_max_ * 2);
1135 remap_prefix_vect_.resize_no_copy(effective_str_max_ * 2);
1154 bm::heap_vector<bm::id64_t, typename bvector_type::allocator_type, true>
1160 template<
typename AGG>
1167 unsigned char bits[64];
1168 unsigned short bit_count_v =
bm::bitscan(value, bits);
1169 for (
unsigned i = 0; i < bit_count_v; ++i)
1171 unsigned bit_idx = bits[i];
1173 unsigned plane_idx = (unsigned(octet_idx) * 8) + bit_idx;
1178 unsigned vinv = unsigned(value);
1187 vinv &= unsigned(planes_mask);
1188 for (
unsigned octet_plane = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1192 unsigned plane_idx = octet_plane + bit_idx;
1216 const SV* bound_sv_;
1245template<
typename SV>
1308 remap(bv_in, sv_brel, bv_out);
1321 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
1330 template<
unsigned BSIZE>
1365template<
typename SV>
1370 static_assert(std::is_unsigned<value_type>::value,
1371 "BM: unsigned sparse vector is required for transform");
1375 SV::throw_bad_alloc();
1381template<
typename SV>
1391template<
typename SV>
1401 if (sv_brel->empty() || !compute_stats)
1403 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
1419template<
typename SV>
1424 if (sv_brel.empty())
1427 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
1430 if (!bv_non_null->test(id_from))
1433 size_type idx = sv_brel.translate_address(id_from);
1434 id_to = sv_brel.get(idx);
1440template<
typename SV>
1446 mp_g_out.assign_if_not_set(
pool_, bv_out);
1452 remap(bv_in, bv_out);
1457template<
typename SV>
1471 mp_g_out.assign_if_not_set(
pool_, bv_out);
1503 bv_out.set_bit_no_check(0);
1512 buf_cnt = nb_old = 0;
1515 for (; en.
valid(); ++en)
1517 typename SV::size_type idx = *en;
1518 idx =
sv_ptr_->translate_address(idx);
1530 gb_->gather_idx_[buf_cnt++] = idx;
1534 gb_->gather_idx_[buf_cnt++] = idx;
1554template<
typename SV>
1559 if (sv_brel.empty())
1564 typename SV::const_iterator it = sv_brel.begin();
1565 for (; it.valid(); ++it)
1567 typename SV::value_type t_id = *it;
1569 if (bv_in.test(idx))
1571 bv_out.set_bit_no_check(t_id);
1581template<
typename SV,
unsigned S_FACTOR>
1588 effective_str_max_ = 0;
1593template<
typename SV,
unsigned S_FACTOR>
1596 static_assert(S_FACTOR == 4 || S_FACTOR == 8 || S_FACTOR == 16
1597 || S_FACTOR == 32 || S_FACTOR == 64,
1598 "BM: sparse_vector_scanner<> incorrect sampling factor template parameter");
1603 if constexpr (SV::is_str())
1605 effective_str_max_ = sv.effective_vector_max();
1610 range_idx_.construct(sv, S_FACTOR);
1614 vector_plane_masks_.resize(effective_str_max_);
1615 for (
unsigned i = 0; i < effective_str_max_; ++i)
1617 vector_plane_masks_[i] = sv.get_slice_mask(i);
1624template<
typename SV,
unsigned S_FACTOR>
1628 effective_str_max_ = 0;
1633template<
typename SV,
unsigned S_FACTOR>
1644 if constexpr (SV::is_compressed())
1647 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
1660template<
typename SV,
unsigned S_FACTOR>
1669 auto old_sz = bv_out.size();
1670 bv_out.resize(sv.size());
1672 bv_out.resize(old_sz);
1678template<
typename SV,
unsigned S_FACTOR>
1684 bv_out.bit_and(*bv_null);
1689template<
typename SV,
unsigned S_FACTOR>
1701 return bv_out.any();
1712 bool any = (search_limit == 1);
1713 found = agg_.combine_and_sub(bv_out, any);
1720template<
typename SV,
unsigned S_FACTOR>
1736 found = agg_.find_first_and_sub(idx);
1744template<
typename SV,
unsigned S_FACTOR>
1751 unsigned prefix_len)
1759 unsigned common_prefix_len = 0;
1761 value_type* pref = remap_prefix_vect_.data();
1765 if (agg_.set_range_hint(mask_from_, mask_to_))
1767 if (prefix_len == ~0u)
1770 sv.template common_prefix_length<true>(mask_from_, mask_to_, pref);
1771 if (common_prefix_len)
1774 str = remap_value_vect_.data();
1776 for (
unsigned i = 0; i < common_prefix_len; ++i)
1777 if (str[i] != pref[i])
1783 unsigned pl; (void)pl;
1785 (pl=sv.template common_prefix_length<true>(
1786 mask_from_, mask_to_, pref)));
1787 common_prefix_len = prefix_len;
1792 if (prefix_len != ~0u)
1794 unsigned pl; (void)pl;
1796 (pl=sv.template common_prefix_length<true>(
1797 mask_from_, mask_to_, pref)));
1798 common_prefix_len = prefix_len;
1804 if (common_prefix_len && (in_len <= common_prefix_len))
1806 if (in_len == common_prefix_len)
1807 --common_prefix_len;
1814 str = remap_value_vect_.data();
1817 if (sv.is_remap() && (str != remap_value_vect_.data()))
1819 bool r = sv.remap_tosv(
1820 remap_value_vect_.data(), sv.effective_max_str(), str);
1823 str = remap_value_vect_.data();
1830 found = agg_.find_first_and_sub(idx);
1831 if (found && idx > mask_to_)
1833 int cmp = sv.compare(idx, search_str);
1844template<
typename SV,
unsigned S_FACTOR>
1848 unsigned octet_start,
1852 for (; str[len] != 0; ++len)
1858 for (
int octet_idx = len-1; octet_idx >= int(octet_start); --octet_idx)
1860 unsigned value = unsigned(str[octet_idx]) & 0xFFu;
1863 if (&sv == bound_sv_)
1864 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
1866 planes_mask = sv.get_slice_mask(
unsigned(octet_idx));
1868 if ((value & planes_mask) != value)
1878 typename SV::size_type planes = sv.get_bmatrix().rows_not_null();
1879 for (
unsigned plane_idx =
unsigned(len * 8);
1880 plane_idx < planes; ++plane_idx)
1891template<
typename SV,
unsigned S_FACTOR>
1896 using unsigned_value_type =
typename SV::unsigned_value_type;
1900 unsigned char bits[
sizeof(value) * 8];
1901 unsigned_value_type uv = sv.s2u(value);
1903 unsigned short bit_count_v =
bm::bitscan(uv, bits);
1905 const unsigned_value_type mask1 = 1;
1910 for (
unsigned i = bit_count_v; i > 0; --i)
1912 unsigned bit_idx = bits[i-1];
1919 unsigned sv_planes = sv.effective_slices();
1920 for (
unsigned i = 0; i < sv_planes; ++i)
1922 unsigned_value_type mask = mask1 << i;
1932template<
typename SV,
unsigned S_FACTOR>
1948 unsigned char bits[
sizeof(value) * 8];
1949 typename SV::unsigned_value_type uv = sv.s2u(value);
1950 unsigned short bit_count_v =
bm::bitscan(uv, bits);
1954 if (
const bvector_type* bv_plane = sv.get_slice(bits[--bit_count_v]))
1961 for (
unsigned i = 0; i < bit_count_v; ++i)
1965 bv_out &= *bv_plane;
1974 unsigned sv_planes = sv.effective_slices();
1975 for (
unsigned i = 0; (i < sv_planes) && uv; ++i)
1977 if (
const bvector_type* bv = sv.get_slice(i); bv && !(uv & (1u << i)))
1984template<
typename SV,
unsigned S_FACTOR>
1996template<
typename SV,
unsigned S_FACTOR>
2002 if constexpr (std::is_signed<value_type>::value)
2004 if (val == std::numeric_limits<int>::min())
2009 bv_out.merge(bv_min);
2027 bv_out.set_range(0, sv.size()-1);
2035template<
typename SV,
unsigned S_FACTOR>
2047template<
typename SV,
unsigned S_FACTOR>
2058template<
typename SV,
unsigned S_FACTOR>
2070 bv_out.bit_sub(bv_gt);
2076template<
typename SV,
unsigned S_FACTOR>
2082 unsigned char blist[64];
2083 unsigned bit_count_v;
2095 auto sz = sv.size();
2096 bv_out.set_range(0, sz-1);
2097 bv_out.bit_sub(bv_zero);
2099 if constexpr (std::is_signed<value_type>::value)
2102 bv_out.bit_sub(*bv_sign);
2112 if constexpr (std::is_signed<value_type>::value)
2117 bv_out.swap(gtz_bv);
2125 bv_out.bit_or(gtz_bv);
2131 auto uvalue = SV::s2u(value +
bool(value < 0));
2146 unsigned total_planes = sv.effective_slices();
2147 const bvector_type* bv_sign = sv.get_slice(0); (void)bv_sign;
2150 if constexpr (std::is_signed<value_type>::value)
2156 bv_out.swap(gtz_bv);
2170 if (total_planes <
unsigned(blist[bit_count_v-1]+1))
2176 bv_out.merge(top_or_bv);
2184 for (
int i =
int(bit_count_v)-1; i >= 0; --i)
2186 int slice_idx = blist[i];
2189 const bvector_type* bv_base_plane = sv.get_slice(
unsigned(slice_idx));
2195 if constexpr (std::is_signed<value_type>::value)
2198 and_eq_bv.bit_and(*bv_base_plane, *bv_sign);
2200 and_eq_bv.bit_and(*bv_base_plane, gtz_bv);
2203 and_eq_bv = *bv_base_plane;
2206 and_eq_bv.bit_and(*bv_base_plane);
2208 int next_slice_idx = 0;
2211 next_slice_idx = blist[i-1];
2212 if (slice_idx-1 == next_slice_idx)
2219 for (
int j = slice_idx-1; j >= next_slice_idx; --j)
2221 if constexpr (std::is_signed<value_type>::value)
2224 if (
const bvector_type* bv_sub_plane = sv.get_slice(
unsigned(j)))
2225 bv_out.bit_or_and(and_eq_bv, *bv_sub_plane);
2229 if constexpr (std::is_signed<value_type>::value)
2234 top_or_bv.set_range(0, sv.size()-1);
2235 top_or_bv.bit_sub(bv_out);
2236 bv_out.swap(top_or_bv);
2240 gtz_bv.resize(sv.size());
2242 bv_out.bit_sub(gtz_bv);
2250 if constexpr (!std::is_signed<value_type>::value)
2262template<
typename SV,
unsigned S_FACTOR>
2266 unsigned from,
unsigned total_planes)
2268 for (
unsigned i = from; i < total_planes; ++i)
2272 BM_ASSERT(bv_slice != sv.get_null_bvector());
2276 agg_.combine_or(bv_target);
2281template<
typename SV,
unsigned S_FACTOR>
2285 unsigned from,
unsigned total_planes)
2287 for (
unsigned i = from; i < total_planes; ++i)
2291 BM_ASSERT(bv_slice != sv.get_null_bvector());
2292 bv_target.bit_or_and(bv_mask, *bv_slice);
2299template<
typename SV,
unsigned S_FACTOR>
2301 const typename SV::value_type* str,
2302 typename SV::bvector_type& bv_out)
2310template<
typename SV,
unsigned S_FACTOR>
2312 const typename SV::value_type* str,
2313 typename SV::size_type& pos)
2316 return this->find_eq_str(*bound_sv_, str, pos);
2321template<
typename SV,
unsigned S_FACTOR>
2324 const typename SV::value_type* str,
2325 typename SV::size_type& pos)
2332 bool remaped =
false;
2333 if constexpr (SV::is_remap_support::value)
2335 if (sv.is_remap() && str != remap_value_vect_.data())
2337 auto str_len = sv.effective_vector_max();
2338 remap_value_vect_.resize(str_len);
2339 remaped = sv.remap_tosv(
2340 remap_value_vect_.data(), str_len, str);
2343 str = remap_value_vect_.data();
2347 size_t in_len = ::strlen(str);
2349 found = find_first_eq(sv, str, in_len, found_pos, remaped, ~0u);
2353 if constexpr (SV::is_rsc_support::value)
2355 if constexpr (sv.is_compressed())
2356 found = sv.find_rank(found_pos + 1, pos);
2363 typename SV::bvector_type bv_zero;
2364 find_zero(sv, bv_zero);
2365 found = bv_zero.find(pos);
2372template<
typename SV,
unsigned S_FACTOR>
2374 const typename SV::value_type* str,
2375 typename SV::bvector_type& bv_out)
2378 return find_eq_str(*bound_sv_, str, bv_out);
2383template<
typename SV,
unsigned S_FACTOR>
2386 const typename SV::value_type* str,
2387 typename SV::bvector_type& bv_out)
2389 return find_eq_str_impl(sv, str, bv_out,
true);
2394template<
typename SV,
unsigned S_FACTOR>
2397 const typename SV::value_type* str,
2400 auto str_len = sv.effective_vector_max();
2401 remap_vect_target.resize(str_len);
2402 bool remaped = sv.remap_tosv(remap_vect_target.data(), str_len, str);
2408template<
typename SV,
unsigned S_FACTOR>
2411 const typename SV::value_type* str,
2412 typename SV::bvector_type& bv_out,
2419 if constexpr (SV::is_rsc_support::value)
2427 if constexpr (SV::is_remap_support::value)
2429 if (sv.is_remap() && (str != remap_value_vect_.data()))
2431 bool remaped =
remap_tosv(remap_value_vect_, str, sv);
2434 str = remap_value_vect_.data();
2440 const unsigned common_prefix_len = 0;
2445 found = agg_.combine_and_sub(bv_out);
2452 found = bv_out.any();
2460template<
typename SV,
unsigned S_FACTOR>
template<
class TPipe>
2463 if (pipe.bv_and_mask_)
2466 bool any = pipe.bv_and_mask_->find_range(first, last);
2469 agg_.set_range_hint(first, last);
2471 agg_.combine_and_sub(pipe.agg_pipe_);
2472 agg_.reset_range_hint();
2477template<
typename SV,
unsigned S_FACTOR>
2481 const typename SV::value_type* str,
2484 typename SV::size_type& pos)
2490 unsigned prefix_len = ~0u;
2499 if constexpr (BOUND)
2501 found = range_idx_.bfind_range(str, in_len, l, r);
2506 (unsigned) range_idx_.common_prefix_length(str, in_len, l, r);
2508 if ((l == r) && (in_len == prefix_len))
2510 range_idx_.recalc_range(str, l, r);
2515 range_idx_.recalc_range(str, l, r);
2528 l = 0; r = sv.size()-1;
2534 if (dist < min_distance_cutoff)
2583 typename SV::size_type mid = dist/2+l;
2616 found =
find_first_eq(sv, str, in_len, found_pos, remaped, prefix_len);
2620 if constexpr (SV::is_compressed())
2621 found = sv.find_rank(found_pos + 1, pos);
2628 typename SV::bvector_type bv_zero;
2630 found = bv_zero.find(pos);
2637template<
typename SV,
unsigned S_FACTOR>
2641 size_t len = ::strlen(str);
2642 effective_str_max_ = sv.effective_max_str();
2643 if (len > effective_str_max_)
2648 bool remaped =
false;
2649 if constexpr (SV::is_remap_support::value)
2653 remap_value_vect_.resize_no_copy(len);
2654 remaped = sv.remap_tosv(remap_value_vect_.data(),
2655 effective_str_max_, str);
2665template<
typename SV,
unsigned S_FACTOR>
2667 const typename SV::value_type* str,
2668 typename SV::size_type& pos)
2671 size_t len = ::strlen(str);
2672 if (len > effective_str_max_)
2674 bool remaped =
false;
2675 if constexpr (SV::is_remap_support::value)
2677 if (bound_sv_->is_remap())
2679 remaped = bound_sv_->remap_tosv(remap_value_vect_.data(),
2680 effective_str_max_, str);
2685 return bfind_eq_str_impl<true>(*bound_sv_, str, len, remaped, pos);
2690template<
typename SV,
unsigned S_FACTOR>
2697 if (in_len > effective_str_max_)
2702 bool remaped =
false;
2704 if constexpr (SV::is_remap_support::value)
2706 if (bound_sv_->is_remap())
2708 remaped = bound_sv_->remap_n_tosv_2way(
2709 remap_value_vect_.data(),
2720 for (
size_t i = 0; i < in_len && *str; ++i)
2729template<
typename SV,
unsigned S_FACTOR>
2732 const typename SV::value_type val,
2733 typename SV::size_type& pos)
2745 cmp = this->
compare(sv, l, val);
2756 cmp = this->
compare(sv, r, val);
2764 cmp = this->
compare(sv, r, val);
2782 cmp = this->
compare(sv, l, val);
2799 cmp = this->
compare(sv, mid, val);
2806 cmp = this->
compare(sv, i, val);
2822 typename SV::const_iterator it(&sv, l);
2823 for (; it.valid(); ++it, ++l)
2825 typename SV::value_type sv_value = it.value();
2826 if (sv_value == val)
2850template<
typename SV,
unsigned S_FACTOR>
2853 const typename SV::value_type* str,
2854 typename SV::size_type& pos)
2943 if (!hmatr_.is_init())
2945 unsigned max_str = (unsigned)sv.effective_max_str();
2949 dist = sv.decode(hmatr_, l, dist+1);
2950 for (
unsigned i = 0; i < dist; ++i, ++l)
2952 const typename SV::value_type* hm_str = hmatr_.row(i);
2953 cmp = ::strcmp(hm_str, str);
2984template<
typename SV,
unsigned S_FACTOR>
2985template <
bool BOUND>
2993 if constexpr (BOUND)
3038 for (
unsigned i = 0;
true; ++i)
3040 char octet = str[i];
char value = s0[i];
3041 res = (value > octet) - (value < octet);
3052 return sv.compare(idx, str);
3058template<
typename SV,
unsigned S_FACTOR>
3065 return sv.compare(idx, val);
3070template<
typename SV,
unsigned S_FACTOR>
3073 typename SV::value_type value,
3074 typename SV::bvector_type& bv_out)
3095template<
typename SV,
unsigned S_FACTOR>
template<
typename BII>
3099 static_assert(!SV::is_compressed(),
"BM:find_eq on RSC vector not implemented");
3106 typename SV::bvector_type bv_out;
3108 typename SV::bvector_type::enumerator en = bv_out.get_enumerator(0);
3109 for (; en.valid(); ++en)
3122 found = agg_.combine_and_sub_bi(bi);
3129template<
typename SV,
unsigned S_FACTOR>
3132 typename SV::value_type value,
3133 typename SV::size_type& pos)
3139 bool found = bv_zero.find(pos);
3150 if constexpr (SV::is_compressed())
3151 found = sv.find_rank(found_pos + 1, pos);
3159template<
typename SV,
unsigned S_FACTOR>
3162 typename SV::bvector_type& bv_out)
3165 auto sz = sv.effective_slices();
3166 for (
unsigned i = 0; i < sz; ++i)
3167 agg_.add(sv.get_slice(i));
3168 agg_.combine_or(bv_out);
3174template<
typename SV,
unsigned S_FACTOR>
3177 typename SV::bvector_type& bv_out)
3180 bv_out.set_range(0, sv.size()-1);
3181 if constexpr (std::is_signed<value_type>::value)
3184 bv_out.bit_sub(*bv_sign);
3190template<
typename SV,
unsigned S_FACTOR>
3193 typename SV::bvector_type& bv_out)
3195 if constexpr (SV::is_compressed())
3197 const bvector_type* bv_non_null = sv.get_null_bvector();
3201 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
3202 bv_out.swap(bv_tmp_);
3206 (void)sv; (void)bv_out;
3212template<
typename SV,
unsigned S_FACTOR>
3217 mask_from_ = from; mask_to_ = to; mask_set_ =
true;
3222template<
typename SV,
unsigned S_FACTOR>
3233template<
typename SV,
unsigned S_FACTOR>
template<
class Opt>
3238 static_assert(Opt::is_masks(),
3239 "BM: Search masking needs to be enabled in template parameter options before function call. see bm::agg_run_options<> ");
3245template<
typename SV,
unsigned S_FACTOR>
template<
class Opt>
3248 const typename SV::value_type* str)
3252 typename aggregator_type::arg_groups* arg =
agg_pipe_.add();
3255 if constexpr (SV::is_remap_support::value)
3266 for (; str[len] != 0; ++len)
3270 if constexpr(Opt::is_masks())
3272 arg->add(
sv_.get_null_bvector(), 0);
3276 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
3281 unsigned value = unsigned(str[octet_idx]) & 0xFF;
3286 if (&sv == bound_sv_)
3287 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
3290 planes_mask =
sv_.get_slice_mask(
unsigned(octet_idx));
3292 if ((value & planes_mask) != value)
3299 *arg,
sv_, octet_idx, planes_mask, value);
3307 unsigned plane_idx = unsigned(len * 8);
3310 for (; plane_idx < planes; ++plane_idx)
3323template<
typename SV>
3327 s_factor_ = s_factor;
3328 sv_size_ = sv.size();
3334 auto effective_str_max = sv.effective_vector_max() + 1;
3336 size_type idx_size = total_nb * s_factor + 1;
3337 s_cache_.init_resize(idx_size, effective_str_max);
3338 s_cache_.set_zero();
3348 sv.get(i, s_str, cols);
3350 if (i == sv_size_-1)
3358 s_str = s_cache_.row(idx_size_);
3360 sv.get(i, s_str, cols);
3369 min_len = ::strlen(s);
3375 const value_type* str_prev = s_cache_.row(0);
3376 for(
size_type i = 1; i < idx_size_; ++i)
3378 const value_type* str_curr = s_cache_.row(i);
3379 size_t curr_len = ::strlen(str_curr);
3380 if (curr_len < min_len)
3383 int cmp = SV::compare_str(str_prev, str_curr);
3387 idx_unique_ =
false;
3390 str_prev = str_curr;
3393 min_key_len_ = min_len;
3399template<
typename SV>
3408 l = 0; r = idx_size_ - 1;
3411 size_t min_len = this->min_key_len_;
3412 if (in_len < min_len)
3418 cmp = SV::compare_str(search_str, str, min_len);
3422 str = s_cache_.row(r);
3423 cmp = SV::compare_str(search_str, str, min_len);
3431 if (dist < linear_cutoff)
3436 cmp = SV::compare_str(search_str, str_i, min_len);
3459 cmp = SV::compare_str(str_m, search_str, min_len);
3471template<
typename SV>
3481 size_t min_len = (in_len < min_key_len_) ? in_len : min_key_len_;
3485 for (; i < min_len-3; i+=4)
3488 ::memcpy(&i2, &str_l[i],
sizeof(i2));
3489 ::memcpy(&i1, &str_r[i],
sizeof(i1));
3494 ::memcpy(&i2, &str_s[i],
sizeof(i2));
3504 auto ch1 = str_l[i];
auto ch2 = str_r[i];
3505 if (ch1 != ch2 || (!(ch1|ch2)))
3507 auto chs = str_s[i];
3517template<
typename SV>
3530 if (r == idx_size_-1)
3557 int cmp = SV::compare_str(search_str, str);
3560 r -= (r && idx_unique_);
Algorithms for fast aggregation of N bvectors.
Algorithms for bvector<> (main include).
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
Algorithms for fast aggregation of a group of bit-vectors.
bm::heap_vector< size_type, allocator_type, true > bv_count_vector_type
Constant iterator designed to enumerate "ON" bits.
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand OR : this := bv1 OR bv2
@ opt_none
no optimization
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
allocator_type::allocator_pool_type allocator_pool_type
void resize(size_type new_size)
Change size of the bvector.
bm::bvector< Alloc > & bit_xor(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand XOR : this := bv1 XOR bv2
Algorithms for rank compression of bit-vector.
friend class sparse_vector_scanner
void set_search_count_limit(size_type limit) BMNOEXCEPT
Set search limit for results.
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
pipeline(const pipeline &)=delete
void set_search_mask(const bvector_type *bv_mask) BMNOEXCEPT
Set pipeline mask bit-vector to restrict the search space.
const run_options & get_options() const BMNOEXCEPT
Get pipeline run options.
aggregator_pipeline_type & get_aggregator_pipe() BMNOEXCEPT
get aggregator pipeline (access to compute internals)
void add(const typename SV::value_type *str)
Add new search string.
const bvector_type * bv_and_mask_
aggregator_pipeline_type agg_pipe_
traget aggregator pipeline
remap_vector_type remap_value_vect_
remap buffer
bvect_vector_type & get_bv_res_vector() BMNOEXCEPT
Return results vector used for pipeline execution.
size_type size() const BMNOEXCEPT
pipeline & operator=(const pipeline &)=delete
bool is_complete() const BMNOEXCEPT
bv_count_vector_type & get_bv_count_vector() BMNOEXCEPT
Return results vector count used for pipeline execution.
aggregator_type::template pipeline< Opt > aggregator_pipeline_type
size_t eff_slices_
number of effectice NOT NULL value slices
run_options & options() BMNOEXCEPT
Set pipeline run options.
const SV & sv_
target sparse vector ref
void set_or_target(bvector_type *bv_or) BMNOEXCEPT
Attach OR (aggregator vector).
algorithms for sparse_vector scan/search
bm::dynamic_heap_matrix< value_type, allocator_type > matrix_search_buf_type
allocator_pool_type & get_bvector_alloc_pool() BMNOEXCEPT
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the sa...
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
int compare(const SV &sv, size_type idx, const value_type val) BMNOEXCEPT
compare sv[idx] with input value
SV::bvector_type bvector_type
void operator=(const sparse_vector_scanner &)=delete
bool prepare_and_sub_aggregator(const SV &sv, value_type value)
Prepare aggregator for AND-SUB (EQ) search.
void find_eq(const SV &sv, value_type value, bvector_type &bv_out)
find all sparse vector elements EQ to search value
bool bfind(const SV &sv, const value_type val, size_type &pos)
binary search for position in the sorted sparse vector
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
void correct_nulls(const SV &sv, bvector_type &bv_out)
Exclude possible NULL values from the result vector.
int compare_str(const SV &sv, size_type idx, const value_type *str) const BMNOEXCEPT
compare sv[idx] with input str
void reset_binding() BMNOEXCEPT
reset sparse vector binding
void find_range(const SV &sv, value_type from, value_type to, bvector_type &bv_out)
find all elements sparse vector element in closed range [left..right] interval
bm::heap_vector< bm::id64_t, typename bvector_type::allocator_type, true > mask_vector_type
const bvector_type * bvector_type_const_ptr
void aggregate_OR_slices(bvector_type &bv_target, const SV &sv, unsigned from, unsigned total_planes)
aggregator_type::bv_count_vector_type bv_count_vector_type
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
void decompress(const SV &sv, bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
sparse_vector_scanner(const sparse_vector_scanner &)=delete
bvector_type * bvector_type_ptr
void find_gt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater (>) than value
void set_search_range(size_type from, size_type to) BMNOEXCEPT
set search boundaries (hint for the aggregator)
bool find_eq_str_impl(const SV &sv, const value_type *str, bvector_type &bv_out, bool prefix_sub)
find EQ str / prefix impl
void find_zero(const SV &sv, bvector_type &bv_out, bool null_correct=true)
find all sparse vector elements EQ to 0
void reset_search_range() BMNOEXCEPT
reset (disable) search range
void find_eq_with_nulls_horizontal(const SV &sv, value_type value, bvector_type &bv_out)
For testing purposes only.
bm::heap_vector< value_type, typename bvector_type::allocator_type, true > remap_vector_type
void find_lt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less (<) than value
bool find_eq_with_nulls(const SV &sv, value_type value, bvector_type &bv_out, size_type search_limit=0)
find value (may include NULL indexes)
bool bfind_eq_str(const SV &sv, const value_type *str, size_type &pos)
binary find first sparse vector element (string).
bool bfind_eq_str_impl(const SV &sv, const value_type *str, size_t in_len, bool remaped, size_type &pos)
void find_gt_horizontal(const SV &sv, value_type value, bvector_type &bv_out, bool null_correct=true)
For testing purposes only.
static constexpr int gt_slice_limit() noexcept
Return the slice limit for signed/unsigned vector value types.
static bool remap_tosv(remap_vector_type &remap_vect_target, const value_type *str, const SV &sv)
Remap input value into SV char encodings.
static void add_agg_char(AGG &agg, const SV &sv, int octet_idx, bm::id64_t planes_mask, unsigned value)
Addd char to aggregator (AND-SUB).
bm::aggregator< bvector_type > aggregator_type
aggregator_type::run_options run_options
Scanner options to control execution.
bool lower_bound_str(const SV &sv, const value_type *str, size_type &pos)
lower bound search for an array position
void find_positive(const SV &sv, bvector_type &bv_out)
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all pla...
bvector_type::allocator_type allocator_type
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
void find_nonzero(const SV &sv, bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all planes.
bool find_first_eq(const SV &sv, value_type value, size_type &idx)
find first value (may include NULL indexes)
void invert(const SV &sv, bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
allocator_type::allocator_pool_type allocator_pool_type
SV::value_type value_type
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
void find_le(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less or equal (<=) than value
void find_ge(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater or equal (>=) than value
static void aggregate_AND_OR_slices(bvector_type &bv_target, const bvector_type &bv_mask, const SV &sv, unsigned from, unsigned total_planes)
Index for SV sorted vectors for approximate range queries.
size_type size() const BMNOEXCEPT
Index size (number of sampled elements).
size_type common_prefix_length(const value_type *search_str, size_t in_len, size_type l, size_type r) const BMNOEXCEPT
find common prefix between index elements and search string
SV::bvector_type bvector_type
SV::value_type value_type
bool bfind_range(const value_type *search_str, size_t in_len, size_type &l, size_type &r) const BMNOEXCEPT
find range (binary)
sv_sample_index(const SV &sv, unsigned s_factor)
bool is_unique() const BMNOEXCEPT
returns true if all index values are unique
size_type sv_size() const BMNOEXCEPT
Original SV size.
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
void recalc_range(const value_type *search_str, size_type &l, size_type &r) const BMNOEXCEPT
recalculate range into SV coordinates range [from..to)
void construct(const SV &sv, unsigned s_factor)
Build sampling index for the sorted sprase vector.
size_t get_min_len() const BMNOEXCEPT
Return length of minimal indexed string.
bvector_type::allocator_type allocator_type
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
Templated Bitscan with dynamic dispatch for best type.
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
null_support
NULL-able value support.
@ BM_SORTED
input set is sorted (ascending order)
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost).
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
const unsigned set_block_mask
BMFORCEINLINE bool has_zero_byte_u64(bm::id64_t v) BMNOEXCEPT
Returns true if INT64 contains 0 octet.
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
unsigned long long int id64_t
const unsigned gap_max_bits
const unsigned set_block_shift
bm::aggregator< bm::bvector<> > aggregator_type