1#ifndef BMSTRSPARSEVEC__H__INCLUDED__
2#define BMSTRSPARSEVEC__H__INCLUDED__
33#ifndef BM__H__INCLUDED__
36# error missing include (bm.h or bm64.h)
70template<
typename CharType,
typename BV,
unsigned STR_SIZE>
100 typedef bm::dynamic_heap_matrix<unsigned char, allocator_type>
118 bm::heap_vector<CharType, typename bvector_type::allocator_type, true>
134 : str_sv_(str_sv), idx_(idx)
146 str_sv_.get(idx_, this->
buf_.data(), str_sv_.effective_max_str());
147 return this->
buf_.data();
151 {
return bool(*
this) == bool(ref); }
167 : str_sv_(str_sv), idx_(idx)
179 str_sv_.get(idx_, this->
buf_.data(), str_sv_.effective_max_str());
180 return this->
buf_.data();
192 str_sv_.set(idx_, str);
196 {
return bool(*
this) == bool(ref); }
260 {
return (pos_ == it.pos_) && (sv_ == it.sv_); }
264 {
return pos_ < it.pos_; }
266 {
return pos_ <= it.pos_; }
268 {
return pos_ > it.pos_; }
270 {
return pos_ >= it.pos_; }
274 {
return this->
value(); }
278 { this->
advance();
return *
this; }
318 unsigned substr_from_;
366 bi.buf_matrix_.rows(), bi.buf_matrix_.cols());
396 { this->
add(v);
return *
this; }
400 template<
typename StrType>
403 this->
add(v.c_str());
return *
this;
460 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
612 template<
typename StrType>
615 this->
insert(idx, str.c_str());
647 template<
typename StrType>
650 if (idx >= this->
size())
661 CharType ch = str[i];
666 ch = CharType(remap_value);
668 this->
bmatr_.set_octet(idx, i, (
unsigned char)ch);
674 bv_null->set_bit_no_check(idx);
682 template<
typename StrType>
709 template<
typename StrType>
725 template<
typename StrType>
729 for (
unsigned i = 0;
true; ++i)
731 CharType ch = CharType(this->
bmatr_.get_octet(idx, i));
737 unsigned char remap_value = remap_row[unsigned(ch)];
744 ch = CharType(remap_value);
799 template<
bool USE_PREFIX_BUF = false>
952 {
return const_iterator(
this, idx); }
959 {
return back_insert_iterator(
this); }
1118 template<
typename CharMatrix>
1122 bool zero_mem =
true)
const
1126 0,
unsigned(str_len-1), zero_mem);
1142 template<
typename CharMatrix>
1146 unsigned substr_from,
1148 bool zero_mem =
true)
const
1154 struct sv_decode_visitor_func
1156 sv_decode_visitor_func(CharMatrix& cmatr)
BMNOEXCEPT2
1169 const unsigned i = substr_i_;
1176 }
while (++j < bits_size);
1184 auto base = bv_offset - sv_off_;
1186 const unsigned i = substr_i_;
1199 unsigned substr_i_= 0;
1212 if (max_sz < dec_size)
1214 if (rows < dec_size)
1219 sv_decode_visitor_func func(cmatr);
1221 for (
unsigned i = substr_from; i <= substr_to; ++i)
1224 func.substr_i_ = i - substr_from;
1226 auto rsize = this->
bmatr_.rows_not_null();
1227 for (
unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
1237 func.sv_off_ = idx_from;
1248 for (
unsigned i = 0; i < dec_size; ++i)
1250 typename CharMatrix::value_type* str = cmatr.row(i);
1266 template<
typename CharMatrix>
1271 if (idx_from < this->
size_)
1274 this->
clear_range(idx_from, idx_from + imp_size - 1);
1287 template<
typename CharMatrix>
1378 template<
typename CharMatrix,
size_t BufSize = ins_buf_size>
1381 bool set_not_null =
true)
1385 unsigned max_str_size = 0;
1387 for (
unsigned j = 0; j < imp_size; ++j)
1389 typename CharMatrix::value_type* str = cmatr.row(j);
1390 typename CharMatrix::size_type i;
1391 typename CharMatrix::size_type cols = cmatr.cols();
1392 for (i = 0; i < cols; ++i)
1398 (unsigned)((i > max_str_size) ? i : max_str_size);
1403 unsigned char remap_value =
1409 str[i] = CharType(remap_value);
1417 unsigned_value_type ch_slice[BufSize];
1418 for (
unsigned i = 0; i < max_str_size; ++i)
1420 unsigned ch_acc = 0;
1421#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1424 for (
size_type j = 0; j < imp_size; j+=4)
1431 ch_acc |= ch0 | ch1 | ch2 | ch3;
1432 ch_slice[j] = ch0; ch_slice[j+1] = ch1;
1433 ch_slice[j+2] = ch2; ch_slice[j+3] = ch3;
1439 for (
size_type j = 0; j < imp_size; ++j)
1449 size_type idx_to = idx_from + imp_size - 1;
1453 bv_null->set_range(idx_from, idx_to);
1455 if (idx_to >= this->
size())
1456 this->
size_ = idx_to+1;
1460#pragma warning( push )
1461#pragma warning( disable : 4146 )
1464 template<
size_t BufSize = ins_buf_size>
1471 for ( ;ch_acc; ch_acc &= ch_acc - 1)
1473 unsigned n_bits = 0;
1475 unsigned mask = 1u << bi;
1476#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1479 mask |= (mask << 8) | (mask << 16) | (mask << 24);
1480 for (
size_type j = 0; j < imp_size; j+=4)
1482 unsigned ch0 = ((unsigned)ch_slice[j+0]) |
1483 ((
unsigned)ch_slice[j+1] << 8) |
1484 ((unsigned)ch_slice[j+2] << 16) |
1485 ((
unsigned)ch_slice[j+3] << 24);
1487 ch0 = (ch0 >> bi) | (ch0 >> (bi+7)) |
1488 (ch0 >> (bi+14)) | (ch0 >> (bi+21));
1491 for (
size_type base_idx = idx_from + j ;ch0; ch0 &= ch0 - 1)
1493 const unsigned bit_idx =
1495 bit_list[n_bits++] = base_idx + bit_idx;
1502 for (
size_type j = 0; j < imp_size; ++j)
1504 unsigned ch = unsigned(ch_slice[j]);
1519#pragma warning( pop )
1572 *idx_from = from; *idx_to = to;
return true;
1606template<
class CharType,
class BV,
unsigned STR_SIZE>
1612:
parent_type(null_able, true, ap, bv_max_size, alloc),
1615 static_assert(STR_SIZE > 1,
1616 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1622template<
class CharType,
class BV,
unsigned STR_SIZE>
1630 static_assert(STR_SIZE > 1,
1631 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1636template<
class CharType,
class BV,
unsigned STR_SIZE>
1646 static_assert(STR_SIZE > 1,
1647 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1653template<
class CharType,
class BV,
unsigned STR_SIZE>
1665template<
class CharType,
class BV,
unsigned STR_SIZE>
1669 if (idx >= this->
size())
1670 this->
size_ = idx+1;
1676template<
class CharType,
class BV,
unsigned STR_SIZE>
1680 if (idx >= this->
size())
1682 this->
size_ = idx+1;
1692template<
class CharType,
class BV,
unsigned STR_SIZE>
1704template<
class CharType,
class BV,
unsigned STR_SIZE>
1708 if (idx >= this->size_)
1716template<
class CharType,
class BV,
unsigned STR_SIZE>
1719 if (idx >= this->
size_)
1720 this->
size_ = idx + 1;
1722 this->
bmatr_.clear_column(idx, 0);
1726template<
class CharType,
class BV,
unsigned STR_SIZE>
1733 this->
size_ += count;
1738template<
class CharType,
class BV,
unsigned STR_SIZE>
1744 bv_null->set_bit_no_check(idx);
1749template<
class CharType,
class BV,
unsigned STR_SIZE>
1753 for (
unsigned i = 0;
true; ++i)
1755 CharType ch = str[i];
1776 ch = CharType(remap_value);
1778 this->
bmatr_.set_octet(idx, i, (
unsigned char)ch);
1784template<
class CharType,
class BV,
unsigned STR_SIZE>
1794template<
class CharType,
class BV,
unsigned STR_SIZE>
1798 for (
unsigned i = 0;
true; ++i)
1800 CharType ch = str[i];
1816 ch = CharType(remap_value);
1818 this->
bmatr_.insert_octet(idx, i, (
unsigned char)ch);
1825template<
class CharType,
class BV,
unsigned STR_SIZE>
1835 CharType ch = CharType(this->
bmatr_.get_octet(idx, i));
1847template<
class CharType,
class BV,
unsigned STR_SIZE>
1862template<
class CharType,
class BV,
unsigned STR_SIZE>
1885 st->memory_used += remap_mem_usage;
1888 st->max_serialize_mem += (remap_mem_usage * 2);
1894template<
class CharType,
class BV,
unsigned STR_SIZE>
1900 for (
unsigned i = 0;
true; ++i)
1902 CharType octet2 = str2[i];
1903 CharType octet1 = str1[i];
1909 res = (octet1 > octet2) - (octet1 < octet2);
1918template<
class CharType,
class BV,
unsigned STR_SIZE>
1927 CharType octet2, octet1;
1930 for (; i < min_len-3; i+=4)
1933 ::memcpy(&i2, &str2[i],
sizeof(i2));
1934 ::memcpy(&i1, &str1[i],
sizeof(i1));
1940 res = (octet1 > octet2) - (octet1 < octet2);
1945 res = (octet1 > octet2) - (octet1 < octet2);
1950 res = (octet1 > octet2) - (octet1 < octet2);
1955 res = (octet1 > octet2) - (octet1 < octet2);
1965 for (; i < min_len; ++i)
1970 res = (octet1 > octet2) - (octet1 < octet2);
1984 res = (octet1 > octet2) - (octet1 < octet2);
1994template<
class CharType,
class BV,
unsigned STR_SIZE>
2002 for (
unsigned i = 0;
true; ++i)
2004 CharType octet2 = str[i];
2005 CharType octet1 = (CharType)this->
bmatr_.get_octet(idx, i);
2012 CharType remap_value1 = (CharType)remap_row[
unsigned(octet1)];
2014 res = (remap_value1 > octet2) - (remap_value1 < octet2);
2023template<
class CharType,
class BV,
unsigned STR_SIZE>
2031 for (
unsigned i = 0;
true; ++i)
2033 CharType octet2 = str[i];
2034 CharType octet1 = (CharType)this->
bmatr_.get_octet(idx, i);
2040 res = (octet1 > octet2) - (octet1 < octet2);
2050template<
class CharType,
class BV,
unsigned STR_SIZE>
2063template<
class CharType,
class BV,
unsigned STR_SIZE>
2074 for (
unsigned i = 0;
true; ++i)
2077 CharType octet2 = (CharType)this->
bmatr_.get_octet(idx2, i);
2078 CharType octet1 = (CharType)this->
bmatr_.get_octet(idx1, i);
2085 unsigned char remap_value1 = remap_row[unsigned(octet1)];
2087 unsigned char remap_value2 = remap_row[unsigned(octet2)];
2089 res = (remap_value1 > remap_value2) - (remap_value1 < remap_value2);
2096 for (
unsigned i = 0;
true; ++i)
2098 CharType octet2 = (CharType)this->
bmatr_.get_octet(idx2, i);
2099 CharType octet1 = (CharType)this->
bmatr_.get_octet(idx1, i);
2105 res = (octet1 > octet2) - (octet1 < octet2);
2116template<
class CharType,
class BV,
unsigned STR_SIZE>
2117template<
bool USE_PREFIX_BUF>
2122 BM_ASSERT (!(prefix_buf && !USE_PREFIX_BUF));
2124 CharType ch1 = CharType(this->
bmatr_.get_octet(idx1, i));
2125 CharType ch2 = CharType(this->
bmatr_.get_octet(idx2, i));
2126 if (ch1 == ch2 && (ch1|ch2))
2128 if constexpr(USE_PREFIX_BUF)
2131 *prefix_buf++ = ch1;
2133 for (++i;
true; ++i)
2135 ch1 = CharType(this->
bmatr_.get_octet(idx1, i));
2136 ch2 = CharType(this->
bmatr_.get_octet(idx2, i));
2137 if (ch1 != ch2 || (!(ch1|ch2)))
2139 if constexpr(USE_PREFIX_BUF)
2140 *prefix_buf++ = ch1;
2149template<
class CharType,
class BV,
unsigned STR_SIZE>
2162template<
class CharType,
class BV,
unsigned STR_SIZE>
2167 return this->
bmatr_.octet_size();
2172template<
class CharType,
class BV,
unsigned STR_SIZE>
2177 octet_matrix.resize(max_str_len, 256,
false);
2178 octet_matrix.set_zero();
2181 for(; it.
valid(); ++it)
2186 for (
unsigned i = 0;
true; ++i)
2192 octet_freq_matrix_type::value_type* row = octet_matrix.row(i);
2193 unsigned ch_idx = (
unsigned char)ch;
2202template<
class CharType,
class BV,
unsigned STR_SIZE>
2209 octet_remap_matrix1.resize(max_str_len, 256,
false);
2210 octet_remap_matrix1.set_zero();
2211 octet_remap_matrix2.resize(max_str_len, 256,
false);
2212 octet_remap_matrix2.set_zero();
2214 for (
unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
2216 typename octet_freq_matrix_type::value_type* frq_row =
2217 octet_occupancy_matrix.row(i);
2219 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
2220 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
2222 const typename slice_octet_matrix_type::size_type row_size =
2223 octet_occupancy_matrix.cols();
2224 for (
unsigned remap_code = 1;
true; ++remap_code)
2226 typename octet_freq_matrix_type::size_type char_idx;
2234 unsigned char ch = (
unsigned char)char_idx;
2235 remap_row1[remap_code] = ch;
2236 remap_row2[ch] = (
unsigned char)remap_code;
2237 frq_row[char_idx] = 0;
2244template<
class CharType,
class BV,
unsigned STR_SIZE>
2263 unsigned ch_code = remap_row1[j];
2264 remap_row2[ch_code] = (
unsigned char)j;
2274template<
class CharType,
class BV,
unsigned STR_SIZE>
2281 if (!octet_remap_matrix2.rows())
2284 const unsigned char* remap_row = octet_remap_matrix2.row(0);
2285 for (
unsigned i = 0; i < buf_size; ++i, remap_row += 256)
2287 CharType ch = str[i];
2293 unsigned char remap_value = remap_row[unsigned(ch)];
2294 sv_str[i] = CharType(remap_value);
2303template<
class CharType,
class BV,
unsigned STR_SIZE>
2312 BM_ASSERT(in_len <= buf_size); (void) buf_size;
2314 const unsigned char* remap_row = octet_remap_matrix2.row(0);
2315 for (
unsigned i = 0; i < in_len; ++i, remap_row += 256)
2317 CharType ch = str[i];
2320 unsigned char remap_value = remap_row[unsigned(ch)];
2321 sv_str[i] = CharType(remap_value);
2325 sv_str[in_len] = str_cp[in_len] = 0;
2332template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2340 const unsigned char* remap_row = octet_remap_matrix1.row(0);
2341 for (
unsigned i = 0; i < buf_size; ++i, remap_row += 256)
2343 CharType ch = sv_str[i];
2349 unsigned char remap_value = remap_row[unsigned(ch)];
2350 str[i] = CharType(remap_value);
2359template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2370template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2387template<
class CharType,
class BV,
unsigned STR_SIZE>
2398template<
class CharType,
class BV,
unsigned STR_SIZE>
2415 bm::alloc_pool_guard<typename bvector_type::allocator_pool_type, str_sparse_vector> g1, g2;
2419 g1.assign_if_not_set(pool, *
this);
2420 g2.assign_if_not_set(pool, sv);
2423 pool.set_block_limit(r + 10);
2434 omatrix = &occ_matrix;
2439 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
2442 buffer_matrix_type cmatr(buffer_size, str_len);
2445 for (
size_type i{0}, dsize;
true; i += dsize)
2447 dsize = str_sv.
decode(cmatr, i, buffer_size,
true);
2459 this->
import(cmatr, i, dsize);
2471 *bv_null = *bv_null_arg;
2482template<
class CharType,
class BV,
unsigned STR_SIZE>
2492template<
class CharType,
class BV,
unsigned STR_SIZE>
2522template<
class CharType,
class BV,
unsigned STR_SIZE>
2542template<
class CharType,
class BV,
unsigned STR_SIZE>
2548 if (this->
size_ < arg_size)
2567 bv_null->set_range(0, arg_size-1);
2575template<
class CharType,
class BV,
unsigned STR_SIZE>
2587template<
class CharType,
class BV,
unsigned STR_SIZE>
2593 return it_type(
this);
2598template<
class CharType,
class BV,
unsigned STR_SIZE>
2613template<
class CharType,
class BV,
unsigned STR_SIZE>
2615 const char* err_msg)
2618 throw std::range_error(err_msg);
2626template<
class CharType,
class BV,
unsigned STR_SIZE>
2628 const char* err_msg)
2632 err_msg =
"Unknown/incomparable dictionary character";
2633 throw std::domain_error(err_msg);
2645template<
class CharType,
class BV,
unsigned STR_SIZE>
2647: sv_(0), substr_from_(0), substr_to_(STR_SIZE),
2654template<
class CharType,
class BV,
unsigned STR_SIZE>
2658 substr_from_(it.substr_from_), substr_to_(it.substr_to_),
2659 pos_(it.pos_), pos_in_buf_(~
size_type(0))
2665template<
class CharType,
class BV,
unsigned STR_SIZE>
2671 substr_to_ = (unsigned) sv_->effective_max_str();
2672 buf_matrix_.resize(
n_rows, substr_to_+1);
2677template<
class CharType,
class BV,
unsigned STR_SIZE>
2684 substr_to_ = (unsigned) sv_->effective_max_str();
2685 buf_matrix_.resize(n_rows, substr_to_+1);
2690template<
class CharType,
class BV,
unsigned STR_SIZE>
2694 unsigned max_str = sv_->effective_max_str();
2695 substr_from_ = from;
2699 substr_to_ = from + len;
2704 substr_to_ = substr_from_ + (len - 1);
2708 buf_matrix_.resize(
n_rows, len+1,
false);
2713template<
class CharType,
class BV,
unsigned STR_SIZE>
2722 if (!buf_matrix_.is_init())
2726 substr_from_, substr_to_);
2735 return buf_matrix_.row(pos_in_buf_);
2740template<
class CharType,
class BV,
unsigned STR_SIZE>
2749 if (!buf_matrix_.is_init())
2753 substr_from_, substr_to_);
2768template<
class CharType,
class BV,
unsigned STR_SIZE>
2780template<
class CharType,
class BV,
unsigned STR_SIZE>
2788 if (pos_ >= sv_->size())
2795 if (pos_in_buf_ >=
n_rows)
2805template<
class CharType,
class BV,
unsigned STR_SIZE>
2812template<
class CharType,
class BV,
unsigned STR_SIZE>
2821 unsigned esize = (unsigned)
sv_->effective_max_str();
2822 if (esize < STR_SIZE)
2834template<
class CharType,
class BV,
unsigned STR_SIZE>
2837: sv_(bi.sv_), bv_null_(bi.bv_null_), buf_matrix_(bi.buf_matrix_.rows(), bi.buf_matrix_.cols()),
2838 pos_in_buf_(~
size_type(0)), prev_nb_(bi.prev_nb_), opt_mode_(bi.opt_mode_),
2846template<
class CharType,
class BV,
unsigned STR_SIZE>
2854template<
class CharType,
class BV,
unsigned STR_SIZE>
2864template<
class CharType,
class BV,
unsigned STR_SIZE>
2878template<
class CharType,
class BV,
unsigned STR_SIZE>
2884 size_type imp_idx =
sv_->size();
2898template<
class CharType,
class BV,
unsigned STR_SIZE>
2912 bv_null_->set_bit_no_check(sz + buf_idx + 1);
2917template<
class CharType,
class BV,
unsigned STR_SIZE>
2925template<
class CharType,
class BV,
unsigned STR_SIZE>
2930 this->add_value(
"");
2936template<
class CharType,
class BV,
unsigned STR_SIZE>
2943 size_t slen = ::strlen(v);
2956 for (; orows <
omatrix_.rows(); ++orows)
2959 octet_freq_matrix_type::value_type* r =
omatrix_.row(orows);
2960 ::memset(r, 0, 256 *
sizeof(r[0]));
2964 for (
size_t i = 0; i < slen; ++i)
2968 octet_freq_matrix_type::value_type* row =
omatrix_.row(i);
2969 unsigned ch_idx = (
unsigned char)ch;
2976template<
class CharType,
class BV,
unsigned STR_SIZE>
3003 typename buffer_matrix_type::size_type i;
3004 typename buffer_matrix_type::size_type cols =
buf_matrix_.cols();
3005 for (i = 0; i < cols; ++i)
3013 for (cols = i;
true; ++cols)
3024 for (; i < cols; ++i)
3035template<
class CharType,
class BV,
unsigned STR_SIZE>
3041 bool found = bv_null->find_reverse(this->
size_);
3042 this->
size_ += found;
Algorithms for bvector<> (main include).
basic bit-matrix class and utilities
Constants, lookup tables and typedefs.
#define BM_ASSERT_THROW(x, xerrcode)
Utilities for bit transposition (internal) (experimental!).
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
void resize(size_type new_size, bool set_null)
bm::null_support get_null_support() const BMNOEXCEPT
void merge_matr(bmatrix_type &bmatr)
const value_type & const_reference
const bvector_type * get_null_bvector() const BMNOEXCEPT
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
void clear_range(size_type left, size_type right, bool set_null)
bvector_type_ptr get_create_slice(unsigned i)
bmatrix_type bmatr_
bit-transposed matrix
void erase_column(size_type idx, bool erase_null)
void swap_elements(size_type idx1, size_type idx2)
void copy_range_slices(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type left, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type right, bm::null_support slice_null)
bool is_null(size_type idx) const BMNOEXCEPT
size_type size_
array size
void bit_and_rows(const bvector_type &bv)
void keep_range_no_check(size_type left, size_type right, bm::null_support slice_null)
std::make_unsigned< value_type >::type unsigned_value_type
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
bvector_type * get_null_bvect() BMNOEXCEPT
void sync_ro() BMNOEXCEPT
void bit_sub_rows(const bvector_type &bv, bool use_null)
void clear_all(bool free_mem=true) BMNOEXCEPT
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename bvector_type::statistics *stat=0)
void insert_null(size_type idx, bool not_null)
void clear_value_planes_from(unsigned plane_idx, size_type idx)
void calc_stat(typename bvector_type::statistics *st) const BMNOEXCEPT
bool is_nullable() const BMNOEXCEPT
bool equal(const base_sparse_vector< CharType, BV, MAX_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
void insert_clear_value_planes_from(unsigned plane_idx, size_type idx)
Basic dense bit-matrix class.
size_type rows() const BMNOEXCEPT
Constant iterator designed to enumerate "ON" bits.
optmode
Optimization mode. Every next level means additional checks (better compression vs time).
@ opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
bvector_size_type size_type
void import_sorted(const size_type *ids, const size_type ids_size, bool opt_flag)
Import sorted integers (set bits).
Back insert iterator implements buffered insert, faster than generic access assignment.
bvector_type * bv_null_
!< pointer on the parent vector
back_insert_iterator & operator=(const StrType &v)
push value to the vector
void add_remap_stat(const value_type *v)
void add(const value_type *v)
add value to the container
size_type pos_in_buf_
!< value buffer
back_insert_iterator & operator++(int)
noop
unsigned get_remap() const BMNOEXCEPT
Get current remap state flags.
octet_freq_matrix_type omatrix_
octet frequency matrix
void add_value(const value_type *v)
void flush()
flush the accumulated buffer.
back_insert_iterator & operator=(const value_type *v)
push value to the vector
unsigned remap_flags_
target remapping
back_insert_iterator & operator++()
noop
buffer_matrix_type buf_matrix_
!< not NULL vector pointer
str_sparse_vector_type * str_sparse_vector_type_ptr
bvector_type::block_idx_type block_idx_type
str_sparse_vector_type::value_type value_type
str_sparse_vector_type::size_type size_type
void add_null()
add NULL (no-value) to the container
void set_optimize(typename bvector_type::optmode opt_mode) BMNOEXCEPT
Set optimization on load option (default: false).
back_insert_iterator & operator*()
noop
bvector_type::allocator_type allocator_type
bvector_type::optmode opt_mode_
!< previous block added
friend class str_sparse_vector
void set_remap(bool flag) BMNOEXCEPT
Method to configure back inserter to collect statistics on optimal character codes.
back_insert_iterator() BMNOEXCEPT
const octet_freq_matrix_type & get_octet_matrix() const noexcept
Get octet frequency matrix.
block_idx_type prev_nb_
!< buffer position
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
std::output_iterator_tag iterator_category
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
void add_null(size_type count)
add a series of consecutive NULLs (no-value) to the container
str_sparse_vector_type * sv_
allocator_type::allocator_pool_type allocator_pool_type
str_sparse_vector_type::bvector_type bvector_type
Const iterator to do quick traverse of the sparse vector.
str_sparse_vector_type * str_sparse_vector_type_ptr
std::input_iterator_tag iterator_category
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator<(const const_iterator &it) const BMNOEXCEPT
const value_type * operator*() const
Get current position (value).
bool operator!=(const const_iterator &it) const BMNOEXCEPT
dynamic_heap_matrix< CharType, allocator_type > buffer_matrix_type
bool is_null() const BMNOEXCEPT
Get NULL status.
const value_type * value() const
Get zero terminated string value at the current position.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type allocator_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
void advance() BMNOEXCEPT
advance iterator forward by one
friend class str_sparse_vector
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
const_iterator() BMNOEXCEPT
Construct iterator (not attached to any particular vector).
bool operator<=(const const_iterator &it) const BMNOEXCEPT
str_sparse_vector_type::bvector_type bvector_type
long long difference_type
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
str_sparse_vector_type::size_type size_type
str_sparse_vector_type::value_type value_type
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
string_view_type get_string_view() const
Get current string as string_view.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
std::basic_string_view< CharType > string_view_type
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
const_reference(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
bool is_null() const BMNOEXCEPT
const value_type * get() const BMNOEXCEPT
bool operator==(const const_reference &ref) const BMNOEXCEPT
bm::heap_vector< CharType, typename bvector_type::allocator_type, true > bufffer_type
reference & operator=(const reference &ref)
reference(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
bool is_null() const BMNOEXCEPT
reference & operator=(const value_type *str)
const value_type * get() const BMNOEXCEPT
bool operator==(const reference &ref) const BMNOEXCEPT
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
void insert(size_type idx, const value_type *str)
insert the specified element
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
void calc_octet_stat(octet_freq_matrix_type &octet_matrix) const
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
const_iterator end() const BMNOEXCEPT
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< char, bm::bvector<>, STR_SIZE >::statistics *stat=0)
int compare(size_type idx1, size_type idx2) const BMNOEXCEPT
Compare two vector elements.
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
void resize(size_type sz)
resize vector
void calc_stat(struct str_sparse_vector< char, bm::bvector<>, STR_SIZE >::statistics *st) const BMNOEXCEPT
bool empty() const
return true if vector is empty
void recalc_remap_matrix2()
static size_type max_str()
get maximum string length capacity
void set_null(size_type idx)
set NULL status for the specified element. Vector is resized automatically
unsigned char * init_remap_buffer()
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
void remap_from_impl(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix, bool move_data)
Remap from implementation, please note that move_data flag can violate const-ness.
void sync(bool force)
synchronize internal structures
str_sparse_vector< CharType, BV, STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all planes)
slice_octet_matrix_type remap_matrix1_
const bvector_type * bvector_type_const_ptr
str_sparse_vector< CharType, BV, STR_SIZE > & merge(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
merge with another sparse vector using OR operation Merge is different from join(),...
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
bm::dynamic_heap_matrix< unsigned char, allocator_type > slice_octet_matrix_type
Matrix of character remappings.
bool equal(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
void remap_from(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix=0)
Build remapping profile and load content from another sparse vector Remapped vector likely saves memo...
size_type size() const
return size of the vector
int compare_nomap(size_type idx, const value_type *str) const BMNOEXCEPT
void build_octet_remap(slice_octet_matrix_type &octet_remap_matrix1, slice_octet_matrix_type &octet_remap_matrix2, octet_freq_matrix_type &octet_occupancy_matrix) const
const remap_matrix_type * get_remap_matrix() const
void keep(const bvector_type &bv_idx)
Set NULL all elements NOT set as 1 in the argument vector.
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content.
void import_char_slice(const unsigned_value_type *ch_slice, unsigned ch_acc, size_type char_slice_idx, size_type idx_from, size_type imp_size)
bm::basic_bmatrix< BV > bmatrix_type
unsigned common_prefix_length(size_type idx1, size_type idx2, value_type *prefix_buf=0) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
bool is_remap() const BMNOEXCEPT
Get character remapping status (true | false).
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
CharType * value_type_prt
void clear_all(bool free_mem, unsigned remap=0) BMNOEXCEPT
remap_matrix_type * get_remap_matrix()
void push_back(const StrType &str)
push back a string
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
bvector_type::allocation_policy allocation_policy_type
void clear(const bvector_type &bv_idx)
Set vector elements specified by argument bit-vector to empty. Note that set to empty elements are NOT ...
BV::allocator_type allocator_type
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
bm::dynamic_heap_matrix< size_t, allocator_type > octet_freq_matrix_type
Matrix of character frequencies (for optimal code remap).
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
friend class sparse_vector_serializer
slice_octet_matrix_type remap_matrix2_
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
parent_type::unsigned_value_type unsigned_value_type
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
static int compare_str(const value_type *str1, const value_type *str2) BMNOEXCEPT
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
void resize_internal(size_type sz)
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
str_sparse_vector(const str_sparse_vector &str_sv)
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bool remap_n_tosv_2way(value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len) const BMNOEXCEPT
bool try_get(size_type idx, StrType &str) const
get specified string element if NOT NULL Template method expects an STL-compatible type basic_string<...
friend class sparse_vector_deserializer
void erase(size_type idx)
erase the specified element
int compare_remap(size_type idx, const value_type *str) const BMNOEXCEPT
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
bvector_type * bvector_type_ptr
const unsigned char * get_remap_buffer() const
str_sparse_vector(str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
void insert(size_type idx, const StrType &str)
insert STL string
static void throw_bad_value(const char *err_msg)
throw domain error
void get(size_type idx, StrType &str) const
get specified string element Template method expects an STL-compatible type basic_string<>
reference operator[](size_type idx)
Operator to get write access to an element.
static constexpr bool is_str() BMNOEXCEPT
void push_back(const value_type *str)
push back a string (zero terminated)
size_type decode_substr(CharMatrix &cmatr, size_type idx_from, size_type dec_size, unsigned substr_from, unsigned substr_to, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
base_sparse_vector< CharType, BV, STR_SIZE > parent_type
void push_back_null()
push back NULL value
size_type effective_vector_max() const
void remap(back_insert_iterator &iit)
remap using statistics table from inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
str_sparse_vector< CharType, BV, STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector Calculate and returns efficiency, how close are we to the ...
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements After this call size() will return the true size of th...
slice_octet_matrix_type remap_matrix_type
bvector_type::enumerator bvector_enumerator_type
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_iterator re-positioned to specific element.
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
void push_back_null(size_type count)
push back specified amount of NULL values
void copy_range(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, size_type left, size_type right, bm::null_support slice_null=bm::use_null)
copy range of values from another sparse vector
size_t remap_size() const
static bool remap_n_tosv_2way(value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
static void throw_range_error(const char *err_msg)
throw range error
void swap(size_type idx1, size_type idx2)
swap two vector elements between each other
size_type size_internal() const
str_sparse_vector(const str_sparse_vector &str_sv, bm::remap_setup remap_mode)
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
const const_reference operator[](size_type idx) const
Operator to get read access to an element.
void keep_range(size_type left, size_type right, bm::null_support slice_null=bm::use_null)
Keep only specified interval in the sparse vector, clear all other elements.
static int compare_str(const value_type *str1, const value_type *str2, size_t min_len) BMNOEXCEPT
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::size_type size_type
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
null_support
NULL-able value support.
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
bool find_first_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
int for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplate checks.
BMFORCEINLINE bool has_zero_byte_u64(bm::id64_t v) BMNOEXCEPT
Returns true if INT64 contains 0 octet.
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
unsigned long long int id64_t
@ COPY_RTABLES
copy remap tables only (without data)
bool find_max_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
const unsigned gap_max_bits
const unsigned set_block_shift
size_t gap_cap_overhead
gap memory overhead between length and capacity
size_t ptr_sub_blocks
Number of sub-blocks.
bv_statistics() BMNOEXCEPT
size_t gap_blocks
Number of GAP blocks.
size_t bit_blocks
Number of bit blocks.
size_t bv_count
Number of bit-vectors.
size_t max_serialize_mem
estimated maximum memory for serialization
size_t memory_used
memory usage for all blocks and service tables
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another structure.
Statistical information about bitset's memory allocation details.