Succinct container binary search.
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <random>
#include <cassert>
using namespace std;
static
{
string prefix = "az";
string str;
for (unsigned i = 0; i < max_coll; ++i)
{
str = prefix;
str.append(to_string(i));
str_coll.emplace_back(str);
if (i % 1024 == 0)
{
prefix.clear();
unsigned prefix_len = (unsigned)rand() % 5;
for (unsigned j = 0; j < prefix_len; ++j)
{
char cch = char('a' + (unsigned)rand() % 26);
prefix.push_back(cch);
}
}
}
}
{
const unsigned max_coll = 30000000;
try
{
std::vector<string> str_coll;
cout << "Prepare test data" << endl;
cout << " generation ..." << endl;
cout << " sort..." << endl;
std::sort(str_coll.begin(), str_coll.end());
{
for (auto str : str_coll)
bi = str;
}
cout << " remap-optimize-freeze..." << endl;
std::vector<string> str_coll_test1;
{
const unsigned pick_factor = 5;
for (size_t i = 0; i < size_t(str_coll.size()); i+=pick_factor)
{
const string& s = str_coll[i];
str_coll_test1.push_back(s);
}
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(str_coll_test1.begin(), str_coll_test1.end(), g);
}
cout << "Run test" << endl;
size_t sum_pos4=0, sum_pos32=0;
{
scanner.
bind(str_sv_srt,
true);
bm::chrono_taker tt(cout,
"bm::sparse_vector_scanner<>::bfind_eq_str() [4] ", 1);
for (unsigned i = 0; i < unsigned(str_coll_test1.size()); ++i)
{
const string& s = str_coll_test1[i];
if (!found)
{
cerr << "String bfind_eq_str() failure!" << endl;
assert(0); exit(1);
}
sum_pos4 += pos;
}
}
{
scanner.
bind(str_sv_srt,
true);
bm::chrono_taker tt(cout,
"bm::sparse_vector_scanner<>::bfind_eq_str() [32] ", 1);
for (unsigned i = 0; i < unsigned(str_coll_test1.size()); ++i)
{
const string& s = str_coll_test1[i];
if (!found)
{
cerr << "String bfind_eq_str() failure!" << endl;
assert(0); exit(1);
}
sum_pos32 += pos;
}
}
assert(sum_pos4 == sum_pos32);
}
catch(std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Algorithms for bm::sparse_vector.
string sparse vector based on bit-transposed matrix
Timing utilities for benchmarking (internal).
pre-processor un-defines to avoid global space pollution (internal)
Bitvector Bit-vector container with runtime compression of bits.
Utility class to collect performance measurements and statistics.
algorithms for sparse_vector scan/search
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
bool bfind_eq_str(const SV &sv, const value_type *str, size_type &pos)
binary find first sparse vector element (string).
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
void freeze()
Turn sparse vector into immutable mode Read-only (immutable) vector uses less memory and allows faste...
void remap()
Build remapping profile and re-load content to save memory.
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
bvector_type::size_type size_type
bm::str_sparse_vector< char, bvector_type, 32 > str_sv_type
static void GenerateTestStrCollection(std::vector< string > &str_coll, unsigned max_coll)