BitMagic-C++
bmserial.h
Go to the documentation of this file.
1#ifndef BMSERIAL__H__INCLUDED__
2#define BMSERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2019 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmserial.h
22 \brief Serialization / compression of bvector<>.
23 Set theoretical operations on compressed BLOBs.
24*/
25
26/*!
27 \defgroup bvserial bvector<> serialization
28 Serialization for bvector<> container
29
30 \ingroup bvector
31*/
32
33#ifndef BM__H__INCLUDED__
34// BitMagic utility headers do not include main "bm.h" declaration
35// #include "bm.h" or "bm64.h" explicitly
36# error missing include (bm.h or bm64.h)
37#endif
38
39
40#ifdef _MSC_VER
41#pragma warning( push )
42#pragma warning( disable : 4311 4312 4127)
43#endif
44
45
46
47#include "encoding.h"
48#include "bmfunc.h"
49#include "bmtrans.h"
50#include "bmalgo.h"
51#include "bmutil.h"
52#include "bmbuffer.h"
53#include "bmdef.h"
54#include "bmxor.h"
55
56namespace bm
57{
58
59const unsigned set_compression_max = 6; ///< Maximum supported compression level
60const unsigned set_compression_default = 6; ///< Default compression level
61
62/**
63 Bit-vector serialization class.
64
65 Class designed to convert sparse bit-vectors into a single block of memory
66 ready for file or database storage or network transfer.
67
68 Reuse of this class for multiple serializations (but not across threads).
69 Class resue offers some performance advantage (helps with temp memory
70 reallocations).
71
72 @ingroup bvserial
73*/
74template<class BV>
76{
77public:
78 typedef BV bvector_type;
84
85 typedef byte_buffer<allocator_type> buffer;
88
89 typedef
91public:
92 /**
93 Constructor
94
95 \param alloc - memory allocator
96 \param temp_block - temporary block for various operations
97 (if NULL it will be allocated and managed by serializer class)
98 Temp block is used as a scratch memory during serialization,
99 use of external temp block allows to avoid unnecessary re-allocations.
100
101 Temp block attached is not owned by the class and NOT deallocated on
102 destruction.
103 */
105 bm::word_t* temp_block = 0);
106
107 serializer(bm::word_t* temp_block);
108
110
111 /*! @name Compression level settings */
112 //@{
113
114 // --------------------------------------------------------------------
115 /**
116 Set compression level. Higher compression takes more time to process.
117 @param clevel - compression level (0-6)
118 0 - take as is
119 1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical
120 compression, intervals encoding
121 3 - variant of 2 with different cut-offs
122 4 - delta transforms plus Elias Gamma encoding where possible legacy)
123 5 - Binary Interpolative Coding (BIC) - light settings
124 6 - Binary Interpolative Coding (BIC) - harder settings
125
126 @sa get_compression_level
127 */
128 void set_compression_level(unsigned clevel) BMNOEXCEPT;
129
130 /**
131 Get current compression level.
132 */
134 { return compression_level_; }
135
136
137 //@}
138
139
140 // --------------------------------------------------------------------
141 /*! @name Serialization Methods */
142 //@{
143
144 /**
145 Bitvector serialization into memory block
146
147 @param bv - input bitvector
148 @param buf - out buffer (pre-allocated)
149 No range checking is done in this method.
150 It is responsibility of caller to allocate sufficient
151 amount of memory using information from calc_stat() function.
152
153 @param buf_size - size of the output buffer
154
155 @return Size of serialization block.
156 @sa calc_stat
157 */
158 size_type serialize(const BV& bv,
159 unsigned char* buf, size_t buf_size);
160
161 /**
162 Bitvector serialization into buffer object (resized automatically)
163
164 @param bv - input bitvector
165 @param buf - output buffer object
166 @param bv_stat - input (optional) bit-vector statistics object
167 if NULL, serialize will compute the statistics
168 */
169 void serialize(const BV& bv,
170 typename serializer<BV>::buffer& buf,
171 const statistics_type* bv_stat = 0);
172
173 /**
174 Bitvector serialization into buffer object (resized automatically)
175 Input bit-vector gets optimized and then destroyed, content is
176 NOT guaranteed after this operation.
177 Effectively it moves data into the buffer.
178
179 The reason this operation exsists is because it is faster to do
180 all three operations in one single pass.
181 This is a destructive serialization!
182
183 @param bv - input/output bitvector
184 @param buf - output buffer object
185 */
187 typename serializer<BV>::buffer& buf);
188
189 //@}
190 // --------------------------------------------------------------------
191
192 /**
193 Return serialization counter vector
194 @internal
195 */
197 { return compression_stat_; }
198
199 /**
200 Enable/disable statistics reset on each serilaization
201 */
203 { allow_stat_reset_ = allow; }
204
205 /// Reset all accumulated compression statistics
207
208 /**
209 Set GAP length serialization (serializes GAP levels of the original vector)
210
211 @param value - when TRUE serialized vector includes GAP levels parameters
212 */
214
215 /**
216 Set byte-order serialization (for cross platform compatibility)
217 @param value - TRUE serialization format includes byte-order marker
218 */
220
221 /**
222 Add skip-markers to serialization BLOB for faster range decode
223 at the expense of some BLOB size increase
224
225 @param enable - TRUE searilization will add bookmark codes
226 @param bm_interval - bookmark interval in (number of blocks)
227 suggested values between 4 and 512 (block size is 64K bits)
228 smaller interval means more bookmarks added to the skip list
229 allows faster range deserialization at the expense of
230 somewhat increased BLOB size.
231 */
232 void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
233
234 /**
235 Fine tuning for Binary Interpolative Compression (levels 5+)
236 The parameter sets average population count per block (64Kbits)
237 below which block is considered very sparse.
238 If super block (group of 256 blocks) is very sparse it applies
239 block size expansion (for the compression purposes) to
240 improve compression rates.
241 */
242 void set_sparse_cutoff(unsigned cutoff) BMNOEXCEPT;
243
244 /**
245 Attach collection of reference vectors for XOR serialization
246 (no transfer of ownership for the pointers)
247 @internal
248 */
249 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
250
251 /**
252 Calculate XOR similarity model for ref_vector
253 refernece vector must be associated before
254
255 @param sim_model - [out] similarity model to compute
256 @param ref_vect - [in] reference vectors
257 @param params - parameters to regulate search depth
258 @return true - if similarity model created successfully
259
260 @sa set_ref_vectors
261 @internal
262 */
264 const bv_ref_vector_type& ref_vect,
265 const bm::xor_sim_params& params);
266
267 /**
268 Atach XOR similarity model (must be computed by the same ref vector)
269 @internal
270 */
272
273 /**
274 Set current index in rer.vector collection
275 (not a row idx or plain idx)
276 */
278
279
280protected:
281 /**
282 Encode serialization header information
283 */
284 void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
285
286 /*! Encode GAP block */
287 void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
288
289 /*! Encode GAP block with Elias Gamma coder */
290 void gamma_gap_block(const bm::gap_word_t* gap_block,
291 bm::encoder& enc) BMNOEXCEPT;
292
293 /**
294 Encode GAP block as delta-array with Elias Gamma coder
295 */
296 void gamma_gap_array(const bm::gap_word_t* gap_block,
297 unsigned arr_len,
298 bm::encoder& enc,
299 bool inverted = false) BMNOEXCEPT;
300
301 /// Encode bit-block as an array of bits
302 void encode_bit_array(const bm::word_t* block,
303 bm::encoder& enc, bool inverted) BMNOEXCEPT;
304
305 void gamma_gap_bit_block(const bm::word_t* block,
306 bm::encoder& enc) BMNOEXCEPT;
307
308 void gamma_arr_bit_block(const bm::word_t* block,
309 bm::encoder& enc, bool inverted) BMNOEXCEPT;
310
311 void bienc_arr_bit_block(const bm::word_t* block,
312 bm::encoder& enc, bool inverted) BMNOEXCEPT;
313
314 void bienc_arr_sblock(const BV& bv, unsigned sb,
315 bm::encoder& enc) BMNOEXCEPT;
316
317 /// encode bit-block as interpolated bit block of gaps
318 void bienc_gap_bit_block(const bm::word_t* block,
319 bm::encoder& enc) BMNOEXCEPT;
320
322 bm::encoder& enc, bool inverted) BMNOEXCEPT;
323 /// encode bit-block as interpolated gap block
325 bm::encoder& enc) BMNOEXCEPT;
326
327 /**
328 Encode GAP block as an array with binary interpolated coder
329 */
330 void interpolated_gap_array(const bm::gap_word_t* gap_block,
331 unsigned arr_len,
332 bm::encoder& enc,
333 bool inverted) BMNOEXCEPT;
335 unsigned arr_len,
336 bm::encoder& enc,
337 bool inverted) BMNOEXCEPT;
338
339
340 /*! Encode GAP block with using binary interpolated encoder */
342 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
343
344 /**
345 Encode BIT block with repeatable runs of zeroes
346 */
347 void encode_bit_interval(const bm::word_t* blk,
348 bm::encoder& enc,
349 unsigned size_control) BMNOEXCEPT;
350 /**
351 Encode bit-block using digest (hierarchical compression)
352 */
353 void encode_bit_digest(const bm::word_t* blk,
354 bm::encoder& enc,
355 bm::id64_t d0) BMNOEXCEPT;
356 /**
357 Encode XOR match chain
358 */
360 const block_match_chain_type& mchain) BMNOEXCEPT;
361
362 /**
363 Determine best representation for GAP block based
364 on current set compression level
365
366 @return set_block_bit, set_block_bit_1bit, set_block_arrgap
367 set_block_arrgap_egamma, set_block_arrgap_bienc
368 set_block_arrgap_inv, set_block_arrgap_egamma_inv
369 set_block_arrgap_bienc_inv, set_block_gap_egamma
370 set_block_gap_bienc
371
372 @internal
373 */
374 unsigned char
376
377 /// Determine best representation for a bit-block
378 unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
379
380 /// Determine best representation for a bit-block (level 5)
381 unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
382
383 void reset_models() BMNOEXCEPT { mod_size_ = 0; }
384 void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
385protected:
386
387 /// Bookmark state structure
389 {
391 : ptr_(0), nb_(0),
392 nb_range_(nb_range), bm_type_(0)
393 {
394 min_bytes_range_ = nb_range * 8;
395 if (min_bytes_range_ < 512)
396 min_bytes_range_ = 512;
397
398 if (nb_range < 15)
399 bm_type_ = 2; // 16-bit offset
400 else
401 if (nb_range < 255)
402 bm_type_ = 1; // 24-bit offset
403 }
404
405 unsigned char* ptr_; ///< bookmark pointer
406 block_idx_type nb_; ///< bookmark block idx
407 block_idx_type nb_range_; ///< target bookmark range in blocks
408 unsigned bm_type_; ///< 0:32-bit, 1: 24-bit, 2: 16-bit
409 size_t min_bytes_range_; ///< minumal distance (bytes) between marks
410 };
411
412 /**
413 Check if bookmark needs to be placed and if so, encode it
414 into serialization BLOB
415
416 @param nb - block idx
417 @param bookm - bookmark state structure
418 @param enc - BLOB encoder
419 */
420 static
421 void process_bookmark(block_idx_type nb, bookmark_state& bookm,
423
424 /**
425 Compute digest based XOR product, place into tmp XOR block
426 */
428 const bm::word_t* s_block,
429 const block_match_chain_type& mchain,
430 unsigned i, unsigned j) BMNOEXCEPT;
431
432private:
433 serializer(const serializer&);
434 serializer& operator=(const serializer&);
435
436private:
437 typedef bm::bit_out<bm::encoder> bit_out_type;
438 typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func;
439 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
440 typedef bm::heap_vector<unsigned, allocator_type, true> sblock_arridx_type;
441 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
442
443private:
444 bm::id64_t digest0_;
445 unsigned bit_model_d0_size_; ///< memory (bytes) by d0 method (bytes)
446 unsigned bit_model_0run_size_; ///< memory (bytes) by run-0 method (bytes)
447 block_arridx_type bit_idx_arr_;
448 sblock_arridx_type sb_bit_idx_arr_;
449 unsigned scores_[bm::block_waves];
450 unsigned char models_[bm::block_waves];
451 unsigned mod_size_;
452
453 allocator_type alloc_;
454 size_type* compression_stat_;
455 bool allow_stat_reset_ = true; ///< controls zeroing of telemetry
456 bool gap_serial_;
457 bool byte_order_serial_;
458
459 bool sb_bookmarks_; ///< Bookmarks flag
460 unsigned sb_range_; ///< Desired bookmarks interval
461
462 bm::word_t* temp_block_;
463 unsigned compression_level_;
464 bool own_temp_block_;
465
466 bool optimize_; ///< flag to optimize the input vector
467 bool free_; ///< flag to free the input vector
468 allocator_pool_type pool_;
469
470
471 unsigned char* enc_header_pos_; ///< pos of top level header to roll back
472 unsigned char header_flag_; ///< set of masks used to save
473
474 // XOR compression
475 //
476 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
477 const xor_sim_model_type* sim_model_; ///< similarity model matrix
478 bm::xor_scanner<BV> xor_scan_; ///< scanner for XOR similarity
479 size_type ref_idx_; ///< current reference index
480 bm::word_t* xor_tmp_block_; ///< tmp area for xor product
481 bm::word_t* xor_tmp1_;
482 bm::word_t* xor_tmp2_;
483
484 unsigned sparse_cutoff_; ///< number of bits per blocks to consider sparse
485};
486
487/**
488 Base deserialization class
489 \ingroup bvserial
490 @internal
491*/
492template<typename DEC, typename BLOCK_IDX>
494{
495protected:
496 typedef DEC decoder_type;
497 typedef BLOCK_IDX block_idx_type;
499
500protected:
504
505 /// Read GAP block from the stream
507 unsigned block_type,
508 bm::gap_word_t* dst_block,
509 bm::gap_word_t& gap_head);
510
511 /// Read list of bit ids
512 ///
513 /// @return number of ids
515 unsigned block_type,
516 bm::gap_word_t* dst_arr);
517
518 /// Read binary interpolated list into a bit-set
520 bm::word_t* blk, unsigned block_type) BMNOEXCEPT;
521
522 /// Read list of bit ids for super-blocks
523 ///
524 /// @return number of ids
526 unsigned block_type,
527 unsigned* dst_arr,
528 unsigned* sb_idx);
529
530 /// Read binary interpolated gap blocks into a bitset
532
533 /// Read inverted binary interpolated list into a bit-set
535
536 /// Read digest0-type bit-block
538
539
540 /// read bit-block encoded as runs
541 static
543
544 static
545 const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
546
547 /// Try to skip if skip bookmark is available within reach
548 /// @return new block idx if skip went well
549 ///
552 block_idx_type expect_nb) BMNOEXCEPT;
553
554protected:
555 bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
556 unsigned* sb_id_array_; ///< ptr to super-block idx array (temp)
557
558 block_idx_type bookmark_idx_;///< last bookmark block index
559 unsigned skip_offset_; ///< bookmark to skip 256 encoded blocks
560 const unsigned char* skip_pos_; ///< decoder skip position
561};
562
563/**
564 Deserializer for bit-vector
565 \ingroup bvserial
566*/
567template<class BV, class DEC>
569 protected deseriaizer_base<DEC, typename BV::block_idx_type>
570{
571public:
572 typedef BV bvector_type;
574 typedef typename BV::size_type size_type;
579
580public:
583
584 /*! Deserialize bit-vector (equivalent to logical OR)
585 @param bv - target bit-vector
586 @param buf - BLOB memory pointer
587 @param temp_block - temporary buffer [block size] (not used)
588
589 @return number of consumed bytes
590 */
592 const unsigned char* buf,
593 bm::word_t* temp_block = 0);
594
595 // ----------------------------------------------------------------
596 /**
597 Attach collection of reference vectors for XOR de-serialization
598 (no transfer of ownership for the pointer)
599 @internal
600 */
601 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
602
603
604 /**
605 set deserialization range [from, to]
606 This is NOT exact, approximate range, content outside range
607 is not guaranteed to be absent
608 @sa unset_range()
609 */
611 {
612 is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
613 }
614
615 /**
616 Disable range deserialization
617 @sa set_range()
618 */
620
621 /** reset range deserialization and reference vectors
622 @sa set_range()
623 */
625
626protected:
627 typedef typename BV::blocks_manager_type blocks_manager_type;
628
629protected:
633
634 void deserialize_gap(unsigned char btype, decoder_type& dec,
637 bm::word_t* blk);
638 void decode_bit_block(unsigned char btype, decoder_type& dec,
641 bm::word_t* blk);
642
644 bvector_type& bv,
646 bm::word_t* blk);
647
649 bvector_type& bv,
651 bm::word_t* blk);
652
654 bvector_type& bv,
656 bm::word_t* blk);
657
658 void decode_arr_sblock(unsigned char btype, decoder_type& dec,
659 bvector_type& bv);
660
661
662protected:
663 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
664 typedef bm::heap_vector<bm::word_t, allocator_type, true> sblock_arridx_type;
666
667protected:
672
675
676 // XOR compression memory
677 //
678 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
679 bm::word_t* xor_block_; ///< xor product
682
683 // XOR decode FSM
684 //
690
691 // Range deserialization settings
692 //
696
697
698};
699
700
701/**
702 Iterator to walk forward the serialized stream.
703
704 \internal
705 \ingroup bvserial
706*/
707template<class BV, class SerialIterator>
709{
710public:
711 typedef BV bvector_type;
713 typedef SerialIterator serial_iterator_type;
714public:
715
716 /// set deserialization range [from, to]
718
719 /// disable range filtration
720 void unset_range() BMNOEXCEPT { is_range_set_ = false; }
721
724 bm::word_t* temp_block,
726 bool exit_on_one = false);
727
728private:
729 typedef typename BV::blocks_manager_type blocks_manager_type;
730 typedef typename bvector_type::block_idx_type block_idx_type;
731
732 /// load data from the iterator of type "id list"
733 static
734 void load_id_list(bvector_type& bv,
736 unsigned id_count,
737 bool set_clear);
738
739 /// Finalize the deserialization (zero target vector tail or bit-count tail)
740 static
741 size_type finalize_target_vector(blocks_manager_type& bman,
742 set_operation op,
743 size_type bv_block_idx);
744
745 /// Process (obsolete) id-list serialization format
746 static
747 size_type process_id_list(bvector_type& bv,
749 set_operation op);
750 static
751 const char* err_msg() BMNOEXCEPT
752 { return "BM::de-serialization format error"; }
753private:
754 bool is_range_set_ = false;
755 size_type nb_range_from_ = 0;
756 size_type nb_range_to_ = 0;
757};
758
759/*!
760 @brief Serialization stream iterator
761
762 Iterates blocks and control tokens of serialized bit-stream
763 \ingroup bvserial
764 @internal
765*/
766template<class DEC, typename BLOCK_IDX>
767class serial_stream_iterator : protected deseriaizer_base<DEC, BLOCK_IDX>
768{
769public:
771 typedef BLOCK_IDX block_idx_type;
773
774public:
775 serial_stream_iterator(const unsigned char* buf);
777
778 /// serialized bitvector size
779 block_idx_type bv_size() const { return bv_size_; }
780
781 /// Returns true if end of bit-stream reached
782 bool is_eof() const { return end_of_stream_; }
783
784 /// get next block
785 void next();
786
787 /// skip all zero or all-one blocks
789
790 /// read bit block, using logical operation
791 unsigned get_bit_block(bm::word_t* dst_block,
792 bm::word_t* tmp_block,
793 set_operation op);
794
795
796 /// Read gap block data (with head)
797 void get_gap_block(bm::gap_word_t* dst_block);
798
799 /// Return current decoder size
800 unsigned dec_size() const { return decoder_.size(); }
801
802 /// Get low level access to the decoder (use carefully)
804
805 /// iterator is a state machine, this enum encodes
806 /// its key value
807 ///
809 {
811 e_list_ids, ///< plain int array
812 e_blocks, ///< stream of blocks
813 e_zero_blocks, ///< one or more zero bit blocks
814 e_one_blocks, ///< one or more all-1 bit blocks
815 e_bit_block, ///< one bit block
816 e_gap_block ///< one gap block
817
818 };
819
820 /// Returns iterator internal state
821 iterator_state state() const BMNOEXCEPT { return this->state_; }
822
823 iterator_state get_state() const BMNOEXCEPT { return this->state_; }
824 /// Number of ids in the inverted list (valid for e_list_ids)
825 unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
826
827 /// Get last id from the id list
828 bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
829
830 /// Get current block index
832
833public:
834 /// member function pointer for bitset-bitset get operations
835 ///
836 typedef
839
840 unsigned
842 unsigned
843 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
844 unsigned
845 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
846 unsigned
847 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
848 unsigned
849 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
850 unsigned
851 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
852 unsigned
854 unsigned
856 unsigned
858 unsigned
860 unsigned
862 unsigned
864 unsigned
866 {
867 return get_bit_block_COUNT(dst_block, tmp_block);
868 }
869
870 /// Get array of bits out of the decoder into bit block
871 /// (Converts inverted list into bits)
872 /// Returns number of words (bits) being read
873 unsigned get_arr_bit(bm::word_t* dst_block,
874 bool clear_target=true) BMNOEXCEPT;
875
876 /// Get current block type
877 unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
878
879 unsigned get_bit() BMNOEXCEPT;
880
882
883 /// Try to skip if skip bookmark is available within reach
884 /// @return true if skip went well
885 ///
887 {
888 block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
889 if (new_nb)
890 {
891 block_idx_ = new_nb; state_ = e_blocks;
892 }
893 return new_nb;
894 }
895
896protected:
898
899 unsigned char header_flag_;
900
905 unsigned id_cnt_; ///< Id counter for id list
906 bm::id_t last_id_; ///< Last id from the id list
908
909 unsigned block_type_; ///< current block type
910 block_idx_type block_idx_; ///< current block index
911 block_idx_type mono_block_cnt_; ///< number of 0 or 1 blocks
912
915};
916
917/**
918 Deserializer, performs logical operations between bit-vector and
919 serialized bit-vector. This utility class potentially provides faster
920 and/or more memory efficient operation than more conventional deserialization
921 into memory bvector and then logical operation
922
923 \ingroup bvserial
924*/
925template<typename BV>
927{
928public:
929 typedef BV bvector_type;
930 typedef typename BV::allocator_type allocator_type;
933
934public:
937
938 /*!
939 \brief Deserialize bvector using buffer as set operation argument
940
941 \param bv - target bvector
942 \param buf - serialized buffer used as as a logical operation argument
943 \param op - set algebra operation (default: OR)
944 \param exit_on_one - quick exit if set operation found some result
945
946 \return bitcount (for COUNT_* operations)
947 */
949 const unsigned char* buf,
950 set_operation op,
951 bool exit_on_one = false);
952
953 /*!
954 Deserialize range using mask vector (AND)
955 \param bv - target bvector (should be set ranged)
956 \param buf - serialized buffer pointer
957 \param idx_from - range of bits set for deserialization [from..to]
958 \param idx_to - range of bits [from..to]
959 */
961 const unsigned char* buf,
962 size_type idx_from,
963 size_type idx_to);
964
965
966
967
968 // -----------------------------------------------------------------
969 // obsolete methods (switch to new ones, without team_block)
970 //
971
972 /*!
973 \brief Obsolete! Deserialize bvector using buffer as set operation argument
974
975 \param bv - target bvector
976 \param buf - serialized buffer as a logical argument
977 \param op - set algebra operation (default: OR)
978 \param exit_on_one - quick exit if set operation found some result
979
980 \return bitcount (for COUNT_* operations)
981 */
983 const unsigned char* buf,
984 bm::word_t*, /*temp_block,*/
986 bool exit_on_one = false)
987 { return deserialize(bv, buf, op, exit_on_one); }
988
989 /*!
990 Deserialize range using mask vector (AND)
991 \param bv - target bvector (should be set ranged)
992 \param buf - serialized buffer pointer
993 \param idx_from - range of bits set for deserialization [from..to]
994 \param idx_to - range of bits [from..to]
995 */
997 const unsigned char* buf,
998 bm::word_t*, /* temp_block, */
999 size_type idx_from,
1000 size_type idx_to)
1001 { deserialize_range(bv, buf, idx_from, idx_to); }
1002 // -----------------------------------------------------------------
1003
1004 /**
1005 Attach collection of reference vectors for XOR serialization
1006 (no transfer of ownership for the pointer)
1007 @internal
1008 */
1010 { ref_vect_ = ref_vect; }
1011
1012private:
1013 size_type deserialize_xor(bvector_type& bv,
1014 const unsigned char* buf,
1015 set_operation op,
1016 bool exit_on_one);
1017 static
1018 size_type deserialize_xor(bvector_type& bv,
1019 bvector_type& bv_tmp,
1020 set_operation op);
1021
1022 void deserialize_xor_range(bvector_type& bv,
1023 const unsigned char* buf,
1024 size_type idx_from,
1025 size_type idx_to);
1026
1027private:
1028 typedef typename bvector_type::block_idx_type block_idx_type;
1029 typedef
1030 typename BV::blocks_manager_type blocks_manager_type;
1031 typedef
1033 typedef
1035 typedef
1037 typedef
1039 typedef
1041
1042private:
1043 allocator_type alloc_;
1044 bm::word_t* temp_block_;
1045
1046 /// default stream iterator (same endian)
1048 /// big-endian stream iterator
1050 /// little-endian stream iterator
1052
1054 deserializer_le de_le_;
1055 deserializer_be de_be_;
1056
1057 bvector_type bv_tmp_;
1058
1059
1060 // XOR compression related fields
1061 //
1062 const bv_ref_vector_type* ref_vect_; ///< xor ref.vector
1063
1064
1065};
1066
1067
1068
1069
1070//----------------------------------------------------------------------------
1071//
1072//----------------------------------------------------------------------------
1073
1074
1075/// \internal
1076/// \ingroup bvserial
1079 BM_HM_RESIZE = (1 << 1), ///< resized vector
1080 BM_HM_ID_LIST = (1 << 2), ///< id list stored
1081 BM_HM_NO_BO = (1 << 3), ///< no byte-order
1082 BM_HM_NO_GAPL = (1 << 4), ///< no GAP levels
1083 BM_HM_64_BIT = (1 << 5), ///< 64-bit vector
1084 BM_HM_HXOR = (1 << 6), ///< horizontal XOR compression turned ON
1085 BM_HM_SPARSE = (1 << 7) ///< very sparse vector
1086};
1087
1088
1089// --------------------------------------------------------------------
1090// Serialization stream encoding constants
1091//
1092
1093const unsigned char set_block_end = 0; //!< End of serialization
1094const unsigned char set_block_1zero = 1; //!< One all-zero block
1095const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
1096const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
1097const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
1098const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
1099const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
1100const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
1101const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
1102const unsigned char set_block_azero = 9; //!< All other blocks zero
1103const unsigned char set_block_aone = 10; //!< All other blocks one
1104const unsigned char set_block_bit = 11; //!< Plain bit block
1105const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
1106const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
1107const unsigned char set_block_gap = 14; //!< Plain GAP block
1108const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
1109const unsigned char set_block_arrbit = 16; //!< List of bits ON
1110const unsigned char set_block_bit_interval = 17; //!< Interval block
1111const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
1112const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
1113const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
1114const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
1115const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
1116const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
1117const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
1118const unsigned char set_block_64zero = 25; //!< lots of zero blocks
1119const unsigned char set_block_64one = 26; //!< lots of all-set blocks
1120
1121const unsigned char set_block_gap_bienc = 27; //!< Interpolated GAP block (legacy)
1122const unsigned char set_block_arrgap_bienc = 28; //!< Interpolated GAP array
1123const unsigned char set_block_arrgap_bienc_inv = 29; //!< Interpolated GAP array (inverted)
1124const unsigned char set_block_arrbit_inv = 30; //!< List of bits OFF
1125const unsigned char set_block_arr_bienc = 31; //!< Interpolated block as int array
1126const unsigned char set_block_arr_bienc_inv = 32; //!< Interpolated inverted block int array
1127const unsigned char set_block_bitgap_bienc = 33; //!< Interpolated bit-block as GAPs
1128const unsigned char set_block_bit_digest0 = 34; //!< H-compression with digest mask
1129
1130const unsigned char set_block_ref_eq = 35; //!< block is a copy of a reference block
1131const unsigned char set_block_xor_ref8 = 36; //!< block is masked XOR of a reference block (8-bit)
1132const unsigned char set_block_xor_ref16 = 37; //!< block is masked XOR of a reference block (16-bit)
1133const unsigned char set_block_xor_ref32 = 38; //!< ..... 32-bit (should never happen)
1134const unsigned char set_block_xor_gap_ref8 = 39; //!< ..... 8-bit
1135const unsigned char set_block_xor_gap_ref16 = 40; //!< ..... 16-bit
1136const unsigned char set_block_xor_gap_ref32 = 41; //!< ..... 32-bit (should never happen)
1137const unsigned char set_block_xor_chain = 42; //!< XOR chain (composit of sub-blocks)
1138
1139const unsigned char set_block_gap_bienc_v2 = 43; //!< Interpolated GAP block (v2)
1140const unsigned char set_block_arrgap_bienc_v2 = 44; //!< //!< Interpolated GAP array (v2)
1141const unsigned char set_block_arrgap_bienc_inv_v2 = 45; //!< Interpolated GAP array (inverted)
1142const unsigned char set_block_bitgap_bienc_v2 = 46; //!< Interpolated bit-block as GAPs (v2 - reseved)
1143
1144const unsigned char set_nb_bookmark16 = 47; //!< jump ahead mark (16-bit)
1145const unsigned char set_nb_bookmark24 = 48; //!< jump ahead mark (24-bit)
1146const unsigned char set_nb_bookmark32 = 49; //!< jump ahead mark (32-bit)
1147const unsigned char set_nb_sync_mark8 = 50; //!< bookmark sync point (8-bits)
1148const unsigned char set_nb_sync_mark16 = 51;
1149const unsigned char set_nb_sync_mark24 = 52;
1150const unsigned char set_nb_sync_mark32 = 53;
1151const unsigned char set_nb_sync_mark48 = 54;
1152const unsigned char set_nb_sync_mark64 = 55; //!< ..... 64-bit (should never happen)
1153
1154const unsigned char set_sblock_bienc = 56; //!< super-block interpolated list
1155const unsigned char set_block_arr_bienc_8bh = 57; //!< BIC block 8bit header
1156
1157const unsigned char set_block_xor_ref8_um = 58; //!< block is un-masked XOR of a reference block (8-bit)
1158const unsigned char set_block_xor_ref16_um = 59; //!< block is un-masked XOR of a reference block (16-bit)
1159const unsigned char set_block_xor_ref32_um = 60; //!< ..... 32-bit (should never happen)
1160
1161
1162
1163const unsigned sparse_max_l5 = 48;
1164const unsigned sparse_max_l6 = 256;
1165
1166template<class BV>
1168 bm::word_t* temp_block)
1169: alloc_(alloc),
1170 compression_stat_(0),
1171 gap_serial_(false),
1172 byte_order_serial_(true),
1173 sb_bookmarks_(false),
1174 sb_range_(0),
1175 compression_level_(bm::set_compression_default),
1176 enc_header_pos_(0), header_flag_(0),
1177 ref_vect_(0),
1178 sim_model_(0),
1179 ref_idx_(0),
1180 xor_tmp_block_(0),
1181 sparse_cutoff_(sparse_max_l6)
1182{
1183 bit_idx_arr_.resize(bm::gap_max_bits);
1184 if (temp_block == 0)
1185 {
1186 temp_block_ = alloc_.alloc_bit_block();
1187 own_temp_block_ = true;
1188 }
1189 else
1190 {
1191 temp_block_ = temp_block;
1192 own_temp_block_ = false;
1193 }
1194 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1195 optimize_ = free_ = false;
1196 xor_tmp1_ = xor_tmp2_ = 0;
1197}
1198
1199template<class BV>
1201: alloc_(allocator_type()),
1202 compression_stat_(0),
1203 gap_serial_(false),
1204 byte_order_serial_(true),
1205 sb_bookmarks_(false),
1206 sb_range_(0),
1207 compression_level_(bm::set_compression_default),
1208 enc_header_pos_(0), header_flag_(0),
1209 ref_vect_(0),
1210 sim_model_(0),
1211 ref_idx_(0),
1212 xor_tmp_block_(0),
1213 sparse_cutoff_(sparse_max_l6)
1214{
1215 bit_idx_arr_.resize(bm::gap_max_bits);
1216 if (temp_block == 0)
1217 {
1218 temp_block_ = alloc_.alloc_bit_block();
1219 own_temp_block_ = true;
1220 }
1221 else
1222 {
1223 temp_block_ = temp_block;
1224 own_temp_block_ = false;
1225 }
1226 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1227 optimize_ = free_ = false;
1228 xor_tmp1_ = xor_tmp2_ = 0;
1229}
1230
1231template<class BV>
1233{
1234 if (own_temp_block_)
1235 alloc_.free_bit_block(temp_block_);
1236 if (compression_stat_)
1237 alloc_.free_bit_block((bm::word_t*)compression_stat_);
1238 if (xor_tmp_block_)
1239 alloc_.free_bit_block(xor_tmp_block_, 3);
1240}
1241
1242
1243template<class BV>
1245{
1246 for (unsigned i = 0; i < 256; ++i)
1247 compression_stat_[i] = 0;
1248}
1249
1250template<class BV>
1252{
1253 if (clevel <= bm::set_compression_max)
1254 compression_level_ = clevel;
1255 if (compression_level_ == 5)
1256 sparse_cutoff_ = sparse_max_l5;
1257 else if (compression_level_ == 6)
1258 sparse_cutoff_ = sparse_max_l6;
1259}
1260
1261template<class BV>
1263{
1264 BM_ASSERT(cutoff <= sparse_max_l6);
1265 if (cutoff <= sparse_max_l6)
1266 sparse_cutoff_ = cutoff;
1267 else
1268 sparse_cutoff_ = sparse_max_l6;
1269}
1270
1271template<class BV>
1273{
1274 gap_serial_ = value;
1275}
1276
1277template<class BV>
1279{
1280 byte_order_serial_ = value;
1281}
1282
1283template<class BV>
1284void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
1285{
1286 sb_bookmarks_ = enable;
1287 if (enable)
1288 {
1289 if (bm_interval > 512)
1290 bm_interval = 512;
1291 else
1292 if (bm_interval < 4)
1293 bm_interval = 4;
1294 }
1295 sb_range_ = bm_interval;
1296}
1297
1298template<class BV>
1300{
1301 ref_vect_ = ref_vect;
1302 sim_model_ = 0;
1303 xor_scan_.set_ref_vector(ref_vect);
1304 if (!xor_tmp_block_ && ref_vect)
1305 {
1306 xor_tmp_block_ = alloc_.alloc_bit_block(3);
1307 xor_tmp1_ = &xor_tmp_block_[bm::set_block_size];
1308 xor_tmp2_ = &xor_tmp_block_[bm::set_block_size*2];
1309 }
1310}
1311
1312template<class BV>
1314 const bv_ref_vector_type& ref_vect,
1315 const bm::xor_sim_params& params)
1316{
1317 return xor_scan_.compute_sim_model(sim_model, ref_vect, params);
1318}
1319
1320template<class BV>
1322{
1323 sim_model_ = sim_model;
1324}
1325
1326template<class BV>
1328{
1329 ref_idx_ = ref_idx;
1330}
1331
1332template<class BV>
1334{
1335 const blocks_manager_type& bman = bv.get_blocks_manager();
1336
1337 header_flag_ = 0;
1338 if (bv.size() == bm::id_max) // no dynamic resize
1339 header_flag_ |= BM_HM_DEFAULT;
1340 else
1341 header_flag_ |= BM_HM_RESIZE;
1342
1343 if (!byte_order_serial_)
1344 header_flag_ |= BM_HM_NO_BO;
1345
1346 if (!gap_serial_)
1347 header_flag_ |= BM_HM_NO_GAPL;
1348
1349 #ifdef BM64ADDR
1350 header_flag_ |= BM_HM_64_BIT;
1351 #endif
1352
1353 if (ref_vect_)
1354 {
1355 // TODO: check if XOR search found anything at all
1356 header_flag_ |= BM_HM_HXOR; // XOR compression turned ON
1357 }
1358
1359 enc_header_pos_ = enc.get_pos();
1360 enc.put_8(header_flag_);
1361
1362 if (byte_order_serial_)
1363 {
1365 enc.put_8((unsigned char)bo);
1366 }
1367 // keep GAP levels information
1368 if (gap_serial_)
1369 {
1370 enc.put_16(bman.glen(), bm::gap_levels);
1371 }
1372
1373 // save size (only if bvector has been down-sized)
1374 if (header_flag_ & BM_HM_RESIZE)
1375 {
1376 #ifdef BM64ADDR
1377 enc.put_64(bv.size());
1378 #else
1379 enc.put_32(bv.size());
1380 #endif
1381 }
1382}
1383
1384template<class BV>
1386 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
1387{
1388 unsigned len = bm::gap_length(gap_block);
1389 if (len > 4) // BIC encoding
1390 {
1391 encoder::position_type enc_pos0 = enc.get_pos();
1392 BM_ASSERT(gap_block[len-1] == 65535);
1393
1394 bm::gap_word_t head = gap_block[0];
1395 head &= bm::gap_word_t(~(3 << 1)); // clear the level flags
1396 bm::gap_word_t min_v = gap_block[1];
1397 bm::gap_word_t max_v = gap_block[len-2];
1398 bm::gap_word_t tail_delta = bm::gap_word_t(65535 - max_v);
1399
1400 if (min_v < 256)
1401 head |= (1 << 1);
1402 if (tail_delta < 256)
1403 head |= (1 << 2);
1404
1405 enc.put_8(bm::set_block_gap_bienc_v2);
1406 enc.put_16(head);
1407 if (min_v < 256)
1408 enc.put_8((unsigned char)min_v);
1409 else
1410 enc.put_16(min_v);
1411
1412 if (tail_delta < 256)
1413 enc.put_8((unsigned char)tail_delta);
1414 else
1415 enc.put_16(tail_delta);
1416
1417 bit_out_type bout(enc);
1418 bout.bic_encode_u16(&gap_block[2], len-4, min_v, max_v);
1419 bout.flush();
1420
1421 // re-evaluate coding efficiency
1422 //
1423 encoder::position_type enc_pos1 = enc.get_pos();
1424 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1425 if (gamma_size > (len-1)*sizeof(gap_word_t))
1426 {
1427 enc.set_pos(enc_pos0);
1428 }
1429 else
1430 {
1431 compression_stat_[bm::set_block_gap_bienc]++;
1432 return;
1433 }
1434 }
1435
1436 // save as plain GAP block
1437 enc.put_8(bm::set_block_gap);
1438 enc.put_16(gap_block, len-1);
1439
1440 compression_stat_[bm::set_block_gap]++;
1441}
1442
1443
1444template<class BV>
1447{
1448 unsigned len = gap_length(gap_block);
1449 if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
1450 {
1451 encoder::position_type enc_pos0 = enc.get_pos();
1452 {
1453 bit_out_type bout(enc);
1454 gamma_encoder_func gamma(bout);
1455
1456 enc.put_8(bm::set_block_gap_egamma);
1457 enc.put_16(gap_block[0]);
1458
1459 for_each_dgap(gap_block, gamma);
1460 }
1461 // re-evaluate coding efficiency
1462 //
1463 encoder::position_type enc_pos1 = enc.get_pos();
1464 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1465 if (gamma_size > (len-1)*sizeof(gap_word_t))
1466 {
1467 enc.set_pos(enc_pos0);
1468 }
1469 else
1470 {
1471 compression_stat_[bm::set_block_gap_egamma]++;
1472 return;
1473 }
1474 }
1475
1476 // save as plain GAP block
1477 enc.put_8(bm::set_block_gap);
1478 enc.put_16(gap_block, len-1);
1479
1480 compression_stat_[bm::set_block_gap]++;
1481}
1482
1483template<class BV>
1485 unsigned arr_len,
1486 bm::encoder& enc,
1487 bool inverted) BMNOEXCEPT
1488{
1489 unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
1491 if (compression_level_ > 3 && arr_len > 1)
1492 {
1493 encoder::position_type enc_pos0 = enc.get_pos();
1494 {
1495 bit_out_type bout(enc);
1496 enc.put_8(scode);
1497 bout.gamma(arr_len);
1498 gap_word_t prev = gap_array[0];
1499 bout.gamma(prev + 1);
1500
1501 for (unsigned i = 1; i < arr_len; ++i)
1502 {
1503 gap_word_t curr = gap_array[i];
1504 bout.gamma(curr - prev);
1505 prev = curr;
1506 }
1507 }
1508 encoder::position_type enc_pos1 = enc.get_pos();
1509 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1510 unsigned plain_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1511 if (gamma_size >= plain_size)
1512 {
1513 enc.set_pos(enc_pos0); // rollback the bit stream
1514 }
1515 else
1516 {
1517 compression_stat_[scode]++;
1518 return;
1519 }
1520 }
1521 // save as a plain array
1523 enc.put_prefixed_array_16(scode, gap_array, arr_len, true);
1524 compression_stat_[scode]++;
1525}
1526
1527
1528template<class BV>
1530 const bm::gap_word_t* gap_block,
1531 unsigned arr_len,
1532 bm::encoder& enc,
1533 bool inverted) BMNOEXCEPT
1534{
1535 BM_ASSERT(arr_len <= 65535);
1536 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
1538 if (arr_len > 4)
1539 {
1540 encoder::position_type enc_pos0 = enc.get_pos();
1541 {
1542 bit_out_type bout(enc);
1543
1544 bm::gap_word_t min_v = gap_block[0];
1545 bm::gap_word_t max_v = gap_block[arr_len-1];
1546 BM_ASSERT(max_v > min_v);
1547
1548 enc.put_8(scode);
1549 enc.put_16(min_v);
1550 enc.put_16(max_v);
1551
1552 bout.gamma(arr_len-4);
1553 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1554 bout.flush();
1555 }
1556 encoder::position_type enc_pos1 = enc.get_pos();
1557 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1558 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1559 if (enc_size >= raw_size)
1560 {
1561 enc.set_pos(enc_pos0); // rollback the bit stream
1562 }
1563 else
1564 {
1565 compression_stat_[scode]++;
1566 return;
1567 }
1568 }
1569 // save as a plain array
1571 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1572 compression_stat_[scode]++;
1573}
1574
1575
1576template<class BV>
1578 unsigned arr_len,
1579 bm::encoder& enc,
1580 bool inverted) BMNOEXCEPT
1581{
1582 BM_ASSERT(arr_len <= 65535);
1583
1584 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv_v2
1586 if (arr_len > 4)
1587 {
1588 bm::gap_word_t min_v = gap_block[0];
1589 bm::gap_word_t max_v = gap_block[arr_len-1];
1590 bm::gap_word_t tail = bm::gap_word_t(max_v - min_v);
1591
1592 if (min_v >= 256 && tail >= 256)// || arr_len > 128)
1593 {
1594 interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
1595 return;
1596 }
1597
1598 BM_ASSERT(arr_len < 16383);
1599 encoder::position_type enc_pos0 = enc.get_pos();
1600 {
1601 bit_out_type bout(enc);
1602
1603 BM_ASSERT(max_v > min_v);
1604
1605 enc.put_8(scode);
1606
1607 BM_ASSERT((arr_len & (3 << 14)) == 0);
1608 arr_len <<= 2;
1609 if (min_v < 256)
1610 arr_len |= 1;
1611 if (tail < 256)
1612 arr_len |= (1 << 1);
1613
1614 enc.put_16(bm::gap_word_t(arr_len));
1615 if (min_v < 256)
1616 enc.put_8((unsigned char)min_v);
1617 else
1618 enc.put_16(min_v);
1619
1620 if (tail < 256)
1621 enc.put_8((unsigned char)tail);
1622 else
1623 enc.put_16(tail);
1624
1625 arr_len >>= 2;
1626
1627 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1628 bout.flush();
1629 }
1630 encoder::position_type enc_pos1 = enc.get_pos();
1631 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1632 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1633 if (enc_size >= raw_size)
1634 {
1635 enc.set_pos(enc_pos0); // rollback the bit stream
1636 }
1637 else
1638 {
1639 compression_stat_[scode]++;
1640 return;
1641 }
1642 }
1643 // save as a plain array
1645 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1646 compression_stat_[scode]++;
1647}
1648
1649
1650
1651template<class BV>
1652void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
1653{
1654 BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
1655 scores_[mod_size_] = score; models_[mod_size_] = mod;
1656 ++mod_size_;
1657}
1658
1659template<class BV>
1660unsigned char
1662{
1663 const float bie_bits_per_int = compression_level_ < 6 ? 3.75f : 2.5f;
1664 const unsigned bie_limit = unsigned(float(bm::gap_max_bits) / bie_bits_per_int);
1665
1666 unsigned bc, ibc, gc;
1667
1668 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1669
1670 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1671 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1672
1673 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1674 if (!d0)
1675 return bm::set_block_azero;
1676
1677 unsigned d0_bc = word_bitcount64(d0);
1678 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1679 if (d0 != ~0ull)
1680 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1681
1682 bm::bit_block_change_bc(block, &gc, &bc);
1683 ibc = bm::gap_max_bits - bc;
1684 if (bc == 1)
1686 if (!ibc)
1687 return bm::set_block_aone;
1688 {
1689 unsigned arr_size =
1690 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1691 unsigned arr_size_inv =
1692 unsigned(sizeof(gap_word_t) + (ibc * sizeof(gap_word_t)));
1693
1694 add_model(bm::set_block_arrbit, arr_size * 8);
1695 add_model(bm::set_block_arrbit_inv, arr_size_inv * 8);
1696 }
1697
1698 float gcf=float(gc);
1699
1700 if (gc > 3 && gc < bm::gap_max_buff_len)
1702 32 + unsigned((gcf-1) * bie_bits_per_int));
1703
1704
1705 float bcf=float(bc), ibcf=float(ibc);
1706
1707 if (bc < bie_limit)
1708 add_model(bm::set_block_arr_bienc, 16 * 3 + unsigned(bcf * bie_bits_per_int));
1709 else
1710 {
1711 if (ibc < bie_limit)
1713 16 * 3 + unsigned(ibcf * bie_bits_per_int));
1714 }
1715
1716 gc -= gc > 2 ? 2 : 0;
1717 gcf = float(gc);
1718 if (gc < bm::gap_max_buff_len) // GAP block
1719 {
1721 16 * 4 + unsigned(gcf * bie_bits_per_int));
1722 }
1723 else
1724 {
1725 if (gc < bie_limit)
1727 16 * 4 + unsigned(gcf * bie_bits_per_int));
1728 }
1729
1730 // find the best representation based on computed approx.models
1731 //
1732 unsigned min_score = bm::gap_max_bits;
1733 unsigned char model = bm::set_block_bit;
1734 for (unsigned i = 0; i < mod_size_; ++i)
1735 {
1736 if (scores_[i] < min_score)
1737 {
1738 min_score = scores_[i];
1739 model = models_[i];
1740 }
1741 }
1742#if 0
1743 if (model == set_block_bit_0runs)
1744 {
1745 std::cout << " 0runs=" << (bit_model_0run_size_ * 8) << std::endl;
1746 std::cout << " GAP BIC=" << (16 * 4 + unsigned(gcf * bie_bits_per_int)) << std::endl;
1747 std::cout << " ARR BIC=" << (16 * 3 + unsigned(bcf * bie_bits_per_int)) << std::endl;
1748 std::cout << "BC,GC=[" << bc <<", " << gc << "]" << std::endl;
1749 std::cout << "bie_limit=" << bie_limit << std::endl;
1750
1751 }
1752 switch(model)
1753 {
1754 case set_block_bit: std::cout << "BIT=" << "[" << bc <<", " << gc << "]"; break;
1755 case set_block_bit_1bit: std::cout << "1bit "; break;
1756 case set_block_arrgap: std::cout << "arr_gap "; break;
1757 case set_block_arrgap_egamma: std::cout << "arrgap_egamma "; break;
1758 case set_block_arrgap_bienc: std::cout << "arrgap_BIC "; break;
1759 case set_block_arrgap_inv: std::cout << "arrgap_INV "; break;
1760 case set_block_arrgap_egamma_inv: std::cout << "arrgap_egamma_INV "; break;
1761 case set_block_arrgap_bienc_inv: std::cout << "arrgap_BIC_INV "; break;
1762 case set_block_gap_egamma: std::cout << "gap_egamma "; break;
1763 case set_block_gap_bienc: std::cout << "gap_BIC "; break;
1764 case set_block_bitgap_bienc: std::cout << "bitgap_BIC "; break;
1765 case set_block_bit_digest0: std::cout << "D0 "; break;
1766 case set_block_bit_0runs: std::cout << "0runs=[" << bc <<", " << gc << " lmt=" << bie_limit << "]"; break;
1767 case set_block_arr_bienc_inv: std::cout << "arr_BIC_INV "; break;
1768 case set_block_arr_bienc: std::cout << "arr_BIC "; break;
1769 default: std::cout << "UNK=" << int(model); break;
1770 }
1771#endif
1772 return model;
1773}
1774
1775template<class BV>
1776unsigned char
1778{
1779 // experimental code
1780#if 0
1781 {
1782 const float bie_bits_per_int = compression_level_ < 6 ? 3.75f : 2.5f;
1783
1784 unsigned wave_matches[e_bit_end] = {0, };
1785
1787 unsigned s_gc, s_bc;
1788 bm::compute_s_block_descr(block, x_descr, &s_gc, &s_bc);
1789 const unsigned wave_max_bits = bm::set_block_digest_wave_size * 32;
1790
1791 for (unsigned i = 0; i < bm::block_waves; ++i)
1792 {
1793 s_gc = x_descr.sb_gc[i];
1794 s_bc = x_descr.sb_bc[i];
1795 unsigned curr_best;
1796 bm::bit_representation best_rep =
1797 bm::best_representation(s_bc, s_gc, wave_max_bits, bie_bits_per_int, &curr_best);
1798 wave_matches[best_rep]++;
1799 } // for
1800 unsigned sum_cases = 0;
1801 bool sub_diff = false;
1802 unsigned v_count = 0;
1803 for (unsigned i = 0; i < e_bit_end; ++i)
1804 {
1805 sum_cases += wave_matches[i];
1806 if (wave_matches[i] != 0 && wave_matches[i] < 64)
1807 {
1808 sub_diff = true; v_count++;
1809 }
1810 }
1811 if (sub_diff)
1812 {
1813 std::cout << "-" << v_count;
1814 if (v_count > 2)
1815 {
1816 for (unsigned i=0; i < e_bit_end; ++i)
1817 {
1818 if (wave_matches[i])
1819 {
1820 switch (i)
1821 {
1822 case e_bit_GAP: std::cout << " G" << wave_matches[i]; break;
1823 case e_bit_INT: std::cout << " I" << wave_matches[i]; break;
1824 case e_bit_IINT: std::cout << "iI" << wave_matches[i]; break;
1825 case e_bit_1: std::cout << " 1s" << wave_matches[i]; break;
1826 case e_bit_0: std::cout << " 0s" << wave_matches[i]; break;
1827 case e_bit_bit: std::cout << " B" << wave_matches[i]; break;
1828 }
1829 }
1830 } // for
1831 std::cout << " |";
1832 }
1833 }
1834 else
1835 {
1836 //std::cout << "=";
1837 }
1838 BM_ASSERT(sum_cases == 64);
1839 }
1840#endif
1841 reset_models();
1842
1843 if (compression_level_ >= 5)
1844 return find_bit_best_encoding_l5(block);
1845
1846 unsigned bc, bit_gaps;
1847
1848 // heuristics and hard-coded rules to determine
1849 // the best representation for bit-block
1850 //
1851 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1852
1853 if (compression_level_ <= 1)
1854 return bm::set_block_bit;
1855
1856 // check if it is a very sparse block with some areas of dense areas
1857 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1858 if (compression_level_ <= 5)
1859 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1860
1861 if (compression_level_ >= 2)
1862 {
1863 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1864 if (!d0)
1865 {
1867 return bm::set_block_azero;
1868 }
1869 unsigned d0_bc = word_bitcount64(d0);
1870 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1871 if (d0 != ~0ull)
1872 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1873
1874 if (compression_level_ >= 4)
1875 {
1876 bm::bit_block_change_bc(block, &bit_gaps, &bc);
1877 }
1878 else
1879 {
1880 bc = bm::bit_block_count(block);
1881 bit_gaps = 65535;
1882 }
1883 BM_ASSERT(bc);
1884
1885 if (bc == 1)
1886 {
1889 }
1890 unsigned inverted_bc = bm::gap_max_bits - bc;
1891 if (!inverted_bc)
1892 {
1894 return bm::set_block_aone;
1895 }
1896
1897 if (compression_level_ >= 3)
1898 {
1899 unsigned arr_size =
1900 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1901 unsigned arr_size_inv =
1902 unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1903
1904 add_model(bm::set_block_arrbit, arr_size*8);
1905 add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1906
1907 if (compression_level_ >= 4)
1908 {
1909 const unsigned gamma_bits_per_int = 6;
1910 //unsigned bit_gaps = bm::bit_block_calc_change(block);
1911
1912 if (compression_level_ == 4)
1913 {
1914 if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1916 16 + (bit_gaps-1) * gamma_bits_per_int);
1917 if (bc < bit_gaps && bc < bm::gap_equiv_len)
1919 16 + bc * gamma_bits_per_int);
1920 if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1922 16 + inverted_bc * gamma_bits_per_int);
1923 }
1924 } // level >= 3
1925 } // level >= 3
1926 } // level >= 2
1927
1928 // find the best representation based on computed approx.models
1929 //
1930 unsigned min_score = bm::gap_max_bits;
1931 unsigned char model = bm::set_block_bit;
1932 for (unsigned i = 0; i < mod_size_; ++i)
1933 {
1934 if (scores_[i] < min_score)
1935 {
1936 min_score = scores_[i];
1937 model = models_[i];
1938 }
1939 }
1940 return model;
1941}
1942
1943template<class BV>
1944unsigned char
1946{
1947 // heuristics and hard-coded rules to determine
1948 // the best representation for d-GAP block
1949 //
1950 if (compression_level_ <= 2)
1951 return bm::set_block_gap;
1952
1953 unsigned len = bm::gap_length(gap_block);
1954 if (len == 2)
1955 return bm::set_block_gap;
1956
1957 unsigned bc = bm::gap_bit_count_unr(gap_block);
1958 unsigned ibc = bm::gap_max_bits - bc;
1959
1960 if (bc == 1)
1962
1963 bc += 2; ibc += 2; // correct counts because effective GAP len = len - 2
1964 if (bc < len)
1965 {
1966 if (compression_level_ < 4 || len < 6)
1967 return bm::set_block_arrgap;
1968
1969 if (compression_level_ == 4)
1971
1973 }
1974 if (ibc < len)
1975 {
1976 if (compression_level_ < 4 || len < 6)
1978
1979 if (compression_level_ == 4)
1982 }
1983 if (len < 6)
1984 return bm::set_block_gap;
1985
1986 if (compression_level_ == 4)
1988
1990}
1991
1992
1993
1994
1995template<class BV>
1997{
1998 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
1999
2000 gap_word_t arr_len;
2001 bool invert = false;
2002
2003 unsigned char enc_choice = find_gap_best_encoding(gap_block);
2004 switch (enc_choice)
2005 {
2006 case bm::set_block_gap:
2007 gamma_gap_block(gap_block, enc); // TODO: use plain encode (non-gamma)
2008 break;
2009
2011 arr_len = bm::gap_convert_to_arr(gap_temp_block,
2012 gap_block,
2014 BM_ASSERT(arr_len == 1);
2016 enc.put_16(gap_temp_block[0]);
2017 compression_stat_[bm::set_block_bit_1bit]++;
2018 break;
2021 invert = true;
2023 // fall through
2026 // fall through
2028 arr_len = gap_convert_to_arr(gap_temp_block,
2029 gap_block,
2031 invert);
2032 BM_ASSERT(arr_len);
2033 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
2034 break;
2036 interpolated_encode_gap_block(gap_block, enc);
2037 break;
2039 invert = true;
2041 // fall through
2043 arr_len = gap_convert_to_arr(gap_temp_block,
2044 gap_block,
2046 invert);
2047 BM_ASSERT(arr_len);
2048 interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
2049 break;
2050 default:
2051 gamma_gap_block(gap_block, enc);
2052 } // switch
2053}
2054
2055template<class BV>
2057 bm::encoder& enc,
2058 unsigned //size_control
2059 ) BMNOEXCEPT
2060{
2061 enc.put_8(bm::set_block_bit_0runs);
2062 enc.put_8((blk[0]==0) ? 0 : 1); // encode start
2063
2064 unsigned i, j;
2065 for (i = 0; i < bm::set_block_size; ++i)
2066 {
2067 if (blk[i] == 0)
2068 {
2069 // scan fwd to find 0 island length
2070 for (j = i+1; j < bm::set_block_size; ++j)
2071 {
2072 if (blk[j] != 0)
2073 break;
2074 }
2075 BM_ASSERT(j-i);
2076 enc.put_16((gap_word_t)(j-i));
2077 i = j - 1;
2078 }
2079 else
2080 {
2081 // scan fwd to find non-0 island length
2082 for (j = i+1; j < bm::set_block_size; ++j)
2083 {
2084 if (blk[j] == 0)
2085 {
2086 // look ahead to identify and ignore short 0-run
2087 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
2088 ((j+2 < bm::set_block_size) && blk[j+2]))
2089 {
2090 ++j; // skip zero word
2091 continue;
2092 }
2093 break;
2094 }
2095 }
2096 BM_ASSERT(j-i);
2097 enc.put_16((gap_word_t)(j-i));
2098 enc.put_32(blk + i, j - i); // stream all bit-words now
2099
2100 i = j - 1;
2101 }
2102 }
2103 compression_stat_[bm::set_block_bit_0runs]++;
2104}
2105
2106
2107template<class BV>
2109 bm::encoder& enc,
2111{
2112 // evaluate a few "sure" models here and pick the best
2113 //
2114 if (d0 != ~0ull)
2115 {
2116 if (bit_model_0run_size_ < bit_model_d0_size_)
2117 {
2118 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
2119 return;
2120 }
2121
2122 // encode using digest0 method
2123 //
2124 enc.put_8(bm::set_block_bit_digest0);
2125 enc.put_64(d0);
2126 while (d0)
2127 {
2128 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
2129
2130 unsigned wave = bm::word_bitcount64(t - 1);
2131 unsigned off = wave * bm::set_block_digest_wave_size;
2132 unsigned j = 0;
2133 do
2134 {
2135 enc.put_32(block[off+j+0]);
2136 enc.put_32(block[off+j+1]);
2137 enc.put_32(block[off+j+2]);
2138 enc.put_32(block[off+j+3]);
2139 j += 4;
2140 } while (j < bm::set_block_digest_wave_size);
2141
2142 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
2143 } // while
2144
2145 compression_stat_[bm::set_block_bit_digest0]++;
2146 }
2147 else
2148 {
2149 if (bit_model_0run_size_ < unsigned(bm::set_block_size*sizeof(bm::word_t)))
2150 {
2151 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
2152 return;
2153 }
2154
2155 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
2156 compression_stat_[bm::set_block_bit]++;
2157 }
2158}
2159
2160template<class BV>
2162 const block_match_chain_type& mchain) BMNOEXCEPT
2163{
2164 size_type chain_size = (size_type)mchain.chain_size;
2165 BM_ASSERT(chain_size);
2166
2167 unsigned char vbr_flag = bm::check_pair_vect_vbr(mchain, *ref_vect_);
2168
2169 enc.put_8(bm::set_block_xor_chain);
2170 enc.put_8(vbr_flag); // flag (reserved)
2171
2172 size_type ridx = mchain.ref_idx[0];
2173 bm::id64_t d64 = mchain.xor_d64[0];
2174 ridx = ref_vect_->get_row_idx(ridx);
2175
2176 switch(vbr_flag)
2177 {
2178 case 1: enc.put_8((unsigned char)ridx); break;
2179 case 2: enc.put_16((unsigned short)ridx); break;
2180 case 0: enc.put_32((unsigned)ridx); break;
2181 default: BM_ASSERT(0); break;
2182 } // switch
2183 enc.put_h64(d64);
2184 enc.put_8((unsigned char) (chain_size-1));
2185
2186 for (unsigned ci = 1; ci < chain_size; ++ci)
2187 {
2188 ridx = mchain.ref_idx[ci];
2189 d64 = mchain.xor_d64[ci];
2190 ridx = ref_vect_->get_row_idx(ridx);
2191 switch(vbr_flag)
2192 {
2193 case 1: enc.put_8((unsigned char)ridx); break;
2194 case 2: enc.put_16((unsigned short)ridx); break;
2195 case 0: enc.put_32((unsigned)ridx); break;
2196 default: BM_ASSERT(0); break;
2197 } // switch
2198 enc.put_h64(d64);
2199 } // for ci
2200 compression_stat_[bm::set_block_xor_chain]++;
2201}
2202
2203
2204
2205template<class BV>
2207 const bm::word_t* s_block,
2208 const block_match_chain_type& mchain,
2209 unsigned i, unsigned j) BMNOEXCEPT
2210{
2211 if (BM_IS_GAP(s_block))
2212 {
2213 bm::gap_convert_to_bitset(xor_tmp1_, BMGAP_PTR(s_block));
2214 s_block = xor_tmp1_;
2215 }
2216 size_type ridx = mchain.ref_idx[0];
2217 const bm::word_t* ref_block = xor_scan_.get_ref_block(ridx, i, j);
2218 if (BM_IS_GAP(ref_block))
2219 {
2220 bm::gap_convert_to_bitset(xor_tmp2_, BMGAP_PTR(ref_block));
2221 ref_block = xor_tmp2_;
2222 }
2223 bm::id64_t d64 = mchain.xor_d64[0];
2224 bm::bit_block_xor(xor_tmp_block_, s_block, ref_block, d64);
2225 for (unsigned k = 1; k < mchain.chain_size; ++k)
2226 {
2227 ridx = mchain.ref_idx[k];
2228 ref_block = xor_scan_.get_ref_block(ridx, i, j);
2229 if (BM_IS_GAP(ref_block))
2230 {
2231 bm::gap_convert_to_bitset(xor_tmp2_, BMGAP_PTR(ref_block));
2232 ref_block = xor_tmp2_;
2233 }
2234 d64 = mchain.xor_d64[k];
2235 bm::bit_block_xor(xor_tmp_block_, ref_block, d64);
2236 } // for i
2237}
2238
2239
2240template<class BV>
2242 typename serializer<BV>::buffer& buf,
2243 const statistics_type* bv_stat)
2244{
2245 statistics_type stat;
2246 if (!bv_stat)
2247 {
2248 bv.calc_stat(&stat);
2249 bv_stat = &stat;
2250 }
2251
2252 buf.resize(bv_stat->max_serialize_mem, false); // no-copy resize
2253 optimize_ = free_ = false;
2254
2255 unsigned char* data_buf = buf.data();
2256 size_t buf_size = buf.size();
2257 size_type slen = this->serialize(bv, data_buf, buf_size);
2258 BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
2259 BM_ASSERT(slen);
2260
2261 buf.resize(slen);
2262}
2263
2264template<class BV>
2266 typename serializer<BV>::buffer& buf)
2267{
2268 statistics_type st;
2269 optimize_ = true;
2270 if (!ref_vect_) // do not use block-free if XOR compression is setup
2271 free_ = true; // set the destructive mode
2272
2273 typename bvector_type::mem_pool_guard mp_g_z;
2274 mp_g_z.assign_if_not_set(pool_, bv);
2275
2276 bv.optimize(temp_block_, BV::opt_compress, &st);
2277 serialize(bv, buf, &st);
2278
2279 optimize_ = free_ = false; // restore the default mode
2280}
2281
2282template<class BV>
2284 bm::encoder& enc,
2285 bool inverted) BMNOEXCEPT
2286{
2287 unsigned arr_len =
2288 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2289
2290 if (arr_len)
2291 {
2292 unsigned char scode =
2294 enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len, true);
2295 compression_stat_[scode]++;
2296 return;
2297 }
2298 encode_bit_digest(block, enc, digest0_);
2299}
2300
2301template<class BV>
2304{
2305 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
2306 BM_ASSERT(len); (void)len;
2307 gamma_gap_block(bit_idx_arr_.data(), enc);
2308}
2309
2310template<class BV>
2312 bm::encoder& enc,
2313 bool inverted) BMNOEXCEPT
2314{
2315 unsigned arr_len =
2316 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2317 if (arr_len)
2318 {
2319 gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2320 return;
2321 }
2322 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
2323 compression_stat_[bm::set_block_bit]++;
2324}
2325
2326template<class BV>
2328 bm::encoder& enc,
2329 bool inverted) BMNOEXCEPT
2330{
2331 unsigned arr_len =
2332 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2333 if (arr_len)
2334 {
2335 interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2336 return;
2337 }
2338 encode_bit_digest(block, enc, digest0_);
2339}
2340
2341template<class BV>
2344{
2345 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2346 BM_ASSERT(len); (void)len;
2347 interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
2348}
2349
2350
2351template<class BV>
2354{
2355 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2356 BM_ASSERT(len); (void)len;
2357
2358 const unsigned char scode = bm::set_block_bitgap_bienc;
2359
2360 encoder::position_type enc_pos0 = enc.get_pos();
2361 {
2362 bit_out_type bout(enc);
2363
2364 bm::gap_word_t head = (bit_idx_arr_[0] & 1); // isolate start flag
2365 bm::gap_word_t min_v = bit_idx_arr_[1];
2366
2367 BM_ASSERT(bit_idx_arr_[len] == 65535);
2368 BM_ASSERT(bit_idx_arr_[len] > min_v);
2369
2370 enc.put_8(scode);
2371
2372 enc.put_8((unsigned char)head);
2373 enc.put_16(bm::gap_word_t(len));
2374 enc.put_16(min_v);
2375 bout.bic_encode_u16(&bit_idx_arr_[2], len-2, min_v, 65535);
2376 bout.flush();
2377 }
2378 encoder::position_type enc_pos1 = enc.get_pos();
2379 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2380 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2381 if (enc_size >= raw_size)
2382 {
2383 enc.set_pos(enc_pos0); // rollback the bit stream
2384 }
2385 else
2386 {
2387 compression_stat_[scode]++;
2388 return;
2389 }
2390 // if we got to this point it means coding was not efficient
2391 // and we rolled back to simpler method
2392 //
2393 encode_bit_digest(block, enc, digest0_);
2394}
2395
2396//--------------------------------------------------------------------
2397//
2398const unsigned sblock_flag_sb16 = (1u << 0); ///< 16-bit SB index (8-bit by default)
2399const unsigned sblock_flag_sb32 = (1u << 1); ///< 32-bit SB index
2400
2401const unsigned sblock_flag_min16 = (1u << 2); ///< 16-bit minv
2402const unsigned sblock_flag_min24 = (1u << 3); ///< 24-bit minv
2404
2405const unsigned sblock_flag_len16 = (1u << 4); ///< 16-bit len (8-bit by default)
2406const unsigned sblock_flag_max16 = (1u << 5);
2407const unsigned sblock_flag_max24 = (1u << 6);
2409
2410template<class BV>
2411void serializer<BV>::bienc_arr_sblock(const BV& bv, unsigned sb,
2413{
2414 unsigned sb_flag = 0;
2415
2416 unsigned char scode = bm::set_sblock_bienc;
2417 bm::convert_sub_to_arr(bv, sb, sb_bit_idx_arr_);
2418 unsigned len = (unsigned)sb_bit_idx_arr_.size();
2419
2420 BM_ASSERT(sb_bit_idx_arr_.size() < 65536);
2421 BM_ASSERT(sb_bit_idx_arr_.size());
2422
2423 bm::word_t min_v = sb_bit_idx_arr_[0];
2424 bm::word_t max_v = sb_bit_idx_arr_[len - 1];
2426 bm::word_t max_v_delta = bm::set_sub_total_bits - max_v;
2427
2428 // build decoding flags
2429 if (sb > 65535)
2430 sb_flag |= bm::sblock_flag_sb32;
2431 else if (sb > 255)
2432 sb_flag |= bm::sblock_flag_sb16;
2433
2434 if (len > 255)
2435 sb_flag |= bm::sblock_flag_len16;
2436
2437 if (min_v > 65535)
2438 if (min_v < 0xFFFFFF)
2439 sb_flag |= bm::sblock_flag_min24;
2440 else
2441 sb_flag |= bm::sblock_flag_min32; // 24 and 16
2442 else if (min_v > 255)
2443 sb_flag |= bm::sblock_flag_min16;
2444
2445 if (max_v_delta > 65535)
2446 if (max_v_delta < 0xFFFFFF)
2447 sb_flag |= bm::sblock_flag_max24;
2448 else
2449 sb_flag |= bm::sblock_flag_max32;
2450 else if (max_v_delta > 255)
2451 sb_flag |= bm::sblock_flag_max16;
2452
2453 // encoding header
2454 //
2455 enc.put_8(scode);
2456 enc.put_8((unsigned char)sb_flag);
2457
2458 if (sb > 65535)
2459 enc.put_32(sb);
2460 else if (sb > 255)
2461 enc.put_16((unsigned short)sb);
2462 else
2463 enc.put_8((unsigned char)sb);
2464
2465 if (len > 255)
2466 enc.put_16((unsigned short)len);
2467 else
2468 enc.put_8((unsigned char)len);
2469
2470 if (min_v > 65535)
2471 if (min_v < 0xFFFFFF)
2472 enc.put_24(min_v);
2473 else
2474 enc.put_32(min_v);
2475 else if (min_v > 255)
2476 enc.put_16((unsigned short)min_v);
2477 else
2478 enc.put_8((unsigned char)min_v);
2479
2480 if (max_v_delta > 65535)
2481 if (max_v < 0xFFFFFF)
2482 enc.put_24(max_v_delta);
2483 else
2484 enc.put_32(max_v_delta);
2485 else if (max_v_delta > 255)
2486 enc.put_16((unsigned short)max_v_delta);
2487 else
2488 enc.put_8((unsigned char)max_v_delta);
2489
2490 bit_out_type bout(enc);
2491 bout.bic_encode_u32_cm(sb_bit_idx_arr_.data()+1,
2492 (unsigned)sb_bit_idx_arr_.size()-2,
2493 min_v, max_v);
2494
2495 compression_stat_[scode]++;
2496}
2497
2498
2499
2500template<class BV>
2501void
2503 bm::encoder& enc,
2504 bool inverted) BMNOEXCEPT
2505{
2506 unsigned arr_len = bm::bit_block_convert_to_arr(
2507 bit_idx_arr_.data(), block, inverted);
2508
2509 if (arr_len)
2510 {
2511 unsigned char scode =
2513
2514 encoder::position_type enc_pos0 = enc.get_pos();
2515 {
2516 bit_out_type bout(enc);
2517
2518 bm::gap_word_t min_v = bit_idx_arr_[0];
2519 bm::gap_word_t max_v = bit_idx_arr_[arr_len-1];
2520 BM_ASSERT(max_v > min_v);
2521 bm::gap_word_t max_delta = bm::gap_word_t(65536 - max_v);
2522
2523 if (!inverted && min_v <= 0xFF && max_delta <= 0xFF) // 8-bit header
2524 {
2525 enc.put_8(bm::set_block_arr_bienc_8bh);
2526 enc.put_8((unsigned char)min_v);
2527 enc.put_8((unsigned char)max_delta);
2528 }
2529 else
2530 {
2531 enc.put_8(scode);
2532 enc.put_16(min_v);
2533 enc.put_16(max_v);
2534 }
2535 enc.put_16(bm::gap_word_t(arr_len));
2536
2537 bout.bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
2538 bout.flush();
2539 }
2540 encoder::position_type enc_pos1 = enc.get_pos();
2541 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2542 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2543 if (enc_size >= raw_size)
2544 {
2545 enc.set_pos(enc_pos0); // rollback the bit stream
2546 }
2547 else
2548 {
2549 if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
2550 {
2551 enc.set_pos(enc_pos0); // rollback the bit stream
2552 }
2553 else
2554 {
2555 compression_stat_[scode]++;
2556 return;
2557 }
2558 }
2559 }
2560 // coding did not result in best compression
2561 // use simpler method
2562 encode_bit_digest(block, enc, digest0_);
2563}
2564
2565
2566
2567#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
2568 if (nb == 1u) \
2569 enc.put_8(B_1ZERO); \
2570 else if (nb < 256u) \
2571 { \
2572 enc.put_8(B_8ZERO); \
2573 enc.put_8((unsigned char)nb); \
2574 } \
2575 else if (nb < 65536u) \
2576 { \
2577 enc.put_8(B_16ZERO); \
2578 enc.put_16((unsigned short)nb); \
2579 } \
2580 else if (nb < bm::id_max32) \
2581 { \
2582 enc.put_8(B_32ZERO); \
2583 enc.put_32(unsigned(nb)); \
2584 } \
2585 else \
2586 {\
2587 enc.put_8(B_64ZERO); \
2588 enc.put_64(nb); \
2589 }
2590
2591
2592template<class BV>
2594 bookmark_state& bookm,
2596{
2597 BM_ASSERT(bookm.nb_range_);
2598
2599 block_idx_type nb_delta = nb - bookm.nb_;
2600 if (bookm.ptr_ && nb_delta >= bookm.nb_range_)
2601 {
2602 unsigned char* curr = enc.get_pos();
2603 size_t bytes_delta = size_t(curr - bookm.ptr_);
2604 if (bytes_delta > bookm.min_bytes_range_)
2605 {
2606 enc.set_pos(bookm.ptr_); // rewind back and save the skip
2607 switch (bookm.bm_type_)
2608 {
2609 case 0: // 32-bit mark
2610 bytes_delta -= sizeof(unsigned);
2611 if (bytes_delta < 0xFFFFFFFF) // range overflow check
2612 enc.put_32(unsigned(bytes_delta));
2613 // if range is somehow off, bookmark remains 0 (NULL)
2614 break;
2615 case 1: // 24-bit mark
2616 bytes_delta -= (sizeof(unsigned)-1);
2617 if (bytes_delta < 0xFFFFFF)
2618 enc.put_24(unsigned(bytes_delta));
2619 break;
2620 case 2: // 16-bit mark
2621 bytes_delta -= sizeof(unsigned short);
2622 if (bytes_delta < 0xFFFF)
2623 enc.put_16((unsigned short)bytes_delta);
2624 break;
2625 default:
2626 BM_ASSERT(0);
2627 break;
2628 } // switch
2629
2630 enc.set_pos(curr); // restore and save the sync mark
2631
2632 if (nb_delta < 0xFF)
2633 {
2634 enc.put_8(set_nb_sync_mark8);
2635 enc.put_8((unsigned char) nb_delta);
2636 }
2637 else
2638 if (nb_delta < 0xFFFF)
2639 {
2640 enc.put_8(set_nb_sync_mark16);
2641 enc.put_16((unsigned short) nb_delta);
2642 }
2643 else
2644 if (nb_delta < 0xFFFFFF)
2645 {
2646 enc.put_8(set_nb_sync_mark24);
2647 enc.put_24(unsigned(nb_delta));
2648 }
2649 else
2650 if (nb_delta < ~0U)
2651 {
2652 enc.put_8(set_nb_sync_mark32);
2653 enc.put_32(unsigned(nb_delta));
2654 }
2655 else
2656 {
2657 #ifdef BM64ADDR
2658 if (nb_delta < 0xFFFFFFFFFFFFUL)
2659 {
2660 enc.put_8(set_nb_sync_mark48);
2661 enc.put_48(nb_delta);
2662 }
2663 else
2664 {
2665 enc.put_8(set_nb_sync_mark64);
2666 enc.put_64(nb_delta);
2667 }
2668 #endif
2669 }
2670 bookm.ptr_ = 0;
2671 }
2672 }
2673
2674 if (!bookm.ptr_) // start new bookmark
2675 {
2676 // bookmarks use VBR to save offset
2677 bookm.nb_ = nb;
2678 bookm.ptr_ = enc.get_pos() + 1;
2679 switch (bookm.bm_type_)
2680 {
2681 case 0: // 32-bit mark
2682 enc.put_8(bm::set_nb_bookmark32);
2683 enc.put_32(0); // put NULL mark at first
2684 break;
2685 case 1: // 24-bit mark
2686 enc.put_8(bm::set_nb_bookmark24);
2687 enc.put_24(0);
2688 break;
2689 case 2: // 16-bit mark
2690 enc.put_8(bm::set_nb_bookmark16);
2691 enc.put_16(0);
2692 break;
2693 default:
2694 BM_ASSERT(0);
2695 break;
2696 } // switch
2697 }
2698}
2699
2700
2701template<class BV>
2704 unsigned char* buf, size_t buf_size)
2705{
2706 BM_ASSERT(temp_block_);
2707
2708 if (allow_stat_reset_)
2710 const blocks_manager_type& bman = bv.get_blocks_manager();
2711
2712 bm::encoder enc(buf, buf_size); // create the encoder
2713 enc_header_pos_ = 0;
2714 encode_header(bv, enc);
2715
2716 bookmark_state sb_bookmark(sb_range_);
2717
2718 unsigned i_last = ~0u;
2719
2720 block_idx_type i, j;
2721 for (i = 0; i < bm::set_total_blocks; ++i)
2722 {
2723 unsigned i0, j0;
2724 bm::get_block_coord(i, i0, j0);
2725
2726 if (i0 < bman.top_block_size())
2727 {
2728 // ----------------------------------------------------
2729 // bookmark the stream to allow fast skip on decode
2730 //
2731 if (sb_bookmarks_)
2732 {
2733 process_bookmark(i, sb_bookmark, enc);
2734 }
2735
2736 // ---------------------------------------------------
2737 // check if top level block is embarassingly sparse
2738 // and can be encoded as an array of unsigned
2739 //
2740 if ((compression_level_ >= 5) && (i0 != i_last))
2741 {
2742 i_last = i0;
2743 bool is_sparse_sub = bman.is_sparse_sblock(i0, sparse_cutoff_);
2744 if (is_sparse_sub)
2745 {
2746 header_flag_ |= BM_HM_SPARSE;
2747 bienc_arr_sblock(bv, i0, enc);
2748 i += (bm::set_sub_array_size - j0) - 1;
2749 continue;
2750 }
2751 }
2752 }
2753 else
2754 {
2755 break;
2756 }
2757
2758 const bm::word_t* blk = bman.get_block(i0, j0);
2759
2760 // ----------------------------------------------------
2761 // Empty or ONE block serialization
2762 //
2763 // TODO: make a function to check this in ONE pass
2764 //
2765 bool flag;
2766 flag = bm::check_block_zero(blk, false/*shallow check*/);
2767 if (flag)
2768 {
2769 zero_block:
2770 flag = 1;
2771 block_idx_type next_nb = bman.find_next_nz_block(i+1, false);
2772 if (next_nb == bm::set_total_blocks) // no more blocks
2773 {
2775 size_type sz = (size_type)enc.size();
2776
2777 // rewind back to save header flag
2778 enc.set_pos(enc_header_pos_);
2779 enc.put_8(header_flag_);
2780 return sz;
2781 }
2782 block_idx_type nb = next_nb - i;
2783
2784 if (nb > 1 && nb < 128)
2785 {
2786 // special (but frequent) case -- GAP delta fits in 7bits
2787 unsigned char c = (unsigned char)((1u << 7) | nb);
2788 enc.put_8(c);
2789 }
2790 else
2791 {
2797 }
2798 i = next_nb - 1;
2799 continue;
2800 }
2801 else
2802 {
2803 flag = bm::check_block_one(blk, false); // shallow scan
2804 if (flag)
2805 {
2806 full_block:
2807 flag = 1;
2808 // Look ahead for similar blocks
2809 for(j = i+1; j < bm::set_total_blocks; ++j)
2810 {
2811 bm::get_block_coord(j, i0, j0);
2812 if (!j0) // look ahead if the whole superblock is 0xFF
2813 {
2814 bm::word_t*** blk_root = bman.top_blocks_root();
2815 if ((bm::word_t*)blk_root[i0] == FULL_BLOCK_FAKE_ADDR)
2816 {
2817 j += 255;
2818 continue;
2819 }
2820 }
2821 const bm::word_t* blk_next = bman.get_block(i0, j0);
2822 if (flag != bm::check_block_one(blk_next, true)) // deep scan
2823 break;
2824 } // for j
2825
2826 // TODO: improve this condition, because last block is always
2827 // has 0 at the end, thus this never happen in practice
2828 if (j == bm::set_total_blocks)
2829 {
2830 enc.put_8(set_block_aone);
2831 break;
2832 }
2833 else
2834 {
2835 block_idx_type nb = j - i;
2841 }
2842 i = j - 1;
2843 continue;
2844 }
2845 }
2846
2847 if (ref_vect_) // XOR filter
2848 {
2849 // Similarity model must be attached with the ref.vectors
2850 // for XOR filter to work
2851 BM_ASSERT(sim_model_);
2852
2853 bool nb_indexed = sim_model_->bv_blocks.test(i);
2854 BM_ASSERT(nb_indexed); // Model is correctly computed from ref-vector
2855 if (nb_indexed)
2856 {
2857 // TODO: use rs-index or count blocks
2858 size_type rank = sim_model_->bv_blocks.count_range(0, i);
2859 BM_ASSERT(rank);
2860 --rank;
2861 const block_match_chain_type& mchain =
2862 sim_model_->matr.get(ref_idx_, rank);
2863 BM_ASSERT(mchain.nb == i);
2864 switch (mchain.match)
2865 {
2866 case e_no_xor_match:
2867 break;
2868 case e_xor_match_EQ:
2869 {
2870 BM_ASSERT(mchain.chain_size == 1);
2871 size_type ridx = mchain.ref_idx[0];
2872 size_type plain_idx = ref_vect_->get_row_idx(ridx);
2874 enc.put_32(unsigned(plain_idx));
2875 compression_stat_[bm::set_block_ref_eq]++;
2876 continue;
2877 }
2878 break;
2879 case e_xor_match_GC:
2880 case e_xor_match_BC:
2881 case e_xor_match_iBC:
2882 {
2883 BM_ASSERT(mchain.chain_size);
2884 xor_tmp_product(blk, mchain, i0, j0);
2885 // TODO: validate xor_tmp_block_
2886 if (mchain.chain_size == 1)
2887 {
2888 size_type ridx = mchain.ref_idx[0];
2889 bm::id64_t d64 = mchain.xor_d64[0];
2890 size_type plain_idx = ref_vect_->get_row_idx(ridx);
2891 if (d64 == ~0ull)
2892 {
2893 enc.put_8_16_32(unsigned(plain_idx),
2897 }
2898 else
2899 {
2900 enc.put_8_16_32(unsigned(plain_idx),
2904 enc.put_64(d64); // xor digest mask
2905 }
2906 compression_stat_[bm::set_block_xor_ref32]++;
2907 }
2908 else // chain
2909 {
2910 encode_xor_match_chain(enc, mchain);
2911 }
2912 blk = xor_tmp_block_; // substitute with XOR product
2913 }
2914 break;
2915 default:
2916 BM_ASSERT(0);
2917 } // switch xor_match
2918 }
2919 }
2920
2921 // --------------------------------------------------
2922 // GAP serialization
2923 //
2924 if (BM_IS_GAP(blk))
2925 {
2926 encode_gap_block(BMGAP_PTR(blk), enc);
2927 }
2928 else // bit-block
2929 {
2930 // ----------------------------------------------
2931 // BIT BLOCK serialization
2932 //
2933
2934 unsigned char model = find_bit_best_encoding(blk);
2935 switch (model)
2936 {
2937 case bm::set_block_bit:
2939 break;
2941 {
2942 unsigned bit_idx = 0;
2943 bm::bit_block_find(blk, bit_idx, &bit_idx);
2944 BM_ASSERT(bit_idx < 65536);
2945 enc.put_8(bm::set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
2946 compression_stat_[bm::set_block_bit_1bit]++;
2947 continue;
2948 }
2949 break;
2950 case bm::set_block_azero: // empty block all of the sudden ?
2951 goto zero_block;
2952 case bm::set_block_aone:
2953 goto full_block;
2955 encode_bit_array(blk, enc, false);
2956 break;
2958 encode_bit_array(blk, enc, true);
2959 break;
2961 gamma_gap_bit_block(blk, enc);
2962 break;
2964 encode_bit_interval(blk, enc, 0); // TODO: get rid of param 3 (0)
2965 break;
2967 gamma_arr_bit_block(blk, enc, false);
2968 break;
2970 gamma_arr_bit_block(blk, enc, true);
2971 break;
2973 bienc_arr_bit_block(blk, enc, false);
2974 break;
2976 bienc_arr_bit_block(blk, enc, true);
2977 break;
2979 interpolated_arr_bit_block(blk, enc, false);
2980 break;
2982 interpolated_arr_bit_block(blk, enc, true); // inverted
2983 break;
2986 break;
2988 bienc_gap_bit_block(blk, enc);
2989 break;
2991 encode_bit_digest(blk, enc, digest0_);
2992 break;
2993 default:
2994 BM_ASSERT(0); // predictor returned an unknown model
2996 }
2997 } // bit-block processing
2998
2999 // destructive serialization mode
3000 //
3001 if (free_)
3002 {
3003 // const cast is ok, because it is a requested mode
3004 const_cast<blocks_manager_type&>(bman).zero_block(i);
3005 }
3006
3007 } // for i
3008
3009 enc.put_8(set_block_end);
3010 size_type sz = (size_type)enc.size();
3011
3012 // rewind back to save header flag
3013 enc.set_pos(enc_header_pos_);
3014 enc.put_8(header_flag_);
3015
3016 return sz;
3017}
3018
3019
3020
3021/// Bit mask flags for serialization algorithm
3022/// \ingroup bvserial
3024 BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
3025 BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
3026};
3027
3028/*!
3029 \brief Saves bitvector into memory.
3030
3031 Function serializes content of the bitvector into memory.
3032 Serialization adaptively uses compression(variation of GAP encoding)
3033 when it is benefitial.
3034
3035 \param bv - source bvecor
3036 \param buf - pointer on target memory area. No range checking in the
3037 function. It is responsibility of programmer to allocate sufficient
3038 amount of memory using information from calc_stat function.
3039
3040 \param temp_block - pointer on temporary memory block. Cannot be 0;
3041 If you want to save memory across multiple bvectors
3042 allocate temporary block using allocate_tempblock and pass it to
3043 serialize.
3044 (Serialize does not deallocate temp_block.)
3045
3046 \param serialization_flags
3047 Flags controlling serilization (bit-mask)
3048 (use OR-ed serialization flags)
3049
3050 \ingroup bvserial
3051
3052 \return Size of serialization block.
3053 \sa calc_stat, serialization_flags
3054
3055*/
3056/*!
3057 Serialization format:
3058 <pre>
3059
3060 | HEADER | BLOCKS |
3061
3062 Header structure:
3063 BYTE : Serialization header (bit mask of BM_HM_*)
3064 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
3065 INT16: Reserved (0)
3066 INT16: Reserved Flags (0)
3067
3068 </pre>
3069*/
3070template<class BV>
3071size_t serialize(const BV& bv,
3072 unsigned char* buf,
3073 bm::word_t* temp_block = 0,
3074 unsigned serialization_flags = 0)
3075{
3076 bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
3077
3079 bv_serial.byte_order_serialization(false);
3080
3082 bv_serial.gap_length_serialization(false);
3083 else
3084 bv_serial.gap_length_serialization(true);
3085
3086 return (size_t) bv_serial.serialize(bv, buf, 0);
3087}
3088
3089/*!
3090 @brief Saves bitvector into memory.
3091 Allocates temporary memory block for bvector.
3092
3093 \param bv - source bvecor
3094 \param buf - pointer on target memory area. No range checking in the
3095 function. It is responsibility of programmer to allocate sufficient
3096 amount of memory using information from calc_stat function.
3097
3098 \param serialization_flags
3099 Flags controlling serilization (bit-mask)
3100 (use OR-ed serialization flags)
3101
3102 \ingroup bvserial
3103*/
3104template<class BV>
3105size_t serialize(BV& bv,
3106 unsigned char* buf,
3107 unsigned serialization_flags=0)
3108{
3109 return bm::serialize(bv, buf, 0, serialization_flags);
3110}
3111
3112
3113
3114/*!
3115 @brief Bitvector deserialization from a memory BLOB
3116
3117 @param bv - target bvector
3118 @param buf - pointer on memory which keeps serialized bvector
3119 @param temp_block - pointer on temporary block,
3120 if NULL bvector allocates own.
3121 @param ref_vect - [in] optional pointer to a list of
3122 reference vectors used for
3123 XOR compression.
3124
3125 @return Number of bytes consumed by deserializer.
3126
3127 Function deserializes bitvector from memory block containig results
3128 of previous serialization. Function does not remove bits
3129 which are currently set. Effectively it is OR logical operation
3130 between the target bit-vector and serialized one.
3131
3132 @sa deserialize_range
3133
3134 @ingroup bvserial
3135*/
3136template<class BV>
3137size_t deserialize(BV& bv,
3138 const unsigned char* buf,
3139 bm::word_t* temp_block = 0,
3140 const bm::bv_ref_vector<BV>* ref_vect = 0)
3141{
3142 ByteOrder bo_current = globals<true>::byte_order();
3143
3144 bm::decoder dec(buf);
3145 unsigned char header_flag = dec.get_8();
3146 ByteOrder bo = bo_current;
3147 if (!(header_flag & BM_HM_NO_BO))
3148 {
3149 bo = (bm::ByteOrder) dec.get_8();
3150 }
3151
3152 if (bo_current == bo)
3153 {
3155 deserial.set_ref_vectors(ref_vect);
3156 return deserial.deserialize(bv, buf, temp_block);
3157 }
3158 switch (bo_current)
3159 {
3160 case BigEndian:
3161 {
3163 deserial.set_ref_vectors(ref_vect);
3164 return deserial.deserialize(bv, buf, temp_block);
3165 }
3166 case LittleEndian:
3167 {
3169 deserial.set_ref_vectors(ref_vect);
3170 return deserial.deserialize(bv, buf, temp_block);
3171 }
3172 default:
3173 BM_ASSERT(0);
3174 };
3175 return 0;
3176}
3177
3178
3179/*!
3180 @brief Bitvector range deserialization from a memory BLOB
3181
3182 @param bv - target bvector
3183 @param buf - pointer on memory which keeps serialized bvector
3184 @param from - bit-vector index to deserialize from
3185 @param to - bit-vector index to deserialize to
3186 @param ref_vect - [in] optional pointer to a list of
3187 reference vectors used for
3188 XOR compression.
3189
3190 Function deserializes bitvector from memory block containig results
3191 of previous serialization. Function does not remove bits
3192 which are currently set. Effectively it is OR logical operation
3193 between the target bit-vector and serialized one.
3194
3195 @sa deserialize
3196
3197 @ingroup bvserial
3198*/
3199template<class BV>
3201 const unsigned char* buf,
3202 typename BV::size_type from,
3203 typename BV::size_type to,
3204 const bm::bv_ref_vector<BV>* ref_vect = 0)
3205{
3206 ByteOrder bo_current = globals<true>::byte_order();
3207
3208 bm::decoder dec(buf);
3209 unsigned char header_flag = dec.get_8();
3210 ByteOrder bo = bo_current;
3211 if (!(header_flag & BM_HM_NO_BO))
3212 {
3213 bo = (bm::ByteOrder) dec.get_8();
3214 }
3215
3216 if (bo_current == bo)
3217 {
3219 deserial.set_ref_vectors(ref_vect);
3220 deserial.set_range(from, to);
3221 deserial.deserialize(bv, buf);
3222 }
3223 else
3224 {
3225 switch (bo_current)
3226 {
3227 case BigEndian:
3228 {
3230 deserial.set_ref_vectors(ref_vect);
3231 deserial.set_range(from, to);
3232 deserial.deserialize(bv, buf);
3233 }
3234 break;
3235 case LittleEndian:
3236 {
3238 deserial.set_ref_vectors(ref_vect);
3239 deserial.set_range(from, to);
3240 deserial.deserialize(bv, buf);
3241 }
3242 break;;
3243 default:
3244 BM_ASSERT(0);
3245 };
3246 }
3247 bv.keep_range(from, to);
3248}
3249
3250
3251template<typename DEC, typename BLOCK_IDX>
3254 unsigned block_type,
3255 bm::gap_word_t* dst_arr)
3256{
3257 bm::gap_word_t len = 0;
3258
3259 switch (block_type)
3260 {
3261 case set_block_bit_1bit:
3262 dst_arr[0] = decoder.get_16();
3263 len = 1;
3264 break;
3265 case set_block_arrgap:
3267 len = decoder.get_16();
3268 decoder.get_16(dst_arr, len);
3269 break;
3272 {
3273 bit_in_type bin(decoder);
3274 len = (gap_word_t)bin.gamma();
3275 gap_word_t prev = 0;
3276 for (gap_word_t k = 0; k < len; ++k)
3277 {
3278 gap_word_t bit_idx = (gap_word_t)bin.gamma();
3279 if (k == 0) --bit_idx; // TODO: optimization
3280 bit_idx = (gap_word_t)(bit_idx + prev);
3281 prev = bit_idx;
3282 dst_arr[k] = bit_idx;
3283 } // for
3284 }
3285 break;
3288 {
3289 bm::gap_word_t min_v = decoder.get_16();
3290 bm::gap_word_t max_v = decoder.get_16();
3291
3292 bit_in_type bin(decoder);
3293 len = bm::gap_word_t(bin.gamma() + 4);
3294 dst_arr[0] = min_v;
3295 dst_arr[len-1] = max_v;
3296 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
3297 }
3298 break;
3301 {
3302 bm::gap_word_t min_v, max_v;
3303 len = decoder.get_16();
3304 if (len & 1)
3305 min_v = decoder.get_8();
3306 else
3307 min_v = decoder.get_16();
3308
3309 if (len & (1<<1))
3310 max_v = decoder.get_8();
3311 else
3312 max_v = decoder.get_16();
3313 max_v = bm::gap_word_t(min_v + max_v);
3314
3315 len = bm::gap_word_t(len >> 2);
3316
3317 bit_in_type bin(decoder);
3318 dst_arr[0] = min_v;
3319 dst_arr[len-1] = max_v;
3320 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
3321 }
3322 break;
3323
3324 default: // TODO: get rid of exception here to classify as "noexept"
3325 BM_ASSERT(0);
3326 #ifndef BM_NO_STL
3327 throw std::logic_error(err_msg());
3328 #else
3329 BM_THROW(BM_ERR_SERIALFORMAT);
3330 #endif
3331 }
3332 return len;
3333}
3334
3335template<typename DEC, typename BLOCK_IDX>
3336void
3338 bm::word_t* blk,
3339 unsigned block_type) BMNOEXCEPT
3340{
3341 BM_ASSERT(!BM_IS_GAP(blk));
3342 bm::gap_word_t min_v, max_v, max_delta, arr_len;
3343
3344 switch (block_type)
3345 {
3347 min_v = dec.get_16();
3348 max_v = dec.get_16();
3349 break;
3351 min_v = dec.get_8();
3352 max_delta = dec.get_8();
3353 max_v = bm::gap_word_t(65536 - max_delta);
3354 break;
3355 default:
3356 BM_ASSERT(0);
3357 return;
3358 }
3359
3360 arr_len = dec.get_16();
3361
3362 bit_in_type bin(dec);
3363
3364 if (!IS_VALID_ADDR(blk))
3365 {
3366 bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
3367 return;
3368 }
3369 bm::set_bit(blk, min_v);
3370 bm::set_bit(blk, max_v);
3371 bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
3372}
3373
3374template<typename DEC, typename BLOCK_IDX>
3376 decoder_type& dec,
3377 unsigned block_type,
3378 unsigned* dst_arr,
3379 unsigned* sb_idx)
3380{
3381 unsigned len(0), sb_flag(0);
3382
3383 bit_in_type bin(dec);
3384
3385 switch (block_type)
3386 {
3388 {
3389 sb_flag = dec.get_8();
3390
3391 if (sb_flag & bm::sblock_flag_sb32)
3392 *sb_idx = dec.get_32();
3393 else if (sb_flag & bm::sblock_flag_sb16)
3394 *sb_idx = dec.get_16();
3395 else
3396 *sb_idx = dec.get_8();
3397
3398 if (sb_flag & bm::sblock_flag_len16)
3399 len = dec.get_16();
3400 else
3401 len = dec.get_8();
3402
3403 bm::word_t min_v;
3404 if (sb_flag & bm::sblock_flag_min24)
3405 if (sb_flag & bm::sblock_flag_min16) // 24 and 16
3406 min_v = dec.get_32();
3407 else
3408 min_v = dec.get_24(); // 24 but not 16
3409 else if (sb_flag & bm::sblock_flag_min16)
3410 min_v = dec.get_16();
3411 else
3412 min_v = dec.get_8();
3413
3414 bm::word_t max_v;// = dec.get_32();
3415 if (sb_flag & bm::sblock_flag_max24)
3416 if (sb_flag & bm::sblock_flag_max16)
3417 max_v = dec.get_32(); // 24 and 16
3418 else
3419 max_v = dec.get_24(); // 24 but not 16
3420 else if (sb_flag & bm::sblock_flag_max16)
3421 max_v = dec.get_16();
3422 else
3423 max_v = dec.get_8();
3424 max_v = bm::set_sub_total_bits - max_v;
3425
3426 dst_arr[0] = min_v;
3427 dst_arr[len-1] = max_v;
3428 bin.bic_decode_u32_cm(&dst_arr[1], len-2, min_v, max_v);
3429 }
3430 break;
3431 default: // TODO: get rid of exception here to classify as "noexept"
3432 BM_ASSERT(0);
3433 #ifndef BM_NO_STL
3434 throw std::logic_error(err_msg());
3435 #else
3436 BM_THROW(BM_ERR_SERIALFORMAT);
3437 #endif
3438 } // switch
3439 return len;
3440}
3441
3442
3443template<typename DEC, typename BLOCK_IDX>
3444void
3453
3454template<typename DEC, typename BLOCK_IDX>
3457{
3458 BM_ASSERT(!BM_IS_GAP(blk));
3459
3460 bm::gap_word_t head = dec.get_8();
3461 unsigned arr_len = dec.get_16();
3462 bm::gap_word_t min_v = dec.get_16();
3463
3464
3465 id_array_[0] = head;
3466 id_array_[1] = min_v;
3467 id_array_[arr_len] = 65535;
3468
3469 bit_in_type bin(dec);
3470 bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
3471
3472 if (!IS_VALID_ADDR(blk))
3473 return;
3474 bm::gap_add_to_bitset(blk, id_array_, arr_len);
3475}
3476
3477template<typename DEC, typename BLOCK_IDX>
3479 decoder_type& dec,
3480 bm::word_t* block) BMNOEXCEPT
3481{
3482 bm::id64_t d0 = dec.get_64();
3483 while (d0)
3484 {
3485 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
3486
3487 unsigned wave = bm::word_bitcount64(t - 1);
3488 unsigned off = wave * bm::set_block_digest_wave_size;
3489 unsigned j = 0;
3490 if (!IS_VALID_ADDR(block))
3491 {
3492 do
3493 {
3494 dec.get_32();
3495 dec.get_32();
3496 dec.get_32();
3497 dec.get_32();
3498 j += 4;
3499 } while (j < bm::set_block_digest_wave_size);
3500 }
3501 else
3502 {
3503 do
3504 {
3505 block[off+j+0] |= dec.get_32();
3506 block[off+j+1] |= dec.get_32();
3507 block[off+j+2] |= dec.get_32();
3508 block[off+j+3] |= dec.get_32();
3509 j += 4;
3510 } while (j < bm::set_block_digest_wave_size);
3511 }
3512
3513 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
3514 } // while
3515}
3516
3517template<typename DEC, typename BLOCK_IDX>
3519 decoder_type& dec,
3521{
3522 //TODO: optimization if block exists and it is OR-ed read
3523 bm::bit_block_set(blk, 0);
3524
3525 unsigned char run_type = dec.get_8();
3526 for (unsigned j = 0; j < bm::set_block_size; run_type = !run_type)
3527 {
3528 unsigned run_length = dec.get_16();
3529 if (run_type)
3530 {
3531 unsigned run_end = j + run_length;
3532 BM_ASSERT(run_end <= bm::set_block_size);
3533 for (;j < run_end; ++j)
3534 {
3535 unsigned w = dec.get_32();
3536 blk[j] = w;
3537 }
3538 }
3539 else
3540 {
3541 j += run_length;
3542 }
3543 } // for j
3544}
3545
3546
3547template<typename DEC, typename BLOCK_IDX>
3548void
3550 unsigned block_type,
3551 bm::gap_word_t* dst_block,
3552 bm::gap_word_t& gap_head)
3553{
3554// typedef bit_in<DEC> bit_in_type;
3555 switch (block_type)
3556 {
3557 case set_block_gap:
3558 {
3559 unsigned len = gap_length(&gap_head);
3560 --len;
3561 *dst_block = gap_head;
3562 decoder.get_16(dst_block+1, len - 1);
3563 dst_block[len] = gap_max_bits - 1;
3564 }
3565 break;
3566 case set_block_bit_1bit:
3567 {
3568 gap_set_all(dst_block, bm::gap_max_bits, 0);
3569 gap_word_t bit_idx = decoder.get_16();
3570 gap_add_value(dst_block, bit_idx);
3571 }
3572 break;
3573 case set_block_arrgap:
3575 {
3576 gap_set_all(dst_block, bm::gap_max_bits, 0);
3577 gap_word_t len = decoder.get_16();
3578 for (gap_word_t k = 0; k < len; ++k)
3579 {
3580 gap_word_t bit_idx = decoder.get_16();
3581 bm::gap_add_value(dst_block, bit_idx);
3582 } // for
3583 }
3584 break;
3591 {
3592 unsigned arr_len = read_id_list(decoder, block_type, id_array_);
3593 dst_block[0] = 0;
3594 unsigned gap_len =
3595 gap_set_array(dst_block, id_array_, arr_len);
3596 BM_ASSERT(gap_len == bm::gap_length(dst_block));
3597 (void)(gap_len);
3598 }
3599 break;
3601 {
3602 unsigned len = (gap_head >> 3);
3603 --len;
3604 // read gamma GAP block into a dest block
3605 *dst_block = gap_head;
3606 gap_word_t* gap_data_ptr = dst_block + 1;
3607
3608 bit_in_type bin(decoder);
3609 gap_word_t v = (gap_word_t)bin.gamma();
3610 gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
3611 for (unsigned i = 1; i < len; ++i)
3612 {
3613 v = (gap_word_t)bin.gamma();
3614 gap_sum = (gap_word_t)(gap_sum + v);
3615 *(++gap_data_ptr) = gap_sum;
3616 }
3617 dst_block[len+1] = bm::gap_max_bits - 1;
3618 }
3619 break;
3621 {
3622 unsigned len = (gap_head >> 3);
3623 *dst_block = gap_head;
3624 bm::gap_word_t min_v = decoder.get_16();
3625 dst_block[1] = min_v;
3626 bit_in_type bin(decoder);
3627 bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
3628 dst_block[len] = bm::gap_max_bits - 1;
3629 }
3630 break;
3632 {
3633 bm::gap_word_t min_v, max_v;
3634 unsigned len = (gap_head >> 3);
3635 bm::gap_word_t min8 = gap_head & (1 << 1);
3636 bm::gap_word_t tail8 = gap_head & (1 << 2);
3637 gap_head &= bm::gap_word_t(~(3 << 1)); // clear the flags
3638 if (min8)
3639 min_v = decoder.get_8();
3640 else
3641 min_v = decoder.get_16();
3642 if (tail8)
3643 max_v = decoder.get_8();
3644 else
3645 max_v = decoder.get_16();
3646 max_v = bm::gap_word_t(65535 - max_v); // tail correction
3647
3648 dst_block[0] = gap_head;
3649 dst_block[1] = min_v;
3650 bit_in_type bin(decoder);
3651 bin.bic_decode_u16(&dst_block[2], len-3, min_v, max_v);
3652 dst_block[len-1] = max_v;
3653 dst_block[len] = bm::gap_max_bits - 1;
3654 }
3655 break;
3656 default:
3657 BM_ASSERT(0);
3658 #ifndef BM_NO_STL
3659 throw std::logic_error(err_msg());
3660 #else
3661 BM_THROW(BM_ERR_SERIALFORMAT);
3662 #endif
3663 }
3664
3665 if (block_type == set_block_arrgap_egamma_inv ||
3666 block_type == set_block_arrgap_inv ||
3667 block_type == set_block_arrgap_bienc_inv ||
3668 block_type == set_block_arrgap_bienc_inv_v2)
3669 {
3670 gap_invert(dst_block);
3671 }
3672}
3673
3674template<typename DEC, typename BLOCK_IDX>
3678 block_idx_type nb,
3679 block_idx_type expect_nb) BMNOEXCEPT
3680{
3681 if (skip_offset_) // skip bookmark is available
3682 {
3683 const unsigned char* save_pos = decoder.get_pos(); // save position
3684
3685 if (save_pos > skip_pos_) // checkpoint passed
3686 {
3687 skip_offset_ = 0;
3688 return false;
3689 }
3691
3692 unsigned char sync_mark = decoder.get_8();
3693 block_idx_type nb_sync;
3694 switch (sync_mark)
3695 {
3696 case set_nb_sync_mark8:
3697 nb_sync = decoder.get_8();
3698 break;
3699 case set_nb_sync_mark16:
3700 nb_sync = decoder.get_16();
3701 break;
3702 case set_nb_sync_mark24:
3703 nb_sync = decoder.get_24();
3704 break;
3705 case set_nb_sync_mark32:
3706 nb_sync = decoder.get_32();
3707 break;
3708 case set_nb_sync_mark48:
3709 nb_sync = block_idx_type(decoder.get_48());
3710 #ifndef BM64ADDR
3711 BM_ASSERT(0);
3712 decoder.set_pos(save_pos);
3713 skip_offset_ = 0;
3714 return 0; // invalid bookmark from 64-bit serialization
3715 #endif
3716 break;
3717 case set_nb_sync_mark64:
3718 nb_sync = block_idx_type(decoder.get_64());
3719 #ifndef BM64ADDR
3720 BM_ASSERT(0);
3721 decoder.set_pos(save_pos);
3722 skip_offset_ = 0;
3723 return 0; // invalid bookmark from 64-bit serialization
3724 #endif
3725 break;
3726 default:
3727 BM_ASSERT(0);
3728 decoder.set_pos(save_pos);
3729 skip_offset_ = 0;
3730 return 0;
3731 } // switch
3732
3733 nb_sync += nb;
3734 if (nb_sync <= expect_nb) // within reach
3735 {
3736 skip_offset_ = 0;
3737 return nb_sync;
3738 }
3739 skip_offset_ = 0;
3740 decoder.set_pos(save_pos);
3741 }
3742 return 0;
3743}
3744
3745
3746// -------------------------------------------------------------------------
3747
3748template<class BV, class DEC>
3750: ref_vect_(0),
3751 xor_block_(0),
3752 or_block_(0),
3753 or_block_idx_(0),
3754 is_range_set_(0)
3755{
3756 temp_block_ = alloc_.alloc_bit_block();
3757
3759 this->id_array_ = bit_idx_arr_.data();
3760
3762 this->sb_id_array_ = sb_bit_idx_arr_.data();
3763
3765
3766 alloc_.set_pool(&pool_);
3767}
3768
3769template<class BV, class DEC>
3771{
3772 alloc_.free_bit_block(temp_block_);
3773 if (xor_block_)
3774 alloc_.free_bit_block(xor_block_, 2);
3776}
3777
3778template<class BV, class DEC>
3780{
3781 ref_vect_ = ref_vect;
3782 if (ref_vect_ && !xor_block_)
3783 xor_block_ = alloc_.alloc_bit_block(2);
3784}
3785
3786template<class BV, class DEC>
3787void
3790 block_idx_type nb,
3791 bm::word_t* blk)
3792{
3793 gap_word_t gap_head;
3794 bm::gap_word_t* gap_temp_block = gap_temp_block_.data();
3795
3796 bool inv_flag = false;
3797 unsigned i0, j0;
3798 bm::get_block_coord(nb, i0, j0);
3799 bman.reserve_top_blocks(i0+1);
3800 bman.check_alloc_top_subblock(i0);
3801
3802 switch (btype)
3803 {
3804 case set_block_gap:
3806 case set_block_gapbit:
3807 {
3808 gap_head = (gap_word_t)
3809 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
3810
3811 unsigned len = gap_length(&gap_head);
3812 int level = gap_calc_level(len, bman.glen());
3813 --len;
3814 if (level == -1) // Too big to be GAP: convert to BIT block
3815 {
3816 *gap_temp_block = gap_head;
3817 dec.get_16(gap_temp_block+1, len - 1);
3818 gap_temp_block[len] = gap_max_bits - 1;
3819
3820 if (blk == 0) // block does not exist yet
3821 {
3822 blk = bman.get_allocator().alloc_bit_block();
3823 bman.set_block(nb, blk);
3824 gap_convert_to_bitset(blk, gap_temp_block);
3825 }
3826 else // We have some data already here. Apply OR operation.
3827 {
3829 gap_temp_block);
3830 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3831 }
3832 return;
3833 } // level == -1
3834
3835 set_gap_level(&gap_head, level);
3836
3837 if (blk == 0)
3838 {
3839 BM_ASSERT(level >= 0);
3840 gap_word_t* gap_blk =
3841 bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
3842 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
3843 *gap_blk_ptr = gap_head;
3844 bm::set_gap_level(gap_blk_ptr, level);
3845 blk = bman.set_block(nb, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
3846 BM_ASSERT(blk == 0);
3847
3848 if (len > 1)
3849 dec.get_16(gap_blk + 1, len - 1);
3850 gap_blk[len] = bm::gap_max_bits - 1;
3851 }
3852 else // target block exists
3853 {
3854 // read GAP block into a temp memory and perform OR
3855 *gap_temp_block = gap_head;
3856 dec.get_16(gap_temp_block + 1, len - 1);
3857 gap_temp_block[len] = bm::gap_max_bits - 1;
3858 break;
3859 }
3860 return;
3861 }
3863 inv_flag = true;
3865 case set_block_arrgap:
3872 {
3873 unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
3874 gap_temp_block[0] = 0; // reset unused bits in gap header
3875 unsigned gap_len =
3876 bm::gap_set_array(gap_temp_block, this->id_array_, arr_len);
3877 if (inv_flag)
3878 bm::gap_invert(gap_temp_block);
3879
3880 BM_ASSERT(gap_len == bm::gap_length(gap_temp_block));
3881 int level = bm::gap_calc_level(gap_len, bman.glen());
3882 if (level == -1) // Too big to be GAP: convert to BIT block
3883 {
3884 bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3885 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3886 return;
3887 }
3888 break;
3889 }
3891 gap_head = dec.get_16();
3898 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3899 break;
3903 gap_head = dec.get_16();
3904 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3905 break;
3906 default:
3907 BM_ASSERT(0);
3908 #ifndef BM_NO_STL
3909 throw std::logic_error(this->err_msg());
3910 #else
3911 BM_THROW(BM_ERR_SERIALFORMAT);
3912 #endif
3913 }
3914
3915 if (x_ref_d64_) // this is a delayed bit-XOR to be played here
3916 { // deoptimize the operation it will turn to bit block anyways
3917 if (!blk)
3918 {
3919 blk = bman.get_allocator().alloc_bit_block();
3920 bm::gap_convert_to_bitset(blk, gap_temp_block);
3921 bman.set_block_ptr(i0, j0, blk);
3922 }
3923 else
3924 {
3925 bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3926 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3927 }
3928 }
3929 else
3930 {
3931 bm::word_t* tmp_blk = (bm::word_t*)gap_temp_block;
3932 BMSET_PTRGAP(tmp_blk);
3933 bv.combine_operation_block_or(i0, j0, blk, tmp_blk);
3934 }
3935}
3936
3937template<class BV, class DEC>
3939 decoder_type& dec,
3940 blocks_manager_type& bman,
3941 block_idx_type nb,
3942 bm::word_t* blk)
3943{
3944 if (!blk)
3945 {
3946 blk = bman.get_allocator().alloc_bit_block();
3947 bman.set_block(nb, blk);
3948 bm::bit_block_set(blk, 0);
3949 }
3950 else
3951 if (BM_IS_GAP(blk))
3952 blk = bman.deoptimize_block(nb);
3953
3954 BM_ASSERT(blk != temp_block_);
3955
3956 switch (btype)
3957 {
3959 if (IS_FULL_BLOCK(blk))
3960 blk = bman.deoptimize_block(nb);
3962 {
3963 gap_word_t len = dec.get_16();
3964 for (unsigned k = 0; k < len; ++k)
3965 {
3966 gap_word_t bit_idx = dec.get_16();
3967 bm::clear_bit(temp_block_, bit_idx);
3968 } // for
3969 }
3971 break;
3974 this->read_bic_arr(dec, blk, btype);
3975 break;
3977 BM_ASSERT(blk != temp_block_);
3978 if (IS_FULL_BLOCK(blk))
3979 blk = bman.deoptimize_block(nb);
3980 // TODO: optimization
3985 break;
3987 this->read_bic_gap(dec, blk);
3988 break;
3990 this->read_digest0_block(dec, blk);
3991 break;
3992 default:
3993 BM_ASSERT(0);
3994 #ifndef BM_NO_STL
3995 throw std::logic_error(this->err_msg());
3996 #else
3997 BM_THROW(BM_ERR_SERIALFORMAT);
3998 #endif
3999 } // switch
4000}
4001
4002template<class BV, class DEC>
4004 decoder_type& dec,
4005 bvector_type& bv)
4006{
4007 unsigned sb;
4008 unsigned* arr = this->sb_id_array_;
4009 unsigned len = this->read_bic_sb_arr(dec, btype, arr, &sb);
4010 const typename BV::size_type sb_max_bc = bm::set_sub_array_size * bm::gap_max_bits;
4011 typename BV::size_type from = sb * sb_max_bc;
4012
4013 if (is_range_set_)
4014 {
4015 for (typename BV::size_type i = 0; i < len; ++i)
4016 {
4017 typename BV::size_type idx = from + arr[i];
4018 if (idx > idx_to_)
4019 break;
4020 if (idx < idx_from_)
4021 continue;
4022 bv.set_bit_no_check(idx);
4023 } // for
4024 }
4025 else // range restriction is not set
4026 {
4027 for (typename BV::size_type i = 0; i < len; ++i)
4028 {
4029 typename BV::size_type idx = from + arr[i];
4030 bv.set_bit_no_check(idx);
4031 } // for
4032 }
4033}
4034
4035
4036template<class BV, class DEC>
4038 bvector_type& bv,
4039 block_idx_type nb,
4040 bm::word_t* blk)
4041{
4042 if (!blk)
4043 {
4044 blocks_manager_type& bman = bv.get_blocks_manager();
4045 blk = bman.get_allocator().alloc_bit_block();
4046 bman.set_block(nb, blk);
4047 dec.get_32(blk, bm::set_block_size);
4048 }
4049 else
4050 {
4051 dec.get_32(temp_block_, bm::set_block_size);
4052 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
4053 }
4054}
4055
4056template<class BV, class DEC>
4058 bvector_type& bv,
4059 block_idx_type nb,
4060 bm::word_t* blk)
4061{
4062 unsigned head_idx = dec.get_16();
4063 unsigned tail_idx = dec.get_16();
4064 if (!blk)
4065 {
4066 blocks_manager_type& bman = bv.get_blocks_manager();
4067 blk = bman.get_allocator().alloc_bit_block();
4068 bman.set_block(nb, blk);
4069 for (unsigned k = 0; k < head_idx; ++k)
4070 blk[k] = 0;
4071 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
4072 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
4073 blk[j] = 0;
4074 }
4075 else
4076 {
4078 dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
4079 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
4080 }
4081}
4082
4083template<class BV, class DEC>
4085 bvector_type& bv,
4086 block_idx_type nb,
4087 bm::word_t* blk)
4088{
4089 blocks_manager_type& bman = bv.get_blocks_manager();
4090 bm::gap_word_t len = dec.get_16();
4091 if (BM_IS_GAP(blk))
4092 {
4093 blk = bman.deoptimize_block(nb);
4094 }
4095 else
4096 {
4097 if (!blk) // block does not exists yet
4098 {
4099 blk = bman.get_allocator().alloc_bit_block();
4100 bm::bit_block_set(blk, 0);
4101 bman.set_block(nb, blk);
4102 }
4103 else
4104 if (IS_FULL_BLOCK(blk)) // nothing to do
4105 {
4106 for (unsigned k = 0; k < len; ++k) // dry read
4107 dec.get_16();
4108 return;
4109 }
4110 }
4111 // Get the array one by one and set the bits.
4112 for (unsigned k = 0; k < len; ++k)
4113 {
4114 gap_word_t bit_idx = dec.get_16();
4115 bm::set_bit(blk, bit_idx);
4116 }
4117}
4118
4119
4120
4121template<class BV, class DEC>
4123 const unsigned char* buf,
4124 bm::word_t* /*temp_block*/)
4125{
4126 blocks_manager_type& bman = bv.get_blocks_manager();
4127 if (!bman.is_init())
4128 {
4129 auto bc = bman.compute_top_block_size(bm::id_max-1);
4130 bman.init_tree(bc);
4131 }
4132
4133 bm::word_t* temp_block = temp_block_;
4134
4135 bm::strategy strat = bv.get_new_blocks_strat();
4136 //bv.set_new_blocks_strat(BM_GAP);
4137 typename bvector_type::mem_pool_guard mp_guard_bv;
4138 mp_guard_bv.assign_if_not_set(pool_, bv);
4139
4140
4141 decoder_type dec(buf);
4142
4143 // Reading th serialization header
4144 //
4145 unsigned char header_flag = dec.get_8();
4146 if (!(header_flag & BM_HM_NO_BO))
4147 {
4148 /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
4149 }
4150 if (header_flag & BM_HM_64_BIT)
4151 {
4152 #ifndef BM64ADDR
4153 BM_ASSERT(0); // 64-bit address vector cannot read on 32
4154 #ifndef BM_NO_STL
4155 throw std::logic_error(this->err_msg());
4156 #else
4157 BM_THROW(BM_ERR_SERIALFORMAT);
4158 #endif
4159 #endif
4160 }
4161
4162 if (header_flag & BM_HM_ID_LIST)
4163 {
4164 // special case: the next comes plain list of integers
4165 if (header_flag & BM_HM_RESIZE)
4166 {
4167 block_idx_type bv_size;
4168 if (header_flag & BM_HM_64_BIT)
4169 {
4170 BM_ASSERT(sizeof(block_idx_type)==8);
4171 bv_size = (block_idx_type)dec.get_64();
4172 }
4173 else
4174 bv_size = dec.get_32();
4175 if (bv_size > bv.size())
4176 bv.resize(bv_size);
4177 }
4178 for (unsigned cnt = dec.get_32(); cnt; --cnt)
4179 {
4180 bm::id_t idx = dec.get_32();
4181 bv.set(idx);
4182 } // for
4183 // -1 for compatibility with other deserialization branches
4184 return dec.size()-1;
4185 }
4186
4187 if (!(header_flag & BM_HM_NO_GAPL))
4188 {
4189 for (unsigned k = 0; k < bm::gap_levels; ++k) // read GAP levels
4190 {
4191 /*glevels[i] =*/ dec.get_16();
4192 }
4193 }
4194 if (header_flag & BM_HM_RESIZE)
4195 {
4196 block_idx_type bv_size;
4197 if (header_flag & BM_HM_64_BIT)
4198 {
4199 // 64-bit vector cannot be deserialized into 32-bit
4200 BM_ASSERT(sizeof(block_idx_type)==8);
4201 bv_size = (block_idx_type)dec.get_64();
4202 #ifndef BM64ADDR
4203 #ifndef BM_NO_STL
4204 throw std::logic_error(this->err_msg());
4205 #else
4206 BM_THROW(BM_ERR_SERIALFORMAT);
4207 #endif
4208 #endif
4209 }
4210 else
4211 bv_size = dec.get_32();
4212 if (bv_size > bv.size())
4213 bv.resize(bv_size);
4214 }
4215
4216 if (header_flag & BM_HM_HXOR) // XOR compression mode
4217 {
4218 if (!xor_block_)
4219 xor_block_ = alloc_.alloc_bit_block();
4220 }
4221
4222 bv.set_new_blocks_strat(bm::BM_GAP); // minimize deserialization footprint
4223
4224 size_type full_blocks = 0;
4225
4226 // reference XOR compression FSM fields
4227 //
4228 xor_reset();
4229
4230 unsigned row_idx(0);
4231
4232 block_idx_type nb_sync;
4233
4234 unsigned char btype;
4235 block_idx_type nb;
4236 unsigned i0, j0;
4237
4238 block_idx_type nb_i = 0;
4239 do
4240 {
4241 if (is_range_set_)
4242 {
4244 if (nb_i > nb_to)
4245 break; // early exit (out of target range)
4246 }
4247
4248 btype = dec.get_8();
4249 if (btype & (1 << 7)) // check if short zero-run packed here
4250 {
4251 nb = btype & ~(1 << 7);
4252 BM_ASSERT(nb);
4253 nb_i += nb;
4254 continue;
4255 }
4256 bm::get_block_coord(nb_i, i0, j0);
4257 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4258
4259 switch (btype)
4260 {
4261 case set_block_azero:
4262 case set_block_end:
4263 nb_i = bm::set_total_blocks;
4264 break;
4265 case set_block_1zero:
4266 break;
4267 case set_block_8zero:
4268 nb = dec.get_8();
4269 BM_ASSERT(nb);
4270 nb_i += nb;
4271 continue; // bypass ++nb_i;
4272 case set_block_16zero:
4273 nb = dec.get_16();
4274 BM_ASSERT(nb);
4275 nb_i += nb;
4276 continue; // bypass ++nb_i;
4277 case set_block_32zero:
4278 nb = dec.get_32();
4279 BM_ASSERT(nb);
4280 nb_i += nb;
4281 continue; // bypass ++nb_i;
4282 case set_block_64zero:
4283 #ifdef BM64ADDR
4284 nb = dec.get_64();
4285 BM_ASSERT(nb);
4286 nb_i += nb;
4287 continue; // bypass ++nb_i;
4288 #else
4289 BM_ASSERT(0); // attempt to read 64-bit vector in 32-bit mode
4290 #ifndef BM_NO_STL
4291 throw std::logic_error(this->err_msg());
4292 #else
4293 BM_THROW(BM_ERR_SERIALFORMAT);
4294 #endif
4295 #endif
4296 break;
4297 case set_block_aone:
4298 bman.set_all_set(nb_i, bm::set_total_blocks-1);
4299 nb_i = bm::set_total_blocks;
4300 break;
4301 case set_block_1one:
4302 bman.set_block_all_set(nb_i);
4303 break;
4304 case set_block_8one:
4305 full_blocks = dec.get_8();
4306 goto process_full_blocks;
4307 break;
4308 case set_block_16one:
4309 full_blocks = dec.get_16();
4310 goto process_full_blocks;
4311 break;
4312 case set_block_32one:
4313 full_blocks = dec.get_32();
4314 goto process_full_blocks;
4315 break;
4316 case set_block_64one:
4317 #ifdef BM64ADDR
4318 full_blocks = dec.get_64();
4319 goto process_full_blocks;
4320 #else
4321 BM_ASSERT(0); // 32-bit vector cannot read 64-bit
4322 dec.get_64();
4323 #ifndef BM_NO_STL
4324 throw std::logic_error(this->err_msg());
4325 #else
4326 BM_THROW(BM_ERR_SERIALFORMAT);
4327 #endif
4328 #endif
4329 process_full_blocks:
4330 {
4331 BM_ASSERT(full_blocks);
4332 size_type from = nb_i * bm::gap_max_bits;
4333 size_type to = from + full_blocks * bm::gap_max_bits;
4334 bv.set_range(from, to-1);
4335 nb_i += full_blocks-1;
4336 }
4337 break;
4338 case set_block_bit:
4339 decode_block_bit(dec, bv, nb_i, blk);
4340 break;
4341 case set_block_bit_1bit:
4342 {
4343 size_type bit_idx = dec.get_16();
4344 bit_idx += nb_i * bm::bits_in_block;
4345 bv.set_bit_no_check(bit_idx);
4346 break;
4347 }
4349 {
4350 //TODO: optimization if block exists
4351 this->read_0runs_block(dec, temp_block);
4352 bv.combine_operation_with_block(nb_i, temp_block, 0, BM_OR);
4353 break;
4354 }
4356 decode_block_bit_interval(dec, bv, nb_i, blk);
4357 break;
4358 case set_block_gap:
4359 case set_block_gapbit:
4360 case set_block_arrgap:
4371 deserialize_gap(btype, dec, bv, bman, nb_i, blk);
4372 break;
4373 case set_block_arrbit:
4374 decode_arrbit(dec, bv, nb_i, blk);
4375 break;
4381 decode_bit_block(btype, dec, bman, nb_i, blk);
4382 break;
4384 decode_bit_block(btype, dec, bman, nb_i, blk);
4385 break;
4386
4387 // --------------------------------------- super-block encodings
4389 decode_arr_sblock(btype, dec, bv);
4390 nb_i += (bm::set_sub_array_size - j0);
4391 continue; // bypass ++i;
4392
4393 // --------------------------------------- bookmarks and skip jumps
4394 //
4395 case set_nb_bookmark32:
4396 this->bookmark_idx_ = nb_i;
4397 this->skip_offset_ = dec.get_32();
4398 goto process_bookmark;
4399 case set_nb_bookmark24:
4400 this->bookmark_idx_ = nb_i;
4401 this->skip_offset_ = dec.get_24();
4402 goto process_bookmark;
4403 case set_nb_bookmark16:
4404 this->bookmark_idx_ = nb_i;
4405 this->skip_offset_ = dec.get_16();
4406 process_bookmark:
4407 if (is_range_set_)
4408 {
4409 this->skip_pos_ = dec.get_pos() + this->skip_offset_;
4411 nb_from = this->try_skip(dec, nb_i, nb_from);
4412 if (nb_from)
4413 nb_i = nb_from;
4414 }
4415 continue; // bypass ++i;
4416
4417 case set_nb_sync_mark8:
4418 nb_sync = dec.get_8();
4419 goto process_nb_sync;
4420 case set_nb_sync_mark16:
4421 nb_sync = dec.get_16();
4422 goto process_nb_sync;
4423 case set_nb_sync_mark24:
4424 nb_sync = dec.get_24();
4425 goto process_nb_sync;
4426 case set_nb_sync_mark32:
4427 nb_sync = dec.get_32();
4428 goto process_nb_sync;
4429 case set_nb_sync_mark48:
4430 nb_sync = block_idx_type(dec.get_48());
4431 goto process_nb_sync;
4432 case set_nb_sync_mark64:
4433 nb_sync = block_idx_type(dec.get_64());
4434 process_nb_sync:
4435 BM_ASSERT(nb_i == this->bookmark_idx_ + nb_sync);
4436 if (nb_i != this->bookmark_idx_ + nb_sync)
4437 {
4438 #ifndef BM_NO_STL
4439 throw std::logic_error(this->err_msg());
4440 #else
4441 BM_THROW(BM_ERR_SERIALFORMAT);
4442 #endif
4443 }
4444
4445 continue; // bypass ++i;
4446
4447 // ------------------------------------ XOR compression markers
4449 {
4450 BM_ASSERT(ref_vect_); // reference vector has to be attached
4451 if (x_ref_d64_) // previous delayed XOR post proc.
4452 xor_decode(bman);
4453
4454 row_idx = dec.get_32();
4455 size_type idx = ref_vect_->find(row_idx);
4456 if (idx == ref_vect_->not_found())
4457 {
4458 BM_ASSERT(0);
4459 goto throw_err;
4460 }
4461 const bvector_type* ref_bv = ref_vect_->get_bv(idx);
4462 BM_ASSERT(ref_bv); // some incorrect work with the ref.vector
4463 BM_ASSERT(ref_bv != &bv);
4464 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4465 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
4466 if (ref_blk)
4467 bv.combine_operation_with_block(nb_i, ref_blk,
4468 BM_IS_GAP(ref_blk), BM_OR);
4469 break;
4470 }
4473 if (x_ref_d64_) // previous delayed XOR post proc.
4474 xor_decode(bman);
4475 row_idx = dec.get_8();
4476 goto process_xor;
4479 if (x_ref_d64_) // previous delayed XOR post proc.
4480 xor_decode(bman);
4481 row_idx = dec.get_16();
4482 goto process_xor;
4485 {
4486 if (x_ref_d64_) // previous delayed XOR post proc.
4487 xor_decode(bman);
4488 row_idx = dec.get_32();
4489 process_xor:
4490 if (btype <= bm::set_block_xor_ref32)
4491 x_ref_d64_ = dec.get_64();
4492 else
4493 x_ref_d64_ = ~0ull;
4494 process_xor_ref:
4495 BM_ASSERT(ref_vect_); // reference vector has to be attached
4496 x_ref_idx_ = ref_vect_->find(row_idx);
4497 x_nb_ = nb_i;
4498 if (x_ref_idx_ == ref_vect_->not_found())
4499 {
4500 BM_ASSERT(0); // XOR de-filter setup issue cannot find ref vector
4501 goto throw_err;
4502 }
4503 if (blk)
4504 {
4506 or_block_ = bman.deoptimize_block(nb_i);
4507 bman.set_block_ptr(nb_i, 0); // borrow OR target before XOR
4508 or_block_idx_ = nb_i;
4509 }
4510 }
4511 continue; // important! cont to avoid inc(i)
4513 if (x_ref_d64_) // previous delayed XOR post proc.
4514 xor_decode(bman);
4515 row_idx = dec.get_8();
4516 x_ref_d64_ = ~0ULL;
4517 goto process_xor_ref;
4519 if (x_ref_d64_) // previous delayed XOR post proc.
4520 xor_decode(bman);
4521 row_idx = dec.get_16();
4522 x_ref_d64_ = ~0ULL;
4523 goto process_xor_ref;
4525 if (x_ref_d64_) // previous delayed XOR post proc.
4526 xor_decode(bman);
4527 row_idx = dec.get_32();
4528 x_ref_d64_ = ~0ULL;
4529 goto process_xor_ref;
4531 if (x_ref_d64_) // previous delayed XOR post proc.
4532 xor_decode(bman);
4533 {
4534 unsigned char vbr_flag = dec.get_8(); // reserved
4535 switch (vbr_flag)
4536 {
4537 case 1: row_idx = dec.get_8(); break;
4538 case 2: row_idx = dec.get_16(); break;
4539 case 0: row_idx = dec.get_32(); break;
4540 default: BM_ASSERT(0); break;
4541 } // switch
4542 bm::id64_t acc64 = x_ref_d64_ = dec.get_h64(); (void) acc64;
4544 xor_chain_size_ = dec.get_8();
4546 for (unsigned ci = 0; ci < xor_chain_size_; ++ci)
4547 {
4548 switch (vbr_flag)
4549 {
4550 case 1: xor_chain_[ci].ref_idx = dec.get_8(); break;
4551 case 2: xor_chain_[ci].ref_idx = dec.get_16(); break;
4552 case 0: xor_chain_[ci].ref_idx = dec.get_32(); break;
4553 default: BM_ASSERT(0); break;
4554 } // switch
4555 xor_chain_[ci].xor_d64 = dec.get_h64();
4556
4557 BM_ASSERT((xor_chain_[ci].xor_d64 & acc64) == 0);
4558 acc64 |= xor_chain_[ci].xor_d64; // control
4559 } // for
4560 }
4561 goto process_xor_ref;
4562
4563 default:
4564 BM_ASSERT(0); // unknown block type
4565 throw_err:
4566 #ifndef BM_NO_STL
4567 throw std::logic_error(this->err_msg());
4568 #else
4569 BM_THROW(BM_ERR_SERIALFORMAT);
4570 #endif
4571 } // switch
4572
4573 ++nb_i;
4574 } while (nb_i < bm::set_total_blocks);
4575
4576 // process the last delayed XOR ref here
4577 //
4578 if (x_ref_d64_) // XOR post proc.
4579 xor_decode(bman);
4580
4581 bv.set_new_blocks_strat(strat);
4582
4583 bman.shrink_top_blocks(); // reduce top blocks to necessary size
4584
4585 return dec.size();
4586}
4587
4588// ---------------------------------------------------------------------------
4589
4590template<class BV, class DEC>
4592{
4594
4595 unsigned i0, j0;
4596 bm::get_block_coord(x_nb_, i0, j0);
4597 for (unsigned ci = 0; ci < xor_chain_size_; ++ci)
4598 {
4599 unsigned ref_idx = (unsigned)ref_vect_->find(xor_chain_[ci].ref_idx);
4600 const bvector_type* BMRESTRICT ref_bv = ref_vect_->get_bv(ref_idx);
4601 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4602 BM_ASSERT(ref_bv);
4603
4604 const bm::word_t* BMRESTRICT ref_blk = ref_bman.get_block_ptr(i0, j0);
4605 if (BM_IS_GAP(ref_blk))
4606 {
4607 bm::gap_word_t* BMRESTRICT gap_block = BMGAP_PTR(ref_blk);
4609 ref_blk = xor_block_;
4610 }
4611 else
4612 if (IS_FULL_BLOCK(ref_blk))
4613 ref_blk = FULL_BLOCK_REAL_ADDR;
4614 if (ref_blk)
4615 bm::bit_block_xor(blk, ref_blk, xor_chain_[ci].xor_d64);
4616 } // for ci
4617}
4618
4619// ---------------------------------------------------------------------------
4620
4621template<class BV, class DEC>
4623{
4625
4626 unsigned i0, j0;
4627 const bm::word_t* ref_blk;
4628
4629 {
4630 const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx_);
4631 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4632 BM_ASSERT(ref_bv);
4633 BM_ASSERT(&ref_bman != &bman); // some incorrect work with the ref.vect
4634 bm::get_block_coord(x_nb_, i0, j0);
4635 ref_blk = ref_bman.get_block_ptr(i0, j0);
4636 }
4638
4639 if (!ref_blk)
4640 {
4641 if (or_block_)
4642 {
4643 bm::word_t* blk = bman.deoptimize_block(i0, j0, true); // true to alloc
4644 if (blk)
4645 {
4647 alloc_.free_bit_block(or_block_);
4648 }
4649 else
4650 {
4651 bman.set_block_ptr(x_nb_, or_block_); // return OR block
4652 }
4653 or_block_ = 0; or_block_idx_ = 0;
4654 }
4655
4656 if (xor_chain_size_)
4657 {
4658 bm::word_t* blk = bman.deoptimize_block(i0, j0, true);
4659 xor_decode_chain(blk);
4660 }
4661 xor_reset();
4662 return;
4663 }
4664 if (IS_FULL_BLOCK(ref_blk))
4665 ref_blk = FULL_BLOCK_REAL_ADDR;
4666 else
4667 {
4668 if (BM_IS_GAP(ref_blk))
4669 {
4670 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4671 bm::gap_word_t* gap_block = BMGAP_PTR(ref_blk);
4672 if (BM_IS_GAP(blk) && (x_ref_d64_==~0ULL) && !or_block_) // two GAPs no FULL digest
4673 {
4674 BM_ASSERT(!xor_chain_size_); // since x_ref_d64_ == 0xFF...FF
4676 const bm::gap_word_t* res;
4677 unsigned res_len;
4679 gap_block,
4680 tmp_buf,
4681 res_len);
4682 BM_ASSERT(res == tmp_buf);
4683 bman.assign_gap_check(i0, j0, res, ++res_len, blk, tmp_buf);
4684
4685 xor_reset();
4686 return;
4687 }
4688
4690 ref_blk = xor_block_;
4691 }
4692 else
4693 {
4694 if (IS_FULL_BLOCK(ref_blk))
4695 ref_blk = FULL_BLOCK_REAL_ADDR;
4696 }
4697 }
4698 bm::word_t* blk = bman.deoptimize_block(i0, j0, true); // true to alloc
4699
4700 if (x_ref_d64_)
4701 {
4702 bm::bit_block_xor(blk, blk, ref_blk, x_ref_d64_);
4703 if (xor_chain_size_)
4704 xor_decode_chain(blk);
4705 }
4706 else
4707 {
4709 bm::bit_block_xor(blk, ref_blk);
4710 }
4711 xor_reset();
4712
4713 if (or_block_)
4714 {
4716 alloc_.free_bit_block(or_block_);
4717 or_block_ = 0;
4718 or_block_idx_ = 0;
4719 }
4720
4721 // target BLOCK post-processing
4722 //
4723 // check if we range setting overrides the need for block optimization
4724 //
4725 if (is_range_set_)
4726 {
4729 if (nb_from == x_nb_ || nb_to == x_nb_)
4730 return;
4731 }
4732 bman.optimize_bit_block_nocheck(i0, j0);
4733
4734}
4735// ---------------------------------------------------------------------------
4736
4737template<class BV, class DEC>
4743
4744// ---------------------------------------------------------------------------
4745
4746template<typename DEC, typename BLOCK_IDX>
4748 : header_flag_(0),
4749 decoder_(buf),
4750 end_of_stream_(false),
4751 bv_size_(0),
4753 id_cnt_(0),
4754 block_idx_(0),
4755 mono_block_cnt_(0),
4757{
4758 ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
4759
4762
4787
4788
4789 // reading stream header
4790 header_flag_ = decoder_.get_8();
4791 if (!(header_flag_ & BM_HM_NO_BO))
4792 {
4793 /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
4794 }
4795
4796 // check if bitvector comes as an inverted, sorted list of ints
4797 //
4799 {
4800 // special case: the next comes plain list of unsigned integers
4802 {
4804 {
4805 BM_ASSERT(sizeof(block_idx_type)==8);
4806 bv_size_ = (block_idx_type)decoder_.get_64();
4807 }
4808 else
4809 bv_size_ = decoder_.get_32();
4810 }
4811
4813 id_cnt_ = decoder_.get_32();
4814 next(); // read first id
4815 }
4816 else
4817 {
4818 if (!(header_flag_ & BM_HM_NO_GAPL))
4819 {
4820 for (unsigned i = 0; i < bm::gap_levels; ++i) // load the GAP levels
4821 glevels_[i] = decoder_.get_16();
4822 }
4824 {
4826 {
4827 BM_ASSERT(sizeof(block_idx_type)==8);
4828 bv_size_ = (block_idx_type)decoder_.get_64();
4829 }
4830 else
4831 bv_size_ = decoder_.get_32();
4832 }
4833 state_ = e_blocks;
4834 }
4836 if (!block_idx_arr_)
4837 {
4838 #ifndef BM_NO_STL
4839 throw std::bad_alloc();
4840 #else
4841 BM_THROW(BM_ERR_BADALLOC);
4842 #endif
4843 }
4844
4845 this->id_array_ = block_idx_arr_;
4846}
4847
4848template<typename DEC, typename BLOCK_IDX>
4854
4855
4856template<typename DEC, typename BLOCK_IDX>
4858{
4859 if (is_eof())
4860 {
4861 ++block_idx_;
4862 return;
4863 }
4864 block_idx_type nb_sync;
4865
4866 switch (state_)
4867 {
4868 case e_list_ids:
4869 // read inverted ids one by one
4870 if (id_cnt_ == 0)
4871 {
4872 end_of_stream_ = true;
4873 state_ = e_unknown;
4874 break;
4875 }
4876 last_id_ = decoder_.get_32();
4877 --id_cnt_;
4878 break;
4879
4880 case e_blocks:
4882 {
4883 end_of_stream_ = true;
4884 state_ = e_unknown;
4885 break;
4886 }
4887
4888 block_type_ = decoder_.get_8();
4889
4890 // pre-check for 7-bit zero block
4891 //
4892 if (block_type_ & (1u << 7u))
4893 {
4894 mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
4896 break;
4897 }
4898
4899 switch (block_type_)
4900 {
4901 case set_block_azero:
4902 case set_block_end:
4904 break;
4905 case set_block_1zero:
4907 mono_block_cnt_ = 0;
4908 break;
4909 case set_block_8zero:
4911 mono_block_cnt_ = decoder_.get_8()-1;
4912 break;
4913 case set_block_16zero:
4915 mono_block_cnt_ = decoder_.get_16()-1;
4916 break;
4917 case set_block_32zero:
4919 mono_block_cnt_ = decoder_.get_32()-1;
4920 break;
4921 case set_block_aone:
4924 break;
4925 case set_block_1one:
4927 mono_block_cnt_ = 0;
4928 break;
4929 case set_block_8one:
4931 mono_block_cnt_ = decoder_.get_8()-1;
4932 break;
4933 case set_block_16one:
4935 mono_block_cnt_ = decoder_.get_16()-1;
4936 break;
4937 case set_block_32one:
4939 mono_block_cnt_ = decoder_.get_32()-1;
4940 break;
4941 case set_block_64one:
4944 break;
4945
4946 case bm::set_block_bit:
4957 break;
4958 case set_block_gap:
4965 gap_head_ = decoder_.get_16();
4967 case set_block_arrgap:
4975 case set_block_bit_1bit:
4985 break;
4986 case set_block_gapbit:
4987 state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
4988 break;
4989
4990 // --------------------------------------------- bookmarks and syncs
4991 //
4992 case set_nb_bookmark32:
4993 this->bookmark_idx_ = block_idx_;
4994 this->skip_offset_ = decoder_.get_32();
4995 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4996 break;
4997 case set_nb_bookmark24:
4998 this->bookmark_idx_ = block_idx_;
4999 this->skip_offset_ = decoder_.get_24();
5000 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
5001 break;
5002 case set_nb_bookmark16:
5003 this->bookmark_idx_ = block_idx_;
5004 this->skip_offset_ = decoder_.get_16();
5005 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
5006 break;
5007
5008 case set_nb_sync_mark8:
5009 nb_sync = decoder_.get_8();
5010 goto process_nb_sync;
5011 case set_nb_sync_mark16:
5012 nb_sync = decoder_.get_16();
5013 goto process_nb_sync;
5014 case set_nb_sync_mark24:
5015 nb_sync = decoder_.get_24();
5016 goto process_nb_sync;
5017 case set_nb_sync_mark32:
5018 nb_sync = decoder_.get_32();
5019 goto process_nb_sync;
5020 case set_nb_sync_mark48:
5021 nb_sync = block_idx_type(decoder_.get_48());
5022 goto process_nb_sync;
5023 case set_nb_sync_mark64:
5024 nb_sync = block_idx_type(decoder_.get_64());
5025 process_nb_sync:
5026 BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
5027 if (block_idx_ != this->bookmark_idx_ + nb_sync)
5028 {
5029 #ifndef BM_NO_STL
5030 throw std::logic_error(this->err_msg());
5031 #else
5032 BM_THROW(BM_ERR_SERIALFORMAT);
5033 #endif
5034 }
5035 break;
5036
5037
5038
5039 // --------------------------------------------------------------
5040 // XOR deserials are incompatible with the operation deserialial
5041 // throw or assert here
5042 //
5043 case set_block_ref_eq:
5044 case set_block_xor_ref8:
5051 // fall through
5052 case set_sblock_bienc:
5053 BM_ASSERT(0);
5055 // fall through
5056 default:
5057 BM_ASSERT(0);
5058 #ifndef BM_NO_STL
5059 throw std::logic_error(this->err_msg());
5060 #else
5061 BM_THROW(BM_ERR_SERIALFORMAT);
5062 #endif
5063 } // switch
5064
5065 break;
5066
5067 case e_zero_blocks:
5068 case e_one_blocks:
5069 ++block_idx_;
5070 if (!mono_block_cnt_)
5071 {
5072 state_ = e_blocks; // get new block token
5073 break;
5074 }
5076 break;
5077
5078 case e_unknown:
5079 default:
5080 BM_ASSERT(0);
5081 #ifndef BM_NO_STL
5082 throw std::logic_error(this->err_msg());
5083 #else
5084 BM_THROW(BM_ERR_SERIALFORMAT);
5085 #endif
5086 } // switch
5087}
5088
5089template<typename DEC, typename BLOCK_IDX>
5104
5105template<typename DEC, typename BLOCK_IDX>
5106void
5108{
5109 gap_word_t len = decoder_.get_16();
5110 if (block)
5111 {
5112 bm::bit_block_set(block, ~0u);
5113 for (unsigned k = 0; k < len; ++k)
5114 {
5115 bm::gap_word_t bit_idx = decoder_.get_16();
5116 bm::clear_bit(block, bit_idx);
5117 }
5118 }
5119 else // dry read
5120 {
5121 for (unsigned k = 0; k < len; ++k)
5122 decoder_.get_16();
5123 }
5124}
5125
5126
5127template<typename DEC, typename BLOCK_IDX>
5128unsigned
5130 bm::word_t* dst_block,
5131 bm::word_t* tmp_block)
5132{
5133 // ASSIGN should be ready for dry read (dst_block can be NULL)
5134 //
5135 BM_ASSERT(this->state_ == e_bit_block);
5136 unsigned count = 0;
5137
5138 switch (this->block_type_)
5139 {
5140 case set_block_bit:
5141 decoder_.get_32(dst_block, bm::set_block_size);
5142 break;
5143 case set_block_bit_0runs:
5144 {
5145 if (IS_VALID_ADDR(dst_block))
5146 bm::bit_block_set(dst_block, 0);
5147 unsigned char run_type = decoder_.get_8();
5148 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5149 {
5150 unsigned run_length = decoder_.get_16();
5151 if (run_type)
5152 {
5153 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
5154 }
5155 j += run_length;
5156 } // for
5157 }
5158 break;
5160 {
5161 unsigned head_idx = decoder_.get_16();
5162 unsigned tail_idx = decoder_.get_16();
5163 if (dst_block)
5164 {
5165 for (unsigned i = 0; i < head_idx; ++i)
5166 dst_block[i] = 0;
5167 decoder_.get_32(dst_block + head_idx,
5168 tail_idx - head_idx + 1);
5169 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
5170 dst_block[j] = 0;
5171 }
5172 else
5173 {
5174 int pos = int(tail_idx - head_idx) + 1;
5175 pos *= 4u;
5176 decoder_.seek(pos);
5177 }
5178 }
5179 break;
5180 case set_block_arrbit:
5181 case set_block_bit_1bit:
5182 get_arr_bit(dst_block, true /*clear target*/);
5183 break;
5184 case set_block_gapbit:
5185 BM_ASSERT(0);
5186 #ifndef BM_NO_STL
5187 throw std::logic_error(this->err_msg());
5188 #else
5189 BM_THROW(BM_ERR_SERIALFORMAT);
5190 #endif
5191 break;
5193 get_inv_arr(dst_block);
5194 break;
5197 if (IS_VALID_ADDR(dst_block))
5198 bm::bit_block_set(dst_block, 0);
5199 this->read_bic_arr(decoder_, dst_block, this->block_type_);
5200 break;
5202 this->read_bic_arr_inv(decoder_, tmp_block);
5203 if (IS_VALID_ADDR(dst_block))
5204 bm::bit_block_copy(dst_block, tmp_block);
5205 break;
5207 if (IS_VALID_ADDR(dst_block))
5208 bm::bit_block_set(dst_block, 0);
5209 this->read_bic_gap(decoder_, dst_block);
5210 break;
5212 if (IS_VALID_ADDR(dst_block))
5213 bm::bit_block_set(dst_block, 0);
5214 this->read_digest0_block(decoder_, dst_block);
5215 break;
5216 default:
5217 BM_ASSERT(0);
5218 #ifndef BM_NO_STL
5219 throw std::logic_error(this->err_msg());
5220 #else
5221 BM_THROW(BM_ERR_SERIALFORMAT);
5222 #endif
5223 } // switch
5224 return count;
5225}
5226
5227template<typename DEC, typename BLOCK_IDX>
5228unsigned
5230 bm::word_t* dst_block,
5231 bm::word_t* tmp_block)
5232{
5233 BM_ASSERT(this->state_ == e_bit_block);
5234 unsigned count = 0;
5235 switch (block_type_)
5236 {
5237 case set_block_bit:
5238 decoder_.get_32_OR(dst_block, bm::set_block_size);
5239 break;
5241 {
5242 unsigned head_idx = decoder_.get_16();
5243 unsigned tail_idx = decoder_.get_16();
5244 for (unsigned i = head_idx; i <= tail_idx; ++i)
5245 dst_block[i] |= decoder_.get_32();
5246 }
5247 break;
5249 {
5250 unsigned char run_type = decoder_.get_8();
5251 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5252 {
5253 unsigned run_length = decoder_.get_16();
5254 if (run_type)
5255 {
5256 unsigned run_end = j + run_length;
5257 for (;j < run_end; ++j)
5258 {
5260 dst_block[j] |= decoder_.get_32();
5261 }
5262 }
5263 else
5264 {
5265 j += run_length;
5266 }
5267 } // for
5268 }
5269 break;
5270 case set_block_bit_1bit:
5271 case set_block_arrbit:
5272 get_arr_bit(dst_block, false /*don't clear target*/);
5273 break;
5275 get_inv_arr(tmp_block);
5276 bm::bit_block_or(dst_block, tmp_block);
5277 break;
5280 this->read_bic_arr(decoder_, dst_block, this->block_type_);
5281 break;
5283 this->read_bic_arr_inv(decoder_, tmp_block);
5284 bm::bit_block_or(dst_block, tmp_block);
5285 break;
5287 this->read_bic_gap(decoder_, dst_block);
5288 break;
5290 this->read_digest0_block(decoder_, dst_block);
5291 break;
5292 default:
5293 BM_ASSERT(0);
5294 #ifndef BM_NO_STL
5295 throw std::logic_error(this->err_msg());
5296 #else
5297 BM_THROW(BM_ERR_SERIALFORMAT);
5298 #endif
5299 } // switch
5300 return count;
5301}
5302
5303template<typename DEC, typename BLOCK_IDX>
5304unsigned
5306 bm::word_t* BMRESTRICT dst_block,
5307 bm::word_t* BMRESTRICT tmp_block)
5308{
5309 BM_ASSERT(this->state_ == e_bit_block);
5310 BM_ASSERT(dst_block != tmp_block);
5311 unsigned count = 0;
5312 switch (block_type_)
5313 {
5314 case set_block_bit:
5315 decoder_.get_32_AND(dst_block, bm::set_block_size);
5316 break;
5318 {
5319 unsigned char run_type = decoder_.get_8();
5320 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5321 {
5322 unsigned run_length = decoder_.get_16();
5323
5324 unsigned run_end = j + run_length;
5325 if (run_type)
5326 {
5327 for (;j < run_end; ++j)
5328 {
5330 dst_block[j] &= decoder_.get_32();
5331 }
5332 }
5333 else
5334 {
5335 for (;j < run_end; ++j)
5336 {
5338 dst_block[j] = 0;
5339 }
5340 }
5341 } // for
5342 }
5343 break;
5345 {
5346 unsigned head_idx = decoder_.get_16();
5347 unsigned tail_idx = decoder_.get_16();
5348 unsigned i;
5349 for ( i = 0; i < head_idx; ++i)
5350 dst_block[i] = 0;
5351 for ( i = head_idx; i <= tail_idx; ++i)
5352 dst_block[i] &= decoder_.get_32();
5353 for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
5354 dst_block[i] = 0;
5355 }
5356 break;
5357 case set_block_bit_1bit:
5358 case set_block_arrbit:
5359 get_arr_bit(tmp_block, true /*clear target*/);
5360 if (dst_block)
5361 bm::bit_block_and(dst_block, tmp_block);
5362 break;
5364 get_inv_arr(tmp_block);
5365 if (dst_block)
5366 bm::bit_block_and(dst_block, tmp_block);
5367 break;
5370 if (dst_block)
5371 {
5372 bm::bit_block_set(tmp_block, 0);
5373 this->read_bic_arr(decoder_, tmp_block, block_type_);
5374 bm::bit_block_and(dst_block, tmp_block);
5375 }
5376 else
5377 this->read_bic_arr(decoder_, 0, block_type_); // dry read
5378 break;
5380 this->read_bic_arr_inv(decoder_, tmp_block);
5381 if (dst_block)
5382 bm::bit_block_and(dst_block, tmp_block);
5383 break;
5385 if (dst_block)
5386 {
5387 BM_ASSERT(IS_VALID_ADDR(dst_block));
5388 bm::bit_block_set(tmp_block, 0);
5389 this->read_bic_gap(decoder_, tmp_block);
5390 bm::bit_block_and(dst_block, tmp_block);
5391 }
5392 else
5393 this->read_bic_gap(decoder_, 0); // dry read
5394 break;
5396 if (dst_block)
5397 {
5398 BM_ASSERT(IS_VALID_ADDR(dst_block));
5399 bm::bit_block_set(tmp_block, 0);
5400 this->read_digest0_block(decoder_, tmp_block);
5401 bm::bit_block_and(dst_block, tmp_block);
5402 }
5403 else
5404 this->read_digest0_block(decoder_, 0); // dry read
5405 break;
5406 default:
5407 BM_ASSERT(0);
5408 #ifndef BM_NO_STL
5409 throw std::logic_error(this->err_msg());
5410 #else
5411 BM_THROW(BM_ERR_SERIALFORMAT);
5412 #endif
5413 } // switch
5414 return count;
5415}
5416
5417template<typename DEC, typename BLOCK_IDX>
5418unsigned
5420 bm::word_t* dst_block,
5421 bm::word_t* tmp_block)
5422{
5423 BM_ASSERT(this->state_ == e_bit_block);
5424 BM_ASSERT(dst_block != tmp_block);
5425
5426 unsigned count = 0;
5427 switch (block_type_)
5428 {
5429 case set_block_bit:
5430 for (unsigned i = 0; i < bm::set_block_size; ++i)
5431 dst_block[i] ^= decoder_.get_32();
5432 break;
5434 {
5435 unsigned char run_type = decoder_.get_8();
5436 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5437 {
5438 unsigned run_length = decoder_.get_16();
5439 if (run_type)
5440 {
5441 unsigned run_end = j + run_length;
5442 for (;j < run_end; ++j)
5443 {
5445 dst_block[j] ^= decoder_.get_32();
5446 }
5447 }
5448 else
5449 {
5450 j += run_length;
5451 }
5452 } // for
5453 }
5454 break;
5456 {
5457 unsigned head_idx = decoder_.get_16();
5458 unsigned tail_idx = decoder_.get_16();
5459 for (unsigned i = head_idx; i <= tail_idx; ++i)
5460 dst_block[i] ^= decoder_.get_32();
5461 }
5462 break;
5463 case set_block_bit_1bit:
5464 // TODO: optimization
5465 case set_block_arrbit:
5466 get_arr_bit(tmp_block, true /*clear target*/);
5467 if (dst_block)
5468 bm::bit_block_xor(dst_block, tmp_block);
5469 break;
5471 get_inv_arr(tmp_block);
5472 if (dst_block)
5473 bm::bit_block_xor(dst_block, tmp_block);
5474 break;
5477 bm::bit_block_set(tmp_block, 0);
5478 this->read_bic_arr(decoder_, tmp_block, block_type_);
5479 if (dst_block)
5480 bm::bit_block_xor(dst_block, tmp_block);
5481 break;
5483 this->read_bic_arr_inv(decoder_, tmp_block);
5484 if (dst_block)
5485 {
5486 BM_ASSERT(IS_VALID_ADDR(dst_block));
5487 bm::bit_block_xor(dst_block, tmp_block);
5488 }
5489 break;
5491 if (dst_block)
5492 {
5493 BM_ASSERT(IS_VALID_ADDR(dst_block));
5494 bm::bit_block_set(tmp_block, 0);
5495 this->read_bic_gap(decoder_, tmp_block);
5496 bm::bit_block_xor(dst_block, tmp_block);
5497 }
5498 else
5499 this->read_bic_gap(decoder_, 0); // dry read
5500 break;
5502 if (dst_block)
5503 {
5504 BM_ASSERT(IS_VALID_ADDR(dst_block));
5505 bm::bit_block_set(tmp_block, 0);
5506 this->read_digest0_block(decoder_, tmp_block);
5507 bm::bit_block_xor(dst_block, tmp_block);
5508 }
5509 else
5510 this->read_digest0_block(decoder_, 0); // dry read
5511 break;
5512 default:
5513 BM_ASSERT(0);
5514 #ifndef BM_NO_STL
5515 throw std::logic_error(this->err_msg());
5516 #else
5517 BM_THROW(BM_ERR_SERIALFORMAT);
5518 #endif
5519 } // switch
5520 return count;
5521}
5522
5523template<typename DEC, typename BLOCK_IDX>
5524unsigned
5526 bm::word_t* dst_block,
5527 bm::word_t* tmp_block)
5528{
5529 BM_ASSERT(this->state_ == e_bit_block);
5530 BM_ASSERT(dst_block != tmp_block);
5531
5532 unsigned count = 0;
5533 switch (block_type_)
5534 {
5535 case set_block_bit:
5536 for (unsigned i = 0; i < bm::set_block_size; ++i)
5537 dst_block[i] &= ~decoder_.get_32();
5538 break;
5540 {
5541 unsigned char run_type = decoder_.get_8();
5542 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5543 {
5544 unsigned run_length = decoder_.get_16();
5545 if (run_type)
5546 {
5547 unsigned run_end = j + run_length;
5548 for (;j < run_end; ++j)
5549 {
5551 dst_block[j] &= ~decoder_.get_32();
5552 }
5553 }
5554 else
5555 {
5556 j += run_length;
5557 }
5558 } // for
5559 }
5560 break;
5562 {
5563 unsigned head_idx = decoder_.get_16();
5564 unsigned tail_idx = decoder_.get_16();
5565 for (unsigned i = head_idx; i <= tail_idx; ++i)
5566 dst_block[i] &= ~decoder_.get_32();
5567 }
5568 break;
5569 case set_block_bit_1bit:
5570 // TODO: optimization
5571 case set_block_arrbit:
5572 get_arr_bit(tmp_block, true /*clear target*/);
5573 if (dst_block)
5574 bm::bit_block_sub(dst_block, tmp_block);
5575 break;
5577 get_inv_arr(tmp_block);
5578 if (dst_block)
5579 bm::bit_block_sub(dst_block, tmp_block);
5580 break;
5583 bm::bit_block_set(tmp_block, 0);
5584 this->read_bic_arr(decoder_, tmp_block, block_type_);
5585 if (dst_block)
5586 bm::bit_block_sub(dst_block, tmp_block);
5587 break;
5589 this->read_bic_arr_inv(decoder_, tmp_block);
5590 if (dst_block)
5591 bm::bit_block_sub(dst_block, tmp_block);
5592 break;
5594 if (dst_block)
5595 {
5596 BM_ASSERT(IS_VALID_ADDR(dst_block));
5597 bm::bit_block_set(tmp_block, 0);
5598 this->read_bic_gap(decoder_, tmp_block);
5599 bm::bit_block_sub(dst_block, tmp_block);
5600 }
5601 else
5602 this->read_bic_gap(decoder_, 0); // dry read
5603 break;
5605 if (dst_block)
5606 {
5607 BM_ASSERT(IS_VALID_ADDR(dst_block));
5608 bm::bit_block_set(tmp_block, 0);
5609 this->read_digest0_block(decoder_, tmp_block);
5610 bm::bit_block_sub(dst_block, tmp_block);
5611 }
5612 else
5613 this->read_digest0_block(decoder_, 0); // dry read
5614 break;
5615 default:
5616 BM_ASSERT(0);
5617 #ifndef BM_NO_STL
5618 throw std::logic_error(this->err_msg());
5619 #else
5620 BM_THROW(BM_ERR_SERIALFORMAT);
5621 #endif
5622 } // switch
5623 return count;
5624}
5625
5626
5627template<typename DEC, typename BLOCK_IDX>
5628unsigned
5630 bm::word_t* /*dst_block*/,
5631 bm::word_t* tmp_block)
5632{
5633 BM_ASSERT(this->state_ == e_bit_block);
5634
5635 unsigned count = 0;
5636 switch (block_type_)
5637 {
5638 case set_block_bit:
5639 for (unsigned i = 0; i < bm::set_block_size; ++i)
5640 count += bm::word_bitcount(decoder_.get_32());
5641 break;
5643 {
5644 //count = 0;
5645 unsigned char run_type = decoder_.get_8();
5646 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5647 {
5648 unsigned run_length = decoder_.get_16();
5649 if (run_type)
5650 {
5651 unsigned run_end = j + run_length;
5652 for (;j < run_end; ++j)
5653 {
5654 count += word_bitcount(decoder_.get_32());
5655 }
5656 }
5657 else
5658 {
5659 j += run_length;
5660 }
5661 } // for
5662 return count;
5663 }
5665 {
5666 unsigned head_idx = decoder_.get_16();
5667 unsigned tail_idx = decoder_.get_16();
5668 for (unsigned i = head_idx; i <= tail_idx; ++i)
5669 count += bm::word_bitcount(decoder_.get_32());
5670 }
5671 break;
5672 case set_block_arrbit:
5673 count += get_arr_bit(0);
5674 break;
5675 case set_block_bit_1bit:
5676 ++count;
5677 decoder_.get_16();
5678 break;
5680 get_inv_arr(tmp_block);
5681 goto count_tmp;
5684 bm::bit_block_set(tmp_block, 0); // TODO: just add a counted read
5685 this->read_bic_arr(decoder_, tmp_block, block_type_);
5686 goto count_tmp;
5688 this->read_bic_arr_inv(decoder_, tmp_block);
5689 goto count_tmp;
5691 bm::bit_block_set(tmp_block, 0);
5692 this->read_digest0_block(decoder_, tmp_block);
5693 goto count_tmp;
5695 bm::bit_block_set(tmp_block, 0);
5696 this->read_bic_gap(decoder_, tmp_block);
5697 count_tmp:
5698 count += bm::bit_block_count(tmp_block);
5699 break;
5700 default:
5701 BM_ASSERT(0);
5702 #ifndef BM_NO_STL
5703 throw std::logic_error(this->err_msg());
5704 #else
5705 BM_THROW(BM_ERR_SERIALFORMAT);
5706 #endif
5707
5708 } // switch
5709 return count;
5710}
5711
5712template<typename DEC, typename BLOCK_IDX>
5713unsigned
5715 bm::word_t* dst_block,
5716 bm::word_t* tmp_block)
5717{
5718 BM_ASSERT(this->state_ == e_bit_block);
5719 unsigned count = 0;
5720 if (dst_block)
5721 {
5722 // count the block bitcount
5723 count = bm::bit_block_count(dst_block);
5724 }
5725
5726 switch (block_type_)
5727 {
5728 case set_block_bit:
5729 decoder_.get_32(0, bm::set_block_size);
5730 break;
5732 {
5733 unsigned char run_type = decoder_.get_8();
5734 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5735 {
5736 unsigned run_length = decoder_.get_16();
5737 if (run_type)
5738 {
5739 unsigned run_end = j + run_length;
5740 for (;j < run_end; ++j)
5741 {
5742 decoder_.get_32();
5743 }
5744 }
5745 else
5746 {
5747 j += run_length;
5748 }
5749 } // for
5750 }
5751 break;
5752
5754 {
5755 unsigned head_idx = decoder_.get_16();
5756 unsigned tail_idx = decoder_.get_16();
5757 for (unsigned i = head_idx; i <= tail_idx; ++i)
5758 decoder_.get_32();
5759 }
5760 break;
5761 case set_block_arrbit:
5762 get_arr_bit(0);
5763 break;
5764 case set_block_bit_1bit:
5765 decoder_.get_16();
5766 break;
5768 get_inv_arr(tmp_block);
5769 break;
5772 this->read_bic_arr(decoder_, tmp_block, block_type_); // TODO: add dry read
5773 break;
5775 this->read_bic_arr_inv(decoder_, tmp_block); // TODO: add dry read
5776 break;
5778 this->read_bic_gap(decoder_, tmp_block);
5779 break;
5781 this->read_digest0_block(decoder_, 0); // dry read
5782 break;
5783 default:
5784 BM_ASSERT(0);
5785 #ifndef BM_NO_STL
5786 throw std::logic_error(this->err_msg());
5787 #else
5788 BM_THROW(BM_ERR_SERIALFORMAT);
5789 #endif
5790
5791 } // switch
5792 return count;
5793}
5794
5795
5796template<typename DEC, typename BLOCK_IDX>
5797unsigned
5799 bm::word_t* dst_block,
5800 bm::word_t* tmp_block)
5801{
5802 BM_ASSERT(this->state_ == e_bit_block);
5803 BM_ASSERT(dst_block);
5804
5805 unsigned count = 0;
5806 switch (block_type_)
5807 {
5808 case set_block_bit:
5809 for (unsigned i = 0; i < bm::set_block_size; ++i)
5810 count += word_bitcount(dst_block[i] & decoder_.get_32());
5811 break;
5813 {
5814 //count = 0;
5815 unsigned char run_type = decoder_.get_8();
5816 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5817 {
5818 unsigned run_length = decoder_.get_16();
5819 if (run_type)
5820 {
5821 unsigned run_end = j + run_length;
5822 for (;j < run_end; ++j)
5823 {
5824 count += word_bitcount(dst_block[j] & decoder_.get_32());
5825 }
5826 }
5827 else
5828 {
5829 j += run_length;
5830 }
5831 } // for
5832 return count;
5833 }
5835 {
5836 unsigned head_idx = decoder_.get_16();
5837 unsigned tail_idx = decoder_.get_16();
5838 for (unsigned i = head_idx; i <= tail_idx; ++i)
5839 count += word_bitcount(dst_block[i] & decoder_.get_32());
5840 }
5841 break;
5842 case set_block_bit_1bit:
5843 // TODO: optimization
5844 case set_block_arrbit:
5845 get_arr_bit(tmp_block, true /*clear target*/);
5846 goto count_tmp;
5847 break;
5849 get_inv_arr(tmp_block);
5850 goto count_tmp;
5851 break;
5854 bm::bit_block_set(tmp_block, 0);
5855 this->read_bic_arr(decoder_, tmp_block, block_type_);
5856 goto count_tmp;
5858 this->read_bic_arr_inv(decoder_, tmp_block);
5859 goto count_tmp;
5861 bm::bit_block_set(tmp_block, 0);
5862 this->read_digest0_block(decoder_, tmp_block);
5863 goto count_tmp;
5865 bm::bit_block_set(tmp_block, 0);
5866 this->read_bic_gap(decoder_, tmp_block);
5867 count_tmp:
5868 count += bm::bit_operation_and_count(dst_block, tmp_block);
5869 break;
5870 default:
5871 BM_ASSERT(0);
5872 #ifndef BM_NO_STL
5873 throw std::logic_error(this->err_msg());
5874 #else
5875 BM_THROW(BM_ERR_SERIALFORMAT);
5876 #endif
5877
5878 } // switch
5879 return count;
5880}
5881
5882template<typename DEC,typename BLOCK_IDX>
5883unsigned
5885 bm::word_t* dst_block,
5886 bm::word_t* tmp_block)
5887{
5888 BM_ASSERT(this->state_ == e_bit_block);
5889 BM_ASSERT(dst_block);
5890
5891 bitblock_sum_adapter count_adapter;
5892 switch (block_type_)
5893 {
5894 case set_block_bit:
5895 {
5896 bitblock_get_adapter ga(dst_block);
5898
5899 bit_recomb(ga,
5900 decoder_,
5901 func,
5902 count_adapter
5903 );
5904 }
5905 break;
5906 case set_block_bit_0runs:
5907 {
5908 unsigned count = 0;
5909 unsigned char run_type = decoder_.get_8();
5910 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5911 {
5912 unsigned run_length = decoder_.get_16();
5913 unsigned run_end = j + run_length;
5914 if (run_type)
5915 {
5916 for (;j < run_end; ++j)
5917 {
5919 count += word_bitcount(dst_block[j] | decoder_.get_32());
5920 }
5921 }
5922 else
5923 {
5924 for (;j < run_end; ++j)
5925 {
5927 count += word_bitcount(dst_block[j]);
5928 }
5929 }
5930 } // for
5931 return count;
5932 }
5934 {
5935 unsigned head_idx = decoder_.get_16();
5936 unsigned tail_idx = decoder_.get_16();
5937 unsigned count = 0;
5938 unsigned i;
5939 for (i = 0; i < head_idx; ++i)
5940 count += bm::word_bitcount(dst_block[i]);
5941 for (i = head_idx; i <= tail_idx; ++i)
5942 count += bm::word_bitcount(dst_block[i] | decoder_.get_32());
5943 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5944 count += bm::word_bitcount(dst_block[i]);
5945 return count;
5946 }
5947 case set_block_bit_1bit:
5948 // TODO: optimization
5949 case set_block_arrbit:
5950 get_arr_bit(tmp_block, true /* clear target*/);
5951 return bit_operation_or_count(dst_block, tmp_block);
5953 get_inv_arr(tmp_block);
5954 goto count_tmp;
5957 bm::bit_block_set(tmp_block, 0);
5958 this->read_bic_arr(decoder_, tmp_block, block_type_);
5959 goto count_tmp;
5961 this->read_bic_arr_inv(decoder_, tmp_block);
5962 goto count_tmp;
5964 bm::bit_block_set(tmp_block, 0);
5965 this->read_digest0_block(decoder_, tmp_block);
5966 goto count_tmp;
5968 bm::bit_block_set(tmp_block, 0);
5969 this->read_bic_gap(decoder_, tmp_block);
5970 count_tmp:
5971 return bm::bit_operation_or_count(dst_block, tmp_block);
5972 default:
5973 BM_ASSERT(0);
5974 #ifndef BM_NO_STL
5975 throw std::logic_error(this->err_msg());
5976 #else
5977 BM_THROW(BM_ERR_SERIALFORMAT);
5978 #endif
5979
5980 } // switch
5981 return count_adapter.sum();
5982}
5983
5984template<typename DEC, typename BLOCK_IDX>
5985unsigned
5987 bm::word_t* dst_block,
5988 bm::word_t* tmp_block)
5989{
5990 BM_ASSERT(this->state_ == e_bit_block);
5991 BM_ASSERT(dst_block);
5992
5993 bitblock_sum_adapter count_adapter;
5994 switch (block_type_)
5995 {
5996 case set_block_bit:
5997 {
5998 bitblock_get_adapter ga(dst_block);
6000
6001 bit_recomb(ga,
6002 decoder_,
6003 func,
6004 count_adapter
6005 );
6006 }
6007 break;
6008 case set_block_bit_0runs:
6009 {
6010 unsigned count = 0;
6011 unsigned char run_type = decoder_.get_8();
6012 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6013 {
6014 unsigned run_length = decoder_.get_16();
6015 unsigned run_end = j + run_length;
6016 if (run_type)
6017 {
6018 for (;j < run_end; ++j)
6019 {
6021 count += bm::word_bitcount(dst_block[j] ^ decoder_.get_32());
6022 }
6023 }
6024 else
6025 {
6026 for (;j < run_end; ++j)
6027 {
6029 count += bm::word_bitcount(dst_block[j]);
6030 }
6031 }
6032 } // for
6033 return count;
6034 }
6036 {
6037 unsigned head_idx = decoder_.get_16();
6038 unsigned tail_idx = decoder_.get_16();
6039 unsigned count = 0;
6040 unsigned i;
6041 for (i = 0; i < head_idx; ++i)
6042 count += bm::word_bitcount(dst_block[i]);
6043 for (i = head_idx; i <= tail_idx; ++i)
6044 count += bm::word_bitcount(dst_block[i] ^ decoder_.get_32());
6045 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
6046 count += bm::word_bitcount(dst_block[i]);
6047 return count;
6048 }
6049 case set_block_bit_1bit:
6050 // TODO: optimization
6051 case set_block_arrbit:
6052 get_arr_bit(tmp_block, true /* clear target*/);
6053 goto count_tmp;
6055 get_inv_arr(tmp_block);
6056 goto count_tmp;
6059 bm::bit_block_set(tmp_block, 0);
6060 this->read_bic_arr(decoder_, tmp_block, block_type_);
6061 goto count_tmp;
6063 this->read_bic_arr_inv(decoder_, tmp_block);
6064 goto count_tmp;
6065 break;
6067 bm::bit_block_set(tmp_block, 0);
6068 this->read_digest0_block(decoder_, tmp_block);
6069 goto count_tmp;
6071 bm::bit_block_set(tmp_block, 0);
6072 this->read_bic_gap(decoder_, tmp_block);
6073 count_tmp:
6074 return bm::bit_operation_xor_count(dst_block, tmp_block);
6075 default:
6076 BM_ASSERT(0);
6077 #ifndef BM_NO_STL
6078 throw std::logic_error(this->err_msg());
6079 #else
6080 BM_THROW(BM_ERR_SERIALFORMAT);
6081 #endif
6082
6083 } // switch
6084 return count_adapter.sum();
6085}
6086
6087template<typename DEC, typename BLOCK_IDX>
6088unsigned
6090 bm::word_t* dst_block,
6091 bm::word_t* tmp_block)
6092{
6093 BM_ASSERT(this->state_ == e_bit_block);
6094 BM_ASSERT(dst_block);
6095
6096 bitblock_sum_adapter count_adapter;
6097 switch (block_type_)
6098 {
6099 case set_block_bit:
6100 {
6101 bitblock_get_adapter ga(dst_block);
6103
6104 bit_recomb(ga,
6105 decoder_,
6106 func,
6107 count_adapter
6108 );
6109 }
6110 break;
6111 case set_block_bit_0runs:
6112 {
6113 unsigned count = 0;
6114 unsigned char run_type = decoder_.get_8();
6115 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6116 {
6117 unsigned run_length = decoder_.get_16();
6118 unsigned run_end = j + run_length;
6119 if (run_type)
6120 {
6121 for (;j < run_end; ++j)
6122 {
6124 count += word_bitcount(dst_block[j] & ~decoder_.get_32());
6125 }
6126 }
6127 else
6128 {
6129 for (;j < run_end; ++j)
6130 {
6132 count += bm::word_bitcount(dst_block[j]);
6133 }
6134 }
6135 } // for
6136 return count;
6137 }
6139 {
6140 unsigned head_idx = decoder_.get_16();
6141 unsigned tail_idx = decoder_.get_16();
6142 unsigned count = 0;
6143 unsigned i;
6144 for (i = 0; i < head_idx; ++i)
6145 count += bm::word_bitcount(dst_block[i]);
6146 for (i = head_idx; i <= tail_idx; ++i)
6147 count += bm::word_bitcount(dst_block[i] & (~decoder_.get_32()));
6148 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
6149 count += bm::word_bitcount(dst_block[i]);
6150 return count;
6151 }
6152 break;
6153 case set_block_bit_1bit:
6154 //TODO: optimization
6155 case set_block_arrbit:
6156 get_arr_bit(tmp_block, true /* clear target*/);
6157 goto count_tmp;
6159 get_inv_arr(tmp_block);
6160 goto count_tmp;
6163 bm::bit_block_set(tmp_block, 0);
6164 this->read_bic_arr(decoder_, tmp_block, block_type_);
6165 goto count_tmp;
6167 this->read_bic_arr_inv(decoder_, tmp_block);
6168 goto count_tmp;
6170 bm::bit_block_set(tmp_block, 0);
6171 this->read_digest0_block(decoder_, tmp_block);
6172 goto count_tmp;
6174 bm::bit_block_set(tmp_block, 0);
6175 this->read_bic_gap(decoder_, tmp_block);
6176 count_tmp:
6177 return bm::bit_operation_sub_count(dst_block, tmp_block);
6178 default:
6179 BM_ASSERT(0);
6180 #ifndef BM_NO_STL
6181 throw std::logic_error(this->err_msg());
6182 #else
6183 BM_THROW(BM_ERR_SERIALFORMAT);
6184 #endif
6185
6186 } // switch
6187 return count_adapter.sum();
6188}
6189
6190template<typename DEC, typename BLOCK_IDX>
6191unsigned
6193 bm::word_t* dst_block,
6194 bm::word_t* tmp_block)
6195{
6196 BM_ASSERT(this->state_ == e_bit_block);
6197 BM_ASSERT(dst_block);
6198
6199 bitblock_sum_adapter count_adapter;
6200 switch (block_type_)
6201 {
6202 case set_block_bit:
6203 {
6204 bitblock_get_adapter ga(dst_block);
6206
6207 bit_recomb(ga,
6208 decoder_,
6209 func,
6210 count_adapter
6211 );
6212 }
6213 break;
6214 case set_block_bit_0runs:
6215 {
6216 unsigned count = 0;
6217 unsigned char run_type = decoder_.get_8();
6218 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6219 {
6220 unsigned run_length = decoder_.get_16();
6221 unsigned run_end = j + run_length;
6222 if (run_type)
6223 {
6224 for (;j < run_end; ++j)
6225 {
6227 count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
6228 }
6229 }
6230 else
6231 {
6232 j += run_length;
6233 }
6234 } // for
6235 return count;
6236 }
6238 {
6239 unsigned head_idx = decoder_.get_16();
6240 unsigned tail_idx = decoder_.get_16();
6241 unsigned count = 0;
6242 unsigned i;
6243 for (i = head_idx; i <= tail_idx; ++i)
6244 count += bm::word_bitcount(decoder_.get_32() & (~dst_block[i]));
6245 return count;
6246 }
6247 break;
6248 case set_block_bit_1bit:
6249 // TODO: optimization
6250 case set_block_arrbit:
6251 get_arr_bit(tmp_block, true /* clear target*/);
6252 goto count_tmp;
6254 get_inv_arr(tmp_block);
6255 goto count_tmp;
6258 bm::bit_block_set(tmp_block, 0);
6259 this->read_bic_arr(decoder_, tmp_block, block_type_);
6260 goto count_tmp;
6262 this->read_bic_arr_inv(decoder_, tmp_block);
6263 goto count_tmp;
6265 bm::bit_block_set(tmp_block, 0);
6266 this->read_digest0_block(decoder_, tmp_block);
6267 goto count_tmp;
6269 bm::bit_block_set(tmp_block, 0);
6270 this->read_bic_gap(decoder_, tmp_block);
6271 count_tmp:
6272 return bm::bit_operation_sub_count(tmp_block, dst_block);
6273 default:
6274 BM_ASSERT(0);
6275 #ifndef BM_NO_STL
6276 throw std::logic_error(this->err_msg());
6277 #else
6278 BM_THROW(BM_ERR_SERIALFORMAT);
6279 #endif
6280 } // switch
6281 return count_adapter.sum();
6282}
6283
6284
6285
6286template<typename DEC, typename BLOCK_IDX>
6288 bm::word_t* dst_block,
6289 bool clear_target) BMNOEXCEPT
6290{
6293
6294 gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
6295 if (dst_block)
6296 {
6297 if (clear_target)
6298 bm::bit_block_set(dst_block, 0);
6299
6300 if (this->block_type_ == set_block_bit_1bit)
6301 {
6302 // len contains idx of 1 bit set
6303 set_bit(dst_block, len);
6304 return 1;
6305 }
6306
6307 for (unsigned k = 0; k < len; ++k)
6308 {
6309 gap_word_t bit_idx = decoder_.get_16();
6310 bm::set_bit(dst_block, bit_idx);
6311 }
6312 }
6313 else
6314 {
6315 if (this->block_type_ == set_block_bit_1bit)
6316 return 1; // nothing to do: len var already consumed 16 bits
6317
6318 // fwd the decode stream
6319 decoder_.seek(len * 2);
6320 }
6321 return len;
6322}
6323
6324template<typename DEC, typename BLOCK_IDX>
6326{
6328 ++(this->block_idx_);
6329 this->state_ = e_blocks;
6330
6331 return decoder_.get_16(); // 1bit_idx
6332}
6333
6334template<typename DEC, typename BLOCK_IDX>
6335void
6337{
6338 BM_ASSERT(this->state_ == e_gap_block ||
6340 BM_ASSERT(dst_block);
6341
6342 this->read_gap_block(this->decoder_,
6343 this->block_type_,
6344 dst_block,
6345 this->gap_head_);
6346
6347 ++(this->block_idx_);
6348 this->state_ = e_blocks;
6349}
6350
6351
6352template<typename DEC, typename BLOCK_IDX>
6353unsigned
6355 bm::word_t* dst_block,
6356 bm::word_t* tmp_block,
6357 set_operation op)
6358{
6359 BM_ASSERT(this->state_ == e_bit_block);
6360
6361 get_bit_func_type bit_func = bit_func_table_[op];
6362 BM_ASSERT(bit_func);
6363 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
6364 this->state_ = e_blocks;
6365 ++(this->block_idx_);
6366 return cnt;
6367}
6368
6369// ----------------------------------------------------------------------
6370//
6371// ----------------------------------------------------------------------
6372
6373
6374template<class BV>
6376: temp_block_(0), ref_vect_(0)
6377{
6378 temp_block_ = alloc_.alloc_bit_block();
6379}
6380
6381
6382template<class BV>
6384{
6385 if (temp_block_)
6386 alloc_.free_bit_block(temp_block_);
6387}
6388
6389/**
6390 Utility function to process operation using temp vector
6391 @internal
6392 */
6393template<class BV>
6394typename BV::size_type process_operation(BV& bv,
6395 BV& bv_tmp,
6397{
6398 typename BV::size_type count = 0;
6399 switch (op)
6400 {
6401 case set_AND:
6402 bv.bit_and(bv_tmp, BV::opt_compress);
6403 break;
6404 case set_ASSIGN:
6405 bv.swap(bv_tmp);
6406 break;
6407 case set_OR:
6408 bv.bit_or(bv_tmp);
6409 break;
6410 case set_SUB:
6411 bv.bit_sub(bv_tmp);
6412 break;
6413 case set_XOR:
6414 bv.bit_xor(bv_tmp);
6415 break;
6416 case set_COUNT: case set_COUNT_B:
6417 count = bv_tmp.count();
6418 break;
6419 case set_COUNT_A:
6420 count = bv.count();
6421 break;
6422 case set_COUNT_AND:
6423 count = bm::count_and(bv, bv_tmp);
6424 break;
6425 case set_COUNT_XOR:
6426 count = bm::count_xor(bv, bv_tmp);
6427 break;
6428 case set_COUNT_OR:
6429 count = bm::count_or(bv, bv_tmp);
6430 break;
6431 case set_COUNT_SUB_AB:
6432 count = bm::count_sub(bv, bv_tmp);
6433 break;
6434 case set_COUNT_SUB_BA:
6435 count = bm::count_sub(bv_tmp, bv);
6436 break;
6437 case set_END:
6438 default:
6439 BM_ASSERT(0);
6440 #ifndef BM_NO_STL
6441 //throw std::logic_error(err_msg());
6442 #else
6443 BM_THROW(BM_ERR_SERIALFORMAT);
6444 #endif
6445 } // switch
6446
6447 return count;
6448}
6449
6450
6451template<class BV>
6453operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
6454 const unsigned char* buf,
6455 set_operation op,
6456 bool /*exit_on_one*/)
6457{
6458 if (op == set_OR)
6459 {
6460 bm::deserialize(bv, buf, temp_block_, ref_vect_);
6461 return 0;
6462 }
6463 bvector_type bv_tmp;
6464 bm::deserialize(bv_tmp, buf, temp_block_, ref_vect_);
6465 return deserialize_xor(bv, bv_tmp, op);
6466}
6467
6468template<class BV>
6469void operation_deserializer<BV>::deserialize_xor_range(
6470 bvector_type& bv,
6471 const unsigned char* buf,
6472 size_type idx_from,
6473 size_type idx_to)
6474{
6475 bv.clear();
6476
6477 ByteOrder bo_current = globals<true>::byte_order();
6478
6479 bm::decoder dec(buf);
6480 unsigned char header_flag = dec.get_8();
6481 ByteOrder bo = bo_current;
6482 if (!(header_flag & BM_HM_NO_BO))
6483 {
6484 bo = (bm::ByteOrder) dec.get_8();
6485 }
6486
6487 if (bo_current == bo)
6488 {
6489 de_.set_ref_vectors(ref_vect_);
6490 de_.set_range(idx_from, idx_to);
6491 de_.deserialize(bv, buf);
6492 de_.reset();
6493 }
6494 else
6495 {
6496 switch (bo_current)
6497 {
6498 case BigEndian:
6499 {
6501 deserial.set_ref_vectors(ref_vect_);
6502 deserial.set_range(idx_from, idx_to);
6503 deserial.deserialize(bv, buf);
6504 }
6505 break;
6506 case LittleEndian:
6507 {
6509 deserial.set_ref_vectors(ref_vect_);
6510 deserial.set_range(idx_from, idx_to);
6511 deserial.deserialize(bv, buf);
6512 }
6513 break;
6514 default:
6515 BM_ASSERT(0);
6516 };
6517 }
6518 bv.keep_range_no_check(idx_from, idx_to);
6519}
6520
6521
6522
6523template<class BV>
6525operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
6526 bvector_type& bv_tmp,
6527 set_operation op)
6528{
6529 size_type count = 0;
6530 switch (op)
6531 {
6532 case set_AND:
6534 break;
6535 case set_ASSIGN:
6536 bv.swap(bv_tmp);
6537 break;
6538 case set_SUB:
6539 bv -= bv_tmp;
6540 break;
6541 case set_XOR:
6542 bv ^= bv_tmp;
6543 break;
6544 case set_COUNT: case set_COUNT_B:
6545 count = bv_tmp.count();
6546 break;
6547 case set_COUNT_A:
6548 return bv.count();
6549 case set_COUNT_AND:
6550 count += bm::count_and(bv, bv_tmp);
6551 break;
6552 case set_COUNT_XOR:
6553 count += bm::count_xor(bv, bv_tmp);
6554 break;
6555 case set_COUNT_OR:
6556 count += bm::count_or(bv, bv_tmp);
6557 break;
6558 case set_COUNT_SUB_AB:
6559 count += bm::count_sub(bv, bv_tmp);
6560 break;
6561 case set_COUNT_SUB_BA:
6562 count += bm::count_sub(bv_tmp, bv);
6563 break;
6564 case set_END:
6565 default:
6566 BM_ASSERT(0);
6567 #ifndef BM_NO_STL
6568 throw std::logic_error("BM: serialization error");
6569 #else
6570 BM_THROW(BM_ERR_SERIALFORMAT);
6571 #endif
6572 } // switch
6573
6574 return count;
6575}
6576
6577
6578
6579template<class BV>
6582 const unsigned char* buf,
6583 set_operation op,
6584 bool exit_on_one)
6585{
6586 ByteOrder bo_current = globals<true>::byte_order();
6587 bm::decoder dec(buf);
6588 unsigned char header_flag = dec.get_8();
6589
6590 if (header_flag & BM_HM_HXOR) // XOR compression
6591 {
6592 BM_ASSERT(ref_vect_); // reference vector must be set
6593 return deserialize_xor(bv, buf, op, exit_on_one);
6594 }
6595
6596 ByteOrder bo = bo_current;
6597 if (!(header_flag & BM_HM_NO_BO))
6598 bo = (bm::ByteOrder) dec.get_8();
6599
6600 if (op == bm::set_ASSIGN)
6601 {
6602 bv.clear(true);
6603 op = bm::set_OR;
6604 }
6605
6606 if (header_flag & BM_HM_SPARSE)
6607 {
6608 size_type count = 0;
6609 if (bo_current == bo)
6610 {
6611 if (op == bm::set_OR)
6612 {
6613 de_.reset();
6614 de_.deserialize(bv, buf);
6615 }
6616 else
6617 {
6618 bv_tmp_.clear(true);
6619 bv_tmp_.set_new_blocks_strat(bm::BM_GAP);
6620 de_.reset();
6621 de_.deserialize(bv_tmp_, buf);
6622 count = bm::process_operation(bv, bv_tmp_, op);
6623 }
6624 }
6625 else
6626 {
6627 // TODO: implement byte-order aware sparse deserialization
6628 BM_ASSERT(0);
6629 }
6630 return count;
6631 }
6632
6633 if (bo_current == bo)
6634 {
6635 serial_stream_current ss(buf);
6636 return it_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6637 }
6638 switch (bo_current)
6639 {
6640 case BigEndian:
6641 {
6642 serial_stream_be ss(buf);
6643 return it_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6644 }
6645 case LittleEndian:
6646 {
6647 serial_stream_le ss(buf);
6648 return it_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6649 }
6650 default:
6651 BM_ASSERT(0);
6652 #ifndef BM_NO_STL
6653 throw std::logic_error("BM::platform error: unknown endianness");
6654 #else
6655 BM_THROW(BM_ERR_SERIALFORMAT);
6656 #endif
6657 };
6658}
6659
6660template<class BV>
6662 bvector_type& bv,
6663 const unsigned char* buf,
6664 size_type idx_from,
6665 size_type idx_to)
6666{
6667 ByteOrder bo_current = globals<true>::byte_order();
6668 bm::decoder dec(buf);
6669 unsigned char header_flag = dec.get_8();
6670 ByteOrder bo = bo_current;
6671 if (!(header_flag & BM_HM_NO_BO))
6672 bo = (bm::ByteOrder) dec.get_8();
6673
6674 // check if it is empty fresh vector, set the range then
6675 //
6676 blocks_manager_type& bman = bv.get_blocks_manager();
6677 if (!bman.is_init())
6678 bv.set_range(idx_from, idx_to);
6679 else
6680 {} // assume that the target range set outside the call
6681
6682
6683 if (header_flag & BM_HM_HXOR) // XOR compression
6684 {
6685 BM_ASSERT(ref_vect_);
6686 bvector_type bv_tmp;
6687 deserialize_xor_range(bv_tmp, buf, idx_from, idx_to);
6688 if (bv.any())
6689 {
6690 bv.bit_and(bv_tmp, bvector_type::opt_compress);
6691 }
6692 else
6693 {
6694 bv.swap(bv_tmp);
6695 }
6696 return;
6697 }
6698
6699 if (header_flag & BM_HM_SPARSE)
6700 {
6701 if (bo_current == bo)
6702 {
6703 bv_tmp_.clear(true);
6704 bv_tmp_.set_new_blocks_strat(bm::BM_GAP);
6705 de_.reset();
6706 de_.set_range(idx_from, idx_to);
6707 de_.deserialize(bv_tmp_, buf);
6708 de_.reset();
6709
6710 if (bv.any())
6711 {
6712 bv.bit_and(bv_tmp_, bvector_type::opt_compress);
6713 }
6714 else
6715 {
6716 bv.swap(bv_tmp_);
6717 }
6718
6719 }
6720 else
6721 {
6722 // TODO: implement byte-order aware sparse deserialization
6723 BM_ASSERT(0);
6724 }
6725 return;
6726 }
6727
6728 const bm::set_operation op = bm::set_AND;
6729
6730 if (bo_current == bo)
6731 {
6732 serial_stream_current ss(buf);
6733 it_d_.set_range(idx_from, idx_to);
6734 it_d_.deserialize(bv, ss, temp_block_, op, false);
6735 it_d_.unset_range();
6736 return;
6737 }
6738 switch (bo_current)
6739 {
6740 case BigEndian:
6741 {
6742 serial_stream_be ss(buf);
6743 it_d_be_.set_range(idx_from, idx_to);
6744 it_d_be_.deserialize(bv, ss, temp_block_, op, false);
6745 it_d_be_.unset_range();
6746 return;
6747 }
6748 case LittleEndian:
6749 {
6750 serial_stream_le ss(buf);
6751 it_d_le_.set_range(idx_from, idx_to);
6752 it_d_le_.deserialize(bv, ss, temp_block_, op, false);
6753 it_d_le_.unset_range();
6754 return;
6755 }
6756 default:
6757 BM_ASSERT(0);
6758 #ifndef BM_NO_STL
6759 throw std::logic_error("BM::platform error: unknown endianness");
6760 #else
6761 BM_THROW(BM_ERR_SERIALFORMAT);
6762 #endif
6763 };
6764 return;
6765}
6766
6767
6768
6769// ------------------------------------------------------------------
6770
6771template<class BV, class SerialIterator>
6773 size_type from, size_type to)
6774{
6775 is_range_set_ = true;
6776 nb_range_from_ = (from >> bm::set_block_shift);
6777 nb_range_to_ = (to >> bm::set_block_shift);
6778}
6779
6780
6781template<class BV, class SerialIterator>
6782void iterator_deserializer<BV, SerialIterator>::load_id_list(
6783 bvector_type& bv,
6784 serial_iterator_type& sit,
6785 unsigned id_count,
6786 bool set_clear)
6787{
6788 const unsigned win_size = 64;
6789 bm::id_t id_buffer[win_size+1];
6790
6791 if (set_clear) // set bits
6792 {
6793 for (unsigned i = 0; i <= id_count;)
6794 {
6795 unsigned j;
6796 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
6797 {
6798 id_buffer[j] = sit.get_id();
6799 sit.next();
6800 } // for j
6801 bm::combine_or(bv, id_buffer, id_buffer + j);
6802 } // for i
6803 }
6804 else // clear bits
6805 {
6806 for (unsigned i = 0; i <= id_count;)
6807 {
6808 unsigned j;
6809 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
6810 {
6811 id_buffer[j] = sit.get_id();
6812 sit.next();
6813 } // for j
6814 bm::combine_sub(bv, id_buffer, id_buffer + j);
6815 } // for i
6816 }
6817}
6818
6819template<class BV, class SerialIterator>
6821iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
6822 blocks_manager_type& bman,
6823 set_operation op,
6824 size_type bv_block_idx)
6825{
6826 size_type count = 0;
6827 switch (op)
6828 {
6829 case set_OR: case set_SUB: case set_XOR:
6830 case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
6831 case set_COUNT_SUB_BA:
6832 // nothing to do
6833 break;
6834 case set_ASSIGN: case set_AND:
6835 {
6836 block_idx_type nblock_last = (bm::id_max >> bm::set_block_shift);
6837 if (bv_block_idx <= nblock_last)
6838 bman.set_all_zero(bv_block_idx, nblock_last); // clear the tail
6839 }
6840 break;
6841 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
6842 case set_COUNT_SUB_AB:
6843 // count bits in the target vector
6844 {
6845 unsigned i, j;
6846 bm::get_block_coord(bv_block_idx, i, j);
6847 bm::word_t*** blk_root = bman.top_blocks_root();
6848 unsigned top_size = bman.top_block_size();
6849 for (;i < top_size; ++i)
6850 {
6851 bm::word_t** blk_blk = blk_root[i];
6852 if (blk_blk == 0)
6853 {
6854 bv_block_idx+=bm::set_sub_array_size-j;
6855 j = 0;
6856 continue;
6857 }
6858 // TODO: optimize for FULL top level
6859 for (; j < bm::set_sub_array_size; ++j, ++bv_block_idx)
6860 {
6861 if (blk_blk[j])
6862 count += bman.block_bitcount(blk_blk[j]);
6863 } // for j
6864 j = 0;
6865 } // for i
6866 }
6867 break;
6868 case set_END:
6869 default:
6870 BM_ASSERT(0);
6871 #ifndef BM_NO_STL
6872 throw std::logic_error(err_msg());
6873 #else
6874 BM_THROW(BM_ERR_SERIALFORMAT);
6875 #endif
6876 }
6877 return count;
6878}
6879
6880template<class BV, class SerialIterator>
6882iterator_deserializer<BV, SerialIterator>::process_id_list(
6883 bvector_type& bv,
6884 serial_iterator_type& sit,
6885 set_operation op)
6886{
6887 size_type count = 0;
6888 unsigned id_count = sit.get_id_count();
6889 bool set_clear = true;
6890 switch (op)
6891 {
6892 case set_AND:
6893 {
6894 // TODO: use some more optimal algorithm without temp vector
6895 BV bv_tmp(BM_GAP);
6896 load_id_list(bv_tmp, sit, id_count, true);
6897 bv &= bv_tmp;
6898 }
6899 break;
6900 case set_ASSIGN:
6901 BM_ASSERT(0);
6903 // fall through
6904 case set_OR:
6905 set_clear = true;
6907 // fall through
6908 case set_SUB:
6909 load_id_list(bv, sit, id_count, set_clear);
6910 break;
6911 case set_XOR:
6912 for (unsigned i = 0; i < id_count; ++i)
6913 {
6914 bm::id_t id = sit.get_id();
6915 bv[id] ^= true;
6916 sit.next();
6917 } // for
6918 break;
6919 case set_COUNT: case set_COUNT_B:
6920 for (unsigned i = 0; i < id_count; ++i)
6921 {
6922 /* bm::id_t id = */ sit.get_id();
6923 ++count;
6924 sit.next();
6925 } // for
6926 break;
6927 case set_COUNT_A:
6928 return bv.count();
6929 case set_COUNT_AND:
6930 for (size_type i = 0; i < id_count; ++i)
6931 {
6932 bm::id_t id = sit.get_id();
6933 count += bv.get_bit(id);
6934 sit.next();
6935 } // for
6936 break;
6937 case set_COUNT_XOR:
6938 {
6939 // TODO: get rid of the temp vector
6940 BV bv_tmp(BM_GAP);
6941 load_id_list(bv_tmp, sit, id_count, true);
6942 count += bm::count_xor(bv, bv_tmp);
6943 }
6944 break;
6945 case set_COUNT_OR:
6946 {
6947 // TODO: get rid of the temp. vector
6948 BV bv_tmp(BM_GAP);
6949 load_id_list(bv_tmp, sit, id_count, true);
6950 count += bm::count_or(bv, bv_tmp);
6951 }
6952 break;
6953 case set_COUNT_SUB_AB:
6954 {
6955 // TODO: get rid of the temp. vector
6956 BV bv_tmp(bv);
6957 load_id_list(bv_tmp, sit, id_count, false);
6958 count += bv_tmp.count();
6959 }
6960 break;
6961 case set_COUNT_SUB_BA:
6962 {
6963 BV bv_tmp(BM_GAP);
6964 load_id_list(bv_tmp, sit, id_count, true);
6965 count += bm::count_sub(bv_tmp, bv);
6966 }
6967 break;
6968 case set_END:
6969 default:
6970 BM_ASSERT(0);
6971 #ifndef BM_NO_STL
6972 throw std::logic_error(err_msg());
6973 #else
6974 BM_THROW(BM_ERR_SERIALFORMAT);
6975 #endif
6976 } // switch
6977
6978 return count;
6979}
6980
6981
6982template<class BV, class SerialIterator>
6985 bvector_type& bv,
6987 bm::word_t* temp_block,
6988 set_operation op,
6989 bool exit_on_one)
6990{
6991 BM_ASSERT(temp_block);
6992
6993 size_type count = 0;
6994 gap_word_t gap_temp_block[bm::gap_equiv_len * 4];
6995 gap_temp_block[0] = 0;
6996
6997 blocks_manager_type& bman = bv.get_blocks_manager();
6998 if (!bman.is_init())
6999 bman.init_tree();
7000
7001 if (sit.bv_size() && (sit.bv_size() > bv.size()))
7002 bv.resize(sit.bv_size());
7003
7004 typename serial_iterator_type::iterator_state state;
7005 state = sit.get_state();
7006 if (state == serial_iterator_type::e_list_ids)
7007 {
7008 count = process_id_list(bv, sit, op);
7009 return count;
7010 }
7011
7012 size_type bv_block_idx = 0;
7013 size_type empty_op_cnt = 0; // counter for empty target operations
7014
7015 for (;1;)
7016 {
7017 bm::set_operation sop = op;
7018 if (sit.is_eof()) // argument stream ended
7019 {
7020 count += finalize_target_vector(bman, op, bv_block_idx);
7021 return count;
7022 }
7023
7024 state = sit.state();
7025 switch (state)
7026 {
7027 case serial_iterator_type::e_blocks:
7028 sit.next();
7029
7030 // try to skip forward
7031 //
7032 if (is_range_set_ && (bv_block_idx < nb_range_from_))
7033 {
7034 // TODO: use saved bookmark block-idx
7035 bool skip_flag = sit.try_skip(bv_block_idx, nb_range_from_);
7036 if (skip_flag)
7037 {
7038 bv_block_idx = sit.block_idx();
7039 BM_ASSERT(bv_block_idx <= nb_range_from_);
7040 BM_ASSERT(sit.state() == serial_iterator_type::e_blocks);
7041 }
7042 }
7043 continue;
7044 case serial_iterator_type::e_bit_block:
7045 {
7046 BM_ASSERT(sit.block_idx() == bv_block_idx);
7047 unsigned i0, j0;
7048 bm::get_block_coord(bv_block_idx, i0, j0);
7049 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7050 if (!blk)
7051 {
7052 switch (op)
7053 {
7054 case set_AND: case set_SUB: case set_COUNT_AND:
7055 case set_COUNT_SUB_AB: case set_COUNT_A:
7056 // one arg is 0, so the operation gives us zero
7057 // all we need to do is to seek the input stream
7058 sop = set_ASSIGN;
7059 break;
7060
7061 case set_OR: case set_XOR: case set_ASSIGN:
7062 blk = bman.make_bit_block(bv_block_idx);
7063 break;
7064
7065 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
7066 case set_COUNT_SUB_BA: case set_COUNT_B:
7067 // first arg is not required (should work as is)
7068 sop = set_COUNT;
7069 break;
7070
7071 case set_END:
7072 default:
7073 BM_ASSERT(0);
7074 #ifndef BM_NO_STL
7075 throw std::logic_error(err_msg());
7076 #else
7077 BM_THROW(BM_ERR_SERIALFORMAT);
7078 #endif
7079 }
7080 }
7081 else // block exists
7082 {
7083 int is_gap = BM_IS_GAP(blk);
7084 if (is_gap || IS_FULL_BLOCK(blk))
7085 {
7086 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
7087 {
7089 }
7090 else
7091 {
7092 // TODO: make sure const operations do not
7093 // deoptimize GAP blocks
7094 blk = bman.deoptimize_block(bv_block_idx);
7095 }
7096 }
7097 }
7098
7099 // 2 bit-blocks recombination
7100 unsigned c = sit.get_bit_block(blk, temp_block, sop);
7101 count += c;
7102 if (exit_on_one && count) // early exit
7103 return count;
7104 switch (op) // target block optimization for non-const operations
7105 {
7106 case set_AND: case set_SUB: case set_XOR: case set_OR:
7107 bman.optimize_bit_block(i0, j0, bvector_type::opt_compress);
7108 break;
7109 default: break;
7110 } // switch
7111
7112 }
7113 break;
7114
7115 case serial_iterator_type::e_zero_blocks:
7116 {
7117 BM_ASSERT(bv_block_idx == sit.block_idx());
7118
7119 switch (op)
7120 {
7121 case set_ASSIGN: // nothing to do to rewind fwd
7122 case set_SUB: case set_COUNT_AND: case set_OR:
7123 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
7124 bv_block_idx = sit.skip_mono_blocks();
7125 continue;
7126
7127 case set_AND: // clear the range
7128 {
7129 size_type nb_start = bv_block_idx;
7130 bv_block_idx = sit.skip_mono_blocks();
7131 bman.set_all_zero(nb_start, bv_block_idx-1);
7132 }
7133 continue;
7134 case set_END:
7135 default:
7136 break;
7137 } // switch op
7138
7139
7140 unsigned i0, j0;
7141 bm::get_block_coord(bv_block_idx, i0, j0);
7142 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7143
7144 sit.next();
7145
7146 if (blk)
7147 {
7148 switch (op)
7149 {
7150 case set_AND: case set_ASSIGN:
7151 // the result is 0
7152 //blk =
7153 bman.zero_block(bv_block_idx);
7154 break;
7155
7156 case set_SUB: case set_COUNT_AND: case set_OR:
7157 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
7158 // nothing to do
7159 break;
7160
7161 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
7162 case set_COUNT: case set_COUNT_XOR:
7163 // valid bit block recombined with 0 block
7164 // results with same block data
7165 // all we need is to bitcount bv block
7166 {
7167 count += blk ? bman.block_bitcount(blk) : 0;
7168 if (exit_on_one && count) // early exit
7169 return count;
7170 }
7171 break;
7172 case set_END:
7173 default:
7174 BM_ASSERT(0);
7175 } // switch op
7176 } // if blk
7177 }
7178 break;
7179
7180 case serial_iterator_type::e_one_blocks:
7181 {
7182 BM_ASSERT(bv_block_idx == sit.block_idx());
7183 unsigned i0, j0;
7184 bm::get_block_coord(bv_block_idx, i0, j0);
7185 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7186
7187 sit.next();
7188
7189 switch (op)
7190 {
7191 case set_OR: case set_ASSIGN:
7192 bman.set_block_all_set(bv_block_idx);
7193 break;
7194 case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
7195 // block becomes all set
7196 count += bm::bits_in_block;
7197 break;
7198 case set_SUB:
7199 //blk =
7200 bman.zero_block(bv_block_idx);
7201 break;
7202 case set_COUNT_SUB_AB: case set_AND:
7203 // nothing to do, check maybe nothing else to do at all
7204 if (++empty_op_cnt > 64)
7205 {
7206 size_type last_id;
7207 bool b = bv.find_reverse(last_id);
7208 if (!b)
7209 return count;
7210 size_type last_nb = (last_id >> bm::set_block_shift);
7211 if (last_nb < bv_block_idx)
7212 return count; // early exit, nothing to do here
7213 empty_op_cnt = 0;
7214 }
7215 break;
7216 case set_COUNT_AND: case set_COUNT_A:
7217 count += blk ? bman.block_bitcount(blk) : 0;
7218 break;
7219 default:
7220 if (blk)
7221 {
7222 switch (op)
7223 {
7224 case set_XOR:
7225 blk = bman.deoptimize_block(bv_block_idx);
7227 break;
7228 case set_COUNT_XOR:
7229 {
7230 count +=
7232 blk,
7235 }
7236 break;
7237 case set_COUNT_SUB_BA:
7238 {
7239 count +=
7241 blk,
7244 }
7245 break;
7246 default:
7247 BM_ASSERT(0);
7248 } // switch
7249 }
7250 else // blk == 0
7251 {
7252 switch (op)
7253 {
7254 case set_XOR:
7255 // 0 XOR 1 = 1
7256 bman.set_block_all_set(bv_block_idx);
7257 break;
7258 case set_COUNT_XOR:
7259 count += bm::bits_in_block;
7260 break;
7261 case set_COUNT_SUB_BA:
7262 // 1 - 0 = 1
7263 count += bm::bits_in_block;
7264 break;
7265 default:
7266 break;
7267 } // switch
7268 } // else
7269 } // switch
7270 if (exit_on_one && count) // early exit
7271 return count;
7272 }
7273 break;
7274
7275 case serial_iterator_type::e_gap_block:
7276 {
7277 BM_ASSERT(bv_block_idx == sit.block_idx());
7278
7279 unsigned i0, j0;
7280 bm::get_block_coord(bv_block_idx, i0, j0);
7281 const bm::word_t* blk = bman.get_block(i0, j0);
7282
7283 sit.get_gap_block(gap_temp_block);
7284
7285 unsigned len = gap_length(gap_temp_block);
7286 int level = gap_calc_level(len, bman.glen());
7287 --len;
7288
7289 bool const_op = is_const_set_operation(op);
7290 if (const_op)
7291 {
7292 distance_metric metric = operation2metric(op);
7293 bm::word_t* gptr = (bm::word_t*)gap_temp_block;
7294 BMSET_PTRGAP(gptr);
7295 unsigned c =
7297 blk,
7298 gptr,
7299 metric);
7300 count += c;
7301 if (exit_on_one && count) // early exit
7302 return count;
7303
7304 }
7305 else // non-const set operation
7306 {
7307 if ((sop == set_ASSIGN) && blk) // target block override
7308 {
7309 bman.zero_block(bv_block_idx);
7310 sop = set_OR;
7311 }
7312 if (blk == 0) // target block not found
7313 {
7314 switch (sop)
7315 {
7316 case set_AND: case set_SUB:
7317 break;
7318 case set_OR: case set_XOR: case set_ASSIGN:
7319 bman.set_gap_block(
7320 bv_block_idx, gap_temp_block, level);
7321 break;
7322
7323 default:
7324 BM_ASSERT(0);
7325 } // switch
7326 }
7327 else // target block exists
7328 {
7329 bm::operation bop = bm::setop2op(op);
7330 if (level == -1) // too big to GAP
7331 {
7332 gap_convert_to_bitset(temp_block, gap_temp_block);
7333 bv.combine_operation_with_block(bv_block_idx,
7334 temp_block,
7335 0, // BIT
7336 bop);
7337 }
7338 else // GAP fits
7339 {
7340 set_gap_level(gap_temp_block, level);
7341 bv.combine_operation_with_block(
7342 bv_block_idx,
7343 (bm::word_t*)gap_temp_block,
7344 1, // GAP
7345 bop);
7346 }
7347 }
7348 if (exit_on_one)
7349 {
7350 bm::get_block_coord(bv_block_idx, i0, j0);
7351 blk = bman.get_block_ptr(i0, j0);
7352 if (blk)
7353 {
7354 bool z = bm::check_block_zero(blk, true/*deep check*/);
7355 if (!z)
7356 return 1;
7357 }
7358 } // if exit_on_one
7359
7360 } // if else non-const op
7361 }
7362 break;
7363
7364 default:
7365 BM_ASSERT(0);
7366 #ifndef BM_NO_STL
7367 throw std::logic_error(err_msg());
7368 #else
7369 BM_THROW(BM_ERR_SERIALFORMAT);
7370 #endif
7371 } // switch
7372
7373 ++bv_block_idx;
7374 BM_ASSERT(bv_block_idx);
7375
7376 if (is_range_set_ && (bv_block_idx > nb_range_to_))
7377 break;
7378
7379 } // for (deserialization)
7380
7381 return count;
7382}
7383
7384
7385
7386} // namespace bm
7387
7388
7389#ifdef _MSC_VER
7390#pragma warning( pop )
7391#endif
7392
7393
7394#endif
Algorithms for bvector<> (main include).
Definitions(internal).
#define IS_FULL_BLOCK(addr)
Definition bmdef.h:162
#define IS_VALID_ADDR(addr)
Definition bmdef.h:161
#define BM_FALLTHROUGH
Definition bmdef.h:358
#define BMRESTRICT
Definition bmdef.h:203
#define BMNOEXCEPT
Definition bmdef.h:82
#define BMPTR_SETBIT0(ptr)
Definition bmdef.h:177
#define BMGAP_PTR(ptr)
Definition bmdef.h:189
#define BM_IS_GAP(ptr)
Definition bmdef.h:191
#define BMSET_PTRGAP(ptr)
Definition bmdef.h:190
#define BM_ASSERT
Definition bmdef.h:139
#define FULL_BLOCK_FAKE_ADDR
Definition bmdef.h:158
#define FULL_BLOCK_REAL_ADDR
Definition bmdef.h:157
Bit manipulation primitives (internal).
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Definition bmserial.h:2567
Utilities for bit transposition (internal) (experimental!).
Bit manipulation primitives (internal).
Functions and utilities for XOR filters (internal).
Byte based reader for un-aligned bit streaming.
Definition encoding.h:257
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition encoding.h:1792
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:275
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:282
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit).
Definition encoding.h:1516
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:288
Byte based writer for un-aligned bit streaming.
Definition encoding.h:183
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition encoding.h:230
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition encoding.h:1294
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition encoding.h:1187
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:207
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition bmfunc.h:9360
Bit-block sum adapter, takes values and sums it /internal.
Definition bmfunc.h:9390
bm::word_t sum() const BMNOEXCEPT
Get accumulated sum.
Definition bmfunc.h:9396
List of reference bit-vectors with their true index associations.
Definition bmxor.h:624
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:137
size_type count() const BMNOEXCEPT
population count (count of ON bits)
Definition bm.h:2401
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand AND : this := bv1 AND bv2
Definition bm.h:6118
blocks_manager< Alloc > blocks_manager_type
Definition bm.h:119
bvector_size_type size_type
Definition bm.h:121
void swap(bvector< Alloc > &bvect) BMNOEXCEPT
Exchanges content of bv and this bvector.
Definition bm.h:3966
blocks_manager_type::block_idx_type block_idx_type
Definition bm.h:120
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
Definition bm.h:4149
Alloc allocator_type
Definition bm.h:117
bool get_bit(size_type n) const BMNOEXCEPT
returns true if bit n is set and false is bit n is 0.
Definition bm.h:3602
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:105
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:93
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition encoding.h:108
Class for decoding data from memory buffer.
Definition encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:751
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:738
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:786
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition encoding.h:722
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:769
static const char * err_msg() BMNOEXCEPT
Definition bmserial.h:545
static void read_0runs_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
read bit-block encoded as runs
Definition bmserial.h:3518
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read inverted binary interpolated list into a bit-set.
Definition bmserial.h:3445
block_idx_type try_skip(decoder_type &decoder, block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
Definition bmserial.h:3676
void read_digest0_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read digest0-type bit-block.
Definition bmserial.h:3478
void read_bic_gap(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated gap blocks into a bitset.
Definition bmserial.h:3455
bm::bit_in< DEC > bit_in_type
Definition bmserial.h:498
unsigned read_bic_sb_arr(decoder_type &decoder, unsigned block_type, unsigned *dst_arr, unsigned *sb_idx)
Read list of bit ids for super-blocks.
Definition bmserial.h:3375
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition bmserial.h:3549
unsigned read_id_list(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_arr)
Read list of bit ids.
Definition bmserial.h:3252
BLOCK_IDX block_idx_type
Definition bmserial.h:497
void read_bic_arr(decoder_type &decoder, bm::word_t *blk, unsigned block_type) BMNOEXCEPT
Read binary interpolated list into a bit-set.
Definition bmserial.h:3337
Deserializer for bit-vector.
Definition bmserial.h:570
void reset() BMNOEXCEPT
reset range deserialization and reference vectors
Definition bmserial.h:624
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
Definition bmserial.h:3779
allocator_type::allocator_pool_type allocator_pool_type
Definition bmserial.h:665
void xor_decode(blocks_manager_type &bman)
Definition bmserial.h:4622
deseriaizer_base< DEC, block_idx_type > parent_type
Definition bmserial.h:576
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:578
void decode_arr_sblock(unsigned char btype, decoder_type &dec, bvector_type &bv)
void deserialize_gap(unsigned char btype, decoder_type &dec, bvector_type &bv, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
bvector_type::allocator_type allocator_type
Definition bmserial.h:573
parent_type::decoder_type decoder_type
Definition bmserial.h:577
void xor_reset() BMNOEXCEPT
Definition bmserial.h:4738
BV::size_type size_type
Definition bmserial.h:574
void set_range(size_type from, size_type to) BMNOEXCEPT
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
Definition bmserial.h:610
void unset_range() BMNOEXCEPT
Disable range deserialization.
Definition bmserial.h:619
BV::blocks_manager_type blocks_manager_type
Definition bmserial.h:627
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Definition bmserial.h:4122
bvector_type::block_idx_type block_idx_type
Definition bmserial.h:575
void decode_arrbit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void xor_decode_chain(bm::word_t *BMRESTRICT blk) BMNOEXCEPT
Definition bmserial.h:4591
void decode_bit_block(unsigned char btype, decoder_type &dec, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
void decode_block_bit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void decode_block_bit_interval(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
Definition bmserial.h:663
bm::heap_vector< bm::word_t, allocator_type, true > sblock_arridx_type
Definition bmserial.h:664
Memory encoding.
Definition encoding.h:50
unsigned char * position_type
Definition encoding.h:52
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:529
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:606
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition encoding.h:545
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition encoding.h:571
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition encoding.h:444
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:406
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT
but gat plus value based on its VBR evaluation
Definition encoding.h:487
Functor for Elias Gamma encoding.
Definition encoding.h:341
Iterator to walk forward the serialized stream.
Definition bmserial.h:709
void set_range(size_type from, size_type to)
set deserialization range [from, to]
Definition bmserial.h:6772
SerialIterator serial_iterator_type
Definition bmserial.h:713
void unset_range() BMNOEXCEPT
disable range filtration
Definition bmserial.h:720
bvector_type::size_type size_type
Definition bmserial.h:712
size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition bmserial.h:6984
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:932
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition bmserial.h:1009
void deserialize_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
Definition bmserial.h:6661
BV::allocator_type allocator_type
Definition bmserial.h:930
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition bmserial.h:6581
void deserialize_range(bvector_type &bv, const unsigned char *buf, bm::word_t *, size_type idx_from, size_type idx_to)
Definition bmserial.h:996
bvector_type::size_type size_type
Definition bmserial.h:931
size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *, set_operation op=bm::set_OR, bool exit_on_one=false)
Obsolete! Deserialize bvector using buffer as set operation argument.
Definition bmserial.h:982
Serialization stream iterator.
Definition bmserial.h:768
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true) BMNOEXCEPT
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition bmserial.h:6287
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:865
deseriaizer_base< DEC, block_idx_type > parent_type
Definition bmserial.h:772
void next()
get next block
Definition bmserial.h:4857
serial_stream_iterator(const unsigned char *buf)
Definition bmserial.h:4747
unsigned get_id_count() const BMNOEXCEPT
Number of ids in the inverted list (valid for e_list_ids).
Definition bmserial.h:825
unsigned get_bit() BMNOEXCEPT
Definition bmserial.h:6325
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5525
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:6089
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5305
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:6192
bm::id_t get_id() const BMNOEXCEPT
Get last id from the id list.
Definition bmserial.h:828
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5884
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5629
bool is_eof() const
Returns true if end of bit-stream reached.
Definition bmserial.h:782
decoder_type & decoder()
Get low level access to the decoder (use carefully).
Definition bmserial.h:803
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5419
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5229
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5129
iterator_state state() const BMNOEXCEPT
Returns iterator internal state.
Definition bmserial.h:821
block_idx_type block_idx() const BMNOEXCEPT
Get current block index.
Definition bmserial.h:831
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5714
unsigned(serial_stream_iterator< DEC, BLOCK_IDX >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
Definition bmserial.h:838
iterator_state
iterator is a state machine, this enum encodes its key value
Definition bmserial.h:809
bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Definition bmserial.h:886
deseriaizer_base< DEC, BLOCK_IDX >::decoder_type decoder_type
Definition bmserial.h:770
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5986
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5798
block_idx_type skip_mono_blocks() BMNOEXCEPT
skip all zero or all-one blocks
Definition bmserial.h:5091
block_idx_type bv_size() const
serialized bitvector size
Definition bmserial.h:779
iterator_state get_state() const BMNOEXCEPT
Definition bmserial.h:823
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
void serialize(const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
Bitvector serialization into buffer object (resized automatically).
Definition bmserial.h:2241
unsigned char find_bit_best_encoding(const bm::word_t *block) BMNOEXCEPT
serializer(bm::word_t *temp_block)
Definition bmserial.h:1200
void reset_compression_stats() BMNOEXCEPT
Reset all accumulated compression statistics.
Definition bmserial.h:1244
void bienc_arr_sblock(const bvector_type &bv, unsigned sb, bm::encoder &enc) BMNOEXCEPT
void xor_tmp_product(const bm::word_t *s_block, const block_match_chain_type &mchain, unsigned i, unsigned j) BMNOEXCEPT
Compute digest based XOR product, place into tmp XOR block.
Definition bmserial.h:2206
void gap_length_serialization(bool value) BMNOEXCEPT
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
bvector_type::block_idx_type block_idx_type
Definition bmserial.h:82
bvector_type::size_type size_type
Definition bmserial.h:83
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block) BMNOEXCEPT
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
void interpolated_gap_array_v0(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
void set_compression_level(unsigned clevel) BMNOEXCEPT
Set compression level.
Definition bmserial.h:1251
void allow_stat_reset(bool allow) BMNOEXCEPT
Enable/disable statistics reset on each serilaization.
Definition bmserial.h:202
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) BMNOEXCEPT
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:86
void byte_order_serialization(bool value) BMNOEXCEPT
unsigned char find_bit_best_encoding_l5(const bm::word_t *block) BMNOEXCEPT
void set_sparse_cutoff(unsigned cutoff) BMNOEXCEPT
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
Definition bmserial.h:2265
bvector_type::allocator_type allocator_type
Definition bmserial.h:79
byte_buffer< allocator_type > buffer
Definition bmserial.h:85
bool compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
bvector_type::blocks_manager_type blocks_manager_type
Definition bmserial.h:80
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
unsigned get_compression_level() const BMNOEXCEPT
Get current compression level.
Definition bmserial.h:133
void reset_models() BMNOEXCEPT
Definition bmserial.h:383
const size_type * get_compression_stat() const BMNOEXCEPT
Return serialization counter vector.
Definition bmserial.h:196
void encode_xor_match_chain(bm::encoder &enc, const block_match_chain_type &mchain) BMNOEXCEPT
xor_sim_model_type::block_match_chain_type block_match_chain_type
Definition bmserial.h:90
static void process_bookmark(block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) BMNOEXCEPT
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
Definition bmserial.h:2593
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
Definition bmserial.h:1167
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
bm::xor_sim_model< BV > xor_sim_model_type
Definition bmserial.h:87
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) BMNOEXCEPT
void add_model(unsigned char mod, unsigned score) BMNOEXCEPT
Definition bmserial.h:1652
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
bvector_type::statistics statistics_type
Definition bmserial.h:81
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control) BMNOEXCEPT
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition bmserial.h:2703
void encode_header(const bvector_type &bv, bm::encoder &enc) BMNOEXCEPT
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
XOR scanner to search for complement-similarities in collections of bit-vectors.
Definition bmxor.h:820
Encoding utilities for serialization (internal).
unsigned bit_block_find(const bm::word_t *BMRESTRICT block, unsigned nbit, unsigned *BMRESTRICT pos) BMNOEXCEPT
Searches for the next 1 bit in the BIT block.
Definition bmfunc.h:8657
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition bmutil.h:582
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
Definition bmfunc.h:6777
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size) BMNOEXCEPT
Inspects block for full zero words.
Definition bmfunc.h:8601
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition bmfunc.h:6858
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
Definition bmfunc.h:5051
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition bmfunc.h:3734
bm::set_representation best_representation(unsigned bit_count, unsigned total_possible_bitcount, unsigned gap_count, unsigned block_size) BMNOEXCEPT
Choose best representation for a bit-block.
Definition bmfunc.h:9021
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock OR operation and calculates bitcount of the result.
Definition bmfunc.h:7765
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
Definition bmutil.h:605
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition bmfunc.h:3721
unsigned bit_block_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bool inverted) BMNOEXCEPT
Convert bit block into an array of ints corresponding to 1 bits.
Definition bmfunc.h:9064
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
Definition bmfunc.h:1120
void bit_invert(T *start) BMNOEXCEPT
Definition bmfunc.h:6057
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition bmfunc.h:8434
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
Definition bmfunc.h:4456
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
Definition bmfunc.h:7835
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock XOR operation and calculates bitcount of the result.
Definition bmfunc.h:8543
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition bmfunc.h:8105
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation and calculates bitcount of the result.
Definition bmfunc.h:7626
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock SUB operation and calculates bitcount of the result.
Definition bmfunc.h:7675
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
Definition bm.h:790
operation
Bit operations.
Definition bmconst.h:191
set_operation
Codes of set operations.
Definition bmconst.h:168
strategy
Block allocation strategies.
Definition bmconst.h:146
@ BM_OR
Definition bmconst.h:193
@ set_COUNT_SUB_AB
Definition bmconst.h:178
@ set_OR
Definition bmconst.h:170
@ set_COUNT_XOR
Definition bmconst.h:176
@ set_COUNT_OR
Definition bmconst.h:177
@ set_COUNT_B
Definition bmconst.h:181
@ set_COUNT_SUB_BA
Definition bmconst.h:179
@ set_ASSIGN
Definition bmconst.h:173
@ set_SUB
Definition bmconst.h:171
@ set_COUNT_AND
Definition bmconst.h:175
@ set_COUNT
Definition bmconst.h:174
@ set_AND
Definition bmconst.h:169
@ set_XOR
Definition bmconst.h:172
@ set_COUNT_A
Definition bmconst.h:180
@ set_END
Definition bmconst.h:183
@ BM_GAP
GAP compression is ON.
Definition bmconst.h:148
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition bmserial.h:3071
serialization_flags
Bit mask flags for serialization algorithm.
Definition bmserial.h:3023
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition bmserial.h:3137
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
Definition bmserial.h:3200
@ BM_NO_GAP_LENGTH
save no GAP info (save some space)
Definition bmserial.h:3025
@ BM_NO_BYTE_ORDER
save no byte-order info (save some space)
Definition bmserial.h:3024
distance_metric
Distance metrics codes defined for vectors A and B.
Definition bmalgo_impl.h:58
distance_metric operation2metric(set_operation op) BMNOEXCEPT
Convert set operation into compatible distance metric.
Definition bmalgo_impl.h:73
@ COUNT_XOR
(A ^ B).count()
Definition bmalgo_impl.h:60
@ COUNT_SUB_BA
(B - A).count()
Definition bmalgo_impl.h:63
unsigned gap_bit_count_unr(const T *buf) BMNOEXCEPT
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition bmfunc.h:2327
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false) BMNOEXCEPT
Convert gap block into array of ints corresponding to 1 bits.
Definition bmfunc.h:4942
gap_word_t * gap_operation_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize) BMNOEXCEPT
GAP XOR operation.
Definition bmfunc.h:6585
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned len=0) BMNOEXCEPT
GAP block to bitblock conversion.
Definition bmfunc.h:4475
void gap_invert(T *buf) BMNOEXCEPT
Inverts all bits in the GAP buffer.
Definition bmfunc.h:4620
unsigned gap_set_array(T *buf, const T *arr, unsigned len) BMNOEXCEPT
Convert array to GAP buffer.
Definition bmfunc.h:3601
void set_gap_level(T *buf, int level) BMNOEXCEPT
Sets GAP block capacity level.
Definition bmfunc.h:4639
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
Definition bmfunc.h:4039
void gap_set_all(T *buf, unsigned set_max, unsigned value) BMNOEXCEPT
Sets all bits to 0 or 1 (GAP).
Definition bmfunc.h:4552
unsigned gap_add_value(T *buf, unsigned pos) BMNOEXCEPT
Add new value to the end of GAP buffer.
Definition bmfunc.h:3361
int gap_calc_level(unsigned len, const T *glevel_len) BMNOEXCEPT
Calculates GAP block capacity level.
Definition bmfunc.h:4661
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Returs GAP block length.
Definition bmfunc.h:1603
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
Definition bmalgo.h:49
BV::size_type count_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of OR operation of two bitsets.
Definition bmalgo.h:149
BV::size_type count_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of SUB operation of two bitsets.
Definition bmalgo.h:115
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of XOR operation of two bitsets.
Definition bmalgo.h:81
Definition bm.h:78
const unsigned char set_block_ref_eq
block is a copy of a reference block
Definition bmserial.h:1130
@ e_no_xor_match
Definition bmxor.h:82
@ e_xor_match_GC
Definition bmxor.h:83
@ e_xor_match_BC
Definition bmxor.h:84
@ e_xor_match_EQ
Definition bmxor.h:86
@ e_xor_match_iBC
Definition bmxor.h:85
const unsigned char set_block_xor_ref32_um
..... 32-bit (should never happen)
Definition bmserial.h:1159
const unsigned set_block_digest_wave_size
Definition bmconst.h:67
const unsigned char set_block_gap
Plain GAP block.
Definition bmserial.h:1107
BV::size_type process_operation(BV &bv, BV &bv_tmp, bm::set_operation op)
Utility function to process operation using temp vector.
Definition bmserial.h:6394
void bit_block_change_bc(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT gc, unsigned *BMRESTRICT bc) BMNOEXCEPT
Definition bmfunc.h:5252
const unsigned char set_block_arrbit_inv
List of bits OFF.
Definition bmserial.h:1124
const unsigned char set_nb_sync_mark8
bookmark sync point (8-bits)
Definition bmserial.h:1147
const unsigned sblock_flag_max16
Definition bmserial.h:2406
const unsigned sblock_flag_sb16
16-bit SB index (8-bit by default)
Definition bmserial.h:2398
const unsigned char set_block_bit_interval
Interval block.
Definition bmserial.h:1110
const unsigned id_max
Definition bmconst.h:109
bool check_block_zero(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks all conditions and returns true if block consists of only 0 bits.
Definition bmfunc.h:9096
unsigned int word_t
Definition bmconst.h:39
const unsigned char set_block_bit_digest0
H-compression with digest mask.
Definition bmserial.h:1128
const unsigned char set_block_xor_ref32
..... 32-bit (should never happen)
Definition bmserial.h:1133
const unsigned char set_block_arrgap_bienc_inv_v2
Interpolated GAP array (inverted).
Definition bmserial.h:1141
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
Definition bmserial.h:1127
unsigned char check_pair_vect_vbr(const BMChain &mchain, const RVect &ref_vect)
Check effective bit-rate for the XOR encode vector.
Definition bmxor.h:404
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition bmserial.h:1116
bool check_block_one(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks if block has only 1 bits.
Definition bmfunc.h:9120
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
Definition bmserial.h:1126
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) BMNOEXCEPT
Convert bit block to GAP representation.
Definition bmfunc.h:4870
const unsigned set_sub_array_size
Definition bmconst.h:95
void compute_s_block_descr(const bm::word_t *BMRESTRICT block, block_waves_xor_descr &BMRESTRICT x_descr, unsigned *BMRESTRICT s_gc, unsigned *BMRESTRICT s_bc) BMNOEXCEPT
Compute reference (non-XOR) 64-dim complexity descriptor for the s-block.
Definition bmxor.h:430
const unsigned sblock_flag_sb32
32-bit SB index
Definition bmserial.h:2399
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition bmserial.h:1099
void for_each_dgap(const T *gap_buf, Func &func)
Definition bmfunc.h:2739
BMFORCEINLINE void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
Definition bmfunc.h:180
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different distance metrics.
const unsigned sblock_flag_max32
Definition bmserial.h:2408
const unsigned sblock_flag_min32
Definition bmserial.h:2403
const unsigned set_total_blocks
Definition bmconst.h:111
const unsigned char set_nb_sync_mark48
Definition bmserial.h:1151
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
Definition bmserial.h:1122
ByteOrder
Byte orders recognized by the library.
Definition bmconst.h:452
@ LittleEndian
Definition bmconst.h:454
@ BigEndian
Definition bmconst.h:453
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition bmserial.h:1097
const unsigned char set_nb_sync_mark16
Definition bmserial.h:1148
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition bmserial.h:1101
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition bmserial.h:1115
const unsigned char set_block_xor_ref8_um
block is un-masked XOR of a reference block (8-bit)
Definition bmserial.h:1157
const unsigned char set_block_64one
lots of all-set blocks
Definition bmserial.h:1119
const unsigned char set_block_gap_bienc
Interpolated GAP block (legacy).
Definition bmserial.h:1121
const unsigned char set_block_xor_gap_ref16
..... 16-bit
Definition bmserial.h:1135
const unsigned set_compression_default
Default compression level.
Definition bmserial.h:60
const unsigned char set_block_arrbit
List of bits ON.
Definition bmserial.h:1109
const unsigned char set_block_1one
One block all-set (1111...).
Definition bmserial.h:1095
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block).
Definition bmserial.h:1117
void convert_sub_to_arr(const BV &bv, unsigned sb, VECT &vect)
convert sub-blocks to an array of set 1s (32-bit)
const unsigned gap_levels
Definition bmconst.h:85
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition bmserial.h:1108
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size) BMNOEXCEPT
Definition bmfunc.h:9439
const unsigned char set_block_arr_bienc_8bh
BIC block 8bit header.
Definition bmserial.h:1155
bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
Convert set operation to operation.
Definition bmfunc.h:1339
const unsigned char set_sblock_bienc
super-block interpolated list
Definition bmserial.h:1154
const unsigned char set_block_xor_chain
XOR chain (composit of sub-blocks).
Definition bmserial.h:1137
const unsigned char set_nb_bookmark32
jump ahead mark (32-bit)
Definition bmserial.h:1146
const unsigned sblock_flag_max24
Definition bmserial.h:2407
const unsigned set_sub_total_bits
Definition bmconst.h:100
const unsigned set_block_size
Definition bmconst.h:55
unsigned long long int id64_t
Definition bmconst.h:35
const unsigned block_waves
Definition bmconst.h:66
const unsigned char set_block_xor_ref16
block is masked XOR of a reference block (16-bit)
Definition bmserial.h:1132
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted).
Definition bmserial.h:1123
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition bmserial.h:1114
const unsigned gap_equiv_len
Definition bmconst.h:82
const unsigned char set_block_1zero
One all-zero block.
Definition bmserial.h:1094
const unsigned sblock_flag_min24
24-bit minv
Definition bmserial.h:2402
const unsigned char set_block_end
End of serialization.
Definition bmserial.h:1093
unsigned int id_t
Definition bmconst.h:38
const unsigned gap_max_buff_len
Definition bmconst.h:80
const unsigned char set_block_arrgap_bienc_v2
//!< Interpolated GAP array (v2)
Definition bmserial.h:1140
const unsigned char set_block_xor_gap_ref32
..... 32-bit (should never happen)
Definition bmserial.h:1136
const unsigned char set_block_arrgap
List of bits ON (GAP block).
Definition bmserial.h:1111
bit_representation
Possible representations for bit sets.
Definition bmfunc.h:10170
@ e_bit_INT
Int list.
Definition bmfunc.h:10172
@ e_bit_GAP
GAPs.
Definition bmfunc.h:10171
@ e_bit_bit
uncompressed bit-block
Definition bmfunc.h:10176
@ e_bit_IINT
Inverted int list.
Definition bmfunc.h:10173
@ e_bit_end
Definition bmfunc.h:10177
@ e_bit_1
all 1s
Definition bmfunc.h:10174
@ e_bit_0
all 0s (empty block)
Definition bmfunc.h:10175
const unsigned char set_nb_sync_mark64
..... 64-bit (should never happen)
Definition bmserial.h:1152
const unsigned char set_nb_sync_mark32
Definition bmserial.h:1150
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition bmserial.h:1106
const unsigned sparse_max_l5
Definition bmserial.h:1163
serialization_header_mask
Definition bmserial.h:1077
@ BM_HM_NO_GAPL
no GAP levels
Definition bmserial.h:1082
@ BM_HM_ID_LIST
id list stored
Definition bmserial.h:1080
@ BM_HM_NO_BO
no byte-order
Definition bmserial.h:1081
@ BM_HM_SPARSE
very sparse vector
Definition bmserial.h:1085
@ BM_HM_DEFAULT
Definition bmserial.h:1078
@ BM_HM_64_BIT
64-bit vector
Definition bmserial.h:1083
@ BM_HM_RESIZE
resized vector
Definition bmserial.h:1079
@ BM_HM_HXOR
horizontal XOR compression turned ON
Definition bmserial.h:1084
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
Definition bmutil.h:335
const unsigned char set_block_arr_bienc
Interpolated block as int array.
Definition bmserial.h:1125
const unsigned char set_block_64zero
lots of zero blocks
Definition bmserial.h:1118
const unsigned char set_block_gap_bienc_v2
Interpolated GAP block (v2).
Definition bmserial.h:1139
const unsigned char set_block_xor_ref8
block is masked XOR of a reference block (8-bit)
Definition bmserial.h:1131
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition bmserial.h:1113
unsigned short gap_word_t
Definition bmconst.h:78
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition bmserial.h:1100
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition bmserial.h:1096
const unsigned char set_block_xor_ref16_um
block is un-masked XOR of a reference block (16-bit)
Definition bmserial.h:1158
const unsigned gap_max_bits
Definition bmconst.h:81
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition bmserial.h:1112
const unsigned char set_block_aone
All other blocks one.
Definition bmserial.h:1103
const unsigned char set_nb_bookmark16
jump ahead mark (16-bit)
Definition bmserial.h:1144
const unsigned set_block_shift
Definition bmconst.h:56
const unsigned sblock_flag_len16
16-bit len (8-bit by default)
Definition bmserial.h:2405
const unsigned set_compression_max
Maximum supported compression level.
Definition bmserial.h:59
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition bmutil.h:345
const unsigned char set_nb_sync_mark24
Definition bmserial.h:1149
const unsigned char set_block_xor_gap_ref8
..... 8-bit
Definition bmserial.h:1134
unsigned short short_t
Definition bmconst.h:40
const unsigned sparse_max_l6
Definition bmserial.h:1164
const unsigned char set_block_azero
All other blocks zero.
Definition bmserial.h:1102
const unsigned bits_in_block
Definition bmconst.h:114
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition bmserial.h:1098
const unsigned char set_block_bit
Plain bit block.
Definition bmserial.h:1104
const unsigned char set_block_bitgap_bienc_v2
Interpolated bit-block as GAPs (v2 - reseved).
Definition bmserial.h:1142
const unsigned char set_nb_bookmark24
jump ahead mark (24-bit)
Definition bmserial.h:1145
const unsigned sblock_flag_min16
16-bit minv
Definition bmserial.h:2401
bool is_const_set_operation(set_operation op) BMNOEXCEPT
Returns true if set operation is constant (bitcount).
Definition bmfunc.h:1330
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition bmserial.h:1105
Bit COUNT OR functor.
Definition bmfunc.h:9507
Bit COUNT SUB AB functor.
Definition bmfunc.h:9514
Bit SUB BA functor.
Definition bmfunc.h:9520
Bit COUNT XOR functor.
Definition bmfunc.h:9501
unsigned chain_size
Definition bmxor.h:292
bm::id64_t xor_d64[64]
Definition bmxor.h:294
unsigned ref_idx[64]
Definition bmxor.h:293
bm::xor_complement_match match
Definition bmxor.h:295
Structure to compute XOR gap-count profile by sub-block waves.
Definition bmxor.h:230
unsigned short sb_bc[bm::block_waves]
BIT counts.
Definition bmxor.h:233
unsigned short sb_gc[bm::block_waves]
GAP counts.
Definition bmxor.h:232
size_t max_serialize_mem
estimated maximum memory for serialization
Definition bmfunc.h:61
Statistical information about bitset's memory allocation details.
Definition bm.h:125
static ByteOrder byte_order()
Definition bmconst.h:487
XOR match pair.
Definition bmxor.h:274
Bookmark state structure.
Definition bmserial.h:389
unsigned bm_type_
0:32-bit, 1: 24-bit, 2: 16-bit
Definition bmserial.h:408
bookmark_state(block_idx_type nb_range) BMNOEXCEPT
Definition bmserial.h:390
size_t min_bytes_range_
minumal distance (bytes) between marks
Definition bmserial.h:409
block_idx_type nb_
bookmark block idx
Definition bmserial.h:406
unsigned char * ptr_
bookmark pointer
Definition bmserial.h:405
block_idx_type nb_range_
target bookmark range in blocks
Definition bmserial.h:407
XOR similarity model.
Definition bmxor.h:791
bm::block_match_chain< size_type > block_match_chain_type
Definition bmxor.h:798
Parameters for XOR similarity search.
Definition bmxor.h:59
bm::bvector bvector_type