BitMagic-C++
bmsparsevec_serial.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2#define BMSPARSEVEC_SERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmsparsevec_serial.h
22 \brief Serialization for sparse_vector<>
23*/
24
25
26#ifndef BM__H__INCLUDED__
27// BitMagic utility headers do not include main "bm.h" declaration
28// #include "bm.h" or "bm64.h" explicitly
29# error missing include (bm.h or bm64.h)
30#endif
31
32#include "bmsparsevec.h"
33#include "bmserial.h"
34#include "bmbuffer.h"
35#include "bmdef.h"
36
37namespace bm
38{
39
40/** \defgroup svserial Sparse vector serialization
41 Sparse vector serialization
42 \ingroup svector
43 */
44
45
46/*!
47 \brief layout class for serialization buffer structure
48
49 Class keeps a memory block sized for the target sparse vector BLOB.
50 This class also provides access to bit-plane memory, so it becomes possible
51 to use parallel storage methods to save bit-planes into
52 different storage shards.
53
54 \ingroup svserial
55*/
56template<class SV>
58{
59 typedef typename SV::value_type value_type;
60 typedef typename SV::bvector_type bvector_type;
63
65
67
68 /*!
69 \brief resize capacity
70 \param capacity - new capacity
71 \return new buffer or 0 if failed
72 */
73 unsigned char* reserve(size_t capacity)
74 {
75 if (capacity == 0)
76 {
77 freemem();
78 return 0;
79 }
80 buf_.reinit(capacity);
81 return buf_.data();
82 }
83
84 /// return current serialized size
85 size_t size() const BMNOEXCEPT { return buf_.size(); }
86
87 /// Set new serialized size
88 void resize(size_t ssize) { buf_.resize(ssize); }
89
90 /// return serialization buffer capacity
91 size_t capacity() const BMNOEXCEPT { return buf_.capacity(); }
92
93 /// free memory
94 void freemem() BMNOEXCEPT { buf_.release(); }
95
96 /// Set plane output pointer and size
97 void set_plane(unsigned i, unsigned char* ptr, size_t buf_size) BMNOEXCEPT
98 {
99 plane_ptrs_[i] = ptr;
100 plane_size_[i] = buf_size;
101 }
102
103 /// Get plane pointer
104 const unsigned char* get_plane(unsigned i) const BMNOEXCEPT
105 { return plane_ptrs_[i]; }
106
107 /// Return serialization buffer pointer
108 const unsigned char* buf() const BMNOEXCEPT { return buf_.buf(); }
109 /// Return serialization buffer pointer
110 const unsigned char* data() const BMNOEXCEPT { return buf_.buf(); }
111
112 /// Resize for the target number of plains / bit-slices
113 void resize_slices(unsigned new_slices_size)
114 {
115 plane_ptrs_.resize(new_slices_size);
116 plane_size_.resize(new_slices_size);
117 }
118
119private:
121 void operator=(const sparse_vector_serial_layout&);
122protected:
123 typedef bm::heap_vector<unsigned char*, allocator_type, true> ptr_vector_type;
124 typedef bm::heap_vector<size_t, allocator_type, true> sizet_vector_type;
125
126
127 buffer_type buf_; ///< serialization buffer
128 ptr_vector_type plane_ptrs_; ///< pointers on serialized bit-planes
129 sizet_vector_type plane_size_; ///< serialized plane size
130// unsigned char* plane_ptrs_[SV::sv_slices]; ///< pointers on serialized bit-planes
131// size_t plane_size_[SV::sv_slices]; ///< serialized plane size
132};
133
134// -------------------------------------------------------------------------
135
136/*!
137 \brief Serialize sparse vector into a memory buffer(s) structure
138
139 Serialization format:
140
141 | HEADER | BIT-VECTORS ... | REMAP_MATRIX
142
143 Header structure:
144 -----------------
145 BYTE+BYTE: Magic-signature 'BM' or 'BC' (c-compressed)
146 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
147 {
148 BYTE : Number of Bit-vector planes (total) (non-zero when < 255 planes)
149 |
150 BYTE: zero - flag of large plane matrix
151 INT64: Number of bit-vector planes
152 }
153 INT64: Vector size
154 INT64: Offset of plane 0 from the header start (value 0 means plane is empty)
155 INT64: Offset of plane 1 from
156 ...
157 INT32: reserved
158
159Bit-vectors:
160------------
161 Based on current bit-vector serialization
162
163Remap Matrix:
164 SubHeader | Matrix BLOB
165
166 sub-header:
167 BYTE: 'R' (remapping) or 'N' (no remapping)
168 N - means no other info is saved on the stream
169 INT64: remap matrix size
170
171 \ingroup svector
172 \ingroup svserial
173*/
174template<typename SV>
176{
177public:
178 typedef typename SV::bvector_type bvector_type;
181 typedef typename SV::value_type value_type;
182 typedef typename SV::size_type size_type;
184 typedef typename alloc_type::allocator_pool_type allocator_pool_type;
185 typedef typename
187 typedef typename
189
190
191public:
193
194
195 /*! @name Compression settings */
196 ///@{
197
198 /**
199 Add skip-markers for faster range deserialization
200
201 @param enable - when TRUE, serialization will add bookmark codes
202 @param bm_interval - bookmark interval in (number of blocks)
203 (suggested between 4 and 512)
204 smaller interval means more bookmarks added to the skip list thus
205 more increasing the BLOB size
206 */
207 void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT
208 { bvs_.set_bookmarks(enable, bm_interval); }
209
210 /**
211 Enable XOR compression on vector serialization
212 @sa set_xor_ref
213 @sa disable_xor_compression
214 */
217
218 /**
219 Disable XOR compression on serialization
220 */
223
224 /** Turn ON and OFF XOR compression of sparse vectors
225 Enables XOR reference compression for the sparse vector.
226 Default: disabled
227 Reference bit-vectors from the sparse vector itself
228 */
229 void set_xor_ref(bool is_enabled) BMNOEXCEPT;
230
231 /** Enable external XOR serialization via external reference vectors
232 (data frame ref. vector).
233 This method is useful when we serialize a group of related
234 sparse vectors which benefit from the XOR referential compression
235
236 @param bv_ref_ptr - external reference vector
237 if NULL - resets the use of reference vector
238 */
240
241 /**
242 Calculate XOR similarity model for ref_vector
243 reference vector must be associated before the call
244 @sa set_ref_vectors, set_sim_model
245 @internal
246 */
248 const bv_ref_vector_type& ref_vect,
249 const bm::xor_sim_params& params);
250
251 /**
252 Attach serializer to a pre-computed similarity model
253 @param sim_model - pointer to external computed model
254 */
256
257 /**
258 Returns the XOR reference compression status (enabled/disabled)
259 */
260 bool is_xor_ref() const BMNOEXCEPT { return is_xor_ref_; }
261
262 ///@}
263
264 /*! @name Serialization */
265 ///@{
266
267 /*!
268 \brief Serialize sparse vector into a memory buffer(s) structure
269
270 \param sv - sparse vector to serialize
271 \param sv_layout - buffer structure to keep the result
272 as defined in bm::serialization_flags
273 */
274 void serialize(const SV& sv,
276
277 /** Get access to the underlying bit-vector serializer
278 This access can be used to fine tune compression settings
279 @sa bm::serializer::set_compression_level
280 */
283
284 ///@}
285
286
287protected:
288 void build_xor_ref_vector(const SV& sv);
289
290 static
291 void build_plane_digest(bvector_type& digest_bv, const SV& sv);
292
293 typedef typename SV::remap_matrix_type remap_matrix_type;
294
295 /// serialize the remap matrix used for SV encoding
296 void encode_remap_matrix(bm::encoder& enc, const SV& sv);
297
298 typedef bm::heap_vector<unsigned, alloc_type, true> u32_vector_type;
301private:
303 sparse_vector_serializer& operator=(const sparse_vector_serializer&) = delete;
304
305protected:
307
308 bvector_type plane_digest_bv_; ///< bv.digest of bit-planes
309 buffer_type plane_digest_buf_; ///< serialization buf
311
313 // XOR compression member vars
319};
320
321/**
322 sparse vector de-serializer
323
324*/
325template<typename SV>
327{
328public:
329 typedef typename SV::bvector_type bvector_type;
332 typedef typename SV::value_type value_type;
333 typedef typename SV::size_type size_type;
336
337public:
340
341 /**
342 Set deserialization finalization to force deserialized vectors into READONLY (or READWRITE) mode.
343 Performance impact: Turning ON finalization will make deserialization a bit slower,
344 because each bit-vector will be re-converted into new mode (READONLY).
345 Following (search) operations may perform a bit faster.
346
347 @param is_final - finalization code
348 (use bm::finalization::READONLY to produce an immutable vector)
349 */
351
352
353 /** Set external XOR reference vectors
354 (data frame reference vectors)
355
356 @param bv_ref_ptr - external reference vector
357 if NULL - resets the use of reference
358 */
360
361 /*!
362 Deserialize sparse vector
363
364 @param sv - [out] target sparse vector to populate
365 @param buf - input BLOB source memory pointer
366 @param clear_sv - if true clears the target vector (sv)
367
368 @sa deserialize_range
369 */
370 void deserialize(SV& sv,
371 const unsigned char* buf,
372 bool clear_sv = true);
373
374 /*!
375 Deserialize sparse vector for the range [from, to]
376
377 @param sv - [out] target sparse vector to populate
378 @param buf - input BLOB source memory pointer
379 @param from - start vector index for deserialization range
380 @param to - end vector index for deserialization range
381 @param clear_sv - if true clears the target vector
382
383 */
384 void deserialize_range(SV& sv, const unsigned char* buf,
385 size_type from, size_type to,
386 bool clear_sv = true);
387
388 /*!
389 Better use deserialize_range()
390 @sa deserialize_range
391 */
392 void deserialize(SV& sv, const unsigned char* buf,
393 size_type from, size_type to)
394 {
395 deserialize_range(sv, buf, from, to);
396 }
397
398
399
400 /*!
401 Deserialize sparse vector using address mask vector
402 Address mask defines (by set bits) which vector elements to be extracted
403 from the compressed BLOB
404
405 @param sv - [out] target sparse vector to populate
406 @param buf - source memory pointer
407 @param mask_bv - AND mask bit-vector (address gather vector)
408 */
409 void deserialize(SV& sv,
410 const unsigned char* buf,
411 const bvector_type& mask_bv)
412 { idx_range_set_ = false;
413 deserialize_sv(sv, buf, &mask_bv, true);
414 }
415
416
417 /*!
418 Load serialization descriptor, create vectors but DO NOT perform full deserialization
419 @param sv - [out] target sparse vector to populate
420 @param buf - source memory pointer
421 */
423 const unsigned char* buf);
424
425
426protected:
428
429
430 /// Deserialize header/version and other common info
431 ///
432 /// @return number of bit-planes
433 ///
434 unsigned load_header(bm::decoder& dec, SV& sv, unsigned char& matr_s_ser);
435
436 void deserialize_sv(SV& sv, const unsigned char* buf,
437 const bvector_type* mask_bv,
438 bool clear_sv);
439
440
441 /// deserialize bit-vector planes
442 void deserialize_planes(SV& sv, unsigned planes,
443 const unsigned char* buf,
444 const bvector_type* mask_bv = 0);
445
446 /// load offset table
447 void load_planes_off_table(const unsigned char* buf, bm::decoder& dec, unsigned planes);
448
449 /// load NULL bit-plane (returns new planes count)
450 int load_null_plane(SV& sv,
451 int planes,
452 const unsigned char* buf,
453 const bvector_type* mask_bv);
454
455 /// load string remap dict
456 void load_remap(SV& sv, const unsigned char* remap_buf_ptr);
457
458 /// throw error on incorrect deserialization
459 static void raise_invalid_header();
460 /// throw error on incorrect deserialization
461 static void raise_invalid_64bit();
462 /// throw error on incorrect deserialization
464 /// throw error on incorrect deserialization
465 static void raise_invalid_format();
466 /// throw error on incorrect deserialization
468 /// setup deserializers
470
471 /// unset XOR compression vectors
473
474private:
476 sparse_vector_deserializer& operator=(const sparse_vector_deserializer&) = delete;
477
478 typedef bm::heap_vector<unsigned, alloc_type, true> rlen_vector_type;
479
480protected:
482 const unsigned char* remap_buf_ptr_ = 0;
486
487 bvector_type plane_digest_bv_; // digest of bit-planes
490
496 bm::heap_vector<size_t, alloc_type, true> off_vect_;
497 bm::heap_vector<unsigned, alloc_type, true> off32_vect_;
498 rlen_vector_type remap_rlen_vect_;
499
500 // XOR compression variables
501 bv_ref_vector_type bv_ref_; ///< reference vector
502 bv_ref_vector_type* bv_ref_ptr_ = 0; ///< external ref bit-vect
503
504 // Range deserialization parameters
505 bool idx_range_set_ = false;
508};
509
510
511
512/*!
513 \brief Serialize sparse vector into a memory buffer(s) structure
514
515 \param sv - sparse vector to serialize
516 \param sv_layout - buffer structure to keep the result
517 \param temp_block - temporary buffer
518 (allocate with BM_DECLARE_TEMP_BLOCK(x) for speed)
519
520 \ingroup svserial
521
522 @sa serialization_flags
523 @sa sparse_vector_deserializer
524*/
525template<class SV>
527 const SV& sv,
529 bm::word_t* temp_block = 0)
530{
 // temp_block is only referenced here to silence the unused-parameter warning
531 (void)temp_block;
 // NOTE(review): the declaration of sv_serializer is not visible in this
 // listing (the listing number jumps from 531 to 533) - confirm against the
 // original header.
533// sv_serializer.enable_xor_compression();
 // all the work is delegated to the serializer object
534 sv_serializer.serialize(sv, sv_layout);
535}
536
537// -------------------------------------------------------------------------
538
539/*!
540 \brief Deserialize sparse vector
541 \param sv - target sparse vector
542 \param buf - source memory buffer
543 \param temp_block - temporary block buffer to avoid re-allocations
544
545 \return 0 (error processing via std::logic_error)
546
547 \ingroup svserial
548 @sa sparse_vector_deserializer
549*/
550template<class SV>
552 const unsigned char* buf,
553 bm::word_t* temp_block=0)
554{
 // temp_block is only referenced here to silence the unused-parameter warning
555 (void)temp_block;
 // NOTE(review): the declaration of sv_deserializer is not visible in this
 // listing (the listing number jumps from 555 to 557) - confirm against the
 // original header.
557 sv_deserializer.deserialize(sv, buf);
 // the return value is always 0 on this path
558 return 0;
559}
560
561// -------------------------------------------------------------------------
562
563/**
564 Serializer for compressed collections
565*/
566template<class CBC>
568{
569public:
571 typedef typename CBC::bvector_type bvector_type;
572 typedef typename CBC::buffer_type buffer_type;
573 typedef typename CBC::statistics statistics_type;
574 typedef typename CBC::address_resolver_type address_resolver_type;
575
576public:
577 void serialize(const CBC& buffer_coll,
578 buffer_type& buf,
579 bm::word_t* temp_block = 0);
580};
581
582/**
583 Deserializer for compressed collections
584*/
585template<class CBC>
587{
588public:
590 typedef typename CBC::bvector_type bvector_type;
592 typedef typename CBC::buffer_type buffer_type;
593 typedef typename CBC::statistics statistics_type;
594 typedef typename CBC::address_resolver_type address_resolver_type;
595 typedef typename CBC::container_type container_type;
596
597public:
598 int deserialize(CBC& buffer_coll,
599 const unsigned char* buf,
600 bm::word_t* temp_block=0);
601};
602
603
604// -------------------------------------------------------------------------
605
606/**
607 \brief Serialize compressed collection into memory buffer
608
609Serialization format:
610
611
612<pre>
613 | MAGIC_HEADER | ADDRESS_BITVECTOR | LIST_OF_BUFFER_SIZES | BUFFER(s)
614
615 MAGIC_HEADER:
616 BYTE+BYTE: Magic-signature 'BM' or 'BC'
617 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
618
619 ADDRESS_BITVECTOR:
620 INT64: address bit-vector size
621 [memblock]: serialized address bit-vector
622
623 LIST_OF_BUFFER_SIZES:
624 INT64 - buffer sizes count
625 INT64 - buffer size 0
626 INT64 - buffer size 1
627 ...
628
629 BUFFERS:
630 [memblock]: block0
631 [memblock]: block1
632 ...
633
634</pre>
635*/
636
637template<class CBC>
639 buffer_type& buf,
640 bm::word_t* temp_block)
641{
 // NOTE(review): the declaration of `st` is not visible in this listing
 // (listing line 642 is missing) - confirm against the original header.
643 buffer_coll.calc_stat(&st);
644
 // size the output buffer to the maximum reported by calc_stat()
645 buf.resize(st.max_serialize_mem);
646
647 // ptr where bit-planes start
648 unsigned char* buf_ptr = buf.data();
649
650 bm::encoder enc(buf.data(), buf.capacity());
 // NOTE(review): the declaration of `bo` (byte-order value written below) is
 // not visible in this listing (listing line 651 is missing).
 // magic header: 'B', 'C', then one byte-order byte
652 enc.put_8('B');
653 enc.put_8('C');
654 enc.put_8((unsigned char)bo);
655
656 unsigned char* mbuf1 = enc.get_pos(); // bookmark position
657 enc.put_64(0); // address vector size (reservation)
658
 // the address bit-vector is written right after the reserved size field
659 buf_ptr = enc.get_pos();
660
661 const address_resolver_type& addr_res = buffer_coll.resolver();
662 const bvector_type& bv = addr_res.get_bvector();
663 {
664 bm::serializer<bvector_type > bvs(temp_block);
665 bvs.gap_length_serialization(false);
666
667 size_t addr_bv_size = bvs.serialize(bv, buf_ptr, buf.size());
668 buf_ptr += addr_bv_size;
669
670 enc.set_pos(mbuf1); // rewind to bookmark
671 enc.put_64(addr_bv_size); // save the address vector size
672 }
673 enc.set_pos(buf_ptr); // restore stream position
674 size_t coll_size = buffer_coll.size();
675
 // number of buffers in the collection
676 enc.put_64(coll_size);
677
678 // pass 1 (save buffer sizes)
 // each size is written as a 64-bit value
679 {
680 for (unsigned i = 0; i < buffer_coll.size(); ++i)
681 {
682 const buffer_type& cbuf = buffer_coll.get(i);
683 size_t sz = cbuf.size();
684 enc.put_64(sz);
685 } // for i
686 }
687 // pass 2 (save buffers)
 // buffer payloads follow back-to-back, in the same order as the sizes
688 {
689 for (unsigned i = 0; i < buffer_coll.size(); ++i)
690 {
691 const buffer_type& cbuf = buffer_coll.get(i);
692 size_t sz = cbuf.size();
693 enc.memcpy(cbuf.buf(), sz);
694 } // for i
695 }
 // shrink the output buffer to the number of bytes actually encoded
696 buf.resize(enc.size());
697}
698
699// -------------------------------------------------------------------------
700template<class CBC>
702 CBC& buffer_coll,
703 const unsigned char* buf,
704 bm::word_t* temp_block)
705{
706 // TODO: implement correct processing of byte-order corect deserialization
707 // ByteOrder bo_current = globals<true>::byte_order();
708
709 bm::decoder dec(buf);
710 unsigned char h1 = dec.get_8();
711 unsigned char h2 = dec.get_8();
712
713 BM_ASSERT(h1 == 'B' && h2 == 'C');
714 if (h1 != 'B' && h2 != 'C') // no magic header? issue...
715 {
716 return -1;
717 }
718 //unsigned char bv_bo =
719 dec.get_8();
720
721 // -----------------------------------------
722 // restore address resolver
723 //
724 bm::id64_t addr_bv_size = dec.get_64();
725
726 const unsigned char* bv_buf_ptr = dec.get_pos();
727
728 address_resolver_type& addr_res = buffer_coll.resolver();
729 bvector_type& bv = addr_res.get_bvector();
730 bv.clear();
731
732 bm::deserialize(bv, bv_buf_ptr, temp_block);
733 addr_res.sync();
734
735 typename bvector_type::size_type addr_cnt = bv.count();
736 dec.seek((int)addr_bv_size);
737
738 // -----------------------------------------
739 // read buffer sizes
740 //
741 bm::id64_t coll_size = dec.get_64();
742 if (coll_size != addr_cnt)
743 {
744 return -2; // buffer size collection does not match address vector
745 }
746
747 typedef size_t vect_size_type;
748 bm::heap_vector<bm::id64_t, allocator_type, true> buf_size_vec;
749
750 buf_size_vec.resize(vect_size_type(coll_size));
751 {
752 for (unsigned i = 0; i < coll_size; ++i)
753 {
754 bm::id64_t sz = dec.get_64();
755 buf_size_vec[i] = sz;
756 } // for i
757 }
758
759 {
760 container_type& buf_vect = buffer_coll.container();
761 buf_vect.resize(vect_size_type(coll_size));
762 for (unsigned i = 0; i < coll_size; ++i)
763 {
764 bm::id64_t sz = buf_size_vec[i];
765 buffer_type& b = buf_vect.at(i);
766 b.resize(sz);
767 dec.memcpy(b.data(), size_t(sz));
768 } // for i
769 }
770 buffer_coll.sync();
771 return 0;
772}
773
774// -------------------------------------------------------------------------
775//
776// -------------------------------------------------------------------------
777
778template<typename SV>
780: bv_ref_ptr_(0)
781{
782 bvs_.gap_length_serialization(false);
783 #ifdef BMXORCOMP
784 is_xor_ref_ = true;
785 #else
786 is_xor_ref_ = false;
787 #endif
788}
789
790// -------------------------------------------------------------------------
791
792template<typename SV>
794 const bv_ref_vector_type* bv_ref_ptr) BMNOEXCEPT
795{
796 bv_ref_ptr_ = bv_ref_ptr;
797 is_xor_ref_ = bool(bv_ref_ptr);
798 sim_model_ptr_ = 0;
799}
800
801// -------------------------------------------------------------------------
802
803template<typename SV>
805{
806 bv_ref_ptr_ = 0; // reset external ref.vector
807 is_xor_ref_ = is_enabled;
808}
809
810// -------------------------------------------------------------------------
811
812template<typename SV>
814 xor_sim_model_type& sim_model,
815 const bv_ref_vector_type& ref_vect,
816 const xor_sim_params& params)
817{
818 bvs_.compute_sim_model(sim_model, ref_vect, params);
819}
820
821// -------------------------------------------------------------------------
822
823template<typename SV>
829
830// -------------------------------------------------------------------------
831
832template<typename SV>
834{
835 //bv_ref_.reset();
836 bv_ref_.build(sv.get_bmatrix());
837}
838
839// -------------------------------------------------------------------------
840
841template<typename SV>
843 const SV& sv)
844{
 // Writes the remap matrix of sv into enc, either as a raw matrix copy
 // (tag 'R') or as a compressed sparse row list (tag 'C'), followed by the
 // closing token 'E'.
845 const typename SV::remap_matrix_type* rm = sv.get_remap_matrix();
846 BM_ASSERT(rm);
847
848 const remap_matrix_type& rmatr = *rm;
849
850 size_t rows = rmatr.rows();
851 size_t cols = rmatr.cols();
852
853 BM_ASSERT(cols <= 256);
854 BM_ASSERT(rows <= ~0u);
855
856 // compute CSR capacity vector
 // one count per row is collected; the scan stops at the first row whose
 // count is zero (presumably bm::count_nz counts non-zero cells - confirm)
857 remap_rlen_vect_.resize(0);
858 for (size_t r = 0; r < rows; ++r)
859 {
860 const unsigned char* BMRESTRICT remap_row = rmatr.row(r);
861 size_t cnt = bm::count_nz(remap_row, cols);
862 if (!cnt)
863 break;
864 remap_rlen_vect_.push_back(unsigned(cnt));
865 } // for r
866
867 rows = remap_rlen_vect_.size(); // effective rows in the remap table
868
 // size estimate for the CSR form: sizeof(bm::gap_word_t) per row plus
 // two bytes per counted element (the element loop below writes an index
 // byte and a value byte)
869 size_t csr_size_max = rows * sizeof(bm::gap_word_t);
870 for (size_t r = 0; r < rows; ++r)
871 {
872 unsigned rl = remap_rlen_vect_[r];
873 csr_size_max += rl * 2;
874 } // for r
875
876 size_t remap_size = sv.remap_size();
877
 // pick the raw form when it is smaller than the CSR estimate
878 if (remap_size < csr_size_max)
879 {
880 const unsigned char* matrix_buf = sv.get_remap_buffer();
881 BM_ASSERT(matrix_buf);
882 BM_ASSERT(remap_size);
883
 // raw form: tag, 64-bit size, then the matrix bytes
884 enc.put_8('R');
885 enc.put_64(remap_size);
886 enc.memcpy(matrix_buf, size_t(remap_size));
887 }
888 else
889 {
890 enc.put_8('C'); // Compressed sparse row (CSR)
891 enc.put_32(unsigned(rows));
892 enc.put_16(bm::gap_word_t(cols)); // <= 255 chars
893
 // row lengths are gamma-coded through `bo`
 // NOTE(review): the declaration of `bo` is not visible in this listing
 // (listing line 895 is missing) - confirm against the original header.
894 {
896 for (size_t r = 0; r < rows; ++r)
897 {
898 unsigned rl = remap_rlen_vect_[r];
899 bo.gamma(rl);
900 } // for r
901 }
902
 // per row: a (column index, value) byte pair for every non-zero cell
903 for (size_t r = 0; r < rows; ++r)
904 {
905 const unsigned char* BMRESTRICT row = rmatr.row(r);
906 for (size_t j = 0; j < cols; ++j)
907 {
908 unsigned char v = row[j];
909 if (v)
910 {
911 enc.put_8((unsigned char)j);
912 enc.put_8(v);
913 }
914 } // for j
915 } // for r
916 }
917
918 enc.put_8('E'); // end of matrix (integrity check token)
919}
920
921// -------------------------------------------------------------------------
922
923template<typename SV>
925 const SV& sv)
926{
927 digest_bv.init();
928 digest_bv.clear(false);
929 unsigned planes = (unsigned)sv.get_bmatrix().rows();
930 for (unsigned i = 0; i < planes; ++i)
931 {
932 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
933 if (bv)
934 digest_bv.set_bit_no_check(i);
935 } // for i
936}
937
938// -------------------------------------------------------------------------
939
940template<typename SV>
943{
 // Serializes sv into sv_layout: bit-planes first, then (for remap-capable
 // vectors) the remap matrix, then the plane digest and the plane offset
 // table; the header at the buffer start is written last.
944 bvs_.allow_stat_reset(false); // stats accumulate mode for all bit-slices
945 bvs_.reset_compression_stats();
946
947 if (!sv.size()) // special case of an empty vector
948 {
 // an empty vector is stored as the 2-byte marker 'B','Z'
949 unsigned char* buf = sv_layout.reserve(4);
950 buf[0]='B'; buf[1] = 'Z';
951 sv_layout.resize(2);
952 return;
953 }
954
 // NOTE(review): listing lines 955 and 957 are missing around this call;
 // plane_digest_bv_ / plane_digest_buf_ are used below without a visible
 // fill step - confirm against the original header.
956 bvs_.set_ref_vectors(0); // disable possible XOR compression for offs.bv
958
959 unsigned planes = (unsigned)sv.get_bmatrix().rows();
960 sv_layout.resize_slices(planes);
961
962 // ----------------------------------------------------
963 // memory pre-reservation
964 //
965 typename SV::statistics sv_stat;
966 sv.calc_stat(&sv_stat);
 // extra room for the digest buffer and 8 bytes per plane
967 sv_stat.max_serialize_mem += plane_digest_buf_.size() + (8 * planes);
968 unsigned char* buf = sv_layout.reserve(sv_stat.max_serialize_mem);
969
970 // ----------------------------------------------------
971 //
 // enc is positioned at the buffer start and is used at the end of the
 // function to write the header
972 bm::encoder enc(buf, sv_layout.capacity());
973
974 // header size in bytes
975 unsigned h_size = 1 + 1 + // "BM" or "BC" (magic header)
976 1 + // byte-order
977 1 + // number of bit-planes (for vector)
978 8 + // size (internal 64-bit)
979 8 + // offset to digest (64-bit)
980 4; // reserve
981 // for large plane matrixes
982 {
983 h_size += 1 + // version number
984 8; // number of planes (64-bit)
985 }
986
987 // ----------------------------------------------------
988 // Setup XOR reference compression
989 //
990 if (is_xor_ref())
991 {
992 if (bv_ref_ptr_) // use external reference
993 {
994 // ref vector and similarity model, both must(!) be set
 // NOTE(review): listing line 995 is missing here.
996 bvs_.set_ref_vectors(bv_ref_ptr_);
997 bvs_.set_sim_model(sim_model_ptr_);
998 }
999 else
1000 {
1001 bm::xor_sim_params xs_params;
 // NOTE(review): listing line 1002 is missing here; bv_ref_ is used
 // below without a visible build step - confirm.
1003 bvs_.set_ref_vectors(&bv_ref_);
 // the similarity model is attached only when its computation reports success
1004 if (bvs_.compute_sim_model(sim_model_, bv_ref_, xs_params))
1005 bvs_.set_sim_model(&sim_model_);
1006 }
1007 }
1008
1009 // ----------------------------------------------------
1010 // Serialize all bvector planes
1011 //
1012
 // the header area is zero-filled now and written at the very end
1013 ::memset(buf, 0, h_size);
1014 unsigned char* buf_ptr = buf + h_size; // ptr where planes start (start+hdr)
1015
1016 for (unsigned i = 0; i < planes; ++i)
1017 {
1018 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
1019 if (!bv) // empty plane
1020 {
1021 sv_layout.set_plane(i, 0, 0);
1022 continue;
1023 }
1024 if (is_xor_ref())
1025 {
 // locate this plane in the active reference vector and tell the
 // bit-vector serializer which index is being serialized
1026 unsigned idx;
1027 if (bv_ref_ptr_) // use external reference
1028 idx = (unsigned)bv_ref_ptr_->find_bv(bv);
1029 else
1030 idx = (unsigned)bv_ref_.find_bv(bv);
1031 BM_ASSERT(idx != bv_ref_.not_found());
1032 bvs_.set_curr_ref_idx(idx);
1033 }
1034 size_t buf_size = (size_t)
1035 bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
1036
1037 sv_layout.set_plane(i, buf_ptr, buf_size);
1038 buf_ptr += buf_size;
 // max_serialize_mem doubles as the remaining-space counter
1039 if (sv_stat.max_serialize_mem > buf_size)
1040 {
1041 sv_stat.max_serialize_mem -= buf_size;
1042 continue;
1043 }
1044 BM_ASSERT(0); // TODO: throw an exception here
1045 } // for i
1046
1047 bvs_.set_ref_vectors(0); // dis-engage XOR ref vector
1048
1049 // -----------------------------------------------------
1050 // serialize the re-map matrix
1051 //
1052 if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
1053 {
1054 bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
 // 'N' alone marks "no remapping"
1055 if (sv.is_remap())
1056 encode_remap_matrix(enc_m, sv);
1057 else
1058 enc_m.put_8('N');
1059 buf_ptr += enc_m.size(); // add encoded data size
1060 }
1061
1062 // ------------------------------------------------------
1063 // save the digest vector
1064 //
1065 size_t digest_offset = size_t(buf_ptr - buf); // digest position from the start
1066 ::memcpy(buf_ptr, plane_digest_buf_.buf(), plane_digest_buf_.size());
1067 buf_ptr += plane_digest_buf_.size();
 // the plane offset table follows the digest
1068 {
1069 bool use_64bit = false;
1070 plane_off_vect_.resize(0);
 // collect 32-bit offsets of non-empty planes; stop and fall back to the
 // 64-bit table as soon as one offset exceeds bm::id_max32
1071 for (unsigned i = 0; i < planes; ++i)
1072 {
1073 const unsigned char* p = sv_layout.get_plane(i);
1074 if (p)
1075 {
1076 size_t offset = size_t(p - buf);
1077 if (offset > bm::id_max32)
1078 {
1079 use_64bit = true;
1080 break;
1081 }
1082 plane_off_vect_.push_back(unsigned(offset)); // cast is not a bug
1083 }
1084 } // for i
1085 bm::encoder enc_o(buf_ptr, sv_stat.max_serialize_mem);
 // fewer than 4 offsets also use the plain 64-bit table
1086 if (use_64bit || (plane_off_vect_.size() < 4))
1087 {
1088 enc_o.put_8('6');
1089 // save the offset table as a list of 64-bit values
1090 //
1091 for (unsigned i = 0; i < planes; ++i)
1092 {
1093 const unsigned char* p = sv_layout.get_plane(i);
1094 if (p)
1095 {
1096 size_t offset = size_t(p - buf);
1097 enc_o.put_64(offset);
1098 }
1099 } // for
1100 }
1101 else // serialize 32-bit offset table using BIC
1102 {
1103 BM_ASSERT(plane_off_vect_.size() == plane_digest_bv_.count());
 // first and last offsets are stored explicitly as min/max
1104 unsigned min_v = plane_off_vect_[0];
1105 unsigned max_v = plane_off_vect_[plane_off_vect_.size()-1];
1106
1107 enc_o.put_8('3');
1108 enc_o.put_32(min_v);
1109 enc_o.put_32(max_v);
1110
1111 bm::bit_out<bm::encoder> bo(enc_o);
 // NOTE(review): the start of this call is not visible in this listing
 // (listing line 1112 is missing); only its trailing arguments remain.
1113 unsigned(plane_off_vect_.size()-2),
1114 min_v, max_v);
1115 }
1116 buf_ptr += enc_o.size();
1117 }
1118
1119
1120
1121 sv_layout.resize(size_t(buf_ptr - buf)); // set the true occupied size
1122
1123 // -----------------------------------------------------
1124 // save the header
1125 //
 // NOTE(review): the declaration of `bo` (byte order, written below) is not
 // visible in this listing (listing line 1126 is missing).
1127
1128 enc.put_8('B'); // magic header 'BM' - bit matrix 'BC' - bit compressed
1129 if (sv.is_compressed())
1130 enc.put_8('C');
1131 else
1132 enc.put_8('M');
1133
1134 enc.put_8((unsigned char)bo); // byte order
1135
 // matrix serialization version: 1, or 2 when built with BM64ADDR
1136 unsigned char matr_s_ser = 1;
1137#ifdef BM64ADDR
1138 matr_s_ser = 2;
1139#endif
1140
1141 enc.put_8(0); // number of planes == 0 (legacy magic number)
1142 enc.put_8(matr_s_ser); // matrix serialization version
1143 {
 // the plane count is stored with the top (63rd) bit set
1144 bm::id64_t planes_code = planes | (1ull << 63);
1145 enc.put_64(planes_code); // number of rows in the bit-matrix
1146 }
1147 enc.put_64(sv.size_internal());
1148 enc.put_64(bm::id64_t(digest_offset));
1149}
1150
1151// -------------------------------------------------------------------------
1152//
1153// -------------------------------------------------------------------------
1154
1155template<typename SV>
1157{
1158 temp_block_ = alloc_.alloc_bit_block();
1159 not_null_mask_bv_.set_allocator_pool(&pool_);
1160 rsc_mask_bv_.set_allocator_pool(&pool_);
1161}
1162
1163// -------------------------------------------------------------------------
1164
1165template<typename SV>
1171
1172// -------------------------------------------------------------------------
1173
1174template<typename SV>
1175void
1180
1181// -------------------------------------------------------------------------
1182
1183template<typename SV>
1184void
1191
1192// -------------------------------------------------------------------------
1193
1194template<typename SV>
1196{
1197 op_deserial_.set_ref_vectors(0);
1198 deserial_.set_ref_vectors(0);
1199 bv_ref_.reset();
1200}
1201
1202// -------------------------------------------------------------------------
1203
1204template<typename SV>
1206{
1207 if (bv_ref_ptr_)
1208 {
1209 op_deserial_.set_ref_vectors(bv_ref_ptr_);
1210 deserial_.set_ref_vectors(bv_ref_ptr_);
1211 }
1212 else
1213 {
1214 op_deserial_.set_ref_vectors(&bv_ref_);
1215 deserial_.set_ref_vectors(&bv_ref_);
1216 }
1217}
1218
1219// -------------------------------------------------------------------------
1220
1221template<typename SV>
1223 const unsigned char* buf,
1224 bool clear_sv)
1225{
1226 BM_ASSERT(buf);
1227
1228 idx_range_set_ = false;
1229 deserialize_sv(sv, buf, 0, clear_sv);
1230}
1231
1232// -------------------------------------------------------------------------
1233
1234template<typename SV>
1236 const unsigned char* buf)
1237{
1238 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1239
1240 unsigned char matr_s_ser = 0;
1241 unsigned planes = load_header(dec, sv, matr_s_ser);
1242 if (!planes)
1243 return;
1244
1245 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1246
1247 sv.get_bmatrix().allocate_rows(planes);
1248
1249 for (unsigned i = 0; i < planes; ++i)
1250 {
1251 if (!off_vect_[i]) // empty vector
1252 continue;
1253 bvector_type* bv = sv.get_create_slice(i);
1254 BM_ASSERT(bv); (void)bv;
1255 } // for i
1256}
1257
1258// -------------------------------------------------------------------------
1259
1260template<typename SV>
1262 const unsigned char* buf,
1263 size_type from,
1264 size_type to,
1265 bool clear_sv)
1266{
 // Range deserialization: loads the header and plane offsets, resolves the
 // requested [from, to] range, then deserializes the planes.
1267 if (clear_sv)
1268 sv.clear_all(true, 1);
1269
 // remember the requested range for the plane deserialization below
1270 idx_range_set_ = true; idx_range_from_ = from; idx_range_to_ = to;
1271
1272 remap_buf_ptr_ = 0;
1273 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1274
1275 unsigned char matr_s_ser = 0;
1276 unsigned planes = load_header(dec, sv, matr_s_ser);
1277
 // NOTE(review): this early return leaves idx_range_set_ == true, unlike
 // the normal exit at the end of the function - confirm this is intended.
1278 if (!sv_size_) // empty vector
1279 return;
1280
1281 sv.resize_internal(size_type(sv_size_));
1282 bv_ref_.reset();
1283
1284 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1285
 // NOTE(review): listing line 1286 is missing here.
1287
1288 sv.get_bmatrix().allocate_rows(planes);
1289
1290 // TODO: add range for not NULL plane
 // load_null_plane() returns the plane count to use from here on
1291 planes = (unsigned)load_null_plane(sv, int(planes), buf, 0);
1292
1293 // check if mask needs to be recalculated using the NULL (index) vector
 // NOTE(review): the condition guarding the block below is not visible in
 // this listing (listing line 1294 is missing).
1295 {
1296 // recalculate planes range
1297 size_type sv_left, sv_right;
1298 bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
1299 if (!range_valid)
1300 {
 // the range could not be resolved: result is an empty vector
1301 sv.clear();
1302 idx_range_set_ = false;
1303 return;
1304 }
1305 else
1306 {
 // continue with the resolved range instead of the requested one
1307 idx_range_set_ = true; idx_range_from_ = sv_left; idx_range_to_ = sv_right;
1308 }
1309 }
1310
1311 deserialize_planes(sv, planes, buf, 0);
1312
 // NOTE(review): listing line 1313 is missing here.
1314
1315 // load the remap matrix
1316 //
 // NOTE(review): the condition opening this block (listing line 1317) and
 // the statement controlled by `if (matr_s_ser)` (listing line 1320) are
 // not visible in this listing.
1318 {
1319 if (matr_s_ser)
1321 } // if remap traits
1322
1323 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1324
 // reset per-call state before returning
1325 remap_buf_ptr_ = 0;
1326
1327 idx_range_set_ = false;
1328}
1329
1330// -------------------------------------------------------------------------
1331
/*!
    \brief Core deserialization routine with an optional gather mask.

    \param sv       - target sparse vector
    \param buf      - serialized BLOB
    \param mask_bv  - optional mask bit-vector (may be NULL); passed on to
                      load_null_plane() and deserialize_planes()
    \param clear_sv - when true, sv.clear_all(true, 1) is called before decoding
*/
1332template<typename SV>
1334 const unsigned char* buf,
1335 const bvector_type* mask_bv,
1336 bool clear_sv)
1337{
1338 if (clear_sv)
1339 sv.clear_all(true, 1); // free memory, keep remap matrix
1340
1341 remap_buf_ptr_ = 0;
1342 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1343
1344 unsigned char matr_s_ser = 0;
1345 unsigned planes = load_header(dec, sv, matr_s_ser);
1346 if (!sv_size_)
1347 return; // empty vector
1348
1349 sv.resize_internal(size_type(sv_size_));
1350 bv_ref_.reset();
1351
1352 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1353
1355
1356 sv.get_bmatrix().allocate_rows(planes);
// load_null_plane() returns the plane count that remains for deserialize_planes()
1357 planes = (unsigned)load_null_plane(sv, int(planes), buf, mask_bv);
1358
1359
1360 // check if mask needs to be recalculated using the NULL (index) vector
// (the condition guarding the following block is not shown in this listing)
1362 {
1363 if (mask_bv)
1364 {
1365 const bvector_type* bv_null = sv.get_null_bvector();
1366 BM_ASSERT(bv_null);
1367 rsc_mask_bv_.clear(true);
// restrict the caller's mask to not-NULL positions, then rank-compress it
// against the NULL vector; the result replaces mask_bv for the plane decode
1368 not_null_mask_bv_.bit_and(*bv_null, *mask_bv, bvector_type::opt_compress);
1369 rsc_compressor_.compress(rsc_mask_bv_, *bv_null, not_null_mask_bv_);
1370 mask_bv = &rsc_mask_bv_;
1371
1372 // if it needs range recalculation
1373 if (idx_range_set_) // range setting is in effect
1374 {
1375 //bool rf =
1377 }
1378 }
1379 }
1380
1381 deserialize_planes(sv, planes, buf, mask_bv);
1382
1383 // restore NULL slice index
1384#ifdef _MSC_VER
1385#pragma warning( push )
1386#pragma warning( disable : 4127)
1387#endif
1388 if (sv.max_vector_size == 1)
1389 {
1390 // NULL vector at: (sv.max_vector_size * sizeof(value_type) * 8 + 1)
1391 const bvector_type* bv_null = sv.get_slice(sv.sv_value_slices);
1392 if (bv_null)
1393 sv.mark_null_idx(sv.sv_value_slices); // last slice is NULL
1394 }
1395#ifdef _MSC_VER
1396#pragma warning( pop )
1397#endif
1398
1399
1401
1402 // load the remap matrix
1403 //
// (the condition and the statement under "if (matr_s_ser)" are not shown in this listing)
1405 {
1406 if (matr_s_ser)
1408 } // if remap traits
1409
1410 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1411 remap_buf_ptr_ = 0;
1412}
1413
1414// -------------------------------------------------------------------------
1415
/*!
    \brief Read the serialization header and set up common decode state.

    Side effects: stores the vector size in sv_size_ and, for non-empty
    packages, the digest offset in digest_offset_ (0 when none is present).

    \param dec        - decoder positioned at the start of the BLOB
    \param sv         - target vector; queried for stored_slices() when
                        SV::is_dynamic_splices is false
    \param matr_s_ser - [out] matrix serialization version; written only when
                        the one-byte plane count read from the header is 0
    \return number of planes, or 0 for an empty ('Z') package
*/
1416template<typename SV>
1418 bm::decoder& dec, SV& sv, unsigned char& matr_s_ser)
1419{
1420 (void)sv;
1421 bm::id64_t planes_code = 0;
// two signature bytes: 'B' followed by 'M', 'C' or 'Z'
1422 unsigned char h1 = dec.get_8();
1423 unsigned char h2 = dec.get_8();
1424
1425 BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C' || h2 == 'Z'));
1426
1427 bool sig2_ok = (h2 == 'M' || h2 == 'C' || h2 == 'Z');
// (the statement executed on a bad signature is not shown in this listing)
1428 if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C')) // no magic header?
1430 unsigned planes = 0;
1431 if (h2 == 'Z') // empty serialization package
1432 {
1433 sv_size_ = 0;
1434 return planes;
1435 }
1436
// byte-order byte is read to advance the decoder but is not used
1437 unsigned char bv_bo = dec.get_8(); (void) bv_bo;
1438 planes = dec.get_8();
1439 if (planes == 0) // bit-matrix
1440 {
1441 matr_s_ser = dec.get_8(); // matrix serialization version
1442 planes_code = dec.get_64();
// only the low 32 bits are kept as the row count; bit 63 is tested below
1443 planes = (unsigned) planes_code; // number of rows in the bit-matrix
1444 }
1445 #ifdef BM64ADDR
1446 #else
// (the statement executed for a 64-bit matrix in a 32-bit build is not shown in this listing)
1447 if (matr_s_ser == 2) // 64-bit matrix
1449 #endif
1450
1451 if constexpr (SV::is_dynamic_splices::value == false)
1452 {
// fixed-slice vectors: the serialized plane count must fit the target
1453 unsigned sv_planes = sv.stored_slices();
// (the statement executed on a plane-count mismatch is not shown in this listing)
1454 if (!planes || planes > sv_planes)
1456 }
1457
1458 sv_size_ = dec.get_64();
1459
1460 digest_offset_ = 0;
// bit 63 of planes_code signals that a 64-bit digest offset follows
1461 if (planes_code & (1ull << 63))
1462 {
1463 digest_offset_ = dec.get_64();
1464 }
1465
1466 return planes;
1467}
1468
1469// -------------------------------------------------------------------------
1470
/*!
    \brief Deserialize the bit-planes listed in off_vect_.

    Planes are visited from index planes-1 down to 0; planes with a zero
    offset are skipped. When no external reference list is set (bv_ref_ptr_
    is NULL) each decoded plane is added to bv_ref_. The deserializer range
    is cleared with unset_range() before returning.

    NOTE(review): several branch conditions of this function are missing from
    this listing, so the comments below describe only the visible statements.

    \param sv      - target sparse vector
    \param planes  - number of planes to process
    \param buf     - serialized BLOB (plane offsets are relative to it)
    \param mask_bv - optional gather mask (may be NULL)
*/
1471template<typename SV>
1473 SV& sv,
1474 unsigned planes,
1475 const unsigned char* buf,
1476 const bvector_type* mask_bv)
1477{
// with a mask but no explicit range, derive the range from the mask itself
1478 if (mask_bv && !idx_range_set_)
1479 idx_range_set_ = mask_bv->find_range(idx_range_from_, idx_range_to_);
1480
1481 // read-deserialize the planes based on offsets
1482 // backward order to bring the NULL vector first
1483 //
1484 for (int i = int(planes-1); i >= 0; --i)
1485 {
1486 size_t offset = off_vect_[unsigned(i)];
1487 if (!offset) // empty vector
1488 continue;
1489 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1490 bvector_type* bv = sv.get_create_slice(unsigned(i));
1491 BM_ASSERT(bv);
1492
1493 // add the vector into the XOR reference list
1494 if (!bv_ref_ptr_)
1495 bv_ref_.add(bv, unsigned(i));
1496 if (mask_bv) // gather mask set, use AND operation deserializer
1497 {
1498 typename bvector_type::mem_pool_guard mp_g_z(pool_, *bv);
1500 && !remap_buf_ptr_) // last plane vector (special case)
1501 {
// full decode so that the end of this plane (start of the remap data) is known
1502 size_t read_bytes =
1503 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1504 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1505 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1506 }
1507 else
1508 {
1509 if (idx_range_set_)
1511 deserial_.deserialize(*bv, bv_buf_ptr);
1512 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1513 }
1514 }
1515 else
1516 {
1519 {
// full decode so that the end of this plane (start of the remap data) is known
1520 size_t read_bytes =
1521 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1522 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1523 if (idx_range_set_)
1524 bv->keep_range(idx_range_from_, idx_range_to_);
1525 }
1526 else
1527 {
1528 if (idx_range_set_)
1529 {
1531 deserial_.deserialize(*bv, bv_buf_ptr);
1532 bv->keep_range(idx_range_from_, idx_range_to_);
1533 }
1534 else
1535 {
1536 //size_t read_bytes =
1537 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1538 }
1539 }
1540 }
1541
// optional finalization of the decoded plane (the case label is not shown in this listing)
1542 switch (is_final_)
1543 {
1545 bv->freeze();
1546 break;
1547 default:
1548 break;
1549 }
1550 } // for i
1551
1552 deserial_.unset_range();
1553
1554}
1555
1556// -------------------------------------------------------------------------
1557
/*!
    \brief Load the NULL bit-plane, which is stored as the last plane.

    For a non-nullable vector nothing is loaded and the plane count is
    returned unchanged. Otherwise plane (planes - 1) is decoded when its
    offset is non-zero, and planes - 1 is returned so the caller can process
    the remaining planes.

    NOTE(review): several branch conditions of this function are missing from
    this listing, so the comments below describe only the visible statements.

    \param sv      - target sparse vector
    \param planes  - total number of planes (must be > 0)
    \param buf     - serialized BLOB (plane offsets are relative to it)
    \param mask_bv - optional mask, AND-ed into the plane in the
                     "non-compressed SV" branch (may be NULL)
    \return new plane count
*/
1558template<typename SV>
1560 int planes,
1561 const unsigned char* buf,
1562 const bvector_type* mask_bv)
1563{
1564 BM_ASSERT(planes > 0);
1565 if (!sv.is_nullable())
1566 return planes;
// the NULL plane is the last one
1567 int i = planes - 1;
1568 size_t offset = off_vect_[unsigned(i)];
1569 if (offset)
1570 {
1571 // TODO: improve serialization format to avoid non-range decode of
1572 // the NULL vector just to get to the offset of remap table
1573
1574 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1575 bvector_type* bv = sv.slice(unsigned(i));
1576
1577 if (!bv_ref_ptr_)
1578 bv_ref_.add(bv, unsigned(i));
1579
1581 {
1582 // load the whole not-NULL vector regardless of range
1583 // TODO: load [0, idx_range_to_]
1584 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
// the remap data follows directly after this plane
1585 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1586 }
1587 else // non-compressed SV
1588 {
1589 // NULL plane in string vector with substitute re-map
1590 //
1592 {
1594 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1595 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1596 if (idx_range_set_)
1597 bv->keep_range(idx_range_from_, idx_range_to_);
1598 }
1599 else
1600 if (idx_range_set_)
1601 {
1603 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1604 bv->keep_range(idx_range_from_, idx_range_to_);
1605 deserial_.unset_range();
1606 }
1607 else
1608 {
1609 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1610 }
1611 if (mask_bv)
1612 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1613 }
1614
// optional finalization of the decoded plane (the case label is not shown in this listing)
1615 switch (is_final_)
1616 {
1618 bv->freeze();
1619 break;
1620 default:
1621 break;
1622 }
1623
1624 }
1625 return planes-1;
1626}
1627
1628// -------------------------------------------------------------------------
1629
/*!
    \brief Load the table of per-plane offsets into off_vect_.

    Two layouts are handled:
    - digest_offset_ != 0: a digest bit-vector (one bit per non-empty plane)
      is decoded at buf + digest_offset_, followed by a type byte and the
      offsets of the non-empty planes only ('6' = plain 64-bit values,
      '3' = 32-bit values, binary-interpolative coded between min and max);
    - digest_offset_ == 0: one 64-bit offset per plane is read from dec.

    Planes without a digest bit get offset 0.

    \param buf    - start of the serialized BLOB
    \param dec    - header decoder, used only for the non-digest layout
    \param planes - number of planes (size of off_vect_ after the call)
*/
1630template<typename SV>
1632 const unsigned char* buf, bm::decoder& dec, unsigned planes)
1633{
1634 off_vect_.resize(planes);
1635 if (digest_offset_)
1636 {
1637 plane_digest_bv_.clear(false);
1638 const unsigned char* buf_ptr = buf + digest_offset_;
1639 size_t read_bytes =
1640 deserial_.deserialize(plane_digest_bv_, buf_ptr, temp_block_);
// the offset block starts right after the serialized digest vector
1641 buf_ptr += read_bytes;
1642
1643 bm::decoder dec_o(buf_ptr);
1644
1645 unsigned char dtype = dec_o.get_8();
1646 switch (dtype)
1647 {
1648 case '6':
1649 for (unsigned i = 0; i < planes; ++i)
1650 {
1651 size_t offset = 0;
1652 if (plane_digest_bv_.test(i))
1653 offset = (size_t) dec_o.get_64();
1654 off_vect_[i] = offset;
1655 } // for i
1656 break;
1657 case '3':
1658 {
1659 unsigned osize = (unsigned)plane_digest_bv_.count();
1660 BM_ASSERT(osize);
1661 off32_vect_.resize(osize);
1662
1663 unsigned min_v = dec_o.get_32();
1664 unsigned max_v = dec_o.get_32();
1665
// first and last offsets are stored explicitly; the interior is BIC-coded
1666 off32_vect_[0] = min_v;
1667 off32_vect_[osize-1] = max_v;
1668
// NOTE(review): osize is unsigned, so "osize-2" wraps around when osize < 2
// (only osize == 0 is asserted above) - confirm the writer never emits '3'
// for fewer than two non-empty planes, or guard against corrupted input
1669 bm::bit_in<bm::decoder> bi(dec_o);
1670 bi.bic_decode_u32_cm(off32_vect_.data()+1, osize-2, min_v, max_v);
1671
// scatter the dense 32-bit offsets back to their plane positions
1672 unsigned k = 0;
1673 for (unsigned i = 0; i < planes; ++i)
1674 {
1675 if (plane_digest_bv_.test(i))
1676 {
1677 off_vect_[i] = off32_vect_[k];
1678 ++k;
1679 }
1680 else
1681 off_vect_[i] = 0;
1682 }
1683 }
1684 break;
1685 default:
// unknown offset encoding: only asserted, off_vect_ is not filled in
1686 // TODO: raise an exception
1687 BM_ASSERT(0);
1688 } // switch
1689 }
1690 else
1691 {
// no digest: every plane has an explicit 64-bit offset in the header stream
1692 for (unsigned i = 0; i < planes; ++i)
1693 {
1694 size_t offset = (size_t) dec.get_64();
1695 off_vect_[i] = offset;
1696 } // for i
1697 }
1698}
1699
1700// -------------------------------------------------------------------------
1701
/*!
    \brief Load the remap dictionary that follows the serialized planes.

    The first byte selects the layout:
    - 'N' : no remap data, return immediately;
    - 'R' : 64-bit size followed by a raw copy into the vector's remap buffer;
    - 'C' : CSR form - 32-bit row count, 16-bit column count, gamma-coded
            row lengths, then (index, value) byte pairs for each row.
    After 'R' or 'C' a terminating 'E' byte is expected and sv.set_remap()
    is called.

    NOTE(review): the bodies of the error branches are missing from this
    listing; presumably they raise a format error - confirm against the
    original header.

    \param sv            - target sparse vector
    \param remap_buf_ptr - start of the remap section; NULL means nothing to load
*/
1702template<typename SV>
1704 const unsigned char* remap_buf_ptr)
1705{
1706 if (!remap_buf_ptr)
1707 return;
1708
1709 bm::decoder dec_m(remap_buf_ptr);
1710
1711 unsigned char rh = dec_m.get_8();
1712 switch (rh)
1713 {
1714 case 'N':
1715 return;
1716 case 'R':
1717 {
1718 size_t remap_size = (size_t) dec_m.get_64();
1719 unsigned char* remap_buf = sv.init_remap_buffer();
1720 BM_ASSERT(remap_buf);
1721 size_t target_remap_size = sv.remap_size();
// stored size must be non-zero and equal to the target buffer size
1722 if (!remap_size || !remap_buf || remap_size != target_remap_size)
1723 {
1725 }
1726 dec_m.memcpy(remap_buf, remap_size);
1727 }
1728 break;
1729
1730 case 'C': // CSR remap
1731 {
1732 //sv.init_remap_buffer();
1733 typename SV::remap_matrix_type* rmatr = sv.get_remap_matrix();
1734 if (!rmatr)
1735 {
1737 }
1738 size_t rows = (size_t) dec_m.get_32();
1739 size_t cols = dec_m.get_16();
1740 if (cols > 256)
1741 {
1743 }
1744 rmatr->resize(rows, cols, false);
1745 if (rows)
1746 {
1747 rmatr->set_zero();
1748
1749 // read gamma encoded row lens
1750 remap_rlen_vect_.resize(0);
1751 {
// bit reader is scoped so it is destroyed before dec_m is used byte-wise again
1752 bm::bit_in<bm::decoder> bi(dec_m);
1753 for (size_t r = 0; r < rows; ++r)
1754 {
1755 unsigned rl = bi.gamma();
1756 remap_rlen_vect_.push_back(rl);
1757 } // for r
1758 }
1759
1760 for (size_t r = 0; r < rows; ++r)
1761 {
1762 unsigned char* BMRESTRICT row = rmatr->row(r);
1763 size_t cnt = remap_rlen_vect_[r];
1764 if (!cnt || cnt > 256)
1765 {
1766 raise_invalid_format(); // format corruption!
1767 }
1768 for (size_t j = 0; j < cnt; ++j)
1769 {
// each entry is an (index, value) byte pair
// NOTE(review): idx (0..255) is not checked against cols before the store
1770 unsigned idx = dec_m.get_8();
1771 unsigned char v = dec_m.get_8();
1772 row[idx] = v;
1773 } // for j
1774 } // for r
1775 }
1776 }
1777 break;
1778 default:
1779 // re-map matrix code error
1781 } // switch
1782
1783 // finalize the remap matrix read
1784 //
1785 unsigned char end_tok = dec_m.get_8();
1786 if (end_tok != 'E')
1787 {
1789 }
1790 sv.set_remap();
1791}
1792
1793// -------------------------------------------------------------------------
1794
/*!
    \brief Report an invalid serialization signature header.

    Throws std::logic_error when STL is available, otherwise uses
    BM_THROW(BM_ERR_SERIALFORMAT).
*/
1795template<typename SV>
1797{
1798#ifndef BM_NO_STL
1799 throw std::logic_error("BitMagic: Invalid serialization signature header");
1800#else
1801 BM_THROW(BM_ERR_SERIALFORMAT);
1802#endif
1803}
1804
1805// -------------------------------------------------------------------------
1806
/*!
    \brief Report an attempt to load a 64-bit BLOB into an incompatible target.

    Throws std::logic_error when STL is available, otherwise uses
    BM_THROW(BM_ERR_SERIALFORMAT).
*/
1807template<typename SV>
1809{
1810#ifndef BM_NO_STL
1811 throw std::logic_error("BitMagic: Invalid serialization target (64-bit BLOB)");
1812#else
1813 BM_THROW(BM_ERR_SERIALFORMAT);
1814#endif
1815}
1816
1817// -------------------------------------------------------------------------
1818
/*!
    \brief Report a bit-depth mismatch between the BLOB and the target vector.

    Throws std::logic_error when STL is available, otherwise uses
    BM_THROW(BM_ERR_SERIALFORMAT).
*/
1819template<typename SV>
1821{
1822#ifndef BM_NO_STL
1823 throw std::logic_error("BitMagic: Invalid serialization target (bit depth)");
1824#else
1825 BM_THROW(BM_ERR_SERIALFORMAT);
1826#endif
1827}
1828
1829// -------------------------------------------------------------------------
1830
/*!
    \brief Report a corrupted or otherwise invalid serialization format.

    Throws std::logic_error when STL is available, otherwise uses
    BM_THROW(BM_ERR_SERIALFORMAT).
*/
1831template<typename SV>
1833{
1834#ifndef BM_NO_STL
// NOTE(review): the message spells "fromat"; left unchanged here because
// callers or tests may match on the exact text
1835 throw std::logic_error("BitMagic: Invalid serialization fromat (BLOB corruption?)");
1836#else
1837 BM_THROW(BM_ERR_SERIALFORMAT);
1838#endif
1839}
1840
1841// -------------------------------------------------------------------------
1842
/*!
    \brief Report a missing or invalid remap matrix in the serialized data.

    Throws std::logic_error when STL is available, otherwise uses
    BM_THROW(BM_ERR_SERIALFORMAT).
*/
1843template<typename SV>
1845{
1846#ifndef BM_NO_STL
1847 throw std::logic_error("BitMagic: Invalid serialization format (remap matrix)");
1848#else
1849 BM_THROW(BM_ERR_SERIALFORMAT);
1850#endif
1851}
1852
1853// -------------------------------------------------------------------------
1854
1855} // namespace bm
1856
1857#endif
Definitions(internal).
#define BMRESTRICT
Definition bmdef.h:203
#define BMNOEXCEPT
Definition bmdef.h:82
#define BM_ASSERT
Definition bmdef.h:139
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Sparse container sparse_vector<> for integer types using bit-transposition transform.
Byte based reader for un-aligned bit streaming.
Definition encoding.h:257
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition encoding.h:1792
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit).
Definition encoding.h:1516
Byte based writer for un-aligned bit streaming.
Definition encoding.h:183
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition encoding.h:1294
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition encoding.h:1187
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition bm.h:118
bvector_size_type size_type
Definition bm.h:121
Alloc allocator_type
Definition bm.h:117
Deserializer for compressed collections.
bvector_type::allocator_type allocator_type
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:105
void seek(int delta) BMNOEXCEPT
change current position
Definition encoding.h:99
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:93
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition encoding.h:679
Class for decoding data from memory buffer.
Definition encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:751
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:786
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition encoding.h:722
Deserializer for bit-vector.
Definition bmserial.h:570
Memory encoding.
Definition encoding.h:50
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:529
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition encoding.h:537
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:606
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition encoding.h:545
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition encoding.h:516
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition encoding.h:571
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition encoding.h:444
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition bmserial.h:927
Algorithms for rank compression of bit-vector.
Definition bmalgo.h:453
Bit-vector serialization class.
Definition bmserial.h:76
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector).
Definition bmserial.h:1272
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:86
byte_buffer< allocator_type > buffer
Definition bmserial.h:85
bm::xor_sim_model< BV > xor_sim_model_type
Definition bmserial.h:87
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition bmserial.h:2703
sparse vector de-serializer
void deserialize_planes(SV &sv, unsigned planes, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector planes
void setup_xor_compression()
setup deserializers
bm::operation_deserializer< bvector_type > op_deserial_
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
bm::rank_compressor< bvector_type > rsc_compressor_
void deserialize_structure(SV &sv, const unsigned char *buf)
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv, bool clear_sv)
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
static void raise_invalid_header()
throw error on incorrect deserialization
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type alloc_type
static void raise_invalid_format()
throw error on incorrect deserialization
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
bm::heap_vector< size_t, alloc_type, true > off_vect_
bm::heap_vector< unsigned, alloc_type, true > off32_vect_
static void raise_invalid_64bit()
throw error on incorrect deserialization
static void raise_missing_remap_matrix()
throw error on incorrect deserialization
const bvector_type * bvector_type_const_ptr
void set_xor_ref(bv_ref_vector_type *bv_ref_ptr)
Set external XOR reference vectors (data frame reference vectors).
void set_finalization(bm::finalization is_final)
Set deserialization finalization to force deserialized vectors into READONLY (or READWRITE) mode.
bm::deserializer< bvector_type, bm::decoder > deserial_
int load_null_plane(SV &sv, int planes, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plane (returns new planes count)
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to, bool clear_sv=true)
void load_planes_off_table(const unsigned char *buf, bm::decoder &dec, unsigned planes)
load offset table
void clear_xor_compression()
unset XOR compression vectors
void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
void encode_remap_matrix(bm::encoder &enc, const SV &sv)
serialize the remap matrix used for SV encoding
void set_xor_ref(const bv_ref_vector_type *bv_ref_ptr) BMNOEXCEPT
Enable external XOR serialization via external reference vectors (data frame ref.
serializer_type::buffer buffer_type
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
static void build_plane_digest(bvector_type &digest_bv, const SV &sv)
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers for faster range deserialization.
void set_xor_ref(bool is_enabled) BMNOEXCEPT
Turn ON and OFF XOR compression of sparse vectors Enables XOR reference compression for the sparse ve...
void build_xor_ref_vector(const SV &sv)
const bvector_type * bvector_type_const_ptr
void compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand.
alloc_type::allocator_pool_type allocator_pool_type
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
Attach serializer to a pre-computed similarity model.
bm::serializer< bvector_type > serializer_type
SV::remap_matrix_type remap_matrix_type
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
bm::serializer< bvector_type > & get_bv_serializer() BMNOEXCEPT
Get access to the underlying bit-vector serializer This access can be used to fine tune compression s...
bool is_xor_ref() const BMNOEXCEPT
Returns the XOR reference compression status (enabled/disabled).
void disable_xor_compression() BMNOEXCEPT
Disable XOR compression on serialization.
void enable_xor_compression() BMNOEXCEPT
Enable XOR compression on vector serialization.
bm::heap_vector< unsigned, alloc_type, true > u32_vector_type
bvector_type::allocator_type alloc_type
bm::serializer< bvector_type >::xor_sim_model_type xor_sim_model_type
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
Definition bm.h:790
finalization
copy strategy
Definition bmconst.h:156
@ READONLY
immutable (read-only object)
Definition bmconst.h:158
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition bmserial.h:3137
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
Definition bm.h:78
unsigned int word_t
Definition bmconst.h:39
SZ count_nz(const VT *arr, SZ arr_size) BMNOEXCEPT
Find count of non-zero elements in the array.
Definition bmfunc.h:10154
const unsigned id_max32
Definition bmconst.h:50
ByteOrder
Byte orders recognized by the library.
Definition bmconst.h:452
unsigned long long int id64_t
Definition bmconst.h:35
unsigned short gap_word_t
Definition bmconst.h:78
static bool test()
Definition bmutil.h:114
static ByteOrder byte_order()
Definition bmconst.h:487
layout class for serialization buffer structure
const unsigned char * buf() const BMNOEXCEPT
Return serialization buffer pointer.
sizet_vector_type plane_size_
serialized plane size
serializer< bvector_type >::buffer buffer_type
void resize(size_t ssize)
Set new serialized size.
unsigned char * reserve(size_t capacity)
resize capacity
size_t capacity() const BMNOEXCEPT
return serialization buffer capacity
size_t size() const BMNOEXCEPT
return current serialized size
void resize_slices(unsigned new_slices_size)
Resize for the target number of planes / bit-slices.
bm::heap_vector< unsigned char *, allocator_type, true > ptr_vector_type
ptr_vector_type plane_ptrs_
pointers on serialized bit-planes
buffer_type buf_
serialization buffer
void set_plane(unsigned i, unsigned char *ptr, size_t buf_size) BMNOEXCEPT
Set plane output pointer and size.
const unsigned char * data() const BMNOEXCEPT
Return serialization buffer pointer.
bvector_type::allocator_type allocator_type
const unsigned char * get_plane(unsigned i) const BMNOEXCEPT
Get plane pointer.
bm::heap_vector< size_t, allocator_type, true > sizet_vector_type
void freemem() BMNOEXCEPT
free memory
Parameters for XOR similarity search.
Definition bmxor.h:59