BitMagic-C++
encoding.h
Go to the documentation of this file.
1#ifndef BMENCODING_H__INCLUDED__
2#define BMENCODING_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file encoding.h
22 \brief Encoding utilities for serialization (internal)
23*/
24
25
26#include <memory.h>
27#include "bmutil.h"
28
29
30#ifdef _MSC_VER
31#pragma warning (push)
32#pragma warning (disable : 4702)
33#endif
34
35namespace bm
36{
37
38
39// ----------------------------------------------------------------
40
41/*!
42 \brief Memory encoding.
43
44 Class for encoding data into memory.
45 Class handles aligment issues with the integer data types.
46
47 \ingroup gammacode
48*/
50{
51public:
52 typedef unsigned char* position_type;
53public:
54 encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
55 void put_8(unsigned char c) BMNOEXCEPT;
57 void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
60 void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
64
65 void put_8_16_32(unsigned w,
66 unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT;
67 void put_prefixed_array_32(unsigned char c,
68 const bm::word_t* w, unsigned count) BMNOEXCEPT;
69 void put_prefixed_array_16(unsigned char c,
70 const bm::short_t* s, unsigned count,
71 bool encode_count) BMNOEXCEPT;
72 void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
73 size_t size() const BMNOEXCEPT;
74 unsigned char* get_pos() const BMNOEXCEPT;
75 void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
76private:
77 unsigned char* buf_;
78 unsigned char* start_;
79 size_t size_;
80};
81
82// ----------------------------------------------------------------
83/**
84 Base class for all decoding functionality
85 \ingroup gammacode
86*/
88{
89public:
90 decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
91
92 /// Reads character from the decoding buffer.
93 unsigned char get_8() BMNOEXCEPT { return *buf_++; }
94
95 /// Returns size of the current decoding stream.
96 size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
97
98 /// change current position
99 void seek(int delta) BMNOEXCEPT { buf_ += delta; }
100
101 /// read bytes from the decode buffer
102 void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
103
104 /// Return current buffer pointer
105 const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
106
107 /// Set current buffer pointer
108 void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
109
110 /// Read h-64-bit
111 bm::id64_t get_h64() BMNOEXCEPT;
112
113protected:
114 const unsigned char* buf_;
115 const unsigned char* start_;
116};
117
118
119// ----------------------------------------------------------------
120/**
121 Class for decoding data from memory buffer.
122 Properly handles aligment issues with integer data types.
123 \ingroup gammacode
124*/
125class decoder : public decoder_base
126{
127public:
128 decoder(const unsigned char* buf) BMNOEXCEPT;
134 void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
135 bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
136 void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
137 void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
138};
139
140// ----------------------------------------------------------------
141/**
142 Class for decoding data from memory buffer.
143 Properly handles aligment issues with integer data types.
144 Converts data to big endian architecture
145 (presumed it was encoded as little endian)
146 \ingroup gammacode
147*/
149
150
151// ----------------------------------------------------------------
152/**
153 Class for decoding data from memory buffer.
154 Properly handles aligment issues with integer data types.
155 Converts data to little endian architecture
156 (presumed it was encoded as big endian)
157 \ingroup gammacode
158*/
160{
161public:
162 decoder_little_endian(const unsigned char* buf);
168 void get_32(bm::word_t* w, unsigned count);
169 bool get_32_OR(bm::word_t* w, unsigned count);
170 void get_32_AND(bm::word_t* w, unsigned count);
171 void get_16(bm::short_t* s, unsigned count);
172};
173
174
175/**
176 Byte based writer for un-aligned bit streaming
177
178 @ingroup gammacode
179 @sa encoder
180*/
181template<class TEncoder>
183{
184public:
185 bit_out(TEncoder& dest)
186 : dest_(dest), used_bits_(0), accum_(0)
187 {}
188
190
191 /// issue single bit into encode bit-stream
192 void put_bit(unsigned value) BMNOEXCEPT;
193
194 /// issue count bits out of value
195 void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
196
197 /// issue 0 into output stream
199
200 /// issue specified number of 0s
201 void put_zero_bits(unsigned count) BMNOEXCEPT;
202
203 /// Elias Gamma encode the specified value
204 void gamma(unsigned value) BMNOEXCEPT;
205
206 /// Binary Interpolative array decode
207 void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
209 {
210 bic_encode_u16_cm(arr, sz, lo, hi);
211 }
212
213 /// Binary Interpolative encoding (array of 16-bit ints)
214 void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
217
218 /// Binary Interpolative encoding (array of 16-bit ints)
219 /// cm - "center-minimal"
220 void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
223
224 /// Binary Interpolative encoding (array of 32-bit ints)
225 /// cm - "center-minimal"
226 void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
228
229 /// Flush the incomplete 32-bit accumulator word
230 void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
231
232private:
233 void flush_accum() BMNOEXCEPT
234 {
235 dest_.put_32(accum_);
236 used_bits_ = accum_ = 0;
237 }
238private:
239 bit_out(const bit_out&);
240 bit_out& operator=(const bit_out&);
241
242private:
243 TEncoder& dest_; ///< Bit stream target
244 unsigned used_bits_; ///< Bits used in the accumulator
245 unsigned accum_; ///< write bit accumulator
246};
247
248
249/**
250 Byte based reader for un-aligned bit streaming
251
252 @ingroup gammacode
253 @sa encoder
254*/
255template<class TDecoder>
257{
258public:
260 : src_(decoder),
261 used_bits_(unsigned(sizeof(accum_) * 8)),
262 accum_(0)
263 {}
264
265 /// decode unsigned value using Elias Gamma coding
266 unsigned gamma() BMNOEXCEPT;
267
268 /// read number of bits out of the stream
269 unsigned get_bits(unsigned count) BMNOEXCEPT;
270
271 /// read 1 bit
272 unsigned get_bit() BMNOEXCEPT;
273
274 /// Binary Interpolative array decode
275 void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
277 {
278 if (sz)
279 bic_decode_u16_cm(arr, sz, lo, hi);
280 }
281
282 void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
284 {
285 if (sz)
286 bic_decode_u16_cm_bitset(block, sz, lo, hi);
287 }
288 void bic_decode_u16_dry(unsigned sz,
290 {
291 if (sz)
292 bic_decode_u16_cm_dry(sz, lo, hi);
293 }
294
295
296 /// Binary Interpolative array decode
297 void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
299 /// Binary Interpolative array decode
300 void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
302
303 /// Binary Interpolative array decode (32-bit)
304 void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
306
307
308 /// Binary Interpolative array decode into bitset (32-bit based)
309 void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
311
312 /// Binary Interpolative array decode into /dev/null
313 void bic_decode_u16_rg_dry(unsigned sz,
315
316 /// Binary Interpolative array decode into bitset (32-bit based)
317 void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
320
321 /// Binary Interpolative array decode into /dev/null
322 void bic_decode_u16_cm_dry(unsigned sz,
324
325private:
326 bit_in(const bit_in&);
327 bit_in& operator=(const bit_in&);
328private:
329 TDecoder& src_; ///< Source of bytes
330 unsigned used_bits_; ///< Bits used in the accumulator
331 unsigned accum_; ///< read bit accumulator
332};
333
334
335/**
336 Functor for Elias Gamma encoding
337 @ingroup gammacode
338*/
339template<typename T, typename TBitIO>
341{
342public:
343 gamma_encoder(TBitIO& bout) : bout_(bout)
344 {}
345
346 /** Encode word */
347 void operator()(T value) { bout_.gamma(value); }
348private:
350 gamma_encoder& operator=(const gamma_encoder&);
351private:
352 TBitIO& bout_;
353};
354
355
356/**
357 Elias Gamma decoder
358 @ingroup gammacode
359*/
360template<typename T, typename TBitIO>
362{
363public:
364 gamma_decoder(TBitIO& bin) : bin_(bin) {}
365
366 /**
367 Start encoding sequence
368 */
369 void start() {}
370
371 /**
372 Stop decoding sequence
373 */
374 void stop() {}
375
376 /**
377 Decode word
378 */
379 T operator()(void) { return (T)bin_.gamma(); }
380private:
382 gamma_decoder& operator=(const gamma_decoder&);
383private:
384 TBitIO& bin_;
385};
386
387
388// ----------------------------------------------------------------
389// Implementation details.
390// ----------------------------------------------------------------
391
392/*!
393 \fn encoder::encoder(unsigned char* buf, unsigned size)
394 \brief Construction.
395 \param buf - memory buffer pointer.
396 \param size - size of the buffer
397*/
398inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
399: buf_(buf), start_(buf)
400{
401 size_ = a_size;
402}
403/*!
404 \brief Encode 8-bit prefix + an array
405*/
406inline void encoder::put_prefixed_array_32(unsigned char c,
407 const bm::word_t* w,
408 unsigned count) BMNOEXCEPT
409{
410 put_8(c);
411 put_32(w, count);
412}
413
414/*!
415 \brief Encode 8-bit prefix + an array
416*/
417inline void encoder::put_prefixed_array_16(unsigned char c,
418 const bm::short_t* s,
419 unsigned count,
420 bool encode_count) BMNOEXCEPT
421{
422 put_8(c);
423 if (encode_count)
424 put_16((bm::short_t) count);
425 put_16(s, count);
426}
427
428
429/*!
430 \fn void encoder::put_8(unsigned char c)
431 \brief Puts one character into the encoding buffer.
432 \param c - character to encode
433*/
435{
436 *buf_++ = c;
437}
438
439/*!
440 \fn encoder::put_16(bm::short_t s)
441 \brief Puts short word (16 bits) into the encoding buffer.
442 \param s - short word to encode
443*/
445{
446#if (BM_UNALIGNED_ACCESS_OK == 1)
447 ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
448 buf_ += sizeof(bm::short_t);
449#else
450 *buf_++ = (unsigned char) s;
451 s >>= 8;
452 *buf_++ = (unsigned char) s;
453#endif
454}
455
456/*!
457 \brief Method puts array of short words (16 bits) into the encoding buffer.
458*/
459inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
460{
461 BM_ASSERT(count);
462#if (BM_UNALIGNED_ACCESS_OK == 1)
463 ::memcpy(buf_, s, sizeof(bm::short_t)*count);
464 buf_ += sizeof(bm::short_t) * count;
465#else
466 unsigned char* buf = buf_;
467 const bm::short_t* s_end = s + count;
468 do
469 {
470 bm::short_t w16 = *s++;
471 unsigned char a = (unsigned char) w16;
472 unsigned char b = (unsigned char) (w16 >> 8);
473
474 *buf++ = a;
475 *buf++ = b;
476
477 } while (s < s_end);
478
479 buf_ = (unsigned char*)buf;
480#endif
481}
482
483/*!
484 \brief but gat plus value based on its VBR evaluation
485*/
486inline
487void encoder::put_8_16_32(unsigned w,
488 unsigned char c8,
489 unsigned char c16,
490 unsigned char c32) BMNOEXCEPT
491{
492 if (w < 256)
493 {
494 put_8(c8);
495 put_8((unsigned char)w);
496 }
497 else
498 {
499 if (w < 65536)
500 {
501 put_8(c16);
502 put_16((unsigned short) w);
503 }
504 else
505 {
506 put_8(c32);
507 put_32(w);
508 }
509 }
510}
511
512/*!
513 \brief copy bytes into target buffer or just rewind if src is NULL
514*/
515inline
516void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
517{
518 BM_ASSERT((buf_ + count) < (start_ + size_));
519 if (src)
520 ::memcpy(buf_, src, count);
521 buf_ += count;
522}
523
524
525/*!
526 \fn unsigned encoder::size() const
527 \brief Returns size of the current encoding stream.
528*/
529inline size_t encoder::size() const BMNOEXCEPT
530{
531 return size_t(buf_ - start_);
532}
533
534/**
535 \brief Get current memory stream position
536*/
538{
539 return buf_;
540}
541
542/**
543 \brief Set current memory stream position
544*/
546{
547 buf_ = buf_pos;
548}
549
550/*!
551 \fn void encoder::put_24(bm::word_t w)
552 \brief Puts 24 bits word into encoding buffer.
553 \param w - word to encode.
554*/
556{
557 BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
558
559 buf_[0] = (unsigned char)w;
560 buf_[1] = (unsigned char)(w >> 8);
561 buf_[2] = (unsigned char)(w >> 16);
562 buf_ += 3;
563}
564
565
566/*!
567 \fn void encoder::put_32(bm::word_t w)
568 \brief Puts 32 bits word into encoding buffer.
569 \param w - word to encode.
570*/
572{
573#if (BM_UNALIGNED_ACCESS_OK == 1)
574 ::memcpy(buf_, &w, sizeof(bm::word_t));
575 buf_ += sizeof(bm::word_t);
576#else
577 *buf_++ = (unsigned char) w;
578 *buf_++ = (unsigned char) (w >> 8);
579 *buf_++ = (unsigned char) (w >> 16);
580 *buf_++ = (unsigned char) (w >> 24);
581#endif
582}
583
584/*!
585 \fn void encoder::put_48(bm::id64_t w)
586 \brief Puts 48 bits word into encoding buffer.
587 \param w - word to encode.
588*/
590{
591 BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
592 *buf_++ = (unsigned char)w;
593 *buf_++ = (unsigned char)(w >> 8);
594 *buf_++ = (unsigned char)(w >> 16);
595 *buf_++ = (unsigned char)(w >> 24);
596 *buf_++ = (unsigned char)(w >> 32);
597 *buf_++ = (unsigned char)(w >> 40);
598}
599
600
601/*!
602 \fn void encoder::put_64(bm::id64_t w)
603 \brief Puts 64 bits word into encoding buffer.
604 \param w - word to encode.
605*/
607{
608#if (BM_UNALIGNED_ACCESS_OK == 1)
609 ::memcpy(buf_, &w, sizeof(bm::id64_t));
610 buf_ += sizeof(bm::id64_t);
611#else
612 *buf_++ = (unsigned char) w;
613 *buf_++ = (unsigned char) (w >> 8);
614 *buf_++ = (unsigned char) (w >> 16);
615 *buf_++ = (unsigned char) (w >> 24);
616 *buf_++ = (unsigned char) (w >> 32);
617 *buf_++ = (unsigned char) (w >> 40);
618 *buf_++ = (unsigned char) (w >> 48);
619 *buf_++ = (unsigned char) (w >> 56);
620#endif
621}
622
623/*!
624 \fn void encoder::put_h64(bm::id64_t w)
625 \brief Puts 64 bits word into encoding buffer with h-compression
626 \param w - word to encode.
627*/
629{
630 unsigned h_mask = bm::compute_h64_mask(w);
631 put_8((unsigned char) h_mask);
632 for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
633 {
634 if ((unsigned char) w)
635 put_8((unsigned char) w);
636 } // for i
637}
638
639
640
641/*!
642 \brief Encodes array of 32-bit words
643*/
644inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
645{
646#if (BM_UNALIGNED_ACCESS_OK == 1)
647 // use memcpy() because compilers now understand it as an idiom and inline
648 ::memcpy(buf_, w, sizeof(bm::word_t) * count);
649 buf_ += sizeof(bm::word_t) * count;
650#else
651 unsigned char* buf = buf_;
652 const bm::word_t* w_end = w + count;
653 do
654 {
655 bm::word_t w32 = *w++;
656 unsigned char a = (unsigned char) w32;
657 unsigned char b = (unsigned char) (w32 >> 8);
658 unsigned char c = (unsigned char) (w32 >> 16);
659 unsigned char d = (unsigned char) (w32 >> 24);
660
661 *buf++ = a;
662 *buf++ = b;
663 *buf++ = c;
664 *buf++ = d;
665 } while (w < w_end);
666
667 buf_ = (unsigned char*)buf;
668#endif
669}
670
671
672// ---------------------------------------------------------------------
673
674
675/*!
676 Load bytes from the decode buffer
677*/
678inline
679void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
680{
681 if (dst)
682 ::memcpy(dst, buf_, count);
683 buf_ += count;
684}
685
686/*!
687 \fn bm::id64_t decoder_base::get_h64()
688 \brief Reads 64-bit word from the decoding buffer.
689*/
690inline
692{
693 bm::id64_t w = 0;
694 unsigned h_mask = (unsigned char) *buf_++;
695 for (unsigned i = 0; h_mask && (i < 8); ++i)
696 {
697 if (h_mask & (1u<<i))
698 {
699 h_mask &= ~(1u<<i);
700 unsigned char a = (unsigned char) *buf_++;
701 w |= (bm::id64_t(a) << (i*8));
702 }
703 } // for i
704 return w;
705}
706
707
708/*!
709 \fn decoder::decoder(const unsigned char* buf)
710 \brief Construction
711 \param buf - pointer to the decoding memory.
712*/
713inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
714: decoder_base(buf)
715{
716}
717
718/*!
719 \fn bm::short_t decoder::get_16()
720 \brief Reads 16-bit word from the decoding buffer.
721*/
723{
724#if (BM_UNALIGNED_ACCESS_OK == 1)
725 bm::short_t a;
726 ::memcpy(&a, buf_, sizeof(bm::short_t));
727#else
728 bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
729#endif
730 buf_ += sizeof(a);
731 return a;
732}
733
734/*!
735 \fn bm::word_t decoder::get_24()
736 \brief Reads 32-bit word from the decoding buffer.
737*/
739{
740 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
741 ((unsigned)buf_[2] << 16);
742 buf_ += 3;
743 return a;
744}
745
746
747/*!
748 \fn bm::word_t decoder::get_32()
749 \brief Reads 32-bit word from the decoding buffer.
750*/
752{
753#if (BM_UNALIGNED_ACCESS_OK == 1)
754 bm::word_t a;
755 ::memcpy(&a, buf_, sizeof(bm::word_t));
756#else
757 bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
758 ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
759#endif
760 buf_+=sizeof(a);
761 return a;
762}
763
764/*!
765 \fn bm::word_t decoder::get_48()
766 \brief Reads 64-bit word from the decoding buffer.
767*/
768inline
770{
771 bm::id64_t a = buf_[0] +
772 ((bm::id64_t)buf_[1] << 8) +
773 ((bm::id64_t)buf_[2] << 16) +
774 ((bm::id64_t)buf_[3] << 24) +
775 ((bm::id64_t)buf_[4] << 32) +
776 ((bm::id64_t)buf_[5] << 40);
777 buf_ += 6;
778 return a;
779}
780
781/*!
782 \fn bm::id64_t decoder::get_64()
783 \brief Reads 64-bit word from the decoding buffer.
784*/
785inline
787{
788#if (BM_UNALIGNED_ACCESS_OK == 1)
789 bm::id64_t a;
790 ::memcpy(&a, buf_, sizeof(bm::id64_t));
791#else
792 bm::id64_t a = buf_[0]+
793 ((bm::id64_t)buf_[1] << 8) +
794 ((bm::id64_t)buf_[2] << 16) +
795 ((bm::id64_t)buf_[3] << 24) +
796 ((bm::id64_t)buf_[4] << 32) +
797 ((bm::id64_t)buf_[5] << 40) +
798 ((bm::id64_t)buf_[6] << 48) +
799 ((bm::id64_t)buf_[7] << 56);
800#endif
801 buf_ += sizeof(a);
802 return a;
803}
804
805
806/*!
807 \fn void decoder::get_32(bm::word_t* w, unsigned count)
808 \brief Reads block of 32-bit words from the decoding buffer.
809 \param w - pointer on memory block to read into.
810 \param count - size of memory block in words.
811*/
812inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
813{
814 if (!w)
815 {
816 seek(int(count * sizeof(bm::word_t)));
817 return;
818 }
819#if (BM_UNALIGNED_ACCESS_OK == 1)
820 ::memcpy(w, buf_, count * sizeof(bm::word_t));
821 seek(int(count * sizeof(bm::word_t)));
822 return;
823#else
824 const unsigned char* buf = buf_;
825 const bm::word_t* w_end = w + count;
826 do
827 {
828 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
829 ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
830 *w++ = a;
831 buf += sizeof(a);
832 } while (w < w_end);
833 buf_ = (unsigned char*)buf;
834#endif
835}
836
837/*!
838 \brief Reads block of 32-bit words from the decoding buffer and ORs
839 to the destination
840 \param w - pointer on memory block to read into
841 \param count - should match bm::set_block_size
842*/
843inline
845{
846 if (!w)
847 {
848 seek(int(count * sizeof(bm::word_t)));
849 return false;
850 }
851#if defined(BMAVX2OPT)
852 __m256i* buf_start = (__m256i*)buf_;
853 seek(int(count * sizeof(bm::word_t)));
854 __m256i* buf_end = (__m256i*)buf_;
855
856 return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
857#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
858 __m128i* buf_start = (__m128i*)buf_;
859 seek(int(count * sizeof(bm::word_t)));
860 __m128i* buf_end = (__m128i*)buf_;
861
862 return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
863#else
864 bm::word_t acc = 0;
865 const bm::word_t not_acc = acc = ~acc;
866
867 for (unsigned i = 0; i < count; i+=4)
868 {
869 acc &= (w[i+0] |= get_32());
870 acc &= (w[i+1] |= get_32());
871 acc &= (w[i+2] |= get_32());
872 acc &= (w[i+3] |= get_32());
873 }
874 return acc == not_acc;
875#endif
876}
877
878/*!
879 \brief Reads block of 32-bit words from the decoding buffer and ANDs
880 to the destination
881 \param w - pointer on memory block to read into
882 \param count - should match bm::set_block_size
883*/
884inline
886{
887 if (!w)
888 {
889 seek(int(count * sizeof(bm::word_t)));
890 return;
891 }
892#if defined(BMAVX2OPT)
893 __m256i* buf_start = (__m256i*)buf_;
894 seek(int(count * sizeof(bm::word_t)));
895 __m256i* buf_end = (__m256i*)buf_;
896
897 bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
898#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
899 __m128i* buf_start = (__m128i*)buf_;
900 seek(int(count * sizeof(bm::word_t)));
901 __m128i* buf_end = (__m128i*)buf_;
902
903 bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
904#else
905 for (unsigned i = 0; i < count; i+=4)
906 {
907 w[i+0] &= get_32();
908 w[i+1] &= get_32();
909 w[i+2] &= get_32();
910 w[i+3] &= get_32();
911 }
912#endif
913}
914
915
916
917/*!
918 \fn void decoder::get_16(bm::short_t* s, unsigned count)
919 \brief Reads block of 32-bit words from the decoding buffer.
920 \param s - pointer on memory block to read into.
921 \param count - size of memory block in words.
922*/
923inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
924{
925 BM_ASSERT(count);
926 if (!s)
927 {
928 seek(int(count * sizeof(bm::short_t)));
929 return;
930 }
931#if (BM_UNALIGNED_ACCESS_OK == 1)
932 ::memcpy(s, buf_, sizeof(bm::short_t) * count);
933 buf_ += sizeof(bm::short_t) * count;
934#else
935 const unsigned char* buf = buf_;
936 const bm::short_t* s_end = s + count;
937 do
938 {
939 bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
940 *s++ = a;
941 buf += sizeof(a);
942 } while (s < s_end);
943 buf_ = (unsigned char*)buf;
944#endif
945
946}
947
948
949
950// ---------------------------------------------------------------------
951
952inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
953: decoder_base(buf)
954{
955}
956
957inline
959{
962 bm::short_t a = bm::short_t((v1 << 8) + v2);
963 buf_ += sizeof(a);
964 return a;
965}
966
968{
969 // TODO: validate if this is a correct for cross endian opts
970 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
971 ((unsigned)buf_[2] << 16);
972 buf_ += 3;
973 return a;
974}
975
976inline
978{
979 bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
980 ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
981 buf_+=sizeof(a);
982 return a;
983}
984
985inline
987{
988 bm::id64_t a = buf_[0] +
989 ((bm::id64_t)buf_[1] << 8) +
990 ((bm::id64_t)buf_[2] << 16) +
991 ((bm::id64_t)buf_[3] << 24) +
992 ((bm::id64_t)buf_[4] << 32) +
993 ((bm::id64_t)buf_[5] << 40);
994 buf_ += 6;
995 return a;
996}
997
998inline
1000{
1001 bm::id64_t a = buf_[0]+
1002 ((bm::id64_t)buf_[1] << 56) +
1003 ((bm::id64_t)buf_[2] << 48) +
1004 ((bm::id64_t)buf_[3] << 40) +
1005 ((bm::id64_t)buf_[4] << 32) +
1006 ((bm::id64_t)buf_[5] << 24) +
1007 ((bm::id64_t)buf_[6] << 16) +
1008 ((bm::id64_t)buf_[7] << 8);
1009 buf_+=sizeof(a);
1010 return a;
1011}
1012
1013inline
1015{
1016 if (!w)
1017 {
1018 seek(int(count * sizeof(bm::word_t)));
1019 return;
1020 }
1021
1022 const unsigned char* buf = buf_;
1023 const bm::word_t* w_end = w + count;
1024 do
1025 {
1026 bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
1027 ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
1028 *w++ = a;
1029 buf += sizeof(a);
1030 } while (w < w_end);
1031 buf_ = (unsigned char*)buf;
1032}
1033
1034inline
1036{
1037 if (!w)
1038 {
1039 seek(int(count * sizeof(bm::word_t)));
1040 return false;
1041 }
1042
1043 bm::word_t acc = 0;
1044 const bm::word_t not_acc = acc = ~acc;
1045
1046 for (unsigned i = 0; i < count; i+=4)
1047 {
1048 acc &= (w[i+0] |= get_32());
1049 acc &= (w[i+1] |= get_32());
1050 acc &= (w[i+2] |= get_32());
1051 acc &= (w[i+3] |= get_32());
1052 }
1053 return acc == not_acc;
1054}
1055
1056inline
1058{
1059 for (unsigned i = 0; i < count; i+=4)
1060 {
1061 w[i+0] &= get_32();
1062 w[i+1] &= get_32();
1063 w[i+2] &= get_32();
1064 w[i+3] &= get_32();
1065 }
1066}
1067
1068
1069inline
1071{
1072 if (!s)
1073 {
1074 seek(int(count * sizeof(bm::short_t)));
1075 return;
1076 }
1077
1078 const unsigned char* buf = buf_;
1079 const bm::short_t* s_end = s + count;
1080 do
1081 {
1082 bm::short_t v1 = bm::short_t(buf_[0]);
1083 bm::short_t v2 = bm::short_t(buf_[1]);
1084 bm::short_t a = bm::short_t((v1 << 8) + v2);
1085 *s++ = a;
1086 buf += sizeof(a);
1087 } while (s < s_end);
1088 buf_ = (unsigned char*)buf;
1089}
1090
1091// ----------------------------------------------------------------------
1092//
1093
1094template<typename TEncoder>
1096{
1097 BM_ASSERT(value <= 1);
1098 accum_ |= (value << used_bits_);
1099 if (++used_bits_ == (sizeof(accum_) * 8))
1100 flush_accum();
1101}
1102
1103// ----------------------------------------------------------------------
1104
1105template<typename TEncoder>
1106void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
1107{
1108 unsigned used = used_bits_;
1109 unsigned acc = accum_;
1110
1111 {
1112 unsigned mask = ~0u;
1113 mask >>= (sizeof(accum_) * 8) - count;
1114 value &= mask;
1115 }
1116 for (;count;)
1117 {
1118 unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
1119 BM_ASSERT(free_bits);
1120 acc |= value << used;
1121
1122 if (count <= free_bits)
1123 {
1124 used += count;
1125 break;
1126 }
1127 else
1128 {
1129 value >>= free_bits;
1130 count -= free_bits;
1131 dest_.put_32(acc);
1132 acc = used = 0;
1133 continue;
1134 }
1135 }
1136 if (used == (sizeof(accum_) * 8))
1137 {
1138 dest_.put_32(acc);
1139 acc = used = 0;
1140 }
1141 used_bits_ = used;
1142 accum_ = acc;
1143}
1144
1145// ----------------------------------------------------------------------
1146
1147template<typename TEncoder>
1149{
1150 if (++used_bits_ == (sizeof(accum_) * 8))
1151 flush_accum();
1152}
1153
1154// ----------------------------------------------------------------------
1155
1156template<typename TEncoder>
1158{
1159 unsigned used = used_bits_;
1160 unsigned free_bits = (sizeof(accum_) * 8) - used;
1161 if (count >= free_bits)
1162 {
1163 flush_accum();
1164 count -= free_bits;
1165 used = 0;
1166
1167 for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
1168 {
1169 dest_.put_32(0);
1170 }
1171 used += count;
1172 }
1173 else
1174 {
1175 used += count;
1176 }
1177 accum_ |= (1u << used);
1178 if (++used == (sizeof(accum_) * 8))
1179 flush_accum();
1180 else
1181 used_bits_ = used;
1182}
1183
1184// ----------------------------------------------------------------------
1185
1186template<typename TEncoder>
1188{
1189 BM_ASSERT(value);
1190
1191 unsigned logv = bm::bit_scan_reverse32(value);
1192
1193 // Put zeroes + 1 bit
1194
1195 unsigned used = used_bits_;
1196 unsigned acc = accum_;
1197 const unsigned acc_bits = (sizeof(acc) * 8);
1198 unsigned free_bits = acc_bits - used;
1199
1200 {
1201 unsigned count = logv;
1202 if (count >= free_bits)
1203 {
1204 dest_.put_32(acc);
1205 acc = used ^= used;
1206 count -= free_bits;
1207
1208 for ( ;count >= acc_bits; count -= acc_bits)
1209 {
1210 dest_.put_32(0);
1211 }
1212 used += count;
1213 }
1214 else
1215 {
1216 used += count;
1217 }
1218 acc |= (1 << used);
1219 if (++used == acc_bits)
1220 {
1221 dest_.put_32(acc);
1222 acc = used ^= used;
1223 }
1224 }
1225
1226 // Put the value bits
1227 //
1228 {
1229 unsigned mask = (~0u);
1230 mask >>= acc_bits - logv;
1231 value &= mask;
1232 }
1233 for (;logv;)
1234 {
1235 acc |= value << used;
1236 free_bits = acc_bits - used;
1237 if (logv <= free_bits)
1238 {
1239 used += logv;
1240 break;
1241 }
1242 else
1243 {
1244 value >>= free_bits;
1245 logv -= free_bits;
1246 dest_.put_32(acc);
1247 acc = used ^= used;
1248 continue;
1249 }
1250 } // for
1251
1252 used_bits_ = used;
1253 accum_ = acc;
1254}
1255
1256// ----------------------------------------------------------------------
1257
1258template<typename TEncoder>
1260 const bm::gap_word_t* arr,
1261 unsigned sz,
1263{
1264 for (;sz;)
1265 {
1266 BM_ASSERT(lo <= hi);
1267 unsigned mid_idx = sz >> 1;
1268 bm::gap_word_t val = arr[mid_idx];
1269
1270 // write the interpolated value
1271 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1272 {
1273 unsigned r = hi - lo - sz + 1;
1274 if (r)
1275 {
1276 unsigned value = val - lo - mid_idx;
1277 unsigned logv = bm::bit_scan_reverse32(r);
1278 put_bits(value, logv+1);
1279 }
1280 }
1281
1282 bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1283 // tail recursion
1284 // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1285 arr += mid_idx + 1;
1286 sz -= mid_idx + 1;
1287 lo = gap_word_t(val + 1);
1288 } // for sz
1289}
1290
1291// ----------------------------------------------------------------------
1292
1293template<typename TEncoder>
1295 unsigned sz,
1296 bm::word_t lo,
1298{
1299 for (;sz;)
1300 {
1301 BM_ASSERT(lo <= hi);
1302 unsigned mid_idx = sz >> 1;
1303 bm::word_t val = arr[mid_idx];
1304
1305 // write the interpolated value
1306 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1307 {
1308 unsigned r = hi - lo - sz + 1;
1309 if (r)
1310 {
1311 unsigned value = val - lo - mid_idx;
1312
1313 unsigned n = r + 1;
1314 unsigned logv = bm::bit_scan_reverse32(n);
1315 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1316 int64_t half_c = c >> 1; // c / 2;
1317 int64_t half_r = r >> 1; // r / 2;
1318 int64_t lo1 = half_r - half_c;
1319 int64_t hi1 = half_r + half_c + 1;
1320 lo1 -= (n & 1);
1321 logv += (value <= lo1 || value >= hi1);
1322
1323 put_bits(value, logv);
1324 }
1325 }
1326
1327 bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1328 // tail recursive call:
1329 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1330 arr += mid_idx + 1;
1331 sz -= mid_idx + 1;
1332 lo = val + 1;
1333 } // for sz
1334}
1335
1336// ----------------------------------------------------------------------
1337
1338#if 0
1339/**
1340 Shuffle structure for BIC
1341 @internal
1342*/
1343template<unsigned N>
1344struct bic_encode_stack_u16
1345{
1346 bm::gap_word_t val_[N];
1347 bm::gap_word_t mid_[N];
1348 bm::gap_word_t lo_[N];
1349 bm::gap_word_t r_[N];
1350
1351 unsigned stack_size_ = 0;
1352
1353 void bic_shuffle(const bm::gap_word_t* arr,
1355 {
1356 for (;sz;)
1357 {
1358 BM_ASSERT(lo <= hi);
1359 bm::gap_word_t mid_idx = sz >> 1;
1360 bm::gap_word_t val = arr[mid_idx];
1361 unsigned r = hi - lo - sz + 1;
1362 if (r)
1363 {
1364 unsigned s = stack_size_++;
1365 r_[s] = (bm::gap_word_t)r;
1366 val_[s] = val;
1367 mid_[s] = mid_idx;
1368 lo_[s] = lo;
1369 }
1370
1371 bic_shuffle(arr, mid_idx, lo, bm::gap_word_t(val-1));
1372 // tail recursive call:
1373 // bic_shuffle(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1374 arr += mid_idx + 1;
1375 sz -= mid_idx + 1;
1376 lo = bm::gap_word_t(val + 1);
1377 } // for sz
1378 }
1379};
1380
1381template<typename TEncoder>
1383 unsigned sz_i,
1384 bm::gap_word_t lo_i,
1386{
1387 BM_ASSERT(sz_i <= 65535);
1388
1389 bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1390 // BIC re-ordering
1391 u16_stack.bic_shuffle(arr, bm::gap_word_t(sz_i), lo_i, hi_i);
1392
1393 BM_ASSERT(sz_i == u16_stack.stack_size_);
1394 for (unsigned i = 0; i < sz_i; ++i)
1395 {
1396 bm::gap_word_t val = u16_stack.val_[i];
1397 bm::gap_word_t mid_idx = u16_stack.mid_[i];
1398 bm::gap_word_t lo = u16_stack.lo_[i];
1399 unsigned r = u16_stack.r_[i];
1400
1401 unsigned value = val - lo - mid_idx;
1402 unsigned n = r + 1;
1403 unsigned logv = bm::bit_scan_reverse32(n);
1404 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1405
1406 int64_t half_c = c >> 1; // c / 2;
1407 int64_t half_r = r >> 1; // r / 2;
1408 int64_t lo1 = half_r - half_c;
1409 int64_t hi1 = half_r + half_c + 1;
1410 lo1 -= (n & 1);
1411 logv += (value <= lo1 || value >= hi1);
1412
1413 put_bits(value, logv);
1414 } // for i
1415}
1416#endif
1417
1418
1419template<typename TEncoder>
1421 unsigned sz,
1422 bm::gap_word_t lo,
1424{
1425 for (;sz;)
1426 {
1427 BM_ASSERT(lo <= hi);
1428 unsigned mid_idx = sz >> 1;
1429 bm::gap_word_t val = arr[mid_idx];
1430
1431 // write the interpolated value
1432 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1433 {
1434 unsigned r = hi - lo - sz + 1;
1435 if (r)
1436 {
1437 unsigned value = val - lo - mid_idx;
1438
1439 unsigned n = r + 1;
1440 unsigned logv = bm::bit_scan_reverse32(n);
1441 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1442 unsigned half_c = c >> 1; // c / 2;
1443 unsigned half_r = r >> 1; // r / 2;
1444 int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1445 unsigned hi1 = (half_r + half_c);
1446 logv += (value <= lo1 || value > hi1);
1447
1448 put_bits(value, logv);
1449 }
1450 }
1451
1452 bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1453 // tail recursive call:
1454 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1455 arr += ++mid_idx;
1456 sz -= mid_idx;
1457 lo = bm::gap_word_t(val + 1);
1458 } // for sz
1459}
1460
1461
1462
1463
1464
1465
1466// ----------------------------------------------------------------------
1467//
1468// ----------------------------------------------------------------------
1469
1470
1471template<class TDecoder>
1473 bm::gap_word_t lo,
1475{
1476 BM_ASSERT(sz);
1477 do // for (;sz;)
1478 {
1479 BM_ASSERT(lo <= hi);
1480
1481 unsigned val;
1482 // read the value
1483 {
1484 unsigned r = hi - lo - sz + 1;
1485 if (r)
1486 {
1487 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1488 val = get_bits(logv);
1489 BM_ASSERT(val <= r);
1490 }
1491 else
1492 {
1493 val = 0;
1494 }
1495 }
1496 unsigned mid_idx = sz >> 1;
1497 val += lo + mid_idx;
1498
1499 BM_ASSERT(val < 65536);
1500 BM_ASSERT(mid_idx < 65536);
1501
1502 arr[mid_idx] = bm::gap_word_t(val);
1503 if (sz <= 1)
1504 return;
1505
1506 bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1507 arr += mid_idx + 1;
1508 sz -= mid_idx + 1;
1509 lo = bm::gap_word_t(val + 1);
1510 } while (sz); // for sz
1511}
1512
1513// ----------------------------------------------------------------------
1514
1515template<class TDecoder>
1517 bm::word_t lo,
1519{
1520 BM_ASSERT(sz);
1521 do
1522 {
1523 BM_ASSERT(lo <= hi);
1524 // read the interpolated value
1525 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1526 unsigned val = hi - lo - sz + 1;
1527 if (val)
1528 {
1529 unsigned logv = bm::bit_scan_reverse32(val+1);
1530
1531 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1532 int64_t half_c = c >> 1;
1533 int64_t half_r = val >> 1;
1534 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1535 int64_t hi1 = half_r + half_c + 1;
1536
1537 val = get_bits(logv);
1538 if (val <= lo1 || val >= hi1)
1539 val += (get_bit() << logv);
1540 }
1541
1542 unsigned mid_idx = sz >> 1;
1543 val += lo + mid_idx;
1544 arr[mid_idx] = val;
1545
1546 if (sz <= 1)
1547 return;
1548
1549 bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1550 // tail recursive call:
1551 // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1552 arr += ++mid_idx;
1553 sz -= mid_idx;
1554 lo = val + 1;
1555 } while (sz);// for sz
1556}
1557
1558// ----------------------------------------------------------------------
1559
1560template<class TDecoder>
1562 bm::gap_word_t lo,
1564{
1565 BM_ASSERT(sz);
1566 do
1567 {
1568 BM_ASSERT(lo <= hi);
1569 // read the interpolated value
1570 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1571 unsigned val = hi - lo - sz + 1;
1572 if (val)
1573 {
1574 unsigned logv = bm::bit_scan_reverse32(val+1);
1575
1576 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1577 int64_t half_c = c >> 1; // c / 2;
1578 int64_t half_r = val >> 1; // r / 2;
1579 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1580 int64_t hi1 = half_r + half_c + 1;
1581 val = get_bits(logv);
1582 if (val <= lo1 || val >= hi1)
1583 val += (get_bit() << logv);
1584 }
1585
1586 unsigned mid_idx = sz >> 1;
1587 val += lo + mid_idx;
1588 arr[mid_idx] = bm::gap_word_t(val);
1589
1590 if (sz <= 1)
1591 return;
1592
1593 bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1594 // tail recursive call:
1595 // bic_decode_u16_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1596 arr += ++mid_idx;
1597 sz -= mid_idx;
1598 lo = bm::gap_word_t(val + 1);
1599 } while (sz);// for sz
1600}
1601
1602// ----------------------------------------------------------------------
1603
1604template<class TDecoder>
1606 bm::gap_word_t lo,
1608{
1609 BM_ASSERT(sz);
1610 do
1611 {
1612 BM_ASSERT(lo <= hi);
1613 // read the interpolated value
1614 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1615 unsigned val = hi - lo - sz + 1;
1616 if (val)
1617 {
1618 unsigned logv = bm::bit_scan_reverse32(val+1);
1619
1620 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1621 int64_t half_c = c >> 1; // c / 2;
1622 int64_t half_r = val >> 1; // r / 2;
1623 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1624 int64_t hi1 = half_r + half_c + 1;
1625
1626 val = get_bits(logv);
1627 if (val <= lo1 || val >= hi1)
1628 val += (get_bit() << logv);
1629 }
1630
1631 unsigned mid_idx = sz >> 1;
1632 val += lo + mid_idx;
1633
1634 // set bit in the target block
1635 {
1636 unsigned nword = (val >> bm::set_word_shift);
1637 block[nword] |= (1u << (val & bm::set_word_mask));
1638 }
1639
1640 if (sz <= 1)
1641 return;
1642
1643 bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1644 // tail recursive call:
1645 // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1646 sz -= ++mid_idx;
1647 lo = bm::gap_word_t(val + 1);
1648 } while (sz);
1649}
1650
1651// ----------------------------------------------------------------------
1652
1653template<class TDecoder>
1655 bm::gap_word_t lo,
1657{
1658 BM_ASSERT(sz);
1659 do
1660 {
1661 BM_ASSERT(lo <= hi);
1662
1663 unsigned val;
1664
1665 // read the interpolated value
1666 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1667 {
1668 unsigned r = hi - lo - sz + 1;
1669 if (r)
1670 {
1671 unsigned logv = bm::bit_scan_reverse32(r+1);
1672
1673 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1674 int64_t half_c = c >> 1; // c / 2;
1675 int64_t half_r = r >> 1; // r / 2;
1676 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1677 int64_t hi1 = half_r + half_c + 1;
1678 r = get_bits(logv);
1679 if (r <= lo1 || r >= hi1)
1680 r += (get_bit() << logv);
1681 }
1682 val = r;
1683 }
1684
1685 unsigned mid_idx = sz >> 1;
1686 val += lo + mid_idx;
1687
1688 if (sz <= 1)
1689 return;
1690
1691 bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1692 // tail recursive call:
1693 // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1694 sz -= mid_idx + 1;
1695 lo = bm::gap_word_t(val + 1);
1696 } while (sz); // for sz
1697}
1698
1699
1700// ----------------------------------------------------------------------
1701
1702template<class TDecoder>
1704 bm::gap_word_t lo,
1706{
1707 BM_ASSERT(sz);
1708 do //for (;sz;)
1709 {
1710 BM_ASSERT(lo <= hi);
1711
1712 unsigned val;
1713 // read the value
1714 {
1715 unsigned r = hi - lo - sz + 1;
1716 if (r)
1717 {
1718 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1719 val = get_bits(logv);
1720 BM_ASSERT(val <= r);
1721 }
1722 else
1723 {
1724 val = 0;
1725 }
1726 }
1727 unsigned mid_idx = sz >> 1;
1728 val += lo + mid_idx;
1729 BM_ASSERT(val < 65536);
1730 BM_ASSERT(mid_idx < 65536);
1731
1732 // set bit in the target block
1733 {
1734 unsigned nword = (val >> bm::set_word_shift);
1735 block[nword] |= (1u << (val & bm::set_word_mask));
1736 }
1737
1738 if (sz == 1)
1739 return;
1740 bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1741 // tail recursion of:
1742 //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1743 sz -= mid_idx + 1;
1744 lo = bm::gap_word_t(val + 1);
1745 } while(sz); // for sz
1746}
1747
1748// ----------------------------------------------------------------------
1749
1750template<class TDecoder>
1752 bm::gap_word_t lo,
1754{
1755 for (;sz;)
1756 {
1757 BM_ASSERT(lo <= hi);
1758
1759 unsigned val;
1760 // read the value
1761 {
1762 unsigned r = hi - lo - sz + 1;
1763 if (r)
1764 {
1765 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1766 val = get_bits(logv);
1767 BM_ASSERT(val <= r);
1768 }
1769 else
1770 {
1771 val = 0;
1772 }
1773 }
1774 unsigned mid_idx = sz >> 1;
1775 val += lo + mid_idx;
1776 BM_ASSERT(val < 65536);
1777 BM_ASSERT(mid_idx < 65536);
1778
1779 if (sz == 1)
1780 return;
1781 bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1782 sz -= mid_idx + 1;
1783 lo = bm::gap_word_t(val + 1);
1784 } // for sz
1785}
1786
1787
1788
1789// ----------------------------------------------------------------------
1790
1791template<class TDecoder>
1793{
1794 unsigned acc = accum_;
1795 unsigned used = used_bits_;
1796
1797 if (used == (sizeof(acc) * 8))
1798 {
1799 acc = src_.get_32();
1800 used ^= used;
1801 }
1802 unsigned zero_bits = 0;
1803 while (true)
1804 {
1805 if (acc == 0)
1806 {
1807 zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1808 used = 0;
1809 acc = src_.get_32();
1810 continue;
1811 }
1812 unsigned first_bit_idx =
1813 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) && !(defined(__arm64__) || defined(__arm__))
1814 bm::bsf_asm32(acc);
1815 #else
1816 bm::bit_scan_fwd(acc);
1817 #endif
1818 acc >>= first_bit_idx;
1819 zero_bits += first_bit_idx;
1820 used += first_bit_idx;
1821 break;
1822 } // while
1823
1824 // eat the border bit
1825 //
1826 if (used == (sizeof(acc) * 8))
1827 {
1828 acc = src_.get_32();
1829 used = 1;
1830 }
1831 else
1832 {
1833 ++used;
1834 }
1835 acc >>= 1;
1836
1837 // get the value
1838 unsigned current;
1839
1840 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1841 if (zero_bits <= free_bits)
1842 {
1843 take_accum:
1844 current =
1845 (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1846 acc >>= zero_bits;
1847 used += zero_bits;
1848 goto ret;
1849 }
1850
1851 if (used == (sizeof(acc) * 8))
1852 {
1853 acc = src_.get_32();
1854 used ^= used;
1855 goto take_accum;
1856 }
1857
1858 // take the part
1859 current = acc;
1860 // read the next word
1861 acc = src_.get_32();
1862 used = zero_bits - free_bits;
1863 current |=
1864 ((acc & block_set_table<true>::_left[used]) << free_bits) |
1865 (1 << zero_bits);
1866
1867 acc >>= used;
1868ret:
1869 accum_ = acc;
1870 used_bits_ = used;
1871 return current;
1872}
1873
1874// ----------------------------------------------------------------------
1875
1876template<class TDecoder>
1878{
1879 BM_ASSERT(count);
1880 const unsigned maskFF = ~0u;
1881 unsigned acc = accum_;
1882 unsigned used = used_bits_;
1883
1884 unsigned value = 0;
1885 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1886 if (count <= free_bits)
1887 {
1888 take_accum:
1889 value = acc & (maskFF >> (32 - count));
1890 acc >>= count;
1891 used += count;
1892 goto ret;
1893 }
1894 if (used == (sizeof(acc) * 8))
1895 {
1896 acc = src_.get_32();
1897 used = 0;
1898 goto take_accum;
1899 }
1900 value = acc;
1901 acc = src_.get_32();
1902 used = count - free_bits;
1903 value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1904 acc >>= used;
1905ret:
1906 accum_ = acc;
1907 used_bits_ = used;
1908 return value;
1909}
1910
1911// ----------------------------------------------------------------------
1912
1913template<class TDecoder>
1915{
1916 const unsigned mask = (~0u) >> (32 - 1); // 100000...
1917 unsigned value = accum_ & mask;
1918 if (unsigned free_bits = unsigned(32u - used_bits_); free_bits)
1919 {
1920 accum_ >>= 1; ++used_bits_;
1921 }
1922 else
1923 {
1924 BM_ASSERT(used_bits_ == (sizeof(accum_) * 8));
1925 unsigned a = src_.get_32();
1926 value = a & mask;
1927 used_bits_ = 1;
1928 accum_ = (a >> 1);
1929 }
1930 return value;
1931}
1932
1933// ----------------------------------------------------------------------
1934
1935} // namespace bm
1936
1937#ifdef _MSC_VER
1938#pragma warning(pop)
1939#endif
1940
1941#endif
#define BMNOEXCEPT
Definition bmdef.h:82
#define BMFORCEINLINE
Definition bmdef.h:213
#define BM_ASSERT
Definition bmdef.h:139
Bit manipulation primitives (internal).
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition encoding.h:1792
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:275
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:282
unsigned get_bit() BMNOEXCEPT
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
bit_in(TDecoder &decoder) BMNOEXCEPT
Definition encoding.h:259
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit).
Definition encoding.h:1516
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:1472
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:288
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition encoding.h:1751
unsigned get_bits(unsigned count) BMNOEXCEPT
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based).
Definition encoding.h:1703
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints).
Definition encoding.h:1259
void flush() BMNOEXCEPT
Definition encoding.h:230
bit_out(TEncoder &dest)
Definition encoding.h:185
void put_zero_bits(unsigned count) BMNOEXCEPT
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition encoding.h:1294
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
Definition encoding.h:1095
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
Definition encoding.h:1106
void gamma(unsigned value) BMNOEXCEPT
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
Definition encoding.h:1148
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:207
const unsigned char * start_
Definition encoding.h:115
decoder_base(const unsigned char *buf) BMNOEXCEPT
Definition encoding.h:90
const unsigned char * buf_
Definition encoding.h:114
bm::id64_t get_h64() BMNOEXCEPT
Read h-64-bit.
Definition encoding.h:691
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:105
void seek(int delta) BMNOEXCEPT
change current position
Definition encoding.h:99
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
Definition encoding.h:96
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:93
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition encoding.h:679
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition encoding.h:108
decoder_little_endian(const unsigned char *buf)
Definition encoding.h:952
bool get_32_OR(bm::word_t *w, unsigned count)
Definition encoding.h:1035
void get_32_AND(bm::word_t *w, unsigned count)
Definition encoding.h:1057
Class for decoding data from memory buffer.
Definition encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:751
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:738
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition encoding.h:844
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:786
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
Definition encoding.h:885
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition encoding.h:722
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
Definition encoding.h:713
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:769
unsigned char * position_type
Definition encoding.h:52
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:529
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
Definition encoding.h:589
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition encoding.h:537
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:606
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
Definition encoding.h:398
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition encoding.h:545
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:417
void put_h64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer with h-compression.
Definition encoding.h:628
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition encoding.h:516
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition encoding.h:571
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
Definition encoding.h:555
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition encoding.h:444
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:406
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT
but gat plus value based on its VBR evaluation
Definition encoding.h:487
Elias Gamma decoder.
Definition encoding.h:362
void stop()
Stop decoding sequence.
Definition encoding.h:374
void start()
Start encoding sequence.
Definition encoding.h:369
T operator()(void)
Decode word.
Definition encoding.h:379
gamma_decoder(TBitIO &bin)
Definition encoding.h:364
Functor for Elias Gamma encoding.
Definition encoding.h:341
gamma_encoder(TBitIO &bout)
Definition encoding.h:343
void operator()(T value)
Encode word.
Definition encoding.h:347
bool avx2_or_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
OR array elements against another unaligned array dst |= *src.
Definition bmavx2.h:888
unsigned avx2_and_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition bmavx2.h:777
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
OR array elements against another array (unaligned) dst |= *src.
Definition bmsse_util.h:426
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
AND array elements against another array (unaligned) dst &= *src.
Definition bmsse_util.h:259
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition encoding.h:148
Definition bm.h:78
unsigned int word_t
Definition bmconst.h:39
unsigned compute_h64_mask(unsigned long long w) BMNOEXCEPT
Сompute mask of bytes presense in 64-bit word.
Definition bmutil.h:556
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
Definition bmutil.h:297
BMFORCEINLINE unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
Definition bmutil.h:304
const unsigned set_word_shift
Definition bmconst.h:72
unsigned long long int id64_t
Definition bmconst.h:35
unsigned short gap_word_t
Definition bmconst.h:78
const unsigned set_word_mask
Definition bmconst.h:73
unsigned short short_t
Definition bmconst.h:40
static const unsigned _left[32]
Definition bmconst.h:365