1#ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2#define BMSPARSEVEC_SERIAL__H__INCLUDED__
26#ifndef BM__H__INCLUDED__
29# error missing include (bm.h or bm64.h)
96 void set_plain(
unsigned i,
unsigned char* ptr,
size_t buf_size)
/// Return serialization buffer pointer (forwards to buf_.buf()).
106 const unsigned char*
buf()
const {
return buf_.buf(); }
/// Return serialization buffer pointer; returns the same buf_.buf() pointer as buf().
108 const unsigned char*
data()
const {
return buf_.buf(); }
274 const unsigned char* buf,
296 const unsigned char* buf,
305 const unsigned char* buf,
/// Load string remap dictionary into the target sparse vector.
/// @param sv             vector that receives the remap data
/// @param remap_buf_ptr  pointer to the serialized remap data
///                       NOTE(review): presumably the position right after the last
///                       deserialized bit-plain — confirm against the callers
309 static void load_remap(SV& sv,
const unsigned char* remap_buf_ptr);
381 const unsigned char* buf,
427 const unsigned char* buf,
471 buffer_coll.calc_stat(&st);
473 buf.resize(st.max_serialize_mem);
476 unsigned char* buf_ptr = buf.data();
482 enc.
put_8((
unsigned char)bo);
484 unsigned char* mbuf1 = enc.
get_pos();
495 size_t addr_bv_size = bvs.
serialize(bv, buf_ptr, buf.size());
496 buf_ptr += addr_bv_size;
502 size_t coll_size = buffer_coll.size();
508 for (
unsigned i = 0; i < buffer_coll.size(); ++i)
511 size_t sz = cbuf.size();
517 for (
unsigned i = 0; i < buffer_coll.size(); ++i)
520 size_t sz = cbuf.size();
521 enc.
memcpy(cbuf.buf(), sz);
524 buf.resize(enc.
size());
531 const unsigned char* buf,
538 unsigned char h1 = dec.
get_8();
539 unsigned char h2 = dec.
get_8();
542 if (h1 !=
'B' && h2 !=
'C')
554 const unsigned char* bv_buf_ptr = dec.
get_pos();
564 dec.
seek((
int)addr_bv_size);
570 if (coll_size != addr_cnt)
575 typedef std::vector<unsigned>::size_type vect_size_type;
576 std::vector<bm::id64_t> buf_size_vec;
577 buf_size_vec.resize(vect_size_type(coll_size));
579 for (
unsigned i = 0; i < coll_size; ++i)
582 buf_size_vec[i] = sz;
588 buf_vect.resize(vect_size_type(coll_size));
589 for (
unsigned i = 0; i < coll_size; ++i)
608 bvs_.gap_length_serialization(
false);
622 bv_ref_.build(sv.get_bmatrix());
631 typename SV::statistics sv_stat;
632 sv.calc_stat(&sv_stat);
633 unsigned char* buf = sv_layout.
reserve(sv_stat.max_serialize_mem);
636 unsigned plains = sv.stored_plains();
639 unsigned h_size = 1 + 1 +
656 build_xor_ref_vector(sv);
657 bvs_.set_ref_vectors(&bv_ref_);
664 unsigned char* buf_ptr = buf + h_size;
667 for (i = 0; i < plains; ++i)
669 typename SV::bvector_type_const_ptr bv = sv.get_plain(i);
677 unsigned idx = (unsigned)bv_ref_.find(i);
679 bvs_.set_curr_ref_idx(idx);
683 bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
685 sv_layout.
set_plain(i, buf_ptr, buf_size);
687 if (sv_stat.max_serialize_mem > buf_size)
689 sv_stat.max_serialize_mem -= buf_size;
695 bvs_.set_ref_vectors(0);
702 bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
706 const unsigned char* matrix_buf = sv.get_remap_buffer();
712 enc_m.
memcpy(matrix_buf, remap_size);
719 buf_ptr += enc_m.
size();
722 sv_layout.
resize(
size_t(buf_ptr - buf));
730 if (sv.is_compressed())
735 enc.
put_8((
unsigned char)bo);
737 unsigned char matr_s_ser = 1;
743 enc.
put_8(matr_s_ser);
745 enc.
put_64(sv.size_internal());
749 for (i = 0; i < plains; ++i)
751 const unsigned char* p = sv_layout.
get_plain(i);
757 size_t offset = size_t(p - buf);
782 alloc_.free_bit_block(temp_block_);
789 const unsigned char* buf,
792 idx_range_set_ =
true; idx_range_from_ = from; idx_range_to_ = to;
797 unsigned char matr_s_ser = 0;
798 unsigned plains = load_header(dec, sv, matr_s_ser);
809 load_plains_off_table(dec, plains);
812 plains = (unsigned)load_null_plain(sv,
int(plains), buf, 0);
819 bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
823 idx_range_set_ =
false;
828 idx_range_set_ =
true; idx_range_from_ = sv_left; idx_range_to_ = sv_right;
832 deserialize_plains(sv, plains, buf, 0);
834 op_deserial_.set_ref_vectors(0);
835 deserial_.set_ref_vectors(0);
844 load_remap(sv, remap_buf_ptr_);
850 idx_range_set_ =
false;
857 const unsigned char* buf,
863 unsigned char matr_s_ser = 0;
864 unsigned plains = load_header(dec, sv, matr_s_ser);
875 load_plains_off_table(dec, plains);
877 plains = (unsigned)load_null_plain(sv,
int(plains), buf, mask_bv);
886 rsc_mask_bv_.clear(
true);
888 rsc_compressor_.compress(rsc_mask_bv_, *bv_null, not_null_mask_bv_);
889 mask_bv = &rsc_mask_bv_;
895 rsc_mask_bv_.find_range(idx_range_from_, idx_range_to_);
900 deserialize_plains(sv, plains, buf, mask_bv);
902 op_deserial_.set_ref_vectors(0);
903 deserial_.set_ref_vectors(0);
912 load_remap(sv, remap_buf_ptr_);
923 bm::decoder& dec, SV& sv,
unsigned char& matr_s_ser)
925 unsigned char h1 = dec.
get_8();
926 unsigned char h2 = dec.
get_8();
928 BM_ASSERT(h1 ==
'B' && (h2 ==
'M' || h2 ==
'C'));
930 bool sig2_ok = (h2 ==
'M' || h2 ==
'C');
931 if (h1 !=
'B' || !sig2_ok)
932 raise_invalid_header();
934 unsigned char bv_bo = dec.
get_8(); (void) bv_bo;
935 unsigned plains = dec.
get_8();
938 matr_s_ser = dec.
get_8();
939 plains = (unsigned) dec.
get_64();
945 raise_invalid_64bit();
949 unsigned sv_plains = sv.stored_plains();
950 if (!plains || plains > sv_plains)
951 raise_invalid_bitdepth();
961 const unsigned char* buf,
964 if (mask_bv && !idx_range_set_)
965 idx_range_set_ = mask_bv->find_range(idx_range_from_, idx_range_to_);
967 op_deserial_.set_ref_vectors(&bv_ref_);
968 deserial_.set_ref_vectors(&bv_ref_);
973 for (
int i =
int(plains-1); i >= 0; --i)
975 size_t offset = off_vect_[unsigned(i)];
978 const unsigned char* bv_buf_ptr = buf + offset;
982 bv_ref_.add(bv,
unsigned(i));
992 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
993 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
998 deserial_.set_range(idx_range_from_, idx_range_to_);
999 deserial_.deserialize(*bv, bv_buf_ptr);
1008 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1009 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1011 bv->keep_range(idx_range_from_, idx_range_to_);
1016 deserial_.set_range(idx_range_from_, idx_range_to_);
1017 deserial_.deserialize(*bv, bv_buf_ptr);
1018 bv->keep_range(idx_range_from_, idx_range_to_);
1023 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1028 deserial_.unset_range();
1034template<
typename SV>
1037 const unsigned char* buf,
1041 if (!sv.is_nullable())
1044 size_t offset = off_vect_[unsigned(i)];
1050 const unsigned char* bv_buf_ptr = buf + offset;
1052 bv_ref_.add(bv,
unsigned(i));
1057 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1058 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1067 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1068 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1070 bv->keep_range(idx_range_from_, idx_range_to_);
1075 deserial_.set_range(idx_range_from_, idx_range_to_);
1076 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1077 bv->keep_range(idx_range_from_, idx_range_to_);
1078 deserial_.unset_range();
1082 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1093template<
typename SV>
1097 off_vect_.resize(plains);
1098 for (
unsigned i = 0; i < plains; ++i)
1100 size_t offset = (size_t) dec.
get_64();
1101 off_vect_[i] = offset;
1107template<
typename SV>
1109 const unsigned char* remap_buf_ptr)
1115 unsigned char rh = dec_m.
get_8();
1122 size_t remap_size = (size_t) dec_m.
get_64();
1123 unsigned char* remap_buf = sv.init_remap_buffer();
1125 size_t target_remap_size = sv.remap_size();
1126 if (!remap_size || !remap_buf || remap_size != target_remap_size)
1129 throw std::logic_error(
"Invalid serialization format (remap size)");
1131 BM_THROW(BM_ERR_SERIALFORMAT);
1134 dec_m.
memcpy(remap_buf, remap_size);
1135 unsigned char end_tok = dec_m.
get_8();
1139 throw std::logic_error(
"Invalid serialization format");
1141 BM_THROW(BM_ERR_SERIALFORMAT);
1149 throw std::logic_error(
"Invalid serialization format (remap error)");
1151 BM_THROW(BM_ERR_SERIALFORMAT);
1158template<
typename SV>
1162 throw std::logic_error(
"Invalid serialization signature header");
1164 BM_THROW(BM_ERR_SERIALFORMAT);
1170template<
typename SV>
1174 throw std::logic_error(
"Invalid serialization target (64-bit BLOB)");
1176 BM_THROW(BM_ERR_SERIALFORMAT);
1182template<
typename SV>
1186 throw std::logic_error(
"Invalid serialization target (bit depth)");
1188 BM_THROW(BM_ERR_SERIALFORMAT);
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Sparse container sparse_vector<> for integer types using bit-transposition transform.
pre-processor un-defines to avoid global space pollution (internal)
List of reference bit-vectors with their true index associations.
@ opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
Deserializer for compressed collections.
CBC::buffer_type buffer_type
CBC::bvector_type bvector_type
CBC::statistics statistics_type
CBC::container_type container_type
CBC compressed_collection_type
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
CBC compressed_collection_type
CBC::bvector_type bvector_type
CBC::statistics statistics_type
CBC::address_resolver_type address_resolver_type
CBC::buffer_type buffer_type
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
void seek(int delta) BMNOEXCEPT
change current position
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Class for decoding data from memory buffer.
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Deserializer for bit-vector.
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Algorithms for rank compression of bit-vector.
Bit-vector serialization class.
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
byte_buffer< allocator_type > buffer
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase.
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
sparse vector de-serializer
sparse_vector_deserializer()
allocator_pool_type pool_
bm::operation_deserializer< bvector_type > op_deserial_
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
bm::rank_compressor< bvector_type > rsc_compressor_
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
static void raise_invalid_header()
throw error on incorrect deserialization
SV::bvector_type bvector_type
size_type idx_range_from_
bvector_type not_null_mask_bv_
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type alloc_type
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
bm::heap_vector< size_t, alloc_type, true > off_vect_
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to)
~sparse_vector_deserializer()
void load_plains_off_table(bm::decoder &dec, unsigned plains)
load offset table
bvector_type * bvector_type_ptr
static void raise_invalid_64bit()
throw error on incorrect deserialization
const bvector_type * bvector_type_const_ptr
bvector_type rsc_mask_bv_
bm::deserializer< bvector_type, bm::decoder > deserial_
const unsigned char * remap_buf_ptr_
int load_null_plain(SV &sv, int plains, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plain (returns new plains count)
bv_ref_vector_type bv_ref_
void deserialize_plains(SV &sv, unsigned plains, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector plains
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv)
static void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
void deserialize(SV &sv, const unsigned char *buf)
SV::value_type value_type
sparse_vector_serializer()
void set_bookmarks(bool enable, unsigned bm_interval=256)
Add skip-markers for faster range deserialization.
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
bvector_type * bvector_type_ptr
void build_xor_ref_vector(const SV &sv)
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
SV::value_type value_type
const bvector_type * bvector_type_const_ptr
void set_xor_ref(bool is_enabled)
Turn ON and OFF XOR compression of sparse vectors.
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
SV::bvector_type bvector_type
bm::serializer< bvector_type > bvs_
bv_ref_vector_type bv_ref_
bool is_xor_ref() const
Get XOR reference compression status (enabled/disabled)
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
ByteOrder
Byte orders recognized by the library.
unsigned long long int id64_t
ad-hoc conditional expressions
static ByteOrder byte_order()
layout class for serialization buffer structure
SV::bvector_type bvector_type
size_t size() const
return current serialized size
serializer< bvector_type >::buffer buffer_type
void resize(size_t ssize)
Set new serialized size.
unsigned char * reserve(size_t capacity)
resize capacity
const unsigned char * get_plain(unsigned i) const
Get plain pointer.
void set_plain(unsigned i, unsigned char *ptr, size_t buf_size)
Set plain output pointer and size.
size_t plane_size_[SV::sv_plains]
serialized plain size
unsigned char * plain_ptrs_[SV::sv_plains]
pointers on serialized bit-plains
buffer_type buf_
serialization buffer
sparse_vector_serial_layout()
const unsigned char * buf() const
Return serialization buffer pointer.
size_t capacity() const
return serialization buffer capacity
void freemem()
free memory
const unsigned char * data() const
Return serialization buffer pointer.
SV::value_type value_type
~sparse_vector_serial_layout()