BitMagic-C++
strsvsample03.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example strsvsample03.cpp
20 Example of how to use bm::str_sparse_vector<> - succinct container for
21 bit-transposed string collections
22
23 \sa bm::str_sparse_vector
24*/
25
26/*! \file strsvsample03.cpp
27 \brief Example: str_sparse_vector<> back insert iterator example
28
29 This example loads a sparse vector from an STL container, uses re-mapping
30 to compress it, then serializes and saves the container to disk.
31 Example also illustrates how to check memory footprint.
32*/
33
34#include <iostream>
35#include <string>
36#include <vector>
37#include <random>
38#include <algorithm>
39#include <fstream>
40
41#include "bm.h"
42#include "bmstrsparsevec.h"
43#include "bmsparsevec_serial.h"
44
45
46using namespace std;
47
49
50// define the sparse vector type for 'char' type using bvector as
51// a container of bits for bit-transposed planes
52// 32 - is maximum string length for this container.
53// Memory allocation is dynamic using sparse techniques, so this number
54// just defines the max capacity.
55//
57
58
/// Generate a collection of decimal strings from integers and shuffle it.
///
/// The vector is cleared first, then filled with the decimal text of an
/// integer counter that starts at 10 and stays below 50000. The counter
/// advances by rand() % 3 on each pass, so the step is 0, 1 or 2: a step of 0
/// produces a duplicate entry, a step of 2 skips a value. The filled vector
/// is then shuffled with a std::mt19937 engine seeded from
/// std::random_device, so element order differs between runs.
///
/// @param str_vec  output vector; any previous content is discarded
///
static
void generate_string_set(std::vector<std::string>& str_vec)
{
    const unsigned max_coll = 50000;

    str_vec.resize(0);
    for (unsigned i = 10; i < max_coll; i += rand() % 3)
    {
        // construct the element directly from the temporary,
        // no intermediate string copy
        str_vec.emplace_back(std::to_string(i));
    } // for i

    // shuffle the data set
    //
    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(str_vec.begin(), str_vec.end(), g);
}
80
81
82int main(void)
83{
84 try
85 {
86 str_sv_type str_sv;
87
88 vector<string> str_vec;
89 generate_string_set(str_vec);
90 std::sort(str_vec.begin(), str_vec.end()); // sort the input vector
91
92
93 // load sparse vector from an STL container
94 //
95 {
96 size_t vect_size = 0; // approx std::vector<string> memory usage
97 str_sv_type str_sv_tmp; // temp vector
98 {
100 str_sv_tmp.get_back_inserter();
101 for (auto str : str_vec)
102 {
103 bi = str;
104
105 // some approximate estimate of std::string element cost
106 //
107 size_t str_size = str.size() + sizeof(str);
108 vect_size += str_size;
109 }
110
111 // it is important to use flush, because back inserter is
112 // buffering data. Of cause it flashes automatically on
113 // destruction but explicit flush is somewhat better
114 // because of possible exception is thrown here and not from
115 // destructor.
116 //
117
118 bi.flush();
119
120 cout << "STL vector<string> approx.memory consumption:"
121 << vect_size << endl;
122 }
123
124 // calculate memory footprint
125 //
127 str_sv_tmp.calc_stat(&st);
128
129 cout << "Used memory: " << st.memory_used << std::endl;
130
131
132 // final step is re-mapping, which increses chances for
133 // good memory compression.
134 // A side-effect here is that remapping makes container
135 // effectively read-only.
136 //
137 str_sv.remap_from(str_sv_tmp);
138
140 str_sv.optimize(tb); // optimize the vector
141
142 str_sv.calc_stat(&st);
143 cout << "Used memory after remap and optimization: "
144 << st.memory_used
145 << std::endl;
146 }
147
148 // serialize and save
149 //
150 {
151 std::string fname = "test.sv";
153
155 bm::sparse_vector_serialize(str_sv, sv_lay, tb);
156
157 std::ofstream fout(fname.c_str(), std::ios::binary);
158 if (!fout.good())
159 {
160 return -1;
161 }
162 const char* buf = (char*)sv_lay.buf();
163 fout.write(buf, (unsigned)sv_lay.size());
164 if (!fout.good())
165 {
166 return -1;
167 }
168 fout.close();
169
170 cout << "Saved size: " << sv_lay.size() << endl;
171 }
172
173 }
174 catch(std::exception& ex)
175 {
176 std::cerr << ex.what() << std::endl;
177 return 1;
178 }
179
180
181 return 0;
182}
183
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition bm.h:47
Serialization for sparse_vector<>
string sparse vector based on bit-transposed matrix
Bitvector Bit-vector container with runtime compression of bits.
Definition bm.h:108
Back insert iterator implements buffered insert, faster than generic access assignment.
void flush()
flush the accumulated buffer
sparse vector for strings with compression using bit transposition method
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector.
back_insert_iterator get_back_inserter()
Provide back insert iterator. Back insert iterator implements buffered insertion, which is faster,...
void calc_stat(struct str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
void resize(size_type sz)
resize vector
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
bm::str_sparse_vector< char, bvector_type, 32 > str_sv_type
bm::bvector bvector_type
int main(void)
static void generate_string_set(vector< string > &str_vec)
size_t memory_used
memory usage for all blocks and service tables
Definition bmfunc.h:61
layout class for serialization buffer structure
size_t size() const
return current serialized size
const unsigned char * buf() const
Return serialization buffer pointer.