BitMagic-C++
svsample06.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example svsample06.cpp
20  Search/scan for elements in unordered, non-unique sparse vector
21 
22  \sa bm::sparse_vector<>::const_iterator
23  \sa bm::sparse_vector<>::back_insert_iterator
24  \sa bm::sparse_vector_scanner
25 */
26 
27 /*! \file svsample06.cpp
28  \brief Example: sparse_vector<> scan search (non-ordered set functionality)
29 */
30 
31 #include <iostream>
32 #include <vector>
33 #include <chrono>
34 #include <algorithm>
35 #include <random>
36 #include <stdexcept>
37 
38 #include "bm.h"
39 #include "bmsparsevec.h"
40 #include "bmsparsevec_algo.h"
41 #include "bmtimer.h"
42 
43 
44 using namespace std;
45 
47 
48 
49 // ----------------------------------------------------
50 // Global parameters and types
51 // ----------------------------------------------------
52 
53 const unsigned value_max = 1250000; // range of variants of events [0..max]
54 const unsigned test_size = 250000000; // vector size to generate
55 
56 // -------------------------------------------
57 // Random generator
58 // -------------------------------------------
59 
60 std::random_device rand_dev;
61 std::mt19937 gen(rand_dev());
62 std::uniform_int_distribution<> rand_dis(1, value_max); // generate uniform numebrs for [1, vector_max]
63 
64 // timing storage for benchmarking
66 
67 
68 
69 // Function to generate test vector set with some NULL values stored as a
70 // separate bit-bector
71 //
72 static
73 void generate_test_set(std::vector<unsigned>& vect,
74  bm::bvector<>& bv_null,
76 {
77  // back insert iterator is faster than random element access for sparse vector
78  //
80 
81  vect.resize(test_size);
82  bv_null.reset();
83 
84  for (unsigned i = 0; i < test_size; ++i)
85  {
86  unsigned v = unsigned(rand_dis(gen));
87 
88  vect[i] = v;
89  bv_null[i] = true; // not NULL(assigned) element
90 
91  *bi = v; // push back an element to sparse vector
92 
93  if (i % 64 == 0)
94  {
95  bi.add_null(5); // add 5 unassigned elements using back inserter
96  i += 5; // insert a small NULL plate (unassigned values)
97  }
98  } // for
99 }
100 
101 
102 // plain scan in std::vector<>, matching values are indexed
103 // in result bit-vector (subset projection)
104 // values are added, so multiple calls result in subset addition
105 static
106 void vector_search(const std::vector<unsigned>& vect,
107  const bm::bvector<>& bv_null,
108  unsigned value,
109  bm::bvector<>& bv_res)
110 {
111  bv_res.init(); // always use init() if set_bit_no_check()
112  for (size_t i = 0; i < vect.size(); ++i)
113  {
114  if (vect[i] == value)
115  bv_res.set_bit_no_check((bm::id_t)i);
116  } // for
117  bv_res &= bv_null; // correct results to only include non-NULL values
118 }
119 
120 
121 inline
123 {
124  cout << "( count = " << bv.count() << ")" << ": [";
125 
127  for (; en.valid(); ++en)
128  cout << *en << ", ";
129  cout << "]" << endl;
130 }
131 
132 
133 int main(void)
134 {
135  try
136  {
137  // First, lets run, simple (printable) search case
138  //
139  {
141 
142  sv.set(2, 25);
143  sv.set(3, 35);
144  sv.set(7, 75);
145  sv.set(1000, 2000);
146  sv.set(256, 2001);
147  sv.set(77, 25);
148 
149  bm::bvector<> bv_found; // search results vector
150 
151  bm::sparse_vector_scanner<sparse_vector_u32> scanner; // scanner class
152  scanner.find_eq(sv, 25, bv_found); // seach for all values == 25
153 
154  print_bvector(bv_found); // print results
155 
156  scanner.invert(sv, bv_found); // invert search results to NOT EQ
157  print_bvector(bv_found); // print all != 25
158  }
159 
160 
161  std::vector<unsigned> vect;
162  bm::bvector<> bv_null;
164 
165  {
166  bm::chrono_taker tt1("0. test set generate ", 1, &timing_map);
167  generate_test_set(vect, bv_null, sv);
168  }
169 
170  unsigned search_repeats = 500;
171 
172  // generate a search vector for benchmarking
173  //
174  std::vector<unsigned> search_vect;
175  {
176  bm::bvector<> bv_tmp;
177  search_vect.reserve(search_repeats);
178  for (unsigned i = 0; i < search_repeats;)
179  {
180  bm::id_t idx = bm::id_t(rand_dis(gen));
181  if (!bv_tmp.test(idx)) // check if number is unique
182  {
183  search_vect.push_back(idx);
184  bv_tmp[idx] = 1;
185  ++i;
186  }
187  }
188  }
189 
190  // run benchmarks
191  //
192  bm::bvector<> bv_res1;
193  bm::bvector<> bv_res2;
194  bm::bvector<> bv_res3;
195 
196  {
197  bm::chrono_taker tt1("1. std::vector<> scan ", search_repeats, &timing_map);
198 
199  for (unsigned i = 0; i < search_repeats; ++i)
200  {
201  unsigned vs = search_vect[i];
202  vector_search(vect, bv_null, vs, bv_res1);
203  } // for
204  }
205 
206  {
207  bm::chrono_taker tt1("2. sparse_vector<> scan ", search_repeats, &timing_map);
208 
210  scanner.find_eq(sv, search_vect.begin(), search_vect.end(), bv_res2);
211  }
212 
213  // check jus in case if results look correct
214  if (bv_res1.compare(bv_res2) != 0)
215  {
216  std::cerr << "2. Search result mismatch!" << std::endl;
217  }
218 
219  {
220  bv_res3.init(); // always init before calling "set_bit_no_check()"
221 
222  bm::chrono_taker tt1("3. sparse_vector<>::const_iterator search ", search_repeats, &timing_map);
223 
224  // prepare a unique search set
225  bm::bvector<> bv_search(bm::BM_GAP);
226  bm::combine_or(bv_search, search_vect.begin(), search_vect.end());
227 
230  for (; it != it_end; ++it)
231  {
232  unsigned v = *it;
233  if (bv_search.test(v))
234  {
235  bv_res3.set_bit_no_check(it.pos());
236  }
237  } // for
238  }
239 
240  // paranoiya check
241  if (bv_res1.compare(bv_res3) != 0)
242  {
243  std::cerr << "3. Search result mismatch!" << std::endl;
244  }
245 
246 
248 
249  }
250  catch(std::exception& ex)
251  {
252  std::cerr << ex.what() << std::endl;
253  return 1;
254  }
255 
256  return 0;
257 }
258 
generate_test_set
static void generate_test_set(std::vector< unsigned > &vect, bm::bvector<> &bv_null, sparse_vector_u32 &sv)
Definition: svsample06.cpp:73
bm::sparse_vector_scanner::invert
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
Definition: bmsparsevec_algo.h:875
bmsparsevec_algo.h
Algorithms for sparse_vector<>
bm::sparse_vector::back_insert_iterator
friend back_insert_iterator
Definition: bmsparsevec.h:330
bm::bvector::iterator_base::valid
bool valid() const
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
Definition: bm.h:277
bm::sparse_vector::const_iterator
friend const_iterator
Definition: bmsparsevec.h:329
bm::bvector::count
size_type count() const
population cout (count of ON bits)
Definition: bm.h:2487
bm::sparse_vector
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:81
bm::chrono_taker
Utility class to collect performance measurements and statistics.
Definition: bmtimer.h:39
bm::bvector::reset
bvector< Alloc > & reset()
Clears every bit in the bitvector.
Definition: bm.h:1623
timing_map
bm::chrono_taker::duration_map_type timing_map
Definition: svsample06.cpp:65
bm::bvector::set_bit_no_check
void set_bit_no_check(size_type n)
Set bit without checking preconditions (size, etc)
Definition: bm.h:3425
bmsparsevec.h
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bm::bvector::enumerator
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:590
bm::sparse_vector_scanner
algorithms for sparse_vector scan/seach
Definition: bmsparsevec_algo.h:161
bm::bvector::compare
int compare(const bvector< Alloc > &bvect) const
Lexicographical comparison with a bitvector.
Definition: bm.h:3224
test_size
const unsigned test_size
Definition: svsample06.cpp:54
print_bvector
void print_bvector(const bm::bvector<> &bv)
Definition: svsample06.cpp:122
bm::bvector<>
bm::use_null
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:216
vector_search
static void vector_search(const std::vector< unsigned > &vect, const bm::bvector<> &bv_null, unsigned value, bm::bvector<> &bv_res)
Definition: svsample06.cpp:106
bm::bvector::test
bool test(size_type n) const
returns true if bit n is set and false is bit n is 0.
Definition: bm.h:1798
bm::sparse_vector::end
const_iterator end() const
Provide const iterator access to the end
Definition: bmsparsevec.h:472
bm::bvector::init
void init()
Explicit post-construction initialization.
Definition: bm.h:2427
bmtimer.h
Timing utilities for benchmarking (internal)
sparse_vector_u32
bm::sparse_vector< bm::id_t, bm::bvector<> > sparse_vector_u32
Definition: svsample06.cpp:46
bm::chrono_taker::ct_ops_per_sec
@ ct_ops_per_sec
Definition: bmtimer.h:59
bm::id_t
unsigned int id_t
Definition: bmconst.h:37
bm::BM_GAP
@ BM_GAP
GAP compression is ON.
Definition: bmconst.h:145
bm::sparse_vector::begin
const_iterator begin() const
Provide const iterator access to container content
Definition: bmsparsevec.h:1910
value_max
const unsigned value_max
Definition: svsample06.cpp:53
gen
std::mt19937 gen(rand_dev())
bm::combine_or
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
rand_dis
std::uniform_int_distribution rand_dis(1, value_max)
bm::chrono_taker::duration_map_type
std::map< std::string, statistics > duration_map_type
test name to duration map
Definition: bmtimer.h:65
bm::sparse_vector::get_back_inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
Definition: bmsparsevec.h:483
main
int main(void)
Definition: svsample06.cpp:133
bm::bvector::end
enumerator end() const
Returns enumerator pointing on the next bit after the last.
Definition: bm.h:2128
bm::chrono_taker::print_duration_map
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
Definition: bmtimer.h:127
rand_dev
std::random_device rand_dev
Definition: svsample06.cpp:60
bm::bvector::first
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition: bm.h:2122
bm.h
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
bm::sparse_vector::set
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec.h:1460
bm::sparse_vector_scanner::find_eq
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
Definition: bmsparsevec_algo.h:1713