BitMagic-C++
bmaggregator.h
Go to the documentation of this file.
1#ifndef BMAGGREGATOR__H__INCLUDED__
2#define BMAGGREGATOR__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmaggregator.h
22 \brief Algorithms for fast aggregation of N bvectors
23*/
24
25
26#ifndef BM__H__INCLUDED__
27// BitMagic utility headers do not include main "bm.h" declaration
28// #include "bm.h" or "bm64.h" explicitly
29# error missing include (bm.h or bm64.h)
30#endif
31
32
33#include "bmfunc.h"
34#include "bmdef.h"
35
36#include "bmalgo_impl.h"
37
38
39namespace bm
40{
41
42
43/**
44 Algorithms for fast aggregation of a group of bit-vectors
45
46 Current implementation can aggregate up to max_aggregator_cap vectors
47 (TODO: remove this limitation)
48
49 Algorithms of this class use cache locality optimizations and efficient
50 on cases, wehen we need to apply the same logical operation (aggregate)
51 more than 2x vectors.
52
53 TARGET = BV1 or BV2 or BV3 or BV4 ...
54
55 @ingroup setalgo
56*/
57template<typename BV>
59{
60public:
61 typedef BV bvector_type;
62 typedef typename BV::size_type size_type;
65
67
68 /// Maximum aggregation capacity in one pass
70 {
72 };
73
74 /// Codes for aggregation operations which can be pipelined for efficient execution
75 ///
81
89
90public:
91
92 /*! @name Construction and setup */
93 //@{
96
97 /**
98 \brief set on-the-fly bit-block compression
99 By default aggregator does not try to optimize result, but in some cases
100 it can be quite a lot faster than calling bvector<>::optimize() later
101 (because block data sits in CPU cache).
102
103 \param opt - optimization mode (full compression by default)
104 */
107 { opt_mode_ = opt; }
108 //@}
109
110
111 // -----------------------------------------------------------------------
112
113 /*! @name Methods to setup argument groups and run operations on groups */
114 //@{
115
116 /**
117 Attach source bit-vector to a argument group (0 or 1).
118 Arg group 1 used for fused operations like (AND-SUB)
119 \param bv - input bit-vector pointer to attach
120 \param agr_group - input argument group (0 - default, 1 - fused op)
121
122 @return current arg group size (0 if vector was not added (empty))
123 @sa reset
124 */
125 unsigned add(const bvector_type* bv, unsigned agr_group = 0) BMNOEXCEPT;
126
127 /**
128 Reset aggregate groups, forget all attached vectors
129 */
131
132 /**
133 Aggregate added group of vectors using logical OR
134 Operation does NOT perform an explicit reset of arg group(s)
135
136 \param bv_target - target vector (input is arg group 0)
137
138 @sa add, reset
139 */
140 void combine_or(bvector_type& bv_target);
141
142 /**
143 Aggregate added group of vectors using logical AND
144 Operation does NOT perform an explicit reset of arg group(s)
145
146 \param bv_target - target vector (input is arg group 0)
147
148 @sa add, reset
149 */
150 void combine_and(bvector_type& bv_target);
151
152 /**
153 Aggregate added group of vectors using fused logical AND-SUB
154 Operation does NOT perform an explicit reset of arg group(s)
155
156 \param bv_target - target vector (input is arg group 0(AND), 1(SUB) )
157
158 \return true if anything was found
159
160 @sa add, reset
161 */
163
164
165 /**
166 Aggregate added group of vectors using fused logical AND-SUB
167 Operation does NOT perform an explicit reset of arg group(s)
168 Operation can terminate early if anything was found.
169
170 \param bv_target - target vector (input is arg group 0(AND), 1(SUB) )
171 \param any - if true, search result will terminate of first found result
172
173 \return true if anything was found
174
175 @sa add, reset
176 */
177 bool combine_and_sub(bvector_type& bv_target, bool any);
178
180
181
182 /**
183 Aggregate added group of vectors using SHIFT-RIGHT and logical AND
184 Operation does NOT perform an explicit reset of arg group(s)
185
186 \param bv_target - target vector (input is arg group 0)
187
188 @return bool if anything was found
189
190 @sa add, reset
191 */
193
194 /**
195 Set search hint for the range, where results needs to be searched
196 (experimental for internal use).
197 */
199
200 //@}
201
202 // -----------------------------------------------------------------------
203
204 /*! @name Logical operations (C-style interface) */
205 //@{
206
207 /**
208 Aggregate group of vectors using logical OR
209 \param bv_target - target vector
210 \param bv_src - array of pointers on bit-vector aggregate arguments
211 \param src_size - size of bv_src (how many vectors to aggregate)
212 */
213 void combine_or(bvector_type& bv_target,
214 const bvector_type_const_ptr* bv_src, unsigned src_size);
215
216 /**
217 Aggregate group of vectors using logical AND
218 \param bv_target - target vector
219 \param bv_src - array of pointers on bit-vector aggregate arguments
220 \param src_size - size of bv_src (how many vectors to aggregate)
221 */
222 void combine_and(bvector_type& bv_target,
223 const bvector_type_const_ptr* bv_src, unsigned src_size);
224
225 /**
226 Fusion aggregate group of vectors using logical AND MINUS another set
227
228 \param bv_target - target vector
229 \param bv_src_and - array of pointers on bit-vectors for AND
230 \param src_and_size - size of AND group
231 \param bv_src_sub - array of pointers on bit-vectors for SUBstract
232 \param src_sub_size - size of SUB group
233 \param any - flag if caller needs any results asap (incomplete results)
234
235 \return true when found
236 */
238 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
239 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
240 bool any);
241
243 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
244 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size);
245
246 /**
247 Fusion aggregate group of vectors using SHIFT right with AND
248
249 \param bv_target - target vector
250 \param bv_src_and - array of pointers on bit-vectors for AND masking
251 \param src_and_size - size of AND group
252 \param any - flag if caller needs any results asap (incomplete results)
253
254 \return true when found
255 */
257 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
258 bool any);
259
260
261 //@}
262
263 // -----------------------------------------------------------------------
264
265 /*! @name Horizontal Logical operations used for tests (C-style interface) */
266 //@{
267
268 /**
269 Horizontal OR aggregation (potentially slower) method.
270 \param bv_target - target vector
271 \param bv_src - array of pointers on bit-vector aggregate arguments
272 \param src_size - size of bv_src (how many vectors to aggregate)
273 */
275 const bvector_type_const_ptr* bv_src, unsigned src_size);
276 /**
277 Horizontal AND aggregation (potentially slower) method.
278 \param bv_target - target vector
279 \param bv_src - array of pointers on bit-vector aggregate arguments
280 \param src_size - size of bv_src (how many vectors to aggregate)
281 */
283 const bvector_type_const_ptr* bv_src, unsigned src_size);
284
285 /**
286 Horizontal AND-SUB aggregation (potentially slower) method.
287 \param bv_target - target vector
288 \param bv_src_and - array of pointers on bit-vector to AND aggregate
289 \param src_and_size - size of bv_src_and
290 \param bv_src_sub - array of pointers on bit-vector to SUB aggregate
291 \param src_sub_size - size of bv_src_sub
292 */
294 const bvector_type_const_ptr* bv_src_and,
295 unsigned src_and_size,
296 const bvector_type_const_ptr* bv_src_sub,
297 unsigned src_sub_size);
298
299 //@}
300
301
302 // -----------------------------------------------------------------------
303
304 /*! @name Pipeline operations */
305 //@{
306
307 /** Get current operation code */
308 int get_operation() const BMNOEXCEPT { return operation_; }
309
310 /** Set operation code for the aggregator */
311 void set_operation(int op_code) BMNOEXCEPT { operation_ = op_code; }
312
313 /**
314 Prepare operation, create internal resources, analyse dependencies.
315 Prerequisites are: that operation is set and all argument vectors are added
316 */
317 void stage(bm::word_t* temp_block);
318
319 /**
320 Run a step of current arrgegation operation
321 */
322 operation_status run_step(unsigned i, unsigned j);
323
324 operation_status get_operation_status() const { return operation_status_; }
325
326 const bvector_type* get_target() const { return bv_target_; }
327
328 bm::word_t* get_temp_block() { return ar_->tb1; }
329
330 //@}
331
332protected:
334 typedef typename BV::block_idx_type block_idx_type;
335
336
337 void combine_or(unsigned i, unsigned j,
338 bvector_type& bv_target,
339 const bvector_type_const_ptr* bv_src, unsigned src_size);
340
341 void combine_and(unsigned i, unsigned j,
342 bvector_type& bv_target,
343 const bvector_type_const_ptr* bv_src, unsigned src_size);
344
345 digest_type combine_and_sub(unsigned i, unsigned j,
346 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
347 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
348 int* is_result_full);
349
351 const bvector_type_const_ptr* bv_src, unsigned src_size);
352
353 bool combine_shift_right_and(unsigned i, unsigned j,
354 bvector_type& bv_target,
355 const bvector_type_const_ptr* bv_src, unsigned src_size);
356
357 static
358 unsigned resize_target(bvector_type& bv_target,
359 const bvector_type_const_ptr* bv_src,
360 unsigned src_size,
361 bool init_clear = true);
362
363 static
365 unsigned src_size) BMNOEXCEPT;
366
368 unsigned src_size,
369 unsigned i, unsigned j,
370 unsigned* arg_blk_count,
371 unsigned* arg_blk_gap_count) BMNOEXCEPT;
372
374 unsigned src_size,
375 unsigned i, unsigned j,
376 unsigned* arg_blk_count,
377 unsigned* arg_blk_gap_count) BMNOEXCEPT;
378
379
381 unsigned i, unsigned j, unsigned block_count);
382
383 void process_gap_blocks_or(unsigned block_count);
384
385 digest_type process_bit_blocks_and(unsigned block_count, digest_type digest);
386
387 digest_type process_gap_blocks_and(unsigned block_count, digest_type digest);
388
389 bool test_gap_blocks_and(unsigned block_count, unsigned bit_idx);
390 bool test_gap_blocks_sub(unsigned block_count, unsigned bit_idx);
391
392 digest_type process_bit_blocks_sub(unsigned block_count, digest_type digest);
393
394 digest_type process_gap_blocks_sub(unsigned block_count, digest_type digest);
395
396 static
397 unsigned find_effective_sub_block_size(unsigned i,
398 const bvector_type_const_ptr* bv_src,
399 unsigned src_size,
400 bool top_null_as_zero) BMNOEXCEPT;
401
402 static
403 bool any_carry_overs(const unsigned char* carry_overs,
404 unsigned co_size) BMNOEXCEPT;
405
406 /**
407 @return carry over
408 */
409 static
411 const bm::word_t* BMRESTRICT arg_blk,
412 digest_type& BMRESTRICT digest,
413 unsigned carry_over) BMNOEXCEPT;
414
415 static
417 unsigned k, unsigned i, unsigned j) BMNOEXCEPT;
418
420
421private:
422 /// Memory arena for logical operations
423 ///
424 /// @internal
425 struct arena
426 {
428 BM_DECLARE_TEMP_BLOCK(tb_opt); ///< temp block for results optimization
429 const bm::word_t* v_arg_or_blk[max_aggregator_cap]; ///< source blocks list (OR)
430 const bm::gap_word_t* v_arg_or_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (OR)
431 const bm::word_t* v_arg_and_blk[max_aggregator_cap]; ///< source blocks list (AND)
432 const bm::gap_word_t* v_arg_and_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (AND)
433
434 bvector_type_const_ptr arg_bv0[max_aggregator_cap]; ///< arg group 0
435 bvector_type_const_ptr arg_bv1[max_aggregator_cap]; ///< arg group 1
436 unsigned char carry_overs_[max_aggregator_cap]; /// carry over flags
437 };
438
439 aggregator(const aggregator&) = delete;
440 aggregator& operator=(const aggregator&) = delete;
441
442private:
443 arena* ar_; ///< data arena ptr (heap allocated)
444 unsigned arg_group0_size = 0;
445 unsigned arg_group1_size = 0;
446 allocator_pool_type pool_; ///< pool for operations with cyclic mem.use
447
448 bm::word_t* temp_blk_= 0; ///< external temp block ptr
449 int operation_ = 0; ///< operation code (default: not defined)
450 operation_status operation_status_ = op_undefined;
451 bvector_type* bv_target_ = 0; ///< target bit-vector
452 unsigned top_block_size_ = 0; ///< operation top block (i) size
453
454 // search range setting (hint) [from, to]
455 bool range_set_ = false; ///< range flag
456 size_type range_from_ = bm::id_max; ///< search from
457 size_type range_to_ = bm::id_max; ///< search to
458
459 typename bvector_type::optmode opt_mode_;
460
461};
462
463
464
465
466// ------------------------------------------------------------------------
467//
468// ------------------------------------------------------------------------
469
470/**
471 Experimental method ro run multiple aggregators in sync
472
473 Pipeleine algorithm walts the sequence of iterators (pointers on aggregators)
474 and run them all via aggregator<>::run_step() method
475
476 @param first - iterator (pointer on aggregator)
477 @param last - iterator (pointer on aggregator)
478 @ingroup setalgo
479*/
480template<typename Agg, typename It>
481void aggregator_pipeline_execute(It first, It last)
482{
483 bm::word_t* tb = (*first)->get_temp_block();
484
485 int pipeline_size = 0;
486 for (It it = first; it != last; ++it, ++pipeline_size)
487 {
488 Agg& agg = *(*it);
489 agg.stage(tb);
490 }
491 for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
492 {
493 unsigned j = 0;
494 do
495 {
496 // run all aggregators for the [i,j] coordinate
497 unsigned w = 0;
498 for (It it = first; it != last; ++it, ++w)
499 {
500 Agg& agg = *(*it);
501 auto op_st = agg.get_operation_status();
502 if (op_st != Agg::op_done)
503 {
504 op_st = agg.run_step(i, j);
505 pipeline_size -= (op_st == Agg::op_done);
506 }
507 } // for it
508 if (pipeline_size <= 0)
509 return;
510
511 } while (++j < bm::set_sub_array_size);
512
513 } // for i
514}
515
516
517// ------------------------------------------------------------------------
518//
519// ------------------------------------------------------------------------
520
521
522template<typename BV>
524: opt_mode_(bvector_type::opt_none)
525{
526 ar_ = (arena*) bm::aligned_new_malloc(sizeof(arena));
527}
528
529// ------------------------------------------------------------------------
530
531template<typename BV>
533{
534 BM_ASSERT(ar_);
535 bm::aligned_free(ar_);
536 delete bv_target_;
537}
538
539// ------------------------------------------------------------------------
540
541template<typename BV>
543{
544 arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
545 operation_status_ = op_undefined;
546 range_set_ = false;
547 range_from_ = range_to_ = bm::id_max;
548}
549
550// ------------------------------------------------------------------------
551
552template<typename BV>
554{
555 range_from_ = from; range_to_ = to;
556 range_set_ = true;
557}
558
559// ------------------------------------------------------------------------
560
561template<typename BV>
564{
565 if (!bv_target_)
566 {
567 bv_target_ = new bvector_type(); //TODO: get rid of "new"
568 bv_target_->init();
569 }
570 return bv_target_;
571}
572
573// ------------------------------------------------------------------------
574
575template<typename BV>
577 unsigned agr_group) BMNOEXCEPT
578{
579 BM_ASSERT_THROW(agr_group <= 1, BM_ERR_RANGE);
580 BM_ASSERT(agr_group <= 1);
581
582 if (agr_group) // arg group 1
583 {
584 BM_ASSERT(arg_group1_size < max_aggregator_cap);
585 BM_ASSERT_THROW(arg_group1_size < max_aggregator_cap, BM_ERR_RANGE);
586
587 if (!bv)
588 return arg_group1_size;
589
590 ar_->arg_bv1[arg_group1_size++] = bv;
591 return arg_group1_size;
592 }
593 else // arg group 0
594 {
595 BM_ASSERT(arg_group0_size < max_aggregator_cap);
596 BM_ASSERT_THROW(arg_group0_size < max_aggregator_cap, BM_ERR_RANGE);
597
598 if (!bv)
599 return arg_group0_size;
600
601 ar_->arg_bv0[arg_group0_size++] = bv;
602 return arg_group0_size;
603 }
604}
605
606// ------------------------------------------------------------------------
607
608template<typename BV>
610{
611 combine_or(bv_target, ar_->arg_bv0, arg_group0_size);
612}
613
614// ------------------------------------------------------------------------
615
616template<typename BV>
618{
619 combine_and(bv_target, ar_->arg_bv0, arg_group0_size);
620}
621
622// ------------------------------------------------------------------------
623
624template<typename BV>
626{
627 return combine_and_sub(bv_target,
628 ar_->arg_bv0, arg_group0_size,
629 ar_->arg_bv1, arg_group1_size, false);
630}
631
632// ------------------------------------------------------------------------
633
634template<typename BV>
636{
637 return combine_and_sub(bv_target,
638 ar_->arg_bv0, arg_group0_size,
639 ar_->arg_bv1, arg_group1_size, any);
640}
641
642// ------------------------------------------------------------------------
643
644template<typename BV>
646{
647 return find_first_and_sub(idx,
648 ar_->arg_bv0, arg_group0_size,
649 ar_->arg_bv1, arg_group1_size);
650}
651
652// ------------------------------------------------------------------------
653
654template<typename BV>
656{
657 combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size, false);
658}
659
660// ------------------------------------------------------------------------
661
662template<typename BV>
664 const bvector_type_const_ptr* bv_src, unsigned src_size)
665{
666 BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
667 if (!src_size)
668 {
669 bv_target.clear();
670 return;
671 }
672 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
673 for (unsigned i = 0; i < top_blocks; ++i)
674 {
675 unsigned set_array_max =
676 find_effective_sub_block_size(i, bv_src, src_size, false);
677 for (unsigned j = 0; j < set_array_max; ++j)
678 {
679 combine_or(i, j, bv_target, bv_src, src_size);
680 } // for j
681 } // for i
682}
683
684// ------------------------------------------------------------------------
685
686template<typename BV>
688 const bvector_type_const_ptr* bv_src,
689 unsigned src_size)
690{
691 BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
692 if (src_size == 1)
693 {
694 const bvector_type* bv = bv_src[0];
695 bv_target = *bv;
696 return;
697 }
698 if (!src_size)
699 {
700 bv_target.clear();
701 return;
702 }
703 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
704 for (unsigned i = 0; i < top_blocks; ++i)
705 {
706 // TODO: find range, not just size
707 unsigned set_array_max =
708 find_effective_sub_block_size(i, bv_src, src_size, true);
709 for (unsigned j = 0; j < set_array_max; ++j)
710 {
711 // TODO: use block_managers not bvectors to avoid extra indirect
712 combine_and(i, j, bv_target, bv_src, src_size);
713 } // for j
714 } // for i
715}
716
717// ------------------------------------------------------------------------
718
719template<typename BV>
721 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
722 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
723 bool any)
724{
725 BM_ASSERT_THROW(src_and_size < max_aggregator_cap, BM_ERR_RANGE);
726 BM_ASSERT_THROW(src_sub_size < max_aggregator_cap, BM_ERR_RANGE);
727
728 bool global_found = false;
729
730 if (!bv_src_and || !src_and_size)
731 {
732 bv_target.clear();
733 return false;
734 }
735
736 blocks_manager_type& bman_target = bv_target.get_blocks_manager();
737
738 unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
739 unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size, false);
740
741 if (top_blocks2 > top_blocks)
742 top_blocks = top_blocks2;
743
744 for (unsigned i = 0; i < top_blocks; ++i)
745 {
746 unsigned set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size, true);
747 if (!set_array_max)
748 continue;
749 if (src_sub_size)
750 {
751 unsigned set_array_max2 =
752 find_effective_sub_block_size(i, bv_src_sub, src_sub_size, false);
753 if (set_array_max2 > set_array_max) // TODO: use range intersect
754 set_array_max = set_array_max2;
755 }
756 for (unsigned j = 0; j < set_array_max; ++j)
757 {
758 int is_res_full;
759 digest_type digest = combine_and_sub(i, j,
760 bv_src_and, src_and_size,
761 bv_src_sub, src_sub_size,
762 &is_res_full);
763 if (is_res_full)
764 {
765 bman_target.check_alloc_top_subblock(i);
766 bman_target.set_block_ptr(i, j, (bm::word_t*)FULL_BLOCK_FAKE_ADDR);
767 if (j == bm::set_sub_array_size-1)
768 {
769 bman_target.validate_top_full(i);
770 }
771 if (any)
772 return any;
773 }
774 else
775 {
776 bool found = digest;
777 if (found)
778 {
779 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
780 opt_mode_, ar_->tb_opt);
781 if (any)
782 return found;
783 }
784 global_found |= found;
785 }
786 } // for j
787 } // for i
788 return global_found;
789}
790
791// ------------------------------------------------------------------------
792
793template<typename BV>
795 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
796 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size)
797{
798 BM_ASSERT_THROW(src_and_size < max_aggregator_cap, BM_ERR_RANGE);
799 BM_ASSERT_THROW(src_sub_size < max_aggregator_cap, BM_ERR_RANGE);
800
801 if (!bv_src_and || !src_and_size)
802 return false;
803
804 unsigned top_blocks = max_top_blocks(bv_src_and, src_and_size);
805 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
806
807 if (top_blocks2 > top_blocks)
808 top_blocks = top_blocks2;
809
810 // compute range blocks coordinates
811 //
812 block_idx_type nblock_from = (range_from_ >> bm::set_block_shift);
813 block_idx_type nblock_to = (range_to_ >> bm::set_block_shift);
814 unsigned top_from = unsigned(nblock_from >> bm::set_array_shift);
815 unsigned top_to = unsigned(nblock_to >> bm::set_array_shift);
816
817 if (range_set_)
818 {
819 if (nblock_from == nblock_to) // one block search
820 {
821 int is_res_full;
822 unsigned i = top_from;
823 unsigned j = unsigned(nblock_from & bm::set_array_mask);
824 digest_type digest = combine_and_sub(i, j,
825 bv_src_and, src_and_size,
826 bv_src_sub, src_sub_size,
827 &is_res_full);
828 // is_res_full is not needed here, since it is just 1 block
829 if (digest)
830 {
831 unsigned block_bit_idx = 0;
832 bool found = bm::bit_find_first(ar_->tb1, &block_bit_idx, digest);
833 BM_ASSERT(found);
834 idx = bm::block_to_global_index(i, j, block_bit_idx);
835 return found;
836 }
837 return false;
838 }
839
840 if (top_to < top_blocks)
841 top_blocks = top_to+1;
842 }
843 else
844 {
845 top_from = 0;
846 }
847
848 for (unsigned i = top_from; i < top_blocks; ++i)
849 {
850 unsigned set_array_max = bm::set_sub_array_size;
851 unsigned j = 0;
852 if (range_set_)
853 {
854 if (i == top_from)
855 {
856 j = nblock_from & bm::set_array_mask;
857 }
858 if (i == top_to)
859 {
860 set_array_max = 1 + unsigned(nblock_to & bm::set_array_mask);
861 }
862 }
863 else
864 {
865 set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size, true);
866 if (!set_array_max)
867 continue;
868 if (src_sub_size)
869 {
870 unsigned set_array_max2 =
871 find_effective_sub_block_size(i, bv_src_sub, src_sub_size, false);
872 if (set_array_max2 > set_array_max)
873 set_array_max = set_array_max2;
874 }
875 }
876 for (; j < set_array_max; ++j)
877 {
878 int is_res_full;
879 digest_type digest = combine_and_sub(i, j,
880 bv_src_and, src_and_size,
881 bv_src_sub, src_sub_size,
882 &is_res_full);
883 if (digest)
884 {
885 unsigned block_bit_idx = 0;
886 bool found = bm::bit_find_first(ar_->tb1, &block_bit_idx, digest);
887 BM_ASSERT(found);
888 idx = bm::block_to_global_index(i, j, block_bit_idx);
889 return found;
890 }
891 } // for j
892 //while (++j < set_array_max);
893 } // for i
894 return false;
895}
896
897// ------------------------------------------------------------------------
898
899template<typename BV>
900unsigned
902 unsigned i,
903 const bvector_type_const_ptr* bv_src,
904 unsigned src_size,
905 bool top_null_as_zero) BMNOEXCEPT
906{
907 // quick hack to avoid scanning large, arrays, where such scan can be quite
908 // expensive by itself (this makes this function approximate)
909 if (src_size > 32)
911
912 unsigned max_size = 1;
913 for (unsigned k = 0; k < src_size; ++k)
914 {
915 const bvector_type* bv = bv_src[k];
916 BM_ASSERT(bv);
917 const typename bvector_type::blocks_manager_type& bman_arg =
918 bv->get_blocks_manager();
919 const bm::word_t* const* blk_blk_arg = bman_arg.get_topblock(i);
920 if (!blk_blk_arg)
921 {
922 if (top_null_as_zero)
923 return 0;
924 continue;
925 }
926 if ((bm::word_t*)blk_blk_arg == FULL_BLOCK_FAKE_ADDR)
928
929 for (unsigned j = bm::set_sub_array_size - 1; j > max_size; --j)
930 {
931 if (blk_blk_arg[j])
932 {
933 max_size = j;
934 break;
935 }
936 } // for j
938 break;
939 } // for k
940 ++max_size;
942
943 return max_size;
944}
945
946// ------------------------------------------------------------------------
947
948template<typename BV>
949void aggregator<BV>::combine_or(unsigned i, unsigned j,
950 bvector_type& bv_target,
951 const bvector_type_const_ptr* bv_src,
952 unsigned src_size)
953{
954 typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
955
956 unsigned arg_blk_count = 0;
957 unsigned arg_blk_gap_count = 0;
958 bm::word_t* blk =
959 sort_input_blocks_or(bv_src, src_size, i, j,
960 &arg_blk_count, &arg_blk_gap_count);
961
962 BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
963
964 if (blk == FULL_BLOCK_FAKE_ADDR) // nothing to do - golden block(!)
965 {
966 bman_target.check_alloc_top_subblock(i);
967 bman_target.set_block_ptr(i, j, blk);
968 if (++j == bm::set_sub_array_size)
969 {
970 bman_target.validate_top_full(i);
971 }
972 }
973 else
974 {
975 blk = ar_->tb1;
976 if (arg_blk_count || arg_blk_gap_count)
977 {
978 bool all_one =
979 process_bit_blocks_or(bman_target, i, j, arg_blk_count);
980 if (!all_one)
981 {
982 if (arg_blk_gap_count)
983 {
984 process_gap_blocks_or(arg_blk_gap_count);
985 }
986 // we have some results, allocate block and copy from temp
987 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
988 opt_mode_, ar_->tb_opt);
989 }
990 }
991 }
992}
993
994// ------------------------------------------------------------------------
995
996template<typename BV>
997void aggregator<BV>::combine_and(unsigned i, unsigned j,
998 bvector_type& bv_target,
999 const bvector_type_const_ptr* bv_src,
1000 unsigned src_size)
1001{
1002 BM_ASSERT(src_size);
1003
1004 unsigned arg_blk_count = 0;
1005 unsigned arg_blk_gap_count = 0;
1006 bm::word_t* blk =
1007 sort_input_blocks_and(bv_src, src_size,
1008 i, j,
1009 &arg_blk_count, &arg_blk_gap_count);
1010
1011 BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
1012
1013 if (!blk) // nothing to do - golden block(!)
1014 return;
1015 if (arg_blk_count || arg_blk_gap_count)
1016 {
1017 if (!arg_blk_gap_count && (arg_blk_count == 1))
1018 {
1019 if (ar_->v_arg_and_blk[0] == FULL_BLOCK_REAL_ADDR)
1020 {
1021 // another nothing to do: one FULL block
1022 blocks_manager_type& bman_target = bv_target.get_blocks_manager();
1023 bman_target.check_alloc_top_subblock(i);
1024 bman_target.set_block_ptr(i, j, blk);
1025 if (++j == bm::set_sub_array_size)
1026 bman_target.validate_top_full(i);
1027 return;
1028 }
1029 }
1030 // AND bit-blocks
1031 //
1032 bm::id64_t digest = ~0ull;
1033 digest = process_bit_blocks_and(arg_blk_count, digest);
1034 if (!digest)
1035 return;
1036
1037 // AND all GAP blocks (if any)
1038 //
1039 if (arg_blk_gap_count)
1040 {
1041 digest = process_gap_blocks_and(arg_blk_gap_count, digest);
1042 }
1043 if (digest) // we have results , allocate block and copy from temp
1044 {
1045 blocks_manager_type& bman_target = bv_target.get_blocks_manager();
1046 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
1047 opt_mode_, ar_->tb_opt);
1048 }
1049 }
1050}
1051
1052// ------------------------------------------------------------------------
1053
1054template<typename BV>
1056aggregator<BV>::combine_and_sub(unsigned i, unsigned j,
1057 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
1058 const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
1059 int* is_result_full)
1060{
1061 BM_ASSERT(src_and_size);
1062 BM_ASSERT(is_result_full);
1063
1064 unsigned arg_blk_and_count = 0;
1065 unsigned arg_blk_and_gap_count = 0;
1066 unsigned arg_blk_sub_count = 0;
1067 unsigned arg_blk_sub_gap_count = 0;
1068
1069 *is_result_full = 0;
1070 bm::word_t* blk = sort_input_blocks_and(bv_src_and, src_and_size,
1071 i, j,
1072 &arg_blk_and_count, &arg_blk_and_gap_count);
1073 BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
1074 if (!blk || !(arg_blk_and_count | arg_blk_and_gap_count))
1075 return 0; // nothing to do - golden block(!)
1076
1077 if (src_sub_size)
1078 {
1079 blk = sort_input_blocks_or(bv_src_sub, src_sub_size,
1080 i, j,
1081 &arg_blk_sub_count, &arg_blk_sub_gap_count);
1082 BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
1083 if (blk == FULL_BLOCK_FAKE_ADDR)
1084 return 0; // nothing to do - golden block(!)
1085 }
1086 else
1087 {
1088 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1089 {
1090 if (ar_->v_arg_and_blk[0] == FULL_BLOCK_REAL_ADDR)
1091 {
1092 *is_result_full = 1;
1093 return ~0ull;
1094 }
1095 }
1096 }
1097
1098 digest_type digest = ~0ull;
1099
1100 // AND-SUB bit-blocks
1101 //
1102 digest = process_bit_blocks_and(arg_blk_and_count, digest);
1103 if (!digest)
1104 return digest;
1105 digest = process_bit_blocks_sub(arg_blk_sub_count, digest);
1106 if (!digest)
1107 return digest;
1108
1109 // AND all GAP block
1110 //
1111 digest =
1112 process_gap_blocks_and(arg_blk_and_gap_count, digest);
1113 if (!digest)
1114 return digest;
1115
1116 if (arg_blk_sub_gap_count)
1117 {
1118 digest =
1119 process_gap_blocks_sub(arg_blk_sub_gap_count, digest);
1120 }
1121
1122 return digest;
1123}
1124
1125// ------------------------------------------------------------------------
1126
1127template<typename BV>
1128void aggregator<BV>::process_gap_blocks_or(unsigned arg_blk_gap_count)
1129{
1130 bm::word_t* blk = ar_->tb1;
1131 for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1132 bm::gap_add_to_bitset(blk, ar_->v_arg_or_blk_gap[k]);
1133}
1134
1135// ------------------------------------------------------------------------
1136
1137template<typename BV>
1140 digest_type digest)
1141{
1142 bm::word_t* blk = ar_->tb1;
1143 bool single_bit_found;
1144 unsigned single_bit_idx;
1145 for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1146 {
1147 bm::gap_and_to_bitset(blk, ar_->v_arg_and_blk_gap[k], digest);
1148 digest = bm::update_block_digest0(blk, digest);
1149 if (!digest)
1150 {
1152 break;
1153 }
1154 single_bit_found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1155 if (single_bit_found)
1156 {
1157 for (++k; k < arg_blk_gap_count; ++k)
1158 {
1159 bool b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[k], single_bit_idx);
1160 if (!b)
1161 return 0; // AND 0 causes result to turn 0
1162 } // for k
1163 break;
1164 }
1165 }
1166 return digest;
1167}
1168
1169// ------------------------------------------------------------------------
1170
1171template<typename BV>
1174 digest_type digest)
1175{
1176 bm::word_t* blk = ar_->tb1;
1177 bool single_bit_found;
1178 unsigned single_bit_idx;
1179 for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1180 {
1181 bm::gap_sub_to_bitset(blk, ar_->v_arg_or_blk_gap[k], digest);
1182 digest = bm::update_block_digest0(blk, digest);
1183 if (!digest)
1184 {
1186 break;
1187 }
1188 // check if logical operation reduced to a corner case of one single bit
1189 single_bit_found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1190 if (single_bit_found)
1191 {
1192 for (++k; k < arg_blk_gap_count; ++k)
1193 {
1194 bool b = bm::gap_test_unr(ar_->v_arg_or_blk_gap[k], single_bit_idx);
1195 if (b)
1196 return 0; // AND-NOT causes search result to turn 0
1197 } // for k
1198 break;
1199 }
1200 } // for k
1201 return digest;
1202}
1203
1204// ------------------------------------------------------------------------
1205
1206template<typename BV>
1207bool aggregator<BV>::test_gap_blocks_and(unsigned arg_blk_gap_count,
1208 unsigned bit_idx)
1209{
1210 unsigned b = 1;
1211 for (unsigned i = 0; i < arg_blk_gap_count && b; ++i)
1212 {
1213 b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[i], bit_idx);
1214 }
1215 return b;
1216}
1217
1218// ------------------------------------------------------------------------
1219
1220template<typename BV>
1221bool aggregator<BV>::test_gap_blocks_sub(unsigned arg_blk_gap_count,
1222 unsigned bit_idx)
1223{
1224 unsigned b = 1;
1225 for (unsigned i = 0; i < arg_blk_gap_count; ++i)
1226 {
1227 b = bm::gap_test_unr(ar_->v_arg_or_blk_gap[i], bit_idx);
1228 if (b)
1229 return false;
1230 }
1231 return true;
1232}
1233
1234// ------------------------------------------------------------------------
1235
1236
1237template<typename BV>
1239 unsigned i, unsigned j,
1240 unsigned arg_blk_count)
1241{
1242 bm::word_t* blk = ar_->tb1;
1243 bool all_one;
1244
1245 unsigned k = 0;
1246
1247 if (arg_blk_count) // copy the first block
1248 bm::bit_block_copy(blk, ar_->v_arg_or_blk[k++]);
1249 else
1250 bm::bit_block_set(blk, 0);
1251
1252 // process all BIT blocks
1253 //
1254 unsigned unroll_factor, len, len_unr;
1255
1256 unroll_factor = 4;
1257 len = arg_blk_count - k;
1258 len_unr = len - (len % unroll_factor);
1259 for( ;k < len_unr; k+=unroll_factor)
1260 {
1261 all_one = bm::bit_block_or_5way(blk,
1262 ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1],
1263 ar_->v_arg_or_blk[k+2], ar_->v_arg_or_blk[k+3]);
1264 if (all_one)
1265 {
1266 BM_ASSERT(blk == ar_->tb1);
1268 bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1269 return true;
1270 }
1271 } // for k
1272
1273 unroll_factor = 2;
1274 len = arg_blk_count - k;
1275 len_unr = len - (len % unroll_factor);
1276 for( ;k < len_unr; k+=unroll_factor)
1277 {
1278 all_one = bm::bit_block_or_3way(blk, ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1]);
1279 if (all_one)
1280 {
1281 BM_ASSERT(blk == ar_->tb1);
1283 bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1284 return true;
1285 }
1286 } // for k
1287
1288 for (; k < arg_blk_count; ++k)
1289 {
1290 all_one = bm::bit_block_or(blk, ar_->v_arg_or_blk[k]);
1291 if (all_one)
1292 {
1293 BM_ASSERT(blk == ar_->tb1);
1295 bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1296 return true;
1297 }
1298 } // for k
1299
1300 return false;
1301}
1302
1303// ------------------------------------------------------------------------
1304
1305template<typename BV>
1308 digest_type digest)
1309{
1310 bm::word_t* blk = ar_->tb1;
1311 unsigned k = 0;
1312
1313 block_idx_type nb_from = (range_from_ >> bm::set_block_shift);
1314 block_idx_type nb_to = (range_to_ >> bm::set_block_shift);
1315 if (range_set_ && (nb_from == nb_to))
1316 {
1317 unsigned nbit_from = unsigned(range_from_ & bm::set_block_mask);
1318 unsigned nbit_to = unsigned(range_to_ & bm::set_block_mask);
1319 digest_type digest0 = bm::digest_mask(nbit_from, nbit_to);
1320 digest &= digest0;
1321 bm::block_init_digest0(blk, digest);
1322 }
1323 else
1324 {
1325 switch (arg_blk_count)
1326 {
1327 case 0:
1328 bm::block_init_digest0(blk, digest); // 0xFF... by default
1329 return digest;
1330 case 1:
1331 bm::bit_block_copy(blk, ar_->v_arg_and_blk[k]);
1332 return bm::calc_block_digest0(blk);
1333 default:
1334 digest = bm::bit_block_and_2way(blk,
1335 ar_->v_arg_and_blk[k],
1336 ar_->v_arg_and_blk[k+1],
1337 ~0ull);
1338 k += 2;
1339 break;
1340 } // switch
1341 }
1342
1343 unsigned unroll_factor, len, len_unr;
1344 unsigned single_bit_idx;
1345
1346 unroll_factor = 4;
1347 len = arg_blk_count - k;
1348 len_unr = len - (len % unroll_factor);
1349 for (; k < len_unr; k += unroll_factor)
1350 {
1351 digest =
1353 ar_->v_arg_and_blk[k], ar_->v_arg_and_blk[k + 1],
1354 ar_->v_arg_and_blk[k + 2], ar_->v_arg_and_blk[k + 3],
1355 digest);
1356 if (!digest) // all zero
1357 return digest;
1358 bool found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1359 if (found)
1360 {
1361 unsigned nword = unsigned(single_bit_idx >> bm::set_word_shift);
1362 unsigned mask = 1u << (single_bit_idx & bm::set_word_mask);
1363 for (++k; k < arg_blk_count; ++k)
1364 {
1365 const bm::word_t* arg_blk = ar_->v_arg_and_blk[k];
1366 if (!(mask & arg_blk[nword]))
1367 {
1368 blk[nword] = 0;
1369 return 0;
1370 }
1371 } // for k
1372 break;
1373 }
1374 } // for k
1375
1376 for (; k < arg_blk_count; ++k)
1377 {
1378 if (ar_->v_arg_and_blk[k] == FULL_BLOCK_REAL_ADDR)
1379 continue;
1380 digest = bm::bit_block_and(blk, ar_->v_arg_and_blk[k], digest);
1381 if (!digest) // all zero
1382 return digest;
1383 } // for k
1384 return digest;
1385}
1386
1387// ------------------------------------------------------------------------
1388
1389template<typename BV>
1392 digest_type digest)
1393{
1394 bm::word_t* blk = ar_->tb1;
1395 unsigned single_bit_idx;
1396 const word_t** args = &ar_->v_arg_or_blk[0];
1397 for (unsigned k = 0; k < arg_blk_count; ++k)
1398 {
1399 if (ar_->v_arg_or_blk[k] == FULL_BLOCK_REAL_ADDR) // golden block
1400 {
1401 digest = 0;
1402 break;
1403 }
1404 digest = bm::bit_block_sub(blk, args[k], digest);
1405 if (!digest) // all zero
1406 break;
1407
1408 bool found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1409 if (found)
1410 {
1411 const unsigned mask = 1u << (single_bit_idx & bm::set_word_mask);
1412 unsigned nword = unsigned(single_bit_idx >> bm::set_word_shift);
1413 for (++k; k < arg_blk_count; ++k)
1414 {
1415 if (mask & args[k][nword])
1416 {
1417 blk[nword] = 0;
1418 return 0;
1419 }
1420 } // for k
1421 break;
1422 }
1423 } // for k
1424 return digest;
1425}
1426
1427// ------------------------------------------------------------------------
1428
1429template<typename BV>
1431 const bvector_type_const_ptr* bv_src, unsigned src_size,
1432 bool init_clear)
1433{
1434 typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
1435 if (init_clear)
1436 {
1437 if (bman_target.is_init())
1438 bman_target.deinit_tree();
1439 }
1440
1441 unsigned top_blocks = bman_target.top_block_size();
1442 size_type size = bv_target.size();
1443 bool need_realloc = false;
1444
1445 // pre-scan to do target size harmonization
1446 for (unsigned i = 0; i < src_size; ++i)
1447 {
1448 const bvector_type* bv = bv_src[i];
1449 BM_ASSERT(bv);
1450 const typename bvector_type::blocks_manager_type& bman_arg =
1451 bv->get_blocks_manager();
1452 unsigned arg_top_blocks = bman_arg.top_block_size();
1453 if (arg_top_blocks > top_blocks)
1454 {
1455 need_realloc = true;
1456 top_blocks = arg_top_blocks;
1457 }
1458 size_type arg_size = bv->size();
1459 if (arg_size > size)
1460 size = arg_size;
1461 } // for i
1462
1463 if (need_realloc)
1464 {
1465 bman_target.reserve_top_blocks(top_blocks);
1466 }
1467 if (!bman_target.is_init())
1468 bman_target.init_tree();
1469 if (size > bv_target.size())
1470 bv_target.resize(size);
1471
1472 return top_blocks;
1473}
1474
1475// ------------------------------------------------------------------------
1476
1477template<typename BV>
1478unsigned
1480 unsigned src_size) BMNOEXCEPT
1481{
1482 unsigned top_blocks = 1;
1483
1484 // pre-scan to do target size harmonization
1485 for (unsigned i = 0; i < src_size; ++i)
1486 {
1487 const bvector_type* bv = bv_src[i];
1488 BM_ASSERT(bv);
1489 const typename bvector_type::blocks_manager_type& bman_arg = bv->get_blocks_manager();
1490 unsigned arg_top_blocks = bman_arg.top_block_size();
1491 if (arg_top_blocks > top_blocks)
1492 top_blocks = arg_top_blocks;
1493 } // for i
1494 return top_blocks;
1495}
1496
1497// ------------------------------------------------------------------------
1498
1499template<typename BV>
1501 const bvector_type_const_ptr* bv_src,
1502 unsigned src_size,
1503 unsigned i, unsigned j,
1504 unsigned* arg_blk_count,
1505 unsigned* arg_blk_gap_count) BMNOEXCEPT
1506{
1507 bm::word_t* blk = 0;
1508 for (unsigned k = 0; k < src_size; ++k)
1509 {
1510 const bvector_type* bv = bv_src[k];
1511 BM_ASSERT(bv);
1512 const blocks_manager_type& bman_arg = bv->get_blocks_manager();
1513 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1514 if (!arg_blk)
1515 continue;
1516 if (BM_IS_GAP(arg_blk))
1517 {
1518 ar_->v_arg_or_blk_gap[*arg_blk_gap_count] = BMGAP_PTR(arg_blk);
1519 (*arg_blk_gap_count)++;
1520 }
1521 else // FULL or bit block
1522 {
1523 if (IS_FULL_BLOCK(arg_blk))
1524 {
1526 *arg_blk_gap_count = *arg_blk_count = 0; // nothing to do
1527 break;
1528 }
1529 ar_->v_arg_or_blk[*arg_blk_count] = arg_blk;
1530 (*arg_blk_count)++;
1531 }
1532 } // for k
1533 return blk;
1534}
1535
1536// ------------------------------------------------------------------------
1537
1538template<typename BV>
1540 const bvector_type_const_ptr* bv_src,
1541 unsigned src_size,
1542 unsigned i, unsigned j,
1543 unsigned* arg_blk_count,
1544 unsigned* arg_blk_gap_count) BMNOEXCEPT
1545{
1546 unsigned full_blk_cnt = 0;
1548 for (unsigned k = 0; k < src_size; ++k)
1549 {
1550 const bvector_type* bv = bv_src[k];
1551 BM_ASSERT(bv);
1552 const blocks_manager_type& bman_arg = bv->get_blocks_manager();
1553 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1554 if (!arg_blk)
1555 {
1556 blk = 0;
1557 *arg_blk_gap_count = *arg_blk_count = 0; // nothing to do
1558 break;
1559 }
1560 if (BM_IS_GAP(arg_blk))
1561 {
1562 ar_->v_arg_and_blk_gap[*arg_blk_gap_count] = BMGAP_PTR(arg_blk);
1563 (*arg_blk_gap_count)++;
1564 }
1565 else // FULL or bit block
1566 {
1567 if (IS_FULL_BLOCK(arg_blk))
1568 {
1569 // add only first FULL encounter, others ignore
1570 if (!full_blk_cnt) // first 0xFFF....
1571 {
1572 ar_->v_arg_and_blk[*arg_blk_count] = FULL_BLOCK_REAL_ADDR;
1573 ++full_blk_cnt;
1574 (*arg_blk_count)++;
1575 }
1576 }
1577 else
1578 {
1579 ar_->v_arg_and_blk[*arg_blk_count] = arg_blk;
1580 (*arg_blk_count)++;
1581 }
1582 /*
1583 ar_->v_arg_and_blk[*arg_blk_count] =
1584 IS_FULL_BLOCK(arg_blk) ? FULL_BLOCK_REAL_ADDR: arg_blk;
1585 (*arg_blk_count)++; */
1586 }
1587 } // for k
1588 return blk;
1589}
1590
1591// ------------------------------------------------------------------------
1592
1593template<typename BV>
1595 const bvector_type_const_ptr* bv_src, unsigned src_size)
1596{
1597 BM_ASSERT(src_size);
1598 if (src_size == 0)
1599 {
1600 bv_target.clear();
1601 return;
1602 }
1603
1604 const bvector_type* bv = bv_src[0];
1605 bv_target = *bv;
1606
1607 for (unsigned i = 1; i < src_size; ++i)
1608 {
1609 bv = bv_src[i];
1610 BM_ASSERT(bv);
1611 bv_target.bit_or(*bv);
1612 }
1613}
1614
1615// ------------------------------------------------------------------------
1616
1617template<typename BV>
1619 const bvector_type_const_ptr* bv_src, unsigned src_size)
1620{
1621 BM_ASSERT(src_size);
1622
1623 if (src_size == 0)
1624 {
1625 bv_target.clear();
1626 return;
1627 }
1628 const bvector_type* bv = bv_src[0];
1629 bv_target = *bv;
1630
1631 for (unsigned i = 1; i < src_size; ++i)
1632 {
1633 bv = bv_src[i];
1634 BM_ASSERT(bv);
1635 bv_target.bit_and(*bv);
1636 }
1637}
1638
1639// ------------------------------------------------------------------------
1640
1641template<typename BV>
1643 const bvector_type_const_ptr* bv_src_and,
1644 unsigned src_and_size,
1645 const bvector_type_const_ptr* bv_src_sub,
1646 unsigned src_sub_size)
1647{
1648 BM_ASSERT(src_and_size);
1649
1650 combine_and_horizontal(bv_target, bv_src_and, src_and_size);
1651
1652 for (unsigned i = 0; i < src_sub_size; ++i)
1653 {
1654 const bvector_type* bv = bv_src_sub[i];
1655 BM_ASSERT(bv);
1656 bv_target -= *bv;
1657 }
1658}
1659
1660// ------------------------------------------------------------------------
1661
1662template<typename BV>
1664 const bvector_type_const_ptr* bv_src,
1665 unsigned src_size)
1666{
1667 top_block_size_ = resize_target(bv_target, bv_src, src_size);
1668
1669 // set initial carry overs all to 0
1670 for (unsigned i = 0; i < src_size; ++i) // reset co flags
1671 ar_->carry_overs_[i] = 0;
1672}
1673
1674// ------------------------------------------------------------------------
1675
1676template<typename BV>
1678 bvector_type& bv_target,
1679 const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
1680 bool any)
1681{
1682 BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
1683 if (!src_and_size)
1684 {
1685 bv_target.clear();
1686 return false;
1687 }
1688 prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
1689
1690 for (unsigned i = 0; i < bm::set_top_array_size; ++i)
1691 {
1692 if (i > top_block_size_)
1693 {
1694 if (!any_carry_overs(&ar_->carry_overs_[0], src_and_size))
1695 break; // quit early if there is nothing to carry on
1696 }
1697
1698 unsigned j = 0;
1699 do
1700 {
1701 bool found =
1702 combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
1703 if (found && any)
1704 return found;
1705 } while (++j < bm::set_sub_array_size);
1706
1707 } // for i
1708
1709 return bv_target.any();
1710}
1711
1712// ------------------------------------------------------------------------
1713
1714template<typename BV>
1716 bvector_type& bv_target,
1717 const bvector_type_const_ptr* bv_src,
1718 unsigned src_size)
1719{
1720 bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1721 unsigned char* carry_overs = &(ar_->carry_overs_[0]);
1722
1723 bm::id64_t digest = ~0ull; // start with a full content digest
1724
1725 bool blk_zero = false;
1726 // first initial block is just copied over from the first AND
1727 {
1728 const blocks_manager_type& bman_arg = bv_src[0]->get_blocks_manager();
1729 BM_ASSERT(bman_arg.is_init());
1730 const bm::word_t* arg_blk = bman_arg.get_block(i, j);
1731 if (BM_IS_GAP(arg_blk))
1732 {
1733 bm::bit_block_set(blk, 0);
1734 bm::gap_add_to_bitset(blk, BMGAP_PTR(arg_blk));
1735 }
1736 else
1737 {
1738 if (arg_blk)
1739 {
1740 bm::bit_block_copy(blk, arg_blk);
1741 }
1742 else
1743 {
1744 blk_zero = true; // set flag for delayed block init
1745 digest = 0;
1746 }
1747 }
1748 carry_overs[0] = 0;
1749 }
1750
1751 for (unsigned k = 1; k < src_size; ++k)
1752 {
1753 unsigned carry_over = carry_overs[k];
1754 if (!digest && !carry_over) // 0 into "00000" block >> 0
1755 continue;
1756 if (blk_zero) // delayed temp block 0-init requested
1757 {
1758 bm::bit_block_set(blk, 0);
1759 blk_zero = !blk_zero; // = false
1760 }
1761 const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
1762 carry_overs[k] = (unsigned char)
1763 process_shift_right_and(blk, arg_blk, digest, carry_over);
1764 BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
1765 } // for k
1766
1767 if (blk_zero) // delayed temp block 0-init
1768 {
1769 bm::bit_block_set(blk, 0);
1770 }
1771 // block now gets emitted into the target bit-vector
1772 if (digest)
1773 {
1775 blocks_manager_type& bman_target = bv_target.get_blocks_manager();
1776 bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, ar_->tb_opt);
1777 return true;
1778 }
1779 return false;
1780}
1781
1782// ------------------------------------------------------------------------
1783
1784template<typename BV>
1787 const bm::word_t* BMRESTRICT arg_blk,
1788 digest_type& BMRESTRICT digest,
1789 unsigned carry_over) BMNOEXCEPT
1790{
1791 BM_ASSERT(carry_over == 1 || carry_over == 0);
1792
1793 if (BM_IS_GAP(arg_blk)) // GAP argument
1794 {
1795 if (digest)
1796 {
1797 carry_over =
1798 bm::bit_block_shift_r1_and_unr(blk, carry_over,
1799 FULL_BLOCK_REAL_ADDR, &digest);
1800 }
1801 else // digest == 0, but carry_over can change that
1802 {
1803 blk[0] = carry_over;
1804 carry_over = 0;
1805 digest = blk[0]; // NOTE: this does NOT account for AND (yet)
1806 }
1807 BM_ASSERT(bm::calc_block_digest0(blk) == digest);
1808
1809 bm::gap_and_to_bitset(blk, BMGAP_PTR(arg_blk), digest);
1810 digest = bm::update_block_digest0(blk, digest);
1811 }
1812 else // 2 bit-blocks
1813 {
1814 if (arg_blk) // use fast fused SHIFT-AND operation
1815 {
1816 if (digest)
1817 {
1818 carry_over =
1819 bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
1820 &digest);
1821 }
1822 else // digest == 0
1823 {
1824 blk[0] = carry_over & arg_blk[0];
1825 carry_over = 0;
1826 digest = blk[0];
1827 }
1828 BM_ASSERT(bm::calc_block_digest0(blk) == digest);
1829 }
1830 else // arg is zero - target block => zero
1831 {
1832 carry_over = blk[bm::set_block_size-1] >> 31; // carry out
1833 if (digest)
1834 {
1835 bm::bit_block_set(blk, 0); // TODO: digest based set
1836 digest = 0;
1837 }
1838 }
1839 }
1840 return carry_over;
1841}
1842
1843// ------------------------------------------------------------------------
1844
1845template<typename BV>
1847 const bvector_type_const_ptr* bv_src,
1848 unsigned k, unsigned i, unsigned j) BMNOEXCEPT
1849{
1850 return bv_src[k]->get_blocks_manager().get_block(i, j);
1851}
1852
1853// ------------------------------------------------------------------------
1854
1855template<typename BV>
1856bool aggregator<BV>::any_carry_overs(const unsigned char* carry_overs,
1857 unsigned co_size) BMNOEXCEPT
1858{
1859 // TODO: loop unroll?
1860 unsigned acc = carry_overs[0];
1861 for (unsigned i = 1; i < co_size; ++i)
1862 acc |= carry_overs[i];
1863// if (ar_->carry_overs_[i])
1864// return true;
1865// return false;
1866 return acc;
1867}
1868
1869// ------------------------------------------------------------------------
1870
1871template<typename BV>
1873{
1874 bvector_type* bv_target = check_create_target(); // create target vector
1875 BM_ASSERT(bv_target);
1876
1877 temp_blk_ = temp_block;
1878
1879 switch (operation_)
1880 {
1881 case BM_NOT_DEFINED:
1882 break;
1883 case BM_SHIFT_R_AND:
1884 prepare_shift_right_and(*bv_target, ar_->arg_bv0, arg_group0_size);
1885 operation_status_ = op_prepared;
1886 break;
1887 default:
1888 BM_ASSERT(0);
1889 } // switch
1890}
1891
1892// ------------------------------------------------------------------------
1893
1894template<typename BV>
1896aggregator<BV>::run_step(unsigned i, unsigned j)
1897{
1898 BM_ASSERT(operation_status_ == op_prepared || operation_status_ == op_in_progress);
1900
1901 switch (operation_)
1902 {
1903 case BM_NOT_DEFINED:
1904 break;
1905
1906 case BM_SHIFT_R_AND:
1907 {
1908 if (i > top_block_size_)
1909 {
1910 if (!this->any_carry_overs(&ar_->carry_overs_[0], arg_group0_size))
1911 {
1912 operation_status_ = op_done;
1913 return operation_status_;
1914 }
1915 }
1916 //bool found =
1917 this->combine_shift_right_and(i, j, *bv_target_,
1918 ar_->arg_bv0, arg_group0_size);
1919 operation_status_ = op_in_progress;
1920 }
1921 break;
1922 default:
1923 BM_ASSERT(0);
1924 break;
1925 } // switch
1926
1927 return operation_status_;
1928}
1929
1930
1931// ------------------------------------------------------------------------
1932
1933
1934} // bm
1935
1936#include "bmundef.h"
1937
1938#endif
#define BM_DECLARE_TEMP_BLOCK(x)
Definition bm.h:47
Algorithms for bvector<>
Definitions(internal)
#define IS_FULL_BLOCK(addr)
Definition bmdef.h:153
#define BMRESTRICT
Definition bmdef.h:193
#define BMNOEXCEPT
Definition bmdef.h:79
#define BMGAP_PTR(ptr)
Definition bmdef.h:179
#define BM_IS_GAP(ptr)
Definition bmdef.h:181
#define BM_ASSERT
Definition bmdef.h:130
#define BM_ASSERT_THROW(x, xerrcode)
Definition bmdef.h:401
#define FULL_BLOCK_FAKE_ADDR
Definition bmdef.h:149
#define FULL_BLOCK_REAL_ADDR
Definition bmdef.h:148
Bit manipulation primitives (internal)
pre-processor un-defines to avoid global space pollution (internal)
Algorithms for fast aggregation of a group of bit-vectors.
BV::block_idx_type block_idx_type
void set_optimization(typename bvector_type::optmode opt=bvector_type::opt_compress)
set on-the-fly bit-block compression By default aggregator does not try to optimize result,...
bvector_type * check_create_target()
void reset() BMNOEXCEPT
Reset aggregate groups, forget all attached vectors.
void process_gap_blocks_or(unsigned block_count)
max_size
Maximum aggregation capacity in one pass.
const bvector_type * get_target() const
static unsigned process_shift_right_and(bm::word_t *BMRESTRICT blk, const bm::word_t *BMRESTRICT arg_blk, digest_type &BMRESTRICT digest, unsigned carry_over) BMNOEXCEPT
void combine_and_sub_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, unsigned src_and_size, const bvector_type_const_ptr *bv_src_sub, unsigned src_sub_size)
Horizontal AND-SUB aggregation (potentially slower) method.
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src, unsigned src_size, bool top_null_as_zero) BMNOEXCEPT
bm::word_t * sort_input_blocks_or(const bvector_type_const_ptr *bv_src, unsigned src_size, unsigned i, unsigned j, unsigned *arg_blk_count, unsigned *arg_blk_gap_count) BMNOEXCEPT
void combine_shift_right_and(bvector_type &bv_target)
Aggregate added group of vectors using SHIFT-RIGHT and logical AND Operation does NOT perform an expl...
void prepare_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
unsigned add(const bvector_type *bv, unsigned agr_group=0) BMNOEXCEPT
Attach source bit-vector to a argument group (0 or 1).
operation_status run_step(unsigned i, unsigned j)
Run a step of current arrgegation operation.
void stage(bm::word_t *temp_block)
Prepare operation, create internal resources, analyse dependencies.
bool combine_and_sub(bvector_type &bv_target)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
void combine_or(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
bm::id64_t digest_type
bvector_type::blocks_manager_type blocks_manager_type
digest_type combine_and_sub(unsigned i, unsigned j, const bvector_type_const_ptr *bv_src_and, unsigned src_and_size, const bvector_type_const_ptr *bv_src_sub, unsigned src_sub_size, int *is_result_full)
void combine_or(bvector_type &bv_target)
Aggregate added group of vectors using logical OR Operation does NOT perform an explicit reset of arg...
BV::size_type size_type
digest_type process_bit_blocks_sub(unsigned block_count, digest_type digest)
void combine_and(bvector_type &bv_target)
Aggregate added group of vectors using logical AND Operation does NOT perform an explicit reset of ar...
void set_operation(int op_code) BMNOEXCEPT
Set operation code for the aggregator.
const bvector_type * bvector_type_const_ptr
static const bm::word_t * get_arg_block(const bvector_type_const_ptr *bv_src, unsigned k, unsigned i, unsigned j) BMNOEXCEPT
bool test_gap_blocks_and(unsigned block_count, unsigned bit_idx)
void set_range_hint(size_type from, size_type to) BMNOEXCEPT
Set search hint for the range, where results needs to be searched (experimental for internal use).
static unsigned max_top_blocks(const bvector_type_const_ptr *bv_src, unsigned src_size) BMNOEXCEPT
bm::word_t * get_temp_block()
void combine_and_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
Horizontal AND aggregation (potentially slower) method.
bool find_first_and_sub(size_type &idx)
operation
Codes for aggregation operations which can be pipelined for efficient execution.
int get_operation() const BMNOEXCEPT
Get current operation code.
bool process_bit_blocks_or(blocks_manager_type &bman_target, unsigned i, unsigned j, unsigned block_count)
static unsigned resize_target(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size, bool init_clear=true)
bm::word_t * sort_input_blocks_and(const bvector_type_const_ptr *bv_src, unsigned src_size, unsigned i, unsigned j, unsigned *arg_blk_count, unsigned *arg_blk_gap_count) BMNOEXCEPT
bool combine_shift_right_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
bool test_gap_blocks_sub(unsigned block_count, unsigned bit_idx)
void combine_or_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
Horizontal OR aggregation (potentially slower) method.
digest_type process_gap_blocks_and(unsigned block_count, digest_type digest)
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
static bool any_carry_overs(const unsigned char *carry_overs, unsigned co_size) BMNOEXCEPT
void combine_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
digest_type process_bit_blocks_and(unsigned block_count, digest_type digest)
digest_type process_gap_blocks_sub(unsigned block_count, digest_type digest)
operation_status get_operation_status() const
Bitvector Bit-vector container with runtime compression of bits.
Definition bm.h:108
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition bm.h:130
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:134
allocator_type::allocator_pool_type allocator_pool_type
Definition bm.h:111
blocks_manager< Alloc > blocks_manager_type
Definition bm.h:112
bm::id64_t bit_block_and_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src0, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, bm::id64_t digest) BMNOEXCEPT
digest based bit-block AND 5-way
Definition bmfunc.h:6034
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
Definition bmfunc.h:5898
void block_init_digest0(bm::word_t *const block, bm::id64_t digest) BMNOEXCEPT
Init block with 000111000 pattren based on digest.
Definition bmfunc.h:704
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition bmfunc.h:5940
bool bit_block_or_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, const bm::word_t *BMRESTRICT src4) BMNOEXCEPT
5 way (target, source1, source2) bitblock OR operation. Function does not analyse availability of sou...
Definition bmfunc.h:6910
bool bit_block_shift_r1_and_unr(bm::word_t *BMRESTRICT block, bm::word_t co_flag, const bm::word_t *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest) BMNOEXCEPT
Right bit-shift bitblock by 1 bit (reference) + AND.
Definition bmfunc.h:5161
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
Definition bmfunc.h:734
bm::id64_t bit_block_and_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, bm::id64_t digest) BMNOEXCEPT
digest based bit-block AND
Definition bmfunc.h:6101
bool bit_find_first(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT pos) BMNOEXCEPT
BIT block find the first set bit.
Definition bmfunc.h:7552
bool bit_block_or_3way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
3 way (target | source1 | source2) bitblock OR operation. Function does not analyse availability of s...
Definition bmfunc.h:6866
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
Definition bmfunc.h:3841
BMFORCEINLINE bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
Compute digest mask for [from..to] positions.
Definition bmfunc.h:665
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
Definition bmfunc.h:6749
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start) BMNOEXCEPT
Returns "true" if all bits in the block are 0.
Definition bmfunc.h:1045
bool bit_find_first_if_1(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT first, bm::id64_t digest) BMNOEXCEPT
BIT block find the first set bit if only 1 bit is set.
Definition bmfunc.h:7625
bm::id64_t update_block_digest0(const bm::word_t *const block, bm::id64_t digest) BMNOEXCEPT
Compute digest for 64 non-zero areas based on existing digest (function revalidates zero areas)
Definition bmfunc.h:776
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition bmfunc.h:7019
bool is_bits_one(const bm::wordop_t *start) BMNOEXCEPT
Returns "true" if all bits in the block are 1.
Definition bmfunc.h:5277
unsigned gap_test_unr(const T *BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition bmfunc.h:1298
void gap_and_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr) BMNOEXCEPT
ANDs GAP block to bitblock.
Definition bmfunc.h:3486
void gap_sub_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr) BMNOEXCEPT
SUB (AND NOT) GAP block to bitblock.
Definition bmfunc.h:3320
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
Definition bmfunc.h:3436
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
void aggregator_pipeline_execute(It first, It last)
Experimental method ro run multiple aggregators in sync.
Definition bm.h:77
const unsigned set_array_mask
Definition bmconst.h:96
const unsigned id_max
Definition bmconst.h:108
unsigned int word_t
Definition bmconst.h:38
const unsigned set_block_mask
Definition bmconst.h:56
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
Definition bmalloc.h:424
const unsigned set_sub_array_size
Definition bmconst.h:94
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Definition bmalloc.h:396
const unsigned set_word_shift
Definition bmconst.h:71
const unsigned set_block_size
Definition bmconst.h:54
unsigned long long int id64_t
Definition bmconst.h:34
const unsigned set_array_shift
Definition bmconst.h:95
bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) BMNOEXCEPT
calculate bvector<> global bit-index from block-local coords
Definition bmfunc.h:8828
unsigned short gap_word_t
Definition bmconst.h:77
const unsigned set_top_array_size
Definition bmconst.h:109
const unsigned set_block_shift
Definition bmconst.h:55
const unsigned set_word_mask
Definition bmconst.h:72
id64_t wordop_t
Definition bmconst.h:127
bm::bvector bvector_type
const unsigned max_size
Definition xsample01.cpp:56