1 #ifndef BMAGGREGATOR__H__INCLUDED__
2 #define BMAGGREGATOR__H__INCLUDED__
26 #ifndef BM__H__INCLUDED__
29 # error missing include (bm.h or bm64.h)
295 unsigned src_and_size,
297 unsigned src_sub_size);
348 int* is_result_full);
361 bool init_clear =
true);
368 unsigned i,
unsigned j,
369 unsigned* arg_blk_count,
370 unsigned* arg_blk_gap_count);
374 unsigned i,
unsigned j,
375 unsigned* arg_blk_count,
376 unsigned* arg_blk_gap_count);
380 unsigned i,
unsigned j,
unsigned block_count);
399 bool top_null_as_zero);
408 unsigned carry_over);
411 unsigned k,
unsigned i,
unsigned j);
438 unsigned arg_group0_size = 0;
439 unsigned arg_group1_size = 0;
446 unsigned top_block_size_ = 0;
449 bool range_set_ =
false;
474 template<
typename Agg,
typename It>
479 int pipeline_size = 0;
480 for (It it = first; it != last; ++it, ++pipeline_size)
492 for (It it = first; it != last; ++it, ++w)
495 auto op_st = agg.get_operation_status();
496 if (op_st != Agg::op_done)
498 op_st = agg.run_step(i, j);
499 pipeline_size -= (op_st == Agg::op_done);
502 if (pipeline_size <= 0)
516 template<
typename BV>
525 template<
typename BV>
535 template<
typename BV>
538 arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
539 operation_status_ = op_undefined;
546 template<
typename BV>
549 range_from_ = from; range_to_ = to;
555 template<
typename BV>
568 template<
typename BV>
576 BM_ASSERT(arg_group1_size < max_aggregator_cap);
580 return arg_group1_size;
582 ar_->arg_bv1[arg_group1_size++] = bv;
583 return arg_group1_size;
587 BM_ASSERT(arg_group0_size < max_aggregator_cap);
591 return arg_group0_size;
593 ar_->arg_bv0[arg_group0_size++] = bv;
594 return arg_group0_size;
600 template<
typename BV>
603 combine_or(bv_target, ar_->arg_bv0, arg_group0_size);
608 template<
typename BV>
611 combine_and(bv_target, ar_->arg_bv0, arg_group0_size);
616 template<
typename BV>
619 return combine_and_sub(bv_target,
620 ar_->arg_bv0, arg_group0_size,
621 ar_->arg_bv1, arg_group1_size,
false);
626 template<
typename BV>
629 return combine_and_sub(bv_target,
630 ar_->arg_bv0, arg_group0_size,
631 ar_->arg_bv1, arg_group1_size, any);
636 template<
typename BV>
639 return find_first_and_sub(idx,
640 ar_->arg_bv0, arg_group0_size,
641 ar_->arg_bv1, arg_group1_size);
646 template<
typename BV>
649 combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size,
false);
654 template<
typename BV>
656 const bvector_type_const_ptr* bv_src,
unsigned src_size)
664 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
665 for (
unsigned i = 0; i < top_blocks; ++i)
667 unsigned set_array_max =
668 find_effective_sub_block_size(i, bv_src, src_size,
false);
669 for (
unsigned j = 0; j < set_array_max; ++j)
671 combine_or(i, j, bv_target, bv_src, src_size);
678 template<
typename BV>
680 const bvector_type_const_ptr* bv_src,
695 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
696 for (
unsigned i = 0; i < top_blocks; ++i)
699 unsigned set_array_max =
700 find_effective_sub_block_size(i, bv_src, src_size,
true);
701 for (
unsigned j = 0; j < set_array_max; ++j)
711 template<
typename BV>
713 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
714 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size,
720 bool global_found =
false;
722 if (!bv_src_and || !src_and_size)
730 unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
731 unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size,
false);
733 if (top_blocks2 > top_blocks)
734 top_blocks = top_blocks2;
736 for (
unsigned i = 0; i < top_blocks; ++i)
738 unsigned set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size,
true);
743 unsigned set_array_max2 =
744 find_effective_sub_block_size(i, bv_src_sub, src_sub_size,
false);
745 if (set_array_max2 > set_array_max)
746 set_array_max = set_array_max2;
748 for (
unsigned j = 0; j < set_array_max; ++j)
752 bv_src_and, src_and_size,
753 bv_src_sub, src_sub_size,
757 bman_target.check_alloc_top_subblock(i);
761 bman_target.validate_top_full(i);
771 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
772 opt_mode_, ar_->tb_opt);
776 global_found |= found;
785 template<
typename BV>
787 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
788 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size)
793 if (!bv_src_and || !src_and_size)
796 unsigned top_blocks = max_top_blocks(bv_src_and, src_and_size);
797 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
799 if (top_blocks2 > top_blocks)
800 top_blocks = top_blocks2;
811 if (nblock_from == nblock_to)
814 unsigned i = top_from;
817 bv_src_and, src_and_size,
818 bv_src_sub, src_sub_size,
823 unsigned block_bit_idx = 0;
832 if (top_to < top_blocks)
833 top_blocks = top_to+1;
840 for (
unsigned i = top_from; i < top_blocks; ++i)
857 set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size,
true);
862 unsigned set_array_max2 =
863 find_effective_sub_block_size(i, bv_src_sub, src_sub_size,
false);
864 if (set_array_max2 > set_array_max)
865 set_array_max = set_array_max2;
868 for (; j < set_array_max; ++j)
872 bv_src_and, src_and_size,
873 bv_src_sub, src_sub_size,
877 unsigned block_bit_idx = 0;
891 template<
typename BV>
894 const bvector_type_const_ptr* bv_src,
896 bool top_null_as_zero)
904 for (
unsigned k = 0; k < src_size; ++k)
909 bv->get_blocks_manager();
910 const bm::word_t*
const* blk_blk_arg = bman_arg.get_topblock(i);
913 if (top_null_as_zero)
939 template<
typename BV>
942 const bvector_type_const_ptr* bv_src,
947 unsigned arg_blk_count = 0;
948 unsigned arg_blk_gap_count = 0;
950 sort_input_blocks_or(bv_src, src_size, i, j,
951 &arg_blk_count, &arg_blk_gap_count);
957 bman_target.check_alloc_top_subblock(i);
958 bman_target.set_block_ptr(i, j, blk);
961 bman_target.validate_top_full(i);
967 if (arg_blk_count || arg_blk_gap_count)
970 process_bit_blocks_or(bman_target, i, j, arg_blk_count);
973 if (arg_blk_gap_count)
975 process_gap_blocks_or(arg_blk_gap_count);
978 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
979 opt_mode_, ar_->tb_opt);
987 template<
typename BV>
990 const bvector_type_const_ptr* bv_src,
997 unsigned arg_blk_count = 0;
998 unsigned arg_blk_gap_count = 0;
1000 sort_input_blocks_and(bv_src, src_size,
1002 &arg_blk_count, &arg_blk_gap_count);
1008 if (arg_blk_count || arg_blk_gap_count)
1010 if (!arg_blk_gap_count && (arg_blk_count == 1))
1015 bman_target.check_alloc_top_subblock(i);
1016 bman_target.set_block_ptr(i, j, blk);
1019 bman_target.validate_top_full(i);
1027 digest = process_bit_blocks_and(arg_blk_count, digest);
1033 if (arg_blk_gap_count)
1036 process_gap_blocks_and(arg_blk_gap_count, digest);
1041 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
1042 opt_mode_, ar_->tb_opt);
1049 template<
typename BV>
1052 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
1053 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size,
1054 int* is_result_full)
1059 unsigned arg_blk_and_count = 0;
1060 unsigned arg_blk_and_gap_count = 0;
1061 unsigned arg_blk_sub_count = 0;
1062 unsigned arg_blk_sub_gap_count = 0;
1064 *is_result_full = 0;
1065 bm::word_t* blk = sort_input_blocks_and(bv_src_and, src_and_size,
1067 &arg_blk_and_count, &arg_blk_and_gap_count);
1069 if (!blk || !(arg_blk_and_count | arg_blk_and_gap_count))
1074 blk = sort_input_blocks_or(bv_src_sub, src_sub_size,
1076 &arg_blk_sub_count, &arg_blk_sub_gap_count);
1083 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1087 *is_result_full = 1;
1097 digest = process_bit_blocks_and(arg_blk_and_count, digest);
1100 digest = process_bit_blocks_sub(arg_blk_sub_count, digest);
1107 process_gap_blocks_and(arg_blk_and_gap_count, digest);
1111 if (arg_blk_sub_gap_count)
1114 process_gap_blocks_sub(arg_blk_sub_gap_count, digest);
1122 template<
typename BV>
1126 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1132 template<
typename BV>
1138 bool single_bit_found;
1139 unsigned single_bit_idx;
1140 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1150 if (single_bit_found)
1152 for (++k; k < arg_blk_gap_count; ++k)
1166 template<
typename BV>
1172 bool single_bit_found;
1173 unsigned single_bit_idx;
1174 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1185 if (single_bit_found)
1187 for (++k; k < arg_blk_gap_count; ++k)
1201 template<
typename BV>
1206 for (
unsigned i = 0; i < arg_blk_gap_count && b; ++i)
1215 template<
typename BV>
1220 for (
unsigned i = 0; i < arg_blk_gap_count; ++i)
1232 template<
typename BV>
1234 unsigned i,
unsigned j,
1235 unsigned arg_blk_count)
1249 unsigned unroll_factor, len, len_unr;
1252 len = arg_blk_count - k;
1253 len_unr = len - (len % unroll_factor);
1254 for( ;k < len_unr; k+=unroll_factor)
1257 ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1],
1258 ar_->v_arg_or_blk[k+2], ar_->v_arg_or_blk[k+3]);
1269 len = arg_blk_count - k;
1270 len_unr = len - (len % unroll_factor);
1271 for( ;k < len_unr; k+=unroll_factor)
1283 for (; k < arg_blk_count; ++k)
1300 template<
typename BV>
1310 if (range_set_ && (nb_from == nb_to))
1320 switch (arg_blk_count)
1330 ar_->v_arg_and_blk[k],
1331 ar_->v_arg_and_blk[k+1],
1338 unsigned unroll_factor, len, len_unr;
1339 unsigned single_bit_idx;
1342 len = arg_blk_count - k;
1343 len_unr = len - (len % unroll_factor);
1344 for (; k < len_unr; k += unroll_factor)
1348 ar_->v_arg_and_blk[k], ar_->v_arg_and_blk[k + 1],
1349 ar_->v_arg_and_blk[k + 2], ar_->v_arg_and_blk[k + 3],
1358 for (++k; k < arg_blk_count; ++k)
1360 const bm::word_t* arg_blk = ar_->v_arg_and_blk[k];
1361 if (!(mask & arg_blk[nword]))
1371 for (; k < arg_blk_count; ++k)
1384 template<
typename BV>
1390 unsigned single_bit_idx;
1391 const word_t** args = &ar_->v_arg_or_blk[0];
1392 for (
unsigned k = 0; k < arg_blk_count; ++k)
1408 for (++k; k < arg_blk_count; ++k)
1410 if (mask & args[k][nword])
1424 template<
typename BV>
1426 const bvector_type_const_ptr* bv_src,
unsigned src_size,
1432 if (bman_target.is_init())
1433 bman_target.deinit_tree();
1436 unsigned top_blocks = bman_target.top_block_size();
1438 bool need_realloc =
false;
1441 for (
unsigned i = 0; i < src_size; ++i)
1446 bv->get_blocks_manager();
1447 unsigned arg_top_blocks = bman_arg.top_block_size();
1448 if (arg_top_blocks > top_blocks)
1450 need_realloc =
true;
1451 top_blocks = arg_top_blocks;
1454 if (arg_size > size)
1460 bman_target.reserve_top_blocks(top_blocks);
1462 if (!bman_target.is_init())
1463 bman_target.init_tree();
1464 if (size > bv_target.size())
1465 bv_target.resize(size);
1472 template<
typename BV>
1476 unsigned top_blocks = 1;
1479 for (
unsigned i = 0; i < src_size; ++i)
1484 unsigned arg_top_blocks = bman_arg.top_block_size();
1485 if (arg_top_blocks > top_blocks)
1486 top_blocks = arg_top_blocks;
1493 template<
typename BV>
1496 unsigned i,
unsigned j,
1497 unsigned* arg_blk_count,
1498 unsigned* arg_blk_gap_count)
1501 for (
unsigned k = 0; k < src_size; ++k)
1506 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1511 ar_->v_arg_or_blk_gap[*arg_blk_gap_count] =
BMGAP_PTR(arg_blk);
1512 (*arg_blk_gap_count)++;
1519 *arg_blk_gap_count = *arg_blk_count = 0;
1522 ar_->v_arg_or_blk[*arg_blk_count] = arg_blk;
1531 template<
typename BV>
1534 unsigned i,
unsigned j,
1535 unsigned* arg_blk_count,
1536 unsigned* arg_blk_gap_count)
1538 unsigned full_blk_cnt = 0;
1540 for (
unsigned k = 0; k < src_size; ++k)
1545 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1549 *arg_blk_gap_count = *arg_blk_count = 0;
1554 ar_->v_arg_and_blk_gap[*arg_blk_gap_count] =
BMGAP_PTR(arg_blk);
1555 (*arg_blk_gap_count)++;
1571 ar_->v_arg_and_blk[*arg_blk_count] = arg_blk;
1585 template<
typename BV>
1587 const bvector_type_const_ptr* bv_src,
unsigned src_size)
1599 for (
unsigned i = 1; i < src_size; ++i)
1603 bv_target.bit_or(*bv);
1609 template<
typename BV>
1611 const bvector_type_const_ptr* bv_src,
unsigned src_size)
1623 for (
unsigned i = 1; i < src_size; ++i)
1627 bv_target.bit_and(*bv);
1633 template<
typename BV>
1635 const bvector_type_const_ptr* bv_src_and,
1636 unsigned src_and_size,
1637 const bvector_type_const_ptr* bv_src_sub,
1638 unsigned src_sub_size)
1642 combine_and_horizontal(bv_target, bv_src_and, src_and_size);
1644 for (
unsigned i = 0; i < src_sub_size; ++i)
1654 template<
typename BV>
1656 const bvector_type_const_ptr* bv_src,
1659 top_block_size_ = resize_target(bv_target, bv_src, src_size);
1662 for (
unsigned i = 0; i < src_size; ++i)
1663 ar_->carry_overs_[i] = 0;
1668 template<
typename BV>
1671 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
1680 prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
1684 if (i > top_block_size_)
1686 if (!this->any_carry_overs(src_and_size))
1693 bool found = combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
1700 return bv_target.any();
1705 template<
typename BV>
1708 const bvector_type_const_ptr* bv_src,
1712 bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1713 unsigned char* carry_overs = &(ar_->carry_overs_[0]);
1717 bool blk_zero =
false;
1722 const bm::word_t* arg_blk = bman_arg.get_block(i, j);
1743 for (
unsigned k = 1; k < src_size; ++k)
1745 unsigned carry_over = carry_overs[k];
1746 if (!digest && !carry_over)
1753 const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
1754 carry_overs[k] = process_shift_right_and(arg_blk, digest, carry_over);
1761 bman_target.opt_copy_bit_block(i, j, blk,
1762 opt_mode_, ar_->tb_opt);
1770 template<
typename BV>
1773 unsigned carry_over)
1775 bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1787 blk[0] = carry_over;
1808 blk[0] = carry_over & arg_blk[0];
1830 template<
typename BV>
1832 const bvector_type_const_ptr* bv_src,
1833 unsigned k,
unsigned i,
unsigned j)
1836 return bman_arg.get_block(i, j);
1841 template<
typename BV>
1844 for (
unsigned i = 0; i < co_size; ++i)
1845 if (ar_->carry_overs_[i])
1852 template<
typename BV>
1858 temp_blk_ = temp_block;
1862 case BM_NOT_DEFINED:
1864 case BM_SHIFT_R_AND:
1865 prepare_shift_right_and(*bv_target, ar_->arg_bv0, arg_group0_size);
1866 operation_status_ = op_prepared;
1875 template<
typename BV>
1879 BM_ASSERT(operation_status_ == op_prepared || operation_status_ == op_in_progress);
1884 case BM_NOT_DEFINED:
1887 case BM_SHIFT_R_AND:
1889 if (i > top_block_size_)
1891 if (!this->any_carry_overs(arg_group0_size))
1893 operation_status_ = op_done;
1894 return operation_status_;
1898 this->combine_shift_right_and(i, j, *bv_target_,
1899 ar_->arg_bv0, arg_group0_size);
1900 operation_status_ = op_in_progress;
1908 return operation_status_;