1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
24 #ifndef BM__H__INCLUDED__
27 # error missing include (bm.h or bm64.h)
37 # pragma warning( disable: 4146 )
65 unsigned sv_plains = svect.plains();
67 BM_ASSERT(sv_plains > high_bit && high_bit > 0);
73 for (i = high_bit+1; i < sv_plains; ++i)
84 for (i = high_bit;
true; --i)
103 template<
typename SV>
106 if (low_bit == 0)
return;
109 unsigned sv_plains = svect.plains();
114 for (i = low_bit+1; i < sv_plains; ++i)
118 bv_acc1.
bit_or(*bv_plain);
125 for (i = low_bit-1;
true; --i)
130 bv_acc2.
bit_or(*bv_plain);
143 bv_acc1.bit_xor(bv_acc2);
144 bv_low_plain->bit_or(bv_acc1);
160 template<
typename SV>
182 void bind(
const SV& sv,
bool sorted);
199 typename SV::value_type value,
215 typename SV::value_type value,
216 typename SV::size_type& pos);
222 const typename SV::value_type* str,
223 typename SV::size_type& pos);
230 bool find_eq_str(
const typename SV::value_type* str,
231 typename SV::size_type& pos);
238 const typename SV::value_type* str,
239 typename SV::size_type& pos);
253 const typename SV::value_type val,
254 typename SV::size_type& pos);
267 const typename SV::value_type* str,
268 typename SV::size_type& pos);
276 typename SV::size_type& pos);
310 template<
typename It>
321 bool any_zero =
false;
322 for (; start < end; ++start)
325 any_zero |= (v == 0);
342 typename SV::value_type value,
361 typename SV::value_type value,
363 typename SV::size_type search_limit = 0);
367 typename SV::value_type value,
372 const typename SV::value_type* str,
379 typename SV::value_type value);
383 const typename SV::value_type* str,
384 unsigned octet_start);
413 typedef bm::heap_matrix<
typename SV::value_type,
434 value_type remap_value_vect_[SV::max_vector_size];
436 bm::id64_t vector_plain_masks_[SV::max_vector_size];
452 template<
typename SV>
515 remap(bv_in, sv_brel, bv_out);
528 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
537 template<
unsigned BSIZE>
572 template<
typename SV>
574 : sv_ptr_(0), gb_(0), have_stats_(false)
579 SV::throw_bad_alloc();
585 template<
typename SV>
595 template<
typename SV>
605 if (sv_brel->empty() || !compute_stats)
607 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
623 template<
typename SV>
631 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
634 if (!bv_non_null->test(id_from))
637 size_type idx = sv_brel.translate_address(id_from);
638 id_to = sv_brel.get(idx);
644 template<
typename SV>
656 remap(bv_in, bv_out);
661 template<
typename SV>
669 if (sv_ptr_ == 0 || sv_ptr_->empty())
682 const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
687 bv_product_.bit_and(*bv_non_null);
688 enum_bv = &bv_product_;
701 bv_product_.bit_sub(bv_zero_);
707 bv_out.set_bit_no_check(0);
709 enum_bv = &bv_product_;
716 buf_cnt = nb_old = 0;
719 for (; en.
valid(); ++en)
721 typename SV::size_type idx = *en;
722 idx = sv_ptr_->translate_address(idx);
729 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
730 bv_out.set(&gb_->buffer_[0], buf_cnt,
BM_SORTED);
734 gb_->gather_idx_[buf_cnt++] = idx;
738 gb_->gather_idx_[buf_cnt++] = idx;
741 if (buf_cnt == sv_g_size)
743 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
750 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
758 template<
typename SV>
768 typename SV::const_iterator it = sv_brel.begin();
769 for (; it.valid(); ++it)
771 typename SV::value_type t_id = *it;
775 bv_out.set_bit_no_check(t_id);
785 template<
typename SV>
792 effective_str_max_ = 0;
797 template<
typename SV>
806 effective_str_max_ = sv.effective_vector_max();
808 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
809 block0_elements_cache_.set_zero();
811 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
812 block3_elements_cache_.set_zero();
818 value_type* s0 = block0_elements_cache_.row(nb);
819 sv.get(i, s0,
size_type(block0_elements_cache_.cols()));
823 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
825 sv.get(idx, s1,
size_type(block3_elements_cache_.cols()));
831 for (
unsigned i = 0; i < SV::max_vector_size; ++i)
833 vector_plain_masks_[i] = sv.get_plains_mask(i);
840 template<
typename SV>
844 effective_str_max_ = 0;
849 template<
typename SV>
858 find_nonzero(sv, bv_out);
859 if (sv.is_compressed())
862 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
863 decompress(sv, bv_out);
869 correct_nulls(sv, bv_out);
874 template<
typename SV>
890 bv_out.set_range(sv.size(),
bm::id_max - 1,
false);
896 template<
typename SV>
902 bv_out.bit_and(*bv_null);
907 template<
typename SV>
909 typename SV::value_type value,
911 typename SV::size_type search_limit)
918 find_zero(sv, bv_out);
923 bool found = prepare_and_sub_aggregator(sv, value);
930 bool any = (search_limit == 1);
931 found = agg_.combine_and_sub(bv_out, any);
938 template<
typename SV>
940 typename SV::value_type value,
951 bool found = prepare_and_sub_aggregator(sv, value);
954 found = agg_.find_first_and_sub(idx);
962 template<
typename SV>
964 const typename SV::value_type* str,
976 unsigned common_prefix_len = 0;
980 agg_.set_range_hint(mask_from_, mask_to_);
981 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
986 str = remap_value_vect_;
990 if (sv.is_remap() && str != remap_value_vect_)
992 bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
995 str = remap_value_vect_;
999 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1003 found = agg_.find_first_and_sub(idx);
1010 template<
typename SV>
1012 const typename SV::value_type* str,
1013 unsigned octet_start)
1015 unsigned char bits[64];
1018 for (; str[len] != 0; ++len)
1023 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1025 if (
unsigned(octet_idx) < octet_start)
1028 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1032 if (&sv == bound_sv_)
1033 plains_mask = vector_plain_masks_[octet_idx];
1035 plains_mask = sv.get_plains_mask(
unsigned(octet_idx));
1037 if ((value & plains_mask) != value)
1042 unsigned short bit_count_v =
bm::bitscan(value, bits);
1043 for (
unsigned i = 0; i < bit_count_v; ++i)
1045 unsigned bit_idx = bits[i];
1047 unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1052 unsigned vinv = unsigned(value);
1063 vinv &= unsigned(plains_mask);
1064 for (
unsigned octet_plain = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1068 unsigned plain_idx = octet_plain + bit_idx;
1077 unsigned plain_idx = unsigned(len * 8) + 1;
1078 typename SV::size_type plains;
1079 if (&sv == bound_sv_)
1080 plains = effective_str_max_ * unsigned(
sizeof(
value_type)) * 8;
1082 plains = sv.plains();
1083 for (; plain_idx < plains; ++plain_idx)
1085 bvector_type_const_ptr bv = sv.get_plain(plain_idx);
1094 template<
typename SV>
1096 typename SV::value_type value)
1098 unsigned char bits[
sizeof(value) * 8];
1099 unsigned short bit_count_v =
bm::bitscan(value, bits);
1106 for (
unsigned i = bit_count_v; i > 0; --i)
1108 unsigned bit_idx = bits[i-1];
1117 unsigned sv_plains = sv.effective_plains();
1118 for (
unsigned i = 0; i < sv_plains; ++i)
1120 bvector_type_const_ptr bv = sv.get_plain(i);
1122 if (bv && !(value & mask))
1131 template<
typename SV>
1133 typename SV::value_type value,
1141 find_zero(sv, bv_out);
1145 unsigned char bits[
sizeof(value) * 8];
1146 unsigned short bit_count_v =
bm::bitscan(value, bits);
1151 const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1160 for (
unsigned i = 0; i < bit_count_v; ++i)
1164 bv_out &= *bv_plain;
1173 unsigned sv_plains = sv.effective_plains();
1174 for (
unsigned i = 0; (i < sv_plains) && value; ++i)
1177 if (bv_plain && !(value & (
value_type(1) << i)))
1178 bv_out -= *bv_plain;
1184 template<
typename SV>
1186 typename SV::size_type& pos)
1189 return this->find_eq_str(*bound_sv_, str, pos);
1194 template<
typename SV>
1196 const typename SV::value_type* str,
1197 typename SV::size_type& pos)
1204 bool remaped =
false;
1207 if (sv.is_remap() && str != remap_value_vect_)
1209 remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1212 str = remap_value_vect_;
1217 found = find_first_eq(sv, str, found_pos, remaped);
1223 if (sv.is_compressed())
1224 found = sv.find_rank(found_pos + 1, pos);
1232 find_zero(sv, bv_zero);
1233 found = bv_zero.find(pos);
1240 template<
typename SV>
1242 const typename SV::value_type* str,
1243 typename SV::size_type& pos)
1251 bool remaped =
false;
1255 if (sv.is_remap() && str != remap_value_vect_)
1257 remaped = sv.remap_tosv(
1258 remap_value_vect_, SV::max_vector_size, str);
1264 reset_search_range();
1269 l = 0; r = sv.size()-1;
1276 if (dist < min_distance_cutoff)
1289 int cmp = this->compare_str(sv, mid, str);
1307 int cmp = this->compare_str(sv, mid, str);
1312 cmp = this->compare_str(sv, mid, str);
1317 cmp = this->compare_str(sv, mid, str);
1335 set_search_range(l, r);
1339 typename SV::size_type mid = dist/2+l;
1351 int cmp = this->compare_str(sv, mid, str);
1356 set_search_range(l, mid);
1366 found = find_first_eq(sv, str, found_pos, remaped);
1372 if (sv.is_compressed())
1373 found = sv.find_rank(found_pos + 1, pos);
1376 reset_search_range();
1382 find_zero(sv, bv_zero);
1383 found = bv_zero.find(pos);
1390 template<
typename SV>
1392 typename SV::size_type& pos)
1395 return bfind_eq_str(*bound_sv_, str, pos);
1400 template<
typename SV>
1402 const typename SV::value_type val,
1403 typename SV::size_type& pos)
1415 cmp = this->compare(sv, l, val);
1426 cmp = this->compare(sv, r, val);
1434 cmp = this->compare(sv, r, val);
1448 if (dist < linear_cutoff1)
1452 cmp = this->compare(sv, l, val);
1469 cmp = this->compare(sv, mid, val);
1476 cmp = this->compare(sv, i, val);
1490 if (dist < linear_cutoff2)
1492 typename SV::const_iterator it(&sv, l);
1493 for (; it.valid(); ++it, ++l)
1495 typename SV::value_type sv_value = it.value();
1496 if (sv_value == val)
1520 template<
typename SV>
1523 const typename SV::value_type* str,
1524 typename SV::size_type& pos)
1537 cmp = this->compare_str(sv, l, str);
1548 cmp = this->compare_str(sv, r, str);
1556 cmp = this->compare_str(sv, r, str);
1570 if (dist < linear_cutoff1)
1574 cmp = this->compare_str(sv, l, str);
1590 cmp = this->compare_str(sv, mid, str);
1597 cmp = this->compare_str(sv, i, str);
1611 if (dist < linear_cutoff2)
1615 dist = sv.decode(hmatr_, l, dist+1);
1616 for (
unsigned i = 0; i < dist; ++i, ++l)
1618 const typename SV::value_type* hm_str = hmatr_.row(i);
1619 cmp = ::strcmp(hm_str, str);
1631 cmp = this->compare_str(sv, l, str);
1650 template<
typename SV>
1655 if (bound_sv_ == &sv)
1661 value_type* s0 = block0_elements_cache_.row(nb);
1664 sv.get(idx, s0,
size_type(block0_elements_cache_.cols()));
1667 for (
unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1669 char octet = str[i];
char value = s0[i];
1670 res = (value > octet) - (value < octet);
1682 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1684 for (
unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
1686 char octet = str[i];
char value = s1[i];
1687 res = (value > octet) - (value < octet);
1696 return sv.compare(idx, str);
1701 template<
typename SV>
1707 return sv.compare(idx, val);
1712 template<
typename SV>
1714 typename SV::value_type value,
1724 find_zero(sv, bv_out);
1728 find_eq_with_nulls(sv, value, bv_out, 0);
1730 decompress(sv, bv_out);
1731 correct_nulls(sv, bv_out);
1736 template<
typename SV>
1738 typename SV::value_type value,
1739 typename SV::size_type& pos)
1744 find_eq(sv, value, bv_zero);
1745 bool found = bv_zero.find(pos);
1750 bool found = find_first_eq(sv, value, found_pos);
1756 if (sv.is_compressed())
1757 found = sv.find_rank(found_pos + 1, pos);
1765 template<
typename SV>
1770 for (
unsigned i = 0; i < sv.plains(); ++i)
1771 agg_.add(sv.get_plain(i));
1772 agg_.combine_or(bv_out);
1778 template<
typename SV>
1782 if (!sv.is_compressed())
1784 const bvector_type* bv_non_null = sv.get_null_bvector();
1788 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
1789 bv_out.swap(bv_tmp_);
1794 template<
typename SV>
1805 template<
typename SV>