48 return a >= b ? a : b;
57 return (R)(n) *
fak(n - 1);
72 for (k = 0; k <= m - r; k++)
74 sum +=
binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k);
76 return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1))
77 / (R)(1 << (m + 1)) /
fak(r);
81 C
regkern(kernel k, R xx,
int p,
const R *param, R a, R b)
90 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
92 return k(xx, 0, param);
94 else if (xx < -K(0.5) + b)
96 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
97 *
BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b);
98 for (r = 0; r < p; r++)
100 sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param)
101 *
BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b);
105 else if ((xx > -a) && (xx < a))
107 for (r = 0; r < p; r++)
111 * (k(-a, r, param) *
BasisPoly(p - 1, r, xx / a)
112 + k(a, r, param) *
BasisPoly(p - 1, r, -xx / a)
117 else if (xx > K(0.5) - b)
119 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
120 *
BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
121 for (r = 0; r < p; r++)
123 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
124 *
BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
128 return k(xx, 0, param);
134 static C
regkern1(kernel k, R xx,
int p,
const R *param, R a, R b)
143 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
145 return k(xx, 0, param);
147 else if ((xx > -a) && (xx < a))
149 for (r = 0; r < p; r++)
153 * (k(-a, r, param) *
BasisPoly(p - 1, r, xx / a)
154 + k(a, r, param) *
BasisPoly(p - 1, r, -xx / a)
159 else if (xx < -K(0.5) + b)
161 for (r = 0; r < p; r++)
164 * (k(K(0.5) - b, r, param) *
BasisPoly(p - 1, r, (xx + K(0.5)) / b)
165 + k(-K(0.5) + b, r, param) *
BasisPoly(p - 1, r, -(xx + K(0.5)) / b)
170 else if (xx > K(0.5) - b)
172 for (r = 0; r < p; r++)
175 * (k(K(0.5) - b, r, param) *
BasisPoly(p - 1, r, (xx - K(0.5)) / b)
176 + k(-K(0.5) + b, r, param) *
BasisPoly(p - 1, r, -(xx - K(0.5)) / b)
181 return k(xx, 0, param);
230 static C
regkern3(kernel k, R xx,
int p,
const R *param, R a, R b)
243 if ((a <= xx) && (xx <= K(0.5) - b))
245 return k(xx, 0, param);
249 for (r = 0; r < p; r++)
251 sum += POW(-a, (R) r) * k(a, r, param)
257 else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
259 sum = k(K(0.5), 0, param) *
BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
261 for (r = 0; r < p; r++)
263 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
264 *
BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
318 C
kubintkern(
const R x,
const C *Add,
const int Ad,
const R a)
347 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
348 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
352 static C
kubintkern1(
const R x,
const C *Add,
const int Ad,
const R a)
358 c = (x + a) * (R)(Ad) / K(2.0) / a;
376 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
377 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
381 static void quicksort(
int d,
int t, R *x, C *alpha,
int N)
386 R pivot = x[(N / 2) * d + t];
394 while (x[lpos * d + t] < pivot)
396 while (x[rpos * d + t] > pivot)
400 for (k = 0; k < d; k++)
402 temp1 = x[lpos * d + k];
403 x[lpos * d + k] = x[rpos * d + k];
404 x[rpos * d + k] = temp1;
407 alpha[lpos] = alpha[rpos];
417 quicksort(d, t, x + lpos * d, alpha + lpos, N - lpos);
427 box_index = (
int *) NFFT(malloc)((size_t)(ths->box_count) *
sizeof(int));
428 for (t = 0; t < ths->box_count; t++)
431 for (l = 0; l < ths->
N_total; l++)
434 for (t = 0; t < ths->
d; t++)
436 val[t] = ths->
x[ths->
d * l + t] + K(0.25) - ths->
eps_B / K(2.0);
437 ind *= ths->box_count_per_dim;
438 ind += (int) (val[t] / ths->
eps_I);
443 ths->box_offset[0] = 0;
444 for (t = 1; t <= ths->box_count; t++)
446 ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1];
447 box_index[t - 1] = ths->box_offset[t - 1];
450 for (l = 0; l < ths->
N_total; l++)
453 for (t = 0; t < ths->
d; t++)
455 val[t] = ths->
x[ths->
d * l + t] + K(0.25) - ths->
eps_B / K(2.0);
456 ind *= ths->box_count_per_dim;
457 ind += (int) (val[t] / ths->
eps_I);
460 ths->box_alpha[box_index[ind]] = ths->
alpha[l];
462 for (t = 0; t < ths->
d; t++)
464 ths->box_x[ths->
d * box_index[ind] + t] = ths->
x[ths->
d * l + t];
468 NFFT(free)(box_index);
473 int end_lt,
const C *Add,
const int Ad,
int p, R a,
const kernel k,
474 const R *param,
const unsigned flags)
481 for (m = start; m < end_lt; m++)
490 for (l = 0; l < d; l++)
491 r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]);
496 result += alpha[m] * k(r, 0, param);
500 result -= alpha[m] *
regkern1(k, r, p, param, a, K(1.0) / K(16.0));
507 result -= alpha[m] *
regkern(k, r, p, param, a, K(1.0) / K(16.0));
510 result -= alpha[m] *
kubintkern(r, Add, Ad, a);
511 #elif defined(NF_QUADR)
512 result -= alpha[m]*quadrintkern(r,Add,Ad,a);
513 #elif defined(NF_LIN)
514 result -= alpha[m]*linintkern(r,Add,Ad,a);
516 #error define interpolation method
529 int y_multiind[ths->
d];
530 int multiindex[ths->
d];
533 for (t = 0; t < ths->
d; t++)
535 y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->
eps_B / K(2.0)) / ths->
eps_I));
540 for (y_ind =
max_i(0, y_multiind[0] - 1);
541 y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++)
544 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->
Add, ths->
Ad,
548 else if (ths->
d == 2)
550 for (multiindex[0] =
max_i(0, y_multiind[0] - 1);
551 multiindex[0] < ths->box_count_per_dim
552 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
553 for (multiindex[1] =
max_i(0, y_multiind[1] - 1);
554 multiindex[1] < ths->box_count_per_dim
555 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
557 y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
559 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->
Add,
563 else if (ths->
d == 3)
565 for (multiindex[0] =
max_i(0, y_multiind[0] - 1);
566 multiindex[0] < ths->box_count_per_dim
567 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
568 for (multiindex[1] =
max_i(0, y_multiind[1] - 1);
569 multiindex[1] < ths->box_count_per_dim
570 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
571 for (multiindex[2] =
max_i(0, y_multiind[2] - 1);
572 multiindex[2] < ths->box_count_per_dim
573 && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++)
575 y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1])
576 * ths->box_count_per_dim + multiindex[2];
578 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->
Add,
591 static void BuildTree(
int d,
int t, R *x, C *alpha,
int N)
600 BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), N - m - 1);
605 static C
SearchTree(
const int d,
const int t,
const R *x,
const C *alpha,
606 const R *xmin,
const R *xmax,
const int N,
const kernel k,
const R *param,
607 const int Ad,
const C *Add,
const int p,
const unsigned flags)
618 R Median = x[m * d + t];
619 R a = FABS(Max - Min) / 2;
625 return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
626 xmax, N - m - 1, k, param, Ad, Add, p, flags);
627 else if (Max < Median)
628 return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad,
635 for (l = 0; l < d; l++)
637 if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l])
645 r = xmin[0] + a - x[m];
650 for (l = 0; l < d; l++)
651 r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]);
656 result += alpha[m] * k(r, 0, param);
660 result -= alpha[m] *
regkern1(k, r, p, param, a, K(1.0) / K(16.0));
667 result -= alpha[m] *
regkern(k, r, p, param, a, K(1.0) / K(16.0));
670 result -= alpha[m] *
kubintkern(r, Add, Ad, a);
671 #elif defined(NF_QUADR)
672 result -= alpha[m]*quadrintkern(r,Add,Ad,a);
673 #elif defined(NF_LIN)
674 result -= alpha[m]*linintkern(r,Add,Ad,a);
676 #error define interpolation method
681 result +=
SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
682 xmax, N - m - 1, k, param, Ad, Add, p, flags)
683 +
SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add,
692 kernel k, R *param,
unsigned flags,
int nn,
int m,
int p, R eps_I, R eps_B)
697 unsigned sort_flags_trafo = 0U;
698 unsigned sort_flags_adjoint = 0U;
700 int nthreads = NFFT(get_num_threads)();
705 sort_flags_trafo = NFFT_SORT_NODES;
707 sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
709 sort_flags_adjoint = NFFT_SORT_NODES;
718 ths->
x = (R *) NFFT(malloc)((size_t)(d * N_total) * (
sizeof(R)));
719 ths->
alpha = (C *) NFFT(malloc)((size_t)(N_total) * (
sizeof(C)));
721 ths->
y = (R *) NFFT(malloc)((size_t)(d * M_total) * (
sizeof(R)));
722 ths->
f = (C *) NFFT(malloc)((size_t)(M_total) * (
sizeof(C)));
738 ths->
Ad = 4 * (ths->
p) * (ths->
p);
739 ths->
Add = (C *) NFFT(malloc)((size_t)(ths->
Ad + 5) * (
sizeof(C)));
743 if (ths->
k == one_over_x)
771 ths->
Ad =
max_i(10, (
int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0))))));
772 ths->
Add = (C *) NFFT(malloc)((size_t)(ths->
Ad + 3) * (
sizeof(C)));
773 #elif defined(NF_QUADR)
774 ths->
Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0)))));
775 ths->
Add = (C *)NFFT(malloc)((size_t)(ths->
Ad+3)*(
sizeof(C)));
776 #elif defined(NF_LIN)
777 ths->
Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0)))));
778 ths->
Add = (C *)NFFT(malloc)((size_t)(ths->
Ad+3)*(
sizeof(C)));
780 #error define NF_LIN or NF_QUADR or NF_KUB
785 ths->
Ad = 2 * (ths->
p) * (ths->
p);
786 ths->
Add = (C *) NFFT(malloc)((size_t)(ths->
Ad + 3) * (
sizeof(C)));
793 for (t = 0; t < d; t++)
798 NFFT(init_guru)(&(ths->mv1), d, N, N_total, n, m,
800 PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT
802 FFTW_MEASURE | FFTW_DESTROY_INPUT);
803 NFFT(init_guru)(&(ths->mv2), d, N, M_total, n, m,
805 PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT
807 FFTW_MEASURE | FFTW_DESTROY_INPUT);
811 for (t = 0; t < d; t++)
814 ths->
b = (C*) NFFT(malloc)((size_t)(n_total) *
sizeof(C));
816 #pragma omp critical (nfft_omp_critical_fftw_plan)
818 FFTW(plan_with_nthreads)(nthreads);
821 ths->fft_plan = FFTW(plan_dft)(d, N, ths->
b, ths->
b, FFTW_FORWARD,
828 if (ths->
flags & NEARFIELD_BOXES)
830 ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->
eps_B) / ths->
eps_I))) + 1;
832 for (t = 0; t < ths->
d; t++)
833 ths->box_count *= ths->box_count_per_dim;
835 ths->box_offset = (
int *) NFFT(malloc)((size_t)(ths->box_count + 1) *
sizeof(int));
837 ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->
N_total) * (
sizeof(C)));
839 ths->box_x = (R *) NFFT(malloc)((size_t)(ths->
d * ths->
N_total) *
sizeof(R));
847 NFFT(free)(ths->
alpha);
852 NFFT(free)(ths->
Add);
854 NFFT(finalize)(&(ths->mv1));
855 NFFT(finalize)(&(ths->mv2));
858 #pragma omp critical (nfft_omp_critical_fftw_plan)
861 FFTW(destroy_plan)(ths->fft_plan);
868 if (ths->
flags & NEARFIELD_BOXES)
870 NFFT(free)(ths->box_offset);
871 NFFT(free)(ths->box_alpha);
872 NFFT(free)(ths->box_x);
884 #pragma omp parallel for default(shared) private(j,k,t,r)
886 for (j = 0; j < ths->
M_total; j++)
889 for (k = 0; k < ths->
N_total; k++)
892 r = ths->
y[j] - ths->
x[k];
896 for (t = 0; t < ths->
d; t++)
897 r += (ths->
y[j * ths->
d + t] - ths->
x[k * ths->
d + t])
898 * (ths->
y[j * ths->
d + t] - ths->
x[k * ths->
d + t]);
924 if (ths->
flags & NEARFIELD_BOXES)
947 #pragma omp parallel for default(shared) private(k)
949 for (k = -ths->
Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++)
955 #pragma omp parallel for default(shared) private(k)
957 for (k = 0; k <= ths->
Ad + 2; k++)
970 for (k = 0; k < ths->mv1.
M_total; k++)
971 for (t = 0; t < ths->mv1.
d; t++)
972 ths->mv1.
x[ths->mv1.
d * k + t] = -ths->
x[ths->mv1.
d * k + t];
975 if (ths->mv1.
flags & PRE_LIN_PSI)
976 NFFT(precompute_lin_psi)(&(ths->mv1));
978 if (ths->mv1.
flags & PRE_PSI)
979 NFFT(precompute_psi)(&(ths->mv1));
981 if (ths->mv1.
flags & PRE_FULL_PSI)
982 NFFT(precompute_full_psi)(&(ths->mv1));
989 for (k = 0; k < ths->mv1.
M_total; k++)
990 ths->mv1.
f[k] = ths->
alpha[k];
996 for (j = 0; j < ths->mv2.
M_total; j++)
997 for (t = 0; t < ths->mv2.
d; t++)
998 ths->mv2.
x[ths->mv2.
d * j + t] = -ths->
y[ths->mv2.
d * j + t];
1001 if (ths->mv2.
flags & PRE_LIN_PSI)
1002 NFFT(precompute_lin_psi)(&(ths->mv2));
1004 if (ths->mv2.
flags & PRE_PSI)
1005 NFFT(precompute_psi)(&(ths->mv2));
1007 if (ths->mv2.
flags & PRE_FULL_PSI)
1008 NFFT(precompute_full_psi)(&(ths->mv2));
1019 for (t = 0; t < ths->
d; t++)
1023 #pragma omp parallel
for default(shared)
private(j,k,t)
1025 for (j = 0; j < n_total; j++)
1028 ths->
b[j] =
regkern1(ths->
k, (R) j / (R)(ths->
n) - K(0.5), ths->
p,
1034 for (t = 0; t < ths->
d; t++)
1036 ths->
b[j] += ((R) (k % (ths->
n)) / (R)(ths->
n) - K(0.5))
1037 * ((R) (k % (ths->
n)) / (R)(ths->
n) - K(0.5));
1045 NFFT(fftshift_complex)(ths->
b, (int)(ths->mv1.
d), ths->mv1.N);
1046 FFTW(execute)(ths->fft_plan);
1047 NFFT(fftshift_complex)(ths->
b, (int)(ths->mv1.
d), ths->mv1.N);
1071 NFFT(adjoint)(&(ths->mv1));
1082 #pragma omp parallel for default(shared) private(k)
1084 for (k = 0; k < ths->mv2.
N_total; k++)
1085 ths->mv2.f_hat[k] = ths->
b[k] * ths->mv1.f_hat[k];
1095 NFFT(trafo)(&(ths->mv2));
1106 #pragma omp parallel for default(shared) private(j,k,t)
1108 for (j = 0; j < ths->
M_total; j++)
1110 R ymin[ths->
d], ymax[ths->
d];
1112 if (ths->
flags & NEARFIELD_BOXES)
1114 ths->
f[j] = ths->mv2.
f[j] +
SearchBox(ths->
y + ths->
d * j, ths);
1118 for (t = 0; t < ths->
d; t++)
1120 ymin[t] = ths->
y[ths->
d * j + t] - ths->
eps_I;
1121 ymax[t] = ths->
y[ths->
d * j + t] + ths->
eps_I;
1123 ths->
f[j] = ths->mv2.
f[j]