NFFT  3.3.0
nfst.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id$ */
20 
21 /* Nonequispaced fast sine transform */
22 
23 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
24 
25 /* configure header */
26 #include "config.h"
27 
28 /* complex datatype (maybe) */
29 #ifdef HAVE_COMPLEX_H
30 #include<complex.h>
31 #endif
32 
33 /* NFFT headers */
34 #include "nfft3.h"
35 #include "infft.h"
36 
37 #ifdef _OPENMP
38 #include <omp.h>
39 #endif
40 
41 #ifdef OMP_ASSERT
42 #include <assert.h>
43 #endif
44 
45 #undef X
46 #define X(name) NFST(name)
47 
49 static inline INT intprod(const INT *vec, const INT a, const INT d)
50 {
51  INT t, p;
52 
53  p = 1;
54  for (t = 0; t < d; t++)
55  p *= vec[t] - a;
56 
57  return p;
58 }
59 
/* handy shortcuts */

/* Trigonometric basis function of this transform (sine). */
#define BASE(x) SIN(x)
/* x + 1; the argument is parenthesized so that expressions such as
 * NN(a << b) or NN(c ? d : e) expand as intended. */
#define NN(x) ((x) + 1)
/* Value added to a zero-based frequency index to obtain the frequency. */
#define OFFSET 1
/* FFTW r2r kind used in every dimension. */
#define FOURIER_TRAFO FFTW_RODFT00
/* Parenthesized so the macro can be combined with other operators safely. */
#define FFTW_DEFAULT_FLAGS (FFTW_ESTIMATE | FFTW_DESTROY_INPUT)

/* Coordinate r of node p; expects a plan pointer named ths in scope. */
#define NODE(p,r) (ths->x[(p) * ths->d + (r)])

/* Window value lookups/evaluations used by the B macros.  They expect the
 * locals ths, j, t, lj, u (and fg_psi for the FG variant) of the expanding
 * function to be in scope. */
#define MACRO_with_FG_PSI fg_psi[t][lj[t]]
#define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
#define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
  - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
#define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
74 
90 void X(trafo_direct)(const X(plan) *ths)
91 {
92  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
93 
94  memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
95 
96  if (ths->d == 1)
97  {
98  /* specialize for univariate case, rationale: faster */
99  INT j;
100 #ifdef _OPENMP
101  #pragma omp parallel for default(shared) private(j)
102 #endif
103  for (j = 0; j < ths->M_total; j++)
104  {
105  INT k_L;
106  for (k_L = 0; k_L < ths->N_total; k_L++)
107  {
108  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
109  f[j] += f_hat[k_L] * BASE(omega);
110  }
111  }
112  }
113  else
114  {
115  /* multivariate case */
116  INT j;
117 #ifdef _OPENMP
118  #pragma omp parallel for default(shared) private(j)
119 #endif
120  for (j = 0; j < ths->M_total; j++)
121  {
122  R x[ths->d], omega, Omega[ths->d + 1];
123  INT t, t2, k_L, k[ths->d];
124  Omega[0] = K(1.0);
125  for (t = 0; t < ths->d; t++)
126  {
127  k[t] = OFFSET;
128  x[t] = K2PI * ths->x[j * ths->d + t];
129  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
130  }
131  omega = Omega[ths->d];
132 
133  for (k_L = 0; k_L < ths->N_total; k_L++)
134  {
135  f[j] += f_hat[k_L] * omega;
136  {
137  for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
138  k[t] = OFFSET;
139 
140  k[t]++;
141 
142  for (t2 = t; t2 < ths->d; t2++)
143  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
144 
145  omega = Omega[ths->d];
146  }
147  }
148  }
149  }
150 }
151 
152 void X(adjoint_direct)(const X(plan) *ths)
153 {
154  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
155 
156  memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
157 
158  if (ths->d == 1)
159  {
160  /* specialize for univariate case, rationale: faster */
161 #ifdef _OPENMP
162  INT k_L;
163  #pragma omp parallel for default(shared) private(k_L)
164  for (k_L = 0; k_L < ths->N_total; k_L++)
165  {
166  INT j;
167  for (j = 0; j < ths->M_total; j++)
168  {
169  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
170  f_hat[k_L] += f[j] * BASE(omega);
171  }
172  }
173 #else
174  INT j;
175  for (j = 0; j < ths->M_total; j++)
176  {
177  INT k_L;
178  for (k_L = 0; k_L < ths->N_total; k_L++)
179  {
180  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
181  f_hat[k_L] += f[j] * BASE(omega);
182  }
183  }
184 #endif
185  }
186  else
187  {
188  /* multivariate case */
189  INT j, k_L;
190 #ifdef _OPENMP
191  #pragma omp parallel for default(shared) private(j, k_L)
192  for (k_L = 0; k_L < ths->N_total; k_L++)
193  {
194  INT k[ths->d], k_temp, t;
195 
196  k_temp = k_L;
197 
198  for (t = ths->d - 1; t >= 0; t--)
199  {
200  k[t] = k_temp % ths->N[t];
201  k_temp /= ths->N[t];
202  }
203 
204  for (j = 0; j < ths->M_total; j++)
205  {
206  R omega = K(1.0);
207  for (t = 0; t < ths->d; t++)
208  omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
209  f_hat[k_L] += f[j] * omega;
210  }
211  }
212 #else
213  for (j = 0; j < ths->M_total; j++)
214  {
215  R x[ths->d], omega, Omega[ths->d+1];
216  INT t, t2, k[ths->d];
217  Omega[0] = K(1.0);
218  for (t = 0; t < ths->d; t++)
219  {
220  k[t] = OFFSET;
221  x[t] = K2PI * ths->x[j * ths->d + t];
222  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
223  }
224  omega = Omega[ths->d];
225  for (k_L = 0; k_L < ths->N_total; k_L++)
226  {
227  f_hat[k_L] += f[j] * omega;
228 
229  for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
230  k[t] = OFFSET;
231 
232  k[t]++;
233 
234  for (t2 = t; t2 < ths->d; t2++)
235  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
236 
237  omega = Omega[ths->d];
238  }
239  }
240 #endif
241  }
242 }
243 
263 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
264  const INT act_dim)
265 {
266  const R xj = ths->x[j * ths->d + act_dim];
267  INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
268 
269  (*up) = c - (ths->m);
270  (*op) = c + 1 + (ths->m);
271 }
272 
/*
 * Building blocks of MACRO_D.  All of them expand inside D_A / D_T and rely
 * on that function's locals: ths, f_hat, g_hat, t, i, k_L, kg, kg_plain and
 * c_phi_inv_k.
 */

/* D_A: store f_hat[k_L], scaled by the accumulated factor, at plain index
 * kg_plain[d] of g_hat. */
273 #define MACRO_D_compute_A \
274 { \
275  g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
276 }
277 
/* D_T: read g_hat at plain index kg_plain[d], scale it and store it in
 * f_hat[k_L]. */
278 #define MACRO_D_compute_T \
279 { \
280  f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
281 }
282 
/* Zero the output array of D_A (g_hat, n_total entries). */
283 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
284 
/* Zero the output array of D_T (f_hat, N_total entries). */
285 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
286 
/* Per-dimension factor taken from the table ths->c_phi_inv. */
287 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
288 
/* Per-dimension factor computed on the fly: 1 / PHI_HUT at frequency
 * kg[t] + OFFSET. */
289 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
290 
/* Reset the multi-index kg to zero and mark dimension 0 as the first one
 * whose products have to be (re)computed. */
291 #define MACRO_init_k_ks \
292 { \
293  for (t = 0; t < ths->d; t++) \
294  { \
295  kg[t] = 0; \
296  } \
297  i = 0; \
298 }
299 
/* For dimensions i..d-1 recompute the running product c_phi_inv_k[t+1] and
 * the running plain index kg_plain[t+1] into g_hat (row length n[t]). */
300 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
301 { \
302  for (t = i; t < ths->d; t++) \
303  { \
304  MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
305  kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
306  } \
307 }
308 
/* One step of the running product for D_A: multiply by 0.5 and by the factor
 * selected through which_phi. */
309 #define MACRO_update_c_phi_inv_k_A(which_phi) \
310 { \
311  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
312 }
313 
/* One step of the running product for D_T; the body is the same as for D_A. */
314 #define MACRO_update_c_phi_inv_k_T(which_phi) \
315 { \
316  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
317 }
318 
/* Advance the multi-index kg by one (last dimension fastest).  A dimension
 * wraps to 0 once it reaches N[i] - 1 and carries into the previous one;
 * afterwards i is the lowest dimension whose index changed. */
319 #define MACRO_count_k_ks \
320 { \
321  kg[ths->d - 1]++; \
322  i = ths->d - 1; \
323 \
324  while ((kg[i] == ths->N[i] - 1) && (i > 0)) \
325  { \
326  kg[i - 1]++; \
327  kg[i] = 0; \
328  i--; \
329  } \
330 }
331 
/*
 * MACRO_D(which_one) defines the static function D_A or D_T.
 *
 * D_A zeroes g_hat and copies every entry of f_hat, multiplied by the
 * accumulated per-dimension factors, to its position in g_hat.  D_T zeroes
 * f_hat and performs the same scaling in the opposite direction (g_hat to
 * f_hat).  The factors come from the table ths->c_phi_inv when PRE_PHI_HUT is
 * set in ths->flags and are evaluated through PHI_HUT otherwise.
 */
332 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */
333 #define MACRO_D(which_one) \
334 static inline void D_ ## which_one (X(plan) *ths) \
335 { \
336  R *g_hat, *f_hat; /* local copy */ \
337  R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
338  INT t; /* index dimensions */ \
339  INT i; /* lowest dimension whose index changed in the last step */ \
340  INT k_L; /* plain index */ \
341  INT kg[ths->d]; /* multi index in g_hat */ \
342  INT kg_plain[ths->d+1]; /* postfix plain index */ \
343 \
344  f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
345  MACRO_D_init_result_ ## which_one; \
346 \
347  c_phi_inv_k[0] = K(1.0); \
348  kg_plain[0] = 0; \
349 \
350  MACRO_init_k_ks; \
351 \
352  if (ths->flags & PRE_PHI_HUT) \
353  { \
354  for (k_L = 0; k_L < ths->N_total; k_L++) \
355  { \
356  MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
357  MACRO_D_compute_ ## which_one; \
358  MACRO_count_k_ks; \
359  } \
360  } \
361  else \
362  { \
363  for (k_L = 0; k_L < ths->N_total; k_L++) \
364  { \
365  MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
366  MACRO_D_compute_ ## which_one; \
367  MACRO_count_k_ks; \
368  } \
369  } \
370 }
371 
/* Instantiate D_A and D_T. */
372 MACRO_D(A)
373 MACRO_D(T)
374 
/*
 * Building blocks of MACRO_B (also used by precompute_full_psi).  They rely
 * on the locals of the expanding function: ths, f, g, fj, ix, j, t, t2, u, o,
 * l, lj, lg_offset, count_lg, phi_prod and ll_plain.
 */
375 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
/* Zero the output array: f (M_total entries) for B_A, g (n_total entries)
 * for B_T. */
376 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
377 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
378 
/* PRE_FULL_PSI, B_A: add stored weight number ix times the addressed grid
 * value to the current f entry. */
379 #define MACRO_B_PRE_FULL_PSI_compute_A \
380 { \
381  (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
382 }
383 
/* PRE_FULL_PSI, B_T: add stored weight number ix times the current f entry
 * to the addressed grid value. */
384 #define MACRO_B_PRE_FULL_PSI_compute_T \
385 { \
386  g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \
387 }
388 
/* B_A: gather from g at plain index ll_plain[d] with weight phi_prod[d]. */
389 #define MACRO_B_compute_A \
390 { \
391  (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
392 }
393 
/* B_T: scatter the current f entry to g at plain index ll_plain[d] with
 * weight phi_prod[d]. */
394 #define MACRO_B_compute_T \
395 { \
396  g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
397 }
398 
/*
 * Per-node initialisation.  For every dimension t2:
 *  - uo() yields the window bounds u[t2], o[t2];
 *  - u[t2] is reduced modulo 2*NN(n[t2]) to a non-negative value and values
 *    above NN(n[t2]) are mapped to the corresponding negative offset;
 *  - l[t2] becomes the absolute value of that offset and count_lg[t2] the
 *    walking direction (-1 if the offset is <= 0, +1 otherwise);
 *  - the window counter lj[t2] is reset.
 * Finally t2 is set to 0 so the next update recomputes all dimensions.
 */
399 #define MACRO_init_uo_l_lj_t \
400 { \
401  for (t2 = 0; t2 < ths->d; t2++) \
402  { \
403  uo(ths, j, &u[t2], &o[t2], t2); \
404  \
405  /* determine index in g-array corresponding to u[(t2)] */ \
406  if (u[(t2)] < 0) \
407  lg_offset[(t2)] = \
408  (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
409  else \
410  lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
411  if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
412  lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
413  \
414  if (lg_offset[t2] <= 0) \
415  { \
416  l[t2] = -lg_offset[t2]; \
417  count_lg[t2] = -1; \
418  } \
419  else \
420  { \
421  l[t2] = +lg_offset[t2]; \
422  count_lg[t2] = +1; \
423  } \
424  \
425  lj[t2] = 0; \
426  } \
427  t2 = 0; \
428 }
429 
/* Sign factor applied per dimension; it equals the current walking direction
 * count_lg[t] for both B_A and B_T. */
430 #define FOO_A ((R)count_lg[t])
431 
432 #define FOO_T ((R)count_lg[t])
433 
/*
 * For dimensions t2..d-1 recompute the running weight phi_prod[t+1] and the
 * running plain index ll_plain[t+1] into g.  When l[t] is 0 or NN(n[t]) the
 * weight is set to zero and the index is formed without an l[t] term;
 * otherwise the grid index l[t] is stored shifted by one (l[t] - 1).
 */
434 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
435 { \
436  for (t = t2; t < ths->d; t++) \
437  { \
438  if ((l[t] != 0) && (l[t] != NN(ths->n[t]))) \
439  { \
440  phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
441  ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t] - 1; \
442  } \
443  else \
444  { \
445  phi_prod[t + 1] = K(0.0); \
446  ll_plain[t+1] = ll_plain[t] * ths->n[t]; \
447  } \
448  } \
449 }
450 
/*
 * Advance to the next window position (last dimension fastest).  In the
 * dimension that moves, the direction count_lg is flipped when l sits on 0 or
 * NN(n), then l is stepped.  When the window counter lj of a dimension
 * reaches 2m+2 it is reset, that dimension's l and count_lg are restored from
 * lg_offset and the previous dimension is stepped instead.  Afterwards t2 is
 * the lowest dimension that changed.
 */
451 #define MACRO_count_uo_l_lj_t \
452 { \
453  /* turn around if we hit one of the boundaries */ \
454  if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
455  count_lg[(ths->d-1)] *= -1; \
456  \
457  /* move array index */ \
458  l[(ths->d-1)] += count_lg[(ths->d-1)]; \
459  \
460  lj[ths->d - 1]++; \
461  t2 = ths->d - 1; \
462  \
463  while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
464  { \
465  lj[t2 - 1]++; \
466  lj[t2] = 0; \
467  /* otherwise advance lg[i-1] */ \
468  \
469  /* turn around if we hit one of the boundaries */ \
470  if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
471  count_lg[(t2 - 1)] *= -1; \
472  /* move array index */ \
473  l[(t2 - 1)] += count_lg[(t2 - 1)]; \
474  \
475  /* lg[i] = initial value */ \
476  if (lg_offset[t2] <= 0) \
477  { \
478  l[t2] = -lg_offset[t2]; \
479  count_lg[t2] = -1; \
480  } \
481  else \
482  { \
483  l[t2] = +lg_offset[t2]; \
484  count_lg[t2] = +1; \
485  } \
486  \
487  t2--; \
488  } \
489 }
490 
491 #define MACRO_B(which_one) \
492 static inline void B_ ## which_one (X(plan) *ths) \
493 { \
494  INT lprod; /* 'regular bandwidth' of matrix B */ \
495  INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
496  INT t, t2; /* index dimensions */ \
497  INT j; /* index nodes */ \
498  INT l_L, ix; /* index one row of B */ \
499  INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
500  INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
501  INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
502  R phi_prod[ths->d+1]; /* postfix product of PHI */ \
503  R *f, *g; /* local copy */ \
504  R *fj; /* local copy */ \
505  R y[ths->d]; \
506  R fg_psi[ths->d][2*ths->m+2]; \
507  R fg_exp_l[ths->d][2*ths->m+2]; \
508  INT l_fg,lj_fg; \
509  R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
510  R ip_w; \
511  INT ip_u; \
512  INT ip_s = ths->K/(ths->m+2); \
513  INT lg_offset[ths->d]; /* offset in g according to u */ \
514  INT count_lg[ths->d]; /* count summands (2m+2) */ \
515 \
516  f = (R*)ths->f; g = (R*)ths->g; \
517 \
518  MACRO_B_init_result_ ## which_one \
519 \
520  if (ths->flags & PRE_FULL_PSI) \
521  { \
522  for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
523  { \
524  for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
525  { \
526  MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
527  } \
528  } \
529  return; \
530  } \
531 \
532  phi_prod[0] = K(1.0); \
533  ll_plain[0] = 0; \
534 \
535  for (t = 0, lprod = 1; t < ths->d; t++) \
536  lprod *= (2 * ths->m + 2); \
537 \
538  if (ths->flags & PRE_PSI) \
539  { \
540  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
541  { \
542  MACRO_init_uo_l_lj_t; \
543  \
544  for (l_L = 0; l_L < lprod; l_L++) \
545  { \
546  MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
547  \
548  MACRO_B_compute_ ## which_one; \
549  \
550  MACRO_count_uo_l_lj_t; \
551  } /* for(l_L) */ \
552  } /* for(j) */ \
553  return; \
554  } /* if(PRE_PSI) */ \
555  \
556  if (ths->flags & PRE_FG_PSI) \
557  { \
558  for (t = 0; t < ths->d; t++) \
559  { \
560  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
561  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
562  tmp2 = K(1.0); \
563  tmp3 = K(1.0); \
564  fg_exp_l[t][0] = K(1.0); \
565  \
566  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
567  { \
568  tmp3 = tmp2 * tmpEXP2; \
569  tmp2 *= tmpEXP2sq; \
570  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
571  } \
572  } \
573  \
574  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
575  { \
576  MACRO_init_uo_l_lj_t; \
577  \
578  for (t = 0; t < ths->d; t++) \
579  { \
580  fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
581  tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
582  tmp1 = K(1.0); \
583  \
584  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
585  { \
586  tmp1 *= tmpEXP1; \
587  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
588  } \
589  } \
590  \
591  for (l_L= 0; l_L < lprod; l_L++) \
592  { \
593  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
594  \
595  MACRO_B_compute_ ## which_one; \
596  \
597  MACRO_count_uo_l_lj_t; \
598  } \
599  } \
600  return; \
601  } \
602  \
603  if (ths->flags & FG_PSI) \
604  { \
605  for (t = 0; t < ths->d; t++) \
606  { \
607  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
608  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
609  tmp2 = K(1.0); \
610  tmp3 = K(1.0); \
611  fg_exp_l[t][0] = K(1.0); \
612  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
613  { \
614  tmp3 = tmp2 * tmpEXP2; \
615  tmp2 *= tmpEXP2sq; \
616  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
617  } \
618  } \
619  \
620  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
621  { \
622  MACRO_init_uo_l_lj_t; \
623  \
624  for (t = 0; t < ths->d; t++) \
625  { \
626  fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
627  \
628  tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
629  tmp1 = K(1.0); \
630  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
631  { \
632  tmp1 *= tmpEXP1; \
633  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
634  } \
635  } \
636  \
637  for (l_L = 0; l_L < lprod; l_L++) \
638  { \
639  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
640  \
641  MACRO_B_compute_ ## which_one; \
642  \
643  MACRO_count_uo_l_lj_t; \
644  } \
645  } \
646  return; \
647  } \
648  \
649  if (ths->flags & PRE_LIN_PSI) \
650  { \
651  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
652  { \
653  MACRO_init_uo_l_lj_t; \
654  \
655  for (t = 0; t < ths->d; t++) \
656  { \
657  y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
658  * ((R)ths->K))/(ths->m + 2); \
659  ip_u = LRINT(FLOOR(y[t])); \
660  ip_w = y[t]-ip_u; \
661  for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
662  { \
663  fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
664  * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
665  * (ip_w); \
666  } \
667  } \
668  \
669  for (l_L = 0; l_L < lprod; l_L++) \
670  { \
671  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
672  \
673  MACRO_B_compute_ ## which_one; \
674  \
675  MACRO_count_uo_l_lj_t; \
676  } /* for(l_L) */ \
677  } /* for(j) */ \
678  return; \
679  } /* if(PRE_LIN_PSI) */ \
680  \
681  /* no precomputed psi at all */ \
682  for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
683  { \
684  MACRO_init_uo_l_lj_t; \
685  \
686  for (l_L = 0; l_L < lprod; l_L++) \
687  { \
688  MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
689  \
690  MACRO_B_compute_ ## which_one; \
691  \
692  MACRO_count_uo_l_lj_t; \
693  } /* for (l_L) */ \
694  } /* for (j) */ \
695 } /* B */
696 
697 MACRO_B(A)
698 MACRO_B(T)
699 
703 void X(trafo)(X(plan) *ths)
704 {
705  switch(ths->d)
706  {
707  default:
708  {
709  /* use ths->my_fftw_r2r_plan */
710  ths->g_hat = ths->g1;
711  ths->g = ths->g2;
712 
713  /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
714  * k \in I_N \f$ */
715  TIC(0)
716  D_A(ths);
717  TOC(0)
718 
719  /* Compute by d-variate discrete Fourier transform
720  * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
721  * \text{ for } l \in I_n \f$ */
722  TIC_FFTW(1)
723  FFTW(execute)(ths->my_fftw_r2r_plan);
724  TOC_FFTW(1)
725 
726  /*if (ths->flags & PRE_FULL_PSI)
727  full_psi__A(ths);*/
728 
729  /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
730  * \text{ for } j=0,\hdots,M-1 \f$ */
731  TIC(2)
732  B_A(ths);
733  TOC(2)
734 
735  /*if (ths->flags & PRE_FULL_PSI)
736  {
737  Y(free)(ths->psi_index_g);
738  Y(free)(ths->psi_index_f);
739  }*/
740  }
741  }
742 } /* trafo */
743 
744 void X(adjoint)(X(plan) *ths)
745 {
746  switch(ths->d)
747  {
748  default:
749  {
750  /* use ths->my_fftw_plan */
751  ths->g_hat = ths->g2;
752  ths->g = ths->g1;
753 
754  /*if (ths->flags & PRE_FULL_PSI)
755  full_psi__T(ths);*/
756 
757  /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
758  * \text{ for } l \in I_n,m(x_j) \f$ */
759  TIC(2)
760  B_T(ths);
761  TOC(2)
762 
763  /* Compute by d-variate discrete cosine transform
764  * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
765  * \text{ for } k \in I_N\f$ */
766  TIC_FFTW(1)
767  FFTW(execute)(ths->my_fftw_r2r_plan);
768  TOC_FFTW(1)
769 
770  /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
771  * k \in I_N \f$ */
772  TIC(0)
773  D_T(ths);
774  TOC(0)
775  }
776  }
777 } /* adjoint */
778 
781 static inline void precompute_phi_hut(X(plan) *ths)
782 {
783  INT ks[ths->d]; /* index over all frequencies */
784  INT t; /* index over all dimensions */
785 
786  ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
787 
788  for (t = 0; t < ths->d; t++)
789  {
790  ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
791 
792  for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
793  {
794  ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
795  }
796  }
797 } /* phi_hut */
798 
804 void X(precompute_lin_psi)(X(plan) *ths)
805 {
806  INT t;
807  INT j;
808  R step;
810  for (t = 0; t < ths->d; t++)
811  {
812  step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
813 
814  for (j = 0; j <= ths->K; j++)
815  {
816  ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
817  } /* for(j) */
818  } /* for(t) */
819 }
820 
821 void X(precompute_fg_psi)(X(plan) *ths)
822 {
823  INT t; /* index over all dimensions */
824  INT u, o; /* depends on x_j */
825 
826 // sort(ths);
827 
828  for (t = 0; t < ths->d; t++)
829  {
830  INT j;
831 // #pragma omp parallel for default(shared) private(j,u,o)
832  for (j = 0; j < ths->M_total; j++)
833  {
834  uo(ths, j, &u, &o, t);
835 
836  ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
837  ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
838  } /* for(j) */
839  }
840  /* for(t) */
841 } /* nfft_precompute_fg_psi */
842 
843 void X(precompute_psi)(X(plan) *ths)
844 {
845  INT t; /* index over all dimensions */
846  INT lj; /* index 0<=lj<u+o+1 */
847  INT u, o; /* depends on x_j */
848 
849  //sort(ths);
850 
851  for (t = 0; t < ths->d; t++)
852  {
853  INT j;
854 
855  for (j = 0; j < ths->M_total; j++)
856  {
857  uo(ths, j, &u, &o, t);
858 
859  for(lj = 0; lj < (2 * ths->m + 2); lj++)
860  ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
861  (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
862  } /* for (j) */
863  } /* for (t) */
864 } /* precompute_psi */
865 
866 void X(precompute_full_psi)(X(plan) *ths)
867 {
868 //#ifdef _OPENMP
869 // sort(ths);
870 //
871 // nfft_precompute_full_psi_omp(ths);
872 //#else
873  INT t, t2; /* index over all dimensions */
874  INT j; /* index over all nodes */
875  INT l_L; /* plain index 0 <= l_L < lprod */
876  INT l[ths->d]; /* multi index u<=l<=o */
877  INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
878  INT ll_plain[ths->d+1]; /* postfix plain index */
879  INT lprod; /* 'bandwidth' of matrix B */
880  INT u[ths->d], o[ths->d]; /* depends on x_j */
881  INT count_lg[ths->d];
882  INT lg_offset[ths->d];
883 
884  R phi_prod[ths->d+1];
885 
886  INT ix, ix_old;
887 
888  //sort(ths);
889 
890  phi_prod[0] = K(1.0);
891  ll_plain[0] = 0;
892 
893  for (t = 0, lprod = 1; t < ths->d; t++)
894  lprod *= 2 * ths->m + 2;
895 
896  for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
897  {
898  MACRO_init_uo_l_lj_t;
899 
900  for (l_L = 0; l_L < lprod; l_L++, ix++)
901  {
902  MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
903 
904  ths->psi_index_g[ix] = ll_plain[ths->d];
905  ths->psi[ix] = phi_prod[ths->d];
906 
907  MACRO_count_uo_l_lj_t;
908  } /* for (l_L) */
909 
910  ths->psi_index_f[j] = ix - ix_old;
911  ix_old = ix;
912  } /* for(j) */
913 //#endif
914 }
915 
916 void X(precompute_one_psi)(X(plan) *ths)
917 {
918  if(ths->flags & PRE_PSI)
919  X(precompute_psi)(ths);
920  if(ths->flags & PRE_FULL_PSI)
921  X(precompute_full_psi)(ths);
922  if(ths->flags & PRE_FG_PSI)
923  X(precompute_fg_psi)(ths);
924  if(ths->flags & PRE_LIN_PSI)
925  X(precompute_lin_psi)(ths);
926 }
927 
928 static inline void init_help(X(plan) *ths)
929 {
930  INT t; /* index over all dimensions */
931  INT lprod; /* 'bandwidth' of matrix B */
932 
933  if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
934  ths->flags |= NFFT_SORT_NODES;
935 
936  ths->N_total = intprod(ths->N, OFFSET, ths->d);
937  ths->n_total = intprod(ths->n, 0, ths->d);
938 
939  ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
940 
941  for (t = 0; t < ths->d; t++)
942  ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
943 
944  /* Assign r2r transform kinds for each dimension */
945  ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
946  for (t = 0; t < ths->d; t++)
947  ths->r2r_kind[t] = FOURIER_TRAFO;
948 
949  WINDOW_HELP_INIT;
950 
951  if (ths->flags & MALLOC_X)
952  ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
953 
954  if (ths->flags & MALLOC_F_HAT)
955  ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
956 
957  if (ths->flags & MALLOC_F)
958  ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
959 
960  if (ths->flags & PRE_PHI_HUT)
961  precompute_phi_hut(ths);
962 
963  if(ths->flags & PRE_LIN_PSI)
964  {
965  ths->K = (1U<< 10) * (ths->m+2);
966  ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
967  }
968 
969  if(ths->flags & PRE_FG_PSI)
970  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
971 
972  if (ths->flags & PRE_PSI)
973  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
974 
975  if(ths->flags & PRE_FULL_PSI)
976  {
977  for (t = 0, lprod = 1; t < ths->d; t++)
978  lprod *= 2 * ths->m + 2;
979 
980  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
981 
982  ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
983  ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
984  }
985 
986  if (ths->flags & FFTW_INIT)
987  {
988  ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
989 
990  if (ths->flags & FFT_OUT_OF_PLACE)
991  ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
992  else
993  ths->g2 = ths->g1;
994 
995  {
996  int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
997 
998  for (t = 0; t < ths->d; t++)
999  _n[t] = (int)(ths->n[t]);
1000 
1001  ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1002  Y(free)(_n);
1003  }
1004  }
1005 
1006 // if(ths->flags & NFFT_SORT_NODES)
1007 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1008 // else
1009 // ths->index_x = NULL;
1010 
1011  ths->mv_trafo = (void (*) (void* ))X(trafo);
1012  ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1013 }
1014 
1015 void X(init)(X(plan) *ths, int d, int *N, int M_total)
1016 {
1017  int t; /* index over all dimensions */
1018 
1019  ths->d = (INT)d;
1020 
1021  ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1022 
1023  for (t = 0; t < d; t++)
1024  ths->N[t] = (INT)N[t];
1025 
1026  ths->M_total = (INT)M_total;
1027 
1028  ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1029 
1030  for (t = 0; t < d; t++)
1031  ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1032 
1033  ths->m = WINDOW_HELP_ESTIMATE_m;
1034 
1035  if (d > 1)
1036  {
1037 //#ifdef _OPENMP
1038 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1039 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
1040 // NFFT_OMP_BLOCKWISE_ADJOINT;
1041 //#else
1042  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1043  FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
1044 //#endif
1045  }
1046  else
1047  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1048  FFTW_INIT | FFT_OUT_OF_PLACE;
1049 
1050  ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1051 
1052  init_help(ths);
1053 }
1054 
1055 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1056  unsigned flags, unsigned fftw_flags)
1057 {
1058  INT t; /* index over all dimensions */
1059 
1060  ths->d = (INT)d;
1061  ths->M_total = (INT)M_total;
1062  ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1063 
1064  for (t = 0; t < d; t++)
1065  ths->N[t] = (INT)N[t];
1066 
1067  ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1068 
1069  for (t = 0; t < d; t++)
1070  ths->n[t] = (INT)n[t];
1071 
1072  ths->m = (INT)m;
1073 
1074  ths->flags = flags;
1075  ths->fftw_flags = fftw_flags;
1076 
1077  init_help(ths);
1078 }
1079 
1080 void X(init_1d)(X(plan) *ths, int N1, int M_total)
1081 {
1082  int N[1];
1083 
1084  N[0] = N1;
1085 
1086  X(init)(ths, 1, N, M_total);
1087 }
1088 
1089 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1090 {
1091  int N[2];
1092 
1093  N[0] = N1;
1094  N[1] = N2;
1095 
1096  X(init)(ths, 2, N, M_total);
1097 }
1098 
1099 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1100 {
1101  int N[3];
1102 
1103  N[0] = N1;
1104  N[1] = N2;
1105  N[2] = N3;
1106 
1107  X(init)(ths, 3, N, M_total);
1108 }
1109 
1110 const char* X(check)(X(plan) *ths)
1111 {
1112  INT j;
1113 
1114  if (!ths->f)
1115  return "Member f not initialized.";
1116 
1117  if (!ths->x)
1118  return "Member x not initialized.";
1119 
1120  if (!ths->f_hat)
1121  return "Member f_hat not initialized.";
1122 
1123  for (j = 0; j < ths->M_total * ths->d; j++)
1124  {
1125  if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1126  {
1127  return "ths->x out of range [0.0,0.5)";
1128  }
1129  }
1130 
1131  for (j = 0; j < ths->d; j++)
1132  {
1133  if (ths->sigma[j] <= 1)
1134  return "Oversampling factor too small";
1135 
1136  if(ths->N[j] - 1 <= ths->m)
1137  return "Polynomial degree N is smaller than cut-off m";
1138 
1139  if(ths->N[j]%2 == 1)
1140  return "polynomial degree N has to be even";
1141  }
1142  return 0;
1143 }
1144 
1145 void X(finalize)(X(plan) *ths)
1146 {
1147  INT t; /* index over dimensions */
1148 
1149 // if(ths->flags & NFFT_SORT_NODES)
1150 // Y(free)(ths->index_x);
1151 
1152  if (ths->flags & FFTW_INIT)
1153  {
1154 #ifdef _OPENMP
1155  #pragma omp critical (nfft_omp_critical_fftw_plan)
1156 #endif
1157  FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1158 
1159  if (ths->flags & FFT_OUT_OF_PLACE)
1160  Y(free)(ths->g2);
1161 
1162  Y(free)(ths->g1);
1163  }
1164 
1165  if(ths->flags & PRE_FULL_PSI)
1166  {
1167  Y(free)(ths->psi_index_g);
1168  Y(free)(ths->psi_index_f);
1169  Y(free)(ths->psi);
1170  }
1171 
1172  if (ths->flags & PRE_PSI)
1173  Y(free)(ths->psi);
1174 
1175  if(ths->flags & PRE_FG_PSI)
1176  Y(free)(ths->psi);
1177 
1178  if(ths->flags & PRE_LIN_PSI)
1179  Y(free)(ths->psi);
1180 
1181  if (ths->flags & PRE_PHI_HUT)
1182  {
1183  for (t = 0; t < ths->d; t++)
1184  Y(free)(ths->c_phi_inv[t]);
1185  Y(free)(ths->c_phi_inv);
1186  }
1187 
1188  if (ths->flags & MALLOC_F)
1189  Y(free)(ths->f);
1190 
1191  if(ths->flags & MALLOC_F_HAT)
1192  Y(free)(ths->f_hat);
1193 
1194  if (ths->flags & MALLOC_X)
1195  Y(free)(ths->x);
1196 
1197  WINDOW_HELP_FINALIZE;
1198 
1199  Y(free)(ths->N);
1200  Y(free)(ths->n);
1201  Y(free)(ths->sigma);
1202 
1203  Y(free)(ths->r2r_kind);
1204 } /* finalize */
#define TIC(a)
Timing, method works since the inaccurate timer is updated mostly in the measured function...
Definition: infft.h:1415
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:53