![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00028 #include "config.h" 00029 00030 #include <stdio.h> 00031 #include <math.h> 00032 #include <string.h> 00033 #include <stdlib.h> 00034 #ifdef HAVE_COMPLEX_H 00035 #include <complex.h> 00036 #endif 00037 00038 #include "nfft3.h" 00039 #include "infft.h" 00040 00041 typedef struct 00042 { 00043 NFFT(plan) p; /* used for fftw and data */ 00044 INT *idx0; /* index of next neighbour of x_j on the oversampled regular grid */ 00045 R *deltax0; /* distance to the grid point */ 00046 } taylor_plan; 00047 00059 static void taylor_init(taylor_plan *ths, int N, int M, int n, int m) 00060 { 00061 /* Note: no nfft precomputation! */ 00062 NFFT(init_guru)((NFFT(plan)*) ths, 1, &N, M, &n, m, 00063 MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT | FFT_OUT_OF_PLACE, 00064 FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); 00065 00066 ths->idx0 = (INT*) NFFT(malloc)((size_t)(M) * sizeof(INT)); 00067 ths->deltax0 = (R*) NFFT(malloc)((size_t)(M) * sizeof(R)); 00068 } 00069 00077 static void taylor_precompute(taylor_plan *ths) 00078 { 00079 INT j; 00080 00081 NFFT(plan)* cths = (NFFT(plan)*) ths; 00082 00083 for (j = 0; j < cths->M_total; j++) 00084 { 00085 ths->idx0[j] = (LRINT(ROUND((cths->x[j] + K(0.5)) * (R)(cths->n[0]))) 00086 + cths->n[0] / 2) % cths->n[0]; 00087 ths->deltax0[j] = cths->x[j] 00088 - (ROUND((cths->x[j] + K(0.5)) * (R)(cths->n[0])) / (R)(cths->n[0]) - K(0.5)); 00089 } 00090 } 00091 00099 static void taylor_finalize(taylor_plan *ths) 00100 { 00101 NFFT(free)(ths->deltax0); 00102 NFFT(free)(ths->idx0); 00103 00104 NFFT(finalize)((NFFT(plan)*) ths); 00105 } 00106 00117 static void taylor_trafo(taylor_plan *ths) 00118 { 00119 INT j, k, l, ll; 00120 C *f, *f_hat, *g1; 00121 R *deltax; 00122 INT *idx; 00123 00124 NFFT(plan) *cths = (NFFT(plan)*) ths; 00125 00126 for (j = 0, f = cths->f; j < cths->M_total; j++) 00127 *f++ = K(0.0); 00128 00129 for (k = 0; k < cths->n_total; k++) 00130 cths->g1[k] = K(0.0); 00131 00132 for (k = -cths->N_total / 2, g1 = cths->g1 + cths->n_total 00133 - cths->N_total / 2, f_hat = cths->f_hat; k < 0; k++) 00134 (*g1++) = CPOW(-K2PI * II * (R)(k), (R)(cths->m)) * (*f_hat++); 00135 00136 cths->g1[0] = cths->f_hat[cths->N_total / 2]; 00137 00138 for (k = 1, g1 = cths->g1 + 1, f_hat = cths->f_hat + cths->N_total / 2 + 1; 00139 k < cths->N_total / 2; k++) 00140 (*g1++) = CPOW(-K2PI * II * (R)(k), (R)(cths->m)) * (*f_hat++); 00141 00142 for (l = cths->m - 1; l >= 0; l--) 00143 { 00144 for (k = -cths->N_total / 2, g1 = cths->g1 + cths->n_total 00145 - cths->N_total / 2; k < 0; k++) 00146 (*g1++) /= (-K2PI * II * (R)(k)); 00147 00148 for (k = 1, g1 = cths->g1 + 1; k < cths->N_total / 2; k++) 00149 (*g1++) /= (-K2PI * II * (R)(k)); 00150 00151 FFTW(execute)(cths->my_fftw_plan1); 00152 00153 ll = (l == 0 ? 1 : l); 00154 00155 for (j = 0, f = cths->f, deltax = ths->deltax0, idx = ths->idx0; 00156 j < cths->M_total; j++, f++) 00157 (*f) = ((*f) * (*deltax++) + cths->g2[*idx++]) / (R)(ll); 00158 } 00159 } 00160 00174 static void taylor_time_accuracy(int N, int M, int n, int m, int n_taylor, 00175 int m_taylor, unsigned test_accuracy) 00176 { 00177 int r; 00178 R t_ndft, t_nfft, t_taylor, t; 00179 C *swapndft = NULL; 00180 ticks t0, t1; 00181 00182 taylor_plan tp; 00183 NFFT(plan) np; 00184 00185 printf("%d\t%d\t", N, M); 00186 00187 taylor_init(&tp, N, M, n_taylor, m_taylor); 00188 00189 NFFT(init_guru)(&np, 1, &N, M, &n, m, 00190 PRE_PHI_HUT | PRE_FG_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 00191 FFTW_ESTIMATE | FFTW_DESTROY_INPUT); 00192 00193 /* share nodes, input, and output vectors */ 00194 np.x = tp.p.x; 00195 np.f_hat = tp.p.f_hat; 00196 np.f = tp.p.f; 00197 00198 /* output vector ndft */ 00199 if (test_accuracy) 00200 swapndft = (C*) NFFT(malloc)((size_t)(M) * sizeof(C)); 00201 00202 /* init pseudo random nodes */ 00203 NFFT(vrand_shifted_unit_double)(np.x, np.M_total); 00204 00205 /* nfft precomputation */ 00206 taylor_precompute(&tp); 00207 00208 /* nfft precomputation */ 00209 if (np.flags & PRE_ONE_PSI) 00210 NFFT(precompute_one_psi)(&np); 00211 00212 /* init pseudo random Fourier coefficients */ 00213 NFFT(vrand_unit_complex)(np.f_hat, np.N_total); 00214 00215 /* NDFT */ 00216 if (test_accuracy) 00217 { 00218 CSWAP(np.f, swapndft); 00219 00220 t_ndft = K(0.0); 00221 r = 0; 00222 while (t_ndft < K(0.01)) 00223 { 00224 r++; 00225 t0 = getticks(); 00226 NFFT(trafo_direct)(&np); 00227 t1 = getticks(); 00228 t = NFFT(elapsed_seconds)(t1, t0); 00229 t_ndft += t; 00230 } 00231 t_ndft /= (R)(r); 00232 00233 CSWAP(np.f, swapndft); 00234 printf("%.2" __FES__ "\t", t_ndft); 00235 } 00236 else 00237 printf("NaN\t"); 00238 00239 /* NFFT */ 00240 t_nfft = K(0.0); 00241 r = 0; 00242 while (t_nfft < K(0.01)) 00243 { 00244 r++; 00245 t0 = getticks(); 00246 NFFT(trafo)(&np); 00247 t1 = getticks(); 00248 t = NFFT(elapsed_seconds)(t1, t0); 00249 t_nfft += t; 00250 } 00251 t_nfft /= (R)(r); 00252 00253 printf("%.2" __FES__ "\t%d\t%.2" __FES__ "\t", ((R)(n)) / ((R)(N)), m, t_nfft); 00254 00255 if (test_accuracy) 00256 printf("%.2" __FES__ "\t", NFFT(error_l_infty_complex)(swapndft, np.f, np.M_total)); 00257 else 00258 printf("NaN\t"); 00259 00261 t_taylor = K(0.0); 00262 r = 0; 00263 while (t_taylor < K(0.01)) 00264 { 00265 r++; 00266 t0 = getticks(); 00267 taylor_trafo(&tp); 00268 t1 = getticks(); 00269 t = NFFT(elapsed_seconds)(t1, t0); 00270 t_taylor += t; 00271 } 00272 t_taylor /= (R)(r); 00273 00274 printf("%.2" __FES__ "\t%d\t%.2" __FES__ "\t", ((R)(n_taylor)) / ((R)(N)), m_taylor, t_taylor); 00275 00276 if (test_accuracy) 00277 printf("%.2" __FES__ "\n", NFFT(error_l_infty_complex)(swapndft, np.f, np.M_total)); 00278 else 00279 printf("NaN\n"); 00280 00281 fflush(stdout); 00282 00283 /* finalise */ 00284 if (test_accuracy) 00285 NFFT(free)(swapndft); 00286 00287 NFFT(finalize)(&np); 00288 taylor_finalize(&tp); 00289 } 00290 00291 int main(int argc, char **argv) 00292 { 00293 int l, m, trial; 00294 00295 if (argc <= 2) 00296 { 00297 fprintf(stderr, 00298 "taylor_nfft type first last trials sigma_nfft sigma_taylor.\n"); 00299 return EXIT_FAILURE; 00300 } 00301 00302 fprintf(stderr, "Testing the Nfft & a Taylor expansion based version.\n\n"); 00303 fprintf(stderr, "Columns: N, M, t_ndft, sigma_nfft, m_nfft, t_nfft, e_nfft"); 00304 fprintf(stderr, ", sigma_taylor, m_taylor, t_taylor, e_taylor\n"); 00305 00306 /* time vs. N = M */ 00307 if (atoi(argv[1]) == 0) 00308 { 00309 fprintf(stderr, "Fixed target accuracy, timings.\n\n"); 00310 int arg2 = atoi(argv[2]); 00311 int arg3 = atoi(argv[3]); 00312 int arg4 = atoi(argv[4]); 00313 for (l = arg2; l <= arg3; l++) 00314 { 00315 int N = (int)(1U << l); 00316 int M = (int)(1U << l); 00317 int arg5 = (int)(atof(argv[5]) * N); 00318 int arg6 = (int)(atof(argv[6]) * N); 00319 for (trial = 0; trial < arg4; trial++) 00320 { 00321 taylor_time_accuracy(N, M, arg5, 6, arg6, 6, l <= 10 ? 1 : 0); 00322 } 00323 } 00324 } 00325 00326 /* error vs. m */ 00327 if (atoi(argv[1]) == 1) 00328 { 00329 int arg2 = atoi(argv[2]); 00330 int arg3 = atoi(argv[3]); 00331 int arg4 = atoi(argv[4]); 00332 int N = atoi(argv[7]); 00333 int arg5 = (int) (atof(argv[5]) * N); 00334 int arg6 = (int) (atof(argv[6]) * N); 00335 fprintf(stderr, "Fixed N=M=%d, error vs. m.\n\n", N); 00336 for (m = arg2; m <= arg3; m++) 00337 { 00338 for (trial = 0; trial < arg4; trial++) 00339 { 00340 taylor_time_accuracy(N, N, arg5, m, arg6, m, 1); 00341 } 00342 } 00343 } 00344 00345 return EXIT_SUCCESS; 00346 }