![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 #include <stdio.h> 00019 #include <stdlib.h> 00020 #include <string.h> 00021 #include <unistd.h> 00022 00023 #include "config.h" 00024 00025 #include "nfft3.h" 00026 #include "infft.h" 00027 00028 #define NREPEAT 5 00029 00030 #if defined(_WIN32) || defined(_WIN64) 00031 const char *CMD_CREATEDATASET = "fastsum_benchomp_createdataset.exe"; 00032 const char *CMD_DETAIL_SINGLE = "fastsum_benchomp_detail_single.exe"; 00033 const char *CMD_DETAIL_THREADS = "fastsum_benchomp_detail_threads.exe"; 00034 #else 00035 const char *CMD_CREATEDATASET = "./fastsum_benchomp_createdataset"; 00036 const char *CMD_DETAIL_SINGLE = "./fastsum_benchomp_detail_single"; 00037 const char *CMD_DETAIL_THREADS = "./fastsum_benchomp_detail_threads"; 00038 #endif 00039 00040 static FILE* file_out_tex = NULL; 00041 00042 int get_nthreads_array(int **arr) 00043 { 00044 int max_threads = NFFT(get_num_threads)(); 00045 int alloc_num = 2; 00046 int k; 00047 int ret_number = 0; 00048 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0; 00049 00050 if (max_threads <= 5) 00051 { 00052 *arr = (int*) NFFT(malloc)((size_t) (max_threads) * sizeof(int)); 00053 for (k = 0; k < max_threads; k++) 00054 *(*arr + k) = k + 1; 00055 return max_threads; 00056 } 00057 00058 for (k = 1; k <= max_threads; k *= 2, alloc_num++) 00059 ; 00060 00061 *arr = (int*) NFFT(malloc)((size_t)(alloc_num) * sizeof(int)); 00062 00063 for (k = 1; k <= max_threads; k *= 2) 00064 { 00065 if (k != max_threads && 2 * k > max_threads && max_threads_pw2) 00066 { 00067 *(*arr + ret_number) = max_threads / 2; 00068 ret_number++; 00069 } 00070 00071 *(*arr + ret_number) = k; 00072 ret_number++; 00073 00074 if (k != max_threads && 2 * k > max_threads) 00075 { 00076 *(*arr + ret_number) = max_threads; 00077 ret_number++; 00078 break; 00079 } 00080 } 00081 00082 return ret_number; 00083 } 00084 00085 void check_result_value(const int val, const int ok, const char *msg) 00086 { 00087 if (val != ok) 00088 { 00089 fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok); 00090 00091 exit(EXIT_FAILURE); 00092 } 00093 } 00094 00095 void run_test_create(int d, int L, int M) 00096 { 00097 char cmd[1025]; 00098 00099 snprintf(cmd, 1024, 00100 "%s %d %d %d > fastsum_benchomp_test.data", 00101 CMD_CREATEDATASET, d, L, M); 00102 fprintf(stderr, "%s\n", cmd); 00103 check_result_value(system(cmd), 0, "createdataset"); 00104 } 00105 00106 void run_test_init_output() 00107 { 00108 FILE *f = fopen("fastsum_benchomp_test.result", "w"); 00109 if (f != NULL) 00110 fclose(f); 00111 } 00112 00113 typedef struct 00114 { 00115 int d; 00116 int L; 00117 int M; 00118 int n; 00119 int m; 00120 int p; 00121 char *kernel_name; 00122 R c; 00123 R eps_I; 00124 R eps_B; 00125 } s_param; 00126 00127 typedef struct 00128 { 00129 R avg; 00130 R min; 00131 R max; 00132 } s_resval; 00133 00134 typedef struct 00135 { 00136 int nthreads; 00137 s_resval resval[16]; 00138 } s_result; 00139 00140 typedef struct 00141 { 00142 s_param param; 00143 s_result *results; 00144 int nresults; 00145 } s_testset; 00146 00147 void run_test(s_resval *res, int nrepeat, int n, int m, int p, 00148 char *kernel_name, R c, R eps_I, R eps_B, int nthreads) 00149 { 00150 char cmd[1025]; 00151 int r, t; 00152 00153 for (t = 0; t < 16; t++) 00154 { 00155 res[t].avg = K(0.0); 00156 res[t].min = K(1.0) / K(0.0); 00157 res[t].max = K(0.0); 00158 } 00159 00160 if (nthreads < 2) 00161 snprintf(cmd, 1024, 00162 "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " < fastsum_benchomp_test.data > fastsum_benchomp_test.out", 00163 CMD_DETAIL_SINGLE, n, m, p, kernel_name, c, eps_I, eps_B); 00164 else 00165 snprintf(cmd, 1024, 00166 "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out", 00167 CMD_DETAIL_THREADS, n, m, p, kernel_name, c, eps_I, eps_B, nthreads); 00168 fprintf(stderr, "%s\n", cmd); 00169 check_result_value(system(cmd), 0, cmd); 00170 00171 for (r = 0; r < nrepeat; r++) 00172 { 00173 int retval; 00174 R v[16]; 00175 FILE *f; 00176 check_result_value(system(cmd), 0, cmd); 00177 f = fopen("fastsum_benchomp_test.out", "r"); 00178 retval = fscanf(f, 00179 "" __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ "", v, 00180 v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10, 00181 v + 11, v + 12, v + 13, v + 14, v + 15); 00182 check_result_value(retval, 16, "read fastsum_benchomp_test.out"); 00183 fclose(f); 00184 00185 for (t = 0; t < 16; t++) 00186 { 00187 res[t].avg += v[t]; 00188 if (res[t].min > v[t]) 00189 res[t].min = v[t]; 00190 if (res[t].max < v[t]) 00191 res[t].max = v[t]; 00192 } 00193 } 00194 00195 for (t = 0; t < 16; t++) 00196 res[t].avg /= (R)(nrepeat); 00197 00198 fprintf(stderr, "%d %d: ", nthreads, nrepeat); 00199 for (t = 0; t < 16; t++) 00200 fprintf(stderr, "%.3" __FES__ " %.3" __FES__ " %.3" __FES__ " | ", res[t].avg, res[t].min, res[t].max); 00201 fprintf(stderr, "\n"); 00202 } 00203 00204 const char *get_psi_string(int flags) 00205 { 00206 if (flags & PRE_PSI) 00207 return "prepsi"; 00208 else if (flags & PRE_ONE_PSI) 00209 return "unknownPSI"; 00210 00211 return "nopsi"; 00212 } 00213 const char *get_sort_string(int flags) 00214 { 00215 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00216 return ""; 00217 00218 if (flags & NFFT_SORT_NODES) 00219 return "sorted"; 00220 00221 return "unsorted"; 00222 } 00223 00224 const char *get_adjoint_omp_string(int flags) 00225 { 00226 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00227 return "blockwise"; 00228 00229 return ""; 00230 } 00231 00232 #define MASK_FSUM_D (1U<<0) 00233 #define MASK_FSUM_L (1U<<1) 00234 #define MASK_FSUM_M (1U<<2) 00235 #define MASK_FSUM_MULTIBW (1U<<3) 00236 #define MASK_FSUM_WINM (1U<<4) 00237 #define MASK_FSUM_P (1U<<5) 00238 #define MASK_FSUM_KERNEL (1U<<6) 00239 #define MASK_FSUM_EPSI (1U<<7) 00240 #define MASK_FSUM_EPSB (1U<<8) 00241 00242 unsigned int fastsum_determine_different_parameters(s_testset *testsets, 00243 int ntestsets) 00244 { 00245 int t; 00246 unsigned int mask = 0; 00247 00248 if (ntestsets < 2) 00249 return 0; 00250 00251 for (t = 1; t < ntestsets; t++) 00252 { 00253 if (testsets[t - 1].param.d != testsets[t].param.d) 00254 mask |= MASK_FSUM_D; 00255 if (testsets[t - 1].param.L != testsets[t].param.L) 00256 mask |= MASK_FSUM_L; 00257 if (testsets[t - 1].param.M != testsets[t].param.M) 00258 mask |= MASK_FSUM_M; 00259 if (testsets[t - 1].param.n != testsets[t].param.n) 00260 mask |= MASK_FSUM_MULTIBW; 00261 if (testsets[t - 1].param.m != testsets[t].param.m) 00262 mask |= MASK_FSUM_WINM; 00263 if (testsets[t - 1].param.p != testsets[t].param.p) 00264 mask |= MASK_FSUM_P; 00265 if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name) 00266 != 0) 00267 mask |= MASK_FSUM_KERNEL; 00268 if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I) 00269 mask |= MASK_FSUM_EPSI; 00270 if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B) 00271 mask |= MASK_FSUM_EPSB; 00272 } 00273 00274 return mask; 00275 } 00276 00277 void strEscapeUnderscore(char *dst, char *src, int maxlen) 00278 { 00279 int i = 0; 00280 int len; 00281 int offset = 0; 00282 00283 while (src[i] != '\0' && len + offset < maxlen - 1) 00284 { 00285 if (src[i] == '_') 00286 len = snprintf(dst + offset, maxlen - offset, "\\_{}"); 00287 else 00288 len = snprintf(dst + offset, maxlen - offset, "%c", src[i]); 00289 offset += len; 00290 i++; 00291 } 00292 } 00293 00294 void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen, 00295 char *hostname, s_param param, unsigned int diff_mask) 00296 { 00297 unsigned int mask = ~diff_mask; 00298 int offset = 0; 00299 int len; 00300 00301 len = snprintf(outstr, maxlen, "%s", hostname); 00302 if (len < 0 || len + offset >= maxlen - 1) 00303 return; 00304 offset += len; 00305 00306 if (mask & MASK_FSUM_D) 00307 { 00308 len = snprintf(outstr + offset, maxlen - offset, " %dd fastsum", param.d); 00309 if (len < 0 || len + offset >= maxlen - 1) 00310 return; 00311 offset += len; 00312 } 00313 00314 if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M) 00315 { 00316 len = snprintf(outstr + offset, maxlen - offset, " L=M=%d", param.L); 00317 if (len < 0 || len + offset >= maxlen - 1) 00318 return; 00319 offset += len; 00320 } 00321 else 00322 { 00323 if (mask & MASK_FSUM_L) 00324 { 00325 len = snprintf(outstr + offset, maxlen - offset, " L=%d", param.L); 00326 if (len < 0 || len + offset >= maxlen - 1) 00327 return; 00328 offset += len; 00329 } 00330 00331 if (mask & MASK_FSUM_M) 00332 { 00333 len = snprintf(outstr + offset, maxlen - offset, " M=%d", param.M); 00334 if (len < 0 || len + offset >= maxlen - 1) 00335 return; 00336 offset += len; 00337 } 00338 } 00339 00340 if (mask & MASK_FSUM_MULTIBW) 00341 { 00342 len = snprintf(outstr + offset, maxlen - offset, " n=%d", param.n); 00343 if (len < 0 || len + offset >= maxlen - 1) 00344 return; 00345 offset += len; 00346 } 00347 00348 if (mask & MASK_FSUM_WINM) 00349 { 00350 len = snprintf(outstr + offset, maxlen - offset, " m=%d", param.m); 00351 if (len < 0 || len + offset >= maxlen - 1) 00352 return; 00353 offset += len; 00354 } 00355 00356 if (mask & MASK_FSUM_P) 00357 { 00358 len = snprintf(outstr + offset, maxlen - offset, " p=%d", param.p); 00359 if (len < 0 || len + offset >= maxlen - 1) 00360 return; 00361 offset += len; 00362 } 00363 00364 if (mask & MASK_FSUM_KERNEL) 00365 { 00366 char tmp[maxlen]; 00367 strEscapeUnderscore(tmp, param.kernel_name, maxlen); 00368 00369 len = snprintf(outstr + offset, maxlen - offset, " %s", tmp); 00370 if (len < 0 || len + offset >= maxlen - 1) 00371 return; 00372 offset += len; 00373 } 00374 00375 if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B) 00376 { 00377 len = snprintf(outstr + offset, maxlen - offset, 00378 " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__ "", 00379 param.eps_I); 00380 if (len < 0 || len + offset >= maxlen - 1) 00381 return; 00382 offset += len; 00383 } 00384 else 00385 { 00386 if (mask & MASK_FSUM_EPSI) 00387 { 00388 len = snprintf(outstr + offset, maxlen - offset, 00389 " $\\varepsilon_\\mathrm{I}$=%" __FGS__ "", param.eps_I); 00390 if (len < 0 || len + offset >= maxlen - 1) 00391 return; 00392 offset += len; 00393 } 00394 00395 if (mask & MASK_FSUM_EPSB) 00396 { 00397 len = snprintf(outstr + offset, maxlen - offset, 00398 " $\\varepsilon_\\mathrm{B}$=%" __FGS__ "", param.eps_B); 00399 if (len < 0 || len + offset >= maxlen - 1) 00400 return; 00401 offset += len; 00402 } 00403 } 00404 } 00405 00406 void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset) 00407 { 00408 int i, size = testset.nresults; 00409 char hostname[1025]; 00410 00411 #ifdef HAVE_GETHOSTNAME 00412 if (gethostname(hostname, 1024) != 0) 00413 #endif 00414 strncpy(hostname, "unnamed", 1024); 00415 00416 fprintf(out, "\\begin{tikzpicture}\n"); 00417 fprintf(out, "\\begin{axis}["); 00418 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, "); 00419 fprintf(out, "symbolic x coords={"); 00420 for (i = 0; i < size; i++) 00421 if (i > 0) 00422 fprintf(out, ",%d", testset.results[i].nthreads); 00423 else 00424 fprintf(out, "%d", testset.results[i].nthreads); 00425 00426 fprintf(out, 00427 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, "); 00428 fprintf(out, 00429 " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", 00430 hostname, testset.param.d, testset.param.n, testset.param.M, 00431 testset.param.m); 00432 fprintf(out, " ]\n"); 00433 fprintf(out, "\\addplot coordinates {"); 00434 for (i = 0; i < size; i++) 00435 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00436 testset.results[i].resval[10].avg); 00437 fprintf(out, "};\n"); 00438 00439 fprintf(out, "\\addplot coordinates {"); 00440 for (i = 0; i < size; i++) 00441 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00442 testset.results[i].resval[11].avg); 00443 fprintf(out, "};\n"); 00444 00445 fprintf(out, "\\addplot coordinates {"); 00446 for (i = 0; i < size; i++) 00447 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00448 testset.results[i].resval[12].avg); 00449 fprintf(out, "};\n"); 00450 00451 fprintf(out, "\\addplot coordinates {"); 00452 for (i = 0; i < size; i++) 00453 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00454 testset.results[i].resval[1].avg); 00455 fprintf(out, "};\n"); 00456 00457 fprintf(out, "\\addplot coordinates {"); 00458 for (i = 0; i < size; i++) 00459 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00460 testset.results[i].resval[4].avg + testset.results[i].resval[1].avg); 00461 fprintf(out, "};\n"); 00462 fprintf(out, 00463 "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n"); 00464 fprintf(out, "\\end{axis}\n"); 00465 fprintf(out, "\\end{tikzpicture}\n"); 00466 fprintf(out, "\n\n"); 00467 00468 fflush(out); 00469 } 00470 00471 void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset) 00472 { 00473 int i, size = testset.nresults; 00474 char hostname[1025]; 00475 00476 #ifdef HAVE_GETHOSTNAME 00477 if (gethostname(hostname, 1024) != 0) 00478 #endif 00479 strncpy(hostname, "unnamed", 1024); 00480 00481 fprintf(out, "\\begin{tikzpicture}\n"); 00482 fprintf(out, "\\begin{axis}["); 00483 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, "); 00484 fprintf(out, "symbolic x coords={"); 00485 for (i = 0; i < size; i++) 00486 if (i > 0) 00487 fprintf(out, ",%d", testset.results[i].nthreads); 00488 else 00489 fprintf(out, "%d", testset.results[i].nthreads); 00490 00491 fprintf(out, 00492 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, "); 00493 fprintf(out, 00494 " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", 00495 hostname, testset.param.d, testset.param.n, testset.param.M, 00496 testset.param.m); 00497 fprintf(out, " ]\n"); 00498 fprintf(out, "\\addplot coordinates {"); 00499 for (i = 0; i < size; i++) 00500 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00501 testset.results[i].resval[13].avg); 00502 fprintf(out, "};\n"); 00503 00504 fprintf(out, "\\addplot coordinates {"); 00505 for (i = 0; i < size; i++) 00506 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00507 testset.results[i].resval[14].avg); 00508 fprintf(out, "};\n"); 00509 00510 fprintf(out, "\\addplot coordinates {"); 00511 for (i = 0; i < size; i++) 00512 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00513 testset.results[i].resval[15].avg); 00514 fprintf(out, "};\n"); 00515 00516 fprintf(out, "\\addplot coordinates {"); 00517 for (i = 0; i < size; i++) 00518 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00519 testset.results[i].resval[2].avg); 00520 fprintf(out, "};\n"); 00521 00522 fprintf(out, "\\addplot coordinates {"); 00523 for (i = 0; i < size; i++) 00524 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00525 testset.results[i].resval[6].avg + testset.results[i].resval[2].avg); 00526 fprintf(out, "};\n"); 00527 fprintf(out, "\\legend{D,F,B,prepsi,total}\n"); 00528 fprintf(out, "\\end{axis}\n"); 00529 fprintf(out, "\\end{tikzpicture}\n"); 00530 fprintf(out, "\n\n"); 00531 00532 fflush(out); 00533 } 00534 00535 void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset) 00536 { 00537 int i, size = testset.nresults; 00538 char hostname[1025]; 00539 char plottitle[1025]; 00540 00541 #ifdef HAVE_GETHOSTNAME 00542 if (gethostname(hostname, 1024) != 0) 00543 #endif 00544 strncpy(hostname, "unnamed", 1024); 00545 00546 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param, 00547 0); 00548 00549 fprintf(out, "\\begin{tikzpicture}\n"); 00550 fprintf(out, "\\begin{axis}["); 00551 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, "); 00552 fprintf(out, "symbolic x coords={"); 00553 for (i = 0; i < size; i++) 00554 if (i > 0) 00555 fprintf(out, ",%d", testset.results[i].nthreads); 00556 else 00557 fprintf(out, "%d", testset.results[i].nthreads); 00558 00559 fprintf(out, 00560 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, "); 00561 fprintf(out, " title={%s}", plottitle); 00562 fprintf(out, " ]\n"); 00563 fprintf(out, "\\addplot coordinates {"); 00564 for (i = 0; i < size; i++) 00565 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00566 testset.results[i].resval[1].avg + testset.results[i].resval[2].avg); 00567 fprintf(out, "};\n"); 00568 00569 fprintf(out, "\\addplot coordinates {"); 00570 for (i = 0; i < size; i++) 00571 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00572 testset.results[i].resval[3].avg); 00573 fprintf(out, "};\n"); 00574 00575 fprintf(out, "\\addplot coordinates {"); 00576 for (i = 0; i < size; i++) 00577 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00578 testset.results[i].resval[4].avg + testset.results[i].resval[5].avg 00579 + testset.results[i].resval[6].avg); 00580 fprintf(out, "};\n"); 00581 00582 fprintf(out, "\\addplot coordinates {"); 00583 for (i = 0; i < size; i++) 00584 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00585 testset.results[i].resval[7].avg); 00586 fprintf(out, "};\n"); 00587 00588 fprintf(out, "\\addplot coordinates {"); 00589 for (i = 0; i < size; i++) 00590 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00591 testset.results[i].resval[9].avg - testset.results[i].resval[0].avg); 00592 fprintf(out, "};\n"); 00593 fprintf(out, 00594 "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n"); 00595 fprintf(out, "\\end{axis}\n"); 00596 fprintf(out, "\\end{tikzpicture}\n"); 00597 fprintf(out, "\n\n"); 00598 00599 fflush(out); 00600 } 00601 00602 void fastsum_print_output_speedup_total_minus_indep(FILE *out, 00603 s_testset *testsets, int ntestsets) 00604 { 00605 int i, t; 00606 char hostname[1025]; 00607 char plottitle[1025]; 00608 unsigned int diff_mask = fastsum_determine_different_parameters(testsets, 00609 ntestsets); 00610 00611 #ifdef HAVE_GETHOSTNAME 00612 if (gethostname(hostname, 1024) != 0) 00613 #endif 00614 strncpy(hostname, "unnamed", 1024); 00615 00616 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, 00617 testsets[0].param, diff_mask | MASK_FSUM_WINM); 00618 00619 fprintf(out, "\\begin{tikzpicture}\n"); 00620 fprintf(out, "\\begin{axis}["); 00621 fprintf(out, 00622 "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, "); 00623 fprintf(out, " title={%s}", plottitle); 00624 fprintf(out, " ]\n"); 00625 00626 for (t = 0; t < ntestsets; t++) 00627 { 00628 s_testset testset = testsets[t]; 00629 00630 R tref = K(0.0); 00631 for (i = 0; i < testset.nresults; i++) 00632 if (testset.results[i].nthreads == 1) 00633 tref = testset.results[i].resval[9].avg 00634 - testset.results[i].resval[0].avg; 00635 00636 fprintf(out, "\\addplot coordinates {"); 00637 for (i = 0; i < testset.nresults; i++) 00638 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads, 00639 tref 00640 / (testset.results[i].resval[9].avg 00641 - testset.results[i].resval[0].avg)); 00642 fprintf(out, "};\n"); 00643 00644 for (i = 0; i < testset.nresults; i++) 00645 { 00646 fprintf(stderr, "%d:%.3" __FIS__ " ", testset.results[i].nthreads, 00647 tref 00648 / (testset.results[i].resval[9].avg 00649 - testset.results[i].resval[0].avg)); 00650 } 00651 fprintf(stderr, "\n\n"); 00652 } 00653 00654 fprintf(out, "\\legend{{"); 00655 for (t = 0; t < ntestsets; t++) 00656 { 00657 char title[256]; 00658 if (t > 0) 00659 fprintf(out, "},{"); 00660 fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param, 00661 ~(diff_mask | MASK_FSUM_WINM)); 00662 fprintf(out, "%s", title); 00663 } 00664 fprintf(out, "}}\n"); 00665 fprintf(out, "\\end{axis}\n"); 00666 fprintf(out, "\\end{tikzpicture}\n"); 00667 fprintf(out, "\n\n"); 00668 00669 fflush(out); 00670 } 00671 00672 void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p, 00673 char *kernel_name, R c, R eps_I, R eps_B, 00674 int *nthreads_array, int n_threads_array_size) 00675 { 00676 int i; 00677 testset->param.d = d; 00678 testset->param.L = L; 00679 testset->param.M = M; 00680 testset->param.n = n; 00681 testset->param.m = m; 00682 testset->param.p = p; 00683 testset->param.kernel_name = kernel_name; 00684 testset->param.c = c; 00685 testset->param.eps_I = eps_I; 00686 testset->param.eps_B = eps_B; 00687 00688 testset->results = (s_result*) NFFT(malloc)( 00689 (size_t)(n_threads_array_size) * sizeof(s_result)); 00690 testset->nresults = n_threads_array_size; 00691 00692 run_test_create(testset->param.d, testset->param.L, testset->param.M); 00693 for (i = 0; i < n_threads_array_size; i++) 00694 { 00695 testset->results[i].nthreads = nthreads_array[i]; 00696 run_test(testset->results[i].resval, NREPEAT, testset->param.n, 00697 testset->param.m, testset->param.p, testset->param.kernel_name, 00698 testset->param.c, testset->param.eps_I, testset->param.eps_B, 00699 testset->results[i].nthreads); 00700 } 00701 00702 } 00703 00704 void test1(int *nthreads_array, int n_threads_array_size) 00705 { 00706 s_testset testsets[1]; 00707 00708 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00709 run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size); 00710 00711 fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1); 00712 00713 fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]); 00714 00715 nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]); 00716 00717 nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]); 00718 #endif 00719 } 00720 00721 int main(int argc, char** argv) 00722 { 00723 int *nthreads_array; 00724 int n_threads_array_size = get_nthreads_array(&nthreads_array); 00725 int k; 00726 00727 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW) 00728 fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n"); 00729 fprintf(stderr, "Please re-run the configure script with options\n"); 00730 fprintf(stderr, 00731 "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n"); 00732 fprintf(stderr, "and run \"make clean all\"\n\n"); 00733 #endif 00734 00735 for (k = 0; k < n_threads_array_size; k++) 00736 fprintf(stderr, "%d ", nthreads_array[k]); 00737 fprintf(stderr, "\n"); 00738 00739 file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w"); 00740 00741 test1(nthreads_array, n_threads_array_size); 00742 00743 fclose(file_out_tex); 00744 00745 return EXIT_SUCCESS; 00746 } 00747