Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
main.cpp
00001 //******************************************************** 00002 //** RC Apha --- based on roylongbottom.org ARM ****** 00003 //** Nucleo-64-144 Stm32F103 to F767 benchmark ****** 00004 //** Limpack -port form Arduino IDE ****** 00005 //** Jovan Ivkovic - 2017 ****** 00006 //******************************************************** 00007 #define GCCARMDP 00008 00009 #define UNROLL 00010 #define DP 00011 00012 #ifdef SP 00013 #define REAL float 00014 #define ZERO 0.0 00015 #define ONE 1.0 00016 #define PREC "Single" 00017 #endif 00018 00019 #ifdef DP 00020 #define REAL double 00021 #define ZERO 0.0e0 00022 #define ONE 1.0e0 00023 #define PREC "Double" 00024 #endif 00025 00026 #ifdef ROLL 00027 #define ROLLING "Rolled" 00028 #endif 00029 #ifdef UNROLL 00030 #define ROLLING "Unrolled" 00031 #endif 00032 00033 // VERSION 00034 00035 #ifdef CNNT 00036 #define options "Non-optimised" 00037 #define opt "0" 00038 #else 00039 // #define options "Optimised" 00040 // #define options "Opt 3 32 Bit" 00041 #define options "vfpv4 32 Bit" 00042 #define opt "1" 00043 #endif 00044 00045 #define NTIMES 10 00046 00047 #include "mbed.h" 00048 00049 00050 //--------------------------------- 00051 DigitalOut myled(LED1); 00052 Serial pc(USBTX, USBRX); //USB is out of oreder on Embeded-Pi 00053 //Serial pc(PC_10,PC_11); //RX-TX D0,D1 Embeded-PI ports 00054 Timer timer; 00055 //-------------------------------- 00056 00057 void print_time (int row); 00058 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma); 00059 void dgefa (REAL a[], int lda, int n, int ipvt[], int *info); 00060 void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job); 00061 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); 00062 void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy); 00063 REAL epslon (REAL x); 00064 int idamax (int n, REAL dx[], int incx); 00065 void dscal (int n, REAL da, REAL dx[], int incx); 00066 REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy); 00067 00068 static REAL atime[9][15]; 00069 double runSecs = 1; 00070 00071 void do_benchmark(void){ 00072 00073 float t1,t2 = 0.0; 00074 static REAL aa[40*40],a[40*41],b[40],x[40]; // nxn, n*n+1, n,n, 00075 REAL cray,ops,total,norma,normx; 00076 REAL resid,residn,eps,tm2,epsn,x1,x2; 00077 REAL mflops; 00078 static int ipvt[21],n,i,j,ntimes,info,lda,ldaa; 00079 int endit, pass, loop; 00080 REAL overhead1, overhead2, time2; 00081 REAL max1, max2; 00082 char was[5][20]; 00083 char expect[5][20]; 00084 char title[5][20]; 00085 int errors; 00086 int nopause = 1; 00087 00088 /* 00089 if (argc > 1) { 00090 switch (argv[1][0]) { 00091 case 'N': 00092 nopause = 0; 00093 break; 00094 case 'n': 00095 nopause = 0; 00096 break; 00097 } 00098 } 00099 */ 00100 pc.printf("\n"); 00101 00102 00103 // outfile = fopen("Linpack.txt","a+"); 00104 // if (outfile == NULL) 00105 // { 00106 // printf (" Cannot open results file \n\n"); 00107 // printf(" Press Enter\n\n"); 00108 // int g = getchar(); 00109 // exit (0); 00110 //} 00111 00112 00113 00114 #ifdef GCCARMDP 00115 pc.printf(expect[0], " 1.7"); 00116 pc.printf(expect[1], " 7.41628980e-14"); 00117 pc.printf(expect[2], " 2.22044605e-16"); 00118 pc.printf(expect[3], " -1.49880108e-14"); 00119 pc.printf(expect[4], " -1.89848137e-14"); 00120 #endif 00121 00122 #ifdef GCCARMSP 00123 pc.printf(expect[0], " 1.6"); 00124 pc.printf(expect[1], " 3.80277634e-05"); 00125 pc.printf(expect[2], " 1.19209290e-07"); 00126 pc.printf(expect[3], " -1.38282776e-05"); 00127 pc.printf(expect[4], " -7.51018524e-06"); 00128 #endif 00129 00130 lda = 41; //n+1 00131 ldaa = 40; //n 00132 cray = .056; 00133 n = 40; // 40 For Nucleo F7 00134 pc.printf("----------------------------------------------\n"); 00135 pc.printf("%s ", ROLLING); 00136 pc.printf("%s ", PREC); 00137 pc.printf("Precision Linpack Benchmark - Linux Version in 'C/C++'\n\n"); 00138 00139 pc.printf("Optimisation %s\n\n",options); 00140 00141 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); 00142 00143 matgen(a,lda,n,b,&norma); 00144 timer.start(); 00145 //start_time(); 00146 t1 = timer.read(); 00147 dgefa(a,lda,n,ipvt,&info); 00148 //end_time(); 00149 t2 = timer.read(); 00150 timer.stop(); 00151 atime[0][0] = t2-t1; //secs 00152 00153 timer.reset(); 00154 timer.start(); 00155 t1 = timer.read(); 00156 dgesl(a,lda,n,ipvt,b,0); 00157 t2 = timer.read(); 00158 timer.stop(); 00159 atime[1][0] = t2-t1; //secs 00160 total = atime[0][0] + atime[1][0]; 00161 00162 /* compute a residual to verify results. */ 00163 00164 for (i = 0; i < n; i++) { 00165 x[i] = b[i]; 00166 } 00167 matgen(a,lda,n,b,&norma); 00168 for (i = 0; i < n; i++) { 00169 b[i] = -b[i]; 00170 } 00171 dmxpy(n,b,n,lda,x,a); 00172 resid = 0.0; 00173 normx = 0.0; 00174 for (i = 0; i < n; i++) { 00175 resid = (resid > fabs((double)b[i])) 00176 ? resid : fabs((double)b[i]); 00177 normx = (normx > fabs((double)x[i])) 00178 ? normx : fabs((double)x[i]); 00179 } 00180 eps = epslon(ONE); 00181 residn = resid/( n*norma*normx*eps ); 00182 epsn = eps; 00183 x1 = x[0] - 1; 00184 x2 = x[n-1] - 1; 00185 00186 pc.printf("norm resid resid machep"); 00187 pc.printf(" x[0]-1 x[n-1]-1\n"); 00188 pc.printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", 00189 (double)residn, (double)resid, (double)epsn, 00190 (double)x1, (double)x2); 00191 00192 pc.printf("Times are reported for matrices of order %5d\n",n); 00193 pc.printf("1 pass times for array with leading dimension of%5d\n\n",lda); 00194 pc.printf(" dgefa dgesl total Mflops unit"); 00195 pc.printf(" ratio\n"); 00196 00197 atime[2][0] = total; 00198 if (total > 0.0) { 00199 atime[3][0] = ops/(1.0e6*total); 00200 atime[4][0] = 2.0/atime[3][0]; 00201 } else { 00202 atime[3][0] = 0.0; 00203 atime[4][0] = 0.0; 00204 } 00205 atime[5][0] = total/cray; 00206 00207 print_time(0); 00208 00209 /************************************************************************ 00210 * Calculate overhead of executing matgen procedure * 00211 ************************************************************************/ 00212 00213 pc.printf ("\nCalculating matgen overhead\n"); 00214 pass = -20; 00215 loop = NTIMES; 00216 do { 00217 timer.start(); 00218 t1 = timer.read(); 00219 pass = pass + 1; 00220 for ( i = 0 ; i < loop ; i++) { 00221 matgen(a,lda,n,b,&norma); 00222 } 00223 t2 = timer.read(); 00224 timer.stop(); 00225 overhead1 = t2-t1; //sec 00226 pc.printf ("%10d times %6.4f seconds\n", loop, overhead1); 00227 if (overhead1 > runSecs) { 00228 pass = 0; 00229 } 00230 if (pass < 0) { 00231 if (overhead1 < 0.1) { 00232 loop = loop * 10; 00233 } else { 00234 loop = loop * 2; 00235 } 00236 } 00237 } while (pass < 0); 00238 00239 overhead1 = overhead1 / (double)loop; 00240 00241 pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1); 00242 00243 /************************************************************************ 00244 * Calculate matgen/dgefa passes for runSecs seconds * 00245 ************************************************************************/ 00246 00247 pc.printf ("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); 00248 pass = -20; 00249 ntimes = NTIMES; 00250 do { 00251 timer.start(); 00252 t1 = timer.read(); 00253 pass = pass + 1; 00254 for ( i = 0 ; i < ntimes ; i++) { 00255 matgen(a,lda,n,b,&norma); 00256 dgefa(a,lda,n,ipvt,&info ); 00257 } 00258 t2 = timer.read(); 00259 timer.stop(); 00260 time2 = t2-t1; //sec 00261 00262 pc.printf ("%10d times %6.2f seconds\n", ntimes, time2); 00263 if (time2 > runSecs) { 00264 pass = 0; 00265 } 00266 if (pass < 0) { 00267 if (time2 < 0.1) { 00268 ntimes = ntimes * 10; 00269 } else { 00270 ntimes = ntimes * 2; 00271 } 00272 } 00273 } while (pass < 0); 00274 00275 ntimes = (int)(runSecs * (double)ntimes / time2); 00276 if (ntimes == 0) ntimes = 1; 00277 00278 pc.printf ("Passes used %10d \n\n", ntimes); 00279 pc.printf("Times for array with leading dimension of%4d\n\n",lda); 00280 pc.printf(" dgefa dgesl total Mflops unit"); 00281 pc.printf(" ratio\n"); 00282 00283 /************************************************************************ 00284 * Execute 5 passes * 00285 ************************************************************************/ 00286 00287 tm2 = ntimes * overhead1; 00288 atime[3][6] = 0; 00289 00290 for (j=1 ; j<6 ; j++) { 00291 timer.start(); 00292 t1 = timer.read(); 00293 00294 for (i = 0; i < ntimes; i++) { 00295 matgen(a,lda,n,b,&norma); 00296 dgefa(a,lda,n,ipvt,&info ); 00297 } 00298 t2 = timer.read(); 00299 timer.stop(); 00300 atime[0][j] = ((t2-t1) - tm2)/ntimes; 00301 00302 timer.start(); 00303 t1 = timer.read(); 00304 for (i = 0; i < ntimes; i++) { 00305 dgesl(a,lda,n,ipvt,b,0); 00306 } 00307 t2 = timer.read(); 00308 timer.stop(); 00309 00310 atime[1][j] = (t2-t1)/ntimes; 00311 total = atime[0][j] + atime[1][j]; 00312 atime[2][j] = total; 00313 atime[3][j] = ops/(1.0e6*total); 00314 atime[4][j] = 2.0/atime[3][j]; 00315 atime[5][j] = total/cray; 00316 atime[3][6] = atime[3][6] + atime[3][j]; 00317 00318 print_time(j); 00319 } 00320 atime[3][6] = atime[3][6] / 5.0; 00321 pc.printf("Average %11.4f\n", 00322 (double)atime[3][6]); 00323 00324 pc.printf("\nCalculating matgen2 overhead\n"); 00325 00326 /************************************************************************ 00327 * Calculate overhead of executing matgen procedure * 00328 ************************************************************************/ 00329 00330 timer.start(); 00331 t1 = timer.read(); 00332 for ( i = 0 ; i < loop ; i++) { 00333 matgen(aa,ldaa,n,b,&norma); 00334 } 00335 t2 = timer.read(); 00336 timer.stop(); 00337 overhead2 = t2-t1; 00338 overhead2 = overhead2 / (double)loop; 00339 00340 pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2); 00341 pc.printf("Times for array with leading dimension of%4d\n\n",ldaa); 00342 pc.printf(" dgefa dgesl total Mflops unit"); 00343 pc.printf(" ratio\n"); 00344 00345 /************************************************************************ 00346 * Execute 5 passes * 00347 ************************************************************************/ 00348 00349 tm2 = ntimes * overhead2; 00350 atime[3][12] = 0; 00351 00352 for (j=7 ; j<12 ; j++) { 00353 timer.start(); 00354 t1 = timer.read(); 00355 for (i = 0; i < ntimes; i++) { 00356 matgen(aa,ldaa,n,b,&norma); 00357 dgefa(aa,ldaa,n,ipvt,&info ); 00358 } 00359 t2 = timer.read(); 00360 timer.stop(); 00361 atime[0][j] = ((t2-t1) - tm2)/ntimes; 00362 00363 timer.start(); 00364 t1 = timer.read(); 00365 for (i = 0; i < ntimes; i++) { 00366 dgesl(aa,ldaa,n,ipvt,b,0); 00367 } 00368 t2 = timer.read(); 00369 timer.stop(); 00370 atime[1][j] = (t2-t1)/ntimes; 00371 total = atime[0][j] + atime[1][j]; 00372 atime[2][j] = total; 00373 atime[3][j] = ops/(1.0e6*total); 00374 atime[4][j] = 2.0/atime[3][j]; 00375 atime[5][j] = total/cray; 00376 atime[3][12] = atime[3][12] + atime[3][j]; 00377 00378 print_time(j); 00379 } 00380 atime[3][12] = atime[3][12] / 5.0; 00381 pc.printf("Average %11.4f\n\n", 00382 (double)atime[3][12]); 00383 pc.printf("--------------------------------------------------------\n"); 00384 pc.printf ("\nFrom File /proc/cpuinfo\n"); 00385 // pc.printf("%s\n", configdata[0]); 00386 pc.printf ("\nFrom File /proc/version\n"); 00387 //pc.printf("%s\n", configdata[1]); 00388 00389 /************************************************************************ 00390 * Use minimum average as overall Mflops rating * 00391 ************************************************************************/ 00392 00393 mflops = atime[3][6]; 00394 if (atime[3][12] < mflops) mflops = atime[3][12]; 00395 00396 pc.printf("\n"); 00397 pc.printf("%s ", ROLLING); 00398 pc.printf("%s ", PREC); 00399 pc.printf(" Precision %11.4f Mflops \n\n",mflops); 00400 00401 // local_time(); 00402 00403 00404 /************************************************************************ 00405 * Add results to output file Linpack.txt * 00406 ************************************************************************/ 00407 pc.printf (" --------------------------------------------------------\n\n"); 00408 pc.printf (" Linpack %s Precision %s Benchmark n @ 100\n", PREC, ROLLING); 00409 //pc.printf (outfile, " Optimisation %s, %s\n", options, timeday); 00410 00411 max1 = 0; 00412 for (i=1 ; i<6 ; i++) { 00413 if (atime[3][i] > max1) max1 = atime[3][i]; 00414 } 00415 00416 max2 = 0; 00417 for (i=7 ; i<12 ; i++) { 00418 if (atime[3][i] > max2) max2 = atime[3][i]; 00419 } 00420 if (max1 < max2) max2 = max1; 00421 00422 pc.printf(" Speed %10.4f MFLOPS\n\n", max2); 00423 pc.printf(was[0], "%16.1f",(double)residn); 00424 pc.printf(was[1], "%16.8e",(double)resid); 00425 pc.printf(was[2], "%16.8e",(double)epsn); 00426 pc.printf(was[3], "%16.8e",(double)x1); 00427 pc.printf(was[4], "%16.8e",(double)x2); 00428 00429 pc.printf(title[0], "norm. resid"); 00430 pc.printf(title[1], "resid "); 00431 pc.printf(title[2], "machep "); 00432 pc.printf(title[3], "x[0]-1 "); 00433 pc.printf(title[4], "x[n-1]-1 "); 00434 00435 if (strtol(opt, NULL, 10) == 0) { 00436 pc.printf(expect[2], " 8.88178420e-016"); 00437 } 00438 errors = 0; 00439 00440 for (i=0; i<5; i++) { 00441 if (strcmp (expect[i], was[i]) != 0) { 00442 pc.printf(" Variable %s Non-standard result was %s instead of %s\n", 00443 title[i], was[i], expect[i]); 00444 errors = errors + 1; 00445 } 00446 } 00447 if (errors == 0) { 00448 pc.printf(" Numeric results were as expected\n\n"); 00449 } else { 00450 pc.printf(" Different numeric results - see linpack.txt\n\n"); 00451 pc.printf("\n Compiler #define or values in linpack.c might need to be changed\n\n"); 00452 00453 } 00454 00455 00456 pc.printf (" -------------------------------------------------------------\n\n"); 00457 pc.printf("\n"); 00458 pc.printf ("SYSTEM INFORMATION\n\nFrom File /proc/cpuinfo\n"); 00459 // pc.printf (outfile, "%s \n", configdata[0]); 00460 pc.printf ("\nFrom File /proc/version\n"); 00461 // pc.printf (outfile, "%s \n", configdata[1]); 00462 pc.printf ("\n"); 00463 00464 pc.printf("\n"); 00465 00466 wait(1); 00467 00468 } 00469 00470 /*----------------------*/ 00471 void print_time (int row) 00472 00473 { 00474 pc.printf("%11.5f%11.5f%11.5f%11.4f%11.4f%11.4f\n", (double)atime[0][row], 00475 (double)atime[1][row], (double)atime[2][row], (double)atime[3][row], 00476 (double)atime[4][row], (double)atime[5][row]); 00477 return; 00478 } 00479 00480 /*----------------------*/ 00481 00482 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma) 00483 00484 00485 /* We would like to declare a[][lda], but c does not allow it. In this 00486 function, references to a[i][j] are written a[lda*i+j]. */ 00487 00488 { 00489 int init, i, j; 00490 00491 init = 1325; 00492 *norma = 0.0; 00493 for (j = 0; j < n; j++) { 00494 for (i = 0; i < n; i++) { 00495 init = 3125*init % 65536; 00496 a[lda*j+i] = (init - 32768.0)/16384.0; 00497 *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; 00498 00499 /* alternative for some compilers 00500 if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]); 00501 */ 00502 } 00503 } 00504 for (i = 0; i < n; i++) { 00505 b[i] = 0.0; 00506 } 00507 for (j = 0; j < n; j++) { 00508 for (i = 0; i < n; i++) { 00509 b[i] = b[i] + a[lda*j+i]; 00510 } 00511 } 00512 return; 00513 } 00514 00515 /*----------------------*/ 00516 void dgefa(REAL a[], int lda, int n, int ipvt[], int *info) 00517 00518 00519 /* We would like to declare a[][lda], but c does not allow it. In this 00520 function, references to a[i][j] are written a[lda*i+j]. */ 00521 /* 00522 dgefa factors a double precision matrix by gaussian elimination. 00523 00524 dgefa is usually called by dgeco, but it can be called 00525 directly with a saving in time if rcond is not needed. 00526 (time for dgeco) = (1 + 9/n)*(time for dgefa) . 00527 00528 on entry 00529 00530 a REAL precision[n][lda] 00531 the matrix to be factored. 00532 00533 lda integer 00534 the leading dimension of the array a . 00535 00536 n integer 00537 the order of the matrix a . 00538 00539 on return 00540 00541 a an upper triangular matrix and the multipliers 00542 which were used to obtain it. 00543 the factorization can be written a = l*u where 00544 l is a product of permutation and unit lower 00545 triangular matrices and u is upper triangular. 00546 00547 ipvt integer[n] 00548 an integer vector of pivot indices. 00549 00550 info integer 00551 = 0 normal value. 00552 = k if u[k][k] .eq. 0.0 . this is not an error 00553 condition for this subroutine, but it does 00554 indicate that dgesl or dgedi will divide by zero 00555 if called. use rcond in dgeco for a reliable 00556 indication of singularity. 00557 00558 linpack. this version dated 08/14/78 . 00559 cleve moler, university of new mexico, argonne national lab. 00560 00561 functions 00562 00563 blas daxpy,dscal,idamax 00564 */ 00565 00566 { 00567 /* internal variables */ 00568 00569 REAL t; 00570 int j,k,kp1,l,nm1; 00571 00572 00573 /* gaussian elimination with partial pivoting */ 00574 00575 *info = 0; 00576 nm1 = n - 1; 00577 if (nm1 >= 0) { 00578 for (k = 0; k < nm1; k++) { 00579 kp1 = k + 1; 00580 00581 /* find l = pivot index */ 00582 00583 l = idamax(n-k,&a[lda*k+k],1) + k; 00584 ipvt[k] = l; 00585 00586 /* zero pivot implies this column already 00587 triangularized */ 00588 00589 if (a[lda*k+l] != ZERO) { 00590 00591 /* interchange if necessary */ 00592 00593 if (l != k) { 00594 t = a[lda*k+l]; 00595 a[lda*k+l] = a[lda*k+k]; 00596 a[lda*k+k] = t; 00597 } 00598 00599 /* compute multipliers */ 00600 00601 t = -ONE/a[lda*k+k]; 00602 dscal(n-(k+1),t,&a[lda*k+k+1],1); 00603 00604 /* row elimination with column indexing */ 00605 00606 for (j = kp1; j < n; j++) { 00607 t = a[lda*j+l]; 00608 if (l != k) { 00609 a[lda*j+l] = a[lda*j+k]; 00610 a[lda*j+k] = t; 00611 } 00612 daxpy(n-(k+1),t,&a[lda*k+k+1],1, 00613 &a[lda*j+k+1],1); 00614 } 00615 } else { 00616 *info = k; 00617 } 00618 } 00619 } 00620 ipvt[n-1] = n-1; 00621 if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1; 00622 return; 00623 } 00624 00625 /*----------------------*/ 00626 00627 void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job ) 00628 00629 00630 /* We would like to declare a[][lda], but c does not allow it. In this 00631 function, references to a[i][j] are written a[lda*i+j]. */ 00632 00633 /* 00634 dgesl solves the double precision system 00635 a * x = b or trans(a) * x = b 00636 using the factors computed by dgeco or dgefa. 00637 00638 on entry 00639 00640 a double precision[n][lda] 00641 the output from dgeco or dgefa. 00642 00643 lda integer 00644 the leading dimension of the array a . 00645 00646 n integer 00647 the order of the matrix a . 00648 00649 ipvt integer[n] 00650 the pivot vector from dgeco or dgefa. 00651 00652 b double precision[n] 00653 the right hand side vector. 00654 00655 job integer 00656 = 0 to solve a*x = b , 00657 = nonzero to solve trans(a)*x = b where 00658 trans(a) is the transpose. 00659 00660 on return 00661 00662 b the solution vector x . 00663 00664 error condition 00665 00666 a division by zero will occur if the input factor contains a 00667 zero on the diagonal. technically this indicates singularity 00668 but it is often caused by improper arguments or improper 00669 setting of lda . it will not occur if the subroutines are 00670 called correctly and if dgeco has set rcond .gt. 0.0 00671 or dgefa has set info .eq. 0 . 00672 00673 to compute inverse(a) * c where c is a matrix 00674 with p columns 00675 dgeco(a,lda,n,ipvt,rcond,z) 00676 if (!rcond is too small){ 00677 for (j=0,j<p,j++) 00678 dgesl(a,lda,n,ipvt,c[j][0],0); 00679 } 00680 00681 linpack. this version dated 08/14/78 . 00682 cleve moler, university of new mexico, argonne national lab. 00683 00684 functions 00685 00686 blas daxpy,ddot 00687 */ 00688 { 00689 /* internal variables */ 00690 00691 REAL t; 00692 int k,kb,l,nm1; 00693 00694 nm1 = n - 1; 00695 if (job == 0) { 00696 00697 /* job = 0 , solve a * x = b 00698 first solve l*y = b */ 00699 00700 if (nm1 >= 1) { 00701 for (k = 0; k < nm1; k++) { 00702 l = ipvt[k]; 00703 t = b[l]; 00704 if (l != k) { 00705 b[l] = b[k]; 00706 b[k] = t; 00707 } 00708 daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 ); 00709 } 00710 } 00711 00712 /* now solve u*x = y */ 00713 00714 for (kb = 0; kb < n; kb++) { 00715 k = n - (kb + 1); 00716 b[k] = b[k]/a[lda*k+k]; 00717 t = -b[k]; 00718 daxpy(k,t,&a[lda*k+0],1,&b[0],1 ); 00719 } 00720 } else { 00721 00722 /* job = nonzero, solve trans(a) * x = b 00723 first solve trans(u)*y = b */ 00724 00725 for (k = 0; k < n; k++) { 00726 t = ddot(k,&a[lda*k+0],1,&b[0],1); 00727 b[k] = (b[k] - t)/a[lda*k+k]; 00728 } 00729 00730 /* now solve trans(l)*x = y */ 00731 00732 if (nm1 >= 1) { 00733 for (kb = 1; kb < nm1; kb++) { 00734 k = n - (kb+1); 00735 b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); 00736 l = ipvt[k]; 00737 if (l != k) { 00738 t = b[l]; 00739 b[l] = b[k]; 00740 b[k] = t; 00741 } 00742 } 00743 } 00744 } 00745 return; 00746 } 00747 00748 /*----------------------*/ 00749 00750 void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy) 00751 /* 00752 constant times a vector plus a vector. 00753 jack dongarra, linpack, 3/11/78. 00754 */ 00755 00756 { 00757 int i,ix,iy,m,mp1; 00758 00759 mp1 = 0; 00760 m = 0; 00761 00762 if(n <= 0) return; 00763 if (da == ZERO) return; 00764 00765 if(incx != 1 || incy != 1) { 00766 00767 /* code for unequal increments or equal increments 00768 not equal to 1 */ 00769 00770 ix = 0; 00771 iy = 0; 00772 if(incx < 0) ix = (-n+1)*incx; 00773 if(incy < 0)iy = (-n+1)*incy; 00774 for (i = 0; i < n; i++) { 00775 dy[iy] = dy[iy] + da*dx[ix]; 00776 ix = ix + incx; 00777 iy = iy + incy; 00778 00779 } 00780 return; 00781 } 00782 00783 /* code for both increments equal to 1 */ 00784 00785 00786 #ifdef ROLL 00787 00788 for (i = 0; i < n; i++) { 00789 dy[i] = dy[i] + da*dx[i]; 00790 } 00791 00792 00793 #endif 00794 00795 #ifdef UNROLL 00796 00797 m = n % 4; 00798 if ( m != 0) { 00799 for (i = 0; i < m; i++) 00800 dy[i] = dy[i] + da*dx[i]; 00801 00802 if (n < 4) return; 00803 } 00804 for (i = m; i < n; i = i + 4) { 00805 dy[i] = dy[i] + da*dx[i]; 00806 dy[i+1] = dy[i+1] + da*dx[i+1]; 00807 dy[i+2] = dy[i+2] + da*dx[i+2]; 00808 dy[i+3] = dy[i+3] + da*dx[i+3]; 00809 00810 } 00811 00812 #endif 00813 return; 00814 } 00815 00816 /*----------------------*/ 00817 00818 REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy) 00819 /* 00820 forms the dot product of two vectors. 00821 jack dongarra, linpack, 3/11/78. 00822 */ 00823 00824 { 00825 REAL dtemp; 00826 int i,ix,iy,m,mp1; 00827 00828 mp1 = 0; 00829 m = 0; 00830 00831 dtemp = ZERO; 00832 00833 if(n <= 0) return(ZERO); 00834 00835 if(incx != 1 || incy != 1) { 00836 00837 /* code for unequal increments or equal increments 00838 not equal to 1 */ 00839 00840 ix = 0; 00841 iy = 0; 00842 if (incx < 0) ix = (-n+1)*incx; 00843 if (incy < 0) iy = (-n+1)*incy; 00844 for (i = 0; i < n; i++) { 00845 dtemp = dtemp + dx[ix]*dy[iy]; 00846 ix = ix + incx; 00847 iy = iy + incy; 00848 00849 } 00850 return(dtemp); 00851 } 00852 00853 /* code for both increments equal to 1 */ 00854 00855 00856 #ifdef ROLL 00857 00858 for (i=0; i < n; i++) 00859 dtemp = dtemp + dx[i]*dy[i]; 00860 00861 return(dtemp); 00862 00863 #endif 00864 00865 #ifdef UNROLL 00866 00867 00868 m = n % 5; 00869 if (m != 0) { 00870 for (i = 0; i < m; i++) 00871 dtemp = dtemp + dx[i]*dy[i]; 00872 if (n < 5) return(dtemp); 00873 } 00874 for (i = m; i < n; i = i + 5) { 00875 dtemp = dtemp + dx[i]*dy[i] + 00876 dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] + 00877 dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4]; 00878 } 00879 return(dtemp); 00880 00881 #endif 00882 00883 } 00884 00885 /*----------------------*/ 00886 void dscal(int n, REAL da, REAL dx[], int incx) 00887 00888 /* scales a vector by a constant. 00889 jack dongarra, linpack, 3/11/78. 00890 */ 00891 00892 { 00893 int i,m,mp1,nincx; 00894 00895 mp1 = 0; 00896 m = 0; 00897 00898 if(n <= 0)return; 00899 if(incx != 1) { 00900 00901 /* code for increment not equal to 1 */ 00902 00903 nincx = n*incx; 00904 for (i = 0; i < nincx; i = i + incx) 00905 dx[i] = da*dx[i]; 00906 00907 return; 00908 } 00909 00910 /* code for increment equal to 1 */ 00911 00912 00913 #ifdef ROLL 00914 00915 for (i = 0; i < n; i++) 00916 dx[i] = da*dx[i]; 00917 00918 00919 #endif 00920 00921 #ifdef UNROLL 00922 00923 00924 m = n % 5; 00925 if (m != 0) { 00926 for (i = 0; i < m; i++) 00927 dx[i] = da*dx[i]; 00928 if (n < 5) return; 00929 } 00930 for (i = m; i < n; i = i + 5) { 00931 dx[i] = da*dx[i]; 00932 dx[i+1] = da*dx[i+1]; 00933 dx[i+2] = da*dx[i+2]; 00934 dx[i+3] = da*dx[i+3]; 00935 dx[i+4] = da*dx[i+4]; 00936 } 00937 00938 #endif 00939 00940 } 00941 00942 /*----------------------*/ 00943 int idamax(int n, REAL dx[], int incx) 00944 00945 /* 00946 finds the index of element having max. absolute value. 00947 jack dongarra, linpack, 3/11/78. 00948 */ 00949 00950 00951 { 00952 REAL dmax; 00953 int i, ix, itemp; 00954 00955 if( n < 1 ) return(-1); 00956 if(n ==1 ) return(0); 00957 if(incx != 1) { 00958 00959 /* code for increment not equal to 1 */ 00960 00961 ix = 1; 00962 dmax = fabs((double)dx[0]); 00963 ix = ix + incx; 00964 for (i = 1; i < n; i++) { 00965 if(fabs((double)dx[ix]) > dmax) { 00966 itemp = i; 00967 dmax = fabs((double)dx[ix]); 00968 } 00969 ix = ix + incx; 00970 } 00971 } else { 00972 00973 /* code for increment equal to 1 */ 00974 00975 itemp = 0; 00976 dmax = fabs((double)dx[0]); 00977 for (i = 1; i < n; i++) { 00978 if(fabs((double)dx[i]) > dmax) { 00979 itemp = i; 00980 dmax = fabs((double)dx[i]); 00981 } 00982 } 00983 } 00984 return (itemp); 00985 } 00986 00987 /*----------------------*/ 00988 REAL epslon (REAL x) 00989 00990 /* 00991 estimate unit roundoff in quantities of size x. 00992 */ 00993 00994 { 00995 REAL a,b,c,eps; 00996 /* 00997 this program should function properly on all systems 00998 satisfying the following two assumptions, 00999 1. the base used in representing dfloating point 01000 numbers is not a power of three. 01001 2. the quantity a in statement 10 is represented to 01002 the accuracy used in dfloating point variables 01003 that are stored in memory. 01004 the statement number 10 and the go to 10 are intended to 01005 force optimizing compilers to generate code satisfying 01006 assumption 2. 01007 under these assumptions, it should be true that, 01008 a is not exactly equal to four-thirds, 01009 b has a zero for its last bit or digit, 01010 c is not exactly equal to one, 01011 eps measures the separation of 1.0 from 01012 the next larger dfloating point number. 01013 the developers of eispack would appreciate being informed 01014 about any systems where these assumptions do not hold. 01015 01016 ***************************************************************** 01017 this routine is one of the auxiliary routines used by eispack iii 01018 to avoid machine dependencies. 01019 ***************************************************************** 01020 01021 this version dated 4/6/83. 01022 */ 01023 01024 a = 4.0e0/3.0e0; 01025 eps = ZERO; 01026 while (eps == ZERO) { 01027 b = a - ONE; 01028 c = b + b + b; 01029 eps = fabs((double)(c-ONE)); 01030 } 01031 return(eps*fabs((double)x)); 01032 } 01033 01034 /*----------------------*/ 01035 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) 01036 01037 01038 /* We would like to declare m[][ldm], but c does not allow it. In this 01039 function, references to m[i][j] are written m[ldm*i+j]. */ 01040 01041 /* 01042 purpose: 01043 multiply matrix m times vector x and add the result to vector y. 01044 01045 parameters: 01046 01047 n1 integer, number of elements in vector y, and number of rows in 01048 matrix m 01049 01050 y double [n1], vector of length n1 to which is added 01051 the product m*x 01052 01053 n2 integer, number of elements in vector x, and number of columns 01054 in matrix m 01055 01056 ldm integer, leading dimension of array m 01057 01058 x double [n2], vector of length n2 01059 01060 m double [ldm][n2], matrix of n1 rows and n2 columns 01061 01062 ---------------------------------------------------------------------- 01063 */ 01064 { 01065 int j,i,jmin; 01066 /* cleanup odd vector */ 01067 01068 j = n2 % 2; 01069 if (j >= 1) { 01070 j = j - 1; 01071 for (i = 0; i < n1; i++) 01072 y[i] = (y[i]) + x[j]*m[ldm*j+i]; 01073 } 01074 01075 /* cleanup odd group of two vectors */ 01076 01077 j = n2 % 4; 01078 if (j >= 2) { 01079 j = j - 1; 01080 for (i = 0; i < n1; i++) 01081 y[i] = ( (y[i]) 01082 + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; 01083 } 01084 01085 /* cleanup odd group of four vectors */ 01086 01087 j = n2 % 8; 01088 if (j >= 4) { 01089 j = j - 1; 01090 for (i = 0; i < n1; i++) 01091 y[i] = ((( (y[i]) 01092 + x[j-3]*m[ldm*(j-3)+i]) 01093 + x[j-2]*m[ldm*(j-2)+i]) 01094 + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; 01095 } 01096 01097 /* cleanup odd group of eight vectors */ 01098 01099 j = n2 % 16; 01100 if (j >= 8) { 01101 j = j - 1; 01102 for (i = 0; i < n1; i++) 01103 y[i] = ((((((( (y[i]) 01104 + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i]) 01105 + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i]) 01106 + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i]) 01107 + x[j-1]*m[ldm*(j-1)+i]) + x[j] *m[ldm*j+i]; 01108 } 01109 01110 /* main loop - groups of sixteen vectors */ 01111 01112 jmin = (n2%16)+16; 01113 for (j = jmin-1; j < n2; j = j + 16) { 01114 for (i = 0; i < n1; i++) 01115 y[i] = ((((((((((((((( (y[i]) 01116 + x[j-15]*m[ldm*(j-15)+i]) 01117 + x[j-14]*m[ldm*(j-14)+i]) 01118 + x[j-13]*m[ldm*(j-13)+i]) 01119 + x[j-12]*m[ldm*(j-12)+i]) 01120 + x[j-11]*m[ldm*(j-11)+i]) 01121 + x[j-10]*m[ldm*(j-10)+i]) 01122 + x[j- 9]*m[ldm*(j- 9)+i]) 01123 + x[j- 8]*m[ldm*(j- 8)+i]) 01124 + x[j- 7]*m[ldm*(j- 7)+i]) 01125 + x[j- 6]*m[ldm*(j- 6)+i]) 01126 + x[j- 5]*m[ldm*(j- 5)+i]) 01127 + x[j- 4]*m[ldm*(j- 4)+i]) 01128 + x[j- 3]*m[ldm*(j- 3)+i]) 01129 + x[j- 2]*m[ldm*(j- 2)+i]) 01130 + x[j- 1]*m[ldm*(j- 1)+i]) 01131 + x[j] *m[ldm*j+i]; 01132 } 01133 return; 01134 } 01135 //*---------------------------------------- 01136 int main() 01137 { 01138 pc.baud(9600); 01139 while(1) { 01140 01141 pc.printf("Starting benchmark...\n"); 01142 01143 do_benchmark(); 01144 01145 pc.printf(" kraj \n\n"); 01146 } 01147 }
Generated on Thu Jul 14 2022 05:16:43 by
1.7.2