Jovan Ivković / Mbed 2 deprecated Linpack

Dependencies:   mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers main.cpp Source File

main.cpp

00001 //********************************************************
00002 //** RC Apha  --- based on roylongbottom.org ARM    ******
00003 //**  Nucleo-64-144 Stm32F103 to F767 benchmark     ******
00004 //**  Limpack -port form Arduino IDE                ******
00005 //**  Jovan Ivkovic - 2017                          ******
00006 //********************************************************
00007 #define GCCARMDP
00008 
00009 #define UNROLL
00010 #define DP
00011 
00012 #ifdef SP
00013 #define REAL float
00014 #define ZERO 0.0
00015 #define ONE 1.0
00016 #define PREC "Single"
00017 #endif
00018 
00019 #ifdef DP
00020 #define REAL double
00021 #define ZERO 0.0e0
00022 #define ONE 1.0e0
00023 #define PREC "Double"
00024 #endif
00025 
00026 #ifdef ROLL
00027 #define ROLLING "Rolled"
00028 #endif
00029 #ifdef UNROLL
00030 #define ROLLING "Unrolled"
00031 #endif
00032 
00033 // VERSION
00034 
00035 #ifdef CNNT
00036 #define options   "Non-optimised"
00037 #define opt "0"
00038 #else
00039 //    #define options   "Optimised"
00040 //    #define options   "Opt 3 32 Bit"
00041 #define options   "vfpv4 32 Bit"
00042 #define opt "1"
00043 #endif
00044 
00045 #define NTIMES 10
00046 
00047 #include "mbed.h"
00048 
00049 
00050 //---------------------------------
00051 DigitalOut myled(LED1);
00052 Serial pc(USBTX, USBRX); //USB is out of oreder on Embeded-Pi
00053 //Serial pc(PC_10,PC_11); //RX-TX D0,D1 Embeded-PI ports
00054 Timer timer;
00055 //--------------------------------
00056 
00057 void print_time (int row);
00058 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma);
00059 void dgefa (REAL a[], int lda, int n, int ipvt[], int *info);
00060 void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job);
00061 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
00062 void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy);
00063 REAL epslon (REAL x);
00064 int idamax (int n, REAL dx[], int incx);
00065 void dscal (int n, REAL da, REAL dx[], int incx);
00066 REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy);
00067 
00068 static REAL atime[9][15];
00069 double runSecs = 1;
00070 
00071 void do_benchmark(void){
00072    
00073     float t1,t2 = 0.0;
00074     static REAL aa[40*40],a[40*41],b[40],x[40]; // nxn, n*n+1, n,n,
00075     REAL cray,ops,total,norma,normx;
00076     REAL resid,residn,eps,tm2,epsn,x1,x2;
00077     REAL mflops;
00078     static int ipvt[21],n,i,j,ntimes,info,lda,ldaa;
00079     int endit, pass, loop;
00080     REAL overhead1, overhead2, time2;
00081     REAL max1, max2;
00082     char was[5][20];
00083     char expect[5][20];
00084     char title[5][20];
00085     int errors;
00086     int         nopause = 1;
00087 
00088    /*
00089       if (argc > 1) {
00090         switch (argv[1][0]) {
00091             case 'N':
00092                 nopause = 0;
00093                 break;
00094             case 'n':
00095                 nopause = 0;
00096                 break;
00097         }
00098      }
00099     */
00100     pc.printf("\n");
00101 
00102 
00103     // outfile = fopen("Linpack.txt","a+");
00104     // if (outfile == NULL)
00105     // {
00106     //    printf (" Cannot open results file \n\n");
00107     //   printf(" Press Enter\n\n");
00108     //    int g = getchar();
00109     //    exit (0);
00110     //}
00111 
00112 
00113 
00114 #ifdef GCCARMDP
00115     pc.printf(expect[0], "             1.7");
00116     pc.printf(expect[1], "  7.41628980e-14");
00117     pc.printf(expect[2], "  2.22044605e-16");
00118     pc.printf(expect[3], " -1.49880108e-14");
00119     pc.printf(expect[4], " -1.89848137e-14");
00120 #endif
00121 
00122 #ifdef GCCARMSP
00123     pc.printf(expect[0], "             1.6");
00124     pc.printf(expect[1], "  3.80277634e-05");
00125     pc.printf(expect[2], "  1.19209290e-07");
00126     pc.printf(expect[3], " -1.38282776e-05");
00127     pc.printf(expect[4], " -7.51018524e-06");
00128 #endif
00129 
00130     lda = 41; //n+1
00131     ldaa = 40; //n
00132     cray = .056;
00133     n = 40; // 40 For Nucleo F7
00134     pc.printf("----------------------------------------------\n");
00135     pc.printf("%s ", ROLLING);
00136     pc.printf("%s ", PREC);
00137     pc.printf("Precision Linpack Benchmark - Linux Version in 'C/C++'\n\n");
00138 
00139     pc.printf("Optimisation %s\n\n",options);
00140 
00141     ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
00142 
00143     matgen(a,lda,n,b,&norma);
00144     timer.start();
00145     //start_time();
00146     t1 = timer.read();
00147     dgefa(a,lda,n,ipvt,&info);
00148     //end_time();
00149     t2 = timer.read();
00150     timer.stop();
00151     atime[0][0] = t2-t1; //secs
00152 
00153     timer.reset();
00154     timer.start();
00155     t1 = timer.read();
00156     dgesl(a,lda,n,ipvt,b,0);
00157     t2 = timer.read();
00158     timer.stop();
00159     atime[1][0] = t2-t1; //secs
00160     total = atime[0][0] + atime[1][0];
00161 
00162     /*     compute a residual to verify results.  */
00163 
00164     for (i = 0; i < n; i++) {
00165         x[i] = b[i];
00166     }
00167     matgen(a,lda,n,b,&norma);
00168     for (i = 0; i < n; i++) {
00169         b[i] = -b[i];
00170     }
00171     dmxpy(n,b,n,lda,x,a);
00172     resid = 0.0;
00173     normx = 0.0;
00174     for (i = 0; i < n; i++) {
00175         resid = (resid > fabs((double)b[i]))
00176                 ? resid : fabs((double)b[i]);
00177         normx = (normx > fabs((double)x[i]))
00178                 ? normx : fabs((double)x[i]);
00179     }
00180     eps = epslon(ONE);
00181     residn = resid/( n*norma*normx*eps );
00182     epsn = eps;
00183     x1 = x[0] - 1;
00184     x2 = x[n-1] - 1;
00185 
00186     pc.printf("norm resid      resid           machep");
00187     pc.printf("         x[0]-1          x[n-1]-1\n");
00188     pc.printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
00189               (double)residn, (double)resid, (double)epsn,
00190               (double)x1, (double)x2);
00191 
00192     pc.printf("Times are reported for matrices of order        %5d\n",n);
00193     pc.printf("1 pass times for array with leading dimension of%5d\n\n",lda);
00194     pc.printf("      dgefa      dgesl      total     Mflops       unit");
00195     pc.printf("      ratio\n");
00196 
00197     atime[2][0] = total;
00198     if (total > 0.0) {
00199         atime[3][0] = ops/(1.0e6*total);
00200         atime[4][0] = 2.0/atime[3][0];
00201     } else {
00202         atime[3][0] = 0.0;
00203         atime[4][0] = 0.0;
00204     }
00205     atime[5][0] = total/cray;
00206 
00207     print_time(0);
00208 
00209     /************************************************************************
00210      *       Calculate overhead of executing matgen procedure              *
00211      ************************************************************************/
00212 
00213     pc.printf ("\nCalculating matgen overhead\n");
00214     pass = -20;
00215     loop = NTIMES;
00216     do {
00217         timer.start();
00218         t1 = timer.read();
00219         pass = pass + 1;
00220         for ( i = 0 ; i < loop ; i++) {
00221             matgen(a,lda,n,b,&norma);
00222         }
00223         t2 = timer.read();
00224         timer.stop();
00225         overhead1 = t2-t1; //sec
00226         pc.printf ("%10d times %6.4f seconds\n", loop, overhead1);
00227         if (overhead1 > runSecs) {
00228             pass = 0;
00229         }
00230         if (pass < 0) {
00231             if (overhead1 < 0.1) {
00232                 loop = loop * 10;
00233             } else {
00234                 loop = loop * 2;
00235             }
00236         }
00237     } while (pass < 0);
00238 
00239     overhead1 = overhead1 / (double)loop;
00240 
00241     pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1);
00242 
00243     /************************************************************************
00244      *           Calculate matgen/dgefa passes for runSecs seconds                *
00245      ************************************************************************/
00246 
00247     pc.printf ("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs);
00248     pass = -20;
00249     ntimes = NTIMES;
00250     do {
00251         timer.start();
00252         t1 = timer.read();
00253         pass = pass + 1;
00254         for ( i = 0 ; i < ntimes ; i++) {
00255             matgen(a,lda,n,b,&norma);
00256             dgefa(a,lda,n,ipvt,&info );
00257         }
00258         t2 = timer.read();
00259         timer.stop();
00260         time2 = t2-t1; //sec
00261 
00262         pc.printf ("%10d times %6.2f seconds\n", ntimes, time2);
00263         if (time2 > runSecs) {
00264             pass = 0;
00265         }
00266         if (pass < 0) {
00267             if (time2 < 0.1) {
00268                 ntimes = ntimes * 10;
00269             } else {
00270                 ntimes = ntimes * 2;
00271             }
00272         }
00273     } while (pass < 0);
00274 
00275     ntimes =  (int)(runSecs * (double)ntimes / time2);
00276     if (ntimes == 0) ntimes = 1;
00277 
00278     pc.printf ("Passes used %10d \n\n", ntimes);
00279     pc.printf("Times for array with leading dimension of%4d\n\n",lda);
00280     pc.printf("      dgefa      dgesl      total     Mflops       unit");
00281     pc.printf("      ratio\n");
00282 
00283     /************************************************************************
00284      *                              Execute 5 passes                        *
00285      ************************************************************************/
00286 
00287     tm2 = ntimes * overhead1;
00288     atime[3][6] = 0;
00289 
00290     for (j=1 ; j<6 ; j++) {
00291         timer.start();
00292         t1 = timer.read();
00293 
00294         for (i = 0; i < ntimes; i++) {
00295             matgen(a,lda,n,b,&norma);
00296             dgefa(a,lda,n,ipvt,&info );
00297         }
00298         t2 = timer.read();
00299         timer.stop();
00300         atime[0][j] = ((t2-t1) - tm2)/ntimes;
00301 
00302         timer.start();
00303         t1 = timer.read();
00304         for (i = 0; i < ntimes; i++) {
00305             dgesl(a,lda,n,ipvt,b,0);
00306         }
00307         t2 = timer.read();
00308         timer.stop();
00309 
00310         atime[1][j] = (t2-t1)/ntimes;
00311         total       = atime[0][j] + atime[1][j];
00312         atime[2][j] = total;
00313         atime[3][j] = ops/(1.0e6*total);
00314         atime[4][j] = 2.0/atime[3][j];
00315         atime[5][j] = total/cray;
00316         atime[3][6] = atime[3][6] + atime[3][j];
00317 
00318         print_time(j);
00319     }
00320     atime[3][6] = atime[3][6] / 5.0;
00321     pc.printf("Average                          %11.4f\n",
00322               (double)atime[3][6]);
00323 
00324     pc.printf("\nCalculating matgen2 overhead\n");
00325 
00326     /************************************************************************
00327      *             Calculate overhead of executing matgen procedure         *
00328      ************************************************************************/
00329 
00330     timer.start();
00331     t1 = timer.read();
00332     for ( i = 0 ; i < loop ; i++) {
00333         matgen(aa,ldaa,n,b,&norma);
00334     }
00335     t2 = timer.read();
00336     timer.stop();
00337     overhead2 = t2-t1;
00338     overhead2 = overhead2 / (double)loop;
00339 
00340     pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
00341     pc.printf("Times for array with leading dimension of%4d\n\n",ldaa);
00342     pc.printf("      dgefa      dgesl      total     Mflops       unit");
00343     pc.printf("      ratio\n");
00344 
00345     /************************************************************************
00346      *                              Execute 5 passes                        *
00347      ************************************************************************/
00348 
00349     tm2 = ntimes * overhead2;
00350     atime[3][12] = 0;
00351 
00352     for (j=7 ; j<12 ; j++) {
00353         timer.start();
00354         t1 = timer.read();
00355         for (i = 0; i < ntimes; i++) {
00356             matgen(aa,ldaa,n,b,&norma);
00357             dgefa(aa,ldaa,n,ipvt,&info  );
00358         }
00359         t2 = timer.read();
00360         timer.stop();
00361         atime[0][j] = ((t2-t1) - tm2)/ntimes;
00362 
00363         timer.start();
00364         t1 = timer.read();
00365         for (i = 0; i < ntimes; i++) {
00366             dgesl(aa,ldaa,n,ipvt,b,0);
00367         }
00368         t2 = timer.read();
00369         timer.stop();
00370         atime[1][j] = (t2-t1)/ntimes;
00371         total       = atime[0][j] + atime[1][j];
00372         atime[2][j] = total;
00373         atime[3][j] = ops/(1.0e6*total);
00374         atime[4][j] = 2.0/atime[3][j];
00375         atime[5][j] = total/cray;
00376         atime[3][12] = atime[3][12] + atime[3][j];
00377 
00378         print_time(j);
00379     }
00380     atime[3][12] = atime[3][12] / 5.0;
00381     pc.printf("Average                          %11.4f\n\n",
00382               (double)atime[3][12]);
00383     pc.printf("--------------------------------------------------------\n");
00384     pc.printf ("\nFrom File /proc/cpuinfo\n");
00385     //  pc.printf("%s\n", configdata[0]);
00386     pc.printf ("\nFrom File /proc/version\n");
00387     //pc.printf("%s\n", configdata[1]);
00388 
00389     /************************************************************************
00390      *           Use minimum average as overall Mflops rating               *
00391      ************************************************************************/
00392 
00393     mflops = atime[3][6];
00394     if (atime[3][12] < mflops) mflops = atime[3][12];
00395 
00396     pc.printf("\n");
00397     pc.printf("%s ", ROLLING);
00398     pc.printf("%s ", PREC);
00399     pc.printf(" Precision %11.4f Mflops \n\n",mflops);
00400 
00401     // local_time();
00402 
00403 
00404     /************************************************************************
00405      *              Add results to output file Linpack.txt                  *
00406      ************************************************************************/
00407     pc.printf (" --------------------------------------------------------\n\n");
00408     pc.printf (" Linpack %s Precision %s Benchmark n @ 100\n", PREC, ROLLING);
00409     //pc.printf (outfile, " Optimisation %s, %s\n", options, timeday);
00410 
00411     max1 = 0;
00412     for (i=1 ; i<6 ; i++) {
00413         if (atime[3][i] > max1) max1 = atime[3][i];
00414     }
00415 
00416     max2 = 0;
00417     for (i=7 ; i<12 ; i++) {
00418         if (atime[3][i] > max2) max2 = atime[3][i];
00419     }
00420     if (max1 < max2) max2 = max1;
00421 
00422     pc.printf(" Speed %10.4f MFLOPS\n\n", max2);
00423     pc.printf(was[0], "%16.1f",(double)residn);
00424     pc.printf(was[1], "%16.8e",(double)resid);
00425     pc.printf(was[2], "%16.8e",(double)epsn);
00426     pc.printf(was[3], "%16.8e",(double)x1);
00427     pc.printf(was[4], "%16.8e",(double)x2);
00428 
00429     pc.printf(title[0], "norm. resid");
00430     pc.printf(title[1], "resid      ");
00431     pc.printf(title[2], "machep     ");
00432     pc.printf(title[3], "x[0]-1     ");
00433     pc.printf(title[4], "x[n-1]-1   ");
00434 
00435     if (strtol(opt, NULL, 10) == 0) {
00436         pc.printf(expect[2], " 8.88178420e-016");
00437     }
00438     errors = 0;
00439 
00440     for (i=0; i<5; i++) {
00441         if (strcmp (expect[i], was[i]) != 0) {
00442             pc.printf(" Variable %s Non-standard result was %s instead of %s\n",
00443                       title[i], was[i], expect[i]);
00444             errors = errors + 1;
00445         }
00446     }
00447     if (errors == 0) {
00448         pc.printf(" Numeric results were as expected\n\n");
00449     } else {
00450         pc.printf(" Different numeric results - see linpack.txt\n\n");
00451         pc.printf("\n Compiler #define or values in linpack.c might need to be changed\n\n");
00452 
00453     }
00454 
00455 
00456     pc.printf (" -------------------------------------------------------------\n\n");
00457     pc.printf("\n");
00458     pc.printf ("SYSTEM INFORMATION\n\nFrom File /proc/cpuinfo\n");
00459     // pc.printf (outfile, "%s \n", configdata[0]);
00460     pc.printf ("\nFrom File /proc/version\n");
00461     //  pc.printf (outfile, "%s \n", configdata[1]);
00462     pc.printf ("\n");
00463 
00464     pc.printf("\n");
00465     
00466     wait(1);
00467 
00468 }
00469 
00470 /*----------------------*/
00471 void print_time (int row)
00472 
00473 {
00474     pc.printf("%11.5f%11.5f%11.5f%11.4f%11.4f%11.4f\n",   (double)atime[0][row],
00475               (double)atime[1][row], (double)atime[2][row], (double)atime[3][row],
00476               (double)atime[4][row], (double)atime[5][row]);
00477     return;
00478 }
00479 
00480 /*----------------------*/
00481 
00482 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma)
00483 
00484 
00485 /* We would like to declare a[][lda], but c does not allow it.  In this
00486 function, references to a[i][j] are written a[lda*i+j].  */
00487 
00488 {
00489     int init, i, j;
00490 
00491     init = 1325;
00492     *norma = 0.0;
00493     for (j = 0; j < n; j++) {
00494         for (i = 0; i < n; i++) {
00495             init = 3125*init % 65536;
00496             a[lda*j+i] = (init - 32768.0)/16384.0;
00497             *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
00498 
00499             /* alternative for some compilers
00500             if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]);
00501             */
00502         }
00503     }
00504     for (i = 0; i < n; i++) {
00505         b[i] = 0.0;
00506     }
00507     for (j = 0; j < n; j++) {
00508         for (i = 0; i < n; i++) {
00509             b[i] = b[i] + a[lda*j+i];
00510         }
00511     }
00512     return;
00513 }
00514 
00515 /*----------------------*/
00516 void dgefa(REAL a[], int lda, int n, int ipvt[], int *info)
00517 
00518 
00519 /* We would like to declare a[][lda], but c does not allow it.  In this
00520 function, references to a[i][j] are written a[lda*i+j].  */
00521 /*
00522      dgefa factors a double precision matrix by gaussian elimination.
00523 
00524      dgefa is usually called by dgeco, but it can be called
00525      directly with a saving in time if  rcond  is not needed.
00526      (time for dgeco) = (1 + 9/n)*(time for dgefa) .
00527 
00528      on entry
00529 
00530         a       REAL precision[n][lda]
00531                 the matrix to be factored.
00532 
00533         lda     integer
00534                 the leading dimension of the array  a .
00535 
00536         n       integer
00537                 the order of the matrix  a .
00538 
00539      on return
00540 
00541         a       an upper triangular matrix and the multipliers
00542                 which were used to obtain it.
00543                 the factorization can be written  a = l*u  where
00544                 l  is a product of permutation and unit lower
00545                 triangular matrices and  u  is upper triangular.
00546 
00547         ipvt    integer[n]
00548                 an integer vector of pivot indices.
00549 
00550         info    integer
00551                 = 0  normal value.
00552                 = k  if  u[k][k] .eq. 0.0 .  this is not an error
00553                      condition for this subroutine, but it does
00554                      indicate that dgesl or dgedi will divide by zero
00555                      if called.  use  rcond  in dgeco for a reliable
00556                      indication of singularity.
00557 
00558      linpack. this version dated 08/14/78 .
00559      cleve moler, university of new mexico, argonne national lab.
00560 
00561      functions
00562 
00563      blas daxpy,dscal,idamax
00564 */
00565 
00566 {
00567     /*     internal variables       */
00568 
00569     REAL t;
00570     int j,k,kp1,l,nm1;
00571 
00572 
00573     /*     gaussian elimination with partial pivoting       */
00574 
00575     *info = 0;
00576     nm1 = n - 1;
00577     if (nm1 >=  0) {
00578         for (k = 0; k < nm1; k++) {
00579             kp1 = k + 1;
00580 
00581             /* find l = pivot index */
00582 
00583             l = idamax(n-k,&a[lda*k+k],1) + k;
00584             ipvt[k] = l;
00585 
00586             /* zero pivot implies this column already
00587                triangularized */
00588 
00589             if (a[lda*k+l] != ZERO) {
00590 
00591                 /* interchange if necessary */
00592 
00593                 if (l != k) {
00594                     t = a[lda*k+l];
00595                     a[lda*k+l] = a[lda*k+k];
00596                     a[lda*k+k] = t;
00597                 }
00598 
00599                 /* compute multipliers */
00600 
00601                 t = -ONE/a[lda*k+k];
00602                 dscal(n-(k+1),t,&a[lda*k+k+1],1);
00603 
00604                 /* row elimination with column indexing */
00605 
00606                 for (j = kp1; j < n; j++) {
00607                     t = a[lda*j+l];
00608                     if (l != k) {
00609                         a[lda*j+l] = a[lda*j+k];
00610                         a[lda*j+k] = t;
00611                     }
00612                     daxpy(n-(k+1),t,&a[lda*k+k+1],1,
00613                           &a[lda*j+k+1],1);
00614                 }
00615             } else {
00616                 *info = k;
00617             }
00618         }
00619     }
00620     ipvt[n-1] = n-1;
00621     if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
00622     return;
00623 }
00624 
00625 /*----------------------*/
00626 
00627 void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job )
00628 
00629 
00630 /* We would like to declare a[][lda], but c does not allow it.  In this
00631 function, references to a[i][j] are written a[lda*i+j].  */
00632 
00633 /*
00634      dgesl solves the double precision system
00635      a * x = b  or  trans(a) * x = b
00636      using the factors computed by dgeco or dgefa.
00637 
00638      on entry
00639 
00640         a       double precision[n][lda]
00641                 the output from dgeco or dgefa.
00642 
00643         lda     integer
00644                 the leading dimension of the array  a .
00645 
00646         n       integer
00647                 the order of the matrix  a .
00648 
00649         ipvt    integer[n]
00650                 the pivot vector from dgeco or dgefa.
00651 
00652         b       double precision[n]
00653                 the right hand side vector.
00654 
00655         job     integer
00656                 = 0         to solve  a*x = b ,
00657                 = nonzero   to solve  trans(a)*x = b  where
00658                             trans(a)  is the transpose.
00659 
00660     on return
00661 
00662         b       the solution vector  x .
00663 
00664      error condition
00665 
00666         a division by zero will occur if the input factor contains a
00667         zero on the diagonal.  technically this indicates singularity
00668         but it is often caused by improper arguments or improper
00669         setting of lda .  it will not occur if the subroutines are
00670         called correctly and if dgeco has set rcond .gt. 0.0
00671         or dgefa has set info .eq. 0 .
00672 
00673      to compute  inverse(a) * c  where  c  is a matrix
00674      with  p  columns
00675            dgeco(a,lda,n,ipvt,rcond,z)
00676            if (!rcond is too small){
00677                 for (j=0,j<p,j++)
00678                         dgesl(a,lda,n,ipvt,c[j][0],0);
00679            }
00680 
00681      linpack. this version dated 08/14/78 .
00682      cleve moler, university of new mexico, argonne national lab.
00683 
00684      functions
00685 
00686      blas daxpy,ddot
00687 */
00688 {
00689     /*     internal variables       */
00690 
00691     REAL t;
00692     int k,kb,l,nm1;
00693 
00694     nm1 = n - 1;
00695     if (job == 0) {
00696 
00697         /* job = 0 , solve  a * x = b
00698            first solve  l*y = b         */
00699 
00700         if (nm1 >= 1) {
00701             for (k = 0; k < nm1; k++) {
00702                 l = ipvt[k];
00703                 t = b[l];
00704                 if (l != k) {
00705                     b[l] = b[k];
00706                     b[k] = t;
00707                 }
00708                 daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 );
00709             }
00710         }
00711 
00712         /* now solve  u*x = y */
00713 
00714         for (kb = 0; kb < n; kb++) {
00715             k = n - (kb + 1);
00716             b[k] = b[k]/a[lda*k+k];
00717             t = -b[k];
00718             daxpy(k,t,&a[lda*k+0],1,&b[0],1 );
00719         }
00720     } else {
00721 
00722         /* job = nonzero, solve  trans(a) * x = b
00723            first solve  trans(u)*y = b                  */
00724 
00725         for (k = 0; k < n; k++) {
00726             t = ddot(k,&a[lda*k+0],1,&b[0],1);
00727             b[k] = (b[k] - t)/a[lda*k+k];
00728         }
00729 
00730         /* now solve trans(l)*x = y     */
00731 
00732         if (nm1 >= 1) {
00733             for (kb = 1; kb < nm1; kb++) {
00734                 k = n - (kb+1);
00735                 b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
00736                 l = ipvt[k];
00737                 if (l != k) {
00738                     t = b[l];
00739                     b[l] = b[k];
00740                     b[k] = t;
00741                 }
00742             }
00743         }
00744     }
00745     return;
00746 }
00747 
00748 /*----------------------*/
00749 
00750 void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy)
00751 /*
00752      constant times a vector plus a vector.
00753      jack dongarra, linpack, 3/11/78.
00754 */
00755 
00756 {
00757     int i,ix,iy,m,mp1;
00758 
00759     mp1 = 0;
00760     m = 0;
00761 
00762     if(n <= 0) return;
00763     if (da == ZERO) return;
00764 
00765     if(incx != 1 || incy != 1) {
00766 
00767         /* code for unequal increments or equal increments
00768            not equal to 1                                       */
00769 
00770         ix = 0;
00771         iy = 0;
00772         if(incx < 0) ix = (-n+1)*incx;
00773         if(incy < 0)iy = (-n+1)*incy;
00774         for (i = 0; i < n; i++) {
00775             dy[iy] = dy[iy] + da*dx[ix];
00776             ix = ix + incx;
00777             iy = iy + incy;
00778 
00779         }
00780         return;
00781     }
00782 
00783     /* code for both increments equal to 1 */
00784 
00785 
00786 #ifdef ROLL
00787 
00788     for (i = 0; i < n; i++) {
00789         dy[i] = dy[i] + da*dx[i];
00790     }
00791 
00792 
00793 #endif
00794 
00795 #ifdef UNROLL
00796 
00797     m = n % 4;
00798     if ( m != 0) {
00799         for (i = 0; i < m; i++)
00800             dy[i] = dy[i] + da*dx[i];
00801 
00802         if (n < 4) return;
00803     }
00804     for (i = m; i < n; i = i + 4) {
00805         dy[i] = dy[i] + da*dx[i];
00806         dy[i+1] = dy[i+1] + da*dx[i+1];
00807         dy[i+2] = dy[i+2] + da*dx[i+2];
00808         dy[i+3] = dy[i+3] + da*dx[i+3];
00809 
00810     }
00811 
00812 #endif
00813     return;
00814 }
00815 
00816 /*----------------------*/
00817 
00818 REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy)
00819 /*
00820      forms the dot product of two vectors.
00821      jack dongarra, linpack, 3/11/78.
00822 */
00823 
00824 {
00825     REAL dtemp;
00826     int i,ix,iy,m,mp1;
00827 
00828     mp1 = 0;
00829     m = 0;
00830 
00831     dtemp = ZERO;
00832 
00833     if(n <= 0) return(ZERO);
00834 
00835     if(incx != 1 || incy != 1) {
00836 
00837         /* code for unequal increments or equal increments
00838            not equal to 1                                       */
00839 
00840         ix = 0;
00841         iy = 0;
00842         if (incx < 0) ix = (-n+1)*incx;
00843         if (incy < 0) iy = (-n+1)*incy;
00844         for (i = 0; i < n; i++) {
00845             dtemp = dtemp + dx[ix]*dy[iy];
00846             ix = ix + incx;
00847             iy = iy + incy;
00848 
00849         }
00850         return(dtemp);
00851     }
00852 
00853     /* code for both increments equal to 1 */
00854 
00855 
00856 #ifdef ROLL
00857 
00858     for (i=0; i < n; i++)
00859         dtemp = dtemp + dx[i]*dy[i];
00860 
00861     return(dtemp);
00862 
00863 #endif
00864 
00865 #ifdef UNROLL
00866 
00867 
00868     m = n % 5;
00869     if (m != 0) {
00870         for (i = 0; i < m; i++)
00871             dtemp = dtemp + dx[i]*dy[i];
00872         if (n < 5) return(dtemp);
00873     }
00874     for (i = m; i < n; i = i + 5) {
00875         dtemp = dtemp + dx[i]*dy[i] +
00876                 dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
00877                 dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
00878     }
00879     return(dtemp);
00880 
00881 #endif
00882 
00883 }
00884 
00885 /*----------------------*/
00886 void dscal(int n, REAL da, REAL dx[], int incx)
00887 
00888 /*     scales a vector by a constant.
00889       jack dongarra, linpack, 3/11/78.
00890 */
00891 
00892 {
00893     int i,m,mp1,nincx;
00894 
00895     mp1 = 0;
00896     m = 0;
00897 
00898     if(n <= 0)return;
00899     if(incx != 1) {
00900 
00901         /* code for increment not equal to 1 */
00902 
00903         nincx = n*incx;
00904         for (i = 0; i < nincx; i = i + incx)
00905             dx[i] = da*dx[i];
00906 
00907         return;
00908     }
00909 
00910     /* code for increment equal to 1 */
00911 
00912 
00913 #ifdef ROLL
00914 
00915     for (i = 0; i < n; i++)
00916         dx[i] = da*dx[i];
00917 
00918 
00919 #endif
00920 
00921 #ifdef UNROLL
00922 
00923 
00924     m = n % 5;
00925     if (m != 0) {
00926         for (i = 0; i < m; i++)
00927             dx[i] = da*dx[i];
00928         if (n < 5) return;
00929     }
00930     for (i = m; i < n; i = i + 5) {
00931         dx[i] = da*dx[i];
00932         dx[i+1] = da*dx[i+1];
00933         dx[i+2] = da*dx[i+2];
00934         dx[i+3] = da*dx[i+3];
00935         dx[i+4] = da*dx[i+4];
00936     }
00937 
00938 #endif
00939 
00940 }
00941 
00942 /*----------------------*/
00943 int idamax(int n, REAL dx[], int incx)
00944 
00945 /*
00946      finds the index of element having max. absolute value.
00947      jack dongarra, linpack, 3/11/78.
00948 */
00949 
00950 
00951 {
00952     REAL dmax;
00953     int i, ix, itemp;
00954 
00955     if( n < 1 ) return(-1);
00956     if(n ==1 ) return(0);
00957     if(incx != 1) {
00958 
00959         /* code for increment not equal to 1 */
00960 
00961         ix = 1;
00962         dmax = fabs((double)dx[0]);
00963         ix = ix + incx;
00964         for (i = 1; i < n; i++) {
00965             if(fabs((double)dx[ix]) > dmax)  {
00966                 itemp = i;
00967                 dmax = fabs((double)dx[ix]);
00968             }
00969             ix = ix + incx;
00970         }
00971     } else {
00972 
00973         /* code for increment equal to 1 */
00974 
00975         itemp = 0;
00976         dmax = fabs((double)dx[0]);
00977         for (i = 1; i < n; i++) {
00978             if(fabs((double)dx[i]) > dmax) {
00979                 itemp = i;
00980                 dmax = fabs((double)dx[i]);
00981             }
00982         }
00983     }
00984     return (itemp);
00985 }
00986 
00987 /*----------------------*/
00988 REAL epslon (REAL x)
00989 
00990 /*
00991      estimate unit roundoff in quantities of size x.
00992 */
00993 
00994 {
00995     REAL a,b,c,eps;
00996     /*
00997          this program should function properly on all systems
00998          satisfying the following two assumptions,
00999             1.  the base used in representing dfloating point
01000                 numbers is not a power of three.
01001             2.  the quantity  a  in statement 10 is represented to
01002                 the accuracy used in dfloating point variables
01003                 that are stored in memory.
01004          the statement number 10 and the go to 10 are intended to
01005          force optimizing compilers to generate code satisfying
01006          assumption 2.
01007          under these assumptions, it should be true that,
01008                 a  is not exactly equal to four-thirds,
01009                 b  has a zero for its last bit or digit,
01010                 c  is not exactly equal to one,
01011                 eps  measures the separation of 1.0 from
01012                      the next larger dfloating point number.
01013          the developers of eispack would appreciate being informed
01014          about any systems where these assumptions do not hold.
01015 
01016          *****************************************************************
01017          this routine is one of the auxiliary routines used by eispack iii
01018          to avoid machine dependencies.
01019          *****************************************************************
01020 
01021          this version dated 4/6/83.
01022     */
01023 
01024     a = 4.0e0/3.0e0;
01025     eps = ZERO;
01026     while (eps == ZERO) {
01027         b = a - ONE;
01028         c = b + b + b;
01029         eps = fabs((double)(c-ONE));
01030     }
01031     return(eps*fabs((double)x));
01032 }
01033 
01034 /*----------------------*/
01035 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
01036 
01037 
01038 /* We would like to declare m[][ldm], but c does not allow it.  In this
01039 function, references to m[i][j] are written m[ldm*i+j].  */
01040 
01041 /*
01042    purpose:
01043      multiply matrix m times vector x and add the result to vector y.
01044 
01045    parameters:
01046 
01047      n1 integer, number of elements in vector y, and number of rows in
01048          matrix m
01049 
01050      y double [n1], vector of length n1 to which is added
01051          the product m*x
01052 
01053      n2 integer, number of elements in vector x, and number of columns
01054          in matrix m
01055 
01056      ldm integer, leading dimension of array m
01057 
01058      x double [n2], vector of length n2
01059 
01060      m double [ldm][n2], matrix of n1 rows and n2 columns
01061 
01062  ----------------------------------------------------------------------
01063 */
01064 {
01065     int j,i,jmin;
01066     /* cleanup odd vector */
01067 
01068     j = n2 % 2;
01069     if (j >= 1) {
01070         j = j - 1;
01071         for (i = 0; i < n1; i++)
01072             y[i] = (y[i]) + x[j]*m[ldm*j+i];
01073     }
01074 
01075     /* cleanup odd group of two vectors */
01076 
01077     j = n2 % 4;
01078     if (j >= 2) {
01079         j = j - 1;
01080         for (i = 0; i < n1; i++)
01081             y[i] = ( (y[i])
01082                      + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
01083     }
01084 
01085     /* cleanup odd group of four vectors */
01086 
01087     j = n2 % 8;
01088     if (j >= 4) {
01089         j = j - 1;
01090         for (i = 0; i < n1; i++)
01091             y[i] = ((( (y[i])
01092                        + x[j-3]*m[ldm*(j-3)+i])
01093                      + x[j-2]*m[ldm*(j-2)+i])
01094                     + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
01095     }
01096 
01097     /* cleanup odd group of eight vectors */
01098 
01099     j = n2 % 16;
01100     if (j >= 8) {
01101         j = j - 1;
01102         for (i = 0; i < n1; i++)
01103             y[i] = ((((((( (y[i])
01104                            + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i])
01105                         + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i])
01106                       + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i])
01107                     + x[j-1]*m[ldm*(j-1)+i]) + x[j]  *m[ldm*j+i];
01108     }
01109 
01110     /* main loop - groups of sixteen vectors */
01111 
01112     jmin = (n2%16)+16;
01113     for (j = jmin-1; j < n2; j = j + 16) {
01114         for (i = 0; i < n1; i++)
01115             y[i] = ((((((((((((((( (y[i])
01116                                    + x[j-15]*m[ldm*(j-15)+i])
01117                                  + x[j-14]*m[ldm*(j-14)+i])
01118                                 + x[j-13]*m[ldm*(j-13)+i])
01119                                + x[j-12]*m[ldm*(j-12)+i])
01120                               + x[j-11]*m[ldm*(j-11)+i])
01121                              + x[j-10]*m[ldm*(j-10)+i])
01122                             + x[j- 9]*m[ldm*(j- 9)+i])
01123                            + x[j- 8]*m[ldm*(j- 8)+i])
01124                           + x[j- 7]*m[ldm*(j- 7)+i])
01125                          + x[j- 6]*m[ldm*(j- 6)+i])
01126                         + x[j- 5]*m[ldm*(j- 5)+i])
01127                        + x[j- 4]*m[ldm*(j- 4)+i])
01128                       + x[j- 3]*m[ldm*(j- 3)+i])
01129                      + x[j- 2]*m[ldm*(j- 2)+i])
01130                     + x[j- 1]*m[ldm*(j- 1)+i])
01131                    + x[j]   *m[ldm*j+i];
01132     }
01133     return;
01134 }
01135 //*----------------------------------------
01136 int main()
01137 {
01138     pc.baud(9600);
01139     while(1) {
01140 
01141         pc.printf("Starting benchmark...\n");
01142 
01143         do_benchmark();
01144 
01145         pc.printf(" kraj \n\n");
01146     }
01147 }