Linpack port to ARM MCU from Arduino IDE and roylongbottom.org ARM based source

Dependencies:   mbed

Committer:
JovanEps
Date:
Wed May 24 19:23:32 2017 +0000
Revision:
12:623ce727d4fa
Parent:
11:4e700bbf93d7
Alpha - RC

Who changed what in which revision?

UserRevisionLine numberNew contents of line
JovanEps 0:43b96e9650ef 1 //********************************************************
JovanEps 11:4e700bbf93d7 2 //** RC Apha --- based on roylongbottom.org ARM ******
JovanEps 11:4e700bbf93d7 3 //** Nucleo-64-144 Stm32F103 to F767 benchmark ******
JovanEps 11:4e700bbf93d7 4 //** Limpack -port form Arduino IDE ******
JovanEps 11:4e700bbf93d7 5 //** Jovan Ivkovic - 2017 ******
JovanEps 0:43b96e9650ef 6 //********************************************************
JovanEps 11:4e700bbf93d7 7 #define GCCARMDP
JovanEps 11:4e700bbf93d7 8
JovanEps 11:4e700bbf93d7 9 #define UNROLL
JovanEps 11:4e700bbf93d7 10 #define DP
JovanEps 11:4e700bbf93d7 11
JovanEps 11:4e700bbf93d7 12 #ifdef SP
JovanEps 11:4e700bbf93d7 13 #define REAL float
JovanEps 11:4e700bbf93d7 14 #define ZERO 0.0
JovanEps 11:4e700bbf93d7 15 #define ONE 1.0
JovanEps 11:4e700bbf93d7 16 #define PREC "Single"
JovanEps 11:4e700bbf93d7 17 #endif
JovanEps 11:4e700bbf93d7 18
JovanEps 11:4e700bbf93d7 19 #ifdef DP
JovanEps 11:4e700bbf93d7 20 #define REAL double
JovanEps 11:4e700bbf93d7 21 #define ZERO 0.0e0
JovanEps 11:4e700bbf93d7 22 #define ONE 1.0e0
JovanEps 11:4e700bbf93d7 23 #define PREC "Double"
JovanEps 11:4e700bbf93d7 24 #endif
JovanEps 11:4e700bbf93d7 25
JovanEps 11:4e700bbf93d7 26 #ifdef ROLL
JovanEps 11:4e700bbf93d7 27 #define ROLLING "Rolled"
JovanEps 11:4e700bbf93d7 28 #endif
JovanEps 11:4e700bbf93d7 29 #ifdef UNROLL
JovanEps 11:4e700bbf93d7 30 #define ROLLING "Unrolled"
JovanEps 11:4e700bbf93d7 31 #endif
JovanEps 11:4e700bbf93d7 32
JovanEps 11:4e700bbf93d7 33 // VERSION
JovanEps 11:4e700bbf93d7 34
JovanEps 11:4e700bbf93d7 35 #ifdef CNNT
JovanEps 11:4e700bbf93d7 36 #define options "Non-optimised"
JovanEps 11:4e700bbf93d7 37 #define opt "0"
JovanEps 11:4e700bbf93d7 38 #else
JovanEps 11:4e700bbf93d7 39 // #define options "Optimised"
JovanEps 11:4e700bbf93d7 40 // #define options "Opt 3 32 Bit"
JovanEps 11:4e700bbf93d7 41 #define options "vfpv4 32 Bit"
JovanEps 11:4e700bbf93d7 42 #define opt "1"
JovanEps 11:4e700bbf93d7 43 #endif
JovanEps 11:4e700bbf93d7 44
JovanEps 11:4e700bbf93d7 45 #define NTIMES 10
JovanEps 11:4e700bbf93d7 46
JovanEps 0:43b96e9650ef 47 #include "mbed.h"
JovanEps 2:273153e44338 48
JovanEps 11:4e700bbf93d7 49
JovanEps 11:4e700bbf93d7 50 //---------------------------------
JovanEps 11:4e700bbf93d7 51 DigitalOut myled(LED1);
JovanEps 11:4e700bbf93d7 52 Serial pc(USBTX, USBRX); //USB is out of oreder on Embeded-Pi
JovanEps 11:4e700bbf93d7 53 //Serial pc(PC_10,PC_11); //RX-TX D0,D1 Embeded-PI ports
JovanEps 11:4e700bbf93d7 54 Timer timer;
JovanEps 11:4e700bbf93d7 55 //--------------------------------
JovanEps 11:4e700bbf93d7 56
JovanEps 11:4e700bbf93d7 57 void print_time (int row);
JovanEps 11:4e700bbf93d7 58 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma);
JovanEps 11:4e700bbf93d7 59 void dgefa (REAL a[], int lda, int n, int ipvt[], int *info);
JovanEps 11:4e700bbf93d7 60 void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job);
JovanEps 11:4e700bbf93d7 61 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
JovanEps 11:4e700bbf93d7 62 void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy);
JovanEps 11:4e700bbf93d7 63 REAL epslon (REAL x);
JovanEps 11:4e700bbf93d7 64 int idamax (int n, REAL dx[], int incx);
JovanEps 11:4e700bbf93d7 65 void dscal (int n, REAL da, REAL dx[], int incx);
JovanEps 11:4e700bbf93d7 66 REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy);
JovanEps 11:4e700bbf93d7 67
JovanEps 11:4e700bbf93d7 68 static REAL atime[9][15];
JovanEps 11:4e700bbf93d7 69 double runSecs = 1;
JovanEps 11:4e700bbf93d7 70
JovanEps 11:4e700bbf93d7 71 void do_benchmark(void){
JovanEps 11:4e700bbf93d7 72
JovanEps 11:4e700bbf93d7 73 float t1,t2 = 0.0;
JovanEps 12:623ce727d4fa 74 static REAL aa[40*40],a[40*41],b[40],x[40]; // nxn, n*n+1, n,n,
JovanEps 11:4e700bbf93d7 75 REAL cray,ops,total,norma,normx;
JovanEps 11:4e700bbf93d7 76 REAL resid,residn,eps,tm2,epsn,x1,x2;
JovanEps 11:4e700bbf93d7 77 REAL mflops;
JovanEps 11:4e700bbf93d7 78 static int ipvt[21],n,i,j,ntimes,info,lda,ldaa;
JovanEps 11:4e700bbf93d7 79 int endit, pass, loop;
JovanEps 11:4e700bbf93d7 80 REAL overhead1, overhead2, time2;
JovanEps 11:4e700bbf93d7 81 REAL max1, max2;
JovanEps 11:4e700bbf93d7 82 char was[5][20];
JovanEps 11:4e700bbf93d7 83 char expect[5][20];
JovanEps 11:4e700bbf93d7 84 char title[5][20];
JovanEps 11:4e700bbf93d7 85 int errors;
JovanEps 11:4e700bbf93d7 86 int nopause = 1;
JovanEps 11:4e700bbf93d7 87
JovanEps 11:4e700bbf93d7 88 /*
JovanEps 11:4e700bbf93d7 89 if (argc > 1) {
JovanEps 11:4e700bbf93d7 90 switch (argv[1][0]) {
JovanEps 11:4e700bbf93d7 91 case 'N':
JovanEps 11:4e700bbf93d7 92 nopause = 0;
JovanEps 11:4e700bbf93d7 93 break;
JovanEps 11:4e700bbf93d7 94 case 'n':
JovanEps 11:4e700bbf93d7 95 nopause = 0;
JovanEps 11:4e700bbf93d7 96 break;
JovanEps 11:4e700bbf93d7 97 }
JovanEps 11:4e700bbf93d7 98 }
JovanEps 11:4e700bbf93d7 99 */
JovanEps 11:4e700bbf93d7 100 pc.printf("\n");
JovanEps 11:4e700bbf93d7 101
JovanEps 11:4e700bbf93d7 102
JovanEps 11:4e700bbf93d7 103 // outfile = fopen("Linpack.txt","a+");
JovanEps 11:4e700bbf93d7 104 // if (outfile == NULL)
JovanEps 11:4e700bbf93d7 105 // {
JovanEps 11:4e700bbf93d7 106 // printf (" Cannot open results file \n\n");
JovanEps 11:4e700bbf93d7 107 // printf(" Press Enter\n\n");
JovanEps 11:4e700bbf93d7 108 // int g = getchar();
JovanEps 11:4e700bbf93d7 109 // exit (0);
JovanEps 11:4e700bbf93d7 110 //}
JovanEps 11:4e700bbf93d7 111
JovanEps 11:4e700bbf93d7 112
JovanEps 11:4e700bbf93d7 113
JovanEps 11:4e700bbf93d7 114 #ifdef GCCARMDP
JovanEps 11:4e700bbf93d7 115 pc.printf(expect[0], " 1.7");
JovanEps 11:4e700bbf93d7 116 pc.printf(expect[1], " 7.41628980e-14");
JovanEps 11:4e700bbf93d7 117 pc.printf(expect[2], " 2.22044605e-16");
JovanEps 11:4e700bbf93d7 118 pc.printf(expect[3], " -1.49880108e-14");
JovanEps 11:4e700bbf93d7 119 pc.printf(expect[4], " -1.89848137e-14");
JovanEps 11:4e700bbf93d7 120 #endif
JovanEps 11:4e700bbf93d7 121
JovanEps 11:4e700bbf93d7 122 #ifdef GCCARMSP
JovanEps 11:4e700bbf93d7 123 pc.printf(expect[0], " 1.6");
JovanEps 11:4e700bbf93d7 124 pc.printf(expect[1], " 3.80277634e-05");
JovanEps 11:4e700bbf93d7 125 pc.printf(expect[2], " 1.19209290e-07");
JovanEps 11:4e700bbf93d7 126 pc.printf(expect[3], " -1.38282776e-05");
JovanEps 11:4e700bbf93d7 127 pc.printf(expect[4], " -7.51018524e-06");
JovanEps 11:4e700bbf93d7 128 #endif
JovanEps 11:4e700bbf93d7 129
JovanEps 12:623ce727d4fa 130 lda = 41; //n+1
JovanEps 12:623ce727d4fa 131 ldaa = 40; //n
JovanEps 11:4e700bbf93d7 132 cray = .056;
JovanEps 12:623ce727d4fa 133 n = 40; // 40 For Nucleo F7
JovanEps 11:4e700bbf93d7 134 pc.printf("----------------------------------------------\n");
JovanEps 11:4e700bbf93d7 135 pc.printf("%s ", ROLLING);
JovanEps 11:4e700bbf93d7 136 pc.printf("%s ", PREC);
JovanEps 11:4e700bbf93d7 137 pc.printf("Precision Linpack Benchmark - Linux Version in 'C/C++'\n\n");
JovanEps 11:4e700bbf93d7 138
JovanEps 11:4e700bbf93d7 139 pc.printf("Optimisation %s\n\n",options);
JovanEps 11:4e700bbf93d7 140
JovanEps 11:4e700bbf93d7 141 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
JovanEps 11:4e700bbf93d7 142
JovanEps 11:4e700bbf93d7 143 matgen(a,lda,n,b,&norma);
JovanEps 11:4e700bbf93d7 144 timer.start();
JovanEps 11:4e700bbf93d7 145 //start_time();
JovanEps 11:4e700bbf93d7 146 t1 = timer.read();
JovanEps 11:4e700bbf93d7 147 dgefa(a,lda,n,ipvt,&info);
JovanEps 11:4e700bbf93d7 148 //end_time();
JovanEps 11:4e700bbf93d7 149 t2 = timer.read();
JovanEps 11:4e700bbf93d7 150 timer.stop();
JovanEps 11:4e700bbf93d7 151 atime[0][0] = t2-t1; //secs
JovanEps 11:4e700bbf93d7 152
JovanEps 11:4e700bbf93d7 153 timer.reset();
JovanEps 11:4e700bbf93d7 154 timer.start();
JovanEps 11:4e700bbf93d7 155 t1 = timer.read();
JovanEps 11:4e700bbf93d7 156 dgesl(a,lda,n,ipvt,b,0);
JovanEps 11:4e700bbf93d7 157 t2 = timer.read();
JovanEps 11:4e700bbf93d7 158 timer.stop();
JovanEps 11:4e700bbf93d7 159 atime[1][0] = t2-t1; //secs
JovanEps 11:4e700bbf93d7 160 total = atime[0][0] + atime[1][0];
JovanEps 11:4e700bbf93d7 161
JovanEps 11:4e700bbf93d7 162 /* compute a residual to verify results. */
JovanEps 11:4e700bbf93d7 163
JovanEps 11:4e700bbf93d7 164 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 165 x[i] = b[i];
JovanEps 11:4e700bbf93d7 166 }
JovanEps 11:4e700bbf93d7 167 matgen(a,lda,n,b,&norma);
JovanEps 11:4e700bbf93d7 168 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 169 b[i] = -b[i];
JovanEps 11:4e700bbf93d7 170 }
JovanEps 11:4e700bbf93d7 171 dmxpy(n,b,n,lda,x,a);
JovanEps 11:4e700bbf93d7 172 resid = 0.0;
JovanEps 11:4e700bbf93d7 173 normx = 0.0;
JovanEps 11:4e700bbf93d7 174 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 175 resid = (resid > fabs((double)b[i]))
JovanEps 11:4e700bbf93d7 176 ? resid : fabs((double)b[i]);
JovanEps 11:4e700bbf93d7 177 normx = (normx > fabs((double)x[i]))
JovanEps 11:4e700bbf93d7 178 ? normx : fabs((double)x[i]);
JovanEps 11:4e700bbf93d7 179 }
JovanEps 11:4e700bbf93d7 180 eps = epslon(ONE);
JovanEps 11:4e700bbf93d7 181 residn = resid/( n*norma*normx*eps );
JovanEps 11:4e700bbf93d7 182 epsn = eps;
JovanEps 11:4e700bbf93d7 183 x1 = x[0] - 1;
JovanEps 11:4e700bbf93d7 184 x2 = x[n-1] - 1;
JovanEps 11:4e700bbf93d7 185
JovanEps 11:4e700bbf93d7 186 pc.printf("norm resid resid machep");
JovanEps 11:4e700bbf93d7 187 pc.printf(" x[0]-1 x[n-1]-1\n");
JovanEps 11:4e700bbf93d7 188 pc.printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
JovanEps 11:4e700bbf93d7 189 (double)residn, (double)resid, (double)epsn,
JovanEps 11:4e700bbf93d7 190 (double)x1, (double)x2);
JovanEps 11:4e700bbf93d7 191
JovanEps 11:4e700bbf93d7 192 pc.printf("Times are reported for matrices of order %5d\n",n);
JovanEps 11:4e700bbf93d7 193 pc.printf("1 pass times for array with leading dimension of%5d\n\n",lda);
JovanEps 11:4e700bbf93d7 194 pc.printf(" dgefa dgesl total Mflops unit");
JovanEps 11:4e700bbf93d7 195 pc.printf(" ratio\n");
JovanEps 11:4e700bbf93d7 196
JovanEps 11:4e700bbf93d7 197 atime[2][0] = total;
JovanEps 11:4e700bbf93d7 198 if (total > 0.0) {
JovanEps 11:4e700bbf93d7 199 atime[3][0] = ops/(1.0e6*total);
JovanEps 11:4e700bbf93d7 200 atime[4][0] = 2.0/atime[3][0];
JovanEps 11:4e700bbf93d7 201 } else {
JovanEps 11:4e700bbf93d7 202 atime[3][0] = 0.0;
JovanEps 11:4e700bbf93d7 203 atime[4][0] = 0.0;
JovanEps 11:4e700bbf93d7 204 }
JovanEps 11:4e700bbf93d7 205 atime[5][0] = total/cray;
JovanEps 11:4e700bbf93d7 206
JovanEps 11:4e700bbf93d7 207 print_time(0);
JovanEps 11:4e700bbf93d7 208
JovanEps 11:4e700bbf93d7 209 /************************************************************************
JovanEps 11:4e700bbf93d7 210 * Calculate overhead of executing matgen procedure *
JovanEps 11:4e700bbf93d7 211 ************************************************************************/
JovanEps 11:4e700bbf93d7 212
JovanEps 11:4e700bbf93d7 213 pc.printf ("\nCalculating matgen overhead\n");
JovanEps 11:4e700bbf93d7 214 pass = -20;
JovanEps 11:4e700bbf93d7 215 loop = NTIMES;
JovanEps 11:4e700bbf93d7 216 do {
JovanEps 11:4e700bbf93d7 217 timer.start();
JovanEps 11:4e700bbf93d7 218 t1 = timer.read();
JovanEps 11:4e700bbf93d7 219 pass = pass + 1;
JovanEps 11:4e700bbf93d7 220 for ( i = 0 ; i < loop ; i++) {
JovanEps 11:4e700bbf93d7 221 matgen(a,lda,n,b,&norma);
JovanEps 11:4e700bbf93d7 222 }
JovanEps 11:4e700bbf93d7 223 t2 = timer.read();
JovanEps 11:4e700bbf93d7 224 timer.stop();
JovanEps 11:4e700bbf93d7 225 overhead1 = t2-t1; //sec
JovanEps 12:623ce727d4fa 226 pc.printf ("%10d times %6.4f seconds\n", loop, overhead1);
JovanEps 11:4e700bbf93d7 227 if (overhead1 > runSecs) {
JovanEps 11:4e700bbf93d7 228 pass = 0;
JovanEps 11:4e700bbf93d7 229 }
JovanEps 11:4e700bbf93d7 230 if (pass < 0) {
JovanEps 11:4e700bbf93d7 231 if (overhead1 < 0.1) {
JovanEps 11:4e700bbf93d7 232 loop = loop * 10;
JovanEps 11:4e700bbf93d7 233 } else {
JovanEps 11:4e700bbf93d7 234 loop = loop * 2;
JovanEps 11:4e700bbf93d7 235 }
JovanEps 11:4e700bbf93d7 236 }
JovanEps 11:4e700bbf93d7 237 } while (pass < 0);
JovanEps 11:4e700bbf93d7 238
JovanEps 11:4e700bbf93d7 239 overhead1 = overhead1 / (double)loop;
JovanEps 11:4e700bbf93d7 240
JovanEps 11:4e700bbf93d7 241 pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1);
JovanEps 11:4e700bbf93d7 242
JovanEps 11:4e700bbf93d7 243 /************************************************************************
JovanEps 11:4e700bbf93d7 244 * Calculate matgen/dgefa passes for runSecs seconds *
JovanEps 11:4e700bbf93d7 245 ************************************************************************/
JovanEps 11:4e700bbf93d7 246
JovanEps 11:4e700bbf93d7 247 pc.printf ("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs);
JovanEps 11:4e700bbf93d7 248 pass = -20;
JovanEps 11:4e700bbf93d7 249 ntimes = NTIMES;
JovanEps 11:4e700bbf93d7 250 do {
JovanEps 11:4e700bbf93d7 251 timer.start();
JovanEps 11:4e700bbf93d7 252 t1 = timer.read();
JovanEps 11:4e700bbf93d7 253 pass = pass + 1;
JovanEps 11:4e700bbf93d7 254 for ( i = 0 ; i < ntimes ; i++) {
JovanEps 11:4e700bbf93d7 255 matgen(a,lda,n,b,&norma);
JovanEps 11:4e700bbf93d7 256 dgefa(a,lda,n,ipvt,&info );
JovanEps 11:4e700bbf93d7 257 }
JovanEps 11:4e700bbf93d7 258 t2 = timer.read();
JovanEps 11:4e700bbf93d7 259 timer.stop();
JovanEps 11:4e700bbf93d7 260 time2 = t2-t1; //sec
JovanEps 11:4e700bbf93d7 261
JovanEps 11:4e700bbf93d7 262 pc.printf ("%10d times %6.2f seconds\n", ntimes, time2);
JovanEps 11:4e700bbf93d7 263 if (time2 > runSecs) {
JovanEps 11:4e700bbf93d7 264 pass = 0;
JovanEps 11:4e700bbf93d7 265 }
JovanEps 11:4e700bbf93d7 266 if (pass < 0) {
JovanEps 11:4e700bbf93d7 267 if (time2 < 0.1) {
JovanEps 11:4e700bbf93d7 268 ntimes = ntimes * 10;
JovanEps 11:4e700bbf93d7 269 } else {
JovanEps 11:4e700bbf93d7 270 ntimes = ntimes * 2;
JovanEps 11:4e700bbf93d7 271 }
JovanEps 11:4e700bbf93d7 272 }
JovanEps 11:4e700bbf93d7 273 } while (pass < 0);
JovanEps 11:4e700bbf93d7 274
JovanEps 11:4e700bbf93d7 275 ntimes = (int)(runSecs * (double)ntimes / time2);
JovanEps 11:4e700bbf93d7 276 if (ntimes == 0) ntimes = 1;
JovanEps 11:4e700bbf93d7 277
JovanEps 11:4e700bbf93d7 278 pc.printf ("Passes used %10d \n\n", ntimes);
JovanEps 11:4e700bbf93d7 279 pc.printf("Times for array with leading dimension of%4d\n\n",lda);
JovanEps 11:4e700bbf93d7 280 pc.printf(" dgefa dgesl total Mflops unit");
JovanEps 11:4e700bbf93d7 281 pc.printf(" ratio\n");
JovanEps 11:4e700bbf93d7 282
JovanEps 11:4e700bbf93d7 283 /************************************************************************
JovanEps 11:4e700bbf93d7 284 * Execute 5 passes *
JovanEps 11:4e700bbf93d7 285 ************************************************************************/
JovanEps 11:4e700bbf93d7 286
JovanEps 11:4e700bbf93d7 287 tm2 = ntimes * overhead1;
JovanEps 11:4e700bbf93d7 288 atime[3][6] = 0;
JovanEps 11:4e700bbf93d7 289
JovanEps 11:4e700bbf93d7 290 for (j=1 ; j<6 ; j++) {
JovanEps 11:4e700bbf93d7 291 timer.start();
JovanEps 11:4e700bbf93d7 292 t1 = timer.read();
JovanEps 11:4e700bbf93d7 293
JovanEps 11:4e700bbf93d7 294 for (i = 0; i < ntimes; i++) {
JovanEps 11:4e700bbf93d7 295 matgen(a,lda,n,b,&norma);
JovanEps 11:4e700bbf93d7 296 dgefa(a,lda,n,ipvt,&info );
JovanEps 11:4e700bbf93d7 297 }
JovanEps 11:4e700bbf93d7 298 t2 = timer.read();
JovanEps 11:4e700bbf93d7 299 timer.stop();
JovanEps 11:4e700bbf93d7 300 atime[0][j] = ((t2-t1) - tm2)/ntimes;
JovanEps 11:4e700bbf93d7 301
JovanEps 11:4e700bbf93d7 302 timer.start();
JovanEps 11:4e700bbf93d7 303 t1 = timer.read();
JovanEps 11:4e700bbf93d7 304 for (i = 0; i < ntimes; i++) {
JovanEps 11:4e700bbf93d7 305 dgesl(a,lda,n,ipvt,b,0);
JovanEps 11:4e700bbf93d7 306 }
JovanEps 11:4e700bbf93d7 307 t2 = timer.read();
JovanEps 11:4e700bbf93d7 308 timer.stop();
JovanEps 11:4e700bbf93d7 309
JovanEps 11:4e700bbf93d7 310 atime[1][j] = (t2-t1)/ntimes;
JovanEps 11:4e700bbf93d7 311 total = atime[0][j] + atime[1][j];
JovanEps 11:4e700bbf93d7 312 atime[2][j] = total;
JovanEps 11:4e700bbf93d7 313 atime[3][j] = ops/(1.0e6*total);
JovanEps 11:4e700bbf93d7 314 atime[4][j] = 2.0/atime[3][j];
JovanEps 11:4e700bbf93d7 315 atime[5][j] = total/cray;
JovanEps 11:4e700bbf93d7 316 atime[3][6] = atime[3][6] + atime[3][j];
JovanEps 2:273153e44338 317
JovanEps 11:4e700bbf93d7 318 print_time(j);
JovanEps 11:4e700bbf93d7 319 }
JovanEps 11:4e700bbf93d7 320 atime[3][6] = atime[3][6] / 5.0;
JovanEps 12:623ce727d4fa 321 pc.printf("Average %11.4f\n",
JovanEps 11:4e700bbf93d7 322 (double)atime[3][6]);
JovanEps 11:4e700bbf93d7 323
JovanEps 11:4e700bbf93d7 324 pc.printf("\nCalculating matgen2 overhead\n");
JovanEps 11:4e700bbf93d7 325
JovanEps 11:4e700bbf93d7 326 /************************************************************************
JovanEps 11:4e700bbf93d7 327 * Calculate overhead of executing matgen procedure *
JovanEps 11:4e700bbf93d7 328 ************************************************************************/
JovanEps 11:4e700bbf93d7 329
JovanEps 11:4e700bbf93d7 330 timer.start();
JovanEps 11:4e700bbf93d7 331 t1 = timer.read();
JovanEps 11:4e700bbf93d7 332 for ( i = 0 ; i < loop ; i++) {
JovanEps 11:4e700bbf93d7 333 matgen(aa,ldaa,n,b,&norma);
JovanEps 11:4e700bbf93d7 334 }
JovanEps 11:4e700bbf93d7 335 t2 = timer.read();
JovanEps 11:4e700bbf93d7 336 timer.stop();
JovanEps 11:4e700bbf93d7 337 overhead2 = t2-t1;
JovanEps 11:4e700bbf93d7 338 overhead2 = overhead2 / (double)loop;
JovanEps 11:4e700bbf93d7 339
JovanEps 11:4e700bbf93d7 340 pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
JovanEps 11:4e700bbf93d7 341 pc.printf("Times for array with leading dimension of%4d\n\n",ldaa);
JovanEps 11:4e700bbf93d7 342 pc.printf(" dgefa dgesl total Mflops unit");
JovanEps 11:4e700bbf93d7 343 pc.printf(" ratio\n");
JovanEps 11:4e700bbf93d7 344
JovanEps 11:4e700bbf93d7 345 /************************************************************************
JovanEps 11:4e700bbf93d7 346 * Execute 5 passes *
JovanEps 11:4e700bbf93d7 347 ************************************************************************/
JovanEps 11:4e700bbf93d7 348
JovanEps 11:4e700bbf93d7 349 tm2 = ntimes * overhead2;
JovanEps 11:4e700bbf93d7 350 atime[3][12] = 0;
JovanEps 11:4e700bbf93d7 351
JovanEps 11:4e700bbf93d7 352 for (j=7 ; j<12 ; j++) {
JovanEps 11:4e700bbf93d7 353 timer.start();
JovanEps 11:4e700bbf93d7 354 t1 = timer.read();
JovanEps 11:4e700bbf93d7 355 for (i = 0; i < ntimes; i++) {
JovanEps 11:4e700bbf93d7 356 matgen(aa,ldaa,n,b,&norma);
JovanEps 11:4e700bbf93d7 357 dgefa(aa,ldaa,n,ipvt,&info );
JovanEps 11:4e700bbf93d7 358 }
JovanEps 11:4e700bbf93d7 359 t2 = timer.read();
JovanEps 11:4e700bbf93d7 360 timer.stop();
JovanEps 11:4e700bbf93d7 361 atime[0][j] = ((t2-t1) - tm2)/ntimes;
JovanEps 11:4e700bbf93d7 362
JovanEps 11:4e700bbf93d7 363 timer.start();
JovanEps 11:4e700bbf93d7 364 t1 = timer.read();
JovanEps 11:4e700bbf93d7 365 for (i = 0; i < ntimes; i++) {
JovanEps 11:4e700bbf93d7 366 dgesl(aa,ldaa,n,ipvt,b,0);
JovanEps 11:4e700bbf93d7 367 }
JovanEps 11:4e700bbf93d7 368 t2 = timer.read();
JovanEps 11:4e700bbf93d7 369 timer.stop();
JovanEps 11:4e700bbf93d7 370 atime[1][j] = (t2-t1)/ntimes;
JovanEps 11:4e700bbf93d7 371 total = atime[0][j] + atime[1][j];
JovanEps 11:4e700bbf93d7 372 atime[2][j] = total;
JovanEps 11:4e700bbf93d7 373 atime[3][j] = ops/(1.0e6*total);
JovanEps 11:4e700bbf93d7 374 atime[4][j] = 2.0/atime[3][j];
JovanEps 11:4e700bbf93d7 375 atime[5][j] = total/cray;
JovanEps 11:4e700bbf93d7 376 atime[3][12] = atime[3][12] + atime[3][j];
JovanEps 11:4e700bbf93d7 377
JovanEps 11:4e700bbf93d7 378 print_time(j);
JovanEps 11:4e700bbf93d7 379 }
JovanEps 11:4e700bbf93d7 380 atime[3][12] = atime[3][12] / 5.0;
JovanEps 12:623ce727d4fa 381 pc.printf("Average %11.4f\n\n",
JovanEps 11:4e700bbf93d7 382 (double)atime[3][12]);
JovanEps 12:623ce727d4fa 383 pc.printf("--------------------------------------------------------\n");
JovanEps 11:4e700bbf93d7 384 pc.printf ("\nFrom File /proc/cpuinfo\n");
JovanEps 11:4e700bbf93d7 385 // pc.printf("%s\n", configdata[0]);
JovanEps 11:4e700bbf93d7 386 pc.printf ("\nFrom File /proc/version\n");
JovanEps 11:4e700bbf93d7 387 //pc.printf("%s\n", configdata[1]);
JovanEps 11:4e700bbf93d7 388
JovanEps 11:4e700bbf93d7 389 /************************************************************************
JovanEps 11:4e700bbf93d7 390 * Use minimum average as overall Mflops rating *
JovanEps 11:4e700bbf93d7 391 ************************************************************************/
JovanEps 11:4e700bbf93d7 392
JovanEps 11:4e700bbf93d7 393 mflops = atime[3][6];
JovanEps 11:4e700bbf93d7 394 if (atime[3][12] < mflops) mflops = atime[3][12];
JovanEps 11:4e700bbf93d7 395
JovanEps 11:4e700bbf93d7 396 pc.printf("\n");
JovanEps 11:4e700bbf93d7 397 pc.printf("%s ", ROLLING);
JovanEps 11:4e700bbf93d7 398 pc.printf("%s ", PREC);
JovanEps 11:4e700bbf93d7 399 pc.printf(" Precision %11.4f Mflops \n\n",mflops);
JovanEps 11:4e700bbf93d7 400
JovanEps 11:4e700bbf93d7 401 // local_time();
JovanEps 11:4e700bbf93d7 402
JovanEps 11:4e700bbf93d7 403
JovanEps 11:4e700bbf93d7 404 /************************************************************************
JovanEps 11:4e700bbf93d7 405 * Add results to output file Linpack.txt *
JovanEps 11:4e700bbf93d7 406 ************************************************************************/
JovanEps 12:623ce727d4fa 407 pc.printf (" --------------------------------------------------------\n\n");
JovanEps 11:4e700bbf93d7 408 pc.printf (" Linpack %s Precision %s Benchmark n @ 100\n", PREC, ROLLING);
JovanEps 11:4e700bbf93d7 409 //pc.printf (outfile, " Optimisation %s, %s\n", options, timeday);
JovanEps 11:4e700bbf93d7 410
JovanEps 11:4e700bbf93d7 411 max1 = 0;
JovanEps 11:4e700bbf93d7 412 for (i=1 ; i<6 ; i++) {
JovanEps 11:4e700bbf93d7 413 if (atime[3][i] > max1) max1 = atime[3][i];
JovanEps 11:4e700bbf93d7 414 }
JovanEps 11:4e700bbf93d7 415
JovanEps 11:4e700bbf93d7 416 max2 = 0;
JovanEps 11:4e700bbf93d7 417 for (i=7 ; i<12 ; i++) {
JovanEps 11:4e700bbf93d7 418 if (atime[3][i] > max2) max2 = atime[3][i];
JovanEps 11:4e700bbf93d7 419 }
JovanEps 11:4e700bbf93d7 420 if (max1 < max2) max2 = max1;
JovanEps 11:4e700bbf93d7 421
JovanEps 11:4e700bbf93d7 422 pc.printf(" Speed %10.4f MFLOPS\n\n", max2);
JovanEps 11:4e700bbf93d7 423 pc.printf(was[0], "%16.1f",(double)residn);
JovanEps 11:4e700bbf93d7 424 pc.printf(was[1], "%16.8e",(double)resid);
JovanEps 11:4e700bbf93d7 425 pc.printf(was[2], "%16.8e",(double)epsn);
JovanEps 11:4e700bbf93d7 426 pc.printf(was[3], "%16.8e",(double)x1);
JovanEps 11:4e700bbf93d7 427 pc.printf(was[4], "%16.8e",(double)x2);
JovanEps 11:4e700bbf93d7 428
JovanEps 11:4e700bbf93d7 429 pc.printf(title[0], "norm. resid");
JovanEps 11:4e700bbf93d7 430 pc.printf(title[1], "resid ");
JovanEps 11:4e700bbf93d7 431 pc.printf(title[2], "machep ");
JovanEps 11:4e700bbf93d7 432 pc.printf(title[3], "x[0]-1 ");
JovanEps 11:4e700bbf93d7 433 pc.printf(title[4], "x[n-1]-1 ");
JovanEps 11:4e700bbf93d7 434
JovanEps 11:4e700bbf93d7 435 if (strtol(opt, NULL, 10) == 0) {
JovanEps 11:4e700bbf93d7 436 pc.printf(expect[2], " 8.88178420e-016");
JovanEps 11:4e700bbf93d7 437 }
JovanEps 11:4e700bbf93d7 438 errors = 0;
JovanEps 11:4e700bbf93d7 439
JovanEps 11:4e700bbf93d7 440 for (i=0; i<5; i++) {
JovanEps 11:4e700bbf93d7 441 if (strcmp (expect[i], was[i]) != 0) {
JovanEps 11:4e700bbf93d7 442 pc.printf(" Variable %s Non-standard result was %s instead of %s\n",
JovanEps 11:4e700bbf93d7 443 title[i], was[i], expect[i]);
JovanEps 11:4e700bbf93d7 444 errors = errors + 1;
JovanEps 11:4e700bbf93d7 445 }
JovanEps 11:4e700bbf93d7 446 }
JovanEps 11:4e700bbf93d7 447 if (errors == 0) {
JovanEps 11:4e700bbf93d7 448 pc.printf(" Numeric results were as expected\n\n");
JovanEps 11:4e700bbf93d7 449 } else {
JovanEps 11:4e700bbf93d7 450 pc.printf(" Different numeric results - see linpack.txt\n\n");
JovanEps 11:4e700bbf93d7 451 pc.printf("\n Compiler #define or values in linpack.c might need to be changed\n\n");
JovanEps 11:4e700bbf93d7 452
JovanEps 11:4e700bbf93d7 453 }
JovanEps 11:4e700bbf93d7 454
JovanEps 11:4e700bbf93d7 455
JovanEps 12:623ce727d4fa 456 pc.printf (" -------------------------------------------------------------\n\n");
JovanEps 11:4e700bbf93d7 457 pc.printf("\n");
JovanEps 11:4e700bbf93d7 458 pc.printf ("SYSTEM INFORMATION\n\nFrom File /proc/cpuinfo\n");
JovanEps 11:4e700bbf93d7 459 // pc.printf (outfile, "%s \n", configdata[0]);
JovanEps 11:4e700bbf93d7 460 pc.printf ("\nFrom File /proc/version\n");
JovanEps 11:4e700bbf93d7 461 // pc.printf (outfile, "%s \n", configdata[1]);
JovanEps 11:4e700bbf93d7 462 pc.printf ("\n");
JovanEps 11:4e700bbf93d7 463
JovanEps 11:4e700bbf93d7 464 pc.printf("\n");
JovanEps 12:623ce727d4fa 465
JovanEps 12:623ce727d4fa 466 wait(1);
JovanEps 11:4e700bbf93d7 467
JovanEps 11:4e700bbf93d7 468 }
JovanEps 11:4e700bbf93d7 469
JovanEps 11:4e700bbf93d7 470 /*----------------------*/
JovanEps 11:4e700bbf93d7 471 void print_time (int row)
JovanEps 11:4e700bbf93d7 472
JovanEps 11:4e700bbf93d7 473 {
JovanEps 12:623ce727d4fa 474 pc.printf("%11.5f%11.5f%11.5f%11.4f%11.4f%11.4f\n", (double)atime[0][row],
JovanEps 11:4e700bbf93d7 475 (double)atime[1][row], (double)atime[2][row], (double)atime[3][row],
JovanEps 11:4e700bbf93d7 476 (double)atime[4][row], (double)atime[5][row]);
JovanEps 11:4e700bbf93d7 477 return;
JovanEps 11:4e700bbf93d7 478 }
JovanEps 11:4e700bbf93d7 479
JovanEps 11:4e700bbf93d7 480 /*----------------------*/
JovanEps 11:4e700bbf93d7 481
JovanEps 11:4e700bbf93d7 482 void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma)
JovanEps 11:4e700bbf93d7 483
JovanEps 11:4e700bbf93d7 484
JovanEps 11:4e700bbf93d7 485 /* We would like to declare a[][lda], but c does not allow it. In this
JovanEps 11:4e700bbf93d7 486 function, references to a[i][j] are written a[lda*i+j]. */
JovanEps 11:4e700bbf93d7 487
JovanEps 11:4e700bbf93d7 488 {
JovanEps 11:4e700bbf93d7 489 int init, i, j;
JovanEps 11:4e700bbf93d7 490
JovanEps 11:4e700bbf93d7 491 init = 1325;
JovanEps 11:4e700bbf93d7 492 *norma = 0.0;
JovanEps 11:4e700bbf93d7 493 for (j = 0; j < n; j++) {
JovanEps 11:4e700bbf93d7 494 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 495 init = 3125*init % 65536;
JovanEps 11:4e700bbf93d7 496 a[lda*j+i] = (init - 32768.0)/16384.0;
JovanEps 11:4e700bbf93d7 497 *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
JovanEps 11:4e700bbf93d7 498
JovanEps 11:4e700bbf93d7 499 /* alternative for some compilers
JovanEps 11:4e700bbf93d7 500 if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]);
JovanEps 11:4e700bbf93d7 501 */
JovanEps 11:4e700bbf93d7 502 }
JovanEps 11:4e700bbf93d7 503 }
JovanEps 11:4e700bbf93d7 504 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 505 b[i] = 0.0;
JovanEps 11:4e700bbf93d7 506 }
JovanEps 11:4e700bbf93d7 507 for (j = 0; j < n; j++) {
JovanEps 11:4e700bbf93d7 508 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 509 b[i] = b[i] + a[lda*j+i];
JovanEps 11:4e700bbf93d7 510 }
JovanEps 11:4e700bbf93d7 511 }
JovanEps 11:4e700bbf93d7 512 return;
JovanEps 11:4e700bbf93d7 513 }
JovanEps 11:4e700bbf93d7 514
JovanEps 11:4e700bbf93d7 515 /*----------------------*/
JovanEps 11:4e700bbf93d7 516 void dgefa(REAL a[], int lda, int n, int ipvt[], int *info)
JovanEps 11:4e700bbf93d7 517
JovanEps 11:4e700bbf93d7 518
JovanEps 11:4e700bbf93d7 519 /* We would like to declare a[][lda], but c does not allow it. In this
JovanEps 11:4e700bbf93d7 520 function, references to a[i][j] are written a[lda*i+j]. */
JovanEps 11:4e700bbf93d7 521 /*
JovanEps 11:4e700bbf93d7 522 dgefa factors a double precision matrix by gaussian elimination.
JovanEps 11:4e700bbf93d7 523
JovanEps 11:4e700bbf93d7 524 dgefa is usually called by dgeco, but it can be called
JovanEps 11:4e700bbf93d7 525 directly with a saving in time if rcond is not needed.
JovanEps 11:4e700bbf93d7 526 (time for dgeco) = (1 + 9/n)*(time for dgefa) .
JovanEps 11:4e700bbf93d7 527
JovanEps 11:4e700bbf93d7 528 on entry
JovanEps 11:4e700bbf93d7 529
JovanEps 11:4e700bbf93d7 530 a REAL precision[n][lda]
JovanEps 11:4e700bbf93d7 531 the matrix to be factored.
JovanEps 11:4e700bbf93d7 532
JovanEps 11:4e700bbf93d7 533 lda integer
JovanEps 11:4e700bbf93d7 534 the leading dimension of the array a .
JovanEps 11:4e700bbf93d7 535
JovanEps 11:4e700bbf93d7 536 n integer
JovanEps 11:4e700bbf93d7 537 the order of the matrix a .
JovanEps 11:4e700bbf93d7 538
JovanEps 11:4e700bbf93d7 539 on return
JovanEps 11:4e700bbf93d7 540
JovanEps 11:4e700bbf93d7 541 a an upper triangular matrix and the multipliers
JovanEps 11:4e700bbf93d7 542 which were used to obtain it.
JovanEps 11:4e700bbf93d7 543 the factorization can be written a = l*u where
JovanEps 11:4e700bbf93d7 544 l is a product of permutation and unit lower
JovanEps 11:4e700bbf93d7 545 triangular matrices and u is upper triangular.
JovanEps 11:4e700bbf93d7 546
JovanEps 11:4e700bbf93d7 547 ipvt integer[n]
JovanEps 11:4e700bbf93d7 548 an integer vector of pivot indices.
JovanEps 11:4e700bbf93d7 549
JovanEps 11:4e700bbf93d7 550 info integer
JovanEps 11:4e700bbf93d7 551 = 0 normal value.
JovanEps 11:4e700bbf93d7 552 = k if u[k][k] .eq. 0.0 . this is not an error
JovanEps 11:4e700bbf93d7 553 condition for this subroutine, but it does
JovanEps 11:4e700bbf93d7 554 indicate that dgesl or dgedi will divide by zero
JovanEps 11:4e700bbf93d7 555 if called. use rcond in dgeco for a reliable
JovanEps 11:4e700bbf93d7 556 indication of singularity.
JovanEps 11:4e700bbf93d7 557
JovanEps 11:4e700bbf93d7 558 linpack. this version dated 08/14/78 .
JovanEps 11:4e700bbf93d7 559 cleve moler, university of new mexico, argonne national lab.
JovanEps 11:4e700bbf93d7 560
JovanEps 11:4e700bbf93d7 561 functions
JovanEps 11:4e700bbf93d7 562
JovanEps 11:4e700bbf93d7 563 blas daxpy,dscal,idamax
JovanEps 11:4e700bbf93d7 564 */
JovanEps 11:4e700bbf93d7 565
JovanEps 11:4e700bbf93d7 566 {
JovanEps 11:4e700bbf93d7 567 /* internal variables */
JovanEps 11:4e700bbf93d7 568
JovanEps 11:4e700bbf93d7 569 REAL t;
JovanEps 11:4e700bbf93d7 570 int j,k,kp1,l,nm1;
JovanEps 11:4e700bbf93d7 571
JovanEps 11:4e700bbf93d7 572
JovanEps 11:4e700bbf93d7 573 /* gaussian elimination with partial pivoting */
JovanEps 11:4e700bbf93d7 574
JovanEps 11:4e700bbf93d7 575 *info = 0;
JovanEps 11:4e700bbf93d7 576 nm1 = n - 1;
JovanEps 11:4e700bbf93d7 577 if (nm1 >= 0) {
JovanEps 11:4e700bbf93d7 578 for (k = 0; k < nm1; k++) {
JovanEps 11:4e700bbf93d7 579 kp1 = k + 1;
JovanEps 11:4e700bbf93d7 580
JovanEps 11:4e700bbf93d7 581 /* find l = pivot index */
JovanEps 11:4e700bbf93d7 582
JovanEps 11:4e700bbf93d7 583 l = idamax(n-k,&a[lda*k+k],1) + k;
JovanEps 11:4e700bbf93d7 584 ipvt[k] = l;
JovanEps 11:4e700bbf93d7 585
JovanEps 11:4e700bbf93d7 586 /* zero pivot implies this column already
JovanEps 11:4e700bbf93d7 587 triangularized */
JovanEps 11:4e700bbf93d7 588
JovanEps 11:4e700bbf93d7 589 if (a[lda*k+l] != ZERO) {
JovanEps 11:4e700bbf93d7 590
JovanEps 11:4e700bbf93d7 591 /* interchange if necessary */
JovanEps 0:43b96e9650ef 592
JovanEps 11:4e700bbf93d7 593 if (l != k) {
JovanEps 11:4e700bbf93d7 594 t = a[lda*k+l];
JovanEps 11:4e700bbf93d7 595 a[lda*k+l] = a[lda*k+k];
JovanEps 11:4e700bbf93d7 596 a[lda*k+k] = t;
JovanEps 11:4e700bbf93d7 597 }
JovanEps 11:4e700bbf93d7 598
JovanEps 11:4e700bbf93d7 599 /* compute multipliers */
JovanEps 11:4e700bbf93d7 600
JovanEps 11:4e700bbf93d7 601 t = -ONE/a[lda*k+k];
JovanEps 11:4e700bbf93d7 602 dscal(n-(k+1),t,&a[lda*k+k+1],1);
JovanEps 11:4e700bbf93d7 603
JovanEps 11:4e700bbf93d7 604 /* row elimination with column indexing */
JovanEps 11:4e700bbf93d7 605
JovanEps 11:4e700bbf93d7 606 for (j = kp1; j < n; j++) {
JovanEps 11:4e700bbf93d7 607 t = a[lda*j+l];
JovanEps 11:4e700bbf93d7 608 if (l != k) {
JovanEps 11:4e700bbf93d7 609 a[lda*j+l] = a[lda*j+k];
JovanEps 11:4e700bbf93d7 610 a[lda*j+k] = t;
JovanEps 11:4e700bbf93d7 611 }
JovanEps 11:4e700bbf93d7 612 daxpy(n-(k+1),t,&a[lda*k+k+1],1,
JovanEps 11:4e700bbf93d7 613 &a[lda*j+k+1],1);
JovanEps 11:4e700bbf93d7 614 }
JovanEps 11:4e700bbf93d7 615 } else {
JovanEps 11:4e700bbf93d7 616 *info = k;
JovanEps 11:4e700bbf93d7 617 }
JovanEps 11:4e700bbf93d7 618 }
JovanEps 11:4e700bbf93d7 619 }
JovanEps 11:4e700bbf93d7 620 ipvt[n-1] = n-1;
JovanEps 11:4e700bbf93d7 621 if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
JovanEps 11:4e700bbf93d7 622 return;
JovanEps 11:4e700bbf93d7 623 }
JovanEps 11:4e700bbf93d7 624
JovanEps 11:4e700bbf93d7 625 /*----------------------*/
JovanEps 11:4e700bbf93d7 626
JovanEps 11:4e700bbf93d7 627 void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job )
JovanEps 11:4e700bbf93d7 628
JovanEps 11:4e700bbf93d7 629
JovanEps 11:4e700bbf93d7 630 /* We would like to declare a[][lda], but c does not allow it. In this
JovanEps 11:4e700bbf93d7 631 function, references to a[i][j] are written a[lda*i+j]. */
JovanEps 11:4e700bbf93d7 632
JovanEps 11:4e700bbf93d7 633 /*
JovanEps 11:4e700bbf93d7 634 dgesl solves the double precision system
JovanEps 11:4e700bbf93d7 635 a * x = b or trans(a) * x = b
JovanEps 11:4e700bbf93d7 636 using the factors computed by dgeco or dgefa.
JovanEps 11:4e700bbf93d7 637
JovanEps 11:4e700bbf93d7 638 on entry
JovanEps 11:4e700bbf93d7 639
JovanEps 11:4e700bbf93d7 640 a double precision[n][lda]
JovanEps 11:4e700bbf93d7 641 the output from dgeco or dgefa.
JovanEps 11:4e700bbf93d7 642
JovanEps 11:4e700bbf93d7 643 lda integer
JovanEps 11:4e700bbf93d7 644 the leading dimension of the array a .
JovanEps 11:4e700bbf93d7 645
JovanEps 11:4e700bbf93d7 646 n integer
JovanEps 11:4e700bbf93d7 647 the order of the matrix a .
JovanEps 11:4e700bbf93d7 648
JovanEps 11:4e700bbf93d7 649 ipvt integer[n]
JovanEps 11:4e700bbf93d7 650 the pivot vector from dgeco or dgefa.
JovanEps 11:4e700bbf93d7 651
JovanEps 11:4e700bbf93d7 652 b double precision[n]
JovanEps 11:4e700bbf93d7 653 the right hand side vector.
JovanEps 11:4e700bbf93d7 654
JovanEps 11:4e700bbf93d7 655 job integer
JovanEps 11:4e700bbf93d7 656 = 0 to solve a*x = b ,
JovanEps 11:4e700bbf93d7 657 = nonzero to solve trans(a)*x = b where
JovanEps 11:4e700bbf93d7 658 trans(a) is the transpose.
JovanEps 11:4e700bbf93d7 659
JovanEps 11:4e700bbf93d7 660 on return
JovanEps 11:4e700bbf93d7 661
JovanEps 11:4e700bbf93d7 662 b the solution vector x .
JovanEps 11:4e700bbf93d7 663
JovanEps 11:4e700bbf93d7 664 error condition
JovanEps 11:4e700bbf93d7 665
JovanEps 11:4e700bbf93d7 666 a division by zero will occur if the input factor contains a
JovanEps 11:4e700bbf93d7 667 zero on the diagonal. technically this indicates singularity
JovanEps 11:4e700bbf93d7 668 but it is often caused by improper arguments or improper
JovanEps 11:4e700bbf93d7 669 setting of lda . it will not occur if the subroutines are
JovanEps 11:4e700bbf93d7 670 called correctly and if dgeco has set rcond .gt. 0.0
JovanEps 11:4e700bbf93d7 671 or dgefa has set info .eq. 0 .
JovanEps 11:4e700bbf93d7 672
JovanEps 11:4e700bbf93d7 673 to compute inverse(a) * c where c is a matrix
JovanEps 11:4e700bbf93d7 674 with p columns
JovanEps 11:4e700bbf93d7 675 dgeco(a,lda,n,ipvt,rcond,z)
JovanEps 11:4e700bbf93d7 676 if (!rcond is too small){
JovanEps 11:4e700bbf93d7 677 for (j=0,j<p,j++)
JovanEps 11:4e700bbf93d7 678 dgesl(a,lda,n,ipvt,c[j][0],0);
JovanEps 11:4e700bbf93d7 679 }
JovanEps 11:4e700bbf93d7 680
JovanEps 11:4e700bbf93d7 681 linpack. this version dated 08/14/78 .
JovanEps 11:4e700bbf93d7 682 cleve moler, university of new mexico, argonne national lab.
JovanEps 11:4e700bbf93d7 683
JovanEps 11:4e700bbf93d7 684 functions
JovanEps 11:4e700bbf93d7 685
JovanEps 11:4e700bbf93d7 686 blas daxpy,ddot
JovanEps 11:4e700bbf93d7 687 */
JovanEps 11:4e700bbf93d7 688 {
JovanEps 11:4e700bbf93d7 689 /* internal variables */
JovanEps 11:4e700bbf93d7 690
JovanEps 11:4e700bbf93d7 691 REAL t;
JovanEps 11:4e700bbf93d7 692 int k,kb,l,nm1;
JovanEps 11:4e700bbf93d7 693
JovanEps 11:4e700bbf93d7 694 nm1 = n - 1;
JovanEps 11:4e700bbf93d7 695 if (job == 0) {
JovanEps 11:4e700bbf93d7 696
JovanEps 11:4e700bbf93d7 697 /* job = 0 , solve a * x = b
JovanEps 11:4e700bbf93d7 698 first solve l*y = b */
JovanEps 11:4e700bbf93d7 699
JovanEps 11:4e700bbf93d7 700 if (nm1 >= 1) {
JovanEps 11:4e700bbf93d7 701 for (k = 0; k < nm1; k++) {
JovanEps 11:4e700bbf93d7 702 l = ipvt[k];
JovanEps 11:4e700bbf93d7 703 t = b[l];
JovanEps 11:4e700bbf93d7 704 if (l != k) {
JovanEps 11:4e700bbf93d7 705 b[l] = b[k];
JovanEps 11:4e700bbf93d7 706 b[k] = t;
JovanEps 11:4e700bbf93d7 707 }
JovanEps 11:4e700bbf93d7 708 daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 );
JovanEps 11:4e700bbf93d7 709 }
JovanEps 11:4e700bbf93d7 710 }
JovanEps 11:4e700bbf93d7 711
JovanEps 11:4e700bbf93d7 712 /* now solve u*x = y */
JovanEps 11:4e700bbf93d7 713
JovanEps 11:4e700bbf93d7 714 for (kb = 0; kb < n; kb++) {
JovanEps 11:4e700bbf93d7 715 k = n - (kb + 1);
JovanEps 11:4e700bbf93d7 716 b[k] = b[k]/a[lda*k+k];
JovanEps 11:4e700bbf93d7 717 t = -b[k];
JovanEps 11:4e700bbf93d7 718 daxpy(k,t,&a[lda*k+0],1,&b[0],1 );
JovanEps 11:4e700bbf93d7 719 }
JovanEps 11:4e700bbf93d7 720 } else {
JovanEps 11:4e700bbf93d7 721
JovanEps 11:4e700bbf93d7 722 /* job = nonzero, solve trans(a) * x = b
JovanEps 11:4e700bbf93d7 723 first solve trans(u)*y = b */
JovanEps 11:4e700bbf93d7 724
JovanEps 11:4e700bbf93d7 725 for (k = 0; k < n; k++) {
JovanEps 11:4e700bbf93d7 726 t = ddot(k,&a[lda*k+0],1,&b[0],1);
JovanEps 11:4e700bbf93d7 727 b[k] = (b[k] - t)/a[lda*k+k];
JovanEps 11:4e700bbf93d7 728 }
JovanEps 11:4e700bbf93d7 729
JovanEps 11:4e700bbf93d7 730 /* now solve trans(l)*x = y */
JovanEps 11:4e700bbf93d7 731
JovanEps 11:4e700bbf93d7 732 if (nm1 >= 1) {
JovanEps 11:4e700bbf93d7 733 for (kb = 1; kb < nm1; kb++) {
JovanEps 11:4e700bbf93d7 734 k = n - (kb+1);
JovanEps 11:4e700bbf93d7 735 b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
JovanEps 11:4e700bbf93d7 736 l = ipvt[k];
JovanEps 11:4e700bbf93d7 737 if (l != k) {
JovanEps 11:4e700bbf93d7 738 t = b[l];
JovanEps 11:4e700bbf93d7 739 b[l] = b[k];
JovanEps 11:4e700bbf93d7 740 b[k] = t;
JovanEps 11:4e700bbf93d7 741 }
JovanEps 11:4e700bbf93d7 742 }
JovanEps 11:4e700bbf93d7 743 }
JovanEps 11:4e700bbf93d7 744 }
JovanEps 11:4e700bbf93d7 745 return;
JovanEps 11:4e700bbf93d7 746 }
JovanEps 11:4e700bbf93d7 747
JovanEps 11:4e700bbf93d7 748 /*----------------------*/
JovanEps 11:4e700bbf93d7 749
JovanEps 11:4e700bbf93d7 750 void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy)
JovanEps 11:4e700bbf93d7 751 /*
JovanEps 11:4e700bbf93d7 752 constant times a vector plus a vector.
JovanEps 11:4e700bbf93d7 753 jack dongarra, linpack, 3/11/78.
JovanEps 11:4e700bbf93d7 754 */
JovanEps 11:4e700bbf93d7 755
JovanEps 11:4e700bbf93d7 756 {
JovanEps 11:4e700bbf93d7 757 int i,ix,iy,m,mp1;
JovanEps 11:4e700bbf93d7 758
JovanEps 11:4e700bbf93d7 759 mp1 = 0;
JovanEps 11:4e700bbf93d7 760 m = 0;
JovanEps 11:4e700bbf93d7 761
JovanEps 11:4e700bbf93d7 762 if(n <= 0) return;
JovanEps 11:4e700bbf93d7 763 if (da == ZERO) return;
JovanEps 11:4e700bbf93d7 764
JovanEps 11:4e700bbf93d7 765 if(incx != 1 || incy != 1) {
JovanEps 11:4e700bbf93d7 766
JovanEps 11:4e700bbf93d7 767 /* code for unequal increments or equal increments
JovanEps 11:4e700bbf93d7 768 not equal to 1 */
JovanEps 11:4e700bbf93d7 769
JovanEps 11:4e700bbf93d7 770 ix = 0;
JovanEps 11:4e700bbf93d7 771 iy = 0;
JovanEps 11:4e700bbf93d7 772 if(incx < 0) ix = (-n+1)*incx;
JovanEps 11:4e700bbf93d7 773 if(incy < 0)iy = (-n+1)*incy;
JovanEps 11:4e700bbf93d7 774 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 775 dy[iy] = dy[iy] + da*dx[ix];
JovanEps 11:4e700bbf93d7 776 ix = ix + incx;
JovanEps 11:4e700bbf93d7 777 iy = iy + incy;
JovanEps 11:4e700bbf93d7 778
JovanEps 11:4e700bbf93d7 779 }
JovanEps 11:4e700bbf93d7 780 return;
JovanEps 11:4e700bbf93d7 781 }
JovanEps 11:4e700bbf93d7 782
JovanEps 11:4e700bbf93d7 783 /* code for both increments equal to 1 */
JovanEps 11:4e700bbf93d7 784
JovanEps 11:4e700bbf93d7 785
JovanEps 11:4e700bbf93d7 786 #ifdef ROLL
JovanEps 11:4e700bbf93d7 787
JovanEps 11:4e700bbf93d7 788 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 789 dy[i] = dy[i] + da*dx[i];
JovanEps 11:4e700bbf93d7 790 }
JovanEps 11:4e700bbf93d7 791
JovanEps 11:4e700bbf93d7 792
JovanEps 11:4e700bbf93d7 793 #endif
JovanEps 11:4e700bbf93d7 794
JovanEps 11:4e700bbf93d7 795 #ifdef UNROLL
JovanEps 11:4e700bbf93d7 796
JovanEps 11:4e700bbf93d7 797 m = n % 4;
JovanEps 11:4e700bbf93d7 798 if ( m != 0) {
JovanEps 11:4e700bbf93d7 799 for (i = 0; i < m; i++)
JovanEps 11:4e700bbf93d7 800 dy[i] = dy[i] + da*dx[i];
JovanEps 11:4e700bbf93d7 801
JovanEps 11:4e700bbf93d7 802 if (n < 4) return;
JovanEps 11:4e700bbf93d7 803 }
JovanEps 11:4e700bbf93d7 804 for (i = m; i < n; i = i + 4) {
JovanEps 11:4e700bbf93d7 805 dy[i] = dy[i] + da*dx[i];
JovanEps 11:4e700bbf93d7 806 dy[i+1] = dy[i+1] + da*dx[i+1];
JovanEps 11:4e700bbf93d7 807 dy[i+2] = dy[i+2] + da*dx[i+2];
JovanEps 11:4e700bbf93d7 808 dy[i+3] = dy[i+3] + da*dx[i+3];
JovanEps 11:4e700bbf93d7 809
JovanEps 11:4e700bbf93d7 810 }
JovanEps 11:4e700bbf93d7 811
JovanEps 11:4e700bbf93d7 812 #endif
JovanEps 11:4e700bbf93d7 813 return;
JovanEps 11:4e700bbf93d7 814 }
JovanEps 11:4e700bbf93d7 815
JovanEps 11:4e700bbf93d7 816 /*----------------------*/
JovanEps 11:4e700bbf93d7 817
JovanEps 11:4e700bbf93d7 818 REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy)
JovanEps 11:4e700bbf93d7 819 /*
JovanEps 11:4e700bbf93d7 820 forms the dot product of two vectors.
JovanEps 11:4e700bbf93d7 821 jack dongarra, linpack, 3/11/78.
JovanEps 11:4e700bbf93d7 822 */
JovanEps 11:4e700bbf93d7 823
JovanEps 11:4e700bbf93d7 824 {
JovanEps 11:4e700bbf93d7 825 REAL dtemp;
JovanEps 11:4e700bbf93d7 826 int i,ix,iy,m,mp1;
JovanEps 11:4e700bbf93d7 827
JovanEps 11:4e700bbf93d7 828 mp1 = 0;
JovanEps 11:4e700bbf93d7 829 m = 0;
JovanEps 11:4e700bbf93d7 830
JovanEps 11:4e700bbf93d7 831 dtemp = ZERO;
JovanEps 11:4e700bbf93d7 832
JovanEps 11:4e700bbf93d7 833 if(n <= 0) return(ZERO);
JovanEps 11:4e700bbf93d7 834
JovanEps 11:4e700bbf93d7 835 if(incx != 1 || incy != 1) {
JovanEps 11:4e700bbf93d7 836
JovanEps 11:4e700bbf93d7 837 /* code for unequal increments or equal increments
JovanEps 11:4e700bbf93d7 838 not equal to 1 */
JovanEps 11:4e700bbf93d7 839
JovanEps 11:4e700bbf93d7 840 ix = 0;
JovanEps 11:4e700bbf93d7 841 iy = 0;
JovanEps 11:4e700bbf93d7 842 if (incx < 0) ix = (-n+1)*incx;
JovanEps 11:4e700bbf93d7 843 if (incy < 0) iy = (-n+1)*incy;
JovanEps 11:4e700bbf93d7 844 for (i = 0; i < n; i++) {
JovanEps 11:4e700bbf93d7 845 dtemp = dtemp + dx[ix]*dy[iy];
JovanEps 11:4e700bbf93d7 846 ix = ix + incx;
JovanEps 11:4e700bbf93d7 847 iy = iy + incy;
JovanEps 11:4e700bbf93d7 848
JovanEps 11:4e700bbf93d7 849 }
JovanEps 11:4e700bbf93d7 850 return(dtemp);
JovanEps 11:4e700bbf93d7 851 }
JovanEps 11:4e700bbf93d7 852
JovanEps 11:4e700bbf93d7 853 /* code for both increments equal to 1 */
JovanEps 11:4e700bbf93d7 854
JovanEps 11:4e700bbf93d7 855
JovanEps 11:4e700bbf93d7 856 #ifdef ROLL
JovanEps 11:4e700bbf93d7 857
JovanEps 11:4e700bbf93d7 858 for (i=0; i < n; i++)
JovanEps 11:4e700bbf93d7 859 dtemp = dtemp + dx[i]*dy[i];
JovanEps 11:4e700bbf93d7 860
JovanEps 11:4e700bbf93d7 861 return(dtemp);
JovanEps 11:4e700bbf93d7 862
JovanEps 11:4e700bbf93d7 863 #endif
JovanEps 0:43b96e9650ef 864
JovanEps 11:4e700bbf93d7 865 #ifdef UNROLL
JovanEps 11:4e700bbf93d7 866
JovanEps 11:4e700bbf93d7 867
JovanEps 11:4e700bbf93d7 868 m = n % 5;
JovanEps 11:4e700bbf93d7 869 if (m != 0) {
JovanEps 11:4e700bbf93d7 870 for (i = 0; i < m; i++)
JovanEps 11:4e700bbf93d7 871 dtemp = dtemp + dx[i]*dy[i];
JovanEps 11:4e700bbf93d7 872 if (n < 5) return(dtemp);
JovanEps 11:4e700bbf93d7 873 }
JovanEps 11:4e700bbf93d7 874 for (i = m; i < n; i = i + 5) {
JovanEps 11:4e700bbf93d7 875 dtemp = dtemp + dx[i]*dy[i] +
JovanEps 11:4e700bbf93d7 876 dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
JovanEps 11:4e700bbf93d7 877 dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
JovanEps 11:4e700bbf93d7 878 }
JovanEps 11:4e700bbf93d7 879 return(dtemp);
JovanEps 11:4e700bbf93d7 880
JovanEps 11:4e700bbf93d7 881 #endif
JovanEps 11:4e700bbf93d7 882
JovanEps 11:4e700bbf93d7 883 }
JovanEps 11:4e700bbf93d7 884
JovanEps 11:4e700bbf93d7 885 /*----------------------*/
JovanEps 11:4e700bbf93d7 886 void dscal(int n, REAL da, REAL dx[], int incx)
JovanEps 11:4e700bbf93d7 887
JovanEps 11:4e700bbf93d7 888 /* scales a vector by a constant.
JovanEps 11:4e700bbf93d7 889 jack dongarra, linpack, 3/11/78.
JovanEps 11:4e700bbf93d7 890 */
JovanEps 11:4e700bbf93d7 891
JovanEps 11:4e700bbf93d7 892 {
JovanEps 11:4e700bbf93d7 893 int i,m,mp1,nincx;
JovanEps 11:4e700bbf93d7 894
JovanEps 11:4e700bbf93d7 895 mp1 = 0;
JovanEps 11:4e700bbf93d7 896 m = 0;
JovanEps 11:4e700bbf93d7 897
JovanEps 11:4e700bbf93d7 898 if(n <= 0)return;
JovanEps 11:4e700bbf93d7 899 if(incx != 1) {
JovanEps 11:4e700bbf93d7 900
JovanEps 11:4e700bbf93d7 901 /* code for increment not equal to 1 */
JovanEps 11:4e700bbf93d7 902
JovanEps 11:4e700bbf93d7 903 nincx = n*incx;
JovanEps 11:4e700bbf93d7 904 for (i = 0; i < nincx; i = i + incx)
JovanEps 11:4e700bbf93d7 905 dx[i] = da*dx[i];
JovanEps 11:4e700bbf93d7 906
JovanEps 11:4e700bbf93d7 907 return;
JovanEps 11:4e700bbf93d7 908 }
JovanEps 11:4e700bbf93d7 909
JovanEps 11:4e700bbf93d7 910 /* code for increment equal to 1 */
JovanEps 11:4e700bbf93d7 911
JovanEps 11:4e700bbf93d7 912
JovanEps 11:4e700bbf93d7 913 #ifdef ROLL
JovanEps 11:4e700bbf93d7 914
JovanEps 11:4e700bbf93d7 915 for (i = 0; i < n; i++)
JovanEps 11:4e700bbf93d7 916 dx[i] = da*dx[i];
JovanEps 11:4e700bbf93d7 917
JovanEps 11:4e700bbf93d7 918
JovanEps 11:4e700bbf93d7 919 #endif
JovanEps 11:4e700bbf93d7 920
JovanEps 11:4e700bbf93d7 921 #ifdef UNROLL
JovanEps 11:4e700bbf93d7 922
JovanEps 11:4e700bbf93d7 923
JovanEps 11:4e700bbf93d7 924 m = n % 5;
JovanEps 11:4e700bbf93d7 925 if (m != 0) {
JovanEps 11:4e700bbf93d7 926 for (i = 0; i < m; i++)
JovanEps 11:4e700bbf93d7 927 dx[i] = da*dx[i];
JovanEps 11:4e700bbf93d7 928 if (n < 5) return;
JovanEps 11:4e700bbf93d7 929 }
JovanEps 11:4e700bbf93d7 930 for (i = m; i < n; i = i + 5) {
JovanEps 11:4e700bbf93d7 931 dx[i] = da*dx[i];
JovanEps 11:4e700bbf93d7 932 dx[i+1] = da*dx[i+1];
JovanEps 11:4e700bbf93d7 933 dx[i+2] = da*dx[i+2];
JovanEps 11:4e700bbf93d7 934 dx[i+3] = da*dx[i+3];
JovanEps 11:4e700bbf93d7 935 dx[i+4] = da*dx[i+4];
JovanEps 11:4e700bbf93d7 936 }
JovanEps 11:4e700bbf93d7 937
JovanEps 11:4e700bbf93d7 938 #endif
JovanEps 11:4e700bbf93d7 939
JovanEps 11:4e700bbf93d7 940 }
JovanEps 11:4e700bbf93d7 941
JovanEps 11:4e700bbf93d7 942 /*----------------------*/
JovanEps 11:4e700bbf93d7 943 int idamax(int n, REAL dx[], int incx)
JovanEps 11:4e700bbf93d7 944
JovanEps 11:4e700bbf93d7 945 /*
JovanEps 11:4e700bbf93d7 946 finds the index of element having max. absolute value.
JovanEps 11:4e700bbf93d7 947 jack dongarra, linpack, 3/11/78.
JovanEps 11:4e700bbf93d7 948 */
JovanEps 11:4e700bbf93d7 949
JovanEps 11:4e700bbf93d7 950
JovanEps 11:4e700bbf93d7 951 {
JovanEps 11:4e700bbf93d7 952 REAL dmax;
JovanEps 11:4e700bbf93d7 953 int i, ix, itemp;
JovanEps 11:4e700bbf93d7 954
JovanEps 11:4e700bbf93d7 955 if( n < 1 ) return(-1);
JovanEps 11:4e700bbf93d7 956 if(n ==1 ) return(0);
JovanEps 11:4e700bbf93d7 957 if(incx != 1) {
JovanEps 11:4e700bbf93d7 958
JovanEps 11:4e700bbf93d7 959 /* code for increment not equal to 1 */
JovanEps 11:4e700bbf93d7 960
JovanEps 11:4e700bbf93d7 961 ix = 1;
JovanEps 11:4e700bbf93d7 962 dmax = fabs((double)dx[0]);
JovanEps 11:4e700bbf93d7 963 ix = ix + incx;
JovanEps 11:4e700bbf93d7 964 for (i = 1; i < n; i++) {
JovanEps 11:4e700bbf93d7 965 if(fabs((double)dx[ix]) > dmax) {
JovanEps 11:4e700bbf93d7 966 itemp = i;
JovanEps 11:4e700bbf93d7 967 dmax = fabs((double)dx[ix]);
JovanEps 11:4e700bbf93d7 968 }
JovanEps 11:4e700bbf93d7 969 ix = ix + incx;
JovanEps 11:4e700bbf93d7 970 }
JovanEps 11:4e700bbf93d7 971 } else {
JovanEps 11:4e700bbf93d7 972
JovanEps 11:4e700bbf93d7 973 /* code for increment equal to 1 */
JovanEps 11:4e700bbf93d7 974
JovanEps 11:4e700bbf93d7 975 itemp = 0;
JovanEps 11:4e700bbf93d7 976 dmax = fabs((double)dx[0]);
JovanEps 11:4e700bbf93d7 977 for (i = 1; i < n; i++) {
JovanEps 11:4e700bbf93d7 978 if(fabs((double)dx[i]) > dmax) {
JovanEps 11:4e700bbf93d7 979 itemp = i;
JovanEps 11:4e700bbf93d7 980 dmax = fabs((double)dx[i]);
JovanEps 11:4e700bbf93d7 981 }
JovanEps 11:4e700bbf93d7 982 }
JovanEps 11:4e700bbf93d7 983 }
JovanEps 11:4e700bbf93d7 984 return (itemp);
JovanEps 11:4e700bbf93d7 985 }
JovanEps 11:4e700bbf93d7 986
JovanEps 11:4e700bbf93d7 987 /*----------------------*/
JovanEps 11:4e700bbf93d7 988 REAL epslon (REAL x)
JovanEps 11:4e700bbf93d7 989
JovanEps 11:4e700bbf93d7 990 /*
JovanEps 11:4e700bbf93d7 991 estimate unit roundoff in quantities of size x.
JovanEps 11:4e700bbf93d7 992 */
JovanEps 0:43b96e9650ef 993
JovanEps 11:4e700bbf93d7 994 {
JovanEps 11:4e700bbf93d7 995 REAL a,b,c,eps;
JovanEps 11:4e700bbf93d7 996 /*
JovanEps 11:4e700bbf93d7 997 this program should function properly on all systems
JovanEps 11:4e700bbf93d7 998 satisfying the following two assumptions,
JovanEps 11:4e700bbf93d7 999 1. the base used in representing dfloating point
JovanEps 11:4e700bbf93d7 1000 numbers is not a power of three.
JovanEps 11:4e700bbf93d7 1001 2. the quantity a in statement 10 is represented to
JovanEps 11:4e700bbf93d7 1002 the accuracy used in dfloating point variables
JovanEps 11:4e700bbf93d7 1003 that are stored in memory.
JovanEps 11:4e700bbf93d7 1004 the statement number 10 and the go to 10 are intended to
JovanEps 11:4e700bbf93d7 1005 force optimizing compilers to generate code satisfying
JovanEps 11:4e700bbf93d7 1006 assumption 2.
JovanEps 11:4e700bbf93d7 1007 under these assumptions, it should be true that,
JovanEps 11:4e700bbf93d7 1008 a is not exactly equal to four-thirds,
JovanEps 11:4e700bbf93d7 1009 b has a zero for its last bit or digit,
JovanEps 11:4e700bbf93d7 1010 c is not exactly equal to one,
JovanEps 11:4e700bbf93d7 1011 eps measures the separation of 1.0 from
JovanEps 11:4e700bbf93d7 1012 the next larger dfloating point number.
JovanEps 11:4e700bbf93d7 1013 the developers of eispack would appreciate being informed
JovanEps 11:4e700bbf93d7 1014 about any systems where these assumptions do not hold.
JovanEps 11:4e700bbf93d7 1015
JovanEps 11:4e700bbf93d7 1016 *****************************************************************
JovanEps 11:4e700bbf93d7 1017 this routine is one of the auxiliary routines used by eispack iii
JovanEps 11:4e700bbf93d7 1018 to avoid machine dependencies.
JovanEps 11:4e700bbf93d7 1019 *****************************************************************
JovanEps 11:4e700bbf93d7 1020
JovanEps 11:4e700bbf93d7 1021 this version dated 4/6/83.
JovanEps 11:4e700bbf93d7 1022 */
JovanEps 11:4e700bbf93d7 1023
JovanEps 11:4e700bbf93d7 1024 a = 4.0e0/3.0e0;
JovanEps 11:4e700bbf93d7 1025 eps = ZERO;
JovanEps 11:4e700bbf93d7 1026 while (eps == ZERO) {
JovanEps 11:4e700bbf93d7 1027 b = a - ONE;
JovanEps 11:4e700bbf93d7 1028 c = b + b + b;
JovanEps 11:4e700bbf93d7 1029 eps = fabs((double)(c-ONE));
JovanEps 11:4e700bbf93d7 1030 }
JovanEps 11:4e700bbf93d7 1031 return(eps*fabs((double)x));
JovanEps 11:4e700bbf93d7 1032 }
JovanEps 11:4e700bbf93d7 1033
JovanEps 11:4e700bbf93d7 1034 /*----------------------*/
JovanEps 11:4e700bbf93d7 1035 void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
JovanEps 11:4e700bbf93d7 1036
JovanEps 11:4e700bbf93d7 1037
JovanEps 11:4e700bbf93d7 1038 /* We would like to declare m[][ldm], but c does not allow it. In this
JovanEps 11:4e700bbf93d7 1039 function, references to m[i][j] are written m[ldm*i+j]. */
JovanEps 11:4e700bbf93d7 1040
JovanEps 11:4e700bbf93d7 1041 /*
JovanEps 11:4e700bbf93d7 1042 purpose:
JovanEps 11:4e700bbf93d7 1043 multiply matrix m times vector x and add the result to vector y.
JovanEps 11:4e700bbf93d7 1044
JovanEps 11:4e700bbf93d7 1045 parameters:
JovanEps 11:4e700bbf93d7 1046
JovanEps 11:4e700bbf93d7 1047 n1 integer, number of elements in vector y, and number of rows in
JovanEps 11:4e700bbf93d7 1048 matrix m
JovanEps 11:4e700bbf93d7 1049
JovanEps 11:4e700bbf93d7 1050 y double [n1], vector of length n1 to which is added
JovanEps 11:4e700bbf93d7 1051 the product m*x
JovanEps 11:4e700bbf93d7 1052
JovanEps 11:4e700bbf93d7 1053 n2 integer, number of elements in vector x, and number of columns
JovanEps 11:4e700bbf93d7 1054 in matrix m
JovanEps 11:4e700bbf93d7 1055
JovanEps 11:4e700bbf93d7 1056 ldm integer, leading dimension of array m
JovanEps 11:4e700bbf93d7 1057
JovanEps 11:4e700bbf93d7 1058 x double [n2], vector of length n2
JovanEps 11:4e700bbf93d7 1059
JovanEps 11:4e700bbf93d7 1060 m double [ldm][n2], matrix of n1 rows and n2 columns
JovanEps 0:43b96e9650ef 1061
JovanEps 11:4e700bbf93d7 1062 ----------------------------------------------------------------------
JovanEps 11:4e700bbf93d7 1063 */
JovanEps 11:4e700bbf93d7 1064 {
JovanEps 11:4e700bbf93d7 1065 int j,i,jmin;
JovanEps 11:4e700bbf93d7 1066 /* cleanup odd vector */
JovanEps 11:4e700bbf93d7 1067
JovanEps 11:4e700bbf93d7 1068 j = n2 % 2;
JovanEps 11:4e700bbf93d7 1069 if (j >= 1) {
JovanEps 11:4e700bbf93d7 1070 j = j - 1;
JovanEps 11:4e700bbf93d7 1071 for (i = 0; i < n1; i++)
JovanEps 11:4e700bbf93d7 1072 y[i] = (y[i]) + x[j]*m[ldm*j+i];
JovanEps 11:4e700bbf93d7 1073 }
JovanEps 11:4e700bbf93d7 1074
JovanEps 11:4e700bbf93d7 1075 /* cleanup odd group of two vectors */
JovanEps 11:4e700bbf93d7 1076
JovanEps 11:4e700bbf93d7 1077 j = n2 % 4;
JovanEps 11:4e700bbf93d7 1078 if (j >= 2) {
JovanEps 11:4e700bbf93d7 1079 j = j - 1;
JovanEps 11:4e700bbf93d7 1080 for (i = 0; i < n1; i++)
JovanEps 11:4e700bbf93d7 1081 y[i] = ( (y[i])
JovanEps 11:4e700bbf93d7 1082 + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
JovanEps 11:4e700bbf93d7 1083 }
JovanEps 11:4e700bbf93d7 1084
JovanEps 11:4e700bbf93d7 1085 /* cleanup odd group of four vectors */
JovanEps 11:4e700bbf93d7 1086
JovanEps 11:4e700bbf93d7 1087 j = n2 % 8;
JovanEps 11:4e700bbf93d7 1088 if (j >= 4) {
JovanEps 11:4e700bbf93d7 1089 j = j - 1;
JovanEps 11:4e700bbf93d7 1090 for (i = 0; i < n1; i++)
JovanEps 11:4e700bbf93d7 1091 y[i] = ((( (y[i])
JovanEps 11:4e700bbf93d7 1092 + x[j-3]*m[ldm*(j-3)+i])
JovanEps 11:4e700bbf93d7 1093 + x[j-2]*m[ldm*(j-2)+i])
JovanEps 11:4e700bbf93d7 1094 + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i];
JovanEps 11:4e700bbf93d7 1095 }
JovanEps 11:4e700bbf93d7 1096
JovanEps 11:4e700bbf93d7 1097 /* cleanup odd group of eight vectors */
JovanEps 0:43b96e9650ef 1098
JovanEps 11:4e700bbf93d7 1099 j = n2 % 16;
JovanEps 11:4e700bbf93d7 1100 if (j >= 8) {
JovanEps 11:4e700bbf93d7 1101 j = j - 1;
JovanEps 11:4e700bbf93d7 1102 for (i = 0; i < n1; i++)
JovanEps 11:4e700bbf93d7 1103 y[i] = ((((((( (y[i])
JovanEps 11:4e700bbf93d7 1104 + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i])
JovanEps 11:4e700bbf93d7 1105 + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i])
JovanEps 11:4e700bbf93d7 1106 + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i])
JovanEps 11:4e700bbf93d7 1107 + x[j-1]*m[ldm*(j-1)+i]) + x[j] *m[ldm*j+i];
JovanEps 11:4e700bbf93d7 1108 }
JovanEps 11:4e700bbf93d7 1109
JovanEps 11:4e700bbf93d7 1110 /* main loop - groups of sixteen vectors */
JovanEps 11:4e700bbf93d7 1111
JovanEps 11:4e700bbf93d7 1112 jmin = (n2%16)+16;
JovanEps 11:4e700bbf93d7 1113 for (j = jmin-1; j < n2; j = j + 16) {
JovanEps 11:4e700bbf93d7 1114 for (i = 0; i < n1; i++)
JovanEps 11:4e700bbf93d7 1115 y[i] = ((((((((((((((( (y[i])
JovanEps 11:4e700bbf93d7 1116 + x[j-15]*m[ldm*(j-15)+i])
JovanEps 11:4e700bbf93d7 1117 + x[j-14]*m[ldm*(j-14)+i])
JovanEps 11:4e700bbf93d7 1118 + x[j-13]*m[ldm*(j-13)+i])
JovanEps 11:4e700bbf93d7 1119 + x[j-12]*m[ldm*(j-12)+i])
JovanEps 11:4e700bbf93d7 1120 + x[j-11]*m[ldm*(j-11)+i])
JovanEps 11:4e700bbf93d7 1121 + x[j-10]*m[ldm*(j-10)+i])
JovanEps 11:4e700bbf93d7 1122 + x[j- 9]*m[ldm*(j- 9)+i])
JovanEps 11:4e700bbf93d7 1123 + x[j- 8]*m[ldm*(j- 8)+i])
JovanEps 11:4e700bbf93d7 1124 + x[j- 7]*m[ldm*(j- 7)+i])
JovanEps 11:4e700bbf93d7 1125 + x[j- 6]*m[ldm*(j- 6)+i])
JovanEps 11:4e700bbf93d7 1126 + x[j- 5]*m[ldm*(j- 5)+i])
JovanEps 11:4e700bbf93d7 1127 + x[j- 4]*m[ldm*(j- 4)+i])
JovanEps 11:4e700bbf93d7 1128 + x[j- 3]*m[ldm*(j- 3)+i])
JovanEps 11:4e700bbf93d7 1129 + x[j- 2]*m[ldm*(j- 2)+i])
JovanEps 11:4e700bbf93d7 1130 + x[j- 1]*m[ldm*(j- 1)+i])
JovanEps 11:4e700bbf93d7 1131 + x[j] *m[ldm*j+i];
JovanEps 11:4e700bbf93d7 1132 }
JovanEps 11:4e700bbf93d7 1133 return;
JovanEps 11:4e700bbf93d7 1134 }
JovanEps 11:4e700bbf93d7 1135 //*----------------------------------------
JovanEps 2:273153e44338 1136 int main()
JovanEps 0:43b96e9650ef 1137 {
JovanEps 11:4e700bbf93d7 1138 pc.baud(9600);
JovanEps 6:5e0f3eedaf66 1139 while(1) {
JovanEps 6:5e0f3eedaf66 1140
JovanEps 2:273153e44338 1141 pc.printf("Starting benchmark...\n");
JovanEps 6:5e0f3eedaf66 1142
JovanEps 2:273153e44338 1143 do_benchmark();
JovanEps 6:5e0f3eedaf66 1144
JovanEps 2:273153e44338 1145 pc.printf(" kraj \n\n");
JovanEps 2:273153e44338 1146 }
JovanEps 2:273153e44338 1147 }