Jovan Ivković
/
Linpack
Linpack port to ARM MCU from Arduino IDE and roylongbottom.org ARM based source
main.cpp@12:623ce727d4fa, 2017-05-24 (annotated)
- Committer:
- JovanEps
- Date:
- Wed May 24 19:23:32 2017 +0000
- Revision:
- 12:623ce727d4fa
- Parent:
- 11:4e700bbf93d7
Alpha - RC
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
JovanEps | 0:43b96e9650ef | 1 | //******************************************************** |
JovanEps | 11:4e700bbf93d7 | 2 | //** RC Apha --- based on roylongbottom.org ARM ****** |
JovanEps | 11:4e700bbf93d7 | 3 | //** Nucleo-64-144 Stm32F103 to F767 benchmark ****** |
JovanEps | 11:4e700bbf93d7 | 4 | //** Limpack -port form Arduino IDE ****** |
JovanEps | 11:4e700bbf93d7 | 5 | //** Jovan Ivkovic - 2017 ****** |
JovanEps | 0:43b96e9650ef | 6 | //******************************************************** |
JovanEps | 11:4e700bbf93d7 | 7 | #define GCCARMDP |
JovanEps | 11:4e700bbf93d7 | 8 | |
JovanEps | 11:4e700bbf93d7 | 9 | #define UNROLL |
JovanEps | 11:4e700bbf93d7 | 10 | #define DP |
JovanEps | 11:4e700bbf93d7 | 11 | |
JovanEps | 11:4e700bbf93d7 | 12 | #ifdef SP |
JovanEps | 11:4e700bbf93d7 | 13 | #define REAL float |
JovanEps | 11:4e700bbf93d7 | 14 | #define ZERO 0.0 |
JovanEps | 11:4e700bbf93d7 | 15 | #define ONE 1.0 |
JovanEps | 11:4e700bbf93d7 | 16 | #define PREC "Single" |
JovanEps | 11:4e700bbf93d7 | 17 | #endif |
JovanEps | 11:4e700bbf93d7 | 18 | |
JovanEps | 11:4e700bbf93d7 | 19 | #ifdef DP |
JovanEps | 11:4e700bbf93d7 | 20 | #define REAL double |
JovanEps | 11:4e700bbf93d7 | 21 | #define ZERO 0.0e0 |
JovanEps | 11:4e700bbf93d7 | 22 | #define ONE 1.0e0 |
JovanEps | 11:4e700bbf93d7 | 23 | #define PREC "Double" |
JovanEps | 11:4e700bbf93d7 | 24 | #endif |
JovanEps | 11:4e700bbf93d7 | 25 | |
JovanEps | 11:4e700bbf93d7 | 26 | #ifdef ROLL |
JovanEps | 11:4e700bbf93d7 | 27 | #define ROLLING "Rolled" |
JovanEps | 11:4e700bbf93d7 | 28 | #endif |
JovanEps | 11:4e700bbf93d7 | 29 | #ifdef UNROLL |
JovanEps | 11:4e700bbf93d7 | 30 | #define ROLLING "Unrolled" |
JovanEps | 11:4e700bbf93d7 | 31 | #endif |
JovanEps | 11:4e700bbf93d7 | 32 | |
JovanEps | 11:4e700bbf93d7 | 33 | // VERSION |
JovanEps | 11:4e700bbf93d7 | 34 | |
JovanEps | 11:4e700bbf93d7 | 35 | #ifdef CNNT |
JovanEps | 11:4e700bbf93d7 | 36 | #define options "Non-optimised" |
JovanEps | 11:4e700bbf93d7 | 37 | #define opt "0" |
JovanEps | 11:4e700bbf93d7 | 38 | #else |
JovanEps | 11:4e700bbf93d7 | 39 | // #define options "Optimised" |
JovanEps | 11:4e700bbf93d7 | 40 | // #define options "Opt 3 32 Bit" |
JovanEps | 11:4e700bbf93d7 | 41 | #define options "vfpv4 32 Bit" |
JovanEps | 11:4e700bbf93d7 | 42 | #define opt "1" |
JovanEps | 11:4e700bbf93d7 | 43 | #endif |
JovanEps | 11:4e700bbf93d7 | 44 | |
JovanEps | 11:4e700bbf93d7 | 45 | #define NTIMES 10 |
JovanEps | 11:4e700bbf93d7 | 46 | |
JovanEps | 0:43b96e9650ef | 47 | #include "mbed.h" |
JovanEps | 2:273153e44338 | 48 | |
JovanEps | 11:4e700bbf93d7 | 49 | |
JovanEps | 11:4e700bbf93d7 | 50 | //--------------------------------- |
JovanEps | 11:4e700bbf93d7 | 51 | DigitalOut myled(LED1); |
JovanEps | 11:4e700bbf93d7 | 52 | Serial pc(USBTX, USBRX); //USB is out of oreder on Embeded-Pi |
JovanEps | 11:4e700bbf93d7 | 53 | //Serial pc(PC_10,PC_11); //RX-TX D0,D1 Embeded-PI ports |
JovanEps | 11:4e700bbf93d7 | 54 | Timer timer; |
JovanEps | 11:4e700bbf93d7 | 55 | //-------------------------------- |
JovanEps | 11:4e700bbf93d7 | 56 | |
JovanEps | 11:4e700bbf93d7 | 57 | void print_time (int row); |
JovanEps | 11:4e700bbf93d7 | 58 | void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma); |
JovanEps | 11:4e700bbf93d7 | 59 | void dgefa (REAL a[], int lda, int n, int ipvt[], int *info); |
JovanEps | 11:4e700bbf93d7 | 60 | void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job); |
JovanEps | 11:4e700bbf93d7 | 61 | void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); |
JovanEps | 11:4e700bbf93d7 | 62 | void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy); |
JovanEps | 11:4e700bbf93d7 | 63 | REAL epslon (REAL x); |
JovanEps | 11:4e700bbf93d7 | 64 | int idamax (int n, REAL dx[], int incx); |
JovanEps | 11:4e700bbf93d7 | 65 | void dscal (int n, REAL da, REAL dx[], int incx); |
JovanEps | 11:4e700bbf93d7 | 66 | REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy); |
JovanEps | 11:4e700bbf93d7 | 67 | |
JovanEps | 11:4e700bbf93d7 | 68 | static REAL atime[9][15]; |
JovanEps | 11:4e700bbf93d7 | 69 | double runSecs = 1; |
JovanEps | 11:4e700bbf93d7 | 70 | |
JovanEps | 11:4e700bbf93d7 | 71 | void do_benchmark(void){ |
JovanEps | 11:4e700bbf93d7 | 72 | |
JovanEps | 11:4e700bbf93d7 | 73 | float t1,t2 = 0.0; |
JovanEps | 12:623ce727d4fa | 74 | static REAL aa[40*40],a[40*41],b[40],x[40]; // nxn, n*n+1, n,n, |
JovanEps | 11:4e700bbf93d7 | 75 | REAL cray,ops,total,norma,normx; |
JovanEps | 11:4e700bbf93d7 | 76 | REAL resid,residn,eps,tm2,epsn,x1,x2; |
JovanEps | 11:4e700bbf93d7 | 77 | REAL mflops; |
JovanEps | 11:4e700bbf93d7 | 78 | static int ipvt[21],n,i,j,ntimes,info,lda,ldaa; |
JovanEps | 11:4e700bbf93d7 | 79 | int endit, pass, loop; |
JovanEps | 11:4e700bbf93d7 | 80 | REAL overhead1, overhead2, time2; |
JovanEps | 11:4e700bbf93d7 | 81 | REAL max1, max2; |
JovanEps | 11:4e700bbf93d7 | 82 | char was[5][20]; |
JovanEps | 11:4e700bbf93d7 | 83 | char expect[5][20]; |
JovanEps | 11:4e700bbf93d7 | 84 | char title[5][20]; |
JovanEps | 11:4e700bbf93d7 | 85 | int errors; |
JovanEps | 11:4e700bbf93d7 | 86 | int nopause = 1; |
JovanEps | 11:4e700bbf93d7 | 87 | |
JovanEps | 11:4e700bbf93d7 | 88 | /* |
JovanEps | 11:4e700bbf93d7 | 89 | if (argc > 1) { |
JovanEps | 11:4e700bbf93d7 | 90 | switch (argv[1][0]) { |
JovanEps | 11:4e700bbf93d7 | 91 | case 'N': |
JovanEps | 11:4e700bbf93d7 | 92 | nopause = 0; |
JovanEps | 11:4e700bbf93d7 | 93 | break; |
JovanEps | 11:4e700bbf93d7 | 94 | case 'n': |
JovanEps | 11:4e700bbf93d7 | 95 | nopause = 0; |
JovanEps | 11:4e700bbf93d7 | 96 | break; |
JovanEps | 11:4e700bbf93d7 | 97 | } |
JovanEps | 11:4e700bbf93d7 | 98 | } |
JovanEps | 11:4e700bbf93d7 | 99 | */ |
JovanEps | 11:4e700bbf93d7 | 100 | pc.printf("\n"); |
JovanEps | 11:4e700bbf93d7 | 101 | |
JovanEps | 11:4e700bbf93d7 | 102 | |
JovanEps | 11:4e700bbf93d7 | 103 | // outfile = fopen("Linpack.txt","a+"); |
JovanEps | 11:4e700bbf93d7 | 104 | // if (outfile == NULL) |
JovanEps | 11:4e700bbf93d7 | 105 | // { |
JovanEps | 11:4e700bbf93d7 | 106 | // printf (" Cannot open results file \n\n"); |
JovanEps | 11:4e700bbf93d7 | 107 | // printf(" Press Enter\n\n"); |
JovanEps | 11:4e700bbf93d7 | 108 | // int g = getchar(); |
JovanEps | 11:4e700bbf93d7 | 109 | // exit (0); |
JovanEps | 11:4e700bbf93d7 | 110 | //} |
JovanEps | 11:4e700bbf93d7 | 111 | |
JovanEps | 11:4e700bbf93d7 | 112 | |
JovanEps | 11:4e700bbf93d7 | 113 | |
JovanEps | 11:4e700bbf93d7 | 114 | #ifdef GCCARMDP |
JovanEps | 11:4e700bbf93d7 | 115 | pc.printf(expect[0], " 1.7"); |
JovanEps | 11:4e700bbf93d7 | 116 | pc.printf(expect[1], " 7.41628980e-14"); |
JovanEps | 11:4e700bbf93d7 | 117 | pc.printf(expect[2], " 2.22044605e-16"); |
JovanEps | 11:4e700bbf93d7 | 118 | pc.printf(expect[3], " -1.49880108e-14"); |
JovanEps | 11:4e700bbf93d7 | 119 | pc.printf(expect[4], " -1.89848137e-14"); |
JovanEps | 11:4e700bbf93d7 | 120 | #endif |
JovanEps | 11:4e700bbf93d7 | 121 | |
JovanEps | 11:4e700bbf93d7 | 122 | #ifdef GCCARMSP |
JovanEps | 11:4e700bbf93d7 | 123 | pc.printf(expect[0], " 1.6"); |
JovanEps | 11:4e700bbf93d7 | 124 | pc.printf(expect[1], " 3.80277634e-05"); |
JovanEps | 11:4e700bbf93d7 | 125 | pc.printf(expect[2], " 1.19209290e-07"); |
JovanEps | 11:4e700bbf93d7 | 126 | pc.printf(expect[3], " -1.38282776e-05"); |
JovanEps | 11:4e700bbf93d7 | 127 | pc.printf(expect[4], " -7.51018524e-06"); |
JovanEps | 11:4e700bbf93d7 | 128 | #endif |
JovanEps | 11:4e700bbf93d7 | 129 | |
JovanEps | 12:623ce727d4fa | 130 | lda = 41; //n+1 |
JovanEps | 12:623ce727d4fa | 131 | ldaa = 40; //n |
JovanEps | 11:4e700bbf93d7 | 132 | cray = .056; |
JovanEps | 12:623ce727d4fa | 133 | n = 40; // 40 For Nucleo F7 |
JovanEps | 11:4e700bbf93d7 | 134 | pc.printf("----------------------------------------------\n"); |
JovanEps | 11:4e700bbf93d7 | 135 | pc.printf("%s ", ROLLING); |
JovanEps | 11:4e700bbf93d7 | 136 | pc.printf("%s ", PREC); |
JovanEps | 11:4e700bbf93d7 | 137 | pc.printf("Precision Linpack Benchmark - Linux Version in 'C/C++'\n\n"); |
JovanEps | 11:4e700bbf93d7 | 138 | |
JovanEps | 11:4e700bbf93d7 | 139 | pc.printf("Optimisation %s\n\n",options); |
JovanEps | 11:4e700bbf93d7 | 140 | |
JovanEps | 11:4e700bbf93d7 | 141 | ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); |
JovanEps | 11:4e700bbf93d7 | 142 | |
JovanEps | 11:4e700bbf93d7 | 143 | matgen(a,lda,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 144 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 145 | //start_time(); |
JovanEps | 11:4e700bbf93d7 | 146 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 147 | dgefa(a,lda,n,ipvt,&info); |
JovanEps | 11:4e700bbf93d7 | 148 | //end_time(); |
JovanEps | 11:4e700bbf93d7 | 149 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 150 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 151 | atime[0][0] = t2-t1; //secs |
JovanEps | 11:4e700bbf93d7 | 152 | |
JovanEps | 11:4e700bbf93d7 | 153 | timer.reset(); |
JovanEps | 11:4e700bbf93d7 | 154 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 155 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 156 | dgesl(a,lda,n,ipvt,b,0); |
JovanEps | 11:4e700bbf93d7 | 157 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 158 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 159 | atime[1][0] = t2-t1; //secs |
JovanEps | 11:4e700bbf93d7 | 160 | total = atime[0][0] + atime[1][0]; |
JovanEps | 11:4e700bbf93d7 | 161 | |
JovanEps | 11:4e700bbf93d7 | 162 | /* compute a residual to verify results. */ |
JovanEps | 11:4e700bbf93d7 | 163 | |
JovanEps | 11:4e700bbf93d7 | 164 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 165 | x[i] = b[i]; |
JovanEps | 11:4e700bbf93d7 | 166 | } |
JovanEps | 11:4e700bbf93d7 | 167 | matgen(a,lda,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 168 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 169 | b[i] = -b[i]; |
JovanEps | 11:4e700bbf93d7 | 170 | } |
JovanEps | 11:4e700bbf93d7 | 171 | dmxpy(n,b,n,lda,x,a); |
JovanEps | 11:4e700bbf93d7 | 172 | resid = 0.0; |
JovanEps | 11:4e700bbf93d7 | 173 | normx = 0.0; |
JovanEps | 11:4e700bbf93d7 | 174 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 175 | resid = (resid > fabs((double)b[i])) |
JovanEps | 11:4e700bbf93d7 | 176 | ? resid : fabs((double)b[i]); |
JovanEps | 11:4e700bbf93d7 | 177 | normx = (normx > fabs((double)x[i])) |
JovanEps | 11:4e700bbf93d7 | 178 | ? normx : fabs((double)x[i]); |
JovanEps | 11:4e700bbf93d7 | 179 | } |
JovanEps | 11:4e700bbf93d7 | 180 | eps = epslon(ONE); |
JovanEps | 11:4e700bbf93d7 | 181 | residn = resid/( n*norma*normx*eps ); |
JovanEps | 11:4e700bbf93d7 | 182 | epsn = eps; |
JovanEps | 11:4e700bbf93d7 | 183 | x1 = x[0] - 1; |
JovanEps | 11:4e700bbf93d7 | 184 | x2 = x[n-1] - 1; |
JovanEps | 11:4e700bbf93d7 | 185 | |
JovanEps | 11:4e700bbf93d7 | 186 | pc.printf("norm resid resid machep"); |
JovanEps | 11:4e700bbf93d7 | 187 | pc.printf(" x[0]-1 x[n-1]-1\n"); |
JovanEps | 11:4e700bbf93d7 | 188 | pc.printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", |
JovanEps | 11:4e700bbf93d7 | 189 | (double)residn, (double)resid, (double)epsn, |
JovanEps | 11:4e700bbf93d7 | 190 | (double)x1, (double)x2); |
JovanEps | 11:4e700bbf93d7 | 191 | |
JovanEps | 11:4e700bbf93d7 | 192 | pc.printf("Times are reported for matrices of order %5d\n",n); |
JovanEps | 11:4e700bbf93d7 | 193 | pc.printf("1 pass times for array with leading dimension of%5d\n\n",lda); |
JovanEps | 11:4e700bbf93d7 | 194 | pc.printf(" dgefa dgesl total Mflops unit"); |
JovanEps | 11:4e700bbf93d7 | 195 | pc.printf(" ratio\n"); |
JovanEps | 11:4e700bbf93d7 | 196 | |
JovanEps | 11:4e700bbf93d7 | 197 | atime[2][0] = total; |
JovanEps | 11:4e700bbf93d7 | 198 | if (total > 0.0) { |
JovanEps | 11:4e700bbf93d7 | 199 | atime[3][0] = ops/(1.0e6*total); |
JovanEps | 11:4e700bbf93d7 | 200 | atime[4][0] = 2.0/atime[3][0]; |
JovanEps | 11:4e700bbf93d7 | 201 | } else { |
JovanEps | 11:4e700bbf93d7 | 202 | atime[3][0] = 0.0; |
JovanEps | 11:4e700bbf93d7 | 203 | atime[4][0] = 0.0; |
JovanEps | 11:4e700bbf93d7 | 204 | } |
JovanEps | 11:4e700bbf93d7 | 205 | atime[5][0] = total/cray; |
JovanEps | 11:4e700bbf93d7 | 206 | |
JovanEps | 11:4e700bbf93d7 | 207 | print_time(0); |
JovanEps | 11:4e700bbf93d7 | 208 | |
JovanEps | 11:4e700bbf93d7 | 209 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 210 | * Calculate overhead of executing matgen procedure * |
JovanEps | 11:4e700bbf93d7 | 211 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 212 | |
JovanEps | 11:4e700bbf93d7 | 213 | pc.printf ("\nCalculating matgen overhead\n"); |
JovanEps | 11:4e700bbf93d7 | 214 | pass = -20; |
JovanEps | 11:4e700bbf93d7 | 215 | loop = NTIMES; |
JovanEps | 11:4e700bbf93d7 | 216 | do { |
JovanEps | 11:4e700bbf93d7 | 217 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 218 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 219 | pass = pass + 1; |
JovanEps | 11:4e700bbf93d7 | 220 | for ( i = 0 ; i < loop ; i++) { |
JovanEps | 11:4e700bbf93d7 | 221 | matgen(a,lda,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 222 | } |
JovanEps | 11:4e700bbf93d7 | 223 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 224 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 225 | overhead1 = t2-t1; //sec |
JovanEps | 12:623ce727d4fa | 226 | pc.printf ("%10d times %6.4f seconds\n", loop, overhead1); |
JovanEps | 11:4e700bbf93d7 | 227 | if (overhead1 > runSecs) { |
JovanEps | 11:4e700bbf93d7 | 228 | pass = 0; |
JovanEps | 11:4e700bbf93d7 | 229 | } |
JovanEps | 11:4e700bbf93d7 | 230 | if (pass < 0) { |
JovanEps | 11:4e700bbf93d7 | 231 | if (overhead1 < 0.1) { |
JovanEps | 11:4e700bbf93d7 | 232 | loop = loop * 10; |
JovanEps | 11:4e700bbf93d7 | 233 | } else { |
JovanEps | 11:4e700bbf93d7 | 234 | loop = loop * 2; |
JovanEps | 11:4e700bbf93d7 | 235 | } |
JovanEps | 11:4e700bbf93d7 | 236 | } |
JovanEps | 11:4e700bbf93d7 | 237 | } while (pass < 0); |
JovanEps | 11:4e700bbf93d7 | 238 | |
JovanEps | 11:4e700bbf93d7 | 239 | overhead1 = overhead1 / (double)loop; |
JovanEps | 11:4e700bbf93d7 | 240 | |
JovanEps | 11:4e700bbf93d7 | 241 | pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1); |
JovanEps | 11:4e700bbf93d7 | 242 | |
JovanEps | 11:4e700bbf93d7 | 243 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 244 | * Calculate matgen/dgefa passes for runSecs seconds * |
JovanEps | 11:4e700bbf93d7 | 245 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 246 | |
JovanEps | 11:4e700bbf93d7 | 247 | pc.printf ("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); |
JovanEps | 11:4e700bbf93d7 | 248 | pass = -20; |
JovanEps | 11:4e700bbf93d7 | 249 | ntimes = NTIMES; |
JovanEps | 11:4e700bbf93d7 | 250 | do { |
JovanEps | 11:4e700bbf93d7 | 251 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 252 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 253 | pass = pass + 1; |
JovanEps | 11:4e700bbf93d7 | 254 | for ( i = 0 ; i < ntimes ; i++) { |
JovanEps | 11:4e700bbf93d7 | 255 | matgen(a,lda,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 256 | dgefa(a,lda,n,ipvt,&info ); |
JovanEps | 11:4e700bbf93d7 | 257 | } |
JovanEps | 11:4e700bbf93d7 | 258 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 259 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 260 | time2 = t2-t1; //sec |
JovanEps | 11:4e700bbf93d7 | 261 | |
JovanEps | 11:4e700bbf93d7 | 262 | pc.printf ("%10d times %6.2f seconds\n", ntimes, time2); |
JovanEps | 11:4e700bbf93d7 | 263 | if (time2 > runSecs) { |
JovanEps | 11:4e700bbf93d7 | 264 | pass = 0; |
JovanEps | 11:4e700bbf93d7 | 265 | } |
JovanEps | 11:4e700bbf93d7 | 266 | if (pass < 0) { |
JovanEps | 11:4e700bbf93d7 | 267 | if (time2 < 0.1) { |
JovanEps | 11:4e700bbf93d7 | 268 | ntimes = ntimes * 10; |
JovanEps | 11:4e700bbf93d7 | 269 | } else { |
JovanEps | 11:4e700bbf93d7 | 270 | ntimes = ntimes * 2; |
JovanEps | 11:4e700bbf93d7 | 271 | } |
JovanEps | 11:4e700bbf93d7 | 272 | } |
JovanEps | 11:4e700bbf93d7 | 273 | } while (pass < 0); |
JovanEps | 11:4e700bbf93d7 | 274 | |
JovanEps | 11:4e700bbf93d7 | 275 | ntimes = (int)(runSecs * (double)ntimes / time2); |
JovanEps | 11:4e700bbf93d7 | 276 | if (ntimes == 0) ntimes = 1; |
JovanEps | 11:4e700bbf93d7 | 277 | |
JovanEps | 11:4e700bbf93d7 | 278 | pc.printf ("Passes used %10d \n\n", ntimes); |
JovanEps | 11:4e700bbf93d7 | 279 | pc.printf("Times for array with leading dimension of%4d\n\n",lda); |
JovanEps | 11:4e700bbf93d7 | 280 | pc.printf(" dgefa dgesl total Mflops unit"); |
JovanEps | 11:4e700bbf93d7 | 281 | pc.printf(" ratio\n"); |
JovanEps | 11:4e700bbf93d7 | 282 | |
JovanEps | 11:4e700bbf93d7 | 283 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 284 | * Execute 5 passes * |
JovanEps | 11:4e700bbf93d7 | 285 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 286 | |
JovanEps | 11:4e700bbf93d7 | 287 | tm2 = ntimes * overhead1; |
JovanEps | 11:4e700bbf93d7 | 288 | atime[3][6] = 0; |
JovanEps | 11:4e700bbf93d7 | 289 | |
JovanEps | 11:4e700bbf93d7 | 290 | for (j=1 ; j<6 ; j++) { |
JovanEps | 11:4e700bbf93d7 | 291 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 292 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 293 | |
JovanEps | 11:4e700bbf93d7 | 294 | for (i = 0; i < ntimes; i++) { |
JovanEps | 11:4e700bbf93d7 | 295 | matgen(a,lda,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 296 | dgefa(a,lda,n,ipvt,&info ); |
JovanEps | 11:4e700bbf93d7 | 297 | } |
JovanEps | 11:4e700bbf93d7 | 298 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 299 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 300 | atime[0][j] = ((t2-t1) - tm2)/ntimes; |
JovanEps | 11:4e700bbf93d7 | 301 | |
JovanEps | 11:4e700bbf93d7 | 302 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 303 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 304 | for (i = 0; i < ntimes; i++) { |
JovanEps | 11:4e700bbf93d7 | 305 | dgesl(a,lda,n,ipvt,b,0); |
JovanEps | 11:4e700bbf93d7 | 306 | } |
JovanEps | 11:4e700bbf93d7 | 307 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 308 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 309 | |
JovanEps | 11:4e700bbf93d7 | 310 | atime[1][j] = (t2-t1)/ntimes; |
JovanEps | 11:4e700bbf93d7 | 311 | total = atime[0][j] + atime[1][j]; |
JovanEps | 11:4e700bbf93d7 | 312 | atime[2][j] = total; |
JovanEps | 11:4e700bbf93d7 | 313 | atime[3][j] = ops/(1.0e6*total); |
JovanEps | 11:4e700bbf93d7 | 314 | atime[4][j] = 2.0/atime[3][j]; |
JovanEps | 11:4e700bbf93d7 | 315 | atime[5][j] = total/cray; |
JovanEps | 11:4e700bbf93d7 | 316 | atime[3][6] = atime[3][6] + atime[3][j]; |
JovanEps | 2:273153e44338 | 317 | |
JovanEps | 11:4e700bbf93d7 | 318 | print_time(j); |
JovanEps | 11:4e700bbf93d7 | 319 | } |
JovanEps | 11:4e700bbf93d7 | 320 | atime[3][6] = atime[3][6] / 5.0; |
JovanEps | 12:623ce727d4fa | 321 | pc.printf("Average %11.4f\n", |
JovanEps | 11:4e700bbf93d7 | 322 | (double)atime[3][6]); |
JovanEps | 11:4e700bbf93d7 | 323 | |
JovanEps | 11:4e700bbf93d7 | 324 | pc.printf("\nCalculating matgen2 overhead\n"); |
JovanEps | 11:4e700bbf93d7 | 325 | |
JovanEps | 11:4e700bbf93d7 | 326 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 327 | * Calculate overhead of executing matgen procedure * |
JovanEps | 11:4e700bbf93d7 | 328 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 329 | |
JovanEps | 11:4e700bbf93d7 | 330 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 331 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 332 | for ( i = 0 ; i < loop ; i++) { |
JovanEps | 11:4e700bbf93d7 | 333 | matgen(aa,ldaa,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 334 | } |
JovanEps | 11:4e700bbf93d7 | 335 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 336 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 337 | overhead2 = t2-t1; |
JovanEps | 11:4e700bbf93d7 | 338 | overhead2 = overhead2 / (double)loop; |
JovanEps | 11:4e700bbf93d7 | 339 | |
JovanEps | 11:4e700bbf93d7 | 340 | pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2); |
JovanEps | 11:4e700bbf93d7 | 341 | pc.printf("Times for array with leading dimension of%4d\n\n",ldaa); |
JovanEps | 11:4e700bbf93d7 | 342 | pc.printf(" dgefa dgesl total Mflops unit"); |
JovanEps | 11:4e700bbf93d7 | 343 | pc.printf(" ratio\n"); |
JovanEps | 11:4e700bbf93d7 | 344 | |
JovanEps | 11:4e700bbf93d7 | 345 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 346 | * Execute 5 passes * |
JovanEps | 11:4e700bbf93d7 | 347 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 348 | |
JovanEps | 11:4e700bbf93d7 | 349 | tm2 = ntimes * overhead2; |
JovanEps | 11:4e700bbf93d7 | 350 | atime[3][12] = 0; |
JovanEps | 11:4e700bbf93d7 | 351 | |
JovanEps | 11:4e700bbf93d7 | 352 | for (j=7 ; j<12 ; j++) { |
JovanEps | 11:4e700bbf93d7 | 353 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 354 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 355 | for (i = 0; i < ntimes; i++) { |
JovanEps | 11:4e700bbf93d7 | 356 | matgen(aa,ldaa,n,b,&norma); |
JovanEps | 11:4e700bbf93d7 | 357 | dgefa(aa,ldaa,n,ipvt,&info ); |
JovanEps | 11:4e700bbf93d7 | 358 | } |
JovanEps | 11:4e700bbf93d7 | 359 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 360 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 361 | atime[0][j] = ((t2-t1) - tm2)/ntimes; |
JovanEps | 11:4e700bbf93d7 | 362 | |
JovanEps | 11:4e700bbf93d7 | 363 | timer.start(); |
JovanEps | 11:4e700bbf93d7 | 364 | t1 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 365 | for (i = 0; i < ntimes; i++) { |
JovanEps | 11:4e700bbf93d7 | 366 | dgesl(aa,ldaa,n,ipvt,b,0); |
JovanEps | 11:4e700bbf93d7 | 367 | } |
JovanEps | 11:4e700bbf93d7 | 368 | t2 = timer.read(); |
JovanEps | 11:4e700bbf93d7 | 369 | timer.stop(); |
JovanEps | 11:4e700bbf93d7 | 370 | atime[1][j] = (t2-t1)/ntimes; |
JovanEps | 11:4e700bbf93d7 | 371 | total = atime[0][j] + atime[1][j]; |
JovanEps | 11:4e700bbf93d7 | 372 | atime[2][j] = total; |
JovanEps | 11:4e700bbf93d7 | 373 | atime[3][j] = ops/(1.0e6*total); |
JovanEps | 11:4e700bbf93d7 | 374 | atime[4][j] = 2.0/atime[3][j]; |
JovanEps | 11:4e700bbf93d7 | 375 | atime[5][j] = total/cray; |
JovanEps | 11:4e700bbf93d7 | 376 | atime[3][12] = atime[3][12] + atime[3][j]; |
JovanEps | 11:4e700bbf93d7 | 377 | |
JovanEps | 11:4e700bbf93d7 | 378 | print_time(j); |
JovanEps | 11:4e700bbf93d7 | 379 | } |
JovanEps | 11:4e700bbf93d7 | 380 | atime[3][12] = atime[3][12] / 5.0; |
JovanEps | 12:623ce727d4fa | 381 | pc.printf("Average %11.4f\n\n", |
JovanEps | 11:4e700bbf93d7 | 382 | (double)atime[3][12]); |
JovanEps | 12:623ce727d4fa | 383 | pc.printf("--------------------------------------------------------\n"); |
JovanEps | 11:4e700bbf93d7 | 384 | pc.printf ("\nFrom File /proc/cpuinfo\n"); |
JovanEps | 11:4e700bbf93d7 | 385 | // pc.printf("%s\n", configdata[0]); |
JovanEps | 11:4e700bbf93d7 | 386 | pc.printf ("\nFrom File /proc/version\n"); |
JovanEps | 11:4e700bbf93d7 | 387 | //pc.printf("%s\n", configdata[1]); |
JovanEps | 11:4e700bbf93d7 | 388 | |
JovanEps | 11:4e700bbf93d7 | 389 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 390 | * Use minimum average as overall Mflops rating * |
JovanEps | 11:4e700bbf93d7 | 391 | ************************************************************************/ |
JovanEps | 11:4e700bbf93d7 | 392 | |
JovanEps | 11:4e700bbf93d7 | 393 | mflops = atime[3][6]; |
JovanEps | 11:4e700bbf93d7 | 394 | if (atime[3][12] < mflops) mflops = atime[3][12]; |
JovanEps | 11:4e700bbf93d7 | 395 | |
JovanEps | 11:4e700bbf93d7 | 396 | pc.printf("\n"); |
JovanEps | 11:4e700bbf93d7 | 397 | pc.printf("%s ", ROLLING); |
JovanEps | 11:4e700bbf93d7 | 398 | pc.printf("%s ", PREC); |
JovanEps | 11:4e700bbf93d7 | 399 | pc.printf(" Precision %11.4f Mflops \n\n",mflops); |
JovanEps | 11:4e700bbf93d7 | 400 | |
JovanEps | 11:4e700bbf93d7 | 401 | // local_time(); |
JovanEps | 11:4e700bbf93d7 | 402 | |
JovanEps | 11:4e700bbf93d7 | 403 | |
JovanEps | 11:4e700bbf93d7 | 404 | /************************************************************************ |
JovanEps | 11:4e700bbf93d7 | 405 | * Add results to output file Linpack.txt * |
JovanEps | 11:4e700bbf93d7 | 406 | ************************************************************************/ |
JovanEps | 12:623ce727d4fa | 407 | pc.printf (" --------------------------------------------------------\n\n"); |
JovanEps | 11:4e700bbf93d7 | 408 | pc.printf (" Linpack %s Precision %s Benchmark n @ 100\n", PREC, ROLLING); |
JovanEps | 11:4e700bbf93d7 | 409 | //pc.printf (outfile, " Optimisation %s, %s\n", options, timeday); |
JovanEps | 11:4e700bbf93d7 | 410 | |
JovanEps | 11:4e700bbf93d7 | 411 | max1 = 0; |
JovanEps | 11:4e700bbf93d7 | 412 | for (i=1 ; i<6 ; i++) { |
JovanEps | 11:4e700bbf93d7 | 413 | if (atime[3][i] > max1) max1 = atime[3][i]; |
JovanEps | 11:4e700bbf93d7 | 414 | } |
JovanEps | 11:4e700bbf93d7 | 415 | |
JovanEps | 11:4e700bbf93d7 | 416 | max2 = 0; |
JovanEps | 11:4e700bbf93d7 | 417 | for (i=7 ; i<12 ; i++) { |
JovanEps | 11:4e700bbf93d7 | 418 | if (atime[3][i] > max2) max2 = atime[3][i]; |
JovanEps | 11:4e700bbf93d7 | 419 | } |
JovanEps | 11:4e700bbf93d7 | 420 | if (max1 < max2) max2 = max1; |
JovanEps | 11:4e700bbf93d7 | 421 | |
JovanEps | 11:4e700bbf93d7 | 422 | pc.printf(" Speed %10.4f MFLOPS\n\n", max2); |
JovanEps | 11:4e700bbf93d7 | 423 | pc.printf(was[0], "%16.1f",(double)residn); |
JovanEps | 11:4e700bbf93d7 | 424 | pc.printf(was[1], "%16.8e",(double)resid); |
JovanEps | 11:4e700bbf93d7 | 425 | pc.printf(was[2], "%16.8e",(double)epsn); |
JovanEps | 11:4e700bbf93d7 | 426 | pc.printf(was[3], "%16.8e",(double)x1); |
JovanEps | 11:4e700bbf93d7 | 427 | pc.printf(was[4], "%16.8e",(double)x2); |
JovanEps | 11:4e700bbf93d7 | 428 | |
JovanEps | 11:4e700bbf93d7 | 429 | pc.printf(title[0], "norm. resid"); |
JovanEps | 11:4e700bbf93d7 | 430 | pc.printf(title[1], "resid "); |
JovanEps | 11:4e700bbf93d7 | 431 | pc.printf(title[2], "machep "); |
JovanEps | 11:4e700bbf93d7 | 432 | pc.printf(title[3], "x[0]-1 "); |
JovanEps | 11:4e700bbf93d7 | 433 | pc.printf(title[4], "x[n-1]-1 "); |
JovanEps | 11:4e700bbf93d7 | 434 | |
JovanEps | 11:4e700bbf93d7 | 435 | if (strtol(opt, NULL, 10) == 0) { |
JovanEps | 11:4e700bbf93d7 | 436 | pc.printf(expect[2], " 8.88178420e-016"); |
JovanEps | 11:4e700bbf93d7 | 437 | } |
JovanEps | 11:4e700bbf93d7 | 438 | errors = 0; |
JovanEps | 11:4e700bbf93d7 | 439 | |
JovanEps | 11:4e700bbf93d7 | 440 | for (i=0; i<5; i++) { |
JovanEps | 11:4e700bbf93d7 | 441 | if (strcmp (expect[i], was[i]) != 0) { |
JovanEps | 11:4e700bbf93d7 | 442 | pc.printf(" Variable %s Non-standard result was %s instead of %s\n", |
JovanEps | 11:4e700bbf93d7 | 443 | title[i], was[i], expect[i]); |
JovanEps | 11:4e700bbf93d7 | 444 | errors = errors + 1; |
JovanEps | 11:4e700bbf93d7 | 445 | } |
JovanEps | 11:4e700bbf93d7 | 446 | } |
JovanEps | 11:4e700bbf93d7 | 447 | if (errors == 0) { |
JovanEps | 11:4e700bbf93d7 | 448 | pc.printf(" Numeric results were as expected\n\n"); |
JovanEps | 11:4e700bbf93d7 | 449 | } else { |
JovanEps | 11:4e700bbf93d7 | 450 | pc.printf(" Different numeric results - see linpack.txt\n\n"); |
JovanEps | 11:4e700bbf93d7 | 451 | pc.printf("\n Compiler #define or values in linpack.c might need to be changed\n\n"); |
JovanEps | 11:4e700bbf93d7 | 452 | |
JovanEps | 11:4e700bbf93d7 | 453 | } |
JovanEps | 11:4e700bbf93d7 | 454 | |
JovanEps | 11:4e700bbf93d7 | 455 | |
JovanEps | 12:623ce727d4fa | 456 | pc.printf (" -------------------------------------------------------------\n\n"); |
JovanEps | 11:4e700bbf93d7 | 457 | pc.printf("\n"); |
JovanEps | 11:4e700bbf93d7 | 458 | pc.printf ("SYSTEM INFORMATION\n\nFrom File /proc/cpuinfo\n"); |
JovanEps | 11:4e700bbf93d7 | 459 | // pc.printf (outfile, "%s \n", configdata[0]); |
JovanEps | 11:4e700bbf93d7 | 460 | pc.printf ("\nFrom File /proc/version\n"); |
JovanEps | 11:4e700bbf93d7 | 461 | // pc.printf (outfile, "%s \n", configdata[1]); |
JovanEps | 11:4e700bbf93d7 | 462 | pc.printf ("\n"); |
JovanEps | 11:4e700bbf93d7 | 463 | |
JovanEps | 11:4e700bbf93d7 | 464 | pc.printf("\n"); |
JovanEps | 12:623ce727d4fa | 465 | |
JovanEps | 12:623ce727d4fa | 466 | wait(1); |
JovanEps | 11:4e700bbf93d7 | 467 | |
JovanEps | 11:4e700bbf93d7 | 468 | } |
JovanEps | 11:4e700bbf93d7 | 469 | |
JovanEps | 11:4e700bbf93d7 | 470 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 471 | void print_time (int row) |
JovanEps | 11:4e700bbf93d7 | 472 | |
JovanEps | 11:4e700bbf93d7 | 473 | { |
JovanEps | 12:623ce727d4fa | 474 | pc.printf("%11.5f%11.5f%11.5f%11.4f%11.4f%11.4f\n", (double)atime[0][row], |
JovanEps | 11:4e700bbf93d7 | 475 | (double)atime[1][row], (double)atime[2][row], (double)atime[3][row], |
JovanEps | 11:4e700bbf93d7 | 476 | (double)atime[4][row], (double)atime[5][row]); |
JovanEps | 11:4e700bbf93d7 | 477 | return; |
JovanEps | 11:4e700bbf93d7 | 478 | } |
JovanEps | 11:4e700bbf93d7 | 479 | |
JovanEps | 11:4e700bbf93d7 | 480 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 481 | |
JovanEps | 11:4e700bbf93d7 | 482 | void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma) |
JovanEps | 11:4e700bbf93d7 | 483 | |
JovanEps | 11:4e700bbf93d7 | 484 | |
JovanEps | 11:4e700bbf93d7 | 485 | /* We would like to declare a[][lda], but c does not allow it. In this |
JovanEps | 11:4e700bbf93d7 | 486 | function, references to a[i][j] are written a[lda*i+j]. */ |
JovanEps | 11:4e700bbf93d7 | 487 | |
JovanEps | 11:4e700bbf93d7 | 488 | { |
JovanEps | 11:4e700bbf93d7 | 489 | int init, i, j; |
JovanEps | 11:4e700bbf93d7 | 490 | |
JovanEps | 11:4e700bbf93d7 | 491 | init = 1325; |
JovanEps | 11:4e700bbf93d7 | 492 | *norma = 0.0; |
JovanEps | 11:4e700bbf93d7 | 493 | for (j = 0; j < n; j++) { |
JovanEps | 11:4e700bbf93d7 | 494 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 495 | init = 3125*init % 65536; |
JovanEps | 11:4e700bbf93d7 | 496 | a[lda*j+i] = (init - 32768.0)/16384.0; |
JovanEps | 11:4e700bbf93d7 | 497 | *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; |
JovanEps | 11:4e700bbf93d7 | 498 | |
JovanEps | 11:4e700bbf93d7 | 499 | /* alternative for some compilers |
JovanEps | 11:4e700bbf93d7 | 500 | if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]); |
JovanEps | 11:4e700bbf93d7 | 501 | */ |
JovanEps | 11:4e700bbf93d7 | 502 | } |
JovanEps | 11:4e700bbf93d7 | 503 | } |
JovanEps | 11:4e700bbf93d7 | 504 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 505 | b[i] = 0.0; |
JovanEps | 11:4e700bbf93d7 | 506 | } |
JovanEps | 11:4e700bbf93d7 | 507 | for (j = 0; j < n; j++) { |
JovanEps | 11:4e700bbf93d7 | 508 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 509 | b[i] = b[i] + a[lda*j+i]; |
JovanEps | 11:4e700bbf93d7 | 510 | } |
JovanEps | 11:4e700bbf93d7 | 511 | } |
JovanEps | 11:4e700bbf93d7 | 512 | return; |
JovanEps | 11:4e700bbf93d7 | 513 | } |
JovanEps | 11:4e700bbf93d7 | 514 | |
JovanEps | 11:4e700bbf93d7 | 515 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 516 | void dgefa(REAL a[], int lda, int n, int ipvt[], int *info) |
JovanEps | 11:4e700bbf93d7 | 517 | |
JovanEps | 11:4e700bbf93d7 | 518 | |
JovanEps | 11:4e700bbf93d7 | 519 | /* We would like to declare a[][lda], but c does not allow it. In this |
JovanEps | 11:4e700bbf93d7 | 520 | function, references to a[i][j] are written a[lda*i+j]. */ |
JovanEps | 11:4e700bbf93d7 | 521 | /* |
JovanEps | 11:4e700bbf93d7 | 522 | dgefa factors a double precision matrix by gaussian elimination. |
JovanEps | 11:4e700bbf93d7 | 523 | |
JovanEps | 11:4e700bbf93d7 | 524 | dgefa is usually called by dgeco, but it can be called |
JovanEps | 11:4e700bbf93d7 | 525 | directly with a saving in time if rcond is not needed. |
JovanEps | 11:4e700bbf93d7 | 526 | (time for dgeco) = (1 + 9/n)*(time for dgefa) . |
JovanEps | 11:4e700bbf93d7 | 527 | |
JovanEps | 11:4e700bbf93d7 | 528 | on entry |
JovanEps | 11:4e700bbf93d7 | 529 | |
JovanEps | 11:4e700bbf93d7 | 530 | a REAL precision[n][lda] |
JovanEps | 11:4e700bbf93d7 | 531 | the matrix to be factored. |
JovanEps | 11:4e700bbf93d7 | 532 | |
JovanEps | 11:4e700bbf93d7 | 533 | lda integer |
JovanEps | 11:4e700bbf93d7 | 534 | the leading dimension of the array a . |
JovanEps | 11:4e700bbf93d7 | 535 | |
JovanEps | 11:4e700bbf93d7 | 536 | n integer |
JovanEps | 11:4e700bbf93d7 | 537 | the order of the matrix a . |
JovanEps | 11:4e700bbf93d7 | 538 | |
JovanEps | 11:4e700bbf93d7 | 539 | on return |
JovanEps | 11:4e700bbf93d7 | 540 | |
JovanEps | 11:4e700bbf93d7 | 541 | a an upper triangular matrix and the multipliers |
JovanEps | 11:4e700bbf93d7 | 542 | which were used to obtain it. |
JovanEps | 11:4e700bbf93d7 | 543 | the factorization can be written a = l*u where |
JovanEps | 11:4e700bbf93d7 | 544 | l is a product of permutation and unit lower |
JovanEps | 11:4e700bbf93d7 | 545 | triangular matrices and u is upper triangular. |
JovanEps | 11:4e700bbf93d7 | 546 | |
JovanEps | 11:4e700bbf93d7 | 547 | ipvt integer[n] |
JovanEps | 11:4e700bbf93d7 | 548 | an integer vector of pivot indices. |
JovanEps | 11:4e700bbf93d7 | 549 | |
JovanEps | 11:4e700bbf93d7 | 550 | info integer |
JovanEps | 11:4e700bbf93d7 | 551 | = 0 normal value. |
JovanEps | 11:4e700bbf93d7 | 552 | = k if u[k][k] .eq. 0.0 . this is not an error |
JovanEps | 11:4e700bbf93d7 | 553 | condition for this subroutine, but it does |
JovanEps | 11:4e700bbf93d7 | 554 | indicate that dgesl or dgedi will divide by zero |
JovanEps | 11:4e700bbf93d7 | 555 | if called. use rcond in dgeco for a reliable |
JovanEps | 11:4e700bbf93d7 | 556 | indication of singularity. |
JovanEps | 11:4e700bbf93d7 | 557 | |
JovanEps | 11:4e700bbf93d7 | 558 | linpack. this version dated 08/14/78 . |
JovanEps | 11:4e700bbf93d7 | 559 | cleve moler, university of new mexico, argonne national lab. |
JovanEps | 11:4e700bbf93d7 | 560 | |
JovanEps | 11:4e700bbf93d7 | 561 | functions |
JovanEps | 11:4e700bbf93d7 | 562 | |
JovanEps | 11:4e700bbf93d7 | 563 | blas daxpy,dscal,idamax |
JovanEps | 11:4e700bbf93d7 | 564 | */ |
JovanEps | 11:4e700bbf93d7 | 565 | |
JovanEps | 11:4e700bbf93d7 | 566 | { |
JovanEps | 11:4e700bbf93d7 | 567 | /* internal variables */ |
JovanEps | 11:4e700bbf93d7 | 568 | |
JovanEps | 11:4e700bbf93d7 | 569 | REAL t; |
JovanEps | 11:4e700bbf93d7 | 570 | int j,k,kp1,l,nm1; |
JovanEps | 11:4e700bbf93d7 | 571 | |
JovanEps | 11:4e700bbf93d7 | 572 | |
JovanEps | 11:4e700bbf93d7 | 573 | /* gaussian elimination with partial pivoting */ |
JovanEps | 11:4e700bbf93d7 | 574 | |
JovanEps | 11:4e700bbf93d7 | 575 | *info = 0; |
JovanEps | 11:4e700bbf93d7 | 576 | nm1 = n - 1; |
JovanEps | 11:4e700bbf93d7 | 577 | if (nm1 >= 0) { |
JovanEps | 11:4e700bbf93d7 | 578 | for (k = 0; k < nm1; k++) { |
JovanEps | 11:4e700bbf93d7 | 579 | kp1 = k + 1; |
JovanEps | 11:4e700bbf93d7 | 580 | |
JovanEps | 11:4e700bbf93d7 | 581 | /* find l = pivot index */ |
JovanEps | 11:4e700bbf93d7 | 582 | |
JovanEps | 11:4e700bbf93d7 | 583 | l = idamax(n-k,&a[lda*k+k],1) + k; |
JovanEps | 11:4e700bbf93d7 | 584 | ipvt[k] = l; |
JovanEps | 11:4e700bbf93d7 | 585 | |
JovanEps | 11:4e700bbf93d7 | 586 | /* zero pivot implies this column already |
JovanEps | 11:4e700bbf93d7 | 587 | triangularized */ |
JovanEps | 11:4e700bbf93d7 | 588 | |
JovanEps | 11:4e700bbf93d7 | 589 | if (a[lda*k+l] != ZERO) { |
JovanEps | 11:4e700bbf93d7 | 590 | |
JovanEps | 11:4e700bbf93d7 | 591 | /* interchange if necessary */ |
JovanEps | 0:43b96e9650ef | 592 | |
JovanEps | 11:4e700bbf93d7 | 593 | if (l != k) { |
JovanEps | 11:4e700bbf93d7 | 594 | t = a[lda*k+l]; |
JovanEps | 11:4e700bbf93d7 | 595 | a[lda*k+l] = a[lda*k+k]; |
JovanEps | 11:4e700bbf93d7 | 596 | a[lda*k+k] = t; |
JovanEps | 11:4e700bbf93d7 | 597 | } |
JovanEps | 11:4e700bbf93d7 | 598 | |
JovanEps | 11:4e700bbf93d7 | 599 | /* compute multipliers */ |
JovanEps | 11:4e700bbf93d7 | 600 | |
JovanEps | 11:4e700bbf93d7 | 601 | t = -ONE/a[lda*k+k]; |
JovanEps | 11:4e700bbf93d7 | 602 | dscal(n-(k+1),t,&a[lda*k+k+1],1); |
JovanEps | 11:4e700bbf93d7 | 603 | |
JovanEps | 11:4e700bbf93d7 | 604 | /* row elimination with column indexing */ |
JovanEps | 11:4e700bbf93d7 | 605 | |
JovanEps | 11:4e700bbf93d7 | 606 | for (j = kp1; j < n; j++) { |
JovanEps | 11:4e700bbf93d7 | 607 | t = a[lda*j+l]; |
JovanEps | 11:4e700bbf93d7 | 608 | if (l != k) { |
JovanEps | 11:4e700bbf93d7 | 609 | a[lda*j+l] = a[lda*j+k]; |
JovanEps | 11:4e700bbf93d7 | 610 | a[lda*j+k] = t; |
JovanEps | 11:4e700bbf93d7 | 611 | } |
JovanEps | 11:4e700bbf93d7 | 612 | daxpy(n-(k+1),t,&a[lda*k+k+1],1, |
JovanEps | 11:4e700bbf93d7 | 613 | &a[lda*j+k+1],1); |
JovanEps | 11:4e700bbf93d7 | 614 | } |
JovanEps | 11:4e700bbf93d7 | 615 | } else { |
JovanEps | 11:4e700bbf93d7 | 616 | *info = k; |
JovanEps | 11:4e700bbf93d7 | 617 | } |
JovanEps | 11:4e700bbf93d7 | 618 | } |
JovanEps | 11:4e700bbf93d7 | 619 | } |
JovanEps | 11:4e700bbf93d7 | 620 | ipvt[n-1] = n-1; |
JovanEps | 11:4e700bbf93d7 | 621 | if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1; |
JovanEps | 11:4e700bbf93d7 | 622 | return; |
JovanEps | 11:4e700bbf93d7 | 623 | } |
JovanEps | 11:4e700bbf93d7 | 624 | |
JovanEps | 11:4e700bbf93d7 | 625 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 626 | |
JovanEps | 11:4e700bbf93d7 | 627 | void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job ) |
JovanEps | 11:4e700bbf93d7 | 628 | |
JovanEps | 11:4e700bbf93d7 | 629 | |
JovanEps | 11:4e700bbf93d7 | 630 | /* We would like to declare a[][lda], but c does not allow it. In this |
JovanEps | 11:4e700bbf93d7 | 631 | function, references to a[i][j] are written a[lda*i+j]. */ |
JovanEps | 11:4e700bbf93d7 | 632 | |
JovanEps | 11:4e700bbf93d7 | 633 | /* |
JovanEps | 11:4e700bbf93d7 | 634 | dgesl solves the double precision system |
JovanEps | 11:4e700bbf93d7 | 635 | a * x = b or trans(a) * x = b |
JovanEps | 11:4e700bbf93d7 | 636 | using the factors computed by dgeco or dgefa. |
JovanEps | 11:4e700bbf93d7 | 637 | |
JovanEps | 11:4e700bbf93d7 | 638 | on entry |
JovanEps | 11:4e700bbf93d7 | 639 | |
JovanEps | 11:4e700bbf93d7 | 640 | a double precision[n][lda] |
JovanEps | 11:4e700bbf93d7 | 641 | the output from dgeco or dgefa. |
JovanEps | 11:4e700bbf93d7 | 642 | |
JovanEps | 11:4e700bbf93d7 | 643 | lda integer |
JovanEps | 11:4e700bbf93d7 | 644 | the leading dimension of the array a . |
JovanEps | 11:4e700bbf93d7 | 645 | |
JovanEps | 11:4e700bbf93d7 | 646 | n integer |
JovanEps | 11:4e700bbf93d7 | 647 | the order of the matrix a . |
JovanEps | 11:4e700bbf93d7 | 648 | |
JovanEps | 11:4e700bbf93d7 | 649 | ipvt integer[n] |
JovanEps | 11:4e700bbf93d7 | 650 | the pivot vector from dgeco or dgefa. |
JovanEps | 11:4e700bbf93d7 | 651 | |
JovanEps | 11:4e700bbf93d7 | 652 | b double precision[n] |
JovanEps | 11:4e700bbf93d7 | 653 | the right hand side vector. |
JovanEps | 11:4e700bbf93d7 | 654 | |
JovanEps | 11:4e700bbf93d7 | 655 | job integer |
JovanEps | 11:4e700bbf93d7 | 656 | = 0 to solve a*x = b , |
JovanEps | 11:4e700bbf93d7 | 657 | = nonzero to solve trans(a)*x = b where |
JovanEps | 11:4e700bbf93d7 | 658 | trans(a) is the transpose. |
JovanEps | 11:4e700bbf93d7 | 659 | |
JovanEps | 11:4e700bbf93d7 | 660 | on return |
JovanEps | 11:4e700bbf93d7 | 661 | |
JovanEps | 11:4e700bbf93d7 | 662 | b the solution vector x . |
JovanEps | 11:4e700bbf93d7 | 663 | |
JovanEps | 11:4e700bbf93d7 | 664 | error condition |
JovanEps | 11:4e700bbf93d7 | 665 | |
JovanEps | 11:4e700bbf93d7 | 666 | a division by zero will occur if the input factor contains a |
JovanEps | 11:4e700bbf93d7 | 667 | zero on the diagonal. technically this indicates singularity |
JovanEps | 11:4e700bbf93d7 | 668 | but it is often caused by improper arguments or improper |
JovanEps | 11:4e700bbf93d7 | 669 | setting of lda . it will not occur if the subroutines are |
JovanEps | 11:4e700bbf93d7 | 670 | called correctly and if dgeco has set rcond .gt. 0.0 |
JovanEps | 11:4e700bbf93d7 | 671 | or dgefa has set info .eq. 0 . |
JovanEps | 11:4e700bbf93d7 | 672 | |
JovanEps | 11:4e700bbf93d7 | 673 | to compute inverse(a) * c where c is a matrix |
JovanEps | 11:4e700bbf93d7 | 674 | with p columns |
JovanEps | 11:4e700bbf93d7 | 675 | dgeco(a,lda,n,ipvt,rcond,z) |
JovanEps | 11:4e700bbf93d7 | 676 | if (!rcond is too small){ |
JovanEps | 11:4e700bbf93d7 | 677 | for (j=0,j<p,j++) |
JovanEps | 11:4e700bbf93d7 | 678 | dgesl(a,lda,n,ipvt,c[j][0],0); |
JovanEps | 11:4e700bbf93d7 | 679 | } |
JovanEps | 11:4e700bbf93d7 | 680 | |
JovanEps | 11:4e700bbf93d7 | 681 | linpack. this version dated 08/14/78 . |
JovanEps | 11:4e700bbf93d7 | 682 | cleve moler, university of new mexico, argonne national lab. |
JovanEps | 11:4e700bbf93d7 | 683 | |
JovanEps | 11:4e700bbf93d7 | 684 | functions |
JovanEps | 11:4e700bbf93d7 | 685 | |
JovanEps | 11:4e700bbf93d7 | 686 | blas daxpy,ddot |
JovanEps | 11:4e700bbf93d7 | 687 | */ |
JovanEps | 11:4e700bbf93d7 | 688 | { |
JovanEps | 11:4e700bbf93d7 | 689 | /* internal variables */ |
JovanEps | 11:4e700bbf93d7 | 690 | |
JovanEps | 11:4e700bbf93d7 | 691 | REAL t; |
JovanEps | 11:4e700bbf93d7 | 692 | int k,kb,l,nm1; |
JovanEps | 11:4e700bbf93d7 | 693 | |
JovanEps | 11:4e700bbf93d7 | 694 | nm1 = n - 1; |
JovanEps | 11:4e700bbf93d7 | 695 | if (job == 0) { |
JovanEps | 11:4e700bbf93d7 | 696 | |
JovanEps | 11:4e700bbf93d7 | 697 | /* job = 0 , solve a * x = b |
JovanEps | 11:4e700bbf93d7 | 698 | first solve l*y = b */ |
JovanEps | 11:4e700bbf93d7 | 699 | |
JovanEps | 11:4e700bbf93d7 | 700 | if (nm1 >= 1) { |
JovanEps | 11:4e700bbf93d7 | 701 | for (k = 0; k < nm1; k++) { |
JovanEps | 11:4e700bbf93d7 | 702 | l = ipvt[k]; |
JovanEps | 11:4e700bbf93d7 | 703 | t = b[l]; |
JovanEps | 11:4e700bbf93d7 | 704 | if (l != k) { |
JovanEps | 11:4e700bbf93d7 | 705 | b[l] = b[k]; |
JovanEps | 11:4e700bbf93d7 | 706 | b[k] = t; |
JovanEps | 11:4e700bbf93d7 | 707 | } |
JovanEps | 11:4e700bbf93d7 | 708 | daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 ); |
JovanEps | 11:4e700bbf93d7 | 709 | } |
JovanEps | 11:4e700bbf93d7 | 710 | } |
JovanEps | 11:4e700bbf93d7 | 711 | |
JovanEps | 11:4e700bbf93d7 | 712 | /* now solve u*x = y */ |
JovanEps | 11:4e700bbf93d7 | 713 | |
JovanEps | 11:4e700bbf93d7 | 714 | for (kb = 0; kb < n; kb++) { |
JovanEps | 11:4e700bbf93d7 | 715 | k = n - (kb + 1); |
JovanEps | 11:4e700bbf93d7 | 716 | b[k] = b[k]/a[lda*k+k]; |
JovanEps | 11:4e700bbf93d7 | 717 | t = -b[k]; |
JovanEps | 11:4e700bbf93d7 | 718 | daxpy(k,t,&a[lda*k+0],1,&b[0],1 ); |
JovanEps | 11:4e700bbf93d7 | 719 | } |
JovanEps | 11:4e700bbf93d7 | 720 | } else { |
JovanEps | 11:4e700bbf93d7 | 721 | |
JovanEps | 11:4e700bbf93d7 | 722 | /* job = nonzero, solve trans(a) * x = b |
JovanEps | 11:4e700bbf93d7 | 723 | first solve trans(u)*y = b */ |
JovanEps | 11:4e700bbf93d7 | 724 | |
JovanEps | 11:4e700bbf93d7 | 725 | for (k = 0; k < n; k++) { |
JovanEps | 11:4e700bbf93d7 | 726 | t = ddot(k,&a[lda*k+0],1,&b[0],1); |
JovanEps | 11:4e700bbf93d7 | 727 | b[k] = (b[k] - t)/a[lda*k+k]; |
JovanEps | 11:4e700bbf93d7 | 728 | } |
JovanEps | 11:4e700bbf93d7 | 729 | |
JovanEps | 11:4e700bbf93d7 | 730 | /* now solve trans(l)*x = y */ |
JovanEps | 11:4e700bbf93d7 | 731 | |
JovanEps | 11:4e700bbf93d7 | 732 | if (nm1 >= 1) { |
JovanEps | 11:4e700bbf93d7 | 733 | for (kb = 1; kb < nm1; kb++) { |
JovanEps | 11:4e700bbf93d7 | 734 | k = n - (kb+1); |
JovanEps | 11:4e700bbf93d7 | 735 | b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); |
JovanEps | 11:4e700bbf93d7 | 736 | l = ipvt[k]; |
JovanEps | 11:4e700bbf93d7 | 737 | if (l != k) { |
JovanEps | 11:4e700bbf93d7 | 738 | t = b[l]; |
JovanEps | 11:4e700bbf93d7 | 739 | b[l] = b[k]; |
JovanEps | 11:4e700bbf93d7 | 740 | b[k] = t; |
JovanEps | 11:4e700bbf93d7 | 741 | } |
JovanEps | 11:4e700bbf93d7 | 742 | } |
JovanEps | 11:4e700bbf93d7 | 743 | } |
JovanEps | 11:4e700bbf93d7 | 744 | } |
JovanEps | 11:4e700bbf93d7 | 745 | return; |
JovanEps | 11:4e700bbf93d7 | 746 | } |
JovanEps | 11:4e700bbf93d7 | 747 | |
JovanEps | 11:4e700bbf93d7 | 748 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 749 | |
JovanEps | 11:4e700bbf93d7 | 750 | void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy) |
JovanEps | 11:4e700bbf93d7 | 751 | /* |
JovanEps | 11:4e700bbf93d7 | 752 | constant times a vector plus a vector. |
JovanEps | 11:4e700bbf93d7 | 753 | jack dongarra, linpack, 3/11/78. |
JovanEps | 11:4e700bbf93d7 | 754 | */ |
JovanEps | 11:4e700bbf93d7 | 755 | |
JovanEps | 11:4e700bbf93d7 | 756 | { |
JovanEps | 11:4e700bbf93d7 | 757 | int i,ix,iy,m,mp1; |
JovanEps | 11:4e700bbf93d7 | 758 | |
JovanEps | 11:4e700bbf93d7 | 759 | mp1 = 0; |
JovanEps | 11:4e700bbf93d7 | 760 | m = 0; |
JovanEps | 11:4e700bbf93d7 | 761 | |
JovanEps | 11:4e700bbf93d7 | 762 | if(n <= 0) return; |
JovanEps | 11:4e700bbf93d7 | 763 | if (da == ZERO) return; |
JovanEps | 11:4e700bbf93d7 | 764 | |
JovanEps | 11:4e700bbf93d7 | 765 | if(incx != 1 || incy != 1) { |
JovanEps | 11:4e700bbf93d7 | 766 | |
JovanEps | 11:4e700bbf93d7 | 767 | /* code for unequal increments or equal increments |
JovanEps | 11:4e700bbf93d7 | 768 | not equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 769 | |
JovanEps | 11:4e700bbf93d7 | 770 | ix = 0; |
JovanEps | 11:4e700bbf93d7 | 771 | iy = 0; |
JovanEps | 11:4e700bbf93d7 | 772 | if(incx < 0) ix = (-n+1)*incx; |
JovanEps | 11:4e700bbf93d7 | 773 | if(incy < 0)iy = (-n+1)*incy; |
JovanEps | 11:4e700bbf93d7 | 774 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 775 | dy[iy] = dy[iy] + da*dx[ix]; |
JovanEps | 11:4e700bbf93d7 | 776 | ix = ix + incx; |
JovanEps | 11:4e700bbf93d7 | 777 | iy = iy + incy; |
JovanEps | 11:4e700bbf93d7 | 778 | |
JovanEps | 11:4e700bbf93d7 | 779 | } |
JovanEps | 11:4e700bbf93d7 | 780 | return; |
JovanEps | 11:4e700bbf93d7 | 781 | } |
JovanEps | 11:4e700bbf93d7 | 782 | |
JovanEps | 11:4e700bbf93d7 | 783 | /* code for both increments equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 784 | |
JovanEps | 11:4e700bbf93d7 | 785 | |
JovanEps | 11:4e700bbf93d7 | 786 | #ifdef ROLL |
JovanEps | 11:4e700bbf93d7 | 787 | |
JovanEps | 11:4e700bbf93d7 | 788 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 789 | dy[i] = dy[i] + da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 790 | } |
JovanEps | 11:4e700bbf93d7 | 791 | |
JovanEps | 11:4e700bbf93d7 | 792 | |
JovanEps | 11:4e700bbf93d7 | 793 | #endif |
JovanEps | 11:4e700bbf93d7 | 794 | |
JovanEps | 11:4e700bbf93d7 | 795 | #ifdef UNROLL |
JovanEps | 11:4e700bbf93d7 | 796 | |
JovanEps | 11:4e700bbf93d7 | 797 | m = n % 4; |
JovanEps | 11:4e700bbf93d7 | 798 | if ( m != 0) { |
JovanEps | 11:4e700bbf93d7 | 799 | for (i = 0; i < m; i++) |
JovanEps | 11:4e700bbf93d7 | 800 | dy[i] = dy[i] + da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 801 | |
JovanEps | 11:4e700bbf93d7 | 802 | if (n < 4) return; |
JovanEps | 11:4e700bbf93d7 | 803 | } |
JovanEps | 11:4e700bbf93d7 | 804 | for (i = m; i < n; i = i + 4) { |
JovanEps | 11:4e700bbf93d7 | 805 | dy[i] = dy[i] + da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 806 | dy[i+1] = dy[i+1] + da*dx[i+1]; |
JovanEps | 11:4e700bbf93d7 | 807 | dy[i+2] = dy[i+2] + da*dx[i+2]; |
JovanEps | 11:4e700bbf93d7 | 808 | dy[i+3] = dy[i+3] + da*dx[i+3]; |
JovanEps | 11:4e700bbf93d7 | 809 | |
JovanEps | 11:4e700bbf93d7 | 810 | } |
JovanEps | 11:4e700bbf93d7 | 811 | |
JovanEps | 11:4e700bbf93d7 | 812 | #endif |
JovanEps | 11:4e700bbf93d7 | 813 | return; |
JovanEps | 11:4e700bbf93d7 | 814 | } |
JovanEps | 11:4e700bbf93d7 | 815 | |
JovanEps | 11:4e700bbf93d7 | 816 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 817 | |
JovanEps | 11:4e700bbf93d7 | 818 | REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy) |
JovanEps | 11:4e700bbf93d7 | 819 | /* |
JovanEps | 11:4e700bbf93d7 | 820 | forms the dot product of two vectors. |
JovanEps | 11:4e700bbf93d7 | 821 | jack dongarra, linpack, 3/11/78. |
JovanEps | 11:4e700bbf93d7 | 822 | */ |
JovanEps | 11:4e700bbf93d7 | 823 | |
JovanEps | 11:4e700bbf93d7 | 824 | { |
JovanEps | 11:4e700bbf93d7 | 825 | REAL dtemp; |
JovanEps | 11:4e700bbf93d7 | 826 | int i,ix,iy,m,mp1; |
JovanEps | 11:4e700bbf93d7 | 827 | |
JovanEps | 11:4e700bbf93d7 | 828 | mp1 = 0; |
JovanEps | 11:4e700bbf93d7 | 829 | m = 0; |
JovanEps | 11:4e700bbf93d7 | 830 | |
JovanEps | 11:4e700bbf93d7 | 831 | dtemp = ZERO; |
JovanEps | 11:4e700bbf93d7 | 832 | |
JovanEps | 11:4e700bbf93d7 | 833 | if(n <= 0) return(ZERO); |
JovanEps | 11:4e700bbf93d7 | 834 | |
JovanEps | 11:4e700bbf93d7 | 835 | if(incx != 1 || incy != 1) { |
JovanEps | 11:4e700bbf93d7 | 836 | |
JovanEps | 11:4e700bbf93d7 | 837 | /* code for unequal increments or equal increments |
JovanEps | 11:4e700bbf93d7 | 838 | not equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 839 | |
JovanEps | 11:4e700bbf93d7 | 840 | ix = 0; |
JovanEps | 11:4e700bbf93d7 | 841 | iy = 0; |
JovanEps | 11:4e700bbf93d7 | 842 | if (incx < 0) ix = (-n+1)*incx; |
JovanEps | 11:4e700bbf93d7 | 843 | if (incy < 0) iy = (-n+1)*incy; |
JovanEps | 11:4e700bbf93d7 | 844 | for (i = 0; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 845 | dtemp = dtemp + dx[ix]*dy[iy]; |
JovanEps | 11:4e700bbf93d7 | 846 | ix = ix + incx; |
JovanEps | 11:4e700bbf93d7 | 847 | iy = iy + incy; |
JovanEps | 11:4e700bbf93d7 | 848 | |
JovanEps | 11:4e700bbf93d7 | 849 | } |
JovanEps | 11:4e700bbf93d7 | 850 | return(dtemp); |
JovanEps | 11:4e700bbf93d7 | 851 | } |
JovanEps | 11:4e700bbf93d7 | 852 | |
JovanEps | 11:4e700bbf93d7 | 853 | /* code for both increments equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 854 | |
JovanEps | 11:4e700bbf93d7 | 855 | |
JovanEps | 11:4e700bbf93d7 | 856 | #ifdef ROLL |
JovanEps | 11:4e700bbf93d7 | 857 | |
JovanEps | 11:4e700bbf93d7 | 858 | for (i=0; i < n; i++) |
JovanEps | 11:4e700bbf93d7 | 859 | dtemp = dtemp + dx[i]*dy[i]; |
JovanEps | 11:4e700bbf93d7 | 860 | |
JovanEps | 11:4e700bbf93d7 | 861 | return(dtemp); |
JovanEps | 11:4e700bbf93d7 | 862 | |
JovanEps | 11:4e700bbf93d7 | 863 | #endif |
JovanEps | 0:43b96e9650ef | 864 | |
JovanEps | 11:4e700bbf93d7 | 865 | #ifdef UNROLL |
JovanEps | 11:4e700bbf93d7 | 866 | |
JovanEps | 11:4e700bbf93d7 | 867 | |
JovanEps | 11:4e700bbf93d7 | 868 | m = n % 5; |
JovanEps | 11:4e700bbf93d7 | 869 | if (m != 0) { |
JovanEps | 11:4e700bbf93d7 | 870 | for (i = 0; i < m; i++) |
JovanEps | 11:4e700bbf93d7 | 871 | dtemp = dtemp + dx[i]*dy[i]; |
JovanEps | 11:4e700bbf93d7 | 872 | if (n < 5) return(dtemp); |
JovanEps | 11:4e700bbf93d7 | 873 | } |
JovanEps | 11:4e700bbf93d7 | 874 | for (i = m; i < n; i = i + 5) { |
JovanEps | 11:4e700bbf93d7 | 875 | dtemp = dtemp + dx[i]*dy[i] + |
JovanEps | 11:4e700bbf93d7 | 876 | dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] + |
JovanEps | 11:4e700bbf93d7 | 877 | dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4]; |
JovanEps | 11:4e700bbf93d7 | 878 | } |
JovanEps | 11:4e700bbf93d7 | 879 | return(dtemp); |
JovanEps | 11:4e700bbf93d7 | 880 | |
JovanEps | 11:4e700bbf93d7 | 881 | #endif |
JovanEps | 11:4e700bbf93d7 | 882 | |
JovanEps | 11:4e700bbf93d7 | 883 | } |
JovanEps | 11:4e700bbf93d7 | 884 | |
JovanEps | 11:4e700bbf93d7 | 885 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 886 | void dscal(int n, REAL da, REAL dx[], int incx) |
JovanEps | 11:4e700bbf93d7 | 887 | |
JovanEps | 11:4e700bbf93d7 | 888 | /* scales a vector by a constant. |
JovanEps | 11:4e700bbf93d7 | 889 | jack dongarra, linpack, 3/11/78. |
JovanEps | 11:4e700bbf93d7 | 890 | */ |
JovanEps | 11:4e700bbf93d7 | 891 | |
JovanEps | 11:4e700bbf93d7 | 892 | { |
JovanEps | 11:4e700bbf93d7 | 893 | int i,m,mp1,nincx; |
JovanEps | 11:4e700bbf93d7 | 894 | |
JovanEps | 11:4e700bbf93d7 | 895 | mp1 = 0; |
JovanEps | 11:4e700bbf93d7 | 896 | m = 0; |
JovanEps | 11:4e700bbf93d7 | 897 | |
JovanEps | 11:4e700bbf93d7 | 898 | if(n <= 0)return; |
JovanEps | 11:4e700bbf93d7 | 899 | if(incx != 1) { |
JovanEps | 11:4e700bbf93d7 | 900 | |
JovanEps | 11:4e700bbf93d7 | 901 | /* code for increment not equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 902 | |
JovanEps | 11:4e700bbf93d7 | 903 | nincx = n*incx; |
JovanEps | 11:4e700bbf93d7 | 904 | for (i = 0; i < nincx; i = i + incx) |
JovanEps | 11:4e700bbf93d7 | 905 | dx[i] = da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 906 | |
JovanEps | 11:4e700bbf93d7 | 907 | return; |
JovanEps | 11:4e700bbf93d7 | 908 | } |
JovanEps | 11:4e700bbf93d7 | 909 | |
JovanEps | 11:4e700bbf93d7 | 910 | /* code for increment equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 911 | |
JovanEps | 11:4e700bbf93d7 | 912 | |
JovanEps | 11:4e700bbf93d7 | 913 | #ifdef ROLL |
JovanEps | 11:4e700bbf93d7 | 914 | |
JovanEps | 11:4e700bbf93d7 | 915 | for (i = 0; i < n; i++) |
JovanEps | 11:4e700bbf93d7 | 916 | dx[i] = da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 917 | |
JovanEps | 11:4e700bbf93d7 | 918 | |
JovanEps | 11:4e700bbf93d7 | 919 | #endif |
JovanEps | 11:4e700bbf93d7 | 920 | |
JovanEps | 11:4e700bbf93d7 | 921 | #ifdef UNROLL |
JovanEps | 11:4e700bbf93d7 | 922 | |
JovanEps | 11:4e700bbf93d7 | 923 | |
JovanEps | 11:4e700bbf93d7 | 924 | m = n % 5; |
JovanEps | 11:4e700bbf93d7 | 925 | if (m != 0) { |
JovanEps | 11:4e700bbf93d7 | 926 | for (i = 0; i < m; i++) |
JovanEps | 11:4e700bbf93d7 | 927 | dx[i] = da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 928 | if (n < 5) return; |
JovanEps | 11:4e700bbf93d7 | 929 | } |
JovanEps | 11:4e700bbf93d7 | 930 | for (i = m; i < n; i = i + 5) { |
JovanEps | 11:4e700bbf93d7 | 931 | dx[i] = da*dx[i]; |
JovanEps | 11:4e700bbf93d7 | 932 | dx[i+1] = da*dx[i+1]; |
JovanEps | 11:4e700bbf93d7 | 933 | dx[i+2] = da*dx[i+2]; |
JovanEps | 11:4e700bbf93d7 | 934 | dx[i+3] = da*dx[i+3]; |
JovanEps | 11:4e700bbf93d7 | 935 | dx[i+4] = da*dx[i+4]; |
JovanEps | 11:4e700bbf93d7 | 936 | } |
JovanEps | 11:4e700bbf93d7 | 937 | |
JovanEps | 11:4e700bbf93d7 | 938 | #endif |
JovanEps | 11:4e700bbf93d7 | 939 | |
JovanEps | 11:4e700bbf93d7 | 940 | } |
JovanEps | 11:4e700bbf93d7 | 941 | |
JovanEps | 11:4e700bbf93d7 | 942 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 943 | int idamax(int n, REAL dx[], int incx) |
JovanEps | 11:4e700bbf93d7 | 944 | |
JovanEps | 11:4e700bbf93d7 | 945 | /* |
JovanEps | 11:4e700bbf93d7 | 946 | finds the index of element having max. absolute value. |
JovanEps | 11:4e700bbf93d7 | 947 | jack dongarra, linpack, 3/11/78. |
JovanEps | 11:4e700bbf93d7 | 948 | */ |
JovanEps | 11:4e700bbf93d7 | 949 | |
JovanEps | 11:4e700bbf93d7 | 950 | |
JovanEps | 11:4e700bbf93d7 | 951 | { |
JovanEps | 11:4e700bbf93d7 | 952 | REAL dmax; |
JovanEps | 11:4e700bbf93d7 | 953 | int i, ix, itemp; |
JovanEps | 11:4e700bbf93d7 | 954 | |
JovanEps | 11:4e700bbf93d7 | 955 | if( n < 1 ) return(-1); |
JovanEps | 11:4e700bbf93d7 | 956 | if(n ==1 ) return(0); |
JovanEps | 11:4e700bbf93d7 | 957 | if(incx != 1) { |
JovanEps | 11:4e700bbf93d7 | 958 | |
JovanEps | 11:4e700bbf93d7 | 959 | /* code for increment not equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 960 | |
JovanEps | 11:4e700bbf93d7 | 961 | ix = 1; |
JovanEps | 11:4e700bbf93d7 | 962 | dmax = fabs((double)dx[0]); |
JovanEps | 11:4e700bbf93d7 | 963 | ix = ix + incx; |
JovanEps | 11:4e700bbf93d7 | 964 | for (i = 1; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 965 | if(fabs((double)dx[ix]) > dmax) { |
JovanEps | 11:4e700bbf93d7 | 966 | itemp = i; |
JovanEps | 11:4e700bbf93d7 | 967 | dmax = fabs((double)dx[ix]); |
JovanEps | 11:4e700bbf93d7 | 968 | } |
JovanEps | 11:4e700bbf93d7 | 969 | ix = ix + incx; |
JovanEps | 11:4e700bbf93d7 | 970 | } |
JovanEps | 11:4e700bbf93d7 | 971 | } else { |
JovanEps | 11:4e700bbf93d7 | 972 | |
JovanEps | 11:4e700bbf93d7 | 973 | /* code for increment equal to 1 */ |
JovanEps | 11:4e700bbf93d7 | 974 | |
JovanEps | 11:4e700bbf93d7 | 975 | itemp = 0; |
JovanEps | 11:4e700bbf93d7 | 976 | dmax = fabs((double)dx[0]); |
JovanEps | 11:4e700bbf93d7 | 977 | for (i = 1; i < n; i++) { |
JovanEps | 11:4e700bbf93d7 | 978 | if(fabs((double)dx[i]) > dmax) { |
JovanEps | 11:4e700bbf93d7 | 979 | itemp = i; |
JovanEps | 11:4e700bbf93d7 | 980 | dmax = fabs((double)dx[i]); |
JovanEps | 11:4e700bbf93d7 | 981 | } |
JovanEps | 11:4e700bbf93d7 | 982 | } |
JovanEps | 11:4e700bbf93d7 | 983 | } |
JovanEps | 11:4e700bbf93d7 | 984 | return (itemp); |
JovanEps | 11:4e700bbf93d7 | 985 | } |
JovanEps | 11:4e700bbf93d7 | 986 | |
JovanEps | 11:4e700bbf93d7 | 987 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 988 | REAL epslon (REAL x) |
JovanEps | 11:4e700bbf93d7 | 989 | |
JovanEps | 11:4e700bbf93d7 | 990 | /* |
JovanEps | 11:4e700bbf93d7 | 991 | estimate unit roundoff in quantities of size x. |
JovanEps | 11:4e700bbf93d7 | 992 | */ |
JovanEps | 0:43b96e9650ef | 993 | |
JovanEps | 11:4e700bbf93d7 | 994 | { |
JovanEps | 11:4e700bbf93d7 | 995 | REAL a,b,c,eps; |
JovanEps | 11:4e700bbf93d7 | 996 | /* |
JovanEps | 11:4e700bbf93d7 | 997 | this program should function properly on all systems |
JovanEps | 11:4e700bbf93d7 | 998 | satisfying the following two assumptions, |
JovanEps | 11:4e700bbf93d7 | 999 | 1. the base used in representing dfloating point |
JovanEps | 11:4e700bbf93d7 | 1000 | numbers is not a power of three. |
JovanEps | 11:4e700bbf93d7 | 1001 | 2. the quantity a in statement 10 is represented to |
JovanEps | 11:4e700bbf93d7 | 1002 | the accuracy used in dfloating point variables |
JovanEps | 11:4e700bbf93d7 | 1003 | that are stored in memory. |
JovanEps | 11:4e700bbf93d7 | 1004 | the statement number 10 and the go to 10 are intended to |
JovanEps | 11:4e700bbf93d7 | 1005 | force optimizing compilers to generate code satisfying |
JovanEps | 11:4e700bbf93d7 | 1006 | assumption 2. |
JovanEps | 11:4e700bbf93d7 | 1007 | under these assumptions, it should be true that, |
JovanEps | 11:4e700bbf93d7 | 1008 | a is not exactly equal to four-thirds, |
JovanEps | 11:4e700bbf93d7 | 1009 | b has a zero for its last bit or digit, |
JovanEps | 11:4e700bbf93d7 | 1010 | c is not exactly equal to one, |
JovanEps | 11:4e700bbf93d7 | 1011 | eps measures the separation of 1.0 from |
JovanEps | 11:4e700bbf93d7 | 1012 | the next larger dfloating point number. |
JovanEps | 11:4e700bbf93d7 | 1013 | the developers of eispack would appreciate being informed |
JovanEps | 11:4e700bbf93d7 | 1014 | about any systems where these assumptions do not hold. |
JovanEps | 11:4e700bbf93d7 | 1015 | |
JovanEps | 11:4e700bbf93d7 | 1016 | ***************************************************************** |
JovanEps | 11:4e700bbf93d7 | 1017 | this routine is one of the auxiliary routines used by eispack iii |
JovanEps | 11:4e700bbf93d7 | 1018 | to avoid machine dependencies. |
JovanEps | 11:4e700bbf93d7 | 1019 | ***************************************************************** |
JovanEps | 11:4e700bbf93d7 | 1020 | |
JovanEps | 11:4e700bbf93d7 | 1021 | this version dated 4/6/83. |
JovanEps | 11:4e700bbf93d7 | 1022 | */ |
JovanEps | 11:4e700bbf93d7 | 1023 | |
JovanEps | 11:4e700bbf93d7 | 1024 | a = 4.0e0/3.0e0; |
JovanEps | 11:4e700bbf93d7 | 1025 | eps = ZERO; |
JovanEps | 11:4e700bbf93d7 | 1026 | while (eps == ZERO) { |
JovanEps | 11:4e700bbf93d7 | 1027 | b = a - ONE; |
JovanEps | 11:4e700bbf93d7 | 1028 | c = b + b + b; |
JovanEps | 11:4e700bbf93d7 | 1029 | eps = fabs((double)(c-ONE)); |
JovanEps | 11:4e700bbf93d7 | 1030 | } |
JovanEps | 11:4e700bbf93d7 | 1031 | return(eps*fabs((double)x)); |
JovanEps | 11:4e700bbf93d7 | 1032 | } |
JovanEps | 11:4e700bbf93d7 | 1033 | |
JovanEps | 11:4e700bbf93d7 | 1034 | /*----------------------*/ |
JovanEps | 11:4e700bbf93d7 | 1035 | void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) |
JovanEps | 11:4e700bbf93d7 | 1036 | |
JovanEps | 11:4e700bbf93d7 | 1037 | |
JovanEps | 11:4e700bbf93d7 | 1038 | /* We would like to declare m[][ldm], but c does not allow it. In this |
JovanEps | 11:4e700bbf93d7 | 1039 | function, references to m[i][j] are written m[ldm*i+j]. */ |
JovanEps | 11:4e700bbf93d7 | 1040 | |
JovanEps | 11:4e700bbf93d7 | 1041 | /* |
JovanEps | 11:4e700bbf93d7 | 1042 | purpose: |
JovanEps | 11:4e700bbf93d7 | 1043 | multiply matrix m times vector x and add the result to vector y. |
JovanEps | 11:4e700bbf93d7 | 1044 | |
JovanEps | 11:4e700bbf93d7 | 1045 | parameters: |
JovanEps | 11:4e700bbf93d7 | 1046 | |
JovanEps | 11:4e700bbf93d7 | 1047 | n1 integer, number of elements in vector y, and number of rows in |
JovanEps | 11:4e700bbf93d7 | 1048 | matrix m |
JovanEps | 11:4e700bbf93d7 | 1049 | |
JovanEps | 11:4e700bbf93d7 | 1050 | y double [n1], vector of length n1 to which is added |
JovanEps | 11:4e700bbf93d7 | 1051 | the product m*x |
JovanEps | 11:4e700bbf93d7 | 1052 | |
JovanEps | 11:4e700bbf93d7 | 1053 | n2 integer, number of elements in vector x, and number of columns |
JovanEps | 11:4e700bbf93d7 | 1054 | in matrix m |
JovanEps | 11:4e700bbf93d7 | 1055 | |
JovanEps | 11:4e700bbf93d7 | 1056 | ldm integer, leading dimension of array m |
JovanEps | 11:4e700bbf93d7 | 1057 | |
JovanEps | 11:4e700bbf93d7 | 1058 | x double [n2], vector of length n2 |
JovanEps | 11:4e700bbf93d7 | 1059 | |
JovanEps | 11:4e700bbf93d7 | 1060 | m double [ldm][n2], matrix of n1 rows and n2 columns |
JovanEps | 0:43b96e9650ef | 1061 | |
JovanEps | 11:4e700bbf93d7 | 1062 | ---------------------------------------------------------------------- |
JovanEps | 11:4e700bbf93d7 | 1063 | */ |
JovanEps | 11:4e700bbf93d7 | 1064 | { |
JovanEps | 11:4e700bbf93d7 | 1065 | int j,i,jmin; |
JovanEps | 11:4e700bbf93d7 | 1066 | /* cleanup odd vector */ |
JovanEps | 11:4e700bbf93d7 | 1067 | |
JovanEps | 11:4e700bbf93d7 | 1068 | j = n2 % 2; |
JovanEps | 11:4e700bbf93d7 | 1069 | if (j >= 1) { |
JovanEps | 11:4e700bbf93d7 | 1070 | j = j - 1; |
JovanEps | 11:4e700bbf93d7 | 1071 | for (i = 0; i < n1; i++) |
JovanEps | 11:4e700bbf93d7 | 1072 | y[i] = (y[i]) + x[j]*m[ldm*j+i]; |
JovanEps | 11:4e700bbf93d7 | 1073 | } |
JovanEps | 11:4e700bbf93d7 | 1074 | |
JovanEps | 11:4e700bbf93d7 | 1075 | /* cleanup odd group of two vectors */ |
JovanEps | 11:4e700bbf93d7 | 1076 | |
JovanEps | 11:4e700bbf93d7 | 1077 | j = n2 % 4; |
JovanEps | 11:4e700bbf93d7 | 1078 | if (j >= 2) { |
JovanEps | 11:4e700bbf93d7 | 1079 | j = j - 1; |
JovanEps | 11:4e700bbf93d7 | 1080 | for (i = 0; i < n1; i++) |
JovanEps | 11:4e700bbf93d7 | 1081 | y[i] = ( (y[i]) |
JovanEps | 11:4e700bbf93d7 | 1082 | + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; |
JovanEps | 11:4e700bbf93d7 | 1083 | } |
JovanEps | 11:4e700bbf93d7 | 1084 | |
JovanEps | 11:4e700bbf93d7 | 1085 | /* cleanup odd group of four vectors */ |
JovanEps | 11:4e700bbf93d7 | 1086 | |
JovanEps | 11:4e700bbf93d7 | 1087 | j = n2 % 8; |
JovanEps | 11:4e700bbf93d7 | 1088 | if (j >= 4) { |
JovanEps | 11:4e700bbf93d7 | 1089 | j = j - 1; |
JovanEps | 11:4e700bbf93d7 | 1090 | for (i = 0; i < n1; i++) |
JovanEps | 11:4e700bbf93d7 | 1091 | y[i] = ((( (y[i]) |
JovanEps | 11:4e700bbf93d7 | 1092 | + x[j-3]*m[ldm*(j-3)+i]) |
JovanEps | 11:4e700bbf93d7 | 1093 | + x[j-2]*m[ldm*(j-2)+i]) |
JovanEps | 11:4e700bbf93d7 | 1094 | + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; |
JovanEps | 11:4e700bbf93d7 | 1095 | } |
JovanEps | 11:4e700bbf93d7 | 1096 | |
JovanEps | 11:4e700bbf93d7 | 1097 | /* cleanup odd group of eight vectors */ |
JovanEps | 0:43b96e9650ef | 1098 | |
JovanEps | 11:4e700bbf93d7 | 1099 | j = n2 % 16; |
JovanEps | 11:4e700bbf93d7 | 1100 | if (j >= 8) { |
JovanEps | 11:4e700bbf93d7 | 1101 | j = j - 1; |
JovanEps | 11:4e700bbf93d7 | 1102 | for (i = 0; i < n1; i++) |
JovanEps | 11:4e700bbf93d7 | 1103 | y[i] = ((((((( (y[i]) |
JovanEps | 11:4e700bbf93d7 | 1104 | + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i]) |
JovanEps | 11:4e700bbf93d7 | 1105 | + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i]) |
JovanEps | 11:4e700bbf93d7 | 1106 | + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i]) |
JovanEps | 11:4e700bbf93d7 | 1107 | + x[j-1]*m[ldm*(j-1)+i]) + x[j] *m[ldm*j+i]; |
JovanEps | 11:4e700bbf93d7 | 1108 | } |
JovanEps | 11:4e700bbf93d7 | 1109 | |
JovanEps | 11:4e700bbf93d7 | 1110 | /* main loop - groups of sixteen vectors */ |
JovanEps | 11:4e700bbf93d7 | 1111 | |
JovanEps | 11:4e700bbf93d7 | 1112 | jmin = (n2%16)+16; |
JovanEps | 11:4e700bbf93d7 | 1113 | for (j = jmin-1; j < n2; j = j + 16) { |
JovanEps | 11:4e700bbf93d7 | 1114 | for (i = 0; i < n1; i++) |
JovanEps | 11:4e700bbf93d7 | 1115 | y[i] = ((((((((((((((( (y[i]) |
JovanEps | 11:4e700bbf93d7 | 1116 | + x[j-15]*m[ldm*(j-15)+i]) |
JovanEps | 11:4e700bbf93d7 | 1117 | + x[j-14]*m[ldm*(j-14)+i]) |
JovanEps | 11:4e700bbf93d7 | 1118 | + x[j-13]*m[ldm*(j-13)+i]) |
JovanEps | 11:4e700bbf93d7 | 1119 | + x[j-12]*m[ldm*(j-12)+i]) |
JovanEps | 11:4e700bbf93d7 | 1120 | + x[j-11]*m[ldm*(j-11)+i]) |
JovanEps | 11:4e700bbf93d7 | 1121 | + x[j-10]*m[ldm*(j-10)+i]) |
JovanEps | 11:4e700bbf93d7 | 1122 | + x[j- 9]*m[ldm*(j- 9)+i]) |
JovanEps | 11:4e700bbf93d7 | 1123 | + x[j- 8]*m[ldm*(j- 8)+i]) |
JovanEps | 11:4e700bbf93d7 | 1124 | + x[j- 7]*m[ldm*(j- 7)+i]) |
JovanEps | 11:4e700bbf93d7 | 1125 | + x[j- 6]*m[ldm*(j- 6)+i]) |
JovanEps | 11:4e700bbf93d7 | 1126 | + x[j- 5]*m[ldm*(j- 5)+i]) |
JovanEps | 11:4e700bbf93d7 | 1127 | + x[j- 4]*m[ldm*(j- 4)+i]) |
JovanEps | 11:4e700bbf93d7 | 1128 | + x[j- 3]*m[ldm*(j- 3)+i]) |
JovanEps | 11:4e700bbf93d7 | 1129 | + x[j- 2]*m[ldm*(j- 2)+i]) |
JovanEps | 11:4e700bbf93d7 | 1130 | + x[j- 1]*m[ldm*(j- 1)+i]) |
JovanEps | 11:4e700bbf93d7 | 1131 | + x[j] *m[ldm*j+i]; |
JovanEps | 11:4e700bbf93d7 | 1132 | } |
JovanEps | 11:4e700bbf93d7 | 1133 | return; |
JovanEps | 11:4e700bbf93d7 | 1134 | } |
JovanEps | 11:4e700bbf93d7 | 1135 | //*---------------------------------------- |
JovanEps | 2:273153e44338 | 1136 | int main() |
JovanEps | 0:43b96e9650ef | 1137 | { |
JovanEps | 11:4e700bbf93d7 | 1138 | pc.baud(9600); |
JovanEps | 6:5e0f3eedaf66 | 1139 | while(1) { |
JovanEps | 6:5e0f3eedaf66 | 1140 | |
JovanEps | 2:273153e44338 | 1141 | pc.printf("Starting benchmark...\n"); |
JovanEps | 6:5e0f3eedaf66 | 1142 | |
JovanEps | 2:273153e44338 | 1143 | do_benchmark(); |
JovanEps | 6:5e0f3eedaf66 | 1144 | |
JovanEps | 2:273153e44338 | 1145 | pc.printf(" kraj \n\n"); |
JovanEps | 2:273153e44338 | 1146 | } |
JovanEps | 2:273153e44338 | 1147 | } |