Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
main.cpp@11:4e700bbf93d7, 2017-01-18 (annotated)
- Committer:
- JovanEps
- Date:
- Wed Jan 18 23:35:49 2017 +0000
- Revision:
- 11:4e700bbf93d7
- Parent:
- 8:66f6deeb2556
- Child:
- 12:623ce727d4fa
RC Alpha
Who changed what in which revision?
| User | Revision | Line number | New contents of line |
|---|---|---|---|
| JovanEps | 0:43b96e9650ef | 1 | //******************************************************** |
| JovanEps | 11:4e700bbf93d7 | 2 | //** RC Apha --- based on roylongbottom.org ARM ****** |
| JovanEps | 11:4e700bbf93d7 | 3 | //** Nucleo-64-144 Stm32F103 to F767 benchmark ****** |
| JovanEps | 11:4e700bbf93d7 | 4 | //** Limpack -port form Arduino IDE ****** |
| JovanEps | 11:4e700bbf93d7 | 5 | //** Jovan Ivkovic - 2017 ****** |
| JovanEps | 0:43b96e9650ef | 6 | //******************************************************** |
| JovanEps | 11:4e700bbf93d7 | 7 | #define GCCARMDP |
| JovanEps | 11:4e700bbf93d7 | 8 | |
| JovanEps | 11:4e700bbf93d7 | 9 | #define UNROLL |
| JovanEps | 11:4e700bbf93d7 | 10 | #define DP |
| JovanEps | 11:4e700bbf93d7 | 11 | |
| JovanEps | 11:4e700bbf93d7 | 12 | #ifdef SP |
| JovanEps | 11:4e700bbf93d7 | 13 | #define REAL float |
| JovanEps | 11:4e700bbf93d7 | 14 | #define ZERO 0.0 |
| JovanEps | 11:4e700bbf93d7 | 15 | #define ONE 1.0 |
| JovanEps | 11:4e700bbf93d7 | 16 | #define PREC "Single" |
| JovanEps | 11:4e700bbf93d7 | 17 | #endif |
| JovanEps | 11:4e700bbf93d7 | 18 | |
| JovanEps | 11:4e700bbf93d7 | 19 | #ifdef DP |
| JovanEps | 11:4e700bbf93d7 | 20 | #define REAL double |
| JovanEps | 11:4e700bbf93d7 | 21 | #define ZERO 0.0e0 |
| JovanEps | 11:4e700bbf93d7 | 22 | #define ONE 1.0e0 |
| JovanEps | 11:4e700bbf93d7 | 23 | #define PREC "Double" |
| JovanEps | 11:4e700bbf93d7 | 24 | #endif |
| JovanEps | 11:4e700bbf93d7 | 25 | |
| JovanEps | 11:4e700bbf93d7 | 26 | #ifdef ROLL |
| JovanEps | 11:4e700bbf93d7 | 27 | #define ROLLING "Rolled" |
| JovanEps | 11:4e700bbf93d7 | 28 | #endif |
| JovanEps | 11:4e700bbf93d7 | 29 | #ifdef UNROLL |
| JovanEps | 11:4e700bbf93d7 | 30 | #define ROLLING "Unrolled" |
| JovanEps | 11:4e700bbf93d7 | 31 | #endif |
| JovanEps | 11:4e700bbf93d7 | 32 | |
| JovanEps | 11:4e700bbf93d7 | 33 | // VERSION |
| JovanEps | 11:4e700bbf93d7 | 34 | |
| JovanEps | 11:4e700bbf93d7 | 35 | #ifdef CNNT |
| JovanEps | 11:4e700bbf93d7 | 36 | #define options "Non-optimised" |
| JovanEps | 11:4e700bbf93d7 | 37 | #define opt "0" |
| JovanEps | 11:4e700bbf93d7 | 38 | #else |
| JovanEps | 11:4e700bbf93d7 | 39 | // #define options "Optimised" |
| JovanEps | 11:4e700bbf93d7 | 40 | // #define options "Opt 3 32 Bit" |
| JovanEps | 11:4e700bbf93d7 | 41 | #define options "vfpv4 32 Bit" |
| JovanEps | 11:4e700bbf93d7 | 42 | #define opt "1" |
| JovanEps | 11:4e700bbf93d7 | 43 | #endif |
| JovanEps | 11:4e700bbf93d7 | 44 | |
| JovanEps | 11:4e700bbf93d7 | 45 | #define NTIMES 10 |
| JovanEps | 11:4e700bbf93d7 | 46 | |
| JovanEps | 0:43b96e9650ef | 47 | #include "mbed.h" |
| JovanEps | 2:273153e44338 | 48 | |
| JovanEps | 11:4e700bbf93d7 | 49 | |
| JovanEps | 11:4e700bbf93d7 | 50 | //--------------------------------- |
| JovanEps | 11:4e700bbf93d7 | 51 | DigitalOut myled(LED1); |
| JovanEps | 11:4e700bbf93d7 | 52 | Serial pc(USBTX, USBRX); //USB is out of oreder on Embeded-Pi |
| JovanEps | 11:4e700bbf93d7 | 53 | //Serial pc(PC_10,PC_11); //RX-TX D0,D1 Embeded-PI ports |
| JovanEps | 11:4e700bbf93d7 | 54 | Timer timer; |
| JovanEps | 11:4e700bbf93d7 | 55 | //-------------------------------- |
| JovanEps | 11:4e700bbf93d7 | 56 | |
| JovanEps | 11:4e700bbf93d7 | 57 | void print_time (int row); |
| JovanEps | 11:4e700bbf93d7 | 58 | void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma); |
| JovanEps | 11:4e700bbf93d7 | 59 | void dgefa (REAL a[], int lda, int n, int ipvt[], int *info); |
| JovanEps | 11:4e700bbf93d7 | 60 | void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job); |
| JovanEps | 11:4e700bbf93d7 | 61 | void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); |
| JovanEps | 11:4e700bbf93d7 | 62 | void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy); |
| JovanEps | 11:4e700bbf93d7 | 63 | REAL epslon (REAL x); |
| JovanEps | 11:4e700bbf93d7 | 64 | int idamax (int n, REAL dx[], int incx); |
| JovanEps | 11:4e700bbf93d7 | 65 | void dscal (int n, REAL da, REAL dx[], int incx); |
| JovanEps | 11:4e700bbf93d7 | 66 | REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy); |
| JovanEps | 11:4e700bbf93d7 | 67 | |
| JovanEps | 11:4e700bbf93d7 | 68 | static REAL atime[9][15]; |
| JovanEps | 11:4e700bbf93d7 | 69 | double runSecs = 1; |
| JovanEps | 11:4e700bbf93d7 | 70 | |
| JovanEps | 11:4e700bbf93d7 | 71 | void do_benchmark(void){ |
| JovanEps | 11:4e700bbf93d7 | 72 | |
| JovanEps | 11:4e700bbf93d7 | 73 | float t1,t2 = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 74 | static REAL aa[20*20],a[20*21],b[20],x[20]; |
| JovanEps | 11:4e700bbf93d7 | 75 | REAL cray,ops,total,norma,normx; |
| JovanEps | 11:4e700bbf93d7 | 76 | REAL resid,residn,eps,tm2,epsn,x1,x2; |
| JovanEps | 11:4e700bbf93d7 | 77 | REAL mflops; |
| JovanEps | 11:4e700bbf93d7 | 78 | static int ipvt[21],n,i,j,ntimes,info,lda,ldaa; |
| JovanEps | 11:4e700bbf93d7 | 79 | int endit, pass, loop; |
| JovanEps | 11:4e700bbf93d7 | 80 | REAL overhead1, overhead2, time2; |
| JovanEps | 11:4e700bbf93d7 | 81 | REAL max1, max2; |
| JovanEps | 11:4e700bbf93d7 | 82 | char was[5][20]; |
| JovanEps | 11:4e700bbf93d7 | 83 | char expect[5][20]; |
| JovanEps | 11:4e700bbf93d7 | 84 | char title[5][20]; |
| JovanEps | 11:4e700bbf93d7 | 85 | int errors; |
| JovanEps | 11:4e700bbf93d7 | 86 | int nopause = 1; |
| JovanEps | 11:4e700bbf93d7 | 87 | |
| JovanEps | 11:4e700bbf93d7 | 88 | /* |
| JovanEps | 11:4e700bbf93d7 | 89 | if (argc > 1) { |
| JovanEps | 11:4e700bbf93d7 | 90 | switch (argv[1][0]) { |
| JovanEps | 11:4e700bbf93d7 | 91 | case 'N': |
| JovanEps | 11:4e700bbf93d7 | 92 | nopause = 0; |
| JovanEps | 11:4e700bbf93d7 | 93 | break; |
| JovanEps | 11:4e700bbf93d7 | 94 | case 'n': |
| JovanEps | 11:4e700bbf93d7 | 95 | nopause = 0; |
| JovanEps | 11:4e700bbf93d7 | 96 | break; |
| JovanEps | 11:4e700bbf93d7 | 97 | } |
| JovanEps | 11:4e700bbf93d7 | 98 | } |
| JovanEps | 11:4e700bbf93d7 | 99 | */ |
| JovanEps | 11:4e700bbf93d7 | 100 | pc.printf("\n"); |
| JovanEps | 11:4e700bbf93d7 | 101 | |
| JovanEps | 11:4e700bbf93d7 | 102 | |
| JovanEps | 11:4e700bbf93d7 | 103 | // outfile = fopen("Linpack.txt","a+"); |
| JovanEps | 11:4e700bbf93d7 | 104 | // if (outfile == NULL) |
| JovanEps | 11:4e700bbf93d7 | 105 | // { |
| JovanEps | 11:4e700bbf93d7 | 106 | // printf (" Cannot open results file \n\n"); |
| JovanEps | 11:4e700bbf93d7 | 107 | // printf(" Press Enter\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 108 | // int g = getchar(); |
| JovanEps | 11:4e700bbf93d7 | 109 | // exit (0); |
| JovanEps | 11:4e700bbf93d7 | 110 | //} |
| JovanEps | 11:4e700bbf93d7 | 111 | |
| JovanEps | 11:4e700bbf93d7 | 112 | |
| JovanEps | 11:4e700bbf93d7 | 113 | |
| JovanEps | 11:4e700bbf93d7 | 114 | #ifdef GCCARMDP |
| JovanEps | 11:4e700bbf93d7 | 115 | pc.printf(expect[0], " 1.7"); |
| JovanEps | 11:4e700bbf93d7 | 116 | pc.printf(expect[1], " 7.41628980e-14"); |
| JovanEps | 11:4e700bbf93d7 | 117 | pc.printf(expect[2], " 2.22044605e-16"); |
| JovanEps | 11:4e700bbf93d7 | 118 | pc.printf(expect[3], " -1.49880108e-14"); |
| JovanEps | 11:4e700bbf93d7 | 119 | pc.printf(expect[4], " -1.89848137e-14"); |
| JovanEps | 11:4e700bbf93d7 | 120 | #endif |
| JovanEps | 11:4e700bbf93d7 | 121 | |
| JovanEps | 11:4e700bbf93d7 | 122 | #ifdef GCCARMSP |
| JovanEps | 11:4e700bbf93d7 | 123 | pc.printf(expect[0], " 1.6"); |
| JovanEps | 11:4e700bbf93d7 | 124 | pc.printf(expect[1], " 3.80277634e-05"); |
| JovanEps | 11:4e700bbf93d7 | 125 | pc.printf(expect[2], " 1.19209290e-07"); |
| JovanEps | 11:4e700bbf93d7 | 126 | pc.printf(expect[3], " -1.38282776e-05"); |
| JovanEps | 11:4e700bbf93d7 | 127 | pc.printf(expect[4], " -7.51018524e-06"); |
| JovanEps | 11:4e700bbf93d7 | 128 | #endif |
| JovanEps | 11:4e700bbf93d7 | 129 | |
| JovanEps | 11:4e700bbf93d7 | 130 | lda = 21; |
| JovanEps | 11:4e700bbf93d7 | 131 | ldaa = 20; |
| JovanEps | 11:4e700bbf93d7 | 132 | cray = .056; |
| JovanEps | 11:4e700bbf93d7 | 133 | n = 20; |
| JovanEps | 11:4e700bbf93d7 | 134 | pc.printf("----------------------------------------------\n"); |
| JovanEps | 11:4e700bbf93d7 | 135 | pc.printf("%s ", ROLLING); |
| JovanEps | 11:4e700bbf93d7 | 136 | pc.printf("%s ", PREC); |
| JovanEps | 11:4e700bbf93d7 | 137 | pc.printf("Precision Linpack Benchmark - Linux Version in 'C/C++'\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 138 | |
| JovanEps | 11:4e700bbf93d7 | 139 | pc.printf("Optimisation %s\n\n",options); |
| JovanEps | 11:4e700bbf93d7 | 140 | |
| JovanEps | 11:4e700bbf93d7 | 141 | ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); |
| JovanEps | 11:4e700bbf93d7 | 142 | |
| JovanEps | 11:4e700bbf93d7 | 143 | matgen(a,lda,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 144 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 145 | //start_time(); |
| JovanEps | 11:4e700bbf93d7 | 146 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 147 | dgefa(a,lda,n,ipvt,&info); |
| JovanEps | 11:4e700bbf93d7 | 148 | //end_time(); |
| JovanEps | 11:4e700bbf93d7 | 149 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 150 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 151 | atime[0][0] = t2-t1; //secs |
| JovanEps | 11:4e700bbf93d7 | 152 | |
| JovanEps | 11:4e700bbf93d7 | 153 | timer.reset(); |
| JovanEps | 11:4e700bbf93d7 | 154 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 155 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 156 | dgesl(a,lda,n,ipvt,b,0); |
| JovanEps | 11:4e700bbf93d7 | 157 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 158 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 159 | atime[1][0] = t2-t1; //secs |
| JovanEps | 11:4e700bbf93d7 | 160 | total = atime[0][0] + atime[1][0]; |
| JovanEps | 11:4e700bbf93d7 | 161 | |
| JovanEps | 11:4e700bbf93d7 | 162 | /* compute a residual to verify results. */ |
| JovanEps | 11:4e700bbf93d7 | 163 | |
| JovanEps | 11:4e700bbf93d7 | 164 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 165 | x[i] = b[i]; |
| JovanEps | 11:4e700bbf93d7 | 166 | } |
| JovanEps | 11:4e700bbf93d7 | 167 | matgen(a,lda,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 168 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 169 | b[i] = -b[i]; |
| JovanEps | 11:4e700bbf93d7 | 170 | } |
| JovanEps | 11:4e700bbf93d7 | 171 | dmxpy(n,b,n,lda,x,a); |
| JovanEps | 11:4e700bbf93d7 | 172 | resid = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 173 | normx = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 174 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 175 | resid = (resid > fabs((double)b[i])) |
| JovanEps | 11:4e700bbf93d7 | 176 | ? resid : fabs((double)b[i]); |
| JovanEps | 11:4e700bbf93d7 | 177 | normx = (normx > fabs((double)x[i])) |
| JovanEps | 11:4e700bbf93d7 | 178 | ? normx : fabs((double)x[i]); |
| JovanEps | 11:4e700bbf93d7 | 179 | } |
| JovanEps | 11:4e700bbf93d7 | 180 | eps = epslon(ONE); |
| JovanEps | 11:4e700bbf93d7 | 181 | residn = resid/( n*norma*normx*eps ); |
| JovanEps | 11:4e700bbf93d7 | 182 | epsn = eps; |
| JovanEps | 11:4e700bbf93d7 | 183 | x1 = x[0] - 1; |
| JovanEps | 11:4e700bbf93d7 | 184 | x2 = x[n-1] - 1; |
| JovanEps | 11:4e700bbf93d7 | 185 | |
| JovanEps | 11:4e700bbf93d7 | 186 | pc.printf("norm resid resid machep"); |
| JovanEps | 11:4e700bbf93d7 | 187 | pc.printf(" x[0]-1 x[n-1]-1\n"); |
| JovanEps | 11:4e700bbf93d7 | 188 | pc.printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", |
| JovanEps | 11:4e700bbf93d7 | 189 | (double)residn, (double)resid, (double)epsn, |
| JovanEps | 11:4e700bbf93d7 | 190 | (double)x1, (double)x2); |
| JovanEps | 11:4e700bbf93d7 | 191 | |
| JovanEps | 11:4e700bbf93d7 | 192 | pc.printf("Times are reported for matrices of order %5d\n",n); |
| JovanEps | 11:4e700bbf93d7 | 193 | pc.printf("1 pass times for array with leading dimension of%5d\n\n",lda); |
| JovanEps | 11:4e700bbf93d7 | 194 | pc.printf(" dgefa dgesl total Mflops unit"); |
| JovanEps | 11:4e700bbf93d7 | 195 | pc.printf(" ratio\n"); |
| JovanEps | 11:4e700bbf93d7 | 196 | |
| JovanEps | 11:4e700bbf93d7 | 197 | atime[2][0] = total; |
| JovanEps | 11:4e700bbf93d7 | 198 | if (total > 0.0) { |
| JovanEps | 11:4e700bbf93d7 | 199 | atime[3][0] = ops/(1.0e6*total); |
| JovanEps | 11:4e700bbf93d7 | 200 | atime[4][0] = 2.0/atime[3][0]; |
| JovanEps | 11:4e700bbf93d7 | 201 | } else { |
| JovanEps | 11:4e700bbf93d7 | 202 | atime[3][0] = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 203 | atime[4][0] = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 204 | } |
| JovanEps | 11:4e700bbf93d7 | 205 | atime[5][0] = total/cray; |
| JovanEps | 11:4e700bbf93d7 | 206 | |
| JovanEps | 11:4e700bbf93d7 | 207 | print_time(0); |
| JovanEps | 11:4e700bbf93d7 | 208 | |
| JovanEps | 11:4e700bbf93d7 | 209 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 210 | * Calculate overhead of executing matgen procedure * |
| JovanEps | 11:4e700bbf93d7 | 211 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 212 | |
| JovanEps | 11:4e700bbf93d7 | 213 | pc.printf ("\nCalculating matgen overhead\n"); |
| JovanEps | 11:4e700bbf93d7 | 214 | pass = -20; |
| JovanEps | 11:4e700bbf93d7 | 215 | loop = NTIMES; |
| JovanEps | 11:4e700bbf93d7 | 216 | do { |
| JovanEps | 11:4e700bbf93d7 | 217 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 218 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 219 | pass = pass + 1; |
| JovanEps | 11:4e700bbf93d7 | 220 | for ( i = 0 ; i < loop ; i++) { |
| JovanEps | 11:4e700bbf93d7 | 221 | matgen(a,lda,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 222 | } |
| JovanEps | 11:4e700bbf93d7 | 223 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 224 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 225 | overhead1 = t2-t1; //sec |
| JovanEps | 11:4e700bbf93d7 | 226 | pc.printf ("%10d times %6.2f seconds\n", loop, overhead1); |
| JovanEps | 11:4e700bbf93d7 | 227 | if (overhead1 > runSecs) { |
| JovanEps | 11:4e700bbf93d7 | 228 | pass = 0; |
| JovanEps | 11:4e700bbf93d7 | 229 | } |
| JovanEps | 11:4e700bbf93d7 | 230 | if (pass < 0) { |
| JovanEps | 11:4e700bbf93d7 | 231 | if (overhead1 < 0.1) { |
| JovanEps | 11:4e700bbf93d7 | 232 | loop = loop * 10; |
| JovanEps | 11:4e700bbf93d7 | 233 | } else { |
| JovanEps | 11:4e700bbf93d7 | 234 | loop = loop * 2; |
| JovanEps | 11:4e700bbf93d7 | 235 | } |
| JovanEps | 11:4e700bbf93d7 | 236 | } |
| JovanEps | 11:4e700bbf93d7 | 237 | } while (pass < 0); |
| JovanEps | 11:4e700bbf93d7 | 238 | |
| JovanEps | 11:4e700bbf93d7 | 239 | overhead1 = overhead1 / (double)loop; |
| JovanEps | 11:4e700bbf93d7 | 240 | |
| JovanEps | 11:4e700bbf93d7 | 241 | pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead1); |
| JovanEps | 11:4e700bbf93d7 | 242 | |
| JovanEps | 11:4e700bbf93d7 | 243 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 244 | * Calculate matgen/dgefa passes for runSecs seconds * |
| JovanEps | 11:4e700bbf93d7 | 245 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 246 | |
| JovanEps | 11:4e700bbf93d7 | 247 | pc.printf ("Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); |
| JovanEps | 11:4e700bbf93d7 | 248 | pass = -20; |
| JovanEps | 11:4e700bbf93d7 | 249 | ntimes = NTIMES; |
| JovanEps | 11:4e700bbf93d7 | 250 | do { |
| JovanEps | 11:4e700bbf93d7 | 251 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 252 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 253 | pass = pass + 1; |
| JovanEps | 11:4e700bbf93d7 | 254 | for ( i = 0 ; i < ntimes ; i++) { |
| JovanEps | 11:4e700bbf93d7 | 255 | matgen(a,lda,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 256 | dgefa(a,lda,n,ipvt,&info ); |
| JovanEps | 11:4e700bbf93d7 | 257 | } |
| JovanEps | 11:4e700bbf93d7 | 258 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 259 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 260 | time2 = t2-t1; //sec |
| JovanEps | 11:4e700bbf93d7 | 261 | |
| JovanEps | 11:4e700bbf93d7 | 262 | pc.printf ("%10d times %6.2f seconds\n", ntimes, time2); |
| JovanEps | 11:4e700bbf93d7 | 263 | if (time2 > runSecs) { |
| JovanEps | 11:4e700bbf93d7 | 264 | pass = 0; |
| JovanEps | 11:4e700bbf93d7 | 265 | } |
| JovanEps | 11:4e700bbf93d7 | 266 | if (pass < 0) { |
| JovanEps | 11:4e700bbf93d7 | 267 | if (time2 < 0.1) { |
| JovanEps | 11:4e700bbf93d7 | 268 | ntimes = ntimes * 10; |
| JovanEps | 11:4e700bbf93d7 | 269 | } else { |
| JovanEps | 11:4e700bbf93d7 | 270 | ntimes = ntimes * 2; |
| JovanEps | 11:4e700bbf93d7 | 271 | } |
| JovanEps | 11:4e700bbf93d7 | 272 | } |
| JovanEps | 11:4e700bbf93d7 | 273 | } while (pass < 0); |
| JovanEps | 11:4e700bbf93d7 | 274 | |
| JovanEps | 11:4e700bbf93d7 | 275 | ntimes = (int)(runSecs * (double)ntimes / time2); |
| JovanEps | 11:4e700bbf93d7 | 276 | if (ntimes == 0) ntimes = 1; |
| JovanEps | 11:4e700bbf93d7 | 277 | |
| JovanEps | 11:4e700bbf93d7 | 278 | pc.printf ("Passes used %10d \n\n", ntimes); |
| JovanEps | 11:4e700bbf93d7 | 279 | pc.printf("Times for array with leading dimension of%4d\n\n",lda); |
| JovanEps | 11:4e700bbf93d7 | 280 | pc.printf(" dgefa dgesl total Mflops unit"); |
| JovanEps | 11:4e700bbf93d7 | 281 | pc.printf(" ratio\n"); |
| JovanEps | 11:4e700bbf93d7 | 282 | |
| JovanEps | 11:4e700bbf93d7 | 283 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 284 | * Execute 5 passes * |
| JovanEps | 11:4e700bbf93d7 | 285 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 286 | |
| JovanEps | 11:4e700bbf93d7 | 287 | tm2 = ntimes * overhead1; |
| JovanEps | 11:4e700bbf93d7 | 288 | atime[3][6] = 0; |
| JovanEps | 11:4e700bbf93d7 | 289 | |
| JovanEps | 11:4e700bbf93d7 | 290 | for (j=1 ; j<6 ; j++) { |
| JovanEps | 11:4e700bbf93d7 | 291 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 292 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 293 | |
| JovanEps | 11:4e700bbf93d7 | 294 | for (i = 0; i < ntimes; i++) { |
| JovanEps | 11:4e700bbf93d7 | 295 | matgen(a,lda,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 296 | dgefa(a,lda,n,ipvt,&info ); |
| JovanEps | 11:4e700bbf93d7 | 297 | } |
| JovanEps | 11:4e700bbf93d7 | 298 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 299 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 300 | atime[0][j] = ((t2-t1) - tm2)/ntimes; |
| JovanEps | 11:4e700bbf93d7 | 301 | |
| JovanEps | 11:4e700bbf93d7 | 302 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 303 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 304 | for (i = 0; i < ntimes; i++) { |
| JovanEps | 11:4e700bbf93d7 | 305 | dgesl(a,lda,n,ipvt,b,0); |
| JovanEps | 11:4e700bbf93d7 | 306 | } |
| JovanEps | 11:4e700bbf93d7 | 307 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 308 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 309 | |
| JovanEps | 11:4e700bbf93d7 | 310 | atime[1][j] = (t2-t1)/ntimes; |
| JovanEps | 11:4e700bbf93d7 | 311 | total = atime[0][j] + atime[1][j]; |
| JovanEps | 11:4e700bbf93d7 | 312 | atime[2][j] = total; |
| JovanEps | 11:4e700bbf93d7 | 313 | atime[3][j] = ops/(1.0e6*total); |
| JovanEps | 11:4e700bbf93d7 | 314 | atime[4][j] = 2.0/atime[3][j]; |
| JovanEps | 11:4e700bbf93d7 | 315 | atime[5][j] = total/cray; |
| JovanEps | 11:4e700bbf93d7 | 316 | atime[3][6] = atime[3][6] + atime[3][j]; |
| JovanEps | 2:273153e44338 | 317 | |
| JovanEps | 11:4e700bbf93d7 | 318 | print_time(j); |
| JovanEps | 11:4e700bbf93d7 | 319 | } |
| JovanEps | 11:4e700bbf93d7 | 320 | atime[3][6] = atime[3][6] / 5.0; |
| JovanEps | 11:4e700bbf93d7 | 321 | pc.printf("Average %11.2f\n", |
| JovanEps | 11:4e700bbf93d7 | 322 | (double)atime[3][6]); |
| JovanEps | 11:4e700bbf93d7 | 323 | |
| JovanEps | 11:4e700bbf93d7 | 324 | pc.printf("\nCalculating matgen2 overhead\n"); |
| JovanEps | 11:4e700bbf93d7 | 325 | |
| JovanEps | 11:4e700bbf93d7 | 326 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 327 | * Calculate overhead of executing matgen procedure * |
| JovanEps | 11:4e700bbf93d7 | 328 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 329 | |
| JovanEps | 11:4e700bbf93d7 | 330 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 331 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 332 | for ( i = 0 ; i < loop ; i++) { |
| JovanEps | 11:4e700bbf93d7 | 333 | matgen(aa,ldaa,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 334 | } |
| JovanEps | 11:4e700bbf93d7 | 335 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 336 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 337 | overhead2 = t2-t1; |
| JovanEps | 11:4e700bbf93d7 | 338 | overhead2 = overhead2 / (double)loop; |
| JovanEps | 11:4e700bbf93d7 | 339 | |
| JovanEps | 11:4e700bbf93d7 | 340 | pc.printf("Overhead for 1 matgen %12.5f seconds\n\n", overhead2); |
| JovanEps | 11:4e700bbf93d7 | 341 | pc.printf("Times for array with leading dimension of%4d\n\n",ldaa); |
| JovanEps | 11:4e700bbf93d7 | 342 | pc.printf(" dgefa dgesl total Mflops unit"); |
| JovanEps | 11:4e700bbf93d7 | 343 | pc.printf(" ratio\n"); |
| JovanEps | 11:4e700bbf93d7 | 344 | |
| JovanEps | 11:4e700bbf93d7 | 345 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 346 | * Execute 5 passes * |
| JovanEps | 11:4e700bbf93d7 | 347 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 348 | |
| JovanEps | 11:4e700bbf93d7 | 349 | tm2 = ntimes * overhead2; |
| JovanEps | 11:4e700bbf93d7 | 350 | atime[3][12] = 0; |
| JovanEps | 11:4e700bbf93d7 | 351 | |
| JovanEps | 11:4e700bbf93d7 | 352 | for (j=7 ; j<12 ; j++) { |
| JovanEps | 11:4e700bbf93d7 | 353 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 354 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 355 | for (i = 0; i < ntimes; i++) { |
| JovanEps | 11:4e700bbf93d7 | 356 | matgen(aa,ldaa,n,b,&norma); |
| JovanEps | 11:4e700bbf93d7 | 357 | dgefa(aa,ldaa,n,ipvt,&info ); |
| JovanEps | 11:4e700bbf93d7 | 358 | } |
| JovanEps | 11:4e700bbf93d7 | 359 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 360 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 361 | atime[0][j] = ((t2-t1) - tm2)/ntimes; |
| JovanEps | 11:4e700bbf93d7 | 362 | |
| JovanEps | 11:4e700bbf93d7 | 363 | timer.start(); |
| JovanEps | 11:4e700bbf93d7 | 364 | t1 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 365 | for (i = 0; i < ntimes; i++) { |
| JovanEps | 11:4e700bbf93d7 | 366 | dgesl(aa,ldaa,n,ipvt,b,0); |
| JovanEps | 11:4e700bbf93d7 | 367 | } |
| JovanEps | 11:4e700bbf93d7 | 368 | t2 = timer.read(); |
| JovanEps | 11:4e700bbf93d7 | 369 | timer.stop(); |
| JovanEps | 11:4e700bbf93d7 | 370 | atime[1][j] = (t2-t1)/ntimes; |
| JovanEps | 11:4e700bbf93d7 | 371 | total = atime[0][j] + atime[1][j]; |
| JovanEps | 11:4e700bbf93d7 | 372 | atime[2][j] = total; |
| JovanEps | 11:4e700bbf93d7 | 373 | atime[3][j] = ops/(1.0e6*total); |
| JovanEps | 11:4e700bbf93d7 | 374 | atime[4][j] = 2.0/atime[3][j]; |
| JovanEps | 11:4e700bbf93d7 | 375 | atime[5][j] = total/cray; |
| JovanEps | 11:4e700bbf93d7 | 376 | atime[3][12] = atime[3][12] + atime[3][j]; |
| JovanEps | 11:4e700bbf93d7 | 377 | |
| JovanEps | 11:4e700bbf93d7 | 378 | print_time(j); |
| JovanEps | 11:4e700bbf93d7 | 379 | } |
| JovanEps | 11:4e700bbf93d7 | 380 | atime[3][12] = atime[3][12] / 5.0; |
| JovanEps | 11:4e700bbf93d7 | 381 | pc.printf("Average %11.2f\n\n", |
| JovanEps | 11:4e700bbf93d7 | 382 | (double)atime[3][12]); |
| JovanEps | 11:4e700bbf93d7 | 383 | pc.printf("##########################################\n"); |
| JovanEps | 11:4e700bbf93d7 | 384 | pc.printf ("\nFrom File /proc/cpuinfo\n"); |
| JovanEps | 11:4e700bbf93d7 | 385 | // pc.printf("%s\n", configdata[0]); |
| JovanEps | 11:4e700bbf93d7 | 386 | pc.printf ("\nFrom File /proc/version\n"); |
| JovanEps | 11:4e700bbf93d7 | 387 | //pc.printf("%s\n", configdata[1]); |
| JovanEps | 11:4e700bbf93d7 | 388 | |
| JovanEps | 11:4e700bbf93d7 | 389 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 390 | * Use minimum average as overall Mflops rating * |
| JovanEps | 11:4e700bbf93d7 | 391 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 392 | |
| JovanEps | 11:4e700bbf93d7 | 393 | mflops = atime[3][6]; |
| JovanEps | 11:4e700bbf93d7 | 394 | if (atime[3][12] < mflops) mflops = atime[3][12]; |
| JovanEps | 11:4e700bbf93d7 | 395 | |
| JovanEps | 11:4e700bbf93d7 | 396 | pc.printf("\n"); |
| JovanEps | 11:4e700bbf93d7 | 397 | pc.printf("%s ", ROLLING); |
| JovanEps | 11:4e700bbf93d7 | 398 | pc.printf("%s ", PREC); |
| JovanEps | 11:4e700bbf93d7 | 399 | pc.printf(" Precision %11.4f Mflops \n\n",mflops); |
| JovanEps | 11:4e700bbf93d7 | 400 | |
| JovanEps | 11:4e700bbf93d7 | 401 | // local_time(); |
| JovanEps | 11:4e700bbf93d7 | 402 | |
| JovanEps | 11:4e700bbf93d7 | 403 | |
| JovanEps | 11:4e700bbf93d7 | 404 | /************************************************************************ |
| JovanEps | 11:4e700bbf93d7 | 405 | * Add results to output file Linpack.txt * |
| JovanEps | 11:4e700bbf93d7 | 406 | ************************************************************************/ |
| JovanEps | 11:4e700bbf93d7 | 407 | pc.printf (" ########################################################\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 408 | pc.printf (" Linpack %s Precision %s Benchmark n @ 100\n", PREC, ROLLING); |
| JovanEps | 11:4e700bbf93d7 | 409 | //pc.printf (outfile, " Optimisation %s, %s\n", options, timeday); |
| JovanEps | 11:4e700bbf93d7 | 410 | |
| JovanEps | 11:4e700bbf93d7 | 411 | max1 = 0; |
| JovanEps | 11:4e700bbf93d7 | 412 | for (i=1 ; i<6 ; i++) { |
| JovanEps | 11:4e700bbf93d7 | 413 | if (atime[3][i] > max1) max1 = atime[3][i]; |
| JovanEps | 11:4e700bbf93d7 | 414 | } |
| JovanEps | 11:4e700bbf93d7 | 415 | |
| JovanEps | 11:4e700bbf93d7 | 416 | max2 = 0; |
| JovanEps | 11:4e700bbf93d7 | 417 | for (i=7 ; i<12 ; i++) { |
| JovanEps | 11:4e700bbf93d7 | 418 | if (atime[3][i] > max2) max2 = atime[3][i]; |
| JovanEps | 11:4e700bbf93d7 | 419 | } |
| JovanEps | 11:4e700bbf93d7 | 420 | if (max1 < max2) max2 = max1; |
| JovanEps | 11:4e700bbf93d7 | 421 | |
| JovanEps | 11:4e700bbf93d7 | 422 | pc.printf(" Speed %10.4f MFLOPS\n\n", max2); |
| JovanEps | 11:4e700bbf93d7 | 423 | pc.printf(was[0], "%16.1f",(double)residn); |
| JovanEps | 11:4e700bbf93d7 | 424 | pc.printf(was[1], "%16.8e",(double)resid); |
| JovanEps | 11:4e700bbf93d7 | 425 | pc.printf(was[2], "%16.8e",(double)epsn); |
| JovanEps | 11:4e700bbf93d7 | 426 | pc.printf(was[3], "%16.8e",(double)x1); |
| JovanEps | 11:4e700bbf93d7 | 427 | pc.printf(was[4], "%16.8e",(double)x2); |
| JovanEps | 11:4e700bbf93d7 | 428 | |
| JovanEps | 11:4e700bbf93d7 | 429 | pc.printf(title[0], "norm. resid"); |
| JovanEps | 11:4e700bbf93d7 | 430 | pc.printf(title[1], "resid "); |
| JovanEps | 11:4e700bbf93d7 | 431 | pc.printf(title[2], "machep "); |
| JovanEps | 11:4e700bbf93d7 | 432 | pc.printf(title[3], "x[0]-1 "); |
| JovanEps | 11:4e700bbf93d7 | 433 | pc.printf(title[4], "x[n-1]-1 "); |
| JovanEps | 11:4e700bbf93d7 | 434 | |
| JovanEps | 11:4e700bbf93d7 | 435 | if (strtol(opt, NULL, 10) == 0) { |
| JovanEps | 11:4e700bbf93d7 | 436 | pc.printf(expect[2], " 8.88178420e-016"); |
| JovanEps | 11:4e700bbf93d7 | 437 | } |
| JovanEps | 11:4e700bbf93d7 | 438 | errors = 0; |
| JovanEps | 11:4e700bbf93d7 | 439 | |
| JovanEps | 11:4e700bbf93d7 | 440 | for (i=0; i<5; i++) { |
| JovanEps | 11:4e700bbf93d7 | 441 | if (strcmp (expect[i], was[i]) != 0) { |
| JovanEps | 11:4e700bbf93d7 | 442 | pc.printf(" Variable %s Non-standard result was %s instead of %s\n", |
| JovanEps | 11:4e700bbf93d7 | 443 | title[i], was[i], expect[i]); |
| JovanEps | 11:4e700bbf93d7 | 444 | errors = errors + 1; |
| JovanEps | 11:4e700bbf93d7 | 445 | } |
| JovanEps | 11:4e700bbf93d7 | 446 | } |
| JovanEps | 11:4e700bbf93d7 | 447 | if (errors == 0) { |
| JovanEps | 11:4e700bbf93d7 | 448 | pc.printf(" Numeric results were as expected\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 449 | } else { |
| JovanEps | 11:4e700bbf93d7 | 450 | pc.printf(" Different numeric results - see linpack.txt\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 451 | pc.printf("\n Compiler #define or values in linpack.c might need to be changed\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 452 | |
| JovanEps | 11:4e700bbf93d7 | 453 | } |
| JovanEps | 11:4e700bbf93d7 | 454 | |
| JovanEps | 11:4e700bbf93d7 | 455 | |
| JovanEps | 11:4e700bbf93d7 | 456 | pc.printf (" ########################################################\n\n"); |
| JovanEps | 11:4e700bbf93d7 | 457 | pc.printf("\n"); |
| JovanEps | 11:4e700bbf93d7 | 458 | pc.printf ("SYSTEM INFORMATION\n\nFrom File /proc/cpuinfo\n"); |
| JovanEps | 11:4e700bbf93d7 | 459 | // pc.printf (outfile, "%s \n", configdata[0]); |
| JovanEps | 11:4e700bbf93d7 | 460 | pc.printf ("\nFrom File /proc/version\n"); |
| JovanEps | 11:4e700bbf93d7 | 461 | // pc.printf (outfile, "%s \n", configdata[1]); |
| JovanEps | 11:4e700bbf93d7 | 462 | pc.printf ("\n"); |
| JovanEps | 11:4e700bbf93d7 | 463 | |
| JovanEps | 11:4e700bbf93d7 | 464 | pc.printf("\n"); |
| JovanEps | 11:4e700bbf93d7 | 465 | |
| JovanEps | 11:4e700bbf93d7 | 466 | } |
| JovanEps | 11:4e700bbf93d7 | 467 | |
| JovanEps | 11:4e700bbf93d7 | 468 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 469 | void print_time (int row) |
| JovanEps | 11:4e700bbf93d7 | 470 | |
| JovanEps | 11:4e700bbf93d7 | 471 | { |
| JovanEps | 11:4e700bbf93d7 | 472 | pc.printf("%11.5f%11.5f%11.5f%11.2f%11.4f%11.4f\n", (double)atime[0][row], |
| JovanEps | 11:4e700bbf93d7 | 473 | (double)atime[1][row], (double)atime[2][row], (double)atime[3][row], |
| JovanEps | 11:4e700bbf93d7 | 474 | (double)atime[4][row], (double)atime[5][row]); |
| JovanEps | 11:4e700bbf93d7 | 475 | return; |
| JovanEps | 11:4e700bbf93d7 | 476 | } |
| JovanEps | 11:4e700bbf93d7 | 477 | |
| JovanEps | 11:4e700bbf93d7 | 478 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 479 | |
| JovanEps | 11:4e700bbf93d7 | 480 | void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma) |
| JovanEps | 11:4e700bbf93d7 | 481 | |
| JovanEps | 11:4e700bbf93d7 | 482 | |
| JovanEps | 11:4e700bbf93d7 | 483 | /* We would like to declare a[][lda], but c does not allow it. In this |
| JovanEps | 11:4e700bbf93d7 | 484 | function, references to a[i][j] are written a[lda*i+j]. */ |
| JovanEps | 11:4e700bbf93d7 | 485 | |
| JovanEps | 11:4e700bbf93d7 | 486 | { |
| JovanEps | 11:4e700bbf93d7 | 487 | int init, i, j; |
| JovanEps | 11:4e700bbf93d7 | 488 | |
| JovanEps | 11:4e700bbf93d7 | 489 | init = 1325; |
| JovanEps | 11:4e700bbf93d7 | 490 | *norma = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 491 | for (j = 0; j < n; j++) { |
| JovanEps | 11:4e700bbf93d7 | 492 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 493 | init = 3125*init % 65536; |
| JovanEps | 11:4e700bbf93d7 | 494 | a[lda*j+i] = (init - 32768.0)/16384.0; |
| JovanEps | 11:4e700bbf93d7 | 495 | *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; |
| JovanEps | 11:4e700bbf93d7 | 496 | |
| JovanEps | 11:4e700bbf93d7 | 497 | /* alternative for some compilers |
| JovanEps | 11:4e700bbf93d7 | 498 | if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]); |
| JovanEps | 11:4e700bbf93d7 | 499 | */ |
| JovanEps | 11:4e700bbf93d7 | 500 | } |
| JovanEps | 11:4e700bbf93d7 | 501 | } |
| JovanEps | 11:4e700bbf93d7 | 502 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 503 | b[i] = 0.0; |
| JovanEps | 11:4e700bbf93d7 | 504 | } |
| JovanEps | 11:4e700bbf93d7 | 505 | for (j = 0; j < n; j++) { |
| JovanEps | 11:4e700bbf93d7 | 506 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 507 | b[i] = b[i] + a[lda*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 508 | } |
| JovanEps | 11:4e700bbf93d7 | 509 | } |
| JovanEps | 11:4e700bbf93d7 | 510 | return; |
| JovanEps | 11:4e700bbf93d7 | 511 | } |
| JovanEps | 11:4e700bbf93d7 | 512 | |
| JovanEps | 11:4e700bbf93d7 | 513 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 514 | void dgefa(REAL a[], int lda, int n, int ipvt[], int *info) |
| JovanEps | 11:4e700bbf93d7 | 515 | |
| JovanEps | 11:4e700bbf93d7 | 516 | |
| JovanEps | 11:4e700bbf93d7 | 517 | /* We would like to declare a[][lda], but c does not allow it. In this |
| JovanEps | 11:4e700bbf93d7 | 518 | function, references to a[i][j] are written a[lda*i+j]. */ |
| JovanEps | 11:4e700bbf93d7 | 519 | /* |
| JovanEps | 11:4e700bbf93d7 | 520 | dgefa factors a double precision matrix by gaussian elimination. |
| JovanEps | 11:4e700bbf93d7 | 521 | |
| JovanEps | 11:4e700bbf93d7 | 522 | dgefa is usually called by dgeco, but it can be called |
| JovanEps | 11:4e700bbf93d7 | 523 | directly with a saving in time if rcond is not needed. |
| JovanEps | 11:4e700bbf93d7 | 524 | (time for dgeco) = (1 + 9/n)*(time for dgefa) . |
| JovanEps | 11:4e700bbf93d7 | 525 | |
| JovanEps | 11:4e700bbf93d7 | 526 | on entry |
| JovanEps | 11:4e700bbf93d7 | 527 | |
| JovanEps | 11:4e700bbf93d7 | 528 | a REAL precision[n][lda] |
| JovanEps | 11:4e700bbf93d7 | 529 | the matrix to be factored. |
| JovanEps | 11:4e700bbf93d7 | 530 | |
| JovanEps | 11:4e700bbf93d7 | 531 | lda integer |
| JovanEps | 11:4e700bbf93d7 | 532 | the leading dimension of the array a . |
| JovanEps | 11:4e700bbf93d7 | 533 | |
| JovanEps | 11:4e700bbf93d7 | 534 | n integer |
| JovanEps | 11:4e700bbf93d7 | 535 | the order of the matrix a . |
| JovanEps | 11:4e700bbf93d7 | 536 | |
| JovanEps | 11:4e700bbf93d7 | 537 | on return |
| JovanEps | 11:4e700bbf93d7 | 538 | |
| JovanEps | 11:4e700bbf93d7 | 539 | a an upper triangular matrix and the multipliers |
| JovanEps | 11:4e700bbf93d7 | 540 | which were used to obtain it. |
| JovanEps | 11:4e700bbf93d7 | 541 | the factorization can be written a = l*u where |
| JovanEps | 11:4e700bbf93d7 | 542 | l is a product of permutation and unit lower |
| JovanEps | 11:4e700bbf93d7 | 543 | triangular matrices and u is upper triangular. |
| JovanEps | 11:4e700bbf93d7 | 544 | |
| JovanEps | 11:4e700bbf93d7 | 545 | ipvt integer[n] |
| JovanEps | 11:4e700bbf93d7 | 546 | an integer vector of pivot indices. |
| JovanEps | 11:4e700bbf93d7 | 547 | |
| JovanEps | 11:4e700bbf93d7 | 548 | info integer |
| JovanEps | 11:4e700bbf93d7 | 549 | = 0 normal value. |
| JovanEps | 11:4e700bbf93d7 | 550 | = k if u[k][k] .eq. 0.0 . this is not an error |
| JovanEps | 11:4e700bbf93d7 | 551 | condition for this subroutine, but it does |
| JovanEps | 11:4e700bbf93d7 | 552 | indicate that dgesl or dgedi will divide by zero |
| JovanEps | 11:4e700bbf93d7 | 553 | if called. use rcond in dgeco for a reliable |
| JovanEps | 11:4e700bbf93d7 | 554 | indication of singularity. |
| JovanEps | 11:4e700bbf93d7 | 555 | |
| JovanEps | 11:4e700bbf93d7 | 556 | linpack. this version dated 08/14/78 . |
| JovanEps | 11:4e700bbf93d7 | 557 | cleve moler, university of new mexico, argonne national lab. |
| JovanEps | 11:4e700bbf93d7 | 558 | |
| JovanEps | 11:4e700bbf93d7 | 559 | functions |
| JovanEps | 11:4e700bbf93d7 | 560 | |
| JovanEps | 11:4e700bbf93d7 | 561 | blas daxpy,dscal,idamax |
| JovanEps | 11:4e700bbf93d7 | 562 | */ |
| JovanEps | 11:4e700bbf93d7 | 563 | |
| JovanEps | 11:4e700bbf93d7 | 564 | { |
| JovanEps | 11:4e700bbf93d7 | 565 | /* internal variables */ |
| JovanEps | 11:4e700bbf93d7 | 566 | |
| JovanEps | 11:4e700bbf93d7 | 567 | REAL t; |
| JovanEps | 11:4e700bbf93d7 | 568 | int j,k,kp1,l,nm1; |
| JovanEps | 11:4e700bbf93d7 | 569 | |
| JovanEps | 11:4e700bbf93d7 | 570 | |
| JovanEps | 11:4e700bbf93d7 | 571 | /* gaussian elimination with partial pivoting */ |
| JovanEps | 11:4e700bbf93d7 | 572 | |
| JovanEps | 11:4e700bbf93d7 | 573 | *info = 0; |
| JovanEps | 11:4e700bbf93d7 | 574 | nm1 = n - 1; |
| JovanEps | 11:4e700bbf93d7 | 575 | if (nm1 >= 0) { |
| JovanEps | 11:4e700bbf93d7 | 576 | for (k = 0; k < nm1; k++) { |
| JovanEps | 11:4e700bbf93d7 | 577 | kp1 = k + 1; |
| JovanEps | 11:4e700bbf93d7 | 578 | |
| JovanEps | 11:4e700bbf93d7 | 579 | /* find l = pivot index */ |
| JovanEps | 11:4e700bbf93d7 | 580 | |
| JovanEps | 11:4e700bbf93d7 | 581 | l = idamax(n-k,&a[lda*k+k],1) + k; |
| JovanEps | 11:4e700bbf93d7 | 582 | ipvt[k] = l; |
| JovanEps | 11:4e700bbf93d7 | 583 | |
| JovanEps | 11:4e700bbf93d7 | 584 | /* zero pivot implies this column already |
| JovanEps | 11:4e700bbf93d7 | 585 | triangularized */ |
| JovanEps | 11:4e700bbf93d7 | 586 | |
| JovanEps | 11:4e700bbf93d7 | 587 | if (a[lda*k+l] != ZERO) { |
| JovanEps | 11:4e700bbf93d7 | 588 | |
| JovanEps | 11:4e700bbf93d7 | 589 | /* interchange if necessary */ |
| JovanEps | 0:43b96e9650ef | 590 | |
| JovanEps | 11:4e700bbf93d7 | 591 | if (l != k) { |
| JovanEps | 11:4e700bbf93d7 | 592 | t = a[lda*k+l]; |
| JovanEps | 11:4e700bbf93d7 | 593 | a[lda*k+l] = a[lda*k+k]; |
| JovanEps | 11:4e700bbf93d7 | 594 | a[lda*k+k] = t; |
| JovanEps | 11:4e700bbf93d7 | 595 | } |
| JovanEps | 11:4e700bbf93d7 | 596 | |
| JovanEps | 11:4e700bbf93d7 | 597 | /* compute multipliers */ |
| JovanEps | 11:4e700bbf93d7 | 598 | |
| JovanEps | 11:4e700bbf93d7 | 599 | t = -ONE/a[lda*k+k]; |
| JovanEps | 11:4e700bbf93d7 | 600 | dscal(n-(k+1),t,&a[lda*k+k+1],1); |
| JovanEps | 11:4e700bbf93d7 | 601 | |
| JovanEps | 11:4e700bbf93d7 | 602 | /* row elimination with column indexing */ |
| JovanEps | 11:4e700bbf93d7 | 603 | |
| JovanEps | 11:4e700bbf93d7 | 604 | for (j = kp1; j < n; j++) { |
| JovanEps | 11:4e700bbf93d7 | 605 | t = a[lda*j+l]; |
| JovanEps | 11:4e700bbf93d7 | 606 | if (l != k) { |
| JovanEps | 11:4e700bbf93d7 | 607 | a[lda*j+l] = a[lda*j+k]; |
| JovanEps | 11:4e700bbf93d7 | 608 | a[lda*j+k] = t; |
| JovanEps | 11:4e700bbf93d7 | 609 | } |
| JovanEps | 11:4e700bbf93d7 | 610 | daxpy(n-(k+1),t,&a[lda*k+k+1],1, |
| JovanEps | 11:4e700bbf93d7 | 611 | &a[lda*j+k+1],1); |
| JovanEps | 11:4e700bbf93d7 | 612 | } |
| JovanEps | 11:4e700bbf93d7 | 613 | } else { |
| JovanEps | 11:4e700bbf93d7 | 614 | *info = k; |
| JovanEps | 11:4e700bbf93d7 | 615 | } |
| JovanEps | 11:4e700bbf93d7 | 616 | } |
| JovanEps | 11:4e700bbf93d7 | 617 | } |
| JovanEps | 11:4e700bbf93d7 | 618 | ipvt[n-1] = n-1; |
| JovanEps | 11:4e700bbf93d7 | 619 | if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1; |
| JovanEps | 11:4e700bbf93d7 | 620 | return; |
| JovanEps | 11:4e700bbf93d7 | 621 | } |
| JovanEps | 11:4e700bbf93d7 | 622 | |
| JovanEps | 11:4e700bbf93d7 | 623 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 624 | |
| JovanEps | 11:4e700bbf93d7 | 625 | void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job ) |
| JovanEps | 11:4e700bbf93d7 | 626 | |
| JovanEps | 11:4e700bbf93d7 | 627 | |
| JovanEps | 11:4e700bbf93d7 | 628 | /* We would like to declare a[][lda], but c does not allow it. In this |
| JovanEps | 11:4e700bbf93d7 | 629 | function, references to a[i][j] are written a[lda*i+j]. */ |
| JovanEps | 11:4e700bbf93d7 | 630 | |
| JovanEps | 11:4e700bbf93d7 | 631 | /* |
| JovanEps | 11:4e700bbf93d7 | 632 | dgesl solves the double precision system |
| JovanEps | 11:4e700bbf93d7 | 633 | a * x = b or trans(a) * x = b |
| JovanEps | 11:4e700bbf93d7 | 634 | using the factors computed by dgeco or dgefa. |
| JovanEps | 11:4e700bbf93d7 | 635 | |
| JovanEps | 11:4e700bbf93d7 | 636 | on entry |
| JovanEps | 11:4e700bbf93d7 | 637 | |
| JovanEps | 11:4e700bbf93d7 | 638 | a double precision[n][lda] |
| JovanEps | 11:4e700bbf93d7 | 639 | the output from dgeco or dgefa. |
| JovanEps | 11:4e700bbf93d7 | 640 | |
| JovanEps | 11:4e700bbf93d7 | 641 | lda integer |
| JovanEps | 11:4e700bbf93d7 | 642 | the leading dimension of the array a . |
| JovanEps | 11:4e700bbf93d7 | 643 | |
| JovanEps | 11:4e700bbf93d7 | 644 | n integer |
| JovanEps | 11:4e700bbf93d7 | 645 | the order of the matrix a . |
| JovanEps | 11:4e700bbf93d7 | 646 | |
| JovanEps | 11:4e700bbf93d7 | 647 | ipvt integer[n] |
| JovanEps | 11:4e700bbf93d7 | 648 | the pivot vector from dgeco or dgefa. |
| JovanEps | 11:4e700bbf93d7 | 649 | |
| JovanEps | 11:4e700bbf93d7 | 650 | b double precision[n] |
| JovanEps | 11:4e700bbf93d7 | 651 | the right hand side vector. |
| JovanEps | 11:4e700bbf93d7 | 652 | |
| JovanEps | 11:4e700bbf93d7 | 653 | job integer |
| JovanEps | 11:4e700bbf93d7 | 654 | = 0 to solve a*x = b , |
| JovanEps | 11:4e700bbf93d7 | 655 | = nonzero to solve trans(a)*x = b where |
| JovanEps | 11:4e700bbf93d7 | 656 | trans(a) is the transpose. |
| JovanEps | 11:4e700bbf93d7 | 657 | |
| JovanEps | 11:4e700bbf93d7 | 658 | on return |
| JovanEps | 11:4e700bbf93d7 | 659 | |
| JovanEps | 11:4e700bbf93d7 | 660 | b the solution vector x . |
| JovanEps | 11:4e700bbf93d7 | 661 | |
| JovanEps | 11:4e700bbf93d7 | 662 | error condition |
| JovanEps | 11:4e700bbf93d7 | 663 | |
| JovanEps | 11:4e700bbf93d7 | 664 | a division by zero will occur if the input factor contains a |
| JovanEps | 11:4e700bbf93d7 | 665 | zero on the diagonal. technically this indicates singularity |
| JovanEps | 11:4e700bbf93d7 | 666 | but it is often caused by improper arguments or improper |
| JovanEps | 11:4e700bbf93d7 | 667 | setting of lda . it will not occur if the subroutines are |
| JovanEps | 11:4e700bbf93d7 | 668 | called correctly and if dgeco has set rcond .gt. 0.0 |
| JovanEps | 11:4e700bbf93d7 | 669 | or dgefa has set info .eq. 0 . |
| JovanEps | 11:4e700bbf93d7 | 670 | |
| JovanEps | 11:4e700bbf93d7 | 671 | to compute inverse(a) * c where c is a matrix |
| JovanEps | 11:4e700bbf93d7 | 672 | with p columns |
| JovanEps | 11:4e700bbf93d7 | 673 | dgeco(a,lda,n,ipvt,rcond,z) |
| JovanEps | 11:4e700bbf93d7 | 674 | if (!rcond is too small){ |
| JovanEps | 11:4e700bbf93d7 | 675 | for (j=0,j<p,j++) |
| JovanEps | 11:4e700bbf93d7 | 676 | dgesl(a,lda,n,ipvt,c[j][0],0); |
| JovanEps | 11:4e700bbf93d7 | 677 | } |
| JovanEps | 11:4e700bbf93d7 | 678 | |
| JovanEps | 11:4e700bbf93d7 | 679 | linpack. this version dated 08/14/78 . |
| JovanEps | 11:4e700bbf93d7 | 680 | cleve moler, university of new mexico, argonne national lab. |
| JovanEps | 11:4e700bbf93d7 | 681 | |
| JovanEps | 11:4e700bbf93d7 | 682 | functions |
| JovanEps | 11:4e700bbf93d7 | 683 | |
| JovanEps | 11:4e700bbf93d7 | 684 | blas daxpy,ddot |
| JovanEps | 11:4e700bbf93d7 | 685 | */ |
| JovanEps | 11:4e700bbf93d7 | 686 | { |
| JovanEps | 11:4e700bbf93d7 | 687 | /* internal variables */ |
| JovanEps | 11:4e700bbf93d7 | 688 | |
| JovanEps | 11:4e700bbf93d7 | 689 | REAL t; |
| JovanEps | 11:4e700bbf93d7 | 690 | int k,kb,l,nm1; |
| JovanEps | 11:4e700bbf93d7 | 691 | |
| JovanEps | 11:4e700bbf93d7 | 692 | nm1 = n - 1; |
| JovanEps | 11:4e700bbf93d7 | 693 | if (job == 0) { |
| JovanEps | 11:4e700bbf93d7 | 694 | |
| JovanEps | 11:4e700bbf93d7 | 695 | /* job = 0 , solve a * x = b |
| JovanEps | 11:4e700bbf93d7 | 696 | first solve l*y = b */ |
| JovanEps | 11:4e700bbf93d7 | 697 | |
| JovanEps | 11:4e700bbf93d7 | 698 | if (nm1 >= 1) { |
| JovanEps | 11:4e700bbf93d7 | 699 | for (k = 0; k < nm1; k++) { |
| JovanEps | 11:4e700bbf93d7 | 700 | l = ipvt[k]; |
| JovanEps | 11:4e700bbf93d7 | 701 | t = b[l]; |
| JovanEps | 11:4e700bbf93d7 | 702 | if (l != k) { |
| JovanEps | 11:4e700bbf93d7 | 703 | b[l] = b[k]; |
| JovanEps | 11:4e700bbf93d7 | 704 | b[k] = t; |
| JovanEps | 11:4e700bbf93d7 | 705 | } |
| JovanEps | 11:4e700bbf93d7 | 706 | daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 ); |
| JovanEps | 11:4e700bbf93d7 | 707 | } |
| JovanEps | 11:4e700bbf93d7 | 708 | } |
| JovanEps | 11:4e700bbf93d7 | 709 | |
| JovanEps | 11:4e700bbf93d7 | 710 | /* now solve u*x = y */ |
| JovanEps | 11:4e700bbf93d7 | 711 | |
| JovanEps | 11:4e700bbf93d7 | 712 | for (kb = 0; kb < n; kb++) { |
| JovanEps | 11:4e700bbf93d7 | 713 | k = n - (kb + 1); |
| JovanEps | 11:4e700bbf93d7 | 714 | b[k] = b[k]/a[lda*k+k]; |
| JovanEps | 11:4e700bbf93d7 | 715 | t = -b[k]; |
| JovanEps | 11:4e700bbf93d7 | 716 | daxpy(k,t,&a[lda*k+0],1,&b[0],1 ); |
| JovanEps | 11:4e700bbf93d7 | 717 | } |
| JovanEps | 11:4e700bbf93d7 | 718 | } else { |
| JovanEps | 11:4e700bbf93d7 | 719 | |
| JovanEps | 11:4e700bbf93d7 | 720 | /* job = nonzero, solve trans(a) * x = b |
| JovanEps | 11:4e700bbf93d7 | 721 | first solve trans(u)*y = b */ |
| JovanEps | 11:4e700bbf93d7 | 722 | |
| JovanEps | 11:4e700bbf93d7 | 723 | for (k = 0; k < n; k++) { |
| JovanEps | 11:4e700bbf93d7 | 724 | t = ddot(k,&a[lda*k+0],1,&b[0],1); |
| JovanEps | 11:4e700bbf93d7 | 725 | b[k] = (b[k] - t)/a[lda*k+k]; |
| JovanEps | 11:4e700bbf93d7 | 726 | } |
| JovanEps | 11:4e700bbf93d7 | 727 | |
| JovanEps | 11:4e700bbf93d7 | 728 | /* now solve trans(l)*x = y */ |
| JovanEps | 11:4e700bbf93d7 | 729 | |
| JovanEps | 11:4e700bbf93d7 | 730 | if (nm1 >= 1) { |
| JovanEps | 11:4e700bbf93d7 | 731 | for (kb = 1; kb < nm1; kb++) { |
| JovanEps | 11:4e700bbf93d7 | 732 | k = n - (kb+1); |
| JovanEps | 11:4e700bbf93d7 | 733 | b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); |
| JovanEps | 11:4e700bbf93d7 | 734 | l = ipvt[k]; |
| JovanEps | 11:4e700bbf93d7 | 735 | if (l != k) { |
| JovanEps | 11:4e700bbf93d7 | 736 | t = b[l]; |
| JovanEps | 11:4e700bbf93d7 | 737 | b[l] = b[k]; |
| JovanEps | 11:4e700bbf93d7 | 738 | b[k] = t; |
| JovanEps | 11:4e700bbf93d7 | 739 | } |
| JovanEps | 11:4e700bbf93d7 | 740 | } |
| JovanEps | 11:4e700bbf93d7 | 741 | } |
| JovanEps | 11:4e700bbf93d7 | 742 | } |
| JovanEps | 11:4e700bbf93d7 | 743 | return; |
| JovanEps | 11:4e700bbf93d7 | 744 | } |
| JovanEps | 11:4e700bbf93d7 | 745 | |
| JovanEps | 11:4e700bbf93d7 | 746 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 747 | |
| JovanEps | 11:4e700bbf93d7 | 748 | void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy) |
| JovanEps | 11:4e700bbf93d7 | 749 | /* |
| JovanEps | 11:4e700bbf93d7 | 750 | constant times a vector plus a vector. |
| JovanEps | 11:4e700bbf93d7 | 751 | jack dongarra, linpack, 3/11/78. |
| JovanEps | 11:4e700bbf93d7 | 752 | */ |
| JovanEps | 11:4e700bbf93d7 | 753 | |
| JovanEps | 11:4e700bbf93d7 | 754 | { |
| JovanEps | 11:4e700bbf93d7 | 755 | int i,ix,iy,m,mp1; |
| JovanEps | 11:4e700bbf93d7 | 756 | |
| JovanEps | 11:4e700bbf93d7 | 757 | mp1 = 0; |
| JovanEps | 11:4e700bbf93d7 | 758 | m = 0; |
| JovanEps | 11:4e700bbf93d7 | 759 | |
| JovanEps | 11:4e700bbf93d7 | 760 | if(n <= 0) return; |
| JovanEps | 11:4e700bbf93d7 | 761 | if (da == ZERO) return; |
| JovanEps | 11:4e700bbf93d7 | 762 | |
| JovanEps | 11:4e700bbf93d7 | 763 | if(incx != 1 || incy != 1) { |
| JovanEps | 11:4e700bbf93d7 | 764 | |
| JovanEps | 11:4e700bbf93d7 | 765 | /* code for unequal increments or equal increments |
| JovanEps | 11:4e700bbf93d7 | 766 | not equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 767 | |
| JovanEps | 11:4e700bbf93d7 | 768 | ix = 0; |
| JovanEps | 11:4e700bbf93d7 | 769 | iy = 0; |
| JovanEps | 11:4e700bbf93d7 | 770 | if(incx < 0) ix = (-n+1)*incx; |
| JovanEps | 11:4e700bbf93d7 | 771 | if(incy < 0)iy = (-n+1)*incy; |
| JovanEps | 11:4e700bbf93d7 | 772 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 773 | dy[iy] = dy[iy] + da*dx[ix]; |
| JovanEps | 11:4e700bbf93d7 | 774 | ix = ix + incx; |
| JovanEps | 11:4e700bbf93d7 | 775 | iy = iy + incy; |
| JovanEps | 11:4e700bbf93d7 | 776 | |
| JovanEps | 11:4e700bbf93d7 | 777 | } |
| JovanEps | 11:4e700bbf93d7 | 778 | return; |
| JovanEps | 11:4e700bbf93d7 | 779 | } |
| JovanEps | 11:4e700bbf93d7 | 780 | |
| JovanEps | 11:4e700bbf93d7 | 781 | /* code for both increments equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 782 | |
| JovanEps | 11:4e700bbf93d7 | 783 | |
| JovanEps | 11:4e700bbf93d7 | 784 | #ifdef ROLL |
| JovanEps | 11:4e700bbf93d7 | 785 | |
| JovanEps | 11:4e700bbf93d7 | 786 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 787 | dy[i] = dy[i] + da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 788 | } |
| JovanEps | 11:4e700bbf93d7 | 789 | |
| JovanEps | 11:4e700bbf93d7 | 790 | |
| JovanEps | 11:4e700bbf93d7 | 791 | #endif |
| JovanEps | 11:4e700bbf93d7 | 792 | |
| JovanEps | 11:4e700bbf93d7 | 793 | #ifdef UNROLL |
| JovanEps | 11:4e700bbf93d7 | 794 | |
| JovanEps | 11:4e700bbf93d7 | 795 | m = n % 4; |
| JovanEps | 11:4e700bbf93d7 | 796 | if ( m != 0) { |
| JovanEps | 11:4e700bbf93d7 | 797 | for (i = 0; i < m; i++) |
| JovanEps | 11:4e700bbf93d7 | 798 | dy[i] = dy[i] + da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 799 | |
| JovanEps | 11:4e700bbf93d7 | 800 | if (n < 4) return; |
| JovanEps | 11:4e700bbf93d7 | 801 | } |
| JovanEps | 11:4e700bbf93d7 | 802 | for (i = m; i < n; i = i + 4) { |
| JovanEps | 11:4e700bbf93d7 | 803 | dy[i] = dy[i] + da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 804 | dy[i+1] = dy[i+1] + da*dx[i+1]; |
| JovanEps | 11:4e700bbf93d7 | 805 | dy[i+2] = dy[i+2] + da*dx[i+2]; |
| JovanEps | 11:4e700bbf93d7 | 806 | dy[i+3] = dy[i+3] + da*dx[i+3]; |
| JovanEps | 11:4e700bbf93d7 | 807 | |
| JovanEps | 11:4e700bbf93d7 | 808 | } |
| JovanEps | 11:4e700bbf93d7 | 809 | |
| JovanEps | 11:4e700bbf93d7 | 810 | #endif |
| JovanEps | 11:4e700bbf93d7 | 811 | return; |
| JovanEps | 11:4e700bbf93d7 | 812 | } |
| JovanEps | 11:4e700bbf93d7 | 813 | |
| JovanEps | 11:4e700bbf93d7 | 814 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 815 | |
| JovanEps | 11:4e700bbf93d7 | 816 | REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy) |
| JovanEps | 11:4e700bbf93d7 | 817 | /* |
| JovanEps | 11:4e700bbf93d7 | 818 | forms the dot product of two vectors. |
| JovanEps | 11:4e700bbf93d7 | 819 | jack dongarra, linpack, 3/11/78. |
| JovanEps | 11:4e700bbf93d7 | 820 | */ |
| JovanEps | 11:4e700bbf93d7 | 821 | |
| JovanEps | 11:4e700bbf93d7 | 822 | { |
| JovanEps | 11:4e700bbf93d7 | 823 | REAL dtemp; |
| JovanEps | 11:4e700bbf93d7 | 824 | int i,ix,iy,m,mp1; |
| JovanEps | 11:4e700bbf93d7 | 825 | |
| JovanEps | 11:4e700bbf93d7 | 826 | mp1 = 0; |
| JovanEps | 11:4e700bbf93d7 | 827 | m = 0; |
| JovanEps | 11:4e700bbf93d7 | 828 | |
| JovanEps | 11:4e700bbf93d7 | 829 | dtemp = ZERO; |
| JovanEps | 11:4e700bbf93d7 | 830 | |
| JovanEps | 11:4e700bbf93d7 | 831 | if(n <= 0) return(ZERO); |
| JovanEps | 11:4e700bbf93d7 | 832 | |
| JovanEps | 11:4e700bbf93d7 | 833 | if(incx != 1 || incy != 1) { |
| JovanEps | 11:4e700bbf93d7 | 834 | |
| JovanEps | 11:4e700bbf93d7 | 835 | /* code for unequal increments or equal increments |
| JovanEps | 11:4e700bbf93d7 | 836 | not equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 837 | |
| JovanEps | 11:4e700bbf93d7 | 838 | ix = 0; |
| JovanEps | 11:4e700bbf93d7 | 839 | iy = 0; |
| JovanEps | 11:4e700bbf93d7 | 840 | if (incx < 0) ix = (-n+1)*incx; |
| JovanEps | 11:4e700bbf93d7 | 841 | if (incy < 0) iy = (-n+1)*incy; |
| JovanEps | 11:4e700bbf93d7 | 842 | for (i = 0; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 843 | dtemp = dtemp + dx[ix]*dy[iy]; |
| JovanEps | 11:4e700bbf93d7 | 844 | ix = ix + incx; |
| JovanEps | 11:4e700bbf93d7 | 845 | iy = iy + incy; |
| JovanEps | 11:4e700bbf93d7 | 846 | |
| JovanEps | 11:4e700bbf93d7 | 847 | } |
| JovanEps | 11:4e700bbf93d7 | 848 | return(dtemp); |
| JovanEps | 11:4e700bbf93d7 | 849 | } |
| JovanEps | 11:4e700bbf93d7 | 850 | |
| JovanEps | 11:4e700bbf93d7 | 851 | /* code for both increments equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 852 | |
| JovanEps | 11:4e700bbf93d7 | 853 | |
| JovanEps | 11:4e700bbf93d7 | 854 | #ifdef ROLL |
| JovanEps | 11:4e700bbf93d7 | 855 | |
| JovanEps | 11:4e700bbf93d7 | 856 | for (i=0; i < n; i++) |
| JovanEps | 11:4e700bbf93d7 | 857 | dtemp = dtemp + dx[i]*dy[i]; |
| JovanEps | 11:4e700bbf93d7 | 858 | |
| JovanEps | 11:4e700bbf93d7 | 859 | return(dtemp); |
| JovanEps | 11:4e700bbf93d7 | 860 | |
| JovanEps | 11:4e700bbf93d7 | 861 | #endif |
| JovanEps | 0:43b96e9650ef | 862 | |
| JovanEps | 11:4e700bbf93d7 | 863 | #ifdef UNROLL |
| JovanEps | 11:4e700bbf93d7 | 864 | |
| JovanEps | 11:4e700bbf93d7 | 865 | |
| JovanEps | 11:4e700bbf93d7 | 866 | m = n % 5; |
| JovanEps | 11:4e700bbf93d7 | 867 | if (m != 0) { |
| JovanEps | 11:4e700bbf93d7 | 868 | for (i = 0; i < m; i++) |
| JovanEps | 11:4e700bbf93d7 | 869 | dtemp = dtemp + dx[i]*dy[i]; |
| JovanEps | 11:4e700bbf93d7 | 870 | if (n < 5) return(dtemp); |
| JovanEps | 11:4e700bbf93d7 | 871 | } |
| JovanEps | 11:4e700bbf93d7 | 872 | for (i = m; i < n; i = i + 5) { |
| JovanEps | 11:4e700bbf93d7 | 873 | dtemp = dtemp + dx[i]*dy[i] + |
| JovanEps | 11:4e700bbf93d7 | 874 | dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] + |
| JovanEps | 11:4e700bbf93d7 | 875 | dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4]; |
| JovanEps | 11:4e700bbf93d7 | 876 | } |
| JovanEps | 11:4e700bbf93d7 | 877 | return(dtemp); |
| JovanEps | 11:4e700bbf93d7 | 878 | |
| JovanEps | 11:4e700bbf93d7 | 879 | #endif |
| JovanEps | 11:4e700bbf93d7 | 880 | |
| JovanEps | 11:4e700bbf93d7 | 881 | } |
| JovanEps | 11:4e700bbf93d7 | 882 | |
| JovanEps | 11:4e700bbf93d7 | 883 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 884 | void dscal(int n, REAL da, REAL dx[], int incx) |
| JovanEps | 11:4e700bbf93d7 | 885 | |
| JovanEps | 11:4e700bbf93d7 | 886 | /* scales a vector by a constant. |
| JovanEps | 11:4e700bbf93d7 | 887 | jack dongarra, linpack, 3/11/78. |
| JovanEps | 11:4e700bbf93d7 | 888 | */ |
| JovanEps | 11:4e700bbf93d7 | 889 | |
| JovanEps | 11:4e700bbf93d7 | 890 | { |
| JovanEps | 11:4e700bbf93d7 | 891 | int i,m,mp1,nincx; |
| JovanEps | 11:4e700bbf93d7 | 892 | |
| JovanEps | 11:4e700bbf93d7 | 893 | mp1 = 0; |
| JovanEps | 11:4e700bbf93d7 | 894 | m = 0; |
| JovanEps | 11:4e700bbf93d7 | 895 | |
| JovanEps | 11:4e700bbf93d7 | 896 | if(n <= 0)return; |
| JovanEps | 11:4e700bbf93d7 | 897 | if(incx != 1) { |
| JovanEps | 11:4e700bbf93d7 | 898 | |
| JovanEps | 11:4e700bbf93d7 | 899 | /* code for increment not equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 900 | |
| JovanEps | 11:4e700bbf93d7 | 901 | nincx = n*incx; |
| JovanEps | 11:4e700bbf93d7 | 902 | for (i = 0; i < nincx; i = i + incx) |
| JovanEps | 11:4e700bbf93d7 | 903 | dx[i] = da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 904 | |
| JovanEps | 11:4e700bbf93d7 | 905 | return; |
| JovanEps | 11:4e700bbf93d7 | 906 | } |
| JovanEps | 11:4e700bbf93d7 | 907 | |
| JovanEps | 11:4e700bbf93d7 | 908 | /* code for increment equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 909 | |
| JovanEps | 11:4e700bbf93d7 | 910 | |
| JovanEps | 11:4e700bbf93d7 | 911 | #ifdef ROLL |
| JovanEps | 11:4e700bbf93d7 | 912 | |
| JovanEps | 11:4e700bbf93d7 | 913 | for (i = 0; i < n; i++) |
| JovanEps | 11:4e700bbf93d7 | 914 | dx[i] = da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 915 | |
| JovanEps | 11:4e700bbf93d7 | 916 | |
| JovanEps | 11:4e700bbf93d7 | 917 | #endif |
| JovanEps | 11:4e700bbf93d7 | 918 | |
| JovanEps | 11:4e700bbf93d7 | 919 | #ifdef UNROLL |
| JovanEps | 11:4e700bbf93d7 | 920 | |
| JovanEps | 11:4e700bbf93d7 | 921 | |
| JovanEps | 11:4e700bbf93d7 | 922 | m = n % 5; |
| JovanEps | 11:4e700bbf93d7 | 923 | if (m != 0) { |
| JovanEps | 11:4e700bbf93d7 | 924 | for (i = 0; i < m; i++) |
| JovanEps | 11:4e700bbf93d7 | 925 | dx[i] = da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 926 | if (n < 5) return; |
| JovanEps | 11:4e700bbf93d7 | 927 | } |
| JovanEps | 11:4e700bbf93d7 | 928 | for (i = m; i < n; i = i + 5) { |
| JovanEps | 11:4e700bbf93d7 | 929 | dx[i] = da*dx[i]; |
| JovanEps | 11:4e700bbf93d7 | 930 | dx[i+1] = da*dx[i+1]; |
| JovanEps | 11:4e700bbf93d7 | 931 | dx[i+2] = da*dx[i+2]; |
| JovanEps | 11:4e700bbf93d7 | 932 | dx[i+3] = da*dx[i+3]; |
| JovanEps | 11:4e700bbf93d7 | 933 | dx[i+4] = da*dx[i+4]; |
| JovanEps | 11:4e700bbf93d7 | 934 | } |
| JovanEps | 11:4e700bbf93d7 | 935 | |
| JovanEps | 11:4e700bbf93d7 | 936 | #endif |
| JovanEps | 11:4e700bbf93d7 | 937 | |
| JovanEps | 11:4e700bbf93d7 | 938 | } |
| JovanEps | 11:4e700bbf93d7 | 939 | |
| JovanEps | 11:4e700bbf93d7 | 940 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 941 | int idamax(int n, REAL dx[], int incx) |
| JovanEps | 11:4e700bbf93d7 | 942 | |
| JovanEps | 11:4e700bbf93d7 | 943 | /* |
| JovanEps | 11:4e700bbf93d7 | 944 | finds the index of element having max. absolute value. |
| JovanEps | 11:4e700bbf93d7 | 945 | jack dongarra, linpack, 3/11/78. |
| JovanEps | 11:4e700bbf93d7 | 946 | */ |
| JovanEps | 11:4e700bbf93d7 | 947 | |
| JovanEps | 11:4e700bbf93d7 | 948 | |
| JovanEps | 11:4e700bbf93d7 | 949 | { |
| JovanEps | 11:4e700bbf93d7 | 950 | REAL dmax; |
| JovanEps | 11:4e700bbf93d7 | 951 | int i, ix, itemp; |
| JovanEps | 11:4e700bbf93d7 | 952 | |
| JovanEps | 11:4e700bbf93d7 | 953 | if( n < 1 ) return(-1); |
| JovanEps | 11:4e700bbf93d7 | 954 | if(n ==1 ) return(0); |
| JovanEps | 11:4e700bbf93d7 | 955 | if(incx != 1) { |
| JovanEps | 11:4e700bbf93d7 | 956 | |
| JovanEps | 11:4e700bbf93d7 | 957 | /* code for increment not equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 958 | |
| JovanEps | 11:4e700bbf93d7 | 959 | ix = 1; |
| JovanEps | 11:4e700bbf93d7 | 960 | dmax = fabs((double)dx[0]); |
| JovanEps | 11:4e700bbf93d7 | 961 | ix = ix + incx; |
| JovanEps | 11:4e700bbf93d7 | 962 | for (i = 1; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 963 | if(fabs((double)dx[ix]) > dmax) { |
| JovanEps | 11:4e700bbf93d7 | 964 | itemp = i; |
| JovanEps | 11:4e700bbf93d7 | 965 | dmax = fabs((double)dx[ix]); |
| JovanEps | 11:4e700bbf93d7 | 966 | } |
| JovanEps | 11:4e700bbf93d7 | 967 | ix = ix + incx; |
| JovanEps | 11:4e700bbf93d7 | 968 | } |
| JovanEps | 11:4e700bbf93d7 | 969 | } else { |
| JovanEps | 11:4e700bbf93d7 | 970 | |
| JovanEps | 11:4e700bbf93d7 | 971 | /* code for increment equal to 1 */ |
| JovanEps | 11:4e700bbf93d7 | 972 | |
| JovanEps | 11:4e700bbf93d7 | 973 | itemp = 0; |
| JovanEps | 11:4e700bbf93d7 | 974 | dmax = fabs((double)dx[0]); |
| JovanEps | 11:4e700bbf93d7 | 975 | for (i = 1; i < n; i++) { |
| JovanEps | 11:4e700bbf93d7 | 976 | if(fabs((double)dx[i]) > dmax) { |
| JovanEps | 11:4e700bbf93d7 | 977 | itemp = i; |
| JovanEps | 11:4e700bbf93d7 | 978 | dmax = fabs((double)dx[i]); |
| JovanEps | 11:4e700bbf93d7 | 979 | } |
| JovanEps | 11:4e700bbf93d7 | 980 | } |
| JovanEps | 11:4e700bbf93d7 | 981 | } |
| JovanEps | 11:4e700bbf93d7 | 982 | return (itemp); |
| JovanEps | 11:4e700bbf93d7 | 983 | } |
| JovanEps | 11:4e700bbf93d7 | 984 | |
| JovanEps | 11:4e700bbf93d7 | 985 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 986 | REAL epslon (REAL x) |
| JovanEps | 11:4e700bbf93d7 | 987 | |
| JovanEps | 11:4e700bbf93d7 | 988 | /* |
| JovanEps | 11:4e700bbf93d7 | 989 | estimate unit roundoff in quantities of size x. |
| JovanEps | 11:4e700bbf93d7 | 990 | */ |
| JovanEps | 0:43b96e9650ef | 991 | |
| JovanEps | 11:4e700bbf93d7 | 992 | { |
| JovanEps | 11:4e700bbf93d7 | 993 | REAL a,b,c,eps; |
| JovanEps | 11:4e700bbf93d7 | 994 | /* |
| JovanEps | 11:4e700bbf93d7 | 995 | this program should function properly on all systems |
| JovanEps | 11:4e700bbf93d7 | 996 | satisfying the following two assumptions, |
| JovanEps | 11:4e700bbf93d7 | 997 | 1. the base used in representing dfloating point |
| JovanEps | 11:4e700bbf93d7 | 998 | numbers is not a power of three. |
| JovanEps | 11:4e700bbf93d7 | 999 | 2. the quantity a in statement 10 is represented to |
| JovanEps | 11:4e700bbf93d7 | 1000 | the accuracy used in dfloating point variables |
| JovanEps | 11:4e700bbf93d7 | 1001 | that are stored in memory. |
| JovanEps | 11:4e700bbf93d7 | 1002 | the statement number 10 and the go to 10 are intended to |
| JovanEps | 11:4e700bbf93d7 | 1003 | force optimizing compilers to generate code satisfying |
| JovanEps | 11:4e700bbf93d7 | 1004 | assumption 2. |
| JovanEps | 11:4e700bbf93d7 | 1005 | under these assumptions, it should be true that, |
| JovanEps | 11:4e700bbf93d7 | 1006 | a is not exactly equal to four-thirds, |
| JovanEps | 11:4e700bbf93d7 | 1007 | b has a zero for its last bit or digit, |
| JovanEps | 11:4e700bbf93d7 | 1008 | c is not exactly equal to one, |
| JovanEps | 11:4e700bbf93d7 | 1009 | eps measures the separation of 1.0 from |
| JovanEps | 11:4e700bbf93d7 | 1010 | the next larger dfloating point number. |
| JovanEps | 11:4e700bbf93d7 | 1011 | the developers of eispack would appreciate being informed |
| JovanEps | 11:4e700bbf93d7 | 1012 | about any systems where these assumptions do not hold. |
| JovanEps | 11:4e700bbf93d7 | 1013 | |
| JovanEps | 11:4e700bbf93d7 | 1014 | ***************************************************************** |
| JovanEps | 11:4e700bbf93d7 | 1015 | this routine is one of the auxiliary routines used by eispack iii |
| JovanEps | 11:4e700bbf93d7 | 1016 | to avoid machine dependencies. |
| JovanEps | 11:4e700bbf93d7 | 1017 | ***************************************************************** |
| JovanEps | 11:4e700bbf93d7 | 1018 | |
| JovanEps | 11:4e700bbf93d7 | 1019 | this version dated 4/6/83. |
| JovanEps | 11:4e700bbf93d7 | 1020 | */ |
| JovanEps | 11:4e700bbf93d7 | 1021 | |
| JovanEps | 11:4e700bbf93d7 | 1022 | a = 4.0e0/3.0e0; |
| JovanEps | 11:4e700bbf93d7 | 1023 | eps = ZERO; |
| JovanEps | 11:4e700bbf93d7 | 1024 | while (eps == ZERO) { |
| JovanEps | 11:4e700bbf93d7 | 1025 | b = a - ONE; |
| JovanEps | 11:4e700bbf93d7 | 1026 | c = b + b + b; |
| JovanEps | 11:4e700bbf93d7 | 1027 | eps = fabs((double)(c-ONE)); |
| JovanEps | 11:4e700bbf93d7 | 1028 | } |
| JovanEps | 11:4e700bbf93d7 | 1029 | return(eps*fabs((double)x)); |
| JovanEps | 11:4e700bbf93d7 | 1030 | } |
| JovanEps | 11:4e700bbf93d7 | 1031 | |
| JovanEps | 11:4e700bbf93d7 | 1032 | /*----------------------*/ |
| JovanEps | 11:4e700bbf93d7 | 1033 | void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) |
| JovanEps | 11:4e700bbf93d7 | 1034 | |
| JovanEps | 11:4e700bbf93d7 | 1035 | |
| JovanEps | 11:4e700bbf93d7 | 1036 | /* We would like to declare m[][ldm], but c does not allow it. In this |
| JovanEps | 11:4e700bbf93d7 | 1037 | function, references to m[i][j] are written m[ldm*i+j]. */ |
| JovanEps | 11:4e700bbf93d7 | 1038 | |
| JovanEps | 11:4e700bbf93d7 | 1039 | /* |
| JovanEps | 11:4e700bbf93d7 | 1040 | purpose: |
| JovanEps | 11:4e700bbf93d7 | 1041 | multiply matrix m times vector x and add the result to vector y. |
| JovanEps | 11:4e700bbf93d7 | 1042 | |
| JovanEps | 11:4e700bbf93d7 | 1043 | parameters: |
| JovanEps | 11:4e700bbf93d7 | 1044 | |
| JovanEps | 11:4e700bbf93d7 | 1045 | n1 integer, number of elements in vector y, and number of rows in |
| JovanEps | 11:4e700bbf93d7 | 1046 | matrix m |
| JovanEps | 11:4e700bbf93d7 | 1047 | |
| JovanEps | 11:4e700bbf93d7 | 1048 | y double [n1], vector of length n1 to which is added |
| JovanEps | 11:4e700bbf93d7 | 1049 | the product m*x |
| JovanEps | 11:4e700bbf93d7 | 1050 | |
| JovanEps | 11:4e700bbf93d7 | 1051 | n2 integer, number of elements in vector x, and number of columns |
| JovanEps | 11:4e700bbf93d7 | 1052 | in matrix m |
| JovanEps | 11:4e700bbf93d7 | 1053 | |
| JovanEps | 11:4e700bbf93d7 | 1054 | ldm integer, leading dimension of array m |
| JovanEps | 11:4e700bbf93d7 | 1055 | |
| JovanEps | 11:4e700bbf93d7 | 1056 | x double [n2], vector of length n2 |
| JovanEps | 11:4e700bbf93d7 | 1057 | |
| JovanEps | 11:4e700bbf93d7 | 1058 | m double [ldm][n2], matrix of n1 rows and n2 columns |
| JovanEps | 0:43b96e9650ef | 1059 | |
| JovanEps | 11:4e700bbf93d7 | 1060 | ---------------------------------------------------------------------- |
| JovanEps | 11:4e700bbf93d7 | 1061 | */ |
| JovanEps | 11:4e700bbf93d7 | 1062 | { |
| JovanEps | 11:4e700bbf93d7 | 1063 | int j,i,jmin; |
| JovanEps | 11:4e700bbf93d7 | 1064 | /* cleanup odd vector */ |
| JovanEps | 11:4e700bbf93d7 | 1065 | |
| JovanEps | 11:4e700bbf93d7 | 1066 | j = n2 % 2; |
| JovanEps | 11:4e700bbf93d7 | 1067 | if (j >= 1) { |
| JovanEps | 11:4e700bbf93d7 | 1068 | j = j - 1; |
| JovanEps | 11:4e700bbf93d7 | 1069 | for (i = 0; i < n1; i++) |
| JovanEps | 11:4e700bbf93d7 | 1070 | y[i] = (y[i]) + x[j]*m[ldm*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 1071 | } |
| JovanEps | 11:4e700bbf93d7 | 1072 | |
| JovanEps | 11:4e700bbf93d7 | 1073 | /* cleanup odd group of two vectors */ |
| JovanEps | 11:4e700bbf93d7 | 1074 | |
| JovanEps | 11:4e700bbf93d7 | 1075 | j = n2 % 4; |
| JovanEps | 11:4e700bbf93d7 | 1076 | if (j >= 2) { |
| JovanEps | 11:4e700bbf93d7 | 1077 | j = j - 1; |
| JovanEps | 11:4e700bbf93d7 | 1078 | for (i = 0; i < n1; i++) |
| JovanEps | 11:4e700bbf93d7 | 1079 | y[i] = ( (y[i]) |
| JovanEps | 11:4e700bbf93d7 | 1080 | + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 1081 | } |
| JovanEps | 11:4e700bbf93d7 | 1082 | |
| JovanEps | 11:4e700bbf93d7 | 1083 | /* cleanup odd group of four vectors */ |
| JovanEps | 11:4e700bbf93d7 | 1084 | |
| JovanEps | 11:4e700bbf93d7 | 1085 | j = n2 % 8; |
| JovanEps | 11:4e700bbf93d7 | 1086 | if (j >= 4) { |
| JovanEps | 11:4e700bbf93d7 | 1087 | j = j - 1; |
| JovanEps | 11:4e700bbf93d7 | 1088 | for (i = 0; i < n1; i++) |
| JovanEps | 11:4e700bbf93d7 | 1089 | y[i] = ((( (y[i]) |
| JovanEps | 11:4e700bbf93d7 | 1090 | + x[j-3]*m[ldm*(j-3)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1091 | + x[j-2]*m[ldm*(j-2)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1092 | + x[j-1]*m[ldm*(j-1)+i]) + x[j]*m[ldm*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 1093 | } |
| JovanEps | 11:4e700bbf93d7 | 1094 | |
| JovanEps | 11:4e700bbf93d7 | 1095 | /* cleanup odd group of eight vectors */ |
| JovanEps | 0:43b96e9650ef | 1096 | |
| JovanEps | 11:4e700bbf93d7 | 1097 | j = n2 % 16; |
| JovanEps | 11:4e700bbf93d7 | 1098 | if (j >= 8) { |
| JovanEps | 11:4e700bbf93d7 | 1099 | j = j - 1; |
| JovanEps | 11:4e700bbf93d7 | 1100 | for (i = 0; i < n1; i++) |
| JovanEps | 11:4e700bbf93d7 | 1101 | y[i] = ((((((( (y[i]) |
| JovanEps | 11:4e700bbf93d7 | 1102 | + x[j-7]*m[ldm*(j-7)+i]) + x[j-6]*m[ldm*(j-6)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1103 | + x[j-5]*m[ldm*(j-5)+i]) + x[j-4]*m[ldm*(j-4)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1104 | + x[j-3]*m[ldm*(j-3)+i]) + x[j-2]*m[ldm*(j-2)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1105 | + x[j-1]*m[ldm*(j-1)+i]) + x[j] *m[ldm*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 1106 | } |
| JovanEps | 11:4e700bbf93d7 | 1107 | |
| JovanEps | 11:4e700bbf93d7 | 1108 | /* main loop - groups of sixteen vectors */ |
| JovanEps | 11:4e700bbf93d7 | 1109 | |
| JovanEps | 11:4e700bbf93d7 | 1110 | jmin = (n2%16)+16; |
| JovanEps | 11:4e700bbf93d7 | 1111 | for (j = jmin-1; j < n2; j = j + 16) { |
| JovanEps | 11:4e700bbf93d7 | 1112 | for (i = 0; i < n1; i++) |
| JovanEps | 11:4e700bbf93d7 | 1113 | y[i] = ((((((((((((((( (y[i]) |
| JovanEps | 11:4e700bbf93d7 | 1114 | + x[j-15]*m[ldm*(j-15)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1115 | + x[j-14]*m[ldm*(j-14)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1116 | + x[j-13]*m[ldm*(j-13)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1117 | + x[j-12]*m[ldm*(j-12)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1118 | + x[j-11]*m[ldm*(j-11)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1119 | + x[j-10]*m[ldm*(j-10)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1120 | + x[j- 9]*m[ldm*(j- 9)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1121 | + x[j- 8]*m[ldm*(j- 8)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1122 | + x[j- 7]*m[ldm*(j- 7)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1123 | + x[j- 6]*m[ldm*(j- 6)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1124 | + x[j- 5]*m[ldm*(j- 5)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1125 | + x[j- 4]*m[ldm*(j- 4)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1126 | + x[j- 3]*m[ldm*(j- 3)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1127 | + x[j- 2]*m[ldm*(j- 2)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1128 | + x[j- 1]*m[ldm*(j- 1)+i]) |
| JovanEps | 11:4e700bbf93d7 | 1129 | + x[j] *m[ldm*j+i]; |
| JovanEps | 11:4e700bbf93d7 | 1130 | } |
| JovanEps | 11:4e700bbf93d7 | 1131 | return; |
| JovanEps | 11:4e700bbf93d7 | 1132 | } |
| JovanEps | 11:4e700bbf93d7 | 1133 | //*---------------------------------------- |
| JovanEps | 2:273153e44338 | 1134 | int main() |
| JovanEps | 0:43b96e9650ef | 1135 | { |
| JovanEps | 11:4e700bbf93d7 | 1136 | pc.baud(9600); |
| JovanEps | 6:5e0f3eedaf66 | 1137 | while(1) { |
| JovanEps | 6:5e0f3eedaf66 | 1138 | |
| JovanEps | 2:273153e44338 | 1139 | pc.printf("Starting benchmark...\n"); |
| JovanEps | 6:5e0f3eedaf66 | 1140 | |
| JovanEps | 2:273153e44338 | 1141 | do_benchmark(); |
| JovanEps | 6:5e0f3eedaf66 | 1142 | |
| JovanEps | 2:273153e44338 | 1143 | pc.printf(" kraj \n\n"); |
| JovanEps | 2:273153e44338 | 1144 | } |
| JovanEps | 2:273153e44338 | 1145 | } |