Whetstone performance benchmark to compare the speed of my new Nucleo STM32 F401RE board with that of the Arduino. To summarize: my Arduino Leonardo knockoff, running at 16 MHz, runs through the 1,000 loops in 82 seconds for 1.21 MIPS, while the Nucleo takes only 30-31 seconds and yields 3.2-3.3 MIPS. I would have expected better relative performance from a 32-bit ARM Cortex-M4 processor over an 8-bit AVR. Maybe it will get better once the FPU is enabled in the online compiler.
Dependencies: ST_401_84MHZ mbed
whetstone.cpp
00001 #include "benchmark.h" 00002 #include "mbed.h" 00003 Serial pc(SERIAL_TX, SERIAL_RX); 00004 00005 00006 //Source: http://www.netlib.org/benchmark/whetstonec 00007 /* 00008 * C Converted Whetstone Double Precision Benchmark 00009 * Version 1.2 22 March 1998 00010 * 00011 * (c) Copyright 1998 Painter Engineering, Inc. 00012 * All Rights Reserved. 00013 * 00014 * Permission is granted to use, duplicate, and 00015 * publish this text and program as long as it 00016 * includes this entire comment block and limited 00017 * rights reference. 00018 * 00019 * Converted by Rich Painter, Painter Engineering, Inc. based on the 00020 * www.netlib.org benchmark/whetstoned version obtained 16 March 1998. 00021 * 00022 * A novel approach was used here to keep the look and feel of the 00023 * FORTRAN version. Altering the FORTRAN-based array indices, 00024 * starting at element 1, to start at element 0 for C, would require 00025 * numerous changes, including decrementing the variable indices by 1. 00026 * Instead, the array E1[] was declared 1 element larger in C. This 00027 * allows the FORTRAN index range to function without any literal or 00028 * variable indices changes. The array element E1[0] is simply never 00029 * used and does not alter the benchmark results. 00030 * 00031 * The major FORTRAN comment blocks were retained to minimize 00032 * differences between versions. Modules N5 and N12, like in the 00033 * FORTRAN version, have been eliminated here. 00034 * 00035 * An optional command-line argument has been provided [-c] to 00036 * offer continuous repetition of the entire benchmark. 00037 * An optional argument for setting an alternate LOOP count is also 00038 * provided. Define PRINTOUT to cause the POUT() function to print 00039 * outputs at various stages. Final timing measurements should be 00040 * made with the PRINTOUT undefined. 
00041 * 00042 * Questions and comments may be directed to the author at 00043 * r.painter@ieee.org 00044 */ 00045 /* 00046 C********************************************************************** 00047 C Benchmark #2 -- Double Precision Whetstone (A001) 00048 C 00049 C o This is a REAL*8 version of 00050 C the Whetstone benchmark program. 00051 C 00052 C o DO-loop semantics are ANSI-66 compatible. 00053 C 00054 C o Final measurements are to be made with all 00055 C WRITE statements and FORMAT sttements removed. 00056 C 00057 C********************************************************************** 00058 */ 00059 00060 00061 00062 #include <stdlib.h> 00063 #include <stdio.h> 00064 #include <string.h> 00065 #include <math.h> 00066 /* the following is optional depending on the timing function used */ 00067 #include <time.h> 00068 00069 /* map the FORTRAN math functions, etc. to the C versions */ 00070 #define DSIN sin 00071 #define DCOS cos 00072 #define DATAN atan 00073 #define DLOG log 00074 #define DEXP exp 00075 #define DSQRT sqrt 00076 #define IF if 00077 00078 /* function prototypes */ 00079 void POUT(long N, long J, long K, double X1, double X2, double X3, double X4); 00080 void PA(double E[]); 00081 void P0(void); 00082 void P3(double X, double Y, double *Z); 00083 #define USAGE "usage: whetdc [-c] [loops]\n" 00084 00085 /* 00086 COMMON T,T1,T2,E1(4),J,K,L 00087 */ 00088 double T,T1,T2,E1[5]; 00089 int J,K,L; 00090 00091 int argc=0;//Mod for nucleo. 
Change in code below if you want non-default loop count 00092 00093 00094 int 00095 whetstone(int argc) 00096 { 00097 pc.printf("Beginning Whetstone benchmark at "); 00098 if(argc==0) 00099 pc.printf("default speed ...\n"); 00100 else 00101 pc.printf("84 MHz ...\n"); 00102 /* used in the FORTRAN version */ 00103 long I; 00104 long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11; 00105 double X1,X2,X3,X4,X,Y,Z; 00106 long LOOP; 00107 int II, JJ; 00108 00109 /* added for this version */ 00110 long loopstart; 00111 long startsec, finisec; 00112 float KIPS; 00113 int continuous; 00114 00115 loopstart = 1000; /* see the note about LOOP below */ 00116 continuous = 0; 00117 00118 II = 1; /* start at the first arg (temp use of II here) */ 00119 /* while (II < argc) { 00120 if (strncmp(argv[II], "-c", 2) == 0 || argv[II][0] == 'c') { 00121 continuous = 1; 00122 } else if (atol(argv[II]) > 0) { 00123 loopstart = atol(argv[II]); 00124 } else { 00125 // fprintf(stderr, USAGE);//original code 00126 fprintf(stderr, USAGE);//not output toSTM32 version 00127 return(1); 00128 } 00129 II++; 00130 }*/ 00131 00132 LCONT: 00133 /* 00134 C 00135 C Start benchmark timing at this point. 00136 C 00137 */ 00138 startsec = time(0); 00139 00140 /* 00141 C 00142 C The actual benchmark starts here. 00143 C 00144 */ 00145 T = .499975; 00146 T1 = 0.50025; 00147 T2 = 2.0; 00148 /* 00149 C 00150 C With loopcount LOOP=10, one million Whetstone instructions 00151 C will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED 00152 C 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY. 
00153 C 00154 LOOP = 1000; 00155 */ 00156 LOOP = loopstart; 00157 II = 1; 00158 00159 JJ = 1; 00160 00161 IILOOP: 00162 N1 = 0; 00163 N2 = 12 * LOOP; 00164 N3 = 14 * LOOP; 00165 N4 = 345 * LOOP; 00166 N6 = 210 * LOOP; 00167 N7 = 32 * LOOP; 00168 N8 = 899 * LOOP; 00169 N9 = 616 * LOOP; 00170 N10 = 0; 00171 N11 = 93 * LOOP; 00172 /* 00173 C 00174 C Module 1: Simple identifiers 00175 C 00176 */ 00177 X1 = 1.0; 00178 X2 = -1.0; 00179 X3 = -1.0; 00180 X4 = -1.0; 00181 00182 for (I = 1; I <= N1; I++) { 00183 X1 = (X1 + X2 + X3 - X4) * T; 00184 X2 = (X1 + X2 - X3 + X4) * T; 00185 X3 = (X1 - X2 + X3 + X4) * T; 00186 X4 = (-X1+ X2 + X3 + X4) * T; 00187 } 00188 #ifdef PRINTOUT 00189 IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4); 00190 #endif 00191 00192 /* 00193 C 00194 C Module 2: Array elements 00195 C 00196 */ 00197 E1[1] = 1.0; 00198 E1[2] = -1.0; 00199 E1[3] = -1.0; 00200 E1[4] = -1.0; 00201 00202 for (I = 1; I <= N2; I++) { 00203 E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T; 00204 E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T; 00205 E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T; 00206 E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T; 00207 } 00208 00209 #ifdef PRINTOUT 00210 IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); 00211 #endif 00212 00213 /* 00214 C 00215 C Module 3: Array as parameter 00216 C 00217 */ 00218 for (I = 1; I <= N3; I++) 00219 PA(E1); 00220 00221 #ifdef PRINTOUT 00222 IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); 00223 #endif 00224 00225 /* 00226 C 00227 C Module 4: Conditional jumps 00228 C 00229 */ 00230 J = 1; 00231 for (I = 1; I <= N4; I++) { 00232 if (J == 1) 00233 J = 2; 00234 else 00235 J = 3; 00236 00237 if (J > 2) 00238 J = 0; 00239 else 00240 J = 1; 00241 00242 if (J < 1) 00243 J = 1; 00244 else 00245 J = 0; 00246 } 00247 00248 #ifdef PRINTOUT 00249 IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4); 00250 #endif 00251 00252 /* 00253 C 00254 C Module 5: Omitted 00255 C Module 6: Integer arithmetic 00256 C 00257 */ 00258 00259 J = 1; 00260 K = 2; 00261 L = 3; 
00262 00263 for (I = 1; I <= N6; I++) { 00264 J = J * (K-J) * (L-K); 00265 K = L * K - (L-J) * K; 00266 L = (L-K) * (K+J); 00267 E1[L-1] = J + K + L; 00268 E1[K-1] = J * K * L; 00269 } 00270 00271 #ifdef PRINTOUT 00272 IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); 00273 #endif 00274 00275 /* 00276 C 00277 C Module 7: Trigonometric functions 00278 C 00279 */ 00280 X = 0.5; 00281 Y = 0.5; 00282 00283 for (I = 1; I <= N7; I++) { 00284 X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); 00285 Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); 00286 } 00287 00288 #ifdef PRINTOUT 00289 IF (JJ==II)POUT(N7,J,K,X,X,Y,Y); 00290 #endif 00291 00292 /* 00293 C 00294 C Module 8: Procedure calls 00295 C 00296 */ 00297 X = 1.0; 00298 Y = 1.0; 00299 Z = 1.0; 00300 00301 for (I = 1; I <= N8; I++) 00302 P3(X,Y,&Z); 00303 00304 #ifdef PRINTOUT 00305 IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z); 00306 #endif 00307 00308 /* 00309 C 00310 C Module 9: Array references 00311 C 00312 */ 00313 J = 1; 00314 K = 2; 00315 L = 3; 00316 E1[1] = 1.0; 00317 E1[2] = 2.0; 00318 E1[3] = 3.0; 00319 00320 for (I = 1; I <= N9; I++) 00321 P0(); 00322 00323 #ifdef PRINTOUT 00324 IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); 00325 #endif 00326 00327 /* 00328 C 00329 C Module 10: Integer arithmetic 00330 C 00331 */ 00332 J = 2; 00333 K = 3; 00334 00335 for (I = 1; I <= N10; I++) { 00336 J = J + K; 00337 K = J + K; 00338 J = K - J; 00339 K = K - J - J; 00340 } 00341 00342 #ifdef PRINTOUT 00343 IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4); 00344 #endif 00345 00346 /* 00347 C 00348 C Module 11: Standard functions 00349 C 00350 */ 00351 X = 0.75; 00352 00353 for (I = 1; I <= N11; I++) 00354 X = DSQRT(DEXP(DLOG(X)/T1)); 00355 00356 #ifdef PRINTOUT 00357 IF (JJ==II)POUT(N11,J,K,X,X,X,X); 00358 #endif 00359 00360 /* 00361 C 00362 C THIS IS THE END OF THE MAJOR LOOP. 00363 C 00364 */ 00365 if (++JJ <= II) 00366 goto IILOOP; 00367 00368 /* 00369 C 00370 C Stop benchmark timing at this point. 
00371 C 00372 */ 00373 finisec = time(0); 00374 00375 /* 00376 C---------------------------------------------------------------- 00377 C Performance in Whetstone KIP's per second is given by 00378 C 00379 C (100*LOOP*II)/TIME 00380 C 00381 C where TIME is in seconds. 00382 C-------------------------------------------------------------------- 00383 */ 00384 pc.printf("\n"); 00385 if (finisec-startsec <= 0) { 00386 pc.printf("Insufficient duration- Increase the LOOP count\n"); 00387 return(1); 00388 } 00389 00390 pc.printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", 00391 LOOP, II, finisec-startsec); 00392 00393 KIPS = (100.0*LOOP*II)/(float)(finisec-startsec); 00394 if (KIPS >= 1000.0) 00395 pc.printf("C Converted Double Precision Whetstones: %.1f MIPS\n", KIPS/1000.0); 00396 else 00397 pc.printf("C Converted Double Precision Whetstones: %.1f KIPS\n", KIPS); 00398 00399 if (continuous) 00400 goto LCONT; 00401 00402 return(0); 00403 } 00404 00405 void 00406 PA(double E[]) 00407 { 00408 J = 0; 00409 00410 L10: 00411 E[1] = ( E[1] + E[2] + E[3] - E[4]) * T; 00412 E[2] = ( E[1] + E[2] - E[3] + E[4]) * T; 00413 E[3] = ( E[1] - E[2] + E[3] + E[4]) * T; 00414 E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2; 00415 J += 1; 00416 00417 if (J < 6) 00418 goto L10; 00419 } 00420 00421 void 00422 P0(void) 00423 { 00424 E1[J] = E1[K]; 00425 E1[K] = E1[L]; 00426 E1[L] = E1[J]; 00427 } 00428 00429 void 00430 P3(double X, double Y, double *Z) 00431 { 00432 double X1, Y1; 00433 00434 X1 = X; 00435 Y1 = Y; 00436 X1 = T * (X1 + Y1); 00437 Y1 = T * (X1 + Y1); 00438 *Z = (X1 + Y1) / T2; 00439 } 00440 00441 #ifdef PRINTOUT 00442 void 00443 POUT(long N, long J, long K, double X1, double X2, double X3, double X4) 00444 { 00445 pc.printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", 00446 N, J, K, X1, X2, X3, X4); 00447 } 00448 #endif 00449
Generated on Fri Jul 22 2022 13:15:22 by 1.7.2