Whetstone performance benchmark comparing the speed of my new Nucleo STM32 F401RE board to the Arduino. To summarize: my Arduino Leonardo knockoff, running at 16 MHz, runs through the 1,000 loops in 82 seconds for 1.21 MIPS, while the Nucleo takes only 30-31 seconds and yields 3.2-3.3 MIPS. I would have expected better relative performance from a 32-bit ARM Cortex-M4 processor over an 8-bit AVR. Maybe it will get better once the FPU is enabled in the online compiler.

Dependencies:   ST_401_84MHZ mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers whetstone.cpp Source File

whetstone.cpp

00001 #include "benchmark.h"
00002 #include "mbed.h"
00003 Serial pc(SERIAL_TX, SERIAL_RX);
00004 
00005 
00006 //Source: http://www.netlib.org/benchmark/whetstonec
00007 /*
00008  * C Converted Whetstone Double Precision Benchmark
00009  *      Version 1.2 22 March 1998
00010  *
00011  *  (c) Copyright 1998 Painter Engineering, Inc.
00012  *      All Rights Reserved.
00013  *
00014  *      Permission is granted to use, duplicate, and
00015  *      publish this text and program as long as it
00016  *      includes this entire comment block and limited
00017  *      rights reference.
00018  *
00019  * Converted by Rich Painter, Painter Engineering, Inc. based on the
00020  * www.netlib.org benchmark/whetstoned version obtained 16 March 1998.
00021  *
00022  * A novel approach was used here to keep the look and feel of the
00023  * FORTRAN version.  Altering the FORTRAN-based array indices,
00024  * starting at element 1, to start at element 0 for C, would require
00025  * numerous changes, including decrementing the variable indices by 1.
00026  * Instead, the array E1[] was declared 1 element larger in C.  This
00027  * allows the FORTRAN index range to function without any literal or
00028  * variable indices changes.  The array element E1[0] is simply never
00029  * used and does not alter the benchmark results.
00030  *
00031  * The major FORTRAN comment blocks were retained to minimize
00032  * differences between versions.  Modules N5 and N12, like in the
00033  * FORTRAN version, have been eliminated here.
00034  *
00035  * An optional command-line argument has been provided [-c] to
00036  * offer continuous repetition of the entire benchmark.
00037  * An optional argument for setting an alternate LOOP count is also
00038  * provided.  Define PRINTOUT to cause the POUT() function to print
00039  * outputs at various stages.  Final timing measurements should be
00040  * made with the PRINTOUT undefined.
00041  *
00042  * Questions and comments may be directed to the author at
00043  *          r.painter@ieee.org
00044  */
00045 /*
00046 C**********************************************************************
00047 C     Benchmark #2 -- Double  Precision Whetstone (A001)
00048 C
00049 C     o This is a REAL*8 version of
00050 C   the Whetstone benchmark program.
00051 C
00052 C     o DO-loop semantics are ANSI-66 compatible.
00053 C
00054 C     o Final measurements are to be made with all
00055 C   WRITE statements and FORMAT statements removed.
00056 C
00057 C**********************************************************************   
00058 */
00059 
00060 
00061  
00062 #include <stdlib.h>
00063 #include <stdio.h>
00064 #include <string.h>
00065 #include <math.h>
00066 /* the following is optional depending on the timing function used */
00067 #include <time.h>
00068 
00069 /* map the FORTRAN math functions, etc. to the C versions */
/* Each Dxxx name below expands to the <math.h> function of the same
 * purpose, and IF expands to the plain C keyword, so the benchmark body
 * can keep the spelling used in the FORTRAN original. */
00070 #define DSIN    sin
00071 #define DCOS    cos
00072 #define DATAN   atan
00073 #define DLOG    log
00074 #define DEXP    exp
00075 #define DSQRT   sqrt
00076 #define IF      if
00077
00078 /* function prototypes */
/* POUT is declared unconditionally here, but its definition later in this
 * file is compiled only when PRINTOUT is defined; every call to it is
 * guarded by the same #ifdef. */
00079 void POUT(long N, long J, long K, double X1, double X2, double X3, double X4);
00080 void PA(double E[]);
00081 void P0(void);
00082 void P3(double X, double Y, double *Z);
/* USAGE is only referenced from the argument-parsing code that is
 * commented out inside whetstone(). */
00083 #define USAGE   "usage: whetdc [-c] [loops]\n"
00084
00085 /*
00086     COMMON T,T1,T2,E1(4),J,K,L
00087 */
/* Shared benchmark state, mirroring the FORTRAN COMMON block above.
 * E1 is declared with 5 elements so that indices 1..4 can be used
 * unchanged; T, T1 and T2 are assigned at the start of whetstone(). */
00088 double T,T1,T2,E1[5];
00089 int J,K,L;
00090
/* NOTE(review): whetstone() has a parameter that is also named argc, which
 * hides this global inside that function; nothing else in this listing
 * reads it. */
00091 int argc=0;//Mod for nucleo. Change in code below if you want non-default loop count
00092 
00093 
/*
 * whetstone -- run the C-converted double-precision Whetstone benchmark
 * once and print the result on the `pc` serial port.
 *
 * argc: only selects the banner text. 0 prints "default speed ...",
 *       any other value prints "84 MHz ...".  It does not influence the
 *       loop count, because the argument parsing below is commented out.
 *
 * Returns 0 after the result has been printed, or 1 when the measured
 * duration (finisec - startsec) is not positive.
 *
 * The start and finish times come from time(0), are kept in longs and are
 * reported as seconds.  The function writes the globals T, T1, T2, E1,
 * J, K and L, directly and through PA(), P0() and P3().
 */
00094 int
00095 whetstone(int argc)
00096 {
00097     pc.printf("Beginning Whetstone benchmark at ");
00098     if(argc==0)
00099         pc.printf("default speed ...\n");
00100     else
00101         pc.printf("84 MHz ...\n");
00102     /* used in the FORTRAN version */
00103     long I;
00104     long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11;
00105     double X1,X2,X3,X4,X,Y,Z;
00106     long LOOP;
00107     int II, JJ;
00108
00109     /* added for this version */
00110     long loopstart;
00111     long startsec, finisec;
00112     float KIPS;
00113     int continuous;
00114
00115     loopstart = 1000;       /* see the note about LOOP below */
    /* continuous is never set to 1 in this version (the parsing that did so
       is commented out), so the "goto LCONT" at the end is not taken. */
00116     continuous = 0;
00117
    /* This value of II is overwritten before it is read: the only reader
       was the commented-out parsing loop below. */
00118     II = 1;     /* start at the first arg (temp use of II here) */
00119  /*   while (II < argc) {
00120         if (strncmp(argv[II], "-c", 2) == 0 || argv[II][0] == 'c') {
00121             continuous = 1;
00122         } else if (atol(argv[II]) > 0) {
00123             loopstart = atol(argv[II]);
00124         } else {
00125 //            fprintf(stderr, USAGE);//original code
00126             fprintf(stderr, USAGE);//not output toSTM32 version
00127             return(1);
00128         }
00129         II++;
00130     }*/
00131
00132 LCONT:
00133 /*
00134 C
00135 C   Start benchmark timing at this point.
00136 C
00137 */
00138     startsec = time(0);
00139
00140 /*
00141 C
00142 C   The actual benchmark starts here.
00143 C
00144 */
00145     T  = .499975;
00146     T1 = 0.50025;
00147     T2 = 2.0;
00148 /*
00149 C
00150 C   With loopcount LOOP=10, one million Whetstone instructions
00151 C   will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED
00152 C   'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY.
00153 C
00154     LOOP = 1000;
00155 */
00156     LOOP = loopstart;
    /* With II == 1 and JJ starting at 1, the "++JJ <= II" test at the end of
       the major loop fails on the first check, so the body runs once. */
00157     II   = 1;
00158
00159     JJ = 1;
00160
00161 IILOOP:
    /* Per-module iteration counts, all scaled by LOOP.  N1 and N10 are 0,
       so the loops of Module 1 and Module 10 execute no iterations. */
00162     N1  = 0;
00163     N2  = 12 * LOOP;
00164     N3  = 14 * LOOP;
00165     N4  = 345 * LOOP;
00166     N6  = 210 * LOOP;
00167     N7  = 32 * LOOP;
00168     N8  = 899 * LOOP;
00169     N9  = 616 * LOOP;
00170     N10 = 0;
00171     N11 = 93 * LOOP;
00172 /*
00173 C
00174 C   Module 1: Simple identifiers
00175 C
00176 */
00177     X1  =  1.0;
00178     X2  = -1.0;
00179     X3  = -1.0;
00180     X4  = -1.0;
00181
00182     for (I = 1; I <= N1; I++) {
00183         X1 = (X1 + X2 + X3 - X4) * T;
00184         X2 = (X1 + X2 - X3 + X4) * T;
00185         X3 = (X1 - X2 + X3 + X4) * T;
00186         X4 = (-X1+ X2 + X3 + X4) * T;
00187     }
00188 #ifdef PRINTOUT
00189     IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4);
00190 #endif
00191
00192 /*
00193 C
00194 C   Module 2: Array elements
00195 C
00196 */
00197     E1[1] =  1.0;
00198     E1[2] = -1.0;
00199     E1[3] = -1.0;
00200     E1[4] = -1.0;
00201
00202     for (I = 1; I <= N2; I++) {
00203         E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T;
00204         E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T;
00205         E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T;
00206         E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T;
00207     }
00208
00209 #ifdef PRINTOUT
00210     IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]);
00211 #endif
00212
00213 /*
00214 C
00215 C   Module 3: Array as parameter
00216 C
00217 */
    /* PA() continues from the E1 values left behind by Module 2. */
00218     for (I = 1; I <= N3; I++)
00219         PA(E1);
00220
00221 #ifdef PRINTOUT
00222     IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]);
00223 #endif
00224
00225 /*
00226 C
00227 C   Module 4: Conditional jumps
00228 C
00229 */
00230     J = 1;
00231     for (I = 1; I <= N4; I++) {
00232         if (J == 1)
00233             J = 2;
00234         else
00235             J = 3;
00236
00237         if (J > 2)
00238             J = 0;
00239         else
00240             J = 1;
00241
00242         if (J < 1)
00243             J = 1;
00244         else
00245             J = 0;
00246     }
00247
00248 #ifdef PRINTOUT
00249     IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4);
00250 #endif
00251
00252 /*
00253 C
00254 C   Module 5: Omitted
00255 C   Module 6: Integer arithmetic
00256 C
00257 */
00258
00259     J = 1;
00260     K = 2;
00261     L = 3;
00262
    /* Each iteration also stores two integer results into E1, at indices
       L-1 and K-1 (converted to double by the assignment). */
00263     for (I = 1; I <= N6; I++) {
00264         J = J * (K-J) * (L-K);
00265         K = L * K - (L-J) * K;
00266         L = (L-K) * (K+J);
00267         E1[L-1] = J + K + L;
00268         E1[K-1] = J * K * L;
00269     }
00270
00271 #ifdef PRINTOUT
00272     IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]);
00273 #endif
00274
00275 /*
00276 C
00277 C   Module 7: Trigonometric functions
00278 C
00279 */
00280     X = 0.5;
00281     Y = 0.5;
00282
00283     for (I = 1; I <= N7; I++) {
00284         X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0));
00285         Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0));
00286     }
00287
00288 #ifdef PRINTOUT
00289     IF (JJ==II)POUT(N7,J,K,X,X,Y,Y);
00290 #endif
00291
00292 /*
00293 C
00294 C   Module 8: Procedure calls
00295 C
00296 */
00297     X = 1.0;
00298     Y = 1.0;
00299     Z = 1.0;
00300
    /* X and Y are passed by value and stay 1.0; only Z is written by P3(). */
00301     for (I = 1; I <= N8; I++)
00302         P3(X,Y,&Z);
00303
00304 #ifdef PRINTOUT
00305     IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z);
00306 #endif
00307
00308 /*
00309 C
00310 C   Module 9: Array references
00311 C
00312 */
00313     J = 1;
00314     K = 2;
00315     L = 3;
00316     E1[1] = 1.0;
00317     E1[2] = 2.0;
00318     E1[3] = 3.0;
00319
    /* P0() takes no arguments; it works on E1 through the globals J, K, L
       set just above. */
00320     for (I = 1; I <= N9; I++)
00321         P0();
00322
00323 #ifdef PRINTOUT
00324     IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]);
00325 #endif
00326
00327 /*
00328 C
00329 C   Module 10: Integer arithmetic
00330 C
00331 */
00332     J = 2;
00333     K = 3;
00334
00335     for (I = 1; I <= N10; I++) {
00336         J = J + K;
00337         K = J + K;
00338         J = K - J;
00339         K = K - J - J;
00340     }
00341
00342 #ifdef PRINTOUT
00343     IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4);
00344 #endif
00345
00346 /*
00347 C
00348 C   Module 11: Standard functions
00349 C
00350 */
00351     X = 0.75;
00352
00353     for (I = 1; I <= N11; I++)
00354         X = DSQRT(DEXP(DLOG(X)/T1));
00355
00356 #ifdef PRINTOUT
00357     IF (JJ==II)POUT(N11,J,K,X,X,X,X);
00358 #endif
00359
00360 /*
00361 C
00362 C      THIS IS THE END OF THE MAJOR LOOP.
00363 C
00364 */
00365     if (++JJ <= II)
00366         goto IILOOP;
00367
00368 /*
00369 C
00370 C      Stop benchmark timing at this point.
00371 C
00372 */
00373     finisec = time(0);
00374
00375 /*
00376 C----------------------------------------------------------------
00377 C      Performance in Whetstone KIP's per second is given by
00378 C
00379 C   (100*LOOP*II)/TIME
00380 C
00381 C      where TIME is in seconds.
00382 C--------------------------------------------------------------------
00383 */
00384     pc.printf("\n");
    /* A non-positive duration would make the division below meaningless,
       so report the problem and return 1 instead. */
00385     if (finisec-startsec <= 0) {
00386         pc.printf("Insufficient duration- Increase the LOOP count\n");
00387         return(1);
00388     }
00389
00390     pc.printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n",
00391             LOOP, II, finisec-startsec);
00392
    /* Print the rate as MIPS once it reaches 1000 KIPS, otherwise as KIPS. */
00393     KIPS = (100.0*LOOP*II)/(float)(finisec-startsec);
00394     if (KIPS >= 1000.0)
00395         pc.printf("C Converted Double Precision Whetstones: %.1f MIPS\n", KIPS/1000.0);
00396     else
00397         pc.printf("C Converted Double Precision Whetstones: %.1f KIPS\n", KIPS);
00398
00399     if (continuous)
00400         goto LCONT;
00401
00402     return(0);
00403 }
00404 
00405 void
00406 PA(double E[])
00407 {
00408     J = 0;
00409 
00410 L10:
00411     E[1] = ( E[1] + E[2] + E[3] - E[4]) * T;
00412     E[2] = ( E[1] + E[2] - E[3] + E[4]) * T;
00413     E[3] = ( E[1] - E[2] + E[3] + E[4]) * T;
00414     E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2;
00415     J += 1;
00416 
00417     if (J < 6)
00418         goto L10;
00419 }
00420 
00421 void
00422 P0(void)
00423 {
00424     E1[J] = E1[K];
00425     E1[K] = E1[L];
00426     E1[L] = E1[J];
00427 }
00428 
00429 void
00430 P3(double X, double Y, double *Z)
00431 {
00432     double X1, Y1;
00433 
00434     X1 = X;
00435     Y1 = Y;
00436     X1 = T * (X1 + Y1);
00437     Y1 = T * (X1 + Y1);
00438     *Z  = (X1 + Y1) / T2;
00439 }
00440 
#ifdef PRINTOUT
/*
 * POUT -- print one line of intermediate results on the `pc` serial port:
 * three integers followed by four doubles in exponent notation.
 * Compiled only when PRINTOUT is defined.
 */
void
POUT(long N, long J, long K, double X1, double X2, double X3, double X4)
{
    static const char row_format[] =
        "%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n";

    pc.printf(row_format, N, J, K, X1, X2, X3, X4);
}
#endif
00449