Whetstone performance benchmark to compare the speed of my new Nucleo STM32 F401RE board to the Arduino. To summarize, while my Arduino Leonardo knockoff running at 16 Mhz runs through the 1,000 loops in 82 seconds for 1.21MIPS the Nucleo takes only 30-31 seconds and yields 3.2-3.3 MIPS. I would have expected better relative performance from a 32 bit ARM M4 processor over an 8 bit AVR. Maybe it will get better once the FPU is enabled in the online compiler.
Dependencies: ST_401_84MHZ mbed
Whetstone on the Nucleo at 84 MHz:
- Loops: 1000, Iterations: 1, Duration: 30 sec.
- C Converted Double Precision Whetstones: 3.3 MIPS
- Code size: 22.1 kB
Whetstone on the Arduino Leonardo at 16 MHz:
- Loops: 1000Iterations: 1Duration: 82320 millisec.
- C Converted Double Precision Whetstones: 1.21 MIPS
- Code size: 9,086 bytes
whetstone.cpp
- Committer:
- kirchnet
- Date:
- 2014-04-28
- Revision:
- 1:466dbb9d16a8
- Parent:
- 0:525baf7de2db
File content as of revision 1:466dbb9d16a8:
#include "benchmark.h" #include "mbed.h" Serial pc(SERIAL_TX, SERIAL_RX); //Source: http://www.netlib.org/benchmark/whetstonec /* * C Converted Whetstone Double Precision Benchmark * Version 1.2 22 March 1998 * * (c) Copyright 1998 Painter Engineering, Inc. * All Rights Reserved. * * Permission is granted to use, duplicate, and * publish this text and program as long as it * includes this entire comment block and limited * rights reference. * * Converted by Rich Painter, Painter Engineering, Inc. based on the * www.netlib.org benchmark/whetstoned version obtained 16 March 1998. * * A novel approach was used here to keep the look and feel of the * FORTRAN version. Altering the FORTRAN-based array indices, * starting at element 1, to start at element 0 for C, would require * numerous changes, including decrementing the variable indices by 1. * Instead, the array E1[] was declared 1 element larger in C. This * allows the FORTRAN index range to function without any literal or * variable indices changes. The array element E1[0] is simply never * used and does not alter the benchmark results. * * The major FORTRAN comment blocks were retained to minimize * differences between versions. Modules N5 and N12, like in the * FORTRAN version, have been eliminated here. * * An optional command-line argument has been provided [-c] to * offer continuous repetition of the entire benchmark. * An optional argument for setting an alternate LOOP count is also * provided. Define PRINTOUT to cause the POUT() function to print * outputs at various stages. Final timing measurements should be * made with the PRINTOUT undefined. * * Questions and comments may be directed to the author at * r.painter@ieee.org */ /* C********************************************************************** C Benchmark #2 -- Double Precision Whetstone (A001) C C o This is a REAL*8 version of C the Whetstone benchmark program. C C o DO-loop semantics are ANSI-66 compatible. C C o Final measurements are to be made with all C WRITE statements and FORMAT sttements removed. C C********************************************************************** */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <math.h> /* the following is optional depending on the timing function used */ #include <time.h> /* map the FORTRAN math functions, etc. to the C versions */ #define DSIN sin #define DCOS cos #define DATAN atan #define DLOG log #define DEXP exp #define DSQRT sqrt #define IF if /* function prototypes */ void POUT(long N, long J, long K, double X1, double X2, double X3, double X4); void PA(double E[]); void P0(void); void P3(double X, double Y, double *Z); #define USAGE "usage: whetdc [-c] [loops]\n" /* COMMON T,T1,T2,E1(4),J,K,L */ double T,T1,T2,E1[5]; int J,K,L; int argc=0;//Mod for nucleo. Change in code below if you want non-default loop count int whetstone(int argc) { pc.printf("Beginning Whetstone benchmark at "); if(argc==0) pc.printf("default speed ...\n"); else pc.printf("84 MHz ...\n"); /* used in the FORTRAN version */ long I; long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11; double X1,X2,X3,X4,X,Y,Z; long LOOP; int II, JJ; /* added for this version */ long loopstart; long startsec, finisec; float KIPS; int continuous; loopstart = 1000; /* see the note about LOOP below */ continuous = 0; II = 1; /* start at the first arg (temp use of II here) */ /* while (II < argc) { if (strncmp(argv[II], "-c", 2) == 0 || argv[II][0] == 'c') { continuous = 1; } else if (atol(argv[II]) > 0) { loopstart = atol(argv[II]); } else { // fprintf(stderr, USAGE);//original code fprintf(stderr, USAGE);//not output toSTM32 version return(1); } II++; }*/ LCONT: /* C C Start benchmark timing at this point. C */ startsec = time(0); /* C C The actual benchmark starts here. C */ T = .499975; T1 = 0.50025; T2 = 2.0; /* C C With loopcount LOOP=10, one million Whetstone instructions C will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED C 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY. C LOOP = 1000; */ LOOP = loopstart; II = 1; JJ = 1; IILOOP: N1 = 0; N2 = 12 * LOOP; N3 = 14 * LOOP; N4 = 345 * LOOP; N6 = 210 * LOOP; N7 = 32 * LOOP; N8 = 899 * LOOP; N9 = 616 * LOOP; N10 = 0; N11 = 93 * LOOP; /* C C Module 1: Simple identifiers C */ X1 = 1.0; X2 = -1.0; X3 = -1.0; X4 = -1.0; for (I = 1; I <= N1; I++) { X1 = (X1 + X2 + X3 - X4) * T; X2 = (X1 + X2 - X3 + X4) * T; X3 = (X1 - X2 + X3 + X4) * T; X4 = (-X1+ X2 + X3 + X4) * T; } #ifdef PRINTOUT IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4); #endif /* C C Module 2: Array elements C */ E1[1] = 1.0; E1[2] = -1.0; E1[3] = -1.0; E1[4] = -1.0; for (I = 1; I <= N2; I++) { E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T; E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T; E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T; E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T; } #ifdef PRINTOUT IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); #endif /* C C Module 3: Array as parameter C */ for (I = 1; I <= N3; I++) PA(E1); #ifdef PRINTOUT IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); #endif /* C C Module 4: Conditional jumps C */ J = 1; for (I = 1; I <= N4; I++) { if (J == 1) J = 2; else J = 3; if (J > 2) J = 0; else J = 1; if (J < 1) J = 1; else J = 0; } #ifdef PRINTOUT IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4); #endif /* C C Module 5: Omitted C Module 6: Integer arithmetic C */ J = 1; K = 2; L = 3; for (I = 1; I <= N6; I++) { J = J * (K-J) * (L-K); K = L * K - (L-J) * K; L = (L-K) * (K+J); E1[L-1] = J + K + L; E1[K-1] = J * K * L; } #ifdef PRINTOUT IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); #endif /* C C Module 7: Trigonometric functions C */ X = 0.5; Y = 0.5; for (I = 1; I <= N7; I++) { X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); } #ifdef PRINTOUT IF (JJ==II)POUT(N7,J,K,X,X,Y,Y); #endif /* C C Module 8: Procedure calls C */ X = 1.0; Y = 1.0; Z = 1.0; for (I = 1; I <= N8; I++) P3(X,Y,&Z); #ifdef PRINTOUT IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z); #endif /* C C Module 9: Array references C */ J = 1; K = 2; L = 3; E1[1] = 1.0; E1[2] = 2.0; E1[3] = 3.0; for (I = 1; I <= N9; I++) P0(); #ifdef PRINTOUT IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); #endif /* C C Module 10: Integer arithmetic C */ J = 2; K = 3; for (I = 1; I <= N10; I++) { J = J + K; K = J + K; J = K - J; K = K - J - J; } #ifdef PRINTOUT IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4); #endif /* C C Module 11: Standard functions C */ X = 0.75; for (I = 1; I <= N11; I++) X = DSQRT(DEXP(DLOG(X)/T1)); #ifdef PRINTOUT IF (JJ==II)POUT(N11,J,K,X,X,X,X); #endif /* C C THIS IS THE END OF THE MAJOR LOOP. C */ if (++JJ <= II) goto IILOOP; /* C C Stop benchmark timing at this point. C */ finisec = time(0); /* C---------------------------------------------------------------- C Performance in Whetstone KIP's per second is given by C C (100*LOOP*II)/TIME C C where TIME is in seconds. C-------------------------------------------------------------------- */ pc.printf("\n"); if (finisec-startsec <= 0) { pc.printf("Insufficient duration- Increase the LOOP count\n"); return(1); } pc.printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", LOOP, II, finisec-startsec); KIPS = (100.0*LOOP*II)/(float)(finisec-startsec); if (KIPS >= 1000.0) pc.printf("C Converted Double Precision Whetstones: %.1f MIPS\n", KIPS/1000.0); else pc.printf("C Converted Double Precision Whetstones: %.1f KIPS\n", KIPS); if (continuous) goto LCONT; return(0); } void PA(double E[]) { J = 0; L10: E[1] = ( E[1] + E[2] + E[3] - E[4]) * T; E[2] = ( E[1] + E[2] - E[3] + E[4]) * T; E[3] = ( E[1] - E[2] + E[3] + E[4]) * T; E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2; J += 1; if (J < 6) goto L10; } void P0(void) { E1[J] = E1[K]; E1[K] = E1[L]; E1[L] = E1[J]; } void P3(double X, double Y, double *Z) { double X1, Y1; X1 = X; Y1 = Y; X1 = T * (X1 + Y1); Y1 = T * (X1 + Y1); *Z = (X1 + Y1) / T2; } #ifdef PRINTOUT void POUT(long N, long J, long K, double X1, double X2, double X3, double X4) { pc.printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", N, J, K, X1, X2, X3, X4); } #endif