Benchmark of double-precision (DP), single-precision (SP) and integer arithmetic operations for ARM Cortex-M7 MCUs on the Nucleo-144 STM32F746 and STM32F767 boards (modified from ddown's post at "Arduino for STM32") by Jovan Ivković (JovanEps)
Diff: main.cpp
- Revision:
- 2:03cf226a5ba3
- Parent:
- 1:be78b18b8347
- Child:
- 3:12d9e9070739
--- a/main.cpp Mon Jan 02 02:55:50 2017 +0000 +++ b/main.cpp Wed Jan 04 00:04:46 2017 +0000 @@ -14,411 +14,246 @@ /* the following is optional depending on the timing function used */ #include <time.h> -/* map the FORTRAN math functions, etc. to the C versions */ -#define DSIN sin -#define DCOS cos -#define DATAN atan -#define DLOG log -#define DEXP exp -#define DSQRT sqrt -#define IF if -/* function prototypes */ -void POUT(long N, long J, long K, double X1, double X2, double X3, double X4); -void PA(double E[]); -void P0(void); -void P3(double X, double Y, double *Z); -#define USAGE "usage: whetdc [-c] [loops]\n" - -/* - COMMON T,T1,T2,E1(4),J,K,L -*/ -double T,T1,T2,E1[5]; -int J,K,L; -int argc = 0; //Mod for nucleo. Change in code below if you want non-default loop count - -//************************************ -//** Whetstone 64b-DP ** -//** SUB ** -//************************************ -int Whetstone() // ------------ Metoda ----------- -{ - pc.baud(115200); - pc.printf("Beginning Whetstone benchmark at "); - - pc.printf("default 216 MHz ...\n"); - /* used in the FORTRAN version */ - long I; - long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11; - double X1,X2,X3,X4,X,Y,Z; - long LOOP; - int II, JJ; +#define REDO_COMPUTATIONS 10 +struct results { + uint32_t doubletime; + uint32_t floattime; + uint32_t inttime; +}; - /* added for this version */ - long loopstart = 0; - long startsec,finisec = 0; - double KIPS; - int continuous; - - loopstart = 25000; /* 1000 see the note about LOOP below */ - continuous = 0; - - II = 1; /* start at the first arg (temp use of II here) */ - -LCONT: -/* -******************************************** -* Start benchmark timing at this point. -******************************************** -*/ - timer.start(); - startsec = 0; - finisec = 0; - startsec = timer.read_us(); - -/* -******************************************** -* The actual benchmark starts here. 
-******************************************** -*/ - T = .499975; - T1 = 0.50025; - T2 = 2.0; -/* -******************************************** -* With loopcount LOOP=10, one million Whetstone instructions -* will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED -* 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY. -* -* LOOP = 1000; -*/ - LOOP = loopstart; - II = 1; - JJ = 1; +#define MAX_LOOPS 512 +double MyDoubles[MAX_LOOPS]; +double a_d = 12345.67, b_d = 54321.11; +float MyFloats[MAX_LOOPS]; +float a_f = 67890.12, b_f = 8756451.17; +int Myints[MAX_LOOPS]; +int a_i = 581674411, b_i = 18714; -IILOOP: - N1 = 0; - N2 = 12 * LOOP; - N3 = 14 * LOOP; - N4 = 345 * LOOP; - N6 = 210 * LOOP; - N7 = 32 * LOOP; - N8 = 899 * LOOP; - N9 = 616 * LOOP; - N10 = 0; - N11 = 93 * LOOP; -/* -******************************************** -* Module 1: Simple identifiers -******************************************** -*/ - X1 = 1.0; - X2 = -1.0; - X3 = -1.0; - X4 = -1.0; - - for (I = 1; I <= N1; I++) - { - X1 = (X1 + X2 + X3 - X4) * T; - X2 = (X1 + X2 - X3 + X4) * T; - X3 = (X1 - X2 + X3 + X4) * T; - X4 = (-X1+ X2 + X3 + X4) * T; - } -#ifdef PRINTOUT - IF (JJ==II) POUT(N1,N1,N1,X1,X2,X3,X4); -#endif - -/* -******************************************** -* Module 2: Array elements -******************************************** -*/ - E1[1] = 1.0; - E1[2] = -1.0; - E1[3] = -1.0; - E1[4] = -1.0; +//**************************************** +uint32_t micros () +{ +//**************************************** + // uint32_t usec = timer.read_us(); + //return usec; + return timer.read_us(); +} - for (I = 1; I <= N2; I++) - { - E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T; - E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T; - E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T; - E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T; - } - -#ifdef PRINTOUT - IF (JJ==II) POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); -#endif - -/* -******************************************** -* Module 3: Array as parameter 
-******************************************** -*/ - for (I = 1; I <= N3; I++) - { - PA(E1); +//**************************************** +void math_add (struct results *r) { +//**************************************** + uint32_t t, c, l; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyDoubles [ l ] = double ( a_d + b_d * double ( l ) ); } -#ifdef PRINTOUT - IF (JJ==II) POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); -#endif - -/* -******************************************** -* Module 4: Conditional jumps -******************************************** -*/ - J = 1; - for (I = 1; I <= N4; I++) - { - if (J == 1) - J = 2; - else - J = 3; - - if (J > 2) - J = 0; - else - J = 1; - - if (J < 1) - J = 1; - else - J = 0; - } + } + r->doubletime = micros () - t; -#ifdef PRINTOUT - IF (JJ==II) POUT(N4,J,J,X1,X2,X3,X4); -#endif - -/* -******************************************** -* Module 5: Omitted -* Module 6: Integer arithmetic -******************************************** -*/ - - J = 1; - K = 2; - L = 3; - - for (I = 1; I <= N6; I++) - { - J = J * (K-J) * (L-K); - K = L * K - (L-J) * K; - L = (L-K) * (K+J); - E1[L-1] = J + K + L; - E1[K-1] = J * K * L; - } - -#ifdef PRINTOUT - IF (JJ==II) POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); -#endif - -/* -******************************************** -* Module 7: Trigonometric functions -******************************************** -*/ - X = 0.5; - Y = 0.5; - - for (I = 1; I <= N7; I++) - { - X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); - Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyFloats [ l ] = float ( a_f + b_f * float ( l ) ); } - -#ifdef PRINTOUT - IF (JJ==II)POUT(N7,J,K,X,X,Y,Y); -#endif + } + r->floattime = micros () - t; -/* -******************************************** -* Module 8: Procedure calls 
-******************************************** -*/ - X = 1.0; - Y = 1.0; - Z = 1.0; - - for (I = 1; I <= N8; I++) - { - P3(X,Y,&Z); + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + Myints [ l ] = a_i + b_i * l; } -#ifdef PRINTOUT - IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z); -#endif - -/* -******************************************** -* Module 9: Array references -******************************************** -*/ - J = 1; - K = 2; - L = 3; - E1[1] = 1.0; - E1[2] = 2.0; - E1[3] = 3.0; - - for (I = 1; I <= N9; I++) - { - P0(); - } -#ifdef PRINTOUT - IF (JJ==II) POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); -#endif + } + r->inttime = micros () - t; +} -/* -******************************************** -* Module 10: Integer arithmetic -******************************************** -*/ - J = 2; - K = 3; - - for (I = 1; I <= N10; I++) - { - J = J + K; - K = J + K; - J = K - J; - K = K - J - J; +//**************************************** +void math_sub (struct results *r) { +//**************************************** + uint32_t t, c, l; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyDoubles [ l ] = double ( a_d - b_d * double ( l ) ); } - -#ifdef PRINTOUT - IF (JJ==II) POUT(N10,J,K,X1,X2,X3,X4); -#endif - -/* -******************************************** -* Module 11: Standard functions -******************************************** -*/ - X = 0.75; - - for (I = 1; I <= N11; I++) - { - X = DSQRT(DEXP(DLOG(X)/T1)); - } -#ifdef PRINTOUT - IF (JJ==II) POUT(N11,J,K,X,X,X,X); -#endif - -/* -******************************************** -* THIS IS THE END OF THE MAJOR LOOP. -******************************************** -*/ - if (++JJ <= II) - goto IILOOP; + } + r->doubletime = micros () - t; -/* -******************************************** -* Stop benchmark timing at this point. 
-******************************************** -*/ - // finisec = time(0); - finisec = timer.read_us(); - //timer.reset(); + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyFloats [ l ] = float ( a_f - b_f * float ( l ) ); + } + } + r->floattime = micros () - t; -/* -*-------------------------------------------------------------------- -* Performance in Whetstone KIP's per second is given by -* -* (100*LOOP*II)/TIME -* -* where TIME is in seconds. -*-------------------------------------------------------------------- -*/ - pc.printf(" kraj \n"); - double vreme; - vreme = (finisec - startsec) / 1000000; - - if (vreme <= 0) - { - pc.printf("Insufficient duration- Increase the LOOP count \n"); - finisec = 0; - startsec = 0; - return 1; - } - - pc.printf("Loops: %ld , \t Iterations: %d, \t Duration: %.3f sec. \n", - LOOP, II, vreme); - - KIPS = (100.0 * LOOP * II) / vreme ; - - // if (KIPS >= 1000.0) - // pc.printf("C Converted Double Precision Whetstones: %.3f MIPS \n\n", KIPS / 1000); - // else - // pc.printf("C Converted Double Precision Whetstones: %.3f KIPS \n\n", KIPS); - - pc.printf("C Converted Double Precision Whetstones: %.3f MIPS \n\n", KIPS / 1000); - - if (continuous) - goto LCONT; - - finisec = 0; - startsec = 0; - return 1; + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + Myints [ l ] = a_i - b_i * l; + } + } + r->inttime = micros () - t; } -void PA(double E[]) -{ - J = 0; +//**************************************** +void math_mul (struct results *r) { +//**************************************** + uint32_t t, c, l; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyDoubles [ l ] = double ( a_d * b_d * double ( l ) ); + } + } + r->doubletime = micros () - t; -L10: - E[1] = ( E[1] + E[2] + E[3] - E[4]) * T; - E[2] = ( E[1] + E[2] - E[3] + E[4]) * T; - E[3] = ( E[1] 
- E[2] + E[3] + E[4]) * T; - E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2; - J += 1; + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyFloats [ l ] = float ( a_f * b_f * float ( l ) ); + } + } + r->floattime = micros () - t; - if (J < 6) - goto L10; + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + Myints [ l ] = a_i * b_i * l; + } + } + r->inttime = micros () - t; } -void P0(void) -{ - E1[J] = E1[K]; - E1[K] = E1[L]; - E1[L] = E1[J]; +//**************************************** +void math_div (struct results *r) { +//**************************************** + uint32_t t, c, l; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyDoubles [ l ] = double ( a_d / b_d * double ( l ) ); + } + } + r->doubletime = micros () - t; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + MyFloats [ l ] = float ( a_f / b_f * float ( l ) ); + } + } + r->floattime = micros () - t; + + t = micros (); + for ( c = 0 ; c < REDO_COMPUTATIONS ; c ++ ) + { + for ( l = 0 ; l < MAX_LOOPS ; l ++ ) + { + Myints [ l ] = a_i / b_i * l; + } + } + r->inttime = micros () - t; } -void P3(double X, double Y, double *Z) -{ - double X1, Y1; +//**************************************** +void bench_loop() { +//**************************************** + + struct results add_ops, sub_ops, mul_ops, div_ops; + + math_add(&add_ops); + math_sub(&sub_ops); + math_mul(&mul_ops); + math_div(&div_ops); - X1 = X; - Y1 = Y; - X1 = T * (X1 + Y1); - Y1 = T * (X1 + Y1); - *Z = (X1 + Y1) / T2; -} + pc.printf("\n\n"); + pc.printf("\n FUNCTION DOUBLE SINGLE INT"); + + pc.printf("\n Time - ADD (us/512) :\t "); + pc.printf( "%0.1f", ( float ) add_ops.doubletime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) add_ops.floattime / REDO_COMPUTATIONS ); + 
pc.printf("\t\t"); + pc.printf("%0.1f", ( float ) add_ops.inttime / REDO_COMPUTATIONS ); + pc.printf("\n"); + + pc.printf("\n Time - SUB (us/512) :\t "); + pc.printf( "%0.1f", ( float ) sub_ops.doubletime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) sub_ops.floattime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) sub_ops.inttime / REDO_COMPUTATIONS ); + pc.printf("\n"); -#ifdef PRINTOUT -void POUT(long N, long J, long K, double X1, double X2, double X3, double X4) -{ - pc.printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", - N, J, K, X1, X2, X3, X4); + pc.printf("\n Time - MUL (us/512) :\t "); + pc.printf( "%0.1f", ( float ) mul_ops.doubletime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) mul_ops.floattime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) mul_ops.inttime / REDO_COMPUTATIONS ); + pc.printf("\n"); + + pc.printf("\n Time - DIV (us/512) :\t "); + pc.printf( "%0.1f", ( float ) div_ops.doubletime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) div_ops.floattime / REDO_COMPUTATIONS ); + pc.printf("\t\t"); + pc.printf( "%0.1f", ( float ) div_ops.inttime / REDO_COMPUTATIONS ); + pc.printf("\n"); + + wait(1); } -#endif //********************************* //** MAIN block ** //********************************* int main() { - int rez=0; - printf("\n My Benchamrk example for Whetstones \n"); + pc.baud(57600); + pc.printf("\n My Benchamrk ..."); + pc.printf("Beginningbenchmark at "); + pc.printf("default 216 MHz ...\n"); + pc.printf("\n\n"); + while(1) { - myled=1-rez; - - rez = Whetstone(); //Call of Whetstone banch metod - - myled=1-rez; - wait_us(0.3); + myled=1; + timer.start(); + + bench_loop(); //Call of banch method + + pc.printf(" kraj \n"); + myled=0; + timer.stop(); + } } \ No newline at end of file