Benchmarking test code for various operations
Fork of benchmark by igor_m
Benchmarking the performance of various mbed boards that I have
Using the excellent code by Igor, I have benchmarked the performance of the following platforms:
- mbed LPC1768
- mbed LPC11U24
- EA LPC4088 QuickStart Board
- mbed LPC1114FN28
- NUCLEO-F302R8
- FRDM-K64F
- NUCLEO-F411RE
- LPCXpresso4337
- Seeed Arch Max
- mbed LPC1768
The data can be found in this link: https://docs.google.com/spreadsheets/d/1d5BcNvC341xvktRJ6DC3wdlI6wuv0FjCvCqHAnfINmQ/pubhtml
For the hyperbolic tan (tanh) function, I made a graph showing how the clock speed of various ARM Cortex-M4 boards with FPU affects the computation time in microseconds.
main.cpp@3:ec2e20a9bd03, 2017-02-28 (annotated)
- Committer:
- mcx
- Date:
- Tue Feb 28 20:54:35 2017 +0000
- Revision:
- 3:ec2e20a9bd03
- Parent:
- 2:fc68d524dd7d
Now prints out clock speed in MHz
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
igor_m | 0:6d89d8c13042 | 1 | #include "mbed.h" |
mcx | 1:f91e7bc0e244 | 2 | |
mcx | 1:f91e7bc0e244 | 3 | //https://developer.mbed.org/users/igor_m/code/benchmark/ |
igor_m | 0:6d89d8c13042 | 4 | /* This program determines the time it takes to perform floating point |
igor_m | 0:6d89d8c13042 | 5 | and integer operations. |
igor_m | 0:6d89d8c13042 | 6 | To determine the time it takes, a Timer is used to measure the time |
igor_m | 0:6d89d8c13042 | 7 | it takes to complete a large amount of iterations. The time for a single |
igor_m | 0:6d89d8c13042 | 8 | operation can then be determined. |
igor_m | 0:6d89d8c13042 | 9 | |
igor_m | 0:6d89d8c13042 | 10 | To increase accuracy of the results, an empty for loop is timed to determine |
igor_m | 0:6d89d8c13042 | 11 | the loop overhead and the overhead is subtracted from the time it takes to |
igor_m | 0:6d89d8c13042 | 12 | complete the operation loop. |
mcx | 1:f91e7bc0e244 | 13 | */ |
igor_m | 0:6d89d8c13042 | 14 | |
igor_m | 0:6d89d8c13042 | 15 | #define ITERATIONS 1000000 // Number of loop iterations timed in main(). |
mcx | 3:ec2e20a9bd03 | 16 | #define CLOCK 64 // Clock frequency in MHz; main() multiplies the per-operation time in us by this to get cycles. |
igor_m | 0:6d89d8c13042 | 17 | Timer timer; // Timer read in microseconds by main() for both measurements. |
igor_m | 0:6d89d8c13042 | 18 | |
igor_m | 0:6d89d8c13042 | 19 | Serial pc(USBTX, USBRX); |
igor_m | 0:6d89d8c13042 | 20 | float number_of_cycles, single_operation_time; |
mcx | 3:ec2e20a9bd03 | 21 | //volatile int a, b, c; // Alternative: int operands and result. Must be volatile! |
mcx | 3:ec2e20a9bd03 | 22 | volatile float a, b, c; // Float operands and result. Must be volatile so every access in the timed loop is actually performed. |
mcx | 1:f91e7bc0e244 | 23 | //volatile double a, b, c; // Alternative: double operands and result. Must be volatile! |
igor_m | 0:6d89d8c13042 | 24 | |
igor_m | 0:6d89d8c13042 | 25 | int main() { |
igor_m | 0:6d89d8c13042 | 26 | // Times ITERATIONS executions of one expression, subtracts the empty-loop overhead and prints the per-operation cost. |
mcx | 3:ec2e20a9bd03 | 27 | // Apparently I'm not getting full speed!? |
mcx | 3:ec2e20a9bd03 | 28 | // https://developer.mbed.org/forum/mbed/topic/229/?page=1 |
mcx | 3:ec2e20a9bd03 | 29 | // https://developer.mbed.org/questions/55557/CPU-clock-of-upto-100MHz/ |
mcx | 3:ec2e20a9bd03 | 30 | printf("SystemCoreClock = %lu MHz\r\n", static_cast<unsigned long>(SystemCoreClock/1000000)); |
mcx | 3:ec2e20a9bd03 | 31 | // Need external crystal to make it run faster? |
mcx | 3:ec2e20a9bd03 | 32 | // https://developer.mbed.org/users/dreschpe/code/ST_401_84MHZ/ |
mcx | 3:ec2e20a9bd03 | 33 | // https://developer.mbed.org/users/oliverb/notebook/crystal-oscillator-notes/ |
mcx | 3:ec2e20a9bd03 | 34 | |
igor_m | 0:6d89d8c13042 | 35 | unsigned int i, for_time, total_time, operation_time; |
igor_m | 0:6d89d8c13042 | 36 | a=2.3; |
igor_m | 0:6d89d8c13042 | 37 | b=5.33; |
igor_m | 0:6d89d8c13042 | 38 | // Print the banner before starting the timer so serial output time is not counted as loop overhead. |
mcx | 1:f91e7bc0e244 | 39 | pc.printf("Operations in progress.. May take some time.\r\n"); |
igor_m | 0:6d89d8c13042 | 40 | timer.reset(); // Reset timer |
igor_m | 0:6d89d8c13042 | 41 | timer.start(); // Start timer |
igor_m | 0:6d89d8c13042 | 42 | /* Determine loop overhead */ |
mcx | 1:f91e7bc0e244 | 43 | for (i=0; i<ITERATIONS; i++){} |
igor_m | 0:6d89d8c13042 | 44 | for_time=timer.read_us(); |
igor_m | 0:6d89d8c13042 | 45 | timer.stop(); |
igor_m | 0:6d89d8c13042 | 46 | |
igor_m | 0:6d89d8c13042 | 47 | /* Determine the total loop time */ |
igor_m | 0:6d89d8c13042 | 48 | timer.reset(); |
igor_m | 0:6d89d8c13042 | 49 | timer.start(); |
igor_m | 0:6d89d8c13042 | 50 | |
igor_m | 0:6d89d8c13042 | 51 | /* The operation takes place in the body of |
igor_m | 0:6d89d8c13042 | 52 | this for loop. Enable exactly one statement. */ |
mcx | 1:f91e7bc0e244 | 53 | for (i=0; i<ITERATIONS; i++){ |
igor_m | 0:6d89d8c13042 | 54 | |
mcx | 3:ec2e20a9bd03 | 55 | // a = b; |
mcx | 1:f91e7bc0e244 | 56 | // c = a+b; |
mcx | 1:f91e7bc0e244 | 57 | // c = a*b; |
mcx | 1:f91e7bc0e244 | 58 | // c = a/b; |
mcx | 1:f91e7bc0e244 | 59 | // a = sqrt(b); |
mcx | 1:f91e7bc0e244 | 60 | // a = log(b); |
mcx | 1:f91e7bc0e244 | 61 | // a = tanh(b); |
mcx | 3:ec2e20a9bd03 | 62 | // a = (10*pow(b,3) + 105*b)/(pow(b,4) + 45*pow(b,2) + 105); |
mcx | 3:ec2e20a9bd03 | 63 | a = (10*b*b*b + 105*b)/(b*b*b*b + 45*b*b + 105); // pow-free form of the expression on the previous line |
igor_m | 0:6d89d8c13042 | 64 | } |
mcx | 1:f91e7bc0e244 | 65 | |
igor_m | 0:6d89d8c13042 | 66 | total_time=timer.read_us(); |
igor_m | 0:6d89d8c13042 | 67 | timer.stop(); |
igor_m | 0:6d89d8c13042 | 68 | operation_time = total_time-for_time; // Calculate the time it took for the number of operations |
igor_m | 0:6d89d8c13042 | 69 | |
igor_m | 0:6d89d8c13042 | 70 | single_operation_time=float(operation_time)/float(ITERATIONS); |
igor_m | 0:6d89d8c13042 | 71 | number_of_cycles = single_operation_time*CLOCK; |
igor_m | 0:6d89d8c13042 | 72 | |
mcx | 1:f91e7bc0e244 | 73 | pc.printf("for overhead: \t\t%uus \r\n", for_time); |
mcx | 1:f91e7bc0e244 | 74 | pc.printf("total time: \t\t%uus \r\n", total_time); |
mcx | 1:f91e7bc0e244 | 75 | pc.printf("%d calculations took:\t%uus \r\n", ITERATIONS, operation_time); |
mcx | 1:f91e7bc0e244 | 76 | pc.printf("single operation took: \t\t%fus \r\n", single_operation_time); |
mcx | 1:f91e7bc0e244 | 77 | pc.printf("single operation took: \t\t%.3f cycles \r\n", number_of_cycles); |
igor_m | 0:6d89d8c13042 | 78 | } |