Rtos API example

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers prof.c Source File

prof.c

00001 /*
00002  * Profiling framework for the events library
00003  *
00004  * Copyright (c) 2016 Christopher Haster
00005  *
00006  * Licensed under the Apache License, Version 2.0 (the "License");
00007  * you may not use this file except in compliance with the License.
00008  * You may obtain a copy of the License at
00009  *
00010  *     http://www.apache.org/licenses/LICENSE-2.0
00011  *
00012  * Unless required by applicable law or agreed to in writing, software
00013  * distributed under the License is distributed on an "AS IS" BASIS,
00014  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  * See the License for the specific language governing permissions and
00016  * limitations under the License.
00017  */
00018 #include "equeue.h"
00019 #include <unistd.h>
00020 #include <stdio.h>
00021 #include <setjmp.h>
00022 #include <stdint.h>
00023 #include <stdlib.h>
00024 #include <inttypes.h>
00025 #include <sys/time.h>
00026 
00027 
// Performance measurement utils

// Number of times prof_measure repeats a test; the smallest run is reported
#define PROF_RUNS 5
// prof_loop keeps iterating until this many cycles have been accumulated
#define PROF_INTERVAL 100000000

// Declares a volatile object of type t that is allowed to go unused, so the
// result of a measured call has somewhere to be stored
#define prof_volatile(t) __attribute__((unused)) volatile t

typedef uint64_t prof_cycle_t;

// Counter values captured by the most recent prof_start/prof_stop
static volatile prof_cycle_t prof_start_cycle;
static volatile prof_cycle_t prof_stop_cycle;
// Total accumulated by the current run, and the iterations that produced it
static prof_cycle_t prof_accum_cycle;
static prof_cycle_t prof_iterations;
// Result of the baseline test; subtracted from every later measurement
static prof_cycle_t prof_baseline_cycle;
// Unit label printed after each result
static const char *prof_units;

// Reads the 64-bit timestamp counter with the x86 rdtsc instruction
#define prof_cycle() ({                                                     \
    uint32_t prof_lo_, prof_hi_;                                            \
    __asm__ volatile ("rdtsc" : "=a" (prof_lo_), "=d" (prof_hi_));          \
    ((uint64_t)prof_hi_ << 32) | (uint64_t)prof_lo_;                        \
})

// Loop header for a test body: counts iterations and repeats until the
// accumulated measurement reaches PROF_INTERVAL
#define prof_loop()                                                         \
    for (prof_iterations = 0;                                               \
         prof_accum_cycle < PROF_INTERVAL;                                  \
         prof_iterations++)

// Marks the beginning of a timed region
#define prof_start() ({                                                     \
    prof_start_cycle = prof_cycle();                                        \
})

// Marks the end of a timed region and adds its length to the accumulator
#define prof_stop() ({                                                      \
    prof_stop_cycle = prof_cycle();                                         \
    prof_accum_cycle += prof_stop_cycle - prof_start_cycle;                 \
})

// Reports a directly computed result instead of a timed one. The baseline
// is added here because prof_measure subtracts it again; both arguments are
// parenthesized so any expression can be passed safely.
#define prof_result(value, units) ({                                        \
    prof_accum_cycle = (value) + prof_baseline_cycle;                       \
    prof_iterations = 1;                                                    \
    prof_units = (units);                                                   \
})

// Runs func(...) PROF_RUNS times, prints the smallest per-iteration result
// with the baseline removed, and evaluates to that result. When stdin is
// not a terminal, the next number found on stdin is taken as the previous
// result and the relative change is printed next to the new value.
//
// Locals carry a prof_ prefix and trailing underscore so that arguments
// passed through __VA_ARGS__ cannot be captured by them.
#define prof_measure(func, ...) ({                                          \
    printf("%s: ...", #func);                                               \
    fflush(stdout);                                                         \
                                                                            \
    prof_units = "cycles";                                                  \
    prof_cycle_t prof_runs_[PROF_RUNS];                                     \
    for (int prof_i_ = 0; prof_i_ < PROF_RUNS; prof_i_++) {                 \
        prof_accum_cycle = 0;                                               \
        prof_iterations = 0;                                                \
        func(__VA_ARGS__);                                                  \
        /* a test that never iterated has nothing to average */            \
        prof_runs_[prof_i_] = prof_iterations                               \
                ? prof_accum_cycle / prof_iterations                        \
                : 0;                                                        \
    }                                                                       \
                                                                            \
    prof_cycle_t prof_res_ = prof_runs_[0];                                 \
    for (int prof_i_ = 1; prof_i_ < PROF_RUNS; prof_i_++) {                 \
        if (prof_runs_[prof_i_] < prof_res_) {                              \
            prof_res_ = prof_runs_[prof_i_];                                \
        }                                                                   \
    }                                                                       \
    /* clamp: a run faster than the baseline must not wrap around */       \
    prof_res_ = (prof_res_ > prof_baseline_cycle)                           \
            ? prof_res_ - prof_baseline_cycle                               \
            : 0;                                                            \
    printf("\r%s: %" PRIu64 " %s", #func, prof_res_, prof_units);           \
                                                                            \
    if (!isatty(0)) {                                                       \
        prof_cycle_t prof_prev_ = 0;                                        \
        /* skip any non-digits first; this also copes with input that */   \
        /* already starts with a digit or that has run out            */   \
        if (scanf("%*[^0-9]") != EOF                                        \
                && scanf("%" SCNu64, &prof_prev_) == 1                      \
                && prof_prev_ != 0) {                                       \
            int64_t prof_perc_ =                                            \
                    100*((int64_t)prof_prev_ - (int64_t)prof_res_)          \
                    / (int64_t)prof_prev_;                                  \
                                                                            \
            if (prof_perc_ > 10) {                                          \
                printf(" (\033[32m%+" PRId64 "%%\033[0m)", prof_perc_);     \
            } else if (prof_perc_ < -10) {                                  \
                printf(" (\033[31m%+" PRId64 "%%\033[0m)", prof_perc_);     \
            } else {                                                        \
                printf(" (%+" PRId64 "%%)", prof_perc_);                    \
            }                                                               \
        }                                                                   \
    }                                                                       \
                                                                            \
    printf("\n");                                                           \
    prof_res_;                                                              \
})

// Measures func(...) with no baseline applied and stores the result as the
// baseline for all following measurements
#define prof_baseline(func, ...) ({                                         \
    prof_baseline_cycle = 0;                                                \
    prof_baseline_cycle = prof_measure(func, __VA_ARGS__);                  \
})
00113 
00114 
// Various test functions

/*
 * Callback that deliberately does nothing. The tests below pass it to the
 * queue whenever an event needs a function to run.
 */
void no_func(void *eh) {
    (void)eh;  // unused by design; keeps -Wunused-parameter quiet
}
00118 
00119 
00120 // Actual performance tests
00121 void baseline_prof(void) {
00122     prof_loop() {
00123         prof_start();
00124         __asm__ volatile ("");
00125         prof_stop();
00126     }
00127 }
00128 
00129 void equeue_tick_prof(void) {
00130     prof_volatile(unsigned) res;
00131     prof_loop() {
00132         prof_start();
00133         res = equeue_tick();
00134         prof_stop();
00135     }
00136 }
00137 
00138 void equeue_alloc_prof(void) {
00139     struct equeue q;
00140     equeue_create(&q, 32*EQUEUE_EVENT_SIZE);
00141 
00142     prof_loop() {
00143         prof_start();
00144         void *e = equeue_alloc(&q, 8 * sizeof(int));
00145         prof_stop();
00146 
00147         equeue_dealloc(&q, e);
00148     }
00149 
00150     equeue_destroy(&q);
00151 }
00152 
00153 void equeue_alloc_many_prof(int count) {
00154     struct equeue q;
00155     equeue_create(&q, count*EQUEUE_EVENT_SIZE);
00156 
00157     void *es[count];
00158 
00159     for (int i = 0; i < count; i++) {
00160         es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
00161     }
00162 
00163     for (int i = 0; i < count; i++) {
00164         equeue_dealloc(&q, es[i]);
00165     }
00166 
00167     prof_loop() {
00168         prof_start();
00169         void *e = equeue_alloc(&q, 8 * sizeof(int));
00170         prof_stop();
00171 
00172         equeue_dealloc(&q, e);
00173     }
00174 
00175     equeue_destroy(&q);
00176 }
00177 
00178 void equeue_post_prof(void) {
00179     struct equeue q;
00180     equeue_create(&q, EQUEUE_EVENT_SIZE);
00181 
00182     prof_loop() {
00183         void *e = equeue_alloc(&q, 0);
00184 
00185         prof_start();
00186         int id = equeue_post(&q, no_func, e);
00187         prof_stop();
00188 
00189         equeue_cancel(&q, id);
00190     }
00191 
00192     equeue_destroy(&q);
00193 }
00194 
00195 void equeue_post_many_prof(int count) {
00196     struct equeue q;
00197     equeue_create(&q, count*EQUEUE_EVENT_SIZE);
00198 
00199     for (int i = 0; i < count-1; i++) {
00200         equeue_call(&q, no_func, 0);
00201     }
00202 
00203     prof_loop() {
00204         void *e = equeue_alloc(&q, 0);
00205 
00206         prof_start();
00207         int id = equeue_post(&q, no_func, e);
00208         prof_stop();
00209 
00210         equeue_cancel(&q, id);
00211     }
00212 
00213     equeue_destroy(&q);
00214 }
00215 
00216 void equeue_post_future_prof(void) {
00217     struct equeue q;
00218     equeue_create(&q, EQUEUE_EVENT_SIZE);
00219 
00220     prof_loop() {
00221         void *e = equeue_alloc(&q, 0);
00222         equeue_event_delay(e, 1000);
00223 
00224         prof_start();
00225         int id = equeue_post(&q, no_func, e);
00226         prof_stop();
00227 
00228         equeue_cancel(&q, id);
00229     }
00230 
00231     equeue_destroy(&q);
00232 }
00233 
00234 void equeue_post_future_many_prof(int count) {
00235     struct equeue q;
00236     equeue_create(&q, count*EQUEUE_EVENT_SIZE);
00237 
00238     for (int i = 0; i < count-1; i++) {
00239         equeue_call(&q, no_func, 0);
00240     }
00241 
00242     prof_loop() {
00243         void *e = equeue_alloc(&q, 0);
00244         equeue_event_delay(e, 1000);
00245 
00246         prof_start();
00247         int id = equeue_post(&q, no_func, e);
00248         prof_stop();
00249 
00250         equeue_cancel(&q, id);
00251     }
00252 
00253     equeue_destroy(&q);
00254 }
00255 
00256 void equeue_dispatch_prof(void) {
00257     struct equeue q;
00258     equeue_create(&q, EQUEUE_EVENT_SIZE);
00259 
00260     prof_loop() {
00261         equeue_call(&q, no_func, 0);
00262 
00263         prof_start();
00264         equeue_dispatch(&q, 0);
00265         prof_stop();
00266     }
00267 
00268     equeue_destroy(&q);
00269 }
00270 
00271 void equeue_dispatch_many_prof(int count) {
00272     struct equeue q;
00273     equeue_create(&q, count*EQUEUE_EVENT_SIZE);
00274 
00275     prof_loop() {
00276         for (int i = 0; i < count; i++) {
00277             equeue_call(&q, no_func, 0);
00278         }
00279 
00280         prof_start();
00281         equeue_dispatch(&q, 0);
00282         prof_stop();
00283     }
00284 
00285     equeue_destroy(&q);
00286 }
00287 
00288 void equeue_cancel_prof(void) {
00289     struct equeue q;
00290     equeue_create(&q, EQUEUE_EVENT_SIZE);
00291 
00292     prof_loop() {
00293         int id = equeue_call(&q, no_func, 0);
00294 
00295         prof_start();
00296         equeue_cancel(&q, id);
00297         prof_stop();
00298     }
00299 
00300     equeue_destroy(&q);
00301 }
00302 
00303 void equeue_cancel_many_prof(int count) {
00304     struct equeue q;
00305     equeue_create(&q, count*EQUEUE_EVENT_SIZE);
00306 
00307     for (int i = 0; i < count-1; i++) {
00308         equeue_call(&q, no_func, 0);
00309     }
00310 
00311     prof_loop() {
00312         int id = equeue_call(&q, no_func, 0);
00313 
00314         prof_start();
00315         equeue_cancel(&q, id);
00316         prof_stop();
00317     }
00318 
00319     equeue_destroy(&q);
00320 }
00321 
00322 void equeue_alloc_size_prof(void) {
00323     size_t size = 32*EQUEUE_EVENT_SIZE;
00324 
00325     struct equeue q;
00326     equeue_create(&q, size);
00327     equeue_alloc(&q, 0);
00328 
00329     prof_result(size - q.slab.size, "bytes");
00330 
00331     equeue_destroy(&q);
00332 }
00333 
00334 void equeue_alloc_many_size_prof(int count) {
00335     size_t size = count*EQUEUE_EVENT_SIZE;
00336 
00337     struct equeue q;
00338     equeue_create(&q, size);
00339 
00340     for (int i = 0; i < count; i++) {
00341         equeue_alloc(&q, (i % 4) * sizeof(int));
00342     }
00343 
00344     prof_result(size - q.slab.size, "bytes");
00345 
00346     equeue_destroy(&q);
00347 }
00348 
00349 void equeue_alloc_fragmented_size_prof(int count) {
00350     size_t size = count*EQUEUE_EVENT_SIZE;
00351 
00352     struct equeue q;
00353     equeue_create(&q, size);
00354 
00355     void *es[count];
00356 
00357     for (int i = 0; i < count; i++) {
00358         es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
00359     }
00360 
00361     for (int i = 0; i < count; i++) {
00362         equeue_dealloc(&q, es[i]);
00363     }
00364 
00365     for (int i = count-1; i >= 0; i--) {
00366         es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
00367     }
00368 
00369     for (int i = count-1; i >= 0; i--) {
00370         equeue_dealloc(&q, es[i]);
00371     }
00372 
00373     for (int i = 0; i < count; i++) {
00374         equeue_alloc(&q, (i % 4) * sizeof(int));
00375     }
00376 
00377     prof_result(size - q.slab.size, "bytes");
00378 
00379     equeue_destroy(&q);
00380 }
00381 
00382 
00383 // Entry point
00384 int main() {
00385     printf("beginning profiling...\n");
00386 
00387     prof_baseline(baseline_prof);
00388 
00389     prof_measure(equeue_tick_prof);
00390     prof_measure(equeue_alloc_prof);
00391     prof_measure(equeue_post_prof);
00392     prof_measure(equeue_post_future_prof);
00393     prof_measure(equeue_dispatch_prof);
00394     prof_measure(equeue_cancel_prof);
00395 
00396     prof_measure(equeue_alloc_many_prof, 1000);
00397     prof_measure(equeue_post_many_prof, 1000);
00398     prof_measure(equeue_post_future_many_prof, 1000);
00399     prof_measure(equeue_dispatch_many_prof, 100);
00400     prof_measure(equeue_cancel_many_prof, 100);
00401 
00402     prof_measure(equeue_alloc_size_prof);
00403     prof_measure(equeue_alloc_many_size_prof, 1000);
00404     prof_measure(equeue_alloc_fragmented_size_prof, 1000);
00405 
00406     printf("done!\n");
00407 }