/*****************************************************************************
 * DESCRIPTION:
 *   This example program illustrates the use of mutex variables
 *   in a threads program. This version was obtained by modifying the
 *   serial version of the program (dotprod_serial.c) which performs a
 *   dot product. The main data is made available to all threads through
 *   a globally accessible  structure. Each thread works on a different
 *   part of the data. The main thread waits for all the threads to complete
 *   their computations, and then it prints the resulting sum.
 * SOURCE: Vijay Sonnad, IBM
 * MODIFICATOR: (Simon) CHENG Ye
 * LAST REVISED: 06/JAN/2013
 ******************************************************************************/

#include "mbed.h"
#include "cmsis_os.h"

/* Serial port on the USBTX/USBRX pins; main() sets its baud rate to 57600.
 * NOTE(review): presumably this is the port the printf output goes to - confirm. */
Serial debug(USBTX, USBRX);


/*
 * The following structure contains the necessary information
 * to allow the function "dotprod" to access its input data and
 * place its output into the structure.  This structure is
 * unchanged from the sequential version.
 */
typedef struct
{
    double      *a;      /* first input vector (read by dotprod as x) */
    double      *b;      /* second input vector (read by dotprod as y) */
    double     sum;      /* running total; each thread adds its partial sum to it */
    int     veclen;      /* number of elements handled by each thread */
} DOTDATA;

/* Define globally accessible variables and a mutex */
#define NUMTHRDS 4  /* number of worker threads; main() only starts the thread definitions t0..t3 */
#define VECLEN 200  // elements per thread (vectors hold NUMTHRDS*VECLEN); kept small because of the limited memory on mbed
DOTDATA dotstr;           /* shared input vectors and result, filled in by main() */
osMutexId stdio_mutex;    /* mutex handle, assigned by osMutexCreate() in main() */
osMutexDef(stdio_mutex);  /* mutex definition; despite its name it guards both the dotstr.sum update and the printf in dotprod() */

/*
 * The function dotprod is activated when the thread is created.
 * As before, all input to this routine is obtained from a structure
 * of type DOTDATA and all output from this function is written into
 * this structure. The benefit of this approach is apparent for the
 * multi-threaded program: when a thread is created we pass a single
 * argument to the activated function - typically this argument
 * is a thread number. All  the other information required by the
 * function is accessed from the globally accessible structure.
 */
void dotprod(void const *arg)
{
    /* Define and use local variables for convenience */
    osThreadId id;
    int i, start, end, len ;
    long offset;
    double mysum, *x, *y;
    offset = (long)arg;

    len = dotstr.veclen;
    start = offset*len;
    end   = start + len;
    x = dotstr.a;
    y = dotstr.b;

    /*
     * Perform the dot product and assign result
     * to the appropriate variable in the structure.
     */
    mysum = 0;
    for (i=start; i<end ; i++)
        mysum += (x[i] * y[i]);

    /*
     * Lock a mutex prior to updating the value in the shared
     * structure, and unlock it upon updating.
     */
    osMutexWait(stdio_mutex, osWaitForever);
    dotstr.sum += mysum;
    printf("Thread %ld did %d to %d:  mysum=%f global sum=%f\n",offset,start,end,mysum,dotstr.sum);
    osMutexRelease(stdio_mutex);

    id = osThreadGetId();
    osThreadTerminate(id);
}

void t0(void const *argument) {dotprod(argument);}
osThreadDef(t0, osPriorityNormal, DEFAULT_STACK_SIZE);

void t1(void const *argument) {dotprod(argument);}
osThreadDef(t1, osPriorityNormal, DEFAULT_STACK_SIZE);

void t2(void const *argument) {dotprod(argument);}
osThreadDef(t2, osPriorityNormal, DEFAULT_STACK_SIZE);

void t3(void const *argument) {dotprod(argument);}
osThreadDef(t3, osPriorityNormal, DEFAULT_STACK_SIZE);

/*
 * The main program creates threads which do all the work and then
 * print out result upon completion. Before creating the threads,
 * The input data is created. Since all threads update a shared structure, we
 * need a mutex for mutual exclusion. The main thread needs to wait for
 * all threads to complete, it waits for each one of the threads. We specify
 * a thread attribute value that allow the main thread to join with the
 * threads it creates. Note also that we free up handles  when they are
 * no longer needed.
 */
int main()
{
    debug.baud(57600);

    long i;
    double *a, *b;

    /* Assign storage and initialize values */
    a = (double*) malloc (NUMTHRDS*VECLEN*sizeof(double));
    b = (double*) malloc (NUMTHRDS*VECLEN*sizeof(double));

    for (i=0; i<VECLEN*NUMTHRDS; i++)
    {
        a[i]=1;
        b[i]=a[i];
    }

    dotstr.veclen = VECLEN;
    dotstr.a = a;
    dotstr.b = b;
    dotstr.sum=0;

    stdio_mutex = osMutexCreate(osMutex(stdio_mutex));

    /* Create threads to perform the dotproduct  */
    for(i=0;i<NUMTHRDS;i++)
    {
        /* Each thread works on a different set of data.
         * The offset is specified by 'i'. The size of
         * the data for each thread is indicated by VECLEN.
         */
        if(i==0) osThreadCreate(osThread(t0), (void *)i);
        if(i==1) osThreadCreate(osThread(t1), (void *)i);
        if(i==2) osThreadCreate(osThread(t2), (void *)i);
        if(i==3) osThreadCreate(osThread(t3), (void *)i);
    }

    printf ("Sum =  %f \n", dotstr.sum);
    free (a);
    free (b);
}


