#include "mbed.h"
#include "FastIO.h"

/* version 0.1.5, P.C.S. Scholtens, Datang NXP, June 18th 2015, Nijmegen, Netherlands
   - Converted function parameter num_unsync_samples in fill_histogram() to constant
     NUM_UNSYNC_SAMPLES to speed up the comparison in the core while loop.
   - Rewrote the core loop with nested macros. This ensures that the code will stay
     exactly the same for both symbol 0 and 1 (Don't Repeat Yourself...).
   - At start-up, mention whether this executable has the debug and/or allow-out-of-range
     option activated.
   - Print also the assigned value of the detected underflow sequences.
*/

/* version 0.1.4, P.C.S. Scholtens, Datang NXP, June 17th 2015, Nijmegen, Netherlands
   - Flattened core loop of the fill_histogram() function. As both symbols 0 and 1 have
     their own core, sampling can be done faster: there's no comparison with the
     previous value (which was the variable previous_bit) but only a comparison with a
     constant 0 or 1. The sampling of longer sequences is now 1.5 times faster.
   - Added compiler pre-processor directive ALLOW_OUT_OF_RANGE to allow omitting
     out-of-range check of variable run_length. This improves sampling speed, but may
     cause segfaults.
*/

/* version 0.1.3, P.C.S. Scholtens, Datang NXP, May 27th 2015, Nijmegen, Netherlands
   - Added debug mode via compiler pre-processor directives, just for developers.
   - Repaired bug in calculation of offset.
   - Cleared assigned values also in clear_histogram() function.
   - Corrected return value of new synchronization method, dutycycles was calculated
     in wrong way.
*/

/* version 0.1.2, P.C.S. Scholtens, Datang NXP, May 27th 2015, Nijmegen, Netherlands
   - Switched from the default I/O to the FastIO library to enable faster reading of the
     input. This should improve the distinction between the various run lengths.
   - Incremented number of samples to 4x10e7, with a duration of approx. 4 seconds.
   - Repaired bug of always clipped values in run length (accidentally swapped compare
     operator).
*/

/* version 0.1.1, P.C.S. Scholtens, Datang NXP, April 24th 2015, Nijmegen, Netherlands
   - Average function did not return the calculated average, now repaired.
   - Offset was subtracted while it should be added to compensate loss of oversampling
     ratio in the first round of the core loop.
   - Misleading type cast set to final type as chosen by compiler.
*/

/* version 0.1.0, P.C.S. Scholtens, Datang NXP, April 22th 2015, Nijmegen, Netherlands
   - Added more sophisticated method to find the correct symbol values. This one should
     be able to interpret the signals even if not all intermediate run length are present.
     This extends the usable input duty cycle range from [1/3,2/3] to [1/128, 127/128],
     if neither analog performance nor timing quantization errors create interference.
*/

/* version 0.0.9, P.C.S. Scholtens, Datang NXP, April 21th 2015, Nijmegen, Netherlands
   - On run-length counter overflow, now continue looking for the same bit while
     clipping the actual stored value. This prevents an underflow occurrence of the other
     symbol, but may create a lock if no bitstream is present.
   - Time out function added to prevent lock in case no bitstream is present.
   - Timer object renamed for clarity from t to timer, see http://xkcd.org/1513/
   - Includes updated build of library mbed.
   - Out-of-range of run length moved outside core loop, to speed up bitstream sampling
     and consequently improving accuracy.
*/

/* version 0.0.8, P.C.S. Scholtens, Datang NXP, April 17th 2015, Shanghai, PR China
   - Corrected assigned synchronized values, as the first appearance wasn't assigned.
*/

/* version 0.0.7, P.C.S. Scholtens, Datang NXP, April 16/17th 2015, Shanghai, PR China
   - Method written to assign synchronized values to run-length.
   - Added warnings for underflow.
   - After skipped run-in cycles, copy the current bit, to prevent false single hit.
*/

/* version 0.0.6, P.C.S. Scholtens, Datang NXP, April 15th, 2015, Shanghai, PR China
   - Corrected duty-cycle output for actual value of symbols (Thanks to Richard Zhu!).
   - Skipped run-in cycles to avoid pollution of the histogram with the first, most
     likely partial, sequence captured.
   - Added warnings for overflow.
*/

/* version 0.0.5, P.C.S. Scholtens, Datang NXP, April 14th, 2015, Shanghai, PR China
   Implement histogram to find run lengths of zeroes and ones. */

/* version 0.0.4, P.C.S. Scholtens, Datang NXP, April 14th, 2015, Shanghai, PR China
   Implement histogram to find run lengths of zeroes and ones. */

/* version 0.0.3, P.C.S. Scholtens, Datang NXP, April 14th, 2015, Shanghai, PR China
   Initial version. No synchronization of the symbols is done. */

/* See also:
https://developer.mbed.org/forum/bugs-suggestions/topic/3464/
To speed up, maybe bypass the mask function in the gpio_read function of file
./mbed/targets/hal/TARGET_NXP/TARGET_LPC176X/gpio_object.h
from git
git clone https://github.com/mbedmicro/mbed.git

*/

/* Compile-time configuration.
 * DEPTH              : number of bins in each histogram array. Bin 0 is reported as
 *                      "Underflow" and bin DEPTH-1 as "Overflow" (clipped run lengths).
 * WATCH_DOG_TIME     : delay passed to Timeout::attach() before at_time_out() reports that
 *                      no bitstream is present (mbed takes this value in seconds).
 * NUM_UNSYNC_SAMPLES : number of input samples to gather per measurement. Written as an
 *                      unsigned integer literal (same value as the former 4e7) so that the
 *                      comparison with the unsigned sample counter in fill_histogram() is a
 *                      plain integer comparison instead of a floating-point one.
 * DEBUG_MODE         : when defined, main() additionally prints the results of the older
 *                      calculation methods. Explicitly undefined here.
 * ALLOW_OUT_OF_RANGE : when defined, run lengths are not clipped to DEPTH-1 while sampling.
 *                      Faster, but the histogram may then be indexed out of range. */
#define  DEPTH               1024
#define  WATCH_DOG_TIME      10
#define  NUM_UNSYNC_SAMPLES  40000000u
#undef   DEBUG_MODE
//#define  ALLOW_OUT_OF_RANGE

/* Reserve memory space for the histogram.
 * zeros[n] and ones[n] count the captured sequences with a measured run length of n samples
 * of symbol 0 respectively symbol 1 (filled by fill_histogram()).
 * assign[n] holds the symbol value given to run length n by the synchronization functions. */
unsigned int zeros[DEPTH];
unsigned int ones[DEPTH];
unsigned int assign[DEPTH];

/* Input pin carrying the bitstream; polled in fill_histogram(). */
FastIn<p11> bitstream;
/* LED which is switched on at the start of each measurement cycle and off at its end (see main()). */
DigitalOut myled(LED1);
/* Serial link used for all textual output. */
Serial     pc(USBTX, USBRX); // tx, rx
/* Measures the duration of fill_histogram(), reported by main(). */
Timer      timer;
/* Watch dog which calls at_time_out() while fill_histogram() is running. */
Timeout    timeout;

/* One recovered symbol: a contiguous group of non-empty histogram bins, kept as a node of a
 * singly linked list. A node owns its successor: the destructor deletes `next`, so deleting
 * the head of a list releases the whole list. */
class Recovered {
    public:
                     Recovered();
        virtual      ~Recovered();
        /* Average run length of the group, as computed by calc_average(). */
        float        average;
        /* Computes `average` from the histograms over [index_start, index_stop). */
        void         calc_average();
        /* First histogram bin belonging to the group (inclusive). */
        unsigned int index_start;
        /* One past the last histogram bin belonging to the group (exclusive). */
        unsigned int index_stop;
        /* Symbol value assigned to every bin of the group. */
        unsigned int assigned_val;
        /* Next node in the list, or NULL at the end of the list. */
        Recovered    *next;
    private:
        /* Because the destructor deletes `next`, a member-wise copy would make two nodes own
           the same successor and delete it twice. Copying is therefore disabled: both
           functions are declared private and intentionally left without a definition. */
                     Recovered(const Recovered&);
        Recovered&   operator=(const Recovered&);
};

/* Constructor: creates an unlinked node with every field zeroed, so that a field which has
   not been filled in yet never holds an indeterminate value. */
Recovered::Recovered()
    : average(0.0f),
      index_start(0),
      index_stop(0),
      assigned_val(0),
      next(NULL)
{
}


/* Destructor: releases the rest of the list by deleting the successor, which in turn deletes
   its own successor. Deleting a null pointer has no effect, so no explicit end-of-list test
   is needed. */
Recovered::~Recovered()
{
    delete next;
}

/* Calculate average function, only call when index start and stop are defined. */
void Recovered::calc_average()
{
    unsigned int index  = index_start;
    unsigned int sum;
    unsigned int amount = 0;
    average             = 0;
    /* Test assumptions */
    if (index_start > DEPTH-1   ) pc.printf("ERROR: start value to high in average function.\n");
    if (index_stop  > DEPTH-1   ) pc.printf("ERROR: stop value to high in average function.\n");
    if (index_start > index_stop) pc.printf("ERROR: start value beyond stop value in average function.\n");
    /* Core function */
    while (index < index_stop) {
        sum      = zeros[index]+ones[index];
        amount  += sum;
        average += index*sum;
        index++;
    };
    average /= amount;
    return;
};

/* Reset all bins of both histograms, and the assigned symbol values, to zero. */
void clear_histogram() {
    unsigned int bin = DEPTH;
    while (bin-- > 0) {
        zeros[bin] = ones[bin] = assign[bin] = 0;
    }
}

/* Print the histogram on the serial port, one line per bin that has at least one hit.
   Bin 0 is labelled as underflow and bin DEPTH-1 as overflow (printed without assigned
   value); all other bins are printed with their run length and assigned value. */
void print_histogram() {
    const unsigned int last = DEPTH-1;
    pc.printf(" Sequence    Zeros     Ones   Assign\n");
    for (unsigned int bin = 0; bin <= last; bin++) {
        if ( zeros[bin]+ones[bin] == 0 ) {
            continue;   /* skip empty bins */
        }
        if (bin == 0) {
            pc.printf("Underflow %8i %8i %8i\n",zeros[bin],ones[bin],assign[bin]);
        } else if (bin == last) {
            pc.printf("Overflow  %8i %8i\n",zeros[bin],ones[bin]);
        } else {
            pc.printf(" %8i %8i %8i %8i\n",bin,zeros[bin],ones[bin],assign[bin]);
        }
    }
}

/* Watch dog callback, attached by fill_histogram(): only runs when a measurement takes longer
   than WATCH_DOG_TIME. Reports the level currently read from the input and re-arms itself, so
   the message repeats for as long as the measurement does not finish. */
void at_time_out() {
    const int level = static_cast<int>(bitstream);
    pc.printf("Input clipped to level %i, no bitstream present.\n", level);
    timeout.attach(&at_time_out, WATCH_DOG_TIME);
}

/* Fill both histograms with run lengths measured on the bitstream input.
 * The input is polled in a tight loop; run_length counts how many consecutive reads returned
 * the same symbol, and each finished sequence increments the matching bin of zeros[] or
 * ones[]. Sampling continues until the total number of counted reads (count) reaches
 * NUM_UNSYNC_SAMPLES. While sampling, the watch dog timeout is armed; its callback only
 * reports, it does not abort the loop, so this function does not return while the input
 * stays at one level. */
void fill_histogram() {

/* Make the definition of the clip_run_length() macro dependent on the existence of ALLOW_OUT_OF_RANGE.
 * This optional macro tests if run length exceeds the defined depth of histogram, and if so assigns the clip value.
 * NOTE: when ALLOW_OUT_OF_RANGE is defined the macro is empty and run_length is used unclipped as array index below. */
#ifdef ALLOW_OUT_OF_RANGE
#define clip_run_length() ;
#else
#define clip_run_length() if (run_length > DEPTH-1) {run_length = DEPTH-1; }
#endif
/* Now define the center loop macro as it will be used in two symbol flavours: either 0 or 1.
 * It restarts run_length, counts reads for as long as the input equals the given symbol, adds the
 * (unclipped) run length to count and finally clips run_length. */
#define fast_loop(symbol) run_length = 0; while( (bool) bitstream == symbol) {run_length++;}; count += run_length; clip_run_length();
    /* Total number of reads counted so far, including the two run-in sequences. */
    unsigned int count = 0;
    /* Length of the sequence currently, or most recently, measured. */
    unsigned int run_length = 0;
    /* Switch on watch dog timer */
    timeout.attach(&at_time_out, WATCH_DOG_TIME);
    /* Implements run-in: skip the first sequence of ZEROs as it is only a partial one. */
    fast_loop(0);
    /* Implements run-in: skip the first sequence of ONEs as we always want to start with zeros. */
    fast_loop(1);
    while(count < NUM_UNSYNC_SAMPLES ) {
        /* Core of the loop! Measure one sequence of zeros followed by one sequence of ones
           and register each in its own histogram. */
        fast_loop(0);
        zeros[run_length]++;
        fast_loop(1);
        ones[run_length]++;
    }
    /* Switch off watch dog timer */
    timeout.detach();
/* Do not use outside this scope */
#undef fast_loop
#undef clip_run_length
}

/* Count the number of captured sequences of one symbol, without any synchronization:
   the sum of all bins of zeros[] when symbol is 0, of ones[] for any other value. */
unsigned int get_num_unsync_symbols(int symbol) {
    const unsigned int *bins = (symbol == 0) ? zeros : ones;
    unsigned int total = 0;
    for (unsigned int n = 0; n != DEPTH; ++n) {
        total += bins[n];
    }
    return total;
}

/* Calculate the summed value of one symbol using the unsynchronized method: every sequence
   contributes its measured run length (the bin index). Uses zeros[] when symbol is 0 and
   ones[] for any other value. */
unsigned int get_value_unsync_symbols(int symbol) {
    const unsigned int *bins = (symbol == 0) ? zeros : ones;
    unsigned int total = 0;
    for (unsigned int n = 0; n != DEPTH; ++n) {
        total += n*bins[n];
    }
    return total;
}

/* Calculate the summed value of one symbol using the (older) synchronization algorithm.
   First every group of adjacent non-empty bins gets the next consecutive number (1, 2, 3, ...)
   stored in assign[]; empty bins keep their previous assign[] value. Then every sequence
   contributes the number assigned to its bin. Uses zeros[] when symbol is 0 and ones[] for any
   other value. */
unsigned int get_value_synced_symbols(int symbol) {
    /* Pass one: number the groups of adjacent non-empty bins */
    int  group    = 0;
    bool in_group = false;
    for (unsigned int bin = 0; bin < DEPTH; bin++) {
        const bool occupied = (zeros[bin]+ones[bin] != 0);
        if (occupied) {
            if (!in_group) {
                group++;            /* first bin of a new group */
            }
            assign[bin] = group;
        }
        in_group = occupied;
    }
    /* Pass two: do the actual summation of symbol values */
    const unsigned int *bins = (symbol == 0) ? zeros : ones;
    unsigned int total = 0;
    for (unsigned int bin = 0; bin < DEPTH; bin++) {
        total += assign[bin]*bins[bin];
    }
    return total;
}

/* Calculate the value, using the new synchronization algorithm */
float get_dutycycle_synced_symbols_new_method() {
    /* First step (第一步): scan areas of non-zero content in histogram, starting at first non-overflow sequence at the end */
    bool presence = false;
    Recovered *list  = NULL;
    Recovered *first = NULL;
    for (signed int i = DEPTH-2; i > -1 ; i--) {
        if ( zeros[i]+ones[i] != 0 ) {
            if (presence) {
                first->index_start = i;
            } else {
                /* Create new Recovered symbol and position it at the beginning of the list of dis(/re)covered symbols */
                first             = new Recovered;
                first->next       = list;
                first->index_stop = i+1;
                list              = first;
                presence          = true;
            }
        } else {
            presence = false;
        }
    }
    /* Step two (第二步): for each found area, calculate average values  */
    Recovered* index = list;
    while (index != NULL) {
        index->calc_average();
        index = index->next;
    }
#ifdef DEBUG_MODE
    int j = 0;
    index = list;
    while (index != NULL) {
        pc.printf("Group %i from %i to %i, average = %f\n", j, index->index_start,index->index_stop, index->average);
        index = index->next;
        j++;
    }
#endif
    /* Step three (第三步): Find smallest distance between two adjacent symbols, e.g. with run length of 0.91, 6.99, 8.01, the last two define the grid/oversample ratio. */
    float oversample = DEPTH;
    Recovered* cmp1 = list;
    Recovered* cmp2 = list->next;
    if (list != NULL) {
        while (cmp2 != NULL) {
            float diff = cmp2->average-cmp1->average;
            if (diff < oversample) {
                oversample = diff;
            }
            cmp1=cmp2;
            cmp2=cmp1->next;
        }
    }
#ifdef DEBUG_MODE
    pc.printf("Oversample ratio %f\n", oversample);
#endif
    /* Step four (第四步): Divide the average run length of all found recovered symbol by the found oversample ratio. */
    index = list;
    while (index != NULL) {
        index->average /= oversample;
        index = index->next;
    }
    
    /* Step five (第五步): find offset and remove it (Assumption that there are always symbols with run length 1 ) */
    index = list;
    float offset = 1-index->average;
    while (index != NULL) {
        index->average += offset;
        index = index->next;
    }
#ifdef DEBUG_MODE
    pc.printf("Offset at initial run-in lengths %f\n", offset);
#endif

    /* Step six (第六步): round to nearest integer and assign value to both arrays */
    index = list;
    while (index != NULL) {
        index->assigned_val = (unsigned int) (index->average+0.5);
        for (int i = index->index_start; i < index->index_stop; i++ ) {
            assign[i] = index->assigned_val;
        }
        index = index->next;
    }
     
    /* Step seven (第七步): Now do the actual summation of symbol values */
    unsigned int sum0 = 0, sum1 = 0;
    for (unsigned int i = 0; i < DEPTH; i++) {
        sum0 += assign[i]*zeros[i];
        sum1 += assign[i]*ones[i];
    }
    /* Step eight (第八步): Delete the recovered symbol object to clear memory. As a destructor is defined
      this will be automatically handled recursively. And of course return the duty cycle */
    delete list;
    return ((float) sum1)/(sum0+sum1);
}

/* The main routine of the program.
   Prints a start-up banner (version, build date/time and active compile options) and then
   repeats forever: clear the histogram, capture a new one while timing the capture, calculate
   the duty cycle and the corresponding voltage, and print the histogram and the results.
   The LED is on while a cycle is being processed. */
int main() {
    pc.baud(115200);
    pc.printf("Bitstream counter, version 0.1.5, P.C.S. Scholtens, June 18th 2015, Nijmegen, Netherlands.\n");
    pc.printf("Build: " __DATE__ ", " __TIME__ );
#ifdef DEBUG_MODE
    pc.printf(", debug mode");
#endif
#ifdef ALLOW_OUT_OF_RANGE
    pc.printf(", allows out-of-range");
#endif
    pc.printf(".\n");

    /* Disabled experiments with the timer prescalers:
       LPC_TIM2->PR = 0x0000002F (decimal 47) and LPC_TIM3->PR = 24. */
    for (;;) {
        /* Capture a fresh histogram and measure how long that takes */
        timer.reset();
        myled = 1;
        clear_histogram();
        timer.start();
        fill_histogram();
        timer.stop();
#ifdef DEBUG_MODE
        /* Results of the unsynchronized method */
        const unsigned int zero_sequences = get_num_unsync_symbols(0);
        const unsigned int one_sequences  = get_num_unsync_symbols(1);
        const unsigned int unsync_zeros   = get_value_unsync_symbols(0);
        const unsigned int unsync_ones    = get_value_unsync_symbols(1);
        const unsigned int unsync_total   = unsync_zeros+unsync_ones;
        /* One of the integers must be cast to float, otherwise the division truncates to zero. */
        const float unsync_duty = ((float) unsync_ones)/unsync_total;
        /* This is the ADC formula, see analysisSigmaDeltaADC.pdf */
        const float unsync_volt = (0.5*13*unsync_duty+1)*0.9;
        /* Results of the old synchronized method */
        const unsigned int synced_zeros = get_value_synced_symbols(0);
        const unsigned int synced_ones  = get_value_synced_symbols(1);
        const unsigned int synced_total = synced_zeros+synced_ones;
        const float synced_duty = ((float) synced_ones)/synced_total;
        const float synced_volt = (0.5*13*synced_duty+1)*0.9;
#endif
        /* Results of the new synchronized method, same ADC formula */
        const float duty_new = get_dutycycle_synced_symbols_new_method();
        const float volt_new = (0.5*13*duty_new+1)*0.9;

        pc.printf("\n------   Captured Histogram   ------\n");
        print_histogram();
#ifdef DEBUG_MODE
        pc.printf("------ Unsynchronized Results ------\n");
        pc.printf("Counted Sequences  %8i %8i\n", zero_sequences, one_sequences);
        pc.printf("Summed Values      %8i %8i\n", unsync_zeros, unsync_ones);
        pc.printf("Duty Cycle %f, = %f Volt\n", unsync_duty, unsync_volt);
        pc.printf("----- Synchronized Results OLD -----\n");
        pc.printf("Summed Values      %8i %8i\n", synced_zeros, synced_ones);
        pc.printf("Duty Cyle %f, = %f Volt\n", synced_duty, synced_volt);
#endif
        pc.printf("------- Synchronized Results -------\n");
        pc.printf("Duty Cyle %f, = %f Volt\n", duty_new, volt_new);
        pc.printf("------------------------------------\n");
        pc.printf("Measured in %f sec.\n", timer.read());
        pc.printf("====================================\n");
        myled = 0;
        wait(0.1);
    }
}