// DMA SPI 8-bit frames based on teensy 3
// https://github.com/crteensy/DmaSpi
//  jumper MISO to MOSI for verification
#include "mbed.h"

// Debug helper: prints the text of the expression z followed by its value in hex.
#define PRREG(z) printf(#z" 0x%x\n",z)

// Timer read with read_us() to time the transfers.
Timer tmr;
// Chip-select output, driven by hand: 0 while a transfer runs, 1 otherwise.
DigitalOut CSpin(D10);
SPI spi(D11,D12,D13); // mosi, miso, sclk  SPI0

// SPI clock requested in main(), in Hz.
#define SPIHZ 12000000
// DMA channel that writes to SPI0_PUSHR (transmit side).
#define TXCHNL 0
// DMA channel that reads from SPI0_POPR (receive side).
#define RXCHNL 1
// DMAMUX request-source numbers routed to the RX and TX channels in dma_init().
#define DMAMUX_SPI0RX 14
#define DMAMUX_SPI0TX 15

// Size in bytes of both test buffers and of every transfer issued by main().
#define NBYTES 1024
// Filled by the RX DMA channel; zeroed by main() before each transfer.
uint8_t rx_buffer[NBYTES];
// Test pattern clocked out over SPI; filled once in main().
uint8_t tx_buffer[NBYTES];

// Completion flag: cleared and polled by spidma(), set to 1 by dmaisr().
volatile int dmadone;

// Interrupt handler that dma_init() installs on DMA1_IRQn. Only the RX
// channel is configured with DMA_CSR_INTMAJOR_MASK, so this runs for RX.
void dmaisr() {
    // RX DMA interrupt
    DMA_CINT = RXCHNL;  // clear the RX channel's interrupt request
    // Zero the SPI request-enable register that spidma() loaded before the
    // transfer, so no further SPI requests are raised.
    SPI0_RSER = 0;
    // Same status-register write spidma() performs before a transfer;
    // presumably a write-1-to-clear of latched status flags -- confirm the
    // mask against the reference manual.
    SPI0_SR = 0xFF0F0000;
    dmadone = 1;        // lets the polling loop in spidma() fall through
    
}

/**
 * One-time setup of the two eDMA channels that service SPI0.
 *
 * TXCHNL moves one byte per request from a memory buffer into SPI0_PUSHR;
 * RXCHNL moves one byte per request from SPI0_POPR into a memory buffer and
 * raises an interrupt when its major loop completes. Buffer addresses and
 * transfer counts are not set here: spidma() programs them per transfer.
 *
 * Must be called once before the first spidma().
 */
void dma_init(void) {
    // Enable clock for DMAMUX and DMA
    SIM_SCGC6 |= SIM_SCGC6_DMAMUX_MASK;
    SIM_SCGC7 |= SIM_SCGC7_DMA_MASK;

    // Keep both channels deaf to hardware requests while they are set up
    DMA_CERQ = RXCHNL;
    DMA_CERQ = TXCHNL;

    // Route the SPI0 TX request to the TX channel. The mux channel is
    // disabled first and then written with a plain assignment: OR-ing into
    // the register would merge the new source number with whatever source
    // was configured before, selecting a bogus request line.
    DMAMUX_CHCFG0 = 0;
    DMAMUX_CHCFG0 = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMAMUX_SPI0TX);
    DMA0->TCD[TXCHNL].DADDR = (uint32_t)&SPI0_PUSHR;
    DMA0->TCD[TXCHNL].SOFF = 1;   // walk through the source buffer
    DMA0->TCD[TXCHNL].DOFF = 0;   // destination is a fixed register
    DMA0->TCD[TXCHNL].ATTR = DMA_ATTR_SSIZE(0) | DMA_ATTR_DSIZE(0);  // 8-bit accesses
    DMA0->TCD[TXCHNL].NBYTES_MLNO = 1;  // one byte per request
    DMA0->TCD[TXCHNL].DLAST_SGA = 0;
    DMA0->TCD[TXCHNL].SLAST = 0;
    // DREQ: stop accepting requests once the major loop is finished
    DMA0->TCD[TXCHNL].CSR = DMA_CSR_DREQ_MASK;

    // Route the SPI0 RX request to the RX channel (same disable-then-assign
    // sequence as for TX).
    DMAMUX_CHCFG1 = 0;
    DMAMUX_CHCFG1 = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMAMUX_SPI0RX);
    DMA0->TCD[RXCHNL].SADDR = (uint32_t)&SPI0_POPR;     // recv
    DMA0->TCD[RXCHNL].SOFF = 0;   // source is a fixed register
    DMA0->TCD[RXCHNL].DOFF = 1;   // walk through the destination buffer
    DMA0->TCD[RXCHNL].ATTR = DMA_ATTR_SSIZE(0) | DMA_ATTR_DSIZE(0);  // 8-bit accesses
    DMA0->TCD[RXCHNL].NBYTES_MLNO = 1;  // one byte per request
    DMA0->TCD[RXCHNL].DLAST_SGA = 0;
    DMA0->TCD[RXCHNL].SLAST = 0;
    // As TX, plus an interrupt at the end of the major loop
    DMA0->TCD[RXCHNL].CSR = DMA_CSR_DREQ_MASK | DMA_CSR_INTMAJOR_MASK;

    // Install the handler before unmasking the interrupt, so an already
    // pending request cannot be dispatched to the previous vector.
    NVIC_SetVector(DMA1_IRQn, (uint32_t)&dmaisr);
    NVIC_EnableIRQ(DMA1_IRQn);
}

void spidma(void *txbuff, void *rxbuff, int bytes) {
    CSpin=0;
    dmadone = 0;
    SPI0_SR = 0xFF0F0000;
    SPI0_RSER = SPI_RSER_TFFF_RE_MASK | SPI_RSER_TFFF_DIRS_MASK | SPI_RSER_RFDF_RE_MASK | SPI_RSER_RFDF_DIRS_MASK;

    // Set memory address for source and destination 
    DMA0->TCD[TXCHNL].SADDR = (uint32_t)txbuff;            // xmit
    DMA0->TCD[RXCHNL].DADDR = (uint32_t)rxbuff;
        
    // Current major iteration count
    DMA0->TCD[TXCHNL].BITER_ELINKNO = DMA_BITER_ELINKNO_BITER(bytes);
    DMA0->TCD[TXCHNL].CITER_ELINKNO = DMA_CITER_ELINKNO_CITER(bytes);
    DMA0->TCD[RXCHNL].BITER_ELINKNO = DMA_BITER_ELINKNO_BITER(bytes);
    DMA0->TCD[RXCHNL].CITER_ELINKNO = DMA_CITER_ELINKNO_CITER(bytes);
    
    // Enable request signal for channels
    DMA_SERQ = RXCHNL;
    DMA_SERQ = TXCHNL;
   
   while (!dmadone);  // wait for completion
 //   while(!(DMA0->TCD[RXCHNL].CSR & BM_DMA_TCDn_CSR_DONE)); // wait

    CSpin=1;
}

/**
 * Reference benchmark: sends tx_buffer one frame at a time with spi.write()
 * and prints the elapsed time and throughput, for comparison with spidma().
 *
 * @param mhz  SPI clock to request, in MHz. The clock is left at this
 *             setting when the function returns.
 */
void spiperf(int mhz) {
    spi.frequency(mhz*1000000);

    CSpin = 0;
    const int start_us = tmr.read_us();
    for (int i = 0; i < NBYTES; i++) {
        spi.write(tx_buffer[i]);  // old school: one call per frame
    }
    const int us = tmr.read_us() - start_us;
    CSpin = 1;

    // SPI0_CTAR0 is a 32-bit register value: cast it so the argument matches
    // the conversion on toolchains where uint32_t is unsigned long.
    printf("spi %d mhz %d us %.2f mbs %d bytes %0lx\n",
           mhz, us, 8.*NBYTES/us, NBYTES, (unsigned long)SPI0_CTAR0);
}

/**
 * Loopback test for the DMA SPI path (MISO jumpered to MOSI).
 *
 * Sets up SPI0 for 8-bit frames, fills tx_buffer with a counting pattern,
 * then every few seconds runs one DMA transfer of NBYTES, compares what came
 * back with the pattern, and prints timing, throughput and mismatch count.
 * Never returns.
 */
int main() {
    // uint32_t arguments are cast to unsigned long so they match the printf
    // conversions regardless of how the toolchain defines uint32_t.
    printf("SystemCoreClock %lu  %s %s\n",
           (unsigned long)SystemCoreClock, __TIME__, __DATE__);
    wait(2.0);
    tmr.start();
    CSpin = 1;
    spi.format(8, 0);
    spi.frequency(SPIHZ);

    // Load some exciting data into the buffers
    for (int i = 0; i < NBYTES; i++) {
        tx_buffer[i] = (uint8_t)i;  // pattern wraps every 256 bytes
    }

    dma_init();

    while (1) {
        // Uncomment to time the frame-by-frame spi.write() path as well:
        //  spiperf(SPIHZ/1000000);
        memset(rx_buffer, 0, NBYTES);

        const uint32_t start_us = (uint32_t)tmr.read_us();
        spidma(tx_buffer, rx_buffer, NBYTES);
        // unsigned subtraction stays correct across a timer wrap
        const uint32_t us = (uint32_t)tmr.read_us() - start_us;

        // MISO to MOSI: every byte received must equal the byte sent
        int errs = 0;
        for (int i = 0; i < NBYTES; i++) {
            if (rx_buffer[i] != tx_buffer[i]) {
                errs++;
            }
        }

        printf("dmaspi %d hz %lu us %.2f mbs %d bytes  errs %d %0lx\n",
               SPIHZ, (unsigned long)us, 8.*NBYTES/us, NBYTES, errs,
               (unsigned long)SPI0_CTAR0);
        wait(4.0);
    }
}