Library to control and transfer data to and from the NXP SGTL5000 codec, as used on the Teensy Audio Shield. It uses DMA to transfer I2S FIFO data.

The library now supports dual codecs, allowing all 4 channels of the Teensy I2S interface to RX and TX data to separate SGTL5000 devices.

The ISR routines that handle pointer swaps for double buffering have been fully coded in assembler to reduce overhead, and now take < 800 ns per FIFO transfer when using all 4 channels.

Support added for all typical sample rates and system clock speeds of 96 MHz or 120 MHz.

Pause and resume functions have been added to allow quick and simple suppression of IRQs, and halting and restarting of streams. This required a software-triggered IRQ in order to ensure accurate word sync control.

Revision:
6:4ab5aaeaa064
Parent:
5:664802e89661
Child:
7:d65476c153a4
--- a/sgtl5000.cpp	Fri Jun 30 09:46:54 2017 +0000
+++ b/sgtl5000.cpp	Sat Jul 01 10:20:45 2017 +0000
@@ -25,37 +25,25 @@
 */
 
 #include "sgtl5000.h"
-#include "arm_math.h"
-
-extern bool grab;
-extern q31_t RX_L_dir_DMA[];
-extern q31_t RX_R_dir_DMA[];
 
 namespace SGTL5000
 {
 
 // Static variables required within ISRs
-uint32_t SGTL5000::I2S_TX_Buffer[16];                                           // Private double buffer space
-uint32_t SGTL5000::I2S_RX_Buffer[16];
 uint32_t *SGTL5000::BufRX_L_safe;                                               // Private pointers assigned to users data pointers
 uint32_t *SGTL5000::BufRX_R_safe;                                               // These are used to flip user pointers between safe 'active' regions of
 uint32_t *SGTL5000::BufTX_L_safe;                                               // double buffered space.
 uint32_t *SGTL5000::BufTX_R_safe;
-uint32_t SGTL5000::TX_block_size;                                               // User defined block size per call. This is the no. of 32bit words pushed or pulled from FIFO both left and right channel.
-uint32_t SGTL5000::RX_block_size;
-uint32_t SGTL5000::SYNC_attach_type = 0;                                        // User defined, blocking or non-blocking calls from DMA ISR. non-blocking uses software triggered IRQ, blocking uses callback.
-uint32_t SGTL5000::TX_attach_type = 0;
-uint32_t SGTL5000::RX_attach_type = 0;
-IRQn SGTL5000::SYNC_swIRQ;                                                     // IRQn assigned by user to the software IRQ triggered by the FIFO queues.
+IRQn SGTL5000::SYNC_swIRQ;                                                      // IRQn assigned by user to the software IRQ triggered by the FIFO queues.
 IRQn SGTL5000::TX_swIRQ;
 IRQn SGTL5000::RX_swIRQ;
-uint32_t SGTL5000::RX_DMAch;                                                   // User defined RX DMA channel number
-uint32_t SGTL5000::TX_DMAch;                                                   // User defined TX DMA channel number
+uint32_t SGTL5000::RX_DMAch;                                                    // User defined RX DMA channel number
+uint32_t SGTL5000::TX_DMAch;                                                    // User defined TX DMA channel number
 Callback<void()> SGTL5000::TX_user_func = NULL;
 Callback<void()> SGTL5000::RX_user_func = NULL;
-Callback<void()> SGTL5000::SYNC_user_func = NULL;                              // User defined callback function
+Callback<void()> SGTL5000::SYNC_user_func = NULL;                               // User defined callback function
 
-uint32_t volatile SGTL5000::debug[16] = {0};
+uint32_t SGTL5000::debug[16] = {0};
 
 SGTL5000::SGTL5000(PinName i2c_sda, PinName i2c_scl, int i2c_freq, bool i2c_ctrl_adr0_cs)
     : mI2C(i2c_sda, i2c_scl)
@@ -141,10 +129,20 @@
             codec_RATE_MODE = 0x0;
             break;
         case 144:
-            I2S_MCLK_M = 48;
-            I2S_MCLK_D = 125;
-            codec_SYS_FS = 0x03;
-            codec_RATE_MODE = 0x0;
+            switch(SystemCoreClock) {
+                case 96000000:
+                    I2S_MCLK_M = 48;
+                    I2S_MCLK_D = 125;
+                    codec_SYS_FS = 0x03;
+                    codec_RATE_MODE = 0x0;
+                    break;
+                case 120000000:
+                    I2S_MCLK_M = 48;
+                    I2S_MCLK_D = 156;
+                    codec_SYS_FS = 0x03;
+                    codec_RATE_MODE = 0x0;
+                    break;
+            }
             break;
         case 192:                                                               // Not officially supported by the codec, but it seems to work.
             I2S_MCLK_M = 64;
@@ -375,7 +373,7 @@
 {
     if(SYNC_attached) return -1;                                                                        // Assign Callback function
     SGTL5000::SYNC_user_func = func;
-    SGTL5000::SYNC_attach_type = 0;
+    SYNC_attach_type = 0;
     SYNC_attached = true;
     return 0;
 }
@@ -386,7 +384,7 @@
     NVIC_SetVector(SGTL5000::SYNC_swIRQ, user_ISR);
     NVIC_SetPriority(SGTL5000::SYNC_swIRQ, irq_pri);
     NVIC_EnableIRQ(SGTL5000::SYNC_swIRQ);
-    SGTL5000::SYNC_attach_type = 1;
+    SYNC_attach_type = 1;
     SYNC_attached = true;
 }
 
@@ -399,20 +397,25 @@
 int32_t SGTL5000::start_SYNC(uint32_t BufRX_L_safe, uint32_t BufRX_R_safe, uint32_t BufTX_L_safe, uint32_t BufTX_R_safe,
                              uint32_t block_size, bool _packed_RX, bool _packed_TX, bool _RX_shift, bool _TX_shift, uint32_t _RX_DMAch, uint32_t _TX_DMAch, uint32_t DMA_irq_pri)
 {
-    if(!SYNC_attached && !SGTL5000::SYNC_attach_type) return -1;                                        // Check we have a handler if using callback
+    if(!SYNC_attached && !SYNC_attach_type) return -1;                                        // Check we have a handler if using callback
     if(SYNC_run || TX_run || RX_run ) return -1;                                                        // Check if i2s is already started
     if(_RX_DMAch > 15 || _TX_DMAch > 15) return -1;                                                     // Sanity check DMAMUX channels
     if (!(block_size == 2 || block_size == 4 || block_size == 8)) return -1;                            // Only accept block size 2^n within range.
     packed_RX = _packed_RX;
     packed_TX = _packed_TX;
-    SGTL5000::TX_block_size = block_size;
-    SGTL5000::RX_block_size = SGTL5000::TX_block_size;
+    TX_block_size = block_size;
+    RX_block_size = TX_block_size;
     TX_bs_bytes = block_size * 4;
     RX_bs_bytes = TX_bs_bytes;
     SGTL5000::BufRX_L_safe = (uint32_t*)BufRX_L_safe;                                                   // Assign the users pointer addresses
     SGTL5000::BufRX_R_safe = (uint32_t*)BufRX_R_safe;
     SGTL5000::BufTX_L_safe = (uint32_t*)BufTX_L_safe;
     SGTL5000::BufTX_R_safe = (uint32_t*)BufTX_R_safe;
+    *SGTL5000::BufRX_L_safe = (uint32_t)&I2S_RX_Buffer[8];
+    *SGTL5000::BufRX_R_safe = (uint32_t)&I2S_RX_Buffer[8 + (RX_block_size / 2)];
+    *SGTL5000::BufTX_L_safe = (uint32_t)&I2S_TX_Buffer[8];
+    *SGTL5000::BufTX_R_safe = (uint32_t)&I2S_TX_Buffer[8 + (TX_block_size / 2)];
+
     if(packed_RX) RX_shift = false;
     else RX_shift = _RX_shift;
     if(packed_TX) TX_shift = false;
@@ -422,15 +425,20 @@
     SYNC_run = true;
     init_DMA();
 
-    I2S0->TCR1 = (I2S0->TCR1 & ~I2S_TCR1_TFW_MASK) | I2S_TCR1_TFW(8 - SGTL5000::TX_block_size);         // Set TX FIFO watermark
-    I2S0->RCR1 = (I2S0->RCR1 & ~I2S_RCR1_RFW_MASK) | I2S_RCR1_RFW(SGTL5000::RX_block_size - 1);         // Set RX FIFO watermark
+    I2S0->TCR1 = (I2S0->TCR1 & ~I2S_TCR1_TFW_MASK) | I2S_TCR1_TFW(8 - TX_block_size);         // Set TX FIFO watermark
+    I2S0->RCR1 = (I2S0->RCR1 & ~I2S_RCR1_RFW_MASK) | I2S_RCR1_RFW(RX_block_size - 1);         // Set RX FIFO watermark
     I2S0->TCSR |= I2S_TCSR_TE_MASK;                                                                     // TX enable
     I2S0->RCSR |= I2S_RCSR_RE_MASK;                                                                     // RX enable
     I2S0->TCR3 = (I2S0->TCR3 & ~I2S_TCR3_TCE_MASK) | I2S_TCR3_TCE(1);                                   // Enable TX channel 0.
     I2S0->RCR3 = (I2S0->RCR3 & ~I2S_RCR3_RCE_MASK) | I2S_RCR3_RCE(1);                                   // Enable RX channels 0
     //SGTL5000::_db_sync_phase = 0;
-    NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR);                        // Set DMA TX handler vector
-    NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR);                        // Set DMA RX handler vector
+    if(SYNC_attach_type) {
+        NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR_NB);                        // Set DMA TX handler vector
+        NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR_NB);                        // Set DMA RX handler vector
+    } else {
+        NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR);                        // Set DMA TX handler vector
+        NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::sync_dma_ISR);                        // Set DMA RX handler vector
+    }
     NVIC_SetPriority((IRQn)SGTL5000::TX_DMAch, DMA_irq_pri);                                            // Set irq priorities the same
     NVIC_SetPriority((IRQn)SGTL5000::RX_DMAch, DMA_irq_pri);
     if(SGTL5000::TX_DMAch > SGTL5000::RX_DMAch) {
@@ -480,49 +488,112 @@
     I2S0->RCSR |= I2S_RCSR_FRDE_MASK;                                                                   // Enable DMA request based on RX FIFO watermark
 }
 
-void SGTL5000::sync_dma_ISR(void)
+void SGTL5000::sync_dma_ISR_NB(void)
 {
     /*!
     Refer to the DMA_init function for details of buffer layouts.
 
     When running both TX & RX synchronously, only 1 direction has its IRQ enabled in the NVIC, which is enabled is determined by the priority of the DMA channels.
-    This assumes strict prioriy ordering of DMA channels. In sync mode TX & RX DMAs transfer the same number of bytes to the FIFO, therefore we should see only 1 FIFO word 
+    This assumes strict prioriy ordering of DMA channels. In sync mode TX & RX DMAs transfer the same number of bytes to the FIFO, therefore we should see only 1 FIFO word
     difference between DMA demands for TX or RX.
     The DMA transfers will be pre-empting each other, dependant on relative priority.
     Therefore before the user ISR is called we must be sure that the lowest priority channel has completed its transfer and it is this channel that has its IRQ enabled.
     We clear both flags here to avoid checking which is active as the active IRQ is chosen in the start_SYNC function (saves a couple of cycles).
     Activating only one of the IRQs avoids servicing an extra IRQ stack operation and dealing with pending IRQs.
     */
-    static uint32_t db_sync_phase = 0;
-    static uint32_t dbA_rx_L = (uint32_t)&SGTL5000::I2S_RX_Buffer[0];                                   // Pre-compute buffer offsets etc to save cycles in ISR
-    static uint32_t dbA_rx_R = (uint32_t)&SGTL5000::I2S_RX_Buffer[SGTL5000::RX_block_size / 2];
-    static uint32_t dbA_tx_L = (uint32_t)&SGTL5000::I2S_TX_Buffer[0];
-    static uint32_t dbA_tx_R = (uint32_t)&SGTL5000::I2S_TX_Buffer[SGTL5000::TX_block_size / 2];
-    static uint32_t dbB_rx_L = (uint32_t)&SGTL5000::I2S_RX_Buffer[8];
-    static uint32_t dbB_rx_R = (uint32_t)&SGTL5000::I2S_RX_Buffer[8 + (SGTL5000::RX_block_size / 2)];
-    static uint32_t dbB_tx_L = (uint32_t)&SGTL5000::I2S_TX_Buffer[8];
-    static uint32_t dbB_tx_R = (uint32_t)&SGTL5000::I2S_TX_Buffer[8 + (SGTL5000::TX_block_size / 2)];
+    static uint32_t db_phase = 0;
 
     DMA0->CINT = DMA_CINT_CINT(SGTL5000::RX_DMAch);                                                 // Clear RX DMA IRQ flag
     DMA0->CINT = DMA_CINT_CINT(SGTL5000::TX_DMAch);                                                 // Clear TX DMA IRQ flag
 
-    if(db_sync_phase) {                                                                                 // Swap double buffer pointers with pre-computed indecies
-        *SGTL5000::BufRX_L_safe  = dbB_rx_L;
-        *SGTL5000::BufRX_R_safe  = dbB_rx_R;
-        *SGTL5000::BufTX_L_safe  = dbB_tx_L;
-        *SGTL5000::BufTX_R_safe  = dbB_tx_R;
-        --db_sync_phase;
-    } else {
-        *SGTL5000::BufRX_L_safe  = dbA_rx_L;
-        *SGTL5000::BufRX_R_safe  = dbA_rx_R;
-        *SGTL5000::BufTX_L_safe  = dbA_tx_L;
-        *SGTL5000::BufTX_R_safe  = dbA_tx_R;
-        ++db_sync_phase;
+    register uint32_t BU_RX_L;
+    register uint32_t BU_RX_R;
+    register uint32_t BU_TX_L;
+    register uint32_t BU_TX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]
+        LDR     BU_RX_L, [SGTL5000::BufRX_L_safe]                       // Pipeline memory access
+        LDR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        LDR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        LDR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_RX_L, #32
+        ADD     BU_RX_R, #32
+        ADD     BU_TX_L, #32
+        ADD     BU_TX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_RX_L, #32
+        SUB     BU_RX_R, #32
+        SUB     BU_TX_L, #32
+        SUB     BU_TX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:
+        STR     BU_RX_L, [SGTL5000::BufRX_L_safe]                       // Pipeline memory access
+        STR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        STR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        STR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+        STR     DB_PHASE, [&db_phase]
     }
-    
-    if(SGTL5000::SYNC_attach_type) {                                                                    // Trigger swIRQ or call Callback
-        if(NVIC_GetActive(SGTL5000::SYNC_swIRQ) == 0) NVIC->STIR = SGTL5000::SYNC_swIRQ;
-    } else SGTL5000::SYNC_user_func.call();
+
+    if(!NVIC_GetActive(SGTL5000::SYNC_swIRQ)) NVIC->STIR = SGTL5000::SYNC_swIRQ;    // Trigger swIRQ or call Callback
+}
+
+void SGTL5000::sync_dma_ISR(void)
+{
+    static uint32_t db_phase = 0;
+
+    DMA0->CINT = DMA_CINT_CINT(SGTL5000::RX_DMAch);                             // Clear RX DMA IRQ flag
+    DMA0->CINT = DMA_CINT_CINT(SGTL5000::TX_DMAch);                             // Clear TX DMA IRQ flag
+
+    register uint32_t BU_RX_L;
+    register uint32_t BU_RX_R;
+    register uint32_t BU_TX_L;
+    register uint32_t BU_TX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]
+        LDR     BU_RX_L, [SGTL5000::BufRX_L_safe]                               // Pipeline memory access
+        LDR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        LDR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        LDR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_RX_L, #32
+        ADD     BU_RX_R, #32
+        ADD     BU_TX_L, #32
+        ADD     BU_TX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_RX_L, #32
+        SUB     BU_RX_R, #32
+        SUB     BU_TX_L, #32
+        SUB     BU_TX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:
+        STR     BU_RX_L, [SGTL5000::BufRX_L_safe]                               // Pipeline memory access
+        STR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        STR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        STR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+        STR     DB_PHASE, [&db_phase]
+    }
+
+    SGTL5000::SYNC_user_func.call();                                             // Callback user function
+
 }
 
 
@@ -531,7 +602,7 @@
 {
     if(TX_attached) return -1;                                                                          // Assign Callback function
     SGTL5000::TX_user_func = func;
-    SGTL5000::TX_attach_type = 0;
+    TX_attach_type = 0;
     TX_attached = true;
     return 0;
 }
@@ -542,7 +613,7 @@
     NVIC_SetVector(SGTL5000::TX_swIRQ, user_ISR);
     NVIC_SetPriority(SGTL5000::TX_swIRQ, irq_pri);
     NVIC_EnableIRQ(SGTL5000::TX_swIRQ);
-    SGTL5000::TX_attach_type = 1;
+    TX_attach_type = 1;
     TX_attached = true;
 }
 
@@ -555,14 +626,16 @@
 int32_t SGTL5000::start_TX(uint32_t BufTX_L_safe, uint32_t BufTX_R_safe,
                            uint32_t block_size, bool _packed_TX, bool _TX_shift, uint32_t _TX_DMAch, uint32_t DMA_irq_pri)
 {
-    if(!TX_attached && !SGTL5000::TX_attach_type) return -1;                                            // Check we have a handler if using callback
+    if(!TX_attached && !TX_attach_type) return -1;                                                      // Check we have a handler if using callback
     if(SYNC_run || TX_run) return -1;                                                                   // Check if i2s is already started on tx
     if(_TX_DMAch > 15) return -1;                                                                       // Sanity check DMAMUX channels
     if (!(block_size == 2 || block_size == 4 || block_size == 8)) return -1;                            // Only accept block size 2^n within range.
-    SGTL5000::TX_block_size = block_size;
+    TX_block_size = block_size;
     TX_bs_bytes = block_size * 4;
     SGTL5000::BufTX_L_safe = (uint32_t*)BufTX_L_safe;                                                   // Assign the users pointer addresses
     SGTL5000::BufTX_R_safe = (uint32_t*)BufTX_R_safe;
+    *SGTL5000::BufTX_L_safe = (uint32_t)&I2S_TX_Buffer[8];
+    *SGTL5000::BufTX_R_safe = (uint32_t)&I2S_TX_Buffer[8 + (TX_block_size / 2)];
     packed_TX = _packed_TX;
     if(packed_TX) TX_shift = false;
     else TX_shift = _TX_shift;
@@ -570,10 +643,11 @@
     TX_run = true;
     init_DMA();
 
-    I2S0->TCR1 = (I2S0->TCR1 & ~I2S_TCR1_TFW_MASK) | I2S_TCR1_TFW(8 - SGTL5000::TX_block_size);         // Set TX FIFO watermark
+    I2S0->TCR1 = (I2S0->TCR1 & ~I2S_TCR1_TFW_MASK) | I2S_TCR1_TFW(8 - TX_block_size);                   // Set TX FIFO watermark
     I2S0->TCSR |= I2S_TCSR_TE_MASK;                                                                     // TX enable
     I2S0->TCR3 = (I2S0->TCR3 & ~I2S_TCR3_TCE_MASK) | I2S_TCR3_TCE(1);                                   // Enable TX channel 0.
-    NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::tx_dma_ISR);                          // Set DMA TX handler vector
+    if(TX_attach_type) NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::tx_dma_ISR_NB);    // Set DMA TX handler vector
+    else NVIC_SetVector((IRQn)SGTL5000::TX_DMAch, (uint32_t)&SGTL5000::tx_dma_ISR);
     NVIC_SetPriority((IRQn)SGTL5000::TX_DMAch, DMA_irq_pri);                                            // Set irq priorities the same
     NVIC_EnableIRQ((IRQn)SGTL5000::TX_DMAch);                                                           // Enable IRQ for chosen TX DMA channel
     NVIC_SetVector(I2S0_Tx_IRQn, (uint32_t)&SGTL5000::tx_I2S_ISR);                                      // Set vector for TX word start ISR
@@ -592,35 +666,83 @@
     TX_run = false;
 }
 
-void SGTL5000::tx_dma_ISR(void)
+void SGTL5000::tx_dma_ISR_NB(void)
 {
     static uint32_t db_phase = 0;
-    static uint32_t dbA_tx_L = (uint32_t)&SGTL5000::I2S_TX_Buffer[0];                                   // Pre-compute buffer offsets etc to save cycles in ISR
-    static uint32_t dbA_tx_R = (uint32_t)&SGTL5000::I2S_TX_Buffer[SGTL5000::TX_block_size / 2];
-    static uint32_t dbB_tx_L = (uint32_t)&SGTL5000::I2S_TX_Buffer[8];
-    static uint32_t dbB_tx_R = (uint32_t)&SGTL5000::I2S_TX_Buffer[8 + (SGTL5000::TX_block_size / 2)];
 
     DMA0->CINT = DMA_CINT_CINT(SGTL5000::TX_DMAch);
 
-    if(db_phase) {                                                                                      // Swap double buffer pointers with pre-computed indecies
-        *SGTL5000::BufTX_L_safe  = dbB_tx_L;
-        *SGTL5000::BufTX_R_safe  = dbB_tx_R;
-        --db_phase;
-    } else {
-        *SGTL5000::BufTX_L_safe  = dbA_tx_L;
-        *SGTL5000::BufTX_R_safe  = dbA_tx_R;
-        ++db_phase;
+    register uint32_t BU_TX_L;
+    register uint32_t BU_TX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]                                           // Pipeline memory access
+        LDR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        LDR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_TX_L, #32
+        ADD     BU_TX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_TX_L, #32
+        SUB     BU_TX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:                                                                  // Pipeline memory access
+        STR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        STR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+        STR     DB_PHASE, [&db_phase]
     }
-    if(SGTL5000::TX_attach_type) {
-        if(NVIC_GetActive(SGTL5000::TX_swIRQ) == 0) NVIC->STIR = SGTL5000::TX_swIRQ;                    // Trigger swIRQ or call Callback
-    } else SGTL5000::TX_user_func.call();
+    if(!NVIC_GetActive(SGTL5000::TX_swIRQ)) NVIC->STIR = SGTL5000::TX_swIRQ;// Trigger swIRQ if not still processing
+}
+
+void SGTL5000::tx_dma_ISR(void)
+{
+    static uint32_t db_phase = 0;
+
+    DMA0->CINT = DMA_CINT_CINT(SGTL5000::TX_DMAch);
+
+    register uint32_t BU_TX_L;
+    register uint32_t BU_TX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]                                           // Pipeline memory access
+        LDR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        LDR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_TX_L, #32
+        ADD     BU_TX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_TX_L, #32
+        SUB     BU_TX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:                                                                  // Pipeline memory access
+        STR     BU_TX_L, [SGTL5000::BufTX_L_safe]
+        STR     BU_TX_R, [SGTL5000::BufTX_R_safe]
+        STR     DB_PHASE, [&db_phase]
+    }
+    SGTL5000::TX_user_func.call();                                              // Callback user function
 }
 
 int32_t SGTL5000::attach_RX(Callback<void()> func)
 {
     if(RX_attached) return -1;                                                                          // Assign Callback function
     SGTL5000::RX_user_func = func;
-    SGTL5000::RX_attach_type = 0;
+    RX_attach_type = 0;
     RX_attached = true;
     return 0;
 }
@@ -631,7 +753,7 @@
     NVIC_SetVector(SGTL5000::RX_swIRQ, user_ISR);
     NVIC_SetPriority(SGTL5000::RX_swIRQ, irq_pri);
     NVIC_EnableIRQ(SGTL5000::RX_swIRQ);
-    SGTL5000::RX_attach_type = 1;
+    RX_attach_type = 1;
     RX_attached = true;
 }
 
@@ -644,14 +766,16 @@
 int32_t SGTL5000::start_RX(uint32_t BufRX_L_safe, uint32_t BufRX_R_safe,
                            uint32_t block_size, bool _packed_RX, bool _RX_shift, uint32_t _RX_DMAch, uint32_t DMA_irq_pri)
 {
-    if(!RX_attached && !SGTL5000::RX_attach_type) return -1;                                            // Check we have a handler if using callback
+    if(!RX_attached && !RX_attach_type) return -1;                                                      // Check we have a handler if using callback
     if(SYNC_run || RX_run) return -1;                                                                   // Check if i2s is already started on rx
     if(_RX_DMAch > 15) return -1;                                                                       // Sanity check DMAMUX channels
     if (!(block_size == 2 || block_size == 4 || block_size == 8)) return -1;                            // Only accept block size 2^n within range.
-    SGTL5000::RX_block_size = block_size;
+    RX_block_size = block_size;
     RX_bs_bytes = block_size * 4;
     SGTL5000::BufRX_L_safe = (uint32_t*)BufRX_L_safe;                                                   // Assign the users pointer addresses
     SGTL5000::BufRX_R_safe = (uint32_t*)BufRX_R_safe;
+    *SGTL5000::BufRX_L_safe = (uint32_t)&I2S_RX_Buffer[8];
+    *SGTL5000::BufRX_R_safe = (uint32_t)&I2S_RX_Buffer[8 + (RX_block_size / 2)];
     packed_RX = _packed_RX;
     if(packed_RX) RX_shift = false;
     else RX_shift = _RX_shift;
@@ -659,11 +783,11 @@
     RX_run = true;
     init_DMA();
 
-    I2S0->RCR1 = (I2S0->RCR1 & ~I2S_RCR1_RFW_MASK) | I2S_RCR1_RFW(SGTL5000::RX_block_size - 1);         // Set RX FIFO watermark
+    I2S0->RCR1 = (I2S0->RCR1 & ~I2S_RCR1_RFW_MASK) | I2S_RCR1_RFW(RX_block_size - 1);                   // Set RX FIFO watermark
     I2S0->RCSR |= I2S_RCSR_RE_MASK;                                                                     // RX enable
     I2S0->RCR3 = (I2S0->RCR3 & ~I2S_RCR3_RCE_MASK) | I2S_RCR3_RCE(1);                                   // Enable RX channel 0.
-    //SGTL5000::_db_rx_phase = 0;
-    NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::rx_dma_ISR);                          // Set DMA RX handler vector
+    if(RX_attach_type) NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::rx_dma_ISR_NB);    // Set DMA RX handler vector
+    else NVIC_SetVector((IRQn)SGTL5000::RX_DMAch, (uint32_t)&SGTL5000::rx_dma_ISR);
     NVIC_SetPriority((IRQn)SGTL5000::RX_DMAch, DMA_irq_pri);                                            // Set irq priorities the same
     NVIC_EnableIRQ((IRQn)SGTL5000::RX_DMAch);                                                           // Enable IRQ for chosen RX DMA channel
     NVIC_SetVector(I2S0_Rx_IRQn, (uint32_t)&SGTL5000::rx_I2S_ISR);                                      // Set vector for RX word start ISR
@@ -682,28 +806,78 @@
     RX_run = false;
 }
 
-void SGTL5000::rx_dma_ISR(void)
+void SGTL5000::rx_dma_ISR_NB(void)
 {
     static uint32_t db_phase = 0;
-    static uint32_t dbA_rx_L = (uint32_t)&SGTL5000::I2S_RX_Buffer[0];                                   // Pre-compute buffer offsets etc to save cycles in ISR
-    static uint32_t dbA_rx_R = (uint32_t)&SGTL5000::I2S_RX_Buffer[SGTL5000::RX_block_size / 2];
-    static uint32_t dbB_rx_L = (uint32_t)&SGTL5000::I2S_RX_Buffer[8];
-    static uint32_t dbB_rx_R = (uint32_t)&SGTL5000::I2S_RX_Buffer[8 + (SGTL5000::RX_block_size / 2)];
 
     DMA0->CINT = DMA_CINT_CINT(SGTL5000::RX_DMAch);
 
-    if(db_phase) {                                                                                      // Swap double buffer pointers with pre-computed indecies
-        *SGTL5000::BufRX_L_safe  = dbB_rx_L;
-        *SGTL5000::BufRX_R_safe  = dbB_rx_R;
-        --db_phase;
-    } else {
-        *SGTL5000::BufRX_L_safe  = dbA_rx_L;
-        *SGTL5000::BufRX_R_safe  = dbA_rx_R;
-        ++db_phase;
+    register uint32_t BU_RX_L;
+    register uint32_t BU_RX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]                                           // Pipeline memory access
+        LDR     BU_RX_L, [SGTL5000::BufRX_L_safe]
+        LDR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_RX_L, #32
+        ADD     BU_RX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_RX_L, #32
+        SUB     BU_RX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:                                                                  // Pipeline memory access
+        STR     BU_RX_L, [SGTL5000::BufRX_L_safe]
+        STR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        STR     DB_PHASE, [&db_phase]
     }
-    if(SGTL5000::RX_attach_type) {                                                                      // Trigger swIRQ or call Callback
-        if(NVIC_GetActive(SGTL5000::RX_swIRQ) == 0) NVIC->STIR = SGTL5000::RX_swIRQ;
-    } else SGTL5000::RX_user_func.call();
+
+    if(!NVIC_GetActive(SGTL5000::RX_swIRQ)) NVIC->STIR = SGTL5000::RX_swIRQ;// Trigger swIRQ if not still processing
+
+}
+
+void SGTL5000::rx_dma_ISR(void)
+{
+    static uint32_t db_phase = 0;
+
+    DMA0->CINT = DMA_CINT_CINT(SGTL5000::RX_DMAch);
+
+    register uint32_t BU_RX_L;
+    register uint32_t BU_RX_R;
+    register uint32_t DB_PHASE;
+    __asm {
+        LDR     DB_PHASE, [&db_phase]                                 // Pipeline memory access
+        LDR     BU_RX_L, [SGTL5000::BufRX_L_safe]
+        LDR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+
+        TEQ     DB_PHASE, #0x0
+        IT      EQ
+        BEQ     buf_base
+
+        ADD     BU_RX_L, #32
+        ADD     BU_RX_R, #32
+        SUB     DB_PHASE, #0x1
+        B       store
+
+        buf_base:
+        SUB     BU_RX_L, #32
+        SUB     BU_RX_R, #32
+        ADD     DB_PHASE, #0x1
+
+        store:                                                                  // Pipeline memory access
+        STR     BU_RX_L, [SGTL5000::BufRX_L_safe]
+        STR     BU_RX_R, [SGTL5000::BufRX_R_safe]
+        STR     DB_PHASE, [&db_phase]
+    }
+    SGTL5000::RX_user_func.call();                                              // Callback user function
 }
 
 
@@ -721,12 +895,12 @@
     Block Size = 2                                                            Block Size = 4                                                                Block Size = 8
     Double Buffer A                     Double Buffer B                       Double Buffer A                       Double Buffer B                         Double Buffer A                          Double Buffer B
     |L0:x|R0:x|x:x|x:x|x:x|x:x|x:x|x:x||L0:x|R0:x|x:x|x:x|x:x|x:x|x:x|x:x|    |L0:x|L1:x|R0:x|R1:x|x:x|x:x|x:x|x:x||L0:x|L1:x|R0:x|R1:x|x:x|x:x|x:x|x:x|    |L0:x|L1:x|L2:x|L3:x|R0:x|R1:x|R2:x|R3:x||L0:x|L1:x|L2:x|L3:x|R0:x|R1:x|R2:x|R3:x|
-    
+
     Packed buffer layout:
     Block Size = 2                                                            Block Size = 4                                                                Block Size = 8
     Double Buffer A                     Double Buffer B                       Double Buffer A                       Double Buffer B                         Double Buffer A                          Double Buffer B
     |L0:x|R0:x|x:x|x:x|x:x|x:x|x:x|x:x||L0:x|R0:x|x:x|x:x|x:x|x:x|x:x|x:x|    |L0:L1|x:x|R0:R1|x:x|x:x|x:x|x:x|x:x||L0:L1|x:x|R0:R1|x:x|x:x|x:x|x:x|x:x|    |L0:L1|L2:L3|x:x|x:x|R0:R1|R2:R3|x:x|x:x||L0:L1|L2:L3|x:x|x:x|R0:R1|R2:R3|x:x|x:x|
-    
+
     The users pointers are always updated to point to L0 & R0 of the current safe double buffer area.
 
     */
@@ -762,26 +936,26 @@
         DMA0->TCD[SGTL5000::RX_DMAch].CSR &= ~DMA_CSR_MAJORELINK_MASK;                                                                                                                      // Disable major loop linking
         DMA0->TCD[SGTL5000::RX_DMAch].CSR |= DMA_CSR_INTMAJOR_MASK;                                                                                                                         // Enable IRQ at Completion of Major cycle
         DMA0->TCD[SGTL5000::RX_DMAch].ATTR = DMA_ATTR_SMOD(0) | DMA_ATTR_SSIZE(1) | DMA_ATTR_DMOD(0) | DMA_ATTR_DSIZE(1);                                                                   // Set data transfer size @ 16bits per memory access across the memory bus
-        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[0] + 2;
-        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[0];
+        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[0] + 2;
+        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[0];
         DMA0->TCD[SGTL5000::RX_DMAch].SADDR = (uint32_t)&I2S0->RDR[0];                                                                                                                      // Set rxDMA Source addr pointer to 1st bit of I2S RX Data Reg
         DMA0->TCD[SGTL5000::RX_DMAch].SOFF = 0;                                                                                                                                             // Signed Source offset set to zero (always read from RDR[0]).
         DMA0->TCD[SGTL5000::RX_DMAch].DOFF = (RX_bs_bytes / 2);                                                                                                                        // After each write step into upper half of the buffer
         DMA0->TCD[SGTL5000::RX_DMAch].CITER_ELINKNO &= ~DMA_CITER_ELINKNO_ELINK_MASK;                                                                                                       // Disable channel linking minor loop
-        DMA0->TCD[SGTL5000::RX_DMAch].CITER_ELINKNO = (DMA0->TCD[SGTL5000::RX_DMAch].CITER_ELINKNO & ~DMA_CITER_ELINKNO_CITER_MASK) | DMA_CITER_ELINKNO_CITER(SGTL5000::RX_block_size / 2); // Major loop current iter count starting value
+        DMA0->TCD[SGTL5000::RX_DMAch].CITER_ELINKNO = (DMA0->TCD[SGTL5000::RX_DMAch].CITER_ELINKNO & ~DMA_CITER_ELINKNO_CITER_MASK) | DMA_CITER_ELINKNO_CITER(RX_block_size / 2); // Major loop current iter count starting value
         DMA0->TCD[SGTL5000::RX_DMAch].BITER_ELINKNO &= ~DMA_BITER_ELINKNO_ELINK_MASK;                                                                                                       // Disable channel linking minor loop
-        DMA0->TCD[SGTL5000::RX_DMAch].BITER_ELINKNO = (DMA0->TCD[SGTL5000::RX_DMAch].BITER_ELINKNO & ~DMA_BITER_ELINKNO_BITER_MASK) | DMA_BITER_ELINKNO_BITER(SGTL5000::RX_block_size / 2); // Major loop iter count to load again at after major completes
+        DMA0->TCD[SGTL5000::RX_DMAch].BITER_ELINKNO = (DMA0->TCD[SGTL5000::RX_DMAch].BITER_ELINKNO & ~DMA_BITER_ELINKNO_BITER_MASK) | DMA_BITER_ELINKNO_BITER(RX_block_size / 2); // Major loop iteration count that is reloaded after the major loop completes
         DMA0->TCD[SGTL5000::RX_DMAch].DLAST_SGA = (uint32_t)&SG_rx_TCD_B[0];     // Set scatter gather TCD definition location
         DMA0->TCD[SGTL5000::RX_DMAch].SLAST = 0;
         memcpy(&SG_rx_TCD_A[0], (void*)&DMA0->TCD[SGTL5000::RX_DMAch], 32);                                                                                                                 // Copy TCD A to memory
         DMA0->TCD[SGTL5000::RX_DMAch].DLAST_SGA = (uint32_t)&SG_rx_TCD_A[0];                                                                                                                // Set scatter gather TCD definition location
-        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[8] + 2;                                                                             // Swap RX double buffer
-        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[8];
+        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[8] + 2;                                                                             // Swap RX double buffer
+        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[8];
         memcpy(&SG_rx_TCD_B[0], (void*)&DMA0->TCD[SGTL5000::RX_DMAch], 32);                                                                                                                 // Copy TCD B to memory
         // Set TCD elements in the DMA controller back to initial TCD A
         DMA0->TCD[SGTL5000::RX_DMAch].DLAST_SGA = (uint32_t)&SG_rx_TCD_B[0];                                                                                                                // Set scatter gather TCD definition location
-        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[0] + 2;
-        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&SGTL5000::I2S_RX_Buffer[0];
+        if(RX_shift) DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[0] + 2;
+        else DMA0->TCD[SGTL5000::RX_DMAch].DADDR = (uint32_t)&I2S_RX_Buffer[0];
     }
 
 
@@ -805,25 +979,25 @@
         DMA0->TCD[SGTL5000::TX_DMAch].CSR &= ~DMA_CSR_MAJORELINK_MASK;                                                                                                                      // Disable major loop linking
         DMA0->TCD[SGTL5000::TX_DMAch].CSR |= DMA_CSR_INTMAJOR_MASK;                                                                                                                         // Enable IRQ at Completion of Major cycle
         DMA0->TCD[SGTL5000::TX_DMAch].ATTR = DMA_ATTR_SMOD(0) | DMA_ATTR_SSIZE(1) | DMA_ATTR_DMOD(0) | DMA_ATTR_DSIZE(1);                                                                   // Set data transfer size @ 16bits per memory access across the memory bus
-        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[0] + 2;
-        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[0];
+        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[0] + 2;
+        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[0];
         DMA0->TCD[SGTL5000::TX_DMAch].DADDR = (uint32_t)&I2S0->TDR[0];                                                                                                                      // Set rxDMA Source addr pointer to 1st bit of I2S RX Data Reg
         DMA0->TCD[SGTL5000::TX_DMAch].DOFF = 0;                                                                                                                                             // Signed Source offset set to zero (always write TDR[0]).
         DMA0->TCD[SGTL5000::TX_DMAch].SOFF = (TX_bs_bytes / 2);                                                                                                                             // After each write step into upper half of the buffer
         DMA0->TCD[SGTL5000::TX_DMAch].CITER_ELINKNO &= ~DMA_CITER_ELINKNO_ELINK_MASK;                                                                                                       // Disable channel linking minor loop
-        DMA0->TCD[SGTL5000::TX_DMAch].CITER_ELINKNO = (DMA0->TCD[SGTL5000::TX_DMAch].CITER_ELINKNO & ~DMA_CITER_ELINKNO_CITER_MASK) | DMA_CITER_ELINKNO_CITER(SGTL5000::TX_block_size / 2); // Major loop current iter count starting value
+        DMA0->TCD[SGTL5000::TX_DMAch].CITER_ELINKNO = (DMA0->TCD[SGTL5000::TX_DMAch].CITER_ELINKNO & ~DMA_CITER_ELINKNO_CITER_MASK) | DMA_CITER_ELINKNO_CITER(TX_block_size / 2); // Major loop current iter count starting value
         DMA0->TCD[SGTL5000::TX_DMAch].BITER_ELINKNO &= ~DMA_BITER_ELINKNO_ELINK_MASK;                                                                                                       // Disable channel linking minor loop
-        DMA0->TCD[SGTL5000::TX_DMAch].BITER_ELINKNO = (DMA0->TCD[SGTL5000::TX_DMAch].BITER_ELINKNO & ~DMA_BITER_ELINKNO_BITER_MASK) | DMA_BITER_ELINKNO_BITER(SGTL5000::TX_block_size / 2); // Major loop iter count to load again at after major completes                                                                                         // Reset dest addr to start address.
+        DMA0->TCD[SGTL5000::TX_DMAch].BITER_ELINKNO = (DMA0->TCD[SGTL5000::TX_DMAch].BITER_ELINKNO & ~DMA_BITER_ELINKNO_BITER_MASK) | DMA_BITER_ELINKNO_BITER(TX_block_size / 2); // Major loop iteration count that is reloaded after the major loop completes                                                                                         // Reset dest addr to start address.
         DMA0->TCD[SGTL5000::TX_DMAch].DLAST_SGA = (uint32_t)&SG_tx_TCD_B[0];                                                                                                                // Set scatter gather TCD definition location
         memcpy(&SG_tx_TCD_A[0], (void*)&DMA0->TCD[SGTL5000::TX_DMAch], 32);                                                                                                                 // Copy TCD A to memory
         DMA0->TCD[SGTL5000::TX_DMAch].DLAST_SGA = (uint32_t)&SG_tx_TCD_A[0];                                                                                                                // Set scatter gather TCD definition location
-        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[8] + 2;                                                                             // Swap TX double buffer
-        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[8];
+        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[8] + 2;                                                                             // Swap TX double buffer
+        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[8];
         memcpy(&SG_tx_TCD_B[0], (void*)&DMA0->TCD[SGTL5000::TX_DMAch], 32);                                                                                                                 // Copy TCD B to memory
         // Set TCD elements in the DMA controller back to initial TCD A
         DMA0->TCD[SGTL5000::TX_DMAch].DLAST_SGA = (uint32_t)&SG_tx_TCD_B[0];                                                                                                                // Set scatter gather TCD definition location
-        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[0] + 2;
-        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&SGTL5000::I2S_TX_Buffer[0];
+        if(TX_shift) DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[0] + 2;
+        else DMA0->TCD[SGTL5000::TX_DMAch].SADDR = (uint32_t)&I2S_TX_Buffer[0];
     }
 
     if(SYNC_run || RX_run) {
@@ -840,7 +1014,7 @@
 {
     //SGTL5000::debug[0] = packed_RX;
     //SGTL5000::debug[1] = packed_TX;
-    //SGTL5000::debug[2] = SGTL5000::I2S_RX_Buffer[0];
+    //SGTL5000::debug[2] = I2S_RX_Buffer[0];
     return SGTL5000::debug[index];
 };
 }
\ No newline at end of file