Test program for my Multi_WS2811 library that started out as a fork of heroic/WS2811. My library uses hardware DMA on the FRDM-KL25Z to drive up to 16 strings of WS2811 or WS2812 LEDs in parallel.
Dependencies: Multi_WS2811 mbed MMA8451Q
Fork of WS2811 by heroic
NOTE: I have accidentally pushed changes for another fork of this program that I used in the recent Georgetown Carnival Power Tool Races. When I get some time, I will restore the test program to its original glory.
You can see my power tool racer (Nevermore's Revenge) here

This tests my FRDM-KL25Z multi-string WS2811/WS2812 library. It uses the accelerometer to change the rainbow phase on two strings of LEDs as well as the touch sense to change brightness.
A video of this program in operation is here.
Here is the library that I developed to run the LEDs:
Import library Multi_WS2811
Library allowing up to 16 strings of 60 WS2811 or WS2812 LEDs to be driven from a single FRDM-KL25Z board. Uses hardware DMA to run at the full 800 kHz bit rate without much CPU burden.
Diff: WS2811.cpp
- Revision:
- 21:4541da183397
- Parent:
- 20:b9d76e567637
- Child:
- 23:33df42ff2541
--- a/WS2811.cpp Sat Dec 21 04:32:21 2013 +0000
+++ b/WS2811.cpp Thu Jan 02 00:50:09 2014 +0000
@@ -1,145 +1,394 @@
-// 800 KHz WS2811 driver, kinda.
-//
-// Parameterized and modified to use soft SPI.
-// Jas Strong <jasmine@electronpusher.org>
-// Modified to use hard SPI by Ned Konz <ned@bike-nomad.com>
-/*****************************************************************************/
-
-#include "LedStrip.h"
-#include "WS2811.h"
-extern void dump_spi_settings(SPI_Type const *spi);
-extern Serial pc;
-extern DigitalOut debugOut;
-
-static const unsigned DMA_MUX_SRC_SPI0_Transmit = 17;
-// const unsigned DMA_MUX_SRC_SPI1_Transmit = 19;
-
-static const unsigned dmaWriteChannel = 0;
-static const unsigned dmaXmitMuxSrc = DMA_MUX_SRC_SPI0_Transmit;
-
-static volatile bool dma_done = false;
-
-// 12.8 MHz => 800KHz bit rate (1.25 usec/byte)
-
-WS2811::WS2811(int n, SPI_Type *_spi, PinName _mosi, PinName sclk) :
- LedStrip(n),
- spi(_spi),
- mosi(_mosi)
-{
- SPI spitemp(_mosi, NC, sclk);
- spitemp.format(8,3);
- spitemp.frequency(800e3 * 16 * 2); // 12 MHz (48MHz/60) => 750KHz rate (1.33 usec/byte)
-
- //Enable DMA clocking
- SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK; // Enable clock to DMA mux
- SIM->SCGC7 |= SIM_SCGC7_DMA_MASK; // Enable clock to DMA
-
- // reset DMAMUX
- DMAMUX0->CHCFG[dmaWriteChannel] = 0;
- DMAMUX0->CHCFG[dmaWriteChannel] = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(dmaXmitMuxSrc);
-
- // Enable DMA features within the SPI registers
- spi->C1 |= SPI_C1_SPTIE_MASK | // enable transmit-interrupt
- SPI_C1_MSTR_MASK;
-}
-
-/*
- * These chips use a one-wire protocol based on a sort of NRZ signalling- jas.
- * Spec is 1.25usec +/- 600nsec => 650nsec to 1850nsec
- */
-
-
-void WS2811::startDMA()
-{
- DMA0->DMA[dmaWriteChannel].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
- DMA0->DMA[dmaWriteChannel].SAR = (uint32_t)(void*)dmaBytes; // set source address
- DMA0->DMA[dmaWriteChannel].DAR = (uint32_t)(void*)&(spi->D); // set dest address: SPI0_Data register
- DMA0->DMA[dmaWriteChannel].DSR_BCR |= DMA_DSR_BCR_BCR_MASK & sizeof(dmaBytes); // length of transfer
- DMA0->DMA[dmaWriteChannel].DCR = DMA_DCR_EINT_MASK | // enable interrupt on end of transfer
- DMA_DCR_ERQ_MASK |
- DMA_DCR_SINC_MASK |
- DMA_DCR_SSIZE(0x01) |
- DMA_DCR_DSIZE(0x01) |
- // DMA_DCR_START_MASK |
- DMA_DCR_D_REQ_MASK; // clear ERQ on end of transfer
-
- dump_spi_settings(spi);
-
- debugOut = 1;
-
- while (!(spi->S & SPI_S_SPTEF_MASK))
- __NOP();
- spi->D = dmaBytes[0];
+// 800 KHz WS2811 driver driving potentially many LED strings.
+// Uses 3-phase DMA
+// 16K SRAM less stack, etc.
+//
+// Per LED: 3 bytes (malloc'd) for RGB data
+//
+// Per LED strip / per LED
+// 96 bytes (static) for bit data
+// + 96 bytes (static) for ones data
+// = 192 bytes
+//
+// 40 LEDs max per string = 7680 bytes static
+//
+// 40 LEDs: 7680 + 40*3 = 7800 bytes
+// 80 LEDs: 7680 + 80*3 = 7920 bytes
+
+#include "MKL25Z4.h"
+#include "LedStrip.h"
+#include "WS2811.h"
+
+//
+// Configuration
+//
+
+// Define MONITOR_TPM0_PWM as non-zero to monitor PWM timing on PTD0 and PTD1
+// PTD0 TPM0/CH0 PWM_1 J2/06
+// PTD1 TPM0/CH1 PWM_2 J2/12 (also LED_BLUE)
+#define MONITOR_TPM0_PWM 0
+
+// define DEBUG_PIN to identify a pin in PORTD used for debug output
+// #define DEBUG_PIN 3 /* PTD3 debugOut */
+
+#ifdef DEBUG_PIN
+#define DEBUG 1
+#endif // DEBUG is only ever defined here, i.e. when DEBUG_PIN is defined
+
+#if DEBUG // an undefined DEBUG evaluates to 0 in #if, selecting the no-op macros below
+#define DEBUG_MASK (1<<DEBUG_PIN)
+#define RESET_DEBUG (IO_GPIO->PDOR &= ~DEBUG_MASK)
+#define SET_DEBUG (IO_GPIO->PDOR |= DEBUG_MASK)
+#else
+#define DEBUG_MASK 0
+#define RESET_DEBUG (void)0
+#define SET_DEBUG (void)0
+#endif
+
+static PORT_Type volatile * const IO_PORT = PORTD; // pin-control registers for the port carrying all strip outputs
+static GPIO_Type volatile * const IO_GPIO = PTD; // GPIO registers of the same port; PDOR is the DMA destination
+
+// 48 MHz clock, no prescaling. NSEC_TO_TICKS uses integer arithmetic, so results are truncated.
+#define NSEC_TO_TICKS(nsec) ((nsec)*48/1000)
+static const uint32_t CLK_NSEC = 1250; // one bit period at 800 kHz
+static const uint32_t tpm_period = NSEC_TO_TICKS(CLK_NSEC); // 60 ticks
+static const uint32_t tpm_p0_period = NSEC_TO_TICKS(CLK_NSEC / 3); // 416 ns -> 19 ticks after truncation
+static const uint32_t tpm_p1_period = NSEC_TO_TICKS(CLK_NSEC * 2 / 3); // 833 ns -> 39 ticks after truncation
+
+enum DMA_MUX_SRC // DMAMUX request source numbers used by dma_init()
+{
+ DMA_MUX_SRC_TPM0_CH_0 = 24,
+ DMA_MUX_SRC_TPM0_CH_1, // implicitly 25
+ DMA_MUX_SRC_TPM0_Overflow = 54,
+};
+
+enum DMA_CHAN // DMA channel assignment; see dma_init() for the trigger wired to each
+{
+ DMA_CHAN_START = 0, // triggered by TPM0 overflow: drives outputs high
+ DMA_CHAN_0_LOW = 1, // triggered by TPM0 channel 0: writes the bit-data word
+ DMA_CHAN_1_LOW = 2, // triggered by TPM0 channel 1: drives outputs low
+ N_DMA_CHANNELS // number of channels used (3)
+};
+
+static volatile bool dma_done = true; // set true by DMA0_IRQHandler when DMA_CHAN_1_LOW reports DONE
+
+// class static: guards hw_init() so the hardware is set up only once
+bool WS2811::initialized = false;
+
+// class static: OR of the pinMask of every constructed strip
+uint32_t WS2811::enabledPins = 0;
+
+#define WORD_ALIGNED __attribute__ ((aligned(4)))
+
+#define DMA_LEADING_ZEROS 2
+#define BITS_PER_RGB 24
+#define DMA_TRAILING_ZEROS 1
+
+static struct
+{
+ uint32_t start_t1_low[ DMA_LEADING_ZEROS ]; // never written in this file, so stays zero; source start for the two "low" channels
+ uint32_t dmaWords[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ]; // one word per bit time; writeByte() sets or clears each strip's pin bit
+ uint32_t trailing_zeros_1[ DMA_TRAILING_ZEROS ]; // zero padding after the bit data
+
+ uint32_t start_t0_high[ DMA_LEADING_ZEROS - 1 ]; // zero padding; source start for DMA_CHAN_START
+ uint32_t allOnes[ BITS_PER_RGB * MAX_LEDS_PER_STRIP ]; // filled with 0xFF bytes by dma_data_init()
+ uint32_t trailing_zeros_2[ DMA_TRAILING_ZEROS + 1 ]; // makes this group the same total length as the first group
+} dmaData WORD_ALIGNED;
+
+// class static: returns the file-level dma_done flag
+bool WS2811::is_dma_done()
+{
+ return dma_done; // volatile flag, initially true and set true again by DMA0_IRQHandler
+}
+
+// class static
+/// One-time bring-up of the shared hardware: DMA buffers, clocks, DMA routing,
+/// output pins and the TPM0 timer, in that order. Calls after the first are no-ops.
+void WS2811::hw_init()
+{
+    if (!initialized)
+    {
+        dma_data_init();
+        clock_init();
+        dma_init();
+        io_init();
+        tpm_init();
+
+        initialized = true;
+    }
+}
+
+// class static
+/// Fill the constant all-ones DMA source; in DEBUG builds also preload the
+/// bit-data words with the debug pin mask.
+void WS2811::dma_data_init()
+{
+    const unsigned wordCount = BITS_PER_RGB * MAX_LEDS_PER_STRIP;
+
+    for (unsigned w = 0; w < wordCount; ++w)
+    {
+        dmaData.allOnes[w] = 0xFFFFFFFFu;
+    }
+
+#if DEBUG
+    for (unsigned w = 0; w < wordCount; ++w)
+    {
+        dmaData.dmaWords[w] = DEBUG_MASK;
+    }
+#endif
+}
+
+// class static
+
+/// Enable PORTD, DMA and TPM0 clocking
+///
+/// Gates on the clocks for PORTD, the DMA mux, TPM0 and the DMA controller,
+/// then selects the TPM clock source.
+void WS2811::clock_init()
+{
+    SIM->SCGC5 |= SIM_SCGC5_PORTD_MASK;
+    SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK | SIM_SCGC6_TPM0_MASK; // Enable clock to DMA mux and TPM0
+    SIM->SCGC7 |= SIM_SCGC7_DMA_MASK; // Enable clock to DMA
+
+    // TPMSRC is a multi-bit field: clear it before setting it, so that a
+    // previously selected source cannot be OR-ed into a different value.
+    SIM->SOPT2 = (SIM->SOPT2 & ~SIM_SOPT2_TPMSRC_MASK)
+               | SIM_SOPT2_TPMSRC(1); // Clock source: MCGFLLCLK or MCGPLLCLK
+}
+
+// class static
+
+/// Set up every pin recorded in enabledPins as a high-drive GPIO output that
+/// starts low; optionally route the TPM0 monitor pins and the debug pin.
+void WS2811::io_init()
+{
+    for (uint32_t pin = 0; pin < 32; ++pin)
+    {
+        const bool inUse = ((enabledPins >> pin) & 1U) != 0;
+        if (inUse)
+        {
+            // GPIO function, high drive strength
+            IO_PORT->PCR[pin] = PORT_PCR_DSE_MASK | PORT_PCR_MUX(1);
+        }
+    }
+
+    // all strip pins become outputs
+    IO_GPIO->PDDR |= enabledPins;
+
+#if MONITOR_TPM0_PWM
+    // PTD0 mirrors TPM0 CH0, high drive strength
+    IO_PORT->PCR[0] = PORT_PCR_DSE_MASK | PORT_PCR_MUX(4);
+    // PTD1 mirrors TPM0 CH1, high drive strength
+    IO_PORT->PCR[1] = PORT_PCR_DSE_MASK | PORT_PCR_MUX(4);
+    IO_GPIO->PDDR |= 3; // monitor pins are outputs too
+    IO_GPIO->PDOR &= ~(enabledPins | 3); // start low
+#else
+    IO_GPIO->PDOR &= ~enabledPins; // start low
+#endif
+
+#if DEBUG
+    IO_PORT->PCR[DEBUG_PIN] = PORT_PCR_DSE_MASK | PORT_PCR_MUX(1);
+    IO_GPIO->PDDR |= DEBUG_MASK;
+    IO_GPIO->PDOR &= ~DEBUG_MASK;
+#endif
+}
+
+// class static
+
+/// Route the three TPM0 events to DMA channels 0..2 through the DMAMUX and
+/// enable the DMA0 interrupt in the NVIC.
+void WS2811::dma_init()
+{
+    // disable every channel we use before re-routing it
+    for (unsigned ch = DMA_CHAN_START; ch < N_DMA_CHANNELS; ++ch)
+    {
+        DMAMUX0->CHCFG[ch] = 0;
+    }
+
+    // TPM0 overflow (t=0): all enabled outputs go high
+    DMAMUX0->CHCFG[DMA_CHAN_START] =
+        DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_Overflow) | DMAMUX_CHCFG_ENBL_MASK;
+
+    // TPM0 channel 0 (t=tpm_p0_period): the 0 bits go low
+    DMAMUX0->CHCFG[DMA_CHAN_0_LOW] =
+        DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_0) | DMAMUX_CHCFG_ENBL_MASK;
+
+    // TPM0 channel 1 (t=tpm_p1_period): all outputs go low
+    DMAMUX0->CHCFG[DMA_CHAN_1_LOW] =
+        DMAMUX_CHCFG_SOURCE(DMA_MUX_SRC_TPM0_CH_1) | DMAMUX_CHCFG_ENBL_MASK;
+
+    NVIC_EnableIRQ(DMA0_IRQn);
+}
+
+// class static
+
+/// Configure TPM0 to do two different PWM periods at 800kHz rate. The counter is left stopped (CMOD = 0).
+void WS2811::tpm_init()
+{
+ // set up TPM0 for proper period (800 kHz = 1.25 usec ±600nsec)
+ TPM_Type volatile *tpm = TPM0;
+ tpm->SC = TPM_SC_DMA_MASK // enable DMA request on overflow
+ | TPM_SC_CMOD(0) // disable clocks
+ | TPM_SC_PS(0); // 48MHz / 1 = 48MHz clock
+ tpm->MOD = tpm_period - 1; // 48MHz / 800kHz; counter runs 0..tpm_period-1
+
+ // No Interrupts; High True pulses on Edge Aligned PWM; DMA request on each channel match
+ tpm->CONTROLS[0].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
+ tpm->CONTROLS[1].CnSC = TPM_CnSC_MSB_MASK | TPM_CnSC_ELSB_MASK | TPM_CnSC_DMA_MASK;
+
+ // set TPM0 channel 0 for 0.35 usec (±150nsec) / 0.8 usec (±150nsec) (0 code)
+ // nominal 1.25 usec * 1/3 = 417 nsec; tpm_p0_period is computed with integer truncation
+ tpm->CONTROLS[0].CnV = tpm_p0_period;
+
+ // set TPM0 channel 1 for 0.7 usec (±150nsec) / 0.6 usec (±150nsec) (1 code)
+ // nominal 1.25 usec * 2/3 = 833 nsec; tpm_p1_period is computed with integer truncation
+ tpm->CONTROLS[1].CnV = tpm_p1_period;
+}
+
+/// Create a strip of n LEDs driven from bit pinNumber of the I/O port.
+///
+/// @param n          number of LEDs in this strip
+/// @param pinNumber  bit position (0..31) of the output pin within the port
+///
+/// The pin is recorded in the class-wide enabledPins mask, which io_init()
+/// reads when hw_init() runs. hw_init() runs only once, so a strip constructed
+/// after that will not have its pin configured.
+WS2811::WS2811(unsigned n, unsigned pinNumber)
+    : LedStrip(n)
+    // Shifting a 32-bit value by 32 or more is undefined behaviour; an
+    // out-of-range pin number yields an empty mask (drives nothing) instead.
+    , pinMask(pinNumber < 32 ? (1U << pinNumber) : 0U)
+{
+    enabledPins |= pinMask;
+    guardtime.start();
+}
+
+// class static
+/// Program the three DMA channels to stream the shared buffers into the GPIO
+/// output register, then start TPM0 so its events pace the transfers.
+///
+/// Blocks until any transfer already in flight has finished. Completion of
+/// the new transfer is signalled by DMA0_IRQHandler setting dma_done.
+void WS2811::startDMA()
+{
+    DMA_Type volatile * dma = DMA0;
+    TPM_Type volatile *tpm = TPM0;
+    const uint32_t nBytes = sizeof(dmaData.start_t1_low)
+                          + sizeof(dmaData.dmaWords)
+                          + sizeof(dmaData.trailing_zeros_1);
+
+    // Wait for the previous transfer BEFORE touching any TPM/DMA register, so
+    // an in-flight frame is never reprogrammed underneath itself.
+    // Plain polling is used instead of __WFI(): if the completion interrupt
+    // fires between the flag check and the WFI, the core would sleep until some
+    // unrelated interrupt arrives.
+    while (!is_dma_done())
+    {
+        __NOP();
+    }
+
+    tpm->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
+    tpm->CNT = tpm_p0_period - 2 ;
+    tpm->STATUS = 0xFFFFFFFF; // clear all pending TPM event flags
+
+    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+
+    // t=0: all outputs go high
+    // triggered by TPM0_Overflow
+    // source is the start_t0_high zero word(s), then the all-ones words,
+    // then the trailing zero words
+    dma->DMA[DMA_CHAN_START].SAR = (uint32_t)(void*)dmaData.start_t0_high;
+    dma->DMA[DMA_CHAN_START].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    // t=tpm_p0_period: some outputs (the 0 bits) go low.
+    // Triggered by TPM0_CH0
+    // Start DMA_LEADING_ZEROS words before the actual data to avoid garbage pulses.
+    dma->DMA[DMA_CHAN_0_LOW].SAR = (uint32_t)(void*)dmaData.start_t1_low; // set source address
+    dma->DMA[DMA_CHAN_0_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    // t=tpm_p1_period: all outputs go low.
+    // Triggered by TPM0_CH1
+    // source is the first word of start_t1_low, read repeatedly: this channel
+    // does not set SINC, so its source address never advances
+    dma->DMA[DMA_CHAN_1_LOW].SAR = (uint32_t)(void*)dmaData.start_t1_low; // set source address
+    dma->DMA[DMA_CHAN_1_LOW].DSR_BCR = DMA_DSR_BCR_BCR_MASK & nBytes; // length of transfer in bytes
+
+    // every channel writes to the port's data output register
+    dma->DMA[DMA_CHAN_0_LOW].DAR
+        = dma->DMA[DMA_CHAN_1_LOW].DAR
+        = dma->DMA[DMA_CHAN_START].DAR
+        = (uint32_t)(void*)&IO_GPIO->PDOR;
+
+    SET_DEBUG;
+
+    // Mark the transfer as in progress before any channel can be serviced;
+    // DMA0_IRQHandler sets the flag back to true on completion.
+    dma_done = false;
+
+    dma->DMA[DMA_CHAN_0_LOW].DCR = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                 | DMA_DCR_ERQ_MASK
+                                 | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                 | DMA_DCR_SINC_MASK // increment source each transfer
+                                 | DMA_DCR_CS_MASK
+                                 | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                 | DMA_DCR_DSIZE(0); // 32-bit destination transfers
+
+    dma->DMA[DMA_CHAN_1_LOW].DCR = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                 | DMA_DCR_ERQ_MASK
+                                 | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                 | DMA_DCR_CS_MASK
+                                 | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                 | DMA_DCR_DSIZE(0); // 32-bit destination transfers
+
+    dma->DMA[DMA_CHAN_START].DCR = DMA_DCR_EINT_MASK // enable interrupt on end of transfer
+                                 | DMA_DCR_ERQ_MASK
+                                 | DMA_DCR_D_REQ_MASK // clear ERQ on end of transfer
+                                 | DMA_DCR_SINC_MASK // increment source each transfer
+                                 | DMA_DCR_CS_MASK
+                                 | DMA_DCR_SSIZE(0) // 32-bit source transfers
+                                 | DMA_DCR_DSIZE(0); // 32-bit destination transfers
+
+    tpm->SC |= TPM_SC_CMOD(1); // enable internal clocking
+}
+
+/// Expand one pixel's three colour bytes into this strip's bit of the shared
+/// DMA bit buffer.
+///
+/// @param n  pixel index within the strip; indices at or beyond
+///           MAX_LEDS_PER_STRIP are ignored because the buffer has no room for them
+/// @param p  pointer to the pixel's three bytes, in the order they are sent
+void WS2811::writePixel(unsigned n, uint8_t *p)
+{
+    // dmaWords holds exactly MAX_LEDS_PER_STRIP pixels; writing past it would
+    // corrupt the padding and all-ones regions that follow it in dmaData.
+    if (n >= MAX_LEDS_PER_STRIP)
+        return;
+
+    uint32_t *dest = dmaData.dmaWords + n * BITS_PER_RGB;
+    writeByte(*p++, pinMask, dest + 0); // G
+    writeByte(*p++, pinMask, dest + 8); // R
+    writeByte(*p, pinMask, dest + 16); // B
+}
+
+// class static
+/// Spread the 8 bits of byte, most significant first, over 8 consecutive
+/// words at dest: the bits selected by mask are set in a word when the
+/// corresponding data bit is 1 and cleared when it is 0. Other bits are kept.
+void WS2811::writeByte(uint8_t byte, uint32_t mask, uint32_t *dest)
+{
+    for (int bit = 7; bit >= 0; --bit, ++dest)
+    {
+        const bool isOne = ((byte >> bit) & 1) != 0;
+        *dest = isOne ? (*dest | mask) : (*dest & ~mask);
+    }
+}
+
+void WS2811::begin() // clear the pixel data and send it out once
+{
+ blank(); // zero the pixel buffer
+ show(); // encode and start the DMA transfer
+}
+
+/// Set every pixel of this strip to off (all colour bytes zero).
+///
+/// Also clears this strip's bit in the shared DMA bit buffer. Only the bit
+/// belonging to this strip's pin is cleared: the buffer carries one bit per
+/// strip in every word, so zeroing whole words would blank every other strip
+/// on the next transfer as well.
+void WS2811::blank()
+{
+    memset(pixels, 0x00, numPixelBytes());
+
+#if DEBUG
+    for (unsigned i = DMA_LEADING_ZEROS; i < DMA_LEADING_ZEROS + BITS_PER_RGB; i++)
+        dmaData.dmaWords[i] = DEBUG_MASK;
+#else
+    for (unsigned i = 0; i < BITS_PER_RGB * MAX_LEDS_PER_STRIP; i++)
+        dmaData.dmaWords[i] &= ~pinMask;
+#endif
+}
+
+/// Encode this strip's pixel bytes into the shared DMA buffer and start a
+/// transfer. Initialises the hardware on first use and waits until at least
+/// 50 us have passed on the guard timer before encoding.
+void WS2811::show()
+{
+    hw_init();
+
+    const uint16_t count = numPixels();
+    uint8_t *rgb = pixels; // 3 bytes per LED
+
+    // hold off until the guard interval has elapsed
+    while (guardtime.read_us() < 50)
+    {
+        __NOP();
+    }
+
+    for (uint16_t led = 0; led < count; ++led, rgb += 3)
+    {
+        writePixel(led, rgb);
+    }
+
+    startDMA();
+
+    guardtime.reset();
+}
+
+/// If the given DMA channel reports DONE, acknowledge it by writing the DONE
+/// bit back. Returns whether DONE was set.
+static bool ack_dma_done(DMA_Type volatile *dma, unsigned channel)
+{
+    const uint32_t status = dma->DMA[channel].DSR_BCR;
+    if ((status & DMA_DSR_BCR_DONE_MASK) == 0)
+    {
+        return false;
+    }
+    dma->DMA[channel].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
+    return true;
+}
+
+/// DMA interrupt handler: acknowledges completion on each of the three
+/// channels; when DMA_CHAN_1_LOW is done, flags the transfer as finished and
+/// stops the TPM0 counter.
+extern "C" void DMA0_IRQHandler()
+{
+    DMA_Type volatile * const dma = DMA0;
+
+    ack_dma_done(dma, DMA_CHAN_START);
+    ack_dma_done(dma, DMA_CHAN_0_LOW);
+
+    if (ack_dma_done(dma, DMA_CHAN_1_LOW))
+    {
+        dma_done = true;
+        TPM0->SC &= ~TPM_SC_CMOD_MASK; // disable internal clocking
+        RESET_DEBUG;
+    }
+}
- dma_done = false;
-
- spi->C2 |= SPI_C2_TXDMAE_MASK;
-
- // wait until done
- // while (!(DMA0->DMA[dmaWriteChannel].DSR_BCR & DMA_DSR_BCR_DONE_MASK))
- while (!dma_done)
- __NOP();
-
- spi->C2 &= ~SPI_C2_TXDMAE_MASK;
- debugOut = 0;
-
- dump_spi_settings(spi);
-
-}
-
-void WS2811::writePixel(uint8_t *p)
-{
- writeByte(*p++, dmaBytes + 0);
- writeByte(*p++, dmaBytes + 16);
- writeByte(*p, dmaBytes + 32);
-// printf("DMA Bytes:\r\n");
-// for (int i = 0; i < sizeof(dmaBytes); i++)
-// printf(" %02x", dmaBytes[i]);
-// printf("\r\n");
- startDMA();
-}
-
-void WS2811::writeByte(uint8_t byte, uint8_t *dest)
-{
- for (uint8_t mask = 0x80; mask; mask >>= 1) {
- if (mask & byte)
- *dest++ = 0xff; // 8 high
- else
- *dest++ = 0xe0; // 3 high, 5 low
- *dest++ = 0x00; // 8 more low
- }
-}
-
-void WS2811::begin(void)
-{
- blank();
- show();
-}
-
-void WS2811::blank(void)
-{
- memset(pixels, 0x00, numPixelBytes());
-}
-
-
-void WS2811::show(void)
-{
- uint16_t i, n = numPixels(); // 3 bytes per LED
- uint8_t *p = pixels;
- while (guardtime.read_us() < 50)
- __NOP();
- for (i=0; i<n; i++ ) {
- writePixel(p);
- pc.printf("%d> ", i);
- pc.getc();
- p += 3;
- }
- guardtime.reset();
-}
-
-extern "C" void DMA0IntHandler()
-{
- DMA0->DMA[dmaWriteChannel].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status
- dma_done = true;
-}
-
Ned Konz


Generic WS2811/WS2812