Adafruit-RGB_matrix_Panel(32*16)

Dependencies:   Adafruit-GFX

RGBmatrixPanel.cpp

Committer:
lelect
Date:
2014-05-24
Revision:
3:aa3762e0dfee
Parent:
2:6136465ffd3a
Child:
4:0ff6053c4bb2

File content as of revision 3:aa3762e0dfee:

#define DEBUG
#undef DEBUG
#include "RGBmatrixPanel.h"
#include "gamma.h"

#define nPlanes 4

// The display refresh is driven from a single periodic callback (a
// hardware Timer1 interrupt in the AVR code this was ported from), which
// doesn't really take well to object orientation with multiple
// RGBmatrixPanel instances.  The solution at present is to allow
// instances, but only one is active at any given time, via its begin()
// method, which stores the instance in this pointer.  The implementation
// is still incomplete in parts; the prior active panel really should be
// gracefully disabled, and a stop() method should perhaps be added...
// assuming multiple instances are even an actual need.
static RGBmatrixPanel *activePanel = NULL;

// Code common to both the 16x32 and 32x32 constructors.
//
// rows: number of multiplexed rows (the panel height is twice this).
// dbuf: when true, a second frame buffer is allocated for double-buffering.
//
// The scan state is initialised *before* the allocation is attempted so
// that a failed malloc() leaves every member in a defined state
// (matrixbuff[0] and matrixbuff[1] both NULL) instead of uninitialised.
void RGBmatrixPanel::init(uint8_t rows, bool dbuf)
{
    nRows = rows; // Number of multiplexed rows; actual height is 2X this

    // Start on the last plane/row so the first refresh wraps to plane 0, row 0.
    plane     = nPlanes - 1;
    row       = nRows   - 1;
    swapflag  = false;
    backindex = 0;     // Array index of back buffer
    matrixbuff[1] = NULL;

    // Allocate and initialize matrix buffer:
    const int buffsize  = 32 * nRows * 3; // x3 = 3 bytes holds 4 planes "packed"
    const int allocsize = dbuf ? (buffsize * 2) : buffsize;
    matrixbuff[0] = (uint8_t *)malloc(allocsize);
    if(matrixbuff[0] == NULL) return;     // Out of memory; both buffers stay NULL
    memset(matrixbuff[0], 0, allocsize);

    // If not double-buffered, both buffers then point to the same address:
    matrixbuff[1] = dbuf ? &matrixbuff[0][buffsize] : matrixbuff[0];
}

// Constructor for 16x32 panel.
//   r1,g1,b1 / r2,g2,b2 : colour data pins, grouped into _dataBus in the
//                         order r1,g1,b1,r2,g2,b2
//   a,b,c               : row address pins, grouped into _rowBus
//   sclk, latch, oe     : shift clock, latch and output-enable pins
//   dbuf                : true to allocate a second buffer (double-buffering)
// The fourth address line is not used here, so _d is built from NC.
// NOTE(review): _rowBus is built as (c,b,a); if the bus treats its first
// pin as bit 0, address line C receives row bit 0 -- confirm this is the
// intended wiring.
RGBmatrixPanel::RGBmatrixPanel(PinName r1, PinName r2,
                               PinName g1, PinName g2,
                               PinName b1, PinName b2,
                               PinName a, PinName b, PinName c,
                               PinName sclk, PinName latch, PinName oe,
                               bool dbuf)
    : Adafruit_GFX(32, 16),
      _sclk(sclk),
      _latch(latch),
      _oe(oe),
      _d(NC),
      _dataBus(r1, g1, b1, r2, g2, b2),
      _rowBus(c, b, a)
{
    const uint8_t multiplexedRows = 8; // 16 pixel rows, scanned two at a time
    init(multiplexedRows, dbuf);
}

// Constructor for 32x32 panel.
//   r1,g1,b1 / r2,g2,b2 : colour data pins, grouped into _dataBus in the
//                         order r1,g1,b1,r2,g2,b2
//   a,b,c               : row address pins, grouped into _rowBus
//   d                   : fourth row address pin (32x32-specific), held in _d
//   sclk, latch, oe     : shift clock, latch and output-enable pins
//   dbuf                : true to allocate a second buffer (double-buffering)
// NOTE(review): nothing in this file's visible code writes to _d after
// construction -- confirm the fourth address line is driven somewhere.
RGBmatrixPanel::RGBmatrixPanel(PinName r1, PinName r2,
                               PinName g1, PinName g2,
                               PinName b1, PinName b2,
                               PinName a, PinName b, PinName c, PinName d,
                               PinName sclk, PinName latch, PinName oe,
                               bool dbuf)
    : Adafruit_GFX(32, 32),
      _sclk(sclk),
      _latch(latch),
      _oe(oe),
      _d(d),
      _dataBus(r1, g1, b1, r2, g2, b2),
      _rowBus(c, b, a)
{
    const uint8_t multiplexedRows = 16; // 32 pixel rows, scanned two at a time
    init(multiplexedRows, dbuf);
}

// Make this panel the active one and start the periodic refresh.
// Buffer index 0 becomes the back (drawing) buffer and the refresh pointer
// is aimed at the other (front) buffer; updateDisplay() is then attached
// to the _refresh timer with a 1 ms period.
void RGBmatrixPanel::begin(void)
{
    activePanel = this;                      // For interrupt handler

    backindex   = 0;                         // Back buffer
    buffptr     = matrixbuff[1 - backindex]; // -> front buffer

    // The original AVR code configured the data port direction and Timer1
    // registers here; this port relies on the pin objects and the timer
    // callback below instead.
    _refresh.attach(this, &RGBmatrixPanel::updateDisplay, 0.001); // every 1 ms
}

// Original RGBmatrixPanel library used 3/3/3 color.  Later version used
// 4/4/4.  Then Adafruit_GFX (core library used across all Adafruit
// display devices now) standardized on 5/6/5.  The matrix still operates
// internally on 4/4/4 color, but all the graphics functions are written
// to expect 5/6/5...the matrix lib will truncate the color components as
// needed when drawing.  These next functions are mostly here for the
// benefit of older code using one of the original color formats.

// Promote 3/3/3 RGB to Adafruit_GFX 5/6/5.
// Each 3-bit component fills the top of its field; the low bits of the
// field are padded by repeating the component's most significant bits.
uint16_t RGBmatrixPanel::Color333(uint8_t r, uint8_t g, uint8_t b)
{
    // Target layout: RRRrrGGGgggBBBbb
    const uint16_t red   = ((r & 0x7) << 13) | ((r & 0x6) << 10);
    const uint16_t green = ((g & 0x7) <<  8) | ((g & 0x7) <<  5);
    const uint16_t blue  = ((b & 0x7) <<  2) | ((b & 0x6) >>  1);
    return red | green | blue;
}

// Promote 4/4/4 RGB to Adafruit_GFX 5/6/5.
// Each 4-bit component fills the top of its field; the remaining low
// bit(s) of the field repeat the component's most significant bit(s).
uint16_t RGBmatrixPanel::Color444(uint8_t r, uint8_t g, uint8_t b)
{
    // Target layout: RRRRrGGGGggBBBBb
    const uint16_t red   = ((r & 0xF) << 12) | ((r & 0x8) << 8);
    const uint16_t green = ((g & 0xF) <<  7) | ((g & 0xC) << 3);
    const uint16_t blue  = ((b & 0xF) <<  1) | ((b & 0x8) >> 3);
    return red | green | blue;
}

// Demote 8/8/8 to Adafruit_GFX 5/6/5.
// No gamma flag is taken by this overload; the color is treated as linear.
// Layout of the result: RRRRRGGGGGGBBBBB -- the top 5 bits of r land in
// bits 15..11, the top 6 bits of g in bits 10..5 and the top 5 bits of b
// in bits 4..0.
uint16_t RGBmatrixPanel::Color888(uint8_t r, uint8_t g, uint8_t b)
{
    return (static_cast<uint16_t>(r & 0xF8) << 8) |
           (static_cast<uint16_t>(g & 0xFC) << 3) |
           (b >> 3);
}

// 8/8/8 -> (optional gamma) -> 5/6/5.
// gflag true : each component is looked up in the gamma table (8-bit in,
//              4-bit out per the table's usage here) and the 4/4/4 result
//              is promoted to 5/6/5.
// gflag false: linear color; the top 5/6/5 bits of r/g/b are packed into
//              bits 15..11, 10..5 and 4..0 respectively.
uint16_t RGBmatrixPanel::Color888(uint8_t r, uint8_t g, uint8_t b, bool gflag)
{
    if(gflag) { // Gamma-corrected color?
        r = gamma[r]; // Gamma correction table maps
        g = gamma[g]; // 8-bit input to 4-bit output
        b = gamma[b];
        return (r << 12) | ((r & 0x8) << 8) | // 4/4/4 -> 5/6/5
               (g <<  7) | ((g & 0xC) << 3) |
               (b <<  1) | ( b        >> 3);
    } // else linear (uncorrected) color
    return (static_cast<uint16_t>(r & 0xF8) << 8) |
           (static_cast<uint16_t>(g & 0xFC) << 3) |
           (b >> 3);
}

// Convert hue/saturation/value to Adafruit_GFX 5/6/5.
//   hue   : position on the color wheel; any value is accepted and wrapped
//           into 0..1535 (six sextants of 256 steps each)
//   sat   : saturation, 0..255
//   val   : value (brightness), 0..255
//   gflag : true to run the result through the gamma table, false for a
//           linear reduction to 4 bits per component
// The color is reduced to 4/4/4 internally and then promoted to 5/6/5.
uint16_t RGBmatrixPanel::ColorHSV(long hue, uint8_t sat, uint8_t val, bool gflag)
{
    uint8_t red, green, blue;

    // Hue: wrap into a single turn of the wheel.
    hue %= 1536;                  // -1535 to +1535
    if(hue < 0) {
        hue += 1536;              //     0 to +1535
    }
    const uint8_t rising  = hue & 255;    // Low byte = primary/secondary mix
    const uint8_t falling = 255 - rising;
    const long    sextant = hue >> 8;     // High byte = sextant of colorwheel

    if(sextant == 0) {            // R to Y
        red = 255;     green = rising;  blue = 0;
    } else if(sextant == 1) {     // Y to G
        red = falling; green = 255;     blue = 0;
    } else if(sextant == 2) {     // G to C
        red = 0;       green = 255;     blue = rising;
    } else if(sextant == 3) {     // C to B
        red = 0;       green = falling; blue = 255;
    } else if(sextant == 4) {     // B to M
        red = rising;  green = 0;       blue = 255;
    } else {                      // M to R
        red = 255;     green = 0;       blue = falling;
    }

    // Saturation: adding 1 makes the scale 1 to 256, allowing a shift
    // instead of a divide; the wider type avoids repeated conversions.
    const uint16_t satScale = sat + 1;
    red   = 255 - (((255 - red)   * satScale) >> 8);
    green = 255 - (((255 - green) * satScale) >> 8);
    blue  = 255 - (((255 - blue)  * satScale) >> 8);

    // Value (brightness) and reduction to 4 bits, using the same +1 trick.
    const uint16_t valScale = val + 1;
    if(gflag) {
        // Gamma-corrected: table maps 8-bit input to 4-bit output.
        red   = gamma[(red   * valScale) >> 8];
        green = gamma[(green * valScale) >> 8];
        blue  = gamma[(blue  * valScale) >> 8];
    } else {
        // Linear (uncorrected): 4-bit results.
        red   = (red   * valScale) >> 12;
        green = (green * valScale) >> 12;
        blue  = (blue  * valScale) >> 12;
    }

    // 4/4/4 -> 5/6/5
    return (red   << 12) | ((red   & 0x8) << 8) |
           (green <<  7) | ((green & 0xC) << 3) |
           (blue  <<  1) | ( blue         >> 3);
}

// Set one pixel in the back buffer.
//   x, y : coordinates in the current rotation; out-of-range pixels are
//          silently ignored
//   c    : 5/6/5 color; only the top 4 bits of each component are kept
//
// Buffer layout, per multiplexed row (3 runs of 32 bytes each):
//   bits 2..4 of each byte : R,G,B of planes 1..3 for the upper half
//   bits 5..7 of each byte : R,G,B of planes 1..3 for the lower half
//   bits 0..1 of each byte : plane 0 data, spread across the three runs:
//       run 0: bit 0 = lower G, bit 1 = lower B
//       run 1: bit 0 = upper B, bit 1 = lower R
//       run 2: bit 0 = upper R, bit 1 = upper G
void RGBmatrixPanel::drawPixel(int16_t x, int16_t y, uint16_t c)
{
    uint8_t r, g, b, bit, limit, *ptr;
    if((x < 0) || (x >= _width) || (y < 0) || (y >= _height)) return;
    switch(rotation) {
        case 1:
            swap(x, y);
            x = _rawWidth  - 1 - x;
            break;
        case 2:
            x = _rawWidth  - 1 - x;
            y = _rawHeight - 1 - y;
            break;
        case 3:
            swap(x, y);
            y = _rawHeight - 1 - y;
            break;
    }

    // Adafruit_GFX uses 16-bit color in 5/6/5 format, while matrix needs
    // 4/4/4.  Pluck out relevant bits while separating into R,G,B:
    r =  c >> 12;        // RRRRrggggggbbbbb
    g = (c >>  7) & 0xF; // rrrrrGGGGggbbbbb
    b = (c >>  1) & 0xF; // rrrrrggggggBBBBb
    // Loop counter stuff: planes 1..3 correspond to component bits 2, 4, 8
    bit   = 2;
    limit = 1 << nPlanes;

    if(y < nRows) {
        // Data for the upper half of the display is stored in the lower bits of each byte.
        ptr = &matrixbuff[backindex][y*_rawWidth*(nPlanes-1) + x]; // Base addr
        // Plane 0 is a tricky case -- its data is spread about,
        // stored in least two bits not used by the other planes.
        ptr[64] &= ~(_BV(0)|_BV(1));   // Plane 0 R,G mask out in one op
        if(r & 1) ptr[64] |=  _BV(0);  // Plane 0 R: 64 bytes ahead, bit 0
        if(g & 1) ptr[64] |=  _BV(1);  // Plane 0 G: 64 bytes ahead, bit 1
        if(b & 1) ptr[32] |=  _BV(0);  // Plane 0 B: 32 bytes ahead, bit 0
        else      ptr[32] &= ~_BV(0);  // Plane 0 B unset; mask out
        // The remaining three image planes are more normal-ish: the data
        // sits in bits 2..7 so it can be shifted out with little unpacking.
        for(; bit < limit; bit <<= 1) {
            *ptr &= ~(_BV(2)|_BV(3)|_BV(4)); // Mask out R,G,B in one op
            if(r & bit) *ptr |= _BV(2);      // Plane N R: bit 2
            if(g & bit) *ptr |= _BV(3);      // Plane N G: bit 3
            if(b & bit) *ptr |= _BV(4);      // Plane N B: bit 4
            ptr  += _rawWidth;               // Advance to next bit plane
        }
    } else {
        // Data for the lower half of the display is stored in the upper bits, except for the plane 0 stuff, using 2 least bits.
        ptr = &matrixbuff[backindex][(y-nRows)*_rawWidth*(nPlanes-1) + x];
        *ptr &= ~(_BV(0)|_BV(1));           // Plane 0 G,B mask out in one op
        if(r & 1)  ptr[32] |=  _BV(1);      // Plane 0 R: 32 bytes ahead, bit 1
        else       ptr[32] &= ~_BV(1);      // Plane 0 R unset; clear the SAME bit
                                            // (bit 2 belongs to the upper half's R)
        if(g & 1) *ptr     |=  _BV(0);      // Plane 0 G: bit 0
        if(b & 1) *ptr     |=  _BV(1);      // Plane 0 B: bit 1
        for(; bit < limit; bit <<= 1) {
            *ptr &= ~(_BV(5)|_BV(6)|_BV(7)); // Mask out R,G,B in one op
            if(r & bit) *ptr |= _BV(5);      // Plane N R: bit 5
            if(g & bit) *ptr |= _BV(6);      // Plane N G: bit 6
            if(b & bit) *ptr |= _BV(7);      // Plane N B: bit 7
            ptr  += _rawWidth;               // Advance to next bit plane
        }
    }
}

// Fill the whole back buffer with one 5/6/5 color.
// Black (0x0000) and white (0xffff) are special-cased: every bit of the
// frame buffer ends up identically clear or set regardless of the bit
// packing, so a single memset does the job.  Any other color goes through
// the generic Adafruit_GFX implementation.
void RGBmatrixPanel::fillScreen(uint16_t c)
{
    const bool allBitsEqual = (c == 0x0000) || (c == 0xffff);
    if(!allBitsEqual) {
        // Need to handle it the long way:
        Adafruit_GFX::fillScreen(c);
        return;
    }
    memset(matrixbuff[backindex], c, 32 * nRows * 3);
}

// Hand out the address of the back (drawing) buffer so that callers can
// load or store frame data directly.
uint8_t *RGBmatrixPanel::backBuffer()
{
    uint8_t *const drawTarget = matrixbuff[backindex];
    return drawTarget;
}

// For smooth animation: drawing always happens in the "back" buffer, and
// this method promotes it to the "front" for display.
//   copy == true  : after the swap, the displayed contents are copied into
//                   the new back buffer so they can be modified
//                   incrementally.
//   copy == false : the new back buffer keeps the old front contents; the
//                   caller should clear it or redraw every pixel.
// Does nothing beyond logging when double-buffering is not enabled (both
// buffer pointers are the same).  Blocks, polling once per millisecond,
// until the refresh handler has performed the swap.
void RGBmatrixPanel::swapBuffers(bool copy)
{
    log_debug("call swapBuffers %s","\r\n");
    if(matrixbuff[0] == matrixbuff[1]) {
        return; // Single-buffered: nothing to swap
    }

    // To avoid tearing, the actual swap is done by the refresh handler at
    // the end of a complete screen refresh cycle.  Request it...
    swapflag = true;
    // ...and wait for the handler to acknowledge by clearing the flag.
    while(swapflag == true) {
        wait_ms(1);
    }

    if(!copy) {
        log_debug("\tnot memcpy %s","\r\n");
        return;
    }
    log_debug("\tmemcpy %s","\r\n");
    memcpy(matrixbuff[backindex], matrixbuff[1-backindex], 32 * nRows * 3);
}

// Dump the back buffer through log_debug as comma-separated hex bytes,
// 32 bytes per line, formatted so the output can be pasted into another
// program as an embedded image table.  A dashed separator line is emitted
// at the start of each third of the buffer (every 32*nRows bytes).
void RGBmatrixPanel::dumpMatrix(void)
{
    log_debug("call dumpMatrix%s","\r\n");
    const int sectionBytes = 32 * nRows;
    const int totalBytes   = sectionBytes * 3;
    int idx = 0;
    while(idx < totalBytes) {
        const bool sectionStart = ((idx % sectionBytes) == 0);
        if(sectionStart) {
            // Separator line
            int dash = 0;
            while(dash < 32*5) {
                log_debug("-%c",'\0');
                ++dash;
            }
            log_debug("-%s","\r\n");
        }
        log_debug("0x%02X",matrixbuff[backindex][idx]);
        const bool endOfLine = ((idx % 32) == 31);
        if(endOfLine) {
            log_debug(",\r\n");
        } else {
            log_debug(",");
        }
        ++idx;
    }
    log_debug("%s","\r\n");
}

// -------------------- Interrupt handler stuff --------------------
/*
ISR(TIMER1_OVF_vect, ISR_BLOCK)   // ISR_BLOCK important -- see notes later
{
    activePanel->updateDisplay();   // Call refresh func for active display
    TIFR1 |= TOV1;                  // Clear Timer1 interrupt flag
}
*/
// Two constants are used in timing each successive BCM interval.
// These were found empirically, by checking the value of TCNT1 at
// certain positions in the interrupt code.
// CALLOVERHEAD is the number of CPU 'ticks' from the timer overflow
// condition (triggering the interrupt) to the first line in the
// updateDisplay() method.  It's then assumed (maybe not entirely 100%
// accurately, but close enough) that a similar amount of time will be
// needed at the opposite end, restoring regular program flow.
// LOOPTIME is the number of 'ticks' spent inside the shortest data-
// issuing loop (not actually a 'loop' because it's unrolled, but eh).
// Both numbers are rounded up slightly to allow a little wiggle room
// should different compilers produce slightly different results.
#define CALLOVERHEAD 60   // Actual value measured = 56
#define LOOPTIME     200  // Actual value measured = 188
// The "on" time for bitplane 0 (with the shortest BCM interval) can
// then be estimated as LOOPTIME + CALLOVERHEAD * 2.  Each successive
// bitplane then doubles the prior amount of time.  We can then
// estimate refresh rates from this:
// 4 bitplanes = 320 + 640 + 1280 + 2560 = 4800 ticks per row.
// 4800 ticks * 16 rows (for 32x32 matrix) = 76800 ticks/frame.
// 16M CPU ticks/sec / 76800 ticks/frame = 208.33 Hz.
// Actual frame rate will be slightly less due to work being done
// during the brief "LEDs off" interval...it's reasonable to say
// "about 200 Hz."  The 16x32 matrix only has to scan half as many
// rows...so we could either double the refresh rate (keeping the CPU
// load the same), or keep the same refresh rate but halve the CPU
// load.  We opted for the latter.
// Can also estimate CPU use: bitplanes 1-3 all use 320 ticks to
// issue data (the increasing gaps in the timing intervals are then
// available to other code), and bitplane 0 takes 920 ticks out of
// the 2560 tick interval.
// 320 * 3 + 920 = 1880 ticks spent in interrupt code, per row.
// From prior calculations, about 4800 ticks happen per row.
// CPU use = 1880 / 4800 = ~39% (actual use will be very slightly
// higher, again due to code used in the LEDs off interval).
// 16x32 matrix uses about half that CPU load.  CPU time could be
// further adjusted by padding the LOOPTIME value, but refresh rates
// will decrease proportionally, and 200 Hz is a decent target.

// The flow of the interrupt can be awkward to grasp, because data is
// being issued to the LED matrix for the *next* bitplane and/or row
// while the *current* plane/row is being shown.  As a result, the
// counter variables change between past/present/future tense in mid-
// function...hopefully tenses are sufficiently commented.

// Periodic refresh handler: latches the data that was shifted out on the
// PRIOR invocation, then shifts out the data for the NEXT plane/row.
//
// Sequence per call:
//   1. Blank the LEDs (_oe high) and raise _latch so the previously
//      shifted data is latched.
//   2. Advance the plane counter; on wrap advance the row counter; on row
//      wrap perform a pending buffer swap and rewind buffptr to the start
//      of the front buffer.
//   3. When plane becomes 1, the data being latched is plane 0 of the
//      current row, so this is the moment the row address is updated.
//      (On the wrap to plane 0 the latched data still belongs to the
//      previous row, so the address must not change yet.)
//   4. Re-enable output, drop the latch, and clock out 32 columns.
//
// The 6-bit data bus is ordered r1,g1,b1,r2,g2,b2 (bit 0 first), which
// matches bits 2..7 of each frame-buffer byte for planes 1..3.
// NOTE(review): only _rowBus is written here; the fourth address line used
// by 32x32 panels is not driven in this function -- confirm where it is.
void RGBmatrixPanel::updateDisplay(void)
{
    //log_debug("call updateDisplay\t(plane,row)=(%d,%d)\r\n",plane,row);
    _oe=1;     // Disable LED output during row/plane switchover
    _latch=1;  // Latch data loaded during *prior* invocation
    if(++plane >= nPlanes) {        // Advance plane counter.  Maxed out?
        plane = 0;                  // Yes, reset to plane 0, and
        if(++row >= nRows) {        // advance row counter.  Maxed out?
            row= 0;                 // Yes, reset row counter, then...
            if(swapflag == true) {  // Swap front/back buffers if requested
                backindex = 1 - backindex;
                log_debug("\t\treset swapflag%s","\r\n");
                swapflag  = false;
            }
            log_debug("\tReset into front buffer[%d]%s",backindex,"\r\n");
            buffptr = matrixbuff[1-backindex]; // Reset into front buffer
        }
    } else if(plane == 1) {
        log_debug("\r\n\tset row@(%d,%d)\r\n",plane,row);
        // Plane 0 was loaded on prior invocation and is about to be
        // shown, so update the row address lines now:
        _rowBus=row;
    }
    _oe=0;     // Re-enable output
    _latch=0;  // Latch down
    // buffptr, being 'volatile' type, doesn't take well to optimization.
    // A local copy can speed some things up:
    uint8_t *ptr = (uint8_t *)buffptr;

    if(plane > 0) {
        // Planes 1-3 are stored "in place": bits 2..7 of each byte are
        // already in bus order, so only a shift is needed.
        for(int i=0; i<32; i++) {
            _dataBus=ptr[i] >> 2;
            _sclk=1;
            _sclk=0;
        }
        buffptr += 32; // Advance to the next plane's run of bytes
    } else {
        // Plane 0 is packed into the 2 least bits of the three runs used
        // by planes 1-3 of this row.  Reassemble the 6 bus bits:
        //   bus bits 0,1 (r1,g1) <- run 2 (ptr[i+64]) bits 0,1
        //   bus bits 2,3 (b1,r2) <- run 1 (ptr[i+32]) bits 0,1
        //   bus bits 4,5 (g2,b2) <- run 0 (ptr[i])    bits 0,1
        // buffptr is NOT advanced: plane 1 starts at this same address.
        for(int i=0; i<32; i++) {
            _dataBus=((ptr[i]    & 0x03) << 4) |
                     ((ptr[i+32] & 0x03) << 2) |
                      (ptr[i+64] & 0x03);
            _sclk=1;
            _sclk=0;
            log_debug("\t\t %02x@(%d,%d)",_dataBus.read(),plane,row);
        }
    }
}