Adafruit-RGB_matrix_Panel(32*16)

Dependencies:   Adafruit-GFX

Files at this revision

API Documentation at this revision

Comitter:
lelect
Date:
Fri May 23 15:08:14 2014 +0000
Child:
1:0078213d3fa4
Commit message:
Initial commit

Changed in this revision

Adafruit-GFX.lib Show annotated file Show diff for this revision Revisions of this file
RGBmatrixPanel.cpp Show annotated file Show diff for this revision Revisions of this file
RGBmatrixPanel.h Show annotated file Show diff for this revision Revisions of this file
gamma.h Show annotated file Show diff for this revision Revisions of this file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Adafruit-GFX.lib	Fri May 23 15:08:14 2014 +0000
@@ -0,0 +1,1 @@
+Adafruit-GFX#3e9c32359703
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RGBmatrixPanel.cpp	Fri May 23 15:08:14 2014 +0000
@@ -0,0 +1,536 @@
+#include "RGBmatrixPanel.h"
+#include "gamma.h"
+
+#define DATAPORT PORTD
+#define DATADIR  DDRD
+#define SCLKPORT PORTB
+
+#define nPlanes 4
+
+// The fact that the display driver interrupt stuff is tied to the
+// singular Timer1 doesn't really take well to object orientation with
+// multiple RGBmatrixPanel instances.  The solution at present is to
+// allow instances, but only one is active at any given time, via its
+// begin() method.  The implementation is still incomplete in parts;
+// the prior active panel really should be gracefully disabled, and a
+// stop() method should perhaps be added...assuming multiple instances
+// are even an actual need.
+static RGBmatrixPanel *activePanel = NULL;
+
+// Code common to both the 16x32 and 32x32 constructors:
+void RGBmatrixPanel::init(uint8_t rows, uint8_t a, uint8_t b, uint8_t c, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf)
+{
+    nRows = rows; // Number of multiplexed rows; actual height is 2X this
+    // Allocate and initialize matrix buffer:
+    int buffsize  = 32 * nRows * 3, // x3 = 3 bytes holds 4 planes "packed"
+        allocsize = (dbuf == true) ? (buffsize * 2) : buffsize;
+    if(NULL == (matrixbuff[0] = (uint8_t *)malloc(allocsize))) return;
+    memset(matrixbuff[0], 0, allocsize);
+    // If not double-buffered, both buffers then point to the same address:
+    matrixbuff[1] = (dbuf == true) ? &matrixbuff[0][buffsize] : matrixbuff[0];
+
+    // Save pin numbers for use by begin() method later.
+    _a     = a;
+    _a     = a;
+    _b     = b;
+    _c     = c;
+    _sclk  = sclk;
+    _latch = latch;
+    _oe    = oe;
+
+    // Look up port registers and pin masks ahead of time,
+    // avoids many slow digitalWrite() calls later.
+    sclkpin   = digitalPinToBitMask(sclk);
+    latport   = portOutputRegister(digitalPinToPort(latch));
+    latpin    = digitalPinToBitMask(latch);
+    oeport    = portOutputRegister(digitalPinToPort(oe));
+    oepin     = digitalPinToBitMask(oe);
+    addraport = portOutputRegister(digitalPinToPort(a));
+    addrapin  = digitalPinToBitMask(a);
+    addrbport = portOutputRegister(digitalPinToPort(b));
+    addrbpin  = digitalPinToBitMask(b);
+    addrcport = portOutputRegister(digitalPinToPort(c));
+    addrcpin  = digitalPinToBitMask(c);
+    plane     = nPlanes - 1;
+    row       = nRows   - 1;
+    swapflag  = false;
+    backindex = 0;     // Array index of back buffer
+}
+
+// Constructor for 16x32 panel:
+RGBmatrixPanel::RGBmatrixPanel(uint8_t a, uint8_t b, uint8_t c, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf)
+    :Adafruit_GFX(32, 16)
+{
+    init(8, a, b, c, sclk, latch, oe, dbuf);
+}
+
+// Constructor for 32x32 panel:
+RGBmatrixPanel::RGBmatrixPanel(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf)
+    :Adafruit_GFX(32, 32)
+{
+    init(16, a, b, c, sclk, latch, oe, dbuf);
+
+    // Init a few extra 32x32-specific elements:
+    _d        = d;
+    addrdport = portOutputRegister(digitalPinToPort(d));
+    addrdpin  = digitalPinToBitMask(d);
+}
+
+void RGBmatrixPanel::begin(void)
+{
+
+    backindex   = 0;                         // Back buffer
+    buffptr     = matrixbuff[1 - backindex]; // -> front buffer
+    activePanel = this;                      // For interrupt hander
+
+    // Enable all comm & address pins as outputs, set default states:
+    pinMode(_sclk , OUTPUT);
+    SCLKPORT   &= ~sclkpin;  // Low
+    pinMode(_latch, OUTPUT);
+    *latport   &= ~latpin;   // Low
+    pinMode(_oe   , OUTPUT);
+    *oeport    |= oepin;     // High (disable output)
+    pinMode(_a    , OUTPUT);
+    *addraport &= ~addrapin; // Low
+    pinMode(_b    , OUTPUT);
+    *addrbport &= ~addrbpin; // Low
+    pinMode(_c    , OUTPUT);
+    *addrcport &= ~addrcpin; // Low
+    if(nRows > 8) {
+        pinMode(_d  , OUTPUT);
+        *addrdport &= ~addrdpin; // Low
+    }
+
+    // The high six bits of the data port are set as outputs;
+    // Might make this configurable in the future, but not yet.
+    DATADIR  = B11111100;
+    DATAPORT = 0;
+
+    // Set up Timer1 for interrupt:
+    TCCR1A  = _BV(WGM11); // Mode 14 (fast PWM), OC1A off
+    TCCR1B  = _BV(WGM13) | _BV(WGM12) | _BV(CS10); // Mode 14, no prescale
+    ICR1    = 100;
+    TIMSK1 |= _BV(TOIE1); // Enable Timer1 interrupt
+    sei();                // Enable global interrupts
+}
+
+// Original RGBmatrixPanel library used 3/3/3 color.  Later version used
+// 4/4/4.  Then Adafruit_GFX (core library used across all Adafruit
+// display devices now) standardized on 5/6/5.  The matrix still operates
+// internally on 4/4/4 color, but all the graphics functions are written
+// to expect 5/6/5...the matrix lib will truncate the color components as
+// needed when drawing.  These next functions are mostly here for the
+// benefit of older code using one of the original color formats.
+
+// Promote 3/3/3 RGB to Adafruit_GFX 5/6/5
+uint16_t RGBmatrixPanel::Color333(uint8_t r, uint8_t g, uint8_t b)
+{
+    // RRRrrGGGgggBBBbb
+    return ((r & 0x7) << 13) | ((r & 0x6) << 10) |
+           ((g & 0x7) <<  8) | ((g & 0x7) <<  5) |
+           ((b & 0x7) <<  2) | ((b & 0x6) >>  1);
+}
+
+// Promote 4/4/4 RGB to Adafruit_GFX 5/6/5
+uint16_t RGBmatrixPanel::Color444(uint8_t r, uint8_t g, uint8_t b)
+{
+    // RRRRrGGGGggBBBBb
+    return ((r & 0xF) << 12) | ((r & 0x8) << 8) |
+           ((g & 0xF) <<  7) | ((g & 0xC) << 3) |
+           ((b & 0xF) <<  1) | ((b & 0x8) >> 3);
+}
+
+// Demote 8/8/8 to Adafruit_GFX 5/6/5
+// If no gamma flag passed, assume linear color
+uint16_t RGBmatrixPanel::Color888(uint8_t r, uint8_t g, uint8_t b)
+{
+    return ((r & 0xF8) << 11) | ((g & 0xFC) << 5) | (b >> 3);
+}
+
+// 8/8/8 -> gamma -> 5/6/5
+uint16_t RGBmatrixPanel::Color888(
+    uint8_t r, uint8_t g, uint8_t b, bool gflag)
+{
+    if(gflag) { // Gamma-corrected color?
+        r = pgm_read_byte(&gamma[r]); // Gamma correction table maps
+        g = pgm_read_byte(&gamma[g]); // 8-bit input to 4-bit output
+        b = pgm_read_byte(&gamma[b]);
+        return (r << 12) | ((r & 0x8) << 8) | // 4/4/4 -> 5/6/5
+               (g <<  7) | ((g & 0xC) << 3) |
+               (b <<  1) | ( b        >> 3);
+    } // else linear (uncorrected) color
+    return ((r & 0xF8) << 11) | ((g & 0xFC) << 5) | (b >> 3);
+}
+
+uint16_t RGBmatrixPanel::ColorHSV(
+    long hue, uint8_t sat, uint8_t val, bool gflag)
+{
+
+    uint8_t  r, g, b, lo;
+    uint16_t s1, v1;
+
+    // Hue
+    hue %= 1536;             // -1535 to +1535
+    if(hue < 0) hue += 1536; //     0 to +1535
+    lo = hue & 255;          // Low byte  = primary/secondary color mix
+    switch(hue >> 8) {       // High byte = sextant of colorwheel
+        case 0 :
+            r = 255     ;
+            g =  lo     ;
+            b =   0     ;
+            break; // R to Y
+        case 1 :
+            r = 255 - lo;
+            g = 255     ;
+            b =   0     ;
+            break; // Y to G
+        case 2 :
+            r =   0     ;
+            g = 255     ;
+            b =  lo     ;
+            break; // G to C
+        case 3 :
+            r =   0     ;
+            g = 255 - lo;
+            b = 255     ;
+            break; // C to B
+        case 4 :
+            r =  lo     ;
+            g =   0     ;
+            b = 255     ;
+            break; // B to M
+        default:
+            r = 255     ;
+            g =   0     ;
+            b = 255 - lo;
+            break; // M to R
+    }
+
+    // Saturation: add 1 so range is 1 to 256, allowig a quick shift operation
+    // on the result rather than a costly divide, while the type upgrade to int
+    // avoids repeated type conversions in both directions.
+    s1 = sat + 1;
+    r  = 255 - (((255 - r) * s1) >> 8);
+    g  = 255 - (((255 - g) * s1) >> 8);
+    b  = 255 - (((255 - b) * s1) >> 8);
+
+    // Value (brightness) & 16-bit color reduction: similar to above, add 1
+    // to allow shifts, and upgrade to int makes other conversions implicit.
+    v1 = val + 1;
+    if(gflag) { // Gamma-corrected color?
+        r = pgm_read_byte(&gamma[(r * v1) >> 8]); // Gamma correction table maps
+        g = pgm_read_byte(&gamma[(g * v1) >> 8]); // 8-bit input to 4-bit output
+        b = pgm_read_byte(&gamma[(b * v1) >> 8]);
+    } else { // linear (uncorrected) color
+        r = (r * v1) >> 12; // 4-bit results
+        g = (g * v1) >> 12;
+        b = (b * v1) >> 12;
+    }
+    return (r << 12) | ((r & 0x8) << 8) | // 4/4/4 -> 5/6/5
+           (g <<  7) | ((g & 0xC) << 3) |
+           (b <<  1) | ( b        >> 3);
+}
+
+void RGBmatrixPanel::drawPixel(int16_t x, int16_t y, uint16_t c)
+{
+    uint8_t r, g, b, bit, limit, *ptr;
+    if((x < 0) || (x >= _width) || (y < 0) || (y >= _height)) return;
+    switch(rotation) {
+        case 1:
+            swap(x, y);
+            x = _rawWidth  - 1 - x;
+            break;
+        case 2:
+            x = _rawWidth  - 1 - x;
+            y = _rawHeight - 1 - y;
+            break;
+        case 3:
+            swap(x, y);
+            y = _rawHeight - 1 - y;
+            break;
+    }
+
+    // Adafruit_GFX uses 16-bit color in 5/6/5 format, while matrix needs
+    // 4/4/4.  Pluck out relevant bits while separating into R,G,B:
+    r =  c >> 12;        // RRRRrggggggbbbbb
+    g = (c >>  7) & 0xF; // rrrrrGGGGggbbbbb
+    b = (c >>  1) & 0xF; // rrrrrggggggBBBBb
+    // Loop counter stuff
+    bit   = 2;
+    limit = 1 << nPlanes;
+
+    if(y < nRows) {
+        // Data for the upper half of the display is stored in the lower
+        // bits of each byte.
+        ptr = &matrixbuff[backindex][y * _rawWidth * (nPlanes - 1) + x]; // Base addr
+        // Plane 0 is a tricky case -- its data is spread about,
+        // stored in least two bits not used by the other planes.
+        ptr[64] &= ~B00000011;            // Plane 0 R,G mask out in one op
+        if(r & 1) ptr[64] |=  B00000001;  // Plane 0 R: 64 bytes ahead, bit 0
+        if(g & 1) ptr[64] |=  B00000010;  // Plane 0 G: 64 bytes ahead, bit 1
+        if(b & 1) ptr[32] |=  B00000001;  // Plane 0 B: 32 bytes ahead, bit 0
+        else      ptr[32] &= ~B00000001;  // Plane 0 B unset; mask out
+        // The remaining three image planes are more normal-ish.
+        // Data is stored in the high 6 bits so it can be quickly
+        // copied to the DATAPORT register w/6 output lines.
+        for(; bit < limit; bit <<= 1) {
+            *ptr &= ~B00011100;             // Mask out R,G,B in one op
+            if(r & bit) *ptr |= B00000100;  // Plane N R: bit 2
+            if(g & bit) *ptr |= B00001000;  // Plane N G: bit 3
+            if(b & bit) *ptr |= B00010000;  // Plane N B: bit 4
+            ptr  += WIDTH;                  // Advance to next bit plane
+        }
+    } else {
+        // Data for the lower half of the display is stored in the upper
+        // bits, except for the plane 0 stuff, using 2 least bits.
+        ptr = &matrixbuff[backindex][(y - nRows) * WIDTH * (nPlanes - 1) + x];
+        *ptr &= ~B00000011;               // Plane 0 G,B mask out in one op
+        if(r & 1)  ptr[32] |=  B00000010; // Plane 0 R: 32 bytes ahead, bit 1
+        else       ptr[32] &= ~B00000010; // Plane 0 R unset; mask out
+        if(g & 1) *ptr     |=  B00000001; // Plane 0 G: bit 0
+        if(b & 1) *ptr     |=  B00000010; // Plane 0 B: bit 0
+        for(; bit < limit; bit <<= 1) {
+            *ptr &= ~B11100000;             // Mask out R,G,B in one op
+            if(r & bit) *ptr |= B00100000;  // Plane N R: bit 5
+            if(g & bit) *ptr |= B01000000;  // Plane N G: bit 6
+            if(b & bit) *ptr |= B10000000;  // Plane N B: bit 7
+            ptr  += WIDTH;                  // Advance to next bit plane
+        }
+    }
+}
+
+void RGBmatrixPanel::fillScreen(uint16_t c)
+{
+    if((c == 0x0000) || (c == 0xffff)) {
+        // For black or white, all bits in frame buffer will be identically
+        // set or unset (regardless of weird bit packing), so it's OK to just
+        // quickly memset the whole thing:
+        memset(matrixbuff[backindex], c, 32 * nRows * 3);
+    } else {
+        // Otherwise, need to handle it the long way:
+        Adafruit_GFX::fillScreen(c);
+    }
+}
+
+// Return address of back buffer -- can then load/store data directly
+uint8_t *RGBmatrixPanel::backBuffer()
+{
+    return matrixbuff[backindex];
+}
+
+// For smooth animation -- drawing always takes place in the "back" buffer;
+// this method pushes it to the "front" for display.  Passing "true", the
+// updated display contents are then copied to the new back buffer and can
+// be incrementally modified.  If "false", the back buffer then contains
+// the old front buffer contents -- your code can either clear this or
+// draw over every pixel.  (No effect if double-buffering is not enabled.)
+void RGBmatrixPanel::swapBuffers(bool copy)
+{
+    if(matrixbuff[0] != matrixbuff[1]) {
+        // To avoid 'tearing' display, actual swap takes place in the interrupt
+        // handler, at the end of a complete screen refresh cycle.
+        swapflag = true;                  // Set flag here, then...
+        while(swapflag == true) delay(1); // wait for interrupt to clear it
+        if(copy == true)
+            memcpy(matrixbuff[backindex], matrixbuff[1-backindex], 32 * nRows * 3);
+    }
+}
+
+// Dump display contents to the Serial Monitor, adding some formatting to
+// simplify copy-and-paste of data as a PROGMEM-embedded image for another
+// sketch.  If using multiple dumps this way, you'll need to edit the
+// output to change the 'img' name for each.  Data can then be loaded
+// back into the display using a pgm_read_byte() loop.
+void RGBmatrixPanel::dumpMatrix(void)
+{
+
+    int i, buffsize = 32 * nRows * 3;
+
+    Serial.print("\n\n"
+                 "#include <avr/pgmspace.h>\n\n"
+                 "static const uint8_t PROGMEM img[] = {\n  ");
+
+    for(i=0; i<buffsize; i++) {
+        Serial.print("0x");
+        if(matrixbuff[backindex][i] < 0x10) Serial.print('0');
+        Serial.print(matrixbuff[backindex][i],HEX);
+        if(i < (buffsize - 1)) {
+            if((i & 7) == 7) Serial.print(",\n  ");
+            else             Serial.print(',');
+        }
+    }
+    Serial.println("\n};");
+}
+
+// -------------------- Interrupt handler stuff --------------------
+
+ISR(TIMER1_OVF_vect, ISR_BLOCK)   // ISR_BLOCK important -- see notes later
+{
+    activePanel->updateDisplay();   // Call refresh func for active display
+    TIFR1 |= TOV1;                  // Clear Timer1 interrupt flag
+}
+
+// Two constants are used in timing each successive BCM interval.
+// These were found empirically, by checking the value of TCNT1 at
+// certain positions in the interrupt code.
+// CALLOVERHEAD is the number of CPU 'ticks' from the timer overflow
+// condition (triggering the interrupt) to the first line in the
+// updateDisplay() method.  It's then assumed (maybe not entirely 100%
+// accurately, but close enough) that a similar amount of time will be
+// needed at the opposite end, restoring regular program flow.
+// LOOPTIME is the number of 'ticks' spent inside the shortest data-
+// issuing loop (not actually a 'loop' because it's unrolled, but eh).
+// Both numbers are rounded up slightly to allow a little wiggle room
+// should different compilers produce slightly different results.
+#define CALLOVERHEAD 60   // Actual value measured = 56
+#define LOOPTIME     200  // Actual value measured = 188
+// The "on" time for bitplane 0 (with the shortest BCM interval) can
+// then be estimated as LOOPTIME + CALLOVERHEAD * 2.  Each successive
+// bitplane then doubles the prior amount of time.  We can then
+// estimate refresh rates from this:
+// 4 bitplanes = 320 + 640 + 1280 + 2560 = 4800 ticks per row.
+// 4800 ticks * 16 rows (for 32x32 matrix) = 76800 ticks/frame.
+// 16M CPU ticks/sec / 76800 ticks/frame = 208.33 Hz.
+// Actual frame rate will be slightly less due to work being done
+// during the brief "LEDs off" interval...it's reasonable to say
+// "about 200 Hz."  The 16x32 matrix only has to scan half as many
+// rows...so we could either double the refresh rate (keeping the CPU
+// load the same), or keep the same refresh rate but halve the CPU
+// load.  We opted for the latter.
+// Can also estimate CPU use: bitplanes 1-3 all use 320 ticks to
+// issue data (the increasing gaps in the timing invervals are then
+// available to other code), and bitplane 0 takes 920 ticks out of
+// the 2560 tick interval.
+// 320 * 3 + 920 = 1880 ticks spent in interrupt code, per row.
+// From prior calculations, about 4800 ticks happen per row.
+// CPU use = 1880 / 4800 = ~39% (actual use will be very slightly
+// higher, again due to code used in the LEDs off interval).
+// 16x32 matrix uses about half that CPU load.  CPU time could be
+// further adjusted by padding the LOOPTIME value, but refresh rates
+// will decrease proportionally, and 200 Hz is a decent target.
+
+// The flow of the interrupt can be awkward to grasp, because data is
+// being issued to the LED matrix for the *next* bitplane and/or row
+// while the *current* plane/row is being shown.  As a result, the
+// counter variables change between past/present/future tense in mid-
+// function...hopefully tenses are sufficiently commented.
+
+void RGBmatrixPanel::updateDisplay(void)
+{
+    uint8_t  i, tick, tock, *ptr;
+    uint16_t t, duration;
+
+    *oeport  |= oepin;  // Disable LED output during row/plane switchover
+    *latport |= latpin; // Latch data loaded during *prior* interrupt
+
+    // Calculate time to next interrupt BEFORE incrementing plane #.
+    // This is because duration is the display time for the data loaded
+    // on the PRIOR interrupt.  CALLOVERHEAD is subtracted from the
+    // result because that time is implicit between the timer overflow
+    // (interrupt triggered) and the initial LEDs-off line at the start
+    // of this method.
+    t = (nRows > 8) ? LOOPTIME : (LOOPTIME * 2);
+    duration = ((t + CALLOVERHEAD * 2) << plane) - CALLOVERHEAD;
+
+    // Borrowing a technique here from Ray's Logic:
+    // www.rayslogic.com/propeller/Programming/AdafruitRGB/AdafruitRGB.htm
+    // This code cycles through all four planes for each scanline before
+    // advancing to the next line.  While it might seem beneficial to
+    // advance lines every time and interleave the planes to reduce
+    // vertical scanning artifacts, in practice with this panel it causes
+    // a green 'ghosting' effect on black pixels, a much worse artifact.
+
+    if(++plane >= nPlanes) {      // Advance plane counter.  Maxed out?
+        plane = 0;                  // Yes, reset to plane 0, and
+        if(++row >= nRows) {        // advance row counter.  Maxed out?
+            row     = 0;              // Yes, reset row counter, then...
+            if(swapflag == true) {    // Swap front/back buffers if requested
+                backindex = 1 - backindex;
+                swapflag  = false;
+            }
+            buffptr = matrixbuff[1-backindex]; // Reset into front buffer
+        }
+    } else if(plane == 1) {
+        // Plane 0 was loaded on prior interrupt invocation and is about to
+        // latch now, so update the row address lines before we do that:
+        if(row & 0x1)   *addraport |=  addrapin;
+        else            *addraport &= ~addrapin;
+        if(row & 0x2)   *addrbport |=  addrbpin;
+        else            *addrbport &= ~addrbpin;
+        if(row & 0x4)   *addrcport |=  addrcpin;
+        else            *addrcport &= ~addrcpin;
+        if(nRows > 8) {
+            if(row & 0x8) *addrdport |=  addrdpin;
+            else          *addrdport &= ~addrdpin;
+        }
+    }
+
+    // buffptr, being 'volatile' type, doesn't take well to optimization.
+    // A local register copy can speed some things up:
+    ptr = (uint8_t *)buffptr;
+
+    ICR1      = duration; // Set interval for next interrupt
+    TCNT1     = 0;        // Restart interrupt timer
+    *oeport  &= ~oepin;   // Re-enable output
+    *latport &= ~latpin;  // Latch down
+
+    // Record current state of SCLKPORT register, as well as a second
+    // copy with the clock bit set.  This makes the innnermost data-
+    // pushing loops faster, as they can just set the PORT state and
+    // not have to load/modify/store bits every single time.  It's a
+    // somewhat rude trick that ONLY works because the interrupt
+    // handler is set ISR_BLOCK, halting any other interrupts that
+    // might otherwise also be twiddling the port at the same time
+    // (else this would clobber them).
+    tock = SCLKPORT;
+    tick = tock | sclkpin;
+
+    if(plane > 0) { // 188 ticks from TCNT1=0 (above) to end of function
+
+        // Planes 1-3 copy bytes directly from RAM to PORT without unpacking.
+        // The least 2 bits (used for plane 0 data) are presumed masked out
+        // by the port direction bits.
+
+        // A tiny bit of inline assembly is used; compiler doesn't pick
+        // up on opportunity for post-increment addressing mode.
+        // 5 instruction ticks per 'pew' = 160 ticks total
+#define pew asm volatile(                 \
+      "ld  __tmp_reg__, %a[ptr]+"    "\n\t"   \
+      "out %[data]    , __tmp_reg__" "\n\t"   \
+      "out %[clk]     , %[tick]"     "\n\t"   \
+      "out %[clk]     , %[tock]"     "\n"     \
+      :: [ptr]  "e" (ptr),                    \
+         [data] "I" (_SFR_IO_ADDR(DATAPORT)), \
+         [clk]  "I" (_SFR_IO_ADDR(SCLKPORT)), \
+         [tick] "r" (tick),                   \
+         [tock] "r" (tock));
+
+        // Loop is unrolled for speed:
+        pew pew pew pew pew pew pew pew
+        pew pew pew pew pew pew pew pew
+        pew pew pew pew pew pew pew pew
+        pew pew pew pew pew pew pew pew
+
+        buffptr += 32;
+
+    } else { // 920 ticks from TCNT1=0 (above) to end of function
+
+        // Planes 1-3 (handled above) formatted their data "in place,"
+        // their layout matching that out the output PORT register (where
+        // 6 bits correspond to output data lines), maximizing throughput
+        // as no conversion or unpacking is needed.  Plane 0 then takes up
+        // the slack, with all its data packed into the 2 least bits not
+        // used by the other planes.  This works because the unpacking and
+        // output for plane 0 is handled while plane 3 is being displayed...
+        // because binary coded modulation is used (not PWM), that plane
+        // has the longest display interval, so the extra work fits.
+        for(i=0; i<32; i++) {
+            DATAPORT =
+                ( ptr[i]    << 6)         |
+                ((ptr[i+32] << 4) & 0x30) |
+                ((ptr[i+64] << 2) & 0x0C);
+            SCLKPORT = tick; // Clock lo
+            SCLKPORT = tock; // Clock hi
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RGBmatrixPanel.h	Fri May 23 15:08:14 2014 +0000
@@ -0,0 +1,46 @@
+#include "mbed.h"
+#include "Adafruit_GFX.h"
+
+class RGBmatrixPanel : public Adafruit_GFX
+{
+
+public:
+    // Constructor for 16x32 panel:
+    RGBmatrixPanel(uint8_t a, uint8_t b, uint8_t c, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf);
+
+    // Constructor for 32x32 panel (adds 'd' pin):
+    RGBmatrixPanel(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf);
+
+    void begin(void);
+    virtual void drawPixel(int16_t x, int16_t y, uint16_t c);
+    virtual void fillScreen(uint16_t c);
+    void updateDisplay(void);
+    void swapBuffers(bool);
+    void dumpMatrix(void);
+
+    uint8_t *backBuffer(void);
+
+    uint16_t Color333(uint8_t r, uint8_t g, uint8_t b);
+    uint16_t Color444(uint8_t r, uint8_t g, uint8_t b);
+    uint16_t Color888(uint8_t r, uint8_t g, uint8_t b);
+    uint16_t Color888(uint8_t r, uint8_t g, uint8_t b, bool gflag);
+    uint16_t ColorHSV(long hue, uint8_t sat, uint8_t val, bool gflag);
+
+private:
+    uint8_t         *matrixbuff[2];
+    uint8_t          nRows;
+    volatile uint8_t backindex;
+    volatile bool swapflag;
+
+    // Init/alloc code common to both constructors:
+    void init(uint8_t rows, uint8_t a, uint8_t b, uint8_t c, uint8_t sclk, uint8_t latch, uint8_t oe, bool dbuf);
+
+    // PORT register pointers, pin bitmasks, pin numbers:
+    volatile uint8_t *latport, *oeport, *addraport, *addrbport, *addrcport, *addrdport;
+    uint8_t sclkpin, latpin, oepin, addrapin, addrbpin, addrcpin, addrdpin;
+    uint8_t _sclk, _latch, _oe, _a, _b, _c, _d;
+
+    // Counters/pointers for interrupt handler:
+    volatile uint8_t row, plane;
+    volatile uint8_t *buffptr;
+};
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gamma.h	Fri May 23 15:08:14 2014 +0000
@@ -0,0 +1,39 @@
+#ifndef _GAMMA_H_
+#define _GAMMA_H_
+
+static const uint8_t gamma[] = {
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x01,0x01,0x01,0x01,0x01,0x01,
+  0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+  0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+  0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+  0x01,0x01,0x01,0x01,0x01,0x01,0x02,0x02,
+  0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
+  0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
+  0x02,0x02,0x02,0x02,0x02,0x03,0x03,0x03,
+  0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,
+  0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x04,
+  0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,
+  0x04,0x04,0x04,0x04,0x04,0x04,0x05,0x05,
+  0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
+  0x05,0x05,0x05,0x06,0x06,0x06,0x06,0x06,
+  0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x07,
+  0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
+  0x07,0x07,0x08,0x08,0x08,0x08,0x08,0x08,
+  0x08,0x08,0x08,0x08,0x09,0x09,0x09,0x09,
+  0x09,0x09,0x09,0x09,0x09,0x0a,0x0a,0x0a,
+  0x0a,0x0a,0x0a,0x0a,0x0a,0x0a,0x0b,0x0b,
+  0x0b,0x0b,0x0b,0x0b,0x0b,0x0b,0x0c,0x0c,
+  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0d,0x0d,
+  0x0d,0x0d,0x0d,0x0d,0x0d,0x0e,0x0e,0x0e,
+  0x0e,0x0e,0x0e,0x0e,0x0f,0x0f,0x0f,0x0f
+};
+
+#endif // _GAMMA_H_