Jonne Valola / PokittoLib Featured

Dependents:   YATTT sd_map_test cPong SnowDemo ... more

PokittoLib

Library for programming Pokitto hardware

How to Use

  1. Import this library into the online compiler (see the "import" button on the right-hand side)
  2. DO NOT import mbed-src anymore; a better version is now included inside PokittoLib
  3. Change My_settings.h according to your project
  4. Start coding!

Files at this revision

API Documentation at this revision

Committer:
Pokitto
Date:
Sat Mar 23 19:22:35 2019 +0000
Parent:
63:7d1c08cdde5c
Child:
66:6281a40d73e6
Commit message:
mode 15 (hirez 16 color) added

Changed in this revision

POKITTO_CORE/PokittoDisplay.cpp Show annotated file Show diff for this revision Revisions of this file
POKITTO_CORE/PokittoDisplay.h Show annotated file Show diff for this revision Revisions of this file
POKITTO_HW/HWLCD.cpp Show annotated file Show diff for this revision Revisions of this file
POKITTO_HW/HWLCD.h Show annotated file Show diff for this revision Revisions of this file
--- a/POKITTO_CORE/PokittoDisplay.cpp	Tue Oct 23 16:21:01 2018 +0000
+++ b/POKITTO_CORE/PokittoDisplay.cpp	Sat Mar 23 19:22:35 2019 +0000
@@ -71,6 +71,7 @@
 #include "PokittoSound.h"
 #include <stdio.h>
 #include <string.h>
+#include <ctype.h>
 #ifdef DISABLEAVRMIN
 #include <algorithm>
 using std::min;
@@ -213,7 +214,7 @@
     #endif // POK_GAMEBUINO_SUPPORT
 
     // Reset sprites
-    m_tilecolorbuf = NULL; //!!HV
+    m_tilecolorbuf = NULL;
     for (uint8_t s = 0; s < SPRITE_COUNT; s++) m_sprites[s].bitmapData = NULL;
 }
 
@@ -367,7 +368,12 @@
         directcolor = COLOR_WHITE;
         invisiblecolor = COLOR_BLACK;
         directbgcolor = 0x0001; // Cannot be black as that is transparent color
-	directtextrotated = false;
+        if (POK_SCREENMODE == MODE_FAST_16COLOR ||
+            POK_SCREENMODE == MODE13
+        )
+            directtextrotated = false;
+        else
+            directtextrotated = true;
         adjustCharStep = 0;
         setFont(fontC64);
         enableDirectPrinting(true);
@@ -496,9 +502,11 @@
     // GLCD fonts are arranged LSB = topmost pixel of char, so its easy to just shift through the column
     uint16_t bitcolumn; //16 bits for 2x8 bit high characters
 
+    if( fontSize != 2 ) fontSize = 1;
+
     void (*drawPixelFG)(int16_t,int16_t, uint8_t) = &Display::drawPixelNOP;
     void (*drawPixelBG)(int16_t,int16_t, uint8_t) = &Display::drawPixelNOP;
-    if( x>=0 && y >= 0 && x+w<width && y+h<height ){
+    if( x>=0 && y >= 0 && x+w*fontSize<width && y+(h+1)*fontSize<height ){
 	if( color != invisiblecolor )
 	    drawPixelFG = &Display::drawPixelRaw;
 	if( bgcolor != invisiblecolor )
@@ -595,10 +603,10 @@
         uint16_t j = POK_BITFRAME;
         if (bgcolor & 0x1) memset((void*)m_scrbuf,0xFF,j);// R
         else memset((void*)m_scrbuf,0x00,j);// R
-        if ((bgcolor>>1) & 0x1) memset((void*)m_scrbuf+POK_BITFRAME,0xFF,j);// G
-        else memset((void*)m_scrbuf+POK_BITFRAME,0x00,j);// G
-        if ((bgcolor>>2) & 0x1) memset((void*)m_scrbuf+POK_BITFRAME*2,0xFF,j);// B
-        else memset((void*)m_scrbuf+POK_BITFRAME*2,0x00,j);// B
+        if ((bgcolor>>1) & 0x1) memset((char*)m_scrbuf+POK_BITFRAME,0xFF,j);// G
+        else memset((char*)m_scrbuf+POK_BITFRAME,0x00,j);// G
+        if ((bgcolor>>2) & 0x1) memset((char*)m_scrbuf+POK_BITFRAME*2,0xFF,j);// B
+        else memset((char*)m_scrbuf+POK_BITFRAME*2,0x00,j);// B
         setCursor(0,0);
         return;
     } else if (bpp==4) {
@@ -1494,11 +1502,11 @@
 
     /** 3 bpp mode */
     else if (m_colordepth==3) {
-        int16_t i, j, byteNum, bitNum, byteWidth = (w + 7) >> 3;
+        int16_t i, j, byteNum, byteWidth = (w + 7) >> 3;
         int16_t bitFrame = w * h / 8;
         for (i = 0; i < w; i++) {
         byteNum = i / 8;
-        bitNum = i % 8;
+        //bitNum = i % 8;
 
         uint8_t bitcount=0;
         for (j = 0; j <= h/8; j++) {
@@ -1604,17 +1612,15 @@
     /** 4bpp fast version */
 
     if (m_colordepth==8) {
-	int16_t scrx,scry,xclip,xjump,scrxjump;
-    xclip=xjump=scrxjump=0;
+    int16_t scrx,scry;//,scrxjump;
+    int16_t xjump=0;
     /** y clipping */
     if (y<0) { h+=y; bitmap -= y*w; y=0;}
     else if (y+h>height) { h -=(y-height);}
     /** x clipping */
-    if (x<0) { xclip=x; w+=x; xjump = (-x); bitmap += xjump; x=0;}
+    if (x<0) { w+=x; xjump = (-x); bitmap += xjump; x=0;}
     else if (x+w>width) {
-            xclip = x;
-            scrxjump = x;
-            xjump=(x+w-width)+scrxjump;
+            xjump=(x+w-width);
             w = width-x;}
 
     uint8_t* scrptr = m_scrbuf + (y*width + x);
@@ -1628,8 +1634,8 @@
                     bitmap++;
                     scrptr++;
                 }
-            bitmap += xjump; // needed if x<0 clipping occurs
-        scrptr = scrptr + (width - w)+scrxjump;
+            bitmap += xjump; // needed if horizontal clipping occurs
+            scrptr = scrptr + (width - w);
     }
     return;
     }
@@ -1793,83 +1799,105 @@
     } // end for scry
 }
 
-void Display::drawBitmapXFlipped(int16_t x, int16_t y, const uint8_t* bitmap)
+void Display::drawBitmapDataXFlipped(int16_t x, int16_t y, int16_t w, int16_t h, const uint8_t* bitmap)
 {
-    int16_t w = *bitmap;
-	int16_t h = *(bitmap + 1);
-    bitmap = bitmap + 2; //add an offset to the pointer to start after the width and height
     /** visibility check */
     if (y<-h || y>height) return; //invisible
     if (x<-w || x>width) return;  //invisible
     /** 1 bpp mode */
-    if (m_colordepth<2) {
-    int16_t i, j, byteNum, bitNum, byteWidth = (w + 7) >> 3;
-    for (i = 0; i < w; i++) {
-        byteNum = i / 8;
-        bitNum = i % 8;
-        for (j = 0; j < h; j++) {
-            uint8_t source = *(bitmap + j * byteWidth + byteNum);
-            if (source & (0x80 >> bitNum)) {
-                drawPixel(x + w - i, y + j);
+    if (m_colordepth<2)
+    {
+        int16_t i, j, byteNum, bitNum, byteWidth = (w + 7) >> 3;
+        for (i = 0; i < w; i++)
+        {
+            byteNum = i / 8;
+            bitNum = i % 8;
+            for (j = 0; j < h; j++)
+            {
+                uint8_t source = *(bitmap + j * byteWidth + byteNum);
+                if (source & (0x80 >> bitNum))
+                {
+                    drawPixel(x + w - i, y + j);
+                }
             }
         }
-    }
 
-    return;
+        return;
     }
     /** 2 bpp mode */
-    if (m_colordepth<4) {
-    int16_t i, j, byteNum, bitNum, byteWidth = w >> 2;
-    for (i = 0; i < w; i++) {
-        byteNum = i / 4;
-        bitNum = (i % 4)<<1;
-        for (j = 0; j < h; j++) {
-            uint8_t source = *(bitmap + j * byteWidth + byteNum);
-            uint8_t output = (source & (0xC0 >> bitNum));
-            output >>= (6-bitNum);
-            if (output != invisiblecolor) {
-                setColor(output);
-                drawPixel(x + i, y + j);
+    else if (m_colordepth==2)
+    {
+        int16_t i, j, byteNum, bitNum, byteWidth = w >> 2;
+        for (i = 0; i < w; i++)
+        {
+            byteNum = i / 4;
+            bitNum = (i % 4)<<1;
+            for (j = 0; j < h; j++)
+            {
+                uint8_t source = *(bitmap + j * byteWidth + byteNum);
+                uint8_t output = (source & (0xC0 >> bitNum));
+                output >>= (6-bitNum);
+                if (output != invisiblecolor)
+                {
+                    setColor(output);
+                    drawPixel(x + i, y + j);
+                }
             }
         }
-    }
 
-    return;
+        return;
     }
-    /** 4bpp fast version */
-	int16_t scrx,scry,xclip,xjump,scrxjump;
-    xclip=xjump=scrxjump=0;
-    /** y clipping */
-    if (y<0) { h+=y; bitmap -= y*(w>>1); y=0;}
-    else if (y+h>height) { h -=(y-height);}
-    /** x clipping */
-    bitmap += ((w>>1)-1); //inverted!
-    if (x<0) {
+    else if (m_colordepth==4)
+    {
+        /** 4bpp fast version */
+        int16_t scrx,scry,xclip,xjump,scrxjump;
+        xclip=xjump=scrxjump=0;
+        /** y clipping */
+        if (y<0)
+        {
+            h+=y;
+            bitmap -= y*(w>>1);
+            y=0;
+        }
+        else if (y+h>height)
+        {
+            h -=(y-height);
+        }
+        /** x clipping */
+        bitmap += ((w>>1)-1); //inverted!
+        if (x<0)
+        {
             xclip=(x&1)<<1;
             w+=x;
             xjump = ((-x)>>1);
             //bitmap += xjump; // do not clip left edge of source, as bitmap is inverted !
             x=0;
-            }
-    else if (x+w>width) {
+        }
+        else if (x+w>width)
+        {
             xclip = (x&1)<<1;
             scrxjump = x&1;
             xjump=((x+w-width)>>1)+scrxjump;
-            w = width-x;}
+            w = width-x;
+        }
 
-    //uint8_t* scrptr = m_scrbuf + (y*(width>>1) + ((x+width)>>1));
-    uint8_t* scrptr = m_scrbuf + (y*(width>>1) + (x>>1));
-    /** ONLY 4-bit mode for time being **/
-    for (scry = y; scry < y+h; scry+=1) {
-    //    for (scry = y; scry < y+2; scry+=1) {
+        //uint8_t* scrptr = m_scrbuf + (y*(width>>1) + ((x+width)>>1));
+        uint8_t* scrptr = m_scrbuf + (y*(width>>1) + (x>>1));
+        /** ONLY 4-bit mode for time being **/
+        for (scry = y; scry < y+h; scry+=1)
+        {
+            //    for (scry = y; scry < y+2; scry+=1) {
             if (scry>=height) return;
-            if ((x&1)==0) { /** EVEN pixel starting line, very simple, just copypaste **/
+            if ((x&1)==0)   /** EVEN pixel starting line, very simple, just copypaste **/
+            {
                 //for (scrx = w+x-xclip-1; scrx >= x; scrx-=2) {
-                for (scrx = x; scrx < w+x-xclip; scrx+=2) {
+                for (scrx = x; scrx < w+x-xclip; scrx+=2)
+                {
                     uint8_t sourcepixel = *(bitmap);
-                    if (xclip) {
-                            sourcepixel <<=4;
-                            sourcepixel |= ((*(bitmap-1))>>4);//inverted!
+                    if (xclip)
+                    {
+                        sourcepixel <<=4;
+                        sourcepixel |= ((*(bitmap-1))>>4);//inverted!
                     }
                     uint8_t targetpixel = *scrptr;
                     // NIBBLES ARE INVERTED BECAUSE PICTURE IS FLIPPED !!!
@@ -1880,11 +1908,14 @@
                     scrptr++;
                 }
                 bitmap += w; // w*2 >> 1 because inverted and because 2 pixels per byte!!
-                if (xclip){
-                    if (w&1) {
+                if (xclip)
+                {
+                    if (w&1)
+                    {
                         /**last pixel is odd pixel due to clipping & odd width*/
                         uint8_t sourcepixel = *bitmap;
-                        if ((sourcepixel&0x0F) != invisiblecolor) {
+                        if ((sourcepixel&0x0F) != invisiblecolor)
+                        {
                             sourcepixel <<=4;
                             uint8_t targetpixel = *scrptr;// & 0x0F;
                             targetpixel |= sourcepixel;
@@ -1896,8 +1927,11 @@
                     scrptr++;
                 }
                 bitmap += xjump; // needed if x<0 clipping occurs
-            } else { /** ODD pixel starting line **/
-                for (scrx = x; scrx < w+x-xclip; scrx+=2 ) {
+            }
+            else     /** ODD pixel starting line **/
+            {
+                for (scrx = x; scrx < w+x-xclip; scrx+=2 )
+                {
                     uint8_t sourcepixel = *bitmap;
                     uint8_t targetpixel = *scrptr;
                     // inverted !!! store lower nibble of source pixel in lower nibble of target
@@ -1915,7 +1949,62 @@
             }
             // increment the y jump in the scrptr
             scrptr = scrptr + ((width - w)>>1)+scrxjump;
+        }
     }
+    /** 8 bpp mode */
+    else if (m_colordepth==8)
+    {
+        int16_t scrx,scry;//,scrxjump;
+        int16_t xjump=0;
+        /** y clipping */
+        if (y<0)
+        {
+            h+=y;
+            bitmap -= y*w;
+            y=0;
+        }
+        else if (y+h>height)
+        {
+            h -=(y-height);
+        }
+        /** x clipping */
+        if (x<0)
+        {
+            w+=x;
+            xjump = (-x);
+            bitmap += xjump;
+            x=0;
+        }
+        else if (x+w>width)
+        {
+            xjump=(x+w-width);
+            w = width-x;
+        }
+
+        uint8_t* scrptr = m_scrbuf + (y*width + x) + w;
+        for (scry = y; scry < y+h; scry+=1)
+        {
+            if (scry>=height) return;
+            for (scrx = x; scrx < w+x; scrx++)
+            {
+                uint8_t sourcepixel = *bitmap;
+                uint8_t targetpixel = *scrptr;
+                if (sourcepixel != invisiblecolor )
+                    targetpixel = sourcepixel;
+                *scrptr = targetpixel;
+                bitmap++;
+                scrptr--;
+            }
+            bitmap += xjump; // needed if horizontal clipping occurs
+            scrptr = scrptr + (width + w);
+        }
+        return;
+    }
+}
+
+void Display::drawBitmapXFlipped(int16_t x, int16_t y, const uint8_t* bitmap)
+{
+    drawBitmapDataXFlipped(x, y, bitmap[0], bitmap[1], bitmap + 2);
 }
 
 void Display::drawBitmap(int16_t x, int16_t y, const uint8_t *bitmap, uint8_t rotation, uint8_t flip) {
@@ -2514,6 +2603,7 @@
 }
 
 // Draw the crash screen and wait forever
+#define STR_TO_UPPER(str_from, str_to) for( int32_t i=0; i <= strlen(str_from); i++ ) str_to[i] = toupper(str_from[i]);
 void ShowCrashScreenAndWait( const char* texLine1, const char* texLine2, const char* texLine3, const char* texLine4, const char* texLine5 ) {
 
     // draw screen red
@@ -2530,15 +2620,17 @@
     Display::fixedWidthFont = true; // Needed for the non-proportional C64 font (default value=false)
     Display::enableDirectPrinting(true);
 
+    char convertedStr[128] = {0};
+
     // Draw texts
     int  yOffsetInPixels = 5;
     Display::set_cursor(0, 9 + yOffsetInPixels);
-    Display::print("  ");    Display::println(texLine1);
-    Display::print("  ");    Display::println(texLine2);
-    Display::print("  ");    Display::println(texLine3);
+    Display::print("  ");    STR_TO_UPPER(texLine1, convertedStr); Display::println(convertedStr);
+    Display::print("  ");    STR_TO_UPPER(texLine2, convertedStr); Display::println(convertedStr);
+    Display::print("  ");    STR_TO_UPPER(texLine3, convertedStr); Display::println(convertedStr);
     Display::println();
-    Display::print("  *");   Display::println(texLine4);
-    Display::print("  *");   Display::println(texLine5);
+    Display::print("  *");   STR_TO_UPPER(texLine4, convertedStr); Display::println(convertedStr);
+    Display::print("  *");   STR_TO_UPPER(texLine5, convertedStr); Display::println(convertedStr);
 
     Display::set_cursor(0, 0 + yOffsetInPixels);
 
--- a/POKITTO_CORE/PokittoDisplay.h	Tue Oct 23 16:21:01 2018 +0000
+++ b/POKITTO_CORE/PokittoDisplay.h	Sat Mar 23 19:22:35 2019 +0000
@@ -314,6 +314,8 @@
     static void drawRleBitmap(int16_t x, int16_t y, const uint8_t* bitmap);
     /** Draw animated bitmap frame */
     static void drawBitmap(int16_t x, int16_t y, const uint8_t* bitmap, uint8_t frame);
+    /** Draw bitmap data flipped on x-axis*/
+    static void drawBitmapDataXFlipped(int16_t x, int16_t y, int16_t w, int16_t h, const uint8_t* bitmap);
     /** Draw bitmap flipped on x-axis*/
     static void drawBitmapXFlipped(int16_t x, int16_t y, const uint8_t* bitmap);
     /** Draw bitmap with options */
--- a/POKITTO_HW/HWLCD.cpp	Tue Oct 23 16:21:01 2018 +0000
+++ b/POKITTO_HW/HWLCD.cpp	Sat Mar 23 19:22:35 2019 +0000
@@ -453,11 +453,132 @@
  * @param updRectH The update rect.
  * @param paletteptr The screen palette.
 */
+
+
+#define MODE1_LOOP					\
+  "	adds %[t], %[palette]"			"\n"	\
+  "	ldm %[t], {%[t], %[x]}"			"\n"	\
+  "	str %[t], [%[LCD], 0]"			"\n"	\
+  "	movs %[t], 252"	"\n"				\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	\
+  "	str %[x], [%[LCD], 0]"			"\n"	\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	movs %[t], 0x0F"			"\n"	\
+  "	ands %[t], %[t], %[c]"			"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	\
+							\
+  "	lsls %[t], 3"				"\n"    \
+  "	adds %[t], %[palette]"			"\n"	\
+  "	ldm %[t], {%[t], %[x]}"			"\n"	\
+  "	str %[t], [%[LCD], 0]"			"\n"	\
+  "	movs %[t], 252"	"\n"				\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	\
+  "	str %[x], [%[LCD], 0]"			"\n"	\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	lsrs %[c], 8"				"\n"	\
+  "	movs %[t], 0xF0"			"\n"	\
+  "	ands %[t], %[t], %[c]"			"\n"	\
+  "	lsrs %[t], %[t], 1"			"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"
+
+
 void Pokitto::lcdRefreshMode1(uint8_t * scrbuf, uint8_t updRectX, uint8_t updRectY, uint8_t updRectW, uint8_t updRectH, uint16_t* paletteptr) {
 
-    uint16_t x,y;
+
+#ifdef XPERIMENTAL
+//#define __ARMCC_VERSION 1
+#endif
+
+#ifndef __ARMCC_VERSION
+
+  write_command(0x03); write_data(0x1038);
+  write_command(0x20);  // Horizontal DRAM Address
+  write_data(0);
+  write_command(0x21);  // Vertical DRAM Address
+  write_data(0);
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+
+  uint8_t *end=&scrbuf[POK_SCREENBUFFERSIZE>>1]+4;
+
+  volatile uint32_t palette[32];
+  for( uint32_t i=0; i<16; ++i ){
+    palette[(i<<1)+1] = static_cast<uint32_t>(paletteptr[i&3 ]) << 3;
+    palette[(i<<1)  ] = static_cast<uint32_t>(paletteptr[i>>2]) << 3;
+  }
+
+  SET_MASK_P2;
+
+  uint32_t c, WRBit = 1<<12;
+
+  register uint32_t x asm("r2");
+  register uint32_t t asm("r1");
+
+  asm volatile(
+
+	 ".syntax unified"         		"\n"
+	 "ldm %[scrbuf]!, {%[c]}"		"\n" // load 4 bytes (16 pixels)
+	 "movs %[t], 0xF0"			"\n"
+	 "ands %[t], %[t], %[c]"		"\n"
+	 "lsrs %[t], %[t], 1"			"\n"
+	 "mode1Loop%=:" 			"\n"
+	 MODE1_LOOP
+	 MODE1_LOOP
+	 MODE1_LOOP
+	 "	adds %[t], %[palette]"			"\n"
+	 "	ldm %[t], {%[t], %[x]}"			"\n"
+	 "	str %[t], [%[LCD], 0]"			"\n"
+	 "	movs %[t], 252"	"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+	 "	str %[x], [%[LCD], 0]"			"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	movs %[t], 0x0F"			"\n"
+	 "	ands %[t], %[t], %[c]"			"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+
+	 "	lsls %[t], 3"				"\n"
+	 "	adds %[t], %[palette]"			"\n"
+	 "	ldm %[t], {%[t], %[x]}"			"\n"
+	 "	str %[t], [%[LCD], 0]"			"\n"
+	 "	movs %[t], 252"	"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+	 "	str %[x], [%[LCD], 0]"			"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+
+	 "	ldm %[scrbuf]!, {%[c]}"		"\n" // load next 4 bytes
+	 "	movs %[t], 0xF0"		"\n"
+	 "	ands %[t], %[t], %[c]"		"\n"
+	 "	lsrs %[t], %[t], 1"		"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+
+	 "cmp %[end], %[scrbuf]"            	"\n"
+	 "bne mode1Loop%="       		"\n" // if scrbuf < end, loop
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [end]"+h" (end),
+	   [scrbuf]"+l" (scrbuf),
+	   [WRBit]"+l" (WRBit),
+	   [x]"+l" (x)
+
+	 : // inputs
+	   [LCD]"l" (0xA0002188),
+	   [palette]"l" (palette)
+
+	 : // clobbers
+	   "cc"
+	       );
+
+
+#else
+    uint16_t x,y,xptr;
     uint16_t scanline[4][176]; // read 4 half-nibbles = 4 pixels at a time
-    uint8_t *d;
+    uint8_t *d, yoffset=0;
 
     // If not the full screen is updated, check the validity of the update rect.
     if ( updRectX != 0 || updRectY != 0 ||updRectW != LCDWIDTH ||updRectH != LCDHEIGHT ) {
@@ -477,6 +598,7 @@
     xptr = 8;
     setDRAMptr(8, 0);
     #else
+    xptr = 0;
     setDRAMptr(0, 0);
     #endif
 
@@ -484,6 +606,7 @@
         d = scrbuf+(x>>2);// point to beginning of line in data
 
         /** find colours in one scanline **/
+        uint8_t s=0;
         d += (updRectY * 220/4);
         for (y=updRectY; y<updRectY+updRectH; y++) {
             uint8_t tdata = *d;
@@ -555,6 +678,7 @@
             }
         }
     }
+#endif
 
     #ifdef POK_SIM
     simulator.refreshDisplay();
@@ -945,7 +1069,7 @@
     #endif
 }
 
-  
+
 #define MODE2_INNER_LOOP_B				\
   "	ldm %[scanline]!, {%[c]}"   "\n"		\
 	       "	str %[c], [%[LCD], 0]"    "\n"	\
@@ -955,98 +1079,104 @@
 	       "	str %[t], [%[LCD], 124]"  "\n"	\
 	       "	subs %[x], 1"             "\n"	\
 	       "	str %[t], [%[LCD], %[c]]" "\n"	\
-    
+
 
 void Pokitto::lcdRefreshMode2(uint8_t * scrbuf, uint16_t* paletteptr ) {
-  uint32_t x,y;
+  uint32_t x,y,byte,c,t=1<<12;
   uint32_t scanline[110];
 
   write_command(0x03); write_data(0x1038);
   write_command(0x20);  // Horizontal DRAM Address
   write_data(0);  // 0
   write_command(0x21);  // Vertical DRAM Address
-  write_data(1);
+
+#ifndef __ARMCC_VERSION
+  write_data(1); // still has pixel 0 bug
   write_command(0x22); // write data to DRAM
   CLR_CS_SET_CD_RD_WR;
   SET_MASK_P2;
 
-#ifndef __ARMCC_VERSION
-   asm volatile(
+  #ifdef PROJ_SHOW_FPS_COUNTER
+  setDRAMptr(0, 8);
+  y=4;
+  #endif
+
+  asm volatile(
 	 ".syntax unified"         "\n"
-	 
+
 	 "mov r10, %[scanline]"    "\n"
 	 "mov r11, %[t]"           "\n"
-	 
+
 	 "mode2OuterLoop:"        "\n"
-	 
+
 	 "movs %[x], 110"          "\n"
 	 "mode2InnerLoopA:"
 
 
-	 "	ldrb %[byte], [%[scrbuf],0]"   "\n"				
+	 "	ldrb %[byte], [%[scrbuf],0]"   "\n"
 	 "	lsrs %[c], %[byte], 4"    "\n"
 
 	 "	movs %[t], 15" "\n"
-	 "	ands %[byte], %[t]"    "\n"		
-	 
-	 "	lsls %[c], 1"             "\n"			
-	 "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"	
-	 "	lsls %[t], %[t], 3"       "\n"			
-	 "	str %[t], [%[LCD], 0]"    "\n"			
-	 "	mov %[c], r11" "\n"				
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	stm %[scanline]!, {%[t]}" "\n"			
-	 "	movs %[t], 252"   "\n"				
-	 "	str %[c], [%[LCD], %[t]]" "\n"			
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	lsls %[byte], %[byte], 1"             "\n"			
+	 "	ands %[byte], %[t]"    "\n"
+
+	 "	lsls %[c], 1"             "\n"
+	 "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"
+	 "	lsls %[t], %[t], 3"       "\n"
+	 "	str %[t], [%[LCD], 0]"    "\n"
+	 "	mov %[c], r11" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"
+	 "	movs %[t], 252"   "\n"
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	lsls %[byte], %[byte], 1"             "\n"
 	 "	str %[c], [%[LCD], %[t]]" "\n"
 
-	 "	ldrh %[t], [%[paletteptr], %[byte]]"      "\n"	
-	 "	lsls %[t], %[t], 3"       "\n"			
-	 "	str %[t], [%[LCD], 0]"    "\n"			
-	 "	mov %[c], r11" "\n"				
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	stm %[scanline]!, {%[t]}" "\n"			
-	 "	movs %[t], 252"   "\n"				
-	 "	str %[c], [%[LCD], %[t]]" "\n"			
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	adds %[scrbuf], %[scrbuf], 1" "\n"		
+	 "	ldrh %[t], [%[paletteptr], %[byte]]"      "\n"
+	 "	lsls %[t], %[t], 3"       "\n"
+	 "	str %[t], [%[LCD], 0]"    "\n"
+	 "	mov %[c], r11" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"
+	 "	movs %[t], 252"   "\n"
 	 "	str %[c], [%[LCD], %[t]]" "\n"
-	 
-	 "	subs %[x], 2"          "\n"	
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	adds %[scrbuf], %[scrbuf], 1" "\n"
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+
+	 "	subs %[x], 2"          "\n"
 	 "	bne mode2InnerLoopA"  "\n"
 
 	 "mov %[scanline], r10"    "\n"
 	 "movs %[x], 110"          "\n"
 	 "mov %[t], r11"           "\n"
 	 "mode2InnerLoopB:"
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
 	 "	bne mode2InnerLoopB"     "\n"
-	 
+
 	 "mov %[scanline], r10"    "\n"
 	 "movs %[t], 1"              "\n"
 	 "movs %[c], 88"             "\n"
 	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
 	 "cmp %[y], %[c]"            "\n"
 	 "bne mode2OuterLoop"       "\n" // if y != 88, loop
-	 
+
 	 : // outputs
 	   [c]"+l" (c),
 	   [t]"+l" (t),
 	   [x]"+l" (x),
 	   [y]"+h" (y),  // +:Read-Write l:lower (0-7) register
 	   [scrbuf]"+l" (scrbuf)
-	   
+
 	 : // inputs
 	   [LCD]"l" (0xA0002188),
 	   [scanline]"l" (scanline),
@@ -1056,8 +1186,13 @@
 	   "cc", "r10", "r11"
        );
 
- 
+
 #else
+  write_data(0); // does not have pixel 0 bug
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+  SET_MASK_P2;
+
 uint8_t* d = scrbuf;// point to beginning of line in data
 
   #ifdef PROJ_SHOW_FPS_COUNTER
@@ -1713,122 +1848,132 @@
 }
 
 #define MODE13_INNER_LOOP_A						\
-  "	ldrb %[t], [%[scrbuf],0]"   "\n"				\
-	       "	add %[t], %[t], %[offset]"  "\n"		\
+	       "	add %[t], %[t], r10"	   "\n" 		\
 	       "	uxtb %[c], %[t] " "\n"				\
 	       "	lsls %[c], 1"             "\n"			\
 	       "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"	\
 	       "	lsls %[t], %[t], 3"       "\n"			\
 	       "	str %[t], [%[LCD], 0]"    "\n"			\
-	       "	mov %[c], r11" "\n"				\
-	       "	str %[c], [%[LCD], 124]"  "\n"			\
-	       "	stm %[scanline]!, {%[t]}" "\n"			\
-	       "	movs %[t], 252"   "\n"				\
-	       "	str %[c], [%[LCD], %[t]]" "\n"			\
-	       "	str %[c], [%[LCD], 124]"  "\n"			\
+	       "	movs %[c], 252"   "\n"				\
+	       "	str %[offset], [%[LCD], %[c]]" "\n"		\
+	       "	stm %[scanline]!, {%[t]}"      "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"		\
+	       "	str %[offset], [%[LCD], %[c]]" "\n"		\
 	       "	adds %[scrbuf], %[scrbuf], 1" "\n"		\
-	       "	str %[c], [%[LCD], %[t]]" "\n"
-  
-#define MODE13_INNER_LOOP_B				\
-  "	ldm %[scanline]!, {%[c]}"   "\n"		\
-	       "	str %[c], [%[LCD], 0]"    "\n"	\
-	       "	str %[t], [%[LCD], 124]"  "\n"	\
-	       "	movs %[c], 252"   "\n"		\
-	       "	str %[t], [%[LCD], %[c]]" "\n"	\
-	       "	str %[t], [%[LCD], 124]"  "\n"	\
-	       "	subs %[x], 1"             "\n"	\
-	       "	str %[t], [%[LCD], %[c]]" "\n"	\
+	       "	ldrb %[t], [%[scrbuf],0]"   "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"
 
- 
+// This can be made 1 cycle faster (x -= 10 instead of x--),
+// but there will be noise
+#define MODE13_INNER_LOOP_B					\
+	       "	str %[c], [%[LCD], 0]"    "\n"		\
+	       "	str %[offset], [%[LCD], %[t]]" "\n"	\
+	       "	ldr %[c], [%[scanline]]"   "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"	\
+	       "	str %[offset], [%[LCD], %[t]]" "\n"	\
+	       "	adds %[scanline], 4"             "\n"	\
+	       "	subs %[x], 1"			"\n"	\
+	       "	str %[offset], [%[LCD], 124]"  "\n"
+
+
  void Pokitto::lcdRefreshMode13(uint8_t * scrbuf, uint16_t* paletteptr, uint8_t offset){
    uint32_t scanline[110]; // read two nibbles = pixels at a time
-   
+
    write_command_16(0x03); write_data_16(0x1038);
    write_command(0x20); write_data(0);
-   write_command(0x21); write_data(1);
+   write_command(0x21); write_data(0);
    write_command(0x22);
    CLR_CS_SET_CD_RD_WR;
    SET_MASK_P2;
-   
-   uint32_t x, y=0;
-   
+
+   uint32_t x, y=0, c, t;
+
 #ifndef __ARMCC_VERSION
+   #ifdef PROJ_SHOW_FPS_COUNTER
+   setDRAMptr(0, 8);
+   y=4;
+   #endif
+
    asm volatile(
 	 ".syntax unified"         "\n"
-	 
-	 "mov r10, %[scanline]"    "\n"
-	 
-	 "movs %[t], 1"            "\n"
-	 "lsls %[t], %[t], 12"     "\n"
-	 "mov r11, %[t]"           "\n"
-	 
+
+	 "mov r10, %[offset]"	   "\n"
+	 "movs %[offset], 1"            "\n"
+	 "lsls %[offset], %[offset], 12"     "\n"
+
 	 "mode13OuterLoop:"        "\n"
-	 
+
 	 "movs %[x], 110"          "\n"
+	 "ldrb %[t], [%[scrbuf],0]"   "\n"
 	 "mode13InnerLoopA:"
 	 MODE13_INNER_LOOP_A
 	 MODE13_INNER_LOOP_A
-	 "	subs %[x], 2"          "\n"	
+	 "	subs %[x], 2"          "\n"
 	 "	bne mode13InnerLoopA"  "\n"
 
-	 "mov %[scanline], r10"    "\n"
+	 "subs %[scanline], 220"    "\n"
+	 "subs %[scanline], 220"    "\n"
+
 	 "movs %[x], 110"          "\n"
-	 "mov %[t], r11"           "\n"
+	 "movs %[t], 252"           "\n"
+	 "ldm %[scanline]!, {%[c]}"   "\n"
 	 "mode13InnerLoopB:"
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
 	 "	bne mode13InnerLoopB"     "\n"
-	 
-	 "mov %[scanline], r10"    "\n"
+
+	 "subs %[scanline], 220"    "\n"
+	 "subs %[scanline], 224"    "\n"
 	 "movs %[t], 1"              "\n"
 	 "movs %[c], 88"             "\n"
-	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
+	 "add %[y], %[t]"            "\n"
 	 "cmp %[y], %[c]"            "\n"
 	 "bne mode13OuterLoop"       "\n" // if y != 88, loop
-	 
+
 	 : // outputs
 	   [c]"+l" (c),
 	   [t]"+l" (t),
 	   [x]"+l" (x),
 	   [y]"+h" (y),  // +:Read-Write l:lower (0-7) register
-	   [scrbuf]"+l" (scrbuf)
-	   
+	   [scrbuf]"+l" (scrbuf),
+	   [offset]"+l" (offset)
+
 	 : // inputs
 	   [LCD]"l" (0xA0002188),
 	   [scanline]"l" (scanline),
-	   [paletteptr]"l" (paletteptr),
-	   [offset]"l" (offset)
+	   [paletteptr]"l" (paletteptr)
+
 	 : // clobbers
-	   "cc", "r10", "r11"
+	   "cc", "r10"
        );
 
 #else
    uint8_t* d = scrbuf;// point to beginning of line in data
    for(y=0;y<88;y++){
-     
+
      uint32_t* s = scanline;
-     
+
      for(x=0;x<110;x+=10){
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
      }
-     
+
      s = scanline;
      uint32_t c = *s;
      for(x=0;x<110;x+=10){
@@ -1843,13 +1988,113 @@
        *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
        *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
      }
-   
+
    }
 #endif
  }
 
+ // Streams the 8-bit indexed buffer scrbuf to the LCD: each byte is looked up in paletteptr,
+ // shifted left by 3, written to the LCD data port and clocked out with two WR strobes.
+ void Pokitto::lcdRefreshMode64( uint8_t * scrbuf, uint16_t* paletteptr ){
+   uint8_t *end = &scrbuf[ POK_SCREENBUFFERSIZE+4 ]; // loop sentinel (taken before the FPS-counter skip); +4 because the asm pointer runs one word ahead
+   write_command_16(0x03); write_data_16(0x1038);
+   write_command(0x20); write_data(0); // NOTE(review): 0x20 receives a second data write just below - confirm this first one is needed
+#ifdef PROJ_SHOW_FPS_COUNTER
+  write_data(8);
+  scrbuf += 110*8; // skip the first 110*8 bytes (presumably the 8 rows kept for the FPS counter - confirm)
+#else
+  write_data(0);
+#endif
+   write_command(0x21); write_data(0);
+   write_command(0x22);
+   CLR_CS_SET_CD_RD_WR;
+   SET_MASK_P2;
+   // TGL is the bit stored to toggle WR; CLR (252) and the literal 124 are byte offsets from LCD (presumably the port's CLR/SET registers - confirm)
+   uint32_t TGL = 1<<12, CLR = 252, c = 0, t = 0; // c/t zeroed: they are "+l" (read-write) asm operands
+#ifndef __ARMCC_VERSION
+   asm volatile(
+	 ".syntax unified"         "\n"
+	 "ldm %[scrbuf]!, {%[c]}" "\n" // c = next four bytes, scrbuf advances by 4
+	 "lsls %[t], %[c], 24" 			"\n" // t = lowest byte of c, moved to the top of the register
+	 "mode64loop%=:"    "\n"
+	 "lsrs %[c], %[c], 8" 			"\n"
+	 "lsrs %[t], %[t], 23" 			"\n" // t = byte * 2 = byte offset of its 16-bit palette entry
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[t], [%[LCD], 0]" 		"\n" // first byte's colour goes out
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsls %[t], %[c], 24" 			"\n"
+	 "lsrs %[c], %[c], 8" 			"\n"
+	 "lsrs %[t], %[t], 23" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 // second byte's colour goes out; the third is looked up between its two strobes
+	 "str %[t], [%[LCD], 0]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsls %[t], %[c], 24" 			"\n"
+	 "lsrs %[t], %[t], 23" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[t], [%[LCD], 0]" 		"\n" // third byte's colour goes out
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsrs %[c], %[c], 8" 			"\n"
+	 "lsls %[t], %[c], 1" 			"\n" // only the fourth byte is left in c, so c * 2 is its palette offset
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 // fourth byte's colour goes out; the next word is fetched between its two strobes
+	 "str %[t], [%[LCD], 0]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldm %[scrbuf]!, {%[c]}" "\n" // on the final pass this loads the word after the last displayed bytes; it is never drawn
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsls %[t], %[c], 24" 			"\n"
+	 "cmp %[scrbuf], %[end]" "\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 // the str above does not touch the flags, so bne still sees the result of cmp
+	 "bne mode64loop%=" "\n"
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [scrbuf]"+l" (scrbuf)
+
+	 : // inputs
+	   [CLR]"l" (CLR),
+	   [TGL]"l" (TGL),
+	   [LCD]"l" (0xA0002188),
+	   [paletteptr]"l" (paletteptr),
+	   [end]"h" (end)
+	 // "memory": the asm reads the scrbuf/paletteptr data and writes GPIO, none of which are listed as operands
+	 : // clobbers
+	   "cc", "memory"
+       );
+
+#else
+   // Plain C++ path used when __ARMCC_VERSION is defined: same lookup, two TGL_WR_OP strobes per *LCD write
+   c = uint32_t(paletteptr[(*scrbuf)&255])<<3;
+   while( scrbuf < end-4 ){
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+   }
+   
+#endif
+ } // NOTE(review): SET_MASK_P2 is never undone here, whereas lcdRefreshMixMode ends with CLR_MASK_P2 - confirm intent
+
+ 
+ 
 void Pokitto::lcdRefreshMode14(uint8_t * scrbuf, uint16_t* paletteptr) {
-uint16_t x,y;
+uint16_t x,y,data,xptr;
 uint16_t scanline[176]; uint16_t* scptr;
 uint8_t *d;
 
@@ -1986,10 +2231,99 @@
 #else
 
 void Pokitto::lcdRefreshMode15(uint16_t* paletteptr, uint8_t* scrbuf){
-uint16_t x,y;
+//    #define __ARMCC_VERSION
+#ifndef __ARMCC_VERSION
+    
+#define MODE15_LOOP				\
+    "ands %[tmp], %[color]" "\n"		\
+	"lsrs %[tmp], 2" "\n"			\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n" \
+	"str %[tmp], [%[LCD]]" "\n"		\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"	\
+	"movs %[tmp], 0x0F" "\n"		\
+	"ands %[tmp], %[color]" "\n"		\
+	"str %[WRBit], [%[LCD], 124]" "\n"	\
+	"lsls %[tmp], 2" "\n"			\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n" \
+	"str %[tmp], [%[LCD]]" "\n"		\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"	\
+	"movs %[tmp], 0xF0" "\n"		\
+	"lsrs %[color], 8" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"
+
+#define MODE15_ENDLOOP					\
+    "ands %[tmp], %[color]" "\n"			\
+	"lsrs %[tmp], 2" "\n"				\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n"		\
+	"str %[tmp], [%[LCD]]" "\n"			\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"		\
+	"movs %[tmp], 0x0F" "\n"			\
+	"ands %[tmp], %[color]" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"		\
+	"lsls %[tmp], 2" "\n"				\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n"		\
+	"str %[tmp], [%[LCD]]" "\n"			\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"		\
+	"ldm %[scrbuf]!, {%[color]}" "\n"		\
+	"movs %[tmp], 0xF0" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"
+    
+  uint8_t *end=&scrbuf[POK_SCREENBUFFERSIZE]+4;
+  volatile uint32_t palette[16];
+  for( uint32_t i=0; i<16; ++i )
+      palette[i] = uint32_t(paletteptr[i]) << 3;
+
+  write_command(0x03); write_data(0x1038);
+  write_command(0x21);  // Vertical DRAM Address
+  write_data(0);
+  write_command(0x20);  // Horizontal DRAM Address
+#ifdef PROJ_SHOW_FPS_COUNTER
+  write_data(8);
+  scrbuf += 110*8;
+#else
+  write_data(0);
+#endif
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+
+
+  SET_MASK_P2;
+
+  uint32_t WRBit = 1<<12, color, tmp;
+  asm volatile(
+      ".syntax unified" "\n"
+      "ldm %[scrbuf]!, {%[color]}" "\n"      
+      "movs %[tmp], 0xF0" "\n"
+      "mode15Loop%=:" "\n"
+      MODE15_LOOP
+      MODE15_LOOP
+      MODE15_LOOP
+      MODE15_ENDLOOP      
+      "cmp %[end], %[scrbuf]" "\n"
+      "bne mode15Loop%=" "\n"
+      :
+      [tmp]"+l" (tmp),
+      [color]"+l" (color),
+      [end]"+h" (end),
+      [scrbuf]"+l" (scrbuf),
+      [WRBit]"+l" (WRBit)
+      
+      :
+      [CLR]"l" (252),
+      [LCD]"l" (0xA0002188),
+      [palette]"l" (palette)
+      
+      :
+      "cc"
+      );
+    
+#else
+    
+uint16_t x,y,xptr;
 uint16_t scanline[2][176]; // read two nibbles = pixels at a time
-uint8_t *d;
+uint8_t *d, yoffset=0;
 
+xptr = 0;
 //setDRAMptr(xptr,yoffset);
 
 write_command(0x20); write_data(0);
@@ -2042,11 +2376,130 @@
         setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;
     }
   }
+
+#endif
+
 }
 #endif //ADEKTOSMODE15
 
+void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint16_t * palettePointer, const uint8_t * scanType)
+{ // Sends 176 rows of 220 pixels to the LCD; scanType[y / 2] selects how each pair of rows is decoded from screenBuffer.
+	write_command(0x03);
+	write_data(0x1038);
+
+	// Horizontal DRAM Address
+	write_command(0x20);
+	write_data(0);
+
+	// Vertical DRAM Address
+	write_command(0x21);
+	write_data(0);
+
+	// write data to DRAM
+	write_command(0x22);
+	CLR_CS_SET_CD_RD_WR;
+	SET_MASK_P2;
+
+	uint32_t scanline[221] = {}; // 220 pixels + 1 zeroed pad: the write loop prefetches scanline[++i] once past the last pixel
+
+	// point to beginning of line in data
+	const uint8_t * d = screenBuffer;
+	for(uint32_t y = 0; y < 176; ++y)
+	{
+		// find colours in one scanline
+		uint8_t scanTypeIndex = y >> 1; // two consecutive rows share one scanType entry
+		uint8_t lineIndex = 0;
+		switch(scanType[scanTypeIndex])
+		{
+			case 0: // one byte per source pixel: palettePointer[byte], each colour stored twice
+			{
+				// point to beginning of line in data (both rows of the pair re-read the same 110 bytes)
+				d = &screenBuffer[110 * scanTypeIndex];
+				for(uint8_t x = 0; x < (220 / 2); ++x)
+				{
+					uint32_t color = static_cast<uint32_t>(palettePointer[*d]) << 3;
+					++d;
+					scanline[lineIndex] = color;
+					++lineIndex;
+					scanline[lineIndex] = color;
+					++lineIndex;
+				}
+				break;
+			}
+			case 1: // two nibbles per byte: palettePointer[256 + nibble], each colour stored twice
+			{
+				for(uint8_t x = 0; x < (220 / 4); ++x)
+				{
+					uint8_t t = *d; // d is not reset here: it continues from where the previous row stopped
+					++d;
+
+					uint32_t color1 = static_cast<uint32_t>(palettePointer[256 + (t >> 4)]) << 3;
+					scanline[lineIndex] = color1;
+					++lineIndex;
+					scanline[lineIndex] = color1;
+					++lineIndex;
+
+					uint32_t color2 = static_cast<uint32_t>(palettePointer[256 + (t & 0xF)]) << 3;
+					scanline[lineIndex] = color2;
+					++lineIndex;
+					scanline[lineIndex] = color2;
+					++lineIndex;
+				}
+				break;
+			}
+			case 2: // four 2-bit fields per byte: palettePointer[272 + field], highest bits first, no doubling
+			{
+				for(uint8_t x = 0; x < (220 / 4); ++x)
+				{
+					uint8_t t = *d; // d is not reset here: it continues from where the previous row stopped
+					++d;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 6) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 4) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 2) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 0) & 0x03)]) << 3;
+					++lineIndex;
+				}
+				break;
+			}
+		} // NOTE(review): no default case - any other scanType value re-sends the previous scanline and leaves d where it was
+
+        uint32_t color = scanline[0];
+        #define WRITE_SCANLINE *LCD = color; TGL_WR_OP(color = scanline[++i]);
+		// 44 WRITE_SCANLINE expansions per pass and 5 passes give 220 pixels; i is exactly 220 when the loop exits
+		volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
+		for (uint8_t i = 0; i < 220;)
+		{
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+		}
+
+		#undef WRITE_SCANLINE
+	}
+
+	CLR_MASK_P2;
+}
+
+// Passes the 16-bit value c to setup_data_16(), then pulses WR with CLR_WR followed by SET_WR.
 void Pokitto::blitWord(uint16_t c) {
     setup_data_16(c);CLR_WR;SET_WR;
 }
 
 
+
--- a/POKITTO_HW/HWLCD.h	Tue Oct 23 16:21:01 2018 +0000
+++ b/POKITTO_HW/HWLCD.h	Sat Mar 23 19:22:35 2019 +0000
@@ -81,6 +81,8 @@
 extern void lcdRefreshMode3(uint8_t *, uint16_t*);
 extern void lcdRefreshModeGBC(uint8_t *, uint16_t*);
 extern void lcdRefreshMode13(uint8_t *, uint16_t*, uint8_t);
+extern void lcdRefreshMixMode(const uint8_t *, const uint16_t*, const uint8_t*); // (screenBuffer, palettePointer, scanType)
+extern void lcdRefreshMode64(uint8_t *, uint16_t*); // (scrbuf, paletteptr)
 
 extern void lcdRefreshMode15(uint16_t*, uint8_t*);
 
@@ -147,14 +149,14 @@
 #define CLR_MASK_P2 LPC_GPIO_PORT->MASK[2] = 0; // all on
 
 #define TGL_WR_OP(OP)							\
-  LPC_GPIO_PORT->SET[LCD_WR_PORT] = 1 << LCD_WR_PIN;			\
+  LPC_GPIO_PORT->CLR[LCD_WR_PORT] = 1 << LCD_WR_PIN;			\
   OP;									\
-  LPC_GPIO_PORT->CLR[LCD_WR_PORT] = 1 << LCD_WR_PIN;
+  LPC_GPIO_PORT->SET[LCD_WR_PORT] = 1 << LCD_WR_PIN;			// WR pin is cleared, OP runs, then WR is set again (this revision swaps the earlier SET-then-CLR order)
 
 #define TGL_WR								\
-  LPC_GPIO_PORT->SET[LCD_WR_PORT] = 1 << LCD_WR_PIN;			\
-  __asm("nop"); \
-  LPC_GPIO_PORT->CLR[LCD_WR_PORT] = 1 << LCD_WR_PIN;
+  LPC_GPIO_PORT->CLR[LCD_WR_PORT] = 1 << LCD_WR_PIN;			\
+  __asm("nop");								\
+  LPC_GPIO_PORT->SET[LCD_WR_PORT] = 1 << LCD_WR_PIN;			// WR pin is cleared, one nop executes, then WR is set again (this revision swaps the earlier SET-then-CLR order)
 
 /**************************************************************************/
 /**                          SETUP GPIO & DATA                           **/