PokittoLib is the library needed for programming the Pokitto DIY game console (www.pokitto.com)

Revision:
51:113b1d84c34f
Parent:
48:30b068b0d9e8
Child:
60:8b6a110feeea
diff -r ca94812a17b6 -r 113b1d84c34f POKITTO_HW/HWLCD.cpp
--- a/POKITTO_HW/HWLCD.cpp	Wed Jun 27 13:14:54 2018 +0000
+++ b/POKITTO_HW/HWLCD.cpp	Sun Jul 01 06:32:10 2018 +0000
@@ -37,10 +37,8 @@
 #include "HWLCD.h" //HWLCD.h" #include "HWLCD.h"
 #include "Pokitto_settings.h"
 
-#ifndef DISABLEAVRMIN
-#define max(a,b) ((a)>(b)?(a):(b))
-#define min(a,b) ((a)<(b)?(a):(b))
-#endif // DISABLEAVRMIN
+#define avrmax(a,b) ((a)>(b)?(a):(b)) // larger of a and b; plain text substitution, so an argument may be evaluated twice
+#define avrmin(a,b) ((a)<(b)?(a):(b)) // smaller of a and b; same double-evaluation caveat
 
 #ifdef DISABLEAVRMIN
 #include <algorithm>
@@ -59,6 +57,7 @@
     pwmout_t backlightpwm;
 #endif
 
+volatile uint32_t *LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188); // 32-bit register at 0xA0002188; the refresh routines write pixel words through it
 
 /**************************************************************************/
 /*!
@@ -749,8 +748,8 @@
                     continue;
 
                 // Detect the dirty rect x-span by combining the previous and current sprite position.
-                int16_t sprDirtyXMin = min(sprx, sprOldX);
-                int16_t sprDirtyXMax = max(sprx, sprOldX);
+                int16_t sprDirtyXMin = avrmin(sprx, sprOldX);
+                int16_t sprDirtyXMax = avrmax(sprx, sprOldX);
                 if (isCurrentSpriteOutOfScreen)
                     sprDirtyXMax = sprOldX;
                 if (isOldSpriteOutOfScreen)
@@ -763,15 +762,15 @@
                     // *** COMBINE DIRTY RECTS FOR THIS SCANLINE GROUP ***
 
                     // Dirty rect
-                    int sprDirtyYMin = min(spry, sprOldY);
-                    sprDirtyYMin = max((int)sprDirtyYMin, 0);
-                    int sprDirtyYMax = max(spry, sprOldY);
+                    int sprDirtyYMin = avrmin(spry, sprOldY);
+                    sprDirtyYMin = avrmax((int)sprDirtyYMin, 0);
+                    int sprDirtyYMax = avrmax(spry, sprOldY);
                     if (isCurrentSpriteOutOfScreen)
                         sprDirtyYMax = sprOldY;
                     if (isOldSpriteOutOfScreen)
                         sprDirtyYMax = spry;
                     int sprDirtyYMaxEnd = sprDirtyYMax + sprh - 1;
-                    sprDirtyYMaxEnd = min(sprDirtyYMaxEnd, LCDHEIGHT - 1);  // Should use LCDHEIGHT instead of screenH? Same with other screen* ?
+                    sprDirtyYMaxEnd = avrmin(sprDirtyYMaxEnd, LCDHEIGHT - 1);  // Should use LCDHEIGHT instead of screenH? Same with other screen* ?
 
                     // Get the scanline min and max y values for drawing
                     if (sprDirtyYMin < scanlineMinY)
@@ -948,91 +947,160 @@
     #endif
 }
 
-void Pokitto::lcdRefreshMode2(uint8_t * scrbuf, uint16_t* paletteptr) {
-uint16_t x,y;
-uint16_t scanline[2][88]; // read two nibbles = pixels at a time
-uint8_t *d;
+  
+#define MODE2_INNER_LOOP_B /* replays ONE saved pixel word; expects t = value to store at LCD+124/LCD+252, c is scratch */	\
+  "	ldm %[scanline]!, {%[c]}"   "\n" /* c = *scanline++ (word saved by loop A) */	\
+	       "	str %[c], [%[LCD], 0]"    "\n" /* pixel word -> LCD+0 */	\
+	       "	str %[t], [%[LCD], 124]"  "\n" /* t -> LCD+124 (presumably the write strobe - TODO confirm) */	\
+	       "	movs %[c], 252"   "\n" /* offset 252 is kept in a register for the stores below */	\
+	       "	str %[t], [%[LCD], %[c]]" "\n" /* t -> LCD+252 */	\
+	       "	str %[t], [%[LCD], 124]"  "\n" /* t -> LCD+124 again */	\
+	       "	subs %[x], 1"             "\n" /* x--; the bne after the last expansion tests these flags */	\
+	       "	str %[t], [%[LCD], %[c]]" "\n" /* t -> LCD+252 again (str does not alter the flags) */	\
+    
+// Mode 2 refresh: each byte at scrbuf holds two pixels (high nibble first); both are mapped through paletteptr, 55 bytes per row for 88 rows. Every row is sent once and then replayed from scanline[].
+void Pokitto::lcdRefreshMode2(uint8_t * scrbuf, uint16_t* paletteptr ) {
+  uint32_t x=0,y=0,byte=0,c=0,t=1<<12; // all are read-write asm operands, so none may be indeterminate; y is the row counter and must start at 0
+  uint32_t scanline[110]; // palette-expanded words of the current row, replayed after the row was sent once
+
+  write_command(0x03); write_data(0x1038);
+  write_command(0x20);  // Horizontal DRAM Address
+  write_data(0);  // 0
+  write_command(0x21);  // Vertical DRAM Address
+  write_data(1);
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+  SET_MASK_P2;
+
+#ifndef __ARMCC_VERSION
+   asm volatile(
+	 ".syntax unified"         "\n"
+	 // r10 keeps the start of scanline[], r11 keeps t (1<<12) while t itself is reused as scratch
+	 "mov r10, %[scanline]"    "\n"
+	 "mov r11, %[t]"           "\n"
+
+	 "mode2OuterLoop:"        "\n"
+
+	 "movs %[x], 110"          "\n"
+	 "mode2InnerLoopA:"
+	 // Loop A: one source byte = two pixels per pass. Each pixel is looked up, shifted left by 3, written
+	 // to LCD+0, saved to scanline[] and followed by stores of the r11 value at LCD+124 / LCD+252.
+	 "	ldrb %[byte], [%[scrbuf],0]"   "\n"	// byte = *scrbuf
+	 "	lsrs %[c], %[byte], 4"    "\n"	// c = high nibble
+
+	 "	movs %[t], 15" "\n"
+	 "	ands %[byte], %[t]"    "\n"	// byte = low nibble
+
+	 "	lsls %[c], 1"             "\n"	// *2: palette entries are 16-bit
+	 "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"	// t = palette colour of the high nibble
+	 "	lsls %[t], %[t], 3"       "\n"	// t <<= 3
+	 "	str %[t], [%[LCD], 0]"    "\n"	// pixel word -> LCD+0
+	 "	mov %[c], r11" "\n"	// c = 1<<12
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"	// *scanline++ = pixel word, for loop B
+	 "	movs %[t], 252"   "\n"	// offset 252 is kept in a register
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	lsls %[byte], %[byte], 1"             "\n"	// low nibble *2 for the second lookup
+	 "	str %[c], [%[LCD], %[t]]" "\n"
 
-write_command(0x20);  // Horizontal DRAM Address
-write_data(0);  // 0
-write_command(0x21);  // Vertical DRAM Address
-write_data(0);
-write_command(0x22); // write data to DRAM
-CLR_CS_SET_CD_RD_WR;
+	 "	ldrh %[t], [%[paletteptr], %[byte]]"      "\n"	// t = palette colour of the low nibble
+	 "	lsls %[t], %[t], 3"       "\n"
+	 "	str %[t], [%[LCD], 0]"    "\n"
+	 "	mov %[c], r11" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"
+	 "	movs %[t], 252"   "\n"
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	adds %[scrbuf], %[scrbuf], 1" "\n"	// next source byte
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+
+	 "	subs %[x], 2"          "\n"	// two pixels done
+	 "	bne mode2InnerLoopA"  "\n"
 
-for(x=0;x<110;x+=2)
+	 "mov %[scanline], r10"    "\n"
+	 "movs %[x], 110"          "\n"
+	 "mov %[t], r11"           "\n"
+	 "mode2InnerLoopB:"
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 "	bne mode2InnerLoopB"     "\n"
+
+	 "mov %[scanline], r10"    "\n"
+	 "movs %[t], 1"              "\n"
+	 "movs %[c], 88"             "\n"
+	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
+	 "cmp %[y], %[c]"            "\n"
+	 "bne mode2OuterLoop"       "\n" // if y != 88, loop
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [x]"+l" (x),
+	   [y]"+h" (y),  // +:Read-Write h:high (r8-r15) register
+	   [scrbuf]"+l" (scrbuf),
+	   [byte]"+l" (byte)  // written by the asm (ldrb/ands/lsls), so it has to be an output operand
+	 : // inputs
+	   [LCD]"l" (0xA0002188),
+	   [scanline]"l" (scanline),  // advanced by stm/ldm, but restored from r10 before the asm ends
+	   [paletteptr]"l" (paletteptr)
+	   // l: low (r0-r7) register
+	 : // clobbers
+	   "cc", "memory", "r10", "r11"  // "memory": the asm reads the screen buffer/palette and writes scanline[]
+       );
+
+
+#else
+uint8_t* d = scrbuf;// point to beginning of line in data
+// NOTE(review): with PROJ_SHOW_FPS_COUNTER the loop below starts at y=4 while d still starts at scrbuf - confirm this is intended
+  #ifdef PROJ_SHOW_FPS_COUNTER
+  setDRAMptr(0, 8);
+  wait_us(200); // Add wait to compensate skipping of 8 lines. Makes FPS counter to show the correct value.
+  for(y=4;y<88;y++)
+  #else
+  for(y=0;y<88;y++)
+  #endif
   {
-    d = scrbuf+(x>>1);// point to beginning of line in data
-    /** find colours in one scanline **/
+    // first pass: look up both pixels of every byte, send them and remember them in scanline[]
+
     uint8_t s=0;
-    for(y=0;y<88;y++)
+    for(x=0;x<110;x+=2)
     {
-    uint8_t t = *d >> 4; // higher nibble
-    uint8_t t2 = *d & 0xF; // lower nibble
-    /** higher nibble = left pixel in pixel pair **/
-    scanline[0][s] = paletteptr[t];
-    scanline[1][s++] = paletteptr[t2];
-    /** testing only **/
-    //scanline[0][s] = 0xFFFF*(s&1);
-    //scanline[1][s] = 0xFFFF*(!(s&1));
-    //s++;
-    /** until here **/
-    d+=110/2; // jump to read byte directly below in screenbuffer
-    }
-    s=0;
-    /** draw scanlines **/
-    /** leftmost scanline twice**/
-
-    #ifdef PROJ_SHOW_FPS_COUNTER
-    if (x<4) continue;
-    setDRAMptr(x<<1, 0);
-    #endif
-
-    for (s=0;s<88;) {
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
+      uint8_t t = *d++;
+      uint32_t color;
+      color = uint32_t(paletteptr[t>>4])<<3;
+      scanline[s]=*LCD=color;TGL_WR_OP(s++);TGL_WR;
+      color = uint32_t(paletteptr[t&0xF])<<3;
+      scanline[s]=*LCD=color;TGL_WR_OP(s++);TGL_WR;
     }
 
-    for (s=0;s<88;) {
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
-    /** rightmost scanline twice**/
-    //setDRAMptr(xptr++,yoffset);
-    for (s=0;s<88;) {
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
+    s=0;
+    for (s=0;s<110;) {
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
+      *LCD = (scanline[s]);TGL_WR_OP(s++);TGL_WR;
     }
 
-    for (s=0;s<88;) {
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
   }
+#endif
+
+ CLR_MASK_P2;
 }
 
 void Pokitto::lcdRefreshMode3(uint8_t * scrbuf, uint16_t* paletteptr) {
@@ -1646,75 +1714,141 @@
     #endif
 }
 
+#define MODE13_INNER_LOOP_A /* sends ONE 8-bit pixel and saves its word to scanline[]; expects r11 = value to store at LCD+124/LCD+252 */	\
+  "	ldrb %[t], [%[scrbuf],0]"   "\n" /* t = *scrbuf */	\
+	       "	add %[t], %[t], %[offset]"  "\n" /* t += offset */	\
+	       "	uxtb %[c], %[t] " "\n" /* c = t & 255 */	\
+	       "	lsls %[c], 1"             "\n" /* *2: the ldrh below reads a 16-bit entry */	\
+	       "	ldrh %[t], [%[paletteptr], %[c]]"      "\n" /* t = palette colour */	\
+	       "	lsls %[t], %[t], 3"       "\n" /* t <<= 3 */	\
+	       "	str %[t], [%[LCD], 0]"    "\n" /* pixel word -> LCD+0 */	\
+	       "	mov %[c], r11" "\n" /* c = value kept in r11 */	\
+	       "	str %[c], [%[LCD], 124]"  "\n" /* c -> LCD+124 (presumably the write strobe - TODO confirm) */	\
+	       "	stm %[scanline]!, {%[t]}" "\n" /* *scanline++ = pixel word, for the replay loop */	\
+	       "	movs %[t], 252"   "\n" /* offset 252 is kept in a register */	\
+	       "	str %[c], [%[LCD], %[t]]" "\n" /* c -> LCD+252 */	\
+	       "	str %[c], [%[LCD], 124]"  "\n" /* c -> LCD+124 again */	\
+	       "	adds %[scrbuf], %[scrbuf], 1" "\n" /* next source byte */	\
+	       "	str %[c], [%[LCD], %[t]]" "\n" /* c -> LCD+252 again */
+  
+#define MODE13_INNER_LOOP_B /* replays ONE saved pixel word; expects t = value to store at LCD+124/LCD+252, c is scratch */	\
+  "	ldm %[scanline]!, {%[c]}"   "\n" /* c = *scanline++ (word saved by loop A) */	\
+	       "	str %[c], [%[LCD], 0]"    "\n" /* pixel word -> LCD+0 */	\
+	       "	str %[t], [%[LCD], 124]"  "\n" /* t -> LCD+124 (presumably the write strobe - TODO confirm) */	\
+	       "	movs %[c], 252"   "\n" /* offset 252 is kept in a register for the stores below */	\
+	       "	str %[t], [%[LCD], %[c]]" "\n" /* t -> LCD+252 */	\
+	       "	str %[t], [%[LCD], 124]"  "\n" /* t -> LCD+124 again */	\
+	       "	subs %[x], 1"             "\n" /* x--; the bne after the last expansion tests these flags */	\
+	       "	str %[t], [%[LCD], %[c]]" "\n" /* t -> LCD+252 again (str does not alter the flags) */	\
 
-void Pokitto::lcdRefreshMode13(uint8_t * scrbuf, uint16_t* paletteptr, uint8_t offset){
-uint16_t x,y;
-uint16_t scanline[2][110]; // read two nibbles = pixels at a time
-uint8_t *d;
-
-write_command(0x20); write_data(0);
-write_command(0x21); write_data(0);
-write_command(0x22);
-CLR_CS_SET_CD_RD_WR;
+ // Mode 13 refresh: one byte per pixel; (pixel + offset) & 255 indexes paletteptr, 110 pixels per row for 88 rows. Every row is sent once and then replayed from scanline[].
+ void Pokitto::lcdRefreshMode13(uint8_t * scrbuf, uint16_t* paletteptr, uint8_t offset){
+   uint32_t scanline[110 + 1]; // one word per pixel of the current row, plus one padding word (see next line)
+   scanline[110] = 0; // the C fallback's replay loop pre-loads one word past the last pixel; keep that read in bounds and defined
+   write_command_16(0x03); write_data_16(0x1038);
+   write_command(0x20); write_data(0);
+   write_command(0x21); write_data(1);
+   write_command(0x22);
+   CLR_CS_SET_CD_RD_WR;
+   SET_MASK_P2;
+
+   uint32_t x=0, y=0, c=0, t=0; // read-write asm operands below: give each a defined value (y is the row counter)
+
+#ifndef __ARMCC_VERSION
+   asm volatile(
+	 ".syntax unified"         "\n"
+	 // r10 keeps the start of scanline[]; r11 keeps 1<<12, the value stored at LCD+124 / LCD+252
+	 "mov r10, %[scanline]"    "\n"
+
+	 "movs %[t], 1"            "\n"
+	 "lsls %[t], %[t], 12"     "\n"
+	 "mov r11, %[t]"           "\n"
+
+	 "mode13OuterLoop:"        "\n"
+	 // Loop A: two pixels per pass, each sent and saved to scanline[]
+	 "movs %[x], 110"          "\n"
+	 "mode13InnerLoopA:"
+	 MODE13_INNER_LOOP_A
+	 MODE13_INNER_LOOP_A
+	 "	subs %[x], 2"          "\n"
+	 "	bne mode13InnerLoopA"  "\n"
 
-for(x=0;x<110;x+=2)
-  {
-    d = scrbuf+x;// point to beginning of line in data
-    uint8_t s=0;
-    for(y=0;y<88;y++)
-    {
-        uint8_t t = *d;
-        uint8_t t1 = *(d+1);
-        scanline[0][s] = paletteptr[(t+offset)&255];
-        scanline[1][s++] = paletteptr[(t1+offset)&255];
-        d+=110; // jump to read byte directly below in screenbuffer
-    }
-    s=0;
-    for (s=0;s<88;) {
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
-    for (s=0;s<88;) {
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[0][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
-    for (s=0;s<88;) {
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
-    for (s=0;s<88;) {
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-        setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;CLR_WR;SET_WR;
-    }
-  }
+	 "mov %[scanline], r10"    "\n"
+	 "movs %[x], 110"          "\n"
+	 "mov %[t], r11"           "\n"
+	 "mode13InnerLoopB:"
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 "	bne mode13InnerLoopB"     "\n"
+
+	 "mov %[scanline], r10"    "\n"
+	 "movs %[t], 1"              "\n"
+	 "movs %[c], 88"             "\n"
+	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
+	 "cmp %[y], %[c]"            "\n"
+	 "bne mode13OuterLoop"       "\n" // if y != 88, loop
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [x]"+l" (x),
+	   [y]"+h" (y),  // +:Read-Write h:high (r8-r15) register
+	   [scrbuf]"+l" (scrbuf)
+
+	 : // inputs (l: low r0-r7 register)
+	   [LCD]"l" (0xA0002188),
+	   [scanline]"l" (scanline),  // advanced by stm/ldm, but restored from r10 before the asm ends
+	   [paletteptr]"l" (paletteptr),
+	   [offset]"l" (offset)
+	 : // clobbers
+	   "cc", "memory", "r10", "r11"  // "memory": the asm reads the screen buffer/palette and writes scanline[]
+       );
 
-}
-
-
+#else
+   uint8_t* d = scrbuf;// point to beginning of line in data
+   for(y=0;y<88;y++){
+
+     uint32_t* s = scanline;
+
+     for(x=0;x<110;x+=10){
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+     }
+
+     s = scanline;
+     uint32_t c = *s;
+     for(x=0;x<110;x+=10){
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+       *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
+     }
+   }
+#endif
+   CLR_MASK_P2; // counterpart of SET_MASK_P2 above; lcdRefreshMode2 ends the same way
+ }
 
 void Pokitto::lcdRefreshMode14(uint8_t * scrbuf, uint16_t* paletteptr) {
 uint16_t x,y,data,xptr;