PokittoLib is the library needed for programming the Pokitto DIY game console (www.pokitto.com)

Dependents:   YATTT sd_map_test cPong SnowDemo ... more

PokittoLib

Library for programming Pokitto hardware

How to Use

  1. Import this library into the online compiler (see the "import" button on the right-hand side)
  2. DO NOT import mbed-src anymore; a better version is now included inside PokittoLib
  3. Change My_settings.h according to your project
  4. Start coding!
Revision:
64:1d52d8287c39
Parent:
60:8b6a110feeea
Child:
71:531419862202
--- a/POKITTO_HW/HWLCD.cpp	Sun Oct 07 10:19:52 2018 +0000
+++ b/POKITTO_HW/HWLCD.cpp	Sat Mar 23 19:19:23 2019 +0000
@@ -453,11 +453,132 @@
  * @param updRectH The update rect.
  * @param paletteptr The screen palette.
 */
+// MODE1_LOOP: inline-asm fragment that outputs the four pixels packed in the low byte of %[c], then shifts %[c] right by 8.
+// On entry %[t] must hold (c & 0xF0) >> 1; on exit %[t] holds the same expression for the next byte. Clobbers %[t] and %[x].
+#define MODE1_LOOP					\
+  "	adds %[t], %[palette]"			"\n"	/* t = address of the high nibble's pair of 32-bit palette words */ \
+  "	ldm %[t], {%[t], %[x]}"			"\n"	/* t = first word of the pair, x = second word */ \
+  "	str %[t], [%[LCD], 0]"			"\n"	/* first colour to [LCD] */ \
+  "	movs %[t], 252"	"\n"				/* 252 is the offset named CLR elsewhere in this file */ \
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	/* WRBit to offset 252 ... */ \
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	/* ... then to offset 124 (presumably one WR strobe - TODO confirm) */ \
+  "	str %[x], [%[LCD], 0]"			"\n"	/* second colour to [LCD] */ \
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	movs %[t], 0x0F"			"\n"	/* between the two strobe stores: t = low nibble of c */ \
+  "	ands %[t], %[t], %[c]"			"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	\
+							/* low nibble: the same sequence for pixels three and four */ \
+  "	lsls %[t], 3"				"\n"    /* nibble * 8 = byte offset of its pair of palette words */ \
+  "	adds %[t], %[palette]"			"\n"	\
+  "	ldm %[t], {%[t], %[x]}"			"\n"	\
+  "	str %[t], [%[LCD], 0]"			"\n"	\
+  "	movs %[t], 252"	"\n"				\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n"	\
+  "	str %[x], [%[LCD], 0]"			"\n"	\
+  "	str %[WRBit], [%[LCD], %[t]]"   	"\n"	\
+  "	lsrs %[c], 8"				"\n"	/* advance to the next byte of c */ \
+  "	movs %[t], 0xF0"			"\n"	/* t = (c & 0xF0) >> 1: high nibble * 8, ready for the next fragment */ \
+  "	ands %[t], %[t], %[c]"			"\n"	\
+  "	lsrs %[t], %[t], 1"			"\n"	\
+  "	str %[WRBit], [%[LCD], 124]"		"\n" // completes the store pair started before the lsrs above
+
+
 void Pokitto::lcdRefreshMode1(uint8_t * scrbuf, uint8_t updRectX, uint8_t updRectY, uint8_t updRectW, uint8_t updRectH, uint16_t* paletteptr) {
 
-    uint16_t x,y;
+
+#ifdef XPERIMENTAL
+//#define __ARMCC_VERSION 1
+#endif
+
+#ifndef __ARMCC_VERSION
+
+  write_command(0x03); write_data(0x1038);
+  write_command(0x20);  // Horizontal DRAM Address
+  write_data(0);
+  write_command(0x21);  // Vertical DRAM Address
+  write_data(0);
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+
+  uint8_t *end=&scrbuf[POK_SCREENBUFFERSIZE>>1]+4;
+
+  volatile uint32_t palette[32];
+  for( uint32_t i=0; i<16; ++i ){
+    palette[(i<<1)+1] = static_cast<uint32_t>(paletteptr[i&3 ]) << 3;
+    palette[(i<<1)  ] = static_cast<uint32_t>(paletteptr[i>>2]) << 3;
+  }
+
+  SET_MASK_P2;
+
+  uint32_t c, WRBit = 1<<12;
+
+  register uint32_t x asm("r2");
+  register uint32_t t asm("r1");
+
+  asm volatile(
+
+	 ".syntax unified"         		"\n"
+	 "ldm %[scrbuf]!, {%[c]}"		"\n" // load 4 bytes (16 pixels)
+	 "movs %[t], 0xF0"			"\n"
+	 "ands %[t], %[t], %[c]"		"\n"
+	 "lsrs %[t], %[t], 1"			"\n"
+	 "mode1Loop%=:" 			"\n"
+	 MODE1_LOOP
+	 MODE1_LOOP
+	 MODE1_LOOP
+	 "	adds %[t], %[palette]"			"\n"
+	 "	ldm %[t], {%[t], %[x]}"			"\n"
+	 "	str %[t], [%[LCD], 0]"			"\n"
+	 "	movs %[t], 252"	"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+	 "	str %[x], [%[LCD], 0]"			"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	movs %[t], 0x0F"			"\n"
+	 "	ands %[t], %[t], %[c]"			"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+
+	 "	lsls %[t], 3"				"\n"
+	 "	adds %[t], %[palette]"			"\n"
+	 "	ldm %[t], {%[t], %[x]}"			"\n"
+	 "	str %[t], [%[LCD], 0]"			"\n"
+	 "	movs %[t], 252"	"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+	 "	str %[x], [%[LCD], 0]"			"\n"
+	 "	str %[WRBit], [%[LCD], %[t]]"   	"\n"
+
+	 "	ldm %[scrbuf]!, {%[c]}"		"\n" // load next 4 bytes
+	 "	movs %[t], 0xF0"		"\n"
+	 "	ands %[t], %[t], %[c]"		"\n"
+	 "	lsrs %[t], %[t], 1"		"\n"
+	 "	str %[WRBit], [%[LCD], 124]"		"\n"
+
+	 "cmp %[end], %[scrbuf]"            	"\n"
+	 "bne mode1Loop%="       		"\n" // if scrbuf < end, loop
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [end]"+h" (end),
+	   [scrbuf]"+l" (scrbuf),
+	   [WRBit]"+l" (WRBit),
+	   [x]"+l" (x)
+
+	 : // inputs
+	   [LCD]"l" (0xA0002188),
+	   [palette]"l" (palette)
+
+	 : // clobbers
+	   "cc"
+	       );
+
+
+#else
+    uint16_t x,y,xptr;
     uint16_t scanline[4][176]; // read 4 half-nibbles = 4 pixels at a time
-    uint8_t *d;
+    uint8_t *d, yoffset=0;
 
     // If not the full screen is updated, check the validity of the update rect.
     if ( updRectX != 0 || updRectY != 0 ||updRectW != LCDWIDTH ||updRectH != LCDHEIGHT ) {
@@ -477,6 +598,7 @@
     xptr = 8;
     setDRAMptr(8, 0);
     #else
+    xptr = 0;
     setDRAMptr(0, 0);
     #endif
 
@@ -484,6 +606,7 @@
         d = scrbuf+(x>>2);// point to beginning of line in data
 
         /** find colours in one scanline **/
+        uint8_t s=0;
         d += (updRectY * 220/4);
         for (y=updRectY; y<updRectY+updRectH; y++) {
             uint8_t tdata = *d;
@@ -555,6 +678,7 @@
             }
         }
     }
+#endif
 
     #ifdef POK_SIM
     simulator.refreshDisplay();
@@ -945,7 +1069,7 @@
     #endif
 }
 
-  
+
 #define MODE2_INNER_LOOP_B				\
   "	ldm %[scanline]!, {%[c]}"   "\n"		\
 	       "	str %[c], [%[LCD], 0]"    "\n"	\
@@ -955,98 +1079,104 @@
 	       "	str %[t], [%[LCD], 124]"  "\n"	\
 	       "	subs %[x], 1"             "\n"	\
 	       "	str %[t], [%[LCD], %[c]]" "\n"	\
-    
+
 
 void Pokitto::lcdRefreshMode2(uint8_t * scrbuf, uint16_t* paletteptr ) {
-  uint32_t x,y;
+  uint32_t x,y,byte,c,t=1<<12;
   uint32_t scanline[110];
 
   write_command(0x03); write_data(0x1038);
   write_command(0x20);  // Horizontal DRAM Address
   write_data(0);  // 0
   write_command(0x21);  // Vertical DRAM Address
-  write_data(1);
+
+#ifndef __ARMCC_VERSION
+  write_data(1); // still has pixel 0 bug
   write_command(0x22); // write data to DRAM
   CLR_CS_SET_CD_RD_WR;
   SET_MASK_P2;
 
-#ifndef __ARMCC_VERSION
-   asm volatile(
+  #ifdef PROJ_SHOW_FPS_COUNTER
+  setDRAMptr(0, 8);
+  y=4;
+  #endif
+
+  asm volatile(
 	 ".syntax unified"         "\n"
-	 
+
 	 "mov r10, %[scanline]"    "\n"
 	 "mov r11, %[t]"           "\n"
-	 
+
 	 "mode2OuterLoop:"        "\n"
-	 
+
 	 "movs %[x], 110"          "\n"
 	 "mode2InnerLoopA:"
 
 
-	 "	ldrb %[byte], [%[scrbuf],0]"   "\n"				
+	 "	ldrb %[byte], [%[scrbuf],0]"   "\n"
 	 "	lsrs %[c], %[byte], 4"    "\n"
 
 	 "	movs %[t], 15" "\n"
-	 "	ands %[byte], %[t]"    "\n"		
-	 
-	 "	lsls %[c], 1"             "\n"			
-	 "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"	
-	 "	lsls %[t], %[t], 3"       "\n"			
-	 "	str %[t], [%[LCD], 0]"    "\n"			
-	 "	mov %[c], r11" "\n"				
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	stm %[scanline]!, {%[t]}" "\n"			
-	 "	movs %[t], 252"   "\n"				
-	 "	str %[c], [%[LCD], %[t]]" "\n"			
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	lsls %[byte], %[byte], 1"             "\n"			
+	 "	ands %[byte], %[t]"    "\n"
+
+	 "	lsls %[c], 1"             "\n"
+	 "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"
+	 "	lsls %[t], %[t], 3"       "\n"
+	 "	str %[t], [%[LCD], 0]"    "\n"
+	 "	mov %[c], r11" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"
+	 "	movs %[t], 252"   "\n"
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	lsls %[byte], %[byte], 1"             "\n"
 	 "	str %[c], [%[LCD], %[t]]" "\n"
 
-	 "	ldrh %[t], [%[paletteptr], %[byte]]"      "\n"	
-	 "	lsls %[t], %[t], 3"       "\n"			
-	 "	str %[t], [%[LCD], 0]"    "\n"			
-	 "	mov %[c], r11" "\n"				
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	stm %[scanline]!, {%[t]}" "\n"			
-	 "	movs %[t], 252"   "\n"				
-	 "	str %[c], [%[LCD], %[t]]" "\n"			
-	 "	str %[c], [%[LCD], 124]"  "\n"			
-	 "	adds %[scrbuf], %[scrbuf], 1" "\n"		
+	 "	ldrh %[t], [%[paletteptr], %[byte]]"      "\n"
+	 "	lsls %[t], %[t], 3"       "\n"
+	 "	str %[t], [%[LCD], 0]"    "\n"
+	 "	mov %[c], r11" "\n"
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	stm %[scanline]!, {%[t]}" "\n"
+	 "	movs %[t], 252"   "\n"
 	 "	str %[c], [%[LCD], %[t]]" "\n"
-	 
-	 "	subs %[x], 2"          "\n"	
+	 "	str %[c], [%[LCD], 124]"  "\n"
+	 "	adds %[scrbuf], %[scrbuf], 1" "\n"
+	 "	str %[c], [%[LCD], %[t]]" "\n"
+
+	 "	subs %[x], 2"          "\n"
 	 "	bne mode2InnerLoopA"  "\n"
 
 	 "mov %[scanline], r10"    "\n"
 	 "movs %[x], 110"          "\n"
 	 "mov %[t], r11"           "\n"
 	 "mode2InnerLoopB:"
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
-	 MODE2_INNER_LOOP_B	 
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
+	 MODE2_INNER_LOOP_B
 	 "	bne mode2InnerLoopB"     "\n"
-	 
+
 	 "mov %[scanline], r10"    "\n"
 	 "movs %[t], 1"              "\n"
 	 "movs %[c], 88"             "\n"
 	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
 	 "cmp %[y], %[c]"            "\n"
 	 "bne mode2OuterLoop"       "\n" // if y != 88, loop
-	 
+
 	 : // outputs
 	   [c]"+l" (c),
 	   [t]"+l" (t),
 	   [x]"+l" (x),
 	   [y]"+h" (y),  // +:Read-Write l:lower (0-7) register
 	   [scrbuf]"+l" (scrbuf)
-	   
+
 	 : // inputs
 	   [LCD]"l" (0xA0002188),
 	   [scanline]"l" (scanline),
@@ -1056,8 +1186,13 @@
 	   "cc", "r10", "r11"
        );
 
- 
+
 #else
+  write_data(0); // does not have pixel 0 bug
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+  SET_MASK_P2;
+
 uint8_t* d = scrbuf;// point to beginning of line in data
 
   #ifdef PROJ_SHOW_FPS_COUNTER
@@ -1713,122 +1848,132 @@
 }
 
 #define MODE13_INNER_LOOP_A						\
-  "	ldrb %[t], [%[scrbuf],0]"   "\n"				\
-	       "	add %[t], %[t], %[offset]"  "\n"		\
+	       "	add %[t], %[t], r10"	   "\n" 		\
 	       "	uxtb %[c], %[t] " "\n"				\
 	       "	lsls %[c], 1"             "\n"			\
 	       "	ldrh %[t], [%[paletteptr], %[c]]"      "\n"	\
 	       "	lsls %[t], %[t], 3"       "\n"			\
 	       "	str %[t], [%[LCD], 0]"    "\n"			\
-	       "	mov %[c], r11" "\n"				\
-	       "	str %[c], [%[LCD], 124]"  "\n"			\
-	       "	stm %[scanline]!, {%[t]}" "\n"			\
-	       "	movs %[t], 252"   "\n"				\
-	       "	str %[c], [%[LCD], %[t]]" "\n"			\
-	       "	str %[c], [%[LCD], 124]"  "\n"			\
+	       "	movs %[c], 252"   "\n"				\
+	       "	str %[offset], [%[LCD], %[c]]" "\n"		\
+	       "	stm %[scanline]!, {%[t]}"      "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"		\
+	       "	str %[offset], [%[LCD], %[c]]" "\n"		\
 	       "	adds %[scrbuf], %[scrbuf], 1" "\n"		\
-	       "	str %[c], [%[LCD], %[t]]" "\n"
-  
-#define MODE13_INNER_LOOP_B				\
-  "	ldm %[scanline]!, {%[c]}"   "\n"		\
-	       "	str %[c], [%[LCD], 0]"    "\n"	\
-	       "	str %[t], [%[LCD], 124]"  "\n"	\
-	       "	movs %[c], 252"   "\n"		\
-	       "	str %[t], [%[LCD], %[c]]" "\n"	\
-	       "	str %[t], [%[LCD], 124]"  "\n"	\
-	       "	subs %[x], 1"             "\n"	\
-	       "	str %[t], [%[LCD], %[c]]" "\n"	\
+	       "	ldrb %[t], [%[scrbuf],0]"   "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"
 
- 
+// This can be made 1 cycle faster (x -= 10 instead of x--),
+// but there will be noise
+#define MODE13_INNER_LOOP_B					\
+	       "	str %[c], [%[LCD], 0]"    "\n"		\
+	       "	str %[offset], [%[LCD], %[t]]" "\n"	\
+	       "	ldr %[c], [%[scanline]]"   "\n"		\
+	       "	str %[offset], [%[LCD], 124]"  "\n"	\
+	       "	str %[offset], [%[LCD], %[t]]" "\n"	\
+	       "	adds %[scanline], 4"             "\n"	\
+	       "	subs %[x], 1"			"\n"	\
+	       "	str %[offset], [%[LCD], 124]"  "\n"
+
+
  void Pokitto::lcdRefreshMode13(uint8_t * scrbuf, uint16_t* paletteptr, uint8_t offset){
    uint32_t scanline[110]; // read two nibbles = pixels at a time
-   
+
    write_command_16(0x03); write_data_16(0x1038);
    write_command(0x20); write_data(0);
-   write_command(0x21); write_data(1);
+   write_command(0x21); write_data(0);
    write_command(0x22);
    CLR_CS_SET_CD_RD_WR;
    SET_MASK_P2;
-   
-   uint32_t x, y=0;
-   
+
+   uint32_t x, y=0, c, t;
+
 #ifndef __ARMCC_VERSION
+   #ifdef PROJ_SHOW_FPS_COUNTER
+   setDRAMptr(0, 8);
+   y=4;
+   #endif
+
    asm volatile(
 	 ".syntax unified"         "\n"
-	 
-	 "mov r10, %[scanline]"    "\n"
-	 
-	 "movs %[t], 1"            "\n"
-	 "lsls %[t], %[t], 12"     "\n"
-	 "mov r11, %[t]"           "\n"
-	 
+
+	 "mov r10, %[offset]"	   "\n"
+	 "movs %[offset], 1"            "\n"
+	 "lsls %[offset], %[offset], 12"     "\n"
+
 	 "mode13OuterLoop:"        "\n"
-	 
+
 	 "movs %[x], 110"          "\n"
+	 "ldrb %[t], [%[scrbuf],0]"   "\n"
 	 "mode13InnerLoopA:"
 	 MODE13_INNER_LOOP_A
 	 MODE13_INNER_LOOP_A
-	 "	subs %[x], 2"          "\n"	
+	 "	subs %[x], 2"          "\n"
 	 "	bne mode13InnerLoopA"  "\n"
 
-	 "mov %[scanline], r10"    "\n"
+	 "subs %[scanline], 220"    "\n"
+	 "subs %[scanline], 220"    "\n"
+
 	 "movs %[x], 110"          "\n"
-	 "mov %[t], r11"           "\n"
+	 "movs %[t], 252"           "\n"
+	 "ldm %[scanline]!, {%[c]}"   "\n"
 	 "mode13InnerLoopB:"
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
-	 MODE13_INNER_LOOP_B	 
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
+	 MODE13_INNER_LOOP_B
 	 "	bne mode13InnerLoopB"     "\n"
-	 
-	 "mov %[scanline], r10"    "\n"
+
+	 "subs %[scanline], 220"    "\n"
+	 "subs %[scanline], 224"    "\n"
 	 "movs %[t], 1"              "\n"
 	 "movs %[c], 88"             "\n"
-	 "add %[y], %[t]"            "\n" // y++... derpy, but it's the outer loop
+	 "add %[y], %[t]"            "\n"
 	 "cmp %[y], %[c]"            "\n"
 	 "bne mode13OuterLoop"       "\n" // if y != 88, loop
-	 
+
 	 : // outputs
 	   [c]"+l" (c),
 	   [t]"+l" (t),
 	   [x]"+l" (x),
 	   [y]"+h" (y),  // +:Read-Write l:lower (0-7) register
-	   [scrbuf]"+l" (scrbuf)
-	   
+	   [scrbuf]"+l" (scrbuf),
+	   [offset]"+l" (offset)
+
 	 : // inputs
 	   [LCD]"l" (0xA0002188),
 	   [scanline]"l" (scanline),
-	   [paletteptr]"l" (paletteptr),
-	   [offset]"l" (offset)
+	   [paletteptr]"l" (paletteptr)
+
 	 : // clobbers
-	   "cc", "r10", "r11"
+	   "cc", "r10"
        );
 
 #else
    uint8_t* d = scrbuf;// point to beginning of line in data
    for(y=0;y<88;y++){
-     
+
      uint32_t* s = scanline;
-     
+
      for(x=0;x<110;x+=10){
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
-       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);	
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
+       *LCD = *s = paletteptr[(*d + offset)&255]<<3; TGL_WR_OP(s++);TGL_WR_OP(d++);
      }
-     
+
      s = scanline;
      uint32_t c = *s;
      for(x=0;x<110;x+=10){
@@ -1843,13 +1988,113 @@
        *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
        *LCD = c; TGL_WR_OP(s++);TGL_WR_OP(c=*s);
      }
-   
+
    }
 #endif
  }
 
+// Streams scrbuf to the LCD one byte per pixel: every byte indexes paletteptr and the 16-bit entry, shifted left by 3, is stored to [LCD].
+// NOTE(review): the asm path uses word loads (ldm) and stops on pointer equality with `end`; presumably scrbuf is word-aligned and the size a multiple of 4 - confirm.
+ void Pokitto::lcdRefreshMode64( uint8_t * scrbuf, uint16_t* paletteptr ){
+   uint8_t *end = &scrbuf[ POK_SCREENBUFFERSIZE+4 ]; // loop sentinel: 4 bytes beyond index POK_SCREENBUFFERSIZE, matching the look-ahead load in the asm loop
+   write_command_16(0x03); write_data_16(0x1038);
+   write_command(0x20); write_data(0);
+#ifdef PROJ_SHOW_FPS_COUNTER
+  write_data(8); // NOTE(review): second data write after command 0x20 (a 0 was already written above) - confirm this is intended
+  scrbuf += 110*8; // start 880 bytes into the buffer; `end` was computed before this adjustment
+#else
+  write_data(0); // NOTE(review): second data write after command 0x20 - confirm this is intended
+#endif
+   write_command(0x21); write_data(0);
+   write_command(0x22);
+   CLR_CS_SET_CD_RD_WR;
+   SET_MASK_P2;
+
+   uint32_t TGL = 1<<12, CLR = 252, c, t; // TGL: value stored at [LCD+CLR] and [LCD+124]; c: current 4-byte word; t: scratch
+#ifndef __ARMCC_VERSION
+   asm volatile(
+	 ".syntax unified"         "\n"
+	 "ldm %[scrbuf]!, {%[c]}" "\n" // c = first four buffer bytes, scrbuf += 4
+	 "lsls %[t], %[c], 24" 			"\n" // t = lowest byte of c moved to bits 31..24
+	 "mode64loop%=:"    "\n"
+	 "lsrs %[c], %[c], 8" 			"\n" // drop the byte just taken
+	 "lsrs %[t], %[t], 23" 			"\n" // t = byte * 2: byte offset of a 16-bit palette entry
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n" // t = paletteptr[byte]
+	 "lsls %[t], 3" 			"\n" // same << 3 as the C fallback below
+	 "str %[t], [%[LCD], 0]" 		"\n" // colour of byte 0 to [LCD]
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n" // each colour is followed by two store pairs: TGL to offset CLR, then to offset 124
+	 "lsls %[t], %[c], 24" 			"\n" // between the stores: start indexing byte 1
+	 "lsrs %[c], %[c], 8" 			"\n"
+	 "lsrs %[t], %[t], 23" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 // byte 1
+	 "str %[t], [%[LCD], 0]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsls %[t], %[c], 24" 			"\n" // index byte 2 (c is not shifted yet)
+	 "lsrs %[t], %[t], 23" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[t], [%[LCD], 0]" 		"\n" // byte 2
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsrs %[c], %[c], 8" 			"\n" // c now holds only byte 3
+	 "lsls %[t], %[c], 1" 			"\n" // t = byte 3 * 2
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldrh %[t], [%[paletteptr], %[t]]" 	"\n"
+	 "lsls %[t], 3" 			"\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 // byte 3, then fetch the next word while its store pairs run
+	 "str %[t], [%[LCD], 0]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "ldm %[scrbuf]!, {%[c]}" "\n" // on the last pass this load starts at scrbuf[POK_SCREENBUFFERSIZE]; NOTE(review): confirm reading there is acceptable
+	 "str %[TGL], [%[LCD], 124]" 		"\n"
+	 "str %[TGL], [%[LCD], %[CLR]]" 	"\n"
+	 "lsls %[t], %[c], 24" 			"\n" // byte 0 of the new word, as expected at the loop label
+	 "cmp %[scrbuf], %[end]" "\n"
+	 "str %[TGL], [%[LCD], 124]" 		"\n" // str does not change the flags set by cmp
+	 // exit only on exact equality of scrbuf and end
+	 "bne mode64loop%=" "\n"
+
+	 : // outputs
+	   [c]"+l" (c),
+	   [t]"+l" (t),
+	   [scrbuf]"+l" (scrbuf)
+
+	 : // inputs
+	   [CLR]"l" (CLR),
+	   [TGL]"l" (TGL),
+	   [LCD]"l" (0xA0002188),
+	   [paletteptr]"l" (paletteptr),
+	   [end]"h" (end)
+
+	 : // clobbers
+	   "cc"
+       );
+
+#else
+   // C fallback: c always holds the colour for the byte at scrbuf; each pixel is one store to *LCD plus two TGL_WR_OP calls.
+   c = uint32_t(paletteptr[(*scrbuf)&255])<<3;
+   while( scrbuf < end-4 ){
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+       *LCD = c; TGL_WR_OP(scrbuf++);TGL_WR_OP( c = uint32_t(paletteptr[(*scrbuf)&255])<<3 );
+   }
+   
+#endif
+ }
+
+ 
+ 
 void Pokitto::lcdRefreshMode14(uint8_t * scrbuf, uint16_t* paletteptr) {
-uint16_t x,y;
+uint16_t x,y,data,xptr;
 uint16_t scanline[176]; uint16_t* scptr;
 uint8_t *d;
 
@@ -1986,10 +2231,99 @@
 #else
 
 void Pokitto::lcdRefreshMode15(uint16_t* paletteptr, uint8_t* scrbuf){
-uint16_t x,y;
+//    #define __ARMCC_VERSION
+#ifndef __ARMCC_VERSION
+    
+#define MODE15_LOOP				\
+    "ands %[tmp], %[color]" "\n"		\
+	"lsrs %[tmp], 2" "\n"			\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n" \
+	"str %[tmp], [%[LCD]]" "\n"		\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"	\
+	"movs %[tmp], 0x0F" "\n"		\
+	"ands %[tmp], %[color]" "\n"		\
+	"str %[WRBit], [%[LCD], 124]" "\n"	\
+	"lsls %[tmp], 2" "\n"			\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n" \
+	"str %[tmp], [%[LCD]]" "\n"		\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"	\
+	"movs %[tmp], 0xF0" "\n"		\
+	"lsrs %[color], 8" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"
+
+#define MODE15_ENDLOOP					\
+    "ands %[tmp], %[color]" "\n"			\
+	"lsrs %[tmp], 2" "\n"				\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n"		\
+	"str %[tmp], [%[LCD]]" "\n"			\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"		\
+	"movs %[tmp], 0x0F" "\n"			\
+	"ands %[tmp], %[color]" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"		\
+	"lsls %[tmp], 2" "\n"				\
+	"ldr %[tmp], [%[palette], %[tmp]]" "\n"		\
+	"str %[tmp], [%[LCD]]" "\n"			\
+	"str %[WRBit], [%[LCD], %[CLR]]" "\n"		\
+	"ldm %[scrbuf]!, {%[color]}" "\n"		\
+	"movs %[tmp], 0xF0" "\n"			\
+	"str %[WRBit], [%[LCD], 124]" "\n"
+    
+  uint8_t *end=&scrbuf[POK_SCREENBUFFERSIZE]+4;
+  volatile uint32_t palette[16];
+  for( uint32_t i=0; i<16; ++i )
+      palette[i] = uint32_t(paletteptr[i]) << 3;
+
+  write_command(0x03); write_data(0x1038);
+  write_command(0x21);  // Vertical DRAM Address
+  write_data(0);
+  write_command(0x20);  // Horizontal DRAM Address
+#ifdef PROJ_SHOW_FPS_COUNTER
+  write_data(8);
+  scrbuf += 110*8;
+#else
+  write_data(0);
+#endif
+  write_command(0x22); // write data to DRAM
+  CLR_CS_SET_CD_RD_WR;
+
+
+  SET_MASK_P2;
+
+  uint32_t WRBit = 1<<12, color, tmp;
+  asm volatile(
+      ".syntax unified" "\n"
+      "ldm %[scrbuf]!, {%[color]}" "\n"      
+      "movs %[tmp], 0xF0" "\n"
+      "mode15Loop%=:" "\n"
+      MODE15_LOOP
+      MODE15_LOOP
+      MODE15_LOOP
+      MODE15_ENDLOOP      
+      "cmp %[end], %[scrbuf]" "\n"
+      "bne mode15Loop%=" "\n"
+      :
+      [tmp]"+l" (tmp),
+      [color]"+l" (color),
+      [end]"+h" (end),
+      [scrbuf]"+l" (scrbuf),
+      [WRBit]"+l" (WRBit)
+      
+      :
+      [CLR]"l" (252),
+      [LCD]"l" (0xA0002188),
+      [palette]"l" (palette)
+      
+      :
+      "cc"
+      );
+    
+#else
+    
+uint16_t x,y,xptr;
 uint16_t scanline[2][176]; // read two nibbles = pixels at a time
-uint8_t *d;
+uint8_t *d, yoffset=0;
 
+xptr = 0;
 //setDRAMptr(xptr,yoffset);
 
 write_command(0x20); write_data(0);
@@ -2042,11 +2376,130 @@
         setup_data_16(scanline[1][s++]);CLR_WR;SET_WR;
     }
   }
+
+#endif
+
 }
 #endif //ADEKTOSMODE15
 
+void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint16_t * palettePointer, const uint8_t * scanType)
+{
+	write_command(0x03);
+	write_data(0x1038);
+
+	// Horizontal DRAM Address
+	write_command(0x20);
+	write_data(0);
+
+	// Vertical DRAM Address
+	write_command(0x21);
+	write_data(0);
+
+	// write data to DRAM
+	write_command(0x22);
+	CLR_CS_SET_CD_RD_WR;
+	SET_MASK_P2;
+
+	uint32_t scanline[220];
+
+	// point to beginning of line in data
+	const uint8_t * d = screenBuffer;
+	for(uint32_t y = 0; y < 176; ++y)
+	{
+		// find colours in one scanline
+		uint8_t scanTypeIndex = y >> 1;
+		uint8_t lineIndex = 0;
+		switch(scanType[scanTypeIndex])
+		{
+			case 0:
+			{
+				// point to beginning of line in data
+				d = &screenBuffer[110 * scanTypeIndex];
+				for(uint8_t x = 0; x < (220 / 2); ++x)
+				{
+					uint32_t color = static_cast<uint32_t>(palettePointer[*d]) << 3;
+					++d;
+					scanline[lineIndex] = color;
+					++lineIndex;
+					scanline[lineIndex] = color;
+					++lineIndex;
+				}
+				break;
+			}
+			case 1:
+			{
+				for(uint8_t x = 0; x < (220 / 4); ++x)
+				{
+					uint8_t t = *d;
+					++d;
+
+					uint32_t color1 = static_cast<uint32_t>(palettePointer[256 + (t >> 4)]) << 3;
+					scanline[lineIndex] = color1;
+					++lineIndex;
+					scanline[lineIndex] = color1;
+					++lineIndex;
+
+					uint32_t color2 = static_cast<uint32_t>(palettePointer[256 + (t & 0xF)]) << 3;
+					scanline[lineIndex] = color2;
+					++lineIndex;
+					scanline[lineIndex] = color2;
+					++lineIndex;
+				}
+				break;
+			}
+			case 2:
+			{
+				for(uint8_t x = 0; x < (220 / 4); ++x)
+				{
+					uint8_t t = *d;
+					++d;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 6) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 4) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 2) & 0x03)]) << 3;
+					++lineIndex;
+
+					scanline[lineIndex] = static_cast<uint32_t>(palettePointer[272 + ((t >> 0) & 0x03)]) << 3;
+					++lineIndex;
+				}
+				break;
+			}
+		}
+
+        uint32_t color = scanline[0];
+        #define WRITE_SCANLINE *LCD = color; TGL_WR_OP(color = scanline[++i]);
+
+		volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
+		for (uint8_t i = 0; i < 220;)
+		{
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE	WRITE_SCANLINE
+			WRITE_SCANLINE  WRITE_SCANLINE	WRITE_SCANLINE
+		}
+
+		#undef WRITE_SCANLINE
+	}
+
+	CLR_MASK_P2;
+}
+
+
 void Pokitto::blitWord(uint16_t c) {
     setup_data_16(c);CLR_WR;SET_WR;
 }
 
 
+