Port of the Artekit Space Invaders demo (http://www.artekit.eu/space-invaders-for-stm32/) to the STM32F3 Discovery board. It also runs Conway's Game of Life if the board is started with the user button held down.

Dependencies:   STM32F3-Discovery

Revision:
0:404dae88af71
Child:
1:1b37c4b989b4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conway.c	Tue Mar 01 02:40:19 2016 +0000
@@ -0,0 +1,175 @@
+#include "stm32f30x.h"
+#include "video.h"
+
+typedef unsigned Unit;  /* word holding a run of cells, one cell per bit */
+
+#define cols 416  /* map width in cells; conway_demo() passes cols/bits words per row to step() */
+#define rows 300  /* map height in rows */
+#define bits 32   /* cells per Unit word; col_step() shifts by bits-1, so Unit is assumed to be 32 bits wide */
+
+extern char *fb[VID_VSIZE];  /* defined elsewhere; conway_demo() uses fb[0] and fb[rows] as its two maps */
+
+typedef struct {
+  Unit sum;    /* low bit of each lane's result, as written by half_add()/full_add() */
+  Unit carry;  /* carry-out (high bit) of each lane's result */
+} AddResult;
+
+/* Bit-parallel half adder: per bit lane, c->sum = a XOR b, c->carry = a AND b. */
+static void half_add(AddResult *c,Unit a, Unit b) {
+  *c = (AddResult){ .sum = a ^ b, .carry = a & b };
+}
+
+/* Bit-parallel full adder: per bit lane, adds a, b and c and stores the low
+ * bit of the result in d->sum and the high bit in d->carry. */
+static void full_add(AddResult *d, Unit a, Unit b, Unit c) {
+  const Unit partial = a ^ b;           /* sum of a and b, before c is added */
+  d->sum = partial ^ c;
+  d->carry = (a & b) | (partial & c);   /* at most one of the two can be set */
+}
+
+/*
+ * Computes the next generation of the `bits` cells packed into current[1].
+ * Each argument is a three-word window onto one map row (above, own, below):
+ * index 1 is the word of interest, indices 0 and 2 are the words either side,
+ * which supply the bits shifted in across the word boundary.  All operations
+ * are bit-parallel, one bit lane per cell.
+ */
+static Unit col_step(Unit above[3],
+                     Unit current[3],
+                     Unit below[3]) {
+  const unsigned edge = bits - 1;
+  AddResult upper, middle, lower;  /* per-row neighbour counts, two bits each */
+  AddResult low, mid_a, mid_b;     /* column-wise totals of those counts */
+  Unit lo_nbr, hi_nbr;             /* neighbours on the low-bit / high-bit side */
+
+  /* Row above: both horizontal neighbours plus the cell directly overhead. */
+  lo_nbr = (above[1] << 1) | (above[0] >> edge);
+  hi_nbr = (above[1] >> 1) | (above[2] << edge);
+  full_add(&upper, lo_nbr, above[1], hi_nbr);
+
+  /* Own row: horizontal neighbours only; a cell is not its own neighbour. */
+  lo_nbr = (current[1] << 1) | (current[0] >> edge);
+  hi_nbr = (current[1] >> 1) | (current[2] << edge);
+  half_add(&middle, lo_nbr, hi_nbr);
+
+  /* Row below: same shape as the row above. */
+  lo_nbr = (below[1] << 1) | (below[0] >> edge);
+  hi_nbr = (below[1] >> 1) | (below[2] << edge);
+  full_add(&lower, lo_nbr, below[1], hi_nbr);
+
+  /*
+   * Add the three row counts: low.sum is bit 0 of the neighbour total, mid_b.sum
+   * is bit 1, and a carry out of mid_a or mid_b means the total is 4 or more.
+   */
+  full_add(&low, upper.sum, middle.sum, lower.sum);
+  full_add(&mid_a, upper.carry, low.carry, lower.carry);
+  half_add(&mid_b, middle.carry, mid_a.sum);
+
+  /* Alive next turn iff total is 3, or total is 2 and the cell is alive now. */
+  return (low.sum | current[1]) & mid_b.sum & ~mid_a.carry & ~mid_b.carry;
+}
+
+
+
+/*
+ * Shifts a three-word sliding window one word along its row: the oldest word
+ * falls off the front and `next` becomes the newest, right-hand entry.
+ */
+static void slide_window(Unit window[3], Unit next) {
+  window[0] = window[1];
+  window[1] = window[2];
+  window[2] = next;
+}
+
+/*
+ * Computes one row of the next generation.
+ *
+ * above_row    the row above current_row, or a null pointer at the top edge
+ * current_row  the `width` words of the row being updated
+ * below_row    the row below current_row, or a null pointer at the bottom edge
+ * next_row     receives the `width` result words
+ * width        row length in words; must be at least 1
+ *
+ * A missing row, and the words beyond either end of a row, are treated as all
+ * zero (dead), so the map does not wrap around at its edges.
+ */
+static void step_row(Unit const *above_row,
+                     Unit const *current_row,
+                     Unit const *below_row,
+                     Unit *next_row,
+                     Unit width) {
+  /*
+   * In each window, [1] is the word being computed and [0]/[2] are the words
+   * to either side of it; the word left of column 0 is always zero.
+   */
+  Unit above[3] = { 0, 0, above_row ? above_row[0] : 0 };
+  Unit current[3] = { 0, 0, current_row[0] };
+  Unit below[3] = { 0, 0, below_row ? below_row[0] : 0 };
+  unsigned x;
+
+  for (x = 0; x < width; ++x) {
+    /* The last column has no following word to fetch, so zero is fed in. */
+    int more = x + 1 < width;
+
+    slide_window(above, (more && above_row) ? above_row[x + 1] : 0);
+    slide_window(current, more ? current_row[x + 1] : 0);
+    slide_window(below, (more && below_row) ? below_row[x + 1] : 0);
+    next_row[x] = col_step(above, current, below);
+  }
+}
+
+/*
+ * Advances the whole map by one generation.
+ *
+ * current_map  the present generation: `height` rows of `width` words each,
+ *              stored row after row
+ * next_map     receives the new generation in the same layout; it is written
+ *              while current_map is still being read, so the two must not
+ *              overlap
+ * width        row length in words
+ * height       number of rows
+ *
+ * Every row, including the first and last, goes through the same code path:
+ * the row that would lie outside the map is passed as a null pointer.  This
+ * makes a one-row map safe (no read before or past the buffer) and a map with
+ * zero width or height a no-op instead of an unsigned wrap-around in the loop
+ * bounds.
+ */
+static void step(Unit const *current_map,
+                 Unit *next_map,
+                 Unit width,
+                 Unit height) {
+  unsigned y;
+
+  /* Nothing to compute; the row arithmetic below assumes a non-empty map. */
+  if (width == 0 || height == 0) {
+    return;
+  }
+
+  for (y = 0; y < height; ++y) {
+    unsigned offset = y * width;
+    Unit const *row = current_map + offset;
+    Unit const *row_above = (y > 0) ? row - width : 0;
+    Unit const *row_below = (y + 1 < height) ? row + width : 0;
+
+    step_row(row_above, row, row_below, next_map + offset, width);
+  }
+}
+
+extern unsigned fboffset;  /* defined elsewhere; conway_demo() sets it after each step() -- presumably the byte offset of the map to display, TODO confirm against the video driver */
+
+/* Runs Life forever, alternating between the two maps at fb[0] and fb[rows]:
+ * each pass computes the next generation into the idle map, then updates fboffset. */
+void conway_demo() {
+	Unit *maps[2] = { (Unit *)fb[0], (Unit *)fb[rows] };
+	unsigned live = 0;  /* index of the map holding the newest generation */
+
+	SPI1->CR1 |= SPI_FirstBit_LSB;
+	for (;;) {
+		step(maps[live], maps[live ^ 1], cols / bits, rows);
+		live ^= 1;
+		fboffset = live ? 300 * 52 : 0;  /* 300 * 52 == rows * (cols / 8); presumably the second map's byte offset */
+		sysDelayMs(1);
+	}
+}
+