Port of the Artekit Space Invaders demo (http://www.artekit.eu/space-invaders-for-stm32/) for the STM32F3 Discovery board. Also shows Conway's Game of Life if started with the user button pressed.
Dependencies: STM32F3-Discovery
Diff: conway.c
- Revision:
- 0:404dae88af71
- Child:
- 1:1b37c4b989b4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conway.c Tue Mar 01 02:40:19 2016 +0000 @@ -0,0 +1,175 @@ +#include "stm32f30x.h" +#include "video.h" + +typedef unsigned Unit; + +#define cols 416 +#define rows 300 +#define bits 32 + +extern char *fb[VID_VSIZE]; + +typedef struct { + Unit sum; + Unit carry; +} AddResult; + +static void half_add(AddResult *c,Unit a, Unit b) { + c->sum=a^b; + c->carry=a&b; +} + +static void full_add(AddResult *d, Unit a, Unit b, Unit c) { + AddResult r0,r1; + half_add(&r0,a, b); + half_add(&r1,r0.sum, c); + d->sum=r1.sum; + d->carry=r0.carry | r1.carry; +} + +static Unit col_step(Unit above[3], + Unit current[3], + Unit below[3]) { + AddResult a_inf,b_inf,c_inf,next0,next1a,next1b; + /* + * Compute row-wise influence sums. This produces 96 2-bit sums (represented + * as three pairs of 32-vectors) giving the number of live cells in the 1D + * Moore neighborhood around each position. + */ + full_add(&a_inf,(above[1] << 1) | (above[0] >> (bits - 1)), + above[1], + (above[1] >> 1) | (above[2] << (bits - 1))); + half_add(&c_inf,(current[1] << 1) | (current[0] >> (bits - 1)), + /* middle bits of current[1] don't count */ + (current[1] >> 1) | (current[2] << (bits - 1))); + full_add(&b_inf,(below[1] << 1) | (below[0] >> (bits - 1)), + below[1], + (below[1] >> 1) | (below[2] << (bits - 1))); + + /* + * Sum the row-wise sums into a two-dimensional Moore neighborhood population + * count. Such a count can overflow into four bits, but we don't care: Conway + * has the same result for 8/9 and 0/1 (the cell is cleared in both cases). + * + * Thus, we don't need a four-bit addition. Instead, we just retain the + * carry output from the two intermediate additions and use it as a mask. 
+ */ + full_add(&next0,a_inf.sum, c_inf.sum, b_inf.sum); + full_add(&next1a,a_inf.carry, next0.carry, b_inf.carry); + half_add(&next1b,c_inf.carry, next1a.sum); + + /* + * Apply Niemiec's optimization: OR the current cell state vector into the + * 9-cell neighborhoold population count to derive the new state cheaply. The + * cell is set iff its three-bit sum is 0b011. + */ + return (next0.sum | current[1]) + & next1b.sum + & ~next1a.carry + & ~next1b.carry; +} + + + +static void step(Unit const *current_map, + Unit *next_map, + Unit width, + Unit height) { + // We keep sliding windows of state in these arrays. + Unit above[3]={ 0, 0, 0 }; + Unit current[3]={ 0, 0, 0 }; + Unit below[3]={ 0, 0, 0 }; + unsigned x,y; + + // Bootstrap for first column of first row. + current[0] = current[1] = 0; + current[2] = current_map[0]; + + below[0] = below[1] = 0; + below[2] = current_map[width]; + + #define ADV(name, next) \ + name[0] = name[1]; \ + name[1] = name[2]; \ + name[2] = (next) + + // First row, wherein above[x] = 0, less final column + for (x = 0; x < width - 1; ++x) { + ADV(current, current_map[x + 1]); + ADV(below, current_map[width + x + 1]); + next_map[x] = col_step(above, current, below); + } + + + // Final column of first row, wherein we cannot fetch next values. + ADV(current, 0); + ADV(below, 0); + next_map[width - 1] = col_step(above, current, below); + + // Remaining rows except the last. + for (y = 1; y < height - 1; ++y) { + unsigned offset = y * width; + + // Bootstrap row like we did for row 1. 
+ above[0] = above[1] = 0; + current[0] = current[1] = 0; + below[0] = below[1] = 0; + + above[2] = current_map[offset - width]; + current[2] = current_map[offset]; + below[2] = current_map[offset + width]; + + for (x = 0; x < width - 1; ++x) { + ADV(above, current_map[offset - width + x + 1]); + ADV(current, current_map[offset + x + 1]); + ADV(below, current_map[offset + width + x + 1]); + next_map[offset + x] = col_step(above, current, below); + } + + // Last column. + ADV(above, 0); + ADV(current, 0); + ADV(below, 0); + next_map[offset + width - 1] = col_step(above, current, below); + } + + // Final row, wherein below[x] = 0. + unsigned offset = width * (height - 1); + above[0] = above[1] = 0; + current[0] = current[1] = 0; + below[0] = below[1] = below[2] = 0; + + above[2] = current_map[offset - width]; + current[2] = current_map[offset]; + + for (x = 0; x < width - 1; ++x) { + ADV(above, current_map[offset - width + x + 1]); + ADV(current, current_map[offset + x + 1]); + next_map[offset + x] = col_step(above, current, below); + } + + // Final column + ADV(above, 0); + ADV(current, 0); + next_map[offset + width - 1] = col_step(above, current, below); + + #undef ADV +} + +extern unsigned fboffset; + +void conway_demo() { + Unit *current=(Unit *)fb[0]; + Unit *next=(Unit *)fb[rows]; + SPI1->CR1 |= SPI_FirstBit_LSB; + + while(1) { + step(current,next,cols/bits,rows); + fboffset=300*52; + sysDelayMs(1); + step(next,current,cols/bits,rows); + fboffset=0; + sysDelayMs(1); + } +} +