Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Committer:
bikeNomad
Date:
Wed May 15 23:36:12 2013 +0000
Revision:
7:f0e5450449cb
Parent:
6:0217f58bff36
turn DAC off after playing

Who changed what in which revision?

User | Revision | Line number | New contents of line
bikeNomad 0:1ddd40d843cb 1 #include "mbed.h"
bikeNomad 1:2fa375aacece 2 #include "FastAnalogIn.h"
bikeNomad 3:c04d8d0493f4 3 #include "AudioAnalyzer.h"
bikeNomad 3:c04d8d0493f4 4 extern "C" {
bikeNomad 3:c04d8d0493f4 5 #include <math.h>
bikeNomad 3:c04d8d0493f4 6 }
bikeNomad 3:c04d8d0493f4 7
bikeNomad 1:2fa375aacece 8 using namespace NK;
bikeNomad 1:2fa375aacece 9
bikeNomad 1:2fa375aacece 10 // Power:
bikeNomad 1:2fa375aacece 11 // Power GND J9/14
bikeNomad 1:2fa375aacece 12 // Vin (6V) J9/16
bikeNomad 0:1ddd40d843cb 13
bikeNomad 1:2fa375aacece 14 // Digital:
bikeNomad 1:2fa375aacece 15 DigitalOut tail(PTA13); // J3/2
bikeNomad 1:2fa375aacece 16 DigitalOut mouth(PTC12); // J3/1
bikeNomad 1:2fa375aacece 17 DigitalOut head(PTC13); // J3/3
bikeNomad 1:2fa375aacece 18 DigitalIn pushbutton(PTD5); // J3/4
bikeNomad 1:2fa375aacece 19
bikeNomad 1:2fa375aacece 20 PwmOut redLED(LED_RED);
bikeNomad 1:2fa375aacece 21 PwmOut greenLED(LED_GREEN);
bikeNomad 1:2fa375aacece 22 PwmOut blueLED(LED_BLUE);
bikeNomad 0:1ddd40d843cb 23
bikeNomad 1:2fa375aacece 24 // Analog:
bikeNomad 1:2fa375aacece 25 // GND J3/14
bikeNomad 1:2fa375aacece 26 // VrefH J3/16
bikeNomad 1:2fa375aacece 27 FastAnalogIn microphone(PTB0); // J10/2
bikeNomad 1:2fa375aacece 28 AnalogOut speaker(PTE30); // J10/11
bikeNomad 1:2fa375aacece 29
bikeNomad 1:2fa375aacece 30 // Communications:
bikeNomad 1:2fa375aacece 31 // Serial uart1(PTC4, PTC3);
bikeNomad 1:2fa375aacece 32 Serial pc(USBTX, USBRX);
bikeNomad 1:2fa375aacece 33
// Audio sampling parameters.
const unsigned SAMPLE_RATE_HZ     = 7889;                       // samples per second
const unsigned SAMPLE_PERIOD_US   = 1000000U / SAMPLE_RATE_HZ;  // whole microseconds per sample (truncated)
const unsigned SAMPLE_BUFFER_SIZE = 14000;                      // total samples recorded

// The recording is analyzed and played back in fixed-length chunks.
const unsigned CHUNK_DURATION_MS  = 80;                                        // length of one chunk
const unsigned CHUNK_SIZE         = SAMPLE_RATE_HZ * CHUNK_DURATION_MS / 1000; // samples per chunk
const unsigned NUM_CHUNKS         = SAMPLE_BUFFER_SIZE / CHUNK_SIZE;           // whole chunks in the buffer
bikeNomad 1:2fa375aacece 40
bikeNomad 1:2fa375aacece 41 Ticker sampleTicker;
bikeNomad 2:5bcd2f55a294 42 Timer timer;
bikeNomad 1:2fa375aacece 43
bikeNomad 4:c989412b91ea 44 // audio samples
bikeNomad 3:c04d8d0493f4 45 int8_t sampleBuffer[SAMPLE_BUFFER_SIZE]; // 1 second buffer
bikeNomad 3:c04d8d0493f4 46 int8_t * volatile nextSample;
bikeNomad 3:c04d8d0493f4 47 uint16_t volatile samplesRemaining;
bikeNomad 1:2fa375aacece 48
bikeNomad 4:c989412b91ea 49 // vowel decisions
bikeNomad 4:c989412b91ea 50 bool vowels[ NUM_CHUNKS ];
bikeNomad 4:c989412b91ea 51
bikeNomad 1:2fa375aacece 52 extern "C"
bikeNomad 1:2fa375aacece 53 void ADC0_IRQHandler(void)
bikeNomad 0:1ddd40d843cb 54 {
bikeNomad 1:2fa375aacece 55 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 56 *nextSample++ = microphone.read_s8_nowait();
bikeNomad 2:5bcd2f55a294 57 microphone.start_read();
bikeNomad 1:2fa375aacece 58 samplesRemaining--;
bikeNomad 1:2fa375aacece 59 } else {
bikeNomad 1:2fa375aacece 60 microphone.disable_interrupt();
bikeNomad 2:5bcd2f55a294 61 microphone.abort_read();
bikeNomad 2:5bcd2f55a294 62 timer.stop();
bikeNomad 0:1ddd40d843cb 63 }
bikeNomad 0:1ddd40d843cb 64 }
bikeNomad 0:1ddd40d843cb 65
bikeNomad 1:2fa375aacece 66 void playAudioSample()
bikeNomad 1:2fa375aacece 67 {
bikeNomad 6:0217f58bff36 68 static uint16_t dcBias = 0x4000;
bikeNomad 1:2fa375aacece 69 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 70 int8_t val = *nextSample++;
bikeNomad 6:0217f58bff36 71 uint16_t val16 = dcBias + (val * 256);
bikeNomad 6:0217f58bff36 72 speaker.write_u16(val16);
bikeNomad 1:2fa375aacece 73 samplesRemaining--;
bikeNomad 1:2fa375aacece 74 } else {
bikeNomad 1:2fa375aacece 75 sampleTicker.detach();
bikeNomad 2:5bcd2f55a294 76 timer.stop();
bikeNomad 1:2fa375aacece 77 }
bikeNomad 1:2fa375aacece 78 }
bikeNomad 1:2fa375aacece 79
bikeNomad 3:c04d8d0493f4 80 void resetSampleBuffer(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 81 {
bikeNomad 3:c04d8d0493f4 82 nextSample = start;
bikeNomad 3:c04d8d0493f4 83 samplesRemaining = nsamples;
bikeNomad 1:2fa375aacece 84 }
bikeNomad 1:2fa375aacece 85
bikeNomad 1:2fa375aacece 86 void recordAudio()
bikeNomad 1:2fa375aacece 87 {
bikeNomad 2:5bcd2f55a294 88 pc.printf("Recording %d samples... ", SAMPLE_BUFFER_SIZE);
bikeNomad 1:2fa375aacece 89 blueLED = 0.0;
bikeNomad 1:2fa375aacece 90
bikeNomad 1:2fa375aacece 91 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 92 timer.reset();
bikeNomad 2:5bcd2f55a294 93 timer.start();
bikeNomad 1:2fa375aacece 94 microphone.enable_interrupt();
bikeNomad 2:5bcd2f55a294 95 microphone.start_read();
bikeNomad 1:2fa375aacece 96
bikeNomad 1:2fa375aacece 97 while (samplesRemaining) {
bikeNomad 2:5bcd2f55a294 98 wait_us(50000);
bikeNomad 2:5bcd2f55a294 99 blueLED.write(1.0 - (1.0 * samplesRemaining / SAMPLE_BUFFER_SIZE));
bikeNomad 1:2fa375aacece 100 }
bikeNomad 1:2fa375aacece 101
bikeNomad 3:c04d8d0493f4 102 microphone.abort_read();
bikeNomad 3:c04d8d0493f4 103
bikeNomad 2:5bcd2f55a294 104 float elapsed = timer.read();
bikeNomad 2:5bcd2f55a294 105 pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 1:2fa375aacece 106 }
bikeNomad 1:2fa375aacece 107
bikeNomad 4:c989412b91ea 108 void playAudio(unsigned duration_ms, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 109 {
bikeNomad 3:c04d8d0493f4 110 resetSampleBuffer(start, nsamples);
bikeNomad 2:5bcd2f55a294 111 timer.reset();
bikeNomad 2:5bcd2f55a294 112 timer.start();
bikeNomad 4:c989412b91ea 113 sampleTicker.attach_us(&playAudioSample, duration_ms*1000/nsamples);
bikeNomad 1:2fa375aacece 114 while (samplesRemaining) {
bikeNomad 4:c989412b91ea 115 wait_us(CHUNK_DURATION_MS * 1000);
bikeNomad 3:c04d8d0493f4 116 }
bikeNomad 7:f0e5450449cb 117 speaker.write_u16(0);
bikeNomad 3:c04d8d0493f4 118 }
bikeNomad 3:c04d8d0493f4 119
bikeNomad 3:c04d8d0493f4 120 void audioTest()
bikeNomad 3:c04d8d0493f4 121 {
bikeNomad 3:c04d8d0493f4 122 double phase = 0.0;
bikeNomad 3:c04d8d0493f4 123 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 124 for (int8_t *p = sampleBuffer; p < sampleBuffer + SAMPLE_BUFFER_SIZE; p++) {
bikeNomad 3:c04d8d0493f4 125 double s = sin(phase) * 125.0;
bikeNomad 3:c04d8d0493f4 126 phase += 2000 * 3.1416 / SAMPLE_BUFFER_SIZE;
bikeNomad 3:c04d8d0493f4 127 *p = static_cast<int8_t>(s);
bikeNomad 3:c04d8d0493f4 128 }
bikeNomad 3:c04d8d0493f4 129 }
bikeNomad 3:c04d8d0493f4 130
bikeNomad 3:c04d8d0493f4 131 // returns true if chunk was louder than minimum
bikeNomad 4:c989412b91ea 132 bool analyzeChunk(int8_t *chunkStart, uint16_t CHUNK_SIZE, float powerRef, bool *pisvowel = 0)
bikeNomad 3:c04d8d0493f4 133 {
bikeNomad 4:c989412b91ea 134 AudioAnalyzer analyzer(chunkStart, CHUNK_SIZE);
bikeNomad 3:c04d8d0493f4 135 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 136 uint16_t zcs = analyzer.getZeroCrossings();
bikeNomad 3:c04d8d0493f4 137 int8_t min, max;
bikeNomad 3:c04d8d0493f4 138 analyzer.getMinMaxValues(&min, &max);
bikeNomad 4:c989412b91ea 139 analyzer.setPowerRef(powerRef);
bikeNomad 4:c989412b91ea 140 float logPower = analyzer.getLogPower();
bikeNomad 4:c989412b91ea 141 float zcRatio = analyzer.getZeroCrossingRatioPercent();
bikeNomad 4:c989412b91ea 142 pc.printf("%.2f\t%.2f\t%.2f\t%d\t%d\t%d\t", zcRatio, logPower, zcRatio / (logPower - AudioAnalyzer::VowelXIntercept), min, max, analyzer.isVowel());
bikeNomad 4:c989412b91ea 143 if (pisvowel)
bikeNomad 4:c989412b91ea 144 *pisvowel = analyzer.isVowel();
bikeNomad 4:c989412b91ea 145 return analyzer.isVoiced();
bikeNomad 3:c04d8d0493f4 146 }
bikeNomad 3:c04d8d0493f4 147
bikeNomad 3:c04d8d0493f4 148 void analyze(bool playToo = false)
bikeNomad 3:c04d8d0493f4 149 {
bikeNomad 3:c04d8d0493f4 150 int8_t *chunkStart = sampleBuffer;
bikeNomad 3:c04d8d0493f4 151 AudioAnalyzer analyzer(sampleBuffer, SAMPLE_BUFFER_SIZE);
bikeNomad 3:c04d8d0493f4 152 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 153 float powerRef = ::log((double)power);
bikeNomad 3:c04d8d0493f4 154 pc.printf("Reference power = %.2f\r\n", powerRef);
bikeNomad 4:c989412b91ea 155 pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", NUM_CHUNKS, CHUNK_SIZE, CHUNK_DURATION_MS);
bikeNomad 4:c989412b91ea 156 pc.printf("chunk\tstartms\tzcratio\tlogp\tmaxs\tmin\tmax\tisVowel\tvowel\r\n");
bikeNomad 4:c989412b91ea 157 for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
bikeNomad 4:c989412b91ea 158 pc.printf("%u\t%u\t", chunk, chunk * CHUNK_DURATION_MS);
bikeNomad 4:c989412b91ea 159 bool loudEnough = analyzeChunk(chunkStart, CHUNK_SIZE, powerRef, &vowels[chunk]);
bikeNomad 3:c04d8d0493f4 160 if (loudEnough) {
bikeNomad 3:c04d8d0493f4 161 if (playToo) {
bikeNomad 3:c04d8d0493f4 162 while (! pc.readable())
bikeNomad 4:c989412b91ea 163 playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
bikeNomad 3:c04d8d0493f4 164 int c = pc.getc();
bikeNomad 3:c04d8d0493f4 165 pc.putc(c);
bikeNomad 3:c04d8d0493f4 166 } else
bikeNomad 3:c04d8d0493f4 167 pc.puts("-");
bikeNomad 3:c04d8d0493f4 168 }
bikeNomad 3:c04d8d0493f4 169 pc.puts("\r\n");
bikeNomad 4:c989412b91ea 170 chunkStart += CHUNK_SIZE;
bikeNomad 4:c989412b91ea 171 }
bikeNomad 4:c989412b91ea 172 }
bikeNomad 4:c989412b91ea 173
bikeNomad 4:c989412b91ea 174 // assumes that vowels[] has been set by analyze
bikeNomad 4:c989412b91ea 175 void playWithBilly()
bikeNomad 4:c989412b91ea 176 {
bikeNomad 4:c989412b91ea 177 int8_t *chunkStart = sampleBuffer;
bikeNomad 6:0217f58bff36 178 head = true;
bikeNomad 6:0217f58bff36 179 wait(0.2);
bikeNomad 4:c989412b91ea 180 for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
bikeNomad 5:9f4ffb2b0e6b 181 if (vowels[chunk]) {
bikeNomad 5:9f4ffb2b0e6b 182 greenLED = 0.0;
bikeNomad 5:9f4ffb2b0e6b 183 mouth = true;
bikeNomad 5:9f4ffb2b0e6b 184 } else {
bikeNomad 5:9f4ffb2b0e6b 185 greenLED = 1.0;
bikeNomad 5:9f4ffb2b0e6b 186 mouth = false;
bikeNomad 5:9f4ffb2b0e6b 187 }
bikeNomad 4:c989412b91ea 188 playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
bikeNomad 4:c989412b91ea 189 chunkStart += CHUNK_SIZE;
bikeNomad 3:c04d8d0493f4 190 }
bikeNomad 6:0217f58bff36 191 tail = true;
bikeNomad 6:0217f58bff36 192 wait(0.2);
bikeNomad 6:0217f58bff36 193 tail = false;
bikeNomad 6:0217f58bff36 194 wait(0.1);
bikeNomad 6:0217f58bff36 195 head = false;
bikeNomad 3:c04d8d0493f4 196 }
bikeNomad 3:c04d8d0493f4 197
bikeNomad 3:c04d8d0493f4 198 void dumpAudio(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 3:c04d8d0493f4 199 {
bikeNomad 3:c04d8d0493f4 200 for (int8_t *p = start; p < sampleBuffer + nsamples; p++) {
bikeNomad 3:c04d8d0493f4 201 pc.printf("%d\r\n", *p);
bikeNomad 3:c04d8d0493f4 202 }
bikeNomad 2:5bcd2f55a294 203 }
bikeNomad 1:2fa375aacece 204
bikeNomad 0:1ddd40d843cb 205 int main()
bikeNomad 0:1ddd40d843cb 206 {
bikeNomad 1:2fa375aacece 207 pc.baud(115200);
bikeNomad 2:5bcd2f55a294 208 pc.printf("\r\n\r\nSample buffer = %u samples; rate = %u Hz; period = %u usec\r\n", SAMPLE_BUFFER_SIZE, SAMPLE_RATE_HZ, SAMPLE_PERIOD_US);
bikeNomad 6:0217f58bff36 209 pushbutton.mode(PullUp);
bikeNomad 6:0217f58bff36 210
bikeNomad 3:c04d8d0493f4 211 for (;;) {
bikeNomad 3:c04d8d0493f4 212 redLED = 1.0;
bikeNomad 3:c04d8d0493f4 213 greenLED = 1.0;
bikeNomad 3:c04d8d0493f4 214 blueLED = 1.0;
bikeNomad 3:c04d8d0493f4 215
bikeNomad 6:0217f58bff36 216 #if 0
bikeNomad 3:c04d8d0493f4 217 pc.puts("ENTER when ready:");
bikeNomad 3:c04d8d0493f4 218 pc.getc();
bikeNomad 3:c04d8d0493f4 219 pc.puts("\r\n");
bikeNomad 6:0217f58bff36 220 #endif
bikeNomad 3:c04d8d0493f4 221 #if 0
bikeNomad 3:c04d8d0493f4 222 audioTest();
bikeNomad 4:c989412b91ea 223 playAudio(1000);
bikeNomad 3:c04d8d0493f4 224 analyze();
bikeNomad 3:c04d8d0493f4 225 #endif
bikeNomad 1:2fa375aacece 226
bikeNomad 6:0217f58bff36 227 while (pushbutton.read())
bikeNomad 6:0217f58bff36 228 wait(0.1);
bikeNomad 6:0217f58bff36 229
bikeNomad 3:c04d8d0493f4 230 recordAudio();
bikeNomad 3:c04d8d0493f4 231 float duration = timer.read();
bikeNomad 4:c989412b91ea 232 // playAudio(duration * 1000);
bikeNomad 3:c04d8d0493f4 233 float elapsed = timer.read();
bikeNomad 3:c04d8d0493f4 234 pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 3:c04d8d0493f4 235 pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
bikeNomad 4:c989412b91ea 236 analyze(false);
bikeNomad 3:c04d8d0493f4 237 // dumpAudio();
bikeNomad 4:c989412b91ea 238
bikeNomad 4:c989412b91ea 239 playWithBilly();
bikeNomad 3:c04d8d0493f4 240 }
bikeNomad 0:1ddd40d843cb 241 }