Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Committer:
bikeNomad
Date:
Wed May 15 15:32:34 2013 +0000
Revision:
3:c04d8d0493f4
Parent:
2:5bcd2f55a294
Child:
4:c989412b91ea
Got audio sampling and analysis working

Who changed what in which revision?

User | Revision | Line number | New contents of line
bikeNomad 0:1ddd40d843cb 1 #include "mbed.h"
bikeNomad 1:2fa375aacece 2 #include "FastAnalogIn.h"
bikeNomad 3:c04d8d0493f4 3 #include "AudioAnalyzer.h"
bikeNomad 3:c04d8d0493f4 4 extern "C" {
bikeNomad 3:c04d8d0493f4 5 #include <math.h>
bikeNomad 3:c04d8d0493f4 6 }
bikeNomad 3:c04d8d0493f4 7
bikeNomad 1:2fa375aacece 8 using namespace NK;
bikeNomad 1:2fa375aacece 9
bikeNomad 1:2fa375aacece 10 // Power:
bikeNomad 1:2fa375aacece 11 // Power GND J9/14
bikeNomad 1:2fa375aacece 12 // Vin (6V) J9/16
bikeNomad 0:1ddd40d843cb 13
bikeNomad 1:2fa375aacece 14 // Digital:
bikeNomad 1:2fa375aacece 15 DigitalOut tail(PTA13); // J3/2
bikeNomad 1:2fa375aacece 16 DigitalOut mouth(PTC12); // J3/1
bikeNomad 1:2fa375aacece 17 DigitalOut head(PTC13); // J3/3
bikeNomad 1:2fa375aacece 18 DigitalIn pushbutton(PTD5); // J3/4
bikeNomad 1:2fa375aacece 19
bikeNomad 1:2fa375aacece 20 PwmOut redLED(LED_RED);
bikeNomad 1:2fa375aacece 21 PwmOut greenLED(LED_GREEN);
bikeNomad 1:2fa375aacece 22 PwmOut blueLED(LED_BLUE);
bikeNomad 0:1ddd40d843cb 23
bikeNomad 1:2fa375aacece 24 // Analog:
bikeNomad 1:2fa375aacece 25 // GND J3/14
bikeNomad 1:2fa375aacece 26 // VrefH J3/16
bikeNomad 1:2fa375aacece 27 FastAnalogIn microphone(PTB0); // J10/2
bikeNomad 1:2fa375aacece 28 AnalogOut speaker(PTE30); // J10/11
bikeNomad 1:2fa375aacece 29
bikeNomad 1:2fa375aacece 30 // Communications:
bikeNomad 1:2fa375aacece 31 // Serial uart1(PTC4, PTC3);
bikeNomad 1:2fa375aacece 32 Serial pc(USBTX, USBRX);
bikeNomad 1:2fa375aacece 33
bikeNomad 2:5bcd2f55a294 34 const unsigned SAMPLE_RATE_HZ = 7889;
bikeNomad 1:2fa375aacece 35 const unsigned SAMPLE_PERIOD_US = (1000000U / SAMPLE_RATE_HZ);
bikeNomad 2:5bcd2f55a294 36 const unsigned SAMPLE_BUFFER_SIZE = 9000;
bikeNomad 3:c04d8d0493f4 37 const float CHUNK_DURATION = 0.08;
bikeNomad 1:2fa375aacece 38
bikeNomad 1:2fa375aacece 39 Ticker sampleTicker;
bikeNomad 2:5bcd2f55a294 40 Timer timer;
bikeNomad 1:2fa375aacece 41
bikeNomad 3:c04d8d0493f4 42 int8_t sampleBuffer[SAMPLE_BUFFER_SIZE]; // 1 second buffer
bikeNomad 3:c04d8d0493f4 43 int8_t * volatile nextSample;
bikeNomad 3:c04d8d0493f4 44 uint16_t volatile samplesRemaining;
bikeNomad 1:2fa375aacece 45
bikeNomad 1:2fa375aacece 46 extern "C"
bikeNomad 1:2fa375aacece 47 void ADC0_IRQHandler(void)
bikeNomad 0:1ddd40d843cb 48 {
bikeNomad 1:2fa375aacece 49 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 50 *nextSample++ = microphone.read_s8_nowait();
bikeNomad 2:5bcd2f55a294 51 microphone.start_read();
bikeNomad 1:2fa375aacece 52 samplesRemaining--;
bikeNomad 1:2fa375aacece 53 } else {
bikeNomad 1:2fa375aacece 54 microphone.disable_interrupt();
bikeNomad 2:5bcd2f55a294 55 microphone.abort_read();
bikeNomad 2:5bcd2f55a294 56 timer.stop();
bikeNomad 0:1ddd40d843cb 57 }
bikeNomad 0:1ddd40d843cb 58 }
bikeNomad 0:1ddd40d843cb 59
bikeNomad 1:2fa375aacece 60 void playAudioSample()
bikeNomad 1:2fa375aacece 61 {
bikeNomad 1:2fa375aacece 62 if (samplesRemaining) {
bikeNomad 3:c04d8d0493f4 63 int8_t val = *nextSample++;
bikeNomad 3:c04d8d0493f4 64 speaker.write_u16((val + 128) << 8);
bikeNomad 1:2fa375aacece 65 samplesRemaining--;
bikeNomad 1:2fa375aacece 66 } else {
bikeNomad 1:2fa375aacece 67 sampleTicker.detach();
bikeNomad 2:5bcd2f55a294 68 timer.stop();
bikeNomad 1:2fa375aacece 69 }
bikeNomad 1:2fa375aacece 70 }
bikeNomad 1:2fa375aacece 71
bikeNomad 3:c04d8d0493f4 72 void resetSampleBuffer(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 73 {
bikeNomad 3:c04d8d0493f4 74 nextSample = start;
bikeNomad 3:c04d8d0493f4 75 samplesRemaining = nsamples;
bikeNomad 1:2fa375aacece 76 }
bikeNomad 1:2fa375aacece 77
bikeNomad 1:2fa375aacece 78 void recordAudio()
bikeNomad 1:2fa375aacece 79 {
bikeNomad 2:5bcd2f55a294 80 pc.printf("Recording %d samples... ", SAMPLE_BUFFER_SIZE);
bikeNomad 1:2fa375aacece 81 blueLED = 0.0;
bikeNomad 1:2fa375aacece 82
bikeNomad 1:2fa375aacece 83 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 84 timer.reset();
bikeNomad 2:5bcd2f55a294 85 timer.start();
bikeNomad 1:2fa375aacece 86 microphone.enable_interrupt();
bikeNomad 2:5bcd2f55a294 87 microphone.start_read();
bikeNomad 1:2fa375aacece 88
bikeNomad 1:2fa375aacece 89 while (samplesRemaining) {
bikeNomad 2:5bcd2f55a294 90 wait_us(50000);
bikeNomad 2:5bcd2f55a294 91 blueLED.write(1.0 - (1.0 * samplesRemaining / SAMPLE_BUFFER_SIZE));
bikeNomad 1:2fa375aacece 92 }
bikeNomad 1:2fa375aacece 93
bikeNomad 3:c04d8d0493f4 94 microphone.abort_read();
bikeNomad 3:c04d8d0493f4 95
bikeNomad 2:5bcd2f55a294 96 float elapsed = timer.read();
bikeNomad 2:5bcd2f55a294 97 pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 1:2fa375aacece 98 }
bikeNomad 1:2fa375aacece 99
bikeNomad 3:c04d8d0493f4 100 void playAudio(float duration, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 1:2fa375aacece 101 {
bikeNomad 1:2fa375aacece 102 greenLED = 0.0;
bikeNomad 3:c04d8d0493f4 103 resetSampleBuffer(start, nsamples);
bikeNomad 2:5bcd2f55a294 104 timer.reset();
bikeNomad 2:5bcd2f55a294 105 timer.start();
bikeNomad 3:c04d8d0493f4 106 sampleTicker.attach(&playAudioSample, duration/nsamples);
bikeNomad 1:2fa375aacece 107 while (samplesRemaining) {
bikeNomad 2:5bcd2f55a294 108 wait_us(50000);
bikeNomad 3:c04d8d0493f4 109 greenLED.write(1.0 - (1.0 * samplesRemaining / nsamples));
bikeNomad 3:c04d8d0493f4 110 }
bikeNomad 3:c04d8d0493f4 111 }
bikeNomad 3:c04d8d0493f4 112
bikeNomad 3:c04d8d0493f4 113 void audioTest()
bikeNomad 3:c04d8d0493f4 114 {
bikeNomad 3:c04d8d0493f4 115 double phase = 0.0;
bikeNomad 3:c04d8d0493f4 116 resetSampleBuffer();
bikeNomad 3:c04d8d0493f4 117 for (int8_t *p = sampleBuffer; p < sampleBuffer + SAMPLE_BUFFER_SIZE; p++) {
bikeNomad 3:c04d8d0493f4 118 double s = sin(phase) * 125.0;
bikeNomad 3:c04d8d0493f4 119 phase += 2000 * 3.1416 / SAMPLE_BUFFER_SIZE;
bikeNomad 3:c04d8d0493f4 120 *p = static_cast<int8_t>(s);
bikeNomad 3:c04d8d0493f4 121 }
bikeNomad 3:c04d8d0493f4 122 }
bikeNomad 3:c04d8d0493f4 123
bikeNomad 3:c04d8d0493f4 124 // returns true if chunk was louder than minimum
bikeNomad 3:c04d8d0493f4 125 bool analyzeChunk(int8_t *chunkStart, uint16_t chunkSize, float powerRef)
bikeNomad 3:c04d8d0493f4 126 {
bikeNomad 3:c04d8d0493f4 127 AudioAnalyzer analyzer(chunkStart, chunkSize);
bikeNomad 3:c04d8d0493f4 128 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 129 uint16_t zcs = analyzer.getZeroCrossings();
bikeNomad 3:c04d8d0493f4 130 int8_t min, max;
bikeNomad 3:c04d8d0493f4 131 analyzer.getMinMaxValues(&min, &max);
bikeNomad 3:c04d8d0493f4 132 float logPower = ::log((double)power);
bikeNomad 3:c04d8d0493f4 133 if (isnan(logPower) || logPower < 1.0) {
bikeNomad 3:c04d8d0493f4 134 return false;
bikeNomad 1:2fa375aacece 135 }
bikeNomad 3:c04d8d0493f4 136 float zcRatio = (float)zcs / chunkSize;
bikeNomad 3:c04d8d0493f4 137 pc.printf("%.2f\t%.2f\t%d\t%d\t", zcRatio*100, logPower-powerRef, min, max);
bikeNomad 3:c04d8d0493f4 138 return true;
bikeNomad 3:c04d8d0493f4 139 }
bikeNomad 3:c04d8d0493f4 140
bikeNomad 3:c04d8d0493f4 141 void analyze(bool playToo = false)
bikeNomad 3:c04d8d0493f4 142 {
bikeNomad 3:c04d8d0493f4 143 uint16_t chunkSize = SAMPLE_RATE_HZ * CHUNK_DURATION;
bikeNomad 3:c04d8d0493f4 144 uint16_t nChunks = SAMPLE_BUFFER_SIZE / chunkSize;
bikeNomad 3:c04d8d0493f4 145 int8_t *chunkStart = sampleBuffer;
bikeNomad 3:c04d8d0493f4 146 AudioAnalyzer analyzer(sampleBuffer, SAMPLE_BUFFER_SIZE);
bikeNomad 3:c04d8d0493f4 147 uint32_t power = analyzer.getPower();
bikeNomad 3:c04d8d0493f4 148 float powerRef = ::log((double)power);
bikeNomad 3:c04d8d0493f4 149 pc.printf("Reference power = %.2f\r\n", powerRef);
bikeNomad 3:c04d8d0493f4 150 pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", nChunks, chunkSize, CHUNK_DURATION);
bikeNomad 3:c04d8d0493f4 151 pc.printf("chunk\tstart\tzcratio\tlogp\tmin\tmax\tvowel\r\n");
bikeNomad 3:c04d8d0493f4 152 for (uint16_t chunk = 0; chunk < nChunks; chunk++) {
bikeNomad 3:c04d8d0493f4 153 pc.printf("%u\t%.2f\t", chunk, chunk * CHUNK_DURATION);
bikeNomad 3:c04d8d0493f4 154 bool loudEnough = analyzeChunk(chunkStart, chunkSize, powerRef);
bikeNomad 3:c04d8d0493f4 155 if (loudEnough) {
bikeNomad 3:c04d8d0493f4 156 if (playToo) {
bikeNomad 3:c04d8d0493f4 157 while (! pc.readable())
bikeNomad 3:c04d8d0493f4 158 playAudio(CHUNK_DURATION, chunkStart, chunkSize);
bikeNomad 3:c04d8d0493f4 159 int c = pc.getc();
bikeNomad 3:c04d8d0493f4 160 pc.putc(c);
bikeNomad 3:c04d8d0493f4 161 } else
bikeNomad 3:c04d8d0493f4 162 pc.puts("-");
bikeNomad 3:c04d8d0493f4 163 }
bikeNomad 3:c04d8d0493f4 164 pc.puts("\r\n");
bikeNomad 3:c04d8d0493f4 165 chunkStart += chunkSize;
bikeNomad 3:c04d8d0493f4 166 }
bikeNomad 3:c04d8d0493f4 167 }
bikeNomad 3:c04d8d0493f4 168
bikeNomad 3:c04d8d0493f4 169 void dumpAudio(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
bikeNomad 3:c04d8d0493f4 170 {
bikeNomad 3:c04d8d0493f4 171 for (int8_t *p = start; p < sampleBuffer + nsamples; p++) {
bikeNomad 3:c04d8d0493f4 172 pc.printf("%d\r\n", *p);
bikeNomad 3:c04d8d0493f4 173 }
bikeNomad 2:5bcd2f55a294 174 }
bikeNomad 1:2fa375aacece 175
bikeNomad 0:1ddd40d843cb 176 int main()
bikeNomad 0:1ddd40d843cb 177 {
bikeNomad 1:2fa375aacece 178 pc.baud(115200);
bikeNomad 2:5bcd2f55a294 179 pc.printf("\r\n\r\nSample buffer = %u samples; rate = %u Hz; period = %u usec\r\n", SAMPLE_BUFFER_SIZE, SAMPLE_RATE_HZ, SAMPLE_PERIOD_US);
bikeNomad 3:c04d8d0493f4 180
bikeNomad 3:c04d8d0493f4 181 for (;;) {
bikeNomad 3:c04d8d0493f4 182 redLED = 1.0;
bikeNomad 3:c04d8d0493f4 183 greenLED = 1.0;
bikeNomad 3:c04d8d0493f4 184 blueLED = 1.0;
bikeNomad 3:c04d8d0493f4 185
bikeNomad 3:c04d8d0493f4 186 pc.puts("ENTER when ready:");
bikeNomad 3:c04d8d0493f4 187 pc.getc();
bikeNomad 3:c04d8d0493f4 188 pc.puts("\r\n");
bikeNomad 1:2fa375aacece 189
bikeNomad 3:c04d8d0493f4 190 #if 0
bikeNomad 3:c04d8d0493f4 191 audioTest();
bikeNomad 3:c04d8d0493f4 192 playAudio(1.0);
bikeNomad 3:c04d8d0493f4 193 analyze();
bikeNomad 3:c04d8d0493f4 194 #endif
bikeNomad 1:2fa375aacece 195
bikeNomad 3:c04d8d0493f4 196 recordAudio();
bikeNomad 3:c04d8d0493f4 197 float duration = timer.read();
bikeNomad 3:c04d8d0493f4 198 playAudio(duration);
bikeNomad 3:c04d8d0493f4 199 float elapsed = timer.read();
bikeNomad 3:c04d8d0493f4 200 pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
bikeNomad 3:c04d8d0493f4 201 pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
bikeNomad 3:c04d8d0493f4 202 analyze(true);
bikeNomad 3:c04d8d0493f4 203 // dumpAudio();
bikeNomad 3:c04d8d0493f4 204 }
bikeNomad 0:1ddd40d843cb 205 }