Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from the ADC input when the pushbutton is pressed, then plays the audio back while controlling Billy Bass's mouth so that it opens during vowel sounds.

The ADC input is driven by a microphone and preamplifier. The signal passes through a capacitor into a resistor divider connected to the +3.3V supply pin, which provides mid-range biasing for the ADC signal.

The DAC output is connected to his audio amplifier input (the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias from the DAC to get the single-transistor amplifier into its proper operating range.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Revision:
4:c989412b91ea
Parent:
3:c04d8d0493f4
Child:
5:9f4ffb2b0e6b
--- a/main.cpp	Wed May 15 15:32:34 2013 +0000
+++ b/main.cpp	Wed May 15 17:53:33 2013 +0000
@@ -32,17 +32,23 @@
 Serial pc(USBTX, USBRX);
 
 const unsigned SAMPLE_RATE_HZ  = 7889;
-const unsigned SAMPLE_PERIOD_US     = (1000000U / SAMPLE_RATE_HZ);
+const unsigned SAMPLE_PERIOD_US  = (1000000U / SAMPLE_RATE_HZ);
 const unsigned SAMPLE_BUFFER_SIZE = 9000;
-const float CHUNK_DURATION = 0.08;
+const unsigned CHUNK_DURATION_MS = 80;
+const unsigned CHUNK_SIZE = SAMPLE_RATE_HZ  * CHUNK_DURATION_MS / 1000;
+const unsigned NUM_CHUNKS = SAMPLE_BUFFER_SIZE / CHUNK_SIZE;
 
 Ticker sampleTicker;
 Timer timer;
 
+// audio samples
 int8_t sampleBuffer[SAMPLE_BUFFER_SIZE];      // 1 second buffer
 int8_t * volatile nextSample;
 uint16_t volatile samplesRemaining;
 
+// vowel decisions
+bool vowels[ NUM_CHUNKS ];
+
 extern "C"
 void ADC0_IRQHandler(void)
 {
@@ -97,16 +103,14 @@
     pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
 }
 
-void playAudio(float duration, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
+void playAudio(unsigned duration_ms, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
 {
-    greenLED = 0.0;
     resetSampleBuffer(start, nsamples);
     timer.reset();
     timer.start();
-    sampleTicker.attach(&playAudioSample, duration/nsamples);
+    sampleTicker.attach_us(&playAudioSample, duration_ms*1000/nsamples);
     while (samplesRemaining) {
-        wait_us(50000);
-        greenLED.write(1.0 - (1.0 *  samplesRemaining / nsamples));
+        wait_us(CHUNK_DURATION_MS * 1000);
     }
 }
 
@@ -122,47 +126,57 @@
 }
 
 // returns true if chunk was louder than minimum
-bool analyzeChunk(int8_t  *chunkStart, uint16_t chunkSize, float powerRef)
+bool analyzeChunk(int8_t  *chunkStart, uint16_t CHUNK_SIZE, float powerRef, bool *pisvowel = 0)
 {
-    AudioAnalyzer analyzer(chunkStart, chunkSize);
+    AudioAnalyzer analyzer(chunkStart, CHUNK_SIZE);
     uint32_t power = analyzer.getPower();
     uint16_t zcs = analyzer.getZeroCrossings();
     int8_t min, max;
     analyzer.getMinMaxValues(&min, &max);
-    float logPower = ::log((double)power);
-    if (isnan(logPower) || logPower < 1.0) {
-        return false;
-    }
-    float zcRatio = (float)zcs / chunkSize;
-    pc.printf("%.2f\t%.2f\t%d\t%d\t", zcRatio*100, logPower-powerRef, min, max);
-    return true;
+    analyzer.setPowerRef(powerRef);
+    float logPower = analyzer.getLogPower();
+    float zcRatio = analyzer.getZeroCrossingRatioPercent();
+    pc.printf("%.2f\t%.2f\t%.2f\t%d\t%d\t%d\t", zcRatio, logPower, zcRatio / (logPower - AudioAnalyzer::VowelXIntercept), min, max, analyzer.isVowel());
+    if (pisvowel)
+        *pisvowel = analyzer.isVowel();
+    return analyzer.isVoiced();
 }
 
 void analyze(bool playToo = false)
 {
-    uint16_t chunkSize = SAMPLE_RATE_HZ  * CHUNK_DURATION;
-    uint16_t nChunks = SAMPLE_BUFFER_SIZE / chunkSize;
     int8_t  *chunkStart = sampleBuffer;
     AudioAnalyzer analyzer(sampleBuffer, SAMPLE_BUFFER_SIZE);
     uint32_t power = analyzer.getPower();
     float powerRef = ::log((double)power);
     pc.printf("Reference power = %.2f\r\n", powerRef);
-    pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", nChunks, chunkSize, CHUNK_DURATION);
-    pc.printf("chunk\tstart\tzcratio\tlogp\tmin\tmax\tvowel\r\n");
-    for (uint16_t chunk = 0; chunk < nChunks; chunk++) {
-        pc.printf("%u\t%.2f\t", chunk, chunk * CHUNK_DURATION);
-        bool loudEnough = analyzeChunk(chunkStart, chunkSize, powerRef);
+    pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", NUM_CHUNKS, CHUNK_SIZE, CHUNK_DURATION_MS);
+    pc.printf("chunk\tstartms\tzcratio\tlogp\tmaxs\tmin\tmax\tisVowel\tvowel\r\n");
+    for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
+        pc.printf("%u\t%u\t", chunk, chunk * CHUNK_DURATION_MS);
+        bool loudEnough = analyzeChunk(chunkStart, CHUNK_SIZE, powerRef, &vowels[chunk]);
         if (loudEnough) {
             if (playToo) {
                 while (! pc.readable())
-                    playAudio(CHUNK_DURATION, chunkStart, chunkSize);
+                    playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
                 int c = pc.getc();
                 pc.putc(c);
             } else
                 pc.puts("-");
         }
         pc.puts("\r\n");
-        chunkStart += chunkSize;
+        chunkStart += CHUNK_SIZE;
+    }
+}
+
+// assumes that vowels[] has been set by analyze
+void playWithBilly()
+{
+    int8_t  *chunkStart = sampleBuffer;
+    for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
+        greenLED = vowels[chunk] ? 0.0 : 1.0;
+        playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
+        chunkStart += CHUNK_SIZE;
+
     }
 }
 
@@ -189,17 +203,19 @@
 
 #if 0
         audioTest();
-        playAudio(1.0);
+        playAudio(1000);
         analyze();
 #endif
 
         recordAudio();
         float duration = timer.read();
-        playAudio(duration);
+        // playAudio(duration * 1000);
         float elapsed = timer.read();
         pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
         pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
-        analyze(true);
+        analyze(false);
         // dumpAudio();
+
+        playWithBilly();
     }
 }