Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

main.cpp

Committer:
bikeNomad
Date:
2013-05-15
Revision:
6:0217f58bff36
Parent:
5:9f4ffb2b0e6b
Child:
7:f0e5450449cb

File content as of revision 6:0217f58bff36:

#include "mbed.h"
#include "FastAnalogIn.h"
#include "AudioAnalyzer.h"
extern "C" {
#include <math.h>
}

using namespace NK;

// Power:
// Power GND  J9/14
// Vin (6V)   J9/16

// Digital:
// Outputs for the tail, mouth and head, plus the pushbutton input.
// The trailing comments give the board header/pin each signal is wired to.
DigitalOut tail(PTA13);     // J3/2
DigitalOut mouth(PTC12);    // J3/1
DigitalOut head(PTC13);     // J3/3
DigitalIn pushbutton(PTD5); // J3/4  (main() enables the pull-up and waits while it reads non-zero)

// On-board RGB LED channels, driven as PWM outputs.
// main() writes 1.0 to all three at the start of every cycle.
// NOTE(review): presumably the LEDs are active-low, so 1.0 means "off" -- confirm against the board docs.
PwmOut redLED(LED_RED);
PwmOut greenLED(LED_GREEN);
PwmOut blueLED(LED_BLUE);

// Analog:
// GND   J3/14
// VrefH J3/16
// ADC input that is sampled into sampleBuffer by ADC0_IRQHandler.
FastAnalogIn microphone(PTB0);  // J10/2
// DAC output that playAudioSample() writes the recorded samples to.
AnalogOut speaker(PTE30);   // J10/11

// Communications:
// Serial uart1(PTC4, PTC3);
// Serial link used for all diagnostic output; main() sets it to 115200 baud.
Serial pc(USBTX, USBRX);

// Nominal sample rate used to size the analysis chunks.
// NOTE(review): presumably the measured free-running ADC conversion rate -- confirm.
const unsigned SAMPLE_RATE_HZ  = 7889;
// Integer division: 1000000 / 7889 truncates to 126 us.
const unsigned SAMPLE_PERIOD_US  = (1000000U / SAMPLE_RATE_HZ);
// Number of 8-bit samples held in sampleBuffer.
const unsigned SAMPLE_BUFFER_SIZE = 14000;
// Length of one analysis/playback chunk.
const unsigned CHUNK_DURATION_MS = 80;
// Samples per chunk: 7889 * 80 / 1000 truncates to 631.
const unsigned CHUNK_SIZE = SAMPLE_RATE_HZ  * CHUNK_DURATION_MS / 1000;
// Whole chunks that fit in the buffer: 14000 / 631 = 22; the trailing
// 118 samples are not covered by any chunk.
const unsigned NUM_CHUNKS = SAMPLE_BUFFER_SIZE / CHUNK_SIZE;

// Drives playAudioSample() during playback.
Ticker sampleTicker;
// Measures how long a recording or playback run takes.
Timer timer;

// audio samples
int8_t sampleBuffer[SAMPLE_BUFFER_SIZE];      // SAMPLE_BUFFER_SIZE / SAMPLE_RATE_HZ = 14000 / 7889, about 1.77 seconds of audio
// Cursor into the sample data, shared between the main code and the
// interrupt/ticker handlers (hence volatile).
int8_t * volatile nextSample;
// Number of samples still to be recorded or played; the handlers count it
// down and the main code polls it for completion.
uint16_t volatile samplesRemaining;

// vowel decisions
// One flag per chunk, filled in by analyze() and consumed by playWithBilly().
bool vowels[ NUM_CHUNKS ];

// ADC0 interrupt handler used while recording.
// While samples are still wanted, it stores the latest reading at the
// cursor, kicks off the next conversion and counts the sample. Once the
// count has reached zero it shuts the capture down and stops the timer.
extern "C"
void ADC0_IRQHandler(void)
{
    if (!samplesRemaining) {
        // Capture complete: no more interrupts, no more conversions.
        microphone.disable_interrupt();
        microphone.abort_read();
        timer.stop();
        return;
    }

    int8_t *slot = nextSample;
    nextSample = slot + 1;
    *slot = microphone.read_s8_nowait();
    microphone.start_read();
    samplesRemaining--;
}

/**
 * Ticker callback used during playback: writes the next sample to the DAC.
 *
 * Each signed 8-bit sample is scaled by 256 and offset by a fixed DC bias
 * (0x4000) before being written as an unsigned 16-bit DAC value. The sum is
 * computed in a signed 32-bit integer and saturated to 0..0xFFFF, because
 * samples below -64 produce a negative sum that would otherwise wrap to a
 * value near full scale when narrowed to uint16_t.
 *
 * When no samples remain, the callback detaches itself from sampleTicker
 * and stops the timer.
 */
void playAudioSample()
{
    static const int32_t dcBias = 0x4000;
    if (samplesRemaining) {
        const int8_t val = *nextSample++;
        int32_t biased = dcBias + (static_cast<int32_t>(val) * 256);
        // Saturate instead of letting the uint16_t conversion wrap around.
        if (biased < 0) {
            biased = 0;
        } else if (biased > 0xFFFF) {
            biased = 0xFFFF;
        }
        speaker.write_u16(static_cast<uint16_t>(biased));
        samplesRemaining--;
    } else {
        sampleTicker.detach();
        timer.stop();
    }
}

// Prepares the shared cursor/counter pair for a new recording or playback run.
//   start    - first sample to record into / play from (defaults to the whole buffer)
//   nsamples - number of samples to process
// The cursor is written before the counter: the interrupt/ticker handlers
// only touch nextSample when samplesRemaining is non-zero, so a handler that
// fires between the two writes still sees a zero count and leaves the new
// cursor alone. Keep this order.
void resetSampleBuffer(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
{
    nextSample = start;
    samplesRemaining = nsamples;
}

// Fills the whole sample buffer from the microphone and reports the timing.
// The capture itself is interrupt driven (ADC0_IRQHandler); this function
// only arms it, then polls every 50 ms and uses the blue LED as a progress
// indicator until the handler has counted samplesRemaining down to zero.
void recordAudio()
{
    pc.printf("Recording %d samples... ", SAMPLE_BUFFER_SIZE);
    blueLED = 0.0;

    // Arm the capture: cursor/counter first, then timer, then the ADC.
    resetSampleBuffer();
    timer.reset();
    timer.start();
    microphone.enable_interrupt();
    microphone.start_read();

    for (; samplesRemaining != 0; ) {
        wait_us(50000);
        const double fractionLeft = 1.0 * samplesRemaining / SAMPLE_BUFFER_SIZE;
        blueLED.write(1.0 - fractionLeft);
    }

    microphone.abort_read();

    // Report how long the capture took and the resulting sample rate.
    float seconds = timer.read();
    const double microseconds = seconds * 1.0e6;
    const float samplesPerSecond = SAMPLE_BUFFER_SIZE / seconds;
    pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, microseconds, samplesPerSecond);
}

/**
 * Plays a run of samples through the DAC and blocks until it has finished.
 *
 * @param duration_ms  total time the run should take; the per-sample ticker
 *                     period is duration_ms * 1000 / nsamples microseconds
 *                     (integer division, so it is truncated)
 * @param start        first sample to play (defaults to the whole buffer)
 * @param nsamples     number of samples to play
 *
 * Playback is driven by sampleTicker calling playAudioSample(); this
 * function polls samplesRemaining every CHUNK_DURATION_MS until it is zero.
 * A request for zero samples returns immediately, since the period
 * computation would otherwise divide by zero.
 */
void playAudio(unsigned duration_ms, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
{
    resetSampleBuffer(start, nsamples);
    timer.reset();
    timer.start();
    if (nsamples == 0) {
        // Nothing to play: leave the ticker alone and keep the timer stopped,
        // matching the state after a normal run.
        timer.stop();
        return;
    }
    sampleTicker.attach_us(&playAudioSample, duration_ms*1000/nsamples);
    while (samplesRemaining) {
        wait_us(CHUNK_DURATION_MS * 1000);
    }
}

// Replaces the recording with a synthetic sine wave (amplitude 125) for
// testing. The phase advances by 2000 * 3.1416 / SAMPLE_BUFFER_SIZE radians
// per sample, i.e. roughly 1000 cycles across the whole buffer.
void audioTest()
{
    resetSampleBuffer();

    const double phaseStep = 2000 * 3.1416 / SAMPLE_BUFFER_SIZE;
    double angle = 0.0;
    for (unsigned i = 0; i < SAMPLE_BUFFER_SIZE; ++i) {
        const double level = sin(angle) * 125.0;
        angle += phaseStep;
        sampleBuffer[i] = static_cast<int8_t>(level);
    }
}

// returns true if chunk was louder than minimum
bool analyzeChunk(int8_t  *chunkStart, uint16_t CHUNK_SIZE, float powerRef, bool *pisvowel = 0)
{
    AudioAnalyzer analyzer(chunkStart, CHUNK_SIZE);
    uint32_t power = analyzer.getPower();
    uint16_t zcs = analyzer.getZeroCrossings();
    int8_t min, max;
    analyzer.getMinMaxValues(&min, &max);
    analyzer.setPowerRef(powerRef);
    float logPower = analyzer.getLogPower();
    float zcRatio = analyzer.getZeroCrossingRatioPercent();
    pc.printf("%.2f\t%.2f\t%.2f\t%d\t%d\t%d\t", zcRatio, logPower, zcRatio / (logPower - AudioAnalyzer::VowelXIntercept), min, max, analyzer.isVowel());
    if (pisvowel)
        *pisvowel = analyzer.isVowel();
    return analyzer.isVoiced();
}

/**
 * Analyzes the recorded buffer chunk by chunk and fills in vowels[].
 *
 * The natural log of the whole buffer's power is used as the reference power
 * for every chunk. One tab-separated row per chunk is printed to the serial
 * port (see analyzeChunk for the analysis columns).
 *
 * @param playToo  when true, each chunk that analyzeChunk reports as voiced
 *                 is played in a loop until a character arrives on the
 *                 serial port; that character is echoed into the row. When
 *                 false, voiced chunks are marked with "-".
 */
void analyze(bool playToo = false)
{
    int8_t  *chunkStart = sampleBuffer;
    AudioAnalyzer analyzer(sampleBuffer, SAMPLE_BUFFER_SIZE);
    uint32_t power = analyzer.getPower();
    float powerRef = ::log(static_cast<double>(power));
    pc.printf("Reference power = %.2f\r\n", powerRef);
    // The counts are unsigned (%u), and the chunk duration is converted from
    // milliseconds to seconds as a double so that it matches both the %.2f
    // conversion and the "seconds" label.
    pc.printf("Analyzing %u chunks of %u samples (%.2f seconds):\r\n", NUM_CHUNKS, CHUNK_SIZE, CHUNK_DURATION_MS / 1000.0);
    pc.printf("chunk\tstartms\tzcratio\tlogp\tmaxs\tmin\tmax\tisVowel\tvowel\r\n");
    for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
        pc.printf("%u\t%u\t", chunk, chunk * CHUNK_DURATION_MS);
        bool loudEnough = analyzeChunk(chunkStart, CHUNK_SIZE, powerRef, &vowels[chunk]);
        if (loudEnough) {
            if (playToo) {
                // Repeat the chunk until the operator types a character.
                while (! pc.readable())
                    playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
                int c = pc.getc();
                pc.putc(c);
            } else
                pc.puts("-");
        }
        pc.puts("\r\n");
        chunkStart += CHUNK_SIZE;
    }
}

/**
 * Plays the recording back chunk by chunk while animating the fish.
 *
 * Assumes that vowels[] has been set by analyze(). The head output is turned
 * on 0.2 s before playback starts. For each chunk the mouth output (and the
 * green LED) follows that chunk's vowel flag while the chunk is played.
 * After the last chunk the mouth output is switched off explicitly, so it is
 * not left driven when the final chunk was a vowel. The tail output is then
 * pulsed for 0.2 s and the head output is turned off 0.1 s later.
 */
void playWithBilly()
{
    int8_t  *chunkStart = sampleBuffer;
    head = true;
    wait(0.2);
    for (uint16_t chunk = 0; chunk < NUM_CHUNKS; chunk++) {
        if (vowels[chunk]) {
            greenLED = 0.0;
            mouth = true;
        } else {
            greenLED = 1.0;
            mouth = false;
        }
        playAudio(CHUNK_DURATION_MS, chunkStart, CHUNK_SIZE);
        chunkStart += CHUNK_SIZE;
    }
    // Return to the "no vowel" state so the mouth output is never left on
    // after playback ends.
    greenLED = 1.0;
    mouth = false;

    tail = true;
    wait(0.2);
    tail = false;
    wait(0.1);
    head = false;
}

/**
 * Prints a run of samples to the serial port, one decimal value per line.
 *
 * @param start     first sample to print (defaults to the start of the buffer)
 * @param nsamples  number of samples to print, counted from `start`
 */
void dumpAudio(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
{
    // The end is measured from `start`, not from the beginning of the
    // buffer, so a sub-range prints exactly `nsamples` values.
    const int8_t *const end = start + nsamples;
    for (const int8_t *p = start; p < end; p++) {
        pc.printf("%d\r\n", *p);
    }
}

// Entry point: configures the serial port and pushbutton, then loops forever:
// wait for a button press, record, report timing, analyze, and play the
// recording back through the fish.
int main()
{
    pc.baud(115200);
    pc.printf("\r\n\r\nSample buffer = %u samples; rate = %u Hz; period = %u usec\r\n", SAMPLE_BUFFER_SIZE, SAMPLE_RATE_HZ, SAMPLE_PERIOD_US);
    pushbutton.mode(PullUp);

    while (true) {
        // Same LED state at the start of every cycle.
        redLED = 1.0;
        greenLED = 1.0;
        blueLED = 1.0;

#if 0
        pc.puts("ENTER when ready:");
        pc.getc();
        pc.puts("\r\n");
#endif
#if 0
        audioTest();
        playAudio(1000);
        analyze();
#endif

        // Idle, polling every 0.1 s, until the pushbutton input reads zero.
        for (;;) {
            if (pushbutton.read() == 0)
                break;
            wait(0.1);
        }

        recordAudio();
        const float duration = timer.read();
        // playAudio(duration * 1000);
        const float elapsed = timer.read();
        const double elapsedUsec = elapsed * 1.0e6;
        const float measuredRate = SAMPLE_BUFFER_SIZE / elapsed;
        pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsedUsec, measuredRate);
        const float rateErrorPercent = (duration-elapsed)*100/duration;
        pc.printf(" (Rate %#+0.2f%%)\r\n", rateErrorPercent);
        analyze(false);
        // dumpAudio();

        playWithBilly();
    }
}