Speech Controlled Clock

Overview

The Mr. Clock team developed a speech-controlled Internet-of-Things clock. This project combines EasyVR, a recorder, and speech recognition. EasyVR is set up to trigger on a custom command, "Mr. Clock", which we use to start voice recording. We also modified a simple recorder to record commands and save them as a .wav file on the SD card. The .wav file is then sent over Ethernet to a Windows computer, where Microsoft Speech Recognition is used to decode the command and send internet time as a response to the client if the command is accepted. Thus, we can see the time shown on an LCD screen, or alarms can be set.

Accepted Commands

Our clock can accept the following commands. Note that words surrounded by {} are optional, and words preceded by # are referencing an argument. For example #hour wants you to say a number between 1 and 12 for a valid hour. You can check the grammar.xml in the SpeechServerSource for the exact grammar. Of course, you will need to understand SRGS.

  • "sync {the} {time} now"
    • Uses NTP to set the current time from the internet. Does not change the timezone.
  • "set {the} time {to} #hour #minute #am/pm"
    • Manually sets the time to the given time
  • "set {the} timezone {to} #3letterTimezone"
    • Changes the time according to the given timezone
  • "set {an} alarm {for} #hour #minute #am/pm"
    • Sets an alarm for the given time. The alarm will always be within 24 hours of the current time.
  • "set {an} alarm {for} #hours and #minutes {from now}"
    • Sets a timer for the given duration. After the duration, an alarm will sound.
  • "delete alarm"
    • Deletes the current stored timer or alarm.

Schematic

/media/uploads/yzhang612/draft1.png

Mbed Client Code

main.cpp

#include "mbed.h"
#include "rtos.h"
#include "EthernetInterface.h"
#include "uLCD_4DGL.h"
#include "SDFileSystem.h"
#include "Clock.h"
#include "Recorder.h"
#include "Speaker.h"
#include <string>
 
//#define SERVER_IP "143.215.120.157"
// Address and TCP port of the speech server that main() connects to.
#define SERVER_IP "143.215.119.135"
#define SERVER_PORT 13000
// File on the SD card that holds the recorded command before it is uploaded.
#define REC_NAME "/sd/rec.wav"
// NOTE(review): PLAY_NAME is not referenced anywhere in the visible code.
#define PLAY_NAME "/sd/play.wav"
 
// Size in bytes of the shared I/O buffer `buf` below.
#define BUF_SIZE 8192
 
// Big Components
uLCD_4DGL uLCD(p28,p27,p30);
Serial pc(USBTX, USBRX);
// Serial link to the EasyVR module that provides the wake-up trigger.
Serial easyVR(p13, p14);
// SD card; presumably mounted under "/sd", matching the paths above — TODO confirm.
SDFileSystem sdc(p5, p6, p7, p8, "sd");
EthernetInterface eth;
// TCP connection to the speech server (opened and closed once per command).
TCPSocketConnection server;
Clock clk;
 
// Small Components
Speaker speaker(p21);
// Lit while waiting for a trigger from the EasyVR.
DigitalOut sleepLED(LED4);
// cMutex is held around every access to `clk`.
Mutex cMutex;
// sMutex is held around uLCD drawing and around EasyVR serial transfers.
Mutex sMutex;
// Shared scratch buffer: file chunks being uploaded and the server's reply.
char buf[BUF_SIZE];
// float array for recorder (they share the buffer)
float *buffer = (float*)buf; 
 
// Help with noise
// NOTE(review): these analog inputs are never read in the visible code;
// presumably declaring them is what reduces the noise — confirm.
AnalogIn pin15(p15);
AnalogIn pin16(p16);
AnalogIn pin17(p17);
AnalogIn pin19(p19);
 
/**
 * Alarm thread body.
 *
 * Sleeps until signal 0x1 is raised on this thread, then plays the alarm
 * note five times, pausing with Thread::wait(1000) after each note, and
 * finally goes back to waiting for the next signal.
 *
 * @param args unused
 */
void aThread(void const *args) {
    for (;;) {
        // Block here until something signals this thread with 0x1.
        Thread::signal_wait(0x1, osWaitForever);

        int beepsLeft = 5;
        while (beepsLeft > 0) {
            speaker.playNote(969.0, 0.5, 0.1);
            Thread::wait(1000);
            --beepsLeft;
        }
    }
}
 
/**
 * Parses a command string received from the speech server and applies it to
 * the clock.
 *
 * The string is read as "<operation> <hour> <minute> <period> <zone>".
 * Fields missing from the string are treated as 0, and an empty or
 * unparsable string is reported as an invalid command.
 *
 * @param command NUL-terminated command text (normally the server's reply
 *                stored in the shared buffer).
 */
void execute(char *command) {
    // Operation token. The longest known names ("setTimezone", "deleteAlarm")
    // are 11 characters, so 12 bytes hold them plus the terminator.
    char buffer[12] = "";
    // Zero-initialised so a short reply never feeds garbage to the clock.
    int hour = 0, minute = 0, period = 0, zone = 0;

    // %11s bounds the token to the size of `buffer`: the text comes from the
    // network, so an unbounded %s could overflow the stack.
    sscanf(command, "%11s %d %d %d %d", buffer, &hour, &minute, &period, &zone);
    string operation(buffer);
    
    // Every branch that touches `clk` holds cMutex, because the LCD thread
    // reads the clock concurrently.
    if (operation == "setTime") {
        cMutex.lock();
        clk.setTime(hour, minute, period);
        cMutex.unlock();
    } else if (operation == "setTimezone") {
        cMutex.lock();
        clk.setTimezone(zone);
        cMutex.unlock();
    } else if (operation == "setAlarm") {
        cMutex.lock();
        clk.setAlarm(hour, minute, period);
        cMutex.unlock(); 
    } else if (operation == "setTimer") {
        cMutex.lock();
        clk.setTimer(hour, minute);
        cMutex.unlock(); 
    } else if (operation == "deleteAlarm") {
        cMutex.lock();
        clk.deleteAlarm();
        cMutex.unlock(); 
    } else if (operation == "syncNow") {
        cMutex.lock();            
        if (clk.syncTime() != 0) {
            printf("  ERROR: failed to sync time\n\n\r");
        }
        cMutex.unlock();
    } else if (operation == "noCommand") {
        // The server could not recognise any speech in the recording.
        printf("  ERROR: speech not recognized\n\n\r");
    } else {
        printf("  ERROR: not a valid command\n\n\r");
    }
}
 
 
/**
 * LCD refresh thread.
 *
 * Runs alongside the main thread: first syncs the clock and sets its
 * timezone, configures the LCD text format, then redraws the time and the
 * alarm indicator every 200 ms.
 *
 * @param args unused
 */
void lcdUpdateThread(void const *args) {
    char text[20];

    // Initial clock setup (clock access is guarded by cMutex).
    cMutex.lock();
    clk.syncTime();
    clk.setTimezone(-5);
    cMutex.unlock();

    // One-time LCD text formatting (LCD access is guarded by sMutex).
    sMutex.lock();
    uLCD.text_width(2);
    uLCD.text_height(2);
    uLCD.color(BLUE);
    sMutex.unlock();

    for (;;) {
        // Snapshot the clock state under the lock, format it outside.
        cMutex.lock();
        time_t now = clk.getTime();
        bool hasAlarm = clk.alarmSet();
        cMutex.unlock();

        struct tm *parts = localtime(&now);
        strftime(text, 20, "%I:%M:%S        %p", parts);

        sMutex.lock();
        uLCD.locate(0,3);
        uLCD.printf("%s", text);
        // Blanks overwrite a previously drawn indicator once the alarm is gone.
        if (!hasAlarm) {
            uLCD.printf("         ");
        } else {
            uLCD.printf("ALARM SET");
        }
        sMutex.unlock();

        Thread::wait(200);
    }
}
 
/**
 * One-time hardware bring-up: Ethernet (DHCP) and the EasyVR module.
 *
 * Progress is reported on the console. Failures of the Ethernet calls are
 * reported instead of being announced as successes; start-up still continues
 * so the rest of the system comes up.
 */
void init() {
    printf("\r\n\n--Starting MbedClock--\n\r");
    
    // EthernetInterface::init()/connect() return 0 on success.
    if (eth.init() == 0) {
        printf(" * Initialized Ethernet\n\r");
    } else {
        printf(" * ERROR: failed to initialize Ethernet\n\r");
    }
    
    if (eth.connect() == 0) {
        printf(" * Connected using DHCP\n\r");
    } else {
        printf(" * ERROR: failed to connect using DHCP\n\r");
    }
    // Extra settling time before the address is read back.
    wait(5);
    printf(" * Using IP: %s\n\r", eth.getIPAddress());
    
    // Send 'b' to the EasyVR and wait for its one-byte reply.
    easyVR.putc('b');
    easyVR.getc();
    printf(" * Initialized EasyVR\n\r");
}
 
/**
 * Arms the EasyVR and blocks until it sends any byte back.
 *
 * The sleep LED is on for the duration of the wait. The byte that signals
 * the trigger is left unread; stale input is drained on the next call.
 */
void waitForTrigger() {
    // Send the 's' command with argument 'A' + 1. The original note says this
    // selects the "2 claps" trigger.
    // NOTE(review): confirm the argument value against the EasyVR protocol.
    sMutex.lock();
    easyVR.putc('s');
    easyVR.putc('A' + 1);
    sMutex.unlock();
    wait(0.2);

    printf("Waiting for trigger...");

    // Discard anything already queued so only a fresh byte counts as a trigger.
    sMutex.lock();
    for (;;) {
        if (!easyVR.readable()) {
            break;
        }
        easyVR.getc();
    }
    sMutex.unlock();

    sleepLED = 1;
    // Poll every 200 ms until the module sends something.
    for (;;) {
        if (easyVR.readable()) {
            break;
        }
        wait(0.2);
    }
    sleepLED = 0;

    printf("trigger received!\n\r");
}
 
/**
 * Alternative trigger wait: repeatedly sends the two-character request
 * 'd','A' to the EasyVR and reads its reply until the value 'A' is obtained.
 * The sleep LED is on for the duration of the wait.
 *
 * NOTE(review): main() in this file calls waitForTrigger(), not this
 * function. The meaning of the protocol bytes ('d', 'r', 'e', ' ') is not
 * established here — check the EasyVR protocol documentation.
 */
void waitForTrigger2() {
    char rchar = 0;
    sleepLED = 1;
    printf("Waiting for trigger...");
    // Keep asking until the reply sequence ends in 'A'.
    while (rchar!='A') {
        wait(.001);
        sMutex.lock();
        easyVR.putc('d');
// a small delay is needed when sending EasyVR several characters
        wait(.001);
        easyVR.putc('A');
        sMutex.unlock();
        // Poll every 200 ms until the module answers.
        while (!easyVR.readable()) {
            wait(0.2);
        }
        sMutex.lock();
        rchar=easyVR.getc();
        sMutex.unlock();
        // word recognized
        if (rchar=='r') {
            // Send ' ' and read one follow-up byte; that byte decides
            // whether the loop ends.
            wait(.001);
            sMutex.lock();
            easyVR.putc(' ');
            rchar=easyVR.getc();
            sMutex.unlock();
        // error
        } else if (rchar=='e') {
            // Two ' '/read exchanges; only the second byte read is kept.
            wait(.001);
            sMutex.lock();
            easyVR.putc(' ');
            rchar=easyVR.getc();
            easyVR.putc(' ');
            rchar=easyVR.getc();
            sMutex.unlock();
        }
    }
    sleepLED = 0;
    printf("trigger received!\n\r");
}
 
/**
 * Uploads the recorded .wav file to the connected server and stores the
 * server's reply, NUL-terminated, in the shared buffer `buf`.
 *
 * Exactly 110296 bytes are sent, which is the amount the server reads per
 * upload. If the file cannot be opened or read, a send fails, or no reply
 * arrives, an error is printed and `buf` is set to the placeholder text
 * "sendFailed", which execute() rejects as an invalid command; stale buffer
 * contents are therefore never executed.
 */
void sendFile() {
    // Number of bytes the server expects for one recording.
    const int WAV_BYTES = 110296;
    // Placeholder reply; deliberately not a valid operation name.
    static const char NO_REPLY[] = "sendFailed";

    printf("  Sending \"%s\"...", REC_NAME);
    FILE *fp = fopen(REC_NAME, "rb");
    if (fp == NULL) {
        printf("failed to open file\n\r");
        sprintf(buf, "%s", NO_REPLY);
        return;
    }
 
    int sum = 0;
    bool ok = true;
    while (sum < WAV_BYTES) {
        // Never read past the amount the server is going to consume.
        int want = WAV_BYTES - sum;
        if (want > BUF_SIZE) {
            want = BUF_SIZE;
        }

        int got = (int)fread(buf, 1, want, fp);
        if (got <= 0) {
            // Short file or read error: stop instead of looping forever.
            ok = false;
            break;
        }
        // send_all keeps writing until the whole chunk is out; a plain
        // send() may transmit only part of it.
        if (server.send_all(buf, got) != got) {
            ok = false;
            break;
        }
        sum += got;
    }
    fclose(fp);

    if (!ok) {
        printf("failed after %d bytes\n\r", sum);
        sprintf(buf, "%s", NO_REPLY);
        return;
    }
    printf("sent\n\r");
    
    // Leave room for the terminator, and never index with a negative count.
    int n = server.receive(buf, BUF_SIZE - 1);
    if (n <= 0) {
        printf("  ERROR: no response from server\n\n\r");
        sprintf(buf, "%s", NO_REPLY);
        return;
    }
    buf[n] = '\0';
    printf("  Received \"%s\"\n\n\r", buf);
}
 
/**
 * Program entry point.
 *
 * Initialises the hardware, creates the alarm and LCD threads, hands the
 * alarm thread to the clock, and then loops forever: wait for a trigger,
 * record a 5-second command, upload it to the server and execute the reply.
 */
int main() {
    init();

    Thread alarmThread(aThread);
    Thread updateThread(lcdUpdateThread);
    printf(" * Started LCD and Alarm Threads\n\n\r");

    // The clock needs the alarm thread so it can signal it when an alarm fires.
    cMutex.lock();
    clk.setAlarmThread(&alarmThread);
    cMutex.unlock();

    for (;;) {
        waitForTrigger();

        printf("  Recording audio file...");
        rec(REC_NAME, 5);
        printf("complete\n\r");

        // connect() yields 0 on success; on failure just wait for the next trigger.
        if (server.connect(SERVER_IP, SERVER_PORT) != 0) {
            printf("  Unable to connect to %s\n\n\r", SERVER_IP);
            continue;
        }

        printf("  Connected to %s\n\r", SERVER_IP);
        sendFile();      // leaves the server's reply in buf
        execute(buf);
        server.close();
    }
}

More Info

Complete Mbed clock application code including Ethernet file transfer and speech recognition:

Import programMbedClock

Mbed Clock application using an NTP connection to get internet time and a terminal interface to send commands


Functionality

Clock.cpp

The center of the MbedClock is, of course, the Clock class. This class stores the current time in UTC format on the Mbed itself. This time is then converted using the given timezone variable whenever an operation is called. This way, it is possible to have multiple clocks with different timezones, although that functionality is not currently provided. The alarms work by setting a Ticker for the duration until the alarm would expire. For time alarms, this entails calculating the difference between the current time and the desired alarm as this duration. For timer alarms, this value is simply the duration of the timer in seconds. When the ticker expires, it calls a simple function which signals the alarmThread (using 0x1) to activate a non-blocking alarm.

MbedClock Program Flow

The program goes through the following steps:

  1. The system boots and initializes all of the components. This takes a long time (around 10 seconds) because the EthernetInterface needs a while to set up and acquire an IP address. There is an additional wait(5) in the code because my mbed, at least, seems to need more time after eth.connect() returns to acquire an IP address.
  2. Start the AlarmThread and the UpdateThread
    1. The AlarmThread is used just to sound alarms. It is usually in the waiting state for a signal (0x1 to start an alarm), so it does not take up CPU time unless an alarm is going off.
    2. The UpdateThread simply gets the current time and updates the LCD. It does this about 5 times a second for smooth seconds updating. The first thing it does is sets the time using NTP.
  3. The mbed then waits for a trigger from the EasyVR. When it receives a trigger, the function waitForTrigger() returns.
  4. A 5-second-long .wav file is recorded using the rec() function provided by Shinichiro Nakamura and stored on the SD card.
  5. A connection to the server is attempted. If the connection fails, the mbed just waits for another trigger
  6. If the connection succeeds, the recorded .wav file is sent to the server over a TCP socket. We send (and the server receives) exactly 110296 bytes because our file size is always the same, but a change could be made to read the file size from the .wav file header.
  7. The mbed receives and decodes a string command from the server. The command is executed, the TCP socket is closed, and the mbed waits for another trigger.

SpeechServer Program Flow

The SpeechServer runs on any Windows computer using the .NET framework. It is a C# program using Microsoft's Speech APIs. The program flow for the server is as follows:

  1. The server creates a SpeechRecognitionEngine object from a SRGS grammar .xml file and sets up the necessary callback functions
  2. Creates a TCP Listener socket (port 13000) and then listens for clients.
  3. When an mbed client connects, the server reads 110296 bytes and saves them as a .wav file. The file is then used as the input to the SpeechRecognitionEngine object and the recognition begins. A timeout is also set in case the speech recognition cannot recognize anything, in which case no recognition callback function will be called.
    1. If the speech recognition succeeds, the appropriate callback function is called and the timeout timer is stopped. The callback builds a response string from the given SemanticValue objects returned which is highly coupled to the grammar.xml file used earlier.
    2. If the speech recognition fails, then the timer callback function is called and a string representing a failed speech recognition is created.
  4. The response string is sent to the mbed and the connection is terminated. The server waits for another client.

Video


Please log in to post comments.