Speech Recognition Control Display Game

Introduction

In this project, we use the speech recognition software PocketSphinx to capture audio and convert it to text. We then built a server to which the recognized text is uploaded. With an Arduino and a SparkFun CC3000 WiFi Breakout Board, we connect the microcontroller to the Internet, fetch information (HTML files) from the website, and use it to control other activities. Here we use an NXP LPC1768 (mbed) as a second microcontroller to control the uLCD display and start a game when the user says the word "START".

Parts

  • Speech Recognition: PocketSphinx /media/uploads/ZhiFeng/thumb_img_0091_1024.jpg
  • Server: Amazon AWS /media/uploads/ZhiFeng/thumb_img_0093_1024.jpg
  • Internet Connection: SparkFun CC3000 WiFi Breakout Board
  • Arduino and NXP LPC 1768
  • LCD Display: Sparkfun uLCD

Hardware Hookup

  • Connect the mbed with uLCD /media/uploads/ZhiFeng/capture.jpg
  • Connect mbed with Arduino Uno Here we use UART for communication between mbed and Arduino, so the hardware hookup is:

mbed TX -> Arduino RX

Arduino TX -> mbed RX

  • Connect Arduino with Sparkfun CC3000 Wifi Breakout (CC3000 Breakout Board → Arduino)

GND → GND

VCC → 5V

MOSI → 11

MISO → 12

CS → 10

INT → 2

SCK → 13

EN → 7

/media/uploads/ZhiFeng/capture_VAST0us.jpg

Code

  • For the Internet connection, we use an Arduino and the SparkFun CC3000 Breakout Board. We followed the CC3000 Hookup Guide on the SparkFun website (https://learn.sparkfun.com/tutorials/cc3000-hookup-guide), using its library with some modifications. Here is the WebClient code, which fetches an HTML file from a given website:

<<Arduino+Sparkfun CC3000Wifi>>

  • For the UART transmission between mbed and Arduino, and uLCD display, we use mbed code.

<<mbed UART + uLCD Display>>

include the mbed library with this snippet

// uLCD-144-G2 demo program for uLCD-4GL LCD driver library
//
#include "mbed.h"
#include "uLCD_4DGL.h"

// Driver object for the uLCD display.
uLCD_4DGL uLCD(p28,p27,p11); // serial tx, serial rx, reset pin;

// UART that main() reads characters from (per the hookup notes, the link to the Arduino).
Serial device(p9, p10);  // tx, rx
// USB serial port; not referenced by main() below.
Serial pc(USBTX, USBRX); 
// Set by main() when the game starts.
DigitalOut led2(LED2);
// Set by main() when a character has been received.
DigitalOut led1(LED1);

// Entry point of the mbed program.
//
// Shows a short start-up sequence on the uLCD, then echoes every character
// received on the `device` UART to the display while watching the incoming
// stream for the lowercase word "start". Each time the word is seen the
// bouncing-ball demo runs once and the loop goes back to listening.
int main()
{
    // Word that launches the game, and a sliding window holding the most
    // recently received characters (one slot per keyword character). A fixed
    // window is used instead of an ever-growing buffer so that an arbitrarily
    // long stream can never write past the end of an array.
    static const char keyword[] = "start";
    const int keyword_len = sizeof(keyword) - 1;
    char recent[sizeof(keyword) - 1] = {0};

    device.baud(9600);
    // basic printf demo = 16 by 18 characters on screen
    uLCD.printf("\nSpeech Recognition!!\n"); //Default Green on black text
    uLCD.printf("\n  Starting Demo...");
    uLCD.text_width(4); //4X size text
    uLCD.text_height(4);
    uLCD.color(RED);
    // Countdown 3..0, one second per step.
    for (int count=3; count>=0; --count) {
        uLCD.locate(1,2);
        uLCD.printf("%2d",count);
        wait(1.0);
    }
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(1); //1X wide, 2X tall text
    uLCD.text_height(2);
    uLCD.printf("This is 4180 final project!!!");
    wait(1.0);
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(2); //2X size text
    uLCD.text_height(2);
    uLCD.printf("Waiting for 'START'...");

    while(1) {
        if (!device.readable()) {
            continue;
        }

        led1 = 1;
        const int c = device.getc();
        uLCD.printf("%c", c);

        // Slide the window one place to the left and append the new character.
        for (int k = 0; k + 1 < keyword_len; ++k) {
            recent[k] = recent[k + 1];
        }
        recent[keyword_len - 1] = static_cast<char>(c);

        // The keyword has just been received when the window equals it.
        bool matched = true;
        for (int k = 0; k < keyword_len; ++k) {
            if (recent[k] != keyword[k]) {
                matched = false;
                break;
            }
        }
        if (!matched) {
            continue;
        }

        // Forget the match so the game runs once per spoken keyword rather
        // than again on every following character.
        for (int k = 0; k < keyword_len; ++k) {
            recent[k] = 0;
        }

        // Leave the received text on screen for a while before clearing it.
        wait(8);
        uLCD.cls();
        led2 = 1;

        // Bouncing-ball demo: the position is tracked in floats, drawn at
        // integer pixel coordinates.
        float fx=50.0,fy=21.0,vx=1.0,vy=0.4;
        int x=50,y=21,radius=4;
        uLCD.background_color(BLACK);
        uLCD.cls();
        // Draw the border of the playing field.
        uLCD.line(0, 0, 127, 0, WHITE);
        uLCD.line(127, 0, 127, 127, WHITE);
        uLCD.line(127, 127, 0, 127, WHITE);
        uLCD.line(0, 127, 0, 0, WHITE);
        for (int frame=0; frame<1500; frame++) {
            //   draw ball
            uLCD.filled_circle(x, y, radius, RED);
            //bounce off edge walls and slow down a bit?
            if ((x<=radius+1) || (x>=126-radius)) vx = -.90*vx;
            if ((y<=radius+1) || (y>=126-radius)) vy = -.90*vy;
            //erase old ball location
            uLCD.filled_circle(x, y, radius, BLACK);
            //  move ball
            fx=fx+vx;
            fy=fy+vy;
            x=static_cast<int>(fx);
            y=static_cast<int>(fy);
        }
        wait(0.5);
    }
}

<<Speech recognition using Pocketsphinx>>

include the mbed library with this snippet

#include <stdio.h>
#include <string.h>
#include <assert.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

/*
 * Command-line options accepted by this program: everything supplied by
 * POCKETSPHINX_OPTIONS plus the extra options listed below. Each extra entry
 * gives an option name, an ARG_* type, a default value and a help string
 * (NOTE(review): field meanings read off the entries themselves -- confirm
 * against the arg_t definition in sphinxbase).
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,
    /* Argument file. */
    {"-argfile",
     ARG_STRING,
     NULL,
     "Argument file giving extra arguments."},
    /* Audio device passed to ad_open_dev() for microphone input. */
    {"-adcdev",
     ARG_STRING,
     NULL,
     "Name of audio device to use for input."},
    /* When set, main() runs recognize_from_file() on this file. */
    {"-infile",
     ARG_STRING,
     NULL,
     "Audio file to transcribe."},
    /* When true (and -infile is not given), main() runs recognize_from_microphone(). */
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    /* When true, recognize_from_file() also calls print_word_times(). */
    {"-time",
     ARG_BOOLEAN,
     "no",
     "Print word times in file transcription."},
    CMDLN_EMPTY_OPTION
};

/* Decoder instance; created by ps_init() in main() and used by every recognition routine. */
static ps_decoder_t *ps;
/* Parsed command-line configuration; set in main(). */
static cmd_ln_t *config;
/* Input file opened (and closed) by recognize_from_file(). */
static FILE *rawfd;

/*
 * Print one line per segment of the decoder's current hypothesis: the
 * segment's word, its start and end frame numbers divided by the "-frate"
 * setting, and the value obtained by passing the segment's ps_seg_prob()
 * result through logmath_exp().
 */
static void
print_word_times()
{
    const int frames_per_sec = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame, log_prob;
        float start_time, end_time, confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_prob);

        /* Frame numbers are converted using the configured frame rate. */
        start_time = (float) first_frame / frames_per_sec;
        end_time = (float) last_frame / frames_per_sec;

        printf("%s %.3f %.3f %f\n", ps_seg_word(seg), start_time, end_time,
               confidence);
    }
}

/*
 * Validate the header fields this program checks before decoding a WAV file.
 *
 * header      - start of the file; bytes 20, 22, 24..27 and 34 are read
 * expected_sr - sample rate the decoder is configured for
 *
 * The checks run in this order and stop at the first failure: byte 34 must
 * be 0x10 (16 bits per sample), byte 20 must be 1 (PCM), byte 22 must be 1
 * (mono), and the little-endian 32-bit value in bytes 24..27 must equal
 * expected_sr. Each failure is reported with E_ERROR.
 *
 * Returns 1 when every check passes, 0 otherwise.
 */
static int
check_wav_header(char *header, int expected_sr)
{
    const int bits_per_sample = header[34];
    const int compression = header[20];
    const int channels = header[22];
    /* Mask each byte so a signed char cannot sign-extend into the result. */
    const int rate_b0 = header[24] & 0xFF;
    const int rate_b1 = header[25] & 0xFF;
    const int rate_b2 = header[26] & 0xFF;
    const int rate_b3 = header[27] & 0xFF;
    int sr;
    int ok = 0;

    if (bits_per_sample != 0x10) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", bits_per_sample);
    }
    else if (compression != 0x1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", compression);
    }
    else if (channels != 0x1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", channels);
    }
    else {
        sr = (rate_b0 | (rate_b1 << 8) | (rate_b2 << 16) | (rate_b3 << 24));
        if (sr != expected_sr) {
            E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr);
        }
        else {
            ok = 1;
        }
    }
    return ok;
}


/*
 * Continuous recognition from a file
 *
 * Opens the file named by "-infile". When the name ends in ".wav", the first
 * 44 bytes are read as a header and validated with check_wav_header() against
 * "-samprate". The remaining data is fed to the decoder in blocks of up to
 * 2048 samples; every time the decoder reports a speech -> silence transition
 * the utterance is ended, its hypothesis is printed (followed by word times
 * when "-time" is set) and a new utterance is started.
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];

        /* A short read would leave part of the header uninitialised and the
         * checks below would then inspect garbage, so treat it as an error
         * like the other failures in this function. */
        if (fread(waveheader, 1, sizeof(waveheader), rawfd) != sizeof(waveheader))
            E_FATAL("Failed to read WAV header from file '%s'.\n", fname);
        if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition: report and start a new utterance */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
                printf("%s\n", hyp);
            if (print_times)
                print_word_times();

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }

    /* End of file: close the last utterance and report it if speech had started. */
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL)
            printf("%s\n", hyp);
        if (print_times) {
            print_word_times();
        }
    }

    fclose(rawfd);
}

/*
 * Sleep for specified msec.
 *
 * Uses Sleep() on Windows builds; elsewhere it waits in select() with no
 * descriptors and a timeout of ms milliseconds.
 */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    /* Split into whole seconds and leftover microseconds: tv_usec has to
     * stay below one million, so delays of a second or more must not be
     * placed in tv_usec alone. */
    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Upload one recognised hypothesis to the project server (added for the 4180
 * project) by running curl through system().
 *
 * The command is built with a bounded snprintf() so that a long hypothesis
 * cannot overflow the buffer. The text is placed inside a double-quoted
 * shell argument, so a hypothesis containing a character the shell still
 * interprets there (", \, $ or `) is not uploaded.
 */
static void
post_hypothesis(const char *hyp)
{
    char cmd[500];
    int len;

    if (strpbrk(hyp, "\"\\$`") != NULL) {
        fprintf(stderr, "Not uploading hypothesis: it contains shell special characters\n");
        return;
    }

    /* Note the plain ASCII "--" in front of dump-header: curl does not
     * recognise the option when it is written with a typographic dash. */
    len = snprintf(cmd, sizeof(cmd),
                   "curl -d \"content=%s\" --dump-header headers "
                   "http://ec2-54-213-4-148.us-west-2.compute.amazonaws.com:8080/hello/",
                   hyp);
    if (len < 0 || (size_t) len >= sizeof(cmd)) {
        fprintf(stderr, "Not uploading hypothesis: command does not fit in %u bytes\n",
                (unsigned) sizeof(cmd));
        return;
    }

    if (system(cmd) != 0)
        fprintf(stderr, "Upload command reported a failure\n");
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *        upload the result with post_hypothesis();
 *     }
 *
 * Audio comes from the device named by "-adcdev", opened at the "-samprate"
 * rate. Failures to open the device, start recording, read audio or start an
 * utterance are reported with E_FATAL.
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf(" %s\n", hyp);
                post_hypothesis(hyp);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    /* Not reached: the loop above never exits. */
    ad_close(ad);
}

/*
 * Program entry point.
 *
 * Parses the command line (and the file named by "-argfile", when given),
 * requires either "-infile" or "-inmic", initialises the decoder and then
 * runs file or microphone recognition. "-infile" takes precedence when both
 * are given.
 *
 * Returns 0 after recognition finishes, 1 when the arguments are unusable or
 * the decoder cannot be initialised.
 */
int
main(int argc, char *argv[])
{
    char const *argfile;
    int have_input = 0;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config != NULL) {
        argfile = cmd_ln_str_r(config, "-argfile");
        if (argfile != NULL)
            config = cmd_ln_parse_file_r(config, cont_args_def, argfile, FALSE);
    }

    /* An input source is mandatory: a file name or the microphone switch. */
    if (config != NULL) {
        if (cmd_ln_str_r(config, "-infile") != NULL)
            have_input = 1;
        else if (cmd_ln_boolean_r(config, "-inmic") != FALSE)
            have_input = 1;
    }
    if (!have_input) {
        E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.");
        cmd_ln_free_r(config);
        return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (cmd_ln_str_r(config, "-infile") == NULL) {
        if (cmd_ln_boolean_r(config, "-inmic"))
            recognize_from_microphone();
    }
    else {
        recognize_from_file();
    }

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
/*
 * Wide-character entry point: converts every argument to a multibyte string
 * and forwards to main().
 *
 * Returns main()'s result, or 1 when an argument cannot be converted or
 * memory cannot be allocated.
 */
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    size_t len;
    int i;

    argv = malloc(argc * sizeof(char *));
    if (argv == NULL)
        return 1;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs() reports the number of bytes
         * needed, not counting the terminator. */
        len = wcstombs(NULL, wargv[i], 0);
        if (len == (size_t) -1)
            return 1;
        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return 1;
        /* Convert with room for the terminator; limiting the call to the
         * character count would leave the result unterminated. */
        wcstombs(argv[i], wargv[i], len + 1);
        argv[i][len] = '\0';
    }

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif

<<Arduino webclient code>>

include the mbed library with this snippet

#include <SPI.h>
#include <SFE_CC3000.h>
#include <SFE_CC3000_Client.h>

// Pins
#define CC3000_INT      2   // Needs to be an interrupt pin (D2/D3)
#define CC3000_EN       7   // Can be any digital pin
#define CC3000_CS       10  // Preferred is pin 10 on Uno

// Connection info data lengths
#define IP_ADDR_LEN     4   // Length of IP address in bytes

// Constants
// NOTE(review): network credentials are hard-coded here; the SSID starts with
// a space -- confirm that is the real network name.
char ap_ssid[] = " China Unicom";                  // SSID of network
char ap_password[] = "fengzhislc";          // Password of network
unsigned int ap_security = WLAN_SEC_WPA2; // Security of network
unsigned int timeout = 30000;             // Milliseconds
// Remote host name only: the scheme, port and path are not part of the name
// that is resolved, so they are kept out of this string.
char server[] = "ec2-54-213-4-148.us-west-2.compute.amazonaws.com";
unsigned int server_port = 8080;          // TCP port the project server listens on

// Global Variables
SFE_CC3000 wifi = SFE_CC3000(CC3000_INT, CC3000_EN, CC3000_CS);
SFE_CC3000_Client client = SFE_CC3000_Client(wifi);

// Runs once at start-up: initialises the serial port and the CC3000, joins
// the access point, prints the assigned IP address, then opens a TCP
// connection to the server and sends an HTTP GET for /index.html. The
// response is read by loop().
void setup() {
  
  ConnectionInfo connection_info;
  int i;
  
  // Initialize Serial port. 9600 baud matches the device.baud(9600) setting
  // of the mbed program that listens on this UART.
  Serial.begin(9600);
  Serial.println();
  Serial.println("---------------------------");
  Serial.println("SparkFun CC3000 - WebClient");
  Serial.println("---------------------------");
  
  // Initialize CC3000 (configure SPI communications)
  if ( wifi.init() ) {
    Serial.println("CC3000 initialization complete");
  } else {
    Serial.println("Something went wrong during CC3000 init!");
  }
  
  // Connect using DHCP
  Serial.print("Connecting to SSID: ");
  Serial.println(ap_ssid);
  if(!wifi.connect(ap_ssid, ap_security, ap_password, timeout)) {
    Serial.println("Error: Could not connect to AP");
  }
  
  // Gather connection details and print IP address
  if ( !wifi.getConnectionInfo(connection_info) ) {
    Serial.println("Error: Could not obtain connection details");
  } else {
    Serial.print("IP Address: ");
    for (i = 0; i < IP_ADDR_LEN; i++) {
      Serial.print(connection_info.ip_address[i]);
      if ( i < IP_ADDR_LEN - 1 ) {
        Serial.print(".");
      }
    }
    Serial.println();
  }
  
  // Make a TCP connection to remote host
  Serial.print("Performing HTTP GET of: ");
  Serial.println(server);
  if ( !client.connect(server, server_port) ) {
    Serial.println("Error: Could not make a TCP connection");
  } else {
    // Make a HTTP GET request (only when the connection is open). The Host
    // header carries the port because it is not the default HTTP port.
    client.println("GET /index.html HTTP/1.1");
    client.print("Host: ");
    client.print(server);
    client.print(":");
    client.println(server_port);
    client.println("Connection: close");
    client.println();
  }
  Serial.println();
}

// Called repeatedly: forwards one received byte per pass to the serial port
// and, once the client is no longer connected, closes the socket, leaves the
// WiFi network and idles forever.
void loop() {

  // Relay the next incoming byte, if any
  if ( client.available() ) {
    char received = client.read();
    Serial.print(received);
  }

  // Still connected: nothing more to do on this pass
  if ( client.connected() ) {
    return;
  }

  // The server has disconnected: stop the client and wifi
  Serial.println();

  // Close socket
  bool socket_closed = client.close();
  if ( !socket_closed ) {
    Serial.println("Error: Could not close socket");
  }

  // Disconnect WiFi
  bool wifi_left = wifi.disconnect();
  if ( !wifi_left ) {
    Serial.println("Error: Could not disconnect from network");
  }

  // Do nothing
  Serial.println("Finished WebClient test");
  for (;;) {
    delay(1000);
  }
}

Video


Please log in to post comments.