Users » ZhiFeng » Notebook » Speech Recognition Control Display Game

Speech Recognition Control Display Game

Page last updated 02 May 2015, by Zhi Feng. 5 replies

Introduction

In this project, we use the speech recognition software PocketSphinx to capture an audio signal and convert it to text. We then build a server that is updated with the recognized text. With an Arduino and the SparkFun CC3000 WiFi Breakout Board, we connect the micro-controller to the internet, fetch information (HTML files) from the website, and use it to control other activities. Here we use an NXP LPC1768 as a second micro-controller to drive the uLCD display and start a game when the user says the word "START".

Parts

Speech Recognition: PocketSphinx

Server: Amazon AWS

Internet Connection: SparkFun CC3000 WiFi Breakout Board
Arduino and NXP LPC 1768
LCD Display: Sparkfun uLCD

Hardware Hookup

Connect the mbed with uLCD
Connect mbed with Arduino Uno. Here we use UART for communication between the mbed and the Arduino, so the hardware hookup is:

mbed TX -> Arduino RX

Arduino TX -> mbed RX

Connect Arduino with Sparkfun CC3000 Wifi Breakout (CC3000 Breakout Board → Arduino)

GND → GND

VCC → 5V

MOSI → 11

MISO → 12

CS → 10

INT → 2

SCK → 13

EN → 7

/media/uploads/ZhiFeng/capture_VAST0us.jpg

Code

For the Internet connection, we use an Arduino and the SparkFun CC3000 Breakout Board. We followed the CC3000 Hookup Guide on the SparkFun website (https://learn.sparkfun.com/tutorials/cc3000-hookup-guide), using its library and modifying parts of it. Here is the code for the WebClient, which fetches an HTML file from a given website:

<<Arduino+Sparkfun CC3000Wifi>>

For the UART transmission between mbed and Arduino, and uLCD display, we use mbed code.

<<mbed UART + uLCD Display>>

include the mbed library with this snippet

// uLCD-144-G2 demo program for uLCD-4GL LCD driver library
//
#include "mbed.h"
#include "uLCD_4DGL.h"

// uLCD display driver object on pins p28/p27/p11.
uLCD_4DGL uLCD(p28,p27,p11); // serial tx, serial rx, reset pin;

// UART link that main() polls for incoming characters (per the hookup notes,
// this is the connection to the Arduino).
Serial device(p9, p10);  // tx, rx
// USB serial port; not referenced by the code shown in this snippet.
Serial pc(USBTX, USBRX); 
// led2 is switched on by main() when the game starts.
DigitalOut led2(LED2);
// led1 is switched on by main() when a character arrives on `device`.
DigitalOut led1(LED1);

/**
 * Entry point for the mbed side of the project.
 *
 * Shows a short start-up countdown on the uLCD, then echoes every character
 * received on the `device` UART to the display. As soon as the most recently
 * received characters spell the lowercase word "start", the bouncing-ball
 * demo is played once; afterwards the matcher is cleared so that the keyword
 * has to be received again to replay the demo.
 *
 * NOTE(review): this side runs the UART at 9600 baud, while the Arduino
 * sketch published with this project calls Serial.begin(115200) — confirm
 * that both ends really use the same rate.
 */
int main()
{
    // Keyword that starts the game, matched against the tail of the stream.
    static const char kTrigger[] = "start";
    const int kTriggerLen = static_cast<int>(sizeof(kTrigger)) - 1;
    // Sliding window holding the last kTriggerLen received characters. A
    // fixed-size window replaces the old ever-growing buffer, so no amount
    // of input can write past the end of the array.
    char recent[sizeof(kTrigger) - 1] = {0};

    device.baud(9600);
    // basic printf demo = 16 by 18 characters on screen
    uLCD.printf("\nSpeech Recognition!!\n"); //Default Green on black text
    uLCD.printf("\n  Starting Demo...");
    uLCD.text_width(4); //4X size text
    uLCD.text_height(4);
    uLCD.color(RED);
    for (int count = 3; count >= 0; --count) {
        uLCD.locate(1,2);
        uLCD.printf("%2d", count);   // "%2D" is not a standard conversion
        wait(1.0);
    }
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(1);  // 1X wide, 2X tall text
    uLCD.text_height(2);
    uLCD.printf("This is 4180 final project!!!");
    wait(1.0);
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(2);  // 2X size text
    uLCD.text_height(2);
    uLCD.printf("Waiting for 'START'...");

    while (1) {
        if (!device.readable()) {
            continue;
        }

        led1 = 1;
        const int c = device.getc();
        uLCD.printf("%c", c);

        // Slide the window one position and append the new character.
        for (int k = 0; k + 1 < kTriggerLen; ++k) {
            recent[k] = recent[k + 1];
        }
        recent[kTriggerLen - 1] = static_cast<char>(c);

        // The keyword is recognised when the window equals it exactly. The
        // comparison now ends on the character just received instead of on
        // the not-yet-written slot after it.
        bool triggered = true;
        for (int k = 0; k < kTriggerLen; ++k) {
            if (recent[k] != kTrigger[k]) {
                triggered = false;
                break;
            }
        }
        if (!triggered) {
            continue;
        }

        wait(8);
        uLCD.cls();
        led2 = 1;
        float fx=50.0,fy=21.0,vx=1.0,vy=0.4;
        int x=50,y=21,radius=4;
        uLCD.background_color(BLACK);
        uLCD.cls();
        // Draw the four walls of the playing field.
        uLCD.line(0, 0, 127, 0, WHITE);
        uLCD.line(127, 0, 127, 127, WHITE);
        uLCD.line(127, 127, 0, 127, WHITE);
        uLCD.line(0, 127, 0, 0, WHITE);
        for (int frame = 0; frame < 1500; frame++) {
            //   draw ball
            uLCD.filled_circle(x, y, radius, RED);
            //bounce off edge walls and slow down a bit?
            if ((x<=radius+1) || (x>=126-radius)) vx = -.90*vx;
            if ((y<=radius+1) || (y>=126-radius)) vy = -.90*vy;
            //erase old ball location
            uLCD.filled_circle(x, y, radius, BLACK);
            //  move ball
            fx=fx+vx;
            fy=fy+vy;
            x=(int)fx;
            y=(int)fy;
        }
        wait(0.5);

        // Forget the matched keyword so that unrelated bytes arriving later
        // do not restart the game on every single character.
        for (int k = 0; k < kTriggerLen; ++k) {
            recent[k] = 0;
        }
    }
}

<<Speech recognition using Pocketsphinx>>

include the mbed library with this snippet

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

/*
 * Command-line option table handed to the cmd_ln parser in main(): the stock
 * POCKETSPHINX_OPTIONS followed by the options specific to this program.
 * Each row is { name, type, default value, help text }.
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,

    /* Argument file. */
    { "-argfile", ARG_STRING,  NULL, "Argument file giving extra arguments." },

    /* Input selection. */
    { "-adcdev",  ARG_STRING,  NULL, "Name of audio device to use for input." },
    { "-infile",  ARG_STRING,  NULL, "Audio file to transcribe." },
    { "-inmic",   ARG_BOOLEAN, "no", "Transcribe audio from microphone." },

    /* Output detail. */
    { "-time",    ARG_BOOLEAN, "no", "Print word times in file transcription." },

    CMDLN_EMPTY_OPTION
};

/* Decoder instance; assigned from ps_init() in main() and released there. */
static ps_decoder_t *ps;
/* Parsed command-line configuration; assigned and freed in main(). */
static cmd_ln_t *config;
/* Input file handle, opened and closed by recognize_from_file(). */
static FILE *rawfd;

/*
 * Print one line per segment of the current hypothesis:
 *   <word> <start> <end> <confidence>
 * where start/end are the segment's frame indices divided by the "-frate"
 * configuration value, and confidence is the segment probability passed
 * through logmath_exp().
 */
static void
print_word_times()
{
    const int frames_per_unit = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame;
        int32 log_prob;
        float confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_prob);

        printf("%s %.3f %.3f %f\n", ps_seg_word(seg),
               ((float) first_frame / frames_per_unit),
               ((float) last_frame / frames_per_unit), confidence);
    }
}

/* Decode an unsigned 16-bit little-endian field from two header bytes. */
static int
wav_header_le16(const char *bytes)
{
    return (int) ((unsigned int) (unsigned char) bytes[0]
                  | ((unsigned int) (unsigned char) bytes[1] << 8));
}

/*
 * Check that a 44-byte WAV header describes audio this program can decode:
 * 16 bits per sample, PCM encoding, one channel, and the expected sample rate.
 *
 * header      - at least 44 bytes read from the start of the file.
 * expected_sr - sample rate the decoder is configured for.
 *
 * Returns 1 when every field matches, 0 (after logging the mismatch with
 * E_ERROR) otherwise.
 *
 * Every field is decoded through unsigned bytes as a complete little-endian
 * value. Comparing only the low byte would accept, for example, a format
 * tag of 0x0101 as PCM, and shifting a sign-extended char into the top byte
 * of the sample rate is undefined behaviour.
 */
static int
check_wav_header(char *header, int expected_sr)
{
    int bits_per_sample;
    int audio_format;
    int channels;
    int sr;

    bits_per_sample = wav_header_le16(header + 34);
    if (bits_per_sample != 0x10) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", bits_per_sample);
        return 0;
    }

    audio_format = wav_header_le16(header + 20);
    if (audio_format != 0x1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", audio_format);
        return 0;
    }

    channels = wav_header_le16(header + 22);
    if (channels != 0x1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", channels);
        return 0;
    }

    sr = (int) ((unsigned int) (unsigned char) header[24]
                | ((unsigned int) (unsigned char) header[25] << 8)
                | ((unsigned int) (unsigned char) header[26] << 16)
                | ((unsigned int) (unsigned char) header[27] << 24));
    if (sr != expected_sr) {
        E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }

    return 1;
}


/*
 * Continuous recognition from a file.
 *
 * Opens the file named by "-infile", validates its header when the name ends
 * in ".wav", then feeds the samples to the decoder in blocks of 2048. Each
 * time the decoder reports a speech -> silence transition the utterance is
 * closed and its hypothesis printed (followed by word timings when "-time"
 * is set), and a new utterance is started. Any utterance still open at end
 * of file is flushed the same way.
 *
 * Failures to open the file, to read a complete WAV header, to match the
 * expected audio format, or to start an utterance are reported through the
 * E_FATAL macros.
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];

        /* A short read would leave part of the header uninitialised, so the
         * format check below would be looking at garbage. */
        if (fread(waveheader, 1, sizeof(waveheader), rawfd) != sizeof(waveheader))
            E_FATAL("Failed to read WAV header from file '%s'.\n", fname);
        if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    /* Same error handling as the microphone path. */
    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition: report and start a new utterance */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
                printf("%s\n", hyp);
            if (print_times)
                print_word_times();

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
        }
    }

    /* Flush whatever was still being decoded when the file ended. */
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL)
            printf("%s\n", hyp);
        if (print_times) {
            print_word_times();
        }
    }

    fclose(rawfd);
}

/*
 * Sleep for the specified number of milliseconds.
 *
 * On Windows this calls Sleep(); elsewhere it uses select() with no file
 * descriptors and only a timeout.
 */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    /* Split into whole seconds plus the remainder so that tv_usec stays
     * below one million; an out-of-range tv_usec makes select() fail on
     * some systems, which would turn delays of a second or more into no
     * delay at all. */
    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Upload one recognition hypothesis to the project server by running curl
 * through system() (added for the 4180 project).
 *
 * The command line is assembled in a fixed-size buffer with explicit bounds
 * checks; a hypothesis that does not fit is reported and skipped instead of
 * overflowing the buffer. Characters that keep a special meaning inside a
 * double-quoted POSIX shell string (" \ $ `) are backslash-escaped so the
 * recognised text cannot terminate the quoted argument.
 * NOTE(review): the escaping targets POSIX sh; cmd.exe quoting rules differ.
 */
static void
post_hypothesis(const char *hyp)
{
    static const char prefix[] = "curl -d \"content=";
    /* The option must start with two ASCII hyphens; the previous text had a
     * typographic dash, which curl does not recognise as an option. */
    static const char suffix[] =
        "\" --dump-header headers http://ec2-54-213-4-148.us-west-2.compute.amazonaws.com:8080/hello/";
    char cmd[1024];
    size_t len;
    const char *p;

    memcpy(cmd, prefix, sizeof(prefix) - 1);
    len = sizeof(prefix) - 1;

    for (p = hyp; *p != '\0'; ++p) {
        const size_t escape =
            (*p == '"' || *p == '\\' || *p == '$' || *p == '`') ? 1 : 0;

        /* Room needed: optional backslash, the character itself, and the
         * suffix including its terminating NUL. */
        if (len + escape + 1 + sizeof(suffix) > sizeof(cmd)) {
            E_ERROR("Hypothesis too long to upload, skipping\n");
            return;
        }
        if (escape)
            cmd[len++] = '\\';
        cmd[len++] = *p;
    }
    memcpy(cmd + len, suffix, sizeof(suffix));

    if (system(cmd) != 0)
        E_ERROR("Upload command failed: %s\n", cmd);
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result and upload it to the server;
 *     }
 *
 * Opens the audio device named by "-adcdev" at the "-samprate" rate and
 * never returns normally; device, recording, read and start-utterance
 * failures are reported through E_FATAL.
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf(" %s\n", hyp);
                post_hypothesis(hyp);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    /* Not reached: the loop above has no exit. */
    ad_close(ad);
}

/*
 * Program entry point.
 *
 * Parses the command line (plus the optional "-argfile"), requires either
 * "-infile" or "-inmic", initialises the decoder and then runs file or
 * microphone recognition. Returns 1 when the arguments are unusable or the
 * decoder cannot be created, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    char const *argfile;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config) {
        argfile = cmd_ln_str_r(config, "-argfile");
        if (argfile != NULL)
            config = cmd_ln_parse_file_r(config, cont_args_def, argfile, FALSE);
    }

    /* Without a configuration or any input source there is nothing to do. */
    if (config == NULL
        || (cmd_ln_str_r(config, "-infile") == NULL
            && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
        E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.");
        cmd_ln_free_r(config);
        return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    /* A file, when given, takes precedence over the microphone. */
    if (cmd_ln_str_r(config, "-infile") != NULL)
        recognize_from_file();
    else if (cmd_ln_boolean_r(config, "-inmic"))
        recognize_from_microphone();

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
/*
 * Wide-character entry point: converts every argument to a multibyte string
 * and forwards to main(). Returns 1 if memory cannot be allocated or an
 * argument cannot be converted.
 */
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    size_t len;
    int i;

    /* One extra slot so the vector ends with the conventional NULL. */
    argv = malloc((argc + 1) * sizeof(char *));
    if (argv == NULL)
        return 1;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs reports the required length,
         * excluding the terminator. */
        len = wcstombs(NULL, wargv[i], 0);
        if (len == (size_t) -1)
            return 1;
        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return 1;
        /* Allow len + 1 bytes so that the terminating NUL is written too;
         * limiting the copy to the character count left the string
         * unterminated. */
        wcstombs(argv[i], wargv[i], len + 1);
    }
    argv[argc] = NULL;

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif

<<Arduino webclient code>>

include the mbed library with this snippet

#include <SPI.h>
#include <SFE_CC3000.h>
#include <SFE_CC3000_Client.h>

// Pins
// These values match the hookup table above (INT -> 2, EN -> 7, CS -> 10)
// and are passed to the SFE_CC3000 constructor.
#define CC3000_INT      2   // Needs to be an interrupt pin (D2/D3)
#define CC3000_EN       7   // Can be any digital pin
#define CC3000_CS       10  // Preferred is pin 10 on Uno

// Connection info data lengths
// Used by setup() as the loop bound when printing connection_info.ip_address.
#define IP_ADDR_LEN     4   // Length of IP address in bytes

// Constants
// NOTE(review): the SSID literal starts with a space — confirm that this is
// really part of the network name.
// NOTE(review): avoid publishing real network credentials with the sketch.
char ap_ssid[] = " China Unicom";                  // SSID of network
char ap_password[] = "fengzhislc";          // Password of network
unsigned int ap_security = WLAN_SEC_WPA2; // Security of network
unsigned int timeout = 30000;             // Milliseconds
// Host name only: client.connect() and the HTTP "Host:" header need a bare
// host name, not a URL with scheme, port and path. The port that was embedded
// in the former URL is kept separately in server_port.
char server[] = "ec2-54-213-4-148.us-west-2.compute.amazonaws.com";        // Remote host site
unsigned int server_port = 8080;          // TCP port of remote host site

// Global Variables
SFE_CC3000 wifi = SFE_CC3000(CC3000_INT, CC3000_EN, CC3000_CS);
SFE_CC3000_Client client = SFE_CC3000_Client(wifi);

// One-time start-up: initialise the CC3000, join the access point, print the
// assigned IP address, open a TCP connection to the server and send an HTTP
// GET request for /index.html. The response is read in loop().
//
// NOTE(review): Serial runs at 115200 baud here, while the mbed program
// published with this project sets its UART to 9600 — confirm both ends
// agree if this port is the link to the mbed.
void setup() {
  
  ConnectionInfo connection_info;
  int i;
  
  // Initialize Serial port
  Serial.begin(115200);
  Serial.println();
  Serial.println("---------------------------");
  Serial.println("SparkFun CC3000 - WebClient");
  Serial.println("---------------------------");
  
  // Initialize CC3000 (configure SPI communications)
  if ( wifi.init() ) {
    Serial.println("CC3000 initialization complete");
  } else {
    Serial.println("Something went wrong during CC3000 init!");
  }
  
  // Connect using DHCP
  Serial.print("Connecting to SSID: ");
  Serial.println(ap_ssid);
  if(!wifi.connect(ap_ssid, ap_security, ap_password, timeout)) {
    Serial.println("Error: Could not connect to AP");
  }
  
  // Gather connection details and print IP address
  if ( !wifi.getConnectionInfo(connection_info) ) {
    Serial.println("Error: Could not obtain connection details");
  } else {
    Serial.print("IP Address: ");
    for (i = 0; i < IP_ADDR_LEN; i++) {
      Serial.print(connection_info.ip_address[i]);
      if ( i < IP_ADDR_LEN - 1 ) {
        Serial.print(".");
      }
    }
    Serial.println();
  }
  
  // Make a TCP connection to remote host
  Serial.print("Performing HTTP GET of: ");
  Serial.print(server);
  Serial.print(":");
  Serial.println(server_port);
  if ( !client.connect(server, server_port) ) {
    Serial.println("Error: Could not make a TCP connection");
  }
  
  // Make a HTTP GET request. The Host header carries the port because it is
  // not the HTTP default.
  client.println("GET /index.html HTTP/1.1");
  client.print("Host: ");
  client.print(server);
  client.print(":");
  client.println(server_port);
  client.println("Connection: close");
  client.println();
  Serial.println();
}

// Called repeatedly after setup(): copies at most one received byte per pass
// from the TCP client to Serial. Once the client reports that it is no longer
// connected, the socket and the WiFi link are shut down and the sketch idles
// forever.
void loop() {
  
  // Forward one pending byte, if any
  if ( client.available() ) {
    Serial.print(static_cast<char>(client.read()));
  }
  
  // Still connected: nothing more to do on this pass
  if ( client.connected() ) {
    return;
  }
  
  // The server has disconnected: stop the client and wifi
  Serial.println();
  
  if ( !client.close() ) {
    Serial.println("Error: Could not close socket");
  }
  
  if ( !wifi.disconnect() ) {
    Serial.println("Error: Could not disconnect from network");
  }
  
  // Park here for good
  Serial.println("Finished WebClient test");
  for (;;) {
    delay(1000);
  }
}

Video

5 comments on Speech Recognition Control Display Game :

top car

# 12 Jan 2022 This post is awaiting moderation

Speech Recognition Control Display Game ... Here we use NXP LPC1768 as another micro-controller to control the uLCD display and start a game ...https://www.gtopcars.com/makers/lotus/2022-lotus-elise/

Jerry Visser

# 22 Aug 2022 This post is awaiting moderation

16 Games That Use Voice Recognition ; 15 Escape The Ayuwoki. Escape The Ayuwoki Gameplay ; 14 Welcome To The Game. Welcome To The Game Starting ...https://www.thyene.com/

Rose Lee

# 22 Aug 2022 This post is awaiting moderation

Snake Game in Python using Pygame which is free and open-source Python library used to create games.Create snake,add food,increase snake ...https://haniem.com/

John Woodard

# 24 Aug 2022 This post is awaiting moderation

Voice controlled game allow you to command your character and interact with others through speech. Here's a look at this popular new feaure....https://aneony.com/

Luuna Luna Luna

# 28 Nov 2022 This post is awaiting moderation

Thanks for the post! It's really interesting as is https://ulive.chat/chatroulette-nl.html . There you can communicate with different people, it's cool

Please log in to post comments.

Speech Recognition Control Display Game

Introduction

Parts

Hardware Hookup

Code

include the mbed library with this snippet

include the mbed library with this snippet

include the mbed library with this snippet

Video

5 comments on Speech Recognition Control Display Game :

Speech Recognition Control Display Game

Important Information for this Arm website

Access Warning