Speech Recognition Control Display Game
Introduction
In this project, we use the speech recognition software PocketSphinx to capture audio and convert it to text. We then built a server that is updated with the recognized text. With an Arduino and the SparkFun CC3000 WiFi Breakout board, we connect the microcontroller to the internet, fetch information (HTML files) from the website, and use it to control other activities. Here we use an NXP LPC1768 (mbed) as a second microcontroller to control the uLCD display and start a game when the user says the word "START".
Parts
- Speech Recognition: PocketSphinx
- Server: Amazon AWS
- Internet Connection: SparkFun CC3000 WiFi Breakout board
- Arduino and NXP LPC 1768
- LCD Display: Sparkfun uLCD
Hardware Hookup
- Connect the mbed with uLCD
- Connect the mbed with the Arduino Uno. Here we use UART for communication between the mbed and the Arduino, so the hardware hookup is:
mbed TX -> Arduino RX
Arduino TX -> mbed RX
- Connect Arduino with Sparkfun CC3000 Wifi Breakout (CC3000 Breakout Board → Arduino)
GND → GND
VCC → 5V
MOSI → 11
MISO → 12
CS → 10
INT → 2
SCK → 13
EN → 7
Code
- For the Internet connection, we use an Arduino and the SparkFun CC3000 Breakout Board. We followed the CC3000 Hookup Guide on the SparkFun website (https://learn.sparkfun.com/tutorials/cc3000-hookup-guide), using its library and modifying parts of it. Here is the code for the WebClient, which fetches an HTML file from a given website:
<<Arduino+Sparkfun CC3000Wifi>>
- For the UART transmission between mbed and Arduino, and uLCD display, we use mbed code.
<<mbed UART + uLCD Display>>
include the mbed library with this snippet
// uLCD-144-G2 demo program for uLCD-4GL LCD driver library // #include "mbed.h" #include "uLCD_4DGL.h" uLCD_4DGL uLCD(p28,p27,p11); // serial tx, serial rx, reset pin; Serial device(p9, p10); // tx, rx Serial pc(USBTX, USBRX); DigitalOut led2(LED2); DigitalOut led1(LED1); int main() { int c; int i = 0; char words[200]; int flag = 0; device.baud(9600); // basic printf demo = 16 by 18 characters on screen uLCD.printf("\nSpeech Recognition!!\n"); //Default Green on black text uLCD.printf("\n Starting Demo..."); uLCD.text_width(4); //4X size text uLCD.text_height(4); uLCD.color(RED); for (int i=3; i>=0; --i) { uLCD.locate(1,2); uLCD.printf("%2D",i); wait(1.0); } uLCD.cls(); uLCD.locate(1,2); uLCD.text_width(1); //4X size text uLCD.text_height(2); uLCD.printf("This is 4180 final project!!!"); wait(1.0); uLCD.cls(); uLCD.locate(1,2); uLCD.text_width(2); //4X size text uLCD.text_height(2); uLCD.printf("Waiting for 'START'..."); while(1){ if(device.readable()) { led1 = 1; c = device.getc(); words[i] = c; uLCD.printf("%c", c); i++; if(i>4){ if(words[i]=='t'){ if(words[i-1]=='r'){ if(words[i-2]=='a'){ if(words[i-3]=='t'){ if(words[i-4]=='s'){ flag = 1; } } } } } } if(flag==1){ wait(8); uLCD.cls(); led2 = 1; float fx=50.0,fy=21.0,vx=1.0,vy=0.4; int x=50,y=21,radius=4; uLCD.background_color(BLACK); uLCD.cls(); uLCD.line(0, 0, 127, 0, WHITE); uLCD.line(127, 0, 127, 127, WHITE); uLCD.line(127, 127, 0, 127, WHITE); uLCD.line(0, 127, 0, 0, WHITE); for (int i=0; i<1500; i++) { // draw ball uLCD.filled_circle(x, y, radius, RED); //bounce off edge walls and slow down a bit? if ((x<=radius+1) || (x>=126-radius)) vx = -.90*vx; if ((y<=radius+1) || (y>=126-radius)) vy = -.90*vy; //erase old ball location uLCD.filled_circle(x, y, radius, BLACK); // move ball fx=fx+vx; fy=fy+vy; x=(int)fx; y=(int)fy; } wait(0.5); } } } }
<<Speech recognition using Pocketsphinx>>
include the mbed library with this snippet
#include <stdio.h> #include <string.h> #include <assert.h> #if defined(_WIN32) && !defined(__CYGWIN__) #include <windows.h> #else #include <sys/select.h> #endif #include <sphinxbase/err.h> #include <sphinxbase/ad.h> #include "pocketsphinx.h" static const arg_t cont_args_def[] = { POCKETSPHINX_OPTIONS, /* Argument file. */ {"-argfile", ARG_STRING, NULL, "Argument file giving extra arguments."}, {"-adcdev", ARG_STRING, NULL, "Name of audio device to use for input."}, {"-infile", ARG_STRING, NULL, "Audio file to transcribe."}, {"-inmic", ARG_BOOLEAN, "no", "Transcribe audio from microphone."}, {"-time", ARG_BOOLEAN, "no", "Print word times in file transcription."}, CMDLN_EMPTY_OPTION }; static ps_decoder_t *ps; static cmd_ln_t *config; static FILE *rawfd; static void print_word_times() { int frame_rate = cmd_ln_int32_r(config, "-frate"); ps_seg_t *iter = ps_seg_iter(ps, NULL); while (iter != NULL) { int32 sf, ef, pprob; float conf; ps_seg_frames(iter, &sf, &ef); pprob = ps_seg_prob(iter, NULL, NULL, NULL); conf = logmath_exp(ps_get_logmath(ps), pprob); printf("%s %.3f %.3f %f\n", ps_seg_word(iter), ((float)sf / frame_rate), ((float) ef / frame_rate), conf); iter = ps_seg_next(iter); } } static int check_wav_header(char *header, int expected_sr) { int sr; if (header[34] != 0x10) { E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", header[34]); return 0; } if (header[20] != 0x1) { E_ERROR("Input audio file has compression [%d] and not required PCM\n", header[20]); return 0; } if (header[22] != 0x1) { E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", header[22]); return 0; } sr = ((header[24] & 0xFF) | ((header[25] & 0xFF) << 8) | ((header[26] & 0xFF) << 16) | ((header[27] & 0xFF) << 24)); if (sr != expected_sr) { E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr); return 0; } return 1; } /* * Continuous recognition from a file */ static void recognize_from_file() { int16 
adbuf[2048]; const char *fname; const char *hyp; int32 k; uint8 utt_started, in_speech; int32 print_times = cmd_ln_boolean_r(config, "-time"); fname = cmd_ln_str_r(config, "-infile"); if ((rawfd = fopen(fname, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) { char waveheader[44]; fread(waveheader, 1, 44, rawfd); if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate"))) E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname); } ps_start_utt(ps); utt_started = FALSE; while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) { ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) print_word_times(); ps_start_utt(ps); utt_started = FALSE; } } ps_end_utt(ps); if (utt_started) { hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) { print_word_times(); } } fclose(rawfd); } /* Sleep for specified msec */ static void sleep_msec(int32 ms) { #if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE) Sleep(ms); #else /* ------------------- Unix ------------------ */ struct timeval tmo; tmo.tv_sec = 0; tmo.tv_usec = ms * 1000; select(0, NULL, NULL, NULL, &tmo); #endif } /* * Main utterance processing loop: * for (;;) { * start utterance and wait for speech to process * decoding till end-of-utterance silence will be detected * print utterance result; * } */ static void recognize_from_microphone() { ad_rec_t *ad; int16 adbuf[2048]; uint8 utt_started, in_speech; int32 k; char const *hyp; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("Failed to open audio device\n"); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start 
recording\n"); if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; printf("READY....\n"); for (;;) { if ((k = ad_read(ad, adbuf, 2048)) < 0) E_FATAL("Failed to read audio\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; printf("Listening...\n"); } if (!in_speech && utt_started) { /* speech -> silence transition, time to start new utterance */ ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL ); if (hyp != NULL) { printf(" %s\n", hyp); // I added for 4180 project char c[500]; strcpy(c,"curl -d \"content="); strcat(c,hyp); strcat(c,"\" —dump-header headers http://ec2-54-213-4-148.us-west-2.compute.amazonaws.com:8080/hello/"); system(c); // } if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; printf("READY....\n"); } sleep_msec(100); } ad_close(ad); } int main(int argc, char *argv[]) { char const *cfg; config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE); /* Handle argument file as -argfile. 
*/ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) { E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone."); cmd_ln_free_r(config); return 1; } ps_default_search_args(config); ps = ps_init(config); if (ps == NULL) { cmd_ln_free_r(config); return 1; } E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__); if (cmd_ln_str_r(config, "-infile") != NULL) { recognize_from_file(); } else if (cmd_ln_boolean_r(config, "-inmic")) { recognize_from_microphone(); } ps_free(ps); cmd_ln_free_r(config); return 0; } #if defined(_WIN32_WCE) #pragma comment(linker,"/entry:mainWCRTStartup") #include <windows.h> //Windows Mobile has the Unicode main only int wmain(int32 argc, wchar_t * wargv[]) { char **argv; size_t wlen; size_t len; int i; argv = malloc(argc * sizeof(char *)); for (i = 0; i < argc; i++) { wlen = lstrlenW(wargv[i]); len = wcstombs(NULL, wargv[i], wlen); argv[i] = malloc(len + 1); wcstombs(argv[i], wargv[i], wlen); } //assuming ASCII parameters return main(argc, argv); } #endif
<<Arduino webclient code>>
include the mbed library with this snippet
// WebClient sketch for the SparkFun CC3000 breakout: joins a WiFi network,
// performs one HTTP GET against the project server and forwards every byte
// of the response to the hardware serial port.
#include <SPI.h>
#include <SFE_CC3000.h>
#include <SFE_CC3000_Client.h>

// Pins
#define CC3000_INT      2   // Needs to be an interrupt pin (D2/D3)
#define CC3000_EN       7   // Can be any digital pin
#define CC3000_CS       10  // Preferred is pin 10 on Uno

// Connection info data lengths
#define IP_ADDR_LEN     4   // Length of IP address in bytes

// Baud rate of the hardware serial port. The mbed program in this project
// opens its side of the UART link with device.baud(9600), so both ends must
// use 9600 for the forwarded bytes to be readable.
#define SERIAL_BAUD     9600

// Constants
// NOTE(review): replace the SSID/password with your own network's values and
// avoid publishing real credentials. The leading space in the SSID is kept
// from the original sketch - confirm it is intentional.
char ap_ssid[] = " China Unicom";               // SSID of network
char ap_password[] = "fengzhislc";              // Password of network
unsigned int ap_security = WLAN_SEC_WPA2;       // Security of network
unsigned int timeout = 30000;                   // Milliseconds

// Remote host. client.connect() takes a host name, not a URL, so the
// "http://" scheme and the ":8080/" suffix must not be part of this string;
// the port is passed separately.
char server[] = "ec2-54-213-4-148.us-west-2.compute.amazonaws.com";
unsigned int server_port = 8080;                // Port the web server listens on

// Global Variables
SFE_CC3000 wifi = SFE_CC3000(CC3000_INT, CC3000_EN, CC3000_CS);
SFE_CC3000_Client client = SFE_CC3000_Client(wifi);

// Prints an error message and stops the sketch. Used for failures after
// which the HTTP request cannot succeed.
static void halt(const char *message) {
  Serial.println(message);
  while (true) {
    delay(1000);
  }
}

void setup() {

  ConnectionInfo connection_info;
  int i;

  // Initialize Serial port
  Serial.begin(SERIAL_BAUD);
  Serial.println();
  Serial.println("---------------------------");
  Serial.println("SparkFun CC3000 - WebClient");
  Serial.println("---------------------------");

  // Initialize CC3000 (configure SPI communications)
  if ( !wifi.init() ) {
    halt("Something went wrong during CC3000 init!");
  }
  Serial.println("CC3000 initialization complete");

  // Connect using DHCP
  Serial.print("Connecting to SSID: ");
  Serial.println(ap_ssid);
  if ( !wifi.connect(ap_ssid, ap_security, ap_password, timeout) ) {
    halt("Error: Could not connect to AP");
  }

  // Gather connection details and print IP address (informational only,
  // so a failure here does not stop the sketch)
  if ( !wifi.getConnectionInfo(connection_info) ) {
    Serial.println("Error: Could not obtain connection details");
  } else {
    Serial.print("IP Address: ");
    for (i = 0; i < IP_ADDR_LEN; i++) {
      Serial.print(connection_info.ip_address[i]);
      if ( i < IP_ADDR_LEN - 1 ) {
        Serial.print(".");
      }
    }
    Serial.println();
  }

  // Make a TCP connection to remote host
  Serial.print("Performing HTTP GET of: ");
  Serial.println(server);
  if ( !client.connect(server, server_port) ) {
    halt("Error: Could not make a TCP connection");
  }

  // Make a HTTP GET request. The Host header carries the port because it is
  // not the HTTP default.
  client.println("GET /index.html HTTP/1.1");
  client.print("Host: ");
  client.print(server);
  client.print(":");
  client.println(server_port);
  client.println("Connection: close");
  client.println();
  Serial.println();
}

void loop() {

  // If there are incoming bytes, print them
  if ( client.available() ) {
    char c = client.read();
    Serial.print(c);
  }

  // If the server has disconnected, stop the client and wifi
  if ( !client.connected() ) {
    Serial.println();

    // Close socket
    if ( !client.close() ) {
      Serial.println("Error: Could not close socket");
    }

    // Disconnect WiFi
    if ( !wifi.disconnect() ) {
      Serial.println("Error: Could not disconnect from network");
    }

    // Do nothing
    Serial.println("Finished WebClient test");
    while(true){
      delay(1000);
    }
  }
}
Video
5 comments on Speech Recognition Control Display Game :
Please log in to post comments.
Speech Recognition Control Display Game ... Here we use NXP LPC1768 as another micro-controller to control the uLCD display and start a game ...https://www.gtopcars.com/makers/lotus/2022-lotus-elise/