Speech Recognition Control Display Game
Introduction
In this project, we use the speech recognition software PocketSphinx to capture an audio signal and convert it to text. We then build a server that is updated with the recognized text. With an Arduino and a SparkFun CC3000 WiFi Breakout board, we connect the microcontroller to the internet, fetch information (HTML files) from the website, and control other activities. Here we use an NXP LPC1768 as a second microcontroller to control the uLCD display and start a game when the user says the word "START".
Parts
- Speech Recognition: PocketSphinx
- Server: Amazon AWS
- Internet Connection: SparkFun CC3000 WiFi Breakout Board
- Arduino and NXP LPC 1768
- LCD Display: Sparkfun uLCD
Hardware Hookup
- Connect the mbed with uLCD
- Connect the mbed with the Arduino Uno. Here we use UART for communication between the mbed and the Arduino, so the hardware hookup is:
mbed TX -> Arduino RX
Arduino TX -> mbed RX
- Connect Arduino with Sparkfun CC3000 Wifi Breakout (CC3000 Breakout Board → Arduino)
GND → GND
VCC → 5V
MOSI → 11
MISO → 12
CS → 10
INT → 2
SCK → 13
EN → 7

Code
- For the Internet connection, we use an Arduino and the SparkFun CC3000 Breakout Board. We followed the CC3000 Hookup Guide on the SparkFun website (https://learn.sparkfun.com/tutorials/cc3000-hookup-guide), using its library with some modifications. Here is the code for the WebClient, which fetches an HTML file from a given website:
<<Arduino+Sparkfun CC3000Wifi>>
- For the UART transmission between mbed and Arduino, and uLCD display, we use mbed code.
<<mbed UART + uLCD Display>>
include the mbed library with this snippet
// uLCD-144-G2 demo program for uLCD-4GL LCD driver library
//
#include "mbed.h"
#include "uLCD_4DGL.h"
// uLCD display driver object, attached to a serial port plus a reset line
uLCD_4DGL uLCD(p28,p27,p11); // serial tx, serial rx, reset pin;
// UART used to receive bytes from the Arduino (read in main)
Serial device(p9, p10); // tx, rx
// USB serial port to the host PC; declared but not used in this listing
Serial pc(USBTX, USBRX);
// LED2 is switched on when the game is about to start
DigitalOut led2(LED2);
// LED1 is switched on once a byte has been received on `device`
DigitalOut led1(LED1);
// Runs the bouncing-ball animation on the uLCD for 1500 frames.
static void run_ball_demo()
{
    float fx=50.0,fy=21.0,vx=1.0,vy=0.4;
    int x=50,y=21,radius=4;

    uLCD.background_color(BLACK);
    uLCD.cls();
    // white frame along the 0..127 screen edges
    uLCD.line(0, 0, 127, 0, WHITE);
    uLCD.line(127, 0, 127, 127, WHITE);
    uLCD.line(127, 127, 0, 127, WHITE);
    uLCD.line(0, 127, 0, 0, WHITE);
    for (int frame=0; frame<1500; frame++) {
        // draw ball
        uLCD.filled_circle(x, y, radius, RED);
        // bounce off edge walls and slow down a bit
        if ((x<=radius+1) || (x>=126-radius)) vx = -.90*vx;
        if ((y<=radius+1) || (y>=126-radius)) vy = -.90*vy;
        // erase old ball location
        uLCD.filled_circle(x, y, radius, BLACK);
        // move ball
        fx=fx+vx;
        fy=fy+vy;
        x=(int)fx;
        y=(int)fy;
    }
}

// Entry point: shows a splash screen and countdown on the uLCD, then echoes
// every byte received on `device` to the display and runs the ball demo each
// time the letters "start" (in any letter case) arrive consecutively.
int main()
{
    static const char trigger[] = "start";
    const int trigger_len = sizeof(trigger) - 1;
    // Sliding window over the most recent bytes, oldest first. A fixed-size
    // window cannot overflow however much data the Arduino sends.
    char recent[sizeof(trigger) - 1] = {0};

    device.baud(9600);
    // basic printf demo = 16 by 18 characters on screen
    uLCD.printf("\nSpeech Recognition!!\n"); //Default Green on black text
    uLCD.printf("\n Starting Demo...");
    uLCD.text_width(4); //4X size text
    uLCD.text_height(4);
    uLCD.color(RED);
    for (int count=3; count>=0; --count) {
        uLCD.locate(1,2);
        uLCD.printf("%2d",count);   // lowercase 'd': "%2D" is not a valid conversion
        wait(1.0);
    }
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(1);  //1X wide, 2X high text
    uLCD.text_height(2);
    uLCD.printf("This is 4180 final project!!!");
    wait(1.0);
    uLCD.cls();
    uLCD.locate(1,2);
    uLCD.text_width(2);  //2X size text
    uLCD.text_height(2);
    uLCD.printf("Waiting for 'START'...");

    while(1) {
        if (!device.readable()) {
            continue;
        }
        led1 = 1;
        int c = device.getc();
        uLCD.printf("%c", c);

        // Fold ASCII upper case to lower case so "START" and "start" both match.
        char ch = (char)c;
        if (ch >= 'A' && ch <= 'Z') {
            ch = (char)(ch - 'A' + 'a');
        }
        // Shift the window left by one and append the new byte.
        for (int k = 0; k < trigger_len - 1; ++k) {
            recent[k] = recent[k + 1];
        }
        recent[trigger_len - 1] = ch;

        bool start_heard = true;
        for (int k = 0; k < trigger_len; ++k) {
            if (recent[k] != trigger[k]) {
                start_heard = false;
                break;
            }
        }
        if (!start_heard) {
            continue;
        }

        // Forget the trigger so the game runs once per "start", not once per
        // byte received afterwards.
        for (int k = 0; k < trigger_len; ++k) {
            recent[k] = 0;
        }
        wait(8);
        uLCD.cls();
        led2 = 1;
        run_ball_demo();
        wait(0.5);
    }
}
<<Speech recognition using Pocketsphinx>>
include the mbed library with this snippet
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif
#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
#include "pocketsphinx.h"
/*
 * Command-line option table: everything POCKETSPHINX_OPTIONS expands to,
 * followed by the options this program adds.
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,

    /* Program-specific options (option name, type, default, help text). */
    { "-argfile", ARG_STRING,  NULL, "Argument file giving extra arguments." },
    { "-adcdev",  ARG_STRING,  NULL, "Name of audio device to use for input." },
    { "-infile",  ARG_STRING,  NULL, "Audio file to transcribe." },
    { "-inmic",   ARG_BOOLEAN, "no", "Transcribe audio from microphone." },
    { "-time",    ARG_BOOLEAN, "no", "Print word times in file transcription." },

    CMDLN_EMPTY_OPTION
};
/* Decoder instance; assigned from ps_init() in main() and used by the recognition routines. */
static ps_decoder_t *ps;
/* Parsed command-line configuration; assigned in main() and read via the cmd_ln_*_r() getters. */
static cmd_ln_t *config;
/* Input audio file; opened and closed by recognize_from_file(). */
static FILE *rawfd;
/*
 * Walk the segments of the decoder's current result and print one line per
 * segment: the word, its start and end frame divided by the "-frate"
 * setting, and logmath_exp() applied to the segment's ps_seg_prob() value.
 */
static void
print_word_times()
{
    int frames_per_sec = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 start_frame, end_frame, log_prob;
        float confidence;

        ps_seg_frames(seg, &start_frame, &end_frame);
        log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_prob);
        printf("%s %.3f %.3f %f\n", ps_seg_word(seg),
               ((float) start_frame / frames_per_sec),
               ((float) end_frame / frames_per_sec), confidence);
    }
}
/*
 * Validate a WAV header (the caller passes the first 44 bytes of the file).
 *
 * The fields are read as little-endian unsigned values so that both bytes of
 * each 16-bit field are checked and no signed shift can overflow:
 *   bytes 34-35  bits per sample  (must be 16)
 *   bytes 20-21  compression code (must be 1, PCM)
 *   bytes 22-23  channel count    (must be 1)
 *   bytes 24-27  sample rate      (must equal expected_sr)
 *
 * header      - buffer holding at least 36 header bytes; not modified
 * expected_sr - sample rate the decoder is configured for
 *
 * Returns 1 when every check passes; otherwise reports the first mismatch
 * through E_ERROR and returns 0.
 */
static int
check_wav_header(char *header, int expected_sr)
{
    const unsigned char *h = (const unsigned char *) header;
    unsigned int bits = (unsigned int) h[34] | ((unsigned int) h[35] << 8);
    unsigned int format = (unsigned int) h[20] | ((unsigned int) h[21] << 8);
    unsigned int channels = (unsigned int) h[22] | ((unsigned int) h[23] << 8);
    unsigned long sr = (unsigned long) h[24]
        | ((unsigned long) h[25] << 8)
        | ((unsigned long) h[26] << 16)
        | ((unsigned long) h[27] << 24);

    if (bits != 16) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", (int) bits);
        return 0;
    }
    if (format != 1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", (int) format);
        return 0;
    }
    if (channels != 1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", (int) channels);
        return 0;
    }
    if (expected_sr < 0 || sr != (unsigned long) expected_sr) {
        E_ERROR("Input audio file has sample rate [%lu], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }
    return 1;
}
/*
* Continuous recognition from a file
*/
static void
recognize_from_file()
{
int16 adbuf[2048];
const char *fname;
const char *hyp;
int32 k;
uint8 utt_started, in_speech;
int32 print_times = cmd_ln_boolean_r(config, "-time");
fname = cmd_ln_str_r(config, "-infile");
if ((rawfd = fopen(fname, "rb")) == NULL) {
E_FATAL_SYSTEM("Failed to open file '%s' for reading",
fname);
}
if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
char waveheader[44];
fread(waveheader, 1, 44, rawfd);
if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
}
ps_start_utt(ps);
utt_started = FALSE;
while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
ps_process_raw(ps, adbuf, k, FALSE, FALSE);
in_speech = ps_get_in_speech(ps);
if (in_speech && !utt_started) {
utt_started = TRUE;
}
if (!in_speech && utt_started) {
ps_end_utt(ps);
hyp = ps_get_hyp(ps, NULL);
if (hyp != NULL)
printf("%s\n", hyp);
if (print_times)
print_word_times();
ps_start_utt(ps);
utt_started = FALSE;
}
}
ps_end_utt(ps);
if (utt_started) {
hyp = ps_get_hyp(ps, NULL);
if (hyp != NULL)
printf("%s\n", hyp);
if (print_times) {
print_word_times();
}
}
fclose(rawfd);
}
/*
 * Sleep for the specified number of milliseconds.
 * Values of zero or less return immediately.
 */
static void
sleep_msec(int32 ms)
{
    if (ms <= 0)
        return;
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    /* tv_usec must stay below one second, so split off the whole seconds. */
    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;
    select(0, NULL, NULL, NULL, &tmo);
#endif
}
/*
 * Send one recognized phrase to the project web server by running curl,
 * which posts it as the form field "content" and writes the response
 * headers to the file "headers".
 *
 * The phrase is placed inside a double-quoted shell word, so a phrase
 * containing a character the shell would interpret there is not sent.
 * A phrase too long for the command buffer is not sent either.
 */
static void
post_hypothesis(const char *hyp)
{
    char cmd[500];
    int len;

    if (strpbrk(hyp, "\"\\$`") != NULL) {
        E_ERROR("Hypothesis contains shell metacharacters, not posting it\n");
        return;
    }
    /* Plain ASCII "--" before dump-header, so curl parses it as an option. */
    len = snprintf(cmd, sizeof(cmd),
                   "curl -d \"content=%s\" --dump-header headers http://ec2-54-213-4-148.us-west-2.compute.amazonaws.com:8080/hello/",
                   hyp);
    if (len < 0 || (size_t) len >= sizeof(cmd)) {
        E_ERROR("Hypothesis too long to post\n");
        return;
    }
    if (system(cmd) != 0) {
        E_ERROR("Posting the hypothesis with curl failed\n");
    }
}

/*
 * Main utterance processing loop:
 * for (;;) {
 * start utterance and wait for speech to process
 * decoding till end-of-utterance silence will be detected
 * print utterance result and post it to the web server;
 * }
 *
 * Audio comes from the device named by "-adcdev" at the "-samprate" rate.
 * The loop has no exit; failures are reported through E_FATAL.
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf(" %s\n", hyp);
                /* added for the 4180 project: forward the phrase to the server */
                post_hypothesis(hyp);
            }
            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    /* not reached: the loop above never breaks */
    ad_close(ad);
}
/*
 * Program entry point: parse the command line (plus an optional "-argfile"),
 * initialise the decoder, then transcribe either the file given by "-infile"
 * or, when "-inmic" is set, the microphone.
 *
 * Returns 0 after a recognition run, 1 when no input source was specified,
 * argument parsing failed, or the decoder could not be initialised.
 */
int
main(int argc, char *argv[])
{
    char const *argfile;
    int have_input;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config != NULL) {
        argfile = cmd_ln_str_r(config, "-argfile");
        if (argfile != NULL)
            config = cmd_ln_parse_file_r(config, cont_args_def, argfile, FALSE);
    }

    /* At least one of -infile / -inmic has to be supplied. */
    have_input = config != NULL
        && (cmd_ln_str_r(config, "-infile") != NULL
            || cmd_ln_boolean_r(config, "-inmic") != FALSE);
    if (!have_input) {
        E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.");
        cmd_ln_free_r(config);
        return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    /* A file takes precedence over the microphone when both are given. */
    if (cmd_ln_str_r(config, "-infile") != NULL) {
        recognize_from_file();
    }
    else {
        if (cmd_ln_boolean_r(config, "-inmic"))
            recognize_from_microphone();
    }

    ps_free(ps);
    cmd_ln_free_r(config);
    return 0;
}
#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
/*
 * Wide-character entry point: converts each argument to a multibyte string
 * and hands the result to main().
 *
 * Each converted string is sized from wcstombs() and converted with room for
 * the terminating NUL, so main() always receives terminated strings.
 * Returns main()'s result, or 1 if an argument cannot be converted or memory
 * cannot be allocated.
 */
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    int i;
    int status = 1;

    /* One spare zeroed slot so that argv[argc] is NULL. */
    argv = calloc((size_t) argc + 1, sizeof(*argv));
    if (argv == NULL)
        return 1;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs() reports the bytes required. */
        size_t len = wcstombs(NULL, wargv[i], 0);

        if (len == (size_t) -1)
            goto done;
        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            goto done;
        /* len + 1 leaves room for the terminator wcstombs() appends. */
        wcstombs(argv[i], wargv[i], len + 1);
    }
    //assuming ASCII parameters
    status = main(argc, argv);

done:
    for (i = 0; i < argc; i++)
        free(argv[i]);
    free(argv);
    return status;
}
#endif
<<Arduino webclient code>>
include the mbed library with this snippet
#include <SPI.h>
#include <SFE_CC3000.h>
#include <SFE_CC3000_Client.h>
// Pins
#define CC3000_INT 2 // Needs to be an interrupt pin (D2/D3)
#define CC3000_EN 7 // Can be any digital pin
#define CC3000_CS 10 // Preferred is pin 10 on Uno
// Connection info data lengths
#define IP_ADDR_LEN 4 // Length of IP address in bytes
// Serial speed; the mbed on the other end of the UART is set to 9600 baud
#define SERIAL_BAUD 9600
// Constants
char ap_ssid[] = " China Unicom"; // SSID of network
char ap_password[] = "fengzhislc"; // Password of network
unsigned int ap_security = WLAN_SEC_WPA2; // Security of network
unsigned int timeout = 30000; // Milliseconds
// Remote host: bare host name only, because client.connect() is given a
// host and a port separately rather than a URL
char server[] = "ec2-54-213-4-148.us-west-2.compute.amazonaws.com";
// Port the project server listens on (the speech client posts to :8080)
unsigned int server_port = 8080;
// Global Variables
SFE_CC3000 wifi = SFE_CC3000(CC3000_INT, CC3000_EN, CC3000_CS);
SFE_CC3000_Client client = SFE_CC3000_Client(wifi);

// One-time start-up: bring up the serial port and the CC3000, join the
// access point, print the assigned IP address, open a TCP connection to the
// server and send an HTTP GET for /index.html. The response is read in
// loop(). Each fatal step stops the sequence so that no request is sent
// over a connection that was never established.
void setup() {
  ConnectionInfo connection_info;
  int i;

  // Initialize Serial port
  Serial.begin(SERIAL_BAUD);
  Serial.println();
  Serial.println("---------------------------");
  Serial.println("SparkFun CC3000 - WebClient");
  Serial.println("---------------------------");

  // Initialize CC3000 (configure SPI communications)
  if ( !wifi.init() ) {
    Serial.println("Something went wrong during CC3000 init!");
    return;
  }
  Serial.println("CC3000 initialization complete");

  // Connect using DHCP
  Serial.print("Connecting to SSID: ");
  Serial.println(ap_ssid);
  if ( !wifi.connect(ap_ssid, ap_security, ap_password, timeout) ) {
    Serial.println("Error: Could not connect to AP");
    return;
  }

  // Gather connection details and print IP address (informational only)
  if ( !wifi.getConnectionInfo(connection_info) ) {
    Serial.println("Error: Could not obtain connection details");
  } else {
    Serial.print("IP Address: ");
    for (i = 0; i < IP_ADDR_LEN; i++) {
      Serial.print(connection_info.ip_address[i]);
      if ( i < IP_ADDR_LEN - 1 ) {
        Serial.print(".");
      }
    }
    Serial.println();
  }

  // Make a TCP connection to remote host
  Serial.print("Performing HTTP GET of: ");
  Serial.println(server);
  if ( !client.connect(server, server_port) ) {
    Serial.println("Error: Could not make a TCP connection");
    return;
  }

  // Make a HTTP GET request; the Host header names the port because it is
  // not the HTTP default
  client.println("GET /index.html HTTP/1.1");
  client.print("Host: ");
  client.print(server);
  client.print(":");
  client.println(server_port);
  client.println("Connection: close");
  client.println();
  Serial.println();
}
// Called repeatedly: copies at most one received byte per call from the TCP
// client to the serial port. Once the client reports it is no longer
// connected, closes the socket, leaves the WiFi network and idles forever.
void loop() {
  // Forward one pending byte, if any
  if ( client.available() ) {
    char received = client.read();
    Serial.print(received);
  }

  // Still connected: nothing more to do on this pass
  if ( client.connected() ) {
    return;
  }

  // The server has disconnected: stop the client and wifi
  Serial.println();

  // Close socket
  if ( !client.close() ) {
    Serial.println("Error: Could not close socket");
  }

  // Disconnect WiFi
  if ( !wifi.disconnect() ) {
    Serial.println("Error: Could not disconnect from network");
  }

  // Do nothing from here on
  Serial.println("Finished WebClient test");
  for (;;) {
    delay(1000);
  }
}
Video
5 comments on Speech Recognition Control Display Game :
Please log in to post comments.

Speech Recognition Control Display Game ... Here we use NXP LPC1768 as another micro-controller to control the uLCD display and start a game ...https://www.gtopcars.com/makers/lotus/2022-lotus-elise/