tom dunigan
/
tts
text-to-speech through DAC to audio amp/speaker
Embed:
(wiki syntax)
Show/hide line numbers
TTS.cpp
00001 /** 00002 * Text To Speech synthesis library 00003 * Copyright (c) 2008 Clive Webster. All rights reserved. 00004 * 00005 * Nov. 29th 2009 - Modified to work with Arduino by Gabriel Petrut: 00006 * The Text To Speech library uses Timer1 to generate the PWM 00007 * output on digital pin 10. The output signal needs to be fed 00008 * to an RC filter then through an amplifier to the speaker. 00009 * http://www.tehnorama.ro/minieric-modulul-de-control-si-sinteza-vocala/ 00010 * 00011 * Modified to allow use of different PWM pins by Stephen Crane. 00012 * Modified for Timer5 on Arduino Mega2560 by Peter Dambrowsky. 00013 */ 00014 00015 #include "TTS.h" 00016 00017 AnalogOut dac(DACpin); 00018 00019 // Random number seed 00020 static byte seed0; 00021 static byte seed1; 00022 static byte seed2; 00023 00024 static char phonemes[128]; 00025 static char modifier[128]; // must be same size as 'phonemes' 00026 static char g_text[128]; 00027 00028 static byte defaultPitch = 7; 00029 00030 // Lookup user specified pitch changes 00031 static const byte PROGMEM PitchesP[] = { 1, 2, 4, 6, 8, 10, 13, 16 }; 00032 00033 /** 00034 * Find the single character 'token' in 'vocab' 00035 * and append its phonemes to dest[x] 00036 */ 00037 static int copyToken(char token, char *dest, int x, const VOCAB * vocab) 00038 { 00039 for (unsigned int ph = 0; ph < numVocab; ph++) { 00040 const char *txt = (const char *) pgm_read_word(&vocab[ph].txt); 00041 if (pgm_read_byte(&txt[0]) == token && pgm_read_byte(&txt[1]) == 0) { 00042 const char *src = 00043 (const char *) pgm_read_word(&vocab[ph].phoneme); 00044 while (pgm_read_byte(src)) { 00045 dest[x++] = pgm_read_byte(src); 00046 src++; 00047 } 00048 break; 00049 } 00050 } 00051 return x; 00052 } 00053 00054 static byte whitespace(char c) 00055 { 00056 return (c == 0 || c == ' ' || c == ',' || c == '.' || c == '?' 00057 || c == '\'' || c == '!' || c == ':' || c == '/'); 00058 } 00059 00060 /** 00061 * Enter: 00062 * src => English text in upper case 00063 * vocab => VOCAB array 00064 * dest => address to return result 00065 * return 1 if ok, or 0 if error 00066 */ 00067 static int textToPhonemes(const char *src, const VOCAB * vocab, char *dest) 00068 { 00069 int outIndex = 0; // Current offset into dest 00070 int inIndex = -1; // Starts at -1 so that a leading space is assumed 00071 00072 while (inIndex == -1 || src[inIndex]) { // until end of text 00073 int maxMatch = 0; // Max chars matched on input text 00074 int numOut = 0; // Number of characters copied to output stream for the best match 00075 boolean endsInWhiteSpace = FALSE; 00076 int maxWildcardPos = 0; 00077 00078 // Get next phoneme, P2 00079 for (unsigned int ph = 0; ph < numVocab; ph++) { 00080 int y, x; 00081 char wildcard = 0; // modifier 00082 int wildcardInPos = 0; 00083 boolean hasWhiteSpace = FALSE; 00084 const char *text = 00085 (const char *) pgm_read_word(&vocab[ph].txt); 00086 const char *phon = 00087 (const char *) pgm_read_word(&vocab[ph].phoneme); 00088 00089 for (y = 0;; y++) { 00090 char nextVocabChar = pgm_read_byte(&text[y]); 00091 char nextCharIn = 00092 (y + inIndex == -1) ? ' ' : src[y + inIndex]; 00093 if (nextCharIn >= 'a' && nextCharIn <= 'z') 00094 nextCharIn = nextCharIn - 'a' + 'A'; 00095 00096 if (nextVocabChar == '#' && nextCharIn >= 'A' 00097 && nextCharIn <= 'Z') { 00098 wildcard = nextCharIn; // The character equivalent to the '#' 00099 wildcardInPos = y; 00100 continue; 00101 } 00102 00103 if (nextVocabChar == '_') { 00104 // try to match against a white space 00105 hasWhiteSpace = TRUE; 00106 if (whitespace(nextCharIn)) 00107 continue; 00108 y--; 00109 break; 00110 } 00111 // check for end of either string 00112 if (nextVocabChar == 0 || nextCharIn == 0) 00113 break; 00114 00115 if (nextVocabChar != nextCharIn) 00116 break; 00117 } 00118 00119 // See if its the longest complete match so far 00120 if (y <= maxMatch || pgm_read_byte(&text[y])) 00121 continue; 00122 00123 // This is the longest complete match 00124 maxMatch = y; 00125 maxWildcardPos = 0; 00126 x = outIndex; // offset into phoneme return data 00127 00128 // Copy the matching phrase changing any '#' to the phoneme for the wildcard 00129 for (y = 0;; y++) { 00130 char c = pgm_read_byte(&phon[y]); 00131 if (c == 0) 00132 break; 00133 if (c == '#') { 00134 if (pgm_read_byte(&phon[y + 1]) == 0) { 00135 // replacement ends in wildcard 00136 maxWildcardPos = wildcardInPos; 00137 } else { 00138 x = copyToken(wildcard, dest, x, vocab); // Copy the phonemes for the wildcard character 00139 } 00140 } else { 00141 dest[x++] = c; 00142 } 00143 } 00144 dest[x] = 0; 00145 endsInWhiteSpace = hasWhiteSpace; 00146 00147 // 14 00148 numOut = x - outIndex; // The number of bytes added 00149 } 00150 // 15 - end of vocab table 00151 00152 // 16 00153 if (endsInWhiteSpace) 00154 maxMatch--; 00155 00156 // 17 00157 if (maxMatch == 0) { 00158 //loggerP(PSTR("Mistake in SAY, no token for ")); 00159 //logger(&src[inIndex]); 00160 //loggerCRLF(); 00161 return 0; 00162 } 00163 // 20 00164 outIndex += numOut; 00165 if (outIndex > 128 - 16) { 00166 //loggerP(PSTR("Mistake in SAY, text too long\n")); 00167 return 0; 00168 } 00169 // 21 00170 inIndex += (maxWildcardPos > 0) ? maxWildcardPos : maxMatch; 00171 } 00172 return 1; 00173 } 00174 00175 /** 00176 * Convert phonemes to data string 00177 * Enter: textp = phonemes string 00178 * Return: phonemes = string of sound data 00179 * modifier = 2 bytes per sound data 00180 */ 00181 static int phonemesToData(const char *textp, const PHONEME * phoneme) 00182 { 00183 unsigned int phonemeOut = 0; // offset into the phonemes array 00184 unsigned int modifierOut = 0; // offset into the modifiers array 00185 unsigned int L81 = 0; // attenuate 00186 unsigned int L80 = 16; 00187 00188 while (*textp) { 00189 // P20: Get next phoneme 00190 boolean anyMatch = FALSE; 00191 int longestMatch = 0; 00192 int numOut = 0; // The number of bytes copied to the output for the longest match 00193 00194 // Get next phoneme, P2 00195 for (unsigned int ph = 0; ph < numPhoneme; ph++) { 00196 int numChars; 00197 00198 // Locate start of next phoneme 00199 const char *ph_text = 00200 (const char *) pgm_read_word(&phoneme[ph].txt); 00201 00202 // Set 'numChars' to the number of characters 00203 // that we match against this phoneme 00204 for (numChars = 0; textp[numChars]; numChars++) { 00205 00206 // get next input character and make lower case 00207 char nextChar = textp[numChars]; 00208 if (nextChar >= 'A' && nextChar <= 'Z') 00209 nextChar = nextChar - 'A' + 'a'; 00210 00211 if (nextChar != pgm_read_byte(&ph_text[numChars])) 00212 break; 00213 } 00214 00215 // if not the longest match so far then ignore 00216 if (numChars <= longestMatch) 00217 continue; 00218 00219 // partial phoneme match 00220 if (pgm_read_byte(&ph_text[numChars])) 00221 continue; 00222 00223 // P7: we have matched the whole phoneme 00224 longestMatch = numChars; 00225 00226 // Copy phoneme data to 'phonemes' 00227 const char *ph_ph = 00228 (const char *) pgm_read_word(&phoneme[ph].phoneme); 00229 for (numOut = 0; pgm_read_byte(&ph_ph[numOut]); numOut++) 00230 phonemes[phonemeOut + numOut] = 00231 pgm_read_byte(&ph_ph[numOut]); 00232 00233 L81 = pgm_read_byte(&phoneme[ph].attenuate) + '0'; 00234 anyMatch = TRUE; // phoneme match found 00235 00236 modifier[modifierOut] = -1; 00237 modifier[modifierOut + 1] = 0; 00238 00239 // Get char from text after the phoneme and test if it is a numeric 00240 if (textp[longestMatch] >= '0' && textp[longestMatch] <= '9') { 00241 // Pitch change requested 00242 modifier[modifierOut] = 00243 pgm_read_byte(&PitchesP[textp[longestMatch] - '1']); 00244 modifier[modifierOut + 1] = L81; 00245 longestMatch++; 00246 } 00247 // P10 00248 if (L81 != '0' && L81 != L80 && modifier[modifierOut] >= 0) { 00249 modifier[modifierOut - 2] = modifier[modifierOut]; 00250 modifier[modifierOut - 1] = '0'; 00251 continue; 00252 } 00253 // P11 00254 if ((textp[longestMatch - 1] | 0x20) == 0x20) { 00255 // end of input string or a space 00256 modifier[modifierOut] = 00257 (modifierOut == 0) ? 16 : modifier[modifierOut - 2]; 00258 } 00259 } // next phoneme 00260 00261 // p13 00262 L80 = L81; 00263 if (longestMatch == 0 && !anyMatch) { 00264 //loggerP(PSTR("Mistake in speech at ")); 00265 //logger(textp); 00266 //loggerCRLF(); 00267 return 0; 00268 } 00269 // Move over the bytes we have copied to the output 00270 phonemeOut += numOut; 00271 00272 if (phonemeOut > sizeof(phonemes) - 16) { 00273 //loggerP(PSTR("Line too long\n")); 00274 return 0; 00275 } 00276 // P16 00277 00278 // Copy the modifier setting to each sound data element for this phoneme 00279 if (numOut > 2) 00280 for (int count = 0; count != numOut; count += 2) { 00281 modifier[modifierOut + count + 2] = 00282 modifier[modifierOut + count]; 00283 modifier[modifierOut + count + 3] = 0; 00284 } 00285 modifierOut += numOut; 00286 00287 //p21 00288 textp += longestMatch; 00289 } 00290 00291 phonemes[phonemeOut++] = 'z'; 00292 phonemes[phonemeOut++] = 'z'; 00293 phonemes[phonemeOut++] = 'z'; 00294 phonemes[phonemeOut++] = 'z'; 00295 00296 while (phonemeOut < sizeof(phonemes)) 00297 phonemes[phonemeOut++] = 0; 00298 00299 while (modifierOut < sizeof(modifier)) { 00300 modifier[modifierOut++] = -1; 00301 modifier[modifierOut++] = 0; 00302 } 00303 00304 return 1; 00305 } 00306 00307 /* 00308 * A delay loop that doesn't change with different optimisation settings 00309 */ 00310 00311 00312 static void pause(byte delays) 00313 { 00314 wait_us(delays*6); 00315 } 00316 00317 static void delay2(byte d) 00318 { 00319 wait_us(d*3127); 00320 } 00321 00322 /* 00323 * Generate a random number 00324 */ 00325 static byte random2(void) 00326 { 00327 byte tmp = (seed0 & 0x48) + 0x38; 00328 seed0 <<= 1; 00329 if (seed1 & 0x80) 00330 seed0++; 00331 seed1 <<= 1; 00332 if (seed2 & 0x80) 00333 seed1++; 00334 seed2 <<= 1; 00335 if (tmp & 0x40) 00336 seed2++; 00337 return seed0; 00338 } 00339 00340 static int pin; 00341 00342 static void soundOff(void) 00343 { 00344 //dac.write(0); 00345 } 00346 00347 #define PWM_TOP (1200/2) 00348 00349 //https://sites.google.com/site/qeewiki/books/avr-guide/pwm-on-the-atmega328 00350 static void soundOn(void) 00351 { 00352 // dac.write(0); 00353 00354 // initialise random number seed 00355 seed0 = 0xecu; 00356 seed1 = 7; 00357 seed2 = 0xcfu; 00358 } 00359 00360 // Logarithmic scale 00361 //static const int16_t PROGMEM Volume[8] = 00362 //{ 0, PWM_TOP * 0.01, PWM_TOP * 0.02, PWM_TOP * 0.03, PWM_TOP * 0.06, 00363 //PWM_TOP * 0.12, PWM_TOP * 0.25, PWM_TOP * 0.5 }; 00364 00365 // Linear scale 00366 static const int16_t PROGMEM Volume[8] = { 00367 0, (uint16_t)(PWM_TOP * 0.07), (uint16_t)(PWM_TOP * 0.14), (uint16_t)(PWM_TOP * 0.21), (uint16_t)(PWM_TOP * 0.29), 00368 (uint16_t)(PWM_TOP * 0.36), (uint16_t)(PWM_TOP * 0.43), (uint16_t)(PWM_TOP * 0.5) 00369 }; 00370 00371 static void sound(byte b) 00372 { 00373 // Update PWM volume 00374 b = (b & 15); 00375 dac.write(0.5*b/16.); 00376 } 00377 00378 static byte playTone(byte soundNum, byte soundPos, char pitch1, 00379 char pitch2, byte count, byte volume) 00380 { 00381 const byte *soundData = &SoundData[soundNum * 0x40]; 00382 while (count-- > 0) { 00383 byte s = pgm_read_byte(&soundData[soundPos & 0x3fu]); 00384 sound((byte) (s & volume)); 00385 pause(pitch1); 00386 sound((byte) ((s >> 4) & volume)); 00387 pause(pitch2); 00388 00389 soundPos++; 00390 } 00391 return soundPos & 0x3fu; 00392 } 00393 00394 static void play(byte duration, byte soundNumber) 00395 { 00396 while (duration--) 00397 playTone(soundNumber, random2(), 7, 7, 10, 15); 00398 } 00399 00400 /****************************************************************************** 00401 * User API 00402 ******************************************************************************/ 00403 TTS::TTS() 00404 { 00405 } 00406 00407 void TTS::setPitch(byte pitch) 00408 { 00409 defaultPitch = pitch; 00410 } 00411 00412 byte TTS::getPitch(void) 00413 { 00414 return defaultPitch; 00415 } 00416 00417 /* 00418 * Speak a string of phonemes 00419 */ 00420 void TTS::sayPhonemes(const char *textp) 00421 { 00422 byte phonemeIn, // offset into text 00423 byte2, modifierIn, // offset into stuff in modifier 00424 punctuationPitchDelta; // change in pitch due to fullstop or question mark 00425 int8_t byte1; 00426 char phoneme; 00427 const SOUND_INDEX *soundIndex; 00428 byte sound1Num; // Sound data for the current phoneme 00429 byte sound2Num; // Sound data for the next phoneme 00430 byte sound2Stop; // Where the second sound should stop 00431 char pitch1; // pitch for the first sound 00432 char pitch2; // pitch for the second sound 00433 short i; 00434 byte sound1Duration; // the duration for sound 1 00435 00436 if (phonemesToData(textp, s_phonemes)) { 00437 // phonemes has list of sound bytes 00438 soundOn(); 00439 00440 // _630C 00441 byte1 = 0; 00442 punctuationPitchDelta = 0; 00443 00444 // Q19 00445 for (phonemeIn = 0, modifierIn = 0; phonemes[phonemeIn]; 00446 phonemeIn += 2, modifierIn += 2) { 00447 byte duration; // duration from text line 00448 byte SoundPos; // offset into sound data 00449 byte fadeSpeed = 0; 00450 00451 phoneme = phonemes[phonemeIn]; 00452 if (phoneme == 'z') { 00453 delay2(15); 00454 continue; 00455 } else if (phoneme == '#') { 00456 continue; 00457 } else { 00458 00459 // Collect info on sound 1 00460 soundIndex = &SoundIndex[phoneme - 'A']; 00461 sound1Num = pgm_read_byte(&soundIndex->SoundNumber); 00462 byte1 = pgm_read_byte(&soundIndex->byte1); 00463 byte2 = pgm_read_byte(&soundIndex->byte2); 00464 00465 duration = phonemes[phonemeIn + 1] - '0'; // Get duration from the input line 00466 if (duration != 1) 00467 duration <<= 1; 00468 00469 duration += 6; // scaled duration from the input line (at least 6) 00470 sound2Stop = 0x40 >> 1; 00471 00472 pitch1 = modifier[modifierIn]; 00473 if (modifier[modifierIn + 1] == 0 || pitch1 == -1) { 00474 pitch1 = 10; 00475 duration -= 6; 00476 } else if (modifier[modifierIn + 1] == '0' 00477 || duration == 6) { 00478 duration -= 6; 00479 } 00480 // q8 00481 pitch2 = modifier[modifierIn + 2]; 00482 if (modifier[modifierIn + 3] == 0 || pitch2 == -1) 00483 pitch2 = 10; 00484 00485 // q10 00486 if (byte1 < 0) { 00487 sound1Num = 0; 00488 random2(); 00489 sound2Stop = (0x40 >> 1) + 2; 00490 } else { 00491 // is positive 00492 if (byte1 == 2) { 00493 // 64A4 00494 // Make a white noise sound ! 00495 byte volume = (duration == 6) ? 15 : 1; // volume mask 00496 for (duration <<= 2; duration > 0; duration--) { 00497 playTone(sound1Num, random2(), 8, 12, 11, 00498 volume); 00499 // Increase the volume 00500 if (++volume == 16) 00501 volume = 15; // full volume from now on 00502 } 00503 continue; 00504 00505 } else { 00506 // q11 00507 if (byte1) 00508 delay2(25); 00509 } 00510 } 00511 } 00512 00513 // 6186 00514 pitch1 += defaultPitch + punctuationPitchDelta; 00515 if (pitch1 < 1) 00516 pitch1 = 1; 00517 00518 pitch2 += defaultPitch + punctuationPitchDelta; 00519 if (pitch2 < 1) 00520 pitch2 = 1; 00521 00522 // get next phoneme 00523 phoneme = phonemes[phonemeIn + 2]; 00524 00525 if (phoneme == 0 || phoneme == 'z') { 00526 if (duration == 1) 00527 delay2(60); 00528 phoneme = 'a'; // change to a pause 00529 } else { 00530 // s6 00531 if (byte2 != 1) 00532 byte2 = 00533 (byte2 + 00534 pgm_read_byte(&SoundIndex[phoneme - 'A'].byte2)) 00535 >> 1; 00536 00537 if (byte1 < 0 00538 || pgm_read_byte(&SoundIndex[phoneme - 'A'].byte1)) 00539 phoneme = 'a'; // change to a pause 00540 } 00541 00542 // S10 00543 sound2Num = 00544 pgm_read_byte(&SoundIndex[phoneme - 'A'].SoundNumber); 00545 00546 sound1Duration = 0x80; // play half of sound 1 00547 if (sound2Num == sound1Num) 00548 byte2 = duration; 00549 00550 // S11 00551 if ((byte2 >> 1) == 0) { 00552 sound1Duration = 0xff; // play all of sound 1 00553 } else { 00554 // The fade speed between the two sounds 00555 fadeSpeed = (sound1Duration + (byte2 >> 1)) / byte2; 00556 00557 if (duration == 1) { 00558 sound2Stop = 0x40; // dont play sound2 00559 sound1Duration = 0xff; // play all of sound 1 00560 pitch1 = 12; 00561 } 00562 } 00563 00564 SoundPos = 0; 00565 do { 00566 byte sound1Stop = (sound1Duration >> 2) & 0x3fu; 00567 byte sound1End = sound1Stop; 00568 if (sound2Stop < sound1End) sound1End = sound2Stop; // min 00569 00570 if (sound1Stop) 00571 SoundPos = 00572 playTone(sound1Num, SoundPos, pitch1, pitch1, 00573 sound1End, 15); 00574 00575 // s18 00576 if (sound2Stop != 0x40) { 00577 SoundPos = 00578 playTone(sound2Num, SoundPos, pitch2, pitch2, 00579 (byte) (sound2Stop - sound1End), 15); 00580 } 00581 // s23 00582 if (sound1Duration != 0xff && duration < byte2) { 00583 // Fade sound1 out 00584 sound1Duration -= fadeSpeed; 00585 if (sound1Duration >= (byte) 0xC8) 00586 sound1Duration = 0; // stop playing sound 1 00587 } 00588 // Call any additional sound 00589 if (byte1 == -1) 00590 play(3, 30); // make an 'f' sound 00591 else if (byte1 == -2) 00592 play(3, 29); // make an 's' sound 00593 else if (byte1 == -3) 00594 play(3, 33); // make a 'th' sound 00595 else if (byte1 == -4) 00596 play(3, 27); // make a 'sh' sound 00597 00598 } while (--duration); 00599 00600 // Scan ahead to find a '.' or a '?' as this will change the pitch 00601 punctuationPitchDelta = 0; 00602 for (i = 6; i > 0; i--) { 00603 char next = phonemes[phonemeIn + (i * 2)]; 00604 if (next == 'i') 00605 // found a full stop 00606 punctuationPitchDelta = 6 - i; // Lower the pitch 00607 else if (next == 'h') 00608 // found a question mark 00609 punctuationPitchDelta = i - 6; // Raise the pitch 00610 } 00611 00612 if (byte1 == 1) 00613 delay2(25); 00614 } // next phoneme 00615 } 00616 soundOff(); 00617 } 00618 00619 /* 00620 * Speak an English command line of text 00621 */ 00622 void TTS::sayText(const char *original) 00623 { 00624 unsigned int i; 00625 if (textToPhonemes(original, s_vocab, g_text)) { 00626 sayPhonemes(g_text); 00627 } 00628 }
Generated on Wed Jul 13 2022 01:54:20 by 1.7.2