tom dunigan
/
tts
text-to-speech through DAC to audio amp/speaker
text-to-speech TTS
- This program is a modified version of the Arduino/Teensy TTS library: https://github.com/manitou48/TTS
- The audio is generated with the ARM's DAC pin.
- TTS.h selects the DAC pin based on the mbed board. Only tested on the K64F.
- Teensy discussions https://forum.pjrc.com/threads/44587-TTS-(Text-to-Speech)-Library-Port
Diff: TTS.cpp
- Revision:
- 0:bcd16e4a0207
- Child:
- 1:548323cfdb5d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TTS.cpp Sun Jun 11 11:03:23 2017 +0000 @@ -0,0 +1,632 @@ +/** + * Text To Speech synthesis library + * Copyright (c) 2008 Clive Webster. All rights reserved. + * + * Nov. 29th 2009 - Modified to work with Arduino by Gabriel Petrut: + * The Text To Speech library uses Timer1 to generate the PWM + * output on digital pin 10. The output signal needs to be fed + * to an RC filter then through an amplifier to the speaker. + * http://www.tehnorama.ro/minieric-modulul-de-control-si-sinteza-vocala/ + * + * Modified to allow use of different PWM pins by Stephen Crane. + * Modified for Timer5 on Arduino Mega2560 by Peter Dambrowsky. + */ + +#include "TTS.h" + +AnalogOut dac(DACpin); +DigitalOut tom(D0); + +// Random number seed +static byte seed0; +static byte seed1; +static byte seed2; + +static char phonemes[128]; +static char modifier[128]; // must be same size as 'phonemes' +static char g_text[128]; + +static byte defaultPitch = 7; + +// Lookup user specified pitch changes +static const byte PROGMEM PitchesP[] = { 1, 2, 4, 6, 8, 10, 13, 16 }; + +/** + * Find the single character 'token' in 'vocab' + * and append its phonemes to dest[x] + */ +static int copyToken(char token, char *dest, int x, const VOCAB * vocab) +{ + for (unsigned int ph = 0; ph < numVocab; ph++) { + const char *txt = (const char *) pgm_read_word(&vocab[ph].txt); + if (pgm_read_byte(&txt[0]) == token && pgm_read_byte(&txt[1]) == 0) { + const char *src = + (const char *) pgm_read_word(&vocab[ph].phoneme); + while (pgm_read_byte(src)) { + dest[x++] = pgm_read_byte(src); + src++; + } + break; + } + } + return x; +} + +static byte whitespace(char c) +{ + return (c == 0 || c == ' ' || c == ',' || c == '.' || c == '?' + || c == '\'' || c == '!' 
|| c == ':' || c == '/'); +} + +/** + * Enter: + * src => English text in upper case + * vocab => VOCAB array + * dest => address to return result + * return 1 if ok, or 0 if error + */ +static int textToPhonemes(const char *src, const VOCAB * vocab, char *dest) +{ + int outIndex = 0; // Current offset into dest + int inIndex = -1; // Starts at -1 so that a leading space is assumed + + while (inIndex == -1 || src[inIndex]) { // until end of text + int maxMatch = 0; // Max chars matched on input text + int numOut = 0; // Number of characters copied to output stream for the best match + boolean endsInWhiteSpace = FALSE; + int maxWildcardPos = 0; + + // Get next phoneme, P2 + for (unsigned int ph = 0; ph < numVocab; ph++) { + int y, x; + char wildcard = 0; // modifier + int wildcardInPos = 0; + boolean hasWhiteSpace = FALSE; + const char *text = + (const char *) pgm_read_word(&vocab[ph].txt); + const char *phon = + (const char *) pgm_read_word(&vocab[ph].phoneme); + + for (y = 0;; y++) { + char nextVocabChar = pgm_read_byte(&text[y]); + char nextCharIn = + (y + inIndex == -1) ? 
' ' : src[y + inIndex]; + if (nextCharIn >= 'a' && nextCharIn <= 'z') + nextCharIn = nextCharIn - 'a' + 'A'; + + if (nextVocabChar == '#' && nextCharIn >= 'A' + && nextCharIn <= 'Z') { + wildcard = nextCharIn; // The character equivalent to the '#' + wildcardInPos = y; + continue; + } + + if (nextVocabChar == '_') { + // try to match against a white space + hasWhiteSpace = TRUE; + if (whitespace(nextCharIn)) + continue; + y--; + break; + } + // check for end of either string + if (nextVocabChar == 0 || nextCharIn == 0) + break; + + if (nextVocabChar != nextCharIn) + break; + } + + // See if its the longest complete match so far + if (y <= maxMatch || pgm_read_byte(&text[y])) + continue; + + // This is the longest complete match + maxMatch = y; + maxWildcardPos = 0; + x = outIndex; // offset into phoneme return data + + // Copy the matching phrase changing any '#' to the phoneme for the wildcard + for (y = 0;; y++) { + char c = pgm_read_byte(&phon[y]); + if (c == 0) + break; + if (c == '#') { + if (pgm_read_byte(&phon[y + 1]) == 0) { + // replacement ends in wildcard + maxWildcardPos = wildcardInPos; + } else { + x = copyToken(wildcard, dest, x, vocab); // Copy the phonemes for the wildcard character + } + } else { + dest[x++] = c; + } + } + dest[x] = 0; + endsInWhiteSpace = hasWhiteSpace; + + // 14 + numOut = x - outIndex; // The number of bytes added + } + // 15 - end of vocab table + + // 16 + if (endsInWhiteSpace) + maxMatch--; + + // 17 + if (maxMatch == 0) { + //loggerP(PSTR("Mistake in SAY, no token for ")); + //logger(&src[inIndex]); + //loggerCRLF(); + return 0; + } + // 20 + outIndex += numOut; + if (outIndex > 128 - 16) { + //loggerP(PSTR("Mistake in SAY, text too long\n")); + return 0; + } + // 21 + inIndex += (maxWildcardPos > 0) ? 
maxWildcardPos : maxMatch; + } + return 1; +} + +/** + * Convert phonemes to data string + * Enter: textp = phonemes string + * Return: phonemes = string of sound data + * modifier = 2 bytes per sound data + */ +static int phonemesToData(const char *textp, const PHONEME * phoneme) +{ + unsigned int phonemeOut = 0; // offset into the phonemes array + unsigned int modifierOut = 0; // offset into the modifiers array + unsigned int L81 = 0; // attenuate + unsigned int L80 = 16; + + while (*textp) { + // P20: Get next phoneme + boolean anyMatch = FALSE; + int longestMatch = 0; + int numOut = 0; // The number of bytes copied to the output for the longest match + + // Get next phoneme, P2 + for (unsigned int ph = 0; ph < numPhoneme; ph++) { + int numChars; + + // Locate start of next phoneme + const char *ph_text = + (const char *) pgm_read_word(&phoneme[ph].txt); + + // Set 'numChars' to the number of characters + // that we match against this phoneme + for (numChars = 0; textp[numChars]; numChars++) { + + // get next input character and make lower case + char nextChar = textp[numChars]; + if (nextChar >= 'A' && nextChar <= 'Z') + nextChar = nextChar - 'A' + 'a'; + + if (nextChar != pgm_read_byte(&ph_text[numChars])) + break; + } + + // if not the longest match so far then ignore + if (numChars <= longestMatch) + continue; + + // partial phoneme match + if (pgm_read_byte(&ph_text[numChars])) + continue; + + // P7: we have matched the whole phoneme + longestMatch = numChars; + + // Copy phoneme data to 'phonemes' + const char *ph_ph = + (const char *) pgm_read_word(&phoneme[ph].phoneme); + for (numOut = 0; pgm_read_byte(&ph_ph[numOut]); numOut++) + phonemes[phonemeOut + numOut] = + pgm_read_byte(&ph_ph[numOut]); + + L81 = pgm_read_byte(&phoneme[ph].attenuate) + '0'; + anyMatch = TRUE; // phoneme match found + + modifier[modifierOut] = -1; + modifier[modifierOut + 1] = 0; + + // Get char from text after the phoneme and test if it is a numeric + if (textp[longestMatch] >= 
'0' && textp[longestMatch] <= '9') { + // Pitch change requested + modifier[modifierOut] = + pgm_read_byte(&PitchesP[textp[longestMatch] - '1']); + modifier[modifierOut + 1] = L81; + longestMatch++; + } + // P10 + if (L81 != '0' && L81 != L80 && modifier[modifierOut] >= 0) { + modifier[modifierOut - 2] = modifier[modifierOut]; + modifier[modifierOut - 1] = '0'; + continue; + } + // P11 + if ((textp[longestMatch - 1] | 0x20) == 0x20) { + // end of input string or a space + modifier[modifierOut] = + (modifierOut == 0) ? 16 : modifier[modifierOut - 2]; + } + } // next phoneme + + // p13 + L80 = L81; + if (longestMatch == 0 && !anyMatch) { + //loggerP(PSTR("Mistake in speech at ")); + //logger(textp); + //loggerCRLF(); + return 0; + } + // Move over the bytes we have copied to the output + phonemeOut += numOut; + + if (phonemeOut > sizeof(phonemes) - 16) { + //loggerP(PSTR("Line too long\n")); + return 0; + } + // P16 + + // Copy the modifier setting to each sound data element for this phoneme + if (numOut > 2) + for (int count = 0; count != numOut; count += 2) { + modifier[modifierOut + count + 2] = + modifier[modifierOut + count]; + modifier[modifierOut + count + 3] = 0; + } + modifierOut += numOut; + + //p21 + textp += longestMatch; + } + + phonemes[phonemeOut++] = 'z'; + phonemes[phonemeOut++] = 'z'; + phonemes[phonemeOut++] = 'z'; + phonemes[phonemeOut++] = 'z'; + + while (phonemeOut < sizeof(phonemes)) + phonemes[phonemeOut++] = 0; + + while (modifierOut < sizeof(modifier)) { + modifier[modifierOut++] = -1; + modifier[modifierOut++] = 0; + } + + return 1; +} + +/* + * A delay loop that doesn't change with different optimisation settings + */ + + +static void pause(byte delays) +{ + wait_us(delays*6); +} + +static void delay2(byte d) +{ + wait_us(d*3127); +} + +/* + * Generate a random number + */ +static byte random2(void) +{ + byte tmp = (seed0 & 0x48) + 0x38; + seed0 <<= 1; + if (seed1 & 0x80) + seed0++; + seed1 <<= 1; + if (seed2 & 0x80) + seed1++; + seed2 <<= 
1; + if (tmp & 0x40) + seed2++; + return seed0; +} + +static int pin; + +static void soundOff(void) +{ + dac.write(0); +} + +#define PWM_TOP (1200/2) + +//https://sites.google.com/site/qeewiki/books/avr-guide/pwm-on-the-atmega328 +static void soundOn(void) +{ + dac.write(0); + + // initialise random number seed + seed0 = 0xecu; + seed1 = 7; + seed2 = 0xcfu; +} + +// Logarithmic scale +//static const int16_t PROGMEM Volume[8] = + //{ 0, PWM_TOP * 0.01, PWM_TOP * 0.02, PWM_TOP * 0.03, PWM_TOP * 0.06, +//PWM_TOP * 0.12, PWM_TOP * 0.25, PWM_TOP * 0.5 }; + +// Linear scale +static const int16_t PROGMEM Volume[8] = + { 0, (uint16_t)(PWM_TOP * 0.07), (uint16_t)(PWM_TOP * 0.14), (uint16_t)(PWM_TOP * 0.21), (uint16_t)(PWM_TOP * 0.29), + (uint16_t)(PWM_TOP * 0.36), (uint16_t)(PWM_TOP * 0.43), (uint16_t)(PWM_TOP * 0.5) +}; + +static void sound(byte b) +{ + // Update PWM volume + static int v=1; + b = (b & 15); + dac.write(0.5*b/16.); + + tom = v; v=1-v; +} + +static byte playTone(byte soundNum, byte soundPos, char pitch1, + char pitch2, byte count, byte volume) +{ + const byte *soundData = &SoundData[soundNum * 0x40]; + while (count-- > 0) { + byte s = pgm_read_byte(&soundData[soundPos & 0x3fu]); + sound((byte) (s & volume)); + pause(pitch1); + sound((byte) ((s >> 4) & volume)); + pause(pitch2); + + soundPos++; + } + return soundPos & 0x3fu; +} + +static void play(byte duration, byte soundNumber) +{ + while (duration--) + playTone(soundNumber, random2(), 7, 7, 10, 15); +} + +/****************************************************************************** + * User API + ******************************************************************************/ +TTS::TTS() +{ +} + +void TTS::setPitch(byte pitch) +{ + defaultPitch = pitch; +} + +byte TTS::getPitch(void) +{ + return defaultPitch; +} + +/* + * Speak a string of phonemes + */ +void TTS::sayPhonemes(const char *textp) +{ + byte phonemeIn, // offset into text + byte2, modifierIn, // offset into stuff in modifier + 
punctuationPitchDelta; // change in pitch due to fullstop or question mark + int8_t byte1; + char phoneme; + const SOUND_INDEX *soundIndex; + byte sound1Num; // Sound data for the current phoneme + byte sound2Num; // Sound data for the next phoneme + byte sound2Stop; // Where the second sound should stop + char pitch1; // pitch for the first sound + char pitch2; // pitch for the second sound + short i; + byte sound1Duration; // the duration for sound 1 + + if (phonemesToData(textp, s_phonemes)) { + // phonemes has list of sound bytes + soundOn(); + + // _630C + byte1 = 0; + punctuationPitchDelta = 0; + + // Q19 + for (phonemeIn = 0, modifierIn = 0; phonemes[phonemeIn]; + phonemeIn += 2, modifierIn += 2) { + byte duration; // duration from text line + byte SoundPos; // offset into sound data + byte fadeSpeed = 0; + + phoneme = phonemes[phonemeIn]; + if (phoneme == 'z') { + delay2(15); + continue; + } else if (phoneme == '#') { + continue; + } else { + + // Collect info on sound 1 + soundIndex = &SoundIndex[phoneme - 'A']; + sound1Num = pgm_read_byte(&soundIndex->SoundNumber); + byte1 = pgm_read_byte(&soundIndex->byte1); + byte2 = pgm_read_byte(&soundIndex->byte2); + + duration = phonemes[phonemeIn + 1] - '0'; // Get duration from the input line + if (duration != 1) + duration <<= 1; + + duration += 6; // scaled duration from the input line (at least 6) + sound2Stop = 0x40 >> 1; + + pitch1 = modifier[modifierIn]; + if (modifier[modifierIn + 1] == 0 || pitch1 == -1) { + pitch1 = 10; + duration -= 6; + } else if (modifier[modifierIn + 1] == '0' + || duration == 6) { + duration -= 6; + } + // q8 + pitch2 = modifier[modifierIn + 2]; + if (modifier[modifierIn + 3] == 0 || pitch2 == -1) + pitch2 = 10; + + // q10 + if (byte1 < 0) { + sound1Num = 0; + random2(); + sound2Stop = (0x40 >> 1) + 2; + } else { + // is positive + if (byte1 == 2) { + // 64A4 + // Make a white noise sound ! + byte volume = (duration == 6) ? 
15 : 1; // volume mask + for (duration <<= 2; duration > 0; duration--) { + playTone(sound1Num, random2(), 8, 12, 11, + volume); + // Increase the volume + if (++volume == 16) + volume = 15; // full volume from now on + } + continue; + + } else { + // q11 + if (byte1) + delay2(25); + } + } + } + + // 6186 + pitch1 += defaultPitch + punctuationPitchDelta; + if (pitch1 < 1) + pitch1 = 1; + + pitch2 += defaultPitch + punctuationPitchDelta; + if (pitch2 < 1) + pitch2 = 1; + + // get next phoneme + phoneme = phonemes[phonemeIn + 2]; + + if (phoneme == 0 || phoneme == 'z') { + if (duration == 1) + delay2(60); + phoneme = 'a'; // change to a pause + } else { + // s6 + if (byte2 != 1) + byte2 = + (byte2 + + pgm_read_byte(&SoundIndex[phoneme - 'A'].byte2)) + >> 1; + + if (byte1 < 0 + || pgm_read_byte(&SoundIndex[phoneme - 'A'].byte1)) + phoneme = 'a'; // change to a pause + } + + // S10 + sound2Num = + pgm_read_byte(&SoundIndex[phoneme - 'A'].SoundNumber); + + sound1Duration = 0x80; // play half of sound 1 + if (sound2Num == sound1Num) + byte2 = duration; + + // S11 + if ((byte2 >> 1) == 0) { + sound1Duration = 0xff; // play all of sound 1 + } else { + // The fade speed between the two sounds + fadeSpeed = (sound1Duration + (byte2 >> 1)) / byte2; + + if (duration == 1) { + sound2Stop = 0x40; // dont play sound2 + sound1Duration = 0xff; // play all of sound 1 + pitch1 = 12; + } + } + + SoundPos = 0; + do { + byte sound1Stop = (sound1Duration >> 2) & 0x3fu; + byte sound1End = sound1Stop; + if (sound2Stop < sound1End) sound1End = sound2Stop; // min + + if (sound1Stop) + SoundPos = + playTone(sound1Num, SoundPos, pitch1, pitch1, + sound1End, 15); + + // s18 + if (sound2Stop != 0x40) { + SoundPos = + playTone(sound2Num, SoundPos, pitch2, pitch2, + (byte) (sound2Stop - sound1End), 15); + } + // s23 + if (sound1Duration != 0xff && duration < byte2) { + // Fade sound1 out + sound1Duration -= fadeSpeed; + if (sound1Duration >= (byte) 0xC8) + sound1Duration = 0; // stop playing sound 
1 + } + // Call any additional sound + if (byte1 == -1) + play(3, 30); // make an 'f' sound + else if (byte1 == -2) + play(3, 29); // make an 's' sound + else if (byte1 == -3) + play(3, 33); // make a 'th' sound + else if (byte1 == -4) + play(3, 27); // make a 'sh' sound + + } while (--duration); + + // Scan ahead to find a '.' or a '?' as this will change the pitch + punctuationPitchDelta = 0; + for (i = 6; i > 0; i--) { + char next = phonemes[phonemeIn + (i * 2)]; + if (next == 'i') + // found a full stop + punctuationPitchDelta = 6 - i; // Lower the pitch + else if (next == 'h') + // found a question mark + punctuationPitchDelta = i - 6; // Raise the pitch + } + + if (byte1 == 1) + delay2(25); + } // next phoneme + } + soundOff(); +} + +/* + * Speak an English command line of text + */ +void TTS::sayText(const char *original) +{ + unsigned int i; + if (textToPhonemes(original, s_vocab, g_text)) { + sayPhonemes(g_text); + } +} \ No newline at end of file