text-to-speech through DAC to audio amp/speaker

Dependencies:   mbed

text-to-speech TTS

TTS.cpp

Committer:
manitou
Date:
2017-06-24
Revision:
2:eceeac07154b
Parent:
1:548323cfdb5d
Child:
3:d12c34704b6d

File content as of revision 2:eceeac07154b:

/** 
 * Text To Speech synthesis library 
 * Copyright (c) 2008 Clive Webster.  All rights reserved.
 *
 * Nov. 29th 2009 - Modified to work with Arduino by Gabriel Petrut:
 * The Text To Speech library uses Timer1 to generate the PWM
 * output on digital pin 10. The output signal needs to be fed
 * to an RC filter then through an amplifier to the speaker.
 * http://www.tehnorama.ro/minieric-modulul-de-control-si-sinteza-vocala/
 * 
 * Modified to allow use of different PWM pins by Stephen Crane.
 * Modified for Timer5 on Arduino Mega2560 by Peter Dambrowsky.
 */

#include "TTS.h"

// Analog output that sound() writes samples to. DACpin is not defined in
// this file (presumably supplied by TTS.h).
AnalogOut dac(DACpin);

// Random number seed: state bytes of random2(), reset to fixed values by soundOn()
static byte seed0;
static byte seed1;
static byte seed2;

// Sound data filled in by phonemesToData() and played back by sayPhonemes().
static char phonemes[128];
// Pitch / attenuation bytes kept in step with 'phonemes'.
// NOTE(review): -1 is stored here as a "no pitch" marker and is later compared
// with -1 and with >= 0; that only behaves as written if plain char is signed
// on the target toolchain -- confirm.
static char modifier[128];  // must be same size as 'phonemes'
// Phoneme text produced by textToPhonemes() on behalf of TTS::sayText().
static char g_text[128];

// Base pitch added to every sound in sayPhonemes(); changed via TTS::setPitch().
static byte defaultPitch = 7;

// Lookup user specified pitch changes: phonemesToData() indexes this table
// with (digit - '1') for a digit that follows a phoneme.
static const byte PROGMEM PitchesP[] = { 1, 2, 4, 6, 8, 10, 13, 16 };

/**
 * Find the single character 'token' in 'vocab'
 * and append its phonemes to dest[x]
 */
static int copyToken(char token, char *dest, int x, const VOCAB * vocab)
{
    for (unsigned int ph = 0; ph < numVocab; ph++) {
    const char *txt = (const char *) pgm_read_word(&vocab[ph].txt);
    if (pgm_read_byte(&txt[0]) == token && pgm_read_byte(&txt[1]) == 0) {
        const char *src =
        (const char *) pgm_read_word(&vocab[ph].phoneme);
        while (pgm_read_byte(src)) {
        dest[x++] = pgm_read_byte(src);
        src++;
        }
        break;
    }
    }
    return x;
}

/**
 * Tell whether 'c' is treated as a word separator: the string terminator,
 * a space, or one of the characters , . ? ' ! : /
 * Returns 1 for a separator, 0 for anything else.
 */
static byte whitespace(char c)
{
    switch (c) {
    case 0:
    case ' ':
    case ',':
    case '.':
    case '?':
    case '\'':
    case '!':
    case ':':
    case '/':
        return 1;
    default:
        return 0;
    }
}

/**
 * Translate English text into a phoneme string by repeatedly picking the
 * vocabulary entry with the longest complete match at the current input
 * position and appending that entry's phonemes to the output.
 *
 * Enter:
 * src => English text (lower case letters are folded to upper case here)
 * vocab => VOCAB array (numVocab entries)
 * dest => address to return result (NUL terminated phoneme string)
 * return 1 if ok, or 0 if error (no vocabulary entry matches at some
 * position, or the output grows beyond 128 - 16 bytes)
 *
 * Pattern characters in a vocabulary text: '#' matches any letter A-Z,
 * '_' matches any character accepted by whitespace().
 *
 * NOTE(review): 'dest' is written before the length check runs, so the
 * 16 byte margin is what protects the caller's buffer -- confirm that no
 * single vocabulary entry can emit more than that.
 */
static int textToPhonemes(const char *src, const VOCAB * vocab, char *dest)
{
    int outIndex = 0;       // Current offset into dest
    int inIndex = -1;       // Starts at -1 so that a leading space is assumed

    while (inIndex == -1 || src[inIndex]) { // until end of text
    int maxMatch = 0;   // Max chars matched on input text
    int numOut = 0;     // Number of characters copied to output stream for the best match
    boolean endsInWhiteSpace = FALSE;   // TRUE if the best match so far contained a '_'
    int maxWildcardPos = 0; // input offset of the '#' when the best match's phonemes end in '#'

    // Get next phoneme, P2
    for (unsigned int ph = 0; ph < numVocab; ph++) {
        int y, x;
        char wildcard = 0;  // the input letter that matched a '#' in this entry
        int wildcardInPos = 0;  // offset (relative to inIndex) where that letter was found
        boolean hasWhiteSpace = FALSE;
        const char *text =
        (const char *) pgm_read_word(&vocab[ph].txt);
        const char *phon =
        (const char *) pgm_read_word(&vocab[ph].phoneme);

        // Walk the pattern and the input side by side; y counts matched characters
        for (y = 0;; y++) {
        char nextVocabChar = pgm_read_byte(&text[y]);
        // Position -1 is the assumed leading space
        // NOTE(review): whitespace() accepts the terminator, so after a '_' has
        // matched the end of 'src' this read looks one byte past the NUL before
        // the end-of-string test below is reached -- confirm this is harmless.
        char nextCharIn =
            (y + inIndex == -1) ? ' ' : src[y + inIndex];
        // Fold lower case input to upper case before comparing
        if (nextCharIn >= 'a' && nextCharIn <= 'z')
            nextCharIn = nextCharIn - 'a' + 'A';

        if (nextVocabChar == '#' && nextCharIn >= 'A'
            && nextCharIn <= 'Z') {
            wildcard = nextCharIn;  // The character equivalent to the '#'
            wildcardInPos = y;
            continue;
        }

        if (nextVocabChar == '_') {
            // try to match against a white space
            hasWhiteSpace = TRUE;
            if (whitespace(nextCharIn))
            continue;
            // No white space here: step back onto the previous pattern character
            // so that the completeness check below rejects this entry
            y--;
            break;
        }
        // check for end of either string
        if (nextVocabChar == 0 || nextCharIn == 0)
            break;

        if (nextVocabChar != nextCharIn)
            break;
        }

        // See if its the longest complete match so far
        // (skip when not longer than the best, or when pattern characters remain)
        if (y <= maxMatch || pgm_read_byte(&text[y]))
        continue;

        // This is the longest complete match
        maxMatch = y;
        maxWildcardPos = 0;
        x = outIndex;   // offset into phoneme return data

        // Copy the matching phrase changing any '#' to the phoneme for the wildcard
        // (this overwrites whatever a shorter earlier match wrote at outIndex)
        for (y = 0;; y++) {
        char c = pgm_read_byte(&phon[y]);
        if (c == 0)
            break;
        if (c == '#') {
            if (pgm_read_byte(&phon[y + 1]) == 0) {
            // replacement ends in wildcard
            // nothing is emitted for it; step 21 advances the input only as
            // far as the wildcard letter so it is matched again next time round
            maxWildcardPos = wildcardInPos;
            } else {
            x = copyToken(wildcard, dest, x, vocab);    // Copy the phonemes for the wildcard character
            }
        } else {
            dest[x++] = c;
        }
        }
        dest[x] = 0;
        endsInWhiteSpace = hasWhiteSpace;

        // 14
        numOut = x - outIndex;  // The number of bytes added
    }
    // 15 - end of vocab table

    // 16
    // When the best entry used a '_', consume one input character fewer
    if (endsInWhiteSpace)
        maxMatch--;

    // 17
    // Nothing consumed: give up rather than loop forever on this position
    if (maxMatch == 0) {
        //loggerP(PSTR("Mistake in SAY, no token for ")); 
        //logger(&src[inIndex]);
        //loggerCRLF();
        return 0;
    }
    // 20
    outIndex += numOut;
    // Keep a 16 byte safety margin in the (assumed 128 byte) output buffer
    if (outIndex > 128 - 16) {
        //loggerP(PSTR("Mistake in SAY, text too long\n"));
        return 0;
    }
    // 21 
    inIndex += (maxWildcardPos > 0) ? maxWildcardPos : maxMatch;
    }
    return 1;
}

/**
 * Convert phonemes to data string
 * Enter: textp = phonemes string
 * Return: phonemes = string of sound data
 *     modifier = 2 bytes per sound data
 */
static int phonemesToData(const char *textp, const PHONEME * phoneme)
{
    unsigned int phonemeOut = 0;    // offset into the phonemes array
    unsigned int modifierOut = 0;   // offset into the modifiers array
    unsigned int L81 = 0;   // attenuate
    unsigned int L80 = 16;

    while (*textp) {
    // P20: Get next phoneme
    boolean anyMatch = FALSE;
    int longestMatch = 0;
    int numOut = 0;     // The number of bytes copied to the output for the longest match

    // Get next phoneme, P2
    for (unsigned int ph = 0; ph < numPhoneme; ph++) {
        int numChars;

        // Locate start of next phoneme 
        const char *ph_text =
        (const char *) pgm_read_word(&phoneme[ph].txt);

        // Set 'numChars' to the number of characters
        // that we match against this phoneme
        for (numChars = 0; textp[numChars]; numChars++) {

        // get next input character and make lower case
        char nextChar = textp[numChars];
        if (nextChar >= 'A' && nextChar <= 'Z')
            nextChar = nextChar - 'A' + 'a';

        if (nextChar != pgm_read_byte(&ph_text[numChars]))
            break;
        }

        // if not the longest match so far then ignore
        if (numChars <= longestMatch)
        continue;

        // partial phoneme match
        if (pgm_read_byte(&ph_text[numChars]))
        continue;

        // P7: we have matched the whole phoneme
        longestMatch = numChars;

        // Copy phoneme data to 'phonemes'
        const char *ph_ph =
        (const char *) pgm_read_word(&phoneme[ph].phoneme);
        for (numOut = 0; pgm_read_byte(&ph_ph[numOut]); numOut++)
        phonemes[phonemeOut + numOut] =
            pgm_read_byte(&ph_ph[numOut]);

        L81 = pgm_read_byte(&phoneme[ph].attenuate) + '0';
        anyMatch = TRUE;    // phoneme match found

        modifier[modifierOut] = -1;
        modifier[modifierOut + 1] = 0;

        // Get char from text after the phoneme and test if it is a numeric
        if (textp[longestMatch] >= '0' && textp[longestMatch] <= '9') {
        // Pitch change requested
        modifier[modifierOut] =
            pgm_read_byte(&PitchesP[textp[longestMatch] - '1']);
        modifier[modifierOut + 1] = L81;
        longestMatch++;
        }
        // P10
        if (L81 != '0' && L81 != L80 && modifier[modifierOut] >= 0) {
        modifier[modifierOut - 2] = modifier[modifierOut];
        modifier[modifierOut - 1] = '0';
        continue;
        }
        // P11
        if ((textp[longestMatch - 1] | 0x20) == 0x20) {
        // end of input string or a space
        modifier[modifierOut] =
            (modifierOut == 0) ? 16 : modifier[modifierOut - 2];
        }
    }           // next phoneme

    // p13
    L80 = L81;
    if (longestMatch == 0 && !anyMatch) {
        //loggerP(PSTR("Mistake in speech at "));
        //logger(textp);
        //loggerCRLF();
        return 0;
    }
    // Move over the bytes we have copied to the output
    phonemeOut += numOut;

    if (phonemeOut > sizeof(phonemes) - 16) {
        //loggerP(PSTR("Line too long\n"));
        return 0;
    }
    // P16

    // Copy the modifier setting to each sound data element for this phoneme
    if (numOut > 2)
        for (int count = 0; count != numOut; count += 2) {
        modifier[modifierOut + count + 2] =
            modifier[modifierOut + count];
        modifier[modifierOut + count + 3] = 0;
        }
    modifierOut += numOut;

    //p21
    textp += longestMatch;
    }

    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';

    while (phonemeOut < sizeof(phonemes))
    phonemes[phonemeOut++] = 0;

    while (modifierOut < sizeof(modifier)) {
    modifier[modifierOut++] = -1;
    modifier[modifierOut++] = 0;
    }

    return 1;
}

/*
 * Short delay used between output samples: waits delays * 6 via wait_us().
 */


static void pause(byte delays)
{
    const int ticks = delays * 6;
    wait_us(ticks);
}

/*
 * Longer delay used for pauses in speech: waits d * 3127 via wait_us().
 */
static void delay2(byte d)
{
    const int ticks = d * 3127;
    wait_us(ticks);
}

/*
 * Generate a random number
 *
 * seed0, seed1 and seed2 are each shifted left by one bit. The old top bit
 * of seed1 becomes the low bit of seed0, the old top bit of seed2 becomes
 * the low bit of seed1, and a bit computed from the old seed0 becomes the
 * low bit of seed2. The new seed0 is returned.
 */
static byte random2(void)
{
    // All three inputs are sampled before any seed is modified
    const byte feedback = (seed0 & 0x48) + 0x38;
    const byte intoSeed0 = (seed1 & 0x80) ? 1 : 0;
    const byte intoSeed1 = (seed2 & 0x80) ? 1 : 0;
    const byte intoSeed2 = (feedback & 0x40) ? 1 : 0;

    seed0 = (seed0 << 1) + intoSeed0;
    seed1 = (seed1 << 1) + intoSeed1;
    seed2 = (seed2 << 1) + intoSeed2;

    return seed0;
}

// NOTE(review): not referenced anywhere else in the visible source; possibly a
// leftover from the PWM pin based version -- confirm before removing.
static int pin;

/*
 * Called by sayPhonemes() when it has finished. Currently does nothing:
 * the DAC write below is commented out.
 */
static void soundOff(void)
{
    //dac.write(0);
}

// Scale factor for the entries of the Volume[] table.
#define PWM_TOP (1200/2)

//https://sites.google.com/site/qeewiki/books/avr-guide/pwm-on-the-atmega328
/*
 * Called by sayPhonemes() before playback starts. Puts the random2() seed
 * bytes back to their fixed start values; the DAC write is commented out.
 */
static void soundOn(void)
{
   // dac.write(0);

    // initialise random number seed
    seed2 = 0xcfu;
    seed1 = 7;
    seed0 = 0xecu;
}

// Logarithmic scale (disabled alternative to the linear table below)
//static const int16_t PROGMEM Volume[8] =
    //{ 0, PWM_TOP * 0.01, PWM_TOP * 0.02, PWM_TOP * 0.03, PWM_TOP * 0.06,
//PWM_TOP * 0.12, PWM_TOP * 0.25, PWM_TOP * 0.5 };

// Linear scale: eight levels from 0 up to half of PWM_TOP.
// NOTE(review): this table is not referenced anywhere in the visible source
// (sound() computes its output level directly), and the initialisers are cast
// to uint16_t although the element type is int16_t -- confirm whether it is
// still needed.
static const int16_t PROGMEM Volume[8] =
    { 0, (uint16_t)(PWM_TOP * 0.07), (uint16_t)(PWM_TOP * 0.14), (uint16_t)(PWM_TOP * 0.21), (uint16_t)(PWM_TOP * 0.29),
    (uint16_t)(PWM_TOP * 0.36), (uint16_t)(PWM_TOP * 0.43), (uint16_t)(PWM_TOP * 0.5)
};

/*
 * Output one sample on the DAC. Only the low four bits of 'b' are used;
 * the value written is 0.5 * (b & 15) / 16.
 */
static void sound(byte b)
{
    const byte level = (b & 15);
    dac.write(0.5 * level / 16.);
}

/*
 * Play 'count' bytes of sound number 'soundNum', starting at offset
 * 'soundPos' within that sound's 0x40 byte slice of SoundData (the offset
 * wraps within the slice).
 *
 * Each byte holds two samples: the low four bits are output first and
 * followed by pause(pitch1), then the high four bits followed by
 * pause(pitch2). Both samples are masked with 'volume'.
 *
 * Returns the offset to continue from, in the range 0..0x3f.
 */
static byte playTone(byte soundNum, byte soundPos, char pitch1,
             char pitch2, byte count, byte volume)
{
    const byte *slice = &SoundData[soundNum * 0x40];

    for (byte left = count; left > 0; left--) {
        const byte packed = pgm_read_byte(&slice[soundPos & 0x3fu]);
        const byte first = (byte) (packed & volume);
        const byte second = (byte) ((packed >> 4) & volume);

        sound(first);
        pause(pitch1);
        sound(second);
        pause(pitch2);

        soundPos++;
    }
    return soundPos & 0x3fu;
}

/*
 * Play 'duration' bursts of sound 'soundNumber'. Every burst is 10 bytes
 * long, starts at an offset taken from random2(), and uses pitch 7 and
 * volume mask 15.
 */
static void play(byte duration, byte soundNumber)
{
    for (byte bursts = duration; bursts != 0; bursts--) {
        playTone(soundNumber, random2(), 7, 7, 10, 15);
    }
}

/******************************************************************************
 * User API
 ******************************************************************************/
/*
 * Construct a TTS object. Nothing is initialised here; the speech state
 * used by the methods below lives in file-scope variables.
 */
TTS::TTS()
{
}

/*
 * Set the base pitch that sayPhonemes() adds to every sound. The value is
 * stored in a file-scope variable, so it is shared by all TTS objects.
 */
void TTS::setPitch(byte pitch)
{
    defaultPitch = pitch;
}

/*
 * Return the base pitch currently in use (7 unless changed by setPitch()).
 */
byte TTS::getPitch(void)
{
    return defaultPitch;
}

/*
 * Speak a string of phonemes
 *
 * 'textp' is converted with phonemesToData() (using the s_phonemes table)
 * into the file-scope 'phonemes' and 'modifier' buffers. If that succeeds,
 * the sound data is walked two bytes at a time and played through
 * playTone() / play(). Nothing is played if the conversion fails;
 * soundOff() is called in either case.
 *
 * NOTE(review): pitch1 / pitch2 are plain char and are compared with -1 and
 * with "< 1"; those tests only behave as written if plain char is signed on
 * the target toolchain -- confirm.
 */
void TTS::sayPhonemes(const char *textp)
{
    byte phonemeIn,     // offset into text
    byte2, modifierIn,      // offset into stuff in modifier
    punctuationPitchDelta;  // change in pitch due to fullstop or question mark
    int8_t byte1;
    char phoneme;
    const SOUND_INDEX *soundIndex;
    byte sound1Num;     // Sound data for the current phoneme
    byte sound2Num;     // Sound data for the next phoneme
    byte sound2Stop;        // Where the second sound should stop
    char pitch1;        // pitch for the first sound
    char pitch2;        // pitch for the second sound
    short i;
    byte sound1Duration;    // the duration for sound 1

    if (phonemesToData(textp, s_phonemes)) {
    // phonemes has list of sound bytes
    soundOn();

    // _630C
    byte1 = 0;
    punctuationPitchDelta = 0;

    // Q19
    // Each element is two bytes: a sound letter followed by a duration digit
    for (phonemeIn = 0, modifierIn = 0; phonemes[phonemeIn];
         phonemeIn += 2, modifierIn += 2) {
        byte duration;  // duration from text line
        byte SoundPos;  // offset into sound data
        byte fadeSpeed = 0;

        phoneme = phonemes[phonemeIn];
        if (phoneme == 'z') {
        // 'z' is a pause element
        delay2(15);
        continue;
        } else if (phoneme == '#') {
        // '#' elements are skipped
        continue;
        } else {

        // Collect info on sound 1
        soundIndex = &SoundIndex[phoneme - 'A'];
        sound1Num = pgm_read_byte(&soundIndex->SoundNumber);
        byte1 = pgm_read_byte(&soundIndex->byte1);
        byte2 = pgm_read_byte(&soundIndex->byte2);

        duration = phonemes[phonemeIn + 1] - '0';   // Get duration from the input line
        if (duration != 1)
            duration <<= 1;

        duration += 6;  // scaled duration from the input line (at least 6)
        sound2Stop = 0x40 >> 1;

        pitch1 = modifier[modifierIn];
        if (modifier[modifierIn + 1] == 0 || pitch1 == -1) {
            // no pitch was given for this element: use the default of 10
            pitch1 = 10;
            duration -= 6;
        } else if (modifier[modifierIn + 1] == '0'
               || duration == 6) {
            duration -= 6;
        }
        // q8
        pitch2 = modifier[modifierIn + 2];
        if (modifier[modifierIn + 3] == 0 || pitch2 == -1)
            pitch2 = 10;

        // q10
        if (byte1 < 0) {
            // negative byte1: sound 1 becomes sound number 0 and an extra
            // sound is added inside the play loop below
            sound1Num = 0;
            random2();
            sound2Stop = (0x40 >> 1) + 2;
        } else {
            // is positive
            if (byte1 == 2) {
            // 64A4
            // Make a white noise sound !
            byte volume = (duration == 6) ? 15 : 1; // volume mask
            for (duration <<= 2; duration > 0; duration--) {
                playTone(sound1Num, random2(), 8, 12, 11,
                     volume);
                // Increase the volume
                if (++volume == 16)
                volume = 15;    // full volume from now on
            }
            continue;

            } else {
            // q11
            if (byte1)
                delay2(25);
            }
        }
        }

        // 6186
        // Apply the base pitch and any pitch shift from upcoming punctuation
        pitch1 += defaultPitch + punctuationPitchDelta;
        if (pitch1 < 1)
        pitch1 = 1;

        pitch2 += defaultPitch + punctuationPitchDelta;
        if (pitch2 < 1)
        pitch2 = 1;

        // get next phoneme
        phoneme = phonemes[phonemeIn + 2];

        if (phoneme == 0 || phoneme == 'z') {
        if (duration == 1)
            delay2(60);
        phoneme = 'a';  // change to a pause
        } else {
        // s6
        // Average byte2 of this sound and the next one (unless it is 1)
        if (byte2 != 1)
            byte2 =
            (byte2 +
             pgm_read_byte(&SoundIndex[phoneme - 'A'].byte2))
            >> 1;

        if (byte1 < 0
            || pgm_read_byte(&SoundIndex[phoneme - 'A'].byte1))
            phoneme = 'a';  // change to a pause
        }

        // S10
        sound2Num =
        pgm_read_byte(&SoundIndex[phoneme - 'A'].SoundNumber);

        sound1Duration = 0x80;  // play half of sound 1
        if (sound2Num == sound1Num)
        byte2 = duration;

        // S11
        if ((byte2 >> 1) == 0) {
        sound1Duration = 0xff;  // play all of sound 1
        } else {
        // The fade speed between the two sounds
        // (byte2 is non-zero in this branch, so the division is safe)
        fadeSpeed = (sound1Duration + (byte2 >> 1)) / byte2;

        if (duration == 1) {
            sound2Stop = 0x40;  // dont play sound2
            sound1Duration = 0xff;  // play all of sound 1
            pitch1 = 12;
        }
        }

        SoundPos = 0;
        // NOTE(review): if 'duration' is 0 on entry, --duration wraps round
        // (assuming byte is an unsigned 8 bit type) and this loop runs 256
        // times -- confirm the sound data never produces a zero here.
        do {
        byte sound1Stop = (sound1Duration >> 2) & 0x3fu;
        byte sound1End = sound1Stop;
        if (sound2Stop < sound1End) sound1End = sound2Stop;  // min

        if (sound1Stop)
            SoundPos =
            playTone(sound1Num, SoundPos, pitch1, pitch1,
                 sound1End, 15);

        // s18
        if (sound2Stop != 0x40) {
            SoundPos =
            playTone(sound2Num, SoundPos, pitch2, pitch2,
                 (byte) (sound2Stop - sound1End), 15);
        }
        // s23
        if (sound1Duration != 0xff && duration < byte2) {
            // Fade sound1 out
            sound1Duration -= fadeSpeed;
            if (sound1Duration >= (byte) 0xC8)
            sound1Duration = 0; // stop playing sound 1
        }
        // Call any additional sound
        if (byte1 == -1)
            play(3, 30);    // make an 'f' sound
        else if (byte1 == -2)
            play(3, 29);    // make an 's' sound
        else if (byte1 == -3)
            play(3, 33);    // make a 'th' sound
        else if (byte1 == -4)
            play(3, 27);    // make a 'sh' sound

        } while (--duration);

        // Scan ahead to find a '.' or a '?' as this will change the pitch
        // (looks at the next six elements, furthest first, so the nearest
        // marker found is the one that takes effect)
        punctuationPitchDelta = 0;
        for (i = 6; i > 0; i--) {
        char next = phonemes[phonemeIn + (i * 2)];
        if (next == 'i')
            // found a full stop
            punctuationPitchDelta = 6 - i;  // Lower the pitch
        else if (next == 'h')
            // found a question mark
            punctuationPitchDelta = i - 6;  // Raise the pitch
        }

        if (byte1 == 1)
        delay2(25);
    }           // next phoneme
    }
    soundOff();
}

/*
 * Speak an English command line of text
 *
 * The text is translated with textToPhonemes() (using the s_vocab table)
 * into the file-scope g_text buffer and, when the translation succeeds,
 * spoken with sayPhonemes(). Text that cannot be translated is skipped
 * without any sound. A null pointer is ignored.
 */
void TTS::sayText(const char *original)
{
    // textToPhonemes() reads the text unconditionally, so reject null here
    if (!original)
        return;

    if (textToPhonemes(original, s_vocab, g_text)) {
        sayPhonemes(g_text);
    }
}