Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers AudioAnalyzer.h Source File

AudioAnalyzer.h

#ifndef __included_audio_analyzer_h
#define __included_audio_analyzer_h

#include <math.h>
#include <stdint.h>   // int8_t, uint16_t, uint32_t used below

namespace NK
{

/**
 * Statistics over a buffer of signed 8-bit audio samples, plus simple
 * voiced / vowel classification built on those statistics.
 *
 * The statistics are produced on demand: every getter calls analyze() the
 * first time it is needed (while `analyzed` is still false).  analyze() is
 * only declared in this header; its definition lives elsewhere and is
 * presumably what fills in zeroCrossings, power, minValue and maxValue and
 * sets `analyzed` -- confirm against the implementation file.
 *
 * The sample pointer is stored as-is; this header never copies or frees the
 * buffer, so it must stay valid for as long as the analyzer is used.
 */
class AudioAnalyzer
{
protected:
    int8_t const *samples;    // borrowed pointer to the sample buffer
    uint16_t nsamples;        // number of samples in the buffer
    uint16_t zeroCrossings;   // zero-crossing count reported by getZeroCrossings()
    uint32_t power;           // power figure reported by getPower()
    float logPower;           // last value computed by getLogPower()
    float powerRef;           // reference subtracted from log(power)
    int8_t minValue;          // minimum reported by getMinMaxValues()
    int8_t maxValue;          // maximum reported by getMinMaxValues()
    bool analyzed;            // false until analyze() has been run

    // Computes the cached statistics (defined outside this header).
    void analyze();

public:
    /**
     * @param _samples   pointer to the samples to analyze (not copied)
     * @param _nsamples  number of samples available at _samples
     */
    AudioAnalyzer(int8_t const *_samples, uint16_t _nsamples)
        : samples(_samples), nsamples(_nsamples), zeroCrossings(0), power(0),
          logPower(0.0f), powerRef(0.0f), minValue(0), maxValue(0), analyzed(false) {
    }

    /** @return the cached zero-crossing count, analyzing first if needed. */
    uint16_t getZeroCrossings() {
        if (!analyzed) analyze();
        return zeroCrossings;
    }

    /**
     * @return zero crossings as a percentage of the sample count, or 0 for an
     *         empty buffer (avoids a division by zero that would yield NaN).
     */
    float getZeroCrossingRatioPercent() {
        if (nsamples == 0)
            return 0.0f;
        return static_cast<float>(getZeroCrossings() * 100.0 / nsamples);
    }

    /** @return the cached power figure, analyzing first if needed. */
    uint32_t getPower() {
        if (!analyzed) analyze();
        return power;
    }

    /**
     * Recomputes and caches logPower = ln(power) - powerRef.
     * With power == 0 the logarithm is -infinity, so the result is typically
     * -infinity (or NaN if powerRef is itself non-finite).
     */
    float getLogPower() {
        if (!analyzed) analyze();
        logPower = static_cast<float>(::log(static_cast<double>(power)) - powerRef);
        return logPower;
    }

    /**
     * Copies the cached minimum and maximum sample values to the given
     * locations.  A null pointer for either output is skipped.
     */
    void getMinMaxValues(int8_t *min, int8_t *max) {
        if (!analyzed) analyze();
        if (min) *min = minValue;
        if (max) *max = maxValue;
    }

    /** @return true when logPower is a number and is not below PowerThreshold. */
    bool isVoiced() {
        const float current = getLogPower();
        return !isnan(current) && !(current < PowerThreshold);
    }

    /** Sets the reference level subtracted from ln(power) in getLogPower(). */
    void setPowerRef(float _powerRef) {
        powerRef = _powerRef;
    }

    /**
     * A frame is a vowel when it is voiced (logPower at or above
     * PowerThreshold) and lies below the line
     *   zeroCrossingRatioPercent = VowelSlope * (logPower - VowelXIntercept)
     */
    bool isVowel() {
        // isVoiced() refreshes logPower, so the cached value is current below.
        if (!isVoiced())
            return false;
        return getZeroCrossingRatioPercent() < VowelSlope * (logPower - VowelXIntercept);
    }

    // In-class initialisation of a floating-point static member requires
    // constexpr in standard C++ (C++11 and later); plain `static const float`
    // with an initialiser is accepted only as a compiler extension.

    // Minimum logPower for a frame to count as voiced.
    static constexpr float PowerThreshold = -4.0f;
    // Slope of the vowel boundary line (percent zero crossings per unit logPower).
    static constexpr float VowelSlope = 14.7f;
    // logPower value at which the vowel boundary line crosses zero.
    static constexpr float VowelXIntercept = -0.7f;
};

} // namespace NK
#endif