Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.
AudioAnalyzer.h
#ifndef __included_audio_analyzer_h
#define __included_audio_analyzer_h

#include <math.h>
#include <stdint.h>  // int8_t / uint16_t / uint32_t used below

namespace NK
{

// Lazily evaluated statistics over a buffer of signed 8-bit audio samples,
// with voiced / vowel classification built on top of them.
//
// The constructor stores the sample pointer and count as-is; the buffer is
// not copied. The statistics themselves come from analyze(), which is
// defined outside this header and is invoked by the getters while
// `analyzed` is false.
// NOTE(review): the buffer presumably has to stay valid until analyze() has
// run -- confirm against the implementation of analyze().
class AudioAnalyzer
{
protected:
    int8_t const *samples;   // sample buffer handed to the constructor
    uint16_t nsamples;       // number of entries in `samples`
    uint16_t zeroCrossings;
    uint32_t power;
    float logPower;          // value computed by the latest getLogPower() call
    float powerRef;          // offset subtracted from log(power) in getLogPower()
    int8_t minValue;
    int8_t maxValue;
    bool analyzed;           // while false, the getters call analyze() first

    // Defined elsewhere; presumably fills in zeroCrossings, power, minValue
    // and maxValue and sets `analyzed` -- verify in the implementation file.
    void analyze();

public:
    // _samples  : buffer of signed 8-bit samples (pointer is stored, not copied)
    // _nsamples : number of samples in the buffer
    // Every member is given a defined initial value, including minValue and
    // maxValue, so nothing is left indeterminate before analyze() runs.
    AudioAnalyzer(int8_t const *_samples, uint16_t _nsamples)
        : samples(_samples),
          nsamples(_nsamples),
          zeroCrossings(0),
          power(0),
          logPower(0.0f),
          powerRef(0.0f),
          minValue(0),
          maxValue(0),
          analyzed(false) {
    }

    // Returns the zero-crossing count, running the analysis first if needed.
    uint16_t getZeroCrossings() {
        if (!analyzed) analyze();
        return zeroCrossings;
    }

    // Returns zero crossings as a percentage of the sample count.
    // An empty buffer yields 0 instead of the NaN that 0/0 would produce.
    float getZeroCrossingRatioPercent() {
        const uint16_t crossings = getZeroCrossings();
        if (nsamples == 0)
            return 0.0f;
        return crossings * 100.0 / nsamples;
    }

    // Returns the raw power figure, running the analysis first if needed.
    uint32_t getPower() {
        if (!analyzed) analyze();
        return power;
    }

    // Returns log(power) - powerRef (natural logarithm) and caches it in
    // logPower. It is recomputed on every call so that a later
    // setPowerRef() takes effect. A power of 0 gives -infinity.
    float getLogPower() {
        if (!analyzed) analyze();
        logPower = ::log(static_cast<double>(power)) - powerRef;
        return logPower;
    }

    // Writes the stored minimum / maximum values through the given
    // pointers. A null pointer is skipped, so callers may request only one.
    void getMinMaxValues(int8_t *min, int8_t *max) {
        if (!analyzed) analyze();
        if (min) *min = minValue;
        if (max) *max = maxValue;
    }

    // True when the log power is a number and is not below PowerThreshold.
    bool isVoiced() {
        const float level = getLogPower();
        return !isnan(level) && level >= PowerThreshold;
    }

    // Sets the reference level subtracted from log(power) in getLogPower().
    void setPowerRef(float _powerRef) {
        powerRef = _powerRef;
    }

    // A vowel is anything whose logPower is not below PowerThreshold and
    // whose zero-crossing percentage lies below the line
    //   zeroCrossingRatioPercent = VowelSlope * (logPower - VowelXIntercept)
    bool isVowel() {
        const float level = getLogPower();
        if (level < PowerThreshold)
            return false;
        return getZeroCrossingRatioPercent() < VowelSlope * (level - VowelXIntercept);
    }

    // Classification constants. They are constexpr because an in-class
    // initializer on a `static const float` member is not valid standard
    // C++ and is rejected by C++11-and-later compilers.

    // Minimum logPower for a buffer to count as voiced (and as a vowel).
    static constexpr float PowerThreshold = -4.0;
    // Slope of the vowel boundary line used by isVowel().
    static constexpr float VowelSlope = 14.7;
    // logPower at which the vowel boundary line crosses zero.
    static constexpr float VowelXIntercept = -0.7;
};

} // namespace NK
#endif
Generated on Wed Jul 13 2022 10:13:19 by Doxygen 1.7.2