Created
February 3, 2012 16:57
-
-
Save naoh16/1731120 to your computer and use it in GitHub Desktop.
Revisions
-
naoh16 revised this gist
Feb 18, 2012 . 1 changed file with 1 addition and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ * 2012-02-18 Add licence * 2008-10-12 引数をすべてオプションに変更(-h, -s, -x, -r, -l) * Pre-Emphasis オプション(-e) * 2008-10-10 E-stepとM-stepが混ざっていたのを修正 * HTK-Likeな初期値の指定 * データの80%tile-20%tileをSNR_ptileとして出力 * 2007-08-02 DAT法によるSNRも出力 @@ -30,11 +30,6 @@ * MAGNITUDE ESTIMATION USING GENERALIZED GAMMA PRIOR DISTRIBUTION," * in Proceedings of ICASSP, 2006. * * Last Modified: 2012/02/18 16:31:07. */ /*************** LICENSE ***************************************** -
naoh16 revised this gist
Feb 18, 2012 . 1 changed file with 392 additions and 121 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,30 +1,115 @@ /** * emsnr * SNRを計算する * * Usage: echo datafilename | emsnr -r residual -l maxloop * Output: filename SNR SNR_fix SNR_ptile weight low_power high_power low_variance high_variance loglikelyhood * * 2012-02-18 Add licence * 2008-10-12 引数をすべてオプションに変更(-h, -s, -x, -r, -l) * Pre-Emphasis オプション(-e) * 2008-10-10 E-stepとM-stepが混ざ・トいたのを修正 * HTK-Likeな初期値の指定 * データの80%tile-20%tileをSNR_ptileとして出力 * 2007-08-02 DAT法によるSNRも出力 * 2006-12-24 平均値の初期値を変更(10%タイルと90%タイルの値に) * 2006-12-24 平均値の初期値を変更(max,minから90%の値に) * 精度向上(double->long double) * 2004-10-31 尤度がNaNの時は終了するように変更 * 2004-10-12 分散の下限を設定 * 2004-09-24 メモリ開放忘れ,ファイル閉じ忘れ修正 * 2004-05-13 * * Bibliography * [1] T. H. Dat, K. Takeda, F. Itakura, * "On-line Gaussian mixture modeling in the log-power * domain for signal-to-noise ratio estimation and speech enhancement," * Speech Comunication, vol. 48, Issue 11, pp. 1515-1527, 2006. * [2] T. H. Dat, K. Takeda, F. Itakura, * "MULTICHANNEL SPEECH ENHANCEMENT BASED ON SPEECH SPECTRAL * MAGNITUDE ESTIMATION USING GENERALIZED GAMMA PRIOR DISTRIBUTION," * in Proceedings of ICASSP, 2006. * * Copyright (c) 2011-2012 Sunao Hara, NAIST/Japan. * Copyright (c) 2004-2011 Sunao Hara, Takeda Laboratory, Nagoya University. * Original(emsnr.pl) Copyright: * Copyright (c) 2002-2004 Hiroshi Fujimura, Takeda Laboratory, Nagoya University. * * Last Modified: 2012/02/18 16:31:07. */ /*************** LICENSE ***************************************** "emsnr" is licensed under the modified BSD license (3-clause license). Copyright (c) 2011-2012 Sunao Hara <hara@is.naist.jp>, NAIST/JAPAN. Copyright (c) 2004-2011 Sunao Hara <naoh@nagoya-u.jp>, Takeda Laboratory, Nagoya University. Copyright (c) 2002-2004 Hiroshi Fujimura, Takeda Laboratory, Nagoya University. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the author, the organization, nor the contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************/ #include <stdio.h> #include <stdlib.h> #include <time.h> #include <math.h> #ifdef _MSC_VER # include <search.h> # include <float.h> # define isnan(x) _isnan(x) #endif /** * INITIALIZE_MODE * 1 ... 平均値 = データの最小値と最大値の間で10%と90% * 分散 = 1.0 * 2 ... fujimura Original * 平均値 = データの10%-tileと90%-tile * 分散 = 1.0 * 3 ... HTK Like * 平均値 = 標本平均からプラスマイナス標本分散*0.2だけ離れた点 * 分散 = 標本分散 / 2 */ #define INITIALIZE_MODE 3 #define TPI (double)6.283185307179586476925286766559 #define ZERO_POWER -100.0 //#define VAR_FLOOR 1.0E-3 //#define LAMBDA_FLOOR 1.0E-3 #define VAR_FLOOR 1.0 #define VAR_CEIL 500.0 #define LAMBDA_FLOOR 0.2 // at 16000 [sample/sec] // 1 [ms] = 16 [sample] // 2 [ms] = 32 [sample] // 4 [ms] = 64 [sample] // at 8000 [sample/sec] // 1 [ms] = 8 [sample] // 2 [ms] = 16 [sample] // 4 [ms] = 32 [sample] #define FRAME_SHIFT 8 #define WINDOW_SIZE 32 // defined in stdio.h //#define FILENAME_MAX 1024 @@ -41,8 +126,15 @@ typedef struct { double lambda; } GAUSS_PROB_PARAM; int my_compare_double(const void *a, const void *b) { double* da = (double *)a; double* db = (double *)b; return (*da > *db)?1:-1; } /** * short(16bit)のデータをdouble型にして誌錡 */ double* i16d_readdata(char *filename, long *len, int bSwap) { short *tmp_data; @@ -56,14 +148,16 @@ double* i16d_readdata(char *filename, long *len, int bSwap) { fprintf(stderr, "Error at fopen(i16d_readdata).\n"); return NULL; } // Calculate length of the file { fseek(fp, 0L, SEEK_END); _len=ftell(fp) / sizeof(short); *len = _len; fseek(fp, 0L, SEEK_SET); } // Skip RIFF header chunks if(_len>4) { unsigned char buf[5]; unsigned long chanklen; @@ -87,13 +181,15 @@ double* i16d_readdata(char *filename, long *len, int bSwap) { *len = _len; fprintf(stderr, "header was removed.\n"); } else { // return to start position, if the file format is not RIFF. fseek(fp, 0L, SEEK_SET); } } /* * allocate the workspace memories */ tmp_data = malloc(_len * sizeof(short)); if(tmp_data == NULL){ fprintf(stderr, "Error at malloc_tmpdata(i16d_readdata).\n"); @@ -106,10 +202,11 @@ double* i16d_readdata(char *filename, long *len, int bSwap) { return NULL; } // load. fread(tmp_data, sizeof(short), _len, fp); if(bSwap) { unsigned char *p, tmp; p = (unsigned char *)tmp_data; @@ -119,15 +216,22 @@ double* i16d_readdata(char *filename, long *len, int bSwap) { p[2*i] = p[2*i+1]; p[2*i+1] = tmp; } } // doubleの配列にコピー(代入). // zeroデータへの対策のために雑音追加. { double data_avg = 0.0; for(i=0; i<_len; i++){ data[i] = ((double)tmp_data[i]+0.1*(rand()%2)) / 32768.0; data_avg += data[i]; } data_avg = data_avg / (double)_len; // mean normalization. for(i=0; i<_len; i++){ data[i] -= data_avg; } } free(tmp_data); tmp_data = NULL; @@ -144,13 +248,17 @@ void init_hamming() { } } void dd_preemphasis_I(double* srcdst, long len) { long s; for(s=len-1; s>0; --s) { srcdst[s] = srcdst[s] - 0.97*srcdst[s-1]; } } double* dd_hamming(double* src, double *dest, long len) { long n; for(n=0; n<len; n++) { dest[n] = src[n] * g_hamming_table[n]; } return dest; } @@ -161,6 +269,9 @@ double dd_avepower(double* src, long len) { for(i=0; i<len; i++){ result += src[i]*src[i] / (double)(len); } // if(result < 1.0E-16) { // result = 1.0E-8 * (rand()%10) / (double)len; // } return result; } @@ -189,120 +300,172 @@ double* dd_frame_power(double* src, long src_len, long* dest_len) { double gauss_prob(double x, double mu, double sigma){ double x_mu = x-mu; return exp(-(x_mu*x_mu)/(2.0*sigma)) / sqrt(TPI*sigma); } double mix2_gauss_prob(double x, GAUSS_PROB_PARAM* param){ // double x_mu1, x_mu2; // x_mu1 = x - param->mu1; // x_mu2 = x - param->mu2; return param->lambda * gauss_prob(x, param->mu1, param->sigma1) + (1.0-param->lambda) * gauss_prob(x, param->mu2, param->sigma2); } void init_gaussparam(GAUSS_PROB_PARAM *param, double* data, long len){ long i; // 重みの初期値は等分(=0.5). param->lambda = 0.5; #if INITIALIZE_MODE==3 { // 標本平均から標本標準偏差だけずれた点を各平均とする. long double s_mean = 0.0L; long double s_var = 0.0L; long double s_sd = 0.0L; for(i=0; i<len; ++i) { s_mean += data[i]; } s_mean /= (double)len; for(i=0; i<len; ++i) { double x_mean = data[i]-s_mean; s_var += x_mean * x_mean; } s_var /= (double)(len - 1); s_sd = sqrt(s_var); param->mu1 = s_mean - 0.2*s_sd; param->mu2 = s_mean + 0.2*s_sd; param->sigma1 = 0.8 * s_var; param->sigma2 = 0.8 * s_var; } #elif INITIALIZE_MODE==2 { // sortしてパーセンタイルで初期値決定. qsort(data, len, sizeof(double), (int (*)(const void*, const void*))my_compare_double); // for(i=0; i<len; ++i) { // printf("%d: %f\n", i, data[i]); // } param->mu1 = data[(int)(0.1*len)]; param->mu2 = data[(int)(0.9*len)]; // 分散の初期値は10. param->sigma1 = 10.0; param->sigma2 = 10.0; } #else { double mu1, mu2; // 平均の初期値は学習データの最小値と最大値. mu1 = mu2 = data[0]; for(i=1; i<len; i++){ if(data[i] < mu1) mu1 = data[i]; if(data[i] > mu2) mu2 = data[i]; } // param->mu1 = mu1; // param->mu2 = mu2; // オリジナルは90パーセンタイルだがココでは90%相当の値とする. param->mu1 = mu1 + 0.1 * (mu2 - mu1); param->mu2 = mu2 - 0.1 * (mu2 - mu1); // 分散の初期値は1. param->sigma1 = 1.0; param->sigma2 = 1.0; } #endif } double loglikelyhood(GAUSS_PROB_PARAM* param, double* data, int data_len){ int i; double sum=0.0; for(i=0; i<data_len; i++){ sum += log(mix2_gauss_prob(data[i], param)+1.0E-12); } return sum; } double em_2mixture(GAUSS_PROB_PARAM *param, int loop_count, double apply_residual){ int i, k; int same_count=0; long double new_loglikelyhood = 0.0, old_loglikelyhood = 0.0; double *data = param->data; long len = param->len; // パラメータ初期化 init_gaussparam(param, data, len); new_loglikelyhood = loglikelyhood(param, data, len); #if _DEBUG printf("#loop lambda mu1 sigma1 mu2 sigma2 loglikelyhood\n"); printf("%3d %f %f %f %f %f %Lf\n", -1, param->lambda, param->mu1, param->sigma1, param->mu2, param->sigma2, new_loglikelyhood); #endif for(k=0; k<loop_count; k++){ long double prob_m1, prob_m2, prob_mix, prob_Q1, prob_Q2; long double prob_sum1=0.0, prob_sum2=0.0; long double new_mu1=0.0, new_mu2=0.0; long double new_sigma1=0.0, new_sigma2=0.0; long double new_lambda=0.0; long double x_mu; // 期待値計算. for(i=0; i<len; i++){ // // E-step // prob_mix = mix2_gauss_prob(data[i], param); prob_m1 = param->lambda * gauss_prob(data[i], param->mu1, param->sigma1); prob_Q1 = prob_m1 / prob_mix; prob_m2 = (1.0 - param->lambda) * gauss_prob(data[i], param->mu2, param->sigma2); prob_Q2 = prob_m2 / prob_mix; // // M-step // // weight prob_sum1 += prob_Q1; prob_sum2 += prob_Q2; // mu new_mu1 += data[i] * prob_Q1; new_mu2 += data[i] * prob_Q2; // sigma x_mu = data[i] - param->mu1; new_sigma1 += x_mu * x_mu * prob_Q1; x_mu = data[i] - param->mu2; new_sigma2 += x_mu * x_mu * prob_Q2; } new_lambda = prob_sum1 / (prob_sum1 + prob_sum2); new_mu1 /= prob_sum1; new_mu2 /= prob_sum2; new_sigma1 /= prob_sum1; new_sigma2 /= prob_sum2; // 重みフロアリング. // if(new_lambda < LAMBDA_FLOOR) new_lambda = LAMBDA_FLOOR; // else if(new_lambda > (1.0-LAMBDA_FLOOR)) new_lambda = (1.0-LAMBDA_FLOOR); if(new_lambda < LAMBDA_FLOOR) new_lambda = 0.5; else if(new_lambda > (1.0-LAMBDA_FLOOR)) new_lambda = 0.5; // 分散フロアリング. if(new_sigma1 < VAR_FLOOR) new_sigma1 = VAR_FLOOR; if(new_sigma2 < VAR_FLOOR) new_sigma2 = VAR_FLOOR; if(new_sigma1 > VAR_CEIL) new_sigma1 = VAR_CEIL; if(new_sigma2 > VAR_CEIL) new_sigma2 = VAR_CEIL; // パラメータ更新. param->lambda = new_lambda; param->mu1 = new_mu1; param->mu2 = new_mu2; param->sigma1 = new_sigma1; param->sigma2 = new_sigma2; // 新パラメータでの尤度計算. new_loglikelyhood = loglikelyhood(param, data, len); #if _DEBUG printf("%3d %f %f %f %f %f %Lf\n", k, param->lambda, param->mu1, param->sigma1, param->mu2, param->sigma2, new_loglikelyhood); #endif // NaN 対策. if(isnan(new_loglikelyhood)) break; if(new_loglikelyhood - old_loglikelyhood < apply_residual){ @@ -313,53 +476,120 @@ double em_2mixture(GAUSS_PROB_PARAM *param, int loop_count, double apply_residua old_loglikelyhood = new_loglikelyhood; if(same_count > 10) break; } // sort parameters if( param->mu1 > param->mu2 ) { double tmp; param->lambda = 1.0 - param->lambda; tmp = param->mu1; param->mu1 = param->mu2; param->mu2 = tmp; tmp = param->sigma1; param->sigma1 = param->sigma2; param->sigma2 = tmp; } return new_loglikelyhood; } void usage() { fprintf(stderr, "usage: emsnr [-h] [-s] [-x] [-r residual] [-l maxloop] < filelist\n"); fprintf(stderr, " -h: output help and exit.\n"); fprintf(stderr, " -s: not output system parameter on startup.\n"); fprintf(stderr, " -x: speech files are treated as BigEndian.\n"); fprintf(stderr, " -r: EM stop paramter. Minimum residual of log-likelyfood updating.\n"); fprintf(stderr, " -l: EM stop paramter. Maximum count of EM loop.\n"); fprintf(stderr, "\n"); fprintf(stderr, "example: echo test.raw | emsnr -r 1.0E-8 -l 1000\n"); } int main(int argc, char* argv[]) { int loop_count = 1000; char filename[FILENAME_MAX], *p; double *data, *data_raw; double loglikelyhood = 0.0; double apply_residual = 1.0E-8; int bSwap = 0; int bVerbose = 1; int bPreEmphasis = 0; long len_raw, len; GAUSS_PROB_PARAM param; long filecount = 0; double snrsum = 0.0; srand((unsigned)time(NULL)); // Load arguments. { int i = 0; while(++i<argc) { if(strcmp(argv[i], "-r")==0) { apply_residual = atof(argv[++i]); } else if(strcmp(argv[i], "-l")==0) { loop_count = atoi(argv[++i]); } else if(strcmp(argv[i], "-x")==0) { bSwap = 1; } else if(strcmp(argv[i], "-e")==0) { bPreEmphasis = 1; } else if(strcmp(argv[i], "-s")==0) { bVerbose = 0; } else if(strcmp(argv[i], "-h")==0) { usage(); return 0; } else { break; } } } // Output Parameters if(bVerbose>0) { fprintf(stderr, "SYSTEM INFORMATION -----------------------------\n"); fprintf(stderr, " source-file: %s\n", __FILE__); fprintf(stderr, " compiled at %s %s\n", __DATE__, __TIME__); fprintf(stderr, "SPEECH PARAMETERS ------------------------------\n"); fprintf(stderr, " pre-emphasis: %s\n", (bPreEmphasis)?"True":"False"); fprintf(stderr, " swap: %s\n", (bSwap)?"True":"False"); fprintf(stderr, "EM PARAMETERS ----------------------------------\n"); fprintf(stderr, " loop count: %d\n", loop_count); fprintf(stderr, " LL update: %e\n", apply_residual); fprintf(stderr, "OUTPUT -----------------------------------------\n"); fprintf(stderr, "filename seg-SNR fix-SNR ptile-SNR lambda low_power high_power low_variance high_variance loglikelyhood\n"); fprintf(stderr, "================================================\n"); } // Initialize table of hamming window's coefficients init_hamming(); // Loop: get filenames from stdin while(NULL != fgets(&filename[0], FILENAME_MAX, stdin)){ double snr = 0.0; double snr_fix = 0.0; double snr_ptile = 0.0; while(NULL != (p=(char*)strrchr(filename, '\r'))) *p = '\0'; while(NULL != (p=(char*)strrchr(filename, '\n'))) *p = '\0'; // get speech data from file printf("%s ", filename); data_raw = i16d_readdata(filename, &len_raw, bSwap); if(data_raw == NULL) { fprintf(stderr, "Error at readdata[%s].\n", filename); return 1; } // Pre-Emphasis if(bPreEmphasis) dd_preemphasis_I(data_raw, len_raw); #if _DEBUG // debug { FILE* fpdeb = fopen("data_pre.raw", "wb"); fwrite(data_raw, sizeof(double), len_raw, fpdeb); fclose(fpdeb); } #endif // frame_powerをdataに. data = dd_frame_power(data_raw, len_raw, &len); // fprintf(stderr, "frame len: %d\n", len); printf("%d ", len); @@ -369,16 +599,57 @@ int main(int argc, char* argv[]) param.data = data; param.len = len; loglikelyhood = em_2mixture(¶m, loop_count, apply_residual); snr = fabs(param.mu2 - param.mu1); // DAT法に基づく SNR 算出. // 注:ICASSP2005の原稿で d は差にしているが、和の誤植である. // SpeechComunicationでは修正されている. { if(snr > 10.0) { snr_fix = snr; } else { double a = 4.342944819; // = 10.0/ln(10.0); double ia = 0.2302585093; // = ln(10.0)/10.0; double iaia = 0.0530189811; // = (ln(10.0)/10.0)^2; double m = ia * fabs(param.mu2 - param.mu1); double d = iaia * (param.sigma2 + param.sigma1); double snr1 = snr - a * 0.7 * exp( -m + 0.5*d ); // -(m-d/2) double snr2 = snr1 - a * 0.9 * exp( -2.0*m + 2.0*d ); // -2(m-d) double snr3 = snr2 - a * exp( -3.0*m + 4.5*d ); // -3(m-3d/2) // 補正が強すぎない範囲で適用する. // -8.945 = 10/ln(10) ln(e^r-1), r=0.12 // なお、基本的には補正すらことで小さくなるので、$snr1 < $snr0 . if((snr - snr1 > 8.0) || (snr1 < -8.945)) { snr_fix = snr; } else if((snr1 - snr2 > 8.0) || (snr2 < -8.945)) { snr_fix = snr1; } else if((snr2 - snr3 > 8.0) || (snr3 < -8.945)) { snr_fix = snr2; } else { snr_fix = snr3; } } } { qsort(data, len, sizeof(double), (int (*)(const void*, const void*))my_compare_double); snr_ptile = data[(int)(0.8*len)] - data[(int)(0.2*len)]; } printf("%f %f %f %f %f %f %f %f %f\n", snr, snr_fix, snr_ptile, param.lambda, param.mu1, param.mu2, param.sigma1, param.sigma2, loglikelyhood); fflush(stdout); free(data_raw); data_raw = NULL; free(data); data = NULL; memset(&filename[0], 0, FILENAME_MAX); // 最後の表示用の保存. if(!isnan(snr)) { ++filecount; snrsum += snr; -
naoh16 created this gist
Feb 3, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,391 @@ /** * emsnr.c * * SNRを計算する * * Usage: echo datafilename | emsnr residual maxloop * Output: filename SNR low_power high_power low_variance high_variance * * HARA Sunao * 2004-10-31 尤度がNaNの時は終了するように変更 * 2004-10-12 分散の下限を設定 * 2004-09-24 メモリ開放忘れ,ファイル閉じ忘れ修正 * 2004-05-13 */ #include <stdio.h> #include <stdlib.h> #include <math.h> #define TPI (double)6.283185307179586476925286766559 #define VAR_FLOOR 1.0E-4 //#define FRAME_SHIFT 100 //#define WINDOW_SIZE 256 #define FRAME_SHIFT 32 #define WINDOW_SIZE 64 // defined in stdio.h //#define FILENAME_MAX 1024 double g_hamming_table[WINDOW_SIZE]; typedef struct { double *data; long len; double mu1; double mu2; double sigma1; double sigma2; double lambda; } GAUSS_PROB_PARAM; /** * short(16bit)のデータをdouble型にして取得 */ double* i16d_readdata(char *filename, long *len, int bSwap) { short *tmp_data; double *data, *p; long i, _len; // char buf[256]; FILE *fp; fp = fopen(filename, "rb"); if(!fp){ fprintf(stderr, "Error at fopen(i16d_readdata).\n"); return NULL; } // 長さの取得 { fseek(fp, 0L, SEEK_END); _len=ftell(fp) / sizeof(short); *len = _len; fseek(fp, 0L, SEEK_SET); } // RIFF ヘッダのチェックとすっとばし if(_len>4) { unsigned char buf[5]; unsigned long chanklen; fread(buf, sizeof(char), 4, fp); // "RIFF" buf[4] = '\0'; if(buf[0]=='R' && buf[1]=='I' && buf[2]=='F' && buf[3]=='F') { fprintf(stderr, "Caution: RIFF File..."); fread(buf, sizeof(char), 4, fp); // "RIFF" length fread(buf, sizeof(char), 4, fp); // "WAVE" fread(buf, sizeof(char), 4, fp); // "fmt " while(!(buf[0]=='d' && buf[1]=='a' && buf[2]=='t' && buf[3]=='a')) { // printf("%s", buf); fread(&chanklen, sizeof(long), 1, fp); // "????" length fseek(fp, chanklen, SEEK_CUR); fread(buf, sizeof(char), 4, fp); // "????" } // printf("%s", buf); fread(&chanklen, sizeof(long), 1, fp); // "????" length // printf("[%d->%d]\n", _len, chanklen/sizeof(short)); _len = chanklen / sizeof(short); *len = _len; fprintf(stderr, "header was removed.\n"); } else { // 元に戻しておく fseek(fp, 0L, SEEK_SET); } } // メモリ確保 tmp_data = malloc(_len * sizeof(short)); if(tmp_data == NULL){ fprintf(stderr, "Error at malloc_tmpdata(i16d_readdata).\n"); return NULL; } data = malloc(_len * sizeof(double)); if(data == NULL){ fprintf(stderr, "Error at malloc_data(i16d_readdata).\n"); free(tmp_data); return NULL; } if(bSwap) { unsigned char *p, tmp; // 読み込み fread(tmp_data, sizeof(short), _len, fp); p = (unsigned char *)tmp_data; // swap for(i=0; i<_len; i++){ tmp = p[2*i]; p[2*i] = p[2*i+1]; p[2*i+1] = tmp; } } else{ // 読み込み fread(tmp_data, sizeof(short), _len, fp); } // doubleの配列にコピー(代入) for(i=0; i<_len; i++){ data[i] = (double)tmp_data[i]; } free(tmp_data); tmp_data = NULL; fclose(fp); return data; } void init_hamming() { int n; double len_1 = (double)(WINDOW_SIZE - 1); for(n=0; n<WINDOW_SIZE; n++) { g_hamming_table[n] = (0.54 - 0.46 * cos(TPI*(double)n / len_1)); } } double* dd_hamming(double* src, double *dest, long len) { long n; // double len_1 = (double)(len - 1); for(n=0; n<len; n++) { // dest[n] = src[n] * (0.54 - 0.46 * cos(TPI*(double)n / len_1)); dest[n] = src[n] * g_hamming_table[n]; // printf("%f, ", dest[n]); } return dest; } double dd_avepower(double* src, long len) { double result = 0.0; long i; for(i=0; i<len; i++){ result += src[i]*src[i] / (double)(len); } return result; } double* dd_frame_power(double* src, long src_len, long* dest_len) { long f = 0; long frame_len; double *frame_power; double *frame_tmp; frame_len = (long)((double)(src_len - WINDOW_SIZE) / (double)FRAME_SHIFT + 0.5); frame_power = (double *)malloc(frame_len*sizeof(double)); frame_tmp = (double *)malloc(WINDOW_SIZE*sizeof(double)); for(f=0; f<frame_len; f++){ dd_hamming(&src[f*FRAME_SHIFT], frame_tmp, WINDOW_SIZE); frame_power[f] = dd_avepower(frame_tmp, WINDOW_SIZE); frame_power[f] = 10.0 * log10(frame_power[f]); // printf("%d:%f\n", f, frame_power[f]); } *dest_len = frame_len; free(frame_tmp); frame_tmp = NULL; return frame_power; } double gauss_prob(double x, double mu, double sigma){ double x_mu = x-mu; return exp(-(x_mu*x_mu)/(2*sigma)) / sqrt(TPI*sigma); } double mix2_gauss_prob(double x, GAUSS_PROB_PARAM* param){ double x_mu1, x_mu2; // x_mu1 = x - param->mu1; // x_mu2 = x - param->mu2; return param->lambda * gauss_prob(x, param->mu1, param->sigma1) + (1-param->lambda) * gauss_prob(x, param->mu2, param->sigma2); } void init_gaussparam(GAUSS_PROB_PARAM *param, double* data, long len){ long i; double mu1, mu2, x_mu; double sigma1, sigma2; // 重みの初期値は等分(=0.5) param->lambda = 0.5; // 平均の初期値は学習データの最小値と最大値 mu1 = mu2 = data[0]; for(i=1; i<len; i++){ if(data[i] < mu1) mu1 = data[i]; if(data[i] > mu2) mu2 = data[i]; } param->mu1 = mu1; param->mu2 = mu2; // 分散の初期値は1 param->sigma1 = 1.0; param->sigma2 = 1.0; } double loglikelyhood(GAUSS_PROB_PARAM* param, double* data, int data_len){ int i; double sum=0.0; for(i=0; i<data_len; i++){ sum += log(mix2_gauss_prob(data[i], param)); } return sum; } double em_2mixture(GAUSS_PROB_PARAM *param, int loop_count, double apply_residual){ int i, k; int same_count=0; double new_loglikelyhood = 0.0, old_loglikelyhood = 0.0; double *data = param->data; long len = param->len; // パラメータ初期化 init_gaussparam(param, data, len); // printf("#loop lambda mu1 sigma1 mu2 sigma2 loglikelyhood\n"); new_loglikelyhood = loglikelyhood(param, data, len); // printf("%3d %f %f %f %f %f %f\n", // -1, param.lambda, param.mu1, param.sigma1, param.mu2, param.sigma2, new_loglikelyhood); for(k=0; k<loop_count; k++){ double prob, tmp_prob; double prob_sum1=0.0, prob_sum2=0.0; double new_mu1=0.0, new_mu2=0.0; double new_sigma1=0.0, new_sigma2=0.0; double new_lambda=0.0; double x_mu; // 重みの最推定 for(i=0; i<len; i++){ tmp_prob = mix2_gauss_prob(data[i], param); prob = param->lambda * gauss_prob(data[i], param->mu1, param->sigma1); prob_sum1 += prob / tmp_prob; prob = (1 - param->lambda) * gauss_prob(data[i], param->mu2, param->sigma2); prob_sum2 += prob / tmp_prob; } param->lambda = prob_sum1 / (prob_sum1 + prob_sum2); if(param->lambda < 0.00001) param->lambda = 0.00001; else if(param->lambda > 0.99999) param->lambda = 0.99999; // 平均の最推定(estimate) for(i=0; i<len; i++){ tmp_prob = mix2_gauss_prob(data[i], param); prob = param->lambda * gauss_prob(data[i], param->mu1, param->sigma1); new_mu1 += data[i] * prob / tmp_prob; prob = (1 - param->lambda) * gauss_prob(data[i], param->mu2, param->sigma2); new_mu2 += data[i] * prob / tmp_prob; } new_mu1 /= prob_sum1; new_mu2 /= prob_sum2; param->mu1 = new_mu1; param->mu2 = new_mu2; // 分散の最推定 for(i=0; i<len; i++){ tmp_prob = mix2_gauss_prob(data[i], param); prob = param->lambda * gauss_prob(data[i], param->mu1, param->sigma1); x_mu = data[i] - new_mu1; new_sigma1 += x_mu * x_mu * prob / tmp_prob; prob = (1 - param->lambda) * gauss_prob(data[i], param->mu2, param->sigma2); x_mu = data[i] - new_mu2; new_sigma2 += x_mu * x_mu * prob / tmp_prob; } param->sigma1 = new_sigma1 / prob_sum1; param->sigma2 = new_sigma2 / prob_sum2; // 分散フロアリング if(param->sigma1 < VAR_FLOOR) param->sigma1 = VAR_FLOOR; if(param->sigma2 < VAR_FLOOR) param->sigma2 = VAR_FLOOR; new_loglikelyhood = loglikelyhood(param, data, len); #if _DEBUG printf("%3d %f %f %f %f %f %f\n", k, param->lambda, param->mu1, param->sigma1, param->mu2, param->sigma2, new_loglikelyhood); #endif // NaN 対策 if(isnan(new_loglikelyhood)) break; if(new_loglikelyhood - old_loglikelyhood < apply_residual){ same_count++; }else{ same_count = 0; } old_loglikelyhood = new_loglikelyhood; if(same_count > 10) break; } return new_loglikelyhood; } int main(int argc, char* argv[]) { int loop_count; char filename[FILENAME_MAX], *p; double *data, *data_raw; double loglikelyhood = 0.0; double apply_residual = 0.0; int bSwap = 0; long len_raw, len; GAUSS_PROB_PARAM param; long filecount = 0; double snrsum = 0.0; if(argc <= 2){ fprintf(stderr, " usage: echo datafilename | emsnr residual maxloop\n"); fprintf(stderr, "example: echo test.raw | emsnr 1.0E-8 1000\n"); fprintf(stderr, "\nOutput: filename SNR low_power high_power low_variance high_variance loglikelyhood\n"); return 1; } // filename = argv[1]; apply_residual = atof(argv[1]); loop_count = atoi(argv[2]); if(argc > 3) bSwap = (strcmp(argv[3], "-x")==0)?1:0; // ハミング窓テーブル作成 init_hamming(); // データ読み込み while(NULL != fgets(&filename[0], FILENAME_MAX, stdin)){ double snr = 0.0; while(NULL != (p=strrchr(filename, '\r'))) *p = '\0'; while(NULL != (p=strrchr(filename, '\n'))) *p = '\0'; printf("%s ", filename); data_raw = i16d_readdata(filename, &len_raw, bSwap); if(data_raw == NULL) { fprintf(stderr, "Error at readdata[%s].\n", filename); return 1; } // frame_powerをdataに data = dd_frame_power(data_raw, len_raw, &len); // fprintf(stderr, "frame len: %d\n", len); printf("%d ", len); fflush(stdout); param.data = data; param.len = len; loglikelyhood = em_2mixture(¶m, loop_count, apply_residual); snr = fabs(param.mu2 - param.mu1); printf("%f %f %f %f %f %f\n", snr, param.mu1, param.mu2, param.sigma1, param.sigma2, loglikelyhood); fflush(stdout); free(data_raw); data_raw = NULL; free(data); data = NULL; memset(&filename[0], 0, FILENAME_MAX); // 最後の表示用の保存 if(!isnan(snr)) { ++filecount; snrsum += snr; } } fprintf(stderr, "Filecount=%d MeanSNR=%.2f\n", filecount, snrsum/filecount); return 0; }