float Statistics::calcEntropy() { //calculate entropy float result = 0; for (int direction = 0; direction < 4; direction++) { float** com = 0; switch(direction) { case 0: com = this->m_cooc0; break; case 1: com = this->m_cooc1; break; case 2: com = this->m_cooc2; break; case 3: com = this->m_cooc3; break; } result += calcEntropy(com); } return result / 4; }
int main(int argc, char** argv){ if(argc <3){ cerr<<"Usage: ./main <path/to/target/config/file> <path/to/target/src/dir>"<<endl; cerr<<"or"<<endl; cerr<<"Usage: ./main -f <path/to/predefined/config-name/file> <path/to/target/src/dir>"<<endl; return 0; } std::vector<std::string> vec_options; vec_options.clear(); if( argc == 4 /*&& argv[1] == "-f"*/){ readOptions( vec_options, argv[2]); //从option-name.txt中读入配置名 } else{ //通过augeaus程序分析配置文件得到配置名 std::string conf_file_path = string(argv[1]); if( confilter(conf_file_path) == -1){ cerr<<"filter the option name in Config file failed"<<endl; return -1; } readOptions(vec_options, "./conf_list.dat"); } //先对每个配置项生成字典,计算熵,确定权重,具体每个词单独的字典匹配使用时再生成 std::map< std::string, double > entropy_dict; entropy_dict.clear(); calcEntropy(vec_options, entropy_dict); printf("options size : %d\n", vec_options.size()); // for(int i =0; i< vec_options.size(); i++){ // printf("%s\n", vec_options[i].c_str()); // } // for( std::map<string, double>::iterator ite=entropy_dict.begin(); ite!=entropy_dict.end(); ite++){ // printf("%s %lf\n", ite->first.c_str(), ite->second); // } vector<string> file_v; if(argc == 3) traverseDir(argv[2], file_v); else if( argc == 4) traverseDir(argv[3], file_v); printf("file_v size : %d\n", file_v.size()); string targetfile = findTargetFile(file_v, vec_options); //如果stack太小会出现cannot access to variable ...的错误 if( targetfile == ""){ cerr<<"Don't find any targetfile to analyze!"<<endl; return -1; } analyzeTargetFile(targetfile, vec_options, entropy_dict); }
float Statistics::calcInformationMeasures1() { float result = 0; for (int direction = 0; direction < 4; direction++) { float** com = 0; switch(direction) { case 0: com = this->m_cooc0; break; case 1: com = this->m_cooc1; break; case 2: com = this->m_cooc2; break; case 3: com = this->m_cooc3; break; } //calculate HXY1 float HXY1 = 0; for (int i = 0; i < this->m_numColors; i++) { for (int j = 0; j < this->m_numColors; j++) { HXY1 += com[i][j] * log(px(com, i) * py(com, j) + Statistics::epsilon); } } HXY1 *= -1; //calculate HX and HY float HX = 0, HY = 0; for (int i = 0; i < this->m_numColors; i++) { HX += px(com, i) * log(px(com, i) + Statistics::epsilon); HY += py(com, i) * log(py(com, i) + Statistics::epsilon); } HX *= -1; HY *= -1; //calculate HXY float HXY = calcEntropy(com); //calculate information measures 1 result += (HXY - HXY1) / max(HX, HY); } return result / 4; }
float Statistics::calcInformationMeasures2() { float result = 0; for (int direction = 0; direction < 4; direction++) { float** com = 0; switch(direction) { case 0: com = this->m_cooc0; break; case 1: com = this->m_cooc1; break; case 2: com = this->m_cooc2; break; case 3: com = this->m_cooc3; break; } //calculate HXY2 float HXY2 = 0; for (int i = 0; i < this->m_numColors; i++) { for (int j = 0; j < this->m_numColors; j++) { HXY2 += px(com, i) * py(com, j) * log(px(com, i) * py(com, j) + Statistics::epsilon); } } HXY2 *= -1; //calculate HXY float HXY = calcEntropy(com); //calculate information measures 1 result += sqrt(1 - exp(-2.0 * (HXY2 - HXY))); } return result / 4; }