void Consolidator::performConsolidation(ErrorCalculator& eCalculator, int gap,int min_snp,float min_cm,int extendSnp) { //cout<<"in consolidator extendsnp"<<extendSnp<<endl; int consolidations = 0, removed = 0; for(int i=0;i<person_count;++i)//for each person { for(int j=i;j<person_count;++j)//compare with each other person { int temp1=-1,temp2=-1; for(int l=0;l<m_matches[i][j].size();++l)//for each match { temp1= m_matches[i][j][l].start;//cout<<"temp1 before = "<<temp1<<endl; temp2= m_matches[i][j][l].end;//cout<<"temp2 before = "<<temp2<<endl; if(temp2==-1||temp1==-1){continue;} for(int k=l+1;k<m_matches[i][j].size();++k) //for each other match { if((m_matches[i][j][k].start-temp2-1)<=gap) { ++consolidations; temp2=m_matches[i][j][k].end; m_matches[i][j][l].end=temp2; m_matches[i][j][k].end=-1; } else break; } //this may be what is causing our initial drops to never show up... if( ( (temp2-temp1)<min_snp) || ( (eCalculator.getCMDistance(temp2)-eCalculator.getCMDistance(temp1))<min_cm) ) { ++removed; // m_matches[i][j][l].end=m_matches[i][j][l].start=-1; } } } } /*std::string str = " \n Number of Consolidations: " + NumberToString( consolidations ); str = str + " \n Number of matches removed due to initial length: " + NumberToString( removed );*/ /*new*/ global_initial = removed; consolidated_str = "Number of Consolidations: " + NumberToString( consolidations ); initial_drop_str = "Number of matches removed due to initial length: " + NumberToString( removed ); /*wen*/ // eCalculator.log( str ); }
/**
 * Consolidates the matches of every pair of individuals and tallies the
 * consolidated segments that do not reach the minimum length.
 *
 * Walking each pair's match list in order, a match swallows every directly
 * following match that starts no more than `gap` markers after its current
 * end. A swallowed match gets end = -1 so later passes skip it.
 *
 * Segments shorter than `min_snp` markers or `min_cm` centimorgans are
 * counted but left untouched in m_matches.
 *
 * Writes global_initial, consolidated_str and initial_drop_str.
 *
 * @param eCalculator provides getCMDistance() for a marker index
 * @param gap         maximum marker distance bridged when merging
 * @param min_snp     minimum length in markers
 * @param min_cm      minimum length in centimorgans
 */
void Consolidator::performConsolidation(ErrorCalculator& eCalculator, int gap,int min_snp,float min_cm)
{
    int merged = 0;
    int tooShortCount = 0;

    for(int p = 0; p < person_count; ++p)
    {
        for(int q = p; q < person_count; ++q)
        {
            for(int idx = 0; idx < m_matches[p][q].size(); ++idx)
            {
                const int from = m_matches[p][q][idx].start;
                int to = m_matches[p][q][idx].end;

                // -1 marks a match that was merged into a predecessor.
                if(from == -1 || to == -1)
                {
                    continue;
                }

                // Absorb followers until one starts beyond the allowed gap.
                int follower = idx + 1;
                while(follower < m_matches[p][q].size()
                      && (m_matches[p][q][follower].start - to - 1) <= gap)
                {
                    ++merged;
                    to = m_matches[p][q][follower].end;
                    m_matches[p][q][idx].end = to;
                    m_matches[p][q][follower].end = -1;
                    ++follower;
                }

                // Count only; the segment itself is kept.
                if((to - from) < min_snp)
                {
                    ++tooShortCount;
                }
                else if((eCalculator.getCMDistance(to) - eCalculator.getCMDistance(from)) < min_cm)
                {
                    ++tooShortCount;
                }
            }
        }
    }

    global_initial = tooShortCount;
    consolidated_str = "Number of Consolidations: " + NumberToString( merged );
    initial_drop_str = "Number of matches removed due to initial length: " + NumberToString( tooShortCount );
}
void Consolidator::readMatches(string path,int pers_count, ErrorCalculator& eCalculator, int trueSNP, float trueCM ) { person_count=pers_count; if(pers_count<=0) { std::cerr<<"wrong BSID file, check it, reading ped file failed"<<std::endl; return; } try { person_count=pers_count; m_matches.resize(pers_count+1); m_trueMatches.resize( pers_count + 1 ); for(int i=0;i<pers_count;++i) { m_matches[i].resize(pers_count+1); m_trueMatches[i].resize( pers_count + 1 ); } unsigned int pid[2]; unsigned int sid[2]; unsigned int dif,hom[2]; ifstream file_bmatch(path.c_str(),ios::binary); if( !file_bmatch ) { cerr<<"unable to open the bmatch file, exiting the program" << endl; exit( -1 ); } while ( !file_bmatch.eof()) { pid[0] = -1; file_bmatch.read( (char*) &pid[0] , sizeof( unsigned int ) ); if ( pid[0] == -1 ) continue; file_bmatch.read( (char*) &pid[1] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[0] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[1] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &dif , sizeof( int ) ); file_bmatch.read( (char*) &hom[0] , sizeof( bool ) ); file_bmatch.read( (char*) &hom[1] , sizeof( bool ) ); if(pid[0]>=pers_count||pid[1]>=pers_count) { cerr<<"problem with bsid file, check it please"<<endl; return; } SNP snp; snp.start=sid[0]; snp.end=sid[1]; if(pid[0]<=pid[1]) m_matches[(pid[0])][(pid[1])].push_back(snp); else m_matches[(pid[1])][(pid[0])].push_back(snp); if( ( eCalculator.getCMDistance( sid[ 1 ] ) - eCalculator.getCMDistance( sid[ 0 ] ) ) >= trueCM && ( sid[ 1 ] - sid[ 0 ] ) >= trueSNP && pid[0] != pid[1] ) { if(pid[0]<=pid[1]) m_trueMatches[(pid[0])][(pid[1])].push_back(snp); else m_trueMatches[(pid[1])][(pid[0])].push_back(snp); } } file_bmatch.close(); } catch(exception &e) { cerr<<"Error:"<<e.what()<<endl; exit( -1 ); } }
void Consolidator::findTrueSimplePctErrors( ErrorCalculator &e_obj, float PIElength, bool holdOut,int window, float ma_threshold, float empirical_ma_threshold ) { for(int i=0;i<person_count;i++) { for(int j=i;j<person_count;j++) { for(int l=0;l<m_trueMatches[i][j].size();l++) { if(m_trueMatches[i][j][l].end==-1) { continue; } //------------------------------------------------------------------------------------------------- int t1 = m_trueMatches[ i ][ j ][ l ].start + ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; int t2 = m_trueMatches[ i ][ j ][ l ].end - ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; /*What do these two functions do? Is this necessary for being able to find errors, or is it only useful for MA calculations?*/ vector<vector<int> > trueErrors=e_obj.checkErrors( i, j, t1, t2); vector<int>finalTrueErrors=e_obj.getFinalErrors( trueErrors ); /*x*/ //This section handles finding the maximum moving averages amongst trulyIBD segments std::vector<float> av; float current_max; if(empirical_ma_threshold < 0.0){ av = e_obj.getTrueMovingAverages(finalTrueErrors,t1,t2,window); current_max = av[0]; for(int q = 1; q < av.size(); q++){ if(av[q] > current_max){ current_max = av[q]; } } e_obj.addMaxAverage(current_max); } //------------------------------------------------------------------------------ int temp1 = m_trueMatches[i][j][l].start; int temp2 = m_trueMatches[i][j][l].end; float startCM = e_obj.getCMDistance( temp1 ); float endCM = e_obj.getCMDistance( temp2 ); float mid1CM = startCM + ( endCM - startCM ) / 2 - PIElength / 2; float mid2CM = startCM + ( endCM - startCM ) / 2 + PIElength / 2; while( e_obj.getCMDistance( temp1 ) <= mid1CM || e_obj.getCMDistance( temp2 ) >=mid2CM ) { if( e_obj.getCMDistance( temp1 ) <= mid1CM ) { ++temp1; } if( e_obj.getCMDistance( temp2 ) >=mid2CM ) { --temp2; } } /*Here they are again. 
*/ vector<vector<int> > errors=e_obj.checkErrors( i, j, temp1, temp2); vector<int>finalErrors=e_obj.getFinalErrors( errors ); // float per_err = e_obj.getThreshold(finalErrors,temp1, temp2, 0 ); float per_err = e_obj.getThreshold(finalErrors,temp1,temp2); //overload! m_errors.push_back( per_err ); /*x*/ }//end for(l) }//end for(j) }//end for(i) //this section actually handles the sorting of the max averages, and the setting of the user supplied percentile. vector<float>maxes; float cutoff = empirical_ma_threshold; //assume the user wanted to supply a value. This value will be overwritten shortly if they did not. if(empirical_ma_threshold < 0.0){ maxes = e_obj.getMaxAverages(); std::sort(maxes.begin(),maxes.end()); e_obj.setMaxAverage(maxes); cutoff = e_obj.getXthPercentile(ma_threshold); //<-make that an actual user input value } e_obj.setCutoff(cutoff);//set the actual threshold to be used when calculating MA in all other SH if(empirical_ma_threshold < 0.0){ ma_thresh_str = "User supplied ma-threshold is: " + NumberToString(ma_threshold); emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff); } else { emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff); } //---------------------------------------- std::sort( m_errors.begin(), m_errors.end() ); std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() ); ibg_str = "No of segments deemed to be IBD for finding empirical error threshold " + NumberToString( m_errors.size() ); }//end ftspe
void Consolidator::readMatches(std::string path,int pers_count, ErrorCalculator& eCalculator, int trueSNP, float trueCM, int snipExtend,std::string pedFile )//path->BMATCHFILE file { person_count=pers_count; if(pers_count<=0) { std::cerr<<"wrong BSID file, check it, reading ped file failed"<<std::endl; return; } try { person_count=pers_count; m_matches.resize(pers_count+1); m_trueMatches.resize( pers_count + 1 ); for(int i=0;i<pers_count;++i) { m_matches[i].resize(pers_count+1); m_trueMatches[i].resize( pers_count + 1 ); } unsigned int pid[2]; unsigned int sid[2]; unsigned int dif,hom[2]; std::ifstream file_bmatch(path.c_str(),std::ios::binary); if( !file_bmatch ) { std::cerr<<"unable to open the bmatch file, exiting the program" << std::endl; std::cout<<"we found the bmatch file"<<std::endl; exit( -1 ); } //unsigned long long counttt=0; while ( !file_bmatch.eof()) { //counttt++; pid[0] = -1; file_bmatch.read( (char*) &pid[0] , sizeof( unsigned int ) ); if ( pid[0] == -1 ) continue; file_bmatch.read( (char*) &pid[1] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[0] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[1] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &dif , sizeof( int ) ); file_bmatch.read( (char*) &hom[0] , sizeof( bool ) ); file_bmatch.read( (char*) &hom[1] , sizeof( bool ) ); if(pid[0]>=pers_count||pid[1]>=pers_count) { std::cerr<<"problem with bsid file, check it please"<<std::endl; return; } SNP_lrf snp; snp.start=sid[0]; snp.end=sid[1]; // std::cout<<" pid[0]= "<<pid[0]; std::cout<<"\tpid[1]= "<<pid[1]; std::cout<<"\tsid[0]= "<<sid[0]; std::cout<<"\tsid[1]= "<<sid[1]; std::cout<<"\tdif= "<<dif; std::cout<<"\thom[0]="<<hom[0]; std::cout<<"\thom[1]= "<<hom[1]; std::cout<<"\tsnp.start = "<<sid[0]; std::cout<<"\tsnp.end=" <<sid[1]; std::cout<<std::endl; // if(pid[0]<=pid[1]) m_matches[(pid[0])][(pid[1])].push_back(snp); else m_matches[(pid[1])][(pid[0])].push_back(snp); if( ( eCalculator.getCMDistance( sid[ 1 ] ) - 
eCalculator.getCMDistance( sid[ 0 ] ) ) >= trueCM && ( sid[ 1 ] - sid[ 0 ] ) >= trueSNP && pid[0] != pid[1] ) { if(pid[0]<=pid[1]) m_trueMatches[(pid[0])][(pid[1])].push_back(snp); else m_trueMatches[(pid[1])][(pid[0])].push_back(snp); } } // std::cout<<m_matches.size()<<std::endl; file_bmatch.close(); } catch(std::exception &e) { std::cerr<<"Error:"<<e.what()<<std::endl; exit( -1 ); } }