/**
 * Merge neighbouring matches of every person pair and count the short ones.
 *
 * For each pair (first <= second) the match list is walked in order. A match
 * absorbs each following match whose start lies within `gap` SNPs of the
 * current end: the absorbing match's end is moved to the absorbed match's end
 * and the absorbed match is flagged by setting its end to -1. Absorption stops
 * at the first following match that is too far away.
 *
 * After merging, a match is counted as "removed" when it spans fewer than
 * `min_snp` SNPs or less than `min_cm` centimorgans. The match itself is NOT
 * invalidated here; only the counter is updated.
 *
 * Results are published through global_initial, consolidated_str and
 * initial_drop_str.
 *
 * @param eCalculator  supplies getCMDistance() for SNP positions
 * @param gap          largest allowed distance (in SNPs) between merged matches
 * @param min_snp      minimum SNP length of a match
 * @param min_cm       minimum cM length of a match
 * @param extendSnp    not used by this function
 */
void Consolidator::performConsolidation(ErrorCalculator& eCalculator, int gap,int min_snp,float min_cm,int extendSnp)
{
    int mergedCount = 0;
    int shortCount = 0;

    for( int first = 0; first < person_count; ++first )
    {
        for( int second = first; second < person_count; ++second )
        {
            for( int cur = 0; cur < m_matches[first][second].size(); ++cur )
            {
                const int segStart = m_matches[first][second][cur].start;
                int segEnd = m_matches[first][second][cur].end;

                // Entries flagged with -1 were already absorbed by an earlier match.
                if( segEnd == -1 || segStart == -1 )
                {
                    continue;
                }

                // Absorb the run of following matches that are close enough.
                int next = cur + 1;
                while( next < m_matches[first][second].size()
                       && ( m_matches[first][second][next].start - segEnd - 1 ) <= gap )
                {
                    ++mergedCount;
                    segEnd = m_matches[first][second][next].end;
                    m_matches[first][second][cur].end = segEnd;
                    m_matches[first][second][next].end = -1;
                    ++next;
                }

                // The cM distance is only looked up when the SNP length passes.
                bool tooShort = ( segEnd - segStart ) < min_snp;
                if( !tooShort )
                {
                    tooShort = ( eCalculator.getCMDistance( segEnd ) - eCalculator.getCMDistance( segStart ) ) < min_cm;
                }
                if( tooShort )
                {
                    // Counted only; the match is left in place.
                    ++shortCount;
                }
            }
        }
    }

    global_initial = shortCount;
    consolidated_str = "Number of Consolidations: " + NumberToString( mergedCount );
    initial_drop_str = "Number of matches removed due to initial length: " +  NumberToString( shortCount );
}
Ejemplo n.º 2
0
/**
 * Consolidate adjacent matches for every person pair and tally short matches.
 *
 * Within one pair's match list, a match swallows each directly following
 * match that begins no more than `gap` SNPs after its current end. The
 * swallowing match takes over the follower's end, and the follower is flagged
 * with end = -1. The scan for followers stops at the first one beyond the gap.
 *
 * A consolidated match shorter than `min_snp` SNPs or `min_cm` centimorgans
 * is counted; it is not modified. The counts are stored in global_initial,
 * consolidated_str and initial_drop_str.
 *
 * @param eCalculator  provides getCMDistance() for SNP positions
 * @param gap          maximum SNP distance bridged when joining matches
 * @param min_snp      minimum match length in SNPs
 * @param min_cm       minimum match length in cM
 */
void Consolidator::performConsolidation(ErrorCalculator& eCalculator, int gap,int min_snp,float min_cm)
{
    int joined = 0;
    int belowMinimum = 0;

    for( int a = 0; a < person_count; ++a )
    {
        for( int b = a; b < person_count; ++b )
        {
            for( int idx = 0; idx < m_matches[a][b].size(); ++idx )
            {
                const int from = m_matches[a][b][idx].start;
                int to = m_matches[a][b][idx].end;

                // -1 marks a match that has already been swallowed.
                if( to == -1 || from == -1 )
                {
                    continue;
                }

                for( int follower = idx + 1; follower < m_matches[a][b].size(); ++follower )
                {
                    // Stop at the first follower that starts beyond the gap.
                    if( ( m_matches[a][b][follower].start - to - 1 ) > gap )
                    {
                        break;
                    }
                    ++joined;
                    to = m_matches[a][b][follower].end;
                    m_matches[a][b][idx].end = to;
                    m_matches[a][b][follower].end = -1;
                }

                // Short-circuit keeps the cM lookup for matches that pass the SNP test.
                if( ( to - from ) < min_snp
                    || ( eCalculator.getCMDistance( to ) - eCalculator.getCMDistance( from ) ) < min_cm )
                {
                    ++belowMinimum;
                }
            }
        }
    }

    global_initial = belowMinimum;
    consolidated_str = "Number of Consolidations: " + NumberToString( joined );
    initial_drop_str = "Number of matches removed due to initial length: " +  NumberToString( belowMinimum );
}
Ejemplo n.º 3
0
/**
 * Load pairwise matches from a binary bmatch file.
 *
 * The file is read as a sequence of records: two person ids, two SNP indices
 * (start, end), one int-sized field and two bool-sized fields. Every record is
 * stored in m_matches[lo][hi] (lo <= hi being the two person ids). Records of
 * two different persons whose length is at least `trueCM` centimorgans and
 * `trueSNP` SNPs are additionally stored in m_trueMatches[lo][hi].
 *
 * Reading stops at end of file or at the first failed read. A record that is
 * only partially present at the end of the file is reported and ignored
 * instead of being processed with leftover values.
 *
 * @param path         path of the bmatch file
 * @param pers_count   number of persons; must be > 0
 * @param eCalculator  supplies getCMDistance() for SNP positions
 * @param trueSNP      minimum SNP length for an entry in m_trueMatches
 * @param trueCM       minimum cM length for an entry in m_trueMatches
 *
 * Terminates the program with exit(-1) when the file cannot be opened or a
 * std::exception is caught. Returns early (after printing to std::cerr) when
 * pers_count <= 0 or a record references a person id >= pers_count.
 */
void Consolidator::readMatches(string path,int pers_count, ErrorCalculator& eCalculator, int trueSNP, float trueCM )
{
     person_count=pers_count;
     if(pers_count<=0)
     {
       std::cerr<<"wrong BSID file, check it, reading ped file failed"<<std::endl;
       return;
     }
     try
     {
        m_matches.resize(pers_count+1);
        m_trueMatches.resize( pers_count + 1 );
        for(int i=0;i<pers_count;++i)
        {
             m_matches[i].resize(pers_count+1);
             m_trueMatches[i].resize( pers_count + 1 );
        }

        std::ifstream file_bmatch(path.c_str(),std::ios::binary);
        if( !file_bmatch )
        {
             std::cerr<<"unable to open the bmatch file, exiting the program" << std::endl;
             exit( -1 );
        }

        // pers_count is known to be positive here, so the conversion is lossless.
        const unsigned int personLimit = static_cast<unsigned int>( pers_count );

        for(;;)
        {
                unsigned int pid[2] = { 0, 0 };
                unsigned int sid[2] = { 0, 0 };
                unsigned int dif = 0;
                unsigned int hom[2] = { 0, 0 };

                file_bmatch.read( reinterpret_cast<char*>( &pid[0] ), sizeof( unsigned int ) );
                if( !file_bmatch )
                {
                     // Zero bytes read means a clean end of file; anything else is a cut-off record.
                     if( file_bmatch.gcount() != 0 )
                     {
                          std::cerr<<"bmatch file ends with an incomplete record, ignoring it"<<std::endl;
                     }
                     break;
                }

                // Once a read fails the stream stays in a failed state and the
                // remaining reads are no-ops, so one check after the group suffices.
                file_bmatch.read( reinterpret_cast<char*>( &pid[1] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &sid[0] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &sid[1] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &dif ), sizeof( int ) );
                file_bmatch.read( reinterpret_cast<char*>( &hom[0] ), sizeof( bool ) );
                file_bmatch.read( reinterpret_cast<char*>( &hom[1] ), sizeof( bool ) );
                if( !file_bmatch )
                {
                     std::cerr<<"bmatch file ends with an incomplete record, ignoring it"<<std::endl;
                     break;
                }

                if( pid[0] >= personLimit || pid[1] >= personLimit )
                {
                     std::cerr<<"problem with bsid file, check it please"<<std::endl;
                     return;
                }

                SNP snp;
                snp.start=sid[0];
                snp.end=sid[1];

                // Matches are always filed under (smaller id, larger id).
                const unsigned int lo = ( pid[0] <= pid[1] ) ? pid[0] : pid[1];
                const unsigned int hi = ( pid[0] <= pid[1] ) ? pid[1] : pid[0];

                m_matches[lo][hi].push_back(snp);

                if( ( eCalculator.getCMDistance( sid[ 1 ] ) -
                      eCalculator.getCMDistance( sid[ 0 ] ) ) >= trueCM &&
                    ( sid[ 1 ] - sid[ 0 ] ) >= trueSNP &&  pid[0] != pid[1] )
                {
                     m_trueMatches[lo][hi].push_back(snp);
                }
        }
        file_bmatch.close();
    }
    catch(const std::exception &e)
    {
       std::cerr<<"Error:"<<e.what()<<std::endl;
       exit( -1 );
    }

}
Ejemplo n.º 4
0
/**
 * Collect error statistics over all entries of m_trueMatches and set the
 * moving-average cutoff on the error calculator.
 *
 * For every true match that is not flagged with end == -1:
 *  - the middle half of the match (25% of its SNP length trimmed from each
 *    end) is passed to checkErrors()/getFinalErrors(); when
 *    `empirical_ma_threshold` is negative, the largest moving average over
 *    that region is recorded through addMaxAverage();
 *  - the match is narrowed to the SNPs lying strictly inside a window of
 *    `PIElength` cM centred on the match's cM midpoint, and the value of
 *    getThreshold() for that window is appended to m_errors.
 *
 * Afterwards the cutoff is either `empirical_ma_threshold` itself (when it is
 * non-negative) or the `ma_threshold` percentile of the sorted maximum
 * averages. m_errors and m_holdOutErrors are sorted and the report strings
 * ma_thresh_str, emp_ma_thresh_str and ibg_str are updated.
 *
 * @param e_obj                   error calculator used for all lookups
 * @param PIElength               width in cM of the central window
 * @param holdOut                 not used by this function
 * @param window                  forwarded to getTrueMovingAverages()
 * @param ma_threshold            percentile passed to getXthPercentile()
 * @param empirical_ma_threshold  cutoff supplied by the user; negative means
 *                                "derive it from the data"
 */
void Consolidator::findTrueSimplePctErrors( ErrorCalculator &e_obj, float PIElength, bool holdOut,int window, float ma_threshold, float empirical_ma_threshold )
{
  // A negative user threshold requests an empirically derived cutoff.
  const bool deriveCutoff = empirical_ma_threshold < 0.0;

  for(int i=0;i<person_count;i++)
  {
    for(int j=i;j<person_count;j++)
    {
      for(int l=0;l<m_trueMatches[i][j].size();l++)
      {
        if(m_trueMatches[i][j][l].end==-1)
        {
          continue;
        }

        // Middle half of the match: drop a quarter of the SNP length at each end.
        int t1 = m_trueMatches[ i ][ j ][ l ].start +
        ( m_trueMatches[ i ][ j ][ l ].end -
        m_trueMatches[ i ][ j ][ l ].start ) * 0.25;
        int t2 = m_trueMatches[ i ][ j ][ l ].end -
        ( m_trueMatches[ i ][ j ][ l ].end -
        m_trueMatches[ i ][ j ][ l ].start ) * 0.25;

        std::vector<std::vector<int> > trueErrors=e_obj.checkErrors( i, j, t1, t2);
        std::vector<int> finalTrueErrors=e_obj.getFinalErrors( trueErrors );

        // Record the largest moving average of this match.
        if(deriveCutoff){
          std::vector<float> av = e_obj.getTrueMovingAverages(finalTrueErrors,t1,t2,window);
          // An empty result has no maximum; reading av[0] would be undefined behaviour.
          if(!av.empty()){
            e_obj.addMaxAverage(*std::max_element(av.begin(),av.end()));
          }
        }

        // Narrow [temp1, temp2] to the SNPs strictly inside the central
        // PIElength-cM window around the match's cM midpoint.
        int temp1 = m_trueMatches[i][j][l].start;
        int temp2 = m_trueMatches[i][j][l].end;
        float startCM = e_obj.getCMDistance( temp1 );
        float endCM = e_obj.getCMDistance( temp2 );
        float mid1CM = startCM + ( endCM - startCM ) / 2 - PIElength / 2;
        float mid2CM = startCM + ( endCM - startCM ) / 2 + PIElength / 2;
        while( e_obj.getCMDistance( temp1 ) <= mid1CM || e_obj.getCMDistance( temp2 ) >=mid2CM )
        {
          if( e_obj.getCMDistance( temp1 ) <= mid1CM )
          {
            ++temp1;
          }
          if( e_obj.getCMDistance( temp2 ) >=mid2CM )
          {
            --temp2;
          }
        }

        std::vector<std::vector<int> > errors=e_obj.checkErrors( i, j, temp1, temp2);
        std::vector<int> finalErrors=e_obj.getFinalErrors( errors );
        float per_err = e_obj.getThreshold(finalErrors,temp1,temp2);
        m_errors.push_back( per_err );
      }//end for(l)
    }//end for(j)
  }//end for(i)

  // Start from the user-supplied value; replace it with the requested
  // percentile of the sorted maxima when the cutoff has to be derived.
  float cutoff = empirical_ma_threshold;
  if(deriveCutoff){
    std::vector<float> maxes = e_obj.getMaxAverages();
    std::sort(maxes.begin(),maxes.end());
    e_obj.setMaxAverage(maxes);
    cutoff = e_obj.getXthPercentile(ma_threshold);
  }

  // Threshold used by later moving-average tests.
  e_obj.setCutoff(cutoff);

  if(deriveCutoff){
    ma_thresh_str = "User supplied ma-threshold is: " + NumberToString(ma_threshold);
  }
  emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff);

  std::sort( m_errors.begin(), m_errors.end() );
  std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() );
  ibg_str = "No of segments deemed to be IBD for finding empirical error threshold "
  + NumberToString( m_errors.size() );
}//end ftspe
/**
 * Load pairwise matches from a binary bmatch file.
 *
 * The file is read as a sequence of records: two person ids, two SNP indices
 * (start, end), one int-sized field and two bool-sized fields. Every record is
 * echoed to std::cout and stored in m_matches[lo][hi] (lo <= hi being the two
 * person ids). Records of two different persons whose length is at least
 * `trueCM` centimorgans and `trueSNP` SNPs are additionally stored in
 * m_trueMatches[lo][hi].
 *
 * Reading stops at end of file or at the first failed read. A record that is
 * only partially present at the end of the file is reported and ignored
 * instead of being processed with leftover values.
 *
 * @param path         path of the bmatch file
 * @param pers_count   number of persons; must be > 0
 * @param eCalculator  supplies getCMDistance() for SNP positions
 * @param trueSNP      minimum SNP length for an entry in m_trueMatches
 * @param trueCM       minimum cM length for an entry in m_trueMatches
 * @param snipExtend   not used by this function
 * @param pedFile      not used by this function
 *
 * Terminates the program with exit(-1) when the file cannot be opened or a
 * std::exception is caught. Returns early (after printing to std::cerr) when
 * pers_count <= 0 or a record references a person id >= pers_count.
 */
void Consolidator::readMatches(std::string path,int pers_count, ErrorCalculator& eCalculator, int trueSNP, float trueCM, int snipExtend,std::string pedFile )//path->BMATCHFILE file
{
     person_count=pers_count;
     if(pers_count<=0)
     {
       std::cerr<<"wrong BSID file, check it, reading ped file failed"<<std::endl;
       return;
     }
     try
     {
        m_matches.resize(pers_count+1);
        m_trueMatches.resize( pers_count + 1 );
        for(int i=0;i<pers_count;++i)
        {
             m_matches[i].resize(pers_count+1);
             m_trueMatches[i].resize( pers_count + 1 );
        }

        std::ifstream file_bmatch(path.c_str(),std::ios::binary);
        if( !file_bmatch )
        {
             std::cerr<<"unable to open the bmatch file, exiting the program" << std::endl;
             exit( -1 );
        }

        // pers_count is known to be positive here, so the conversion is lossless.
        const unsigned int personLimit = static_cast<unsigned int>( pers_count );

        for(;;)
        {
                // Zero-initialised so that the bool-sized reads into hom[] leave
                // no indeterminate bytes in the values printed below.
                unsigned int pid[2] = { 0, 0 };
                unsigned int sid[2] = { 0, 0 };
                unsigned int dif = 0;
                unsigned int hom[2] = { 0, 0 };

                file_bmatch.read( reinterpret_cast<char*>( &pid[0] ), sizeof( unsigned int ) );
                if( !file_bmatch )
                {
                     // Zero bytes read means a clean end of file; anything else is a cut-off record.
                     if( file_bmatch.gcount() != 0 )
                     {
                          std::cerr<<"bmatch file ends with an incomplete record, ignoring it"<<std::endl;
                     }
                     break;
                }

                // Once a read fails the stream stays in a failed state and the
                // remaining reads are no-ops, so one check after the group suffices.
                file_bmatch.read( reinterpret_cast<char*>( &pid[1] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &sid[0] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &sid[1] ), sizeof( unsigned int ) );
                file_bmatch.read( reinterpret_cast<char*>( &dif ), sizeof( int ) );
                file_bmatch.read( reinterpret_cast<char*>( &hom[0] ), sizeof( bool ) );
                file_bmatch.read( reinterpret_cast<char*>( &hom[1] ), sizeof( bool ) );
                if( !file_bmatch )
                {
                     std::cerr<<"bmatch file ends with an incomplete record, ignoring it"<<std::endl;
                     break;
                }

                if( pid[0] >= personLimit || pid[1] >= personLimit )
                {
                     std::cerr<<"problem with bsid file, check it please"<<std::endl;
                     return;
                }

                SNP_lrf snp;
                snp.start=sid[0];
                snp.end=sid[1];

                // Per-record dump of the raw fields.
                // NOTE(review): looks like leftover debugging output - confirm whether it is still wanted.
                std::cout<<" pid[0]= "<<pid[0];
                std::cout<<"\tpid[1]= "<<pid[1];
                std::cout<<"\tsid[0]= "<<sid[0];
                std::cout<<"\tsid[1]= "<<sid[1];
                std::cout<<"\tdif= "<<dif;
                std::cout<<"\thom[0]="<<hom[0];
                std::cout<<"\thom[1]= "<<hom[1];
                std::cout<<"\tsnp.start = "<<sid[0];
                std::cout<<"\tsnp.end=" <<sid[1];
                std::cout<<std::endl;

                // Matches are always filed under (smaller id, larger id).
                const unsigned int lo = ( pid[0] <= pid[1] ) ? pid[0] : pid[1];
                const unsigned int hi = ( pid[0] <= pid[1] ) ? pid[1] : pid[0];

                m_matches[lo][hi].push_back(snp);

                if( ( eCalculator.getCMDistance( sid[ 1 ] ) -
                      eCalculator.getCMDistance( sid[ 0 ] ) ) >= trueCM &&
                    ( sid[ 1 ] - sid[ 0 ] ) >= trueSNP &&  pid[0] != pid[1] )
                {
                     m_trueMatches[lo][hi].push_back(snp);
                }
        }
        file_bmatch.close();
    }
    catch(const std::exception &e)
    {
       std::cerr<<"Error:"<<e.what()<<std::endl;
       exit( -1 );
    }

}