Ejemplo n.º 1
0
/**
 * Collects error statistics from the segments stored in m_trueMatches and
 * uses them to configure e_obj.
 *
 * For every entry of m_trueMatches[i][j] whose end is not -1:
 *   - the error vector for the middle 50% of the segment is obtained through
 *     e_obj.checkErrors() / e_obj.getFinalErrors();
 *   - when empirical_ma_threshold is negative, the largest value returned by
 *     e_obj.getTrueMovingAverages() for that region is recorded with
 *     e_obj.addMaxAverage();
 *   - a random entry of m_matches is drawn; if its length is positive and
 *     shorter than the middle 70% of the current segment, the value of
 *     e_obj.getThreshold() over a window of that length (starting 15% into
 *     the segment) is appended to m_errors and, when holdOut is set, the
 *     per-SNP result of e_obj.getOppHomThreshold() is appended to
 *     m_holdOutErrors.
 *
 * Afterwards the cutoff handed to e_obj.setCutoff() is either
 * empirical_ma_threshold itself (when it is >= 0) or the value returned by
 * e_obj.getXthPercentile(ma_threshold) over the sorted recorded maxima.
 * m_errors and m_holdOutErrors are sorted and their sizes are logged.
 *
 * @param e_obj                   Calculator queried for errors and moving
 *                                averages; receives the resulting cutoff.
 * @param ma_snp_ends             Not used by this function.
 * @param holdOut                 When true, m_holdOutErrors is populated too.
 * @param window                  Forwarded to e_obj.getTrueMovingAverages().
 * @param ma_threshold            Forwarded to e_obj.getXthPercentile().
 * @param empirical_ma_threshold  Negative: derive the cutoff from the data.
 *                                Otherwise: used as the cutoff directly.
 */
void Consolidator::findTruePctErrors( ErrorCalculator &e_obj,int ma_snp_ends, bool holdOut,int window,float ma_threshold, float empirical_ma_threshold )
{
   const bool deriveCutoff = ( empirical_ma_threshold < 0.0 );

   // The random draw further down retries until it hits a non-empty cell of
   // m_matches. Establish once that such a cell exists (m_matches is not
   // modified in this function) so the retry loop can never spin forever.
   bool haveRandomCandidate = false;
   for( int a = 0; a < person_count && !haveRandomCandidate; ++a )
   {
      for( int b = a; b < person_count; ++b )
      {
         if( m_matches[ a ][ b ].size() > 0 )
         {
            haveRandomCandidate = true;
            break;
         }
      }
   }

   for( int i = 0; i < person_count; ++i )
   {
      for( int j = i; j < person_count; ++j )
      {
         const int segmentCount = static_cast<int>( m_trueMatches[ i ][ j ].size() );
         for( int l = 0; l < segmentCount; ++l )
         {
            const int segStart = m_trueMatches[ i ][ j ][ l ].start;
            const int segEnd   = m_trueMatches[ i ][ j ][ l ].end;
            if( segEnd == -1 )
            {
               continue;
            }
            const int segLength = segEnd - segStart;

            // Drop the first and last 25% of the segment before looking at
            // moving averages.
            const int t1 = static_cast<int>( segStart + segLength * 0.25 );
            const int t2 = static_cast<int>( segEnd - segLength * 0.25 );
            vector<vector<int> > trueErrors = e_obj.checkErrors( i, j, t1, t2 );
            vector<int> finalTrueErrors = e_obj.getFinalErrors( trueErrors );

            if( deriveCutoff )
            {
               std::vector<float> av = e_obj.getTrueMovingAverages( finalTrueErrors, t1, t2, window );
               // Only record a maximum when there is one; reading av[0] of an
               // empty vector would be undefined behaviour.
               if( !av.empty() )
               {
                  e_obj.addMaxAverage( *std::max_element( av.begin(), av.end() ) );
               }
            }

            if( !haveRandomCandidate )
            {
               // Nothing to sample a length from, so no error-rate sample can
               // be produced for this segment.
               continue;
            }

            // Drop the first and last 15% of the segment for the error-rate
            // sample.
            const int pieStart  = static_cast<int>( segStart + segLength * 0.15 );
            const int pieLimit  = static_cast<int>( segEnd - segLength * 0.15 );
            const int maxLength = pieLimit - pieStart;

            // Draw an ordered pair (randPers1 <= randPers2) until it indexes a
            // non-empty cell of m_matches, then pick one of its segments.
            int randPers1 = 0;
            int randPers2 = 0;
            do
            {
               randPers1 = std::rand() % person_count;
               randPers2 = std::rand() % person_count;
               if( randPers1 > randPers2 )
               {
                  const int swapped = randPers1;
                  randPers1 = randPers2;
                  randPers2 = swapped;
               }
            } while( m_matches[ randPers1 ][ randPers2 ].size() <= 0 );

            const int pos = std::rand() % m_matches[ randPers1 ][ randPers2 ].size();
            const int len = m_matches[ randPers1 ][ randPers2 ][ pos ].end
                            - m_matches[ randPers1 ][ randPers2 ][ pos ].start;
            if( len >= maxLength || len <= 0 )
            {
               // The sampled length does not fit inside the trimmed segment.
               continue;
            }
            const int pieEnd = pieStart + len;

            vector<vector<int> > errors = e_obj.checkErrors( i, j, pieStart, pieEnd );
            vector<int> finalErrors = e_obj.getFinalErrors( errors );
            float per_err = e_obj.getThreshold( finalErrors, pieStart, pieEnd );
            m_errors.push_back( per_err );

            if( holdOut )
            {
               // pieEnd - pieStart equals len, which is > 0 at this point.
               float oppHom = ( e_obj.getOppHomThreshold( i, j, pieStart, pieEnd ) ) / ( pieEnd - pieStart );
               m_holdOutErrors.push_back( oppHom );
            }
         }
      }
   }

   // Use the supplied threshold unless it is negative, in which case the
   // cutoff comes from the recorded maxima.
   float cutoff = empirical_ma_threshold;
   if( deriveCutoff )
   {
      vector<float> maxes = e_obj.getMaxAverages();
      std::sort( maxes.begin(), maxes.end() );
      e_obj.setMaxAverage( maxes );
      cutoff = e_obj.getXthPercentile( ma_threshold );
   }
   e_obj.setCutoff( cutoff );

   std::sort( m_errors.begin(), m_errors.end() );
   std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() );

   std::string str = " \n No of elements in error check are: "
                     + NumberToString( m_errors.size() );
   str = str + " \n No of elements in hold  error check are: "
             + NumberToString( m_holdOutErrors.size() );

   e_obj.log( str );
}
Ejemplo n.º 2
0
//overloaded version
void Consolidator::performTrim(ErrorCalculator& e_obj,int window,
                               int ma_snp_ends, float ma_threshold,
                               int min_snp,float min_cm,
                               float per_err_threshold, string option,
                               float hThreshold, bool holdOut,float empirical_threshold, float empirical_pie_threshold,float cut_value,float cm_cut_value)
{
  int removed1 =0, removed2 = 0, removed3 = 0, removed4 = 0;
  int not_removed = 0;
  int total_count = global_initial;
  bool wrongOption = false;
  if((cut_value >= 0.0) && (cm_cut_value >= 0.0) ){
    cerr << "ERROR: You have specified both a cut value percentage and a cM cut value, but only one is allowed. Please try again." << endl;
    exit(1);
  }
  //8/14/14
  //Adding header for output
  cout << "id1" << "\t" << "id2" << "\t" << "marker_id1" << "\t" << "marker_id2" << "\t" << "snp_length" << "\t" << "cm_length" << "\t" << "start/end" << "\t" << "pie" << "\t" << "ma_max" << "\t" << "random_pie" << "\t" << "random_ma_max" << "\t" << "errors" << "\t" << "moving_averages" << endl;
  for(int i=0;i<person_count;i++)
  {
    for(int j=i;j<person_count;j++)
    {
      for(int l=0;l<m_matches[i][j].size();l++)
      {
        total_count++;
        if(m_matches[i][j][l].end==-1)
        {
          continue; 
        }
        if(i == j){
          continue; //ignore same person matches
        }

        int pers1 = i, pers2 = j;
        //cerr << "For this snp, before applying the cut argument, the length is: " << ((m_matches[i][j][l].end - m_matches[i][j][l].start)) << " SNPs" << endl;
        if((m_matches[i][j][l].end - m_matches[i][j][l].start) < min_snp){
          //reduced argument forces an initial drop due to SNPS
          continue;
        }
        //reduced argument check for mincm
        if(e_obj.isInitialCmDrop(m_matches[i][j][l].start,m_matches[i][j][l].end,min_cm)){
          continue;
        }
        
        int temp1,temp2;
        if(cut_value >= 0.0){
        //remove first and last 25% by default, this can also be a user-specified value
        float new_cut_value = (1 - cut_value) * 0.5; //cut it in half for each side
        temp1 = m_matches[ i ][ j ][ l ].start +
        (int)(( m_matches[ i ][ j ][ l ].end -
        m_matches[ i ][ j ][ l ].start ) * new_cut_value);
        temp2 = m_matches[ i ][ j ][ l ].end -
        (int)(( m_matches[ i ][ j ][ l ].end -
        m_matches[ i ][ j ][ l ].start ) * new_cut_value);
        } else {
          //check that cm arg does not exceed the SH's cM length
        float new_cm_length = e_obj.newCmLength(m_matches[i][j][l].start, m_matches[i][j][l].end,cm_cut_value); //change that cut_value parameter.
        new_cm_length = (new_cm_length / 2.0);
        if(new_cm_length <= 0.0){
          cerr << "Error: The cM length that you specified was larger than one or more of the SH lengths." << endl;
          exit(1);
        }
        float new_cm_start = e_obj.adjustCmLength(m_matches[i][j][l].start, new_cm_length);
        float new_cm_end =  e_obj.adjustCmLength(m_matches[i][j][l].end, ((-1.0) * new_cm_length) );
        if(new_cm_start >= new_cm_end){
          cerr << "ERROR: start value in cM greater than end value. Exiting..." << endl;
          exit(1);
        }
        temp1 = e_obj.snpFinder(new_cm_start,m_matches[i][j][l].start,m_matches[i][j][l].end,0);
        temp2 = e_obj.snpFinder(new_cm_end,m_matches[i][j][l].start,m_matches[i][j][l].end,1);
        }
        //we need to calculate for both this person and a random person.
        int randpers1;
        int randpers2;





        //for now, let's enable randomness by default
        randpers1 = std::rand() % e_obj.getNoOfPersons();
        randpers2 = std::rand() % e_obj.getNoOfPersons();
        if( randpers1 > randpers2 )
        {
          randpers1 = randpers1 + randpers2;
          randpers2 = randpers1 - randpers2;
          randpers1 = randpers1 - randpers2;
        }

        //we now have pers1,2 and randpers1,2

        vector<vector<int> > errors=e_obj.checkErrors(pers1, pers2, temp1, temp2);
        vector<int>finalErrors=e_obj.getFinalErrors(errors);
        vector<vector<int> > random_errors = e_obj.checkErrors(randpers1,randpers2,temp1,temp2);
        vector<int>random_final_errors = e_obj.getFinalErrors(random_errors);
        vector<float>movingAverages;
        vector<float>random_moving_averages;
        float threshold;
  
        movingAverages = e_obj.getMovingAverages(finalErrors,temp1,temp2,window);
        random_moving_averages = e_obj.getMovingAverages(random_final_errors,temp1,temp2,window);
        if(empirical_threshold < 0.0){
         threshold = e_obj.getCutoff(); //this is the empirical average threshold for moving averages
        } else {
          threshold = empirical_threshold;
        }
        
        float per_err = e_obj.getThreshold(finalErrors,temp1,temp2);
        float random_per_err = e_obj.getThreshold(random_final_errors,temp1,temp2);
        //find maximum of moving averages
        int max_pos = 0;
        int rmax_pos = 0;
        float max_ma = 0.0;
        float rmax_ma = 0.0;

        for(int z = 1; z < movingAverages.size();z++){
          if(movingAverages[z] > movingAverages[max_pos]){
            max_pos = z;
          }
        }
        max_ma = movingAverages[max_pos];
        for(int c = 1; c < random_moving_averages.size(); c++){
          if(random_moving_averages[c] > random_moving_averages[rmax_pos]){
            rmax_pos = c;
          }
        }
        rmax_ma = random_moving_averages[rmax_pos];

        if( (option.compare("Error1") == 0 ) || (option.compare("ErrorRandom1") == 0) || (option.compare("Error") == 0) ){
          not_removed++;
          e_obj.errorOutput(i,j,temp1,temp2,min_snp,min_cm,movingAverages,finalErrors,per_err,temp1,temp2,per_err,max_ma,random_per_err,rmax_ma);
          continue;
        }//end error1
      }//l
    }//j
  }//i

  ma_drop_str = "No of matches removed due to length of trimming by moving averages: " + NumberToString( removed2 );
  pie_drop_str = "No of matches removed due to percentage error: " + NumberToString( removed1 );

}//endperftrim()
Ejemplo n.º 3
0
/**
 * Collects error statistics from the segments stored in m_trueMatches using a
 * fixed-width window centred on each segment, and configures e_obj with the
 * resulting moving-average cutoff.
 *
 * For every entry of m_trueMatches[i][j] whose end is not -1:
 *   - the error vector for the middle 50% of the segment is obtained through
 *     e_obj.checkErrors() / e_obj.getFinalErrors();
 *   - when empirical_ma_threshold is negative, the largest value returned by
 *     e_obj.getTrueMovingAverages() for that region is recorded with
 *     e_obj.addMaxAverage();
 *   - the segment bounds are moved inwards until their e_obj.getCMDistance()
 *     values lie strictly inside a window of width PIElength centred on the
 *     segment's midpoint, and e_obj.getThreshold() over that window is
 *     appended to m_errors.
 *
 * Afterwards the cutoff handed to e_obj.setCutoff() is either
 * empirical_ma_threshold itself (when it is >= 0) or the value returned by
 * e_obj.getXthPercentile(ma_threshold) over the sorted recorded maxima. The
 * members ma_thresh_str (derived-cutoff case only), emp_ma_thresh_str and
 * ibg_str are updated, and m_errors / m_holdOutErrors are sorted.
 *
 * @param e_obj                   Calculator queried for errors, distances and
 *                                moving averages; receives the cutoff.
 * @param PIElength               Width of the centred window, in the units
 *                                returned by e_obj.getCMDistance().
 * @param holdOut                 Not used by this function.
 * @param window                  Forwarded to e_obj.getTrueMovingAverages().
 * @param ma_threshold            Forwarded to e_obj.getXthPercentile().
 * @param empirical_ma_threshold  Negative: derive the cutoff from the data.
 *                                Otherwise: used as the cutoff directly.
 */
void Consolidator::findTrueSimplePctErrors( ErrorCalculator &e_obj, float PIElength, bool holdOut,int window, float ma_threshold, float empirical_ma_threshold )
{
  const bool deriveCutoff = ( empirical_ma_threshold < 0.0 );

  for( int i = 0; i < person_count; ++i )
  {
    for( int j = i; j < person_count; ++j )
    {
      const int segmentCount = static_cast<int>( m_trueMatches[i][j].size() );
      for( int l = 0; l < segmentCount; ++l )
      {
        const int segStart = m_trueMatches[i][j][l].start;
        const int segEnd   = m_trueMatches[i][j][l].end;
        if( segEnd == -1 )
        {
          continue;
        }

        // Drop the first and last 25% of the segment before looking at
        // moving averages.
        const int t1 = static_cast<int>( segStart + ( segEnd - segStart ) * 0.25 );
        const int t2 = static_cast<int>( segEnd - ( segEnd - segStart ) * 0.25 );
        vector<vector<int> > trueErrors = e_obj.checkErrors( i, j, t1, t2 );
        vector<int> finalTrueErrors = e_obj.getFinalErrors( trueErrors );

        if( deriveCutoff )
        {
          std::vector<float> av = e_obj.getTrueMovingAverages( finalTrueErrors, t1, t2, window );
          // Only record a maximum when there is one; reading av[0] of an
          // empty vector would be undefined behaviour.
          if( !av.empty() )
          {
            e_obj.addMaxAverage( *std::max_element( av.begin(), av.end() ) );
          }
        }

        // Shrink [temp1, temp2] towards the segment midpoint until both ends
        // fall strictly inside the PIElength-wide window.
        // NOTE(review): nothing here stops temp1 and temp2 from crossing when
        // PIElength is very small - confirm callers never pass such a value.
        int temp1 = segStart;
        int temp2 = segEnd;
        const float startCM = e_obj.getCMDistance( temp1 );
        const float endCM = e_obj.getCMDistance( temp2 );
        const float mid1CM = startCM + ( endCM - startCM ) / 2 - PIElength / 2;
        const float mid2CM = startCM + ( endCM - startCM ) / 2 + PIElength / 2;
        while( e_obj.getCMDistance( temp1 ) <= mid1CM || e_obj.getCMDistance( temp2 ) >= mid2CM )
        {
          if( e_obj.getCMDistance( temp1 ) <= mid1CM )
          {
            ++temp1;
          }
          if( e_obj.getCMDistance( temp2 ) >= mid2CM )
          {
            --temp2;
          }
        }

        vector<vector<int> > errors = e_obj.checkErrors( i, j, temp1, temp2 );
        vector<int> finalErrors = e_obj.getFinalErrors( errors );
        float per_err = e_obj.getThreshold( finalErrors, temp1, temp2 );
        m_errors.push_back( per_err );
      }//end for(l)
    }//end for(j)
  }//end for(i)

  // Use the supplied threshold unless it is negative, in which case the
  // cutoff comes from the recorded maxima.
  float cutoff = empirical_ma_threshold;
  if( deriveCutoff )
  {
    vector<float> maxes = e_obj.getMaxAverages();
    std::sort( maxes.begin(), maxes.end() );
    e_obj.setMaxAverage( maxes );
    cutoff = e_obj.getXthPercentile( ma_threshold );
  }
  e_obj.setCutoff( cutoff );

  // The percentile is only reported when it was actually used; the effective
  // cutoff is reported with the same wording in both cases.
  if( deriveCutoff )
  {
    ma_thresh_str = "User supplied ma-threshold is: " + NumberToString(ma_threshold);
  }
  emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff);

  std::sort( m_errors.begin(), m_errors.end() );
  std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() );
  ibg_str = "No of segments deemed to be IBD for finding empirical error threshold "
            + NumberToString( m_errors.size() );
}//end ftspe
//overloaded version
void Consolidator::performTrim(ErrorCalculator& e_obj,int window,
                               int ma_snp_ends, float ma_threshold,
                               int min_snp,float min_cm,
                               float per_err_threshold, std::string option,
                               float hThreshold, bool holdOut,float empirical_threshold, float empirical_pie_threshold,int extendSnp)//<piyush> added the param int EXTENDSNP for calculating moving window avg)
{
  int removed1 =0, removed2 = 0, removed3 = 0, removed4 = 0;
  int not_removed = 0;
  int total_count = global_initial;
  bool wrongOption = false;
  float per_err_threshold1;
  if(empirical_pie_threshold >= 0.0){
    per_err_threshold1 = empirical_pie_threshold;
  } else {
    per_err_threshold1 = getPctErrThreshold( per_err_threshold );
  }
  std::stringstream sstr;
  sstr << std::fixed << std::setprecision(10) << per_err_threshold1;
  std::string per_err_value = sstr.str();
  emp_pie_thresh_str = "empirical pie threshold is : " + per_err_value  + " \n";
  float hThreshold1 = 0;
  if( holdOut )
  {
    hThreshold1 = getHoldOutThreshold( hThreshold );
  }
  
  per_err_threshold = per_err_threshold1;
  hThreshold = hThreshold1;

  for(int i=0;i<person_count;i++)
  {
    for(int j=i;j<person_count;j++)
    {
      for(int l=0;l<m_matches[i][j].size();l++)
      {
        total_count++;
        if(m_matches[i][j][l].end==-1)
        {
          continue; 
        }

        int temp1=m_matches[i][j][l].start;
        //cout<<"temp1 start begin= "<<temp1<<endl;

        int temp2=m_matches[i][j][l].end;
        //cout<<"temp2 end begin= "<<temp2<<endl;


if (extendSnp != 0)
{

        /*<piyush1>*/
        if(temp1-extendSnp <0)
                                     {
                                     	temp1=0;
                                     	//cout<<"New value of temp1= "<<temp1<<endl;
                                     }
                                     else
                                     {
                                     	temp1=m_matches[i][j][l].start-extendSnp;
                                     	//cout<<"New value of temp1= "<<temp1<<endl;
                                     }

                                     if(temp2+extendSnp > 4443)// change this constant
                                     {
                                     	temp2=m_matches[i][j][l].end;
                                     	//cout<<"New value of temp2= "<<temp2<<endl;
                                     }
                                     else
                                     {
                                     	temp2=m_matches[i][j][l].end+extendSnp;
                                     	//cout<<"New value of temp2= "<<temp2<<endl;
                                     }


        /*till here*/
                                     /*cout<<"temp1 start after= "<<temp1<<endl;
                                     cout<<"temp2 end after= "<<temp2<<endl;*/


                                     //cout<<"perform trim temp1= "<<temp1<<endl;
                                     //cout<<"perform trim temp2= "<<temp2<<endl;


}

        int pers1 = i, pers2 = j;
        if( option.compare( "ErrorRandom1" ) == 0 || option.compare( "ErrorRandom2" ) == 0 || option.compare( "ErrorRandom3" ) == 0 )
        {
          pers1 = std::rand() % e_obj.getNoOfPersons();
          pers2 = std::rand() % e_obj.getNoOfPersons();
          if( pers1 > pers2 )
          {
            pers1 = pers1 + pers2;
            pers2 = pers1 - pers2;
            pers1 = pers1 - pers2;
          }
        }

        std::vector<std::vector<int> > errors=e_obj.checkErrors(pers1, pers2, temp1, temp2);
        std::vector<int>finalErrors=e_obj.getFinalErrors(errors);//<piyush for errors>

        //cout<<"finalErrors size= "<<finalErrors.size()<<endl;
        /*Inject implied error at start/end of SH here*/
        std::vector<int>::iterator it;
        it = finalErrors.begin(); //go to the start of the vector
        if(finalErrors[0] != 1){
          finalErrors.insert(it,1); //inject an error at position 1, if not already there
        }	
        /*End inject implied error section*/

        std::vector<int>trimPositions;
        std::vector<float>movingAverages;
        float threshold;
        if( (e_obj.isInitialCmDrop(temp1,temp2,min_cm)) || ((temp2-temp1) < min_snp) ){ //initial drop. Don't calculate MA
          trimPositions.push_back(temp1);
          trimPositions.push_back(temp2);
          trimPositions.push_back(1);
        }else{

          movingAverages = e_obj.getMovingAverages(finalErrors,temp1,temp2,window,extendSnp);//<piyush> get moving averages are calculated from this part
          if(empirical_threshold < 0.0){
            threshold = e_obj.getCutoff();
          } else {
            threshold = empirical_threshold;
          }
          trimPositions = e_obj.getTrimPositions(movingAverages,temp1,temp2,threshold,min_cm); 

        }
        //-----------------

        int beforeTrimStart = temp1;	
        int beforeTrimEnd = temp2;
        m_matches[i][j][l].end = temp2 = temp1+trimPositions[1];
        m_matches[i][j][l].start = temp1 = temp1+trimPositions[0];
        int del0 = trimPositions[0];
        int del1 = trimPositions[1];
        float per_err = e_obj.getThreshold(finalErrors,del0,del1,ma_snp_ends);

        //add new weighted option
        /*
         For this new option, we only output SH that are not dropped. So, the output is finalOutput + weighted column.
        */
        if( (option.compare("weightedOutput") == 0) || (option.compare("weightedOutputBP") == 0) ){
          int snp1 = 0, snp2 = 0, hlength = 0;
          float noOfOppHom = 0;
          if( holdOut )
          {
            snp1 = e_obj.getNewSnp( temp1 );
            snp2 = e_obj.getNewSnp( temp2 );
            hlength = snp2 - snp1;
            if( hlength <= 0 )
            {
              hlength = 1;
            }
            noOfOppHom = e_obj.getOppHomThreshold( pers1, pers2, m_matches[i][j][l].start, m_matches[i][j][l].end );
          }
          if( ( (beforeTrimEnd - beforeTrimStart) < min_snp) || ( (trimPositions.size() == 3) && (trimPositions[2] == 1) ) ){ 
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            removed4++;
            continue;
          }
          if( (( temp2-temp1 ) < min_snp) || (trimPositions.size() == 3) ){ //removed2 a tpos.size of 3 indicates trimming due ot cM 
            removed2++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue;
          }
          if( per_err > per_err_threshold){
            removed1++;
            continue;
          }
          if( holdOut && hThreshold < ( noOfOppHom ) / hlength ){
            removed3++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue; 
          } //removed3

          not_removed++;
          m_weighted_sh.push_back(Weighted_SH(temp1,temp2,i,j)); //build the vector of SH that passed
          continue;
        }//end weghtedOutput
        /*Add new finalErrorsOutput*/
        if( (option.compare("finalErrorsOutput") == 0) ){
          int snp1 = 0, snp2 = 0, hlength = 0;
          float noOfOppHom = 0;

          if( holdOut )
          {
            snp1 = e_obj.getNewSnp( temp1 );
            snp2 = e_obj.getNewSnp( temp2 );
            hlength = snp2 - snp1;
            if( hlength <= 0 )
            {
              hlength = 1;
            }
            noOfOppHom = e_obj.getOppHomThreshold( pers1, pers2, m_matches[i][j][l].start, m_matches[i][j][l].end );
          }


          if( ( (beforeTrimEnd - beforeTrimStart) < min_snp) || ( (trimPositions.size() == 3) && (trimPositions[2] == 1) ) ){ 
            std::vector<float>movingAverages;
            temp1 = beforeTrimStart;
            temp2 = beforeTrimEnd;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            removed4++;
            continue;
          }

          if( (( temp2-temp1 ) < min_snp) || (trimPositions.size() == 3) ){ //removed2 a tpos.size of 3 indicates trimming due ot cM
            removed2++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue;
          }
          if( per_err > per_err_threshold){
            removed1++;
            continue;
          }

          if( holdOut && hThreshold < ( noOfOppHom ) / hlength ){
            removed3++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue; 
          } //removed3
          not_removed++;
          e_obj.finalErrorsOutput(i,j,temp1,temp2,min_cm,per_err);// <piyush>this is where the final output is written is called at
          continue;
        }//end finalErrorsOutput
        if( (option.compare("FullPlusDropped") == 0) ){
          int snp1 = 0, snp2 = 0, hlength = 0;
          float noOfOppHom = 0;

          if( holdOut )
          {
            snp1 = e_obj.getNewSnp( temp1 );
            snp2 = e_obj.getNewSnp( temp2 );
            hlength = snp2 - snp1;
            if( hlength <= 0 )
            {
              hlength = 1;
            }
            noOfOppHom = e_obj.getOppHomThreshold( pers1, pers2, m_matches[i][j][l].start, m_matches[i][j][l].end );
          }


          if( ( (beforeTrimEnd - beforeTrimStart) < min_snp) || ( (trimPositions.size() == 3) && (trimPositions[2] == 1) ) ){	
            std::vector<float>movingAverages;
            temp1 = beforeTrimStart;
            temp2 = beforeTrimEnd;
            e_obj.fullPlusDroppedOutput(i,j,temp1,temp2,min_snp,min_cm,finalErrors,per_err,1);//standardize the error codes
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            removed4++;
            continue;
          }

          if( (( temp2-temp1 ) < min_snp) || (trimPositions.size() == 3) ){ //removed2 a tpos.size of 3 indicates trimming due ot cM
            e_obj.fullPlusDroppedOutput(i,j,temp1,temp2,min_snp,min_cm,finalErrors,per_err,2); 
            removed2++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue;
          }
          if( per_err > per_err_threshold){
            e_obj.fullPlusDroppedOutput(i,j,temp1,temp2,min_snp,min_cm,finalErrors,per_err,3);
            removed1++;
            continue;
          }

          if( holdOut && hThreshold < ( noOfOppHom ) / hlength ){
            e_obj.fullPlusDroppedOutput(i,j,temp1,temp2,min_snp,min_cm,finalErrors,per_err,4);
            removed3++;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue; 
          } //removed3
          not_removed++;
          e_obj.finalOutPut(i,j,temp1,temp2,min_cm);
          continue;
        } //end FullPlusDropped

        //Calculate Error1
        if( (option.compare("Error1") == 0 ) || (option.compare("ErrorRandom1") == 0) || (option.compare("Error") == 0) ){

          if( ( (beforeTrimEnd - beforeTrimStart) < min_snp) || ( (trimPositions.size() == 3) && (trimPositions[2] == 1) ) ){ //dropped before trimming
            //don't bother printing out ma for this one. But go back and change it so that it doesn't actually calc it
            std::vector<float>movingAverages;//null	   
            //trying something special in this case. This can be removed once idrops aren't being trimmed
            //test code
            temp1 = beforeTrimStart;
            temp2 = beforeTrimEnd;
            //
            e_obj.errorOutput(i,j,temp1,temp2,min_snp,min_cm,movingAverages,finalErrors,per_err,temp1,temp2,beforeTrimStart,beforeTrimEnd,1);
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            removed4++; //seems ok
            continue;
          } 
          if( (( temp2-temp1 ) < min_snp) || ((trimPositions.size() == 3) && (trimPositions[2] == 2) ) ) //dropped after trimming
          {
            e_obj.errorOutput(i,j,temp1,temp2,min_snp,min_cm,movingAverages,finalErrors,per_err,temp1,temp2,beforeTrimStart,beforeTrimEnd,2);
            ++removed2;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue;
          }
          if( per_err > per_err_threshold ) //dropped due to pie
          {
            e_obj.errorOutput(i,j,temp1,temp2,min_snp,min_cm,movingAverages,finalErrors,per_err,temp1,temp2,beforeTrimStart,beforeTrimEnd,3);
            ++removed1;
            m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
            continue;
          }
          not_removed++;
          e_obj.errorOutput(i,j,temp1,temp2,min_snp,min_cm,movingAverages,finalErrors,per_err,temp1,temp2,beforeTrimStart,beforeTrimEnd,0);//no drop
          continue;
        }//end error1

        int snp1 = 0, snp2 = 0, hlength = 0;
        float noOfOppHom = 0;
        if( holdOut )
        {
          snp1 = e_obj.getNewSnp( temp1 );
          snp2 = e_obj.getNewSnp( temp2 );
          hlength = snp2 - snp1;
          if( hlength <= 0 )
          {
            hlength = 1;
          }
          noOfOppHom = e_obj.getOppHomThreshold( pers1, pers2, m_matches[i][j][l].start, m_matches[i][j][l].end );
        }
        //update drop order 2/26/14
        if( (( temp2-temp1 ) < min_snp) || (trimPositions.size() == 3) )
        {
          ++removed2;
          m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
          continue;
        }

        if( per_err > per_err_threshold )
        {

          ++removed1;
          m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
          continue;
        }
        //probably not removed?
        not_removed++;
        if( option.compare("MovingAverages")==0 ) //make this ma2
        {
          if( holdOut)
          {
            e_obj.middleHoldOutPut(i,j,temp1,temp2, min_snp,min_cm,movingAverages,trimPositions,per_err, noOfOppHom, hlength );
          }  
          else
          {
            e_obj.middleOutPut(i,j,temp1,temp2, min_snp, min_cm,movingAverages, trimPositions,per_err );
          }
          continue;
        }

        if(option.compare("Error2")==0 || option.compare( "ErrorRandom2" ) == 0)
        {
          if( holdOut)
          { 
            e_obj.middleHoldOutPut(i,j,temp1,temp2, min_snp, min_cm, finalErrors, trimPositions, per_err, noOfOppHom, hlength );
          }
          else
          {
            e_obj.middleOutPut(i,j,temp1,temp2, min_snp, min_cm, finalErrors, trimPositions, per_err);
          }
          continue;
        }
        if ( holdOut && hThreshold < ( noOfOppHom ) / hlength )
        {
          ++removed3;
          m_matches[i][j][l].start= m_matches[i][j][l].end=-1;
          continue;
        }
        if( option.compare("Error3")==0 || option.compare( "ErrorRandom3" ) == 0  )
        {
          e_obj.middleHoldOutPut(i,j,temp1,temp2, min_snp, min_cm, finalErrors, trimPositions, per_err, noOfOppHom, hlength );
        }
      }//l
    }//j
  }//i


  /*ENTERING TESTING AREA DEC 4th 2014*/
  /*****************************
  ******************************/


  /*Now, let's handle weighted output if need be*/
  if( option.compare("weightedOutput") == 0 ){
    float snp_average_count = 0.0;
    int start_position;
    int end_position;
    int genome_length;
    if(isUserSuppliedWeights()){ //the user has supplied their own weights.
      //in this case, the min and max values correspond to the number of lines in the input file,
      //since each line represents a snp. So the min is always 0, and the max is always the number of lines-1.
      start_position = 0;
      end_position = user_supplied_snp_weights.size() - 1;
    }else {
      start_position = find_genome_min();
      end_position = find_genome_max();
    }//end else
    genome_length = (end_position - start_position)+1;
    genome_vector.resize(genome_length,0);
    if(isUserSuppliedWeights()){
      for(int i = 0; i < user_supplied_snp_weights.size(); i++){
        update_genome(i,user_supplied_snp_weights[i]);
      }
    }else{
      /*This next for loop adds one to each snp in a SH. Bypass it if the user gives a files of weights*/
      for(int i = 0; i < m_weighted_sh.size(); i++){
        update_genome(m_weighted_sh[i].snp1, m_weighted_sh[i].snp2);
      }
    }
    //this part is next...will probably need to add stuff to that weighted object...
    snp_average_count = average_snp_count();
    for(int i = 0; i < m_weighted_sh.size(); i++){
      m_weighted_sh[i].snp_weight = update_snp_weight(m_weighted_sh[i].snp1, m_weighted_sh[i].snp2);
    }
    for(int i = 0; i < m_weighted_sh.size(); i++){
      m_weighted_sh[i].final_weight = ( snp_average_count / (m_weighted_sh[i].snp_weight));
      e_obj.weightedOutput(m_weighted_sh[i].per1, m_weighted_sh[i].per2, m_weighted_sh[i].snp1, m_weighted_sh[i].snp2, m_weighted_sh[i].final_weight);
    }
  }

  if (option.compare("weightedOutputBP") == 0){

  //begin new test code section here: Dec 4th 2014
  int genome_length = e_obj.getGenomeBPLength();    
  float adjusted_genome_length = genome_length / 1000.0; //L using kbp for now
  int genome_min = e_obj.getMinimumBP(); std::cout<<"genome_min= "<<genome_min<<std::endl;
  int genome_max = e_obj.getMaximumBP(); std::cout<<"genome_max= "<<genome_max<<std::endl;
  int genome_size_snps = (find_genome_max() - find_genome_min())+1; //used for genome_vector
  float wprime_numerator = 0.0;  //This is Ci / L
  float total_sh_length_sum = 0.0;
  float w2prime_denominator = 0.0;

  genome_vector.resize(genome_size_snps,0); //resize and zero out the genome. shit that needs to be snps.

  //update all of the snp counts in the genome. This looks fine.
  for(int i = 0; i < m_weighted_sh.size(); i++){
    update_genome(m_weighted_sh[i].snp1, m_weighted_sh[i].snp2); 
  }

  //calculate the w' numerator by summing up all of the snp counts and dividing by the genome length.
  //WARNING: This can cause wprime_numerator to overflow. Currently using kbp units to avoid this, but
  //this needs to be addressed.
  for(int i = 0; i < genome_vector.size(); i++){
    wprime_numerator += genome_vector[i] / adjusted_genome_length;
  }
  
  //Calculate w' for each SH.
  for(int i = 0; i < m_weighted_sh.size(); i++){
    float wprime_denominator = 0.0;
    m_weighted_sh[i].mbp_length = (e_obj.getSHBPLength(m_weighted_sh[i].snp1, m_weighted_sh[i].snp2)/1000.0);
    wprime_denominator = get_snps_over_range(m_weighted_sh[i].snp1, m_weighted_sh[i].snp2, m_weighted_sh[i].mbp_length);
    m_weighted_sh[i].wprime = wprime_numerator / wprime_denominator;
  }

  //Sum the lengths of all SH. NOTE: this sum can probably overflow as well.
  for(int i = 0; i < m_weighted_sh.size(); i++){
    total_sh_length_sum += m_weighted_sh[i].mbp_length;
  }
  
  //Calculate the w2prime denominator - this value is a constant
  for(int i = 0; i < m_weighted_sh.size(); i++){
    float temp = m_weighted_sh[i].mbp_length * m_weighted_sh[i].wprime;
    w2prime_denominator += temp / total_sh_length_sum;
  }

  //Calculate and output w2' for each SH
  for(int i = 0; i < m_weighted_sh.size(); i++){
    m_weighted_sh[i].w2prime = (m_weighted_sh[i].wprime) / w2prime_denominator;
    e_obj.weightedOutput(m_weighted_sh[i].per1, m_weighted_sh[i].per2, m_weighted_sh[i].snp1, m_weighted_sh[i].snp2, m_weighted_sh[i].w2prime);
  }
}

  /*End weighted output*/



  /*END TESTING AREA DEC 4th 2014*/
  /*****************************
  ******************************/
  ma_drop_str = "No of matches removed due to length of trimming by moving averages: " + NumberToString( removed2 );
  pie_drop_str = "No of matches removed due to percentage error: " + NumberToString( removed1 );
  if(holdOut){
  //  str = str+ " \n No of matches removed due hold out ped file checking: "+ NumberToString( removed3 );
  }
  //begin log output
  std::string parameter_string_1 = "\n\n**********Parameters used in program**********\n";
  e_obj.log(parameter_string_1);
  e_obj.log(emp_ma_thresh_str); //keep
  e_obj.log(emp_pie_thresh_str);//keep
  parameter_string_1 = "**********************************************\n\n";
  e_obj.log(parameter_string_1);
  std::string total_count_str = "The total number of SH in the input file was: " + NumberToString(total_count);
  e_obj.log(total_count_str);
  e_obj.log(consolidated_str);
  e_obj.log(initial_drop_str);
  //  e_obj.log(ibg_str);
  e_obj.log(ma_drop_str);
  e_obj.log(pie_drop_str);
  final_sh_str = "Total number of SH that were not dropped is: " + NumberToString(not_removed);
  e_obj.log(final_sh_str);
}//end performTrim