void Consolidator::findTruePctErrors( ErrorCalculator &e_obj,int ma_snp_ends, bool holdOut,int window,float ma_threshold, float empirical_ma_threshold ) { for(int i=0;i<person_count;i++) { for(int j=i;j<person_count;j++) { for(int l=0;l<m_trueMatches[i][j].size();l++) { if(m_trueMatches[i][j][l].end==-1) { continue; } //handle moving averages calculation // int t1 = m_trueMatches[ i ][ j ][ l ].start + ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; int t2 = m_trueMatches[ i ][ j ][ l ].end - ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; //now we have the positions of the first and last 25% of the truly ibd SH //all that's left to do is to pass them into the moving averages function, and obtain the max ma //then store that in a vector, sort them, and find the xth percentile of that vector. That will be //the ma that we use later //for that "finalErrors" parameters, need to get the number of errors along the truly IBD SH first... vector<vector<int> > trueErrors=e_obj.checkErrors( i, j, t1, t2); vector<int>finalTrueErrors=e_obj.getFinalErrors( trueErrors ); //handles MA calculations std::vector<float> av; float current_max; if(empirical_ma_threshold < 0.0){ av = e_obj.getTrueMovingAverages(finalTrueErrors,t1,t2,window); current_max = av[0]; for(int q = 1; q < av.size(); q++){ if(av[q] > current_max){ current_max = av[q]; } } e_obj.addMaxAverage(current_max); } // int temp1 = m_trueMatches[ i ][ j ][ l ].start + ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.15; //Should probably stop doing this int temp2 = m_trueMatches[ i ][ j ][ l ].end - ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.15; int start =0, end =0, fend = ( temp2 -temp1 ) ; //since we are using MOL at this point, this will pick out a random SH from the set of non-truly IBD SH //and use that length to define the region over which we find PIE. Unless you are changing something with MOL, //don't ever read this next block int randPers1, randPers2, pos; randPers1 = std::rand() % person_count; randPers2 = std::rand() % person_count; if( randPers1 > randPers2 ) { randPers1 = randPers1 + randPers2; randPers2 = randPers1 - randPers2; randPers1 = randPers1 - randPers2; } while( m_matches[ randPers1 ][ randPers2 ].size() <= 0 ) { randPers1 = std::rand() % person_count; randPers2 = std::rand() % person_count; if( randPers1 > randPers2 ) { randPers1 = randPers1 + randPers2; randPers2 = randPers1 - randPers2; randPers1 = randPers1 - randPers2; } } pos = std::rand() % m_matches[ randPers1 ][ randPers2 ].size(); int len = m_matches[ randPers1 ][ randPers2 ][ pos ].end - m_matches[ randPers1 ][ randPers2 ][ pos ].start; if( len >= fend || len <= 0) { continue; } temp1 = temp1; temp2 = temp1 + len; //end crazy MOL stuff vector<vector<int> > errors=e_obj.checkErrors( i, j, temp1, temp2); vector<int>finalErrors=e_obj.getFinalErrors( errors ); float per_err = e_obj.getThreshold(finalErrors,temp1,temp2);//overload m_errors.push_back( per_err ); if( holdOut ) { float oppHom = ( e_obj.getOppHomThreshold( i, j, temp1, temp2 ) ) / ( temp2 -temp1 ); m_holdOutErrors.push_back( oppHom ); } } } } vector<float>maxes; float cutoff = empirical_ma_threshold; if(empirical_ma_threshold < 0.0){ maxes = e_obj.getMaxAverages(); std::sort(maxes.begin(),maxes.end()); e_obj.setMaxAverage(maxes); cutoff = e_obj.getXthPercentile(ma_threshold); } e_obj.setCutoff(cutoff);//set the actual threshold to be used when calculating MA in all other SH // std::sort( m_errors.begin(), m_errors.end() ); std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() ); std::string str = " \n No of elements in error check are: " + NumberToString( m_errors.size() ); str = str + " \n No of elements in hold error check are: " + NumberToString( m_holdOutErrors.size() ); e_obj.log( str ); }
void Consolidator::findTrueSimplePctErrors( ErrorCalculator &e_obj, float PIElength, bool holdOut,int window, float ma_threshold, float empirical_ma_threshold ) { for(int i=0;i<person_count;i++) { for(int j=i;j<person_count;j++) { for(int l=0;l<m_trueMatches[i][j].size();l++) { if(m_trueMatches[i][j][l].end==-1) { continue; } //------------------------------------------------------------------------------------------------- int t1 = m_trueMatches[ i ][ j ][ l ].start + ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; int t2 = m_trueMatches[ i ][ j ][ l ].end - ( m_trueMatches[ i ][ j ][ l ].end - m_trueMatches[ i ][ j ][ l ].start ) * 0.25; /*What do these two functions do? Is this necessary for being able to find errors, or is it only useful for MA calculations?*/ vector<vector<int> > trueErrors=e_obj.checkErrors( i, j, t1, t2); vector<int>finalTrueErrors=e_obj.getFinalErrors( trueErrors ); /*x*/ //This section handles finding the maximum moving averages amongst trulyIBD segments std::vector<float> av; float current_max; if(empirical_ma_threshold < 0.0){ av = e_obj.getTrueMovingAverages(finalTrueErrors,t1,t2,window); current_max = av[0]; for(int q = 1; q < av.size(); q++){ if(av[q] > current_max){ current_max = av[q]; } } e_obj.addMaxAverage(current_max); } //------------------------------------------------------------------------------ int temp1 = m_trueMatches[i][j][l].start; int temp2 = m_trueMatches[i][j][l].end; float startCM = e_obj.getCMDistance( temp1 ); float endCM = e_obj.getCMDistance( temp2 ); float mid1CM = startCM + ( endCM - startCM ) / 2 - PIElength / 2; float mid2CM = startCM + ( endCM - startCM ) / 2 + PIElength / 2; while( e_obj.getCMDistance( temp1 ) <= mid1CM || e_obj.getCMDistance( temp2 ) >=mid2CM ) { if( e_obj.getCMDistance( temp1 ) <= mid1CM ) { ++temp1; } if( e_obj.getCMDistance( temp2 ) >=mid2CM ) { --temp2; } } /*Here they are again. */ vector<vector<int> > errors=e_obj.checkErrors( i, j, temp1, temp2); vector<int>finalErrors=e_obj.getFinalErrors( errors ); // float per_err = e_obj.getThreshold(finalErrors,temp1, temp2, 0 ); float per_err = e_obj.getThreshold(finalErrors,temp1,temp2); //overload! m_errors.push_back( per_err ); /*x*/ }//end for(l) }//end for(j) }//end for(i) //this section actually handles the sorting of the max averages, and the setting of the user supplied percentile. vector<float>maxes; float cutoff = empirical_ma_threshold; //assume the user wanted to supply a value. This value will be overwritten shortly if they did not. if(empirical_ma_threshold < 0.0){ maxes = e_obj.getMaxAverages(); std::sort(maxes.begin(),maxes.end()); e_obj.setMaxAverage(maxes); cutoff = e_obj.getXthPercentile(ma_threshold); //<-make that an actual user input value } e_obj.setCutoff(cutoff);//set the actual threshold to be used when calculating MA in all other SH if(empirical_ma_threshold < 0.0){ ma_thresh_str = "User supplied ma-threshold is: " + NumberToString(ma_threshold); emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff); } else { emp_ma_thresh_str = "Moving Averages will be tested usign the empirical threshold: " + NumberToString(cutoff); } //---------------------------------------- std::sort( m_errors.begin(), m_errors.end() ); std::sort( m_holdOutErrors.begin(), m_holdOutErrors.end() ); ibg_str = "No of segments deemed to be IBD for finding empirical error threshold " + NumberToString( m_errors.size() ); }//end ftspe