uint Aligner::checkBeginGreedy(const string& read, pair<kmer, uint>& overlap, vector<uNumber>& path, uint errors){ if(overlap.second==0){path.push_back(0);return 0;} string readLeft(read.substr(0,overlap.second)),unitig; auto rangeUnitigs(getEnd(overlap.first)); uint minMiss(errors+1),indiceMinMiss(0); bool ended(false); int offset(0); kmer nextOverlap(0); for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); if(unitig.size()-k+1>=readLeft.size()){ uint miss(missmatchNumber(unitig.substr(unitig.size()-readLeft.size()-k+1,readLeft.size()),readLeft, errors)); // if(miss==0){ // path.push_back(unitig.size()-readLeft.size()-k+1); // path.push_back(rangeUnitigs[i].second); // return 0; // } if(miss<minMiss){ minMiss=miss; indiceMinMiss=i; ended=true; offset=unitig.size()-readLeft.size()-k+1; } }else{ uint miss(missmatchNumber(unitig.substr(0,unitig.size()-k+1), readLeft.substr(readLeft.size()+k-1-unitig.size()), errors)); // if(miss==0){ // minMiss+=mapOnLeftEndGreedy(read, path, {nextOverlap,overlap.second-(nextUnitig.size()-k+1)},errors); // if(minMiss<=errors){ // path.push_back(rangeUnitigs[indiceMinMiss].second); // sucessML++; // } // } if(miss<minMiss){ kmer overlapNum(str2num(unitig.substr(0,k-1))); if(miss<minMiss){ ended=false; minMiss=miss; indiceMinMiss=i; nextOverlap=overlapNum; } } } } if(minMiss<=errors){ if(ended){ path.push_back(offset); path.push_back(rangeUnitigs[indiceMinMiss].second); return minMiss; } minMiss+=mapOnLeftEndGreedy(read, path, {nextOverlap,overlap.second-(rangeUnitigs[indiceMinMiss].first.size()-k+1)},errors-minMiss); if(minMiss<=errors){ path.push_back(rangeUnitigs[indiceMinMiss].second); sucessML++; } } return minMiss; }
uint Aligner::mapOnRightEndGreedy(const string &read, vector<uNumber>& path, const pair<kmer, uint>& overlap , uint errors){ // cout<<"moreg"<<endl; string unitig,readLeft(read.substr(overlap.second)),nextUnitig; // auto rangeUnitigs(getBeginOpti(overlap.first,path.back())); auto rangeUnitigs(getBegin(overlap.first)); uint miniMiss(errors+1), miniMissIndice(9); bool ended(false); // int offset(0); kmer nextOverlap(0); // cout<<"go"<<endl; // for(uint i(0); i<rangeUnitigs2.size(); ++i){ // cout<<(rangeUnitigs2[i].first)<<endl; // } // cout<<"true"<<endl; for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); // bool stop(false); // if(rangeUnitigs[i].first!=rangeUnitigs2[i].first){ // cout<<read<<endl; // cout<<"lol2"<<endl; // cout<<rangeUnitigs[i].first<<endl<<rangeUnitigs2[i].first<<endl; // cout<<rangeUnitigs[i].second<<" "<<rangeUnitigs2[i].second<<endl; // stop=true; // } // if(stop){cin.get();} // cout<<unitig<<endl; // cout<<rangeUnitigs[i].first<<endl; //case the rest of the read is too small if(readLeft.size()<=unitig.size()){ uint miss(missmatchNumber(unitig.substr(0,readLeft.size()), readLeft, errors)); if(miss<miniMiss){ miniMiss=miss; miniMissIndice=i; ended=true; // offset=unitig.size()-readLeft.size()-k+1; } }else{ //case the read is big enough we want to recover a true overlap uint miss(missmatchNumber(unitig, read.substr(overlap.second,unitig.size()), errors)); if(miss<miniMiss){ if(miss<miniMiss){ kmer overlapNum(str2num(unitig.substr(unitig.size()-k+1,k-1))); miniMiss=miss; miniMissIndice=i; nextUnitig=unitig; nextOverlap=overlapNum; } } } } // cout<<"end"<<endl; if(miniMiss<=errors){ path.push_back(rangeUnitigs[miniMissIndice].second); if (ended){return miniMiss;} miniMiss+=mapOnRightEndGreedy(read , path, {nextOverlap,overlap.second+(nextUnitig.size()-k+1)}, errors-miniMiss); } return miniMiss; }
uint Aligner::checkEndExhaustive(const string& read, pair<kmer, uint>& overlap, vector<uNumber>& path, uint errors){ string readLeft(read.substr(overlap.second+k-1)),unitig; vector<uNumber> path2keep; if(readLeft.empty()){ path.push_back(0); return 0; } auto rangeUnitigs(getBegin(overlap.first)); uint minMiss(errors+1),indiceMinMiss(9); bool ended(false); int offset(-2); if(partial & rangeUnitigs.empty()){ //if(!path.empty()){ return 0; //} } for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); if(unitig.size()-k+1>=readLeft.size()){ uint miss(missmatchNumber(unitig.substr(k-1,readLeft.size()),readLeft, errors)); if(miss<minMiss){ minMiss=miss; indiceMinMiss=i; ended=true; offset=readLeft.size()+k-1; } }else{ uint miss(missmatchNumber(unitig.substr(k-1),readLeft.substr(0,unitig.size()-k+1), errors)); if(miss<minMiss){ kmer overlapNum(str2num(unitig.substr(unitig.size()-k+1,k-1))); vector<uNumber> possiblePath; miss+=mapOnRightEndExhaustive(read, possiblePath, {overlapNum,overlap.second+(unitig.size()-k+1)},errors-miss); if(miss<minMiss){ path2keep=possiblePath; minMiss=miss; indiceMinMiss=i; ended=false; } } } } if(minMiss<=errors){ if(ended){ path.push_back(rangeUnitigs[indiceMinMiss].second); path.push_back(offset); }else{ path.push_back(rangeUnitigs[indiceMinMiss].second); path.insert(path.end(), path2keep.begin(),path2keep.end()); } } return minMiss; }
// Greedy extension to the right using the candidates cached in `overlap`
// (overlap.unitig[i] is the sequence, overlap.unitigNumbers[i] its number).
//
// read        : the read being aligned.
// path        : the number of each retained candidate is appended.
// overlap     : current anchor; the part of the read still to be matched
//               starts at overlap.pos+k-1.
// listOverlap : the anchors known for this read; when the end of the retained
//               candidate coincides with one of them (same k-mer, expected
//               position) the walk stops there and its index is returned.
// ended       : in/out flag, set to true when the retained candidate covers
//               the rest of the read, to false when the walk has to continue.
// start       : index in listOverlap from which later anchors are searched.
// errors      : maximum number of mismatches tolerated.
//
// Returns {index in listOverlap reached, mismatches}. On failure the second
// member is errors+1 for this call.
pair<uint,uint> Aligner::mapOnRightCache(const string &read, vector<uNumber>& path, const overlapStruct& overlap, const vector<overlapStruct>& listOverlap, bool& ended,uint start, uint errors){
	string unitig, readLeft(read.substr(overlap.pos+k-1)),nextUnitig;
	if(readLeft.empty()){
		cout<<"should not appears"<<endl;
		exit(0);
	}

	// Read the cached candidates in place instead of copying the container.
	const auto& rangeUnitigs(overlap.unitig);
	// miniMissIndice is only read once a candidate has been retained.
	uint miniMiss(errors+1),miniMissIndice(9);
	uint next(start);
	kmer nextOverlapNum(0);

	for(uint i(0); i<rangeUnitigs.size(); ++i){
		unitig=(rangeUnitigs[i]);
		if(readLeft.size() <= unitig.size()-k+1){
			//case the rest of the read is too small
			uint miss(missmatchNumber(unitig.substr(k-1,readLeft.size()), readLeft, errors));
			if(miss<miniMiss){
				ended=true;
				miniMiss=miss;
				miniMissIndice=i;
			}
		}else{
			//case the read is big enough we want to recover a true overlap
			uint miss(missmatchNumber(unitig.substr(k-1), readLeft.substr(0,unitig.size()-k+1), errors));
			if(miss<miniMiss){
				kmer overlapNum(str2num(unitig.substr(unitig.size()-k+1,k-1)));
				ended=false;
				miniMiss=miss;
				miniMissIndice=i;
				nextOverlapNum=overlapNum;
				nextUnitig=unitig;
				// Look for a later known anchor matching the end of this
				// candidate; the last match wins, as before.
				next=start;
				for(uint j(start+1); j<listOverlap.size(); ++j){
					if(overlapNum==listOverlap[j].seq and listOverlap[j].pos==overlap.pos+unitig.size()-k+1){
						next=j;
					}
				}
			}
		}
	}

	// No candidate retained. This test comes first so that a stale `ended`
	// value handed in by the caller can never lead to indexing
	// overlap.unitigNumbers with the sentinel index.
	if(miniMiss>errors){
		return {start,errors+1};
	}

	path.push_back(overlap.unitigNumbers[miniMissIndice]);
	if(ended){
		return {start,miniMiss};
	}
	if(next>start){
		return {next,miniMiss};
	}
	// No known anchor at the end of the candidate: keep walking from its last
	// k-1 bases with the remaining error budget.
	auto res(mapOnRightCache(read , path, {nextOverlapNum,overlap.pos+((uint)nextUnitig.size()-k+1)},listOverlap,ended,start, errors-miniMiss));
	return {res.first,res.second+miniMiss};
}
uint Aligner::checkPairCache(const overlapStruct& overlap1, const overlapStruct& overlap2, const string& read, uNumber& number, uint errorsAllowed){ if(overlap2.pos-overlap1.pos<k){ //TODO maybe it is a bad idea ... return 0; } string unitig,subRead(read.substr(overlap1.pos+k-1,overlap2.pos-overlap1.pos-(k-1))); auto rangeUnitigs1(overlap1.unitig); auto rangeUnitigs2(overlap2.unitig); uint minMissMatch(errorsAllowed+1),indice(0); string unitig1, unitig2; for(uint i(0); i<rangeUnitigs1.size(); ++i){ unitig1=rangeUnitigs1[i]; for(uint j(0); j<rangeUnitigs2.size(); ++j){ unitig2=rangeUnitigs2[j]; if(unitig2==unitig1){ if(unitig1.size()-2*(k-1)==subRead.size()){ uint missmatch(missmatchNumber(unitig1.substr(k-1,subRead.size()), subRead, errorsAllowed)); if(missmatch<minMissMatch){ minMissMatch=missmatch; indice=i; } } } } } if(minMissMatch<=errorsAllowed){number=(overlap1.unitigNumbers[indice]);} return minMissMatch; }
uint Aligner::mapOnLeftEndExhaustive(const string &read, vector<uNumber>& path, const pair<kmer, uint>& overlap , uint errors){ string unitig, readLeft(read.substr(0,overlap.second)); vector<uNumber> path2keep; if(readLeft.size()==0){return 0;} auto rangeUnitigs(getEnd(overlap.first)); uint miniMiss(errors+1),miniMissIndice(9); int offset(-2); bool ended(false); for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); //case the rest of the read is too small if(readLeft.size()+k-1 <= unitig.size()){ uint miss(missmatchNumber(unitig.substr(unitig.size()-readLeft.size()-k+1,readLeft.size()), readLeft, errors)); if(miss<miniMiss){ miniMiss=miss; miniMissIndice=i; offset=unitig.size()-readLeft.size()-k+1; ended=true; } }else{ //case the read is big enough we want to recover a true overlap uint miss(missmatchNumber(unitig.substr(0,unitig.size()-k+1), readLeft.substr(readLeft.size()-(unitig.size()-k+1)), errors)); if(miss<miniMiss){ kmer overlapNum(str2num(unitig.substr(0,k-1))); vector<uNumber> possiblePath; miss+=mapOnLeftEndExhaustive(read , possiblePath, {overlapNum,overlap.second-(unitig.size()-k+1)}, errors-miss); if(miss<miniMiss){ path2keep=possiblePath; miniMiss=miss; miniMissIndice=i; offset=-1; ended=false; } } } } if (miniMiss<=errors){ if(ended){ path.push_back(offset); }else{ path.insert(path.end(), path2keep.begin(),path2keep.end()); } path.push_back(rangeUnitigs[miniMissIndice].second); } return miniMiss; }
uint Aligner::mapOnLeftEndGreedy(const string &read, vector<uNumber>& path, const pair<kmer, uint>& overlap , uint errors){ // if(overlap.second==0){path.push_back(0);return 0;} string unitig,readLeft(read.substr(0,overlap.second)),nextUnitig; auto rangeUnitigs(getEnd(overlap.first)); uint miniMiss(errors+1),miniMissIndice(9); bool ended(false); int offset(0); kmer nextOverlap(0); for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); //case the rest of the read is too small if(readLeft.size()+k-1 <= unitig.size()){ uint miss(missmatchNumber(unitig.substr(unitig.size()-readLeft.size()-k+1,readLeft.size()), readLeft, errors)); if(miss<miniMiss){ miniMiss=miss; miniMissIndice=i; ended=true; offset=unitig.size()-readLeft.size()-k+1; } }else{ //case the read is big enough we want to recover a true overlap uint miss(missmatchNumber(unitig.substr(0,unitig.size()-k+1), readLeft.substr(readLeft.size()-(unitig.size()-k+1)), errors)); if(miss<miniMiss){ kmer overlapNum(str2num(unitig.substr(0,k-1))); if(miss<miniMiss){ ended=false; miniMiss=miss; miniMissIndice=i; nextUnitig=unitig; nextOverlap=overlapNum; } } } } if (miniMiss<=errors){ if(ended){ path.push_back(offset); path.push_back(rangeUnitigs[miniMissIndice].second); return miniMiss; } miniMiss+=mapOnLeftEndGreedy(read , path, {nextOverlap,overlap.second-(nextUnitig.size()-k+1)}, errors-miniMiss); path.push_back(rangeUnitigs[miniMissIndice].second); } return miniMiss; }
uint Aligner::checkEndGreedy(const string& read, pair<kmer, uint>& overlap, vector<uNumber>& path, uint errors){ string readLeft(read.substr(overlap.second+k-1)),unitig,nextUnitig; if(readLeft.size()<=k-1){return 0;} auto rangeUnitigs(getBegin(overlap.first)); uint minMiss(errors+1),indiceMinMiss(9); bool ended(false); kmer nextOverlap(0); for(uint i(0); i<rangeUnitigs.size(); ++i){ unitig=(rangeUnitigs[i].first); if(unitig.size()-k+1>=readLeft.size()){ uint miss(missmatchNumber(unitig.substr(k-1,readLeft.size()),readLeft, errors)); if(miss<minMiss){ minMiss=miss; indiceMinMiss=i; ended=true; } }else{ uint miss(missmatchNumber(unitig.substr(k-1),readLeft.substr(0,unitig.size()-k+1), errors)); if(miss<minMiss){ if(miss<minMiss){ kmer overlapNum(str2num(unitig.substr(unitig.size()-k+1,k-1))); minMiss=miss; indiceMinMiss=i; nextOverlap=overlapNum; nextUnitig=unitig; } } } } if(minMiss<=errors){ path.push_back(rangeUnitigs[indiceMinMiss].second); if(ended){ return minMiss; } minMiss+=mapOnRightEndGreedy(read, path, {nextOverlap,overlap.second+(nextUnitig.size()-k+1)},errors-minMiss); if(minMiss<=errors){ successMR++; } } return minMiss; }
uint Aligner::checkPair(const pair<kmer, uint>& overlap1, const pair<kmer, uint>& overlap2, const string& read, uNumber& number, uint errorsAllowed){ if(overlap2.second-overlap1.second<k){ int32_t positionget1, positionget2; auto rangeUnitigs1(getBegin(overlap1.first)); auto rangeUnitigs2(getEnd(overlap2.first)); for(uint i(0); i<rangeUnitigs1.size(); ++i){ positionget1=rangeUnitigs1[i].second; for(uint j(0); j<rangeUnitigs2.size(); ++j){ positionget2=rangeUnitigs2[j].second; if(positionget2==positionget1){ number=positionget1; return 0; } } } return errorsAllowed+1; } string unitig,subRead(read.substr(overlap1.second+k-1,overlap2.second-overlap1.second-(k-1))); auto rangeUnitigs1(getBegin(overlap1.first)); auto rangeUnitigs2(getEnd(overlap2.first)); uint minMissMatch(errorsAllowed+1),indice(0); int32_t positionget1, positionget2; for(uint i(0); i<rangeUnitigs1.size(); ++i){ positionget1=rangeUnitigs1[i].second; for(uint j(0); j<rangeUnitigs2.size(); ++j){ positionget2=rangeUnitigs2[j].second; if(positionget2==positionget1){ unitig=getUnitig(rangeUnitigs1[i].second); if(unitig.size()-2*(k-1)==subRead.size()){ uint missmatch(missmatchNumber(unitig.substr(k-1,subRead.size()), subRead, errorsAllowed)); if(missmatch<minMissMatch){ minMissMatch=missmatch; indice=i; } } } } } if(minMissMatch<=errorsAllowed){number=(rangeUnitigs1[indice].second);} return minMissMatch; }