void genomeGenerate(Parameters *P) { //check parameters if (P->sjdbOverhang<=0 && (P->sjdbFileChrStartEnd.at(0)!="-" || P->sjdbGTFfile!="-")) { ostringstream errOut; errOut << "EXITING because of FATAL INPUT PARAMETER ERROR: for generating genome with annotations (--sjdbFileChrStartEnd or --sjdbGTFfile options)\n"; errOut << "you need to specify >0 --sjdbOverhang\n"; errOut << "SOLUTION: re-run genome generation specifying non-zero --sjdbOverhang, which ideally should be equal to OneMateLength-1, or could be chosen generically as ~100\n"; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_INPUT_FILES, *P); } if (P->sjdbFileChrStartEnd.at(0)=="-" && P->sjdbGTFfile=="-") { if (P->parArray.at(P->sjdbOverhang_par)->inputLevel>0 && P->sjdbOverhang>0) { ostringstream errOut; errOut << "EXITING because of FATAL INPUT PARAMETER ERROR: when generating genome without annotations (--sjdbFileChrStartEnd or --sjdbGTFfile options)\n"; errOut << "do not specify >0 --sjdbOverhang\n"; errOut << "SOLUTION: re-run genome generation without --sjdbOverhang option\n"; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_INPUT_FILES, *P); }; P->sjdbOverhang=0; }; //time time_t rawTime; string timeString; time(&rawTime); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... Starting to generate Genome files\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... Starting to generate Genome files\n" <<flush; //define some parameters from input parameters P->genomeChrBinNbases=1LLU << P->genomeChrBinNbits; //write genome parameters file genomeParametersWrite(P->genomeDir+("/genomeParameters.txt"), P, "ERROR_00102"); char *G=NULL, *G1=NULL; uint nGenomeReal=genomeScanFastaFiles(P,G,false);//first scan the fasta file to find all the sizes P->chrBinFill(); uint L=10000;//maximum length of genome suffix uint nG1alloc=(nGenomeReal + L)*2; G1=new char[nG1alloc]; G=G1+L; memset(G1,GENOME_spacingChar,nG1alloc);//initialize to K-1 all bytes genomeScanFastaFiles(P,G,true); //load the genome sequence uint N = nGenomeReal; P->nGenome=N; uint N2 = N*2; ofstream chrN,chrS,chrL,chrNL; ofstrOpen(P->genomeDir+"/chrName.txt","ERROR_00103", P, chrN); ofstrOpen(P->genomeDir+"/chrStart.txt","ERROR_00103", P, chrS); ofstrOpen(P->genomeDir+"/chrLength.txt","ERROR_00103", P, chrL); ofstrOpen(P->genomeDir+"/chrNameLength.txt","ERROR_00103", P, chrNL); for (uint ii=0;ii<P->nChrReal;ii++) {//output names, starts, lengths chrN<<P->chrName[ii]<<"\n"; chrS<<P->chrStart[ii]<<"\n"; chrL<<P->chrLength.at(ii)<<"\n"; chrNL<<P->chrName[ii]<<"\t"<<P->chrLength.at(ii)<<"\n"; }; chrS<<P->chrStart[P->nChrReal]<<"\n";//size of the genome chrN.close();chrL.close();chrS.close(); chrNL.close(); if (P->limitGenomeGenerateRAM < (nG1alloc+nG1alloc/3)) {//allocate nG1alloc/3 for SA generation ostringstream errOut; errOut <<"EXITING because of FATAL PARAMETER ERROR: limitGenomeGenerateRAM="<< (P->limitGenomeGenerateRAM) <<"is too small for your genome\n"; errOut <<"SOLUTION: please specify limitGenomeGenerateRAM not less than"<< nG1alloc+nG1alloc/3 <<" and make that much RAM available \n"; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_INPUT_FILES, *P); }; //preparing to generate SA for (uint ii=0;ii<N;ii++) {//- strand G[N2-1-ii]=G[ii]<4 ? 3-G[ii] : G[ii]; }; P->nSA=0; for (uint ii=0;ii<N2;ii+=P->genomeSAsparseD) { if (G[ii]<4) { P->nSA++; }; }; P->GstrandBit = (uint) floor(log(N)/log(2))+1; if (P->GstrandBit<32) P->GstrandBit=32; //TODO: use simple access function for SA P->GstrandMask = ~(1LLU<<P->GstrandBit); PackedArray SA1;//SA without sjdb SA1.defineBits(P->GstrandBit+1,P->nSA); PackedArray SA2;//SA with sjdb, reserve more space if (P->sjdbInsert.yes) {//reserve space for junction insertion SA2.defineBits(P->GstrandBit+1,P->nSA+2*P->limitSjdbInsertNsj*P->sjdbLength);//TODO: this allocation is wasteful, get a better estimate of the number of junctions } else {//same as SA1 SA2.defineBits(P->GstrandBit+1,P->nSA); }; P->nSAbyte=SA2.lengthByte; P->inOut->logMain << "Number of SA indices: "<< P->nSA << "\n"<<flush; //sort SA time ( &rawTime ); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... starting to sort Suffix Array. This may take a long time...\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... starting to sort Suffix Array. This may take a long time...\n" <<flush; // if (false) {//sort SA chunks for (uint ii=0;ii<N;ii++) {//re-fill the array backwards for sorting swap(G[N2-1-ii],G[ii]); }; globalG=G; globalL=L/sizeof(uint); //count the number of indices with 4nt prefix uint indPrefN=1LLU << 16; uint* indPrefCount = new uint [indPrefN]; memset(indPrefCount,0,indPrefN*sizeof(indPrefCount[0])); P->nSA=0; for (uint ii=0;ii<N2;ii+=P->genomeSAsparseD) { if (G[ii]<4) { uint p1=(G[ii]<<12) + (G[ii-1]<<8) + (G[ii-2]<<4) + G[ii-3]; indPrefCount[p1]++; P->nSA++; }; }; uint saChunkSize=(P->limitGenomeGenerateRAM-nG1alloc)/8/P->runThreadN; //number of SA indexes per chunk saChunkSize=saChunkSize*6/10; //allow extra space for qsort //uint saChunkN=((P->nSA/saChunkSize+1)/P->runThreadN+1)*P->runThreadN;//ensure saChunkN is divisible by P->runThreadN //saChunkSize=P->nSA/saChunkN+100000;//final chunk size if (P->runThreadN>1) saChunkSize=min(saChunkSize,P->nSA/(P->runThreadN-1)); uint saChunkN=P->nSA/saChunkSize;//estimate uint* indPrefStart = new uint [saChunkN*2]; //start and stop, *2 just in case uint* indPrefChunkCount = new uint [saChunkN*2]; indPrefStart[0]=0; saChunkN=0;//start counting chunks uint chunkSize1=indPrefCount[0]; for (uint ii=1; ii<indPrefN; ii++) { chunkSize1 += indPrefCount[ii]; if (chunkSize1 > saChunkSize) { saChunkN++; indPrefStart[saChunkN]=ii; indPrefChunkCount[saChunkN-1]=chunkSize1-indPrefCount[ii]; chunkSize1=indPrefCount[ii]; }; }; saChunkN++; indPrefStart[saChunkN]=indPrefN+1; indPrefChunkCount[saChunkN-1]=chunkSize1; P->inOut->logMain << "Number of chunks: " << saChunkN <<"; chunks size limit: " << saChunkSize*8 <<" bytes\n" <<flush; time ( &rawTime ); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... sorting Suffix Array chunks and saving them to disk...\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... sorting Suffix Array chunks and saving them to disk...\n" <<flush; #pragma omp parallel for num_threads(P->runThreadN) ordered schedule(dynamic,1) for (int iChunk=0; iChunk < (int) saChunkN; iChunk++) {//start the chunk cycle: sort each chunk with qsort and write to a file uint* saChunk=new uint [indPrefChunkCount[iChunk]];//allocate local array for each chunk for (uint ii=0,jj=0;ii<N2;ii+=P->genomeSAsparseD) {//fill the chunk with SA indices if (G[ii]<4) { uint p1=(G[ii]<<12) + (G[ii-1]<<8) + (G[ii-2]<<4) + G[ii-3]; if (p1>=indPrefStart[iChunk] && p1<indPrefStart[iChunk+1]) { saChunk[jj]=ii; jj++; }; //TODO: if (jj==indPrefChunkCount[iChunk]) break; }; }; //sort the chunk qsort(saChunk,indPrefChunkCount[iChunk],sizeof(saChunk[0]),funCompareSuffixes); for (uint ii=0;ii<indPrefChunkCount[iChunk];ii++) { saChunk[ii]=N2-1-saChunk[ii]; }; //write files ofstream saChunkFile; string chunkFileName=P->genomeDir+"/SA_"+to_string( (uint) iChunk); ofstrOpen(chunkFileName,"ERROR_00105", P, saChunkFile); fstreamWriteBig(saChunkFile, (char*) saChunk, sizeof(saChunk[0])*indPrefChunkCount[iChunk],chunkFileName,"ERROR_00121",P); saChunkFile.close(); delete [] saChunk; saChunk=NULL; }; time ( &rawTime ); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... loading chunks from disk, packing SA...\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... loading chunks from disk, packing SA...\n" <<flush; //read chunks and pack into full SA1 SA2.allocateArray(); SA1.pointArray(SA2.charArray + SA2.lengthByte-SA1.lengthByte); //SA1 is shifted to have space for junction insertion uint N2bit= 1LLU << P->GstrandBit; uint packedInd=0; #define SA_CHUNK_BLOCK_SIZE 10000000 uint* saIn=new uint[SA_CHUNK_BLOCK_SIZE]; //TODO make adjustable #ifdef genenomeGenerate_SA_textOutput ofstream SAtxtStream ((P->genomeDir + "/SAtxt").c_str()); #endif for (uint iChunk=0;iChunk<saChunkN;iChunk++) {//load files one by one and convert to packed ostringstream saChunkFileNameStream(""); saChunkFileNameStream<< P->genomeDir << "/SA_" << iChunk; ifstream saChunkFile(saChunkFileNameStream.str().c_str()); while (! saChunkFile.eof()) {//read blocks from each file uint chunkBytesN=fstreamReadBig(saChunkFile,(char*) saIn,SA_CHUNK_BLOCK_SIZE*sizeof(saIn[0])); for (uint ii=0;ii<chunkBytesN/sizeof(saIn[0]);ii++) { SA1.writePacked( packedInd+ii, (saIn[ii]<N) ? saIn[ii] : ( (saIn[ii]-N) | N2bit ) ); #ifdef genenomeGenerate_SA_textOutput SAtxtStream << saIn[ii] << "\n"; #endif }; packedInd += chunkBytesN/sizeof(saIn[0]); }; saChunkFile.close(); remove(saChunkFileNameStream.str().c_str());//remove the chunk file }; #ifdef genenomeGenerate_SA_textOutput SAtxtStream.close(); #endif delete [] saIn; if (packedInd != P->nSA ) {// ostringstream errOut; errOut << "EXITING because of FATAL problem while generating the suffix array\n"; errOut << "The number of indices read from chunks = "<<packedInd<<" is not equal to expected nSA="<<P->nSA<<"\n"; errOut << "SOLUTION: try to re-run suffix array generation, if it still does not work, report this problem to the author\n"<<flush; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_INPUT_FILES, *P); }; //DONE with suffix array generation for (uint ii=0;ii<N;ii++) {//return to normal order for future use swap(G[N2-1-ii],G[ii]); }; delete [] indPrefCount; delete [] indPrefStart; delete [] indPrefChunkCount; }; time ( &rawTime ); timeString=asctime(localtime ( &rawTime )); timeString.erase(timeString.end()-1,timeString.end()); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... Finished generating suffix array\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... Finished generating suffix array\n" <<flush; //////////////////////////////////////// // SA index // // PackedArray SAold; // // if (true) // {//testing: load SA from disk // //read chunks and pack into full SA1 // // ifstream oldSAin("./DirTrue/SA"); // oldSAin.seekg (0, ios::end); // P->nSAbyte=(uint) oldSAin.tellg(); // oldSAin.clear(); // oldSAin.seekg (0, ios::beg); // // P->nSA=(P->nSAbyte*8)/(P->GstrandBit+1); // SAold.defineBits(P->GstrandBit+1,P->nSA); // SAold.allocateArray(); // // oldSAin.read(SAold.charArray,SAold.lengthByte); // oldSAin.close(); // // SA1=SAold; // SA2=SAold; // }; PackedArray SAip; genomeSAindex(G,SA1,P,SAip); if (P->sjdbFileChrStartEnd.at(0)!="-" || P->sjdbGTFfile!="-") {//insert junctions SjdbClass sjdbLoci; Genome mainGenome(P); mainGenome.G=G; mainGenome.SA=SA1; mainGenome.SApass1=SA2; mainGenome.SAi=SAip; P->sjdbInsert.outDir=P->genomeDir; P->sjdbN=0;//no junctions are loaded yet P->twoPass.pass2=false; Parameters *P1=new Parameters; *P1=*P; sjdbInsertJunctions(P, P1, mainGenome, sjdbLoci); //write an extra 0 at the end of the array, filling the last bytes that otherwise are not accessible, but will be written to disk //this is - to avoid valgrind complaints. Note that SA2 is allocated with plenty of space to spare. SA2.writePacked(P->nSA,0); }; //write genome to disk time ( &rawTime ); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... writing Genome to disk ...\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... writing Genome to disk ...\n" <<flush; ofstream genomeOut; ofstrOpen(P->genomeDir+"/Genome","ERROR_00104", P, genomeOut); fstreamWriteBig(genomeOut,G,P->nGenome,P->genomeDir+"/Genome","ERROR_00120",P); genomeOut.close(); //write SA time ( &rawTime ); P->inOut->logMain << "SA size in bytes: "<< P->nSAbyte << "\n"<<flush; P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... writing Suffix Array to disk ...\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... writing Suffix Array to disk ...\n" <<flush; ofstream SAout; ofstrOpen(P->genomeDir+"/SA","ERROR_00106", P, SAout); fstreamWriteBig(SAout,(char*) SA2.charArray, (streamsize) P->nSAbyte,P->genomeDir+"/SA","ERROR_00122",P); SAout.close(); //write SAi time(&rawTime); P->inOut->logMain << timeMonthDayTime(rawTime) <<" ... writing SAindex to disk\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) <<" ... writing SAindex to disk\n" <<flush; //write SAi to disk ofstream SAiOut; ofstrOpen(P->genomeDir+"/SAindex","ERROR_00107", P, SAiOut); fstreamWriteBig(SAiOut, (char*) &P->genomeSAindexNbases, sizeof(P->genomeSAindexNbases),P->genomeDir+"/SAindex","ERROR_00123",P); fstreamWriteBig(SAiOut, (char*) P->genomeSAindexStart, sizeof(P->genomeSAindexStart[0])*(P->genomeSAindexNbases+1),P->genomeDir+"/SAindex","ERROR_00124",P); fstreamWriteBig(SAiOut, SAip.charArray, SAip.lengthByte,P->genomeDir+"/SAindex","ERROR_00125",P); SAiOut.close(); SA2.deallocateArray(); time(&rawTime); timeString=asctime(localtime ( &rawTime )); timeString.erase(timeString.end()-1,timeString.end()); time(&rawTime); P->inOut->logMain << timeMonthDayTime(rawTime) << " ..... Finished successfully\n" <<flush; *P->inOut->logStdOut << timeMonthDayTime(rawTime) << " ..... Finished successfully\n" <<flush; };
uint insertSeqSA(PackedArray & SA, PackedArray & SA1, PackedArray & SAi, char * G, char * G1, uint64 nG, uint64 nG1, uint64 nG2, Parameters * P) {//insert new sequences into the SA uint GstrandBit1 = (uint) floor(log(nG+nG1)/log(2))+1; if (GstrandBit1<32) GstrandBit1=32; //TODO: use simple access function for SA if ( GstrandBit1+1 != SA.wordLength) {//sequence is too long - GstrandBit changed ostringstream errOut; errOut << "EXITING because of FATAL ERROR: cannot insert sequence on the fly because of strand GstrandBit problem\n"; errOut << "SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star\n"; exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_GENOME_FILES, *P); }; uint N2bit= 1LLU << (SA.wordLength-1); uint strandMask=~N2bit; for (uint64 isa=0;isa<SA.length; isa++) { uint64 ind1=SA[isa]; if ( (ind1 & N2bit)>0 ) {//- strand if ( (ind1 & strandMask)>=nG2 ) {//the first nG bases ind1+=nG1; //reverse complementary indices are all shifted by the length of the sequence SA.writePacked(isa,ind1); }; } else {//+ strand if ( ind1>=nG ) {//the last nG2 bases ind1+=nG1; //reverse complementary indices are all shifted by the length of the sequence SA.writePacked(isa,ind1); }; }; }; char** seq1=new char*[2]; #define GENOME_endFillL 16 char* seqq=new char [4*nG1+3*GENOME_endFillL];//ends shouldbe filled with 5 to mark boundaries seq1[0]=seqq+GENOME_endFillL;//TODO: avoid defining an extra array, use reverse search seq1[1]=seqq+2*GENOME_endFillL+2*nG1; memset(seqq,GENOME_spacingChar,GENOME_endFillL); memset(seqq+2*nG1+GENOME_endFillL,GENOME_spacingChar,GENOME_endFillL); memset(seqq+4*nG1+2*GENOME_endFillL,GENOME_spacingChar,GENOME_endFillL); memcpy(seq1[0], G1, nG1); for (uint ii=0; ii<nG1; ii++) {//reverse complement sequence seq1[0][2*nG1-1-ii]=seq1[0][ii]<4 ? 3-seq1[0][ii] : seq1[0][ii]; }; complementSeqNumbers(seq1[0], seq1[1], 2*nG1);//complement uint64* indArray=new uint64[nG1*2*2+2];// for each base, 1st number - insertion place in SA, 2nd number - index, *2 for reverse compl #pragma omp parallel num_threads(P->runThreadN) #pragma omp for schedule (dynamic,1000) for (uint ii=0; ii<2*nG1; ii++) {//find insertion points for each of the sequences if (seq1[0][ii]>3) {//no index for suffices starting with N indArray[ii*2]=-1; } else { indArray[ii*2] = suffixArraySearch1(seq1, ii, 10000, G, nG, SA, (ii<nG1 ? true:false), 0, SA.length-1, 0, P) ; indArray[ii*2+1] = ii; }; }; uint64 nInd=0;//true number of new indices for (uint ii=0; ii<2*nG1; ii++) {//remove entries that cannot be inserted, this cannot be done in the parallel cycle above if (indArray[ii*2]!= (uint) -1) { indArray[nInd*2]=indArray[ii*2]; indArray[nInd*2+1]=indArray[ii*2+1]; ++nInd; }; }; time_t rawtime; time ( &rawtime ); P->inOut->logMain << timeMonthDayTime(rawtime) << " Finished SA search, number of new SA indices = "<<nInd<<endl; globalGenomeArray=seq1[0]; qsort((void*) indArray, nInd, 2*sizeof(uint64), funCompareUintAndSuffixes); time ( &rawtime ); P->inOut->logMain << timeMonthDayTime(rawtime) << " Finished sorting SA indices"<<endl; indArray[2*nInd]=-999; //mark the last junction indArray[2*nInd+1]=-999; //mark the last junction SA1.defineBits(SA.wordLength,SA.length+nInd); /*testing PackedArray SAo; SAo.defineBits(P->GstrandBit+1,P->nSA+nInd); SAo.allocateArray(); ifstream oldSAin("./DirTrue/SA"); oldSAin.read(SAo.charArray,SAo.lengthByte); oldSAin.close(); */ uint isa1=0, isa2=0; for (uint isa=0;isa<SA.length;isa++) { while (isa==indArray[isa1*2]) {//insert new index before the existing index uint ind1=indArray[isa1*2+1]; if (ind1<nG1) { ind1+=nG; } else {//reverse strand ind1=(ind1-nG1+nG2) | N2bit; }; SA1.writePacked(isa2,ind1); /*testing if (SA1[isa2]!=SAo[isa2]) { cout <<isa2 <<" "<< SA1[isa2]<<" "<<SAo[isa2]<<endl; //sleep(100); }; */ ++isa2; ++isa1; }; SA1.writePacked(isa2,SA[isa]); //TODO make sure that the first sj index is not before the first array index /*testing if (SA1[isa2]!=SAo[isa2]) { cout <<isa2 <<" "<< SA1[isa2]<<" "<<SAo[isa2]<<endl; //sleep(100); }; */ ++isa2; }; for (;isa1<nInd;isa1++) {//insert the last indices uint ind1=indArray[isa1*2+1]; if (ind1<nG1) { ind1+=nG; } else {//reverse strand ind1=(ind1-nG1+nG2) | N2bit; }; SA1.writePacked(isa2,ind1); ++isa2; }; time ( &rawtime ); P->inOut->logMain << timeMonthDayTime(rawtime) << " Finished inserting SA indices" <<endl; // //SAi insertions // for (uint iL=0; iL < P->genomeSAindexNbases; iL++) { // uint iSeq=0; // uint ind0=P->genomeSAindexStart[iL]-1;//last index that was present in the old genome // for (uint ii=P->genomeSAindexStart[iL];ii<P->genomeSAindexStart[iL+1]; ii++) {//scan through the longest index // if (ii==798466) // cout <<ii; // // uint iSA1=SAi[ii]; // uint iSA2=iSA1 & P->SAiMarkNmask & P->SAiMarkAbsentMask; // // if ( iSeq<nInd && (iSA1 & P->SAiMarkAbsentMaskC)>0 ) // {//index missing from the old genome // uint iSeq1=iSeq; // int64 ind1=funCalcSAi(seq1[0]+indArray[2*iSeq+1],iL); // while (ind1 < (int64)(ii-P->genomeSAindexStart[iL]) && indArray[2*iSeq]<iSA2) { // ++iSeq; // ind1=funCalcSAi(seq1[0]+indArray[2*iSeq+1],iL); // }; // if (ind1 == (int64)(ii-P->genomeSAindexStart[iL]) ) { // SAi.writePacked(ii,indArray[2*iSeq]+iSeq+1); // for (uint ii0=ind0+1; ii0<ii; ii0++) {//fill all the absent indices with this value // SAi.writePacked(ii0,(indArray[2*iSeq]+iSeq+1) | P->SAiMarkAbsentMaskC); // }; // ++iSeq; // ind0=ii; // } else { // iSeq=iSeq1; // }; // } else // {//index was present in the old genome // while (iSeq<nInd && indArray[2*iSeq]+1<iSA2) {//for this index insert "smaller" junctions // ++iSeq; // }; // // while (iSeq<nInd && indArray[2*iSeq]+1==iSA2) {//special case, the index falls right behind SAi // if (funCalcSAi(seq1[0]+indArray[2*iSeq+1],iL) >= (int64) (ii-P->genomeSAindexStart[iL]) ) {//this belongs to the next index // break; // }; // ++iSeq; // }; // // SAi.writePacked(ii,iSA1+iSeq); // // for (uint ii0=ind0+1; ii0<ii; ii0++) {//fill all the absent indices with this value // SAi.writePacked(ii0,(iSA2+iSeq) | P->SAiMarkAbsentMaskC); // }; // ind0=ii; // }; // }; // // }; // // time ( &rawtime ); cout << timeMonthDayTime(rawtime) << "SAi first" <<endl; // // for (uint isj=0;isj<nInd;isj++) { // int64 ind1=0; // for (uint iL=0; iL < P->genomeSAindexNbases; iL++) { // uint g=(uint) seq1[0][indArray[2*isj+1]+iL]; // ind1 <<= 2; // if (g>3) {//this iSA contains N, need to mark the previous // for (uint iL1=iL; iL1 < P->genomeSAindexNbases; iL1++) { // ind1+=3; // int64 ind2=P->genomeSAindexStart[iL1]+ind1; // for (; ind2>=0; ind2--) {//find previous index that is not absent // if ( (SAi[ind2] & P->SAiMarkAbsentMaskC)==0 ) { // break; // }; // }; // SAi.writePacked(ind2,SAi[ind2] | P->SAiMarkNmaskC); // ind1 <<= 2; // }; // break; // } else { // ind1 += g; // }; // }; // }; // time ( &rawtime ); // P->inOut->logMain << timeMonthDayTime(rawtime) << " Finished SAi" <<endl; // // /* testing // PackedArray SAio=SAi; // SAio.allocateArray(); // ifstream oldSAiin("./DirTrue/SAindex"); // oldSAiin.read(SAio.charArray,8*(P->genomeSAindexNbases+2));//skip first bytes // oldSAiin.read(SAio.charArray,SAio.lengthByte); // oldSAiin.close(); // // for (uint iL=0; iL < P->genomeSAindexNbases; iL++) { // for (uint ii=P->genomeSAindexStart[iL];ii<P->genomeSAindexStart[iL+1]; ii++) {//scan through the longets index // if ( SAio[ii]!=SAi[ii] ) { // cout <<iL<<" "<<ii<<" "<<SAio[ii]<<" "<<SAi[ii]<<endl; // }; // }; // }; // */ //change parameters, most parameters are already re-defined in sjdbPrepare.cpp SA.defineBits(P->GstrandBit+1,SA.length+nInd);//same as SA2 SA.pointArray(SA1.charArray); P->nSA=SA.length; P->nSAbyte=SA.lengthByte; //generate SAi genomeSAindex(G,SA,P,SAi); time ( &rawtime ); P->inOut->logMain << timeMonthDayTime(rawtime) << " Finished SAi" <<endl; // P->sjGstart=P->chrStart[P->nChrReal]; // memcpy(G+P->chrStart[P->nChrReal],seq1[0], nseq1[0]); return nInd; };