Beispiel #1
0
// Processes one read (or one pair of mates): loads it from the input stream(s),
// fills the forward / complement / reverse-complement arrays, maps it and
// writes out the alignments.
// Returns -1 when the input stream is finished, 0 otherwise.
int ReadAlign::oneRead() {

    int readStatus[2];

    //mate 1: read name, sequence and quality are loaded into the internal arrays
    readStatus[0]=readLoad(*(readInStream[0]), P, 0, readLength[0], readLengthOriginal[0], readNameMates[0], Read0[0], Read1[0], Qual0[0], Qual1[0], clip3pNtotal[0], clip5pNtotal[0], clip3pAdapterN[0], iReadAll, readFilesIndex, readFilter, readNameExtra[0]);

    if (P.readNmates!=2) {
        //single mate
        if (readStatus[0]==-1) {
            return -1; //finished with the stream
        }

        Lread=readLength[0];
        readLengthPairOriginal=readLengthOriginal[0];
        readLength[1]=0;

    } else {
        //mate 2 is loaded behind mate 1, one position is skipped for the spacer base
        readStatus[1]=readLoad(*(readInStream[1]), P, 1, readLength[1], readLengthOriginal[1], readNameMates[1], Read0[1], Read1[0]+readLength[0]+1, Qual0[1], Qual1[0]+readLength[0]+1, clip3pNtotal[1], clip5pNtotal[1], clip3pAdapterN[1], iReadAll, readFilesIndex, readFilter, readNameExtra[1]);

        if (readStatus[0]!=readStatus[1]) {
            //the two mate files disagree on the load status
            ostringstream errOut;
            errOut << "EXITING because of FATAL ERROR: Read1 and Read2 are not consistent, reached the end of the one before the other one\n"
                   << "SOLUTION: Check you your input files: they may be corrupted\n";
            exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
        } else if (readStatus[0]==-1) {
            return -1; //finished with the stream
        }

        //lengths of the combined pair include the one spacer position
        Lread=readLength[0]+readLength[1]+1;
        readLengthPairOriginal=readLengthOriginal[0]+readLengthOriginal[1]+1;

        if (Lread>DEF_readSeqLengthMax) {
            ostringstream errOut;
            errOut << "EXITING because of FATAL ERROR in reads input: Lread of the pair = " << Lread << "   while DEF_readSeqLengthMax=" << DEF_readSeqLengthMax <<endl;
            errOut << "Read Name="<<readNameMates[0]<<endl;
            errOut << "SOLUTION: increase DEF_readSeqLengthMax in IncludeDefine.h and re-compile STAR"<<endl<<flush;
            exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
        }

        //spacer between the mates
        Read1[0][readLength[0]]=MARK_FRAG_SPACER_BASE;
        Qual1[0][readLength[0]]=0;

        //reverse-complement mate 2 in place: complement first, then reverse
        //the sequence and the quality by swapping from both ends inwards
        complementSeqNumbers(Read1[0]+readLength[0]+1,Read1[0]+readLength[0]+1,readLength[1]);
        for (uint lo=readLength[0]+1, hi=Lread-1; lo<hi; ++lo, --hi) {
            swap(Read1[0][hi],Read1[0][lo]);
            swap(Qual1[0][hi],Qual1[0][lo]);
        }
    }

    readFileType=readStatus[0];

    //Read1[1] = complement of Read1[0]; Read1[2] = reversed Read1[1]; Qual1[1] = reversed Qual1[0]
    complementSeqNumbers(Read1[0],Read1[1],Lread);
    for (uint src=0, dst=Lread; src<Lread; ++src) {
        --dst;
        Read1[2][dst]=Read1[1][src];
        Qual1[1][dst]=Qual1[0][src];
    }

    //read statistics
    statsRA.readN++;
    statsRA.readBases += readLength[0]+readLength[1];

    //max number of mismatches allowed for this read
    outFilterMismatchNmaxTotal=min(P.outFilterMismatchNmax, (uint) (P.outFilterMismatchNoverReadLmax*(readLength[0]+readLength[1])));

    //mapping and filtering
    mapOneRead();
    peOverlapMergeMap();
    multMapSelect();
    mappedFilter();

    //for mates-merged alignments the chimeric detection was already done
    if (!peOv.yes) {
        chimericDetection();
    }

    //chimeric alignment was recorded in the main BAM file and contains the
    //representative portion, so the non-chimeric alignment is not output
    if (P.pCh.out.bam && chimRecord) {
        return 0;
    }

    waspMap();

    #ifdef OFF_BEFORE_OUTPUT
        #warning OFF_BEFORE_OUTPUT
        return 0;
    #endif

    //write out alignments
    outputAlignments();

    return 0;
}
Beispiel #2
0
/**
 * Inserts splice-junction sequences into existing genome indices.
 *
 * Steps, in order:
 *   1. appends the reverse complement of the junction sequences to Gsj and
 *      builds their complement (G1c);
 *   2. for every suffix of every junction sequence finds the insertion point
 *      in the existing suffix array SA (in parallel with OpenMP);
 *   3. sorts the insertion points and merges them with SA into SA2, shifting
 *      the indices of the old entries where required;
 *   4. updates the SAi index accordingly;
 *   5. re-points SA to the SA2 data and copies Gsj behind the chromosomes in G.
 *
 * @param P    parameters describing the junction set to insert (sjdbN,
 *             sjdbLength, sjdbStart/sjdbEnd, ...); nGenome, nSA, nSAbyte and
 *             sjGstart are updated here
 * @param P1   parameters of the previous state (sjdbN, sjdbStart/sjdbEnd, nSA,
 *             nGenome), used to recognise junctions that were already present
 * @param Gsj  junction sequences in numeric coding; the code writes up to
 *             index 2*sjdbLength*sjdbN, so the buffer has to be at least
 *             2*sjdbLength*sjdbN+1 chars
 * @param G    genome; the first sjdbLength*sjdbN chars of Gsj are copied to
 *             G+chrStart[nChrReal]
 * @param SA   existing suffix array; on return it is re-defined and points to
 *             the SA2 data
 * @param SA2  receives the merged suffix array (its bits are defined here;
 *             the storage is presumably allocated by the caller - confirm)
 * @param SAi  SA index, updated in place
 */
void sjdbBuildIndex (Parameters *P, Parameters *P1, char *Gsj, char *G, PackedArray &SA, PackedArray &SA2, PackedArray &SAi) {

    #define SPACER_CHAR GENOME_spacingChar

    if (P->sjdbN==0)
    {//no junctions to insert
        return;
    };

    time_t rawtime;
    time ( &rawtime );
    P->inOut->logMain   << timeMonthDayTime(rawtime) << " ..... Inserting junctions into the genome indices" <<endl;
    *P->inOut->logStdOut  << timeMonthDayTime(rawtime) << " ..... Inserting junctions into the genome indices" <<endl;

    uint nGsj=P->sjdbLength*P->sjdbN;//total length of all junction sequences, one strand
    for (uint ii=1; ii<=P->sjdbN; ii++)
    {
        Gsj[ii*P->sjdbLength-1]=SPACER_CHAR; //to make sure this is > than any genome char
    };
    Gsj[nGsj*2]=SPACER_CHAR+1;//mark the end of the text

    for (uint ii=0; ii<nGsj; ii++) {//reverse complement junction sequences
        Gsj[nGsj*2-1-ii]=Gsj[ii]<4 ? 3-Gsj[ii] : Gsj[ii]; //reverse complement
    };

    char* G1c=new char[nGsj*2+1];
    complementSeqNumbers(Gsj, G1c, nGsj*2+1);

    uint32* oldSJind=new uint32[P1->sjdbN];//for each old junction: its index in the new junction list

//     uint nIndicesSJ1=P->sjdbOverhang;
    uint   nIndicesSJ1=P->sjdbLength;//keep all indices - this is pre-2.4.1 of generating the genome

    //pairs of (insertion place in SA, position in Gsj), for each junction start, both strands;
    //one extra pair is needed for the end marker written after sorting
    uint64* indArray=new uint64[2*P->sjdbN*(nIndicesSJ1+1)*2];//8+4 bytes for SA index and index in the genome * nJunction * nIndices per junction * 2 for reverse compl
    uint64 sjNew=0;
    #pragma omp parallel num_threads(P->runThreadN)
    #pragma omp for schedule (dynamic,1000) reduction(+:sjNew)
    for (uint isj=0; isj<2*P->sjdbN; isj++) {//find insertion points for each of the sequences

        //sequence and its complement for this junction.
        //Kept on the stack: the previous per-iteration new char*[2] was never freed.
        char* seq1[2];
        seq1[0]=Gsj+isj*P->sjdbLength;
        seq1[1]=G1c+isj*P->sjdbLength;

        //the reverse-complement copies are stored in the reversed junction order
        uint isj1=isj<P->sjdbN ? isj : 2*P->sjdbN-1-isj;
        int sjdbInd = P1->sjdbN==0 ? -1 : binarySearch2(P->sjdbStart[isj1],P->sjdbEnd[isj1],P1->sjdbStart,P1->sjdbEnd,P1->sjdbN);
        if (sjdbInd<0)
        {//count new junctions
            ++sjNew;
        } else
        {//record new index of the old junctions
            oldSJind[sjdbInd]=isj1;
        };

        for (uint istart1=0; istart1<nIndicesSJ1;istart1++) {

            uint istart=istart1;
//             uint istart=isj<P->sjdbN ? istart1 : istart1+1; //for rev-compl junction, shift by one base to start with the 1st non-spacer base
            uint ind1=2*(isj*nIndicesSJ1+istart1);
            if (sjdbInd>=0 || seq1[0][istart]>3)
            {//no index for already included junctions, or suffices starting with N
                indArray[ind1]=-1;
            } else
            {
                //indArray[ind1] =  suffixArraySearch(seq1, istart, P->sjdbLength-istart1, G, SA, true, 0, P->nSA-1, 0, P) ;
                indArray[ind1] =  suffixArraySearch(seq1, istart, 10000, G, SA, true, 0, P->nSA-1, 0, P) ;
                indArray[ind1+1] = isj*P->sjdbLength+istart;
            };
        };
    };
    sjNew = sjNew/2;//novel junctions were double counted on two strands

    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished SA search: number of new junctions=" << sjNew <<", old junctions="<<P->sjdbN-sjNew<<endl;

    uint nInd=0;//true number of new indices
    for (uint ii=0; ii<2*P->sjdbN*nIndicesSJ1; ii++) {//remove entries that cannot be inserted, this cannot be done in the parallel cycle above
        if (indArray[ii*2]!= (uint) -1) {
            indArray[nInd*2]=indArray[ii*2];
            indArray[nInd*2+1]=indArray[ii*2+1];
            ++nInd;
        };
    };

    globalGsj=Gsj;//sequence used by the comparison function
    qsort((void*) indArray, nInd, 2*sizeof(uint64), funCompareUintAndSuffixes);
    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished sorting SA indicesL nInd="<<nInd <<endl;

    indArray[2*nInd]=-999; //mark the last junction
    indArray[2*nInd+1]=-999; //mark the last junction

    P->nGenome=P->chrStart[P->nChrReal]+nGsj;
    P->nSA+=nInd;

    uint GstrandBit1 = (uint) floor(log(P->nGenome)/log(2))+1;
    if (GstrandBit1<32) GstrandBit1=32; //TODO: use simple access function for SA
    if ( GstrandBit1 != P->GstrandBit)
    {//too many junctions were added - GstrandBit changed
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: cannot insert junctions on the fly because of strand GstrandBit problem\n";
        errOut << "SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_GENOME_FILES, *P);
    };

    SA2.defineBits(P->GstrandBit+1,P->nSA);
    uint nGsjNew=sjNew*P->sjdbLength; //this is the actual number of bytes added to the genome, while nGsj is the total size of all junctions

    uint N2bit= 1LLU << P->GstrandBit;//this bit is set for - strand indices
    uint strandMask=~N2bit;

    //merge: copy the old SA entries (shifted where needed) into SA2,
    //inserting the sorted junction entries after their insertion places
    uint isj=0, isa2=0;
    for (uint isa=0;isa<P1->nSA;isa++) {

        uint ind1=SA[isa];

        if ( (ind1 & N2bit)>0 )
        {//- strand
            uint ind1s = P1->nGenome - (ind1 & strandMask);
            if (ind1s>P->chrStart[P->nChrReal])
            {//this index was an old sj, may need to shift it
                uint sj1 = (ind1s-P->chrStart[P->nChrReal])/P->sjdbLength;//old junction index
                ind1s += (oldSJind[sj1]-sj1)*P->sjdbLength;
                ind1 = (P->nGenome - ind1s) | N2bit;
            } else
            {
                ind1+=nGsjNew; //reverse complementary indices are all shifted by the length of junctions
            };
        } else
        {//+ strand
            if (ind1>P->chrStart[P->nChrReal])
            {//this index was an old sj, may need to shift it
                uint sj1 = (ind1-P->chrStart[P->nChrReal])/P->sjdbLength;//old junction index
                ind1 += (oldSJind[sj1]-sj1)*P->sjdbLength;
            };
        };

        SA2.writePacked(isa2,ind1); //TODO make sure that the first sj index is not before the first array index
        ++isa2;

        //the -999 end marker in indArray never equals isa and stops this loop
        while (isa==indArray[isj*2]) {//insert sj index after the existing index
            uint ind1=indArray[isj*2+1];
            if (ind1<nGsj) {
                ind1+=P->chrStart[P->nChrReal];
            } else {//reverse strand
                ind1=(ind1-nGsj) | N2bit;
            };
            SA2.writePacked(isa2,ind1);
            ++isa2; ++isj;
        };
    };
    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished inserting junction indices" <<endl;

    //SAi insertions
    for (uint iL=0; iL < P->genomeSAindexNbases; iL++) {
        uint iSJ=0;
        uint ind0=P->genomeSAindexStart[iL]-1;//last index that was present in the old genome
        for (uint ii=P->genomeSAindexStart[iL];ii<P->genomeSAindexStart[iL+1]; ii++) {//scan through the longest index
            uint iSA1=SAi[ii];
            uint iSA2=iSA1 & P->SAiMarkNmask & P->SAiMarkAbsentMask;//SAi value without the mark bits

            if ( iSJ<nInd && (iSA1 &  P->SAiMarkAbsentMaskC)>0 )
            {//index missing from the old genome
                uint iSJ1=iSJ;
                int64 ind1=funCalcSAi(Gsj+indArray[2*iSJ+1],iL);
                while (ind1 < (int64)(ii-P->genomeSAindexStart[iL]) && indArray[2*iSJ]<iSA2) {
                    ++iSJ;
                    ind1=funCalcSAi(Gsj+indArray[2*iSJ+1],iL);
                };
                if (ind1 == (int64)(ii-P->genomeSAindexStart[iL]) ) {
                    SAi.writePacked(ii,indArray[2*iSJ]+iSJ+1);
                    for (uint ii0=ind0+1; ii0<ii; ii0++) {//fill all the absent indices with this value
                        SAi.writePacked(ii0,(indArray[2*iSJ]+iSJ+1) | P->SAiMarkAbsentMaskC);
                    };
                    ++iSJ;
                    ind0=ii;
                } else {
                    iSJ=iSJ1;
                };
            } else
            {//index was present in the old genome
                while (iSJ<nInd && indArray[2*iSJ]+1<iSA2) {//for this index insert "smaller" junctions
                    ++iSJ;
                };

                while (iSJ<nInd && indArray[2*iSJ]+1==iSA2) {//special case, the index falls right behind SAi
                    if (funCalcSAi(Gsj+indArray[2*iSJ+1],iL) >= (int64) (ii-P->genomeSAindexStart[iL]) ) {//this belongs to the next index
                        break;
                    };
                    ++iSJ;
                };

                SAi.writePacked(ii,iSA1+iSJ);

                for (uint ii0=ind0+1; ii0<ii; ii0++) {//fill all the absent indices with this value
                    SAi.writePacked(ii0,(iSA2+iSJ) | P->SAiMarkAbsentMaskC);
                };
                ind0=ii;
            };
        };

    };

    //set the N-mark in SAi for the inserted suffixes that contain N within the first genomeSAindexNbases bases
    for (uint isj=0;isj<nInd;isj++) {
        int64 ind1=0;
        for (uint iL=0; iL < P->genomeSAindexNbases; iL++) {
            uint g=(uint) Gsj[indArray[2*isj+1]+iL];
            ind1 <<= 2;
            if (g>3) {//this iSA contains N, need to mark the previous
                for (uint iL1=iL; iL1 < P->genomeSAindexNbases; iL1++) {
                    ind1+=3;
                    int64 ind2=P->genomeSAindexStart[iL1]+ind1;
                    for (; ind2>=0; ind2--) {//find previous index that is not absent
                        if ( (SAi[ind2] & P->SAiMarkAbsentMaskC)==0 ) {
                            break;
                        };
                    };
                    SAi.writePacked(ind2,SAi[ind2] | P->SAiMarkNmaskC);
                    ind1 <<= 2;
                };
                break;
            } else {
                ind1 += g;
            };
        };
    };
    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished SAi" <<endl;

    //change parameters, most parameters are already re-defined in sjdbPrepare.cpp
    SA.defineBits(P->GstrandBit+1,P->nSA);//same as SA2
    SA.pointArray(SA2.charArray);
    P->nSAbyte=SA.lengthByte;
    P->sjGstart=P->chrStart[P->nChrReal];
    memcpy(G+P->chrStart[P->nChrReal],Gsj, nGsj);

    delete [] indArray;
    delete [] G1c;
    delete [] oldSJind;

};
void ReadAlign::peMergeMates() {

    uint s1=localSearchNisMM(Read1[0],readLength[0],Read1[0]+readLength[0]+1,readLength[1],P.peOverlap.MMp);
    uint s0=localSearchNisMM(Read1[0]+readLength[0]+1,readLength[1],Read1[0],readLength[0],P.peOverlap.MMp);

    uint o1=min(readLength[1],readLength[0]-s1);
    uint o0=min(readLength[0],readLength[1]-s0);

    peOv.nOv=max(o0,o1);

    if (peOv.nOv<P.peOverlap.NbasesMin) {//overlap is smaller than minimum allowed
        peOv.nOv=0;
        return;
    };

    if (o1>=o0) {
        peOv.mateStart[0]=0;
        peOv.mateStart[1]=s1;
        if (o1<readLength[1]) {//otherwise, if o1==readLength[1], read2 is entirely contained in read1
            //move unoverlapped portion of read2 to the end of read1
            memmove(Read1[0]+readLength[0], Read1[0]+readLength[0]+1+o1, readLength[1]-o1);
        };
    } else {
        peOv.mateStart[1]=0;
        peOv.mateStart[0]=s0;
        memmove(Read1[0]+Lread, Read1[0], readLength[0]);//temp move 0
        memmove(Read1[0], Read1[0]+readLength[0]+1, readLength[1]); //move 1 into 0
        if (o0<readLength[0]) {
            memmove(Read1[0]+readLength[1], Read1[0]+Lread+o0, readLength[0]-o0); //move 0 into 1
        };
    };

    //uint nMM=0;
    //for (uint ii=peOv.ovS; ii<readLength[0]; ii++) {//check for MM in the overlap area
    //    if (Read1[0][ii]!=Read1[0][ii-peOv.ovS+readLength[0]+1]) {
    //        Read1[0][ii]=4; //replace mismatched base with N
    //        ++nMM;
    //    };
    //};


    Lread=Lread-peOv.nOv-1;
    readLength[0]=Lread;
    readLength[1]=0;
    readLengthOriginal[0]=Lread;
    readLengthOriginal[1]=0;
    readNmates=1;

    //fill Read1[1,2]
    complementSeqNumbers(Read1[0],Read1[1],Lread); //returns complement of Reads[ii]
    for (uint ii=0;ii<Lread;ii++) {//reverse
        Read1[2][Lread-ii-1]=Read1[1][ii];
        if (Read1[1][ii]<4) {
            Qual1[0][ii]=1;
            Qual1[1][Lread-ii-1]=1;
        } else {
            Qual1[0][ii]=0;
            Qual1[1][Lread-ii-1]=0;
        };

    };

    return;
};
Beispiel #4
0
/**
 * Inserts the suffixes of a new sequence (both strands) into the suffix array.
 *
 * Steps, in order:
 *   1. shifts the existing SA entries by nG1 where needed (in place, in SA);
 *   2. builds a temporary buffer with the new sequence, its reverse complement
 *      and the complement of both;
 *   3. finds, in parallel with OpenMP, the insertion place in SA of every
 *      suffix that does not start with N (value > 3);
 *   4. sorts the insertion places and merges them with SA into SA1;
 *   5. re-points SA to the SA1 data, updates P->nSA / P->nSAbyte and calls
 *      genomeSAindex() to generate SAi.
 *
 * @param SA   existing suffix array; modified in place, then re-defined to the
 *             new length and pointed to SA1.charArray
 * @param SA1  receives the merged suffix array (its bits are defined here;
 *             the storage is presumably allocated by the caller - confirm)
 * @param SAi  SA index, generated by genomeSAindex()
 * @param G    genome, passed to the suffix search and to genomeSAindex()
 * @param G1   the sequence to insert, nG1 chars in numeric coding
 * @param nG   offset added to the + strand positions of the new sequence;
 *             existing + strand entries >= nG are shifted by nG1
 * @param nG1  length of the inserted sequence
 * @param nG2  offset added to the - strand positions of the new sequence;
 *             existing - strand entries >= nG2 are shifted by nG1
 * @param P    parameters: runThreadN, GstrandBit, inOut are read; nSA and
 *             nSAbyte are updated
 * @return     number of new SA entries that were inserted
 *
 * NOTE(review): the temporary sequence buffer is released before returning,
 * so globalGenomeArray is left pointing to freed memory. This assumes it is
 * only read by the comparison function during qsort() - confirm.
 */
uint insertSeqSA(PackedArray & SA, PackedArray & SA1, PackedArray & SAi, char * G, char * G1, uint64 nG, uint64 nG1, uint64 nG2, Parameters * P)
{//insert new sequences into the SA

    uint GstrandBit1 = (uint) floor(log(nG+nG1)/log(2))+1;
    if (GstrandBit1<32) GstrandBit1=32; //TODO: use simple access function for SA
    if ( GstrandBit1+1 != SA.wordLength)
    {//sequence is too long - GstrandBit changed
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: cannot insert sequence on the fly because of strand GstrandBit problem\n";
        errOut << "SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, EXIT_CODE_GENOME_FILES, *P);
    };

    uint N2bit= 1LLU << (SA.wordLength-1);//this bit is set for - strand indices
    uint strandMask=~N2bit;
    for (uint64 isa=0;isa<SA.length; isa++)
    {//shift the existing indices to make room for the inserted sequence
        uint64 ind1=SA[isa];
        if ( (ind1 & N2bit)>0 )
        {//- strand
            if ( (ind1 & strandMask)>=nG2 )
            {//the first nG bases
                ind1+=nG1; //reverse complementary indices are all shifted by the length of the sequence
                SA.writePacked(isa,ind1);
            };
        } else
        {//+ strand
            if ( ind1>=nG )
            {//the last nG2 bases
                ind1+=nG1; //reverse complementary indices are all shifted by the length of the sequence
                SA.writePacked(isa,ind1);
            };
        };
    };

    //seq1[0]: new sequence followed by its reverse complement; seq1[1]: complement of seq1[0].
    //Kept on the stack: the previous new char*[2] was never freed.
    char* seq1[2];

    #define GENOME_endFillL 16
    char* seqq=new char [4*nG1+3*GENOME_endFillL];//ends shouldbe filled with 5 to mark boundaries

    seq1[0]=seqq+GENOME_endFillL;//TODO: avoid defining an extra array, use reverse search
    seq1[1]=seqq+2*GENOME_endFillL+2*nG1;

    //spacer fill before, between and after the two sequences
    memset(seqq,GENOME_spacingChar,GENOME_endFillL);
    memset(seqq+2*nG1+GENOME_endFillL,GENOME_spacingChar,GENOME_endFillL);
    memset(seqq+4*nG1+2*GENOME_endFillL,GENOME_spacingChar,GENOME_endFillL);

    memcpy(seq1[0], G1, nG1);
    for (uint ii=0; ii<nG1; ii++)
    {//reverse complement sequence
        seq1[0][2*nG1-1-ii]=seq1[0][ii]<4 ? 3-seq1[0][ii] : seq1[0][ii];
    };
    complementSeqNumbers(seq1[0], seq1[1], 2*nG1);//complement

    //+2: room for the end marker pair written after sorting
    uint64* indArray=new uint64[nG1*2*2+2];// for each base, 1st number - insertion place in SA, 2nd number - index, *2 for reverse compl


    #pragma omp parallel num_threads(P->runThreadN)
    #pragma omp for schedule (dynamic,1000)
    for (uint ii=0; ii<2*nG1; ii++) {//find insertion points for each of the sequences

        if (seq1[0][ii]>3)
        {//no index for suffices starting with N
            indArray[ii*2]=-1;
        } else
        {
            indArray[ii*2] =  suffixArraySearch1(seq1, ii, 10000, G, nG, SA, (ii<nG1 ? true:false), 0, SA.length-1, 0, P) ;
            indArray[ii*2+1] = ii;
        };
    };

    uint64 nInd=0;//true number of new indices
    for (uint ii=0; ii<2*nG1; ii++) {//remove entries that cannot be inserted, this cannot be done in the parallel cycle above
        if (indArray[ii*2]!= (uint) -1) {
            indArray[nInd*2]=indArray[ii*2];
            indArray[nInd*2+1]=indArray[ii*2+1];
            ++nInd;
        };
    };

    time_t rawtime;
    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished SA search, number of new SA indices = "<<nInd<<endl;

    globalGenomeArray=seq1[0];//sequence used by the comparison function
    qsort((void*) indArray, nInd, 2*sizeof(uint64), funCompareUintAndSuffixes);
    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished sorting SA indices"<<endl;

    indArray[2*nInd]=-999; //mark the last junction
    indArray[2*nInd+1]=-999; //mark the last junction

    SA1.defineBits(SA.wordLength,SA.length+nInd);

    //merge the sorted new indices with the existing SA into SA1
    uint isa1=0, isa2=0;
    for (uint isa=0;isa<SA.length;isa++) {
        //the -999 end marker in indArray never equals isa and stops this loop
        while (isa==indArray[isa1*2]) {//insert new index before the existing index
            uint ind1=indArray[isa1*2+1];
            if (ind1<nG1) {
                ind1+=nG;
            } else {//reverse strand
                ind1=(ind1-nG1+nG2) | N2bit;
            };
            SA1.writePacked(isa2,ind1);
            ++isa2; ++isa1;

        };

        SA1.writePacked(isa2,SA[isa]); //TODO make sure that the first sj index is not before the first array index
        ++isa2;
    };
    for (;isa1<nInd;isa1++)
    {//insert the last indices
        uint ind1=indArray[isa1*2+1];
        if (ind1<nG1)
        {
            ind1+=nG;
        } else
        {//reverse strand
            ind1=(ind1-nG1+nG2) | N2bit;
        };
        SA1.writePacked(isa2,ind1);
        ++isa2;
    };

    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished inserting SA indices" <<endl;

    //change parameters, most parameters are already re-defined in sjdbPrepare.cpp
    SA.defineBits(P->GstrandBit+1,SA.length+nInd);//same as SA2
    SA.pointArray(SA1.charArray);
    P->nSA=SA.length;
    P->nSAbyte=SA.lengthByte;

    //generate SAi
    genomeSAindex(G,SA,P,SAi);

    time ( &rawtime );
    P->inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished SAi" <<endl;

    //release the temporary arrays, they were leaked before
    delete [] indArray;
    delete [] seqq;

    return nInd;
};