コード例 #1
0
ファイル: ReadAlign_oneRead.cpp プロジェクト: alexdobin/STAR
int ReadAlign::oneRead() {//process one read: load, map, write

    //load read name, sequence, quality from the streams into internal arrays
    int readStatus[2];


    readStatus[0]=readLoad(*(readInStream[0]), P, 0, readLength[0], readLengthOriginal[0], readNameMates[0], Read0[0], Read1[0], Qual0[0], Qual1[0], clip3pNtotal[0], clip5pNtotal[0], clip3pAdapterN[0], iReadAll, readFilesIndex, readFilter, readNameExtra[0]);
    if (P.readNmates==2) {//read the 2nd mate
        readStatus[1]=readLoad(*(readInStream[1]), P, 1, readLength[1], readLengthOriginal[1], readNameMates[1], Read0[1], Read1[0]+readLength[0]+1, Qual0[1], Qual1[0]+readLength[0]+1, clip3pNtotal[1], clip5pNtotal[1], clip3pAdapterN[1], iReadAll, readFilesIndex, readFilter, readNameExtra[1]);

        if (readStatus[0]!=readStatus[1]) {
            ostringstream errOut;
            errOut << "EXITING because of FATAL ERROR: Read1 and Read2 are not consistent, reached the end of the one before the other one\n";
            errOut << "SOLUTION: Check you your input files: they may be corrupted\n";
            exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
        } else if (readStatus[0]==-1) {//finished with the stream
            return -1;
        };

        //combine two reads together
        Lread=readLength[0]+readLength[1]+1;
        readLengthPairOriginal=readLengthOriginal[0]+readLengthOriginal[1]+1;
        if (Lread>DEF_readSeqLengthMax) {
            ostringstream errOut;
            errOut << "EXITING because of FATAL ERROR in reads input: Lread of the pair = " << Lread << "   while DEF_readSeqLengthMax=" << DEF_readSeqLengthMax <<endl;
            errOut << "Read Name="<<readNameMates[0]<<endl;
            errOut << "SOLUTION: increase DEF_readSeqLengthMax in IncludeDefine.h and re-compile STAR"<<endl<<flush;
            exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
        };

        Read1[0][readLength[0]]=MARK_FRAG_SPACER_BASE; //marker for spacer base
        Qual1[0][readLength[0]]=0;
        complementSeqNumbers(Read1[0]+readLength[0]+1,Read1[0]+readLength[0]+1,readLength[1]); //returns complement of Reads[ii]
        for (uint ii=0;ii<readLength[1]/2;ii++) {
            swap(Read1[0][Lread-ii-1],Read1[0][ii+readLength[0]+1]); //reverse complement
            swap(Qual1[0][Lread-ii-1],Qual1[0][ii+readLength[0]+1]); //reverse complement   ??? was Qualof the second mate populated
        };

    } else {//1 mate

        if (readStatus[0]==-1) {//finished with the stream
            return -1;
        };

        Lread=readLength[0];
        readLengthPairOriginal=readLengthOriginal[0];
        readLength[1]=0;

    };

    readFileType=readStatus[0];

    complementSeqNumbers(Read1[0],Read1[1],Lread); //returns complement of Reads[ii]
    for (uint ii=0;ii<Lread;ii++) {//reverse
        Read1[2][Lread-ii-1]=Read1[1][ii];
        Qual1[1][Lread-ii-1]=Qual1[0][ii];
    };

    statsRA.readN++;
    statsRA.readBases += readLength[0]+readLength[1];

    //max number of mismatches allowed for this read
    outFilterMismatchNmaxTotal=min(P.outFilterMismatchNmax, (uint) (P.outFilterMismatchNoverReadLmax*(readLength[0]+readLength[1])));

    //map the read
    mapOneRead();

    peOverlapMergeMap();
    multMapSelect();
    mappedFilter();

    if (!peOv.yes) {//if the alignment was not mates merged - otherwise the chimeric detection was already done
        chimericDetection();
    };

    if (P.pCh.out.bam && chimRecord) {//chimeric alignment was recorded in main BAM files, and it contains the representative portion, so non-chimeric aligmnent is not output
        return 0;
    };

    waspMap();

    #ifdef OFF_BEFORE_OUTPUT
        #warning OFF_BEFORE_OUTPUT
        return 0;
    #endif

    //write out alignments
    outputAlignments();

    return 0;

};
コード例 #2
0
void ReadAlign::outputAlignments() {

    outBAMbytes=0;
    outBAMbytes1=0;

    bool mateMapped[2]={false,false};

    if (P->outFilterBySJoutStage<=1) {//no chimeric output for stage=2
        if ( chimericDetection() )
        {
            statsRA.chimericAll++;
            if ( P->chimOutType=="WithinBAM" )
            {
                //if chimeric alignment was recorded in main BAM files, it contains the representative portion, so non-chimeric aligmnent is not output
                return;
            };
        };
    };

    if ( nW==0 ) {//no good windows
        statsRA.unmappedOther++;
        unmapType=0;
    } else if ( (trBest->maxScore < P->outFilterScoreMin) || (trBest->maxScore < (intScore) (P->outFilterScoreMinOverLread*(Lread-1))) \
              || (trBest->nMatch < P->outFilterMatchNmin)  || (trBest->nMatch < (uint) (P->outFilterMatchNminOverLread*(Lread-1))) ) {//too short
        statsRA.unmappedShort++;
        unmapType=1;
    } else if ( (trBest->nMM > outFilterMismatchNmaxTotal) || (double(trBest->nMM)/double(trBest->rLength)>P->outFilterMismatchNoverLmax) ) {//too many mismatches
        statsRA.unmappedMismatch++;
        unmapType=2;
    } else if (nTr > P->outFilterMultimapNmax){//too multi
        statsRA.unmappedMulti++;
        unmapType=3;
    } else {//output transcripts

        outFilterPassed=true;

        if (P->outFilterBySJoutStage==1) {//filtering by SJout
            for (uint iTr=0;iTr<nTr;iTr++) {//check transcript for unannotated junctions
                for (uint iex=0;iex<trMult[iTr]->nExons-1;iex++) {//check all junctions
                    if (trMult[iTr]->canonSJ[iex]>=0 && trMult[iTr]->sjAnnot[iex]==0) {
                        outFilterPassed=false;
                        break;
                    };
                };
                if (!outFilterPassed) break;
            };
            if (!outFilterPassed) {//this read is held for further filtering BySJout, record fastq
                unmapType=-3; //the read is not conisddred unmapped
                statsRA.readN--;
                statsRA.readBases -= readLength[0]+readLength[1];

//                 if (P->runThreadN>1) pthread_mutex_lock(&g_threadChunks.mutexOutFilterBySJout);
                for (uint im=0;im<P->readNmates;im++) {
                   chunkOutFilterBySJoutFiles[im] << readNameMates[im] <<" "<< iReadAll <<" "<< readFilter <<" "<< readFilesIndex;
                   chunkOutFilterBySJoutFiles[im] <<"\n";
                   chunkOutFilterBySJoutFiles[im] << Read0[im] <<"\n";
                    if (readFileType==2) {//fastq
                        chunkOutFilterBySJoutFiles[im] << "+\n";
                        chunkOutFilterBySJoutFiles[im] << Qual0[im] <<"\n";
                    };
                };
//                 if (P->runThreadN>1) pthread_mutex_unlock(&g_threadChunks.mutexOutFilterBySJout);
            };
        };

        if (P->outSJfilterReads=="All" || nTr==1) {
            uint sjReadStartN=chunkOutSJ1->N;
            for (uint iTr=0;iTr<nTr;iTr++) {//write all transcripts
                outputTranscriptSJ (*(trMult[iTr]), nTr, chunkOutSJ1, sjReadStartN);
            };
        };

        if (outFilterPassed) {
            uint nTrOut=nTr; //number of aligns to output
            bool outSAMfilterYes=true;
            if (P->outSAMfilter.yes)
            {
                if (P->outSAMfilter.KeepOnlyAddedReferences)
                {
                    for (uint itr=0;itr<nTr;itr++)
                    {//check if transcripts map to chr other than added references
                        if (trMult[itr]->Chr<P->genomeInsertChrIndFirst)
                        {
                            outSAMfilterYes=false;
                            break;
                        };
                    };
                } else if (P->outSAMfilter.KeepAllAddedReferences)
                {
                    nTrOut=0;
                    for (uint itr=0;itr<nTr;itr++)
                    {//check if transcripts map to chr other than added references
                        if (trMult[itr]->Chr>=P->genomeInsertChrIndFirst)
                        {
                            trMult[nTrOut]=trMult[itr];
                            trMult[nTrOut]->primaryFlag=false;
                            ++nTrOut;
                        };
                    };
                    if (nTrOut==0)
                    {
                        outSAMfilterYes=false;
                    } else
                    {
                        trMult[0]->primaryFlag=true;
                    };
                };
            };
            if (nTr>1) {//multimappers
                statsRA.mappedReadsM++;
                unmapType=-1;
            } else if (nTr==1) {//unique mappers
                statsRA.mappedReadsU++;
                statsRA.transcriptStats(*(trMult[0]),Lread);
                unmapType=-2;
            } else {//cannot be
                ostringstream errOut;
                errOut  << "EXITING because of a BUG: nTr=0 in outputAlignments.cpp";
                exitWithError(errOut.str(), std::cerr, P->inOut->logMain, EXIT_CODE_BUG, *P);
            };

            nTrOut=min(P->outSAMmultNmax,nTrOut); //number of to write to SAM/BAM files

            for (uint iTr=0;iTr<nTrOut;iTr++)
            {//write all transcripts
                //mate mapped = true if a mate was present in one of the trancsripts
                mateMapped[trMult[iTr]->exons[0][EX_iFrag]]=true;
                mateMapped[trMult[iTr]->exons[trMult[iTr]->nExons-1][EX_iFrag]]=true;

                //mateMapped1 = true if a mate is present in this transcript
                bool mateMapped1[2]={false,false};
                mateMapped1[trMult[iTr]->exons[0][EX_iFrag]]=true;
                mateMapped1[trMult[iTr]->exons[trMult[iTr]->nExons-1][EX_iFrag]]=true;

                if (P->outSAMbool && outSAMfilterYes)
                {//SAM output
                    outBAMbytes+=outputTranscriptSAM(*(trMult[iTr]), nTr, iTr, (uint) -1, (uint) -1, 0, -1, NULL, outSAMstream);
                    if (P->outSAMunmapped.keepPairs && P->readNmates>1 && ( !mateMapped1[0] || !mateMapped1[1] ) )
                    {//keep pairs && paired reads && one of the mates not mapped in this transcript
                        outBAMbytes+= outputTranscriptSAM(*(trMult[iTr]), 0, 0, (uint) -1, (uint) -1, 0, 4, mateMapped1, outSAMstream);
                    };
                };

                if ((P->outBAMunsorted || P->outBAMcoord) && outSAMfilterYes)
                {//BAM output
                    alignBAM(*(trMult[iTr]), nTr, iTr, P->chrStart[trMult[iTr]->Chr], (uint) -1, (uint) -1, 0, -1, NULL, P->outSAMattrOrder,outBAMoneAlign, outBAMoneAlignNbytes);

                    if (P->outBAMunsorted)
                    {//unsorted
                        for (uint imate=0; imate<P->readNmates; imate++)
                        {//output each mate
                            outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (imate>0 || iTr>0) ? 0 : (outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1])*2*nTrOut);
                        };
                        if (P->outSAMunmapped.keepPairs && P->readNmates>1 && ( !mateMapped1[0] || !mateMapped1[1] ) )
                        {//keep pairs && paired reads && one of the mates not mapped in this transcript
                            alignBAM(*trMult[iTr], 0, 0, P->chrStart[trMult[iTr]->Chr], (uint) -1, (uint) -1, 0, 4, mateMapped1, P->outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
                            for (uint imate=0; imate<P->readNmates; imate++)
                            {//output each mate
                                outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (imate>0 || iTr>0) ? 0 : (outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1])*2*nTrOut);
                            };
                        };
                    };

                    if (P->outBAMcoord)
                    {//coordinate sorted
                        for (uint imate=0; imate<P->readNmates; imate++)
                        {//output each mate
                            outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (iReadAll<<32) | (iTr<<8) | trMult[iTr]->exons[0][EX_iFrag] );
                        };
                    };

                };
            };

            if (P->readNmates>1 && !(mateMapped[0] && mateMapped[1]) )
            {
                unmapType=4;
            };

            if (unmapType==4 && P->outSAMunmapped.yes)
            {//output unmapped end for single-end alignments
                if (P->outSAMbool && !P->outSAMunmapped.keepPairs && outSAMfilterYes)
                {
                    outBAMbytes+= outputTranscriptSAM(*trBest, 0, 0, (uint) -1, (uint) -1, 0, unmapType, mateMapped, outSAMstream);
                };

                if ( (P->outBAMcoord || (P->outBAMunsorted && !P->outSAMunmapped.keepPairs) ) && outSAMfilterYes)
                {//BAM output
                    alignBAM(*trBest, 0, 0, P->chrStart[trBest->Chr], (uint) -1, (uint) -1, 0, unmapType, mateMapped, P->outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
                    for (uint imate=0; imate<P->readNmates; imate++)
                    {//alignBAM output is empty for mapped mate, but still need to scan through it
                        if (P->outBAMunsorted && !P->outSAMunmapped.keepPairs)
                        {
                            outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
                        };
                        if (P->outBAMcoord)
                        {//KeepPairs option does not affect for sorted BAM since we do not want multiple entries for the same unmapped read
                            outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], iReadAll);
                        };
                    };
                };
            };

            if (P->outSJfilterReads=="All" || nTr==1)
            {
                uint sjReadStartN=chunkOutSJ->N;
                for (uint iTr=0;iTr<nTr;iTr++)
                {//write all transcripts
                    outputTranscriptSJ (*(trMult[iTr]), nTr, chunkOutSJ, sjReadStartN);
                };
            };

            if ( P->quant.geCount.yes )
            {
                chunkTr->geneCountsAddAlign(nTr, trMult);
            };

            if ( P->quant.trSAM.yes )
            {//NOTE: the transcripts are changed by this function (soft-clipping extended), cannot be reused
                quantTranscriptome(chunkTr, nTr, trMult,  alignTrAll);
            };
        };
    };

    if (unmapType>=0)
    {
        statsRA.unmappedAll++;
    };

    if ( P->outSAMunmapped.within && unmapType>=0 && unmapType<4 ) {//output unmapped within && unmapped read && both mates unmapped
        if (P->outBAMcoord || P->outBAMunsorted || P->quant.trSAM.yes)
        {//BAM output
            alignBAM(*trBest, 0, 0, P->chrStart[trBest->Chr], (uint) -1, (uint) -1, 0, unmapType, mateMapped, P->outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
            for (uint imate=0; imate<P->readNmates; imate++)
            {//output each mate
                if (P->outBAMunsorted)
                {
                    outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
                };
                if (P->quant.trSAM.yes)
                {
                    outBAMquant->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
                };
                if (P->outBAMcoord)
                {
                    outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], iReadAll);
                };
            };
        };

        if (P->outSAMbool)
        {//output SAM
            outBAMbytes+= outputTranscriptSAM(*trBest, 0, 0, (uint) -1, (uint) -1, 0, unmapType, mateMapped, outSAMstream);
        };
    };
    if (unmapType>=0 && P->outReadsUnmapped=="Fastx" ){//output to fasta/q files
           for (uint im=0;im<P->readNmates;im++) {
               chunkOutUnmappedReadsStream[im] << readNameMates[im];
               if (P->readNmates>1) chunkOutUnmappedReadsStream[im] <<"\t"<< int(mateMapped[0]) <<  int(mateMapped[1]);
               chunkOutUnmappedReadsStream[im] <<"\n";
               chunkOutUnmappedReadsStream[im] << Read0[im] <<"\n";
                if (readFileType==2) {//fastq
                    chunkOutUnmappedReadsStream[im] << "+\n";
                    chunkOutUnmappedReadsStream[im] << Qual0[im] <<"\n";
                };
           };
    };
};