Esempio n. 1
0
// Walks the records handed out by header.getNextHeaderRecord() and separates
// the RG/PG records from everything else.
//
//   header       - header being read; advanced with getNextHeaderRecord()
//                  until it returns NULL.
//   noRgPgString - cleared on entry, then filled with the text of every
//                  record that is neither RG nor PG, followed by the header's
//                  comment lines.
//   newHeader    - when not NULL, receives a copy of each RG/PG record whose
//                  ID it does not contain yet.  When NULL, RG/PG records are
//                  simply skipped.
//
// An RG whose ID already exists in newHeader with different text is reported
// through Logger::error; a PG in the same situation is only warned about and
// is not added.
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader)
{
    // Scratch buffers holding the text of the already-stored record and of the
    // incoming record.  They are function-local statics, so they are shared
    // between calls.
    static std::string prevString = "";
    static std::string newString = "";

    noRgPgString.clear();

    for(SamHeaderRecord* rec = header.getNextHeaderRecord();
        rec != NULL;
        rec = header.getNextHeaderRecord())
    {
        const bool isRG = (rec->getType() == SamHeaderRecord::RG);
        const bool isPG = !isRG && (rec->getType() == SamHeaderRecord::PG);

        if(!isRG && !isPG)
        {
            // Any other record type is passed through as text.
            rec->appendString(noRgPgString);
            continue;
        }

        if(newHeader == NULL)
        {
            // Nowhere to collect RG/PG records, so they are dropped.
            continue;
        }

        if(isRG)
        {
            SamHeaderRG* existingRG = newHeader->getRG(rec->getTagValue("ID"));
            if(existingRG == NULL)
            {
                // First time this RG ID is seen: copy it into the new header.
                if(!newHeader->addRecordCopy(static_cast<SamHeaderRG&>(*rec)))
                {
                    Logger::gLogger->error("Failed to add readgroup to header, %s",
                                           newHeader->getErrorMessage());
                }
                continue;
            }

            // Same ID already present: both records must have the same text.
            prevString.clear();
            newString.clear();
            existingRG->appendString(prevString);
            rec->appendString(newString);
            if(prevString != newString)
            {
                Logger::gLogger->error("Failed to add readgroup to header, "
                                       "duplicate, but non-identical RG ID, %s",
                                       rec->getTagValue("ID"));
            }
            continue;
        }

        // Only PG records reach this point.
        SamHeaderPG* existingPG = newHeader->getPG(rec->getTagValue("ID"));
        if(existingPG == NULL)
        {
            // First time this PG ID is seen: copy it into the new header.
            if(!newHeader->addRecordCopy(static_cast<SamHeaderPG&>(*rec)))
            {
                Logger::gLogger->error("Failed to add PG to header, %s",
                                       newHeader->getErrorMessage());
            }
            continue;
        }

        // Same ID already present: a differing PG is dropped with a warning.
        // TODO: change the ID, and add it.
        prevString.clear();
        newString.clear();
        existingPG->appendString(prevString);
        rec->appendString(newString);
        if(prevString != newString)
        {
            Logger::gLogger->warning("Warning: dropping duplicate, "
                                     "but non-identical PG ID, %s",
                                     rec->getTagValue("ID"));
        }
    }

    // Finish with the header's comment lines.
    header.appendCommentLines(noRgPgString);
}
Esempio n. 2
0
// Writes one record as a FASTQ entry and hands the record back to myPool.
//
//   samRec      - record to write; released to myPool once it has been written.
//   filePtr     - destination file.  Ignored when mySplitRG is set, in which
//                 case the file is looked up (or created) per read group.
//   fileNameExt - extension of the output file; together with the record's RG
//                 value it forms the key into myOutFastqs.
//   readNameExt - text appended to the read name on the '@' line (and on the
//                 '+' line when myRNPlus is set).
//
// When a per-read-group file is created for anything but the second-end
// extension, one line describing it is also written to myFqList.
//
// Throws std::runtime_error if a per-read-group file cannot be opened or if
// no output file is available.  The record is not released in that case.
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr,
                           const std::string& fileNameExt, const char* readNameExt)
{
    // Function-local statics are used as scratch storage shared by all calls.
    static int16_t flag;
    static std::string sequence;
    static String quality;
    static std::string rg;
    static std::string rgFastqExt;
    static std::string rgListStr;
    static std::string fileName;
    static std::string fq2;
    if(mySplitRG)
    {
        rg = samRec.getString("RG").c_str();
        rgFastqExt = rg + fileNameExt;

        OutFastqMap::iterator it = myOutFastqs.find(rgFastqExt);
        if(it == myOutFastqs.end())
        {
            // First record for this read group/extension: open a new file
            // named <outBase>[.<rg>]<fileNameExt>.
            fileName = myOutBase.c_str();
            if(rg != "")
            {
                fileName += '.';
            }
            else
            {
                // Records without an RG are listed under ".".
                rg = ".";
            }
            fileName += rgFastqExt;
            filePtr = ifopen(fileName.c_str(), "w", myCompression);
            if(filePtr == NULL)
            {
                // Report the real cause here instead of caching a NULL handle
                // and failing below with a "programming error" message.
                throw(std::runtime_error("Bam2FastQ: failed to open output file "
                                         + fileName));
            }
            myOutFastqs[rgFastqExt] = filePtr;

            if(fileNameExt != mySecondFileNameExt)
            {
                // Not the second end, so add a line to the fastq list:
                // sample, this file, mate file (or "."), RG header line.
                const char* sm = mySamHeader.getRGTagValue("SM", rg.c_str());
                if(strcmp(sm, "") == 0){sm = myOutBase.c_str();}

                rgListStr.clear();
                SamHeaderRG* rgPtr = mySamHeader.getRG(rg.c_str());
                if((rgPtr == NULL) || (!rgPtr->appendString(rgListStr)))
                {
                    // No RG info for this record.
                    rgListStr = ".\n";
                }
                fq2 = ".";
                if(fileNameExt == myFirstFileNameExt)
                {
                    // First end of a pair: name the matching second-end file.
                    fq2 = myOutBase.c_str();
                    if(rg != ".")
                    {
                        fq2 += '.';
                        fq2 += rg;
                    }
                    fq2 += mySecondFileNameExt;
                }
                ifprintf(myFqList, "%s\t%s\t%s\t%s",
                         sm, fileName.c_str(), fq2.c_str(),
                         rgListStr.c_str());
            }
        }
        else
        {
            filePtr = it->second;
        }
    }
    if(filePtr == NULL)
    {
        throw(std::runtime_error("Programming ERROR/EXITING: Bam2FastQ filePtr not set."));
    }

    flag = samRec.getFlag();
    const char* readName = samRec.getReadName();
    sequence = samRec.getSequence();
    if(myQField.IsEmpty())
    {
        // Read the quality from the quality field
        quality = samRec.getQuality();
    }
    else
    {
        // Read Quality from the specified tag
        const String* qTagPtr = samRec.getStringTag(myQField.c_str());
        if((qTagPtr != NULL) && (qTagPtr->Length() == (int)sequence.length()))
        {
            // Use the tag value for quality
            quality = qTagPtr->c_str();
        }
        else
        {
            // Tag is missing or its length does not match the sequence, so
            // fall back to the quality field.  Only the first occurrence is
            // reported; the rest are just counted.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                std::cerr << "Bam2FastQ: " << myQField.c_str() 
                          << " tag was not found/invalid, so using the quality field in records without the tag\n";
            }
            quality = samRec.getQuality();
        }
    }
    
    if(SamFlag::isReverse(flag) && myReverseComp)
    {
        // It is reverse, so reverse complement the sequence
        BaseUtilities::reverseComplement(sequence);
        // Reverse the quality.
        quality.Reverse();
    }
    else
    {
        // Ensure it is all capitalized.
        const std::string::size_type seqLen = sequence.size();
        for(std::string::size_type i = 0; i < seqLen; i++)
        {
            // toupper is only defined for unsigned char values (or EOF).
            sequence[i] = static_cast<char>(
                toupper(static_cast<unsigned char>(sequence[i])));
        }
    }
    
    if(myRNPlus)
    {
        // Repeat the read name on the '+' line.
        ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", readName, readNameExt,
                 sequence.c_str(), readName, readNameExt, quality.c_str());
    }
    else
    {
        ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", readName, readNameExt,
                 sequence.c_str(), quality.c_str());
    }
    // Release the record.
    myPool.releaseRecord(&samRec);
}
Esempio n. 3
0
// Returns a copy of a header line with its PI tag (delimiter included)
// removed, or an unchanged copy when the line has no PI tag.
static std::string removePITag(const std::string& rgLine)
{
    // Anchor on the preceding tab so that "PI:" inside another tag's value
    // (e.g. "DS:API:x") is not mistaken for the PI tag itself.
    const std::string::size_type tagStart = rgLine.find("\tPI:");
    if(tagStart == std::string::npos)
    {
        return rgLine;
    }

    // The tag runs up to the next tab or, when PI is the last tag, up to the
    // line terminator / end of the string.
    std::string::size_type tagEnd = rgLine.find_first_of("\t\n", tagStart + 1);
    if(tagEnd == std::string::npos)
    {
        tagEnd = rgLine.size();
    }

    std::string stripped(rgLine, 0, tagStart);
    stripped.append(rgLine, tagEnd, std::string::npos);
    return stripped;
}

// Walks the records handed out by header.getNextHeaderRecord() and separates
// the RG/PG records from everything else.
//
//   header       - header being read; advanced with getNextHeaderRecord()
//                  until it returns NULL.
//   noRgPgString - cleared on entry, then filled with the text of every
//                  record that is neither RG nor PG, followed by the header's
//                  comment lines.
//   newHeader    - when not NULL, receives a copy of each RG/PG record whose
//                  ID it does not contain yet.  When NULL, RG/PG records are
//                  simply skipped.
//   ignorePI     - when true, two RG records with the same ID that differ
//                  only in their PI tag produce a warning instead of an error.
//
// An RG whose ID already exists in newHeader with different text is reported
// through Logger::error (subject to ignorePI); a PG in the same situation is
// only warned about and is not added.
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader, bool ignorePI)
{
    noRgPgString.clear();
    // Scratch strings for comparing two records with the same ID.  They are
    // function-local statics, so they are shared between calls.
    static std::string prevString = "";
    static std::string newString = "";

    SamHeaderRecord* rec = header.getNextHeaderRecord();
    while(rec != NULL)
    {
        if(rec->getType() == SamHeaderRecord::RG)
        {
            if(newHeader != NULL)
            {
                // This is an RG line.
                // First check if this RG is already included in the new header.
                SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID"));

                if(prevRG != NULL)
                {
                    // This RG already exists, check that they are the same.
                    // If they are the same, there is nothing to do.
                    // The return values of appendString are not checked here.
                    prevString.clear();
                    newString.clear();
                    prevRG->appendString(prevString);
                    rec->appendString(newString);

                    if(prevString != newString)
                    {
                        // PI can only explain the difference when the caller
                        // asked to ignore it and at least one line has it.
                        const bool piMayExplainDiff =
                            ignorePI &&
                            ((prevString.find("\tPI:") != std::string::npos) ||
                             (newString.find("\tPI:") != std::string::npos));

                        if(!piMayExplainDiff)
                        {
                            // They are not identical, so report an error.
                            Logger::gLogger->error("Failed to add readgroup to "
                                                   "header, duplicate, but "
                                                   "non-identical RG ID, %s\n"
                                                   "prev:\t(%s)\nnew:\t(%s)",
                                                   rec->getTagValue("ID"),
                                                   prevString.c_str(),
                                                   newString.c_str());
                        }
                        else if(removePITag(prevString) != removePITag(newString))
                        {
                            // Comparing the lines with PI removed works no
                            // matter where PI sits on the line, including as
                            // the last tag or on only one of the two lines.
                            Logger::gLogger->error("Failed to add readgroup to header, "
                                                   "duplicate, but non-identical RG ID, %s, "
                                                   "even when ignoring PI\n"
                                                   "prev:\t(%s)\nnew:\t(%s)",
                                                   rec->getTagValue("ID"),
                                                   prevString.c_str(),
                                                   newString.c_str());
                        }
                        else
                        {
                            Logger::gLogger->warning("Warning: ignoring non-identical PI field "
                                                     "for RG ID, %s",
                                                     rec->getTagValue("ID"));
                        }
                    }
                }
                else
                {
                    // This RG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy(static_cast<SamHeaderRG&>(*rec)))
                    {
                        // Failed to add the RG.
                        Logger::gLogger->error("Failed to add readgroup to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else if(rec->getType() == SamHeaderRecord::PG)
        {
            if(newHeader != NULL)
            {
                // This is a PG line.
                // First check if this PG is already included in the new header.
                SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID"));

                if(prevPG != NULL)
                {
                    // This PG already exists, check if they are the same.
                    // If they are the same, there is nothing to do.
                    prevString.clear();
                    newString.clear();
                    prevPG->appendString(prevString);
                    rec->appendString(newString);
                    if(prevString != newString)
                    {
                        // They are not identical, ignore for now.
                        // TODO: change the ID, and add it.
                        Logger::gLogger->warning("Warning: dropping duplicate, "
                                                 "but non-identical PG ID, %s",
                                                 rec->getTagValue("ID"));
                    }
                }
                else
                {
                    // This PG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy(static_cast<SamHeaderPG&>(*rec)))
                    {
                        // Failed to add the PG.
                        Logger::gLogger->error("Failed to add PG to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else
        {
            // Any other record type is passed through as text.
            rec->appendString(noRgPgString);
        }
        rec = header.getNextHeaderRecord();
    }

    // Append the comments.
    header.appendCommentLines(noRgPgString);
}