コード例 #1
0
ファイル: Dedup_LowMem.cpp プロジェクト: statgen/bamUtil
// build the read group library map
void Dedup_LowMem::buildReadGroupLibraryMap(SamFileHeader& header) {
    rgidLibMap.clear();
    numLibraries = 0;
    std::map<std::string,uint32_t> libNameMap;

    SamHeaderRecord * headerRecord = header.getNextRGRecord();
    while(headerRecord != NULL) {
        std::string ID = headerRecord->getTagValue("ID");
        std::string LB = headerRecord->getTagValue("LB");

        if ( ID.empty() ) {
            std::string headerRecordString;
            headerRecord->appendString(headerRecordString);
            Logger::gLogger->error("Cannot find readGroup ID information in the header line %s",
                                   headerRecordString.c_str());
        }
        if ( rgidLibMap.find(ID) != rgidLibMap.end() ) {
            Logger::gLogger->error("The readGroup ID %s is not a unique identifier",ID.c_str());
        }

        if ( LB.empty() ) {
            std::string headerRecordString;
            headerRecord->appendString(headerRecordString);
            Logger::gLogger->warning("Cannot find library information in the header line %s. Using empty string for library name",
                                     headerRecordString.c_str());
        }

        if ( libNameMap.find( LB ) != libNameMap.end() ) {
            rgidLibMap[ID] = libNameMap[LB];
        }
        else {
            numLibraries = libNameMap.size()+1;
            libNameMap[LB] = numLibraries;
            rgidLibMap[ID] = numLibraries;
        }
        headerRecord = header.getNextRGRecord();
    }

    if (numLibraries > 0xff) {
        Logger::gLogger->error("More than 255 library names are identified. Dedup_LowMem currently only allows up to 255 library names");
    }
}
コード例 #2
0
ファイル: MergeBam.cpp プロジェクト: ascendo/bamUtil
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader)
{
    noRgPgString.clear();
    // strings for comparing if two RGs with same ID are the same.
    static std::string prevString = "";
    static std::string newString = "";

    SamHeaderRecord* rec = header.getNextHeaderRecord();
    while(rec != NULL)
    {
        if(rec->getType() == SamHeaderRecord::RG)
        {
            if(newHeader != NULL)
            {
                // This is an RG line.
                // First check if this RG is already included in the new header.
                SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID"));
                
                if(prevRG != NULL)
                {
                    // This RG already exists, check that they are the same.
                    // If they are the same, there is nothing to do.
                    bool status = true;
                    prevString.clear();
                    newString.clear();
                    status &= prevRG->appendString(prevString);
                    status &= rec->appendString(newString);
                    if(prevString != newString)
                    {
                        // They are not identical, so report an error.
                        Logger::gLogger->error("Failed to add readgroup to header, "
                                               "duplicate, but non-identical RG ID, %s",
                                               rec->getTagValue("ID"));
                    }
                }
                else
                {
                    // This RG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec)))
                    {
                        // Failed to add the RG, exit.
                        Logger::gLogger->error("Failed to add readgroup to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else if(rec->getType() == SamHeaderRecord::PG)
        {
            if(newHeader != NULL)
            {
                // This is a PG line.
                // First check if this PG is already included in the new header.
                SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID"));
                
                if(prevPG != NULL)
                {
                    // This PG already exists, check if they are the same.
                    // If they are the same, there is nothing to do.
                    bool status = true;
                    prevString.clear();
                    newString.clear();
                    status &= prevPG->appendString(prevString);
                    status &= rec->appendString(newString);
                    if(prevString != newString)
                    {
                        // They are not identical, ignore for now.
                        // TODO: change the ID, and add it.
                        Logger::gLogger->warning("Warning: dropping duplicate, "
                                                 "but non-identical PG ID, %s",
                                                 rec->getTagValue("ID"));
                    }
                }
                else
                {
                    // This PG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec)))
                    {
                        // Failed to add the PG, exit.
                        Logger::gLogger->error("Failed to add PG to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else
        {
            rec->appendString(noRgPgString);
        }
        rec = header.getNextHeaderRecord();
    }

    // Append the comments.
    header.appendCommentLines(noRgPgString);
}
コード例 #3
0
ファイル: MergeBam.cpp プロジェクト: rtchen/gotcloud
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader, bool ignorePI)
{
    noRgPgString.clear();
    // strings for comparing if two RGs with same ID are the same.
    static std::string prevString = "";
    static std::string newString = "";

    SamHeaderRecord* rec = header.getNextHeaderRecord();
    while(rec != NULL)
    {
        if(rec->getType() == SamHeaderRecord::RG)
        {
            if(newHeader != NULL)
            {
                // This is an RG line.
                // First check if this RG is already included in the new header.
                SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID"));
                
                if(prevRG != NULL)
                {
                    // This RG already exists, check that they are the same.
                    // If they are the same, there is nothing to do.
                    bool status = true;
                    prevString.clear();
                    newString.clear();
                    status &= prevRG->appendString(prevString);
                    status &= rec->appendString(newString);

                    if(prevString != newString)
                    {
                        if(!ignorePI)
                        {
                            Logger::gLogger->error("Failed to add readgroup to "
                                                   "header, duplicate, but "
                                                   "non-identical RG ID, %s\n"
                                                   "prev:\t(%s)\nnew:\t(%s)",
                                                   rec->getTagValue("ID"),
                                                   prevString.c_str(),
                                                   newString.c_str());
                        }
                        else
                        {
                            // Check for a PI string.
                            size_t prevPIStart = prevString.find("PI:");
                            size_t newPIStart = newString.find("PI:");

                            // If they are both npos, then PI was not found
                            // so fail.
                            if((prevPIStart == std::string::npos) &&
                               (newPIStart == std::string::npos))
                            {
                                // They are not identical, so report an error.
                                Logger::gLogger->error("Failed to add readgroup"
                                                       " to header, duplicate,"
                                                       " but non-identical RG"
                                                       " ID, %s\n"
                                                       "prev:\t(%s)\nnew:\t(%s)",
                                                       rec->getTagValue("ID"),
                                                       prevString.c_str(),
                                                       newString.c_str());
                            }
                            else
                            {
                                // PI found in one or both strings.
                                size_t prevPIEnd;
                                size_t newPIEnd;
                                if(prevPIStart == std::string::npos)
                                {
                                    // new string has PI, so compare to the start of that.
                                    prevPIStart = newPIStart;
                                    prevPIEnd = newPIStart;
                                }
                                else
                                {
                                    prevPIEnd = prevString.find('\t', prevPIStart) + 1;
                                }
                                if(newPIStart == std::string::npos)
                                {
                                    // new string has PI, so compare to the start of that.
                                    newPIStart = prevPIStart;
                                    newPIEnd = newPIStart;
                                }
                                else
                                {
                                    newPIEnd = newString.find('\t', newPIStart) + 1;
                                }
                                // Compare before PI.
                                if((newString.compare(0, newPIStart, prevString, 0, prevPIStart) != 0) ||
                                   (newString.compare(newPIEnd, std::string::npos, prevString,
                                                      prevPIEnd, std::string::npos) != 0))
                                {
                                    // They are not identical, so report an error.
                                    Logger::gLogger->error("Failed to add readgroup to header, "
                                                           "duplicate, but non-identical RG ID, %s, "
                                                           "even when ignoring PI\n"
                                                           "prev:\t(%s)\nnew:\t(%s)",
                                                           rec->getTagValue("ID"),
                                                           prevString.c_str(),
                                                           newString.c_str());
                                }
                                else
                                {
                                    Logger::gLogger->warning("Warning: ignoring non-identical PI field "
                                                             "for RG ID, %s",
                                                             rec->getTagValue("ID"));
                                }
                            }
                        }
                    }
                }
                else
                {
                    // This RG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec)))
                    {
                        // Failed to add the RG, exit.
                        Logger::gLogger->error("Failed to add readgroup to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else if(rec->getType() == SamHeaderRecord::PG)
        {
            if(newHeader != NULL)
            {
                // This is a PG line.
                // First check if this PG is already included in the new header.
                SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID"));
                
                if(prevPG != NULL)
                {
                    // This PG already exists, check if they are the same.
                    // If they are the same, there is nothing to do.
                    bool status = true;
                    prevString.clear();
                    newString.clear();
                    status &= prevPG->appendString(prevString);
                    status &= rec->appendString(newString);
                    if(prevString != newString)
                    {
                        // They are not identical, ignore for now.
                        // TODO: change the ID, and add it.
                        Logger::gLogger->warning("Warning: dropping duplicate, "
                                                 "but non-identical PG ID, %s",
                                                 rec->getTagValue("ID"));
                    }
                }
                else
                {
                    // This PG does not exist yet, so add it to the new header.
                    if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec)))
                    {
                        // Failed to add the PG, exit.
                        Logger::gLogger->error("Failed to add PG to header, %s",
                                               newHeader->getErrorMessage());
                    }
                }
            }
        }
        else
        {
            rec->appendString(noRgPgString);
        }
        rec = header.getNextHeaderRecord();
    }

    // Append the comments.
    header.appendCommentLines(noRgPgString);
}