/// Fill `rg_sample` with read-group ID -> sample name pairs taken from the
/// "@RG" lines of an alignment file header.
///
/// @param hts_header  NUL-terminated header text, one record per line.
///                    A null pointer is treated as an empty header.
/// @param rg_sample   Map updated in place; entries already present are kept
///                    and checked for consistency against the header.
///
/// The process is terminated with exit status 1 (after a diagnostic on
/// stderr) when an @RG line lacks an SM or ID field, or when a read-group ID
/// is already associated with a different sample name.
void parse_rg_sample_map(char* hts_header, map<string, string>& rg_sample) {
    // Constructing a std::string from a null pointer is undefined behaviour,
    // so a missing header is handled as "no read groups".
    if (hts_header == nullptr) {
        return;
    }

    string header(hts_header);
    vector<string> header_lines = split_delims(header, "\n");

    for (auto& line : header_lines) {
        // skip blank lines
        if (line.empty()) {
            continue;
        }

        // lines of the header look like:
        // "@RG     ID:-    SM:NA11832      CN:BCM  PL:454"
        //                     ^^^^^^^\ is our sample name
        // Only the first three characters decide whether this is a
        // read-group line, so compare the prefix instead of searching.
        if (line.compare(0, 3, "@RG") != 0) {
            continue;
        }

        // NOTE(review): fields are split on both tab and space; presumably a
        // value containing a space is cut at that space -- confirm that this
        // is intended for the headers this code receives.
        vector<string> rg_parts = split_delims(line, "\t ");

        string name;   // value of the SM field (sample name)
        string rg_id;  // value of the ID field (read-group identifier)
        for (auto& part : rg_parts) {
            const size_t colpos = part.find(':');
            if (colpos == string::npos) {
                continue;  // not a TAG:VALUE field
            }
            const string fieldname = part.substr(0, colpos);
            // If a tag occurs more than once, the last occurrence wins.
            if (fieldname == "SM") {
                name = part.substr(colpos + 1);
            } else if (fieldname == "ID") {
                rg_id = part.substr(colpos + 1);
            }
        }

        if (name.empty()) {
            cerr << "[vg::alignment] Error: could not find 'SM' in @RG line " << endl << line << endl;
            exit(1);
        }
        if (rg_id.empty()) {
            cerr << "[vg::alignment] Error: could not find 'ID' in @RG line " << endl << line << endl;
            exit(1);
        }

        // A read group may only ever belong to a single sample.
        auto s = rg_sample.find(rg_id);
        if (s != rg_sample.end() && s->second != name) {
            cerr << "[vg::alignment] Error: multiple samples (SM) map to the same read group (RG)" << endl
                 << endl
                 << "samples " << name << " and " << s->second << " map to " << rg_id << endl
                 << endl
                 << "It will not be possible to determine what sample an alignment belongs to" << endl
                 << "at runtime." << endl
                 << endl
                 << "To resolve the issue, ensure that RG ids are unique to one sample" << endl
                 << "across all the input files to freebayes." << endl
                 << endl
                 << "See bamaddrg (https://github.com/ekg/bamaddrg) for a method which can" << endl
                 << "add RG tags to alignments." << endl;
            exit(1);
        }

        // if it's the same sample name and RG combo, no worries
        rg_sample[rg_id] = name;
    }
}
/// Convenience overload of split_delims that returns the pieces by value.
///
/// Creates an empty vector, hands it to the three-argument overload as the
/// output container, and returns whatever that overload returns.
///
/// @param s       Text to split.
/// @param delims  Delimiter characters forwarded unchanged to the
///                three-argument overload.
std::vector<std::string> split_delims(const std::string &s, const std::string& delims)
{
    std::vector<std::string> pieces;
    return split_delims(s, delims, pieces);
}