void SamHeader::parse_text(const char* text) { std::istringstream strin(text); std::string line, tag; std::map<std::string, std::string> dict; std::map<std::string, std::string>::iterator dict_iter; HDstr = SQstr = RGstr = PGstr = COstr = other = ""; pids.clear(); while (getline(strin, line)) { if (line[0] != '@') continue; tag = line.substr(1, 2); if (tag == "HD") { general_assert(HDstr == "", "@HD tag can only present once!"); HDstr = line; HDstr += "\n"; } else if (tag == "SQ") { SQstr += line; SQstr += "\n"; } else if (tag == "RG") { RGstr += line; RGstr += "\n"; } else if (tag == "PG") { dict = parse_line(line); dict_iter = dict.find("ID"); general_assert(dict_iter != dict.end(), "\"" + line + "\" does not contain an ID!" ); general_assert(pids.find(dict_iter->second) == pids.end(), "Program record identifier " + dict_iter->second + " is not unique!"); pids.insert(dict_iter->second); PGstr += line; PGstr += "\n"; } else if (tag == "CO") { COstr += line; COstr += "\n"; } else { other += line; line += "\n"; } } }
/**
 * Rebuild the @SQ section of the header from a tab-separated index file.
 *
 * For each line of the file, the first column is emitted as SN: and the
 * second column as LN: of an "@SQ" record; any further columns are ignored.
 * The previous contents of SQstr are discarded.
 *
 * Opening failure is reported through general_assert. Each line must have a
 * non-empty first column followed by a non-empty second column (checked with
 * assert).
 *
 * @param faiF  path of the index file (presumably a samtools .fai — confirm
 *              against callers).
 */
void SamHeader::replaceSQ(const char* faiF) {
  std::ifstream fin(faiF);
  general_assert(fin.is_open(), "Cannot open " + cstrtos(faiF) + "! It may not exist.");

  std::string line;

  SQstr = "";
  while (getline(fin, line)) {
    const size_t name_end = line.find_first_of('\t');
    assert(name_end != std::string::npos && name_end > 0 && name_end + 1 < line.length() && line[name_end + 1] != '\t');

    // substr() takes (position, count): convert the absolute index of the
    // next tab into a length relative to the start of the second column.
    // With no further tab, the column runs to the end of the line.
    const size_t len_begin = name_end + 1;
    const size_t len_end = line.find_first_of('\t', len_begin);
    const size_t len_count = (len_end == std::string::npos) ? std::string::npos : len_end - len_begin;

    SQstr += "@SQ\tSN:" + line.substr(0, name_end) + "\tLN:" + line.substr(len_begin, len_count) + "\n";
  }
  fin.close();
}
void build_wiggles(const std::string& bam_filename, WiggleProcessor& processor) { samfile_t *bam_in = samopen(bam_filename.c_str(), "r", NULL); general_assert(bam_in != NULL, "Cannot open " + bam_filename + "!"); bam_hdr_t *header = bam_in->header; bool *used = new bool[header->n_targets]; memset(used, 0, sizeof(bool) * header->n_targets); int cur_tid = -1; //current tid; HIT_INT_TYPE cnt = 0; bam1_t *b = bam_init1(); Wiggle wiggle; while (samread(bam_in, b) >= 0) { if (bam_is_unmapped(b)) continue; if (b->core.tid != cur_tid) { if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); } cur_tid = b->core.tid; wiggle.name = header->target_name[cur_tid]; wiggle.length = header->target_len[cur_tid]; wiggle.read_depth.assign(wiggle.length, 0.0); } add_bam_record_to_wiggle(b, wiggle); ++cnt; if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl; } if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); } for (int32_t i = 0; i < header->n_targets; i++) if (!used[i]) { wiggle.name = header->target_name[i]; wiggle.length = header->target_len[i]; wiggle.read_depth.clear(); processor.process(wiggle); } bam_destroy1(b); samclose(bam_in); delete[] used; }