Exemplo n.º 1
0
/**
 * Split SAM header text into per-record-type buffers.
 *
 * Every line starting with '@' is appended (newline-terminated) to the member
 * string matching its two-character record code: HDstr, SQstr, RGstr, PGstr,
 * COstr, or `other` for any unrecognized code. Lines that do not start with
 * '@' (including empty lines) are skipped. All buffers and `pids` are reset
 * before parsing.
 *
 * Checks enforced through general_assert:
 *   - at most one @HD line may appear;
 *   - every @PG line must carry an ID field (as reported by parse_line);
 *   - @PG IDs must be unique; each accepted ID is recorded in `pids`.
 *
 * @param text NUL-terminated header text, one record per line.
 */
void SamHeader::parse_text(const char* text) {
    std::istringstream strin(text);
    std::string line, tag;

    HDstr = SQstr = RGstr = PGstr = COstr = other = "";
    pids.clear();

    while (getline(strin, line)) {
        // Header records always begin with '@'; ignore anything else.
        if (line.empty() || line[0] != '@') continue;

        // Two-character record type code following '@'.
        tag = line.substr(1, 2);
        if (tag == "HD") {
            general_assert(HDstr == "", "@HD tag can only present once!");
            HDstr = line;
            HDstr += "\n";
        }
        else if (tag == "SQ") {
            SQstr += line;
            SQstr += "\n";
        }
        else if (tag == "RG") {
            RGstr += line;
            RGstr += "\n";
        }
        else if (tag == "PG") {
            const std::map<std::string, std::string> dict = parse_line(line);
            const std::map<std::string, std::string>::const_iterator dict_iter = dict.find("ID");
            general_assert(dict_iter != dict.end(), "\"" + line + "\" does not contain an ID!" );

            general_assert(pids.find(dict_iter->second) == pids.end(), "Program record identifier " + dict_iter->second + " is not unique!");
            pids.insert(dict_iter->second);

            PGstr += line;
            PGstr += "\n";
        }
        else if (tag == "CO") {
            COstr += line;
            COstr += "\n";
        }
        else {
            // Keep unrecognized records newline-terminated like every other
            // buffer (the terminator used to be appended to `line` by mistake,
            // gluing consecutive records together).
            other += line;
            other += "\n";
        }
    }
}
Exemplo n.º 2
0
/**
 * Rebuild the @SQ section (SQstr) from a tab-delimited index file.
 *
 * Each input line must have a non-empty first column followed by a tab and a
 * non-empty second column. One "@SQ\tSN:<col1>\tLN:<col2>\n" record is emitted
 * per line; any further columns are ignored.
 * NOTE(review): the ".fai" naming suggests a samtools faidx index
 * (name, length, ...) — confirm against callers.
 *
 * Failure to open the file or a malformed line is reported through
 * general_assert.
 *
 * @param faiF path of the index file to read.
 */
void SamHeader::replaceSQ(const char* faiF) {
    std::ifstream fin(faiF);
    general_assert(fin.is_open(), "Cannot open " + cstrtos(faiF) + "! It may not exist.");

    std::string line;

    SQstr = "";
    while (getline(fin, line)) {
        const size_t name_end = line.find('\t');
        // Validate with general_assert rather than assert() so that malformed
        // input is still rejected in builds compiled with NDEBUG.
        general_assert(name_end != std::string::npos && name_end > 0 &&
                       name_end + 1 < line.length() && line[name_end + 1] != '\t',
                       "Malformed line in " + cstrtos(faiF) + ": \"" + line + "\"");

        const size_t len_begin = name_end + 1;
        const size_t len_end = line.find('\t', len_begin);
        // substr() takes a character count, not an end position: convert the
        // position of the next tab into the width of the second column. When
        // there is no further tab the column runs to the end of the line.
        const size_t len_count =
            (len_end == std::string::npos) ? std::string::npos : len_end - len_begin;

        SQstr += "@SQ\tSN:";
        SQstr += line.substr(0, name_end);
        SQstr += "\tLN:";
        SQstr += line.substr(len_begin, len_count);
        SQstr += "\n";
    }
    // fin is closed by its destructor when it goes out of scope.
}
Exemplo n.º 3
0
void build_wiggles(const std::string& bam_filename,
                   WiggleProcessor& processor) {
  
    samfile_t *bam_in = samopen(bam_filename.c_str(), "r", NULL);
    general_assert(bam_in != NULL, "Cannot open " + bam_filename + "!");

    bam_hdr_t *header = bam_in->header;
    bool *used = new bool[header->n_targets];
    memset(used, 0, sizeof(bool) * header->n_targets);

    int cur_tid = -1; //current tid;
    HIT_INT_TYPE cnt = 0;
    bam1_t *b = bam_init1();
    Wiggle wiggle;
    while (samread(bam_in, b) >= 0) {
      if (bam_is_unmapped(b)) continue;
      
      if (b->core.tid != cur_tid) {
	if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
	cur_tid = b->core.tid;
	wiggle.name = header->target_name[cur_tid];
	wiggle.length = header->target_len[cur_tid];
	wiggle.read_depth.assign(wiggle.length, 0.0);
      }
      add_bam_record_to_wiggle(b, wiggle);
      ++cnt;
      if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl;
    }
    if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
    
    for (int32_t i = 0; i < header->n_targets; i++)
      if (!used[i]) {
	wiggle.name = header->target_name[i];
	wiggle.length = header->target_len[i];
	wiggle.read_depth.clear();
	processor.process(wiggle);
      }

    bam_destroy1(b);
    samclose(bam_in);

    delete[] used;
}