// Reads the gene mapping file named by the member `mapFile` and fills the
// per-gene members `chr`, `start_pos`, `end_pos` and `genename` (one entry per
// distinct gene name), then builds `chr_idx` (sort index over `chr`) and the
// `ChrStartHash` / `ChrEndHash` lookups from it.
//
// path: location used to find the default mapping file when it is not found
//       relative to the working directory; everything before the first
//       occurrence of "bin" is taken as the installation root.
//       (presumably the executable's path -- confirm against the caller)
//
// Columns used from each tab-separated record: [0] gene name, [2] chromosome,
// [4] start, [5] end (looks like the UCSC refFlat layout -- confirm).
//
// Calls error() when no mapping file can be opened.
void GroupFromAnnotation::GetGeneMap(String path)
{
    const char * defaultMap = "../data/refFlat_hg19.txt";

    IFILE genemap = ifopen(mapFile,"r");
    if(genemap==NULL)
    {
        // Fallback locations are only tried for the built-in default; a
        // user-supplied file that cannot be opened is reported immediately.
        if(mapFile==defaultMap)
        {
            // 1) gzipped copy next to the default location
            mapFile += ".gz";
            genemap = ifopen(mapFile,"r");
            if(genemap==NULL)
            {
                // 2) data directory next to the "bin" directory found in path
                int loc = path.Find("bin");
                if(loc!=-1)
                {
                    // NOTE(review): loc==0 gives Left(-1); behaviour for a
                    // path that starts with "bin" is unverified.
                    mapFile = path.Left(loc-1);
                    mapFile += "/data/refFlat_hg19.txt";
                }
                else
                {
                    // Reset rather than append: mapFile currently carries the
                    // ".gz" suffix, and appending produced a nonsensical path
                    // that also ended up in the error message.
                    mapFile = defaultMap;
                }
                genemap = ifopen(mapFile,"r");
            }
            if(genemap==NULL)
            {
                // 3) gzipped copy of the location chosen in step 2
                mapFile += ".gz";
                genemap = ifopen(mapFile,"r");
            }
            if(genemap==NULL)
                error("Cannot open gene mapping file %s.\n",mapFile.c_str());
        }
        else
            error("Cannot open gene mapping file %s.\n",mapFile.c_str());
    }

    // gene name -> index of that gene in chr / start_pos / end_pos / genename
    StringIntHash GeneLocHash;
    int gene_idx = 0;

    while(!ifeof(genemap))
    {
        String buffer;
        buffer.ReadLine(genemap);
        StringArray record;
        record.AddTokens(buffer,"\t");

        // Blank or truncated lines do not have the columns indexed below.
        if(record.Length()<6)
            continue;

        // Normalise the chromosome name: drop a 3-character prefix whose third
        // character is 'r'/'R' (e.g. "chr"), then keep only the part before
        // the first of "_,;.".
        if(record[2][2]=='r' || record[2][2]=='R')
            record[2] = record[2].SubStr(3);
        StringArray gene_chr;
        gene_chr.AddTokens(record[2],"_,;.");

        // Skip records with no usable chromosome name or on "Un" contigs.
        // This must happen before the gene is registered in GeneLocHash:
        // registering first left the hash pointing at an index that was never
        // filled, so a later record of the same gene would read (and could
        // overwrite) the slot of whichever gene was stored next.
        if(gene_chr.Length()==0 || gene_chr[0].Find("Un")!=-1)
            continue;

        int rec_start = record[4].AsInteger();
        int rec_end = record[5].AsInteger();

        int loc = GeneLocHash.Integer(record[0]);
        if(loc==-1)
        {
            // First record for this gene: save chr, start and end positions.
            GeneLocHash.SetInteger(record[0],gene_idx);
            chr.Push(gene_chr[0]);
            start_pos.Push(rec_start);
            end_pos.Push(rec_end);
            genename.Push(record[0]);
            gene_idx++;
        }
        else
        {
            // Additional record for a known gene. Records on a different
            // chromosome than the first one are ignored (strand is not
            // checked).
            if(chr[loc]!=gene_chr[0])
                continue;

            // Widen the stored span so it covers every record of the gene.
            if(rec_start<start_pos[loc])
                start_pos[loc] = rec_start;
            if(rec_end>end_pos[loc])
                end_pos[loc] = rec_end;
        }
    }
    ifclose(genemap);

    chr_idx.Index(chr);

    // Nothing was loaded: chr[chr_idx[0]] below would be out of range.
    if(chr.Length()==0)
        return;

    // Walk the genes in chr_idx order and record, at every change of
    // chromosome, the rank where the new chromosome begins and the rank where
    // the previous one ended.
    // NOTE(review): as written, the chromosome at rank 0 never receives a
    // ChrStartHash entry and the chromosome at the last rank never receives a
    // ChrEndHash entry. Left unchanged because callers are not visible here --
    // confirm whether they compensate for the missing entries.
    String chr_ = chr[chr_idx[0]];
    for(int i=1; i<chr.Length(); i++)
    {
        if(chr[chr_idx[i]]!=chr_)
        {
            ChrStartHash.SetInteger(chr[chr_idx[i]],i);
            ChrEndHash.SetInteger(chr_,i-1);
            chr_ = chr[chr_idx[i]];
        }
    }
}