void buildTranscript(int sp, int ep) { int cur_s, cur_e; // current_start, current_end string transcript_id = items[sp].getTranscriptID(); string gene_id = items[sp].getGeneID(); string gene_name = "", transcript_name = ""; char strand = items[sp].getStrand(); string seqname = items[sp].getSeqName(); string left = items[sp].getLeft(); vec.clear(); cur_s = cur_e = -1; for (int i = sp; i <= ep; ++i) { int start = items[i].getStart(); int end = items[i].getEnd(); general_assert(strand == items[i].getStrand(), "According to the GTF file given, a transcript has exons from different orientations!"); general_assert(seqname == items[i].getSeqName(), "According to the GTF file given, a transcript has exons on multiple chromosomes!"); if (items[i].getGeneName() != "") { if (gene_name == "") gene_name = items[i].getGeneName(); else general_assert(gene_name == items[i].getGeneName(), "A transcript is associated with multiple gene names!"); } if (items[i].getTranscriptName() != "") { if (transcript_name == "") transcript_name = items[i].getTranscriptName(); else general_assert(transcript_name == items[i].getTranscriptName(), "A transcript is associated with multiple transcript names!"); } if (cur_e + 1 < start) { if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); cur_s = start; } cur_e = (cur_e < end ? end : cur_e); } if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); // if (gene_name != "") gene_id += "_" + gene_name; // if (transcript_name != "") transcript_id += "_" + transcript_name; transcripts.add(Transcript(transcript_id, gene_id, seqname, strand, vec, left)); }
bool buildTranscript(int sp, int ep) { int cur_s, cur_e; // current_start, current_end vector<Interval> vec; string transcript_id = items[sp].getTranscriptID(); string gene_id = items[sp].getGeneID(); char strand = items[sp].getStrand(); string seqname = items[sp].getSeqName(); string left = items[sp].getLeft(); vec.clear(); cur_s = cur_e = -1; for (int i = sp; i <= ep; i++) { int start = items[i].getStart(); int end = items[i].getEnd(); if (strand != items[i].getStrand()) { fprintf(stderr, "According to the GTF file given, a transcript has exons from different orientations!\n"); exit(-1); } if (seqname != items[i].getSeqName()) { fprintf(stderr, "According to the GTF file given, a transcript has exons on multiple chromosomes!\n"); exit(-1); } if (cur_e + 1 < start) { if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); cur_s = start; } cur_e = (cur_e < end ? end : cur_e); } if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); transcripts.add(Transcript(transcript_id, gene_id, seqname, strand, vec, left)); return true; }
int main(int argc, char* argv[]) { if (argc < 5 || ((hasMappingFile = atoi(argv[3])) && argc < 6)) { printf("Usage: synthesisRef refName quiet hasMappingFile<0,no;1,yes;2,allele-specific> [mappingFile] reference_file_1 [reference_file_2 ...]\n"); exit(-1); } verbose = !atoi(argv[2]); if (hasMappingFile) { loadMappingInfo(hasMappingFile, argv[4]); } // allele-specific if (hasMappingFile == 2) { transcripts.setType(2); } int start = hasMappingFile ? 5 : 4; ifstream fin; string line, gseq; string seqname, gene_id, transcript_id; vector<Interval> vec; M = 0; name2seq.clear(); for (int i = start; i < argc; i++) { fin.open(argv[i]); general_assert(fin.is_open(), "Cannot open " + cstrtos(argv[i]) + "! It may not exist."); unsigned long int line_no = 0; //Keep track of file line number getline(fin, line); line_no += 1; while ((fin) && (line[0] == '>')) { istringstream strin(line.substr(1)); strin>>seqname; gseq = ""; while((getline(fin, line)) && (line[0] != '>')) { line_no += 1; gseq += line; } int len = gseq.length(); assert(len > 0); for (int j = 0; j < len; j++) gseq[j] = check(gseq[j],line_no); name2seq[seqname] = gseq; transcript_id = seqname; gene_id = seqname; if (hasMappingFile) { mi_iter = mi_table.find(seqname); general_assert(mi_iter != mi_table.end(), "Mapping Info is not correct, cannot find " + seqname + "'s gene_id!"); gene_id = mi_iter->second; if (hasMappingFile == 2) { mi_iter2 = mi_table2.find(seqname); general_assert(mi_iter2 != mi_table2.end(), "Mapping Info is not correct, cannot find allele " + seqname + "'s transcript_id!"); transcript_id = mi_iter2->second; } } vec.clear(); vec.push_back(Interval(1, len)); transcripts.add(Transcript(transcript_id, gene_id, seqname, '+', vec, "")); ++M; if (verbose && M % 1000000 == 0) { printf("%d sequences are processed!\n", M); } } fin.close(); } if (M < 1) { fprintf(stderr, "Number of transcripts in the reference is less than 1!\n"); exit(-1); } assert(M == transcripts.getM()); transcripts.sort(); writeResults(hasMappingFile, argv[1]); return 0; }