void driver(const vector<FILE*>& splice_coords_files, const vector<FILE*>& insertion_coords_files, const vector<FILE*>& deletion_coords_files, ifstream& ref_stream) { char splice_buf[2048]; RefSequenceTable rt(true); JunctionSet junctions; for (size_t i = 0; i < splice_coords_files.size(); ++i) { FILE* splice_coords = splice_coords_files[i]; if (!splice_coords) continue; while (fgets(splice_buf, 2048, splice_coords)) { char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; /** Fields are: 1) reference name 2) left coord of splice (last char of the left exon) 3) right coord of splice (first char of the right exon) */ char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); char* orientation = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_right_coord || !orientation) { fprintf(stderr,"Error: malformed splice coordinate record\n"); exit(1); } uint32_t ref_id = rt.get_id(ref_name, NULL, 0); uint32_t left_coord = atoi(scan_left_coord); uint32_t right_coord = atoi(scan_right_coord); bool antisense = *orientation == '-'; junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), JunctionStats())); } } /* * Read in the deletion coordinates * and store in a set */ std::set<Deletion> deletions; for(size_t i=0; i < deletion_coords_files.size(); ++i){ FILE* deletion_coords = deletion_coords_files[i]; if(!deletion_coords){ continue; } while (fgets(splice_buf, 2048, deletion_coords)) { char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; /** Fields are: 1) reference name 2) left coord of splice (last char of the left exon) 3) right coord of splice (first char of the right exon) */ char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_right_coord) { fprintf(stderr,"Error: malformed deletion coordinate record\n"); exit(1); } /* * Note that when reading in a deletion, the left co-ord is the position of the * first deleted based. Since we are co-opting the junction data structure, need * to fix up this location */ uint32_t ref_id = rt.get_id(ref_name, NULL, 0); uint32_t left_coord = atoi(scan_left_coord); uint32_t right_coord = atoi(scan_right_coord); deletions.insert(Deletion(ref_id, left_coord - 1, right_coord, false)); } } /* * Read in the insertion coordinates * and store in a set */ std::set<Insertion> insertions; for(size_t i=0; i < insertion_coords_files.size(); ++i){ FILE* insertion_coords = insertion_coords_files[i]; if(!insertion_coords){ continue; } while(fgets(splice_buf, 2048, insertion_coords)){ char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); char* scan_sequence = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_sequence || !scan_right_coord) { fprintf(stderr,"Error: malformed insertion coordinate record\n"); exit(1); } seqan::Dna5String sequence = seqan::Dna5String(scan_sequence); bool containsN = false; for(size_t index = 0; index < seqan::length(sequence); index += 1){ /* * Don't allow any ambiguities in the insertion */ if(sequence[index] == 'N'){ containsN = true; break; } } if(containsN){ continue; } seqan::CharString charSequence = sequence; uint32_t ref_id = rt.get_id(ref_name,NULL,0); uint32_t left_coord = atoi(scan_left_coord); insertions.insert(Insertion(ref_id, left_coord, seqan::toCString(charSequence))); } } typedef RefSequenceTable::Sequence Reference; while(ref_stream.good() && !ref_stream.eof()) { Reference ref_str; string name; readMeta(ref_stream, name, Fasta()); string::size_type space_pos = name.find_first_of(" \t\r"); if (space_pos != string::npos) { name.resize(space_pos); } read(ref_stream, ref_str, Fasta()); uint32_t refid = rt.get_id(name, NULL, 0); Junction dummy_left(refid, 0, 0, true); Junction dummy_right(refid, VMAXINT32, VMAXINT32, true); pair<JunctionSet::iterator, JunctionSet::iterator> r; r.first = junctions.lower_bound(dummy_left); r.second = junctions.upper_bound(dummy_right); JunctionSet::iterator itr = r.first; while(itr != r.second && itr != junctions.end()) { print_splice(itr->first, read_length, itr->first.antisense ? "GTAG|rev" : "GTAG|fwd", ref_str, name, cout); ++itr; } } ref_stream.clear(); ref_stream.seekg(0, ios::beg); while(ref_stream.good() && !ref_stream.eof()) { Reference ref_str; string name; readMeta(ref_stream, name, Fasta()); string::size_type space_pos = name.find_first_of(" \t\r"); if (space_pos != string::npos) { name.resize(space_pos); } read(ref_stream, ref_str, Fasta()); uint32_t refid = rt.get_id(name, NULL,0); Deletion dummy_left(refid, 0, 0, true); Deletion dummy_right(refid, VMAXINT32, VMAXINT32, true); pair<std::set<Deletion>::iterator, std::set<Deletion>::iterator> r; r.first = deletions.lower_bound(dummy_left); r.second = deletions.upper_bound(dummy_right); std::set<Deletion>::iterator itr = r.first; while(itr != r.second && itr != deletions.end()) { print_splice((Junction)*itr, read_length, itr->antisense ? "del|rev" : "del|fwd", ref_str, name, cout); ++itr; } } ref_stream.clear(); ref_stream.seekg(0, ios::beg); while(ref_stream.good() && !ref_stream.eof()) { Reference ref_str; string name; readMeta(ref_stream, name, Fasta()); string::size_type space_pos = name.find_first_of(" \t\r"); if (space_pos != string::npos) { name.resize(space_pos); } read(ref_stream, ref_str, Fasta()); uint32_t refid = rt.get_id(name, NULL,0); Insertion dummy_left(refid, 0, ""); Insertion dummy_right(refid, VMAXINT32, ""); std::set<Insertion>::iterator itr = insertions.lower_bound(dummy_left); std::set<Insertion>::iterator upper = insertions.upper_bound(dummy_right); while(itr != upper && itr != insertions.end()){ print_insertion(*itr, read_length, ref_str, name, cout); ++itr; } } }
void driver(const vector<FILE*>& splice_coords_files, const vector<FILE*>& insertion_coords_files, const vector<FILE*>& deletion_coords_files, const vector<FILE*>& fusion_coords_files, ifstream& ref_stream) { char splice_buf[2048]; RefSequenceTable rt(sam_header, true); get_seqs(ref_stream, rt, true); JunctionSet junctions; for (size_t i = 0; i < splice_coords_files.size(); ++i) { FILE* splice_coords = splice_coords_files[i]; if (!splice_coords) continue; while (fgets(splice_buf, 2048, splice_coords)) { char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; /** Fields are: 1) reference name 2) left coord of splice (last char of the left exon) 3) right coord of splice (first char of the right exon) */ char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); char* orientation = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_right_coord || !orientation) { fprintf(stderr,"Error: malformed splice coordinate record\n"); exit(1); } uint32_t ref_id = rt.get_id(ref_name, NULL, 0); uint32_t left_coord = atoi(scan_left_coord); uint32_t right_coord = atoi(scan_right_coord); bool antisense = *orientation == '-'; junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), JunctionStats())); } } /* * Read in the deletion coordinates * and store in a set */ std::set<Deletion> deletions; for(size_t i=0; i < deletion_coords_files.size(); ++i){ FILE* deletion_coords = deletion_coords_files[i]; if(!deletion_coords){ continue; } while (fgets(splice_buf, 2048, deletion_coords)) { char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; /** Fields are: 1) reference name 2) left coord of splice (last char of the left exon) 3) right coord of splice (first char of the right exon) */ char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_right_coord) { fprintf(stderr,"Error: malformed deletion coordinate record\n"); exit(1); } /* * Note that when reading in a deletion, the left co-ord is the position of the * first deleted based. Since we are co-opting the junction data structure, need * to fix up this location */ uint32_t ref_id = rt.get_id(ref_name, NULL, 0); uint32_t left_coord = atoi(scan_left_coord); uint32_t right_coord = atoi(scan_right_coord); deletions.insert(Deletion(ref_id, left_coord - 1, right_coord, false)); } } /* * Read in the insertion coordinates * and store in a set */ std::set<Insertion> insertions; for(size_t i=0; i < insertion_coords_files.size(); ++i){ FILE* insertion_coords = insertion_coords_files[i]; if(!insertion_coords){ continue; } while(fgets(splice_buf, 2048, insertion_coords)){ char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; char* ref_name = get_token((char**)&buf, "\t"); char* scan_left_coord = get_token((char**)&buf, "\t"); char* scan_right_coord = get_token((char**)&buf, "\t"); char* scan_sequence = get_token((char**)&buf, "\t"); if (!scan_left_coord || !scan_sequence || !scan_right_coord) { fprintf(stderr,"Error: malformed insertion coordinate record\n"); exit(1); } seqan::Dna5String sequence = seqan::Dna5String(scan_sequence); bool containsN = false; for(size_t index = 0; index < seqan::length(sequence); index += 1){ /* * Don't allow any ambiguities in the insertion */ if(sequence[index] == 'N'){ containsN = true; break; } } if(containsN){ continue; } seqan::CharString charSequence = sequence; uint32_t ref_id = rt.get_id(ref_name,NULL,0); uint32_t left_coord = atoi(scan_left_coord); insertions.insert(Insertion(ref_id, left_coord, seqan::toCString(charSequence))); } } std::set<Fusion> fusions; for(size_t i=0; i < fusion_coords_files.size(); ++i){ FILE* fusion_coords = fusion_coords_files[i]; if(!fusion_coords){ continue; } while(fgets(splice_buf, 2048, fusion_coords)){ char* nl = strrchr(splice_buf, '\n'); char* buf = splice_buf; if (nl) *nl = 0; char* ref_name1 = strsep((char**)&buf, "\t"); char* scan_left_coord = strsep((char**)&buf, "\t"); char* ref_name2 = strsep((char**)&buf, "\t"); char* scan_right_coord = strsep((char**)&buf, "\t"); char* scan_dir = strsep((char**)&buf, "\t"); if (!ref_name1 || !scan_left_coord || !ref_name2 || !scan_right_coord || !scan_dir) { fprintf(stderr,"Error: malformed insertion coordinate record\n"); exit(1); } uint32_t ref_id1 = rt.get_id(ref_name1, NULL, 0); uint32_t ref_id2 = rt.get_id(ref_name2, NULL, 0); uint32_t left_coord = atoi(scan_left_coord); uint32_t right_coord = atoi(scan_right_coord); uint32_t dir = FUSION_FF; if (strcmp(scan_dir, "fr") == 0) dir = FUSION_FR; else if(strcmp(scan_dir, "rf") == 0) dir = FUSION_RF; else if(strcmp(scan_dir, "rr") == 0) dir = FUSION_RR; fusions.insert(Fusion(ref_id1, ref_id2, left_coord, right_coord, dir)); } } { JunctionSet::iterator itr = junctions.begin(); for (; itr != junctions.end(); ++itr) { RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->first.refid); if (ref_str == NULL) continue; const char* name = rt.get_name(itr->first.refid); print_splice(itr->first, read_length, itr->first.antisense ? "GTAG|rev" : "GTAG|fwd", *ref_str, name, cout); } } { std::set<Deletion>::iterator itr = deletions.begin(); for (; itr != deletions.end(); ++itr) { RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->refid); if (ref_str == NULL) continue; const char* name = rt.get_name(itr->refid); print_splice((Junction)*itr, read_length, itr->antisense ? "del|rev" : "del|fwd", *ref_str, name, cout); } } { std::set<Insertion>::iterator itr = insertions.begin(); for (; itr != insertions.end(); ++itr){ RefSequenceTable::Sequence* ref_str = rt.get_seq(itr->refid); if (ref_str == NULL) continue; const char* name = rt.get_name(itr->refid); print_insertion(*itr, read_length, *ref_str, name, cout); } } { std::set<Fusion>::iterator itr = fusions.begin(); for (; itr != fusions.end(); ++itr){ RefSequenceTable::Sequence* left_ref_str = rt.get_seq(itr->refid1); RefSequenceTable::Sequence* right_ref_str = rt.get_seq(itr->refid2); if (left_ref_str == NULL || right_ref_str == NULL) continue; const char* left_ref_name = rt.get_name(itr->refid1); const char* right_ref_name = rt.get_name(itr->refid2); print_fusion(*itr, read_length, *left_ref_str, *right_ref_str, left_ref_name, right_ref_name, cout); } } }