void streaming_sort() { int capacity = 256; int nitems = 0,i=0; int buffer[capacity]; stream_init(); // GET READY FILE*file = open_infile(infile_name); while(!feof(file) && !ferror(file)) { int success = fscanf(file,"%d",buffer + i); if(success != 1) break; nitems++; i++; if(i == capacity) { stream_data( buffer, i); // SOME DATA FOR YOU i = 0; } } if(i) stream_data( buffer, i); // HAVE A BIT MORE DATA stream_end(); // DONE fclose(file); }
/**
 * Builds a RefGenome from a pileup file that stores two lines per locus:
 * first an "insertion" line whose second column starts with '_', then the
 * regular line for the same locus.
 *
 * For every pair, the first character of the regular line's third column is
 * appended to the entry keyed by the integer value of its first column.
 *
 * @param pileupfile path handed to open_infile
 * @return the reference genome accumulated from all complete line pairs
 * @throws runtime_error if either line of a pair does not have 4 or 6
 *         tab-separated columns, or if the '_' marker is not on the first
 *         line only
 */
RefGenome PileupParserGDA::getRefGenome(string pileupfile)
{
    RefGenome refgenome;
    ifstream cur_fs_pileupfile;
    open_infile(cur_fs_pileupfile, pileupfile);

    string cur_line_ins;
    string cur_line;
    // Test the result of getline itself rather than eof() afterwards:
    // reading a final line that has no trailing newline also sets eofbit,
    // so an eof() check would silently discard the last record.
    // A dangling unpaired line at the end is still ignored.
    while (getline(cur_fs_pileupfile, cur_line_ins) && getline(cur_fs_pileupfile, cur_line)) {
        const vector<string> buf_ins = split(cur_line_ins, '\t');
        const vector<string> buf = split(cur_line, '\t');

        const bool ins_cols_ok = (buf_ins.size() == 4 || buf_ins.size() == 6);
        const bool cols_ok = (buf.size() == 4 || buf.size() == 6);
        if (!cols_ok || !ins_cols_ok)
            throw runtime_error ("Error in PileupParserGDA::getRefGenome : incorrect buf size.");

        // The insertion line must carry the '_' prefix, the regular line must not.
        if (buf_ins[1][0] != '_' || buf[1][0] == '_')
            throw runtime_error ("Error in PileupParserGDA::getRefGenome : incorrect pileupfile format.");

        refgenome[ atoi(buf[0].c_str()) ].push_back(buf[2][0]);
    }

    cur_fs_pileupfile.close();
    return refgenome;
}
int main (int argc, char **argv) { Exec_Name = basename (argv[0]); char *default_dictionary = DEFAULT_DICTNAME; char *user_dictionary = NULL; hashset_ref hashset = new_hashset (); yy_flex_debug = false; // Scan the arguments and set flags. opterr = false; for (;;) { int option = getopt (argc, argv, "nxyd:@:"); if (option == EOF) break; switch (option) { char optopt_string[16]; // used in default: case 'd': user_dictionary = optarg; break; case 'n': default_dictionary = NULL; break; case 'x': option_x(hashset); break; case 'y': yy_flex_debug = true; break; case '@': set_debugflags (optarg); if (strpbrk (optarg, "@y")) yy_flex_debug = true; break; default : sprintf (optopt_string, "-%c", optopt); print_error (optopt_string, "invalid option"); break; } } // Load the dictionaries into the hash table. load_dictionary (default_dictionary, hashset); load_dictionary (user_dictionary, hashset); // Read and do spell checking on each of the files. if (optind >= argc) { yyin = stdin; spellcheck (STDIN_NAME, hashset); }else { int fileix = optind; for (; fileix < argc; ++fileix) { DEBUGF ('m', "argv[%d] = \"%s\"\n", fileix, argv[fileix]); char *filename = argv[fileix]; if (strcmp (filename, STDIN_NAME) == 0) { yyin = stdin; spellcheck (STDIN_NAME, hashset); }else { yyin = open_infile (filename); if (yyin == NULL) continue; spellcheck (filename, hashset); fclose (yyin); } } } yycleanup (); return Exit_Status; }
int main ( int argc, char *argv[] ) { Param params; FlowCom *comPtr = NULL; char line [ LINE_LEN ]; while ( argc > 1 ) { if ( !strcmp( argv[1], "-D" ) ) { setDebugFlag(); } else if ( !strcmp( argv[1], "-V" ) ) { printf ( "%s\n", VERSION ); exit ( 0 ); } else { break; } argv++; argc--; } open_tempfile(); open_infile ( ( argc > 1 ) ? argv[ 1 ] : NULL ); tprintf ( "%% picture environment flowchart generated by flow " ); tprintf ( "%s\n", VERSION ); while ( readline_infile ( line, LINE_LEN ) != NULL ) { if ( ( comPtr = getCommand ( line, params ) ) != NULL ) { switch ( comPtr -> command ) { case MACRO: if ( doMacro ( params ) ) { errout ( E_NO_END_MACRO ); } break; case EXPAND: doExpand ( params ); break; default: if ( !doCommand ( comPtr, params ) ) { return 10; } } } else { break; } } close_infile(); close_tempfile(); apply_tempfile ( getPic(), ( argc > 2 ) ? argv[ 2 ] : NULL ); remove_tempfile(); // dumpFigure(); return 0; /* just to suppress the warning */ }
//
// Loads every line of the named dictionary file into the hash set.
//
// dictionary_name: path of the dictionary; NULL means "nothing to load".
// hashset:         set that receives one heap-allocated copy per line.
//
// Each line is read with fgets (at most sizeof buffer - 1 characters per
// read, so longer lines arrive in several pieces), stripped of its newline,
// duplicated with strdup and handed to put_hashset.
//
void load_dictionary (char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);

   FILE *dict = open_infile (dictionary_name);
   // NOTE(review): assumes open_infile returns NULL when the file cannot
   // be opened -- confirm.  Without this guard fgets would be handed NULL.
   if (dict == NULL) return;

   char buffer[1024];
   while (fgets (buffer, sizeof buffer, dict) != NULL) {
      // strcspn yields the index of the first '\n', or of the terminating
      // NUL if there is none, so this is safe even for an empty string
      // (where strlen(buffer) - 1 would index before the buffer).
      buffer[strcspn (buffer, "\n")] = '\0';

      char *word = strdup (buffer);
      assert (word != NULL);
      put_hashset (hashset, word);
   }

   fclose (dict);
}
//
// Loads every line of the named dictionary file into the hash set.
//
// dictionary_name: path of the dictionary; NULL means "nothing to load".
// hashset:         set that receives each line with its newline removed.
//
// Returns silently when the file cannot be opened.
//
void load_dictionary (char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;

   FILE *dictionary = open_infile (dictionary_name);
   if (dictionary == NULL) return;

   char buffer[1024];
   while (fgets (buffer, sizeof buffer, dictionary) != NULL) {
      // Stop at the newline OR at the terminator: a final line without
      // '\n' (or a line longer than the buffer) contains no newline, and
      // an unbounded scan would run past the end of the buffer.
      int end = 0;
      while (buffer[end] != '\n' && buffer[end] != '\0') {
         ++end;
      }
      buffer[end] = '\0';

      // NOTE(review): buffer is a local array that is overwritten on the
      // next read; this is only correct if put_hashset copies the string.
      // Confirm against put_hashset.
      put_hashset (hashset, buffer);
   }

   fclose (dictionary);
}
//
// Reads the named dictionary file line by line into the buffer obtained
// from return_array(hashset), whose size is taken from
// return_length(hashset).
//
// dictionary_name: path of the dictionary; NULL means "nothing to load".
// hashset:         hash set whose array/length accessors supply the buffer.
//
void load_dictionary (char *dictionary_name, hashset_ref hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);
   // STUBPRINTF ("Open dictionary, load it, close it\n");

   size_t length = return_length (hashset);
   char *buffer = return_array (hashset);
   printf ("%s\n", buffer);

   FILE *dic = open_infile (dictionary_name);
   // NOTE(review): assumes open_infile returns NULL on failure -- confirm.
   if (dic == NULL) return;

   // Loop until fgets reports end of file or an error.  The loop used to
   // have no exit, so it spun forever at EOF and fclose was never reached.
   while (fgets (buffer, length, dic) != NULL) {
      // NOTE(review): this writes the line back to the stream it was just
      // read from, which looks unintended (stdout or an insert into the
      // hash set was probably meant).  Kept as-is pending confirmation.
      fputs (buffer, dic);
   }

   fclose (dic);
}
/*
 * Reads every decimal integer from the named file into a heap array.
 *
 * infile_name: path handed to open_infile.
 * data:        out-parameter; on return *data points to a malloc'ed array
 *              holding the values (or is NULL if the initial allocation
 *              failed).  The caller owns it and releases it with free().
 *
 * Returns the number of integers stored.  Reading stops at end of file, at
 * the first token that is not an integer, on a read error, or when the
 * array can no longer be grown (the values read so far are kept).
 */
int read_file(const char * infile_name, int ** data)
{
    int capacity = 1024;
    int nitems = 0;

    *data = (int *)malloc(capacity * sizeof(int));
    if (*data == NULL) {
        return 0;
    }

    FILE *file = open_infile(infile_name);
    /* NOTE(review): assumes open_infile returns NULL on failure -- confirm.
       *data stays allocated so the caller's usual free() remains valid. */
    if (file == NULL) {
        return 0;
    }

    int value;
    /* fscanf returns 1 only when a value was converted, which covers EOF,
       read errors and malformed input in one test. */
    while (fscanf(file, "%d", &value) == 1) {
        if (nitems == capacity) {
            /* Grow through a temporary: assigning realloc's result straight
               to *data would lose (leak) the old block on failure. */
            int *grown = (int *)realloc(*data, sizeof(int) * capacity * 2);
            if (grown == NULL) {
                fprintf(stderr, "read_file: out of memory after %d items\n", nitems);
                break;
            }
            *data = grown;
            capacity *= 2;
        }
        (*data)[nitems++] = value;
    }

    fclose(file);
    return nitems;
}
/**
 * Scans a pileup file and returns the largest molecule ID found.
 *
 * Only the last element of each record's readID and readID_ins lists is
 * compared against the running maximum.
 *
 * @param pileupfile path handed to open_infile
 * @return the largest ID seen
 * @throws runtime_error if no non-negative ID was found
 */
int PileupParserGDA::getMaxMolID(string pileupfile)
{
    ifstream fs_pileupfile;
    open_infile(fs_pileupfile, pileupfile);
    // NOTE(review): the parser is given the address of this local stream;
    // whether it keeps that pointer after this function returns is not
    // visible here -- confirm it is not used afterwards.
    this->setPileupFileStream(&fs_pileupfile);

    int max_mol_id = -1;
    for (;;) {
        this->readLine();
        if (fs_pileupfile.eof())
            break;

        Pileup pu = this->getPileup();

        if (pu.readID.size() > 0 && pu.readID.back() > max_mol_id)
            max_mol_id = pu.readID.back();

        if (pu.readID_ins.size() > 0 && pu.readID_ins.back() > max_mol_id)
            max_mol_id = pu.readID_ins.back();
    }
    fs_pileupfile.close();

    // -1 is the initial sentinel: still negative means nothing was found.
    if (max_mol_id < 0)
        throw runtime_error("Error in PileupParserGDA::getMaxMolID(): fail to get max mol ID.");

    return max_mol_id ;
}
/*
 * Assembler driver skeleton.
 *
 * Usage: <program> <input-file> <output-file>
 *
 * Reads the input file line by line and writes one output line per input
 * line.  The per-instruction translation is still commented out, so the
 * output line is currently always empty.
 *
 * Returns 0 on success, 1 when arguments are missing or a file cannot be
 * opened.
 */
int main(int argc, char **argv){
    /* argv[0] is the program name; both file names are required. */
    if(argc < 3){
        printf("Please specify a file to assemble...\n");
        return 1;
    }
    char *in_filename = argv[1];
    char *out_filename = argv[2];

    /* Real storage for the line buffers (40 is arbitrary).
       NOTE(review): confirm out_line's size once the translators that fill
       it are enabled. */
    char in_line[40];
    char out_line[40];
    char instruction_type = 'r'; //default to rtype. You might change this.
    (void)instruction_type;      /* only used by the disabled dispatch below */

    FILE *in = open_infile(in_filename);
    if(in == NULL) return 1;
    FILE *out = open_outfile(out_filename);
    if(out == NULL){
        fclose(in);
        return 1;
    }

    /* fgets returns NULL at end of file or on error; testing the buffer's
       first byte against EOF does not detect either. */
    while(fgets(in_line, sizeof in_line, in) != NULL){
        out_line[0] = '\0';   /* nothing is produced until translation is enabled */
        //instruction_type = type(in_line);
        /*switch (instruction_type){
            case 'r':
                rtype(out_line, in_line);
                break;
            case 'i':
                itype(out_line, in_line);
                break;
            case 'j':
                jtype(out_line, in_line);
                break;
            default:
                break;
        }*/
        fputs(out_line, out);
    }

    fclose(in);
    fclose(out);
    return 0;
}
int main (int argc, char **argv) { program_name = basename (argv[0]); char *default_dictionary = DEFAULT_DICTNAME; char *user_dictionary = NULL; hashset *hashset = new_hashset (); yy_flex_debug = false; scan_options (argc, argv, &default_dictionary, &user_dictionary); // Load the dictionaries into the hash table. load_dictionary (default_dictionary, hashset); load_dictionary (user_dictionary, hashset); if(x == true) { handle_x(hashset); return exit_status; } // Read and do spell checking on each of the files. if (optind >= argc) { yyin = stdin; spellcheck (STDIN_NAME, hashset); }else { for (int fileix = optind; fileix < argc; ++fileix) { DEBUGF ('m', "argv[%d] = \"%s\"\n", fileix, argv[fileix]); char *filename = argv[fileix]; if (strcmp (filename, STDIN_NAME) == 0) { yyin = stdin; spellcheck (STDIN_NAME, hashset); }else { yyin = open_infile (filename); if (yyin == NULL) continue; spellcheck (filename, hashset); fclose (yyin); } } } yylex_destroy (); free_hashset(hashset); return exit_status; }
//
// Loads the named dictionary file into the hash set, one entry per line,
// and prints how many entries were processed.
//
// dictionary_name: path of the dictionary; NULL means "nothing to load".
// hashset:         set that receives each line.
//
// A line without a terminating newline is reported on stderr as a
// "broken line", sets Exit_Status to 2, and is still inserted.
//
void load_dictionary (const char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);

   FILE *dictionary = open_infile (dictionary_name);
   assert (dictionary != NULL);

   char buff[1024];
   int words_added = 0;
   int linenr = 1;
   while (fgets (buff, sizeof buff, dictionary) != NULL) {
      // Replace the trailing newline with a terminator, or complain if the
      // line has none.
      char *newline = strchr (buff, '\n');
      if (newline != NULL) {
         *newline = '\0';
      }else {
         fflush (NULL);
         fprintf (stderr, "%s: %s[%d]: broken line\n",
                  Exec_Name, dictionary_name, linenr);
         fflush (NULL);
         Exit_Status = 2;
      }

      // Insert a heap copy of the line, then release that copy.
      // NOTE(review): freeing right after the insert is only valid if
      // put_hashset keeps its own copy -- confirm.
      char *word = strdup (buff);
      assert (word != NULL);
      put_hashset (hashset, word);
      free (word);

      ++words_added;
      ++linenr;
   }

   fclose (dictionary);
   printf ("# of words added: %d\n", words_added);
}
/**
 * Validates the layout of a pileup file that stores two lines per locus:
 * an insertion line followed by the regular line.
 *
 * The following are checked for every pair, and the first violation is
 * reported on cerr:
 *  - both lines have 4 or 6 tab-separated columns;
 *  - both lines name the same reference ID and locus (the insertion line's
 *    locus is read after skipping its first character);
 *  - the first locus of each reference ID is 1 and later loci increase by 1;
 *  - when a line has 6 columns, the comma-separated IDs in its sixth column
 *    are strictly increasing.
 *
 * @param pileupfile path handed to open_infile
 * @return true if every complete line pair passes, false otherwise
 */
bool PileupParserGDA::checkFormat(string pileupfile)
{
    ifstream fs_pileupfile;
    open_infile(fs_pileupfile, pileupfile);

    // True when the numeric values of consecutive IDs strictly increase;
    // lists with fewer than two entries trivially pass.
    const auto ids_strictly_increasing = [](const vector<string> &ids) -> bool {
        for (size_t i = 1; i < ids.size(); ++i) {
            if (atoi(ids[i - 1].c_str()) >= atoi(ids[i].c_str()))
                return false;
        }
        return true;
    };

    int prev_refID = -1;
    int prev_locus = 0;
    string line_ins;
    string line;
    // Test the result of getline itself rather than eof() afterwards:
    // reading a final line without a trailing newline also sets eofbit, so
    // an eof() check would skip validation of the last record.
    while (getline(fs_pileupfile, line_ins) && getline(fs_pileupfile, line)) {
        const vector<string> line_ins_list = split(line_ins, '\t');
        const vector<string> line_list = split(line, '\t');

        // check number of columns
        const bool ins_cols_ok = (line_ins_list.size() == 4 || line_ins_list.size() == 6);
        const bool cols_ok = (line_list.size() == 4 || line_list.size() == 6);
        if (!ins_cols_ok || !cols_ok) {
            cerr << "number of columns is not 4 or 6" << endl;
            return false;
        }

        // check if two lines match
        const int refID_ins = atoi(line_ins_list[0].c_str());
        const int refID = atoi(line_list[0].c_str());
        // Skip the one-character prefix of the insertion locus.  An empty
        // field is read as 0 (what atoi gives for an empty string) instead
        // of letting substr(1) throw out_of_range from a checker that
        // otherwise reports problems through its return value.
        const string &locus_ins_field = line_ins_list[1];
        const int locus_ins = locus_ins_field.empty() ? 0 : atoi(locus_ins_field.c_str() + 1);
        const int locus = atoi(line_list[1].c_str());
        if (refID_ins != refID || locus_ins != locus) {
            cerr << "two lines not match" << endl;
            return false;
        }

        // A change of reference ID marks the start of a new chromosome.
        const bool is_start = (refID != prev_refID);

        // check if locus start from 1
        if (is_start && locus != 1) {
            cerr << "chr " << refID << ": locus is not start from 1" << endl;
            return false;
        }

        // check if locus is continous
        if (!is_start && locus != prev_locus + 1) {
            cerr << "chr " << refID << ", " << locus << ": locus is not continous" << endl;
            return false;
        }

        // check if molecule number is increasing
        if (line_ins_list.size() == 6 && !ids_strictly_increasing(split(line_ins_list[5], ','))) {
            cerr << "chr " << refID << ", " << locus << "(ins): molecule ID is not increasing." << endl;
            return false;
        }
        if (line_list.size() == 6 && !ids_strictly_increasing(split(line_list[5], ','))) {
            cerr << "chr " << refID << ", " << locus << ": molecule ID is not increasing." << endl;
            return false;
        }

        prev_refID = refID;
        prev_locus = locus;
    }

    fs_pileupfile.close();
    return true;
}
bool AlignCoderSNV::recode(string m5_file, string var_file, string recode_file, int left_len, int right_len, bool is_report_ref) { // load var_file vector<VarData> var_data; ifstream fs_varfile; int64_t max_code = -1; open_infile(fs_varfile, var_file); while(true){ string buf; getline(fs_varfile, buf); if(fs_varfile.eof()) break; vector<string> buf_vec = split(buf, '\t'); if (buf_vec.size()!=9) throw runtime_error("incorrect format in " + var_file); var_data.push_back(VarData(stod(buf_vec[0]), buf_vec[1][0], stod(buf_vec[2]))); if (stod(buf_vec[2]) > max_code) max_code = stod(buf_vec[2]); } fs_varfile.close(); // fill template of var_data vector<bool> var_data_temp(max_code + 4, false); for (int64_t i = 0; i < var_data.size(); ++i) var_data_temp[var_data[i].code] = true; // scan m5_file and recode if (p_alignreader==NULL) throw runtime_error("AlignCoderSNV::recode(): p_alignreader has not be set."); ofstream p_outfile; open_outfile(p_outfile, recode_file); ofstream p_outfile_ref; open_outfile(p_outfile_ref, recode_file + ".ref"); p_alignreader->open(m5_file); Align align; int nline = 0; while(p_alignreader->readline(align)){ ++nline; if (nline % 100 == 0) cout << nline << endl; //cout << nline << endl; // expections int alen = (int) align.matchPattern.size(); if ( !(align.qAlignedSeq.size()==alen && align.tAlignedSeq.size()==alen) ) throw runtime_error("incorrect match patter in line " + to_string(nline)); if (align.qStrand != '+') throw runtime_error("qStrand should be + in line " + to_string(nline)); // reverse alignment if it is aligned to negative strand if (align.tStrand != '+'){ align.qAlignedSeq = getrevcomp(align.qAlignedSeq); align.tAlignedSeq = getrevcomp(align.tAlignedSeq); } // encode int cur_pos = align.tStart; for (int i=0; i<alen; i++){ //cout << "nline=" << nline << ", i=" <<i << endl; if (align.tAlignedSeq[i]=='-') continue; if (4*cur_pos+3 > max_code + 3) break; // realign if hit detected variants int score_A = MIN_SCORE; int score_C = MIN_SCORE; 
int score_G = MIN_SCORE; int score_T = MIN_SCORE; bool is_var = false; seqan::Align<string, seqan::ArrayGaps> cur_realign_A; seqan::Align<string, seqan::ArrayGaps> cur_realign_C; seqan::Align<string, seqan::ArrayGaps> cur_realign_G; seqan::Align<string, seqan::ArrayGaps> cur_realign_T; string cur_qseq; string cur_rseq; pair<string, string> context; char ref_base; // align local sequence to the referece if (var_data_temp[4*cur_pos] || var_data_temp[4*cur_pos+1] || var_data_temp[4*cur_pos+2] || var_data_temp[4*cur_pos+3]){ bool rl = this->get_context_m5(i, left_len, right_len, align.tAlignedSeq, context); if (!rl){ ++cur_pos; continue; } is_var = true; // get left query sequence length int64_t k = 0; int64_t cur_qseq_start = i; while(true){ if (align.tAlignedSeq[cur_qseq_start]!='-') k++; if (k >= context.first.size()) break; --cur_qseq_start; } // get right query sequence length k = 0; int64_t cur_qseq_end = i+1; while(true){ if (align.tAlignedSeq[cur_qseq_end]!='-') k++; if (k >= context.second.size()) break; ++cur_qseq_end; } if (cur_qseq_start < 0) throw runtime_error("cur_qseq_start < 0"); if (cur_qseq_end >= alen) throw runtime_error("cur_qseq_end >= alen"); for (auto j = cur_qseq_start; j <= cur_qseq_end; ++j){ if (align.qAlignedSeq[j]!='-') cur_qseq.push_back(align.qAlignedSeq[j]); } cur_rseq = context.first + context.second; if (cur_qseq == ""){ ++cur_pos; continue; } if (cur_rseq == "") throw runtime_error("cur_rseq is empty"); ref_base = cur_rseq[context.first.size()-1]; // realign cur_rseq[context.first.size()-1] = 'A'; score_A = this->realign(cur_realign_A, cur_qseq, cur_rseq); cur_rseq[context.first.size()-1] = 'C'; score_C = this->realign(cur_realign_C, cur_qseq, cur_rseq); cur_rseq[context.first.size()-1] = 'G'; score_G = this->realign(cur_realign_G, cur_qseq, cur_rseq); cur_rseq[context.first.size()-1] = 'T'; score_T = this->realign(cur_realign_T, cur_qseq, cur_rseq); // to be removed /*if ((cur_pos==253 || cur_pos==319 || cur_pos==325) && nline == 
7445){ cout << "cur_pos = " << cur_pos << endl; cout << "A: " << score_A << endl << cur_realign_A; cout << "C: " << score_C << endl << cur_realign_C; cout << "G: " << score_G << endl << cur_realign_G; cout << "T: " << score_T << endl << cur_realign_T; int tmp = 0; }*/ }else{ ++cur_pos; continue; } // recode if (score_A == MIN_SCORE && score_C == MIN_SCORE && score_G == MIN_SCORE && score_T == MIN_SCORE) throw runtime_error("A,C,G,T == MIN_SCORE, no alignment was done"); // A if (score_A > score_C && score_A > score_G && score_A > score_T){ if (align.tAlignedSeq[i]!='A'){ p_outfile << 4*cur_pos << '\t'; }else{ if (is_report_ref) p_outfile_ref << 4*cur_pos << '\t'; } } // C if (score_C > score_A && score_C > score_G && score_C > score_T){ if (align.tAlignedSeq[i]!='C'){ p_outfile << 4*cur_pos+1 << '\t'; }else{ if (is_report_ref) p_outfile_ref << 4*cur_pos+1 << '\t'; } } // G if (score_G > score_A && score_G > score_C && score_G > score_T){ if (align.tAlignedSeq[i]!='G'){ p_outfile << 4*cur_pos+2 << '\t'; }else{ if (is_report_ref) p_outfile_ref << 4*cur_pos+2 << '\t'; } } // T if (score_T > score_A && score_T > score_C && score_T > score_G){ if (align.tAlignedSeq[i]!='T'){ p_outfile << 4*cur_pos+3 << '\t'; }else{ if (is_report_ref) p_outfile_ref << 4*cur_pos+3 << '\t'; } } ++cur_pos; } p_outfile << endl; p_outfile_ref << endl; } p_alignreader->close(); p_outfile.close(); p_outfile_ref.close(); return true; }