Example #1
0
/*
 * Reads whitespace-separated integers from the file named by the global
 * infile_name and passes them on in chunks of at most CAPACITY values.
 *
 * Call sequence: stream_init() once, stream_data(buffer, count) for every
 * full chunk plus one final partial chunk (if any), then stream_end() once.
 *
 * Reading stops at end of file, on a read error, or at the first token that
 * does not parse as an int.
 */
void streaming_sort() {
  enum { CAPACITY = 256 };   /* chunk size; fixed-size array instead of a VLA */
  int buffer[CAPACITY];
  int filled = 0;            /* number of valid ints currently in buffer */

  stream_init(); // GET READY

  FILE *file = open_infile(infile_name);

  /* NOTE(review): open_infile is defined elsewhere; if it can return NULL
   * we skip reading and still bracket the (empty) stream with
   * stream_init()/stream_end() instead of crashing inside fscanf(). */
  if (file != NULL) {
    /* fscanf's return value already covers EOF and read errors, so no
     * separate feof()/ferror() test is needed. */
    while (fscanf(file, "%d", buffer + filled) == 1) {
      filled++;
      if (filled == CAPACITY) {
        stream_data(buffer, filled); // SOME DATA FOR YOU
        filled = 0;
      }
    }
    if (filled)
      stream_data(buffer, filled); // HAVE A BIT MORE DATA
  }

  stream_end(); // DONE

  if (file != NULL)
    fclose(file);
}
/**
 * Builds a RefGenome from a pileup file.
 *
 * The file is read two lines at a time. Both lines of a pair are split on
 * tabs and must have 4 or 6 columns. The second column of the first line
 * must start with '_' and the second column of the second line must not.
 * For every pair, the first character of the third column of the second
 * line is appended to refgenome[atoi(first column)].
 *
 * A trailing unpaired line is ignored. The final pair is processed even
 * when the file does not end with a newline.
 *
 * @param pileupfile path of the pileup file to read
 * @return the populated RefGenome
 * @throws runtime_error on a wrong column count, a wrong '_' marker, or a
 *         read failure that is not end-of-file
 */
RefGenome PileupParserGDA::getRefGenome(string pileupfile) {
    RefGenome refgenome;
    ifstream cur_fs_pileupfile;
    open_infile(cur_fs_pileupfile, pileupfile);

    string cur_line_ins;
    string cur_line;
    // Test the getline results instead of eof(): eofbit is also set when the
    // last line is read successfully but lacks a trailing newline, and the
    // old eof() check dropped that pair.
    while (getline(cur_fs_pileupfile, cur_line_ins) && getline(cur_fs_pileupfile, cur_line)) {
        const vector<string> buf_ins = split(cur_line_ins, '\t');
        const vector<string> buf = split(cur_line, '\t');

        if ((buf.size()!=4 && buf.size()!=6) || (buf_ins.size()!=4 && buf_ins.size()!=6))
            throw runtime_error ("Error in PileupParserGDA::getRefGenome : incorrect buf size.");
        if (buf_ins[1][0] != '_' || buf[1][0] == '_')
            throw runtime_error ("Error in PileupParserGDA::getRefGenome : incorrect pileupfile format.");

        refgenome[ atoi(buf[0].c_str()) ].push_back(buf[2][0]);
    }

    // A stop that is not end-of-file means the stream itself failed.
    if (!cur_fs_pileupfile.eof())
        throw runtime_error ("Error in PileupParserGDA::getRefGenome : fail to read pileupfile.");

    cur_fs_pileupfile.close();
    return refgenome;
}
int main (int argc, char **argv) {
   Exec_Name = basename (argv[0]);
   char *default_dictionary = DEFAULT_DICTNAME;
   char *user_dictionary = NULL;
   hashset_ref hashset = new_hashset ();
   yy_flex_debug = false;

   // Scan the arguments and set flags.
   opterr = false;
   for (;;) {
      int option = getopt (argc, argv, "nxyd:@:");
      if (option == EOF) break;
      switch (option) {
         char optopt_string[16]; // used in default:
         case 'd': user_dictionary = optarg;
                   break;
         case 'n': default_dictionary = NULL;
                   break;
         case 'x': option_x(hashset);
                   break;
         case 'y': yy_flex_debug = true;
                   break;
         case '@': set_debugflags (optarg);
                   if (strpbrk (optarg, "@y")) yy_flex_debug = true;
                   break;
         default : sprintf (optopt_string, "-%c", optopt);
                   print_error (optopt_string, "invalid option");
                   break;
      }
   }

   // Load the dictionaries into the hash table.
   load_dictionary (default_dictionary, hashset);
   load_dictionary (user_dictionary, hashset);

   // Read and do spell checking on each of the files.
   if (optind >= argc) {
      yyin = stdin;
      spellcheck (STDIN_NAME, hashset);
   }else {
      int fileix = optind;
      for (; fileix < argc; ++fileix) {
         DEBUGF ('m', "argv[%d] = \"%s\"\n", fileix, argv[fileix]);
         char *filename = argv[fileix];
         if (strcmp (filename, STDIN_NAME) == 0) {
            yyin = stdin;
            spellcheck (STDIN_NAME, hashset);
         }else {
            yyin = open_infile (filename);
            if (yyin == NULL) continue;
            spellcheck (filename, hashset);
            fclose (yyin);
         }
      }
   }
   
   yycleanup ();
   return Exit_Status;
}
Example #4
0
File: flow.c Project: kurino/flow
/*
 * flow entry point.
 *
 * Leading options: "-D" calls setDebugFlag(); "-V" prints VERSION and exits
 * with status 0. Option scanning stops at the first other argument.
 * The next argument (if any) is passed to open_infile(), and the one after
 * it (if any) to apply_tempfile(); NULL is passed when an argument is absent.
 *
 * Returns 0 normally, or 10 as soon as doCommand() reports failure.
 */
int main ( int argc, char *argv[] ) {
	Param	params;
	FlowCom	*comPtr = NULL;
	char	line [ LINE_LEN ];

	/* Consume leading option arguments by shifting argv/argc. */
	for ( ; argc > 1; argv++, argc-- ) {
		const char *opt = argv[1];

		if ( strcmp ( opt, "-D" ) == 0 ) {
			setDebugFlag();
		} else if ( strcmp ( opt, "-V" ) == 0 ) {
			printf ( "%s\n", VERSION );
			exit ( 0 );
		} else {
			break;
		}
	}

	open_tempfile();
	open_infile ( ( argc > 1 ) ? argv[ 1 ] : NULL );

	tprintf ( "%% picture environment flowchart generated by flow " );
	tprintf ( "%s\n", VERSION );

	/* Process input line by line; stop at the first line that getCommand()
	 * does not recognise. */
	while ( readline_infile ( line, LINE_LEN ) != NULL ) {
		comPtr = getCommand ( line, params );
		if ( comPtr == NULL ) {
			break;
		}

		if ( comPtr -> command == MACRO ) {
			if ( doMacro ( params ) ) {
				errout ( E_NO_END_MACRO );
			}
		} else if ( comPtr -> command == EXPAND ) {
			doExpand ( params );
		} else if ( !doCommand ( comPtr, params ) ) {
			return 10;
		}
	}

	close_infile();
	close_tempfile();

	apply_tempfile ( getPic(), ( argc > 2 ) ? argv[ 2 ] : NULL );

	remove_tempfile();

//	dumpFigure();

	return 0;    /* just to suppress the warning */
}
Example #5
0
/*
 * Reads the file dictionary_name line by line and passes a heap-allocated
 * copy of each line, without its trailing newline, to put_hashset().
 *
 * Does nothing when dictionary_name is NULL or the file cannot be opened.
 * Lines longer than the 1024-byte buffer are inserted in several pieces.
 */
void load_dictionary (char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);
   FILE *dict = open_infile(dictionary_name);
   // NOTE(review): assumes open_infile returns NULL on failure - confirm.
   if (dict == NULL) return;
   char buffer[1024];
   while(fgets(buffer, sizeof buffer, dict) != NULL){
      // Strip the trailing newline; the length test guards against an empty
      // string (e.g. a line starting with a NUL byte), where length-1 would
      // index before the buffer.
      size_t length = strlen(buffer);
      if(length > 0 && buffer[length-1] == '\n')
         buffer[length-1] = '\0';
      // The copy is handed to put_hashset and not freed here.
      char* word = strdup(buffer);
      assert(word != NULL);
      put_hashset(hashset, word);
   }
   fclose(dict);
}
Example #6
0
/*
 * Reads the file dictionary_name line by line and passes each line, without
 * its trailing newline, to put_hashset().
 *
 * Does nothing when dictionary_name is NULL or the file cannot be opened.
 */
void load_dictionary (char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;
   FILE *dictionary = open_infile(dictionary_name);
   if (dictionary == NULL) return;
   char buffer[1024];
   while (fgets(buffer, sizeof buffer, dictionary) != NULL) {
      // Cut the line at the newline, but also stop at the terminator: the
      // last line of a file, or a line longer than the buffer, has no '\n',
      // and scanning for '\n' alone would run off the end of the buffer.
      size_t end = 0;
      while (buffer[end] != '\0' && buffer[end] != '\n') end++;
      buffer[end] = '\0';
      // NOTE(review): the same stack buffer is passed on every iteration;
      // this presumes put_hashset copies the string - confirm.
      put_hashset(hashset, buffer);
   }
   fclose(dictionary);
}
/*
 * Opens the file dictionary_name and reads it line by line into the buffer
 * obtained from return_array(hashset), using return_length(hashset) as the
 * buffer size, then closes the file.
 *
 * Does nothing when dictionary_name is NULL.
 *
 * NOTE(review): this looks like unfinished work. The loop now ends at EOF
 * (it used to spin forever and never reach fclose), but nothing is inserted
 * into the hash set, and the lines flagged below should be revisited.
 */
void load_dictionary (char *dictionary_name, hashset_ref hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);
//   STUBPRINTF ("Open dictionary, load it, close it\n");
   size_t length = return_length(hashset);
   char* buffer = return_array(hashset);
   // NOTE(review): debug output of the buffer contents before any read.
   printf("%s\n",buffer);
   FILE * dic = open_infile(dictionary_name);
   // NOTE(review): assumes open_infile returns NULL on failure - confirm.
   if (dic == NULL) return;
   // Stop when fgets reports EOF or an error.
   while (fgets(buffer, length, dic) != NULL){
      // NOTE(review): this writes the line back to the stream being read;
      // presumably a word was meant to be added to the hash set instead.
      fputs (buffer,dic);
   }
   fclose(dic);

}
Example #8
0
/*
 * Reads whitespace-separated integers from infile_name into a growable,
 * heap-allocated array.
 *
 * infile_name: path passed to open_infile().
 * data:        out-parameter; receives the malloc'ed array, which the caller
 *              must free(). Set to NULL if the initial allocation fails.
 *
 * Returns the number of integers stored in *data. Reading stops at end of
 * file, on a read error, at the first token that is not an int, or when the
 * array cannot be grown (the values read so far are kept).
 * Returns 0 if the initial allocation fails or the file cannot be opened.
 */
int read_file(const char * infile_name, int ** data) {
  int capacity = 1024;
  int nitems = 0;
  *data = (int *) malloc(capacity * sizeof(int));
  if (*data == NULL) return 0;

  FILE * file = open_infile(infile_name);
  /* NOTE(review): assumes open_infile returns NULL on failure - confirm. */
  if (file == NULL) return 0;

  int value;
  /* fscanf's return value already covers EOF and read errors. */
  while (fscanf(file, "%d", &value) == 1) {
    if (nitems == capacity) {
      /* Grow through a temporary so the existing array is neither leaked
       * nor lost when realloc fails. */
      int *grown = (int *) realloc(*data, sizeof(int) * capacity * 2);
      if (grown == NULL) break;
      *data = grown;
      capacity *= 2;
    }
    (*data)[nitems++] = value;
  }
  fclose(file);

  return nitems;
}
/**
 * Scans a pileup file and returns the largest value found at the back of
 * readID or readID_ins over all parsed records.
 *
 * @param pileupfile path of the pileup file to scan
 * @return the largest ID seen
 * @throws runtime_error if no non-negative ID was found
 */
int PileupParserGDA::getMaxMolID(string pileupfile) {
    ifstream fs_pileupfile;
    open_infile(fs_pileupfile, pileupfile);
    this->setPileupFileStream(&fs_pileupfile);

    int largest_id = -1;
    for (;;) {
        this->readLine();
        if (fs_pileupfile.eof()) break;

        Pileup pu = this->getPileup();

        // Only the last element of each ID list is inspected.
        if (pu.readID.size() > 0 && pu.readID.back() > largest_id)
            largest_id = pu.readID.back();
        if (pu.readID_ins.size() > 0 && pu.readID_ins.back() > largest_id)
            largest_id = pu.readID_ins.back();
    }
    fs_pileupfile.close();

    if (largest_id < 0)
        throw runtime_error("Error in PileupParserGDA::getMaxMolID(): fail to get max mol ID.");

    return largest_id;
}
Example #10
0
/*
 * Assembler driver skeleton.
 *
 * Usage: <program> <infile> <outfile>
 *
 * Reads <infile> line by line and writes one output line per input line to
 * <outfile>. The per-instruction translation is still commented out, so
 * every output line is currently empty.
 *
 * Returns 0 on success, 1 if arguments are missing or a file cannot be
 * opened.
 */
int main(int argc, char **argv){
	char *in_filename;
	char *out_filename;
	char in_line[40];  //40 here is arbitrary.
	char out_line[40];
	char instruction_type = 'r'; //default to rtype.  You might change this.
	FILE *in;
	FILE *out;

	(void) instruction_type; //only used by the disabled dispatch below

	//argv[0] is the program name; the two file names follow it.
	if(argc < 3){
		printf("Please specify a file to assemble...\n");
		return 1;
	}
	in_filename = argv[1];
	out_filename = argv[2];

	in = open_infile(in_filename);
	//NOTE(review): assumes the open helpers return NULL on failure - confirm.
	if(in == NULL) return 1;
	out = open_outfile(out_filename);
	if(out == NULL){
		fclose(in);
		return 1;
	}

	//fgets returns NULL at end of file or on error; that is the loop exit.
	while(fgets(in_line, sizeof in_line, in) != NULL){
		out_line[0] = '\0'; //nothing translated yet
		//instruction_type = type(in_line);
		/*switch (instruction_type){
			case 'r':
				rtype(out_line, in_line);
				break;
			case 'i':
				itype(out_line, in_line);
				break;
			case 'j':
				jtype(out_line, in_line);
				break;
			default:
				break;
		}*/
		fputs(out_line, out);
	}

	fclose(in);
	fclose(out);
	return 0;
}
Example #11
0
/*
 * Spell-checker entry point.
 *
 * Parses options with scan_options(), loads the default and user
 * dictionaries into a fresh hash set, then either hands the set to
 * handle_x() (when the global x is true) or spell-checks every file
 * operand. With no operands, or for an operand equal to STDIN_NAME,
 * standard input is checked. Returns exit_status.
 */
int main (int argc, char **argv) {
   program_name = basename (argv[0]);
   char *default_dictionary = DEFAULT_DICTNAME;
   char *user_dictionary = NULL;
   hashset *hashset = new_hashset ();
   yy_flex_debug = false;
   scan_options (argc, argv, &default_dictionary, &user_dictionary);

   // Load the dictionaries into the hash table.
   load_dictionary (default_dictionary, hashset);
   load_dictionary (user_dictionary, hashset);

   // The x mode returns right after handle_x(), before any file is read.
   if (x == true) {
      handle_x(hashset);
      return exit_status;
   }

   // Read and do spell checking on each of the files.
   if (optind < argc) {
      int fileix = optind;
      while (fileix < argc) {
         char *filename = argv[fileix];
         DEBUGF ('m', "argv[%d] = \"%s\"\n", fileix, filename);
         ++fileix;
         if (strcmp (filename, STDIN_NAME) != 0) {
            yyin = open_infile (filename);
            if (yyin == NULL) continue;   // unopenable file: move on
            spellcheck (filename, hashset);
            fclose (yyin);
         }else {
            yyin = stdin;
            spellcheck (STDIN_NAME, hashset);
         }
      }
   }else {
      // No file operands: check standard input.
      yyin = stdin;
      spellcheck (STDIN_NAME, hashset);
   }

   yylex_destroy ();
   free_hashset(hashset);
   return exit_status;
}
Example #12
0
/*
 * Reads the file dictionary_name line by line and passes each line to
 * put_hashset(), then prints the number of lines processed to stdout.
 *
 * A line without a terminating newline is reported on stderr as a
 * "broken line", sets Exit_Status to 2, and is still inserted.
 * Does nothing when dictionary_name is NULL; asserts that the file opened.
 */
void load_dictionary (const char *dictionary_name, hashset *hashset) {
   if (dictionary_name == NULL) return;
   DEBUGF ('m', "dictionary_name = \"%s\", hashset = %p\n",
           dictionary_name, hashset);
   char buff[1024];
   FILE *dictionary = open_infile(dictionary_name);
   assert(dictionary != NULL);

   int words_added = 0;   // the current 1-based line number is words_added + 1
   while (fgets(buff, sizeof buff, dictionary) != NULL) {
      char *newline = strchr (buff, '\n');
      if (newline != NULL) {
         // Normal case: cut the line at its newline.
         *newline = '\0';
      }else {
         // No newline in the buffer: report it, keep going.
         fflush(NULL);
         fprintf (stderr, "%s: %s[%d]: broken line\n", Exec_Name,
                  dictionary_name, words_added + 1);
         fflush(NULL);
         Exit_Status = 2;
      }

      // Insert a temporary heap copy, released right after the call.
      char *copy = strdup(buff);
      assert (copy != NULL);
      put_hashset(hashset, copy);
      free(copy);
      words_added++;
   }

   fclose(dictionary);
   printf("# of words added: %d\n", words_added);
   //STUBPRINTF ("Open dictionary, load it, close it\n");
}
/**
 * Validates the layout of a pileup file and reports the first problem on
 * cerr.
 *
 * The file is read two lines at a time (an "ins" line followed by a regular
 * line). For every pair the following is checked:
 *   - both lines have 4 or 6 tab-separated columns;
 *   - column 0 (reference ID) and the locus agree between the two lines; the
 *     locus of the ins line is column 1 with its first character skipped;
 *   - the locus is 1 whenever the reference ID differs from the previous
 *     pair's, and otherwise exactly the previous locus plus one;
 *   - on 6-column lines, the comma-separated IDs in column 5 are strictly
 *     increasing.
 *
 * A trailing unpaired line is ignored. The final pair is checked even when
 * the file does not end with a newline.
 *
 * @param pileupfile path of the pileup file to check
 * @return true if every pair passes, false otherwise
 */
bool PileupParserGDA::checkFormat(string pileupfile) {
    ifstream fs_pileupfile; open_infile(fs_pileupfile, pileupfile);
    int prev_refID = -1;
    int prev_locus = 0;

    string line_ins;
    string line;
    // Test the getline results instead of eof(): eofbit is also set when the
    // last line is read successfully but lacks a trailing newline, and the
    // old eof() check skipped that pair.
    while (getline(fs_pileupfile, line_ins) && getline(fs_pileupfile, line)) {
        const vector<string> line_ins_list = split(line_ins, '\t');
        const vector<string> line_list = split(line, '\t');

        // check number of columns
        if ((line_ins_list.size()!=4 && line_ins_list.size()!=6) || (line_list.size()!=4 && line_list.size()!=6)){
            cerr << "number of columns is not 4 or 6" << endl;
            return false;
        }

        // check if two lines match
        const int refID_ins = atoi(line_ins_list[0].c_str());
        const int refID = atoi(line_list[0].c_str());
        // Skip the first character of the ins locus column. An empty column
        // parses as 0 instead of making substr() throw out_of_range.
        const string &locus_ins_col = line_ins_list[1];
        const int locus_ins = locus_ins_col.empty() ? 0 : atoi(locus_ins_col.c_str() + 1);
        const int locus = atoi(line_list[1].c_str());
        if (refID_ins != refID || locus_ins != locus) {
            cerr << "two lines not match" << endl;
            return false;
        }

        // check if the current locus is the start locus
        const bool is_start = (refID != prev_refID);

        // check if locus start from 1
        if (is_start && locus!=1) {
            cerr << "chr " << refID << ": locus is not start from 1" << endl;
            return false;
        }

        // check if locus is continous
        if (!is_start && locus != prev_locus + 1) {
            cerr << "chr " << refID <<  ", " << locus << ": locus is not continous" << endl;
            return false;
        }

        // check if molecule number is increasing
        if (line_ins_list.size() == 6) {
            const vector<string> id_list = split(line_ins_list[5],',');
            // i + 1 < size() is safe for an empty list without a signed cast
            for (size_t i = 0; i + 1 < id_list.size(); ++i) {
                if (atoi(id_list[i].c_str()) >= atoi(id_list[i+1].c_str())){
                    cerr << "chr " << refID <<  ", " << locus << "(ins): molecule ID is not increasing." << endl;
                    return false;
                }
            }
        }

        if (line_list.size() == 6) {
            const vector<string> id_list = split(line_list[5],',');
            for (size_t i = 0; i + 1 < id_list.size(); ++i) {
                if (atoi(id_list[i].c_str()) >= atoi(id_list[i+1].c_str())){
                    cerr << "chr " << refID <<  ", " << locus << ": molecule ID is not increasing." << endl;
                    return false;
                }
            }
        }

        prev_refID = refID;
        prev_locus = locus;
    }

    // A stop that is not end-of-file means the stream itself failed.
    if (!fs_pileupfile.eof()) {
        cerr << "fail to read pileup file" << endl;
        return false;
    }
    fs_pileupfile.close();

    return true;
}
Example #14
0
/**
 * Re-encodes the alignments of m5_file at the positions listed in var_file.
 *
 * var_file is a tab-separated file with 9 columns per line; columns 0, 1
 * and 2 are used to build the VarData entries, and the largest value of
 * column 2 bounds the codes considered.
 *
 * For every alignment read through p_alignreader, each non-gap target
 * column whose position has one of the codes 4*pos .. 4*pos+3 marked by
 * var_file is re-aligned four times, with the base at that position
 * replaced by A, C, G and T in turn. If one base scores strictly higher
 * than the other three, its code (4*pos + 0/1/2/3 for A/C/G/T) is written:
 * to recode_file when the base differs from the aligned target base, or to
 * recode_file + ".ref" when it equals it and is_report_ref is set.
 * Codes are tab-separated, one output line per alignment in both files.
 *
 * @param m5_file       alignment file opened through p_alignreader
 * @param var_file      variant list (9 tab-separated columns)
 * @param recode_file   output path; a second file recode_file + ".ref" is
 *                      also written
 * @param left_len      passed to get_context_m5()
 * @param right_len     passed to get_context_m5()
 * @param is_report_ref whether reference-matching calls go to the .ref file
 * @return always true
 * @throws runtime_error on malformed input, an unset p_alignreader, or an
 *         inconsistent alignment/context
 */
bool AlignCoderSNV::recode(string m5_file, string var_file, string recode_file, int left_len, int right_len, bool is_report_ref)
{
    // load var_file
    vector<VarData> var_data;
    ifstream fs_varfile;
    int64_t max_code = -1;
    open_infile(fs_varfile, var_file);
    string buf;
    // Test the getline result instead of eof(): eofbit is also set when the
    // last line is read successfully but lacks a trailing newline, and the
    // old eof() check dropped that line.
    while (getline(fs_varfile, buf)) {
        vector<string> buf_vec = split(buf, '\t');
        if (buf_vec.size()!=9)
            throw runtime_error("incorrect format in " + var_file);
        const double code = stod(buf_vec[2]);
        var_data.push_back(VarData(stod(buf_vec[0]), buf_vec[1][0], code));

        if (code > max_code)
            max_code = static_cast<int64_t>(code);
    }
    // A stop that is not end-of-file means the stream itself failed.
    if (!fs_varfile.eof())
        throw runtime_error("incorrect format in " + var_file);
    fs_varfile.close();

    // fill template of var_data
    vector<bool> var_data_temp(max_code + 4, false);
    for (size_t v = 0; v < var_data.size(); ++v)
        var_data_temp[var_data[v].code] = true;


    // scan m5_file and recode
    if (p_alignreader==NULL)
        throw runtime_error("AlignCoderSNV::recode(): p_alignreader has not be set.");
    ofstream p_outfile;
    open_outfile(p_outfile, recode_file);
    ofstream p_outfile_ref;
    open_outfile(p_outfile_ref, recode_file + ".ref");

    static const char bases[4] = {'A', 'C', 'G', 'T'};

    p_alignreader->open(m5_file);
    Align align;
    int nline = 0;
    while(p_alignreader->readline(align)){
        ++nline;
        // progress report
        if (nline % 100 == 0)
            cout << nline << endl;

        // expections
        int alen = (int) align.matchPattern.size();
        if ( !(align.qAlignedSeq.size()==static_cast<size_t>(alen) && align.tAlignedSeq.size()==static_cast<size_t>(alen)) )
            throw runtime_error("incorrect match patter in line " + to_string(nline));
        if (align.qStrand != '+')
            throw runtime_error("qStrand should be + in line " + to_string(nline));

        // reverse alignment if it is aligned to negative strand
        if (align.tStrand != '+'){
            align.qAlignedSeq = getrevcomp(align.qAlignedSeq);
            align.tAlignedSeq = getrevcomp(align.tAlignedSeq);
        }

        // encode
        int cur_pos = align.tStart;
        for (int i=0; i<alen; i++){
            // gaps in the target do not advance cur_pos
            if (align.tAlignedSeq[i]=='-')
                continue;

            // past the last code listed in var_file: nothing more to do
            if (4*cur_pos+3 > max_code + 3)
                break;

            // only positions hit by a listed variant are realigned
            const bool hits_variant = var_data_temp[4*cur_pos] || var_data_temp[4*cur_pos+1] || var_data_temp[4*cur_pos+2] || var_data_temp[4*cur_pos+3];
            if (!hits_variant){
                ++cur_pos;
                continue;
            }

            // align local sequence to the referece
            pair<string, string> context;
            if (!this->get_context_m5(i, left_len, right_len, align.tAlignedSeq, context)){
                ++cur_pos;
                continue;
            }
            const int64_t n_left = static_cast<int64_t>(context.first.size());
            const int64_t n_right = static_cast<int64_t>(context.second.size());

            // get left query sequence length: walk left from column i until
            // n_left non-gap target columns are covered. The bounds check
            // comes before the access so a short alignment throws instead of
            // reading in front of the string.
            int64_t k = 0;
            int64_t cur_qseq_start = i;
            while(true){
                if (cur_qseq_start < 0)
                    throw runtime_error("cur_qseq_start < 0");
                if (align.tAlignedSeq[cur_qseq_start]!='-')
                    k++;
                if (k >= n_left)
                    break;
                --cur_qseq_start;
            }

            // get right query sequence length: same walk to the right,
            // starting at column i+1
            k = 0;
            int64_t cur_qseq_end = i+1;
            while(true){
                if (cur_qseq_end >= alen)
                    throw runtime_error("cur_qseq_end >= alen");
                if (align.tAlignedSeq[cur_qseq_end]!='-')
                    k++;
                if (k >= n_right)
                    break;
                ++cur_qseq_end;
            }

            // query bases (gaps removed) over the selected column range
            string cur_qseq;
            for (int64_t j = cur_qseq_start; j <= cur_qseq_end; ++j){
                if (align.qAlignedSeq[j]!='-')
                    cur_qseq.push_back(align.qAlignedSeq[j]);
            }

            string cur_rseq = context.first + context.second;

            if (cur_qseq.empty()){
                ++cur_pos;
                continue;
            }

            if (cur_rseq.empty())
                throw runtime_error("cur_rseq is empty");
            // The base that gets substituted is the last one of the left
            // context; without it size()-1 would wrap around.
            if (context.first.empty())
                throw runtime_error("context.first is empty");
            const size_t var_idx = context.first.size() - 1;

            // realign once per candidate base
            int scores[4];
            seqan::Align<string, seqan::ArrayGaps> cur_realign[4];
            for (int b = 0; b < 4; ++b){
                cur_rseq[var_idx] = bases[b];
                scores[b] = this->realign(cur_realign[b], cur_qseq, cur_rseq);
            }

            // recode
            if (scores[0] == MIN_SCORE && scores[1] == MIN_SCORE && scores[2] == MIN_SCORE && scores[3] == MIN_SCORE)
                throw runtime_error("A,C,G,T == MIN_SCORE, no alignment was done");

            // A base is reported only if it strictly beats the other three,
            // so at most one code is written per position.
            for (int b = 0; b < 4; ++b){
                bool is_best = true;
                for (int o = 0; o < 4; ++o){
                    if (o != b && scores[b] <= scores[o])
                        is_best = false;
                }
                if (!is_best)
                    continue;

                if (align.tAlignedSeq[i] != bases[b]){
                    p_outfile << 4*cur_pos + b << '\t';
                }else{
                    if (is_report_ref)
                        p_outfile_ref << 4*cur_pos + b << '\t';
                }
            }

            ++cur_pos;
        }
        p_outfile << endl;
        p_outfile_ref << endl;
    }

    p_alignreader->close();


    p_outfile.close();
    p_outfile_ref.close();

    return true;
}