/// Reads a delimited text file into a newly allocated integer matrix.
///
/// Each line is split with get_lexemes(). Lines that yield no items are
/// skipped. The first non-empty line fixes the expected number of items per
/// row; any later non-empty line with a different count prints a diagnostic
/// and terminates the program. All items are collected in file order and
/// converted in one go by typecast_stringarray_to_int_matrix().
///
/// Lines are read with std::getline() into a std::string, so there is no
/// upper bound on the line length (a fixed-size buffer combined with an
/// eof()-controlled loop never terminates once a line overflows the buffer).
///
/// @param filename A filename to read.
/// @param delim A string of delimiters.
/// @return The matrix obtained from allocIntMatrix(numRows, itemsInFirstRow)
///         after it has been filled from the file.
/// @note On a failed open or a row length mismatch the process exits with
///       status 0, exactly as before.
iMatrix *readmatrix(std::string filename,const std::string delim=",;: \t"){
  std::ifstream pFile(filename.c_str(),std::ios::in);
  if(!pFile){
    std::cout <<"Problems opening file" <<filename<<std::endl;
    exit(0);
  }

  std::vector<std::string> tokens; // items of every row, appended in file order
  std::string line;
  bool haveFirstRow = false;
  int itemsInFirstRow = 0;
  int numRows = 0;

  // The loop ends on end-of-file as well as on any read error.
  while(std::getline(pFile,line)){
    const int nItems = get_lexemes(line,tokens,delim);
    if(nItems==0)
      continue; // empty line (also covers empty lines before the first row)

    numRows++;
    if(!haveFirstRow){
      // the first non-empty row defines the width of the matrix
      itemsInFirstRow = nItems;
      haveFirstRow = true;
    }else if(nItems!=itemsInFirstRow){
      // NOTE(review): the expected count is printed first, the offending count second
      printf("row length mismatch at line:%d numitems is:%d shouldn't be:%d\t will exit\n",numRows,itemsInFirstRow,nItems);
      exit(0);
    }
  }

  iMatrix *data_ = allocIntMatrix(numRows,itemsInFirstRow);
  // now we have a token array of strings; coerce the types
  typecast_stringarray_to_int_matrix(tokens,data_);
  printf("\t-> Dimension of genotype datafile is (%d,%d)\n",data_->x,data_->y);
  return data_;
}
/// Reads a delimited text file into a newly allocated integer matrix, keeping
/// every item of the file in memory as a string before converting.
///
/// Each line is split with get_lexemes(). Lines that yield no items are
/// skipped. The first non-empty line fixes the expected number of items per
/// row; any later non-empty line with a different count prints a diagnostic
/// and terminates the program.
///
/// Lines are read with std::getline() into a std::string, so the line length
/// is no longer limited by a fixed buffer (overflowing that buffer previously
/// left the stream in a failed state that the eof()-controlled loop never
/// escaped).
///
/// @param filename A filename to read.
/// @param delim A string of delimiters.
/// @return The matrix obtained from allocIntMatrix(numRows, itemsInFirstRow)
///         after typecast_stringarray_to_int_matrix() has filled it.
/// @note On a failed open or a row length mismatch the process exits with
///       status 0, exactly as before.
iMatrix *readmatrix_filty_memory(std::string filename,const std::string delim=",;: \t"){
  std::ifstream pFile(filename.c_str(),std::ios::in);
  if(!pFile){
    fileError(filename);
    exit(0);
  }

  std::vector<std::string> tokens; // items of every row, appended in file order
  std::string line;
  bool haveFirstRow = false;
  int itemsInFirstRow = 0;
  int numRows = 0;

  // The loop ends on end-of-file as well as on any read error.
  while(std::getline(pFile,line)){
    const int nItems = get_lexemes(line,tokens,delim);
    if(nItems==0)
      continue; // empty line (also covers empty lines before the first row)

    numRows++;
    if(!haveFirstRow){
      // the first non-empty row defines the width of the matrix
      itemsInFirstRow = nItems;
      haveFirstRow = true;
    }else if(nItems!=itemsInFirstRow){
      printf("row length mismatch at line:%d numitems is:%d shouldn't be:%d\t will exit\n",numRows,itemsInFirstRow,nItems);
      exit(0);
    }
  }

  flush_print("\r\t-> File has been read in now, will now typecheck... ");
  iMatrix *data_ = allocIntMatrix(numRows,itemsInFirstRow);
  // now we have a token array of strings; coerce the types
  typecast_stringarray_to_int_matrix(tokens,data_);
  return data_;
}
std::vector<std::string> readarray(std::string filename,const std::string delims=",;: \t"){ ///@param filename The name of the file to open. ///@param delims A string of delimeters to split by. std::vector<std::string> tokens; const int SIZE = MAX_ELEMS_PER_LINE; char *buffer = new char[SIZE]; std::ifstream pFile (filename.c_str(),std::ios::in); if(!pFile){ fileError(filename); exit(0); } std::string tmp_string; //we are reading an array, so just keep reading until no more lines while(!pFile.eof()){ pFile.getline(buffer,SIZE); tmp_string = std::string(buffer); get_lexemes(tmp_string,tokens,delims); } pFile.close(); delete [] buffer; ///now we have a token array of string coerce the types now //typecast_stringarray(tokens); return tokens; }
/// Tries to parse an if statement (optionally followed by an else part) at
/// the current position.
///
/// When is_if_statement() does not hold, nothing is consumed and false is
/// returned. Otherwise the condition and the body are parsed. If further
/// lines remain (line_offset < line_end) and the next line again satisfies
/// is_if_statement() while consisting of exactly one lexeme, that line is
/// taken as the start of the else part and a second body is parsed.
///
/// @param output Receives the type tag and a freshly allocated
///               if_statement or if_else_statement.
/// @return true if an if statement was parsed, false otherwise.
bool parser::process_if(executable_unit & output)
{
	// Kept from the original: the lexemes of the current line are fetched
	// before the check, although the result is not used here.
	get_lexemes();
	if(!is_if_statement())
		return false;

	parse_tree_node condition;
	process_composite_term(condition);

	executable_units then_branch;
	process_body(&then_branch);

	if(line_offset < line_end)
	{
		lexeme_container & following = get_lexemes();
		if(is_if_statement() && following.size() == 1)
		{
			// a lone if-lexeme after the body starts the else part
			executable_units else_branch;
			process_body(&else_branch);

			output.type = executable_unit_type::if_else_statement;
			output.if_else_pointer = new if_else_statement;

			if_else_statement & node = *output.if_else_pointer;
			node.conditional_term = condition;
			node.if_body = then_branch;
			node.else_body = else_branch;
			return true;
		}
	}

	// plain if without an else part
	output.type = executable_unit_type::if_statement;
	output.if_pointer = new if_statement;

	if_statement & node = *output.if_pointer;
	node.conditional_term = condition;
	node.body = then_branch;
	return true;
}
/// Tries to parse a return statement at the current position.
///
/// A line is treated as a return statement when its first lexeme has the
/// type lexeme_type::dot. In that case a new parse_tree_node is allocated,
/// stored in the output unit and filled by process_composite_term().
///
/// @param output Receives the type tag and the allocated statement node.
/// @return true if a return statement was parsed, false otherwise.
bool parser::process_return(executable_unit & output)
{
	lexeme_container & current = get_lexemes();
	const bool not_a_return = current[0].type != lexeme_type::dot;
	if(not_a_return)
		return false;

	output.type = executable_unit_type::return_statement;
	output.statement_pointer = new parse_tree_node;
	process_composite_term(*output.statement_pointer);
	return true;
}
/// Tries to parse a while statement at the current position.
///
/// A line is treated as a while statement when its first lexeme has the type
/// lexeme_type::while_operator. The condition is parsed first; only then is
/// the output unit tagged, a while_statement allocated, the condition copied
/// into it and the loop body parsed.
///
/// @param output Receives the type tag and the allocated while_statement.
/// @return true if a while statement was parsed, false otherwise.
bool parser::process_while(executable_unit & output)
{
	lexeme_container & current = get_lexemes();
	const bool not_a_while = current[0].type != lexeme_type::while_operator;
	if(not_a_while)
		return false;

	parse_tree_node loop_condition;
	process_composite_term(loop_condition);

	output.type = executable_unit_type::while_statement;
	output.while_pointer = new while_statement;

	while_statement & loop = *output.while_pointer;
	loop.conditional_term = loop_condition;
	process_body(&loop.body);
	return true;
}
/// Tries to parse a for statement at the current position.
///
/// A line is treated as a for statement when its first lexeme has the type
/// lexeme_type::iteration. Two forms are distinguished by the number of
/// lexemes on that line:
///  - more than one lexeme: a for-each statement; the container term and the
///    body are parsed;
///  - exactly one lexeme: a three part for statement; the next three lines
///    must have the current indentation level and are parsed as
///    initialisation, conditional and iteration.
///
/// @param output Receives the type tag and the allocated for_statement or
///               for_each_statement.
/// @return true if a for statement was parsed, false otherwise.
/// @throws ail::exception if fewer than four lines remain for a three part
///         for statement or one of its three header lines has a different
///         indentation level.
bool parser::process_for(executable_unit & output)
{
	lexeme_container & current = get_lexemes();
	if(current[0].type != lexeme_type::iteration)
		return false;

	if(current.size() != 1)
	{
		// for each statement
		output.type = executable_unit_type::for_each_statement;
		output.for_each_pointer = new for_each_statement;

		for_each_statement & loop = *output.for_each_pointer;
		process_composite_term(loop.container);
		process_body(&loop.body);
		return true;
	}

	// three part for
	if(lines.size() - line_offset < 4)
		throw ail::exception("Incomplete for statement");

	line_offset++;

	// all three header lines must sit on the current indentation level
	const std::size_t first_header_line = line_offset;
	const std::size_t past_header = first_header_line + 3;
	std::size_t index = first_header_line;
	while(index < past_header)
	{
		if(lines[index].indentation_level != indentation_level)
			throw ail::exception("Invalid indentation level in a for statement");
		index++;
	}

	output.type = executable_unit_type::for_statement;
	output.for_pointer = new for_statement;

	for_statement & loop = *output.for_pointer;
	process_offset_atomic_statement(loop.initialisation);
	process_offset_atomic_statement(loop.conditional);
	process_offset_atomic_statement(loop.iteration);
	// NOTE(review): unlike the for-each branch, no body is parsed here - confirm this is intended
	return true;
}
/*
  We want to cut out the first column and the fourth column of the bim file:
  chromosomes and positions. But we want to check that the chromosomes are
  1-22. These are the ones to include, so we return a keeplist, and set the
  corresponding pars->chr and pars->position.

  The token vector is reused for every line, and lines are read with
  std::getline() so that the line length is not limited by a fixed buffer.
*/
///@param pars Receives the chromosomes (pars->chr) and the scaled positions (pars->position) of the kept rows.
///@param filename A filename to read.
///@param delim A string of delimiters.
///@return A keeplist with one entry per row that was read: 1 if the chromosome is in 1-22, otherwise 0; numTrue holds the number of kept rows.
bArray *doBimFile(functionPars* pars,const char *filename,const std::string delim){
  std::ifstream pFile(filename,std::ios::in);
  if(!pFile){
    fileError(filename);
    exit(0);
  }

  std::vector<int> chromos;       // for all lines
  std::vector<double> positions;  // for all lines
  std::vector<std::string> tokens; // items of the current line only
  std::string line;
  int numRows = 0;

  while(std::getline(pFile,line)){
    tokens.clear();
    const int itemsInRow = get_lexemes(line,tokens,delim);
    if(itemsInRow==0)
      break; // reading stops at the first line without items
    if(itemsInRow!=6){
      printf("plink bim file:%s doesn't have 6 columns in row:%d\n",filename,numRows);
      exit(0);
    }
    chromos.push_back(atoi(tokens[0].c_str()));
    positions.push_back(atof(tokens[3].c_str()));
    numRows++;
  }

  // build the keeplist: only chromosomes 1-22 are kept
  // (atoi() gives 0 for non-numeric codes; negative codes are rejected as well)
  bArray *ret = allocBoolArray(chromos.size());
  int numTrue = 0;
  for(unsigned int i=0;i<chromos.size();i++){
    const bool keep = chromos[i]>=1 && chromos[i]<=22;
    ret->array[i] = keep ? 1 : 0;
    if(keep)
      numTrue++;
  }
  ret->numTrue = numTrue;

  // copy chromosome and scaled position of the kept rows
  dArray *pos = allocDoubleArray(ret->numTrue);
  iArray *chr = allocIntArray(ret->numTrue);
  int atPos=0;
  for(int i=0;i<ret->x;i++){
    if(ret->array[i]){
      pos->array[atPos] = positions[i]/PLINK_POS_SCALING;
      chr->array[atPos] = chromos[i];
      atPos++;
    }
  }
  pars->chr = chr;
  pars->position= pos;
  return ret;
}
/// Reads a delimited text file of genotype values into a newly allocated
/// integer matrix, using two passes over the file.
///
/// Pass 1 counts the non-empty rows and checks that every one of them has as
/// many items as the first non-empty row; a mismatch prints a diagnostic and
/// terminates the program. Pass 2 reopens the file, converts every item with
/// to_int() and stores it; a value outside 0..3 prints a diagnostic and
/// terminates the program. Only the items of one line are held in memory at a
/// time.
///
/// Lines are read with std::getline() into a std::string, so the line length
/// is not limited by a fixed buffer (overflowing that buffer previously left
/// the stream in a failed state that the eof()-controlled loops never
/// escaped). The second open is now checked for the same reason.
///
/// @param filename A filename to read.
/// @param delim A string of delimiters.
/// @return The matrix obtained from allocIntMatrix(numRows, itemsInFirstRow),
///         filled row by row from the file.
/// @note All error paths exit with status 0, exactly as before.
iMatrix *readmatrix(std::string filename,const std::string delim=",;: \t"){
  if(0){ // debug trace, disabled
    printf("\t-> will try to open postfile: \"%s\" ... \n",filename.c_str());
    fflush(stdout);
  }

  std::ifstream pFile(filename.c_str(),std::ios::in);
  if(!pFile){
    std::cout <<"Problems opening file" <<filename<<std::endl;
    exit(0);
  }

  std::string line;
  std::vector<std::string> tokens; // items of the current line only
  bool haveFirstRow = false;
  int itemsInFirstRow = 0;
  int numRows = 0;

  // pass 1: count the rows and verify that all of them have the same length
  flush_print("Checking consistency of file...");
  while(std::getline(pFile,line)){
    tokens.clear();
    const int nItems = get_lexemes(line,tokens,delim);
    if(nItems==0)
      continue; // empty line (also covers empty lines before the first row)

    numRows++;
    if(!haveFirstRow){
      // the first non-empty row defines the width of the matrix
      itemsInFirstRow = nItems;
      haveFirstRow = true;
    }else if(nItems!=itemsInFirstRow){
      printf("row length mismatch at line:%d numitems is:%d shouldn't be:%d\t will exit\n",numRows,itemsInFirstRow,nItems);
      exit(0);
    }

    if((numRows%20)==0){
      printf("\r\t-> Checking consistency of file: (checking number of items at line: %d )",numRows);
      fflush(stdout);
    }
  }
  pFile.close();
  fflush(stdout);

  iMatrix *mat = allocIntMatrix(numRows,itemsInFirstRow);

  // pass 2: read the values
  std::ifstream pFile2(filename.c_str(),std::ios::in);
  if(!pFile2){
    std::cout <<"Problems opening file" <<filename<<std::endl;
    exit(0);
  }

  numRows = 0;
  while(std::getline(pFile2,line)){
    if((numRows%5)==0){
      printf("\r\t-> Checking consistency of file: (Now reading in data at line: %d/%d ) ",numRows,mat->x);
      fflush(stdout);
    }

    tokens.clear();
    const int itemsInRow = get_lexemes(line,tokens,delim);
    if(itemsInRow==0)
      continue;

    const int numCols = static_cast<int>(tokens.size());
    for(int col=0;col<numCols;col++){
      mat->matrix[numRows][col] = to_int(tokens[col]);
      if(mat->matrix[numRows][col]<0||mat->matrix[numRows][col]>3){
        printf("\n\t-> Error in genotype data: (%d,%d)=%d, value should be between 0 and 3.\n",numRows,col,mat->matrix[numRows][col]);
        exit(0);
      }
    }
    numRows++;
  }
  pFile2.close();
  fflush(stdout);
  return mat;
}