// DataIO_Std::Read_2D() int DataIO_Std::Read_2D(std::string const& fname, DataSetList& datasetlist, std::string const& dsname) { // Buffer file BufferedLine buffer; if (buffer.OpenFileRead( fname )) return 1; mprintf("\tData will be read as a 2D square matrix.\n"); // Skip comments const char* linebuffer = buffer.Line(); while (linebuffer != 0 && linebuffer[0] == '#') linebuffer = buffer.Line(); int ncols = -1; int nrows = 0; std::vector<double> matrixArray; while (linebuffer != 0) { int ntokens = buffer.TokenizeLine( SEPARATORS ); if (ncols < 0) { ncols = ntokens; if (ntokens < 1) { mprinterr("Error: Could not tokenize line.\n"); return 1; } } else if (ncols != ntokens) { mprinterr("Error: In 2D file, number of columns changes from %i to %i at line %i\n", ncols, ntokens, buffer.LineNumber()); return 1; } for (int i = 0; i < ntokens; i++) matrixArray.push_back( atof( buffer.NextToken() ) ); nrows++; linebuffer = buffer.Line(); } if (ncols < 0) { mprinterr("Error: No data detected in %s\n", buffer.Filename().full()); return 1; } if ( DetermineMatrixType( matrixArray, nrows, ncols, datasetlist, dsname )==0 ) return 1; return 0; }
// DataIO_Std::Read_1D() int DataIO_Std::Read_1D(std::string const& fname, DataSetList& datasetlist, std::string const& dsname) { ArgList labels; bool hasLabels = false; // Buffer file BufferedLine buffer; if (buffer.OpenFileRead( fname )) return 1; // Read the first line. Attempt to determine the number of columns const char* linebuffer = buffer.Line(); if (linebuffer == 0) return 1; int ntoken = buffer.TokenizeLine( SEPARATORS ); if ( ntoken == 0 ) { mprinterr("Error: No columns detected in %s\n", buffer.Filename().full()); return 1; } // Try to skip past any comments. If line begins with a '#', assume it // contains labels. bool isCommentLine = true; const char* ptr = linebuffer; while (isCommentLine) { // Skip past any whitespace while ( *ptr != '\0' && isspace(*ptr) ) ++ptr; // Assume these are column labels until proven otherwise. if (*ptr == '#') { labels.SetList(ptr+1, SEPARATORS ); if (!labels.empty()) { hasLabels = true; // If first label is Frame assume it is the index column if (labels[0] == "Frame" && indexcol_ == -1) indexcol_ = 0; } linebuffer = buffer.Line(); ptr = linebuffer; if (ptr == 0) { mprinterr("Error: No data found in file.\n"); return 1; } } else // Not a recognized comment character, assume data. isCommentLine = false; } // Special case: check if labels are '#F1 F2 <name> [nframes <#>]'. If so, assume // this is a cluster matrix file. if ((labels.Nargs() == 3 || labels.Nargs() == 5) && labels[0] == "F1" && labels[1] == "F2") { mprintf("Warning: Header format '#F1 F2 <name>' detected, assuming cluster pairwise matrix.\n"); return IS_ASCII_CMATRIX; } // Column user args start from 1 if (indexcol_ > -1) mprintf("\tUsing column %i as index column.\n", indexcol_ + 1); // Should be at first data line. Tokenize the line. ntoken = buffer.TokenizeLine( SEPARATORS ); // If # of data columns does not match # labels, clear labels. if ( !labels.empty() && ntoken != labels.Nargs() ) { labels.ClearList(); hasLabels = false; } // Index column checks if (indexcol_ != -1 ) { if (indexcol_ >= ntoken) { mprinterr("Error: Specified index column %i is out of range (%i columns).\n", indexcol_+1, ntoken); return 1; } if (!onlycols_.Empty() && !onlycols_.InRange(indexcol_)) { mprinterr("Error: Index column %i specified, but not in given column range '%s'\n", indexcol_+1, onlycols_.RangeArg()); return 1; } } // Determine the type of data stored in each column. Assume numbers should // be read with double precision. MetaData md( dsname ); DataSetList::DataListType inputSets; unsigned int nsets = 0; for (int col = 0; col != ntoken; ++col) { std::string token( buffer.NextToken() ); if (!onlycols_.Empty() && !onlycols_.InRange( col )) { mprintf("\tSkipping column %i\n", col+1); inputSets.push_back( 0 ); } else { md.SetIdx( col+1 ); if (hasLabels) md.SetLegend( labels[col] ); if ( col == indexcol_ ) { // Always save the index column as floating point inputSets.push_back( new DataSet_double() ); } else if (validInteger(token)) { // Integer number inputSets.push_back( datasetlist.Allocate(DataSet::INTEGER) ); } else if (validDouble(token)) { // Floating point number inputSets.push_back( new DataSet_double() ); } else { // Assume string. Not allowed for index column. if (col == indexcol_) { mprintf("Warning: '%s' index column %i has string values. No indices will be read.\n", buffer.Filename().full(), indexcol_+1); indexcol_ = -1; } inputSets.push_back( new DataSet_string() ); } inputSets.back()->SetMeta( md ); nsets++; } } if (inputSets.empty() || nsets == 0) { mprinterr("Error: No data detected.\n"); return 1; } // Read in data while (linebuffer != 0) { if ( buffer.TokenizeLine( SEPARATORS ) != ntoken ) { PrintColumnError(buffer.LineNumber()); break; } // Convert data in columns for (int i = 0; i < ntoken; ++i) { const char* token = buffer.NextToken(); if (inputSets[i] != 0) { if (inputSets[i]->Type() == DataSet::DOUBLE) ((DataSet_double*)inputSets[i])->AddElement( atof(token) ); else if (inputSets[i]->Type() == DataSet::INTEGER) ((DataSet_integer*)inputSets[i])->AddElement( atoi(token) ); else ((DataSet_string*)inputSets[i])->AddElement( std::string(token) ); } } //Ndata++; linebuffer = buffer.Line(); } buffer.CloseFile(); mprintf("\tDataFile %s has %i columns, %i lines.\n", buffer.Filename().full(), ntoken, buffer.LineNumber()); // Create list containing only data sets. DataSetList::DataListType mySets; DataSet_double* Xptr = 0; for (int idx = 0; idx != (int)inputSets.size(); idx++) { if (inputSets[idx] != 0) { if ( idx != indexcol_ ) mySets.push_back( inputSets[idx] ); else Xptr = (DataSet_double*)inputSets[idx]; } } mprintf("\tRead %zu data sets.\n", mySets.size()); std::string Xlabel; if (indexcol_ != -1 && indexcol_ < labels.Nargs()) Xlabel = labels[indexcol_]; if (Xptr == 0) datasetlist.AddOrAppendSets(Xlabel, DataSetList::Darray(), mySets); else { datasetlist.AddOrAppendSets(Xlabel, Xptr->Data(), mySets); delete Xptr; } return 0; }