Example #1
0
/** Add column label from loop section. */
int CIFfile::DataBlock::AddLoopColumn( const char* ptr, BufferedLine& infile ) {
  if (ptr == 0) return 1;
  // Expect header.id
  int Ncols = infile.TokenizeLine(" \t");
  if ( Ncols > 1 ) {
    mprinterr("Error: Data record expected to have ID only.\n"
              "Error: '%s'\n", ptr);
    return 1;
  }
  std::string ID, Header;
  if (ParseData( std::string(infile.NextToken()), Header, ID )) return 1;
  //mprintf("\n"); // DEBUG
  if (AddHeader( Header )) return 1;
  columnHeaders_.push_back( ID );

  return 0;
}  
Example #2
0
/** Read Xplor-style restraint file.
  * Every line beginning with 'assign' is parsed for two atom selections
  * followed by an optional distance and two bound offsets. Successfully
  * parsed NOEs are appended to NOEs_. Lines that cannot be parsed are
  * reported and skipped; they do not cause a non-zero return.
  * \param infile File being read.
  * \return 1 if no line could be read from the file, 0 otherwise.
  */
int Action_NMRrst::ReadXplor( BufferedLine& infile ) {
  noeDataType NOE;
  const char* ptr = infile.Line();
  if (ptr == 0) {
    mprinterr("Error: Unexpected end of XPLOR restraint file.\n");
    return 1;
  }
  while ( ptr != 0 ) {
    if (ptr[0] == 'a' && ptr[1] == 's' && ptr[2] == 's' &&
        ptr[3] == 'i' && ptr[4] == 'g' && ptr[5] == 'n'   )
    {
      // 'assign' statement
      ArgList line(ptr, " ()");
      if (line.empty()) {
        mprinterr("Error: Could not parse XPLOR 'assign' line:\n\t%s",ptr);
      } else {
        line.MarkArg(0); // Mark 'assign'
        // Get 2 Masks
        NOE.resNum1_ = GetAssignSelection( NOE.aName1_, line, resOffset_ );
        NOE.resNum2_ = GetAssignSelection( NOE.aName2_, line, resOffset_ );
        if (NOE.resNum1_ < 1 || NOE.resNum2_ < 1) {
          mprinterr("Error: Could not get masks from line:\n\t%s", ptr);
          mprinterr("Error: Check if residue number + offset is out of bounds.\n");
        } else {
          // Check for noe bounds
          NOE.rexp_ = line.getNextDouble(-1.0);
          if ( NOE.rexp_ < 0.0 ) {
            // No more on this line, assume jcoupling
            ptr = infile.Line();
            // The continuation line may not exist; stop reading at end of
            // file instead of handing a null line to the parser.
            if (ptr == 0) break;
            line.SetList(ptr, " ()");
            // Get 2 more masks and jcoupling values 
          } else {
            // NOE: upper bound is rexp + first offset, lower is rexp - second.
            NOE.boundh_ = NOE.rexp_ + line.getNextDouble(0.0);
            NOE.bound_ = NOE.rexp_ - line.getNextDouble(0.0);
            NOE.dist_ = 0;
            NOEs_.push_back( NOE );
          }
        }
      }
    }
    ptr = infile.Line();
  }
  return 0;
}
Example #3
0
// -----------------------------------------------------------------------------
/** Open the given restraint file, decide from its first meaningful line
  * whether it is XPLOR or DIANA/Amber formatted, then re-open it and hand it
  * to the matching reader.
  * \param rstfilename Name of the restraint file.
  * \return 0 on success, 1 on error.
  */
int Action_NMRrst::ReadNmrRestraints( std::string const& rstfilename )
{
  BufferedLine infile;
  if (infile.OpenFileRead( rstfilename )) return 1;
  // Advance to the first line that is not skippable (blank/comment).
  const char* firstLine = infile.Line();
  for (; SkipChar( firstLine ); firstLine = infile.Line()) {}
  if (firstLine == 0) {
    mprinterr("Error: Unexpected end of restraint file.\n");
    return 1; 
  }
  // Keep a copy of the line since the file is about to be closed.
  const std::string inputLine( firstLine );
  infile.CloseFile();
  // Start over from the top of the file for the actual read.
  if (infile.OpenFileRead( rstfilename )) return 1;
  // XPLOR files are recognized by one of these leading keywords.
  const bool looksLikeXplor = ( inputLine.compare(0, 7, "*HEADER")==0 ||
                                inputLine.compare(0, 6, "*TITLE")==0 ||
                                inputLine.compare(0, 6, "assign")==0 );
  // Anything else is assumed to be DIANA/Amber.
  const int status = looksLikeXplor ? ReadXplor( infile ) : ReadAmber( infile );
  infile.CloseFile();
  if (status == 0) return 0;
  mprinterr("Error: Could not parse restraint file.\n");
  return 1;
}
Example #4
0
/** Read Amber/DIANA style restraints.
  * Each non-skipped line must have 7 columns (upper bound only) or
  * 8 columns (lower and upper bound):
  *   resnum1 resname1 atomname1 resnum2 resname2 atomname2 [lower] upper
  * The residue offset is added to both residue numbers; restraints whose
  * residue numbers end up below 1 are reported and skipped.
  * \param infile File being read.
  * \return 0 on success, 1 if the file has no lines or a line has an
  *         unexpected number of columns.
  */
int Action_NMRrst::ReadAmber( BufferedLine& infile ) {
  noeDataType NOE;
  const char* ptr = infile.Line();
  if (ptr == 0) {
    mprinterr("Error: Unexpected end of Amber restraint file.\n");
    return 1;
  }
  // Each name buffer holds at most 15 characters plus the terminator; the
  // field widths in the scan format below must stay in sync with this size.
  char rname1[16], rname2[16], aname1[16], aname2[16];
  double l_bound, u_bound;
  while (ptr != 0) {
    if (!SkipChar(ptr)) {
      // Width-limited %15s prevents overflowing the 16-byte name buffers.
      int cols = sscanf( ptr, "%d %15s %15s %d %15s %15s %lf %lf",
                         &NOE.resNum1_, rname1, aname1,
                         &NOE.resNum2_, rname2, aname2,
                         &l_bound, &u_bound );
      if (cols == 7) { // 7-column, upper-bound only
        NOE.bound_ = 0.0;
        NOE.boundh_ = l_bound;
      } else if (cols == 8) { // 8-column, lower/upper bounds
        NOE.bound_ = l_bound;
        NOE.boundh_ = u_bound;
      } else {
        mprinterr("Error: Expected only 7 or 8 columns in Amber restraint file, got %i.\n", cols);
        return 1;
      }
      NOE.rexp_ = -1.0;
      NOE.dist_ = 0;
      NOE.resNum1_ += resOffset_;
      NOE.resNum2_ += resOffset_;
      if (NOE.resNum1_ < 1 || NOE.resNum2_ < 1) {
        mprinterr("Error: One or both residue numbers are out of bounds (%i, %i)\n"
                  "Error: Line: %s", NOE.resNum1_, NOE.resNum2_, ptr);
      } else {
        NOE.aName1_.assign( aname1 );
        NOE.aName2_.assign( aname2 );
        NOEs_.push_back( NOE );
      }
    }
    ptr = infile.Line();
  }
  return 0;
}
Example #5
0
// DataIO_Std::Read_Mat3x3()
/** Read a file containing one 3x3 matrix (9 values) per line, optionally
  * preceded by an index column (10 columns total) which is not stored.
  * Leading lines beginning with '#' are skipped. Reading stops at the first
  * line that does not yield 9 values; matrices read up to that point are
  * still passed to the data set list.
  * \param fname File to read.
  * \param datasetlist List that receives the new 3x3 matrix data set.
  * \param dsname Name given to the data set.
  * \return 1 on open/format error, otherwise the result of adding the set.
  */
int DataIO_Std::Read_Mat3x3(std::string const& fname, 
                            DataSetList& datasetlist, std::string const& dsname)
{
  // Buffer file
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  mprintf("\tAttempting to read 3x3 matrix data.\n");
  // Skip comments
  const char* linebuffer = buffer.Line();
  while (linebuffer != 0 && linebuffer[0] == '#')
    linebuffer = buffer.Line();
  // Check that number of columns (9) is correct.
  int ntokens = buffer.TokenizeLine( SEPARATORS );
  if (ntokens < 1) {
    mprinterr("Error: Could not tokenize line.\n");
    return 1;
  }
  int ncols = ntokens;
  bool hasIndex;
  if (ncols == 9)
    hasIndex = false;
  else if (ncols == 10) {
    hasIndex = true;
    mprintf("Warning: Not reading 3x3 matrix data indices.\n");
  } else {
    mprinterr("Error: Expected 9 columns of 3x3 matrix data, got %i.\n", ncols);
    return 1;
  }
  // Create data set
  DataSet_Mat3x3* ds = new DataSet_Mat3x3();
  if (ds == 0) return 1;
  ds->SetMeta( dsname );
  // Read 3x3 matrix data
  double mat[9];
  // Zero all 9 elements (the end of the range must be mat+9, not mat).
  std::fill(mat, mat + 9, 0.0);
  size_t ndata = 0;
  while (linebuffer != 0) {
    if (hasIndex)
      // %*f consumes the leading index column without storing it.
      ntokens = sscanf(linebuffer, "%*f %lf %lf %lf %lf %lf %lf %lf %lf %lf",
                       mat, mat+1, mat+2, mat+3, mat+4, mat+5, mat+6, mat+7, mat+8);
    else
      ntokens = sscanf(linebuffer, "%lf %lf %lf %lf %lf %lf %lf %lf %lf",
                       mat, mat+1, mat+2, mat+3, mat+4, mat+5, mat+6, mat+7, mat+8);
    if (ntokens != 9) {
      mprinterr("Error: In 3x3 matrix file, line %i: expected 9 values, got %i\n",
                buffer.LineNumber(), ntokens);
      break;
    }
    ds->Add( ndata++, mat ); 
    linebuffer = buffer.Line();
  }
  return (datasetlist.AddOrAppendSets("", DataSetList::Darray(), DataSetList::DataListType(1, ds)));
}
Example #6
0
// DataIO_Std::Read_2D()
/** Read a text file as a 2D matrix. Leading lines beginning with '#' are
  * skipped; every remaining line must tokenize to the same number of columns.
  * Values are collected row by row and handed to DetermineMatrixType().
  * \param fname File to read.
  * \param datasetlist Data set list passed on to DetermineMatrixType().
  * \param dsname Data set name passed on to DetermineMatrixType().
  * \return 0 on success, 1 on error.
  */
int DataIO_Std::Read_2D(std::string const& fname, 
                        DataSetList& datasetlist, std::string const& dsname)
{
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  mprintf("\tData will be read as a 2D square matrix.\n");
  // Move past any leading comment lines.
  const char* row = buffer.Line();
  for (; row != 0 && row[0] == '#'; row = buffer.Line()) {}
  int expectedCols = -1; // Set from the first data row
  int rowCount = 0;
  std::vector<double> values;
  for (; row != 0; row = buffer.Line()) {
    const int nFields = buffer.TokenizeLine( SEPARATORS );
    if (expectedCols < 0) {
      // First data row fixes the column count.
      if (nFields < 1) {
        mprinterr("Error: Could not tokenize line.\n");
        return 1;
      }
      expectedCols = nFields;
    } else if (nFields != expectedCols) {
      mprinterr("Error: In 2D file, number of columns changes from %i to %i at line %i\n",
                expectedCols, nFields, buffer.LineNumber());
      return 1;
    }
    for (int col = 0; col < nFields; ++col)
      values.push_back( atof( buffer.NextToken() ) );
    ++rowCount;
  }
  // No data rows at all means the column count was never set.
  if (expectedCols < 0) {
    mprinterr("Error: No data detected in %s\n", buffer.Filename().full());
    return 1;
  }
  const bool typeFailed =
    ( DetermineMatrixType( values, rowCount, expectedCols, datasetlist, dsname )==0 );
  return typeFailed ? 1 : 0;
}
Example #7
0
/** Read clusters from a previously written cluster info file.
  * The first line must provide the number of clusters (key '#Clustering:')
  * and the number of frames (key 'clusters'); the frame count must match
  * the current number of frames. Remaining '#' lines are scanned for the
  * '#Algorithm: ' entry. Each following line describes one cluster, with
  * an 'X' at position i meaning frame i belongs to that cluster.
  * \return 0 on success; otherwise 1 or the value of Err().
  */
int Cluster_ReadInfo::Cluster() {
  BufferedLine infile;
  if (infile.OpenFileRead( filename_ )) return Err(0);
  const char* ptr = infile.Line();
  if (ptr == 0) return Err(1);
  ArgList infoLine( ptr, " " );
  int nclusters = infoLine.getKeyInt("#Clustering:", -1);
  if (nclusters == -1) return Err(2);
  int nframes = infoLine.getKeyInt("clusters", -1);
  if (nframes == -1) return Err(3);
  if (nframes != (int)FrameDistances_.Nframes()) {
    mprinterr("Error: # frames in cluster info file (%i) does not match"
              " current # frames (%zu)\n", nframes, FrameDistances_.Nframes());
    return 1;
  }
  // Scan down to clusters
  while (ptr[0] == '#') {
    ptr = infile.Line();
    if (ptr == 0) return Err(1);
    // Save previous clustering info. Includes newline.
    if (ptr[0] == '#' && ptr[1] == 'A' && ptr[2] == 'l' && ptr[3] == 'g') {
      // Make sure the line really has 12 leading characters before
      // stepping right past '#Algorithm: '.
      int len = 4;
      while (len < 12 && ptr[len] != '\0') ++len;
      if (len == 12)
        algorithm_.assign( ptr + 12 );
    }
  }
  // Read clusters
  ClusterDist::Cframes frames;
  for (int cnum = 0; cnum != nclusters; cnum++) {
    if (ptr == 0) return Err(1);
    frames.clear();
    // Stop at the end of the line so a short (busted) line is never read
    // past its terminator.
    int fidx = 0;
    for (; fidx != nframes && ptr[fidx] != '\0'; fidx++) {
      if (ptr[fidx] == 'X')
        frames.push_back( fidx );
    }
    if (fidx != nframes)
      mprintf("Warning: Line for cluster %i has %i characters, expected %i.\n",
              cnum, fidx, nframes);
    AddCluster( frames );
    mprintf("\tRead cluster %i, %zu frames.\n", cnum, frames.size());
    ptr = infile.Line();
  }
  infile.CloseFile();
  mprintf("\tCalculating the distances between each cluster based on centroids.\n");
  CalcClusterDistances();
  return 0;
}
Example #8
0
/** Split given line into a certain number of tokens. Data might be
  * split across multiple lines. A new row is appended to columnData_ and
  * filled with the columns read. A column that starts with a quote
  * character continues over subsequent tokens (joined with a single space)
  * until a token ends the quote. A column whose first token on a line starts
  * with ';' continues over subsequent tokens until a token starting with
  * ';' is read.
  * \param NexpectedCols Number of expected data cols.
  * \param infile File being read.
  * \param isSerial true if inside a serial data block.
  * \return 0 if NexpectedCols columns were read, 1 on error.
  */
int CIFfile::DataBlock::GetColumnData(int NexpectedCols, BufferedLine& infile, bool isSerial)
{
  const char* SEP = " \t";
  // Allocate for a line of data
  columnData_.push_back( Sarray() );
  // Tokenize the initial line
  int nReadCols = 0;                   // Number of completed columns
  int Ncols = infile.TokenizeLine(SEP); // Number of tokens on current line
  int idx = 0;                         // Number of tokens consumed from current line
  bool insideQuote = false;            // True while a quoted column is still open
  bool insideSemi = false;             // True while a ';' delimited column is still open
  while (nReadCols < NexpectedCols) {
    // Load up the next line if needed
    if (idx == Ncols) {
      // End of file before all columns were read; reported after the loop.
      if (infile.Line() == 0) break;
      Ncols = infile.TokenizeLine(SEP);
      idx = 0;
    }
    const char *tkn = infile.NextToken();
    // Skip blanks
    if (tkn == 0) continue;
    idx++;
    //mprintf("DEBUG: Token %i '%s'\n", idx, tkn);
    if (isSerial && nReadCols == 0) {
      // First column for serial data is header.id
      std::string ID, Header;
      if (ParseData( std::string(tkn), Header, ID )) return 1;
      //mprintf("  Ndata=%i  Data=%s\n", serialData.Nargs(), serialData[1].c_str());
      if (AddHeader( Header )) return 1;
      columnHeaders_.push_back( ID );
      nReadCols++;
    } else if (insideQuote) {
      // Append this to the current data column.
      columnData_.back().back().append( " " + std::string(tkn) );
      // Check for an end quote.
      if (HasEndQuote( columnData_.back().back() )) {
        // Remove that end quote.
        columnData_.back().back() = RemoveEndQuote( columnData_.back().back() );
        insideQuote = false;
        nReadCols++;
      }
    } else if (insideSemi) {
      // End if token begins with semicolon, otherwise append.
      // NOTE(review): this tests every token, not only the first token of a
      // line - confirm that is intended.
      if (tkn[0] == ';') {
        insideSemi = false;
        nReadCols++;
      } else
        // Appended without any separator between tokens.
        columnData_.back().back().append( std::string(tkn) );
    } else {
      // Add new data column
      if (idx == 1 && tkn[0] == ';') {
        // Semicolon indicates more lines to be read.
        // Store the text after the leading ';'.
        columnData_.back().push_back( std::string(tkn+1) );
        insideSemi = true;
      } else {
        columnData_.back().push_back( std::string(tkn) );
        // Check if column began and did not end with a quote.
        if (IsQuoteChar(columnData_.back().back()[0])) {
          // Remove leading quote.
          std::string tmps = columnData_.back().back().substr(1);
          columnData_.back().back() = tmps;
          if ( !HasEndQuote((columnData_.back().back())) ) {
            // Still need to look for the end quote.
            insideQuote = true;
          } else {
            // We have the end quote. Remove it.
            columnData_.back().back() = RemoveEndQuote( columnData_.back().back() );
          }
        }
      }
      // Only count the column once it is complete.
      if (!insideSemi && !insideQuote) nReadCols++;
    }
  }
  // Fewer columns than expected means the file ended early.
  if (nReadCols != NexpectedCols) {
    mprinterr("Error: Line %i: '%s': Read %i columns, expected %i\n",
              infile.LineNumber(), dataHeader_.c_str(), nReadCols, NexpectedCols);
    return 1;
  }
  return 0;
}
Example #9
0
/** Build a subject structure from a BLAST alignment and a query reference.
  * Reads Query/alignment/Sbjct line triplets from the BLAST file, maps each
  * subject residue to a query residue (or to none), then builds a new
  * topology and frame for the subject from the reference coordinates:
  * exactly matching residues keep all non-hydrogen atoms, mismatched residues
  * keep N/CA/CB/C/O only, and subject residues with no query counterpart get
  * a placeholder CA. Placeholder coordinates are linearly interpolated
  * between the atoms bordering each contiguous placeholder segment.
  * The result is written as a single frame (PDB by default, or MOL2).
  * \param State Current state; provides the data set list and debug level.
  * \param argIn Arguments: 'blastfile', reference, 'out', format keyword,
  *              'smaskoffset', 'qmaskoffset'.
  * \return 0 on success, 1 on error.
  */
int SequenceAlign(CpptrajState& State, ArgList& argIn) {
  std::string blastfile = argIn.GetStringKey("blastfile");
  if (blastfile.empty()) {
    mprinterr("Error: 'blastfile' must be specified.\n");
    return 1;
  }
  ReferenceFrame qref = State.DSL()->GetReferenceFrame(argIn);
  if (qref.error() || qref.empty()) {
    mprinterr("Error: Must specify reference structure for query.\n");
    return 1;
  }
  std::string outfilename = argIn.GetStringKey("out");
  if (outfilename.empty()) {
    mprinterr("Error: Must specify output file.\n");
    return 1;
  }
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::GetFormatFromArg(argIn);
  if (fmt != TrajectoryFile::PDBFILE && fmt != TrajectoryFile::MOL2FILE)
    fmt = TrajectoryFile::PDBFILE; // Default to PDB
  // Offsets are applied to 0-based indices, hence the +1.
  int smaskoffset = argIn.getKeyInt("smaskoffset", 0) + 1;
  int qmaskoffset = argIn.getKeyInt("qmaskoffset", 0) + 1;

  // Load blast file
  mprintf("\tReading BLAST alignment from '%s'\n", blastfile.c_str());
  BufferedLine infile;
  if (infile.OpenFileRead( blastfile )) return 1;
  // Seek down to first Query line.
  const char* ptr = infile.Line();
  bool atFirstQuery = false;
  while (ptr != 0) {
    if (*ptr == 'Q') {
      if ( strncmp(ptr, "Query", 5) == 0 ) {
        atFirstQuery = true;
        break;
      }
    }
    ptr = infile.Line();
  }
  if (!atFirstQuery) {
    mprinterr("Error: 'Query' not found.\n");
    return 1;
  }

  // Read alignment. Replacing query with subject.
  typedef std::vector<char> Carray;
  typedef std::vector<int> Iarray;
  Carray Query; // Query residues
  Carray Sbjct; // Sbjct residues
  Iarray Smap;  // Smap[Sbjct index] = Query index
  while (ptr != 0) {
    const char* qline = ptr;           // query line
    const char* aline = infile.Line(); // alignment line
    const char* sline = infile.Line(); // subject line
    if (aline == 0 || sline == 0) {
      mprinterr("Error: Missing alignment line or subject line after Query:\n");
      mprinterr("Error:  %s", qline);
      return 1;
    }
    // Sequence starts at column 12 and ends at the next space. Also stop at
    // the end of either line so short lines are never read past their end.
    const int qlen = (int)strlen(qline);
    const int slen = (int)strlen(sline);
    for (int idx = 12; idx < qlen && idx < slen && qline[idx] != ' '; idx++) {
      if (qline[idx] == '-') {
        // Sbjct does not have corresponding res in Query
        Smap.push_back(-1);
        Sbjct.push_back( sline[idx] );
      } else if (sline[idx] == '-') {
        // Query does not have a corresponding res in Sbjct
        Query.push_back( qline[idx] );
      } else {
        // Direct Query to Sbjct map
        Smap.push_back( Query.size() );
        Sbjct.push_back( sline[idx] );
        Query.push_back( qline[idx] );
      }
    }
    // Scan to next Query 
    ptr = infile.Line();
    while (ptr != 0) {
      if (*ptr == 'Q') {
        if ( strncmp(ptr, "Query", 5) == 0 ) break;
      }
      ptr = infile.Line();
    }
  }
  // DEBUG
  std::string SmaskExp, QmaskExp;
  if (State.Debug() > 0) mprintf("  Map of Sbjct to Query:\n");
  for (int sres = 0; sres != (int)Sbjct.size(); sres++) {
    if (State.Debug() > 0)
      mprintf("%-i %3s %i", sres+smaskoffset, Residue::ConvertResName(Sbjct[sres]),
              Smap[sres]+qmaskoffset);
    const char* qres = "";
    if (Smap[sres] != -1) {
      qres = Residue::ConvertResName(Query[Smap[sres]]);
      // Build comma-separated residue lists for mapped residues.
      if (SmaskExp.empty())
        SmaskExp.assign( integerToString(sres+smaskoffset) );
      else
        SmaskExp.append( "," + integerToString(sres+smaskoffset) );
      if (QmaskExp.empty())
        QmaskExp.assign( integerToString(Smap[sres]+qmaskoffset) );
      else
        QmaskExp.append( "," + integerToString(Smap[sres]+qmaskoffset) );

    }
    if (State.Debug() > 0) mprintf(" %3s\n", qres);
  }
  mprintf("Smask: %s\n", SmaskExp.c_str());
  mprintf("Qmask: %s\n", QmaskExp.c_str());
  // Check that query residues match reference.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    if (qres != -1) {
      if (Query[qres] != qref.Parm().Res(qres).SingleCharName()) {
        mprintf("Warning: Potential residue mismatch: Query %s reference %s\n",
                Residue::ConvertResName(Query[qres]), qref.Parm().Res(qres).c_str());
      }
    }
  }
  // Build subject using coordinate from reference.
  //AtomMask sMask; // Contain atoms that should be in sTop
  Topology sTop;
  Frame sFrame;
  Iarray placeHolder; // Atom indices of placeholder residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    NameType SresName( Residue::ConvertResName(Sbjct[sres]) );
    if (qres != -1) {
      Residue const& QR = qref.Parm().Res(qres);
      Residue SR(SresName, sres+1, ' ', QR.ChainID());
      if (Query[qres] == Sbjct[sres]) { // Exact match. All non-H atoms.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          // Atom and coordinates must be added together so that the frame
          // stays in sync with the topology; hydrogens get neither.
          if (qref.Parm()[qat].Element() != Atom::HYDROGEN) {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
            //sMask.AddAtom(qat);
          }
        }
      } else { // Partial match. Copy only backbone and CB.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          if ( qref.Parm()[qat].Name().Match("N" ) ||
               qref.Parm()[qat].Name().Match("CA") ||
               qref.Parm()[qat].Name().Match("CB") ||
               qref.Parm()[qat].Name().Match("C" ) ||
               qref.Parm()[qat].Name().Match("O" ) )
          {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      }
    } else {
      // Residue in query does not exist for subject. Just put placeholder CA for now.
      Vec3 Zero(0.0);
      placeHolder.push_back( sTop.Natom() );
      sTop.AddTopAtom( Atom("CA", "C "), Residue(SresName, sres+1, ' ', ' ') );
      sFrame.AddXYZ( Zero.Dptr() );
    }
  }
  //sTop.PrintAtomInfo("*");
  mprintf("\tPlaceholder residue indices:");
  for (Iarray::const_iterator p = placeHolder.begin(); p != placeHolder.end(); ++p)
    mprintf(" %i", *p + 1);
  mprintf("\n");
  // Try to give placeholders more reasonable coordinates.
  if (!placeHolder.empty()) {
    Iarray current_indices;
    unsigned int pidx = 0;
    while (pidx < placeHolder.size()) {
      if (current_indices.empty()) {
        current_indices.push_back( placeHolder[pidx++] );
        // Search for the end of this segment
        for (; pidx != placeHolder.size(); pidx++) {
          if (placeHolder[pidx] - current_indices.back() > 1) break;
          current_indices.push_back( placeHolder[pidx] );
        }
        // DEBUG
        mprintf("\tSegment:");
        for (Iarray::const_iterator it = current_indices.begin();
                                    it != current_indices.end(); ++it)
          mprintf(" %i", *it + 1);
        // Get coordinates of residues bordering segment.
        int prev_res = sTop[current_indices.front()].ResNum() - 1;
        int next_res = sTop[current_indices.back() ].ResNum() + 1;
        mprintf(" (prev_res=%i, next_res=%i)\n", prev_res+1, next_res+1);
        // Interpolation needs an atom on both sides of the segment. A segment
        // at the very start or end has none on one side, so leave it as is
        // rather than indexing outside the frame.
        const int prev_atom = current_indices.front() - 1;
        const int next_atom = current_indices.back()  + 1;
        if (prev_atom < 0 || next_atom >= sTop.Natom()) {
          mprintf("Warning: Placeholder segment is at a terminus; coordinates not interpolated.\n");
        } else {
          Vec3 prev_crd(sFrame.XYZ(prev_atom));
          Vec3 next_crd(sFrame.XYZ(next_atom));
          prev_crd.Print("prev_crd");
          next_crd.Print("next_crd");
          // Evenly space the placeholders between the two bordering atoms.
          Vec3 crd_step = (next_crd - prev_crd) / (double)(current_indices.size()+1);
          crd_step.Print("crd_step");
          double* xyz = sFrame.xAddress() + (current_indices.front() * 3);
          for (unsigned int i = 0; i != current_indices.size(); i++, xyz += 3) {
            prev_crd += crd_step;
            xyz[0] = prev_crd[0];
            xyz[1] = prev_crd[1];
            xyz[2] = prev_crd[2];
          }
        }
        current_indices.clear();
      }
    }
  }
  //Topology* sTop = qref.Parm().partialModifyStateByMask( sMask );
  //if (sTop == 0) return 1;
  //Frame sFrame(qref.Coord(), sMask);
  // Write output traj
  Trajout_Single trajout;
  if (trajout.PrepareTrajWrite(outfilename, argIn, &sTop, CoordinateInfo(), 1, fmt)) return 1;
  if (trajout.WriteSingle(0, sFrame)) return 1;
  trajout.EndTraj();
  return 0;
}
Example #10
0
// DataIO_Mdout::ReadData()
/** Read energy terms from an mdout file.
  * The '2.  CONTROL  DATA' section is scanned for imin, dt, t, ntpr and
  * irest. For restarts the initial time is taken from the 'begin time' line
  * of the '3.  ATOMIC' section. In the '4.  RESULTS' section every
  * 'name = value' pair recognised by getEindex() is recorded; the values
  * collected so far are stored as one frame each time the trigger line for
  * the run type is encountered. Reading ends at the final/average section
  * or at end of file.
  * \param fname File to read.
  * \param datasetlist List that receives one data set per energy term found.
  * \param dsname Name used for the data sets.
  * \return 0 on success, 1 (or the value of EOF_ERROR()) on error.
  */
int DataIO_Mdout::ReadData(FileName const& fname,
                            DataSetList& datasetlist, std::string const& dsname)
{
  mprintf("\tReading from mdout file: %s\n", fname.full());
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  const char* ptr = buffer.Line();
  if (ptr == 0) {
    mprinterr("Error: Nothing in MDOUT file: %s\n", fname.full());
    return 1;
  }
  // ----- PARSE THE INPUT SECTION ----- 
  int imin = -1;           // imin for this file
  const char* Trigger = 0; // Trigger for storing energies, must be 8 chars long.
  int frame = 0;           // Frame counter for this file
  double dt = 1.0;         // Timestep for this file (MD)
  double t0 = 0.0;         // Initial time for this file (MD)
  int ntpr = 1;            // Value of ntpr
  int irest = 0;           // Value of irest
  while ( ptr != 0 && strncmp(ptr, "   2.  CONTROL  DATA", 20) != 0 )
    ptr = buffer.Line();
  if (ptr == 0) return EOF_ERROR();
  // Determine whether this is dynamics or minimization, get dt
  // Skip three lines (dashes, blank, title); bail out if the file ends here
  // so the comparison below never sees a null line.
  for (int nskip = 0; nskip < 3; nskip++) {
    ptr = buffer.Line();
    if (ptr == 0) return EOF_ERROR();
  }
  while ( strncmp(ptr, "   3.  ATOMIC", 13) != 0 ) 
  {
    ArgList mdin_args( ptr, " ,=" ); // Remove commas, equal signs
    // Scan for stuff we want
    //mprintf("DEBUG:\tInput[%i] %s", mdin_args.Nargs(), mdin_args.ArgLine());
    // Arguments are taken as key/value pairs, hence the stride of 2.
    for (int col=0; col < mdin_args.Nargs(); col += 2) {
      int col1 = col + 1;
      if (mdin_args[col] == "imin") {
        imin = convertToInteger( mdin_args[ col1 ] );
        if (debug_ > 0) mprintf("\t\tMDIN: imin is %i\n", imin);
        // Set a trigger for printing. For imin5 this is the word minimization.
        // For imin0 or imin1 this is NSTEP.
        if      (imin==0) Trigger = " NSTEP =";
        else if (imin==1) Trigger = "   NSTEP";
        else if (imin==5) Trigger = "minimiza";
        // Since imin0 and imin1 first trigger has no data, set frame 1 lower.
        if (imin==1 || imin==0) frame = -1;
      } else if (mdin_args[col] == "dt") {
        dt = convertToDouble( mdin_args[ col1 ] );
        if (debug_ > 0) mprintf("\t\tMDIN: dt is %f\n", dt);
      } else if (mdin_args[col] == "t") {
        t0 = convertToDouble( mdin_args[ col1 ] );
        if (debug_ > 0) mprintf("\t\tMDIN: t is %f\n", t0);
      } else if (mdin_args[col] == "ntpr") {
        ntpr = convertToInteger( mdin_args[ col1 ] );
        if (debug_ > 0) mprintf("\t\tMDIN: ntpr is %i\n", ntpr);
      } else if (mdin_args[col] == "irest") {
        irest = convertToInteger( mdin_args[ col1 ] );
        if (debug_ > 0) mprintf("\t\tMDIN: irest is %i\n", irest);
      }
    }
    ptr = buffer.Line();
    if (ptr == 0) return EOF_ERROR();
  }
  if (Trigger == 0) {
    mprinterr("Error: Could not determine whether MDOUT is md, min, or post-process.\n");
    return 1;
  }
  // ----- PARSE THE ATOMIC ... SECTION -----
  while ( ptr != 0 && strncmp(ptr, "   4.  RESULTS", 14) != 0 )
  {
    ptr = buffer.Line();
    // If run is a restart, set the initial time value.
    // The line just read may be null at end of file; that case is handled
    // by the loop condition and the check after the loop.
    if (ptr != 0 && irest == 1) {
      if (strncmp(ptr, " begin time", 11) == 0) {
        sscanf(ptr, " begin time read from input coords = %lf", &t0);
        if (debug_ > 0) mprintf("\t\tMD restart initial time= %f\n", t0);
      }
    }
  }
  if (ptr == 0) return EOF_ERROR();
  // ----- PARSE THE RESULTS SECTION -----
  bool finalE = false;
  int nstep;
  int minStep = 0; // For imin=1 only
  if (irest == 0)
    nstep = 0;
  else
    nstep = ntpr;
  double Energy[N_FIELDTYPES];
  std::fill( Energy, Energy+N_FIELDTYPES, 0.0 );
  std::vector<bool> EnergyExists(N_FIELDTYPES, false);
  DataSetList::Darray TimeVals;
  DataSetList::DataListType inputSets(N_FIELDTYPES, 0);
  Sarray Name(2);
  double time = 0.0;
  while (ptr != 0) {
    // Check for end of imin 0 or 1 run; do not record Average and Stdevs
    if ( (imin == 1 && (strncmp(ptr, "                    FINAL", 25) == 0 ||
                        strncmp(ptr, "   5.  TIMINGS",            14) == 0   )) ||
         (imin == 0 && strncmp(ptr, "      A V", 9) == 0))
      finalE = true;
    // Check for '| TI region  2' to prevent reading duplicate energies
    if ( strncmp(ptr, "| TI region  2", 14) == 0 ) {
      while (ptr != 0 && !(ptr[0] == ' ' && ptr[1] == '-'))
        ptr = buffer.Line();
      if (ptr == 0) return EOF_ERROR();
    }
    // Record set for energy post-processing
    if (imin == 5 && strncmp(ptr, "minimizing", 10) == 0)
      nstep = atoi( ptr + 22 );
    // MAIN OUTPUT ROUTINE
    // If the trigger has been reached print output.
    // For imin0 and imin1 the first trigger will have no data.
    // If the end of the file has been reached print then exit.
    if ( strncmp(ptr, Trigger, 8) == 0 || finalE ) {
      if (frame > -1) {
        // Store all energies present.
        for (int i = 0; i < (int)N_FIELDTYPES; i++) {
          if (EnergyExists[i]) {
            if (inputSets[i] == 0) {
              MetaData md( dsname, Enames[i] );
              md.SetLegend( dsname + "_" + Enames[i] );
              inputSets[i] = new DataSet_double();
              inputSets[i]->SetMeta( md );
            }
            // Since energy terms can appear and vanish over the course of the
            // mdout file, resize if necessary.
            if (frame > (int)inputSets[i]->Size())
              ((DataSet_double*)inputSets[i])->Resize( frame );
            ((DataSet_double*)inputSets[i])->AddElement( Energy[i] );
          }
        }
        TimeVals.push_back( time );
        nstep += ntpr;
      }
      frame++;
      if (finalE) break;
    }
    // Check for NSTEP in minimization or post-processing. Values will be
    // on the next line. NOTE: NSTEP means something different for imin=5.
    if ((imin == 1 || imin == 5) && strncmp(ptr, "   NSTEP", 8) == 0) {
      ptr = buffer.Line(); // Get next line
      // File ended right after the NSTEP header; nothing more to read.
      if (ptr == 0) break;
      //sscanf(ptr, " %6lf    %13lE  %13lE  %13lE", Energy+NSTEP, Energy+EPtot, Energy+RMS, Energy+GMAX);
      sscanf(ptr, " %i %lE %lE %lE", &minStep, Energy+EPtot, Energy+RMS, Energy+GMAX);
      EnergyExists[EPtot] = true;
      EnergyExists[RMS] = true;
      EnergyExists[GMAX] = true;
      ptr = buffer.Line();
      // File ended after the NSTEP values; there is no line left to tokenize.
      if (ptr == 0) break;
    }
    // Tokenize line, scan through until '=' is reached; value after is target.
    int ntokens = buffer.TokenizeLine(" ");
    if (ntokens > 0) {
      int nidx = 0;
      Name[0].clear();
      Name[1].clear();
      for (int tidx = 0; tidx < ntokens; tidx++) {
        const char* tkn = buffer.NextToken();
        if (tkn[0] == '=') {
          // The (up to two) tokens preceding '=' form the term name.
          FieldType Eindex = getEindex(Name);
          tkn = buffer.NextToken();
          ++tidx;
          if (tkn == 0)
            mprintf("Warning: No numerical value, line %i column %i. Skipping.\n",
                    buffer.LineNumber(), tidx+1);
          else if (tkn[0] == '*' || tkn[0] == 'N') // Assume if number begins with N it is NaN
            mprintf("Warning: Numerical overflow detected, line %i column %i. Skipping.\n",
                     buffer.LineNumber(), tidx+1);
          else {
            if (Eindex != N_FIELDTYPES) {
              Energy[Eindex] = atof( tkn );
              EnergyExists[Eindex] = true;
            }
          }
          nidx = 0;
          Name[0].clear();
          Name[1].clear();
        } else {
          if (nidx > 1) break; // Two tokens, no '=' found. Not an E line.
          Name[nidx++].assign( tkn );
        }
      }
    }
    // Set time
    switch (imin) {
      case 5: time = (double)nstep + t0; break;
      case 1: time = (double)minStep + t0; break;
      case 0: time = ((double)nstep * dt) + t0; break;
    }
    // Read in next line
    ptr = buffer.Line();
  }
  mprintf("\t%i frames\n", frame);
  buffer.CloseFile();
  std::string Xlabel;
  if      (imin == 5) Xlabel.assign("Set");
  else if (imin == 1) Xlabel.assign("Nstep");
  else                Xlabel.assign("Time"); // imin == 0
  if (datasetlist.AddOrAppendSets( Xlabel, TimeVals, inputSets )) return 1;
  return 0;
}
Example #11
0
/** Read data from an XVG file. Leading '#' lines are skipped, set legends
  * are taken from the '@' lines that follow, and one double-precision data
  * set is created per legend. Each data line must have one column more than
  * the number of legends; the first column is used as the X value. Lines
  * with a different column count are reported and skipped.
  * \param fname File to read.
  * \param datasetlist List that receives the data sets.
  * \param dsname Name used for the data sets.
  * \return 1 on error, otherwise the result of adding the sets to the list.
  */
int DataIO_XVG::ReadData(FileName const& fname, 
                         DataSetList& datasetlist, std::string const& dsname)
{
  std::vector<std::string> setLegends;
  BufferedLine infile;

  if (infile.OpenFileRead( fname )) return 1;
  const char* cur = infile.Line();
  if (cur == 0) return 1;
  // Move past comment lines.
  for (; cur != 0 && cur[0] == '#'; cur = infile.Line()) {}
  // Gather a legend from each qualifying '@' line.
  for (; cur != 0 && cur[0] == '@'; cur = infile.Line()) {
    ArgList args(cur, " \t");
    if (args.Nargs() <= 3 || args[1][0] != 's') continue;
    std::string text = args.GetStringKey("legend");
    if (text.empty()) continue;
    // Spaces would interfere with data set selection; use underscores.
    for (std::string::size_type c = 0; c != text.size(); ++c) {
      if (text[c] == ' ') text[c] = '_';
    }
    setLegends.push_back( text );
  }
  if (setLegends.empty()) {
    mprinterr("Error: No set legends found in XVG file.\n");
    return 1;
  }
  if (cur == 0) {
    mprinterr("Error: No data in XVG file.\n");
    return 1;
  }
  // One data set per legend, indexed in legend order.
  DataSetList::DataListType inputSets;
  for (unsigned int idx = 0; idx != setLegends.size(); idx++) {
    MetaData meta( dsname, idx );
    meta.SetLegend( setLegends[idx] );
    DataSet_double* newSet = new DataSet_double();
    if (newSet == 0) return 1;
    newSet->SetMeta( meta );
    inputSets.push_back( newSet );
  }
  mprintf("\t%s has %zu columns of data.\n", fname.base(), inputSets.size());
  // Positioned at the first data line. The first column is taken as X.
  DataSetList::Darray Xvals;
  const int expectedCols = (int)inputSets.size() + 1;
  for (; cur != 0; cur = infile.Line()) {
    const int ncols = infile.TokenizeLine(" \t");
    if (ncols == expectedCols) {
      Xvals.push_back( atof( infile.NextToken() ) );
      for (unsigned int idx = 0; idx != inputSets.size(); idx++)
        ((DataSet_double*)inputSets[idx])->AddElement( atof( infile.NextToken() ) );
    } else {
      mprinterr("Error: Line %i: %i columns != expected # cols %i\n", infile.LineNumber(),
                ncols, expectedCols);
    }
  }
  infile.CloseFile();
  return (datasetlist.AddOrAppendSets( "", Xvals, inputSets ));
}
Example #12
0
// DataIO_OpenDx::LoadGrid()
int DataIO_OpenDx::LoadGrid(const char* filename, DataSet& ds)
{
  // TODO: This may need to be changed if new 3D types introduced.
  DataSet_GridFlt& grid = static_cast<DataSet_GridFlt&>( ds );
  // Open file
  BufferedLine infile;
  if (infile.OpenFileRead(filename)) return 1;
  // Skip comments
  std::string line = infile.GetLine();
  while (!line.empty() && line[0] == '#') {
    mprintf("\t%s", line.c_str());
    line = infile.GetLine();
  }
  if (line.empty()) {
    mprinterr("Error: Unexpected EOF in DX file %s\n", filename);
    return 1;
  }
  // object 1 class gridpositions counts nx ny nz
  int nx, ny, nz;
  if (sscanf(line.c_str(), "object 1 class gridpositions counts %d %d %d",
             &nx, &ny, &nz) != 3)
  {
    mprinterr("Error: Reading grid counts from DX file %s\n", filename);
    return 1;
  }
  // origin xmin ymin zmin 
  double oxyz[3];
  line = infile.GetLine();
  if (sscanf(line.c_str(), "origin %lg %lg %lg", oxyz, oxyz+1, oxyz+2) != 3) {
    mprinterr("Error: Reading origin line from DX file %s\n", filename);
    return 1;
  }
  // 3x 'delta hx hy hz'
  double dxyz[3];
  Matrix_3x3 delta(0.0);
  bool isNonortho = false;
  int midx = 0;
  for (int i = 0; i < 3; i++, midx += 3) {
    line = infile.GetLine();
    if (sscanf(line.c_str(), "delta %lg %lg %lg", dxyz, dxyz+1, dxyz+2) != 3) {
      mprinterr("Error: Reading delta line from DX file %s\n", filename);
      return 1;
    }
    // Check that only 1 of the 3 values is non-zero. Otherwise non-ortho.
    if (dxyz[i] != (dxyz[0] + dxyz[1] + dxyz[2]))
      isNonortho = true;
    delta[midx  ] = dxyz[0];
    delta[midx+1] = dxyz[1];
    delta[midx+2] = dxyz[2];
  }
  // object 2 class gridconnections counts nx ny nz
  int nxyz[3];
  line = infile.GetLine();
  if (sscanf(line.c_str(), "object 2 class gridconnections counts %d %d %d",
             nxyz, nxyz+1, nxyz+2) != 3)
  {
    mprinterr("Error: Reading grid connections from DX file %s\n", filename);
    return 1;
  }
  // Sanity check for conflicting grid dimensions
  if (nxyz[0] != nx || nxyz[1] != ny || nxyz[2] != nz) {
    mprinterr("Error: Conflicting grid dimensions in input DX density file %s.\n",
              filename);
    mprinterr("Error: Grid positions: %d %d %d\n", nx, ny, nz);
    mprinterr("Error: Grid connections: %d %d %d\n", nxyz[0], nxyz[1], nxyz[2]);
    return 1;
  }
  // object 3 class array type <type> rank <r> times <i>
  // This line describes whether data will be in binary or ascii format.
  line = infile.GetLine();
  if (line.compare(0, 8, "object 3") != 0) {
    mprinterr("Error: DX file %s; expected 'object 3 ...', got [%s]\n",
              filename, line.c_str());
    return 1;
  }
  if (line.find("binary") != std::string::npos) {
    mprinterr("Error: DX file %s; binary DX files not yet supported.\n", filename);
    return 1;
  }
  // Allocate Grid from dims, origin, and spacing
  int err = 0;
  if (isNonortho) {
    // Create unit cell from delta and bins.
    delta[0] *= (double)nx; delta[1] *= (double)nx; delta[2] *= (double)nx;
    delta[3] *= (double)ny; delta[4] *= (double)ny; delta[5] *= (double)ny;
    delta[6] *= (double)nz; delta[7] *= (double)nz; delta[8] *= (double)nz;
    err = grid.Allocate_N_O_Box(nx,ny,nz, Vec3(oxyz), Box(delta));
  } else
    err = grid.Allocate_N_O_D(nx,ny,nz, Vec3(oxyz), Vec3(delta[0],delta[4],delta[8]));
  if (err != 0) { 
    mprinterr("Error: Could not allocate grid.\n");
    return 1;
  }
  grid.GridInfo();
  // Read in data
  size_t gridsize = grid.Size();
  mprintf("\tReading in %zu data elements from DX file.\n", gridsize); 
  size_t ndata = 0;
  ProgressBar progress( gridsize );
  while (ndata < gridsize) {
    if (infile.Line() == 0) {
      mprinterr("Error: Unexpected EOF hit in %s\n", filename);
      return 1;
    }
    int nTokens = infile.TokenizeLine(" \t");
    for (int j = 0; j < nTokens; j++) {
      if (ndata >= gridsize) {
        mprintf("Warning: Too many grid points found. Only reading %zu grid points.\n", gridsize);
        mprintf("Warning: Check that data region ends with a newline.\n");
        break;
      }
      grid[ndata++] = (float)atof(infile.NextToken());
    }
    progress.Update( ndata );
  }
  return 0;
}
Example #13
0
// DataIO_Std::Read_Vector()
int DataIO_Std::Read_Vector(std::string const& fname, 
                            DataSetList& datasetlist, std::string const& dsname)
{
  // Buffer file
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  mprintf("\tAttempting to read vector data.\n");
  // Skip comments
  const char* linebuffer = buffer.Line();
  while (linebuffer != 0 && linebuffer[0] == '#')
    linebuffer = buffer.Line();
  // Determine format. Expect 3 (VXYZ), 6 (VXYZ OXYZ), or
  // 9 (VXYZ OXYZ VXYZ+OXYZ) values, optionally with indices.
  int ntokens = buffer.TokenizeLine( SEPARATORS );
  int ncols = ntokens; // Number of columns of vector data.
  int nv = 0;          // Number of columns to actually read from (3 or 6).
  bool hasIndex;
  if (ntokens < 1) {
    mprinterr("Error: Could not tokenize line.\n");
    return 1;
  }
  if (ncols == 3 || ncols == 6 || ncols == 9)
    hasIndex = false;
  else if (ncols == 4 || ncols == 7 || ncols == 10) {
    hasIndex = true;
    mprintf("Warning: Not reading vector data indices.\n");
  } else {
    mprinterr("Error: Expected 3, 6, or 9 columns of vector data, got %i.\n", ncols);
    return 1;
  }
  if (ncols >= 6) {
    nv = 6;
    mprintf("\tReading vector X Y Z and origin X Y Z values.\n");
  } else {
    nv = 3;
    mprintf("\tReading vector X Y Z values.\n");
  }
  // Create set
  DataSet_Vector* ds = new DataSet_Vector();
  if (ds == 0) return 1;
  ds->SetMeta( dsname );
  // Read vector data
  double vec[6];
  std::fill(vec, vec+6, 0.0);
  size_t ndata = 0;
  while (linebuffer != 0) {
    if (hasIndex)
      ntokens = sscanf(linebuffer, "%*f %lf %lf %lf %lf %lf %lf",
                       vec, vec+1, vec+2, vec+3, vec+4, vec+5);
    else
      ntokens = sscanf(linebuffer, "%lf %lf %lf %lf %lf %lf",
                       vec, vec+1, vec+2, vec+3, vec+4, vec+5);
    if (ntokens != nv) {
      mprinterr("Error: In vector file, line %i: expected %i values, got %i\n",
                buffer.LineNumber(), nv, ntokens);
      break;
    }
    ds->Add( ndata++, vec ); 
    linebuffer = buffer.Line();
  }
  return (datasetlist.AddOrAppendSets("", DataSetList::Darray(), DataSetList::DataListType(1, ds)));
}
Example #14
0
// DataIO_Std::Read_3D()
int DataIO_Std::Read_3D(std::string const& fname, 
                        DataSetList& datasetlist, std::string const& dsname)
{
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  mprintf("\tData will be read as 3D grid: X Y Z Value\n");
  if (binCorners_)
    mprintf("\tAssuming X Y Z are bin corners\n");
  else
    mprintf("\tAssuming X Y Z are bin centers\n");
  const char* ptr = buffer.Line();
  // Check if #counts is present
  if (strncmp(ptr,"#counts",7)==0) {
    mprintf("\tReading grid dimensions.\n");
    unsigned int counts[3];
    sscanf(ptr+7,"%u %u %u", counts, counts+1, counts+2);
    for (int i = 0; i < 3; i++) {
      if (dims_[i] == 0)
        dims_[i] = counts[i];
      else if (dims_[i] != (size_t)counts[i])
        mprintf("Warning: Specified size for dim %i (%zu) differs from size in file (%u)\n",
                i, dims_[i], counts[i]);
    }
    ptr = buffer.Line();
  }
  if (dims_[0] == 0 || dims_[1] == 0 || dims_[2] == 0) {
    mprinterr("Error: 'dims' not specified for 'read3d' and no dims in file\n");
    return 1;
  }
  // Check if #origin is present
  if (strncmp(ptr,"#origin",7)==0) {
    mprintf("\tReading grid origin.\n");
    double oxyz[3];
    sscanf(ptr+7,"%lf %lf %lf", oxyz, oxyz+1, oxyz+2);
    for (int i = 0; i < 3; i++) {
      if (!originSpecified_)
        origin_[i] = oxyz[i];
      else if (origin_[i] != oxyz[i])
        mprintf("Warning: Specified origin for dim %i (%g) differs from origin in file (%g)\n",
                i, origin_[i], oxyz[i]);
    }
    ptr = buffer.Line();
  }
  // Check if #delta is present
  bool nonortho = false;
  Box gridBox;
  if (strncmp(ptr,"#delta",6)==0) {
    mprintf("\tReading grid deltas.\n");
    double dvals[9];
    int ndvals = sscanf(ptr+6,"%lf %lf %lf %lf %lf %lf %lf %lf %lf", dvals,
                        dvals+1, dvals+2, dvals+3, dvals+4, dvals+5,
                        dvals+6, dvals+7, dvals+8);
    if (ndvals == 3) {
      for (int i = 0; i < 3; i++) {
        if (!deltaSpecified_)
          delta_[i] = dvals[i];
        else if (delta_[i] != dvals[i])
          mprintf("Warning: Specified delta for dim %i (%g) differs from delta in file (%g)\n",
                  i, delta_[i], dvals[i]);
      }
    } else {
      nonortho = true;
      dvals[0] *= (double)dims_[0]; dvals[1] *= (double)dims_[0]; dvals[2] *= (double)dims_[0];
      dvals[3] *= (double)dims_[1]; dvals[4] *= (double)dims_[1]; dvals[5] *= (double)dims_[1];
      dvals[6] *= (double)dims_[2]; dvals[7] *= (double)dims_[2]; dvals[8] *= (double)dims_[2];
      gridBox = Box(Matrix_3x3(dvals));
    }
    ptr = buffer.Line();
  }
  // Get or allocate data set
  DataSet::DataType dtype;
  if (prec_ == DOUBLE) {
    dtype = DataSet::GRID_DBL;
    mprintf("\tGrid is double precision.\n");
  } else {
    dtype = DataSet::GRID_FLT;
    mprintf("\tGrid is single precision.\n");
  }
  MetaData md( dsname );
  DataSet_3D* ds = 0;
  DataSet* set = datasetlist.CheckForSet( md );
  if (set == 0) {
    ds = (DataSet_3D*)datasetlist.AddSet(dtype, dsname);
    if (ds == 0) return 1;
    int err = 0;
    if (nonortho)
      err = ds->Allocate_N_O_Box(dims_[0], dims_[1], dims_[2], origin_, gridBox);
    else
      err = ds->Allocate_N_O_D(dims_[0], dims_[1], dims_[2], origin_, delta_);
    if (err != 0) return 1;
  } else {
    mprintf("\tAppending to existing set '%s'\n", set->legend());
    if (set->Group() != DataSet::GRID_3D) {
      mprinterr("Error: Set '%s' is not a grid set, cannot append.\n", set->legend());
      return 1;
    }
    ds = (DataSet_3D*)set;
    // Check that dimensions line up. TODO check origin etc too?
    if (dims_[0] != ds->NX() ||
        dims_[1] != ds->NY() ||
        dims_[2] != ds->NZ())
    {
      mprintf("Warning: Specified grid dimensions (%zu %zu %zu) do not match\n"
              "Warning:   '%s' dimensions (%zu %zu %zu)\n", dims_[0], dims_[1], dims_[2],
              ds->legend(), dims_[0], dims_[1], dims_[2]);
    }
  }
  ds->GridInfo();
  // Determine if an offset is needed
  Vec3 offset(0.0);
  if (binCorners_) {
    // Assume XYZ coords are of bin corners. Need to offset coords by half
    // the voxel size.
    if (!ds->Bin().IsOrthoGrid()) {
      GridBin_Nonortho const& b = static_cast<GridBin_Nonortho const&>( ds->Bin() );
      offset = b.Ucell().TransposeMult(Vec3( 1/(2*(double)ds->NX()),
                                             1/(2*(double)ds->NY()),
                                             1/(2*(double)ds->NZ()) ));
    } else {
      GridBin_Ortho const& b = static_cast<GridBin_Ortho const&>( ds->Bin() );
      offset = Vec3(b.DX()/2, b.DY()/2, b.DZ()/2);
    }
  }
  if (debug_ > 0)
    mprintf("DEBUG: Offset: %E %E %E\n", offset[0], offset[1], offset[2]);
  // Read file
  unsigned int nvals = 0;
  while (ptr != 0) {
    if (ptr[0] != '#') {
      int ntokens = buffer.TokenizeLine( SEPARATORS );
      if (ntokens != 4) {
        mprinterr("Error: Expected 4 columns (X, Y, Z, data), got %i\n", ntokens);
        return 1;
      }
      nvals++;
      double xyzv[4];
      xyzv[0] = atof( buffer.NextToken() );
      xyzv[1] = atof( buffer.NextToken() );
      xyzv[2] = atof( buffer.NextToken() );
      xyzv[3] = atof( buffer.NextToken() );
      size_t ix, iy, iz;
      if ( ds->Bin().Calc(xyzv[0]+offset[0],
                          xyzv[1]+offset[1],
                          xyzv[2]+offset[2], ix, iy, iz ) )
        ds->UpdateVoxel(ds->CalcIndex(ix, iy, iz), xyzv[3]);
      else
        mprintf("Warning: Coordinate out of bounds (%g %g %g, ), line %i\n",
                xyzv[0], xyzv[1], xyzv[2], buffer.LineNumber());
    }
    ptr = buffer.Line();
  }
  mprintf("\tRead %u values.\n", nvals);
  return 0;
}
Example #15
0
/** Load an ASCII pairwise cluster matrix into a new CMATRIX data set.
  * The caller is expected to have already verified the file format, so no
  * format checking is done here. After the title line, each line is:
  *   <int> <int> <value>
  * The title line may carry an optional 'nframes <#>' key giving the
  * original number of frames.
  * \param fname Name of the file to read.
  * \param datasetlist List to which the CMATRIX set is added.
  * \param dsname Name of the new set.
  * \return 0 on success, 1 on error.
  */
int DataIO_Std::ReadCmatrix(FileName const& fname,
                            DataSetList& datasetlist, std::string const& dsname)
{
  // Create the destination set first.
  DataSet* ds = datasetlist.AddSet( DataSet::CMATRIX, dsname );
  if (ds == 0) return 1;
  DataSet_Cmatrix_MEM& Mat = static_cast<DataSet_Cmatrix_MEM&>( *ds );
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  // Title line: skip its first character and look for 'nframes'.
  const char* ptr = buffer.Line();
  ArgList header;
  header.SetList(ptr+1, SEPARATORS );
  const int nframes = header.getKeyInt("nframes", -1);
  // One entry per frame: 'F' when the frame appears in the matrix, 'T'
  // otherwise. Used below to detect sieving.
  std::vector<char> sieveStatus;
  if (nframes > 0)
    sieveStatus.assign(nframes, 'T');
  // All matrix values in file order.
  std::vector<float> Vals;
  // Frame indices are only examined while lines still belong to the first
  // row (first index equal to that of the first data line).
  bool checkSieve = true;
  int f1 = -1, f2 = -1, firstf1 = -1;
  float val = 0;
  for (ptr = buffer.Line(); ptr != 0; ptr = buffer.Line())
  {
    if (!checkSieve) {
      // Past the first row: only the value is needed.
      sscanf(ptr, "%*i %*i %f", &val);
    } else {
      sscanf(ptr, "%i %i %f", &f1, &f2, &val);
      if (f2 > (int)sieveStatus.size())
        sieveStatus.resize(f2, 'T');
      if (firstf1 == -1) {
        // Very first data line: both frames are present.
        sieveStatus[f1-1] = 'F';
        sieveStatus[f2-1] = 'F';
        firstf1 = f1;
      } else if (f1 > firstf1) {
        checkSieve = false;
      } else {
        sieveStatus[f2-1] = 'F';
      }
    }
    Vals.push_back( val );
  }
  // DEBUG
  //mprintf("Sieved array:\n");
  //for (unsigned int i = 0; i < sieveStatus.size(); i++)
  //  mprintf("\t%6u %c\n", i+1, sieveStatus[i]);
  // Work out the spacing between present frames. A value of -2 marks an
  // irregular (random) spacing, after which only rows are counted.
  int sieveDelta = 1;
  int prevPresent = -1;     // index of the previous present frame
  bool haveDelta = false;   // true once a spacing has been measured
  int actual_nrows = 0;
  const int nStatus = (int)sieveStatus.size();
  for (int idx = 0; idx < nStatus; idx++) {
    if (sieveStatus[idx] != 'F') continue;
    actual_nrows++;
    if (sieveDelta == -2) continue;
    if (prevPresent == -1) {
      prevPresent = idx;
    } else if (!haveDelta) {
      sieveDelta = idx - prevPresent;
      prevPresent = idx;
      haveDelta = true;
    } else {
      // Random. No need to calculate sieveDelta anymore.
      if (idx - prevPresent != sieveDelta)
        sieveDelta = -2;
      prevPresent = idx;
    }
  }
  if (sieveDelta == -2) {
    // Random sieve. Estimate the original sieve value as the frame count
    // divided by the row count, rounded up, and store it negated.
    const int o_frames = (int)sieveStatus.size();
    int o_sieve_value = o_frames / actual_nrows;
    if ( (o_frames % actual_nrows) != 0 )
      o_sieve_value++;
    sieveDelta = -o_sieve_value;
  }
  if (debug_ > 0)
    mprintf("DEBUG: sieve %i, actual_nrows= %i\n", sieveDelta, actual_nrows);
  if (sieveDelta != 1 && nframes == -1)
    mprintf("Warning: Pairwise distance matrix file contains sieved frames but\n"
            "Warning:   number of original frames is not present in file - this\n"
            "Warning:   may lead to ignored frames in cluster output. Please add\n"
            "Warning:   'nframes <# original frames>' to the pairwise distance\n"
            "Warning:   matrix file header, e.g. '#F1 F2 pw.dat nframes 1000'.\n");

  // Store values and sieve information in the matrix set.
  if (Mat.Allocate( DataSet::SizeArray(1, actual_nrows) )) return 1;
  std::copy( Vals.begin(), Vals.end(), Mat.Ptr() );
  Mat.SetSieveFromArray(sieveStatus, sieveDelta);

  return 0;
}
Example #16
0
/** Read column-formatted 1D data from a text file.
  * Leading lines whose first non-whitespace character is '#' are treated as
  * column labels (the last such line wins). The type of each column
  * (integer, double, or string) is determined from the first data line.
  * If an index column is set (indexcol_), or the first label is "Frame", that
  * column supplies the X values and is not added as a data set.
  * Columns outside onlycols_ (when it is not empty) are skipped.
  * \param fname Name of the file to read.
  * \param datasetlist List that receives the data sets read.
  * \param dsname Base name for the new data sets; the set index is the
  *        1-based column number.
  * \return 0 on success, 1 on error, or IS_ASCII_CMATRIX if the header looks
  *         like '#F1 F2 <name> [nframes <#>]' (cluster pairwise matrix), in
  *         which case no data is read here.
  */
int DataIO_Std::Read_1D(std::string const& fname, 
                        DataSetList& datasetlist, std::string const& dsname)
{
  ArgList labels;
  bool hasLabels = false;
  // Buffer file
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;

  // Read the first line. Attempt to determine the number of columns
  const char* linebuffer = buffer.Line();
  if (linebuffer == 0) return 1;
  int ntoken = buffer.TokenizeLine( SEPARATORS );
  if ( ntoken == 0 ) {
    mprinterr("Error: No columns detected in %s\n", buffer.Filename().full());
    return 1;
  }

  // Try to skip past any comments. If line begins with a '#', assume it
  // contains labels. 
  bool isCommentLine = true;
  const char* ptr = linebuffer;
  while (isCommentLine) {
    // Skip past any whitespace. The cast keeps isspace() well-defined for
    // characters with negative char values.
    while ( *ptr != '\0' && isspace((unsigned char)*ptr) ) ++ptr;
    // Assume these are column labels until proven otherwise.
    if (*ptr == '#') {
      labels.SetList(ptr+1, SEPARATORS );
      if (!labels.empty()) {
        hasLabels = true;
        // If first label is Frame assume it is the index column
        if (labels[0] == "Frame" && indexcol_ == -1)
          indexcol_ = 0;
      }
      linebuffer = buffer.Line();
      ptr = linebuffer;
      if (ptr == 0) {
        mprinterr("Error: No data found in file.\n");
        return 1;
      }
    } else 
      // Not a recognized comment character, assume data.
      isCommentLine = false;
  }
  // Special case: check if labels are '#F1   F2 <name> [nframes <#>]'. If so, assume
  // this is a cluster matrix file.
  if ((labels.Nargs() == 3 || labels.Nargs() == 5) && labels[0] == "F1" && labels[1] == "F2")
  {
    mprintf("Warning: Header format '#F1 F2 <name>' detected, assuming cluster pairwise matrix.\n");
    return IS_ASCII_CMATRIX;
  }
  // Column user args start from 1
  if (indexcol_ > -1)
    mprintf("\tUsing column %i as index column.\n", indexcol_ + 1);

  // Should be at first data line. Tokenize the line.
  ntoken = buffer.TokenizeLine( SEPARATORS );
  // If # of data columns does not match # labels, clear labels.
  if ( !labels.empty() && ntoken != labels.Nargs() ) {
    labels.ClearList();
    hasLabels = false;
  }
  // Index column checks
  if (indexcol_ != -1 ) {
    if (indexcol_ >= ntoken) {
      mprinterr("Error: Specified index column %i is out of range (%i columns).\n",
                indexcol_+1, ntoken);
      return 1;
    }
    if (!onlycols_.Empty() && !onlycols_.InRange(indexcol_)) {
      mprinterr("Error: Index column %i specified, but not in given column range '%s'\n",
                indexcol_+1, onlycols_.RangeArg());
      return 1;
    }
  }

  // Determine the type of data stored in each column. Assume numbers should
  // be read with double precision. Skipped columns get a null entry so that
  // inputSets stays indexable by column number.
  MetaData md( dsname );
  DataSetList::DataListType inputSets;
  unsigned int nsets = 0;
  for (int col = 0; col != ntoken; ++col) {
    std::string token( buffer.NextToken() );
    if (!onlycols_.Empty() && !onlycols_.InRange( col )) {
      mprintf("\tSkipping column %i\n", col+1);
      inputSets.push_back( 0 );
    } else {
      md.SetIdx( col+1 );
      if (hasLabels) md.SetLegend( labels[col] );
      if ( col == indexcol_ ) {
        // Always save the index column as floating point
        inputSets.push_back( new DataSet_double() );
      } else if (validInteger(token)) {
        // Integer number
        inputSets.push_back( datasetlist.Allocate(DataSet::INTEGER) );
      } else if (validDouble(token)) {
        // Floating point number
        inputSets.push_back( new DataSet_double() );
      } else {
        // Assume string. Not allowed for index column.
        // NOTE(review): the index-column check below cannot currently be
        // reached, since 'col == indexcol_' is handled by the first branch
        // above; a non-numeric index column is therefore read with atof().
        if (col == indexcol_) {
          mprintf("Warning: '%s' index column %i has string values. No indices will be read.\n", 
                    buffer.Filename().full(), indexcol_+1);
          indexcol_ = -1;
        }
        inputSets.push_back( new DataSet_string() );
      }
      inputSets.back()->SetMeta( md );
      nsets++;
    }
  }
  if (inputSets.empty() || nsets == 0) {
    mprinterr("Error: No data detected.\n");
    return 1;
  }

  // Read in data. Reading stops at the first line whose column count
  // differs; data read up to that point is kept.
  while (linebuffer != 0) {
    if ( buffer.TokenizeLine( SEPARATORS ) != ntoken ) {
      PrintColumnError(buffer.LineNumber());
      break;
    }
    // Convert data in columns
    for (int i = 0; i < ntoken; ++i) {
      const char* token = buffer.NextToken();
      if (inputSets[i] != 0) {
        if (inputSets[i]->Type() == DataSet::DOUBLE)
          ((DataSet_double*)inputSets[i])->AddElement( atof(token) );
        else if (inputSets[i]->Type() == DataSet::INTEGER)
          ((DataSet_integer*)inputSets[i])->AddElement( atoi(token) );
        else
          ((DataSet_string*)inputSets[i])->AddElement( std::string(token) );
      }
    }
    //Ndata++;
    linebuffer = buffer.Line();
  }
  buffer.CloseFile();
  mprintf("\tDataFile %s has %i columns, %i lines.\n", buffer.Filename().full(),
          ntoken, buffer.LineNumber());

  // Create list containing only data sets. The index column (if any) is
  // kept aside to provide X values.
  DataSetList::DataListType mySets;
  DataSet_double* Xptr = 0;
  for (int idx = 0; idx != (int)inputSets.size(); idx++) {
    if (inputSets[idx] != 0) {
      if ( idx != indexcol_ )
        mySets.push_back( inputSets[idx] );
      else
        Xptr = (DataSet_double*)inputSets[idx];
    }
  }
  mprintf("\tRead %zu data sets.\n", mySets.size());
  std::string Xlabel;
  if (indexcol_ != -1 && indexcol_ < labels.Nargs())
    Xlabel = labels[indexcol_];
  // Report a failure to add the sets instead of silently discarding it.
  int err = 0;
  if (Xptr == 0)
    err = datasetlist.AddOrAppendSets(Xlabel, DataSetList::Darray(), mySets);
  else {
    err = datasetlist.AddOrAppendSets(Xlabel, Xptr->Data(), mySets);
    // The index set is only used for its X values and is not added to the list.
    delete Xptr;
  }
  if (err != 0) return 1;

  return 0;
}