Beispiel #1
0
/** Given an argument with format, DataSet_Name[,min,max,step,bins], check
  * that DataSet_Name exists and is valid. Add the argument to 
  * dimensionArgs and the corresponding dataset to histdata.
  */
int Analysis_Hist::CheckDimension(std::string const& input, DataSetList& DSLin) {
  ArgList arglist;
  // Separate input string by ','
  arglist.SetList(input, ",");
  if (arglist.Nargs()<1) {
    mprinterr("Error: No arguments found in histogram argument: %s\n",input.c_str());
    return 1;
  }

  // First argument should specify dataset name
  if (debug_>0) mprintf("\tHist: Setting up histogram dimension using dataset %s\n",
                       arglist.Command());
  DataSet* dset = DSLin.GetDataSet( arglist[0] );
  if (dset == 0) {
    mprinterr("Error: Dataset %s not found.\n",arglist.Command());
    return 1;
  }

  // For now only 1D scalar data sets can be histogrammed
  if (dset->Group() != DataSet::SCALAR_1D) {
    mprinterr("Error: Cannot histogram data set '%s'\n", dset->legend());
    mprinterr("Error: Currently only 1D scalar data sets can be histogrammed.\n");
    return 1;
  }
  // TODO parse remaining args here, create data structure.
  dimensionArgs_.push_back( arglist );
  histdata_.push_back( (DataSet_1D*)dset );
  return 0;
}
Beispiel #2
0
/** Read cluster matrix file. Can only get here if file has already been
  * determined to be in the proper format, so do no further error checking.
  * Expected format:
  *   <int> <int> <name>
  */
int DataIO_Std::ReadCmatrix(FileName const& fname,
                            DataSetList& datasetlist, std::string const& dsname)
{
  // Allocate output data set
  DataSet* ds = datasetlist.AddSet( DataSet::CMATRIX, dsname );
  if (ds == 0) return 1;
  DataSet_Cmatrix_MEM& Mat = static_cast<DataSet_Cmatrix_MEM&>( *ds );
  // Buffer file
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;
  // Read past title. See if optional 'nframes' key is there.
  const char* ptr = buffer.Line();
  ArgList header;
  header.SetList(ptr+1, SEPARATORS );
  int nframes = header.getKeyInt("nframes", -1);
  // Need to keep track of frame indices so we can check for sieving.
  std::vector<char> sieveStatus;
  if (nframes > 0)
    sieveStatus.assign(nframes, 'T');
  // Keep track of matrix values.
  std::vector<float> Vals;
  // Read file
  bool checkSieve = true;
  int f1 = -1, f2 = -1, firstf1 = -1;
  float val = 0;
  while ( (ptr = buffer.Line()) != 0 )
  {
    if (checkSieve) {
      sscanf(ptr, "%i %i %f", &f1, &f2, &val);
      if (f2 > (int)sieveStatus.size())
        sieveStatus.resize(f2, 'T');
      if (firstf1 == -1) {
        // First values.
        sieveStatus[f1-1] = 'F';
        sieveStatus[f2-1] = 'F';
        firstf1 = f1;
      } else if (f1 > firstf1) {
          checkSieve = false;
      } else {
        sieveStatus[f2-1] = 'F';
      }
    } else {
      sscanf(ptr, "%*i %*i %f", &val);
    }
    Vals.push_back( val );
  }
  // DEBUG
  //mprintf("Sieved array:\n");
  //for (unsigned int i = 0; i < sieveStatus.size(); i++)
  //  mprintf("\t%6u %c\n", i+1, sieveStatus[i]);
  // Try to determine if sieve is random or not.
  int sieveDelta = 1;
  f1 = -1;
  f2 = -1;
  int actual_nrows = 0;
  for (int i = 0; i < (int)sieveStatus.size(); i++) {
    if (sieveStatus[i] == 'F') {
      actual_nrows++;
      if (sieveDelta != -2) {
        if (f1 == -1) {
          f1 = i;
        } else if (f2 == -1) {
          sieveDelta = i - f1;
          f1 = i;
          f2 = i;
        } else {
          int newDelta = i - f1;
          if (newDelta != sieveDelta) {
            // Random. No need to calculate sieveDelta anymore.
            sieveDelta = -2;
          }
          f1 = i;
        }
      }
    }
  }
  if (sieveDelta == -2) {
    // Random sieve. Try to figure out original sieve value.
    int o_frames = (int)sieveStatus.size();
    int o_sieve_value = o_frames / actual_nrows;
    if ( (o_frames % actual_nrows) != 0 )
      o_sieve_value++;
    sieveDelta = -o_sieve_value;
  }
  if (debug_ > 0)
    mprintf("DEBUG: sieve %i, actual_nrows= %i\n", sieveDelta, actual_nrows);
  if (sieveDelta != 1 && nframes == -1)
    mprintf("Warning: Pairwise distance matrix file contains sieved frames but\n"
            "Warning:   number of original frames is not present in file - this\n"
            "Warning:   may lead to ignored frames in cluster output. Please add\n"
            "Warning:   'nframes <# original frames>' to the pairwise distance\n"
            "Warning:   matrix file header, e.g. '#F1 F2 pw.dat nframes 1000'.\n");
  
  // Save cluster matrix
  if (Mat.Allocate( DataSet::SizeArray(1, actual_nrows) )) return 1;
  std::copy( Vals.begin(), Vals.end(), Mat.Ptr() );
  Mat.SetSieveFromArray(sieveStatus, sieveDelta);

  return 0;
}
Beispiel #3
0
// ----- M A I N ---------------------------------------------------------------
int main(int argc, char** argv) {
  SetWorldSilent(true); // No STDOUT output from cpptraj routines.
  std::string topname, crdname, title, bres, pqr, sybyltype, writeconect;
  std::string aatm(" pdbatom"), ter_opt(" terbyres"), box(" sg \"P 1\"");
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::PDBFILE;
  bool ctr_origin = false;
  bool useExtendedInfo = false;
  int res_offset = 0;
  int debug = 0;
  int numSoloArgs = 0;
  for (int i = 1; i < argc; ++i) {
    std::string arg( argv[i] );
    if (arg == "-p" && i+1 != argc && topname.empty()) // Topology
      topname = std::string( argv[++i] );
    else if (arg == "-c" && i+1 != argc && crdname.empty()) // Coords
      crdname = std::string( argv[++i] );
    else if (arg == "-tit" && i+1 != argc && title.empty()) // Title
      title = " title " + std::string( argv[++i] );
    else if (arg == "-offset" && i+1 != argc) // Residue # offset
      res_offset = convertToInteger( argv[++i] );
    else if ((arg == "-d" || arg == "--debug") && i+1 != argc) // Debug level
      debug = convertToInteger( argv[++i] );
    else if (arg == "-h" || arg == "--help") { // Help
      Help(argv[0], true);
      return 0;
    } else if (arg == "-v" || arg == "--version") { // Version info
      WriteVersion();
      return 0;
    } else if (arg == "-aatm") // Amber atom names, include extra pts
      aatm.assign(" include_ep");
    else if (arg == "-sybyl") // Amber atom types to SYBYL
      sybyltype.assign(" sybyltype");
    else if (arg == "-conect") // Write CONECT records from bond info
      writeconect.assign(" conect");
    else if (arg == "-ep") // PDB atom names, include extra pts
      aatm.append(" include_ep");
    else if (arg == "-bres") // PDB residue names
      bres.assign(" pdbres");
    else if (arg == "-ext") // Use extended PDB info from Topology
      useExtendedInfo = true;
    else if (arg == "-ctr")  // Center on origin
      ctr_origin = true;
    else if (arg == "-noter") // No TER cards
      ter_opt.assign(" noter");
    else if (arg == "-nobox") // No CRYST1 record
      box.assign(" nobox");
    else if (arg == "-pqr") { // Charge/Radii in occ/bfactor cols
      pqr.assign(" dumpq");
      ++numSoloArgs;
    } else if (arg == "-mol2") { // output as mol2
      fmt = TrajectoryFile::MOL2FILE;
      ++numSoloArgs;
    } else if (Unsupported(arg)) {
      mprinterr("Error: Option '%s' is not yet supported.\n\n", arg.c_str());
      return 1;
    } else {
      mprinterr("Error: Unrecognized option '%s'\n", arg.c_str());
      Help(argv[0], false);
      return 1;
    }
  }
  if (debug > 0) WriteVersion();
  // Check command line for errors.
  if (topname.empty()) topname.assign("prmtop");
  if (debug > 0 && crdname.empty())
    mprinterr("| Reading Amber restart from STDIN\n");
  if (numSoloArgs > 1) {
    mprinterr("Error: Only one alternate output format option may be specified (found %i)\n",
              numSoloArgs);
    Help(argv[0], true);
    return 1;
  }
  if (!sybyltype.empty() && fmt != TrajectoryFile::MOL2FILE) {
    mprinterr("Warning: -sybyl is only valid for MOL2 file output.\n");
    sybyltype.clear();
  }
  if (debug > 0) {
    mprinterr("Warning: debug is %i; debug info will be written to STDOUT.\n", debug);
    SetWorldSilent(false);
  }
  // Topology
  ParmFile pfile;
  Topology parm;
  if (pfile.ReadTopology(parm, topname, debug)) {
    if (topname == "prmtop") Help(argv[0], false);
    return 1;
  }
  if (!useExtendedInfo)
    parm.ResetPDBinfo();
  if (res_offset != 0)
    for (int r = 0; r < parm.Nres(); r++)
      parm.SetRes(r).SetOriginalNum( parm.Res(r).OriginalResNum() + res_offset );
  ArgList trajArgs;
  // Input coords
  Frame TrajFrame;
  CoordinateInfo cInfo;
  if (!crdname.empty()) {
    Trajin_Single trajin;
    if (trajin.SetupTrajRead(crdname, trajArgs, &parm)) return 1;
    cInfo = trajin.TrajCoordInfo();
    TrajFrame.SetupFrameV(parm.Atoms(), cInfo);
    trajin.BeginTraj();
    if (trajin.ReadTrajFrame(0, TrajFrame)) return 1;
    trajin.EndTraj();
  } else {
    // Assume Amber restart from STDIN
    // Check that input is from a redirect.
    if ( isatty(fileno(stdin)) ) {
      mprinterr("Error: No coordinates specified with '-c' and no STDIN '<'.\n");
      return 1;
    }
    Traj_AmberRestart restartIn;
    restartIn.SetDebug( debug );
    //restartIn.processReadArgs( trajArgs );
    int total_frames = restartIn.setupTrajin("", &parm);
    if (total_frames < 1) return 1;
    cInfo = restartIn.CoordInfo();
    TrajFrame.SetupFrameV(parm.Atoms(), cInfo);
    if (restartIn.openTrajin()) return 1;
    if (restartIn.readFrame(0, TrajFrame)) return 1;
    restartIn.closeTraj();
  }
  if (ctr_origin) 
    TrajFrame.CenterOnOrigin(false);
  // Output coords
  Trajout_Single trajout;
  trajArgs.SetList( aatm + bres + pqr + title + ter_opt + box + sybyltype + writeconect, " " );
  if ( trajout.PrepareStdoutTrajWrite(trajArgs, &parm, cInfo, 1, fmt) ) return 1;
  trajout.WriteSingle(0, TrajFrame);
  trajout.EndTraj();
  return 0;
}
Beispiel #4
0
// DataIO_Std::Read_1D()
int DataIO_Std::Read_1D(std::string const& fname, 
                        DataSetList& datasetlist, std::string const& dsname)
{
  ArgList labels;
  bool hasLabels = false;
  // Buffer file
  BufferedLine buffer;
  if (buffer.OpenFileRead( fname )) return 1;

  // Read the first line. Attempt to determine the number of columns
  const char* linebuffer = buffer.Line();
  if (linebuffer == 0) return 1;
  int ntoken = buffer.TokenizeLine( SEPARATORS );
  if ( ntoken == 0 ) {
    mprinterr("Error: No columns detected in %s\n", buffer.Filename().full());
    return 1;
  }

  // Try to skip past any comments. If line begins with a '#', assume it
  // contains labels. 
  bool isCommentLine = true;
  const char* ptr = linebuffer;
  while (isCommentLine) {
    // Skip past any whitespace
    while ( *ptr != '\0' && isspace(*ptr) ) ++ptr;
    // Assume these are column labels until proven otherwise.
    if (*ptr == '#') {
      labels.SetList(ptr+1, SEPARATORS );
      if (!labels.empty()) {
        hasLabels = true;
        // If first label is Frame assume it is the index column
        if (labels[0] == "Frame" && indexcol_ == -1)
          indexcol_ = 0;
      }
      linebuffer = buffer.Line();
      ptr = linebuffer;
      if (ptr == 0) {
        mprinterr("Error: No data found in file.\n");
        return 1;
      }
    } else 
      // Not a recognized comment character, assume data.
      isCommentLine = false;
  }
  // Special case: check if labels are '#F1   F2 <name> [nframes <#>]'. If so, assume
  // this is a cluster matrix file.
  if ((labels.Nargs() == 3 || labels.Nargs() == 5) && labels[0] == "F1" && labels[1] == "F2")
  {
    mprintf("Warning: Header format '#F1 F2 <name>' detected, assuming cluster pairwise matrix.\n");
    return IS_ASCII_CMATRIX;
  }
  // Column user args start from 1
  if (indexcol_ > -1)
    mprintf("\tUsing column %i as index column.\n", indexcol_ + 1);

  // Should be at first data line. Tokenize the line.
  ntoken = buffer.TokenizeLine( SEPARATORS );
  // If # of data columns does not match # labels, clear labels.
  if ( !labels.empty() && ntoken != labels.Nargs() ) {
    labels.ClearList();
    hasLabels = false;
  }
  // Index column checks
  if (indexcol_ != -1 ) {
    if (indexcol_ >= ntoken) {
      mprinterr("Error: Specified index column %i is out of range (%i columns).\n",
                indexcol_+1, ntoken);
      return 1;
    }
    if (!onlycols_.Empty() && !onlycols_.InRange(indexcol_)) {
      mprinterr("Error: Index column %i specified, but not in given column range '%s'\n",
                indexcol_+1, onlycols_.RangeArg());
      return 1;
    }
  }

  // Determine the type of data stored in each column. Assume numbers should
  // be read with double precision.
  MetaData md( dsname );
  DataSetList::DataListType inputSets;
  unsigned int nsets = 0;
  for (int col = 0; col != ntoken; ++col) {
    std::string token( buffer.NextToken() );
    if (!onlycols_.Empty() && !onlycols_.InRange( col )) {
      mprintf("\tSkipping column %i\n", col+1);
      inputSets.push_back( 0 );
    } else {
      md.SetIdx( col+1 );
      if (hasLabels) md.SetLegend( labels[col] );
      if ( col == indexcol_ ) {
        // Always save the index column as floating point
        inputSets.push_back( new DataSet_double() );
      } else if (validInteger(token)) {
        // Integer number
        inputSets.push_back( datasetlist.Allocate(DataSet::INTEGER) );
      } else if (validDouble(token)) {
        // Floating point number
        inputSets.push_back( new DataSet_double() );
      } else {
        // Assume string. Not allowed for index column.
        if (col == indexcol_) {
          mprintf("Warning: '%s' index column %i has string values. No indices will be read.\n", 
                    buffer.Filename().full(), indexcol_+1);
          indexcol_ = -1;
        }
        inputSets.push_back( new DataSet_string() );
      }
      inputSets.back()->SetMeta( md );
      nsets++;
    }
  }
  if (inputSets.empty() || nsets == 0) {
    mprinterr("Error: No data detected.\n");
    return 1;
  }

  // Read in data
  while (linebuffer != 0) {
    if ( buffer.TokenizeLine( SEPARATORS ) != ntoken ) {
      PrintColumnError(buffer.LineNumber());
      break;
    }
    // Convert data in columns
    for (int i = 0; i < ntoken; ++i) {
      const char* token = buffer.NextToken();
      if (inputSets[i] != 0) {
        if (inputSets[i]->Type() == DataSet::DOUBLE)
          ((DataSet_double*)inputSets[i])->AddElement( atof(token) );
        else if (inputSets[i]->Type() == DataSet::INTEGER)
          ((DataSet_integer*)inputSets[i])->AddElement( atoi(token) );
        else
          ((DataSet_string*)inputSets[i])->AddElement( std::string(token) );
      }
    }
    //Ndata++;
    linebuffer = buffer.Line();
  }
  buffer.CloseFile();
   mprintf("\tDataFile %s has %i columns, %i lines.\n", buffer.Filename().full(),
           ntoken, buffer.LineNumber());

  // Create list containing only data sets.
  DataSetList::DataListType mySets;
  DataSet_double* Xptr = 0;
  for (int idx = 0; idx != (int)inputSets.size(); idx++) {
    if (inputSets[idx] != 0) {
      if ( idx != indexcol_ )
        mySets.push_back( inputSets[idx] );
      else
        Xptr = (DataSet_double*)inputSets[idx];
    }
  }
  mprintf("\tRead %zu data sets.\n", mySets.size());
  std::string Xlabel;
  if (indexcol_ != -1 && indexcol_ < labels.Nargs())
    Xlabel = labels[indexcol_];
  if (Xptr == 0)
    datasetlist.AddOrAppendSets(Xlabel, DataSetList::Darray(), mySets);
  else {
    datasetlist.AddOrAppendSets(Xlabel, Xptr->Data(), mySets);
    delete Xptr;
  }

  return 0;
}