示例#1
0
/* For the historical case:
     One experiment replicate per line, containing all responses in one line
     can't allow different sigma for each response and the 
     config vars are repeated per experiment. */
inline void
read_historical_data(const std::string& expDataFileName,
		     const std::string& context_message,
		     size_t numExperiments,
                     IntVector& numReplicates,
		     size_t numExpConfigVars,
		     size_t numFunctions,
		     size_t numExpStdDeviationsRead,
		     bool expDataFileAnnotated,
		     bool calc_sigma_from_data,
		     RealMatrix& xObsData,
		     RealMatrixArray& yObsData, 
		     RealMatrixArray& yStdData)
{
  //using boost::multi_array;
  //using boost::extents;
  size_t i,j,k, total_num_rows=0, max_replicates=1;
  //for now, numExperiments are the same for all functions
  //numReplicates can vary per experiment
  for (i=0; i<numExperiments; i++){
    total_num_rows+=numReplicates[i];
    max_replicates = (numReplicates[i]>max_replicates) ? numReplicates[i] : max_replicates;
  }
  Cout << "total number of rows " << total_num_rows << '\n';
  Cout << "max_replicates " << max_replicates << '\n';
  Cout << "numFunctions " << numFunctions << '\n';
  //yObsFull(extents[numFunctions][numExperiments][max_replicates]);

  // Read from a matrix with numExperiments rows and a number of cols
  // columns:  numExpConfigVars X, numFunctions Y, [numFunctions Sigma]
  RealMatrix experimental_data;

  size_t num_cols = numExpConfigVars + numFunctions + numExpStdDeviationsRead;

  TabularIO::read_data_tabular(expDataFileName, context_message, 
			       experimental_data, total_num_rows ,  num_cols, 
			       expDataFileAnnotated);

  // Get views of the data in 3 matrices for convenience

  size_t start_row, start_col;
  if (numExpConfigVars > 0) {
    start_row = 0;
    start_col = 0;
    RealMatrix x_obs_data(Teuchos::View, experimental_data,
			  total_num_rows, numExpConfigVars,
			  start_row, start_col);
    xObsData.reshape(total_num_rows, numExpConfigVars);
    for (i=0; i<total_num_rows; i++)
      for (j=0; j<numExpConfigVars; j++)
        xObsData(i,j) = x_obs_data(i,j);
  }
 
  start_row = 0;
  start_col = numExpConfigVars;
  RealMatrix y_obs_data(Teuchos::View, experimental_data,
			total_num_rows, numFunctions,
			start_row, start_col);
  yObsData.resize(numFunctions); 
  size_t numrows_thusfar = 0; 
  for (j=0; j<numFunctions; j++)
    yObsData[j].reshape(numExperiments,max_replicates);
  for (j=0; j<numFunctions; j++){
    size_t numrows_thusfar = 0; 
    for (i=0; i<numExperiments; i++){
      for (k=0; k<numReplicates(i); k++){ 
        yObsData[j](i,k) = y_obs_data(numrows_thusfar+k,j);
        Cout << yObsData[j];
      }
      numrows_thusfar +=numReplicates(i);
    }
  }
  // BMA TODO: The number of experimental functions may not match the
  // user functions, so can't assume numFunctions
  yStdData.resize(numFunctions);
  for (j=0; j<numFunctions; j++)
    yStdData[j].reshape(numExperiments,max_replicates);
  if (numExpStdDeviationsRead > 0) {
    start_row = 0;
    start_col = numExpConfigVars + numFunctions;
    RealMatrix y_std_data(Teuchos::View, experimental_data,
			  total_num_rows, numExpStdDeviationsRead,
			  start_row, start_col);
    size_t numrows_thusfar = 0; 
    // We allow 1 or numFunctions sigmas
    for (j=0; j<numFunctions; j++){
      size_t numrows_thusfar = 0; 
      for (i=0; i<numExperiments; i++){
        for (k=0; k<numReplicates(i); k++){ 
	  if (numExpStdDeviationsRead == 1)
            yStdData[j](i,k) = y_std_data(numrows_thusfar+k,0);
          else 
            yStdData[j](i,k) = y_std_data(numrows_thusfar+k,j);
          Cout << yStdData[j];
      	}
     	numrows_thusfar +=numReplicates(i);
      }
    }
    // user values?  Commenting out as we don't currently support
    // input file-specified errors.
    // if (expStdDeviations.length()==1) {
    //   for (int i=0; i<numExperiments; i++)
    //     for (int j=0; j<numFunctions; j++)
    //       yStdData(i,j) = expStdDeviations(0);
    // }
    // else if (expStdDeviations.length()==numFunctions) {
    //   for (int i=0; i<numExperiments; i++)
    //     for (int j=0; j<numFunctions; j++)
    //       yStdData(i,j) = expStdDeviations(j);
    // }
  }
  else if (calc_sigma_from_data) {
    // calculate sigma terms
    Real mean_est, var_est;
    for (j=0; j<numFunctions; j++){
      for (i=0; i<numExperiments; i++) {
      	mean_est = 0;
      	for (k=0; k<numReplicates(i); k++)
          mean_est += yObsData[j](i,k);
      	mean_est = mean_est / ((Real)numReplicates(i));
      	var_est = 0;
      	for (k=0; k<numReplicates(i); k++)
          var_est += (yObsData[j](i,k)-mean_est)*(yObsData[j](i,k)-mean_est); 
      	// If only one data, point, use 1.0 in the likelihood (no weight)
        for (k=0; k<numReplicates(i); k++) 
          yStdData[j](i,k) = (numReplicates(i) > 1) ? 
	    std::sqrt(var_est/(Real)(numReplicates(i)-1)) : 1.0;
      }
    }
  }
  else {
    // Default: use 1.0 in the likelihood (no weight)
    for (j=0; j<numFunctions; j++)
      for (i=0; i<numExperiments; i++)
        for (k=0; k<numReplicates(i); k++) 
            yStdData[j](i,k) = 1.0;
  }
}