Пример #1
0
int main(int argc, const char *argv[]) {
  OptArgs opts;
  string position_file;
  string h5file_in;
  string source;
  string h5file_out;
  string destination;
  string positions_file;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;
  vector<string>otherArgs;

  DumpStartingStateOfExtractWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file_in, "", 'i', "input");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(h5file_out, "", 'o', "output");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(positions_file, "", 'p', "positions");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(otherArgs);

  // input data processing
  string line;
  vector<size_t> row_val;
  vector<size_t> col_val;
  ifstream filestream;
  if ( ! positions_file.empty() )
    filestream.open(&positions_file.At(0));
  istream &input = ( filestream.is_open() ) ? filestream : cin;


		      
  while ( getline(input, line) )
  {
    int num = -1;
    vector<size_t> ints;
    istringstream ss(line);
    while ( ss >> num && ints.size() < 2 ) {
      if (num < 0) {
	fprintf(stderr, "Found negative integer %d\n", num);
	exit(-1);
      }
      else
	ints.push_back((size_t)num);
    }
    if (ints.size() != 2) {
      fprintf(stderr, "Found %d integers in %s, expected 2\n", (int)ints.size(), &line[0]);
      continue;
    }
    row_val.push_back(ints.at(0));
    col_val.push_back(ints.at(1));
  }
  if (row_val.size() == 0 ) {
      fprintf(stdout, "No positions to extract, check input\n");
      exit(0);
  }    
  vector<size_t>input_positions(row_val.size(), 0);

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file_in, &source[0]);
  if ( h5file_out.empty() )
    h5file_out = h5file_out + H5FILE_OUT;
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank);
  vector<hsize_t>chunks(rank);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  // convert input row, col positions to indices
  for (hsize_t i=0; i<input_positions.size(); i++)
    input_positions.At(i) = RowColToIndex(row_val.At(i), col_val.At(i), dims.At(0), dims.At(1));
  sort(input_positions.begin(), input_positions.end());

  fprintf(stdout, "Opened for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file_in[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims.At(i));
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks.At(i));
  fprintf(stdout, "]\n");

  
  H5ReplayRecorder recorder = H5ReplayRecorder(h5file_out, &destination[0],reader.GetType(),2);
  recorder.CreateFile();


  {
    vector<hsize_t> dims_pos(1, input_positions.size());
    string pos_name = "position";
    H5ReplayRecorder recorder_pos = H5ReplayRecorder(h5file_out, &pos_name[0],H5T_NATIVE_ULONG,1);
    recorder_pos.CreateDataset(dims_pos);
  }

  {
    string chip_dims = "chip_dims";
    H5ReplayRecorder recorder_chip_dims = H5ReplayRecorder(h5file_out, &chip_dims[0],H5T_NATIVE_ULLONG,1);
    vector<hsize_t> offset_dims(1,0);
    vector<hsize_t> count_dims(1,3);
    recorder_chip_dims.CreateDataset(count_dims);
    recorder_chip_dims.Write(offset_dims, count_dims, offset_dims, count_dims, &dims[0]);
  }
  if (flowlimit_arg.empty())
    flowlimit = dims.At(2);
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  flowlimit = (flowlimit < dims.At(2)) ? flowlimit : dims.At(2);
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // chunks no bigger than 100000
  vector<hsize_t>chunks_out(2);
  chunks_out.At(0) = (input_positions.size() < 10000) ? input_positions.size() : 100000;
  chunks_out.At(1) = chunks.At(2);

  recorder.CreateDataset(chunks_out);
  vector<hsize_t> extension(2);
  extension.At(0) = input_positions.size();
  extension.At(1) = dims.At(2);
  recorder.ExtendDataSet(extension); // extend if necessary

  fprintf(stdout, "Opening for write %s:%s with rank %d, position x flow chunks=[ ", &h5file_out[0], &destination[0], (int)chunks_out.size());
  for (int i=0; i<(int)chunks_out.size(); i++)
    fprintf(stdout, "%d ", (int)chunks_out.At(i));
  fprintf(stdout, "]\n");

  int max_threads_ever = (dims.At(0)/chunks.At(0) +1)*(dims.At(1)/chunks.At(1) +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<thread_args> my_args( max_threads_ever );

  size_t runningCount = 0;

  // layout is rows x cols x flows
  for (size_t row=0; row<dims.At(0); ) {
    for (size_t col=0; col<dims.At(1); ) {

      size_t ix = 0;
      hsize_t offset_out = 0;
      hsize_t count_out = 0;

      vector<size_t> limit(2);
      limit.At(0) = ( row+chunks.At(0) < dims.At(0) ) ? row+chunks.At(0) : dims.At(0);
      limit.At(1) = ( col+chunks.At(1) < dims.At(1) ) ? col+chunks.At(1) : dims.At(1);
      // fprintf(stdout, "Block row=%lu, col=%lu, count=[%lu %lu]\n", row, col, limit.At(0), limit.At(1));
      // bool first_time=true;
      for (size_t rr=row; rr<limit.At(0) && ix < input_positions.size(); rr++) {
	for (size_t cc=col; cc<limit.At(1) && ix < input_positions.size(); cc++) {
	  size_t pos = input_positions.At(ix);
	  size_t chp_indx = RowColToIndex(rr,cc, dims.At(0), dims.At(1));
	  // if (first_time)
	  //   fprintf(stdout, "Entering loop with pos=%lu, ix=%lu, chp_indx=%lu\n", pos, ix, chp_indx);
	  // first_time = false;

	  if ( chp_indx < pos)
	    continue;

	  while ( chp_indx > pos){
	    // fprintf(stdout, "chp_indx=%lu > pos=%lu, incrementing ix=%lu\n", chp_indx, pos, ix);
	    ix++;
	    if (ix == input_positions.size()){
	      break;
	    }
	    pos = input_positions.At(ix);
	    // first_time = true;
	  }

	  if( chp_indx == pos){
	    if ( count_out == 0)
	      offset_out = runningCount;
	    count_out++;
	    runningCount++;
	    // fprintf(stdout, "found: rr=%d, cc=%d, pos=%d, index=%d, ix=%lu, runningCount=%lu\n", (int)rr, (int)cc, (int)pos, (int)chp_indx, ix, runningCount);
	    ix++;
	    continue;
	  }
	  
	}
      }

      assert (ix <= input_positions.size() );
      assert (runningCount <= input_positions.size() );
      
      if (count_out > 0) {
	pthread_t thread;
	int thread_status = 0;

	assert( thread_id < thread_flags.size() );
	my_args.at(thread_id).row = row;
	my_args.at(thread_id).col = col;
	my_args.at(thread_id).chunks = &chunks;
	my_args.at(thread_id).chunks_out = &chunks_out;
	my_args.at(thread_id).dims = &dims;
	my_args.at(thread_id).h5file_in = &h5file_in;
	my_args.at(thread_id).source = &source;
	my_args.at(thread_id).h5file_out = &h5file_out;
	my_args.at(thread_id).destination = &destination;
	my_args.at(thread_id).offset_out = offset_out;
	my_args.at(thread_id).count_out = count_out;
	my_args.at(thread_id).input_positions = &input_positions;
	my_args.at(thread_id).thread_id = thread_id;
	my_args.at(thread_id).flowlimit = flowlimit;

	// fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
	while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
	  // only have to be approximate, don't worry about races
	  fprintf(stdout, "Sleeping before creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu ...\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
	  sleep(1);
	}
	thread_flags.At(thread_id) = 1;
	thread_status = pthread_create(&thread, NULL, do_subset, (void *)&my_args[thread_id]);
	// do_subset((void *)&my_args[thread_id]);
	assert (thread_status >= 0);
	thread_id++;
      }
      col += chunks.At(1);
      //fflush(stdout);
    }
    row += chunks.At(0);
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  assert (runningCount == input_positions.size() );
  cout << "Done." << endl;
  pthread_exit(NULL);
}
Пример #2
0
int main(int argc, const char *argv[]) {
  OptArgs opts;  
  string h5file;
  string source;
  string destination;
  vector<string> infiles;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;

  DumpStartingStateOfNormWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file, "", '-', "h5file");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(infiles);
  if(help || infiles.empty() || (infiles.size() > 1) ) {
    usage();
  }
  h5file = infiles.front();

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file, &source[0]);
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  H5ReplayRecorder recorder = (source.compare(destination)==0)
    ? H5ReplayRecorder(h5file, &destination[0])
    : H5ReplayRecorder(h5file, &destination[0],reader.GetType(),reader.GetRank());

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank,0);
  vector<hsize_t>chunks(rank,0);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  fprintf(stdout, "Opening for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims[i]);
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks[i]);
  fprintf(stdout, "]\n");

  if (flowlimit_arg.empty())
    flowlimit = dims[2];
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  flowlimit = (flowlimit < dims[2]) ? flowlimit : dims[2];
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // hard code region size to be at least 100x100
  chunks[0] = (chunks[0] < 100) ? 100 : chunks[0];
  chunks[1] = (chunks[1] < 100) ? 100 : chunks[1];

  recorder.CreateDataset(chunks);
  
  int max_threads_ever = (dims[0]/chunks[0] +1)*(dims[1]/chunks[1] +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<compute_norm_args> my_args( max_threads_ever );
  
  // layout is rows x cols x flows
  for (hsize_t row=0; row<dims[0]; ) {
    for (hsize_t col=0; col<dims[1]; ) {
      pthread_t thread;
      int thread_status;

      assert( thread_id < thread_flags.size() );
      my_args.at(thread_id).row = row;
      my_args.at(thread_id).col = col;
      my_args.at(thread_id).chunks = &chunks;
      my_args.at(thread_id).dims = &dims;
      my_args.at(thread_id).h5file = &h5file;
      my_args.at(thread_id).source = &source;
      my_args.at(thread_id).destination = &destination;
      my_args.at(thread_id).thread_id = thread_id;
      my_args.at(thread_id).flowlimit = flowlimit;

      fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d)\n", thread_id, (int)row, (int)dims[0], (int)col, (int)dims[1]);
      while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
	// only have to be approximate, don't worry about races
	// fprintf(stdout, "Sleeping ...\n");
	sleep(1);
      }
      thread_flags[thread_id] = 1;
      thread_status = pthread_create(&thread, NULL, compute_norm, (void *)&my_args[thread_id]);
      // compute_norm((void *)&my_args[thread_id]);
      assert (thread_status >= 0);
      thread_id++;

      col += chunks[1];
      //fflush(stdout);
    }
    row += chunks[0];
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  cout << "Done." << endl;
  pthread_exit(NULL);
}