Esempio n. 1
0
void worker_process(const MPI::Intracomm &comm_world, const int manager_rank, const int rank, const Mat& Q, const vector<float> &lut) {
	char flag = MANAGER_READY;
	int inputSize;
	int outputSize;
	int maskSize;
	char *input;
	char *output;
	char *mask;

	comm_world.Barrier();
	uint64_t t0, t1;
	printf("worker %d ready\n", rank);

	while (flag != MANAGER_FINISHED && flag != MANAGER_ERROR) {
		t0 = cci::common::event::timestampInUS();

		// tell the manager - ready
		comm_world.Send(&WORKER_READY, 1, MPI::CHAR, manager_rank, TAG_CONTROL);
//		printf("worker %d signal ready\n", rank);
		// get the manager status
		comm_world.Recv(&flag, 1, MPI::CHAR, manager_rank, TAG_CONTROL);
//		printf("worker %d received manager status %d\n", rank, flag);

		if (flag == MANAGER_READY) {
			// get data from manager
			comm_world.Recv(&inputSize, 1, MPI::INT, manager_rank, TAG_METADATA);
			comm_world.Recv(&maskSize, 1, MPI::INT, manager_rank, TAG_METADATA);
			comm_world.Recv(&outputSize, 1, MPI::INT, manager_rank, TAG_METADATA);

			// allocate the buffers
			input = new char[inputSize];
			mask = new char[maskSize];
			output = new char[outputSize];
			memset(input, 0, inputSize * sizeof(char));
			memset(mask, 0, maskSize * sizeof(char));
			memset(output, 0, outputSize * sizeof(char));

			// get the file names
			comm_world.Recv(input, inputSize, MPI::CHAR, manager_rank, TAG_DATA);
			comm_world.Recv(mask, maskSize, MPI::CHAR, manager_rank, TAG_DATA);
			comm_world.Recv(output, outputSize, MPI::CHAR, manager_rank, TAG_DATA);

			t0 = cci::common::event::timestampInUS();
//			printf("comm time for worker %d is %lu us\n", rank, t1 -t0);
			//printf("worker %d processing \"%s\"\n", rank, mask);


			// now do some work
			compute(input, mask, output, Q, lut);

			t1 = cci::common::event::timestampInUS();
//			printf("worker %d processed \"%s\" + \"%s\" -> \"%s\" in %lu us\n", rank, input, mask, output, t1 - t0);
			printf("worker %d processed \"%s\" in %lu us\n", rank, mask, t1 - t0);

			// clean up
			delete [] input;
			delete [] mask;
			delete [] output;

		}
	}
}
  // currently only hacked for spheres, with radius and sd as two parameters
  bool HipGISAXS::fit_steepest_descent(real_t zcut,
          real_t radius_min, real_t radius_max, real_t radius_num,
          real_t sd_min, real_t sd_max, real_t sd_num,
          unsigned int dim, MPI::Intracomm& world_comm,
          int x_min, int x_max, int x_step) {
    int mpi_rank = world_comm.Get_rank();

    if(!init_steepest_fit(world_comm, zcut)) return false;

    int num_alphai = 0, num_phi = 0, num_tilt = 0;;

    real_t alphai_min, alphai_max, alphai_step;
    HiGInput::instance().scattering_alphai(alphai_min, alphai_max, alphai_step);
    if(alphai_max < alphai_min) alphai_max = alphai_min;
    if(alphai_min == alphai_max || alphai_step == 0) num_alphai = 1;
    else num_alphai = (alphai_max - alphai_min) / alphai_step + 1;

    real_t phi_min, phi_max, phi_step;
    HiGInput::instance().scattering_inplanerot(phi_min, phi_max, phi_step);
    if(phi_step == 0) num_phi = 1;
    else num_phi = (phi_max - phi_min) / phi_step + 1;

    real_t tilt_min, tilt_max, tilt_step;
    HiGInput::instance().scattering_tilt(tilt_min, tilt_max, tilt_step);
    if(tilt_step == 0) num_tilt = 1;
    else num_tilt = (tilt_max - tilt_min) / tilt_step + 1;

    std::cout << "**                    Num alphai: " << num_alphai << std::endl
          << "**                       Num phi: " << num_phi << std::endl
          << "**                      Num tilt: " << num_tilt << std::endl;

    // prepare parameters

    std::vector<std::vector<real_t> > params;
    int num_params = 2;
    std::vector<real_t> temp;
    real_t deltap = 0.0;
    if(radius_num <= 1)
      temp.push_back(radius_min);
    else {
      deltap = fabs(radius_max - radius_min) / (radius_num - 1);
      for(int i = 0; i < radius_num; ++ i) {
        temp.push_back(radius_min + i * deltap);
      } // for
    } // if-else
    params.push_back(temp);
    temp.clear();
    if(sd_num <= 1)
      temp.push_back(sd_min);
    else {
      deltap = fabs(sd_max - sd_min) / (sd_num - 1);
      for(int i = 0; i < sd_num; ++ i) {
        temp.push_back(sd_min + i * deltap);
      } // for
    } // if-else
    params.push_back(temp);
    temp.clear();

    // this will work only on one shape and one structure

    const real_t err_threshold = 1e-8;
    const unsigned int max_iter = 200;

    std::vector<real_t> param_vals;
    //param_vals.push_back(16.0);
    //param_vals.push_back(6.0);
    param_vals.push_back(23.0);
    param_vals.push_back(2.0);
    std::vector<real_t> param_deltas;
    param_deltas.push_back(0.05);
    param_deltas.push_back(0.05);
    real_t gamma_const = 0.05;

    real_t qdeltay = QGrid::instance().delta_y();

    real_t alpha_i = alphai_min;
    // high level of parallelism here (alphai, phi, tilt) for dynamicity ...
    for(int i = 0; i < num_alphai; i ++, alpha_i += alphai_step) {
      real_t alphai = alpha_i * PI_ / 180;
      real_t phi = phi_min;
      for(int j = 0; j < num_phi; j ++, phi += phi_step) {
        real_t tilt = tilt_min;
        for(int k = 0; k < num_tilt; k ++, tilt += tilt_step) {

          std::cout << "-- Computing reference GISAXS "
                << i * num_phi * num_tilt + j * num_tilt + k + 1 << " / "
                << num_alphai * num_phi * num_tilt
                << " [alphai = " << alpha_i << ", phi = " << phi
                << ", tilt = " << tilt << "] ..." << std::endl;

          /* run the reference gisaxs simulation using input params */
          real_t* ref_data = NULL;
          if(!run_gisaxs(alpha_i, alphai, phi, tilt, ref_data, world_comm)) {
            if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl;
            return false;
          } // if

          if(dim != 1) {
            std::cerr << "uh-oh: only 1D is supported for now" << std::endl;
            return false;
          } // if

          real_t* ref_z_cut = new (std::nothrow) real_t[nqy_];
          for(unsigned int iy = 0; iy < nqy_; ++ iy) {
            // assuming nqz_ == 1 ...
            ref_z_cut[iy] = ref_data[nqx_ * iy + 0];
          } // for

          delete[] ref_data;

          // this will store z cut values for each iteration for plotting later
          real_t* z_cuts = new (std::nothrow) real_t[nqy_ * max_iter];
          real_t* temp_zcuts = new (std::nothrow) real_t[nqy_];

          // do some preprocessing
          // start the main loop, bound by max_iter and err_threshold
          //   compute gisaxs for current parameter values
          //   compute the neighbors parameter values
          //   for 12 combinations of current and neighbors, compute gisaxs and error
          //   compute the derivatives (gradient) and error stuff
          //   update parameter values
          // compute the error surface

          real_t err = 10.0;
          std::vector<real_t> param1_list;
          std::vector<real_t> param2_list;
          structure_iterator_t structure_iter = HiGInput::instance().structure_begin();
          Structure* structure = &((*structure_iter).second);
          Shape* shape = HiGInput::instance().shape(*structure);
          shape_param_iterator_t shape_param = (*shape).param_begin();
          real_t* data = NULL;
          std::vector<real_t> param_error_data;
          for(unsigned int iter = 0; iter < max_iter; ++ iter) {
            param1_list.clear();
            param1_list.push_back(param_vals[0] - 2 * param_deltas[0]);  // p1mm
            param1_list.push_back(param_vals[0] - param_deltas[0]);    // p1m
            param1_list.push_back(param_vals[0]);            // p1
            param1_list.push_back(param_vals[0] + param_deltas[0]);    // p1p
            param1_list.push_back(param_vals[0] + 2 * param_deltas[0]);  // p1pp
            param2_list.clear();
            param2_list.push_back(param_vals[1] - 2 * param_deltas[1]);  // p2mm
            param2_list.push_back(param_vals[1] - param_deltas[1]);    // p2m
            param2_list.push_back(param_vals[1]);            // p2
            param2_list.push_back(param_vals[1] + param_deltas[1]);    // p2p
            param2_list.push_back(param_vals[1] + 2 * param_deltas[1]);  // p2pp

            // current point
            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              z_cuts[iter * nqy_ + iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err22 = compute_cut_fit_error(z_cuts + iter * nqy_, ref_z_cut, qdeltay);

            // 12 neighbors

            (*shape_param).second.mean(param1_list[0]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err02 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err11 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err12 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err13 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[0]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err20 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err21 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err23 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[4]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err24 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err31 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err32 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err33 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[4]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err42 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            // 22  0
            // 02  1mm
            // 11  1m2m
            // 12  1m
            // 13  1m2p
            // 20  2mm
            // 21  2m
            // 23  2p
            // 24  2pp
            // 31  1p2m
            // 32  1p
            // 33  1p2p
            // 42  1pp

            real_t derr1 = (err32 - err12) / (2 * param_deltas[0]);
            real_t derr2 = (err23 - err21) / (2 * param_deltas[1]);
            err = sqrt(derr1 * derr1 + derr2 * derr2);
            std::cout << "++ Iteration: " << iter << ", Error: " << err << std::endl;
            std::cout << "++ Parameter 1: " << param_vals[0]
                  << ", Parameter 2: " << param_vals[1] << std::endl;
            param_error_data.push_back(iter);
            param_error_data.push_back(param_vals[0]);
            param_error_data.push_back(param_vals[1]);
            param_error_data.push_back(err);
            if(err < err_threshold) break;

            real_t herr11 = (err42 + err02 - 2 * err22) /
                      (4 * param_deltas[0] * param_deltas[0]);
            real_t herr12 = (err33 - err13 - (err31 - err11)) /
                      (4 * param_deltas[0] * param_deltas[1]);
            real_t herr21 = (err33 - err13 - (err31 - err11)) /
                      (4 * param_deltas[0] * param_deltas[1]);
            real_t herr22 = (err24 + err20 - 2 * err22) /
                      (4 * param_deltas[1] * param_deltas[1]);
            real_t* herr = new (std::nothrow) real_t[2 * 2];
            herr[0] = herr11;
            herr[1] = herr12;
            herr[2] = herr21;
            herr[3] = herr22;
            real_t* herrinv;
            mldivide(2, herr, herrinv);

            param_vals[0] -= gamma_const * (herrinv[0] * derr1 + herrinv[1] * derr2);
            param_vals[1] -= gamma_const * (herrinv[2] * derr1 + herrinv[3] * derr2);

            delete[] herrinv;
            delete[] herr;
          } // for

          // compute the error surface
          std::vector<std::vector<real_t> >::iterator mean_iter = params.begin();
          std::vector<std::vector<real_t> >::iterator sd_iter = mean_iter + 1;
          std::vector<real_t> err_surface;
          for(std::vector<real_t>::iterator curr_mean = (*mean_iter).begin();
              curr_mean != (*mean_iter).end(); ++ curr_mean) {
            for(std::vector<real_t>::iterator curr_sd = (*sd_iter).begin();
                curr_sd != (*sd_iter).end(); ++ curr_sd) {
              (*shape_param).second.mean(*curr_mean);
              (*shape_param).second.deviation(*curr_sd);
              if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
                if(mpi_rank == 0)
                  std::cerr << "error: could not finish successfully" << std::endl;
                return false;
              } // if
              for(unsigned int iy = 0; iy < nqy_; ++ iy) {
                // assuming nqz_ == 1 ...
                temp_zcuts[iy] = data[nqx_ * iy];
              } // for
              delete[] data; data = NULL;
              real_t curr_err = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);
              err_surface.push_back(*curr_mean);
              err_surface.push_back(*curr_sd);
              err_surface.push_back(curr_err);
            } // for
          } // for

          // write data to files
          // define output filename
          std::stringstream alphai_b, phi_b, tilt_b;
          std::string alphai_s, phi_s, tilt_s;
          alphai_b << alpha_i; alphai_s = alphai_b.str();
          phi_b << phi; phi_s = phi_b.str();
          tilt_b << tilt; tilt_s = tilt_b.str();
          std::string param_error_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/param_error_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          std::string z_cut_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/z_cut_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          std::string err_surf_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/err_surf_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          // write param_error_data
          std::ofstream param_error_f(param_error_file.c_str());
          for(std::vector<real_t>::iterator pei = param_error_data.begin();
              pei != param_error_data.end(); pei += 4) {
            param_error_f << *pei << "\t" << *(pei + 1) << "\t" << *(pei + 2) << "\t"
                    << *(pei + 3) << std::endl;
          } // for
          param_error_f.close();
          // write ref_z_cut and z_cuts
          std::ofstream zcut_f(z_cut_file.c_str());
          for(unsigned int yy = 0; yy < nqy_; ++ yy) {
            zcut_f << ref_z_cut[yy] << "\t";
          } // for
          zcut_f << std::endl;
          for(unsigned int i = 0; i < max_iter; ++ i) {
            for(unsigned int yy = 0; yy < nqy_; ++ yy) {
              zcut_f << z_cuts[i * nqy_ + yy] << "\t";
            } // for
            zcut_f << std::endl;
          } // for
          zcut_f.close();
          // write error surface
          std::ofstream err_surf_f(err_surf_file.c_str());
          for(std::vector<real_t>::iterator surfi = err_surface.begin();
              surfi != err_surface.end(); surfi += 3) {
            err_surf_f << *surfi << "\t" << *(surfi + 1) << "\t" << *(surfi + 2) << std::endl;
          } // for
          err_surf_f.close();

          (*shape_param).second.mean(22.0);
          (*shape_param).second.deviation(7.0);

          param_error_data.clear();
          delete[] temp_zcuts;
          delete[] z_cuts;
          delete[] ref_z_cut;

          std::cout << "parameter values: " << param_vals[0] << ", " << param_vals[1]
                << " [error: " << err << "]" << std::endl;

          // synchronize all procs after each run
          world_comm.Barrier();
        } // for tilt
      } // for phi
    } // for alphai

    return true;
  } // HipGISAXS::fit_all_gisaxs()
Esempio n. 3
0
void manager_process(const MPI::Intracomm &comm_world, const int manager_rank, const int worker_size, std::string &maskName, std::string &imgDir, std::string &outDir, bool overwrite) {
	// first get the list of files to process
   	std::vector<std::string> filenames;
	std::vector<std::string> seg_output;
	std::vector<std::string> features_output;
	uint64_t t1, t0;

	t0 = cci::common::event::timestampInUS();
	getFiles(maskName, imgDir, outDir, filenames, seg_output, features_output, overwrite);

	t1 = cci::common::event::timestampInUS();
	printf("Manager ready at %d, file read took %lu us\n", manager_rank, t1 - t0);
	comm_world.Barrier();

	// now start the loop to listen for messages
	int curr = 0;
	int total = filenames.size();
	MPI::Status status;
	int worker_id;
	char ready;
	char *input;
	char *mask;
	char *output;
	int inputlen;
	int masklen;
	int outputlen;
	while (curr < total) {
		usleep(1000);

		if (comm_world.Iprobe(MPI_ANY_SOURCE, TAG_CONTROL, status)) {
/* where is it coming from */
			worker_id=status.Get_source();
			comm_world.Recv(&ready, 1, MPI::CHAR, worker_id, TAG_CONTROL);
//			printf("manager received request from worker %d\n",worker_id);
			if (worker_id == manager_rank) continue;

			if(ready == WORKER_READY) {
				// tell worker that manager is ready
				comm_world.Send(&MANAGER_READY, 1, MPI::CHAR, worker_id, TAG_CONTROL);
//				printf("manager signal transfer\n");
/* send real data */
				inputlen = filenames[curr].size() + 1;  // add one to create the zero-terminated string
				masklen = seg_output[curr].size() + 1;
				outputlen = features_output[curr].size() + 1;
				input = new char[inputlen];
				memset(input, 0, sizeof(char) * inputlen);
				strncpy(input, filenames[curr].c_str(), inputlen);
				mask = new char[masklen];
				memset(mask, 0, sizeof(char) * masklen);
				strncpy(mask, seg_output[curr].c_str(), masklen);
				output = new char[outputlen];
				memset(output, 0, sizeof(char) * outputlen);
				strncpy(output, features_output[curr].c_str(), outputlen);

				comm_world.Send(&inputlen, 1, MPI::INT, worker_id, TAG_METADATA);
				comm_world.Send(&masklen, 1, MPI::INT, worker_id, TAG_METADATA);
				comm_world.Send(&outputlen, 1, MPI::INT, worker_id, TAG_METADATA);

				// now send the actual string data
				comm_world.Send(input, inputlen, MPI::CHAR, worker_id, TAG_DATA);
				comm_world.Send(mask, masklen, MPI::CHAR, worker_id, TAG_DATA);
				comm_world.Send(output, outputlen, MPI::CHAR, worker_id, TAG_DATA);
				curr++;

				delete [] input;
				delete [] mask;
				delete [] output;

			}

			if (curr % 100 == 1) {
				printf("[ MANAGER STATUS ] %d tasks remaining.\n", total - curr);
			}

		}
	}
/* tell everyone to quit */
	int active_workers = worker_size;
	while (active_workers > 0) {
		usleep(1000);

		if (comm_world.Iprobe(MPI_ANY_SOURCE, TAG_CONTROL, status)) {
		/* where is it coming from */
			worker_id=status.Get_source();
			comm_world.Recv(&ready, 1, MPI::CHAR, worker_id, TAG_CONTROL);
//			printf("manager received request from worker %d\n",worker_id);
			if (worker_id == manager_rank) continue;

			if(ready == WORKER_READY) {
				comm_world.Send(&MANAGER_FINISHED, 1, MPI::CHAR, worker_id, TAG_CONTROL);
//				printf("manager signal finished\n");
				--active_workers;
			}
		}
	}
}