Example #1
0
/**
 * @brief  Answer a data request by sending a data task back to the requester.
 *
 * The reply mailbox name is formatted from TASK_MAILBOX with the worker id of
 * the host the request came from and the PID of the requesting process.
 * For SMS_GET_CHUNK the reply is a "DATA-C" task created with
 * config.chunk_size; for SMS_GET_INTER_PAIRS it is a "DATA-IP" task created
 * with the map output of this worker that the requesting task has not copied
 * yet. Any other message gets no reply.
 *
 * @param  msg  The request message. It is always destroyed before returning.
 */
static void send_data (msg_task_t msg)
{
    char         mailbox[MAILBOX_ALIAS_SIZE];
    double       data_size;
    size_t       my_id;
    task_info_t  ti;

    my_id = get_worker_id (MSG_host_self ());

    /* Bounded formatting: never write past the end of the mailbox buffer. */
    snprintf (mailbox, sizeof (mailbox), TASK_MAILBOX,
              get_worker_id (MSG_task_get_source (msg)),
              MSG_process_get_PID (MSG_task_get_sender (msg)));

    if (message_is (msg, SMS_GET_CHUNK))
    {
        MSG_task_dsend (MSG_task_create ("DATA-C", 0.0, config.chunk_size, NULL), mailbox, NULL);
    }
    else if (message_is (msg, SMS_GET_INTER_PAIRS))
    {
        ti = (task_info_t) MSG_task_get_data (msg);
        /* Remaining amount = what this worker produced for the task minus what was already copied. */
        data_size = job.map_output[my_id][ti->id] - ti->map_output_copied[my_id];
        MSG_task_dsend (MSG_task_create ("DATA-IP", 0.0, data_size, NULL), mailbox, NULL);
    }

    MSG_task_destroy (msg);
}
Example #2
0
  // Runs `rounds` iterations: pull the "<node>_pr" values of every linked
  // node, recompute the local values as (1 - d) + d * sum(value / weight),
  // push them back, and finally pull the complete result set into kvmap.
  void learning() {
    // initial read of every node referenced by the local link lists
    std::unordered_map<node_t, double> kvmap_stale;
    for(auto & entry : klstmap) {
      for(auto & link : entry.second) {
        if(kvmap_stale.count(link.first) == 0) {
          kvmap_stale[link.first] = paracel_read<double>(paracel::cvt(link.first) + "_pr");
        }
      }
    }

    paracel_sync();

    for(int rd = 0; rd < rounds; ++rd) {
      if(get_worker_id() == 0) std::cout << rd << std::endl;

      // pull: one batched read, keys listed in kvmap_stale iteration order
      paracel::list_type<paracel::str_type> keys;
      for(auto & stale : kvmap_stale) {
        keys.push_back(paracel::cvt(stale.first) + "_pr");
      }
      auto pulled = paracel_read_multi<double>(keys);
      keys.resize(0);
      // the map is not modified in between, so the same order is visited again
      int pos = 0;
      for(auto & stale : kvmap_stale) {
        stale.second = pulled[pos];
        ++pos;
      }
      pulled.resize(0);

      // map: recompute every locally owned value from the stale copies
      for(auto & entry : klstmap) {
        double sigma = 0.;
        for(auto & item : entry.second) {
          sigma += (kvmap_stale[item.first] / item.second);
        }
        kvmap[entry.first] = (1. - damping_factor) + damping_factor * sigma;
      }
      paracel_sync();

      // reduce: publish the new values under their "<node>_pr" keys
      std::unordered_map<std::string, double> outgoing;
      for(auto & entry : kvmap) {
        outgoing[paracel::cvt(entry.first) + "_pr"] = entry.second;
      }
      paracel_write_multi(outgoing);
      paracel_sync();
    }

    // last pull all
    auto final_values = paracel_read_special<double>(handle_file,
                                                     filter_function);
    // keep only the part of the key in front of the first '_'
    auto strip_suffix = [] (const std::string & key) {
      return key.substr(0, key.find('_'));
    };
    for(auto & entry : final_values) {
      std::string node_name = strip_suffix(entry.first);
      kvmap[paracel::cvt(node_name)] = entry.second;
    }
  }
Example #3
0
  // Learns on the local item vectors, then scores every local item against
  // the items published by each other worker, and finally selects the top
  // entries. The sync() calls separate the three phases.
  void mls_learning() {

    // learn with local item vectors
    local_learning(item_vects);
    sync();

    // calc similarity with items held by the other workers
    for(int node_id = 0; node_id < (int)get_worker_size(); ++node_id) {
      if(node_id == (int)get_worker_id()) continue;
      auto id_bag = paracel_read<std::vector<std::string> >(
          "item_bag_" +
          std::to_string(node_id)
          );
      for(auto & iid : id_bag) {
        // Read each remote vector once and reuse it for all local items,
        // instead of re-reading it for every (local item, remote item) pair.
        // For a given local item the entries are still appended in id_bag
        // order, so the content of `result` is unchanged.
        auto jv = paracel_read<std::vector<double> >(iid);
        for(auto & iv : item_vects) {
          double sim = paracel::dot_product(iv.second, jv);
          result[iv.first].push_back(std::make_pair(iid, sim));
        } // for iv
      } // id_bag
    } // every other worker
    sync();

    // get ktop
    select_top();
    sync();
  }
Example #4
0
/**
 * @brief  Data node process: serves data requests until the job finishes.
 *
 * Listens on the mailbox formatted from DATANODE_MAILBOX with this host's
 * worker id. Every successfully received message is handed to send_data(),
 * except SMS_FINISH, which is destroyed here and ends the loop.
 *
 * @param  argc  Unused.
 * @param  argv  Unused.
 * @return Always 0.
 */
int data_node (int argc, char* argv[])
{
    char         mailbox[MAILBOX_ALIAS_SIZE];
    msg_error_t  status;
    msg_task_t   msg = NULL;

    /* Bounded formatting: never write past the end of the mailbox buffer. */
    snprintf (mailbox, sizeof (mailbox), DATANODE_MAILBOX, get_worker_id (MSG_host_self ()));

    while (!job.finished)
    {
        msg = NULL;
        status = receive (&msg, mailbox);
        if (status == MSG_OK)
        {
            if (message_is (msg, SMS_FINISH))
            {
                MSG_task_destroy (msg);
                break;
            }
            else
            {
                /* send_data() destroys the message itself. */
                send_data (msg);
            }
        }
    }

    return 0;
}
Example #5
0
 // Publishes every local item vector under its item id, records the ids in
 // item_bag, and publishes that id list under "item_bag_<worker id>".
 void init_paras() {
   for(auto & entry : item_vects) {
     const auto & iid = entry.first;
     paracel_write(iid, entry.second); // push vector
     item_bag.push_back(iid);
   }
   const std::string bag_key = "item_bag_" + std::to_string(get_worker_id());
   paracel_write(bag_key, item_bag); // push iids
 }
Example #6
0
void * master_worker_computation_routine(void * arg) {
    launchArg_t * launchArg = (launchArg_t *) arg;
    ocrPolicyDomain_t * pd = launchArg->PD;
    ocrWorker_t * worker = (ocrWorker_t *) launchArg->arg;
    DPRINTF(DEBUG_LVL_INFO, "Starting scheduler routine of master worker %d\n", get_worker_id(worker));
    worker_loop(pd, worker);
    return NULL;
}
Example #7
0
 // Copies kvmap into a string-keyed map, clears kvmap on every worker, and
 // lets worker 0 alone dump the copy with the "pagerank_" prefix.
 void dump_result() {
   std::unordered_map<std::string, double> dumped;
   for(auto & entry : kvmap) {
     dumped[paracel::cvt(entry.first)] = entry.second;
   }
   kvmap.clear();
   if(get_worker_id() != 0) {
     return;
   }
   paracel_dump_dict(dumped, "pagerank_");
 }
Example #8
0
// Asynchronous gradient descent.
//
// Each round shuffles the sample order and walks the samples once. Every
// `read_batch` samples (and on the last one) the shared "theta" is pulled
// again; every `update_batch` samples (and on the last one) the difference
// between the local theta and the last pulled theta is pushed with
// paracel_bupdate. Both batch sizes are 1% of the data, or 10 when that
// would be zero. Workers synchronize at the end of every round and pull
// theta one last time after the final round.
void logistic_regression::agd_learning() {
    int data_sz = samples.size(), data_dim = samples[0].size();
    int cnt = 0, read_batch = data_sz / 100, update_batch = data_sz / 100;
    if(read_batch == 0) {
        read_batch = 10;
    }
    if(update_batch == 0) {
        update_batch = 10;
    }
    theta = paracel::random_double_list(data_dim);
    paracel_write("theta", theta); // init push
    vector<int> idx;
    idx.reserve(data_sz);
    for(int i = 0; i < data_sz; ++i) {
        idx.push_back(i);
    }
    // NOTE(review): the update library path is hard-coded to one user's build tree.
    paracel_register_bupdate("/mfs/user/wuhong/paracel/build/lib/liblg_update.so",
                             "lg_theta_update");
    double coff2 = 2. * beta * alpha;
    vector<double> delta(data_dim);
    // main loop
    for(int rd = 0; rd < rounds; ++rd) {
        std::random_shuffle(idx.begin(), idx.end());
        theta = paracel_read<vector<double> >("theta");
        vector<double> theta_old(theta);
        // traverse data
        cnt = 0;
        for(auto sample_id : idx) {
            if( (cnt % read_batch == 0) || (cnt == (int)idx.size() - 1) ) {
                theta = paracel_read<vector<double> >("theta");
                theta_old = theta;
            }
            // Evaluate the prediction error once per sample. It used to be
            // recomputed for every coordinate, after earlier coordinates of
            // theta had already been changed: that cost one hypothesis
            // evaluation per dimension and (if lg_hypothesis reads theta, as
            // it appears to) mixed old and new parameters within one step.
            const double coff1 = alpha * (labels[sample_id] - lg_hypothesis(samples[sample_id]));
            for(int i = 0; i < data_dim; ++i) {
                double t = coff1 * samples[sample_id][i] - coff2 * theta[i];
                theta[i] += t;
            }
            if(debug) {
                loss_error.push_back(calc_loss());
            }
            if( (cnt % update_batch == 0) || (cnt == (int)idx.size() - 1) ) {
                // push only the change accumulated since the last pull
                for(int i = 0; i < data_dim; ++i) {
                    delta[i] = theta[i] - theta_old[i];
                }
                paracel_bupdate("theta", delta);
            }
            cnt += 1;
        } // traverse
        sync();
        std::cout << "worker" << get_worker_id() << " at the end of rd" << rd << std::endl;
    } // rounds
    theta = paracel_read<vector<double> >("theta"); // last pull
}
Example #9
0
void * worker_computation_routine(void * arg) {
    // Need to pass down a data-structure
    launchArg_t * launchArg = (launchArg_t *) arg;
    ocrPolicyDomain_t * pd = launchArg->PD;
    ocrWorker_t * worker = (ocrWorker_t *) launchArg->arg;
    // associate current thread with the worker
    associate_comp_platform_and_worker(pd, worker);
    // Setting up this worker context to takeEdts
    // This assumes workers are not relocatable
    DPRINTF(DEBUG_LVL_INFO, "Starting scheduler routine of worker %d\n", get_worker_id(worker));
    worker_loop(pd, worker);
    return NULL;
}
Example #10
0
/// Picks the worker a fiber should be scheduled on.
///
/// The calling worker is kept when it has a valid id and the fiber's affinity
/// allows it. Otherwise two entries of the fiber's affinity array are drawn at
/// random and the one whose schedule has fewer (or equally many) active
/// entries wins, with ties going to the first draw.
size_t fiber_control::pick_fiber_worker(fiber* fib) {
  const size_t current = get_worker_id();
  if (current != (size_t)(-1) && fib->affinity.get(current) != 0) {
    return current;
  }

  // current worker rejected: draw two random positions in the affinity array
  size_t first = graphlab::random::fast_uniform<size_t>(0,fib->affinity_array.size() - 1);
  size_t second = graphlab::random::fast_uniform<size_t>(0,fib->affinity_array.size() - 1);
  // translate the positions into worker ids
  first = fib->affinity_array[first];
  second = fib->affinity_array[second];

  if (schedule[first].nactive <= schedule[second].nactive) {
    return first;
  }
  return second;
}
Example #11
0
/**
 * @brief  Checks if a worker is a straggler.
 *
 * A worker counts as a straggler when its host speed is below
 * config.grid_average_speed and at least one of its slots is in use.
 *
 * @param  worker  The worker to be probed.
 * @return 1 if true, 0 if false.
 */
static int is_straggler (msg_host_t worker)
{
    size_t  id = get_worker_id (worker);
    /* Slots in use: configured slots minus the slots reported as available. */
    int     busy_slots = (config.slots[MAP] + config.slots[REDUCE]) - (job.heartbeats[id].slots_av[MAP] + job.heartbeats[id].slots_av[REDUCE]);

    return (MSG_get_host_speed (worker) < config.grid_average_speed && busy_slots > 0) ? 1 : 0;
}
Example #12
0
// Iterative parameter mixing.
//
// Each round pulls the shared "theta", runs one shuffled pass of gradient
// steps over the local samples, and pushes the resulting change scaled by
// 1 / worker count. A sync() before and after the push separates the local
// pass from the update. Theta is pulled one last time after the final round.
void logistic_regression::ipm_learning() {
    int data_sz = samples.size(), data_dim = samples[0].size();
    theta = paracel::random_double_list(data_dim);
    paracel_write("theta", theta); // init push
    vector<int> idx;
    idx.reserve(data_sz);
    for(int i = 0; i < data_sz; ++i) {
        idx.push_back(i);
    }
    // NOTE(review): the update library path is hard-coded to one user's build tree.
    paracel_register_bupdate("/mfs/user/wuhong/paracel/build/lib/liblg_update.so",
                             "lg_theta_update");
    double coff2 = 2. * beta * alpha;
    double wgt = 1. / get_worker_size();
    vector<double> delta(data_dim);
    // main loop
    for(int rd = 0; rd < rounds; ++rd) {
        std::random_shuffle(idx.begin(), idx.end());
        theta = paracel_read<vector<double> >("theta");
        vector<double> theta_old(theta);
        // traverse data
        for(auto sample_id : idx) {
            // Evaluate the prediction error once per sample. It used to be
            // recomputed for every coordinate, after earlier coordinates of
            // theta had already been changed: that cost one hypothesis
            // evaluation per dimension and (if lg_hypothesis reads theta, as
            // it appears to) mixed old and new parameters within one step.
            const double coff1 = alpha * (labels[sample_id] - lg_hypothesis(samples[sample_id]));
            for(int i = 0; i < data_dim; ++i) {
                double t = coff1 * samples[sample_id][i] - coff2 * theta[i];
                theta[i] += t;
            }
            if(debug) {
                loss_error.push_back(calc_loss());
            }
        } // traverse
        // weighted change of this worker over the whole pass
        for(int i = 0; i < data_dim; ++i) {
            delta[i] = wgt * (theta[i] - theta_old[i]);
        }
        sync(); // sync for map
        paracel_bupdate("theta", delta); // update with delta
        sync(); // sync for reduce
        std::cout << "worker" << get_worker_id() << " at the end of rd" << rd << std::endl;
    } // rounds
    theta = paracel_read<vector<double> >("theta"); // last pull
}
Example #13
0
/**
 * @brief  Mark the tasks of a straggler as possible speculative tasks.
 *
 * For each phase (MAP first, then REDUCE) in which the worker has at least
 * one slot in use, every task whose first copy runs on that worker and has
 * been running for more than 60 time units is set to T_STATUS_TIP_SLOW.
 *
 * @param  worker  The straggler worker.
 */
static void set_speculative_tasks (msg_host_t worker)
{
    const int    phases[2] = { MAP, REDUCE };
    size_t       p;
    size_t       tid;
    size_t       wid = get_worker_id (worker);
    task_info_t  ti;

    for (p = 0; p < 2; p++)
    {
        const int phase = phases[p];

        /* All slots of this phase are free: nothing is running here. */
        if (!(job.heartbeats[wid].slots_av[phase] < config.slots[phase]))
            continue;

        for (tid = 0; tid < config.amount_of_tasks[phase]; tid++)
        {
            /* Only the first copy of each task is inspected. */
            if (job.task_list[phase][tid][0] == NULL)
                continue;

            ti = (task_info_t) MSG_task_get_data (job.task_list[phase][tid][0]);
            if (ti->wid == wid && task_time_elapsed (job.task_list[phase][tid][0]) > 60)
            {
                job.task_status[phase][tid] = T_STATUS_TIP_SLOW;
            }
        }
    }
}
Example #14
0
/**
 * @brief  Main master function.
 *
 * Receives messages on MASTER_MAILBOX until no MAP or REDUCE task is pending.
 * Heartbeats either flag the sender's tasks as speculative (when the sender
 * is a straggler) or trigger scheduling for its free slots; task-done
 * messages update the task status and the accumulated timing totals.
 * Afterwards the job is marked as finished and the statistics are printed.
 *
 * @param  argc  Unused.
 * @param  argv  Unused.
 * @return Always 0.
 */
int master (int argc, char* argv[])
{
    heartbeat_t  heartbeat;
    msg_error_t  status;
    msg_host_t   worker;
    msg_task_t   msg = NULL;
    size_t       wid;
    task_info_t  ti;
    /* Sums of ti->cpu_time / ti->elapsed_time over the tasks counted as done below. */
    double total_cpu_time = 0.0;
    double total_task_time = 0.0;

    print_config ();
    XBT_INFO ("JOB BEGIN"); XBT_INFO (" ");

    /* NOTE(review): the fopen result is not checked; a NULL tasks_log would be
     * passed to fprintf/fclose below -- confirm whether failure must be handled. */
    tasks_log = fopen ("tasks.csv", "w");
    fprintf (tasks_log, "task_id,phase,worker_id,time,action,shuffle_end\n");

    while (job.tasks_pending[MAP] + job.tasks_pending[REDUCE] > 0)
    {
	msg = NULL;
	status = receive (&msg, MASTER_MAILBOX);
	if (status == MSG_OK)
	{
	    /* Identify the worker the message came from. */
	    worker = MSG_task_get_source (msg);
	    wid = get_worker_id (worker);

	    if (message_is (msg, SMS_HEARTBEAT))
	    {
		heartbeat = &job.heartbeats[wid];

		/* A straggler gets no new work; its tasks are marked instead. */
		if (is_straggler (worker))
		{
		    set_speculative_tasks (worker);
		}
		else
		{
		    if (heartbeat->slots_av[MAP] > 0)
			send_scheduler_task(MAP, wid);

		    if (heartbeat->slots_av[REDUCE] > 0)
			send_scheduler_task(REDUCE, wid);
		}
	    }
	    else if (message_is (msg, SMS_TASK_DONE))
	    {
		ti = (task_info_t) MSG_task_get_data (msg);

		/* Only the first completion report of a task is counted. */
		if (job.task_status[ti->phase][ti->id] != T_STATUS_DONE)
		{
		    job.task_status[ti->phase][ti->id] = T_STATUS_DONE;
		    finish_all_task_copies (ti);
		    job.tasks_pending[ti->phase]--;
		    if (job.tasks_pending[ti->phase] <= 0)
		    {
			XBT_INFO (" ");
			XBT_INFO ("%s PHASE DONE", (ti->phase==MAP?"MAP":"REDUCE"));
			XBT_INFO (" ");
		    }
                    ti->finished_time = MSG_get_clock();
                    ti->elapsed_time = ti->finished_time - ti->start_time;

                    total_task_time += ti->elapsed_time;
                    total_cpu_time  += ti->cpu_time;
		}
		/* The task info is released here whether or not it was counted. */
		xbt_free_ref (&ti);
	    }
	    MSG_task_destroy (msg);
	}
    }

    fclose (tasks_log);

    /* data_node() loops while this flag is unset. */
    job.finished = 1;

    print_config ();
    print_stats ();

    XBT_INFO ("JOB END");
    XBT_INFO ("\tclock_time: %f", MSG_get_clock());
    /* The value in parentheses is the total divided by the final clock value. */
    XBT_INFO ("\ttotal_task_time: %f(%f)", total_task_time, total_task_time / MSG_get_clock());
    XBT_INFO ("\ttotal_cpu_time: %f(%f)", total_cpu_time, total_cpu_time / MSG_get_clock());

    return 0;
}
Example #15
0
  // Loads everything this worker needs before training: the global mean
  // (miu), the item biases, the item factors (published by worker 0), the
  // rating graph, and the user factors/biases of the users that appear in
  // the local part of the rating graph. Users without a provided factor or
  // bias get small random values.
  //
  // pattern: forwarded unchanged to paracel_load_as_graph.
  void init(const string & pattern) {
    // load miu
    // NOTE(review): reads the second line (index 1) and its second tab-separated
    // field without checking that they exist -- confirm the input format.
    auto lines = paracel_loadall(input_miu);
    auto temp = paracel::str_split(lines[1], '\t');
    miu = std::stod(temp[1]);
    
    // load item bias
    lines = paracel_loadall(input_ibias); 
    // each line: "<item id>\t<bias>"
    auto local_ibias_parser = [&] (const vector<string> & linelst,
                                   const char sep = '\t') {
      for(auto & line : linelst) {
        auto v = paracel::str_split(line, sep);
        ibias[v[0]] = std::stod(v[1]);
      }
    };
    local_ibias_parser(lines, '\t');
    lines.resize(0);
    std::cout << "print: " << ibias.size() << std::endl;

/*
    // load some of ifactor
    lines = paracel_load(input_ifac);
    auto local_ifac_parser = [&] (const vector<string> & linelst,
                                  const char sep1 = '\t',
                                  const char sep2 = '|') {
      auto tmp1 = paracel::str_split(linelst[0], sep1);
      auto tmp2 = paracel::str_split(tmp1[1], sep2);
      // init fac_dim
      fac_dim = tmp2.size();
      
      for(auto & line : linelst) {
        vector<double> tmp;
        auto v = paracel::str_split(line, sep1);
        auto vv = paracel::str_split(v[1], sep2);
        for(size_t i = 0; i < vv.size(); ++i) {
          tmp.push_back(std::stod(vv[i]));
        }
        ifactor[v[0]] = tmp;
      }
    };
    local_ifac_parser(lines, '\t', '|');
    lines.resize(0);
*/

    // init global ifactor: only worker 0 parses the file and publishes every
    // item factor; the other workers wait at the sync() below
    if(get_worker_id() == 0) {
      // each line: "<item id>\t<f1>|<f2>|..."
      auto handler_lambda = [&] (const vector<string> & linelst) {
        for(auto & line : linelst) {
          vector<double> tmp;
          auto v = paracel::str_split(line, '\t');
          auto vv = paracel::str_split(v[1], '|');
          for(size_t i = 0;i < vv.size(); ++i) {
            tmp.push_back(std::stod(vv[i]));
          }
          ifactor[v[0]] = tmp;
          paracel_write(v[0] + "_ifactor", tmp); // key: "iid_ifactor"
        }
      };
      paracel_sequential_loadall(input_ifac, handler_lambda);
    }
    sync();
    // the local copy filled on worker 0 is dropped again once it is published
    ifactor.clear();
    
    // load bigraph
    auto local_rating_parser = [] (const std::string & line) {
      return paracel::str_split(line, ',');
    };
    auto rating_parser = paracel::gen_parser(local_rating_parser);
    paracel_load_as_graph(rating_graph, 
                          input_rating, 
                          rating_parser, 
                          pattern);
    // split bigraph into user rating list
    auto split_lambda = [&] (const std::string & a,
                             const std::string & b,
                             double c) {
      // default fmap: first dim is uid
      usr_rating_lst[a].push_back(
          std::make_pair(b, c)
          );
    };
    rating_graph.traverse(split_lambda);
    std::cout << "traverse done" << std::endl;
    
    // init ufactor with specified ufac 
    // NOTE(review): linelst[0] is read unconditionally, so an empty batch of
    // lines would be an out-of-range access -- confirm the loader never passes one.
    auto select_lambda = [&] (const vector<string> & linelst) {
      auto tmp1 = paracel::str_split(linelst[0], '\t');
      auto tmp2 = paracel::str_split(tmp1[1], '|');
      // init fac_dim
      fac_dim = tmp2.size();
      for(auto & line : linelst) {
        vector<double> tmp;
        auto v = paracel::str_split(line, '\t');
        // skip users that have no rating in the local graph
        if(usr_rating_lst.count(v[0]) == 0) { continue; }
        auto vv = paracel::str_split(v[1], '|');
        for(size_t i = 0; i < vv.size(); ++i) {
          tmp.push_back(std::stod(vv[i]));
        }
        ufactor[v[0]] = tmp;
      }
    }; // select_lambda
    // load started user factor
    paracel_sequential_loadall(input_ufac, select_lambda);
    std::cout << "load ufactor done" << ufactor.size() << "|" << std::endl;

    // init ubias with specified ubias
    auto filter_lambda = [&] (const vector<string> & linelst) {
      for(auto & line : linelst) {
        auto v = paracel::str_split(line, '\t');
        string uid = v[0];
        // skip users that have no rating in the local graph
        if(usr_rating_lst.count(uid) == 0) { continue; }
        ubias[uid] = std::stod(v[1]);
      }
    };
    // load started user bias
    paracel_sequential_loadall(input_ubias, filter_lambda);
    std::cout << "load ubias done" << ubias.size() << std::endl;
    
    // resize ufactor/ubias here, with no ufac specified:
    // every local user still missing a factor or a bias gets a random one
    for(auto & kv : usr_rating_lst) {
      if(ufactor.count(kv.first) == 0) {
        ufactor[kv.first] = paracel::random_double_list(fac_dim, 0.001);
      }
      if(ubias.count(kv.first) == 0) {
        ubias[kv.first] = 0.001 * paracel::random_double();
      }
    }

  }
Example #16
0
  void init_paras() {
    auto local_parser = [] (const std::string & line) {
      return paracel::str_split(line, ',');
    };
    auto f_parser = paracel::gen_parser(local_parser);
    paracel_load_as_graph(local_graph, input, f_parser, "fmap");
    if(get_worker_id() == 0) std::cout << "load done" << std::endl;

    auto cnt_lambda = [&] (const node_t & a,
                           const node_t & b,
                           double c) {
      if(!kvmap.count(a)) {
        kvmap[a] = 1.;
      } else {
        kvmap[a] += 1.;
      }
    };
    local_graph.traverse(cnt_lambda);
    
    // make sure there are no same pieces
    // generate kv + local combine
    auto kvinit_lambda = [&] (const node_t & a,
                              const node_t & b,
                              double c) {
      klstmap[b].push_back(std::make_pair(a, kvmap[a]));
    };
    local_graph.traverse(kvinit_lambda);
    if(get_worker_id() == 0) std::cout << "stat done" << std::endl;

    // init push to construct global connect info
    std::unordered_map<std::string,
        std::vector<std::pair<node_t, double> > > klstmap_tmp;
    for(auto & kv : klstmap) {
      if(kv.first == SENTINEL) continue; // little tricky here
      klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second;
    }
    paracel_bupdate_multi(klstmap_tmp,
                          handle_file,
                          update_function);
    if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl;
    paracel_sync();

    // read connect info only once
    klstmap.clear();
    for(auto & kv : kvmap) {
      // notice: limit memory here
      paracel_read<std::vector<std::pair<node_t, double> > >
          (paracel::cvt(kv.first) + "_links",
           klstmap[kv.first]);
    }
    if(get_worker_id() == 0) std::cout << "first read done" << std::endl;

    // reuse kvmap to store pr
    // init pr with 1. / total_node_sz
    auto worker_comm = get_comm();
    long node_sz = kvmap.size();
    worker_comm.allreduce(node_sz);
    double init_val = 1. / node_sz;
    std::unordered_map<std::string, double> tmp;
    for(auto & kv : kvmap) {
      kvmap[kv.first] = init_val; 
      tmp[paracel::cvt(kv.first) + "_pr"] = init_val;
    }
    paracel_write_multi(tmp);
    paracel_sync();
  }
Example #17
0
// Worker 0 alone dumps theta ("lg_theta_" prefix, "|" separator) and the
// recorded loss values ("lg_loss_error_" prefix, newline separator).
void logistic_regression::dump_result() {
    if(get_worker_id() != 0) {
        return;
    }
    paracel_dump_vector(theta, "lg_theta_", "|");
    paracel_dump_vector(loss_error, "lg_loss_error_", "\n");
}