Example #1
0
    /**
     * Blocking receive of one serialized object from peer \p id.
     *
     * Two messages are read from MPI_COMM_WORLD with the same \p tag:
     * first a single MPI_INT holding the payload length in bytes, then the
     * payload itself, which is deserialized into \p elem through a
     * graphlab::iarchive.
     *
     * \param elem  Object that receives the deserialized value.
     * \param id    Rank of the sending process; asserted to be < size().
     * \param tag   MPI tag used for both messages.
     *
     * When compiled without HAS_MPI the function only emits a fatal log
     * message.
     */
    void recv(T& elem, const size_t id, const int tag = 0) {
#ifdef HAS_MPI
      assert(id < size());

      int source_rank(id);
      MPI_Status mpi_status;

      // Step 1: the sender first announces how many bytes will follow.
      int payload_bytes(-1);
      int rc = MPI_Recv(&payload_bytes, 1, MPI_INT,
                        source_rank, tag, MPI_COMM_WORLD, &mpi_status);
      assert(rc == MPI_SUCCESS);
      assert(payload_bytes > 0);

      // Step 2: read the serialized bytes into a local buffer.
      std::vector<char> payload(payload_bytes);
      char* payload_begin = &(payload[0]);
      rc = MPI_Recv(payload_begin, payload_bytes, MPI_BYTE,
                    source_rank, tag, MPI_COMM_WORLD, &mpi_status);
      assert(rc == MPI_SUCCESS);

      // Step 3: expose the buffer as an input stream and deserialize.
      namespace bio = boost::iostreams;
      bio::stream<bio::array_source> byte_stream(payload_begin, payload.size());
      graphlab::iarchive archive(byte_stream);
      archive >> elem;
#else
      logstream(LOG_FATAL) << "MPI not installed!" << std::endl;
#endif
    }
std::vector<sgraph_edge_data> graph_pylambda_evaluator::eval_triple_apply(
    const std::vector<sgraph_edge_data>& all_edge_data,
    size_t src_partition, size_t dst_partition,
    const std::vector<size_t>& mutated_edge_field_ids) {

  std::lock_guard<mutex> lg(m_mutex);
  
  logstream(LOG_INFO) << "graph_lambda_worker eval triple apply " << src_partition 
                      << ", " << dst_partition << std::endl;
  
  DASSERT_TRUE(is_loaded(src_partition));
  DASSERT_TRUE(is_loaded(dst_partition));

  auto& source_partition = m_graph_sync.get_partition(src_partition);
  auto& target_partition = m_graph_sync.get_partition(dst_partition);

  std::vector<std::string> mutated_edge_keys;
  for (size_t fid: mutated_edge_field_ids) {
    mutated_edge_keys.push_back(m_edge_keys[fid]);
  }

  std::vector<sgraph_edge_data> ret(all_edge_data.size());

  lambda_graph_triple_apply_data lgt;

  lgt.all_edge_data = &all_edge_data;
  lgt.out_edge_data = &ret;
  lgt.source_partition = &source_partition;
  lgt.target_partition = &target_partition;
  lgt.vertex_keys = &m_vertex_keys;
  lgt.edge_keys = &m_edge_keys;
  lgt.mutated_edge_keys = &mutated_edge_keys;
  lgt.srcid_column = m_srcid_column;
  lgt.dstid_column = m_dstid_column;

  evaluation_functions.eval_graph_triple_apply(m_lambda_id, &lgt);
  python::check_for_python_exception();
  
  return ret;
}
Example #3
0
/** load a matrix market file into a matrix */
void load_matrix_market_matrix(const std::string & filename, int offset, int D){
  MM_typecode matcode;                        
  uint i,I,J;
  double val;
  uint rows, cols;
  size_t nnz;
  FILE * f = open_file(filename.c_str() ,"r");
  int rc = mm_read_banner(f, &matcode); 
  if (rc != 0)
    logstream(LOG_FATAL)<<"Failed to load matrix market banner in file: " << filename << std::endl;

  if (mm_is_sparse(matcode)){
    int rc = mm_read_mtx_crd_size(f, &rows, &cols, &nnz);
    if (rc != 0)
      logstream(LOG_FATAL)<<"Failed to load matrix market banner in file: " << filename << std::endl;
  }
  else { //dense matrix
    rc = mm_read_mtx_array_size(f, &rows, &cols);
    if (rc != 0)
      logstream(LOG_FATAL)<<"Failed to load matrix market banner in file: " << filename << std::endl;
    nnz = rows * cols;
  }

  for (i=0; i<nnz; i++){
    if (mm_is_sparse(matcode)){
      rc = fscanf(f, "%u %u %lg\n", &I, &J, &val);
      if (rc != 3)
        logstream(LOG_FATAL)<<"Error reading input line " << i << std::endl;
      I--; J--;
      assert(I >= 0 && I < rows);
      assert(J >= 0 && J < cols);
      //set_val(a, I, J, val);
      latent_factors_inmem[I+offset].pvec[J] = val;
    }
    else {
      rc = fscanf(f, "%lg", &val);
      if (rc != 1)
        logstream(LOG_FATAL)<<"Error reading nnz " << i << std::endl;
      I = i / D;
      J = i % cols;
      //set_val(a, I, J, val);
      latent_factors_inmem[I+offset].pvec[J] = val;
    }
  }
  logstream(LOG_INFO) << "Factors from file: loaded matrix of size " << rows << " x " << cols << " from file: " << filename << " total of " << nnz << " entries. "<< i << std::endl;

}
Example #4
0
 /**
  * Changes whether the pool's worker threads use CPU affinity.
  *
  * If the requested setting differs from the current one, the workers are
  * stopped (the spawn queue is told to stop blocking, then the thread
  * group is joined) and a new thread group is spawned after blocking has
  * been re-enabled on the queue. Calling with the current value is a
  * no-op.
  *
  * \param affinity  New value for cpu_affinity.
  */
 void thread_pool::set_cpu_affinity(bool affinity) {
   if (affinity != cpu_affinity) {
     cpu_affinity = affinity;
     // stop the queue from blocking
     spawn_queue.stop_blocking();
   
     // join the threads in the thread group; join() is retried after a
     // caught const char* exception
     while(1) {
       try {
         threads.join(); break;
       } catch (const char* c) {
         // this should not be possible!
         // The message names this function so the log points to the
         // right place (it used to say "destructor").
         logstream(LOG_FATAL) 
           << "Unexpected exception caught in thread pool set_cpu_affinity: " 
           << c << std::endl;
       }
     }
     spawn_queue.start_blocking();
     spawn_thread_group();
   }
 } // end of set_cpu_affinity
 void make_pds_constraint() {
   int p = 0;
   if (!is_pds_compatible(nshards, p)) {
     logstream(LOG_FATAL) << "Num shards: " << nshards << " cannot be used for pdsingress." << std::endl;
   };
   pds pds_generator;
   std::vector<size_t> results;
   if (p == 1) {
     results.push_back(0);
     results.push_back(2);
   } else {
     results = pds_generator.get_pds(p);
   }
   for (size_t i = 0; i < nshards; i++) {
     std::vector<procid_t> adjlist;
     for (size_t j = 0; j < results.size(); j++) {
       adjlist.push_back( (results[j] + i) % nshards);
     }
     std::sort(adjlist.begin(), adjlist.end());
     constraint_graph.push_back(adjlist);
   }
 }
Example #6
0
bool process::kill(bool async) {
  if(!m_launched)
    log_and_throw("No process launched!");

  if(m_proc_handle != NULL) {
    BOOL ret = TerminateProcess(m_proc_handle, 1);
    auto err_code = GetLastError();
    if(!async)
      WaitForSingleObject(m_proc_handle, 10000);
    CloseHandle(m_proc_handle);
    m_proc_handle = NULL;

    if(!ret) {
      logstream(LOG_INFO) << get_last_err_str(err_code);
      return false;
    }

    return true;
  }

  return false;
}
Example #7
0
/**
 * Shuts down the distributed control object.
 *
 * Order of operations as written below: reset last_dc / last_dc_procid,
 * run a full barrier, free the two callback events, invoke every deletion
 * callback, flush the senders, close the comm layer, delete the senders,
 * delete the two pthread keys, shut down and delete the receivers, stop
 * the function-call queues and join their handler threads, log the
 * traffic counters, and finally delete the comm object.
 */
distributed_control::~distributed_control() {
  // detach the instance
  last_dc = NULL;
  last_dc_procid = 0;
  distributed_services->full_barrier();
  logstream(LOG_INFO) << "Shutting down distributed control " << std::endl;
  FREE_CALLBACK_EVENT(EVENT_NETWORK_BYTES);
  FREE_CALLBACK_EVENT(EVENT_RPC_CALLS);
  // call all deletion callbacks
  for (size_t i = 0; i < deletion_callbacks.size(); ++i) {
    deletion_callbacks[i]();
  }

  // sample the sent-bytes counter before the senders are flushed and
  // destroyed; the value is logged at the end
  size_t bytessent = bytes_sent();
  for (size_t i = 0;i < senders.size(); ++i) {
    senders[i]->flush();
  }

  comm->close();

  for (size_t i = 0;i < senders.size(); ++i) {
    delete senders[i];
  }
  senders.clear();

  pthread_key_delete(dc_impl::thrlocal_sequentialization_key);
  pthread_key_delete(dc_impl::thrlocal_send_buffer_key);

  // sample the received-bytes counter before the receivers are destroyed
  size_t bytesreceived = bytes_received();
  for (size_t i = 0;i < receivers.size(); ++i) {
    receivers[i]->shutdown();
    delete receivers[i];
  }
  receivers.clear();
  // shutdown function call handlers
  for (size_t i = 0;i < fcallqueue.size(); ++i) fcallqueue[i].stop_blocking();
  fcallhandlers.join();
  logstream(LOG_INFO) << "Bytes Sent: " << bytessent << std::endl;
  logstream(LOG_INFO) << "Calls Sent: " << calls_sent() << std::endl;
  logstream(LOG_INFO) << "Network Sent: " << network_bytes_sent() << std::endl;
  logstream(LOG_INFO) << "Bytes Received: " << bytesreceived << std::endl;
  logstream(LOG_INFO) << "Calls Received: " << calls_received() << std::endl;

  // comm was closed above; it is only destroyed here, as the last step
  delete comm;

}
  /**
   * Wakes up fibers that are asleep (or about to sleep) in the consensus.
   *
   * The outer condition is evaluated without holding the mutex m; the
   * state is then re-examined under the lock before any sleeping flag is
   * cleared. Every fiber whose sleeping flag is set is counted back into
   * numactive and, if it has a non-zero entry in cond, is rescheduled via
   * fiber_control::schedule_tid.
   */
  void fiber_async_consensus::cancel() {
    /*
      Assertion: numactive > 0 if there is work to do.
      If there are fibers trying to sleep, lets wake them up
    */
    if (trying_to_sleep > 0 || numactive < ncpus) {
      m.lock();
      // remembered only to decide whether the "Waking" message is logged
      size_t oldnumactive = numactive;
      // once I acquire this lock, all fibers must be
      // in the following states
      // 1: still running and has not reached begin_critical_section()
      // 2: is sleeping in cond.wait()
      // 3: has called begin_critical_section() but has not acquired
      //    the mutex
      // In the case of 1,3: These fibers will perform one more sweep
      // of their task queues. Therefore they will see any new job if available
      // in the case of 2: numactive must be < ncpus since numactive
      // is mutex protected. Then I can wake them up by
      // clearing their sleeping flags and broadcasting.
      if (numactive < ncpus) {
        // this is safe. Note that it is done from within 
        // the critical section.
        for (size_t i = 0;i < ncpus; ++i) {
          // sleeping[i] is added directly to the active count
          numactive += sleeping[i];
          if (sleeping[i]) {
            sleeping[i] = 0;
            // this here was basically cond[i].signal();
            if (cond[i] != 0) fiber_control::schedule_tid(cond[i]);
          }
        }
        // log only when nothing was active before and the consensus has
        // not completed
        if (oldnumactive == 0 && !done) {
          logstream(LOG_INFO) << rmi.procid() << ": Waking" << std::endl;
        }

      }
      m.unlock();
    }
  }
    /**
     * Reports the metrics object through every reporter named in the
     * "metrics.reporter" option (comma-separated, default "console").
     *
     * Recognised names: "basic" / "console", "file" (target taken from
     * "metrics.reporter.filename") and "html" (target taken from
     * "metrics.reporter.htmlfile"). Unknown names are logged as a warning
     * and skipped. Empty entries (e.g. "a,,b" or a trailing comma) are
     * ignored.
     *
     * The option string is tokenised with std::string searches instead of
     * strtok, so the string's internal buffer is never written to and no
     * hidden global tokenizer state is used.
     *
     * \param m  Metrics to report.
     */
    static VARIABLE_IS_NOT_USED void metrics_report(metrics &m) {
        const std::string reporters = get_option_string("metrics.reporter", "console");
        const char * delims = ",";

        // Skip leading delimiters; npos means there are no tokens at all.
        std::string::size_type tok_begin = reporters.find_first_not_of(delims);
        while (tok_begin != std::string::npos) {
            const std::string::size_type tok_end = reporters.find_first_of(delims, tok_begin);
            const std::string repname = (tok_end == std::string::npos)
                ? reporters.substr(tok_begin)
                : reporters.substr(tok_begin, tok_end - tok_begin);

            if (repname == "basic" || repname == "console") {
                basic_reporter rep;
                m.report(rep);
            } else if (repname == "file") {
                file_reporter rep(get_option_string("metrics.reporter.filename", "metrics.txt"));
                m.report(rep);
            } else if (repname == "html") {
                html_reporter rep(get_option_string("metrics.reporter.htmlfile", "metrics.html"));
                m.report(rep);
            } else {
                logstream(LOG_WARNING) << "Could not find metrics reporter with name [" << repname << "], ignoring." << std::endl;
            }

            // Advance past the delimiter run to the next token, if any.
            tok_begin = (tok_end == std::string::npos)
                ? std::string::npos
                : reporters.find_first_not_of(delims, tok_end);
        }
    }
Example #10
0
 /**
  * Opens (creating if necessary) the output file and prepares the write
  * buffer.
  *
  * The buffer size comes from the "preprocessing.bufsize" option (default
  * 64 MB) and is asserted to exceed 1 MB. A header with zeroed counters is
  * initialised and passed to bwrite together with the fresh buffer.
  * Failure to open the file or to allocate the buffer is reported through
  * logstream(LOG_FATAL).
  *
  * \param filename  Path of the file to write.
  */
 binary_adjacency_list_writer(std::string filename) : filename(filename) {
     bufsize = (int) get_option_int("preprocessing.bufsize", 64 * 1024 * 1024);
     assert(bufsize > 1024 * 1024);
     fd = open(filename.c_str(), O_WRONLY | O_CREAT, S_IROTH | S_IWOTH | S_IWUSR | S_IRUSR);
     if (fd < 0) {
         logstream(LOG_FATAL) << "Could not open file " << filename << " for writing. " <<
         " Error: " << strerror(errno) << std::endl;
     }
     
     header.format_version = FORMAT_VERSION;
     header.max_vertex_id = 0;
     header.contains_edge_values = false;
     header.numedges = 0;
     header.edge_value_size = (uint32_t) sizeof(EdgeDataType);
     
     buf = (char*) malloc(bufsize);
     // A buffer this large can realistically fail to allocate; stop here
     // rather than let bwrite dereference a null pointer.
     if (buf == NULL) {
         logstream(LOG_FATAL) << "Could not allocate write buffer of " << bufsize <<
         " bytes for file " << filename << std::endl;
     }
     bufptr = buf;
     bwrite<bin_adj_header>(fd, buf, bufptr,  header);
     counter = 0;
     lastid = 0;
     initialized = false;
     assert(fd >= 0);
 }
Example #11
0
 /**
  * Applies \p transform_functor to every locally owned vertex of \p g
  * that belongs to \p vset.
  *
  * A fatal message is logged if the graph has not been finalized. The
  * sweep over the local vertices runs through parallel_for and is
  * surrounded by barriers on g.dc(); g.synchronize() is called at the
  * end.
  *
  * \param g                  Graph whose vertices are transformed.
  * \param transform_functor  Callable invoked with a vertex_type object.
  * \param vset               Vertices to visit; defaults to
  *                           GraphType::complete_set().
  */
 void transform_vertices(GraphType& g,
                         TransformType transform_functor,
                         const vertex_set vset = GraphType::complete_set()) {
   typedef typename GraphType::vertex_type vertex_type;

   if(!g.is_finalized()) {
     logstream(LOG_FATAL)
         << "\n\tAttempting to call graph.transform_vertices(...)"
         << "\n\tbefore finalizing the graph."
         << std::endl;
   }

   g.dc().barrier();

   size_t first_lvid = 0;
   size_t lvid_limit = g.num_local_vertices();
   parallel_for (first_lvid, lvid_limit, [&](size_t lvid) {
       auto local_vertex = g.l_vertex(lvid);
       // Skip vertices this machine does not own, then those outside vset.
       if (!local_vertex.owned()) return;
       if (!vset.l_contains(lvid_type(lvid))) return;
       vertex_type vertex(local_vertex);
       transform_functor(vertex);
     });

   g.dc().barrier();
   g.synchronize();
 }
Example #12
0
        /**
         * Sets up a sliding shard over the vertex range
         * [_range_st, _range_en] backed by an adjacency file and an edge
         * data file.
         *
         * The block size is rounded up to a multiple of sizeof(int), the
         * sizes of both files are queried, an adjacency session is opened
         * on the io manager and the initial offsets are saved.
         *
         * NOTE(review): disable_writes is initialised from _disable_writes
         * and then unconditionally reset to false in the body, so the
         * argument has no effect -- confirm whether that is intended.
         */
        sliding_shard(stripedio * iomgr, std::string _filename_edata, std::string _filename_adj, vid_t _range_st, vid_t _range_en, size_t _blocksize, metrics &_m,
                      bool _disable_writes=false, bool onlyadj = false) :
        iomgr(iomgr),
        filename_edata(_filename_edata),
        filename_adj(_filename_adj),
        range_st(_range_st),
        range_end(_range_en),
        blocksize(_blocksize),
        m(_m),
        disable_writes(_disable_writes) {
            // Cursor state: start at the beginning, nothing loaded yet.
            curvid = 0;
            adjoffset = 0;
            edataoffset = 0;
            window_start_edataoffset = 0;
            curblock = NULL;
            curadjblock = NULL;

            // Mode flags.
            disable_writes = false;
            disable_async_writes = false;
            only_adjacency = onlyadj;

            // Round the block size up to the next multiple of sizeof(int).
            const size_t misalignment = blocksize % sizeof(int);
            if (misalignment != 0) {
                blocksize += sizeof(int) - misalignment;
            }
            assert(blocksize % sizeof(int)==0);

            adjfilesize = get_filesize(filename_adj);
            edatafilesize = get_shard_edata_filesize<int>(filename_edata);
            if (!only_adjacency) {
                logstream(LOG_DEBUG) << "Total edge data size: " << edatafilesize << std::endl;
            }

            adjfile_session = iomgr->open_session(filename_adj, true);
            save_offset();

            // With dynamic edge data size, do not load
            async_edata_loading = false;

        }
Example #13
0
/**
 * Computes the time-SVD++ estimate for one (user, movie, time) triple.
 *
 * The estimate is globalMean + user bias + movie bias plus, for each of
 * the D factors, the user x movie, user x time and user x time x movie
 * products. It is clamped to [minval, maxval] and written to
 * \p prediction. A NaN result is reported through logstream(LOG_FATAL).
 *
 * \param usr         User-side parameters.
 * \param mov         Movie-side parameters.
 * \param ptime       Time-side parameters.
 * \param rating      Observed rating.
 * \param prediction  Receives the clamped estimate.
 * \return Squared difference between \p rating and the estimate.
 */
float time_svdpp_predict(const time_svdpp_usr & usr, 
    const time_svdpp_movie & mov, 
    const time_svdpp_time & ptime,
    const float rating, 
    double & prediction){

  // baseline: global mean + user bias + movie bias
  double estimate = globalMean + *usr.bu + *mov.bi;

  for (int f = 0; f < D; ++f) {
    estimate += (usr.ptemp[f] * mov.q[f]);               // user x movie
    estimate += usr.x[f] * ptime.z[f];                   // user x time
    estimate += usr.pu[f] * ptime.pt[f] * mov.q[f];      // user x time x movie
  }

  // clamp from above first, then from below
  prediction = std::max(std::min(estimate, maxval), minval);

  if (std::isnan(prediction))
    logstream(LOG_FATAL)<<"Got into numerical errors! Try to decrease --lrate, --gamma, --beta" <<std::endl;

  const float residual = rating - prediction;
  return residual * residual;
}
    void make_grid_constraint() {
      int ncols, nrows;
      if (!is_grid_compatible(nshards, nrows, ncols)) {
        logstream(LOG_FATAL) << "Num shards: " << nshards << " cannot be used for grid ingress." << std::endl;
      };

      for (size_t i = 0; i < nshards; i++) {
        std::vector<procid_t> adjlist;
        // add self
        adjlist.push_back(i);

        // add the row of i
        size_t rowbegin = (i/ncols) * ncols;
        for (size_t j = rowbegin; j < rowbegin + ncols; ++j)
          if (i != j) adjlist.push_back(j); 

        // add the col of i
        for (size_t j = i % ncols; j < nshards; j+=ncols)
          if (i != j) adjlist.push_back(j); 

        std::sort(adjlist.begin(), adjlist.end());
        constraint_graph.push_back(adjlist);
      }
    }
Example #15
0
  /**
   * Grab pivot's adjacency list into memory.
   *
   * Vertices with fewer than min_allowed_intersection edges are skipped.
   * Otherwise every edge (neighbour id and edge data) is recorded in a
   * dense_adj, which is stored in adjs at index v.id() - pivot_st, and
   * grabbed_edges is increased atomically by the number of edges.
   *
   * \param v  Vertex whose edges are copied.
   * \return Number of edges stored, or 0 if the vertex was skipped.
   */
  int load_edges_into_memory(CE_Graph_vertex<VertexDataType, EdgeDataType> &v) {
    //assert(is_pivot(v.id()));
    //assert(is_item(v.id()));

    int num_edges = v.num_edges();
    //not enough user rated this item, we don't need to compare to it
    if (num_edges < min_allowed_intersection){
      if (debug)
        logstream(LOG_DEBUG)<<"Skipping since num edges: " << num_edges << std::endl;
      return 0;
    }

    // Collect every (neighbour id, edge value) pair of this vertex.
    dense_adj dadj;
    for(int i=0; i<num_edges; i++) 
      set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data());

    // Check the slot index BEFORE writing through it; the assertion used
    // to run only after the out-of-range write had already happened.
    assert(v.id() - pivot_st < adjs.size());
    adjs[v.id() - pivot_st] = dadj;
    __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
    return num_edges;
  }
Example #16
0
     /**
      * Returns the set produced by find_pds(p) after verifying it.
      *
      * Verification: with modulus p*p + p + 1, the difference of every
      * ordered pair of distinct elements is tallied modulo the modulus;
      * the set is accepted only if every residue from 1 to modulus - 1
      * is hit exactly once.
      *
      * \param p  Parameter forwarded to find_pds.
      * \return The verified set, or an empty vector (after logging an
      *         error) when verification fails.
      */
     std::vector<size_t> get_pds(size_t p) {
       std::vector<size_t> candidate = find_pds(p);

       const size_t modulus = p * p + p + 1;
       const size_t n = candidate.size();

       // hits[d] = number of ordered pairs whose difference is d (mod modulus)
       std::vector<size_t> hits(modulus, 0);
       for (size_t a = 0; a < n; ++a) {
         for (size_t b = 0; b < n; ++b) {
           if (a != b) {
             ++hits[(candidate[a] - candidate[b] + modulus) % modulus];
           }
         }
       }

       // Find the first non-zero residue that is not hit exactly once.
       size_t residue = 1;
       while (residue < hits.size() && hits[residue] == 1) {
         ++residue;
       }

       if (residue < hits.size()) {
         logstream(LOG_ERROR) << "Fail to generate pds for p = " << p << std::endl;
         return std::vector<size_t>();
       }
       return candidate;
     }
/**
 * Attaches every entry of the global distributed-glshared registry to
 * this manager.
 *
 * A modification trigger bound to invalidate() is installed on the DHT.
 * Each registry entry is then given this manager and its registry index
 * as id, and recorded in objrevmap. For entries whose index is owned by
 * this process according to the DHT, the serialized entry is stored in
 * the DHT under that index.
 *
 * \param dc  Distributed control object used to construct rmi and dht.
 */
distributed_glshared_manager::distributed_glshared_manager(distributed_control &dc):
                  rmi(dc, this),
                  glsharedobjs(distgl_impl::get_global_dist_glshared_registry()),
                  dht(dc){
  // route DHT modification events to invalidate()
  dht.attach_modification_trigger(boost::bind(&distributed_glshared_manager::invalidate,
                                              this, _1, _2, _3));
  for (size_t i = 0; i < glsharedobjs.size(); ++i) {
    logstream(LOG_INFO) << "registered entry " << i << " with type " 
                        << glsharedobjs[i]->type_name() << std::endl;
    // an entry that already has a manager is only warned about; it is
    // re-attached to this manager below regardless
    if (glsharedobjs[i]->manager != NULL) {
      logger(LOG_WARNING, "glshared objects are still attached to a previous manager!");
    }
    glsharedobjs[i]->manager = this;
    glsharedobjs[i]->id = i;
    // reverse lookup: entry -> registry index
    objrevmap[glsharedobjs[i]] = i;
    // only the process that owns key i stores the initial serialized value
    if (dht.owning_machine(i) == rmi.procid()) {
      std::stringstream strm;
      oarchive oarc(strm);
      glsharedobjs[i]->save(oarc);
      dht.set(i, strm.str());
    }
  }
  // perform the sets
}
Example #18
0
 /**
  * Saves graph \p g under \p prefix in the format named by \p format.
  *
  * Supported names: "snap" / "tsv" (tsv_writer), "graphjrl"
  * (graphjrl_writer) and "bin" (save_binary). An empty prefix makes the
  * call a no-op.
  *
  * \param g                  Graph to save.
  * \param prefix             Output path prefix.
  * \param format             Format name.
  * \param gzip               Forwarded to save() for the text formats.
  * \param files_per_machine  Forwarded to save() for the text formats.
  * \throws std::string if \p format is not recognised (an error is
  *         logged first).
  */
 void save_format(GraphType& g, 
                  const std::string& prefix, const std::string& format,
                  bool gzip = true, size_t files_per_machine = 4) {
   if (prefix.length() == 0) return;

   const bool wants_tsv = (format == "snap") || (format == "tsv");
   if (wants_tsv) {
     save(g, prefix, builtin_parsers::tsv_writer<GraphType>(),
          gzip, false, true, files_per_machine);
     return;
   }

   if (format == "graphjrl") {
     save(g, prefix, builtin_parsers::graphjrl_writer<GraphType>(),
          gzip, true, true, files_per_machine);
     return;
   }

   if (format == "bin") {
     save_binary(g, prefix);
     return;
   }

   // ("bintsv4" is not handled here.)
   logstream(LOG_ERROR)
       << "Unrecognized Format \"" << format << "\"!" << std::endl;
   throw(std::string("Unrecognized Format \"" + format + "\""));
 } // end of save structure
Example #19
0
 bool check_origfile_modification_earlier(std::string basefilename, int nshards) {
     /* Compare last modified dates of the original graph and the shards */
     if (file_exists(basefilename) && get_option_int("disable-modtime-check", 0) == 0) {
         struct stat origstat, shardstat;
         int err1 = stat(basefilename.c_str(), &origstat);
         
         std::string adjfname = filename_shard_adj(basefilename, 0, nshards);
         int err2 = stat(adjfname.c_str(), &shardstat);
         
         if (err1 != 0 || err2 != 0) {
             logstream(LOG_ERROR) << "Error when checking file modification times:  " << strerror(errno) << std::endl;
             return nshards;
         }
         
         if (origstat.st_mtime > shardstat.st_mtime) {
             logstream(LOG_INFO) << "The input graph modification date was newer than of the shards." << std::endl;
             logstream(LOG_INFO) << "Going to delete old shards and recreate new ones. To disable " << std::endl;
             logstream(LOG_INFO) << "functionality, specify --disable-modtime-check=1" << std::endl;
             
             // Delete shards
             delete_shards<EdgeDataType>(basefilename, nshards);
             
             // Delete the bin-file
             std::string preprocfile = preprocess_filename<EdgeDataType>(basefilename);
             if (file_exists(preprocfile)) {
                 logstream(LOG_DEBUG) << "Deleting: " << preprocfile << std::endl;
                 int err = remove(preprocfile.c_str());
                 if (err != 0) {
                     logstream(LOG_ERROR) << "Error deleting file: " << preprocfile << ", " <<
                     strerror(errno) << std::endl;
                 }
             }
             return false;
         } else {
             return true;
         }
     
         
     }
     return true;
 }
Example #20
0
/**
 * Shuts down the distributed control object.
 *
 * Order of operations as written below: full barrier, free the two
 * callback events, invoke every deletion callback, flush the senders,
 * close the comm layer, delete the senders, shut down and delete the
 * receivers, stop the function-call queues and join their handler
 * threads, and finally delete the comm object. The informational log
 * lines are emitted only when last_dc_procid is 0.
 */
distributed_control::~distributed_control() {
  distributed_services->full_barrier();
  if(last_dc_procid==0)
  	logstream(LOG_INFO) << "Shutting down distributed control " << std::endl;
  FREE_CALLBACK_EVENT(EVENT_NETWORK_BYTES);
  FREE_CALLBACK_EVENT(EVENT_RPC_CALLS);
  // call all deletion callbacks
  for (size_t i = 0; i < deletion_callbacks.size(); ++i) {
    deletion_callbacks[i]();
  }

  // sample the sent-bytes counter before the senders are flushed and
  // destroyed; the value is logged at the end
  size_t bytessent = bytes_sent();
  for (size_t i = 0;i < senders.size(); ++i) {
    senders[i]->flush();
  }

  comm->close();

  for (size_t i = 0;i < senders.size(); ++i) {
    delete senders[i];
  }
  // sample the received-bytes counter before the receivers are destroyed
  size_t bytesreceived = bytes_received();
  for (size_t i = 0;i < receivers.size(); ++i) {
    receivers[i]->shutdown();
    delete receivers[i];
  }
  // drop the now-dangling pointers
  senders.clear();
  receivers.clear();
  // shutdown function call handlers
  for (size_t i = 0;i < fcallqueue.size(); ++i) fcallqueue[i].stop_blocking();
  fcallhandlers.join();
  // traffic statistics are printed only when last_dc_procid is 0
  if(last_dc_procid==0){
  logstream(LOG_INFO) << "Bytes Sent: " << bytessent << std::endl;
  logstream(LOG_INFO) << "Calls Sent: " << calls_sent() << std::endl;
  logstream(LOG_INFO) << "Network Sent: " << network_bytes_sent() << std::endl;
  logstream(LOG_INFO) << "Bytes Received: " << bytesreceived << std::endl;
  logstream(LOG_INFO) << "Calls Received: " << calls_received() << std::endl;
  }
  // comm was closed above; it is only destroyed here, as the last step
  delete comm;

}
  /**
   * Processes the token currently held by this process.
   *
   * If this process was the last one to change the token and the token's
   * sent and received totals match, completion is declared: force_done is
   * sent to every other process, done is set, and sleeping fibers are
   * woken. Otherwise the token totals are updated with the change in this
   * process's call counters since the previous pass and the token is
   * forwarded to process (procid + 1) % numprocs.
   *
   * The caller must already hold the token lock; hastoken must be true.
   */
  void fiber_async_consensus::pass_the_token() {
    // note that this function does not acquire the token lock
    // the caller must acquire it 
    assert(hastoken);
    // first check if we are done
    if (cur_token.last_change == rmi.procid() && 
        cur_token.total_calls_received == cur_token.total_calls_sent) {
      logstream(LOG_INFO) << "Completed Token: " 
                          << cur_token.total_calls_received << " " 
                          << cur_token.total_calls_sent << std::endl;
      // we have completed a loop around!
      // broadcast a completion
      for (procid_t i = 0;i < rmi.numprocs(); ++i) {
        if (i != rmi.procid()) {
          rmi.control_call(i,
                           &fiber_async_consensus::force_done);
        }
      }
      // set the complete flag
      // we can't call consensus() since it will deadlock
      done = true;
      // this is the same code as cancel(), but we can't call cancel 
      // since we are holding on to a lock
      if (numactive < ncpus) {
        // this is safe. Note that it is done from within 
        // the critical section.
        for (size_t i = 0;i < ncpus; ++i) {
          // sleeping[i] is added directly to the active count
          numactive += sleeping[i];
          if (sleeping[i]) {
            sleeping[i] = 0;
            // this here is basically cond[i].signal();
            size_t ch = cond[i];
            if (ch != 0) fiber_control::schedule_tid(ch);
          }
        }
      }

    }
    else {
      // update the token
      size_t callsrecv;
      size_t callssent;
    
      // counters come from the attached object when there is one,
      // otherwise from the distributed control object
      if (attachedobj) {
        callsrecv = attachedobj->calls_received();
        callssent = attachedobj->calls_sent();
      }
      else {
        callsrecv = rmi.dc().calls_received();
        callssent = rmi.dc().calls_sent();
      }

      // only a change since the previous pass modifies the token and
      // marks this process as the last one to change it
      if (callssent != last_calls_sent ||
          callsrecv != last_calls_received) {
        cur_token.total_calls_sent += callssent - last_calls_sent;
        cur_token.total_calls_received += callsrecv - last_calls_received;
        cur_token.last_change = rmi.procid();
      }
      //std::cout << "Sending token: (" << cur_token.total_calls_sent
      //<< ", " << cur_token.total_calls_received << ")" << std::endl;

      last_calls_sent = callssent;
      last_calls_received = callsrecv;
      // send it along.
      hastoken = false;
      /*logstream(LOG_INFO) << "Passing Token " << rmi.procid() << "-->" 
                          << (rmi.procid() + 1) % rmi.numprocs() << ": "
                          << cur_token.total_calls_received << " " 
                          << cur_token.total_calls_sent << std::endl;
*/
      rmi.control_call((procid_t)((rmi.procid() + 1) % rmi.numprocs()),
                       &fiber_async_consensus::receive_the_token,
                       cur_token);
    }
  }
Example #22
0
/**
 * Logs the target file name and the vector length, then writes \p output
 * by forwarding all arguments to save_matrix_market_format_vector.
 */
inline void write_output_vector(const std::string & datafile, const vec& output, bool issparse, std::string comment = ""){
  logstream(LOG_INFO) << "Going to write output to file: " << datafile
                      << " (vector of size: " << output.size() << ") "
                      << std::endl;

  save_matrix_market_format_vector(datafile, output, issparse, comment);
}
Example #23
0
/**
 * Reads a vector from a matrix market file.
 *
 * Both coordinate (sparse) and array (dense) files are accepted. For a
 * dense file whose header has more columns than rows, the two dimensions
 * are swapped so the longer one becomes the vector length.
 *
 * \param filename        Input file.
 * \param optional_field  Forwarded to open_file; if the file could not be
 *                        opened and this flag is set, zeros(1) is
 *                        returned.
 * \param allow_zeros     When false, a zero value in the file is
 *                        reported through logstream(LOG_FATAL).
 * \return Vector of length M (the row count after the optional swap).
 */
vec load_matrix_market_vector(const std::string & filename,  bool optional_field, bool allow_zeros)
{
  logstream(LOG_INFO) <<"Going to read matrix market vector from input file: " << filename << std::endl;

  FILE * f = open_file(filename.c_str(), "r", optional_field);
  // a missing optional file yields a one-element zero vector
  if (optional_field && f == NULL){
    return zeros(1);
  }

  MM_typecode matcode;
  if (mm_read_banner(f, &matcode) != 0)
    logstream(LOG_FATAL) << "Could not process Matrix Market banner." << std::endl;

  // reject the one type combination this reader does not support
  if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
      mm_is_sparse(matcode) )
    logstream(LOG_FATAL) << "sorry, this application does not support " << std::endl <<
      "Market Market type: " << mm_typecode_to_str(matcode) << std::endl;

  const bool sparse_input = mm_is_sparse(matcode);

  // read the dimensions and the number of entries that follow
  uint M, N;
  size_t nz;
  if (sparse_input){
    const int status = mm_read_mtx_crd_size(f, &M, &N, &nz);
    if (status != 0)
      logstream(LOG_FATAL) << "failed to read matrix market cardinality size " << std::endl;
  }
  else {
    const int status = mm_read_mtx_array_size(f, &M, &N);
    if (status != 0)
      logstream(LOG_FATAL) << "failed to read matrix market vector size " << std::endl;
    if (N > M){ // row vector: swap so that M is the length
      const uint longer = N;
      N = M;
      M = longer;
    }
    nz = M*N;
  }

  vec ret = zeros(M);
  uint row, col;
  double val;

  for (size_t entry = 0; entry < nz; ++entry)
  {
    // sparse lines carry "row col value", dense lines just "value"
    int expected_fields;
    int fields_read;
    if (sparse_input){
      expected_fields = 3;
      fields_read = fscanf(f, "%u %u %lg\n", &row, &col, &val);
    }
    else {
      expected_fields = 1;
      fields_read = fscanf(f, "%lg\n", &val);
    }
    if (fields_read != expected_fields){
      logstream(LOG_FATAL) << "Failed reading input file: " << filename << "Problm at data row " << entry << " (not including header and comment lines)" << std::endl;
    }

    if (sparse_input){
      // adjust from 1-based to 0-based
      row--;
      col--;
    }
    else {
      row = entry;
      col = 0;
    }

    // row is unsigned, so an index of 0 in the file wraps around above
    // and is caught by the upper-bound check
    assert(row < M);
    assert(col == 0);
    if (val == 0 && !allow_zeros)
      logstream(LOG_FATAL)<<"Zero entries are not allowed in a sparse matrix market vector. Use --zero=true to avoid this error"<<std::endl;

    ret[row] = val;
  }

  fclose(f);
  logstream(LOG_INFO)<<"Succesfully read a vector of size: " << M << " [ " << nz << "]" << std::endl;
  return ret;
}
Example #24
0
/**
 * Converts a matrix-market style rating file into shards.
 *
 * If shards already exist for \p base_filename and the modification-time
 * check accepts them, the stored global mean is read back and the
 * existing shard count is returned without further work. Otherwise every
 * entry is read, validated against the matrix dimensions and the
 * minval / maxval limits, and -- when decide_if_edge_is_active accepts
 * it -- added to the sharder and accumulated into the running mean.
 *
 * \param base_filename   Input file.
 * \param nodes           Forwarded to detect_matrix_size.
 * \param edges           Forwarded to detect_matrix_size.
 * \param tokens_per_row  3 for "row col value" lines, 2 for "row col"
 *                        lines (val then keeps its initial 1.0); any
 *                        other value hits assert(false).
 * \param type            TRAINING or VALIDATION; selects which global
 *                        dimension and mean variables are used.
 * \param allow_square    When set and M == N, the column index is used
 *                        as-is as the target vertex id; otherwise it is
 *                        shifted by M.
 * \return Number of shards, or -1 when a validation file is not found.
 *
 * NOTE(review): if the file cannot be opened and type is neither
 * TRAINING nor VALIDATION, execution continues with f == NULL -- confirm
 * that no other type value reaches this function.
 */
int convert_matrixmarket(std::string base_filename, size_t nodes = 0, size_t edges = 0, int tokens_per_row = 3, int type = TRAINING, int allow_square = true) {
  // Note, code based on: http://math.nist.gov/MatrixMarket/mmio/c/example_read.c
  FILE *f;
  size_t nz;

  /**
   * Create sharder object
   */
  int nshards;
  // reuse existing shards when the modification-time check accepts them
  if ((nshards = find_shards<als_edge_type>(base_filename, get_option_string("nshards", "auto")))) {
    if (check_origfile_modification_earlier<als_edge_type>(base_filename, nshards)) {
      logstream(LOG_INFO) << "File " << base_filename << " was already preprocessed, won't do it again. " << std::endl;
      read_global_mean(base_filename, type);
      return nshards;
    }
  }

  sharder<als_edge_type> sharderobj(base_filename);
  sharderobj.start_preprocessing();

  // training data uses the globals M / N, any other type uses Me / Ne
  detect_matrix_size(base_filename, f, type == TRAINING?M:Me, type == TRAINING?N:Ne, nz, nodes, edges, type);
  if (f == NULL){
    if (type == TRAINING){
      logstream(LOG_FATAL)<<"Failed to open training input file: " << base_filename << std::endl;
    }
    else if (type == VALIDATION){
      logstream(LOG_INFO)<<"Validation file: "  << base_filename << " is not found. " << std::endl;
      return -1;
    }
  }

  compute_matrix_size(nz, type);   
  uint I, J;
  double val = 1.0;
  bool active_edge = true;
  int zero_entries = 0;

  for (size_t i=0; i<nz; i++)
    {
      if (tokens_per_row == 3){
        int rc = fscanf(f, "%u %u %lg\n", &I, &J, &val);
        if (rc != 3)
          logstream(LOG_FATAL)<<"Error when reading input file: " << i << std::endl;
        // zero values are fatal unless allow_zeros is set, in which case
        // they are counted and skipped
        if (val == 0 && ! allow_zeros)
          logstream(LOG_FATAL)<<"Encountered zero edge [ " << I << " " <<J << " 0] in line: " << i << " . Run with --allow_zeros=1 to ignore zero weights." << std::endl;
        else if (val == 0){
           zero_entries++;
           continue;
        }
      }
      else if (tokens_per_row == 2){
        int rc = fscanf(f, "%u %u\n", &I, &J);
        if (rc != 2)
          logstream(LOG_FATAL)<<"Error when reading input file: " << i << std::endl;
      }
      else assert(false);

      if (I ==987654321 || J== 987654321) //hack - to be removed later
        continue;
      I-=(uint)input_file_offset;  /* adjust from 1-based to 0-based */
      J-=(uint)input_file_offset;
      // I and J are unsigned, so an index below input_file_offset wraps
      // around and is caught by these upper-bound checks
      if (I >= M)
        logstream(LOG_FATAL)<<"Row index larger than the matrix row size " << I+1 << " > " << M << " in line: " << i << std::endl;
      if (J >= N)
        logstream(LOG_FATAL)<<"Col index larger than the matrix col size " << J+1 << " > " << N << " in line; " << i << std::endl;
      // the sentinel limits -1e100 / 1e100 disable the range checks
      if (minval != -1e100 && val < minval)
        logstream(LOG_FATAL)<<"Found illegal rating value: " << val << " where min value is: " << minval << std::endl;
      if (maxval != 1e100 && val > maxval)
        logstream(LOG_FATAL)<<"Found illegal rating value: " << val << " where max value is: " << maxval << std::endl;

      active_edge = decide_if_edge_is_active(i, type);

      if (active_edge){
        if (type == TRAINING)
          globalMean += val;
        else globalMean2 += val;
        // target id is J for a square matrix with allow_square set,
        // otherwise M + J
        sharderobj.preprocessing_add_edge(I, (M==N && allow_square)?J:M + J, als_edge_type((float)val));
      } 
    }

    // turn the accumulated sums into means (dividing by the globals L / Le)
    if (type == TRAINING){
      uint toadd = 0;
      if (implicitratingtype == IMPLICIT_RATING_RANDOM)
        toadd = add_implicit_edges(implicitratingtype, sharderobj);
      globalMean += implicitratingvalue * toadd;
      L += toadd;
      globalMean /= L;
      logstream(LOG_INFO) << "Global mean is: " << globalMean << " Now creating shards." << std::endl;
    }
    else {
      globalMean2 /= Le;
      logstream(LOG_INFO) << "Global mean is: " << globalMean2 << " Now creating shards." << std::endl;
    }
    write_global_mean(base_filename, type);
    sharderobj.end_preprocessing();

  if (zero_entries)
     logstream(LOG_WARNING)<<"Found " << zero_entries << " zero edges!" << std::endl; 
  fclose(f);


  logstream(LOG_INFO) << "Now creating shards." << std::endl;

  // Shard with a specified number of shards, or determine automatically if not defined
  nshards = sharderobj.execute_sharding(get_option_string("nshards", "auto"));
  logstream(LOG_INFO) << "Successfully finished sharding for " << base_filename<< std::endl;
  logstream(LOG_INFO) << "Created " << nshards << " shards." << std::endl;

  return nshards;
}
Example #25
0
/**
 * Preprocess a matrix-market ratings file together with an item-item
 * similarity file into a single sharded graph.
 *
 * If shards for base_filename already exist and
 * check_origfile_modification_earlier() accepts them, the global mean is
 * re-read and the existing shard count is returned without touching the input.
 *
 * Otherwise:
 *  - every rating entry (I, J, val) is counted in `degrees` (degrees[I] and
 *    degrees[M+J]) and, when I lies in [start_user, end_user), added as the
 *    edge I -> M+J with als_edge_type(val, 0);
 *  - every similarity entry (I, J, val) is added as the edge M+I -> M+J,
 *    carrying val in the first edge field when I < J and in the second field
 *    when I > J. Self-similarities (I == J) are reported at LOG_FATAL.
 *
 * Reads and writes state declared outside this function (M, N, L,
 * allow_zeros, input_file_offset, start_user, end_user).
 * NOTE(review): the checks below rely on logstream(LOG_FATAL) not returning
 * normally -- confirm against the logger implementation.
 *
 * @param base_filename    ratings file; also used as the sharder base name
 * @param similarity_file  item similarity file; its dimensions must be N x N
 * @param tokens_per_row   3 for "row col value" lines, 2 for "row col" lines;
 *                         any other value hits assert(false)
 * @param degrees          resized to M+N and incremented once per rating
 *                         entry for both of its endpoints
 * @return number of shards created (or found, on the cached path)
 */
int convert_matrixmarket_and_item_similarity(std::string base_filename, std::string similarity_file, int tokens_per_row, vec & degrees) {
  FILE *f = NULL, *fsim = NULL;
  size_t nz, nz_sim;
  /**
   * Create sharder object
   */
  int nshards;
  if ((nshards = find_shards<als_edge_type>(base_filename, get_option_string("nshards", "auto")))) {
    if (check_origfile_modification_earlier<als_edge_type>(base_filename, nshards)) {
      logstream(LOG_INFO) << "File " << base_filename << " was already preprocessed, won't do it again. " << std::endl;
      read_global_mean(base_filename, TRAINING);
      return nshards;
    }
  }

  sharder<als_edge_type> sharderobj(base_filename);
  sharderobj.start_preprocessing();

  // Read the headers of both files; the similarity matrix must be square and
  // match the number of items (columns) of the rating matrix.
  detect_matrix_size(base_filename, f, M, N, nz);
  if (f == NULL)
    logstream(LOG_FATAL)<<"Failed to open training input file: " << base_filename << std::endl;
  uint N_row = 0 ,N_col = 0;
  detect_matrix_size(similarity_file, fsim, N_row, N_col, nz_sim);
  if (fsim == NULL || nz_sim == 0)
    logstream(LOG_FATAL)<<"Failed to open item similarity input file: " << similarity_file << std::endl;
  if (N_row != N || N_col != N)
    logstream(LOG_FATAL)<<"Wrong item similarity file matrix size: " << N_row <<" x " << N_col << "  Instead of " << N << " x " << N << std::endl;
  L=nz + nz_sim;

  degrees.resize(M+N);

  uint I, J;
  double val = 1.0;  // stays 1.0 for every entry when tokens_per_row == 2
  int zero_entries = 0;
  unsigned int actual_edges = 0;
  logstream(LOG_INFO) << "Starting to read matrix-market input. Matrix dimensions: "
    << M << " x " << N << ", non-zeros: " << nz << std::endl;

  // Pass 1: user -> item rating edges.
  for (size_t i=0; i<nz; i++){
    if (tokens_per_row == 3){
      int rc = fscanf(f, "%u %u %lg\n", &I, &J, &val);
      if (rc != 3)
        logstream(LOG_FATAL)<<"Error when reading input file in line: " << i << std::endl;
      if (val == 0 && ! allow_zeros)
        logstream(LOG_FATAL)<<"Zero weight encountered at input file line: " << i << " . Run with --allow_zeros=1 to ignore zero weights." << std::endl;
      else if (val == 0) { zero_entries++; continue; }
    }
    else if (tokens_per_row == 2){
      int rc = fscanf(f, "%u %u\n", &I, &J);
      if (rc != 2)
        // Fixed: the message used to print the line index as if it were the file name.
        logstream(LOG_FATAL)<<"Error when reading input file in line: " << i << std::endl;
    }
    else assert(false);

    I-=input_file_offset;  /* adjust from 1-based to 0-based */
    J-=input_file_offset;
    // I and J are unsigned, so an index below the offset wraps around and is
    // caught by these upper-bound checks as well.
    if (I >= M)
      logstream(LOG_FATAL)<<"Row index larger than the matrix row size " << I << " > " << M << " in line: " << i << std::endl;
    if (J >= N)
      logstream(LOG_FATAL)<<"Col index larger than the matrix col size " << J << " > " << N << " in line; " << i << std::endl;
    // Degrees are counted for every entry, including those skipped by the
    // user-range filter below.
    degrees[J+M]++;
    degrees[I]++;
    if (I< (uint)start_user || I >= (uint)end_user){
      continue;
    }
    sharderobj.preprocessing_add_edge(I, M + J, als_edge_type((float)val, 0));
    //std::cout<<"adding an edge: " <<I << " -> " << M+J << std::endl;
    actual_edges++;
  }

  logstream(LOG_DEBUG)<<"Finished loading " << actual_edges << " ratings from file: " << base_filename << std::endl;

  // Pass 2: item -> item similarity edges (both endpoints live in the item
  // id range, i.e. they are shifted by M).
  for (size_t i=0; i<nz_sim; i++){
    if (tokens_per_row == 3){
      int rc = fscanf(fsim, "%u %u %lg\n", &I, &J, &val);
      if (rc != 3)
        logstream(LOG_FATAL)<<"Error when reading input file: " << similarity_file << " line: " << i << std::endl;
    }
    else if (tokens_per_row == 2){
      int rc = fscanf(fsim, "%u %u\n", &I, &J);
      if (rc != 2)
        // Fixed: report the file name and line like the 3-token branch does.
        logstream(LOG_FATAL)<<"Error when reading input file: " << similarity_file << " line: " << i << std::endl;
    }
    else assert(false);

    I-=input_file_offset;  /* adjust from 1-based to 0-based */
    J-=input_file_offset;
    if (I >= N)
      // Fixed: rows of the similarity matrix are bounded by N, not M.
      logstream(LOG_FATAL)<<"Row index larger than the matrix row size " << I << " > " << N << " in line: " << i << std::endl;
    if (J >= N)
      logstream(LOG_FATAL)<<"Col index larger than the matrix col size " << J << " > " << N << " in line; " << i << std::endl;
    if (I == J)
      logstream(LOG_FATAL)<<"Item similarity to itself found for item " << I << " in line; " << i << std::endl;
    //std::cout<<"Adding an edge between "<<M+I<< " : " << M+J << "  " << (I<J)  << " " << val << std::endl; 
    // The value goes into the first edge field for I < J and into the second
    // one for I > J.
    sharderobj.preprocessing_add_edge(M+I, M+J, als_edge_type(I < J? val: 0, I>J? val: 0));
    actual_edges++;
  }

  // L ends up as the number of edges actually handed to the sharder.
  L = actual_edges;
  logstream(LOG_DEBUG)<<"Finished loading " << nz_sim << " ratings from file: " << similarity_file << std::endl;
  write_global_mean(base_filename, TRAINING);
  sharderobj.end_preprocessing();

  if (zero_entries)
    logstream(LOG_WARNING)<<"Found " << zero_entries << " edges with zero weight!" << std::endl;

  fclose(f);
  fclose(fsim);


  logstream(LOG_INFO) << "Now creating shards." << std::endl;

  // Shard with a specified number of shards, or determine automatically if not defined
  nshards = sharderobj.execute_sharding(get_option_string("nshards", "auto"));
  logstream(LOG_INFO) << "Successfully finished sharding for " << base_filename << std::endl;
  logstream(LOG_INFO) << "Created " << nshards << " shards." << std::endl;

  return nshards;
}
Example #26
0
int convert_matrixmarket4(std::string base_filename, bool add_time_edges = false, bool square = false, int type = TRAINING, int matlab_time_offset = 1) {
  // Note, code based on: http://math.nist.gov/MatrixMarket/mmio/c/example_read.c
  FILE *f = NULL;
  size_t nz;
  /**
   * Create sharder object
   */
  int nshards;
  if ((nshards = find_shards<als_edge_type>(base_filename, get_option_string("nshards", "auto")))) {

    if (check_origfile_modification_earlier<als_edge_type>(base_filename, nshards)) {
      logstream(LOG_INFO) << "File " << base_filename << " was already preprocessed, won't do it again. " << std::endl;
      read_global_mean(base_filename, type);
    }
    if (type == TRAINING)
      time_nodes_offset = M+N;
     return nshards;
  }

  sharder<als_edge_type> sharderobj(base_filename);
  sharderobj.start_preprocessing();


  detect_matrix_size(base_filename, f, type == TRAINING? M:Me, type == TRAINING? N:Ne, nz);
  if (f == NULL){
    if (type == VALIDATION){
      logstream(LOG_INFO)<< "Did not find validation file: " << base_filename << std::endl;
      return -1;
    }
    else if (type == TRAINING)
      logstream(LOG_FATAL)<<"Failed to open training input file: " << base_filename << std::endl;
  }

  if (type == TRAINING)
     time_nodes_offset = M+N;
 
 compute_matrix_size(nz, type); 

  uint I, J;
  double val, time;
  bool active_edge = true;

    for (size_t i=0; i<nz; i++)
    {
      int rc = fscanf(f, "%d %d %lg %lg\n", &I, &J, &time, &val);
      if (rc != 4)
        logstream(LOG_FATAL)<<"Error when reading input file - line " << i << std::endl;
      if (time < 0)
        logstream(LOG_FATAL)<<"Time (third columns) should be >= 0 " << std::endl;
      I-=input_file_offset;  /* adjust from 1-based to 0-based */
      J-=input_file_offset;
      if (I >= M)
        logstream(LOG_FATAL)<<"Row index larger than the matrix row size " << I << " > " << M << " in line: " << i << std::endl;
      if (J >= N)
        logstream(LOG_FATAL)<<"Col index larger than the matrix col size " << J << " > " << N << " in line; " << i << std::endl;
      K = std::max((int)time, (int)K);
      time -= matlab_time_offset;
      if (time < 0 && add_time_edges)
        logstream(LOG_FATAL)<<"Time bins should be >= " << matlab_time_offset << " in row " << i << std::endl;

      //only for tensor ALS we add edges between user and time bin and also item and time bin
      //time bins are numbered beteen M+N to M+N+K
      if (!weighted_als)
         time += time_nodes_offset;

      //avoid self edges
      if (square && I == J)
        continue;

      active_edge = decide_if_edge_is_active(i, type);

      if (active_edge){
        if (type == TRAINING)
        globalMean += val;
        else globalMean2 += val;
        sharderobj.preprocessing_add_edge(I, (square? J : (M + J)), als_edge_type(val, time));
      }
      //in case of a tensor, add besides of the user-> movie edge also
      //time -> user and time-> movie edges
      if (add_time_edges){
        sharderobj.preprocessing_add_edge((uint)time, I, als_edge_type(val, M+J));
        sharderobj.preprocessing_add_edge((uint)time, M+J , als_edge_type(val, I));
      }
    }

    if (type == TRAINING){
      uint toadd = 0;
      if (implicitratingtype == IMPLICIT_RATING_RANDOM)
        toadd = add_implicit_edges4(implicitratingtype, sharderobj);
      globalMean += implicitratingvalue * toadd;
      L += toadd;
      globalMean /= L;
      logstream(LOG_INFO) << "Global mean is: " << globalMean << " time bins: " << K << " . Now creating shards." << std::endl;
    }
    else {
      globalMean2 /= Le;
      logstream(LOG_INFO) << "Global mean is: " << globalMean2 << " time bins: " << K << " . Now creating shards." << std::endl;
    }
    write_global_mean(base_filename, type);

    sharderobj.end_preprocessing();

 
  fclose(f);
  logstream(LOG_INFO) << "Now creating shards." << std::endl;

  // Shard with a specified number of shards, or determine automatically if not defined
  nshards = sharderobj.execute_sharding(get_option_string("nshards", "auto"));

  return nshards;
}
Example #27
0
 /**
  * Generic fallback for parsing an edge value from text.
  * It never produces a value: it reports at LOG_FATAL that a parse function
  * for the concrete type has to be provided, then trips an assertion.
  */
 void parse(T &x, const char * s) {
     logstream(LOG_FATAL)
         << "You need to define parse<your-type>(your-type &x, const char *s) function"
            " to support parsing the edge value."
         << std::endl;
     assert(false);
 }
Example #28
0
 /**
  * Scheduling request that is ignored apart from counting it in nwarnings.
  * A warning is logged on the first call and then on every 10000th call
  * after it.
  */
 virtual void add_task(vid_t vid) {
     // Post-increment kept as a single expression: the test uses the count
     // from before this call.
     const bool time_to_warn = (nwarnings++ % 10000) == 0;
     if (!time_to_warn) {
         return;
     }
     logstream(LOG_WARNING) << "Tried to add task to scheduler, but scheduling was not enabled!" << std::endl;
 }
Example #29
0
// Logging example: writes a single Debug-level message through a LogStream
// bound to "test.log" and then flushes it. The message text indicates this is
// meant to be run from a thread.
void f() {
    // Local stream on "test.log"; NOTE(review): `New` presumably selects
    // create/truncate mode -- confirm against the LogStream constructor.
    LogStream logstream("test.log",New);
    // Configure the stream's modes via the enableModes helper (defined elsewhere).
    enableModes(logstream);
    LOG(logstream,Debug) << "Debug message from a threaded function.\n";
    // Flush explicitly so the message is written out before the stream goes
    // out of scope.
    FLUSH(logstream);
}
Example #30
0
    static void delete_shards(std::string base_filename, int nshards) {
#ifdef DYNAMICEDATA
        typedef int EdgeDataType;
#else
        typedef EdgeDataType_ EdgeDataType;
#endif
        logstream(LOG_DEBUG) << "Deleting files for " << base_filename << " shards=" << nshards << std::endl;
        std::string intervalfname = filename_intervals(base_filename, nshards);
        if (file_exists(intervalfname)) {
            int err = remove(intervalfname.c_str());
            if (err != 0) logstream(LOG_ERROR) << "Error removing file " << intervalfname
                << ", " << strerror(errno) << std::endl;
            
        }
        /* Note: degree file is not removed, because same graph with different number
         of shards share the file. This should be probably change.
         std::string degreefname = filename_degree_data(base_filename);
         if (file_exists(degreefname)) {
         remove(degreefname.c_str());
         } */
        
        size_t blocksize = 4096 * 1024;
        while (blocksize % sizeof(EdgeDataType) != 0) blocksize++;
        
        for(int p=0; p < nshards; p++) {
            int blockid = 0;
            std::string filename_edata = filename_shard_edata<EdgeDataType>(base_filename, p, nshards);
            std::string fsizename = filename_edata + ".size";
            if (file_exists(fsizename)) {
                int err = remove(fsizename.c_str());
                if (err != 0) logstream(LOG_ERROR) << "Error removing file " << fsizename
                    << ", " << strerror(errno) << std::endl;
            }
            while(true) {
                std::string block_filename = filename_shard_edata_block(filename_edata, blockid, blocksize);
                if (file_exists(block_filename)) {
                    int err = remove(block_filename.c_str());
                    if (err != 0) logstream(LOG_ERROR) << "Error removing file " << block_filename
                        << ", " << strerror(errno) << std::endl;
                    
                } else {
                    
                    break;
                }
#ifdef DYNAMICEDATA
                delete_block_uncompressed_sizefile(block_filename);
#endif
                blockid++;
            }
            std::string dirname = dirname_shard_edata_block(filename_edata, blocksize);
            if (file_exists(dirname)) {
                int err = remove(dirname.c_str());
                if (err != 0) logstream(LOG_ERROR) << "Error removing directory " << dirname
                    << ", " << strerror(errno) << std::endl;
                
            }
            
            std::string adjname = filename_shard_adj(base_filename, p, nshards);
            logstream(LOG_DEBUG) << "Deleting " << adjname << " exists: " << file_exists(adjname) << std::endl;
            
            if (file_exists(adjname)) {
                int err = remove(adjname.c_str());
                if (err != 0) logstream(LOG_ERROR) << "Error removing file " << adjname
                    << ", " << strerror(errno) << std::endl;
            }
            
            
        }
        
        std::string numv_filename = base_filename + ".numvertices";
        if (file_exists(numv_filename)) {
            int err = remove(numv_filename.c_str());
            if (err != 0) logstream(LOG_ERROR) << "Error removing file " << numv_filename
                << ", " << strerror(errno) << std::endl;
        }
        
        /* Degree file */
        std::string deg_filename = filename_degree_data(base_filename);
        if (file_exists(deg_filename)) {
            int err = remove(deg_filename.c_str());
            if (err != 0) logstream(LOG_ERROR) << "Error removing file " << deg_filename
                << ", " << strerror(errno) << std::endl;
        }
    }