Ejemplo n.º 1
0
// Time loop of the 3D finite-difference benchmark. Every pass of the loop
// performs two stencil sweeps, with ptr_next and ptr_prev exchanging their
// argument positions between the first and the second sweep. Rank 0 prints
// two timings per pass: the first transfer alone, then the first transfer
// plus the first sweep.
void iso_3dfd2(float *ptr_next, float *ptr_prev, float *ptr_vel, float *coeff,
	      const int n1, const int n2, const int n3, int nreps) {
  int my_rank;

  transfer(ptr_vel, n1, n2, n3);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  for (int step = 0; step < nreps; step += 2) {
    const double t_begin = walltime();
    transfer(ptr_prev, n1, n2, n3);
    const double t_transfer = walltime();
    float elapsed = t_transfer - t_begin;
    if (my_rank == 0) printf("%8.2f\n", elapsed);

    iso_3dfd_stencil2(ptr_next, ptr_prev, ptr_vel, coeff, n1, n2, n3);
    const double t_sweep = walltime();
    // Still measured from t_begin, so this figure includes the transfer.
    elapsed = t_sweep - t_begin;
    if (my_rank == 0) printf("%8.2f\n", elapsed);

    // here's where boundary conditions+halo exchanges happen
    MPI_Barrier(MPI_COMM_WORLD);
    transfer(ptr_next, n1, n2, n3);

    // Second sweep: previous and next trade places.
    iso_3dfd_stencil2(ptr_prev, ptr_next, ptr_vel, coeff, n1, n2, n3);
    MPI_Barrier(MPI_COMM_WORLD);
  } // time loop
}
Ejemplo n.º 2
0
/*
 * Benchmark body for mzed_add: builds two randomized m x n matrices over the
 * field selected by p->k, adds them once, and reports the measurement in
 * data[0] (walltime) and data[1] (cpucycles difference).
 * Always returns 0 and sets *data_len to 2.
 */
int run_mzed_add(void *_p, unsigned long long *data, int *data_len) {
  struct smallops_params *params = (struct smallops_params *)_p;
  *data_len = 2;

  gf2e *field = gf2e_init(irreducible_polynomials[params->k][1]);

  /* Operands are created and randomized in the same order as before so the
   * random stream is consumed identically. */
  mzed_t *lhs = mzed_init(field, params->m, params->n);
  mzed_randomize(lhs);
  mzed_t *rhs = mzed_init(field, params->m, params->n);
  mzed_randomize(rhs);
  mzed_t *sum = mzed_init(field, params->m, params->n);

  /* --- timed region --- */
  data[0] = walltime(0);
  data[1] = cpucycles();

  mzed_add(sum, lhs, rhs);

  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
  /* --- end of timed region --- */

  mzed_free(lhs);
  mzed_free(rhs);
  mzed_free(sum);
  gf2e_free(field);

  return 0;
}
Ejemplo n.º 3
0
/*
 * Draws ntrials samples from the multi-precision discrete Gaussian sampler
 * configured by (sigma_, c_, tau, alg) and returns the square root of the
 * mean of the squared samples. The time spent in the sampling loop is
 * written to *t via walltime().
 */
double run_mp(double sigma_, double c_, int tau, dgs_disc_gauss_alg_t alg, size_t ntrials, unsigned long long *t) {
  mpfr_t sigma;
  mpfr_t c;
  mpz_t sample;
  gmp_randstate_t state;

  mpfr_set_default_prec(80);
  mpfr_init_set_d(sigma, sigma_, MPFR_RNDN);
  gmp_randinit_default(state);
  mpfr_init_set_d(c, c_, MPFR_RNDN);

  dgs_disc_gauss_mp_t *sampler = dgs_disc_gauss_mp_init(sigma, c, tau, alg);

  mpz_init(sample);
  double sum_sq = 0.0;

  *t = walltime(0);
  for (size_t trial = 0; trial != ntrials; ++trial) {
    sampler->call(sample, sampler, state);
    const double x = mpz_get_d(sample);
    sum_sq += x * x;
  }
  *t = walltime(*t);

  dgs_disc_gauss_mp_clear(sampler);
  mpfr_clear(sigma);
  mpz_clear(sample);
  mpfr_clear(c);
  gmp_randclear(state);

  return sqrt(sum_sq / ntrials);
}
Ejemplo n.º 4
0
	// shouldstop reports whether the weighted elapsed wall time has reached
	// the weighted cost divided by lambda. It never asks to stop while
	// cost is negative.
	bool shouldstop() const {
		if (this->cost < 0)
			return false;

		double elapsed = walltime() - this->res.wallstart;
		double weightedtime = wtime * elapsed;
		double weightedcost = wcost * this->cost;
		double threshold = weightedcost / lambda;
		return weightedtime >= threshold;
	}
Ejemplo n.º 5
0
	// shouldstop hands the current cost and the wall time elapsed since
	// the search started to the monitor and returns its verdict. It never
	// asks to stop while cost is negative.
	bool shouldstop() const {
		if (this->cost < 0)
			return false;

		double elapsed = walltime() - this->res.wallstart;
		double current = this->cost;
		return mon.stop(current, elapsed);
	}
Ejemplo n.º 6
0
/*
 * Draws ntrials samples from the double-precision discrete Gaussian sampler
 * configured by (sigma, c, tau, alg) and returns the square root of the mean
 * of the squared samples. The time spent in the sampling loop is written to
 * *t via walltime().
 */
double run_dp(double sigma, double c, int tau, dgs_disc_gauss_alg_t alg, size_t ntrials, unsigned long long *t) {
  /* NOTE: this GMP state is set up and torn down but never handed to the
   * sampler, whose call() only receives the sampler itself. */
  gmp_randstate_t state;
  gmp_randinit_default(state);

  dgs_disc_gauss_dp_t *sampler = dgs_disc_gauss_dp_init(sigma, c, tau, alg);
  double sum_sq = 0.0;

  *t = walltime(0);
  for (size_t trial = 0; trial != ntrials; ++trial) {
    const long sample = sampler->call(sampler);
    const double x = (double)sample;
    sum_sq += x * x;
  }
  *t = walltime(*t);

  dgs_disc_gauss_dp_clear(sampler);
  gmp_randclear(state);

  return sqrt(sum_sq / ntrials);
}
/*
 * Benchmark body for mzd_trsm_lower_right: B is a random m x n matrix and L a
 * random n x n matrix whose entries above the diagonal are cleared and whose
 * diagonal is set to 1. data[0] receives the walltime measurement and data[1]
 * the cpucycles difference. Always returns 0 and sets *data_len to 2.
 */
int run(void *_p, unsigned long long *data, int *data_len) {
  struct trsm_params *params = (struct trsm_params *)_p;
  *data_len = 2;

  mzd_t *B = mzd_init(params->m, params->n);
  mzd_t *L = mzd_init(params->n, params->n);
  mzd_randomize(B);
  mzd_randomize(L);

  /* Shape L: unit diagonal, zeros strictly above it. */
  for (rci_t row = 0; row < params->n; ++row) {
    mzd_write_bit(L, row, row, 1);
    for (rci_t col = row + 1; col < params->n; ++col) {
      mzd_write_bit(L, row, col, 0);
    }
  }

  /* --- timed region --- */
  data[0] = walltime(0);
  data[1] = cpucycles();
  mzd_trsm_lower_right(L, B, 2048);
  data[0] = walltime(data[0]);
  data[1] = cpucycles() - data[1];
  /* --- end of timed region --- */

  mzd_free(B);
  mzd_free(L);
  return 0;
}
Ejemplo n.º 8
0
	// updateopen does nothing until the expansion count reaches
	// nextresort. Once it does, the time-per-expansion and average-delay
	// estimates are refreshed from the expansions made since the previous
	// call that got this far, nextresort is doubled, and the open heap is
	// rebuilt through reinitheap().
	void updateopen() {
		if (this->res.expd < nextresort)
			return;

		// Expansions and wall time since the last refresh.
		double now = walltime();
		double sincelast = this->res.expd - lastexpd;

		timeper = (now - lasttime) / sincelast;
		avgdelay = delaysum / sincelast;

		// Start a new measurement window.
		lastexpd = this->res.expd;
		lasttime = now;
		delaysum = 0;

		nextresort *= 2;
		nresort++;
		reinitheap();
	}
Ejemplo n.º 9
0
	// search runs the best-first loop from s0: nodes are popped from open
	// until a goal is popped (its path is stored in this->res), open runs
	// empty, or the search limit is hit. updateopen() is called after
	// every expansion.
	void search(D &d, typename D::State &s0) {
		this->start();
		lasttime = walltime();
		closed.init(d);

		Node *root = init(d, s0);
		closed.add(root);
		open.push(root);

		for (;;) {
			if (open.empty() || SearchAlgorithm<D>::limit())
				break;

			Node *cur = *open.pop();
			State buf;
			State &state = d.unpack(buf, cur->state);

			if (d.isgoal(state)) {
				solpath<D, Node>(d, cur, this->res);
				break;
			}

			expand(d, cur, state);
			updateopen();
		}

		this->finish();
	}
Ejemplo n.º 10
0
/**
 * \brief Function to convert a given walltime into seconds
 *
 * The value is read right to left as [[[days:]hours:]minutes:]seconds.
 * One leading and one trailing double quote are removed first. A field that
 * is empty or not accepted by isNumericalValue() counts as 0.
 *
 * \param walltime_ The walltime to convert
 * \return the walltime converted to seconds
 * \throw UserException if the value is empty once the quotes are removed
 * \throw std::runtime_error if a non-empty days field is preceded by yet
 *        another ':' separator
 */
long vishnu::convertStringToWallTime(const std::string& walltime_) {

  std::string walltime(walltime_);

  // Strip the surrounding quotes. Emptiness is re-checked before the last
  // character is inspected: for the input "\"" the first erase leaves an
  // empty string, and looking at end()-1 would be undefined behaviour.
  if(!walltime.empty() && walltime[0]=='\"'){
    walltime.erase(0, 1);
  }
  if(!walltime.empty() && walltime[walltime.size()-1]=='\"'){
    walltime.erase(walltime.size()-1, 1);
  }

  if(walltime.empty()) {
    throw UserException(ERRCODE_INVALID_PARAM, ("Invalid walltime value: The given value is empty"));
  }

  // Fields are kept as long so that the final sum cannot overflow int
  // arithmetic before it is widened to the return type.
  long seconds = 0;
  long minutes = 0;
  long hours = 0;
  long days = 0;
  std::string value;

  // Seconds: everything after the last ':' (or the whole string).
  size_t size = walltime.size();
  size_t pos = walltime.rfind(":");
  if(pos!=std::string::npos) {
    if((size-pos > 1)) {
      value = walltime.substr(pos+1, size-1-pos);
      if(isNumericalValue(value)) {
        seconds = convertToInt(value);
      }
    }
  } else {
    value = walltime;
    if(isNumericalValue(value)) {
      seconds = convertToInt(value);
    }
  }

  // Minutes: the field that ends at the ':' found above.
  if((pos!=std::string::npos) && (pos > 0)) {
    size = pos;
    pos =  walltime.rfind(":", size-1);
    if(pos!=std::string::npos) {
      if((size-pos > 1)) {
        value = walltime.substr(pos+1, size-pos-1);
        if(isNumericalValue(value)) {
          minutes = convertToInt(value);
        }
      }
    } else {
      value = walltime.substr(0, size);
      if(isNumericalValue(value)) {
        minutes = convertToInt(value);
      }
    }
  }

  // Hours.
  if((pos!=std::string::npos) && (pos > 0)) {
    size = pos;
    pos =  walltime.rfind(":", size-1);
    if(pos!=std::string::npos) {
      if((size-pos > 1)) {
        value = walltime.substr(pos+1, size-pos-1);
        if(isNumericalValue(value)) {
          hours = convertToInt(value);
        }
      }
    } else {
      value = walltime.substr(0, size);
      if(isNumericalValue(value)) {
        hours = convertToInt(value);
      }
    }
  }

  // Days: must be the left-most field; one more separator in front of a
  // non-empty days field is rejected.
  if((pos!=std::string::npos) && (pos > 0)) {
    size = pos;
    pos =  walltime.rfind(":", size-1);
    if(pos!=std::string::npos) {
      if((size-pos > 1)) {
        throw std::runtime_error("Invalid walltime value: "+walltime);
      }
    } else {
      value = walltime.substr(0, size);
      if(isNumericalValue(value)) {
        days = convertToInt(value);
      }
    }
  }

  return days*86400L + hours*3600L + minutes*60L + seconds;

}
Ejemplo n.º 11
0
void query (const char* fname,int num_threads) {

    int result = 0;
    
    
    double start, end; 
    start = walltime();
    
    //scan edges
    vector<tuple> edges = vector<tuple>();
    ifstream f0(fname);
    while (!f0.eof()) {
    	int j;
    	f0 >> j;
        int k;
        f0 >> k;
        tuple t; t.to = j; t.from = k;
        edges.push_back(t);
    	//tmp_vector0.push_back(j);
    	//count0++;
    	//if (count0 == 2) {
    	//	count0 = 0;
    	//	edges.push_back(tmp_vector0);
    	//	tmp_vector0 = vector<int>();
    	//}
    }
    f0.close();
    
    end = walltime();
    scan_runtime = end - start;
    
    
    cout << "done reading file.\n";
    
    start = walltime();

    //hash edges
    map<int, vector<tuple > > edges0_hash;
    for (int i = 0; i < edges.size(); i++) {
    	if (edges0_hash.find(edges[i].to) == edges0_hash.end()) {
    		edges0_hash[edges[i].to] = vector<tuple> ();
    	}
    	edges0_hash[edges[i].to].push_back(edges[i]);
    }

    end = walltime();
    hash_runtime = end - start;
    
    
    cout << "done creating hash.\n";

    omp_set_num_threads(num_threads);
    
    start = walltime();
    
    //loop over edges
    #pragma omp parallel for reduction(+:result) schedule(static)
    for (int index0 = 0; index0 < edges.size(); ++index0) {
        if (edges[index0].to > edges[index0].from) { continue; }
        //if there is no match, continue
        if (edges0_hash.find(edges[index0].from) == edges0_hash.end()) {
            continue;
        }
        vector<tuple> table1 = edges0_hash[edges[index0].from];
    
    
    
        //loop over table1
        #pragma omp parallel for reduction(+:result) schedule(static)
        for (int index1 = 0; index1 < table1.size(); ++index1) {
            if (table1[index1].to > table1[index1].from) { continue;}
            //if there is no match, continue
            if (edges0_hash.find(table1[index1].from) == edges0_hash.end()) {
                continue;
            }
            vector<tuple> table2 = edges0_hash[table1[index1].from];
        
        
        
            //loop over final join results
            #pragma omp parallel for reduction(+:result) schedule(static)
            for (int index2 = 0; index2 < table2.size(); ++index2) {
                if (table2[index2].from==edges[index0].to) {
                        ++result;
                        
                        
                }
            }
        }
    }
    
    end = walltime();
    triangles_runtime = end - start;

    cout << "Found " << result << " tuples.\n";

    char scheduling[1024] = "static";
    int64_t chunk = -1;
    
    DictOut out;
    DICT_ADD(out, hash_runtime);
    DICT_ADD(out, triangles_runtime);
    DICT_ADD(out, scan_runtime);
    DICT_ADD(out, (int64_t)num_threads);
    DICT_ADD(out, fname);
    DICT_ADD(out, scheduling);
    DICT_ADD(out, chunk);
    std::cout << out.toString() << std::endl; 
}
Ejemplo n.º 12
0
	// row writes one "incumbent" row to stdout: n, the expanded and
	// generated counts, wt, epsprime, cost, and the wall time elapsed
	// since the search started.
	void row(unsigned long n, double epsprime) {
		const double elapsed = walltime() - this->res.wallstart;
		dfrow(stdout, "incumbent", "uuugggg",
			n, this->res.expd, this->res.gend,
			wt, epsprime, cost, elapsed);
	}
Ejemplo n.º 13
0
// Runs nbfs breadth-first searches over the graph _g, each from its own root,
// switching per level between a top-down and a bottom-up expansion.
//
//   _g   - graph to traverse; published to every core as `g` below.
//   nbfs - number of searches (roots) to run.
//   tg   - tuple graph handed to verify() for the first search only.
//
// Each traversal is timed with walltime(). The first search is verified and
// its time is left out of total_time; later searches add their time to it.
void bfs(GlobalAddress<G> _g, int nbfs, TupleGraph tg) {
  bool verified = false;
  double t;
      
  // Two bags sized by the vertex count; every core gets the same handles
  // assigned to frontier / next / g.
  auto _frontier = GlobalBag<VertexID>::create(_g->nv);
  auto _next     = GlobalBag<VertexID>::create(_g->nv);
  call_on_all_cores([=]{ frontier = _frontier; next = _next; g = _g; });
    
  // do BFS from multiple different roots and average their times
  for (int root_idx = 0; root_idx < nbfs; root_idx++) {
  
    // reset every vertex via init() and mark its level as unvisited (-1)
    forall(g, [](G::Vertex& v){ v->init(); v->level = -1; });
    
    // pick the root: the vertex reported by the max_degree reduction when
    // FLAGS_max_degree_source is set, otherwise whatever choose_root returns
    VertexID root;
    if (FLAGS_max_degree_source) {
      forall(g, [](VertexID i, G::Vertex& v){
        max_degree << MaxDegree(i, v.nadj);
      });
      root = static_cast<MaxDegree>(max_degree).idx();
    } else {
      root = choose_root(g);
    }
    
    // setup 'root' as the parent of itself
    delegate::call(g->vs+root, [=](G::Vertex& v){
      v->parent = root;
      v->level = 0;
    });
    
    // reset frontier queues
    next->clear();
    frontier->clear();
    
    // start with root as only thing in frontier
    delegate::call((g->vs+root).core(), [=]{ frontier->add(root); });
    
    // start of the timed traversal
    t = walltime();
    
    // state for the direction-switching heuristic
    bool top_down = true;
    int64_t prev_nf = -1;
    int64_t frontier_edges = 0;
    int64_t remaining_edges = g->nadj;
    
    while (!frontier->empty()) {
      
      auto nf = frontier->size();
      // NOTE(review): the message ends with the "frontier_edges: " label but
      // no value is streamed after it.
      VLOG(1) << "remaining_edges = " << remaining_edges << ", nf = " << nf << ", prev_nf = " << prev_nf << ", frontier_edges: " ;
      // go bottom-up when the frontier's edge count exceeds
      // remaining_edges/alpha and the frontier grew; go back to top-down
      // when it drops below nv/beta and the frontier shrank
      if (top_down && frontier_edges > remaining_edges/FLAGS_beamer_alpha && nf > prev_nf) {
        VLOG(1) << "switching to bottom-up";
        top_down = false;
      } else if (!top_down && frontier_edges < g->nv/FLAGS_beamer_beta && nf < prev_nf) {
        VLOG(1) << "switching to top-down";
        top_down = true;
      }
      
      // edge_count is re-accumulated for the vertices claimed in this level
      edge_count = 0;
      
      if (top_down) {
                
        // iterate over vertices in this level of the frontier
        forall(frontier, [](VertexID& i){
          // visit all the adjacencies of the vertex
          // note: this has to be 'async' to prevent deadlock from
          //       running out of available workers
          forall<async>(adj(g,i), [i](G::Edge& e) {
            auto j = e.id;
            // at the core where the vertex is...
            delegate::call<async>(e.ga, [i,j](G::Vertex& vj){
              // note: no synchronization needed because 'call' is 
              // guaranteed to be executed atomically because it 
              // does no blocking operations
              if (vj->parent == -1) {
                // claim parenthood
                vj->parent = i;
                vj->level = current_depth;
                next->add(j);
                edge_count += vj.nadj;
              }
            });
          });
        });
      } else { // bottom-up
        
        // every still-unvisited vertex asks the owners of its neighbours
        // whether one of them has already been reached
        forall<&phaser>(g, [](G::Vertex& v){
          if (v->level != -1) return;
          auto va = make_linear(&v);
          forall<async,&phaser>(adj(g,v), [=,&v](G::Edge& e){
            if (v->level != -1) return;
            
            // one enrollment per message sent; it is completed either in
            // the reply below or through send_completion
            phaser.enroll();
            auto eva = e.ga;
            send_heap_message(eva.core(), [=]{
              auto& ev = *eva.pointer();
              // the neighbour counts only if it was reached at a level
              // below current_depth
              if (ev->level != -1 && ev->level < current_depth) {
                auto eid = g->id(ev);
                // reply to v's core, which claims v if it is still unvisited
                send_heap_message(va.core(), [=]{
                  auto& v = *va.pointer();
                  if (v->level == -1) {
                    next->add(g->id(v));
                    v->level = current_depth;
                    v->parent = eid;
                    edge_count += v.nadj;
                  }
                  phaser.complete();
                });
              } else {
                phaser.send_completion(va.core());
              }
            });
          });
        });
      }
      
      // NOTE(review): current_depth is only incremented in this function;
      // confirm where it is reset between roots.
      call_on_all_cores([=]{
        current_depth++;
        // switch to next frontier level
        std::swap(frontier, next);
      });
      next->clear();
      frontier_edges = edge_count;
      remaining_edges -= frontier_edges;
      prev_nf = nf;
    } // while (frontier not empty)
    
    double this_bfs_time = walltime() - t;
    LOG(INFO) << "(root=" << root << ", time=" << this_bfs_time << ")";
    
    if (!verified) {
      // only verify the first one to save time
      t = walltime();
      bfs_nedge = verify(tg, g, root);
      verify_time = (walltime()-t);
      LOG(INFO) << verify_time;
      verified = true;
      Metrics::reset_all_cores(); // don't count the first one
    } else {
      total_time += this_bfs_time;
    }
    
    // NOTE(review): this accumulates for every root, including the first one
    // whose time is excluded from total_time above — confirm intended.
    bfs_mteps += bfs_nedge / this_bfs_time / 1.0e6;
  }
}
Ejemplo n.º 14
0
// Returns the wall time elapsed since `start` and moves `start` to the
// current time, so consecutive calls measure consecutive intervals.
double duration(){
  const double current = walltime();
  const double elapsed = current - start;
  start = current;
  return elapsed;
}
Ejemplo n.º 15
0
	void* thread_search(void * arg) {

		int id = thread_id.fetch_add(1);
		// closed list is waaaay too big for my computer.
		// original 512927357
		// TODO: Must optimize these numbers
		//  9999943
		// 14414443
		//129402307
//		HashTable<typename D::PackedState, Node> closed(512927357 / tnum);
		HashTable<typename D::PackedState, Node> closed(closedlistsize);

//	printf("closedlistsize = %u\n", closedlistsize);

//		Heap<Node> open(100, overrun);
		heap open(openlistsize, overrun);
		Pool<Node> nodes(2048);

		// If the buffer is locked when the thread pushes a node,
		// stores it locally and pushes it afterward.
		// TODO: Array of dynamic sized objects.
		// This array would be allocated in heap rather than stack.
		// Therefore, not the best optimized way to do.
		// Also we need to fix it to compile in clang++.
		std::vector<std::vector<Node*>> outgo_buffer;
		outgo_buffer.reserve(tnum);

		std::vector<Node*> tmp;
		tmp.reserve(10); // TODO: ad hoc random number

		uint expd_here = 0;
		uint gend_here = 0;
		int max_outgo_buffer_size = 0;
		int max_income_buffer_size = 0;

		unsigned int discarded_here = 0;
		int duplicate_here = 0;

		int current_f = 0;

		double lapse;

		int useless = 0;

		int fval = -1;

		// How many of the nodes sent to itself.
		// If this high, then lower communication overhead.
		unsigned int self_push = 0;

		//		while (path.size() == 0) {

		printf("id = %d\n", id);
		printf("incumbent = %d\n", incumbent.load());
		unsigned int over_incumbent_count = 0;
		unsigned int no_work_iteration = 0;

		double init_time = walltime();

		while (true) {
			Node *n;

			if (this->isTimed) {
				double t = walltime() - init_time;
//				printf("t = %f\n", t);
				if (t > this->timer) {
//					closed.destruct_all(nodes);
					terminate[id] = true;
					break;
				}
			}


#ifdef ANALYZE_LAP
			startlapse(lapse); // income buffer
#endif
			if (!income_buffer[id].isempty()) {
				terminate[id] = false;
				if (income_buffer[id].size() >= income_threshold) {
					++force_income;
					income_buffer[id].lock();
					tmp = income_buffer[id].pull_all_with_lock();
					income_buffer[id].release_lock();
					uint size = tmp.size();
#ifdef ANALYZE_INCOME
					if (max_income_buffer_size < size) {
						max_income_buffer_size = size;
					}
					dbgprintf("size = %d\n", size);
#endif // ANALYZE_INCOME
					for (int i = 0; i < size; ++i) {
						dbgprintf("pushing %d, ", i);
						open.push(tmp[i]); // Not sure optimal or not. Vector to Heap.
					}
					tmp.clear();
				} else if (income_buffer[id].try_lock()) {
					tmp = income_buffer[id].pull_all_with_lock();
//					printf("%d", __LINE__);
					income_buffer[id].release_lock();

					uint size = tmp.size();
#ifdef ANALYZE_INCOME
					if (max_income_buffer_size < size) {
						max_income_buffer_size = size;
					}
					dbgprintf("size = %d\n", size);
#endif // ANALYZE_INCOME
					for (int i = 0; i < size; ++i) {
						dbgprintf("pushing %d, ", i);
						open.push(tmp[i]); // Not sure optimal or not.
					}
					tmp.clear();
				}
			}
#ifdef ANALYZE_LAPSE
			endlapse(lapse, "incomebuffer");
			startlapse(&lapse); // open list
#endif
#ifdef OUTSOURCING
			open_sizes[id] = open.getsize();
#endif
			if (open.isemptyunder(incumbent.load())) {
				dbgprintf("open is empty.\n");
				terminate[id] = true;
				if (hasterminated() && incumbent != initmaxcost) {
					printf("terminated\n");
					break;
				}
				++no_work_iteration;
				for (int i = 0; i < tnum; ++i) {
					if (i != id && outgo_buffer[i].size() > 0) {
						if (income_buffer[i].try_lock()) {
							// acquired lock
							income_buffer[i].push_all_with_lock(
									outgo_buffer[i]);
							income_buffer[i].release_lock();
							outgo_buffer[i].clear();
						}
					}
				}
				continue; // ad hoc
			}
			n = static_cast<Node*>(open.pop());

//			if (n->f >= incumbent.load()) {
//				printf("open list error: n->f >= incumbent: %u > %d\n", n->f, incumbent.load());
//			}
//			printf("f,g = %d, %d\n", n->f, n->g);

#ifdef ANALYZE_LAPSE
			endlapse(lapse, "openlist");
#endif

#ifdef ANALYZE_FTRACE
			int newf = open.minf();
			Logfvalue* lg = new Logfvalue(walltime() - wall0, n->f, n->f - n->g);
			logfvalue[id].push_back(*lg);
			if (fvalues[id] != newf) {
//				printf("ftrace %d %d %f\n", id, fvalues[id],
//						walltime() - wall0);
				fvalues[id] = newf;
			}
#endif // ANALYZE_FTRACE
			// TODO: Might not be the best way.
			// Would there be more novel way?

#ifdef ANALYZE_GLOBALF
			if (n->f != fvalues[id]) {
				fvalues[id] = n->f;
				int min = *std::min_element(fvalues, fvalues+tnum);

				if (min != globalf) {
					globalf = min;
					printf("globalf %d %d %f\n", id, min, walltime() - wall0);
				}
			}
#endif

			// If the new node n is duplicated and
			// the f value is higher than or equal to the duplicate, discard it.
#ifdef ANALYZE_LAPSE
			startlapse(&lapse); // closed list
#endif
//		if (n->thrown == 0) {
			Node *duplicate = closed.find(n->packed);
			if (duplicate) {
				if (duplicate->f <= n->f) {
					dbgprintf("Discarded\n");
					++discarded_here;
					nodes.destruct(n);
					continue;
#ifdef ANALYZE_DUPLICATE
				} else {
					duplicate_here++;
#endif // ANALYZE_DUPLICATE
				}
				// Node access here is unnecessary duplicates.
//					printf("Duplicated\n");
			}
			//	}
#ifdef ANALYZE_LAPSE
			endlapse(lapse, "closedlist");
#endif

#ifdef OUTSOURCING
			if ((n->thrown < 5) && (expd_here > 600) && outsourcing(n, id)) {
				if (n->thrown == 0) {
					closed.add(n);
				}
#ifdef ANALYZE_OUTSOURCING
				outsource_pushed++;
#endif
				dbgprintf("Out sourced a node\n");
				continue;
			}
#endif // OUTSOURCING
			typename D::State state;
			this->dom.unpack(state, n->packed);

#ifdef ANALYZE_ORDER
			if (fval != n->f) {
				fval = n->f;
				LogNodeOrder* ln = new LogNodeOrder(globalOrder.fetch_add(1),
						state.sequence, fval, open.getsize());
				lognodeorder[id].push_back(*ln);
			} else {
				LogNodeOrder* ln = new LogNodeOrder(globalOrder.fetch_add(1),
						state.sequence, -1, open.getsize());
				lognodeorder[id].push_back(*ln);
			}
#endif // ANALYZE_ORDER
			if (this->dom.isgoal(state)) {
				// TODO: For some reason, sometimes pops broken node.
//				if (state.tiles[1] == 0) {
//					printf("isgoal ERROR\n");
//					continue;
//				}
//				print_state(state);

				std::vector<typename D::State> newpath;

				for (Node *p = n; p; p = p->parent) {
					typename D::State s;
					this->dom.unpack(s, p->packed);
					newpath.push_back(s); // This triggers the main loop to terminate.
				}
				int length = newpath.size();
				printf("Goal! length = %d\n", length);
				printf("cost = %u\n", n->g);

				if (incumbent > n->g) {
					// TODO: this should be changed to match non-unit cost domains.
					incumbent = n->g;
					LogIncumbent* li = new LogIncumbent(walltime() - wall0,
							incumbent);
					logincumbent[id].push_back(*li);
					path = newpath;
				}

				continue;
			}
#ifdef OUTSOURCING
			if (n->thrown == 0) {
				closed.add(n);
			}
#else
			closed.add(n);
#endif
			expd_here++;
//			if (expd_here % 100000 == 0) {
//				printf("expd: %u\n", expd_here);
//			}
			//		printf("expd: %d\n", id);

#ifdef ANALYZE_LAPSE
			startlapse(&lapse);
#endif

//			buffer<Node>* buffers;

			useless += uselessCalc(useless);

			for (int i = 0; i < this->dom.nops(state); i++) {
				// op is the next blank position.
				int op = this->dom.nthop(state, i);
//				if (op == n->pop) {
//					//					printf("erase %d \n", i);
//					continue;
//				}

//				printf("gend: %d\n", id);

//				int moving_tile = 0;
//				int blank = 0; // Make this available for Grid pathfinding.
//				int moving_tile = state.tiles[op];
//				int blank = state.blank; // Make this available for Grid pathfinding.
				Edge<D> e = this->dom.apply(state, op);
				Node* next = wrap(state, n, e.cost, e.pop, nodes);

				///////////////////////////
				/// TESTS
				/// Compare nodes, n & next
				/// 1. f value does increase (or same)
				/// 2. g increases by cost
				///
				/// Compare states
				/// 1. operation working fine
				///////////////////////////
				if (n->f > next->f) {
//					// heuristic was calculating too big.
					printf("!!!ERROR: f decreases: %u %u\n", n->f, next->f);
					unsigned int nh = n->f - n->g;
					unsigned int nxh = next->f - next->g;
					printf("h = %u %u\n", nh, nxh);
					printf("cost = %d\n", e.cost);
				}
//				if (static_cast<unsigned int>(n->g + e.cost) != static_cast<unsigned int>(next->g)) {
//					printf("!!!ERROR: g is wrong: %u + %d != %u\n", n->g, e.cost, next->g);
//				}

				if (next->f >= incumbent.load()) {
//					printf("needless\n");-
					++over_incumbent_count;
					nodes.destruct(next);
					this->dom.undo(state, e);
//					printf("%u >= %d\n", next->f, incumbent.load());
					continue;
				}
//				Node *duplicate = closed.find(next->packed);
//				if (duplicate) {
//					if (duplicate->f <= next->f) {
//						dbgprintf("Discarded\n");
//						++discarded_here;
//						nodes.destruct(next);
//						this->dom.undo(state, e);
//						continue;
//					} else {
//						++duplicate_here;
//					}
//				}
//				printf("inc: %d, inc.load: %d\n", incumbent.load());

//				if (next->f >= initmaxcost || next->g >= initmaxcost
//						|| next->f >= incumbent || next->g >= incumbent
//						|| next->f >= incumbent.load()
//						|| next->g >= incumbent.load()) {
//					printf("f >= initmaxcost: %d > %d\n", next->f, initmaxcost);
//				}

				gend_here++;
//				if (gend_here % 1000000 == 0) {
//					printf("gend: %u\n", gend_here);
////					printf("%u < %d\n", next->f, incumbent.load());
//				}
				//printf("mv blank op = %d %d %d \n", moving_tile, blank, op);
//				print_state(state);


				unsigned int zbr = z.inc_hash(n->zbr, 0, 0, op, 0, state);
				next->zbr = zbr;
				zbr = zbr % tnum;
//				next->zbr = z.inc_hash(n->zbr, moving_tile, blank, op,
//						0, state);

//				next->zbr = z.inc_hash(state);

//				unsigned int zbr = next->zbr % tnum;
//				printf("zbr, zbr_tnum = (%u, %u)\n", next->zbr, zbr);

				// If the node belongs to itself, just push to this open list.
				if (zbr == id) {
//					double w = walltime();
					++self_push;
					open.push(next);
//					printf("self: %f\n", walltime() - w);
//				}
#ifdef SEMISYNC
					// Synchronous communication to avoid search overhead
					else if (outgo_buffer[zbr].size() > outgo_threshold) {
						income_buffer[zbr].lock();
						income_buffer[zbr].push_with_lock(next);
						income_buffer[zbr].push_all_with_lock(outgo_buffer[zbr]);
//					printf("%d", __LINE__);
						income_buffer[zbr].release_lock();
						outgo_buffer[zbr].clear();
#ifdef ANALYZE_SEMISYNC
						++force_outgo;
//					printf("semisync = %d to %d\n", id, zbr);
#endif // ANALYZE_SEMISYNC
					}
#endif // SEMISYNC
//				else if (income_buffer[zbr].try_lock()) {
//					// if able to acquire the lock, then push all nodes in local buffer.
//					income_buffer[zbr].push_with_lock(next);
//					if (outgo_buffer[zbr].size() != 0) {
//						income_buffer[zbr].push_all_with_lock(
//								outgo_buffer[zbr]);
//					}
////					printf("%d", __LINE__);
//					income_buffer[zbr].release_lock();
//					outgo_buffer[zbr].clear();

				} else {
Ejemplo n.º 16
0
// Runs nbfs level-synchronous breadth-first searches over the graph g, each
// from a root returned by choose_root.
//
//   g    - graph to traverse.
//   nbfs - number of searches (roots) to run.
//   tg   - tuple graph handed to verify() for the first search only.
//
// Each traversal is timed with walltime() and its time is added to
// total_time; only the first search is verified.
void bfs(GlobalAddress<G> g, int nbfs, TupleGraph tg) {
  bool verified = false;
  double t;
      
  // current and next frontier, both created with the vertex count g->nv
  auto frontier = GlobalVector<int64_t>::create(g->nv);
  auto next     = GlobalVector<int64_t>::create(g->nv);
  
  // do BFS from multiple different roots and average their times
  for (int root_idx = 0; root_idx < nbfs; root_idx++) {
  
    // reset every vertex via init(); the claim test below treats
    // parent == -1 as "not yet visited"
    forall(g, [](G::Vertex& v){ v->init(); });
    
    int64_t root = choose_root(g);
    VLOG(1) << "root => " << root;
    
    // setup 'root' as the parent of itself
    delegate::call(g->vs+root, [=](G::Vertex& v){ v->parent = root; });
    
    // reset frontier queues
    next->clear();
    frontier->clear();
    
    // start with root as only thing in frontier
    frontier->push(root);
    
    // start of the timed traversal
    t = walltime();
    
    while (!frontier->empty()) {
      // iterate over vertices in this level of the frontier
      forall(frontier, [g,next](int64_t& i){
        // visit all the adjacencies of the vertex
        // note: this has to be 'async' to prevent deadlock from
        //       running out of available workers
        forall<async>(adj(g,g->vs+i), [i,next](G::Edge& e) {
          auto j = e.id;
          // at the core where the vertex is...
          bool claimed = delegate::call(e.ga, [i](G::Vertex& v){
            // note: no synchronization needed because 'call' is 
            // guaranteed to be executed atomically because it 
            // does no blocking operations
            if (v->parent == -1) {
              // claim parenthood
              v->parent = i;
              return true;
            }
            return false;
          });
          if (claimed) {
            // add this vertex to the frontier for the next level
            // note: we (currently) can't do this 'push' inside the delegate because it may block
            next->push(j);
          }
        });
      });
      // switch to next frontier level
      std::swap(frontier, next);
      next->clear();
    }
    
    double this_total_time = walltime() - t;
    LOG(INFO) << "(root=" << root << ", time=" << this_total_time << ")";
    total_time += this_total_time;
    
    if (!verified) {
      // only verify the first one to save time
      t = walltime();
      bfs_nedge = verify(tg, g, root);
      verify_time = (walltime()-t);
      LOG(INFO) << verify_time;
      verified = true;
    }
    
    // bfs_nedge is only assigned by the verification of the first search and
    // is reused unchanged for every later root
    bfs_mteps += bfs_nedge / this_total_time / 1.0e6;
  }
}
Ejemplo n.º 17
0
int run(void *_p, unsigned long long *data, int *data_len) {
  struct elim_params *p = (struct elim_params *)_p;
#ifndef HAVE_LIBPAPI
  *data_len = 2;
#else
  *data_len = MIN(papi_array_len + 1, *data_len);
#endif
  int papi_res;

  mzd_t *A = mzd_init(p->m, p->n);

  if(p->r != 0) {
    mzd_t *L, *U;
    L = mzd_init(p->m, p->m);
    U = mzd_init(p->m, p->n);
    mzd_randomize(U);
    mzd_randomize(L);
    for (rci_t i = 0; i < p->m; ++i) {

      for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, p->m - j);
        mzd_clear_bits(L, i, j, length);
      }
      mzd_write_bit(L,i,i, 1);

      for (rci_t j = 0; j < i && j < p->n; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, i - j);
        mzd_clear_bits(U, i, j, length);
      }
      if(i < p->r) {
        mzd_write_bit(U, i, i, 1);
      } else {
        for (rci_t j = i; j < p->n; j+=m4ri_radix) {
          int const length = MIN(m4ri_radix, p->n - i);
          mzd_clear_bits(U, i, j, length);
        }
      }
    }
    mzd_mul(A,L,U,0);
    mzd_free(L);
    mzd_free(U);
  } else {
    mzd_randomize(A);
  }

  mzp_t *P = mzp_init(A->nrows);
  mzp_t *Q = mzp_init(A->ncols);

#ifndef HAVE_LIBPAPI
  data[0] = walltime(0);
  data[1] = cpucycles();
#else
  int array_len = *data_len - 1;
  unsigned long long t0 = PAPI_get_virt_usec();
  papi_res = PAPI_start_counters((int*)papi_events, array_len);
  if (papi_res)
    m4ri_die("");
#endif
  if(strcmp(p->algorithm, "m4ri") == 0)
    p->r = mzd_echelonize_m4ri(A, 0, 0);
  else if(strcmp(p->algorithm, "ple") == 0)
    p->r = mzd_ple(A, P, Q, 0);
  else if(strcmp(p->algorithm, "mmpf") == 0)
    p->r = _mzd_ple_russian(A, P, Q, 0);
  else
    m4ri_die("unknown algorithm %s",p->algorithm);
#ifndef HAVE_LIBPAPI
  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
#else
  mzp_free(P);
  mzp_free(Q);

  PAPI_stop_counters((long long*)&data[1], array_len);
  t0 = PAPI_get_virt_usec() - t0;
  data[0] = t0;
  for (int nv = 0; nv <= array_len; ++nv) {
    data[nv] -= loop_calibration[nv];
  }
#endif
  mzd_free(A);
  return 0;
}
Ejemplo n.º 18
0
main (int argc, char **argv)
{
   /* declaration of variables */
   FILE *fp;                     /* file pointer */
   char *auxChar;                /* auxiliar character */
   char *modelFile = " ";        /* elastic model file */
                                 /* THICK - RHO - VP - QP - VS - QS */
   int i, k, iProc, iR;          /* counters */
   int initF, lastF;             /* initial and final frequencies */
   int apl_pid;                  /* PVM process id control */
   int nSamplesOrig;             /* time series length */
   int die;                      /* flag used to kill processes */
   int pid;                      /* process id */
   int nProc;                    /* number of processes */
   int processControl;           /* monitoring PVM start */
   int *processes;               /* array with process ids */
   int FReceived;                /* number of frequencies processed */
   int nFreqProc;                /* number of frequencies per process */
   int nFreqPart;                /* number of frequency partitions */
   int **statusFreq;             /* monitors processed frequencies */
   int FInfo[2];                 /* frequency delimiters */
   int **procInfo;               /* frequency limits for each processor */ 
   float wallcpu;                /* wall clock time */
   float dt;                     /* time sampling interval */
   float f;                      /* current frequency */
   float fR;                     /* reference frequency */
   float tMax;                   /* maximum recording time */
   float *thick, *alpha, *beta,
   *rho, *qP, *qS;               /* elastic constants and thickness */
   complex **freqPart;           /* frequency arrays sent by the slaves */
   complex **uRF, **uZF;         /* final frequency components */
   INFO info[1];                 /* basic information for slaves */
   
   /* Logging information */
   /* CleanLog(); */

   /* getting input */
   initargs(argc, argv);
   requestdoc(0);
   
   if (!getparstring("model", &modelFile)) modelFile = "model";
   if (!getparstring("recfile", &auxChar)) auxChar = " ";
   sprintf(info->recFile, "%s", auxChar);
   if (!getparint("directwave", &info->directWave)) info->directWave = 1;
   if (!getparfloat("r1", &info->r1)) info->r1 = 0;
   if (!getparint("nr", &info->nR)) info->nR = 148;
   if (!getparfloat("dr", &info->dR)) info->dR = .025;
   if (!getparfloat("zs", &info->zs)) info->zs = 0.001;
   if (info->zs <= 0) info->zs = 0.001;
   if (!getparfloat("u1", &info->u1)) info->u1 = 0.0002;
   if (!getparfloat("u2", &info->u2)) info->u2 = 1.;
   if (!getparint("nu", &info->nU)) info->nU = 1000;
   if (!getparfloat("f1", &info->f1)) info->f1 = 2;
   if (!getparfloat("f2", &info->f2)) info->f2 = 50;
   if (!getparfloat("dt", &dt)) dt = 0.004;
   if (!getparfloat("tmax", &tMax)) tMax = 8;
   if (!getparfloat("F1", &info->F1)) info->F1 = 0;
   if (!getparfloat("F2", &info->F2)) info->F2 = 0;
   if (!getparfloat("F3", &info->F3)) info->F3 = 1;
   if (!getparint("hanning", &info->hanningFlag)) info->hanningFlag = 0;
   if (!getparfloat("wu", &info->percU)) info->percU = 5; info->percU /= 100;
   if (!getparfloat("ww", &info->percW)) info->percW = 5; info->percW /= 100;
   if (!getparfloat("fr", &fR)) fR = 1; info->wR = 2 * PI * fR;
   if (!getparfloat("tau", &info->tau)) info->tau = 50;
   if (!getparint("nproc", &nProc)) nProc = 1;
   if (!getparint("nfreqproc", &nFreqProc) || nProc == 1) nFreqProc = 0;
   if (!getparint("verbose", &info->verbose)) info->verbose = 0;

   /* how many layers */
   fp = fopen(modelFile,"r");
   if (fp == NULL)
      err("No model file!\n");

   info->nL = 0;
   while (fscanf(fp, "%f %f %f %f %f %f\n", 
		 &f, &f, &f, &f, &f, &f) != EOF)
      info->nL++;
   info->nL--;
   fclose(fp);

   if (info->verbose)
      fprintf(stderr,"Number of layers in model %s : %d\n", 
	      modelFile, info->nL + 1); 
   
   /* if specific geometry, count number of receivers */
   fp = fopen(info->recFile, "r");
   if (fp != NULL)
   {
      info->nR = 0;
      while (fscanf(fp, "%f\n", &f) != EOF)
	 info->nR++;
   }
   fclose(fp);

   /* memory allocation */
   alpha = alloc1float(info->nL + 1);
   beta = alloc1float(info->nL + 1);
   rho = alloc1float(info->nL + 1);
   qP = alloc1float(info->nL + 1);
   qS = alloc1float(info->nL + 1);
   thick = alloc1float(info->nL + 1);
   processes = alloc1int(nProc);
   procInfo = alloc2int(2, nProc);

   /* reading the file */
   fp = fopen(modelFile,"r");
   if (info->verbose)
      fprintf(stderr,"Thickness     rho     vP     qP    vS     qS\n");
   for (i = 0; i < info->nL + 1; i++)
   {
      fscanf(fp, "%f %f %f %f %f %f\n", &thick[i], &rho[i], &alpha[i], 
	     &qP[i], &beta[i], &qS[i]);
      if (info->verbose)
	 fprintf(stderr,"   %7.4f      %4.3f   %3.2f  %5.1f  %3.2f  %5.1f\n",
		 thick[i], rho[i], alpha[i], qP[i], beta[i], qS[i]);
   }
   fclose(fp);

   /* computing frequency interval */
   info->nSamples = NINT(tMax / dt) + 1;
   nSamplesOrig = info->nSamples;
   info->nSamples = npfar(info->nSamples);

   /* slowness increment */
   info->dU = (info->u2 - info->u1) / (float) info->nU;

   /* computing more frequency related quatities */
   tMax = dt * (info->nSamples - 1);
   info->dF = 1. / (tMax);   
   f = info->dF;
   while (f < info->f1) f += info->dF;
   info->f1 = f;
   while (f < info->f2) f += info->dF;
   info->f2 = f; 
   initF = NINT(info->f1 / info->dF);
   lastF = NINT(info->f2 / info->dF);
   info->nF = lastF - initF + 1; 
   if (info->nF%2 == 0) 
   {
      info->nF++;
      lastF++;
   }
 
   /* attenuation of wrap-around */
   info->tau = log(info->tau) / tMax;
   if (info->tau > TAUMAX)
      info->tau = TAUMAX;
      
   if (info->verbose)
      fprintf(stderr, "Discrete frequency range to model: [%d, %d]\n", 
	      initF, lastF);
   
   if (nFreqProc == 0)
      nFreqProc = NINT((float) info->nF / (float) nProc + .5);
   else
      while (nFreqProc > info->nF) nFreqProc /= 2;
   nFreqPart = NINT((float) info->nF / (float) nFreqProc + .5);

   /* memory allocation for frequency arrays */
   uRF = alloc2complex(info->nSamples / 2 + 1, info->nR);
   uZF = alloc2complex(info->nSamples / 2 + 1, info->nR);
   freqPart = alloc2complex(nFreqProc, info->nR);
   statusFreq = alloc2int(3, nFreqPart);

   /* defining frequency partitions */
   for (k = initF, i = 0; i < nFreqPart; i++, k += nFreqProc)
   {
      statusFreq[i][0] = k;
      statusFreq[i][1] = MIN(k + nFreqProc - 1, lastF);
      statusFreq[i][2] = 0;       
   }

   if (info->verbose)
      fprintf(stderr, "Starting communication with PVM\n");
   
   /* starting communication with PVM */
   if ((apl_pid = pvm_mytid()) < 0) 
   {
      err("Error enrolling master process");
      /* exit(-1); */
   } 
   fprintf(stderr, "Starting %d slaves ... ", nProc);
   processControl = CreateSlaves(processes, PROCESS, nProc);
   if (processControl != nProc)
   {
      err("Problem starting Slaves (%s)\n", PROCESS);
      /* exit(-1); */
   }
   fprintf(stderr, " Ready \n");

   info->nFreqProc = nFreqProc;
   /* Broadcasting all processes common information */
   BroadINFO(info, 1, processes, nProc, GENERAL_INFORMATION);
   
   if (info->verbose) {
      fprintf(stderr, "Broadcasting model information to all slaves\n");
      fflush(stderr);
   }

   /* sending all profiles */
   BroadFloat(thick, info->nL + 1, processes, nProc, THICKNESS);
   BroadFloat(rho, info->nL + 1, processes, nProc, DENSITY);
   BroadFloat(alpha, info->nL + 1, processes, nProc, ALPHA);
   BroadFloat(qP, info->nL + 1, processes, nProc, QALPHA);
   BroadFloat(beta, info->nL + 1, processes, nProc, BETA);
   BroadFloat(qS, info->nL + 1, processes, nProc, QBETA);

   /* freeing memory */
   free1float(thick);
   free1float(rho);
   free1float(alpha);
   free1float(qP);
   free1float(beta);
   free1float(qS);

   /* sending frequency partitions for each process */
   for (iProc = 0; iProc < nProc; iProc++)
   {
      FInfo[0] = statusFreq[iProc][0];
      FInfo[1] = statusFreq[iProc][1];

      if (info->verbose) {
	 fprintf(stderr, 
	 "Master sending frequencies [%d, %d] out of %d to slave %d [id:%d]\n"
	  ,FInfo[0], FInfo[1], info->nF, iProc, processes[iProc]);
         fflush(stderr);
      }

      procInfo[iProc][0] = FInfo[0]; procInfo[iProc][1] = FInfo[1];
      SendInt(FInfo, 2, processes[iProc], FREQUENCY_LIMITS);
      statusFreq[iProc][2] = 1;
   }

   /* waiting modelled frequencies */
   /* master process will send more frequencies if there's more work to do */
   /* measuring elapsed time */
   wallcpu = walltime();  
   
   /* reseting frequency counter */
   FReceived = 0;
   
   while (FOREVER)
   {
      pid = RecvCplx(freqPart[0], info->nR * nFreqProc, -1, 
		     FREQUENCY_PARTITION_VERTICAL);

      /* finding the frequency limits of this process */
      iProc = 0;
      while (pid != processes[iProc])
	 iProc++;

      /* copying into proper place of the total frequency array */
      for (iR = 0; iR < info->nR; iR++)
      {
	 for (k = 0, i = procInfo[iProc][0]; i <= procInfo[iProc][1]; i++, k++)
	 {
	    uZF[iR][i] = freqPart[iR][k];
	 }
      }

      pid = RecvCplx(freqPart[0], info->nR * nFreqProc, -1, 
		     FREQUENCY_PARTITION_RADIAL);
      
      /* finding the frequency limits of this process */
      iProc = 0;
      while (pid != processes[iProc])
	 iProc++;
   
      /* copying into proper place of the total frequency array */
      for (iR = 0; iR < info->nR; iR++)
      { 
	 for (k = 0, i = procInfo[iProc][0]; i <= procInfo[iProc][1]; i++, k++)
	 {
	    uRF[iR][i] = freqPart[iR][k];
	 }
      }

      /* summing frequencies that are done */
      FReceived += procInfo[iProc][1] - procInfo[iProc][0] + 1;

      if (info->verbose)
	 fprintf(stderr, "Master received %d frequencies, remaining %d\n", 
	      FReceived, info->nF - FReceived);

/*       if (FReceived >= info->nF) break; */

      /* defining new frequency limits */
      i = 0;
      while (i < nFreqPart && statusFreq[i][2])
	 i++;
      
      if (i < nFreqPart)
      {
	 /* there is still more work to be done */
	 /* tell this process to not die */
	 die = 0;
	 SendInt(&die, 1, processes[iProc], DIE);

	 FInfo[0] = statusFreq[i][0];
	 FInfo[1] = statusFreq[i][1];

	 if (info->verbose)
	    fprintf(stderr, 
		    "Master sending frequencies [%d, %d] to slave %d\n", 
		    FInfo[0], FInfo[1], processes[iProc]);
	 
	 procInfo[iProc][0] = FInfo[0]; procInfo[iProc][1] = FInfo[1];
	 SendInt(FInfo, 2, processes[iProc], FREQUENCY_LIMITS);
	 statusFreq[i][2] = 1;
      }
      else
      {
	 /* tell this process to die since there is no more work to do */
	 if (info->verbose)
	    fprintf(stderr, "Master ''killing'' slave %d\n", processes[iProc]);
	 die = 1;
	 SendInt(&die, 1, processes[iProc], DIE);
      }
      
      /* a check to get out the loop */
      if (FReceived >= info->nF) break; 
   }

   if (info->verbose)
      fprintf(stderr, "Master ''killing'' remaining slaves\n");

   /* getting elapsed time */
   wallcpu = walltime() - wallcpu;
   fprintf(stderr, "Wall clock time = %f seconds\n", wallcpu);  
   
   /* going to time domain */
   memset( (void *) &trZ, (int) '\0', sizeof(trZ));     
   memset( (void *) &trR, (int) '\0', sizeof(trR));     
   trZ.dt = dt * 1000000;
   trZ.ns = nSamplesOrig;
   trR.dt = dt * 1000000;
   trR.ns = nSamplesOrig;
   
   /* z component */
   for (iR = 0; iR < info->nR; iR++)
   {
      trZ.tracl = iR + 1;
      /* inverse FFT */
      pfacr(1, info->nSamples, uZF[iR], trZ.data); 
      for (i = 0; i < info->nSamples; i++)
      {
	 /* compensating for the complex frequency */
	 trZ.data[i] *= exp(info->tau * i * dt);
      }
      puttr(&trZ);
   }

   /* r component */
   for (iR = 0; iR < info->nR; iR++)
   {
      trR.tracl = info->nR + iR + 1;
      /* inverse FFT */
      pfacr(1, info->nSamples, uRF[iR], trR.data); 
      for (i = 0; i < info->nSamples; i++)
      {
	 /* compensating for the complex frequency */
	 trR.data[i] *= exp(info->tau * i * dt);
      }
      puttr(&trR);
   }
   return(EXIT_SUCCESS);
}   
Ejemplo n.º 19
0
/*
 * Hands frequency partition iPart to slave slot iProc: records the limits in
 * procInfo, sends them, marks the partition as assigned and then sends the
 * matching columns of resCD (indexed relative to initF) for every receiver.
 */
static void sendFrechetPartition(int iProc, int iPart)
{
   int iR, iF, k;                /* counters */
   int FInfo[2];                 /* frequency delimiters */
   int nFreq;                    /* number of frequencies in the partition */
   complex **resCDPart;          /* partition of resCD */

   FInfo[0] = statusFreq[iPart][0];
   FInfo[1] = statusFreq[iPart][1];
   nFreq = FInfo[1] - FInfo[0] + 1;

   procInfo[iProc][0] = FInfo[0];
   procInfo[iProc][1] = FInfo[1];
   SendInt(FInfo, 2, processes[iProc], FREQUENCY_LIMITS);
   statusFreq[iPart][2] = 1;

   /* and sending the appropriate correlation chunk */
   resCDPart = alloc2complex(nFreq, info->nR);
   for (iR = 0; iR < info->nR; iR++)
   {
      for (k = 0, iF = FInfo[0]; iF <= FInfo[1]; iF++, k++)
      {
         resCDPart[iR][k] = resCD[iR][iF - initF];
      }
   }
   SendCplx(resCDPart[0], nFreq * info->nR,
            processes[iProc], COVARIANCE_PARTITION);
   free2complex(resCDPart);
}

/*
 * Master side of the PVM-parallel gradient (Frechet derivative) computation.
 *
 * grad receives numberPar * limRange values.  It is zeroed, then the partial
 * gradients returned by the slaves are stacked into it, scaled by
 * 2 / (nTotalSamples * oFNorm), optionally mapped to the impedance domain
 * (IMPEDANCE) and optionally augmented with the model-covariance term
 * (PRIOR).
 *
 * The model, partition tables and option flags are read from file-scope
 * state.  alpha and beta are temporarily divided by rho while the slaves
 * run when IMPEDANCE is set, and restored afterwards.
 *
 * The process terminates with exit(-1) if PVM cannot be joined, the slaves
 * cannot be started, or a result arrives from an unknown process id.
 */
void gradient(float *grad)
{
   /* declaration of variables */
   int i, iProc, iDer, iL, iU, iPart, offset;
                                   /* counters */
   int FReceived;                  /* number of frequencies processed */
   int die;                        /* die processor flag */
   int apl_pid;                    /* PVM process id control */
   int pid;                        /* process id */
   int processControl;             /* monitoring PVM start */
   float wallcpu;                  /* wall clock time */
   float *gradPart;                /* partition of gradients */

   /* Clean up log files */
   CleanLog();

   /* Resetting synchronization flags */
   for (i = 0; i < nFreqPart; i++)
   {
      statusFreq[i][2] = 0;
   }

   /* allocating some memory */
   gradPart = alloc1float(numberPar * limRange);

   for (i = 0; i < numberPar * limRange; i++)
   {
      grad[i] = 0;
   }

   fprintf(stderr, "Starting communication with PVM for derivatives\n");
   /* starting communication with PVM */
   if ((apl_pid = pvm_mytid()) < 0)
   {
      pvm_perror("Error enrolling master process");
      exit(-1);
   }
   processControl = CreateSlaves(processes, PROCESS_FRECHET, nProc);

   if (processControl != nProc)
   {
      fprintf(stderr,"Problem starting PVM daemons\n");
      exit(-1);
   }

   /* converting to velocities */
   if (IMPEDANCE)
   {
      for (i = 0; i < info->nL + 1; i++)
      {
         alpha[i] /= rho[i];
         beta[i] /= rho[i];
      }
   }

   /* Broadcasting all processes common information */
   BroadINFO(info, 1, processes, nProc, GENERAL_INFORMATION);

   /* sending all profiles */
   BroadFloat(thick, info->nL + 1, processes, nProc, THICKNESS);
   BroadFloat(rho, info->nL + 1, processes, nProc, DENSITY);
   BroadFloat(alpha, info->nL + 1, processes, nProc, ALPHAS);
   BroadFloat(qP, info->nL + 1, processes, nProc, QALPHA);
   BroadFloat(beta, info->nL + 1, processes, nProc, BETAS);
   BroadFloat(qS, info->nL + 1, processes, nProc, QBETA);

   /* sending frequency partitions for each process */
   /* NOTE(review): partition iProc goes to slave iProc, which assumes
      nFreqPart >= nProc -- confirm against where the tables are built */
   for (iProc = 0; iProc < nProc; iProc++)
   {
      if (info->verbose)
         fprintf(stderr,
         "Master sending frequencies [%d, %d] out of %d to slave Frechet %d [id:%d]\n", statusFreq[iProc][0], statusFreq[iProc][1], info->nF, iProc, processes[iProc]);

      sendFrechetPartition(iProc, iProc);
   }

   /* waiting modelled frequencies */
   /* master process will send more frequencies if there's more work to do */
   /* measuring elapsed time */
   wallcpu = walltime();

   /* resetting frequency counter */
   FReceived = 0;
   while (FOREVER)
   {
      pid = RecvFloat(gradPart, info->numberPar * info->limRange, -1,
                      PARTIAL_GRADIENT);

      /* finding the frequency limits of this process; bounded so that an
         unexpected sender cannot index past the end of processes[] */
      iProc = 0;
      while (iProc < nProc && pid != processes[iProc])
         iProc++;
      if (iProc == nProc)
      {
         fprintf(stderr, "Received gradient from unknown process id %d\n",
                 pid);
         exit(-1);
      }

      /* stacking gradient */
      for (i = 0; i < info->numberPar * info->limRange; i++)
      {
         grad[i] += gradPart[i];
      }

      /* summing frequencies that are done */
      FReceived += procInfo[iProc][1] - procInfo[iProc][0] + 1;
      if (info->verbose)
         fprintf(stderr, "Master received %d frequencies, remaining %d\n",
                 FReceived, info->nF - FReceived);

      /* defining new frequency limits: first partition not yet assigned */
      iPart = 0;
      while (iPart < nFreqPart && statusFreq[iPart][2])
         iPart++;

      if (iPart < nFreqPart)
      {
         /* there is still more work to be done */
         /* tell this process to not die */
         die = 0;
         SendInt(&die, 1, processes[iProc], DIE);

         if (info->verbose)
            fprintf(stderr,
                    "Master sending frequencies [%d, %d] to slave %d\n",
                    statusFreq[iPart][0], statusFreq[iPart][1],
                    processes[iProc]);

         /* limits first, then the covariance partition */
         sendFrechetPartition(iProc, iPart);
      }
      else
      {
         /* tell this process to die since there is no more work to do */
         if (info->verbose)
            fprintf(stderr, "Master ''killing'' slave %d\n", processes[iProc]);
         die = 1;
         SendInt(&die, 1, processes[iProc], DIE);
      }
      /* a check to get out the loop */
      if (FReceived >= info->nF) break;
   }

   /* getting elapsed time */
   wallcpu = walltime() - wallcpu;
   fprintf(stderr, "Frechet derivative wall clock time = %f seconds\n\n",
           wallcpu);

   /* back to impedances*/
   if (IMPEDANCE)
   {
      for (i = 0; i < info->nL + 1; i++)
      {
         alpha[i] *= rho[i];
         beta[i] *= rho[i];
      }
   }

   /* finally the gradient, the 2 is due Parseval */
   for (iDer = 0; iDer < numberPar * limRange; iDer++)
   {
      grad[iDer] *= 2 / (float) (nTotalSamples * oFNorm);
   }

   /* getting gradient in impedance domain */
   if (IMPEDANCE)
   {
      for (i = lim[0], iL = 0; iL < limRange; iL++, i++)
      {
         /* the slot offset must restart at 0 for every layer: without the
            reset it keeps growing across iterations whenever vpFrechet is
            off, pushing the vs/rho updates past their slots.  The PRIOR
            section of this function resets its slot offset (iU) per
            iteration in the same way. */
         offset = 0;

         if (vpFrechet)
         {
            grad[iL] /= rho[i];
            offset = limRange;
         }

         if (vsFrechet)
         {
            grad[iL + offset] /= rho[i];
            offset += limRange;
         }

         if (rhoFrechet)
         {
            grad[iL + offset] = - alpha[i] * grad[iL] -
              beta[i] * grad[iL + limRange] + grad[iL + 2 * limRange];
         }
      }
   }

   if (PRIOR)
   {
      auxm1 = 1. / (float) (numberPar * limRange);     /* normalization */
      /* considering the regularization or model covariance term */
      for (i = 0; i < limRange; i++)
      {
         for (offset = i, iL = 0; iL < limRange; iL++)
         {
            iU = 0;
            if (vpFrechet)
            {
               grad[iL] += (alpha[i + lim[0]] - alphaMean[i + lim[0]]) *
                            CMvP[offset] * auxm1;
               iU = limRange; /* used as offset in gradient vector */
            }

            if (vsFrechet)
            {
               grad[iL + iU] += (beta[i + lim[0]] - betaMean[i + lim[0]]) *
                                 CMvS[offset] * auxm1;
               iU += limRange;
            }

            if (rhoFrechet)
            {
               grad[iL + iU] += (rho[i + lim[0]] - rhoMean[i + lim[0]]) *
                                 CMrho[offset] * auxm1;
            }

            /* advance through the covariance arrays (CMvP/CMvS/CMrho);
               NOTE(review): looks like packed-triangular indexing --
               confirm against where they are filled */
            offset += MAX(SGN0(i - iL) * (limRange - 1 - iL), 1);
         }
      }
   }

   /* normalizing gradient
   normalize(grad, numberPar * limRange);*/
   /* freeing memory */
   free1float(gradPart);
}
Ejemplo n.º 20
0
/*
 * Calibration benchmark: prepares the same kind of input matrix as the
 * elimination benchmarks, then starts and immediately stops the timers or
 * PAPI counters without doing any work in between.
 *
 * _p        points to a struct elim_params (fields m, n and r are read).
 * data      receives the measurements; data[0] is the elapsed time.
 * data_len  is set to the number of entries of data that are meaningful.
 *
 * With PAPI, loop_calibration[] is lowered to any smaller value measured
 * here.  Always returns 0.
 */
int run_nothing(void *_p, unsigned long long *data, int *data_len) {
  struct elim_params *params = (struct elim_params *)_p;

  mzd_t *mat = mzd_init(params->m, params->n);

  if (params->r == 0) {
    mzd_randomize(mat);
  } else {
    /* Build mat as the product of a unit lower-triangular factor and an
       upper-triangular factor whose rows from r onward are zero. */
    mzd_t *lower = mzd_init(params->m, params->m);
    mzd_t *upper = mzd_init(params->m, params->n);
    mzd_randomize(upper);
    mzd_randomize(lower);

    for (rci_t row = 0; row < params->m; ++row) {
      rci_t col;

      /* lower: wipe everything right of the diagonal, word by word */
      col = row + 1;
      while (col < params->m) {
        int const nbits = MIN(m4ri_radix, params->m - col);
        mzd_clear_bits(lower, row, col, nbits);
        col += m4ri_radix;
      }
      mzd_write_bit(lower, row, row, 1);

      /* upper: wipe everything left of the diagonal */
      col = 0;
      while (col < row && col < params->n) {
        int const nbits = MIN(m4ri_radix, row - col);
        mzd_clear_bits(upper, row, col, nbits);
        col += m4ri_radix;
      }

      if (row >= params->r) {
        /* past the requested rank: wipe the remainder of the row too */
        for (col = row; col < params->n; col += m4ri_radix) {
          int const nbits = MIN(m4ri_radix, params->n - col);
          mzd_clear_bits(upper, row, col, nbits);
        }
      } else {
        mzd_write_bit(upper, row, row, 1);
      }
    }

    mzd_mul(mat, lower, upper, 0);
    mzd_free(lower);
    mzd_free(upper);
  }

#ifndef HAVE_LIBPAPI
  /* wall time and cycle counter, started and stopped back to back */
  *data_len = 2;

  data[0] = walltime(0);
  data[1] = cpucycles();

  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
#else
  *data_len = MIN(papi_array_len + 1, *data_len);

  int const array_len = *data_len - 1;
  unsigned long long t0 = PAPI_get_virt_usec();
  int const papi_res = PAPI_start_counters((int*)papi_events, array_len);
  if (papi_res)
    m4ri_die("");

  PAPI_stop_counters((long long*)&data[1], array_len);
  t0 = PAPI_get_virt_usec() - t0;
  data[0] = t0;

  /* keep the smallest overhead seen so far for every slot */
  for (int slot = 0; slot <= array_len; ++slot) {
    if (data[slot] < loop_calibration[slot])
      loop_calibration[slot] = data[slot];
  }
#endif

  mzd_free(mat);

  return 0;
}
Ejemplo n.º 21
0
/*
 * MPI driver for a time-stepping wave propagator that runs either as
 * modeling (mig=n: image in, data out) or as migration (mig=y: data in,
 * image out).
 *
 * Every rank owns n_local planes of the padded grid (nx2 * nh2 values per
 * plane) starting at plane o_local, as returned by mcfft3_init().  Rank 0
 * writes all output headers and data, and applies the z=0 injection or
 * recording step.
 *
 * Each time step: forward transform of curr (mcfft3), one scaled copy per
 * column of the "right" matrix followed by an inverse transform (imcfft3),
 * then a pointwise update that combines curr, prev and the "left" matrix
 * rows with those inverse transforms.
 *
 * The process ends through exit(0) after MPI_Finalize().
 */
int main(int argc, char* argv[])
{
    bool mig, sub;
    int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, ih, nh, nh2;
    int im, i, j, m2, it1, it2, its, ikz, ikx, ikh, n2, nk, snap;
    float dt, dx, dz, c, old, dh;
    float *curr, *prev, **img, **dat, **lft, **rht, **wave;
    sf_complex *cwave, *cwavem;
    sf_file data, image, left, right, snaps;

    /*MPI related*/
    int cpuid,numprocs;
    int provided;
    int n_local, o_local, nz_local;
    int ozx2;
    float *sendbuf, *recvbuf, *wave_all;
    int *rcounts, *displs;

    /*wall time*/
    double startTime, elapsedTime;
    double clockZero = 0.0;

    /* threads_ok is not declared in this function; it records whether the
       MPI library granted at least MPI_THREAD_FUNNELED */
    MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
    threads_ok = provided >= MPI_THREAD_FUNNELED;

    sf_init(argc,argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (!sf_getbool("mig",&mig)) mig=false;
    /* if n, modeling; if y, migration */

    if (!sf_getint("snap",&snap)) snap=0;
    /* interval for snapshots */

    snaps = (snap > 0)? sf_output("snaps"): NULL;
    /* (optional) snapshot file */

    if (mig) { /* migration */
	data = sf_input("input");
	image = sf_output("output");

	if (!sf_histint(data,"n1",&nh)) sf_error("No n1=");
	if (!sf_histfloat(data,"d1",&dh)) sf_error("No d1=");

	if (!sf_histint(data,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(data,"d2",&dx)) sf_error("No d2=");

	if (!sf_histint(data,"n3",&nt)) sf_error("No n3=");
	if (!sf_histfloat(data,"d3",&dt)) sf_error("No d3=");

	if (!sf_getint("nz",&nz)) sf_error("Need nz=");
	/* time samples (if migration) */
	if (!sf_getfloat("dz",&dz)) sf_error("Need dz=");
	/* time sampling (if migration) */
        
        /* only rank 0 writes the output header */
        if (cpuid==0) {
	sf_putint(image,"o1",0.);
	sf_putint(image,"n1",nz);
	sf_putfloat(image,"d1",dz);
	sf_putstring(image,"label1","Depth");
	sf_putint(image,"o2",0.);
	sf_putint(image,"n2",nx);
	sf_putfloat(image,"d2",dx);
	sf_putstring(image,"label2","Midpoint");
	sf_putint(image,"n3",1); /* stack for now */
        }
    } else { /* modeling */
	image = sf_input("input");
	data = sf_output("output");

	if (!sf_histint(image,"n1",&nz)) sf_error("No n1=");
	if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1=");

	if (!sf_histint(image,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2=");

	if (!sf_getint("nt",&nt)) sf_error("Need nt=");
	/* time samples (if modeling) */
	if (!sf_getfloat("dt",&dt)) sf_error("Need dt=");
	/* time sampling (if modeling) */

	if (!sf_getint("nh",&nh)) sf_error("Need nh=");
        /* offset samples (if modeling) */
	if (!sf_getfloat("dh",&dh)) sf_error("Need dh=");
	/* offset sampling (if modeling) */

        /* only rank 0 writes the output header */
        if (cpuid==0) {
	sf_putint(data,"n1",nh);
	sf_putfloat(data,"d1",dh);
	sf_putstring(data,"label1","Half-Offset");
	sf_putint(data,"o2",0.);
	sf_putint(data,"n2",nx);
	sf_putfloat(data,"d2",dx);
	sf_putstring(data,"label2","Midpoint");
	sf_putint(data,"n3",nt);
	sf_putfloat(data,"d3",dt);
	sf_putstring(data,"label3","Time");
	sf_putstring(data,"unit3","s");
        }
    }

    /* snapshot header: (half-offset, midpoint, depth, time), rank 0 only */
    if (cpuid==0) {
    if (NULL != snaps) {
      sf_putint(snaps,"n1",nh);
      sf_putfloat(snaps,"d1",dh);
      sf_putstring(snaps,"label1","Half-Offset");

      sf_putint(snaps,"n2",nx);
      sf_putfloat(snaps,"d2",dx);
      sf_putstring(snaps,"label2","Midpoint");

      sf_putint(snaps,"n3",nz);
      sf_putfloat(snaps,"d3",dz);
      sf_putstring(snaps,"label3","Depth");

      sf_putint(snaps,"n4",nt/snap);
      sf_putfloat(snaps,"d4",dt*snap);
      if (mig) {
        sf_putfloat(snaps,"o4",(nt-1)*dt);
      } else {
        sf_putfloat(snaps,"o4",0.);
      }
      sf_putstring(snaps,"label4","Time");
    }
    }

    /* Mark the starting time. */
    startTime = walltime( &clockZero );

    /* set up the distributed FFT: returns nk and fills in the padded sizes
       (nh2, nx2, nz2) and this rank's plane count and starting plane */
    nk = mcfft3_init(1,nh,nx,nz,&nh2,&nx2,&nz2,&n_local,&o_local);
    /* number of local planes that fall inside the unpadded depth range */
    nz_local = (n_local < nz-o_local)? n_local:nz-o_local;
    sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d,nz_local=%d",cpuid,nh2,nx2,nz2,n_local,o_local,nz_local);
    if (cpuid==0)
      if (o_local!=0) sf_error("Cpuid and o_local inconsistant!");

    nzx = nz*nx*nh;          /* size of the unpadded global grid */
    //nzx2 = nz2*nx2*nh2;
    nzx2 = n_local*nx2*nh2;  /* number of padded-grid values held locally */
    ozx2 = o_local*nx2*nh2;  /* offset of the local part in the padded grid */

    img = sf_floatalloc2(nz,nx);
    dat = sf_floatalloc2(nh,nx);

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histbool(left,"sub",&sub) && !sf_getbool("sub",&sub)) sub=true;
    /* if -1 is included in the matrix */

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("No n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
    /* every rank reads both matrices in full */
    lft = sf_floatalloc2(nzx,m2);
    rht = sf_floatalloc2(m2,nk);

    sf_floatread(lft[0],nzx*m2,left);
    sf_floatread(rht[0],m2*nk,right);

    /* two time levels of the wavefield on the local part of the padded grid */
    curr = sf_floatalloc(nzx2);
    prev = sf_floatalloc(nzx2);

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave = sf_floatalloc2(nzx2,m2);

#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz]=0.;
	prev[iz]=0.;
    }

    /* gather buffers: only rank 0 allocates the receive side */
    sendbuf = prev;
    if (cpuid==0) {
      wave_all = sf_floatalloc(nh2*nx2*nz2);
      recvbuf = wave_all;
      rcounts = sf_intalloc(numprocs);
      displs  = sf_intalloc(numprocs);
    } else {
      wave_all = NULL;
      recvbuf = NULL;
      rcounts = NULL;
      displs = NULL;
    }

    /* rank 0 collects every rank's local size and offset; they are the
       counts and displacements of the MPI_Gatherv calls below */
    MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (mig) { /* migration */
	/* step backward in time */
	it1 = nt-1;
	it2 = -1;
	its = -1;	
    } else { /* modeling */
	sf_floatread(img[0],nz*nx,image);

	/* transpose and initialize at zero offset */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
	for (iz=0; iz < nz_local; iz++) {
	    for (ix=0; ix < nx; ix++) {
		curr[nh2*(ix+iz*nx2)]=img[ix][iz+o_local];
	    }
	}
	
	/* step forward in time */
	it1 = 0;
	it2 = nt;
	its = +1;
    }

    /* time stepping */
    for (it=it1; it != it2; it += its) {
	sf_warning("it=%d;",it);

	if (mig) { /* migration <- read data */
	    sf_floatread(dat[0],nx*nh,data);
	} else {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	    for (ix=0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {
		    dat[ix][ih] = 0.;
		}
	    }
	}
	
	/* snapshot: collective gather of sendbuf (prev at this point) on
	   rank 0, which writes the unpadded part of each (iz, ix) column */
	if (NULL != snaps && 0 == it%snap) {
          MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
          if (cpuid==0) {
            for (iz = 0; iz < nz; iz++)
              for (ix = 0; ix < nx; ix++)
                sf_floatwrite(wave_all+nh2*(ix+nx2*iz),nh,snaps);
          }
        }

	/* at z=0 */
	/* handled by rank 0 only, whose local part starts at plane 0 (checked
	   after mcfft3_init): inject the data when migrating, record it when
	   modeling */
        if (cpuid==0) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (ih=0; ih < nh; ih++) {
		if (mig) {
		    curr[ix*nh2+ih] += dat[ix][ih];
		} else {
		    dat[ix][ih] = curr[ix*nh2+ih];
		}
	    }
	}
        }

	/* matrix multiplication */
	mcfft3(curr,cwave);

	/* for every column im of rht: scale the transformed wavefield and
	   transform back into wave[im] */
	for (im = 0; im < m2; im++) {
          //for (ik = 0; ik < nk; ik++) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ikz,ikx,ikh,i,j)
#endif
          for (ikz = 0; ikz < n_local; ikz++) {
            for (ikx = 0; ikx < nx2; ikx++) {
              for (ikh = 0; ikh < nh2; ikh++) {
                /* i: index including the o_local plane offset (rows of rht);
                   j: index within this rank's local arrays */
                i = ikh + ikx*nh2 + (o_local+ikz)*nx2*nh2;
                j = ikh + ikx*nh2 + ikz*nx2*nh2;
#ifdef SF_HAS_COMPLEX_H
		cwavem[j] = cwave[j]*rht[i][im];
#else
		cwavem[j] = sf_crmul(cwave[j],rht[i][im]);
#endif
              }
            }
          }
          imcfft3(wave[im],cwavem);
	}

	/* pointwise update on the unpadded local grid:
	   new = (sub ? 2*curr : 0) - prev + sum_im lft[im][i]*wave[im][j];
	   the old curr value becomes prev */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,iz,ih,i,j,im,old,c)
#endif
        for (iz=0; iz < nz_local; iz++) {
	    for (ix = 0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {	
                    i = ih + ix*nh + (o_local+iz)*nx*nh;  /* original grid */
                    j = ih + ix*nh2+ iz*nx2*nh2; /* padded grid */
		
		    old = curr[j];

		    c = sub? 2*old: 0.0f;

		    c -= prev[j];

		    prev[j] = old;

		    for (im = 0; im < m2; im++) {
			c += lft[im][i]*wave[im][j];
		    }
		    
		    curr[j] = c;
		}
	    }
	}
	
	if (!mig) { /* modeling -> write out data */
          if (cpuid==0)
	    sf_floatwrite(dat[0],nx*nh,data);
	}
    }
    sf_warning(".");

    if (mig) {
      /* collect the final wavefield on rank 0; the image is its
         zero-offset (ih = 0) sample at every (iz, ix) */
      sendbuf = curr;
      MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

      if (cpuid==0) {
        /* transpose */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
        for (iz=0; iz < nz; iz++) {
          for (ix=0; ix < nx; ix++) {
            img[ix][iz] = wave_all[nh2*(ix+iz*nx2)];
          }
        }
	sf_floatwrite(img[0],nz*nx,image);
      }

    }

    mcfft3_finalize();

    /* Work's done. Get the elapsed wall time. */
    elapsedTime = walltime( &startTime );
    /* Print the wall time and terminate. */
    if (cpuid==0)
      printf("\nwall time = %.5fs\n", elapsedTime);

    MPI_Finalize();

    exit(0);
}
Ejemplo n.º 22
0
Archivo: matrix.c Proyecto: ederc/gb
/**
 * Reduce a GBLA matrix while keeping the A part.
 *
 * Steps, in order:
 *   1. if mat->AR has rows, run elim_fl_C_sparse_dense_keep_A() on
 *      mat->CR / mat->AR,
 *   2. if mat->CR has rows, convert it to the block matrix mat->C and free
 *      mat->CR,
 *   3. if mat->C exists, reduce it with elim_fl_C_sparse_dense_block()
 *      against mat->B, updating mat->D,
 *   4. copy mat->D into the dense matrix mat->DR and eliminate it with
 *      elim_fl_dense_D_completely().
 *
 * @param mat      matrix structure holding the AR/CR/B/C/D/DR parts
 * @param verbose  > 2 prints per-phase wall times, > 3 additionally prints
 *                 memory usage after each phase
 * @param nthreads thread count forwarded to every elimination/copy helper
 *
 * @return the value returned by elim_fl_dense_D_completely() (0 if mat->DR
 *         has no rows), or 1 if one of the elimination helpers reports an
 *         error.
 *         NOTE(review): the error value 1 cannot be told apart from a
 *         result of 1 by the caller.
 */
ri_t reduce_gbla_matrix_keep_A(mat_t *mat, int verbose, int nthreads)
{
  /*  timing structs */
  struct timeval t_load_start;  /* start of the current phase   */
  struct timeval t_complete;    /* start of the whole reduction */

  if (verbose > 2)
    gettimeofday(&t_complete, NULL);
  /* A^-1 * B */
  if (verbose > 2) {
    printf("---------------------------------------------------------------------------\n");
    printf("GBLA Matrix Reduction\n");
    printf("---------------------------------------------------------------------------\n");
    gettimeofday(&t_load_start, NULL);
    printf("%-38s","Storing A in C ...");
    fflush(stdout);
  }

  if (mat->AR->row != NULL) {
    if (elim_fl_C_sparse_dense_keep_A(mat->CR, &(mat->AR), mat->mod, nthreads)) {
      printf("Error while reducing A.\n");
      return 1;
    }
  }
  if (verbose > 2) {
    printf("%9.3f sec\n",
        walltime(t_load_start) / (1000000));
  }
  if (verbose > 3) {
    print_mem_usage();
  }
  /* reducing submatrix C to zero using methods of Faugère & Lachartre */
  if (verbose > 2) {
    gettimeofday(&t_load_start, NULL);
    printf("%-38s","Copying C to sparse block representation ...");
    fflush(stdout);
  }
  if (mat->CR->row != NULL) {
    mat->C  = copy_sparse_to_block_matrix(mat->CR, nthreads);
    free_sparse_matrix(&(mat->CR), nthreads);
  }
  if (verbose > 2) {
    printf("%9.3f sec\n",
        walltime(t_load_start) / (1000000));
  }
  if (verbose > 3) {
    print_mem_usage();
  }
  if (verbose > 2) {
    /* restart the phase timer; otherwise the time printed for this phase
     * would also contain the copy phase above */
    gettimeofday(&t_load_start, NULL);
    printf("%-38s","Reducing C to zero ...");
    fflush(stdout);
  }
  if (mat->C != NULL) {
    if (elim_fl_C_sparse_dense_block(mat->B, &(mat->C), mat->D, mat->mod, nthreads)) {
      printf("Error while reducing C.\n");
      return 1;
    }
  }
  if (verbose > 2) {
    printf("%9.3f sec\n",
        walltime(t_load_start) / (1000000));
  }
  if (verbose > 3) {
    print_mem_usage();
  }
  /* copy block D to dense wide (re_l_t) representation */
  mat->DR = copy_block_to_dense_matrix(&(mat->D), nthreads, 1);
  mat->DR->mod  = mat->mod;

  /* eliminate mat->DR using a structured Gaussian Elimination process on the rows */
  nelts_t rank_D = 0;
  /* echelonizing D to zero using methods of Faugère & Lachartre */
  if (verbose > 2) {
    gettimeofday(&t_load_start, NULL);
    printf("%-38s","Reducing D ...");
    fflush(stdout);
  }
  if (mat->DR->nrows > 0)
    /* rank_D = elim_fl_dense_D(mat->DR, nthreads); */
    rank_D = elim_fl_dense_D_completely(mat->DR, nthreads);
  if (verbose > 2) {
    /* columns: elapsed time, rank, nrows - rank, nrows */
    printf("%9.3f sec %5d %5d %5d\n",
        walltime(t_load_start) / (1000000), rank_D, mat->DR->nrows - rank_D, mat->DR->nrows);
  }
  if (verbose > 3) {
    print_mem_usage();
  }
  if (verbose > 2) {
    printf("---------------------------------------------------------------------------\n");
    printf("%-38s","Reduction completed ...");
    fflush(stdout);
    /* total time since entry, measured from t_complete */
    printf("%9.3f sec\n",
        walltime(t_complete) / (1000000));
    if (verbose > 3)
      print_mem_usage();
  }

  return rank_D;
}
Ejemplo n.º 23
0
/*
 * Master side of one forward-modeling pass.
 *
 * 1. Starts the PVM modeling slaves (CreateSlaves), broadcasts the general
 *    information and the layer profiles (thick, rho, alpha, qP, beta, qS)
 *    and hands the first nProc frequency partitions of statusFreq to them.
 * 2. Collects the modeled partitions into dataS; every slave that reports
 *    back either gets the next unassigned partition or is told to die.
 * 3. For every receiver: tapers the spectrum, transforms it to time
 *    (pfacr), optionally mutes it, accumulates the data misfit weighted by
 *    CD (oF) and the plain squared residue, tapers the weighted residual
 *    and stores its spectrum (pfarc) in resCD.
 * 4. If PRIOR is set, accumulates the model term auxm2 from the enabled
 *    parameter classes (vpFrechet / vsFrechet / rhoFrechet).
 * 5. Appends a summary to the file "report".
 *
 * Besides its locals the function reads and writes file-level state (info,
 * statusFreq, procInfo, processes, alpha, beta, rho, resCD, oFNorm,
 * noiseVar, auxm1, auxm2, ...).
 *
 * Returns the objective function divided by oFNorm; oFNorm is set on the
 * call made while modCount == 0.  The process terminates with exit(0) when
 * residue / noiseVar <= 1, and with exit(-1) on PVM start-up problems, on a
 * message from an unknown task, or when "report" cannot be opened.
 */
float modeling()
{
   /* declaration of variables */
   FILE *fp;                       /* to report results */
   int iF, iF1, iR, offset, iT1, iT2, iS, iProc, i, k;
                                   /* counters */
   int wL;                         /* window length */
   int die;                        /* die processor flag */
   int FReceived;                  /* number of frequencies processed */
   int apl_pid;                    /* PVM process id control */
   int pid;                        /* process id */
   int processControl;             /* monitoring PVM start */
   int FInfo[2];                   /* frequency delimiters */
   float wallcpu;                  /* wall clock time */
   float oF;                       /* value of the objective function */
   float residue;                  /* data residue */
   float wdw;                      /* windowing purposes */
   float *buffer, *bufferRCD;      /* auxiliary buffers */
                                   /* upgoing waves */
   complex **dataS;                /* synthetics in the frequency domain */
   complex *bufferC;               /* auxiliary buffer */
   complex **freqPart;             /* frequency arrays sent by the slaves */
   
   /* Clean up log files */
   CleanLog();

   /* Resetting synchronization flags (column 2 = "partition handed out") */
   for (i = 0; i < nFreqPart; i++)
   {
      statusFreq[i][2] = 0;
   }
    
   /* allocating some memory */
   dataS = alloc2complex(info->nF, info->nR);
   buffer = alloc1float(info->nSamples);
   bufferRCD = alloc1float(info->nSamples);
   bufferC = alloc1complex(info->nSamples / 2 + 1);
   freqPart = alloc2complex(info->nFreqProc, info->nR);

   /* resetting the work buffers */
   for (iF = 0; iF < info->nSamples / 2 + 1; iF++)
      bufferC[iF] = zeroC;
   for (iS = 0; iS < info->nSamples; iS++)
   {
      buffer[iS] = 0; bufferRCD[iS] = 0;
   }

   /* DD 
   fprintf(stderr, "nF -> %d\n", info->nF);*/
   fprintf(stderr, "Starting communication with PVM for modeling\n");

   /* starting communication with PVM */
   if ((apl_pid = pvm_mytid()) < 0) 
   {
      pvm_perror("Error enrolling master process");
      exit(-1);
   }
   processControl = CreateSlaves(processes, PROCESS_MODELING, nProc);
   
   if (processControl != nProc)
   {
      fprintf(stderr,"Problem starting PVM daemons\n");
      exit(-1);
   }

   /* converting to velocities (undone again after the slaves finished) */
   if (IMPEDANCE)
   {
      for (i = 0; i < info->nL + 1; i++)
      {
         alpha[i] /= rho[i];
         beta[i] /= rho[i];
      }
   }
   
   /* Broadcasting all processes common information */
   BroadINFO(info, 1, processes, nProc, GENERAL_INFORMATION);
   
   /* sending all profiles */
   BroadFloat(thick, info->nL + 1, processes, nProc, THICKNESS);
   BroadFloat(rho, info->nL + 1, processes, nProc, DENSITY);
   BroadFloat(alpha, info->nL + 1, processes, nProc, ALPHAS);
   BroadFloat(qP, info->nL + 1, processes, nProc, QALPHA);
   BroadFloat(beta, info->nL + 1, processes, nProc, BETAS);
   BroadFloat(qS, info->nL + 1, processes, nProc, QBETA);
   
   /* sending frequency partitions for each process */
   for (iProc = 0; iProc < nProc; iProc++)
   {
      FInfo[0] = statusFreq[iProc][0];
      FInfo[1] = statusFreq[iProc][1];

      if (info->verbose)
	 fprintf(stderr, 
		 "Master sending frequencies [%d, %d] out of %d to slave Modeling %d [id:%d]\n", FInfo[0], FInfo[1], info->nF, iProc, processes[iProc]);
      
      /* remember which partition this slave is working on */
      procInfo[iProc][0] = FInfo[0];
      procInfo[iProc][1] = FInfo[1];
      SendInt(FInfo, 2, processes[iProc], FREQUENCY_LIMITS);
      statusFreq[iProc][2] = 1;
   }

   /* waiting modelled frequencies */
   /* master process will send more frequencies if there's more work to do */
   /* measuring elapsed time */
   wallcpu = walltime();

   /* resetting frequency counter */
   FReceived = 0;

   while (FOREVER)
   {
      pid = RecvCplx(freqPart[0], info->nR * info->nFreqProc, -1, 
		     FREQUENCY_PARTITION);

      /* finding the frequency limits of this process */
      /* DD 
      fprintf(stderr, "Master finding the frequency limits of this process\n");
      */

      /* bounded search: a sender that is not one of our slaves must not
         make the scan run past the end of the task table */
      iProc = 0;
      while (iProc < nProc && pid != processes[iProc])
	 iProc++;

      if (iProc == nProc)
      {
	 fprintf(stderr, "Master received data from unknown process %d\n", 
		 pid);
	 exit(-1);
      }

      /* DD 
      fprintf(stderr, "iProc %d pid %d\n", iProc, pid);*/

      /* copying into proper place of the total frequency array */
      for (iR = 0; iR < info->nR; iR++)
      {
	 for (k = 0, i = procInfo[iProc][0]; i <= procInfo[iProc][1]; i++, k++)
	 {
	    dataS[iR][i - initF] = freqPart[iR][k];
	 }
      }
      
      /* summing frequencies that are done */
      FReceived += procInfo[iProc][1] - procInfo[iProc][0] + 1;

      if (info->verbose)
	 fprintf(stderr, "Master received %d frequencies, remaining %d\n", 
		 FReceived, info->nF - FReceived);

      /* defining new frequency limits: first partition not yet handed out */
      i = 0;
      while (i < nFreqPart && statusFreq[i][2])
	 i++;

      /* DD 
      fprintf(stderr, "i %d nFreqPart %d\n", i, nFreqPart);*/
      
      if (i < nFreqPart)
      {
	 /* there is still more work to be done */
	 /* tell this process to not die */
	 die = 0;
	 SendInt(&die, 1, processes[iProc], DIE);
	 FInfo[0] = statusFreq[i][0];
	 FInfo[1] = statusFreq[i][1];

	 if (info->verbose)
	    fprintf(stderr, "Master sending frequencies [%d, %d] to slave %d\n", FInfo[0], FInfo[1], processes[iProc]);

	 procInfo[iProc][0] = FInfo[0];
	 procInfo[iProc][1] = FInfo[1];
	 SendInt(FInfo, 2, processes[iProc], FREQUENCY_LIMITS);
	 statusFreq[i][2] = 1;
      }
      else
      {
	 /* tell this process to die since there is no more work to do */
	 if (info->verbose)
	    fprintf(stderr, "Master ''killing'' slave %d\n", processes[iProc]);
	 die = 1;
	 SendInt(&die, 1, processes[iProc], DIE);
      }

      /* a check to get out the loop */
      if (FReceived >= info->nF) break;
   }
   
   /* quitting PVM */
   EndOfMaster();
   
   /* getting elapsed time */
   wallcpu = walltime() - wallcpu;
   fprintf(stderr, "Modeling wall clock time = %f seconds\n", 
	   wallcpu);
     
   /* back to impedances*/
   if (IMPEDANCE)
   {
      for (i = 0; i < info->nL + 1; i++)
      {
         alpha[i] *= rho[i];
         beta[i] *= rho[i];
      }
   }

   /* computing the objective function for the time window */
   for (oF = 0, residue = 0, iR = 0; iR < info->nR; iR++)
   {
      /* windowing as it was done to the input data */
      iT1 = NINT(info->f1 / info->dF);
      iT2 = NINT(info->f2 / info->dF);
      wL = info->nF * PERC_WINDOW / 2;
      wL = 2 * wL + 1;               /* forces an odd window length */
      for (iS = 0, iF = 0; iF < info->nSamples / 2 + 1; iF++)
      {
	 if (iF < iT1 || iF >= iT2)
         {
            /* outside the modeled band */
            bufferC[iF] = cmplx(0, 0);
         }
         else if (iF - iT1 < (wL - 1) / 2)
         {
            /* rising flank of the cosine-sum taper (.42/.5/.08) */
            wdw = .42 - .5 * cos(2 * PI * (float) iS / ((float) (wL - 1))) +
                  .08 * cos(4 * PI * (float) iS / ((float) (wL - 1)));
	    bufferC[iF].r = dataS[iR][iF - iT1].r * wdw;
	    bufferC[iF].i = dataS[iR][iF - iT1].i * wdw;
            iS++;
         }
         else if (iF - iT1 >= info->nF - (wL - 1) / 2)
         {
            /* falling flank: iS keeps counting from the rising flank */
            iS++;
            wdw = .42 - .5 * cos(2 * PI * (float) iS / ((float) (wL - 1))) +
                  .08 * cos(4 * PI * (float) iS / ((float) (wL - 1)));
	    bufferC[iF].r = dataS[iR][iF - iT1].r * wdw;
	    bufferC[iF].i = dataS[iR][iF - iT1].i * wdw;
         }
	 else
	 {
	    /* flat part of the window */
	    bufferC[iF] = dataS[iR][iF - iT1];
	 }
      }
      
      /* going to time domain */
      /* DD 
      fprintf(stderr, "going to time domain \n");*/

      pfacr(1, info->nSamples, bufferC, buffer);

      /* muting ? */
      if (MUTE)
      {
         for (iS = 0; iS <= NINT(t1Mute[iR] / dt); iS++)
         {
	    buffer[iS] = 0;
         }
      }

      /* and computing data misfit and likelihood function */
      iS = NINT(t1 / dt);            /* first sample of the time window */
      for (iT1 = 0; iT1 < nDM; iT1++)
      {
	 bufferRCD[iT1 + iS] = 0;

	 /* row iT1 of CD times the residual vector; the offset stepping
	    presumably walks a packed symmetric matrix - TODO confirm */
	 for (offset = iT1, iT2 = 0; iT2 < nDM; iT2++)
	 {
	    bufferRCD[iT1 + iS] +=  
	                   (buffer[iT2 + iS] - dataObs[iR][iT2]) * CD[offset];
	    offset += MAX(SGN0(iT1 - iT2) * (nDM - 1 - iT2), 1);
	 }
	 oF += (buffer[iT1 + iS] - dataObs[iR][iT1]) * bufferRCD[iT1 + iS];

	 residue += (buffer[iT1 + iS] - dataObs[iR][iT1]) * 
                    (buffer[iT1 + iS] - dataObs[iR][iT1]);

	 /* DD 
	 fprintf(stdout, "%d %f %f %f %f %f %d %f %f\n", 
		 nTotalSamples, oF, dt, auxm1, 
		 info->tau, residue, iT1, buffer[iT1], 
		 dataObs[iR][iT1 - NINT(t1 / dt)]); */
      }

      /* windowing bufferRCD */
      iT1 = NINT(t1 / dt);
      iT2 = NINT(t2 / dt);
      wL = nDM * PERC_WINDOW / 2;
      wL = 2 * wL + 1;
      for (iS = 0, iF = 0; iF < info->nSamples; iF++)
      {
         if (iF < iT1 || iF >= iT2)
         {
            bufferRCD[iF] = 0;
         }
	 else if (iF - iT1 < (wL - 1) / 2)
         {
            wdw =
               .42 - .5 * cos(2 * PI * (float) iS / ((float) (wL - 1))) +
                  .08 * cos(4 * PI * (float) iS / ((float) (wL - 1)));
            bufferRCD[iF] *= wdw;
            iS++;
         }
         else if (iF - iT1 >= nDM - (wL - 1) / 2)
         {
            iS++;
            wdw =
               .42 - .5 * cos(2 * PI * (float) iS / ((float) (wL - 1))) +
                  .08 * cos(4 * PI * (float) iS / ((float) (wL - 1)));
            bufferRCD[iF] *= wdw;
         }
      }
      
      /* going back to Fourier domain */
      pfarc(-1, info->nSamples, bufferRCD, bufferC);          
      
      /* keep only the modeled band of the weighted residual spectrum */
      for (iF1 = 0, iF = NINT(info->f1 / info->dF); 
	   iF <= NINT(info->f2 / info->dF); iF++, iF1++)
      {
	 resCD[iR][iF1] = bufferC[iF];
      }
   }

   /* considering the .5 factor of the exponent of the Gaussian */
   /* and normalizing the likelihood by the number of samples */
   oF /= (2 * nTotalSamples);

   /* freeing the work arrays allocated at the top */
   free2complex(dataS);
   free1float(buffer);
   free1float(bufferRCD);
   free1complex(bufferC);
   free2complex(freqPart);

   /* considering the regularization or model covariance term */
   if (PRIOR)
   {
      auxm1 = 1. / (float) (numberPar * limRange);     /* normalization */
      for (auxm2 = 0, iF = 0; iF < limRange; iF++)
      {
	 for (offset = iF, iF1 = 0; iF1 < limRange; iF1++)
	 {
	    if (vpFrechet)
	    {
	       auxm2 += (alpha[iF + lim[0]] - alphaMean[iF + lim[0]]) * 
		         CMvP[offset] * auxm1 * 
		        (alpha[iF1 + lim[0]] - alphaMean[iF1 + lim[0]]);
	    }
	    
	    if (vsFrechet)
	    {
	       auxm2 += (beta[iF + lim[0]] - betaMean[iF + lim[0]]) * 
	                 CMvS[offset] * auxm1 *
		        (beta[iF1 + lim[0]] - betaMean[iF1 + lim[0]]);
	    }
	    
	    if (rhoFrechet)
	    {
	       auxm2 += (rho[iF + lim[0]] - rhoMean[iF + lim[0]]) * 
		         CMrho[offset] * auxm1 *
		        (rho[iF1 + lim[0]] - rhoMean[iF1 + lim[0]]);
	    }
	    offset += MAX(SGN0(iF - iF1) * (limRange - 1 - iF1), 1);
	 }
      }
   }

   /* getting normalization factor */
   fp = fopen("report", "a");
   if (fp == NULL)
   {
      /* every fprintf below would otherwise be handed a NULL stream */
      fprintf(stderr, "Could not open report file for appending\n");
      exit(-1);
   }
   fprintf(fp,"-----------------------\n");

   if (modCount == 0) 
   {
      oFNorm = oF;
      fprintf(fp,">> Normalization constant for objective function: %f <<\n",
	      oFNorm);
   }
   
   /* normalizing residue */
   residue /= (nTotalSamples);

   /* no data covariance and no noise variance given: derive one */
   if (!DATACOV && noiseVar == 0) noiseVar = residue / 10.;
   
   if (PRIOR)
   {
      fprintf(fp,
      "residue at iteration [%d] : Data residue variance %f , Noise variance %f , Likelihood %f , Prior %f\n", 
      modCount, residue, noiseVar, oF / oFNorm, auxm2 / oFNorm);
   }
   else
   {
      fprintf(fp,"residue at iteration [%d] : Data residue variance %f , Noise variance %f , Likelihood %f , No Prior\n", modCount, residue, noiseVar, oF / oFNorm);
   }

   /* checking if we reached noise variance with the data residue */
   if (residue / noiseVar <= 1)
   {
      /* DATA IS FIT, stop the procedure */
      fprintf(fp, "[][][][][][][][][][][][][][][][][][][][]\n");
      fprintf(fp, "DATA WAS FIT UP TO 1 VARIANCE!\n");
      fprintf(fp, "[][][][][][][][][][][][][][][][][][][][]\n");
      exit(0);
   }
   
   /* adding Likelihood and Prior */
   if (PRIOR) oF += auxm2 / 2;
   fprintf(fp,"TOTAL residue at iteration [%d] : %f\n", 
	   modCount, oF / oFNorm);

   fprintf(fp,"-----------------------\n");
   fclose(fp);


   /* returning objective function value */
   return(oF / oFNorm);
}
Ejemplo n.º 24
0
/*
 * Call-overhead benchmark driver.
 *
 * Prints the result of each call path once, then times every path: K
 * samples of J back-to-back calls each, reporting the smallest sample
 * divided by J through snftime().  All results are summed into one
 * accumulator that is printed at the end.
 */
int main(int argc, char *argv[]) {
  enum { TBUF_LEN = 100 };          /* size of the formatted-time buffer */
  char text[TBUF_LEN];

  initialize_mycallable();
  initialize_mycaller();

  void *interned = get_f_callable_intern();
  void *keyed = get_f_callable_key();

  /* one call per path, results shown for comparison */
  printf("Direct result: %f\n", func(2.0));
  printf("Intern result: %f\n", docall_intern(interned, 2.0));
  printf("Key result: %f\n", docall_key(keyed, 2.0));
  printf("Intern+getfunc result: %f\n", docall_getfunc_intern(interned, 2.0));
  printf("Key+getfunc result: %f\n", docall_getfunc_key(keyed, 2.0));

  double total = 0;

  /* plain function call */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += func(2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Direct took %s\n", text);
  }

  /* call through docall_dispatch with the function's address */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += docall_dispatch(&func, 2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Dispatch took %s\n", text);
  }

  /* docall_intern on the object from get_f_callable_intern() */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += docall_intern(interned, 2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Intern method took %s\n", text);
  }

  /* docall_key on the object from get_f_callable_key() */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += docall_key(keyed, 2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Key method took %s\n", text);
  }

  /* docall_getfunc_intern on the interned object */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += docall_getfunc_intern(interned, 2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Intern+getfunc method took %s\n", text);
  }

  /* docall_getfunc_key on the keyed object */
  {
    double samples[K];
    for (int rep = 0; rep != K; ++rep) {
      const double started = walltime();
      for (int call = 0; call != J; call++)
        total += docall_getfunc_key(keyed, 2.0);
      samples[rep] = walltime() - started;
    }
    const double per_call = arrmin(samples, K) / (double)J;
    snftime(text, TBUF_LEN, per_call);
    printf("Key+getfunc method took %s\n", text);
  }

  /* printing the accumulator keeps the timed calls observable */
  printf("s: %f\n", total);
  return 0;
}
Ejemplo n.º 25
0
// Benchmark driver: builds a graph from a TupleGraph, then runs
// GraphlabEngine<G,PagerankVertexProgram>::run_sync on it FLAGS_trials times,
// logging the timings and the summed vertex rank.
// NOTE(review): this excerpt stops after the run(...) call; the end of main
// (closing brace and whatever teardown precedes it) is not part of the
// visible text.
int main(int argc, char* argv[]) {
  init(&argc, &argv);
  run([]{
    
    double t;
    
    TupleGraph tg;
    
    // Obtain the edge tuples: generated when no input path is given,
    // otherwise loaded from FLAGS_path in format FLAGS_format.
    GRAPPA_TIME_REGION(tuple_time) {
      if (FLAGS_path.empty()) {
        // number of edges = 2^FLAGS_scale * FLAGS_edgefactor
        int64_t NE = (1L << FLAGS_scale) * FLAGS_edgefactor;
        tg = TupleGraph::Kronecker(FLAGS_scale, NE, 111, 222);
      } else {
        LOG(INFO) << "loading " << FLAGS_path;
        tg = TupleGraph::Load(FLAGS_path, FLAGS_format);
      }
    }
    LOG(INFO) << tuple_time;
    LOG(INFO) << "constructing graph";
    t = walltime();
    
    auto g = G::create(tg);
    
    // wall time spent in G::create
    construction_time = walltime()-t;
    LOG(INFO) << construction_time;
    
    // Sanity check: the n_out values summed over masters(g) must equal g->ne.
    count = 0;
    forall(masters(g), [](G::Vertex& v){
      count += v.n_out;
    });
    CHECK_EQ(count, g->ne);
    
    Metrics::start_tracing();
    
    for (int i = 0; i < FLAGS_trials; i++) {
      if (FLAGS_trials > 1) LOG(INFO) << "trial " << i;
      
      // every trial starts with all ranks set to 1.0
      forall(g, [](G::Vertex& v){ v->rank = 1.0; });
      
      GRAPPA_TIME_REGION(total_time) {
        activate_all(g);
        GraphlabEngine<G,PagerankVertexProgram>::run_sync(g);
      }
      
      if (i == 0) {
        total_time.reset(); // don't count the first one
        // report the rank sum of the first trial on stderr
        total_rank = 0;
        forall(g, [](G::Vertex& v){ total_rank += v->rank; });
        std::cerr << "total_rank: " << total_rank << "\n";
      }      
    }
    
    Metrics::stop_tracing();
    
    LOG(INFO) << total_time;
    
    // final rank sum, this time over masters(g) only
    total_rank = 0;
    forall(masters(g), [](G::Vertex& v){ total_rank += v->rank; });
    LOG(INFO) << "total_rank: " << total_rank << "\n";
    
  });
Ejemplo n.º 26
0
Archivo: matrix.c Proyecto: ederc/gb
/**
 * Run the GBLA reduction on the block-split matrix `mat`.
 *
 * Phases: reduce A against B, reduce C (updating D), copy D into the dense
 * matrix mat->DR and eliminate it.  With a single thread D is eliminated by
 * elim_fl_dense_D_completely(); otherwise elim_fl_dense_D() is followed by
 * a row-by-row completely_reduce_D() pass.  If mat->sl > 0 and B still has
 * blocks, B is finally copied to the dense matrix mat->BR.
 *
 * @param mat      matrix structure holding the A/B/C/D/DR/BR parts
 * @param verbose  > 2 prints per-phase wall times, > 3 additionally prints
 *                 memory usage
 * @param nthreads thread count forwarded to the helpers
 *
 * @return the rank reported by the D elimination (0 if mat->DR has no
 *         rows), or 1 if reducing A or C reports an error.
 */
ri_t reduce_gbla_matrix(mat_t * mat, int verbose, int nthreads)
{
  /* verbosity thresholds, evaluated once */
  const int show_timings = (verbose > 2);
  const int show_memory  = (verbose > 3);

  struct timeval total_start;   /* start of the whole reduction */
  struct timeval phase_start;   /* start of the current phase   */
  nelts_t rank_D = 0;

  /* ---- phase 1: A^-1 * B ---- */
  if (show_timings) {
    gettimeofday(&total_start, NULL);
    printf("---------------------------------------------------------------------------\n");
    printf("GBLA Matrix Reduction\n");
    printf("---------------------------------------------------------------------------\n");
    gettimeofday(&phase_start, NULL);
    printf("%-38s","Reducing A ...");
    fflush(stdout);
  }
  if (mat->A->blocks != NULL &&
      elim_fl_A_sparse_dense_block(&(mat->A), mat->B, mat->mod, nthreads)) {
    printf("Error while reducing A.\n");
    return 1;
  }
  if (show_timings)
    printf("%9.3f sec\n", walltime(phase_start) / (1000000));
  if (show_memory)
    print_mem_usage();

  /* ---- phase 2: reduce submatrix C to zero (Faugère & Lachartre) ---- */
  if (show_timings) {
    gettimeofday(&phase_start, NULL);
    printf("%-38s","Reducing C ...");
    fflush(stdout);
  }
  if (mat->C->blocks != NULL &&
      elim_fl_C_sparse_dense_block(mat->B, &(mat->C), mat->D, mat->mod, nthreads)) {
    printf("Error while reducing C.\n");
    return 1;
  }
  if (show_timings)
    printf("%9.3f sec\n", walltime(phase_start) / (1000000));
  if (show_memory)
    print_mem_usage();

  /* ---- phase 3: block D -> dense wide (re_l_t) rows ---- */
  mat->DR = copy_block_to_dense_matrix(&(mat->D), nthreads, 1);
  mat->DR->mod  = mat->mod;
#if 0
  /* debug dump of DR, disabled */
  printf("number of rows of DR %u\n", mat->DR->nrows);
  for (int ii=0; ii<mat->DR->nrows; ++ii) {
    printf("ROW %d\n",ii);
    if (mat->DR->row[ii]->init_val == NULL)
      printf("NULL!");
    else {
      printf("%u || ", mat->DR->row[ii]->lead);
      for (int jj=0; jj<mat->DR->ncols; ++jj)
#if defined(GBLA_USE_UINT16) || defined(GBLA_USE_UINT32)
        printf("%u (%u)  ", mat->DR->row[ii]->init_val[jj], jj+mat->ncl);
#else
        printf("%.0f  ", mat->DR->row[ii]->init_val[jj]);
#endif
    }
    printf("\n");
  }
#endif

  /* ---- phase 4: structured Gaussian elimination on the rows of DR ---- */
  if (show_timings) {
    gettimeofday(&phase_start, NULL);
    printf("%-38s","Reducing D ...");
    fflush(stdout);
  }
  if (mat->DR->nrows > 0) {
    if (nthreads != 1) {
      nelts_t step;
      rank_D = elim_fl_dense_D(mat->DR, nthreads);
      /* walk the rows from index rank-2 down to 0 and reduce each one */
      for (step = 1; step < mat->DR->rank; ++step) {
        copy_piv_to_val(mat->DR, mat->DR->rank-step-1);
        completely_reduce_D(mat->DR, mat->DR->rank-step-1);
      }
    } else {
      rank_D = elim_fl_dense_D_completely(mat->DR, nthreads);
    }
  }
  if (show_timings) {
    /* columns: elapsed time, rank, nrows - rank, nrows */
    printf("%9.3f sec %5d %5d %5d\n",
        walltime(phase_start) / (1000000), rank_D, mat->DR->nrows - rank_D, mat->DR->nrows);
  }

  /* when simplifying, B is also needed in dense row format (BR) */
  if (mat->sl > 0 && mat->B->blocks != NULL) {
    mat->BR = copy_block_to_dense_matrix(&(mat->B), nthreads, 0);
    mat->BR->mod  = mat->mod;
  }
  if (show_memory)
    print_mem_usage();

  /* ---- summary ---- */
  if (show_timings) {
    printf("---------------------------------------------------------------------------\n");
    printf("%-38s","Reduction completed ...");
    fflush(stdout);
    printf("%9.3f sec\n", walltime(total_start) / (1000000));
    if (show_memory)
      print_mem_usage();
  }

  return rank_D;
}
Ejemplo n.º 27
0
  /// Runs the vertex program over graph `_g` in repeated rounds.
  ///
  /// Sets the engine's graph handle `g` on all cores, creates one VertexProg
  /// per vertex, performs an initial gather if any program asks for it, and
  /// then repeats apply + scatter rounds while V::total_active > 0 and fewer
  /// than FLAGS_max_iterations rounds have run.  Every round also writes a
  /// text dump of the per-vertex label counts and labels to a file derived
  /// from OutputPath.  All vertex programs are deleted before returning.
  static void run_sync(GlobalAddress<Graph<V,E>> _g) {
    
    // publish the graph handle on every core
    call_on_all_cores([=]{ g = _g; });
    
    ct = 0;
    // initialize GraphlabVertexProgram
    // (ct counts the vertices whose program returns true from gather_edges)
    forall(g, [=](Vertex& v){
      v->prog = new VertexProg(v);
      if (prog(v).gather_edges(v)) ct++;
    });
    
    // Initial gather: only when at least one vertex asked for it.  Each
    // edge's gather result is posted to the program of the vertex at e.ga.
    if (ct > 0) {
      forall(g, [=](Vertex& v){
        forall<async>(adj(g,v), [=,&v](Edge& e){
          // gather
          auto delta = prog(v).gather(v, e);

          call<async>(e.ga, [=](Vertex& ve){
            prog(ve).post_delta(delta);
          });
        });
      });
    }
    int iteration = 0;
    size_t active = V::total_active;
    // The GRAPPA_TIME_REGION block below is the body of this while loop.
    while ( active > 0 && iteration < FLAGS_max_iterations )
        GRAPPA_TIME_REGION(iteration_time) {
      VLOG(1) << "iteration " << std::setw(3) << iteration;
      VLOG(1) << "  active: " << active;

      double t = walltime();
      
      // Apply phase: active vertices only.  Each one is deactivated, its
      // program applies the cached value, and scatter_edges() decides
      // whether it takes part in the scatter phase below.
      forall(g, [=](Vertex& v){
        if (!v->active) return;
        v->deactivate();

        auto& p = prog(v);

        // apply
        p.apply(v, p.cache);

        v->active_minor_step = p.scatter_edges(v);
      });

      // Scatter phase: flagged vertices clear their flag and scatter a copy
      // of their program along every adjacent edge.
      forall(g, [=](Vertex& v){
        if (v->active_minor_step) {
          v->active_minor_step = false;
          auto prog_copy = prog(v);
          // scatter
          forall<async>(adj(g,v), [=](Edge& e){
            _do_scatter(prog_copy, e, &VertexProg::scatter);
          });
        }
      });
  
  // Per-iteration dump.  The file name is
  // OutputPath-<pid>-<mycore()>-<iteration>; one line per vertex holds the
  // vertex id, label_count[0..Number_of_groups-1] and the vertex label.
  // NOTE(review): `path` is assigned but the name is built from OutputPath
  // directly, so `path` is unused.
  // NOTE(review): the on_all_cores wrappers around open/close are commented
  // out, so the stream is (re)constructed and closed only where this code
  // runs, while the forall body writes to myFile - confirm this is intended.
  {
    symmetric_static std::ofstream myFile;
    //std::ofstream myFile;
    int pid = getpid();
    LOG(INFO) << "start writing file";
    std::string path = NaiveGraphlabEngine<G,VertexProg>::OutputPath;
      //on_all_cores( [pid, iteration, path] {
          std::ostringstream oss;
          oss << OutputPath << "-" << pid << "-" << mycore() << "-" << iteration;
          // placement new re-creates the static stream for this iteration
          new (&myFile) std::ofstream(oss.str());
          // the whole process is terminated if the dump file cannot be opened
          if (!myFile.is_open()) exit(1);
        //});
      forall(g, [](VertexID i, Vertex& v){ 
          // LOG(INFO) << "id: " << i << " label: " << v->label;
          myFile << i << " ";
          for (int j = 0; j < NaiveGraphlabEngine<G,VertexProg>::Number_of_groups; j++) {
            myFile << prog(v).cache.label_count[j] << " ";
          }
          myFile << v->label << "\n";
        });
      //on_all_cores( [] {
          myFile.close();
        //});
    LOG(INFO) << "end writig file";
    }
      // round bookkeeping: count it, log its duration, refresh the active count
      iteration++; 
      VLOG(1) << "  time:   " << walltime()-t;
      active = V::total_active;
    
    }
  
  

    // release the vertex programs allocated at the top
    forall(g, [](Vertex& v){ delete static_cast<VertexProg*>(v->prog); });
  }