void RMAT::run()
{
  int me,nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

  // mr = matrix edges

  MapReduce *mr = obj->create_mr();

  // loop until desired number of unique nonzero entries

  int niterate = 0;
  uint64_t ntotal = rmat.order * rmat.nnonzero;
  uint64_t nremain = ntotal;

  while (nremain) {
    niterate++;
    rmat.ngenerate = nremain/nprocs;
    if (me < nremain % nprocs) rmat.ngenerate++;
    mr->map(nprocs,rmat_generate,&rmat,1);
    uint64_t nunique = mr->collate(NULL);
    mr->reduce(cull,&rmat);
    nremain = ntotal - nunique;
  }

  obj->output(1,mr,print_edge,NULL);

  char msg[128];
  sprintf(msg,"RMAT: %lu rows, %lu non-zeroes, %d iterations",
	  rmat.order,ntotal,niterate);
  if (me == 0) error->message(msg);

  obj->cleanup();
}
void PageRank::run()
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD,&me);

    // MRe = Eij : weight

    MapReduce *mre = obj->input(1,read_edge_weight,NULL);
    MapReduce *mrv = obj->create_mr();
    MapReduce *mrr = obj->create_mr();

    // MRv = list of vertices in the directed graph

    mrv->map(mre,edge_to_vertices,NULL);
    mrv->collate(NULL);
    mrv->reduce(cull,NULL);

    // iterate over PageRank calculations

    for (int iterate = 0; iterate < maxiter; iterate++) {

    }

    // MRr = Vi : rank

    //obj->output(1,mrr,print,NULL);
    obj->output(1,mre,print,NULL);

    obj->cleanup();
}
// Example #3
// 0
int main(int narg, char **args)
{
  MPI_Init(&narg,&args);

  int me,nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

  if (narg <= 1) {
    if (me == 0) printf("Syntax: wordfreq file1 file2 ...\n");
    MPI_Abort(MPI_COMM_WORLD,1);
  }

  MapReduce *mr = new MapReduce(MPI_COMM_WORLD);
  mr->verbosity = 2;
  mr->timer = 1;
  //mr->memsize = 1;
  //mr->outofcore = 1;

  MPI_Barrier(MPI_COMM_WORLD);
  double tstart = MPI_Wtime();

  int nwords = mr->map(narg-1,&args[1],0,1,0,fileread,NULL);
  int nfiles = mr->mapfilecount;
  mr->collate(NULL);
  int nunique = mr->reduce(sum,NULL);

  MPI_Barrier(MPI_COMM_WORLD);
  double tstop = MPI_Wtime();

  mr->sort_values(&ncompare);

  Count count;
  count.n = 0;
  count.limit = 10;
  count.flag = 0;
  mr->map(mr,output,&count);

  mr->gather(1);
  mr->sort_values(ncompare);

  count.n = 0;
  count.limit = 10;
  count.flag = 1;
  mr->map(mr,output,&count);

  delete mr;

  if (me == 0) {
    printf("%d total words, %d unique words\n",nwords,nunique);
    printf("Time to process %d files on %d procs = %g (secs)\n",
	   nfiles,nprocs,tstop-tstart);
  }

  MPI_Finalize();
}
void WordFreq::run()
{
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);

  // MR = word : NULL

  int nfiles = 0;
  MapReduce *mr = obj->input(1,read_words,&nfiles);
  uint64_t nwords = mr->kv_stats(0);
  int nfiles_all;
  MPI_Allreduce(&nfiles,&nfiles_all,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);

  // unique words and their count
  // before processing it, make a copy of input MR if it is permanent

  if (obj->permanent(mr)) mr = obj->copy_mr(mr);
  mr->collate(NULL);
  uint64_t nunique = mr->reduce(count,NULL);

  obj->output(1,mr,print_string_int,NULL);

  // frequency stats
  // before processing it, make a copy of output MR if it is permanent

  if (ntop) {
    if (obj->permanent(mr)) mr = obj->copy_mr(mr);
    mr->sort_values(-1);

    Count count;
    count.n = 0;
    count.limit = 10;
    count.flag = 0;
    mr->map(mr,output,&count);

    mr->gather(1);
    mr->sort_values(-1);

    count.n = 0;
    count.limit = ntop;
    count.flag = 1;
    mr->map(mr,output,&count);
  }

  char msg[128];
  sprintf(msg,"WordFreq: %d files, %lu words, %lu unique",
  	  nfiles_all,nwords,nunique);
  if (me == 0) error->message(msg);

  obj->cleanup();
}
void Histo::run()
{
  int me,nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

  // MR = key : NULL

  MapReduce *mr = obj->input(1);
  uint64_t ntotal = mr->kv_stats(0);

  // unique keys and their count
  // before processing it, make a copy of input MR if it is permanent

  if (obj->permanent(mr)) mr = obj->copy_mr(mr);
  mr->collate(NULL);
  uint64_t nunique = mr->reduce(count,NULL);

  obj->output(1,mr);

  // histogram stats
  // before processing it, make a copy of output MR if it is permanent

  if (obj->permanent(mr)) mr = obj->copy_mr(mr);

  mr->map(mr,invert,NULL);
  mr->collate(NULL);
  mr->reduce(count,NULL);
  mr->gather(1);
  mr->sort_keys(-1);

  char msg[128];
  sprintf(msg,"Histo: %lu total keys, %lu unique",ntotal,nunique);
  if (me == 0) error->message(msg);
  mr->scan(print,NULL);

  obj->cleanup();
}
// Example #6
// 0
// File: vector.cpp  Project: iondune/Umm
// Element-wise sum of this vector and `other`.
//
// Works on a copy of this object: the pairs of `other` are added to the copy,
// collated, and combined by addReduce; the result is gathered onto one
// process.
//
// Returns the summed vector, or NULL if addReduce flagged the two vectors as
// having unequal length. The caller owns the returned object.
Vector* Vector::add(Vector* other) {
  // NOTE(review): copy() is assumed to return a newly allocated MapReduce
  // that the caller must delete (MR-MPI convention) - confirm.
  MapReduce* sum = MapReduce::copy();
  sum->add(other);
  sum->collate(NULL);

  // Gets set to true if, in addReduce, it is discovered that the vectors were
  // of unequal length.
  bool unequalLength = false;
  sum->reduce(&addReduce, &unequalLength);
  sum->gather(1);

  if (unequalLength) {
    // Release the working copy; previously it was leaked on this path.
    delete sum;
    return NULL;
  }

  return static_cast<Vector*>(sum);
}
void VertexExtract::run()
{
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);

  // MRe = Eij : NULL

  MapReduce *mre = obj->input(1,read_edge_weight,NULL);
  MapReduce *mrv = obj->create_mr();

  // MRv = all vertices in the graph
  
  mrv->map(mre,edge_to_vertices,NULL);
  mrv->collate(NULL);
  mrv->reduce(cull,NULL);

  obj->output(1,mrv,print_vertex,NULL);

  obj->cleanup();
}
// Example #8
// 0
int MR_collate(void *MRptr, int (*myhash)(char *, int))
{
  MapReduce *mr = (MapReduce *) MRptr;
  return mr->collate(myhash);
}
// K-mer counting and clustering driver.
//
// Steps, as reported by the program's own progress messages:
//   1. parse command-line options into `data`,
//   2. count k-mers from the input files,
//   3. build all possible connections (edges) between k-mers,
//   4. cluster k-mers into zones with an iterative connected-component
//      search that stops once no zone changes,
//   5. attach k-mer counts to the zone IDs.
//
// Every argument from index num_args onward is passed to the map calls as an
// input file. NOTE(review): num_args is only a count of recognised option
// words, so this assumes all options come before the file names - confirm.
//
// Returns 1 (after aborting the MPI job) if option parsing throws.
int main(int narg, char **args)
{
  MPI_Init(&narg,&args);

  Data data;

  MPI_Comm_rank(MPI_COMM_WORLD,&data.me);
  MPI_Comm_size(MPI_COMM_WORLD,&data.nprocs);

  // defaults, overridable from the command line below

  data.prune_error_kmers = true;
  data.min_ratio_non_error = 0.05f;
  data.min_kmer_count = 1;
  data.min_edge_count = 1;
  data.min_any_entropy = 0.0;
  data.kmer_length = 25;
  data.DS = false;

  data.MAX_RECURSION = 1;
  data.MIN_SEED_ENTROPY = 1.5;
  data.MIN_SEED_COVERAGE = 2;

  data.PACMAN = false;
  data.CRAWL = false;
  data.crawl_length = 1;

  data.MIN_CONNECTIVITY_RATIO = 0.0;
  data.MIN_ASSEMBLY_LENGTH = data.kmer_length;
  data.MIN_ASSEMBLY_COVERAGE = 2;
  data.WRITE_COVERAGE = false;

  int page_size = 1024;

  // num_args = index of the first argument treated as an input file;
  // starts at 1 (program name) and grows by 2 for each option with a value
  // and by 1 for each bare flag

  int num_args = 1;
  try {
       ArgProcessor in_args(narg, args);

       if (in_args.isArgSet("-K")) {
            data.kmer_length = in_args.getIntVal("-K");
            num_args += 2;
            if(data.me==0) cerr << "Kmer length set to: " << data.kmer_length << endl;
       }

       if (in_args.isArgSet("--minKmerCount")) {
            data.min_kmer_count = in_args.getIntVal("--minKmerCount");
            num_args += 2;
            if(data.me==0) cerr << "Min kmer coverage set to: " << data.min_kmer_count << endl;
       }

       if (in_args.isArgSet("--minEdgeCount")) {
            data.min_edge_count = in_args.getIntVal("--minEdgeCount");
            num_args += 2;
            if(data.me==0) cerr << "Min edge coverage set to: " << data.min_edge_count << endl;
       }

       if (in_args.isArgSet("-L")) {
            data.MIN_ASSEMBLY_LENGTH = in_args.getIntVal("-L");
            num_args += 2;
            if(data.me==0) cerr << "Min assembly length set to: " << data.MIN_ASSEMBLY_LENGTH << endl;
       }

       if (in_args.isArgSet("--min_assembly_coverage")) {
            data.MIN_ASSEMBLY_COVERAGE = in_args.getIntVal("--min_assembly_coverage");
            num_args += 2;
            if(data.me==0) cerr << "Min assembly coverage set to: " << data.MIN_ASSEMBLY_COVERAGE << endl;
       }

       if (in_args.isArgSet("--min_con_ratio")) {
            data.MIN_CONNECTIVITY_RATIO = in_args.getFloatVal("--min_con_ratio");
            num_args += 2;
       }

       if (in_args.isArgSet("--DS")) {
            data.DS = true;
            num_args++;
            if(data.me==0) cerr << "double stranded mode set" << endl;
       }

       if (in_args.isArgSet("--min_seed_entropy")) {
            data.MIN_SEED_ENTROPY = in_args.getFloatVal("--min_seed_entropy");
            num_args += 2;
            if(data.me==0) cerr << "Min seed entropy set to: " << data.MIN_SEED_ENTROPY << endl;
       }

       if (in_args.isArgSet("--min_seed_coverage")) {
            data.MIN_SEED_COVERAGE = in_args.getIntVal("--min_seed_coverage");
            num_args += 2;
            if(data.me==0) cerr << "min seed coverage set to: " << data.MIN_SEED_COVERAGE << endl;
       }

       if (in_args.isArgSet("--min_any_entropy")) {
            data.min_any_entropy = in_args.getFloatVal("--min_any_entropy");
            num_args += 2;
            if(data.me==0) cerr << "min entropy set to: " << data.min_any_entropy << endl;
       }

       if (in_args.isArgSet("--no_prune_error_kmers")) {
            data.prune_error_kmers = false;
            num_args++;
       }

       // only honoured while pruning is still enabled
       if (data.prune_error_kmers && in_args.isArgSet("--min_ratio_non_error")) {
            data.min_ratio_non_error = in_args.getFloatVal("--min_ratio_non_error");
            num_args += 2;
            if(data.me==0) cerr << "Set to prune kmers below min ratio non-erro: " << data.min_ratio_non_error << endl;
       }

       if (in_args.isArgSet("--coverage_outfile")) {
            data.WRITE_COVERAGE = true;
            data.COVERAGE_OUTPUT_FILENAME = in_args.getStringVal("--coverage_outfile");
            num_args += 2;
       }

       if(in_args.isArgSet("--PageSize")) {
            page_size = in_args.getIntVal("--PageSize");
            num_args += 2;
            if(data.me==0) cerr << "Page size for map reduce object set to: " << page_size << endl;
       }

  }

  catch(const exception& e) {
        cerr << "error: " << e.what() << "\n";
        // MPI is already initialised: abort the whole job instead of just
        // returning, so no rank is left without a matching MPI shutdown.
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
  }


  // per-rank random seed

  data.seed = 123456789;
  srand48(data.seed+data.me);

  // pbits = smallest bit count such that 2^pbits >= nprocs;
  // pshift and lmask are derived from it

  int pbits = 0;
  while ((1 << pbits) < data.nprocs) pbits++;
  data.pshift = 63 - pbits;
  int hbits = pbits + 1;
  data.lmask = ALLBITS >> hbits;

  data.nthresh = 1000;

  // four MapReduce objects sharing the same settings:
  // mrKmers (k-mers), mrE (edges), mrV (vertices), mrZ (zones)

  MapReduce *mrKmers = new MapReduce(MPI_COMM_WORLD);
  mrKmers->memsize = page_size;
  mrKmers->verbosity = 1;
  mrKmers->timer = 1;

  MapReduce *mrE = new MapReduce(MPI_COMM_WORLD);
  mrE->memsize = page_size;
  mrE->verbosity = 1;
  mrE->timer = 1;

  MapReduce *mrV = new MapReduce(MPI_COMM_WORLD);
  mrV->memsize = page_size;
  mrV->verbosity = 1;
  mrV->timer = 1;

  MapReduce *mrZ = new MapReduce(MPI_COMM_WORLD);
  mrZ->memsize = page_size;
  mrZ->verbosity = 1;
  mrZ->timer = 1;

  MPI_Barrier(MPI_COMM_WORLD);

  // ---- phase 1: count k-mers ----

  double tstart = MPI_Wtime();

  mrKmers->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq,&data);

  mrKmers->collate(NULL);

  // data.flag is accumulated by the reduce callback and summed over ranks
  data.flag = 0;
  mrKmers->reduce(reduce_kmers_RNAseq,&data);

  double tstop = MPI_Wtime();

  unsigned long long flagall = 0;
  MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);
  if(data.me == 0) cerr <<  "Number of kmers =  " << flagall << " Time took for counting kmers = " << tstop - tstart << endl << endl;

  // ---- phase 2: build edges between k-mers ----

  tstart = MPI_Wtime();

  mrE->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq_map_Edge,&data);

  mrE->collate(NULL);
  mrE->reduce(reduce_Edge_from_RNAseq,&data);

  tstop = MPI_Wtime();
  if(data.me == 0) cerr << "Time took for all possible connections of kmers = " << tstop - tstart << endl << endl;

  // ---- phase 3: cluster k-mers into zones ----

  tstart = MPI_Wtime();
  mrV->map(mrE,edge_to_vertices,NULL);
  mrV->collate(NULL);
  mrV->reduce(reduce_self_zone,NULL);

  int niterates = 0;

  // repeat until an iteration changes no zone (flagall == 0)

  while(1) {

    niterates++;

    mrZ->map(mrE,map_edge_vert,NULL);
    mrZ->add(mrV);
    mrZ->collate(NULL);
    mrZ->reduce(reduce_edge_zone,NULL);

    mrZ->collate(NULL);
    data.flag = 0;
    mrZ->reduce(reduce_zone_winner,&data);
    flagall = 0;
    MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);

    if (flagall == 0) break;

    mrV->map(mrV, map_invert_multi, &data);
    mrV->map(mrZ, map_zone_multi, &data, 1);
    mrV->collate(NULL);
    mrV->reduce(reduce_zone_reassign,&data);

    if(data.me == 0)
        cerr <<  niterates << " th iteration swithed the number of " << flagall << " zones" <<endl << endl;

  }

  // ---- phase 4: attach k-mer counts to zone IDs ----

  mrZ->map(mrV,map_strip,NULL);

  mrZ->add(mrKmers);
  mrZ->collate(NULL);

  data.flag = 0;
  mrZ->reduce(reduce_zone_kmer_count,&data);

  flagall = 0;
  MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);

  mrZ->collate(NULL);

  tstop = MPI_Wtime();
  if(data.me == 0) {
        cerr << "Total number of kmers with zoneID after clustering = " << flagall << endl;
        cerr << "Time took for clustering of kmers using connected component finding algorithms = " << tstop - tstart << endl;
  }

  MPI_Barrier(MPI_COMM_WORLD);

  delete mrKmers;
  delete mrE;
  delete mrV;
  delete mrZ;

  MPI_Finalize();

}
// Open `path` for writing. On failure, report the system error and abort the
// whole MPI job: callers hand the handle to map callbacks and to fclose(),
// neither of which may receive a null FILE pointer.
static FILE *cwordfreq_open_output(const char *path)
{
  FILE *fp = fopen(path, "w");
  if (fp == NULL)
  {
    perror(path);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  return fp;
}

// cwordfreq driver.
// Usage: cwordfreq file1 file2 ...
//
// Runs a word count over the two input files using several MapReduce
// objects, writing the combined result to "result.out" and three histograms
// to "hist.c", "hist.a" and "hist.b". A helper thread is started on every
// rank (pingRecv on rank 0, hb_function elsewhere) and cancelled before
// MPI_Finalize.
int main(int narg, char **args)
{
  int me, nprocs;
  pthread_t thread1;
  MPI_Init(&narg, &args);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  // Both args[1] and args[2] are handed to the map callbacks below, so two
  // file arguments are required (the previous check accepted just one and
  // then passed a null args[2]).
  if (narg <= 2)
  {
    if (me == 0)
    {
      printf("Syntax: cwordfreq file1 file2 ...\n");
    }

    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  // helper thread; `me` outlives the thread because it is cancelled before
  // main returns
  if (me != 0)
  {
    pthread_create(&thread1, NULL, hb_function, &me);
  }
  else
  {
    printf("PID %d on ready for attach\n", getpid());
    pthread_create(&thread1, NULL, pingRecv, &me);
  }

  MapReduce *mr = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mr2 = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mra = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mrb = new MapReduce(MPI_COMM_WORLD);

  // ---- pass 1: map into mr, with mr2 opened for direct additions ----
  // NOTE(review): mra and mrb are not opened yet here, so mra->kv / mrb->kv
  // may not be valid at this point - confirm what fileread does with them.
  mr2->open();
  MPI_Barrier(MPI_COMM_WORLD);
  void *arg[5] = {args[1], args[2], mra->kv, mrb->kv, mr2->kv};
  printf("starting map\n");
  mr->map(nprocs, &fileread, arg);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("map done\n");
  mr2->close();
  printf("%d mr closed\n", me);
  mr->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("collate done\n");
  mr->reduce(&sum2, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("reduce done\n");
  mr->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mr2->collate(NULL);
  mr2->reduce(&sum, NULL);
  mr2->gather(1);
  mr->add(mr2);
  mr->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile = cwordfreq_open_output("result.out");
  mr->map(mr, &output, pFile);
  MPI_Barrier(MPI_COMM_WORLD);
  fclose(pFile);

  // ---- histogram C: built from the combined mr ----
  printf("%d sum done\n", me);
  mr->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile2 = cwordfreq_open_output("hist.c");
  mr->map(mr, &histoutput, pFile2);
  MPI_Barrier(MPI_COMM_WORLD);
  fclose(pFile2);
  delete mr;
  // mr2 is not used after mr->add(mr2); release it too (it was leaked before)
  delete mr2;
  printf("%d c done\n", me);

  // ---- pass 2: map with mra/mrb opened, then histograms A and B ----
  mra->open();
  mrb->open();
  void *arg2[4] = {args[1], args[2], mra->kv, mrb->kv};
  // %p requires void * arguments
  printf("%p %p\n", static_cast<void *>(mra->kv), arg2[2]);
  mra->map(nprocs, &fileread2, arg2);
  mra->close();
  mrb->close();

  mra->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile3 = cwordfreq_open_output("hist.a");
  mra->map(mra, &histoutput, pFile3);
  fclose(pFile3);
  delete mra;

  mrb->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile4 = cwordfreq_open_output("hist.b");
  mrb->map(mrb, &histoutput, pFile4);
  fclose(pFile4);
  delete mrb;

  MPI_Barrier(MPI_COMM_WORLD);
  pthread_cancel(thread1);
  MPI_Finalize();
}