Esempio n. 1
0
// Adds `other` to this vector and returns the result as a new object.
//
// The work is done on a copy obtained from MapReduce::copy(): the pairs of
// `other` are appended to it, the copy is collated, combined with addReduce,
// and gathered onto one processor.
//
// Returns the newly allocated result (the caller is responsible for deleting
// it), or NULL when addReduce reported that the two vectors were of unequal
// length.
Vector* Vector::add(Vector* other) {
  MapReduce* sum = MapReduce::copy();
  sum->add(other);
  sum->collate(NULL);

  // Gets set to true if, in addReduce, it is discovered that the vectors were
  // of unequal length.
  // NOTE(review): the flag lives in this process only; if addReduce runs on
  // several ranks they may disagree about it -- confirm whether a global
  // reduction of the flag is needed.
  bool unequalLength = false;
  sum->reduce(&addReduce, &unequalLength);
  sum->gather(1);

  if (unequalLength) {
    // The temporary copy is not handed to the caller on this path, so it must
    // be released here; previously it was leaked.
    delete sum;
    return NULL;
  }

  // NOTE(review): copy() is used here as returning MapReduce*; this downcast
  // is only valid if the object it created really is a Vector -- confirm.
  return static_cast<Vector*>(sum);
}
/**
 * Driver: counts k-mers in the input files, builds the k-mer connections
 * (edges), and clusters the k-mers into zones by iterating until no zone
 * changes any more.
 *
 * Command line: zero or more options followed by the input files. The code
 * assumes that all options precede the file names: `num_args` counts the
 * program name plus every recognised option token, and `args[num_args]` is
 * taken as the first input file.
 *
 * Returns 0 on success, 1 if option parsing throws.
 */
int main(int narg, char **args)
{
  MPI_Init(&narg,&args);

  Data data;

  MPI_Comm_rank(MPI_COMM_WORLD,&data.me);
  MPI_Comm_size(MPI_COMM_WORLD,&data.nprocs);

  // ---- Defaults, overridable from the command line below. ----
  data.prune_error_kmers = true;
  data.min_ratio_non_error = 0.05f;
  data.min_kmer_count = 1;
  data.min_edge_count = 1;
  data.min_any_entropy = 0.0;
  data.kmer_length = 25;
  data.DS = false;

  data.MAX_RECURSION = 1;
  data.MIN_SEED_ENTROPY = 1.5;
  data.MIN_SEED_COVERAGE = 2;

  data.PACMAN = false;
  data.CRAWL = false;
  data.crawl_length = 1;

  data.MIN_CONNECTIVITY_RATIO = 0.0;
  data.MIN_ASSEMBLY_LENGTH = data.kmer_length;
  data.MIN_ASSEMBLY_COVERAGE = 2;
  data.WRITE_COVERAGE = false;

  int page_size = 1024;

  // Index of the first input file in `args`: starts at 1 (program name) and
  // grows by 2 for every "option value" pair and by 1 for every bare flag.
  int num_args = 1;
  try {
    ArgProcessor in_args(narg, args);

    if (in_args.isArgSet("-K")) {
      data.kmer_length = in_args.getIntVal("-K");
      num_args += 2;
      if (data.me == 0) cerr << "Kmer length set to: " << data.kmer_length << endl;
    }

    if (in_args.isArgSet("--minKmerCount")) {
      data.min_kmer_count = in_args.getIntVal("--minKmerCount");
      num_args += 2;
      if (data.me == 0) cerr << "Min kmer coverage set to: " << data.min_kmer_count << endl;
    }

    if (in_args.isArgSet("--minEdgeCount")) {
      data.min_edge_count = in_args.getIntVal("--minEdgeCount");
      num_args += 2;
      if (data.me == 0) cerr << "Min edge coverage set to: " << data.min_edge_count << endl;
    }

    if (in_args.isArgSet("-L")) {
      data.MIN_ASSEMBLY_LENGTH = in_args.getIntVal("-L");
      num_args += 2;
      if (data.me == 0) cerr << "Min assembly length set to: " << data.MIN_ASSEMBLY_LENGTH << endl;
    }

    if (in_args.isArgSet("--min_assembly_coverage")) {
      data.MIN_ASSEMBLY_COVERAGE = in_args.getIntVal("--min_assembly_coverage");
      num_args += 2;
      if (data.me == 0) cerr << "Min assembly coverage set to: " << data.MIN_ASSEMBLY_COVERAGE << endl;
    }

    if (in_args.isArgSet("--min_con_ratio")) {
      data.MIN_CONNECTIVITY_RATIO = in_args.getFloatVal("--min_con_ratio");
      num_args += 2;
    }

    if (in_args.isArgSet("--DS")) {
      data.DS = true;
      num_args++;
      if (data.me == 0) cerr << "double stranded mode set" << endl;
    }

    if (in_args.isArgSet("--min_seed_entropy")) {
      data.MIN_SEED_ENTROPY = in_args.getFloatVal("--min_seed_entropy");
      num_args += 2;
      if (data.me == 0) cerr << "Min seed entropy set to: " << data.MIN_SEED_ENTROPY << endl;
    }

    if (in_args.isArgSet("--min_seed_coverage")) {
      data.MIN_SEED_COVERAGE = in_args.getIntVal("--min_seed_coverage");
      num_args += 2;
      if (data.me == 0) cerr << "min seed coverage set to: " << data.MIN_SEED_COVERAGE << endl;
    }

    if (in_args.isArgSet("--min_any_entropy")) {
      data.min_any_entropy = in_args.getFloatVal("--min_any_entropy");
      num_args += 2;
      if (data.me == 0) cerr << "min entropy set to: " << data.min_any_entropy << endl;
    }

    if (in_args.isArgSet("--no_prune_error_kmers")) {
      data.prune_error_kmers = false;
      num_args++;
    }

    if (in_args.isArgSet("--min_ratio_non_error")) {
      // The option and its value occupy two slots on the command line whether
      // or not pruning is enabled, so they must always be counted; otherwise
      // the input-file offset below would point at the option itself.
      num_args += 2;
      if (data.prune_error_kmers) {
        data.min_ratio_non_error = in_args.getFloatVal("--min_ratio_non_error");
        if (data.me == 0) cerr << "Set to prune kmers below min ratio non-erro: " << data.min_ratio_non_error << endl;
      }
    }

    if (in_args.isArgSet("--coverage_outfile")) {
      data.WRITE_COVERAGE = true;
      data.COVERAGE_OUTPUT_FILENAME = in_args.getStringVal("--coverage_outfile");
      num_args += 2;
    }

    if (in_args.isArgSet("--PageSize")) {
      page_size = in_args.getIntVal("--PageSize");
      num_args += 2;
      if (data.me == 0) cerr << "Page size for map reduce object set to: " << page_size << endl;
    }
  }
  catch (const exception& e) {
    cerr << "error: " << e.what() << "\n";
    // Every rank parses the same command line, so every rank takes this path;
    // shut MPI down before leaving main instead of exiting with MPI still
    // initialised.
    MPI_Finalize();
    return 1;
  }

  // Per-rank random seed.
  data.seed = 123456789;
  srand48(data.seed+data.me);

  // pbits = smallest number of bits such that 2^pbits >= nprocs.
  int pbits = 0;
  while ((1 << pbits) < data.nprocs) pbits++;
  data.pshift = 63 - pbits;
  int hbits = pbits + 1;
  data.lmask = ALLBITS >> hbits;

  data.nthresh = 1000;

  // Four MapReduce objects with identical settings: k-mers, edges, vertices
  // and zones.
  MapReduce *mrKmers = new MapReduce(MPI_COMM_WORLD);
  mrKmers->memsize = page_size;
  mrKmers->verbosity = 1;
  mrKmers->timer = 1;

  MapReduce *mrE = new MapReduce(MPI_COMM_WORLD);
  mrE->memsize = page_size;
  mrE->verbosity = 1;
  mrE->timer = 1;

  MapReduce *mrV = new MapReduce(MPI_COMM_WORLD);
  mrV->memsize = page_size;
  mrV->verbosity = 1;
  mrV->timer = 1;

  MapReduce *mrZ = new MapReduce(MPI_COMM_WORLD);
  mrZ->memsize = page_size;
  mrZ->verbosity = 1;
  mrZ->timer = 1;

  MPI_Barrier(MPI_COMM_WORLD);

  // ---- Stage 1: k-mer counting over the input files. ----
  double tstart = MPI_Wtime();

  mrKmers->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq,&data);
  mrKmers->collate(NULL);

  data.flag = 0;
  mrKmers->reduce(reduce_kmers_RNAseq,&data);

  double tstop = MPI_Wtime();

  // data.flag is a per-rank count filled in by the reduce callback; sum it
  // over all ranks.
  unsigned long long flagall = 0;
  MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);
  if (data.me == 0) cerr <<  "Number of kmers =  " << flagall << " Time took for counting kmers = " << tstop - tstart << endl << endl;

  // ---- Stage 2: connections (edges) between k-mers, from the same files. ----
  tstart = MPI_Wtime();

  mrE->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq_map_Edge,&data);
  mrE->collate(NULL);
  mrE->reduce(reduce_Edge_from_RNAseq,&data);

  tstop = MPI_Wtime();
  if (data.me == 0) cerr << "Time took for all possible connections of kmers = " << tstop - tstart << endl << endl;

  // ---- Stage 3: clustering; every vertex starts in its own zone. ----
  tstart = MPI_Wtime();
  mrV->map(mrE,edge_to_vertices,NULL);
  mrV->collate(NULL);
  mrV->reduce(reduce_self_zone,NULL);

  int niterates = 0;

  // Iterate until reduce_zone_winner reports no change on any rank
  // (global sum of data.flag is zero).
  while (1) {

    niterates++;

    mrZ->map(mrE,map_edge_vert,NULL);
    mrZ->add(mrV);
    mrZ->collate(NULL);
    mrZ->reduce(reduce_edge_zone,NULL);

    mrZ->collate(NULL);
    data.flag = 0;
    mrZ->reduce(reduce_zone_winner,&data);
    flagall = 0;
    MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);

    if (flagall == 0) break;

    mrV->map(mrV, map_invert_multi, &data);
    mrV->map(mrZ, map_zone_multi, &data, 1);
    mrV->collate(NULL);
    mrV->reduce(reduce_zone_reassign,&data);

    if (data.me == 0)
      cerr <<  niterates << " th iteration swithed the number of " << flagall << " zones" <<endl << endl;

  }

  // Combine the final vertex/zone assignment with the k-mer data and count
  // the k-mers that carry a zone ID.
  mrZ->map(mrV,map_strip,NULL);

  mrZ->add(mrKmers);
  mrZ->collate(NULL);

  data.flag = 0;
  mrZ->reduce(reduce_zone_kmer_count,&data);

  flagall = 0;
  MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD);

  mrZ->collate(NULL);

  tstop = MPI_Wtime();
  if (data.me == 0) {
    cerr << "Total number of kmers with zoneID after clustering = " << flagall << endl;
    cerr << "Time took for clustering of kmers using connected component finding algorithms = " << tstop - tstart << endl;
  }

  MPI_Barrier(MPI_COMM_WORLD);

  // Release the MapReduce objects before MPI is shut down.
  delete mrKmers;
  delete mrE;
  delete mrV;
  delete mrZ;

  MPI_Finalize();

  return 0;
}
// Opens `path` for writing. If the file cannot be opened, reports the error
// and aborts the whole MPI job, so callers never receive a NULL stream.
static FILE *open_output_or_abort(const char *path)
{
  FILE *fp = fopen(path, "w");
  if (fp == NULL)
  {
    perror(path);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
  return fp;
}

/**
 * Driver for the word-frequency example.
 *
 * Requires two input file names (args[1] and args[2]). Runs the `fileread`
 * map over them, combines the results of `mr` and `mr2` into "result.out",
 * writes a histogram of the combined data to "hist.c", then runs `fileread2`
 * and writes histograms for `mra` and `mrb` to "hist.a" and "hist.b".
 *
 * A helper thread is started on every rank (hb_function on non-zero ranks,
 * pingRecv on rank 0) and cancelled before MPI is finalised.
 */
int main(int narg, char **args)
{
  int me, nprocs;
  // NOTE(review): `count` is not used in this function; kept because Count is
  // declared elsewhere and its construction may matter -- confirm and remove.
  Count count;
  pthread_t thread1;
  MPI_Init(&narg, &args);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  // Both args[1] and args[2] are handed to the map callbacks below, so at
  // least two file names are required.
  if (narg < 3)
  {
    if (me == 0)
    {
      printf("Syntax: cwordfreq file1 file2 ...\n");
    }

    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  // `me` outlives the thread: it is a local of main and the thread is
  // cancelled before main returns.
  if (me != 0)
  {
    pthread_create(&thread1, NULL, hb_function, &me);
  }
  else
  {
    printf("PID %d on ready for attach\n", getpid());
    pthread_create(&thread1, NULL, pingRecv, &me);
  }

  MapReduce *mr = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mr2 = new MapReduce(MPI_COMM_WORLD);
  //MapReduce *mra = new MapReduce(MPI_COMM_WORLD);
  //MapReduce *mrb = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mra = new MapReduce(MPI_COMM_WORLD);
  MapReduce *mrb = new MapReduce(MPI_COMM_WORLD);
//  mra->open();
  mr2->open();
  MPI_Barrier(MPI_COMM_WORLD);

  // Argument bundle for fileread: the two file names plus the kv members of
  // mra, mrb and mr2.
  // NOTE(review): mra and mrb have not been opened at this point, so their
  // kv members may not be usable yet -- confirm fileread does not write to
  // them.
  void *arg[5] = {args[1], args[2], mra->kv, mrb->kv, mr2->kv};
  //printf("%s %s\n", arg[0], arg[1]);
  printf("starting map\n");
  mr->map(nprocs, &fileread, arg);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("map done\n");
  mr2->close();
//  mrb->close();
  printf("%d mr closed\n", me);

  // ---- mr: collate, reduce with sum2, gather. ----
  mr->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("collate done\n");
  mr->reduce(&sum2, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("reduce done\n");
  mr->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);

  // ---- mr2: same pipeline with sum, then merged into mr and written out. ----
  mr2->collate(NULL);
  mr2->reduce(&sum,NULL);
  mr2->gather(1);
  mr->add(mr2);
  mr->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile = open_output_or_abort("result.out");
  mr->map(mr, &output, pFile);
  MPI_Barrier(MPI_COMM_WORLD);
  fclose(pFile);
//sum done
  //getHistogram(mra, "hist.a");
  //getHistogram(mr, "hist.a");
  //getHistogram(mrb, "hist.b");
  /* FILE * pFilea;
  printf("sum done\n");
   pFilea = fopen("test", "w");
   mra->gather(1);
   mra->map(mra, &histoutput, pFilea);
   fclose(pFilea);*/
  printf("%d sum done\n", me);

  // ---- Histogram of the combined data -> hist.c ----
  mr->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mr->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile2 = open_output_or_abort("hist.c");
  mr->map(mr, &histoutput, pFile2);
  MPI_Barrier(MPI_COMM_WORLD);
  fclose(pFile2);
  delete mr;
  // mr2 is not used after it was added to mr above; it was previously never
  // released.
  delete mr2;
  printf("%d c done\n", me);

  // ---- Second pass: fileread2 fills mra and mrb through their kv members. ----
  mra->open();
  mrb->open();
  void *arg2[4] = {args[1], args[2], mra->kv, mrb->kv};
  // %p expects void*, so convert the kv pointer explicitly.
  printf("%p %p\n", static_cast<void *>(mra->kv), arg2[2]);
  mra->map(nprocs, &fileread2, arg2);
  mra->close();
  mrb->close();

  // ---- Histogram for mra -> hist.a ----
  mra->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  mra->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile3 = open_output_or_abort("hist.a");
  mra->map(mra, &histoutput, pFile3);
  fclose(pFile3);
  delete mra;

  // ---- Histogram for mrb -> hist.b ----
  mrb->collate(NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->reduce(&sum, NULL);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->gather(1);
  MPI_Barrier(MPI_COMM_WORLD);
  mrb->sort_keys(&ncompare);
  MPI_Barrier(MPI_COMM_WORLD);
  FILE *pFile4 = open_output_or_abort("hist.b");
  mrb->map(mrb, &histoutput, pFile4);
  fclose(pFile4);
  delete mrb;

  MPI_Barrier(MPI_COMM_WORLD);
  // NOTE(review): the thread is cancelled but not joined; whether a join is
  // safe depends on hb_function/pingRecv reaching a cancellation point --
  // confirm before adding pthread_join.
  pthread_cancel(thread1);
  MPI_Finalize();
//exit(0);
  return 0;
}