int main(int narg, char **args) { MPI_Init(&narg,&args); int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); if (narg <= 1) { if (me == 0) printf("Syntax: wordfreq file1 file2 ...\n"); MPI_Abort(MPI_COMM_WORLD,1); } MapReduce *mr = new MapReduce(MPI_COMM_WORLD); mr->verbosity = 2; mr->timer = 1; //mr->memsize = 1; //mr->outofcore = 1; MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int nwords = mr->map(narg-1,&args[1],0,1,0,fileread,NULL); int nfiles = mr->mapfilecount; mr->collate(NULL); int nunique = mr->reduce(sum,NULL); MPI_Barrier(MPI_COMM_WORLD); double tstop = MPI_Wtime(); mr->sort_values(&ncompare); Count count; count.n = 0; count.limit = 10; count.flag = 0; mr->map(mr,output,&count); mr->gather(1); mr->sort_values(ncompare); count.n = 0; count.limit = 10; count.flag = 1; mr->map(mr,output,&count); delete mr; if (me == 0) { printf("%d total words, %d unique words\n",nwords,nunique); printf("Time to process %d files on %d procs = %g (secs)\n", nfiles,nprocs,tstop-tstart); } MPI_Finalize(); }
void WordFreq::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MR = word : NULL int nfiles = 0; MapReduce *mr = obj->input(1,read_words,&nfiles); uint64_t nwords = mr->kv_stats(0); int nfiles_all; MPI_Allreduce(&nfiles,&nfiles_all,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD); // unique words and their count // before processing it, make a copy of input MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->collate(NULL); uint64_t nunique = mr->reduce(count,NULL); obj->output(1,mr,print_string_int,NULL); // frequency stats // before processing it, make a copy of output MR if it is permanent if (ntop) { if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->sort_values(-1); Count count; count.n = 0; count.limit = 10; count.flag = 0; mr->map(mr,output,&count); mr->gather(1); mr->sort_values(-1); count.n = 0; count.limit = ntop; count.flag = 1; mr->map(mr,output,&count); } char msg[128]; sprintf(msg,"WordFreq: %d files, %lu words, %lu unique", nfiles_all,nwords,nunique); if (me == 0) error->message(msg); obj->cleanup(); }
void RMAT::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // mr = matrix edges MapReduce *mr = obj->create_mr(); // loop until desired number of unique nonzero entries int niterate = 0; uint64_t ntotal = rmat.order * rmat.nnonzero; uint64_t nremain = ntotal; while (nremain) { niterate++; rmat.ngenerate = nremain/nprocs; if (me < nremain % nprocs) rmat.ngenerate++; mr->map(nprocs,rmat_generate,&rmat,1); uint64_t nunique = mr->collate(NULL); mr->reduce(cull,&rmat); nremain = ntotal - nunique; } obj->output(1,mr,print_edge,NULL); char msg[128]; sprintf(msg,"RMAT: %lu rows, %lu non-zeroes, %d iterations", rmat.order,ntotal,niterate); if (me == 0) error->message(msg); obj->cleanup(); }
void PageRank::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : weight MapReduce *mre = obj->input(1,read_edge_weight,NULL); MapReduce *mrv = obj->create_mr(); MapReduce *mrr = obj->create_mr(); // MRv = list of vertices in the directed graph mrv->map(mre,edge_to_vertices,NULL); mrv->collate(NULL); mrv->reduce(cull,NULL); // iterate over PageRank calculations for (int iterate = 0; iterate < maxiter; iterate++) { } // MRr = Vi : rank //obj->output(1,mrr,print,NULL); obj->output(1,mre,print,NULL); obj->cleanup(); }
int MR_map_file_list_add(void *MRptr, char *file, void (*mymap)(int, char *, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(file,appmap,APPptr,addflag); }
int MR_map_add(void *MRptr, int nmap, void (*mymap)(int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,appmap,APPptr,addflag); }
int MR_map_kv_add(void *MRptr, void *MRptr2, void (*mymap)(int, char *, int, char *, int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; KeyValue *kv = ((MapReduce *) MRptr2)->kv; MapFunc *appmap = (MapFunc *) mymap; return mr->map(kv,appmap,APPptr,addflag); }
int MR_map_file_str_add(void *MRptr, int nmap, int nfiles, char **files, char *sepstr, int delta, void (*mymap)(int, char *, int, void *, void *), void *APPptr, int addflag) { typedef void (MapFunc)(int, char *, int, KeyValue *, void *); MapReduce *mr = (MapReduce *) MRptr; MapFunc *appmap = (MapFunc *) mymap; return mr->map(nmap,nfiles,files,sepstr,delta,appmap,APPptr,addflag); }
void VertexExtract::run() { int me; MPI_Comm_rank(MPI_COMM_WORLD,&me); // MRe = Eij : NULL MapReduce *mre = obj->input(1,read_edge_weight,NULL); MapReduce *mrv = obj->create_mr(); // MRv = all vertices in the graph mrv->map(mre,edge_to_vertices,NULL); mrv->collate(NULL); mrv->reduce(cull,NULL); obj->output(1,mrv,print_vertex,NULL); obj->cleanup(); }
void Histo::run() { int me,nprocs; MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); // MR = key : NULL MapReduce *mr = obj->input(1); uint64_t ntotal = mr->kv_stats(0); // unique keys and their count // before processing it, make a copy of input MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->collate(NULL); uint64_t nunique = mr->reduce(count,NULL); obj->output(1,mr); // histogram stats // before processing it, make a copy of output MR if it is permanent if (obj->permanent(mr)) mr = obj->copy_mr(mr); mr->map(mr,invert,NULL); mr->collate(NULL); mr->reduce(count,NULL); mr->gather(1); mr->sort_keys(-1); char msg[128]; sprintf(msg,"Histo: %lu total keys, %lu unique",ntotal,nunique); if (me == 0) error->message(msg); mr->scan(print,NULL); obj->cleanup(); }
int main(int narg, char **args) { MPI_Init(&narg,&args); Data data; MPI_Comm_rank(MPI_COMM_WORLD,&data.me); MPI_Comm_size(MPI_COMM_WORLD,&data.nprocs); data.prune_error_kmers = true; data.min_ratio_non_error = 0.05f; data.min_kmer_count = 1; data.min_edge_count = 1; data.min_any_entropy = 0.0; data.kmer_length = 25; data.DS = false; data.MAX_RECURSION = 1; data.MIN_SEED_ENTROPY = 1.5; data.MIN_SEED_COVERAGE = 2; data.PACMAN = false; data.CRAWL = false; data.crawl_length = 1; data.MIN_CONNECTIVITY_RATIO = 0.0; data.MIN_ASSEMBLY_LENGTH = data.kmer_length; data.MIN_ASSEMBLY_COVERAGE = 2; data.WRITE_COVERAGE = false; int page_size = 1024; int num_args = 1; try { ArgProcessor in_args(narg, args); if (in_args.isArgSet("-K")) { data.kmer_length = in_args.getIntVal("-K"); num_args += 2; if(data.me==0) cerr << "Kmer length set to: " << data.kmer_length << endl; } if (in_args.isArgSet("--minKmerCount")) { data.min_kmer_count = in_args.getIntVal("--minKmerCount"); num_args += 2; if(data.me==0) cerr << "Min kmer coverage set to: " << data.min_kmer_count << endl; } if (in_args.isArgSet("--minEdgeCount")) { data.min_edge_count = in_args.getIntVal("--minEdgeCount"); num_args += 2; if(data.me==0) cerr << "Min edge coverage set to: " << data.min_edge_count << endl; } if (in_args.isArgSet("-L")) { data.MIN_ASSEMBLY_LENGTH = in_args.getIntVal("-L"); num_args += 2; if(data.me==0) cerr << "Min assembly length set to: " << data.MIN_ASSEMBLY_LENGTH << endl; } if (in_args.isArgSet("--min_assembly_coverage")) { data.MIN_ASSEMBLY_COVERAGE = in_args.getIntVal("--min_assembly_coverage"); num_args += 2; if(data.me==0) cerr << "Min assembly coverage set to: " << data.MIN_ASSEMBLY_COVERAGE << endl; } if (in_args.isArgSet("--min_con_ratio")) { data.MIN_CONNECTIVITY_RATIO = in_args.getFloatVal("--min_con_ratio"); num_args += 2; } if (in_args.isArgSet("--DS")) { data.DS = true; num_args++; if(data.me==0) cerr << "double stranded mode set" << endl; } if (in_args.isArgSet("--min_seed_entropy")) { data.MIN_SEED_ENTROPY = in_args.getFloatVal("--min_seed_entropy"); num_args += 2; if(data.me==0) cerr << "Min seed entropy set to: " << data.MIN_SEED_ENTROPY << endl; } if (in_args.isArgSet("--min_seed_coverage")) { data.MIN_SEED_COVERAGE = in_args.getIntVal("--min_seed_coverage"); num_args += 2; if(data.me==0) cerr << "min seed coverage set to: " << data.MIN_SEED_COVERAGE << endl; } if (in_args.isArgSet("--min_any_entropy")) { data.min_any_entropy = in_args.getFloatVal("--min_any_entropy"); num_args += 2; if(data.me==0) cerr << "min entropy set to: " << data.min_any_entropy << endl; } if (in_args.isArgSet("--no_prune_error_kmers")) { data.prune_error_kmers = false; num_args++; } if (data.prune_error_kmers && in_args.isArgSet("--min_ratio_non_error")) { data.min_ratio_non_error = in_args.getFloatVal("--min_ratio_non_error"); num_args += 2; if(data.me==0) cerr << "Set to prune kmers below min ratio non-erro: " << data.min_ratio_non_error << endl; } if (in_args.isArgSet("--coverage_outfile")) { data.WRITE_COVERAGE = true; data.COVERAGE_OUTPUT_FILENAME = in_args.getStringVal("--coverage_outfile"); num_args += 2; } if(in_args.isArgSet("--PageSize")) { page_size = in_args.getIntVal("--PageSize"); num_args += 2; if(data.me==0) cerr << "Page size for map reduce object set to: " << page_size << endl; } } catch(exception& e) { cerr << "error: " << e.what() << "\n"; return 1; } data.seed = 123456789; srand48(data.seed+data.me); int pbits = 0; while ((1 << pbits) < data.nprocs) pbits++; data.pshift = 63 - pbits; int hbits = pbits + 1; data.lmask = ALLBITS >> hbits; data.nthresh = 1000; MapReduce *mrKmers = new MapReduce(MPI_COMM_WORLD); mrKmers->memsize = page_size; mrKmers->verbosity = 1; mrKmers->timer = 1; MapReduce *mrE = new MapReduce(MPI_COMM_WORLD); mrE->memsize = page_size; mrE->verbosity = 1; mrE->timer = 1; MapReduce *mrV = new MapReduce(MPI_COMM_WORLD); mrV->memsize = page_size; mrV->verbosity = 1; mrV->timer = 1; MapReduce *mrZ = new MapReduce(MPI_COMM_WORLD); mrZ->memsize = page_size; mrZ->verbosity = 1; mrZ->timer = 1; MPI_Barrier(MPI_COMM_WORLD); double tstart = MPI_Wtime(); int nkmers = mrKmers->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq,&data); int nfiles = mrKmers->mapfilecount; mrKmers->collate(NULL); data.flag = 0; mrKmers->reduce(reduce_kmers_RNAseq,&data); double tstop = MPI_Wtime(); unsigned long long flagall = 0; MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD); if(data.me == 0) cerr << "Number of kmers = " << flagall << " Time took for counting kmers = " << tstop - tstart << endl << endl; tstart = MPI_Wtime(); mrE->map(narg-num_args,&args[num_args],0,1,0,fileread_RNAseq_map_Edge,&data); nfiles = mrE->mapfilecount; mrE->collate(NULL); mrE->reduce(reduce_Edge_from_RNAseq,&data); tstop = MPI_Wtime(); if(data.me == 0) cerr << "Time took for all possible connections of kmers = " << tstop - tstart << endl << endl; tstart = MPI_Wtime(); mrV->map(mrE,edge_to_vertices,NULL); mrV->collate(NULL); mrV->reduce(reduce_self_zone,NULL); int niterates = 0; while(1) { niterates++; mrZ->map(mrE,map_edge_vert,NULL); mrZ->add(mrV); mrZ->collate(NULL); mrZ->reduce(reduce_edge_zone,NULL); mrZ->collate(NULL); data.flag = 0; mrZ->reduce(reduce_zone_winner,&data); flagall = 0; MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD); if (flagall == 0) break; mrV->map(mrV, map_invert_multi, &data); mrV->map(mrZ, map_zone_multi, &data, 1); mrV->collate(NULL); mrV->reduce(reduce_zone_reassign,&data); if(data.me == 0) cerr << niterates << " th iteration swithed the number of " << flagall << " zones" <<endl << endl; } mrZ->map(mrV,map_strip,NULL); mrZ->add(mrKmers); mrZ->collate(NULL); data.flag = 0; mrZ->reduce(reduce_zone_kmer_count,&data); flagall = 0; MPI_Allreduce(&data.flag,&flagall,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,MPI_COMM_WORLD); mrZ->collate(NULL); tstop = MPI_Wtime(); if(data.me == 0) { cerr << "Total number of kmers with zoneID after clustering = " << flagall << endl; cerr << "Time took for clustering of kmers using connected component finding algorithms = " << tstop - tstart << endl; } MPI_Barrier(MPI_COMM_WORLD); delete mrKmers; delete mrE; delete mrV; delete mrZ; MPI_Finalize(); }
int main(int narg, char **args) { int me, nprocs; int nwords, nunique; double tstart, tstop; Count count; pthread_t thread1; MPI_Init(&narg, &args); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (narg <= 1) { if (me == 0) { printf("Syntax: cwordfreq file1 file2 ...\n"); } MPI_Abort(MPI_COMM_WORLD, 1); } if (me != 0) { pthread_create(&thread1, NULL, hb_function, &me); } else { printf("PID %d on ready for attach\n", getpid()); pthread_create(&thread1, NULL, pingRecv, &me); } MapReduce *mr = new MapReduce(MPI_COMM_WORLD); MapReduce *mr2 = new MapReduce(MPI_COMM_WORLD); //MapReduce *mra = new MapReduce(MPI_COMM_WORLD); //MapReduce *mrb = new MapReduce(MPI_COMM_WORLD); MapReduce *mra = new MapReduce(MPI_COMM_WORLD); MapReduce *mrb = new MapReduce(MPI_COMM_WORLD); // mra->open(); mr2->open(); MPI_Barrier(MPI_COMM_WORLD); int test = 1; void *arg[5] = {args[1], args[2], mra->kv, mrb->kv, mr2->kv}; //printf("%s %s\n", arg[0], arg[1]); printf("starting map\n"); nwords = mr->map(nprocs, &fileread, arg); MPI_Barrier(MPI_COMM_WORLD); printf("map done\n"); mr2->close(); // mrb->close(); printf("%d mr closed\n", me); mr->collate(NULL); MPI_Barrier(MPI_COMM_WORLD); printf("collate done\n"); nunique = mr->reduce(&sum2, NULL); MPI_Barrier(MPI_COMM_WORLD); printf("reduce done\n"); mr->gather(1); MPI_Barrier(MPI_COMM_WORLD); mr2->collate(NULL); mr2->reduce(&sum,NULL); mr2->gather(1); mr->add(mr2); mr->sort_keys(&ncompare); MPI_Barrier(MPI_COMM_WORLD); FILE * pFile; pFile = fopen("result.out", "w"); mr->map(mr, &output, pFile); MPI_Barrier(MPI_COMM_WORLD); fclose(pFile); //sum done //getHistogram(mra, "hist.a"); //getHistogram(mr, "hist.a"); //getHistogram(mrb, "hist.b"); /* FILE * pFilea; printf("sum done\n"); pFilea = fopen("test", "w"); mra->gather(1); mra->map(mra, &histoutput, pFilea); fclose(pFilea);*/ printf("%d sum done\n", me); mr->collate(NULL); MPI_Barrier(MPI_COMM_WORLD); mr->reduce(&sum, NULL); MPI_Barrier(MPI_COMM_WORLD); mr->gather(1); MPI_Barrier(MPI_COMM_WORLD); mr->sort_keys(&ncompare); MPI_Barrier(MPI_COMM_WORLD); FILE * pFile2; pFile2 = fopen("hist.c", "w"); mr->map(mr, &histoutput, pFile2); MPI_Barrier(MPI_COMM_WORLD); fclose(pFile2); delete mr; printf("%d c done\n", me); mra->open(); mrb->open(); void *arg2[4] = {args[1], args[2], mra->kv, mrb->kv}; printf("%p %p\n", mra->kv, arg2[2]); nwords = mra->map(nprocs, &fileread2, arg2); mra->close(); mrb->close(); mra->collate(NULL); MPI_Barrier(MPI_COMM_WORLD); mra->reduce(&sum, NULL); MPI_Barrier(MPI_COMM_WORLD); mra->gather(1); MPI_Barrier(MPI_COMM_WORLD); mra->sort_keys(&ncompare); MPI_Barrier(MPI_COMM_WORLD); mra->gather(1); MPI_Barrier(MPI_COMM_WORLD); FILE * pFile3; pFile3 = fopen("hist.a", "w"); mra->map(mra, &histoutput, pFile3); fclose(pFile3); delete mra; mrb->collate(NULL); MPI_Barrier(MPI_COMM_WORLD); mrb->reduce(&sum, NULL); MPI_Barrier(MPI_COMM_WORLD); mrb->gather(1); MPI_Barrier(MPI_COMM_WORLD); mrb->sort_keys(&ncompare); MPI_Barrier(MPI_COMM_WORLD); FILE * pFile4; pFile4 = fopen("hist.b", "w"); mrb->map(mrb, &histoutput, pFile4); fclose(pFile4); delete mrb; MPI_Barrier(MPI_COMM_WORLD); pthread_cancel(thread1); MPI_Finalize(); //exit(0); }