/**
 * Construct the PVM manager.
 *
 * Loads the PVM library, selects the PvmRouteDirect routing option, queries
 * and prints the hosts of the virtual machine and stores the requested mode.
 * In master mode (_mode == 1) it additionally allocates the slave argument
 * block, redirects slave output to stdout and records the slave program name
 * (the value of the -exec command line option, or "test" when absent).
 * Finally the timing flag is set when -pvmtime is on the command line.
 *
 * @param _mode  operating mode; 1 selects the master set-up.
 */
adpvm_manager::adpvm_manager(int _mode)
{
  cout << "calling load library" << endl;
  const int load_rc = load_adpvm_library();
  if (load_rc < 0)
  {
    cerr << "error loading pvm library" << endl;
    exit(1);
  }

  // Routing option used for communication between tasks.
  pvm_setopt(PvmRoute, PvmRouteDirect);

  // Fetch the configuration of the parallel machine.
  const int config_rc = pvm_config(&nhost, &narch, &hostp);
  if (config_rc < 0)
  {
    cerr << "error trying to get configuration of pvm (virtual machine)" << endl;
    if (config_rc == PvmSysErr)
    {
      cerr << " PVM Daemon not responing -- maybe it is not started" << endl;
    }
    ad_exit(1);
  }

  // Show the user which hosts were found.
  printf("I found the following hosts in your virtual machine\n");
  for (int host = 0; host < nhost; ++host)
  {
    printf(" %s\n", hostp[host].hi_name);
  }

  mode = _mode;
  const bool is_master = (mode == 1);
  if (is_master)
  {
    slave_argv = new adpvm_slave_args(20,20);
    if (pvm_catchout(stdout) < 0)
    {
      cerr << "Error in pvm_catchout" << endl;
    }
    strcpy(*slave_argv,"progname");

    // The slave executable name comes from "-exec <name>" when given.
    int exec_nopt;
    const int exec_pos =
      option_match(ad_comm::argc,ad_comm::argv,"-exec",exec_nopt);
    if (exec_pos <= -1)
    {
      slave_names+="test";
    }
    else
    {
      if (exec_nopt != 1)
      {
        cerr << "Wrong number of options to -exec -- must be 1"
          " you have " << exec_nopt << endl;
        ad_exit(1);
      }
      slave_names+= ad_comm::argv[exec_pos+1];
    }
  }

  // Timing is off unless -pvmtime was passed.
  timing_flag=0;
  int time_nopt;
  const int time_pos =
    option_match(ad_comm::argc,ad_comm::argv,"-pvmtime",time_nopt);
  if (time_pos > -1)
  {
    timing_flag=1;
  }
}
/*
 * Create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread
 * (which starts execution and performs IO) is created by forking SysMan.
 *
 * total_nPEs: total number of PEs; total_nPEs-1 tasks are spawned here.
 *
 * Spawning happens in two phases: first one task on each configured host
 * other than SysMan's own host (in configuration order), then any tasks
 * still missing are spawned without a host constraint. Task ids are written
 * to consecutive slots of gtids.
 *
 * NOTE(review): the closing brace and return statement of this function are
 * not visible in this part of the file.
 */
static int createPEs(int total_nPEs) {
  int i, spawn_nPEs, iSpawn = 0, nArch, nHost;
  struct pvmhostinfo *hostp;
  int sysman_host;
  /* the main thread is not spawned here, hence one fewer */
  spawn_nPEs = total_nPEs-1;
  if (spawn_nPEs > 0) {
    IF_PAR_DEBUG(verbose,
      fprintf(stderr, "==== [%x] Spawning %d PEs(%s) ...\n", sysman_id, spawn_nPEs, petask);
      fprintf(stderr, " args: ");
      for (i = 0; pargv[i]; ++i)
        fprintf(stderr, "%s, ", pargv[i]);
      fprintf(stderr, "\n"));
    /* NOTE(review): the result of pvm_config is not checked */
    pvm_config(&nHost,&nArch,&hostp);
    sysman_host=pvm_tidtohost(sysman_id);
    /* create PEs on the specific machines in the specified order! */
    for (i=0; (iSpawn<spawn_nPEs) && (i<nHost); i++)
      /* skip the host SysMan itself runs on */
      if (hostp[i].hi_tid != sysman_host) {
        checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost, hostp[i].hi_name, 1, gtids+iSpawn),
          "SysMan startup");
        IF_PAR_DEBUG(verbose,
          fprintf(stderr, "==== [%x] Spawned PE %d onto %s\n", sysman_id, i, hostp[i].hi_name));
        iSpawn++;
      }
    /* create additional PEs anywhere you like */
    if (iSpawn<spawn_nPEs) {
      checkComms(pvm_spawn(petask, pargv, spawn_flag, "", spawn_nPEs-iSpawn, gtids+iSpawn),
        "SysMan startup");
      IF_PAR_DEBUG(verbose,
        fprintf(stderr,"==== [%x] Spawned %d additional PEs anywhere\n", sysman_id, spawn_nPEs-iSpawn));
    }
  }
// Set up the PVM stuff. Register this task then set up all of the slaves. // Return 1 if a problem, 0 if everything went ok. int StartupPVM(const char* prog, PVMData& d) { int i; d.masterid = pvm_mytid(); int nhost, narch; struct pvmhostinfo* hostp; int status = pvm_config(&nhost, &narch, &hostp); if(status == PvmSysErr) { cerr<<"\n" << prog << ": PVM not responding. Have you started the PVM?\n"; return 1; } d.tid = new int [d.nreq]; // task IDs for the slaves d.ntasks = pvm_spawn(SLAVE_NAME, (char**)0, 0, "", d.nreq, d.tid); if(d.ntasks <= 0) { cerr << prog << ": Error spawning slaves.\n"; cerr << " Error codes of failed spawns are:\n"; for(i=0; i<d.nreq; i++) { cerr << " slave "; cerr.width(3); cerr << i << ": " << d.tid[i] << "\n"; } pvm_exit(); return 1; } else if(d.ntasks < d.nreq) { cerr << prog << ": Spawned only "<<d.ntasks<<" of "<<d.nreq<<"\n"; cerr << " Error codes of failed spawns are:\n"; for(i=0; i<d.nreq; i++) { cerr << " slave "; cerr.width(3); cerr << i << ": " << d.tid[i] << "\n"; } } else { cerr << prog << ": Spawned " << d.nreq << " slave processes...\n"; } return 0; }
/*
 * OCaml stub around pvm_config().
 *
 * Returns an OCaml block with one entry per host of the virtual machine;
 * each entry is a 3-field tuple (host name, architecture name, speed).
 * A negative pvm_config() result is handed to TreatError().
 *
 * The local root r is a 3-field scratch block: fields 0 and 1 hold the
 * strings under construction for the current host, field 2 holds the
 * result array.
 * NOTE(review): presumably the intermediate values are parked in r so they
 * remain reachable across allocations -- confirm against the OCaml runtime
 * rules in use.
 */
value Pvm_config (void)
{
  CAMLparam0();
  int res,bytes;
  int nhost,narch;
  int i,j;
  struct pvmhostinfo *p;
  value v;
  CAMLlocal1(r);
  r = alloc(3, 0);
  res=pvm_config(&nhost,&narch,&p);
  if (res<0)
    TreatError(res);
  /* result array, every slot first initialised to Val_int(0) */
  Store_field (r, 2, alloc_shr(nhost,0));
  for (i=0;i<nhost;i++)
    initialize(&Field(Field(r, 2),i),Val_int(0));
  for (i=0;i<nhost;i++)
    {
      /* copy the host name byte by byte into a fresh OCaml string */
      bytes=strlen(p[i].hi_name);
      Store_field (r, 0, alloc_string(bytes));
      for (j=0;j<bytes;j++)
        Byte(Field(r, 0),j)=p[i].hi_name[j];
      /* same for the architecture name */
      bytes=strlen(p[i].hi_arch);
      Store_field (r, 1, alloc_string(bytes));
      for (j=0;j<bytes;j++)
        Byte(Field(r, 1),j)=p[i].hi_arch[j];
      /* build the (name, arch, speed) tuple and store it in slot i */
      v=alloc_tuple(3);
      Store_field(v, 0, Field(r, 0));
      Store_field(v, 1, Field(r, 1));
      Store_field(v, 2, Val_int(p[i].hi_speed));
      modify(&Field(Field(r, 2),i),v);
    }
  CAMLreturn(Field(r, 2));
}
void BBSDirect::start() { char* client = 0; int tid, host_mytid; int i, n, ncpu, nncpu; struct pvmhostinfo* hostp; if (started_) { return; } BBSImpl::start(); mytid_ = pvm_mytid(); nrnmpi_myid = 0; if (mytid_ < 0) { perror("start"); } host_mytid = pvm_tidtohost(mytid_); tid = pvm_parent(); if (tid == PvmSysErr) { perror("start"); }else if (tid == PvmNoParent) { is_master_ = true; pvm_catchout(stdout); pvm_setopt(PvmRoute, PvmRouteDirect); pvm_config(&n, NULL, &hostp); nncpu = 0; for (i=0; i < n; ++i) { ncpu = hostp[i].hi_speed; if (ncpu%1000) { hoc_warning(hostp[i].hi_name, " speed in pvm configuration file is not a multiple of 1000. Assuming 1000."); ncpu = 1000; } nncpu += ncpu/1000; } nrnmpi_numprocs = nncpu; ncids = 0; }else{ // a worker, impossible assert(false); } if (nrnmpi_numprocs > 1 && tid == PvmNoParent) { char ** sargv; // args are workingdirectory specialOrNrniv -bbs_nhost nhost args sargv = new char*[nrn_global_argc + 4]; for (i=1; i < nrn_global_argc; ++i) { sargv[i+3] = nrn_global_argv[i]; } sargv[nrn_global_argc + 3] = 0; sargv[0] = rel_working_dir(); //printf("sargv[0]=|%s|\n", sargv[0]); sargv[1] = nrn_global_argv[0]; sargv[2] = "-bbs_nhost"; sargv[3] = new char[10]; sprintf(sargv[3], "%d", nrnmpi_numprocs); cids = new int[nrnmpi_numprocs-1]; if (nrn_global_argv[nrn_global_argc] != 0) { printf("argv not null terminated\n"); exit(1); } BBSDirectServer::server_->start(); bbs_sig_set(); bbs_handle(); //spawn according to number of cpu's (but master has one less) //printf("%d total number of cpus on %d machines\n", nncpu, n); int icid = 0; bool first = true; while (icid < nrnmpi_numprocs - 1) { for (i=0; i < n; ++i) { ncpu = hostp[i].hi_speed; if (ncpu%1000) { ncpu = 1000; } ncpu /= 1000; //printf("%d cpu for machine %d (%s)\n", ncpu, i, hostp[i].hi_name); if (first && hostp[i].hi_tid == host_mytid) { // spawn one fewer on master first time through --ncpu; } if (icid + ncpu >= nrnmpi_numprocs) { ncpu = nrnmpi_numprocs - icid - 1; } //printf("before 
spawn %d processes (icid=%d) for machine %d (%s)\n", ncpu, icid, i, hostp[i].hi_name); if (ncpu) { ncids = pvm_spawn("bbswork.sh", sargv, PvmTaskHost, hostp[i].hi_name, ncpu, cids + icid); if (ncids != ncpu) { fprintf(stderr, "Tried to spawn %d tasks, only %d succeeded on %s\n", ncpu, ncids, hostp[i].hi_name); hoc_execerror("Could not spawn all the requested tasks for", hostp[i].hi_name); } //printf("spawned %d for %s with cids starting at %d\n", ncpu, hostp[i].hi_name, icid); icid += ncpu; } if (icid >= nrnmpi_numprocs) { break; } } first = false; } ncids = icid; printf("spawned %d more %s on %d cpus on %d machines\n", ncids, nrn_global_argv[0], nncpu, n); delete [] sargv[3]; delete [] sargv; } }
void pvmica(doublereal *data, doublereal *weights, doublereal *sphere, doublereal *eigv, integer chans, integer ncomps, integer frames, integer epochs, int *window, doublereal *bias, integer *signs, char **fnames) { struct pvmhostinfo *hinfo; int i, j, datasize, maxep, maxfr, segs, speed, spwnd = 0, id = 0; int *tids, tid, nhost, narch, nproc = 0, ntask = 0, last = 0; char *name; FILE *fids[3]; integer **srec; doublereal **wrec, **brec, *basedata, *windata, *prjdata, deflr = lrate; tresult result; tassign assign; /* pvm_catchout(stdout);*/ pvm_config(&nhost,&narch,&hinfo); for (i=0 ; i<nhost ; i++) nproc += speed2proc(hinfo[i].hi_speed); basedata = (doublereal*)malloc(ncomps*epochs*window[BASELINE]*sizeof(doublereal)); baseline(data,basedata,ncomps,frames,epochs,window); runica(basedata,weights,ncomps,1,window[BASELINE]*epochs,bias,signs); free(basedata); assign.weights = weights; assign.chans = ncomps; assign.frames = window[FRAMEWINDOW]; assign.epochs = window[EPOCHWINDOW]; assign.bias = (int)(bias!=NULL); assign.signs = (int)(signs!=NULL); assign.extended = extended; assign.extblocks = extblocks; assign.pdfsize = pdfsize; assign.nsub = nsub; assign.verbose = verbose; assign.block = block; assign.maxsteps = maxsteps; assign.lrate = deflr; assign.annealstep = annealstep; assign.annealdeg = annealdeg; assign.nochange = nochange; assign.momentum = momentum; datasize = window[FRAMEWINDOW] * window[EPOCHWINDOW] * ncomps; windata = (doublereal*)malloc(datasize*sizeof(doublereal)); prjdata = (doublereal*)malloc(datasize*sizeof(doublereal)); maxep = (epochs-window[EPOCHWINDOW])/window[EPOCHSTEP] + 1; maxfr = (frames-window[FRAMEWINDOW])/window[FRAMESTEP] + 1; segs = maxep*maxfr; if (segs > nproc) { tids = (int*)malloc(nproc*sizeof(int)); for (i=0 ; i<nhost ; i++) { name = hinfo[i].hi_name; speed = hinfo[i].hi_speed; spwnd = pvm_spawn(SPAWN_ICA,NULL,1,name,speed2proc(speed),&tids[ntask]); if (spwnd <= 0) error("Failed to spawn processes"); ntask += spwnd; } } else { tids 
= (int*)malloc(segs*sizeof(int)); spwnd += pvm_spawn(SPAWN_ICA,NULL,0,"",segs,tids); if (spwnd <= 0) error("Failed to spawn processes"); ntask += spwnd; } for (i=0,j=0 ; i<ntask ; i++,j+=2,id++) { if (j >= ntask) j=1; assign.id = id; assign.data = extract(data,windata,(int)ncomps,(int)frames,(int)epochs,(int)id,window); send_assign(tids[j],&assign); } wrec = (doublereal**)malloc(segs*sizeof(doublereal*)); brec = (doublereal**)malloc(segs*sizeof(doublereal*)); srec = (integer**)malloc(segs*sizeof(integer*)); for (i=0 ; i<segs ; i++) { wrec[i] = NULL; brec[i] = NULL; srec[i] = NULL; } for (i=0 ; i<3 ; i++) fids[i] = NULL; if (fnames[0] != NULL) fids[0] = fopen(fnames[0],"wb"); if (fnames[1] != NULL) fids[1] = fopen(fnames[1],"wb"); if (fnames[2] != NULL) fids[2] = fopen(fnames[2],"wt"); for (i=0 ; i<segs ; i++) { tid = receive_result(&result); printf("Received id %d\n",result.id); wrec[result.id] = result.weights; brec[result.id] = result.bias; srec[result.id] = result.signs; if (ntask < segs) { assign.id = id; assign.data = extract(data,windata,(int)ncomps,(int)frames,(int)epochs,(int)id,window); send_assign(tid,&assign); ntask++; id++; } else { send_kill(tid); } datasize = window[FRAMEWINDOW] * window[EPOCHWINDOW]; extract(data,windata,(int)ncomps,(int)frames,(int)epochs,(int)result.id,window); geproj(windata,result.weights,(integer)ncomps,(integer)datasize,prjdata); if (eigv) varsort(prjdata,result.weights,sphere,&eigv[chans*(chans-ncomps)],result.bias,result.signs,(integer)ncomps,(integer)datasize,(integer)chans); else varsort(prjdata,result.weights,sphere,NULL,result.bias,result.signs,(integer)ncomps,(integer)datasize,(integer)chans); while (last<segs && wrec[last]!=NULL) { if (fids[0]!=NULL && wrec[last]!=NULL) fbc_matwrite(fids[0],chans*ncomps,wrec[last]); if (fids[1]!=NULL && brec[last]!=NULL) fbc_matwrite(fids[1],ncomps,brec[last]); if (fids[2]!=NULL && srec[last]!=NULL) iac_matwrite(fids[2],ncomps,srec[last]); if (wrec[last] != NULL) free(wrec[last]); if 
(brec[last] != NULL) free(brec[last]); if (srec[last] != NULL) free(srec[last]); last++; } } for (i=0 ; i<3 ; i++) if (fids[i] != NULL) fclose(fids[i]); if (wrec != NULL) free(wrec); if (brec != NULL) free(brec); if (srec != NULL) free(srec); if (windata != NULL) free(windata); if (prjdata != NULL) free(prjdata); if (tids != NULL) free(tids); }
int main(int argc, char** argv) { int pid = pvm_mytid(); if (argc != 2 && argc != 3) stop("Usage: tm_driver <control_file> [TM_tid]\n"); int tm_tid = 0; char * control_file = strdup(argv[1]); if (argc == 3) sscanf(argv[2], "t%x", &tm_tid); int info = 0; // Get the machine configuration int nhost = 0; int narch = 0; struct pvmhostinfo *hostp = 0; info = pvm_config(&nhost, &narch, &hostp); // Parse the control file int to_delete_size = 0; // # of machsto delete char ** to_delete = 0; // names of machs to delete int to_add_size = 0; // # of machsto add char ** to_add = 0; // names of machs to add int delete_proc_num = 0; // # of procs to delete int * tid_delete = 0; // the tids of procs to delete // # of various procs to start int lp_num = 0; int cg_num = 0; int vg_num = 0; int cp_num = 0; int vp_num = 0; // the mach names where the procs shoud be started char ** lp_mach = 0; char ** cg_mach = 0; char ** vg_mach = 0; char ** cp_mach = 0; char ** vp_mach = 0; // Do the parsing. First count ifstream ctl(control_file); if (!ctl) stop("Cannot open parameter file... Aborting.\n"); // Get the lines of the parameter file one-by-one and if a line contains a // (keyword, value) pair then interpret it. const int MAX_PARAM_LINE_LENGTH = 1024; char line[MAX_PARAM_LINE_LENGTH+1], *end_of_line, *keyword, *value, *ctmp; char ch; while (ctl) { ctl.get(line, MAX_PARAM_LINE_LENGTH); if (ctl) { ctl.get(ch); if (ch != '\n') { printf("Too long (>= %i chars) line in the parameter file.\n", MAX_PARAM_LINE_LENGTH); stop("This is absurd. Aborting.\n"); } } end_of_line = line + strlen(line); //-------------------------- First separate the keyword and value ------ keyword = find_if(line, end_of_line, isgraph); if (keyword == end_of_line) // empty line continue; ctmp = find_if(keyword, end_of_line, isspace); if (ctmp == end_of_line) // line is just one word. 
must be a comment continue; *ctmp = 0; // terminate the keyword with a 0 character ++ctmp; value = find_if(ctmp, end_of_line, isgraph); if (value == end_of_line) // line is just one word. must be a comment continue; ctmp = find_if(value, end_of_line, isspace); *ctmp = 0; // terminate the value with a 0 character. this is good even // if ctmp == end_ofline if (str_eq(keyword, "BCP_delete_machine")) { ++to_delete_size; } else if (str_eq(keyword, "BCP_add_machine")) { ++to_add_size; } else if (str_eq(keyword, "BCP_delete_proc")) { ++delete_proc_num; } else if (str_eq(keyword, "BCP_lp_process")) { ++lp_num; } else if (str_eq(keyword, "BCP_cg_process")) { ++cg_num; } else if (str_eq(keyword, "BCP_vg_process")) { ++vg_num; } else if (str_eq(keyword, "BCP_cp_process")) { ++cp_num; } else if (str_eq(keyword, "BCP_vp_process")) { ++vp_num; } } ctl.close(); if (to_delete_size > 0) { to_delete = new char*[to_delete_size]; to_delete_size = 0; } if (to_add_size > 0) { to_add = new char*[to_add_size]; to_add_size = 0; } if (delete_proc_num > 0) { tid_delete = new int[delete_proc_num]; delete_proc_num = 0; } if (lp_num) { lp_mach = new char*[lp_num]; lp_num = 0; } if (cg_num) { cg_mach = new char*[cg_num]; cg_num = 0; } if (vg_num) { vg_mach = new char*[vg_num]; vg_num = 0; } if (cp_num) { cp_mach = new char*[cp_num]; cp_num = 0; } if (vp_num) { vp_mach = new char*[vp_num]; vp_num = 0; } ctl.open(control_file); while (ctl) { ctl.get(line, MAX_PARAM_LINE_LENGTH); if (ctl) { ctl.get(ch); if (ch != '\n') { printf("Too long (>= %i chars) line in the parameter file.\n", MAX_PARAM_LINE_LENGTH); stop("This is absurd. Aborting.\n"); } } end_of_line = line + strlen(line); //-------------------------- First separate the keyword and value ------ keyword = find_if(line, end_of_line, isgraph); if (keyword == end_of_line) // empty line continue; ctmp = find_if(keyword, end_of_line, isspace); if (ctmp == end_of_line) // line is just one word. 
must be a comment continue; *ctmp = 0; // terminate the keyword with a 0 character ++ctmp; value = find_if(ctmp, end_of_line, isgraph); if (value == end_of_line) // line is just one word. must be a comment continue; ctmp = find_if(value, end_of_line, isspace); *ctmp = 0; // terminate the value with a 0 character. this is good even // if ctmp == end_ofline if (str_eq(keyword, "BCP_delete_machine")) { to_delete[to_delete_size++] = strdup(value); } else if (str_eq(keyword, "BCP_add_machine")) { to_add[to_add_size++] = strdup(value); } else if (str_eq(keyword, "BCP_delete_proc")) { sscanf(value, "t%x", &tid_delete[delete_proc_num++]); } else if (str_eq(keyword, "BCP_lp_process")) { lp_mach[lp_num++] = strdup(value); } else if (str_eq(keyword, "BCP_cg_process")) { cg_mach[cg_num++] = strdup(value); } else if (str_eq(keyword, "BCP_vg_process")) { vg_mach[vg_num++] = strdup(value); } else if (str_eq(keyword, "BCP_cp_process")) { cp_mach[cp_num++] = strdup(value); } else if (str_eq(keyword, "BCP_vp_process")) { vp_mach[vp_num++] = strdup(value); } } ctl.close(); // Check that machine deletions and additions are correct char ** last = 0; // Are there duplicates on the to be deleted list ? if (to_delete_size > 0) { sort(to_delete, to_delete + to_delete_size, str_lt); last = unique(to_delete, to_delete + to_delete_size, str_eq); if (to_delete_size != last - to_delete) stop("A machine to be deleted is listed twice... Aborting.\n"); } // Are there duplicates on the to be added list? if (to_add_size > 0) { sort(to_add, to_add + to_add_size, str_lt); last = unique(to_add, to_add + to_add_size, str_eq); if (to_add_size != last - to_add) stop("A machine to be added is listed twice... Aborting.\n"); } int i; char ** mach_list = new char*[nhost + to_add_size]; for (i = 0; i < nhost; ++i) mach_list[i] = strdup(hostp[i].hi_name); sort(mach_list, mach_list + nhost, str_lt); char ** current_list = new char*[nhost + to_add_size]; // Is there a nonexisting machine to be deleted? 
if (to_delete_size > 0) { last = set_difference(to_delete, to_delete + to_delete_size, mach_list, mach_list + nhost, current_list, str_lt); if (last != current_list) stop("A nonexisting machine is to be deleted... Aborting.\n"); last = set_difference(mach_list, mach_list + nhost, to_delete, to_delete + to_delete_size, current_list, str_lt); ::swap(mach_list, current_list); } // Is there an already existing machine to be added? if (to_add_size > 0) { last = set_intersection(to_add, to_add + to_add_size, mach_list, mach_list + nhost, current_list, str_lt); if (last != current_list) stop("A machine to be added is already there... Aborting.\n"); last = merge(to_add, to_add + to_add_size, mach_list, mach_list + nhost, current_list, str_lt); ::swap(mach_list, current_list); } const int mach_num = nhost - to_delete_size + to_add_size; // Check that the machines the new processes are supposed to be started on // really exist. if (lp_num > 0) { sort(lp_mach, lp_mach + lp_num, str_lt); if (set_difference(lp_mach, lp_mach + lp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An lp machine is not in the final machine list... Aborting.\n"); } if (cg_num > 0) { sort(cg_mach, cg_mach + cg_num, str_lt); if (set_difference(cg_mach, cg_mach + cg_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An cg machine is not in the final machine list... Aborting.\n"); } if (vg_num > 0) { sort(vg_mach, vg_mach + vg_num, str_lt); if (set_difference(vg_mach, vg_mach + vg_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An vg machine is not in the final machine list... Aborting.\n"); } if (cp_num > 0) { sort(cp_mach, cp_mach + cp_num, str_lt); if (set_difference(cp_mach, cp_mach + cp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An cp machine is not in the final machine list... 
Aborting.\n"); } if (vp_num > 0) { sort(vp_mach, vp_mach + vp_num, str_lt); if (set_difference(vp_mach, vp_mach + vp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An vp machine is not in the final machine list... Aborting.\n"); } // Find the tree manager find_tree_manager(pid, tm_tid); // Check that the TM is not on one of the machines to be deleted. if (to_delete_size > 0) { const int dtid = pvm_tidtohost(tm_tid); for (i = 0; i < nhost; ++i) { if (hostp[i].hi_tid == dtid) for (int j = 0; j < to_delete_size; ++j) { if (str_eq(hostp[i].hi_name, to_delete[j])) stop("Can't delete the machine the TM is on. Aborting.\n"); } } } // Check that the TM is not one of the processes to be deleted if (delete_proc_num > 0) { if (find(tid_delete, tid_delete + delete_proc_num, tm_tid) != tid_delete + delete_proc_num) stop("Can't delete the TM... Aborting.\n"); } // Modify the machine configuration if (to_delete_size > 0 || to_add_size > 0) { int * infos = new int[max(to_delete_size, to_add_size)]; if (to_delete_size > 0) if (pvm_delhosts(to_delete, to_delete_size, infos) < 0) { printf("Failed to delete all specified machines...\n"); stop("Please check the situation manually... Aborting.\n"); } if (to_add_size > 0) if (pvm_addhosts(to_add, to_add_size, infos) < 0) { printf("Failed to add all specified machines...\n"); stop("Please check the situation manually... 
Aborting.\n"); } } // Kill the processes to be killed for (i = 0; i < delete_proc_num; ++i) pvm_kill(tid_delete[i]); // Put together a message to be sent to the TM that contains the machine // names on which the new processes should be spawned int len = (lp_num + cg_num + vg_num + cp_num + vp_num) * sizeof(int); if (len > 0) { len += 5 * sizeof(int); for (i = 0; i < lp_num; ++i) len += strlen(lp_mach[i]); for (i = 0; i < cg_num; ++i) len += strlen(cg_mach[i]); for (i = 0; i < vg_num; ++i) len += strlen(vg_mach[i]); for (i = 0; i < cp_num; ++i) len += strlen(cp_mach[i]); for (i = 0; i < vp_num; ++i) len += strlen(vp_mach[i]); char * buf = new char[len]; memcpy(buf, &lp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < lp_num; ++i) { const int l = strlen(lp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, lp_mach[i], l); buf += l; } memcpy(buf, &cg_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < cg_num; ++i) { const int l = strlen(cg_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, cg_mach[i], l); buf += l; } memcpy(buf, &vg_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < vg_num; ++i) { const int l = strlen(vg_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, vg_mach[i], l); buf += l; } memcpy(buf, &cp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < cp_num; ++i) { const int l = strlen(cp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, cp_mach[i], l); buf += l; } memcpy(buf, &vp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < vp_num; ++i) { const int l = strlen(vp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, vp_mach[i], l); buf += l; } buf -= len; pvm_initsend(PvmDataRaw); pvm_pkbyte(buf, len, 1); pvm_send(tm_tid, BCP_CONFIG_CHANGE); int bufid = pvm_recv(tm_tid, -1); int bytes = 0, msgtag = 0; pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid); if (msgtag == BCP_CONFIG_ERROR) stop("TM had difficulties. 
Please check the situation manually.\n"); } pvm_exit(); return 0; }
// Hook up to the pvm and spawn the slave processes. int PVMDemeGA::spawn(const char* slavename) { _mid = pvm_mytid(); if(_mid < 0) { cerr << "\n" << className() << ": spawn:\n"; cerr << " Bad ID for master task. Have you started the PVM?\n"; return _status = _mid; } struct pvmhostinfo* hostp; _status = pvm_config(&_nhosts, &_narch, &hostp); if(_status == PvmSysErr) { cerr << "\n" << className() << ": spawn:\n"; cerr << " PVM not responding. Have you started the PVM?\n"; return _status; } _Ntid = npop; _tid = new int [_Ntid]; // task IDs for the slaves char sn[32]; // PVM is not const-safe... strcpy(sn, slavename); _ntid = pvm_spawn(sn, (char**)0, 0, "", _Ntid, _tid); if(_ntid <= 0) { cerr << className() << ": spawn:\n Error spawning slaves.\n"; cerr << " Error codes of failed spawns are:\n"; for(int i=0; i<_Ntid; i++) { cerr << " slave "; cerr.width(3); cerr<<i<<": "<<_tid[i]<<"\n"; } pvm_exit(); return _status = -1; } else if(_ntid < _Ntid) { cerr << className() << ": spawn:\n "; cerr << " Spawned only "<<_ntid<<" of "<<_Ntid<<"\n"; cerr << " Error codes of failed spawns are:\n"; for(int i=0; i<_Ntid; i++) { if(_tid[i] < 0) { cerr << " slave "; cerr.width(3); cerr<<i<<": "<<_tid[i]<<"\n"; } } } else { cerr << className() << ": spawn:\n"; cerr << " Spawned " << _Ntid << " slave processes...\n"; } #ifdef DEBUG cerr << "waiting for response from slaves...\n"; #endif int flag = _ntid; while(flag > 0) { int bufid = pvm_recv(-1, -1); if(bufid >= 0) { int bytes, msgtag, tid; _status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); int which = tid2idx(tid); switch(msgtag) { case MSG_READY: #ifdef DEBUG cerr << " slave " << tid << " (" << which << ") is alive\n"; #endif flag--; break; default: cerr << className() << ": spawn:\n"; cerr << " unexpected msgtag: " << msgtag << "\n"; break; } } else { cerr << className() << ": spawn:\n"; cerr << " error from pvm_recv: " << bufid << "\n"; } } #ifdef DEBUG cerr << "slaves appear to be up and running.\n"; #endif return _status; }
int main(int argc, char **argv) { int tid; int parent; struct pvmhostinfo *hostp; int nhost, narch; tid = pvm_mytid(); if(tid < 0) pvm_ferror("pvm_mytid", 1); parent = pvm_parent(); if(parent == PvmNoParent || parent == PvmParentNotSet) { /* Processus pere */ int nchildren, children[MAXCHILDREN]; int i, j, res, rc; int bytes, tag, from_tid; /* Ask PVM for information about the virtual machine, and display it to the user. */ pvm_config(&nhost, &narch, &hostp); printf("I found the following %d hosts...\n",nhost); for (i = 0; i < nhost; i++) printf("%d. %s \t(%s)\n",i,hostp[i].hi_name,hostp[i].hi_arch); rc = pvm_spawn("pvm_mandel", NULL, PvmTaskDefault, NULL, NUMCHILDREN, children); if(rc < 0) pvm_ferror("pvm_spawn", 1); printf("%d enfants\n", rc); nchildren = 0; for(i = 0; i < NUMCHILDREN; i++) { printf("Enfant %d, tid = %d\n", i, children[i]); if(children[i] >= 0) nchildren++; if(nchildren < 1) pvm_ferror("Pas d'enfants", 0); } for(i = -MAXX; i <= MAXX; i++) { for(j = -MAXY; j <= MAXY; j++) { rc = pvm_recv(-1,-1); if (rc < 0) { printf("An error occurred when trying to receive a message.\n"); break; } /* Find out who this message is from, and how big it is. 
*/ rc = pvm_bufinfo(rc,&bytes,&tag,&from_tid); /* printf("received message from %s of %d bytes, tag %d\n", get_host_by_tid(hostp,nhost,from_tid), bytes, tag); */ rc = pvm_upkint(&res, 1, 1); if(rc < 0) pvm_ferror("pvm_upkint", 1); cases[i + MAXX][j + MAXY] = res; } } dump_ppm("mandel.ppm", cases); printf("Fini.\n"); pvm_exit(); exit(0); } else if(parent >= 0) { /* On est l'un des fils */ double x, y; int i, j, res, rc; printf("Fils: %d\n", tid); for(i = -MAXX; i <= MAXX; i++) { for(j = -MAXY; j <= MAXY; j++) { x = 2 * i / (double)MAXX; y = 1.5 * j / (double)MAXY; res = mandel(x, y); rc = pvm_initsend(PvmDataDefault); if(rc < 0) pvm_ferror("pvm_initsend", 1); rc = pvm_pkint(&res, 1, 1); if(rc < 0) pvm_ferror("pvm_pkint", 1); rc = pvm_send(parent, 0); if(rc < 0) pvm_ferror("pvm_send", 1); } } printf("Fils %d termine.\n", tid); pvm_exit(); exit(0); } else pvm_ferror("pvm_parent", 1); assert(0); }
int main(int argc, char ** argv){ TAILQ_INIT(&head); int nproc, numt, i, nhost, narch; int num_tasks = 0; struct pvmhostinfo *hostp; struct stat s; FILE * codefile = fopen("./codes", "w"); if(argc != 2){ exit_prog("USAGE: ./taskgen directory\n",1); } else { stat(argv[1], &s); if(s.st_mode & S_IFDIR){ char path[PATH_MAX+1]; realpath(argv[1],path); printf("Loading graphs from %s...", path); fflush(stdout); load_graph_dir(path, &num_tasks); printf("done. Found %d tasks.\n", num_tasks); } else{ printf("%s is not a directory. Please check arguments.\n", argv[1]); exit_prog(NULL, 1); } } pvm_config(&nhost, &narch, &hostp); /*Set number of slaves to start */ nproc = nhost * 3; /*3 processes per host */ printf("Spawning %d worker tasks on %d machines...", nproc, nhost); int tids[nproc]; /*hold the task ids of the workers */ int flags = PvmTaskDefault; // flags += PvmTaskDebug; numt =pvm_spawn("worker", (char**)0, flags, "", nproc, tids); /*start up the workers */ if(numt < nproc){ /*Error Checking */ printf("\n Trouble spawing slaves. Error codes are:\n"); for(i = numt; i < nproc; i++){ printf("TID %d: %d\n", i, tids[i]); } for(i = 0; i < numt; i++){ pvm_kill(tids[i]); } exit_prog("Failure.\n",1); } printf("done. Connected.\n"); /* Main loop */ int bufid, bytes, msgtype, source; int sent_tasks, comp_tasks, found_codes = 0; int finished = 0; int percent_comp = 0; char * buf; while(!finished){ bufid = pvm_recv(-1, -1); /*Accept any message from any task BLOCKING CALL*/ pvm_bufinfo(bufid, &bytes, &msgtype, &source); switch(msgtype){ case MSGREQTASK: if(!TAILQ_EMPTY(&head)){ send_task(source); /*send out the message */ sent_tasks++; } break; case MSGCODE: buf = malloc(bytes); pvm_upkstr(buf); fprintf(codefile, "%s\n", buf); fflush(codefile); comp_tasks++; found_codes++; break; case MSGNOCODE: comp_tasks++; break; default: printf("Incorrect MSGTYPE received from task %d. 
Received: %d\n", source, msgtype); break; } if(((float)comp_tasks/num_tasks * 100) > percent_comp + 1){ printf("Tasks Complete: %d Tasks Sent: %d Percent Complete: %.2f\n", comp_tasks, sent_tasks, (float)comp_tasks/num_tasks * 100); percent_comp = (float)comp_tasks/num_tasks * 100; } if(comp_tasks == num_tasks){ finished = 1; } } printf("All tasks complete.\n"); exit_prog(NULL, 0); return 0; }