int recvFilterData(FilterData *fData) { int bufId; int i, l, toFilter; char *cwd = (char *)malloc(MAX_CWD_LENGTH+1); int num = 0; int *tids = (int *)malloc(sizeof(int)*MAXINSTANCES); char *filterName = (char *)malloc(MAX_FNAME_LENGTH); char *libName = (char *)malloc(MAX_LNAME_LENGTH); char *hostname = (char *)malloc(MAX_HNAME_LENGTH); char *labelStreamLibname = (char *)malloc(MAX_LNAME_LENGTH); int parentTid = pvm_parent(); #ifdef ATTACH // if im using attach i dont no if the // manager process is realy my father bufId = pvm_recv(-1, 0); int bytes, msgtag,tid; pvm_bufinfo(bufId, &bytes, &msgtag, &tid ); printf("recvFilterData: bytes = %d msgtag = %d tid = %d\n", bytes, msgtag, tid); #else //We receive one message with all data in it bufId = pvm_recv(parentTid, 0); #endif //get the current working directory pvm_upkint(&l, 1, 1); pvm_upkbyte(cwd, l, 1); cwd[l] = '\0'; #ifdef DEBUG printf("cwd = %s\n",cwd); #endif // filter id, useful for debugging pvm_upkint(&num, 1, 1); setFDIdFilter(fData, num); // get my rank pvm_upkint(&num, 1, 1); setFDRank(fData, num); // total number of instances of this filter pvm_upkint(&num, 1, 1); setFDNumInstances(fData, num); // get my brothers tids pvm_upkint(tids, num, 1); setFDTids(fData, tids); #ifdef ATTACH // getting my parent Tid because if i'm a attached filter // i can be adopted for anyone.. so we do it for all filters.. // just to make the code easy to understand.. 
pvm_upkint(&parentTid, 1, 1); int attached; pvm_upkint(&attached, 1, 1); setFDAttached(fData, attached); #endif // if you are not using Attach the parentTid still the same // or its equal to pvm_parent() setFDParentTid(fData, parentTid); #ifdef BMI_FT int faultStatus, lastFilter; pvm_upkint(&faultStatus, 1, 1); setFDFaultStatus(fData, faultStatus); pvm_upkint(&lastFilter, 1, 1); setFDLastFilter(fData, lastFilter); #endif // filtername pvm_upkint(&l, 1, 1); pvm_upkbyte(filterName, l, 1); filterName[l] = '\0'; setFDName(fData, filterName); //machine declared memory: -1 autodetect, declared on XML pvm_upkint(&num, 1, 1); setFDMachineMem(fd, num); //number of brothers(+ me) I have on this machine, useful for memory management pvm_upkint(&num, 1, 1); setFDNumLocalInstances(fd, num); #ifdef VOID_INST char instDir[MAX_IDIR_LENGTH]; pvm_upkint(&l, 1, 1); pvm_upkbyte(instDir, l, 1); instDir[l] = '\0'; setFDInstDir(fd, instDir); #endif // receives shared lib name pvm_upkint(&l, 1, 1); pvm_upkbyte(libName, l, 1); libName[l] = '\0'; setFDLibName(fData, libName); /*if (loadFDLibFunctions(fData) == -1){ char msg[1000]; sprintf(msg, "could not load shared library %s", libName); pvm_initsend(PvmDataRaw); pvm_pkbyte(msg, strlen(msg), 1); pvm_send(pvm_parent(), MSGT_FERROR); return -1; }*/ // set hostname gethostname(hostname, MAX_HNAME_LENGTH); setFDHostName(fData, hostname); // data received till now fprintf(stderr,"filter %s (rank: %d): pvm_tid:%d hostname:%s\n", fData->name, fData->myRank, pvm_mytid(), fData->hostName); //port data //Receive numOutputs pvm_upkint(&num, 1, 1); setFDNumOutputs(fData, num); //receive numInputs pvm_upkint(&num, 1, 1); setFDNumInputs(fData, num); // for each OutputPort for(i = 0; i < fData->numOutputPorts; i++) { int nOutHosts = 0, tag = 0; int numToSend; int *outTids = NULL; int firstInstanceToWrite; char *portName = (char *)malloc(MAX_PTNAME_LENGTH + 1); char *writePolicyName = (char *)malloc(100); writePolicy_t wp; OutputPort *outputPort; 
pvm_upkint(&numToSend, 1, 1); //now we can create the port outputPort = createOutputPort(numToSend); //port data pvm_upkint(&l, 1, 1); pvm_upkbyte(portName, l, 1); //portname portName[l] = '\0'; setOPName(outputPort, portName); pvm_upkint(&tag, 1, 1); //get tag setOPTag(outputPort, tag); for( toFilter = 0; toFilter < numToSend; toFilter++ ) { pvm_upkint(&nOutHosts, 1, 1); //number of tids it is connected setOPNumDestinations(outputPort, toFilter, nOutHosts); outTids = (int *) malloc(sizeof(int)*nOutHosts); pvm_upkint(outTids, nOutHosts, 1); //get tids setOPTidsDestinations(outputPort, toFilter, outTids); pvm_upkint(&l, 1, 1); pvm_upkbyte(writePolicyName, l, 1); // get write policy writePolicyName[l] = '\0'; wp = getWritePolicyByName(writePolicyName); setOPWritePolicy(outputPort, toFilter, wp); // get LS sharedlib if policy is LS if (wp == LABELED_STREAM ){ pvm_upkint(&l, 1, 1); pvm_upkbyte(labelStreamLibname, l, 1); labelStreamLibname[l] = '\0'; //set output port library name setOPLibName(outputPort, toFilter, labelStreamLibname); //load output port library for ls if (loadOPLSData(outputPort, toFilter) == -1 ){ char msg[1000]; sprintf(msg, "could not load LS shared library %s", labelStreamLibname); pvm_initsend(PvmDataRaw); pvm_pkbyte(msg, strlen(msg), 1); pvm_send(pvm_parent(), MSGT_FERROR); return -1; } } else if (wp == MULTICAST_LABELED_STREAM) { pvm_upkint(&l, 1, 1); pvm_upkbyte(labelStreamLibname, l, 1); labelStreamLibname[l] = '\0'; //set output port library name setOPLibName(outputPort, toFilter, labelStreamLibname); //load output port library for ls if (loadOPMLSData(outputPort, toFilter) == -1){ char msg[1000]; sprintf(msg,"could not load MLS shared library %s", labelStreamLibname); pvm_initsend(PvmDataRaw); pvm_pkbyte(msg, strlen(msg), 1); pvm_send(pvm_parent(), MSGT_FERROR); return -1; } } else { //if not LS, we needa know who will be the first instance to receive msgs pvm_upkint(&firstInstanceToWrite, 1, 1); //the first instance to write 
setOPNextToSend(outputPort, toFilter, firstInstanceToWrite); } } // and we finally add the port to our filterData structure addFDOutputPort(fData, outputPort); //free pointers free(outTids); free(portName); free(writePolicyName); } // foreach InputPort for(i = 0; i < fData->numInputPorts; i++) { int nInHosts = 0; int *inTids = NULL; int inTag; char portName[MAX_PTNAME_LENGTH + 1]; int l; InputPort *inputPort = createInputPort(); //get the portName pvm_upkint(&l, 1, 1); pvm_upkbyte(portName, l, 1); portName[l] = '\0'; setIPName(inputPort, portName); // receive the number of tids of this port pvm_upkint(&nInHosts, 1, 1); // number of instances connected to this port setIPNumSources(inputPort, nInHosts); // get the tids inTids = (int *) malloc(sizeof(int)*nInHosts); pvm_upkint(inTids, nInHosts, 1); //get tids setIPTidsSources(inputPort, inTids); free(inTids); pvm_upkint(&inTag, 1, 1); //the port tag setIPTag(inputPort, inTag); int hasLabel; pvm_upkint(&hasLabel, 1, 1); toFilter = 0; // if it receives from a LS or MLS we have to have the library with // the function that extracts the label if (hasLabel){ setIPLS(inputPort, 1); pvm_upkint(&l, 1, 1); pvm_upkbyte(labelStreamLibname, l, 1); labelStreamLibname[l] = '\0'; //set output port library name setIPLibName(inputPort, toFilter, labelStreamLibname); //load output port library for ls if (loadIPLSData(inputPort, toFilter) == -1 ){ char msg[1000]; sprintf(msg, "could not load LS shared library %s", labelStreamLibname); pvm_initsend(PvmDataRaw); pvm_pkbyte(msg, strlen(msg), 1); pvm_send(pvm_parent(), MSGT_FERROR); return -1; } } //finally add the port to our filterData addFDInputPort(fData, inputPort); } free(tids); free(hostname); free(filterName); free(libName); free(labelStreamLibname); return 1; }
// This population evaluator is the administrator for the parallelization. // It looks around to see when slaves are available to evaluate a genome. As // soon as a slave is available and a genome needs to be evaluated, this // routine sends it off. When a slave is finished, it posts a message to // say so and this routine gets the message and grabs the results from the // slave that posted the message. // An index of -1 means that the slave has no assignment. The first int in // the stream of stuff is always the ID of the slave (0-nslaves) that is // sending the information. After that it is either nothing (the slave just // reported that it is ready for another genome) or it is a float (the score // of the genome that was assigned to the slave). void PopulationEvaluator(GAPopulation& pop) { PVMDataPtr data = (PVMDataPtr)pop.userData(); int* index = new int [data->nreq]; int done = 0, outstanding = 0, next = 0; int bufid, status, bytes, msgtag, tid, who; while(!done) { // If we have a genome that needs to be evaluated and one of the slaves is // ready to evaluate it, send the genome to the slave. if(next < pop.size() && (bufid=pvm_nrecv(-1, MSG_READY)) != 0) { if(bufid > 0) { pvm_bufinfo(bufid, &bytes, &msgtag, &tid); status = SendGenomeData(pop.individual(next), tid); if(status >= 0) { if((who = id2idx(tid, *data)) >= 0) { index[who] = next; next++; outstanding++; } else { cerr << "PopEval: bogus tid mapping: " << tid << "\n"; } } else { cerr << "PopEval: error sending data to: " << tid; cerr << " error code is: " << status << "\n"; } } else { cerr << "PopEval: error from pvm_nrecv: " << bufid << "\n"; } } // If we have any genomes waiting for their evaluation and any slaves have // posted a message stating that they have a finished score ready for us, get // the score from the slave and stuff it into the appropriate genome. 
if(outstanding > 0 && (bufid=pvm_nrecv(-1, MSG_GENOME_SCORE)) != 0) { if(bufid > 0) { pvm_bufinfo(bufid, &bytes, &msgtag, &tid); if((who = id2idx(tid, *data)) >= 0) { if(index[who] >= 0) { status = RecvGenomeScore(pop.individual(index[who])); if(status >= 0) { index[who] = -1; outstanding--; } else { cerr << "PopEval: error receiving score from: " << tid; cerr << " error code is: " << status << "\n"; } } else { cerr << "PopEval: index conflict from tid " << tid << "\n"; } } else { cerr << "PopEval: bogus tid mapping: " << tid << "\n"; } } else { cerr << "PopEval: error from pvm_nrecv: " << bufid << "\n"; } } if(next == pop.size() && outstanding == 0) done = 1; if(next > pop.size()) { cerr << "bogus value for next: " << next; cerr << " popsize is: " << pop.size() << "\n"; } } delete [] index; }
int main(int argc, char** argv){ int parent_tid = pvm_parent(); int my_tid = pvm_mytid(); DEBUGA("[TID %d] Slave starting ...",my_tid); /* Checking number of arguments */ if(argc<=3){ printf("Error in arguments"); exit(1); } /* Getting password from arguments */ int number_of_threads = atoi(argv[3]); int posi = atoi(argv[4]); int pass_size = atoi(argv[1]); char * password = malloc(sizeof(char)*(pass_size+1)); password[0] = '\0'; strncat(password,argv[2],pass_size); password[pass_size] = '\0'; DEBUGA("[TID %d] Password %s Size %d",my_tid,password,pass_size); char * temp_password = malloc(sizeof(char)*(pass_size+1)); char * interval_start = malloc(sizeof(char)*(pass_size+1)); char * interval_end = malloc(sizeof(char)*(pass_size+1)); char * recv_buffer = malloc(sizeof(char)*MAX_SIZE); recv_buffer[0] = '\0'; unsigned long long interval_size; int buf_size,mes_tag,mes_tid,buf_id; /* Ask parent for new interval */ pvm_initsend(PvmDataDefault); pvm_pkint(&my_tid,1,1); pvm_send(parent_tid,NEED_INTERVAL); buf_id = pvm_recv(parent_tid,-1); pvm_bufinfo(buf_id,&buf_size,&mes_tag,&mes_tid); if(mes_tag == NEW_INTERVAL){ pvm_upkstr(recv_buffer); } else if(mes_tag == NO_NEW_INTERVAL){ exit(1); } else{ exit(1); } /* Extract data from buffer */ interval_start[0] = '\0'; interval_end[0] = '\0'; temp_password[0]='\0'; strncpy(interval_start,recv_buffer,pass_size); interval_start[pass_size] = '\0'; strncpy(interval_end,recv_buffer+pass_size,pass_size); interval_end[pass_size] = '\0'; pthread_mutex_lock(&mutex_i_manager); init_interval_manager(&i_manager, interval_start, interval_end, password, posi); pthread_mutex_unlock(&mutex_i_manager); /* Creating threads ... 
*/ pthread_t * threads = malloc(sizeof(pthread_t)*number_of_threads); pthread_attr_t attr; int c_thread = 0; for(c_thread = 0; c_thread < number_of_threads ; ++c_thread){ pthread_create(&threads[c_thread],&attr,find_password_thread,(void*)NULL); } while(1){ pthread_mutex_lock(&mutex_action); pthread_mutex_unlock(&mutex_slave_waiting); pthread_cond_wait(&cond_action,&mutex_action); pthread_mutex_lock(&mutex_slave_waiting); if(action == NEW_INTERVAL){ pvm_initsend(PvmDataDefault); pvm_pkint(&my_tid,1,1); pvm_send(parent_tid,NEED_INTERVAL); buf_id = pvm_recv(parent_tid,-1); pvm_bufinfo(buf_id,&buf_size,&mes_tag,&mes_tid); if(mes_tag == NEW_INTERVAL){ pvm_upkstr(recv_buffer); interval_start[0] = '\0'; interval_end[0] = '\0'; strncpy(interval_start,recv_buffer,pass_size); interval_start[pass_size] = '\0'; strncpy(interval_end,recv_buffer+pass_size,pass_size); interval_end[pass_size] = '\0'; DEBUGA("New interval start interval : %s end interval : %s",interval_start,interval_end); pthread_mutex_lock(&mutex_i_manager); set_new_interval(&i_manager,interval_start,interval_end); pthread_mutex_unlock(&mutex_i_manager); } else if(mes_tag == NO_NEW_INTERVAL){ } else{ } } else if (action == FOUND_PASSWORD){ pthread_mutex_lock(&mutex_f_password); pthread_mutex_lock(&mutex_f_c_password); DEBUGA("[TID %d] Found Password",my_tid); pvm_initsend( PvmDataDefault ); pvm_pkstr(found_char_password); pvm_send(parent_tid,FOUND_PASSWORD); pthread_mutex_unlock(&mutex_f_password); pthread_mutex_unlock(&mutex_f_c_password); break; } pthread_mutex_unlock(&mutex_action); } pvm_send(parent_tid,CHILD_TERMINATE); DEBUGA("[TID %d] Terminating slave",my_tid); free(password); free(temp_password); free(interval_start); free(interval_end); free(recv_buffer); return EXIT_SUCCESS; }
/*--------------------------------------------------------------------------- ** MBUSRECV -- Receive a message from another application. We are normally ** called from a program i/o handler while waiting for messages. */ int mbusRecv (int *from_tid, int *to_tid, int *subject, char **host, char **msg) { int bufid, info; int tid = *to_tid; int tag = *subject; int nbytes=0, type=0, source=0; int host_len, msg_len, get_ack=0; char dummy; if (MB_DEBUG) printf("mbRecv: tid = %d tag = %d\n", tid, tag); /* On entry, to_tid/subject may be specified as '-1' to indicate that we * will accept a message from any host for any reason. We unpack the * information from the sender as part of the message and fill it in * on the way out. The caller must remember to reset this value! * * The host/msg pointers may be allocated here and will contain the * data from the message. The caller is responsible for freeing * these pointers when it's done with them, passing in a static array * will segfault. if ((bufid = pvm_nrecv (-1, -1)) < 0) { */ if ((bufid = pvm_recv (tid, tag)) < 0) { switch (bufid) { case PvmBadParam: fprintf (stderr, "mbRecv: %d fails, bad tid/msgtag\n", tid); return (ERR); case PvmSysErr: fprintf (stderr, "mbRecv: %d fails, pvmd not responding\n", tid); return (ERR); } } if (USE_ACK) { info = pvm_upkint (&get_ack, 1, 1); /* Ack required? */ } else { get_ack = 0; info = pvm_upkint (&get_ack, 1, 1); /* Ack required? 
*/ } info = pvm_upkint (from_tid, 1, 1); /* sender */ info = pvm_upkint (to_tid, 1, 1); /* target recipient */ info = pvm_upkint (subject, 1, 1); /* subject */ info = pvm_upkint (&host_len, 1, 1); /* len of host name */ if (host_len > 0) { /* host name (optional) */ *host = calloc (1, host_len); info = pvm_upkbyte (*host, host_len-1, 1); info = pvm_upkbyte (&dummy, 1, 1); } else *host = NULL; info = pvm_upkint (&msg_len, 1, 1); /* len of msg body */ if (msg_len > 0) { /* msg body */ *msg = calloc (1, msg_len); info = pvm_upkbyte (*msg, msg_len-1, 1); info = pvm_upkbyte (&dummy, 1, 1); } else *msg = NULL; if ((info = pvm_bufinfo (bufid, &nbytes, &type, &source)) >= 0) { if (MB_DEBUG) { printf ("\nrecv: %d bytes from %d about %d\n", nbytes,source,type); printf( "mbRecv(%d): from:%d to:%d subj:%d host:'%s'(%d) msg='%s'(%d)\n", get_ack, *from_tid, *to_tid, *subject, *host, host_len, *msg, msg_len); } } /* Return ACK to the sender if requested. */ return (get_ack ? mbusAck (source, type) : OK); }
// The population initializer invokes the genomes' initializers just like the // standard population initializer, but here we farm out the genomes to the // slaves before invoking the initialization. Farm out the genomes and give // the slaves the initialize command rather than the evaluate command. void PopulationInitializer(GAPopulation& pop) { PVMDataPtr data = (PVMDataPtr)pop.userData(); int* index = new int [data->nreq]; int done = 0, outstanding = 0, next = 0; int bufid, status, bytes, msgtag, tid, who; while(!done) { // If we have a genome that needs to be initialized and one of the slaves is // available, then ask the slave to configure a genome and send us back the // configured, initialized genome. if(next < pop.size() && (bufid=pvm_nrecv(-1, MSG_READY)) != 0) { if(bufid > 0) { status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); status = SendGenomeInitialize(pop.individual(next), tid); if(status >= 0) { if((who = id2idx(tid, *data)) >= 0) { index[who] = next; next++; outstanding++; } else { cerr << "PopInit: bogus tid mapping: " << tid << "\n"; } } else { cerr << "PopInit: error sending initialize command to: " << tid; cerr << " genome " << next << " will be inited by next slave\n"; cerr << " error code is: " << status << "\n"; } } else { cerr << "PopInit: error from pvm_nrecv: " << bufid << "\n"; } } // If we have requests for initialization outstanding and a slave has posted // a message stating that it will provide genome data, then get the data from // the slave and stuff it into the appropriate genome in the population. 
if(outstanding > 0 && (bufid=pvm_nrecv(-1, MSG_GENOME_DATA)) != 0) { if(bufid > 0) { status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); if((who = id2idx(tid, *data)) >= 0) { if(index[who] >= 0) { status = RecvGenomeData(pop.individual(index[who])); if(status >= 0) { index[who] = -1; outstanding--; } else { cerr << "PopInit: error receiving data from: " << tid; cerr << " error code is: " << status << "\n"; } } else { cerr << "PopInit: index conflict from tid " << tid << "\n"; } } else { cerr << "PopInit: bogus tid mapping: " << tid << "\n"; } } else { cerr << "PopInit: error from pvm_nrecv: " << bufid << "\n"; } } if(next == pop.size() && outstanding == 0) done = 1; if(next > pop.size()) { cerr << "bogus value for next: " << next; cerr << " popsize is: " << pop.size() << "\n"; } } delete [] index; }
int main(int argc, char** argv) { int pid = pvm_mytid(); if (argc != 2 && argc != 3) stop("Usage: tm_driver <control_file> [TM_tid]\n"); int tm_tid = 0; char * control_file = strdup(argv[1]); if (argc == 3) sscanf(argv[2], "t%x", &tm_tid); int info = 0; // Get the machine configuration int nhost = 0; int narch = 0; struct pvmhostinfo *hostp = 0; info = pvm_config(&nhost, &narch, &hostp); // Parse the control file int to_delete_size = 0; // # of machsto delete char ** to_delete = 0; // names of machs to delete int to_add_size = 0; // # of machsto add char ** to_add = 0; // names of machs to add int delete_proc_num = 0; // # of procs to delete int * tid_delete = 0; // the tids of procs to delete // # of various procs to start int lp_num = 0; int cg_num = 0; int vg_num = 0; int cp_num = 0; int vp_num = 0; // the mach names where the procs shoud be started char ** lp_mach = 0; char ** cg_mach = 0; char ** vg_mach = 0; char ** cp_mach = 0; char ** vp_mach = 0; // Do the parsing. First count ifstream ctl(control_file); if (!ctl) stop("Cannot open parameter file... Aborting.\n"); // Get the lines of the parameter file one-by-one and if a line contains a // (keyword, value) pair then interpret it. const int MAX_PARAM_LINE_LENGTH = 1024; char line[MAX_PARAM_LINE_LENGTH+1], *end_of_line, *keyword, *value, *ctmp; char ch; while (ctl) { ctl.get(line, MAX_PARAM_LINE_LENGTH); if (ctl) { ctl.get(ch); if (ch != '\n') { printf("Too long (>= %i chars) line in the parameter file.\n", MAX_PARAM_LINE_LENGTH); stop("This is absurd. Aborting.\n"); } } end_of_line = line + strlen(line); //-------------------------- First separate the keyword and value ------ keyword = find_if(line, end_of_line, isgraph); if (keyword == end_of_line) // empty line continue; ctmp = find_if(keyword, end_of_line, isspace); if (ctmp == end_of_line) // line is just one word. 
must be a comment continue; *ctmp = 0; // terminate the keyword with a 0 character ++ctmp; value = find_if(ctmp, end_of_line, isgraph); if (value == end_of_line) // line is just one word. must be a comment continue; ctmp = find_if(value, end_of_line, isspace); *ctmp = 0; // terminate the value with a 0 character. this is good even // if ctmp == end_ofline if (str_eq(keyword, "BCP_delete_machine")) { ++to_delete_size; } else if (str_eq(keyword, "BCP_add_machine")) { ++to_add_size; } else if (str_eq(keyword, "BCP_delete_proc")) { ++delete_proc_num; } else if (str_eq(keyword, "BCP_lp_process")) { ++lp_num; } else if (str_eq(keyword, "BCP_cg_process")) { ++cg_num; } else if (str_eq(keyword, "BCP_vg_process")) { ++vg_num; } else if (str_eq(keyword, "BCP_cp_process")) { ++cp_num; } else if (str_eq(keyword, "BCP_vp_process")) { ++vp_num; } } ctl.close(); if (to_delete_size > 0) { to_delete = new char*[to_delete_size]; to_delete_size = 0; } if (to_add_size > 0) { to_add = new char*[to_add_size]; to_add_size = 0; } if (delete_proc_num > 0) { tid_delete = new int[delete_proc_num]; delete_proc_num = 0; } if (lp_num) { lp_mach = new char*[lp_num]; lp_num = 0; } if (cg_num) { cg_mach = new char*[cg_num]; cg_num = 0; } if (vg_num) { vg_mach = new char*[vg_num]; vg_num = 0; } if (cp_num) { cp_mach = new char*[cp_num]; cp_num = 0; } if (vp_num) { vp_mach = new char*[vp_num]; vp_num = 0; } ctl.open(control_file); while (ctl) { ctl.get(line, MAX_PARAM_LINE_LENGTH); if (ctl) { ctl.get(ch); if (ch != '\n') { printf("Too long (>= %i chars) line in the parameter file.\n", MAX_PARAM_LINE_LENGTH); stop("This is absurd. Aborting.\n"); } } end_of_line = line + strlen(line); //-------------------------- First separate the keyword and value ------ keyword = find_if(line, end_of_line, isgraph); if (keyword == end_of_line) // empty line continue; ctmp = find_if(keyword, end_of_line, isspace); if (ctmp == end_of_line) // line is just one word. 
must be a comment continue; *ctmp = 0; // terminate the keyword with a 0 character ++ctmp; value = find_if(ctmp, end_of_line, isgraph); if (value == end_of_line) // line is just one word. must be a comment continue; ctmp = find_if(value, end_of_line, isspace); *ctmp = 0; // terminate the value with a 0 character. this is good even // if ctmp == end_ofline if (str_eq(keyword, "BCP_delete_machine")) { to_delete[to_delete_size++] = strdup(value); } else if (str_eq(keyword, "BCP_add_machine")) { to_add[to_add_size++] = strdup(value); } else if (str_eq(keyword, "BCP_delete_proc")) { sscanf(value, "t%x", &tid_delete[delete_proc_num++]); } else if (str_eq(keyword, "BCP_lp_process")) { lp_mach[lp_num++] = strdup(value); } else if (str_eq(keyword, "BCP_cg_process")) { cg_mach[cg_num++] = strdup(value); } else if (str_eq(keyword, "BCP_vg_process")) { vg_mach[vg_num++] = strdup(value); } else if (str_eq(keyword, "BCP_cp_process")) { cp_mach[cp_num++] = strdup(value); } else if (str_eq(keyword, "BCP_vp_process")) { vp_mach[vp_num++] = strdup(value); } } ctl.close(); // Check that machine deletions and additions are correct char ** last = 0; // Are there duplicates on the to be deleted list ? if (to_delete_size > 0) { sort(to_delete, to_delete + to_delete_size, str_lt); last = unique(to_delete, to_delete + to_delete_size, str_eq); if (to_delete_size != last - to_delete) stop("A machine to be deleted is listed twice... Aborting.\n"); } // Are there duplicates on the to be added list? if (to_add_size > 0) { sort(to_add, to_add + to_add_size, str_lt); last = unique(to_add, to_add + to_add_size, str_eq); if (to_add_size != last - to_add) stop("A machine to be added is listed twice... Aborting.\n"); } int i; char ** mach_list = new char*[nhost + to_add_size]; for (i = 0; i < nhost; ++i) mach_list[i] = strdup(hostp[i].hi_name); sort(mach_list, mach_list + nhost, str_lt); char ** current_list = new char*[nhost + to_add_size]; // Is there a nonexisting machine to be deleted? 
if (to_delete_size > 0) { last = set_difference(to_delete, to_delete + to_delete_size, mach_list, mach_list + nhost, current_list, str_lt); if (last != current_list) stop("A nonexisting machine is to be deleted... Aborting.\n"); last = set_difference(mach_list, mach_list + nhost, to_delete, to_delete + to_delete_size, current_list, str_lt); ::swap(mach_list, current_list); } // Is there an already existing machine to be added? if (to_add_size > 0) { last = set_intersection(to_add, to_add + to_add_size, mach_list, mach_list + nhost, current_list, str_lt); if (last != current_list) stop("A machine to be added is already there... Aborting.\n"); last = merge(to_add, to_add + to_add_size, mach_list, mach_list + nhost, current_list, str_lt); ::swap(mach_list, current_list); } const int mach_num = nhost - to_delete_size + to_add_size; // Check that the machines the new processes are supposed to be started on // really exist. if (lp_num > 0) { sort(lp_mach, lp_mach + lp_num, str_lt); if (set_difference(lp_mach, lp_mach + lp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An lp machine is not in the final machine list... Aborting.\n"); } if (cg_num > 0) { sort(cg_mach, cg_mach + cg_num, str_lt); if (set_difference(cg_mach, cg_mach + cg_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An cg machine is not in the final machine list... Aborting.\n"); } if (vg_num > 0) { sort(vg_mach, vg_mach + vg_num, str_lt); if (set_difference(vg_mach, vg_mach + vg_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An vg machine is not in the final machine list... Aborting.\n"); } if (cp_num > 0) { sort(cp_mach, cp_mach + cp_num, str_lt); if (set_difference(cp_mach, cp_mach + cp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An cp machine is not in the final machine list... 
Aborting.\n"); } if (vp_num > 0) { sort(vp_mach, vp_mach + vp_num, str_lt); if (set_difference(vp_mach, vp_mach + vp_num, mach_list, mach_list + mach_num, current_list, str_lt) != current_list) stop("An vp machine is not in the final machine list... Aborting.\n"); } // Find the tree manager find_tree_manager(pid, tm_tid); // Check that the TM is not on one of the machines to be deleted. if (to_delete_size > 0) { const int dtid = pvm_tidtohost(tm_tid); for (i = 0; i < nhost; ++i) { if (hostp[i].hi_tid == dtid) for (int j = 0; j < to_delete_size; ++j) { if (str_eq(hostp[i].hi_name, to_delete[j])) stop("Can't delete the machine the TM is on. Aborting.\n"); } } } // Check that the TM is not one of the processes to be deleted if (delete_proc_num > 0) { if (find(tid_delete, tid_delete + delete_proc_num, tm_tid) != tid_delete + delete_proc_num) stop("Can't delete the TM... Aborting.\n"); } // Modify the machine configuration if (to_delete_size > 0 || to_add_size > 0) { int * infos = new int[max(to_delete_size, to_add_size)]; if (to_delete_size > 0) if (pvm_delhosts(to_delete, to_delete_size, infos) < 0) { printf("Failed to delete all specified machines...\n"); stop("Please check the situation manually... Aborting.\n"); } if (to_add_size > 0) if (pvm_addhosts(to_add, to_add_size, infos) < 0) { printf("Failed to add all specified machines...\n"); stop("Please check the situation manually... 
Aborting.\n"); } } // Kill the processes to be killed for (i = 0; i < delete_proc_num; ++i) pvm_kill(tid_delete[i]); // Put together a message to be sent to the TM that contains the machine // names on which the new processes should be spawned int len = (lp_num + cg_num + vg_num + cp_num + vp_num) * sizeof(int); if (len > 0) { len += 5 * sizeof(int); for (i = 0; i < lp_num; ++i) len += strlen(lp_mach[i]); for (i = 0; i < cg_num; ++i) len += strlen(cg_mach[i]); for (i = 0; i < vg_num; ++i) len += strlen(vg_mach[i]); for (i = 0; i < cp_num; ++i) len += strlen(cp_mach[i]); for (i = 0; i < vp_num; ++i) len += strlen(vp_mach[i]); char * buf = new char[len]; memcpy(buf, &lp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < lp_num; ++i) { const int l = strlen(lp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, lp_mach[i], l); buf += l; } memcpy(buf, &cg_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < cg_num; ++i) { const int l = strlen(cg_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, cg_mach[i], l); buf += l; } memcpy(buf, &vg_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < vg_num; ++i) { const int l = strlen(vg_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, vg_mach[i], l); buf += l; } memcpy(buf, &cp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < cp_num; ++i) { const int l = strlen(cp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, cp_mach[i], l); buf += l; } memcpy(buf, &vp_num, sizeof(int)); buf += sizeof(int); for (i = 0; i < vp_num; ++i) { const int l = strlen(vp_mach[i]); memcpy(buf, &l, sizeof(int)); buf += sizeof(int); memcpy(buf, vp_mach[i], l); buf += l; } buf -= len; pvm_initsend(PvmDataRaw); pvm_pkbyte(buf, len, 1); pvm_send(tm_tid, BCP_CONFIG_CHANGE); int bufid = pvm_recv(tm_tid, -1); int bytes = 0, msgtag = 0; pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid); if (msgtag == BCP_CONFIG_ERROR) stop("TM had difficulties. 
Please check the situation manually.\n"); } pvm_exit(); return 0; }
/// user function:add a new query to a pipeline of filters. Called by user manager runs this. /// \param layout System Layout. /// \param work Buffer with a Unit of Work (UoW) /// \param workSize Unit of Work Size (UoW) /// \return Zero on success, -1 on error. int appendWork(Layout *layout, void *work, int workSize) { #ifdef NO_BARRIER // sends work for each filter pvm_initsend(PvmDataRaw); // First tell that is a mensage of WORK int msgType = MSGT_WORK; pvm_pkint(&msgType, 1, 1); //then attach the work to it pvm_pkbyte((char *)work, workSize, 1); // for each filter, send his work for(i = 0; i < layout->numFilters; i++) { FilterPlacement *pFilterP = &(layout->filters[i]->filterPlacement); // sends work to all filters of this set pvm_mcast(pFilterP->tids, pFilterP->numInstances, 0); } #else int i; int totalEows = 0, numEowsReceived; #ifdef ATTACH int totalAttachedFilters = 0; #endif int reconf = 0 /** should we reconfigure? */, remainingReconfs = 0; //how many times should we try? #ifdef VOID_INST #ifdef BMI_FT char instDir[MAX_IDIR_LENGTH]; sprintf(instDir, "%s/", INST_DIR); for (i=0; i < layout->numFilters-1; i++) { if (strlen(instDir) >= (MAX_IDIR_LENGTH - 3)) { //dont want to overflow this array fprintf(stderr, "%s %d: warning, instrumentation directory name too big, truncating to %s\n", __FILE__, __LINE__, instDir); break; } sprintf(instDir, "%s%d-", instDir, layout->filters[i]->filterPlacement.numInstances); } sprintf(instDir, "%s%d", instDir, layout->filters[layout->numFilters-1]->filterPlacement.numInstances); char managerLogFile[MAX_IDIR_LENGTH+20]; sprintf(managerLogFile, "%s/manager.time", instDir); FILE *fp = fopen(managerLogFile, "w"); struct timeval tv; struct timezone tz; //not used //get the time gettimeofday(&tv, &tz); if(fp != NULL) fprintf(fp, "1 %ld %ld\n", tv.tv_sec, tv.tv_usec); #endif #endif //before sending, we check if we received any filter error int bufid = pvm_probe(-1, MSGT_FERROR); if (bufid != 0) { int bytes, tag, tid; char *msg; 
pvm_bufinfo(bufid, &bytes, &tag, &tid); msg = (char*)malloc(bytes+1); pvm_recv(tid, MSGT_FERROR); pvm_upkbyte(msg, bytes, 1); msg[bytes] = '\0'; fprintf(stderr, "Manager.c: Error, received death notification\n"); fprintf(stderr, "Manager.c: %s\n", msg); free(msg); killAllFilters(layout); exit(-1); } printf("Manager.c: starting work...\n"); // number of EOWs we expect to receive for(i = 0; i < layout->numFilters; i++) { totalEows += layout->filters[i]->filterPlacement.numInstances; #ifdef ATTACH if(layout->filters[i]->attached) { totalAttachedFilters += layout->filters[i]->filterPlacement.numInstances; } // TODO:TOSCO::: if(layout->filters[i]->attach) return 0; #endif } //we stay in this loop while we have to reconfigure //usually, this will be only one time, unless a we get some reconf message do { // sends work for each filter pvm_initsend(PvmDataRaw); int msgType; #ifdef VOID_FT if(!reconf) { #endif // First tell that is a mensage of WORK msgType = MSGT_WORK; #ifdef VOID_FT } else { // one fault has occurred msgType = MSGT_FT; } #endif pvm_pkint(&msgType, 1, 1); //then attach the work to it pvm_pkbyte((char *)work, workSize, 1); reconf = 0; //we are optimistic, always expect to not reconfigure // for each filter, send his work for(i = 0; i < layout->numFilters; i++) { FilterPlacement *pFilterP = &(layout->filters[i]->filterPlacement); // sends work to all filters of this set pvm_mcast(pFilterP->tids, pFilterP->numInstances, 0); } /* TaskIdList *finalTaskIdList = NULL, *currentTaskIdList; int filtersThatUseTasks = 0; // Manager receives filter's terminated tasks list for(i = 0; i < layout->numFilters; i++) { FilterPlacement *pFilterP = &(layout->filters[i]->filterPlacement); #ifdef ATTACH // if this filter is of the attached // type i dont need worry about use Task // if(layout->filters[i]->attached)continue; #endif for(j = 0; j < pFilterP->numInstances; j++) { int instanceUseTasks = -1; // Get is this filter use tasks pvm_recv(pFilterP->tids[j], 0); 
pvm_upkint(&instanceUseTasks, 1, 1); layout->filters[i]->useTasks = instanceUseTasks; #ifdef VOID_FT if (instanceUseTasks) { currentTaskIdList = (TaskIdList *)unpackTaskIdList(); // Para fazer intersecao, gerente ordenar? as listas de tarefas recebidas e utilizar? a fun??o meet() do CrazyMiner/ID3. qsort(currentTaskIdList->vetor, currentTaskIdList->size, sizeof(int), compareTaskId); if(finalTaskIdList == NULL) { finalTaskIdList = currentTaskIdList; } else { // Manager makes the intersection of all finished tasks lists finalTaskIdList = taskIdListIntersection(finalTaskIdList, currentTaskIdList); taskIdListDestroy(currentTaskIdList); } } #endif } // for if (layout->filters[i]->useTasks) filtersThatUseTasks++; } // Gerente devolve resultado das intersecoes para todas as instancias de todos os filtros. for(i = 0; i < layout->numFilters; i++) { FilterPlacement *pFilterP = &(layout->filters[i]->filterPlacement); #ifdef ATTACH // if this filter is of the attached // type i dont need worry about use Task //TODO if(layout->filters[i]->attached)continue; #endif int needForwardTaskMessages = 1; if (filtersThatUseTasks < 2) needForwardTaskMessages = 0; #ifdef VOID_FT if (layout->filters[i]->useTasks) { // Send if they should forward task messages // and pigback :-) the final task id list pvm_initsend(PvmDataDefault); pvm_pkint(&needForwardTaskMessages, 1, 1); packTaskIdList(finalTaskIdList); pvm_mcast(pFilterP->tids, pFilterP->numInstances, 0); } else { #endif // Only send if they should forward task messages pvm_initsend(PvmDataDefault); pvm_pkint(&needForwardTaskMessages, 1, 1); pvm_mcast(pFilterP->tids, pFilterP->numInstances, 0); #ifdef VOID_FT } #endif } taskIdListDestroy(finalTaskIdList); */ //now we receive the EOWs numEowsReceived = 0; //now we expect to receive EOW or errors while(numEowsReceived < totalEows) { //we are open to receive anything from anyone here //all messages to the manager should be tagged, so we now their type int szMsg = -1; int inst_tid = -1; int 
msgTag = -1; int bufid = pvm_recv(-1, -1); pvm_bufinfo(bufid, &szMsg, &msgTag, &inst_tid); switch (msgTag) { case (MSGT_EOW): { //received EOW, expect this usually int instance = -1; FilterSpec *pFilter = NULL; getFilterByTid(layout, inst_tid, &pFilter, &instance); if ((pFilter != NULL) && (instance != -1)) { printf("Manager.c: EOW received from %s, instance %d\n", pFilter->name, instance); } else { fprintf(stderr, "Manager.c: unknown EOW received! Shouldnt get here!\n"); } numEowsReceived++; break; } case (MSGT_AEXIT): case (MSGT_FERROR): { //someone called dsExit or system error at the filter side //common cause for this are library not found, wrong initscritpt etc char *message = (char*)malloc(sizeof(char)*szMsg+1); pvm_upkbyte(message, szMsg, 1); message[szMsg] = '\0'; //the filter and the instance FilterSpec *fp = NULL; int instance = -1; getFilterByTid(layout, inst_tid, &fp, &instance); if (msgTag == MSGT_AEXIT) { printf("Manager.c: Filter %s, instance %d(tid %x) called dsExit: %s\n", fp->name, instance, inst_tid, message); } else { printf("Manager.c: Filter %s error, instance %d(tid %x) called exit: %s\n", fp->name, instance, inst_tid, message); } free(message); // kill all instances killAllFilters(layout); exit(-1); break; } //task exited or host crashed case (MSGT_TEXIT): case (MSGT_HDEL): { //we only reconfigure a fixed number of times if (remainingReconfs <= 0) { //max number of reconfigurations reached... 
aborting fprintf(stderr, "Manager.c: max reconfigurations reached, aborting...\n"); fflush(stderr); fprintf(stdout, "Manager.c: max reconfigurations reached, aborting...\n"); fflush(stdout); reconf = 0; // kill all instances which might be alive killAllFilters(layout); exit(-1);; } #ifdef BMI_FT gettimeofday(&tv, &tz); fprintf(fp, "2 %ld %ld\n", tv.tv_sec, tv.tv_usec); #endif remainingReconfs--; reconf = 1; // In case of pvm notification, inst_tid will be t80000000 int notifiesRecv = 1; // We are receiving the first death notification int deadFilterTid = -1; FilterSpec *pFilter = NULL; int instanceDead = -1; // Get the tid and name of the dead filter int info = pvm_upkint(&deadFilterTid, 1, 1); if (info < 0) pvm_perror("Manager.c: error calling pvm_upkint"); //discover which filter died getFilterByTid(layout, deadFilterTid, &pFilter, &instanceDead); if((pFilter != NULL) && (instanceDead != -1)) { if (msgTag == MSGT_TEXIT) { fprintf(stderr, "Manager.c: filter %s: instance %d (tid t%x) of %d is dead!!!\n", pFilter->name, instanceDead, deadFilterTid, pFilter->filterPlacement.numInstances); } else { fprintf(stderr, "Manager.c: filter %s: instance %d (tid t%x) of %d machine's crashed!!!\n", pFilter->name, instanceDead, deadFilterTid, pFilter->filterPlacement.numInstances); } } printf("Manager.c: starting reconfiguration\n"); // kill all filters in the pipeline killAllFilters(layout); if (msgTag == MSGT_HDEL) { //int his case, host died, so we must change layout replaceCrashedHost(layout, pFilter, instanceDead); } #ifdef ATTACH if (pFilter->attached) { // In this case, all filters that were killed have to notify // their dead. 
notifiesRecv = 0; } #endif //Flush the streams //receive all messages which are about to arrive till we get the death notification //pvm order should garantee this #ifdef ATTACH while (notifiesRecv < (totalEows - totalAttachedFilters)) { #else while (notifiesRecv < totalEows) { #endif int newMsgTag = -1; bufid = pvm_recv(-1, MSGT_TEXIT); info = pvm_bufinfo(bufid, NULL, &newMsgTag, &inst_tid); info = pvm_upkint(&deadFilterTid, 1, 1); if (info < 0) pvm_perror("Manager.c: error calling pvm_upkint"); fprintf(stderr, "Manager.c: WARNING: received notification (tag %d) about pvm tid t%x death\n", newMsgTag, deadFilterTid); notifiesRecv++; } #ifdef ATTACH if(pFilter->attached) { notifiesRecv = 1; } else { notifiesRecv = 0; } // Receive all EOW messages from the attached filters. while(notifiesRecv < totalAttachedFilters) { int newMsgTag = -1; bufid = pvm_recv(-1, MSGT_EOW); info = pvm_bufinfo(bufid, NULL, &newMsgTag, &inst_tid); if (info < 0) pvm_perror("Manager.c: error calling pvm_upkint"); fprintf(stderr, "Manager.c: WARNING: received EOW (tag %d) from pvm tid t%x\n", newMsgTag, inst_tid); notifiesRecv++; } #endif // probes for remaining machine crash notifications while (pvm_probe(-1, MSGT_HDEL) > 0) { int newMsgTag = -1; bufid = pvm_recv(-1, MSGT_HDEL); info = pvm_bufinfo(bufid, NULL, &newMsgTag, &inst_tid); info = pvm_upkint(&deadFilterTid, 1, 1); if (info < 0) pvm_perror("Manager.c: error calling pvm_upkint"); fprintf(stderr, "Manager.c: WARNING: received notification (tag %d) about pvm tid t%x machine's crash\n", newMsgTag, deadFilterTid); // Replace the died host FilterSpec *pCrashedFilter = NULL; int crashedInstance = -1; getFilterByTid(layout, deadFilterTid, &pCrashedFilter, &crashedInstance); replaceCrashedHost(layout, pCrashedFilter, crashedInstance); } #ifdef BMI_FT updateAllFiltersFaultStatus(layout, FAULT_OTHER_FILTER_INST); pFilter->faultStatus = instanceDead; #endif //spawn all filters again spawnAllFilter(layout); #ifdef ATTACH // Verifies if the dead 
filter is an attached. If yes, spawn it. if(pFilter->attached == 1) { spawnOneAttachedInstance(layout, pFilter, instanceDead); } #endif resetStreams(layout); //resend the data sendFiltersData(layout); //start all over again numEowsReceived = 0; #ifdef BMI_FT gettimeofday(&tv, &tz); fprintf(fp, "3 %ld %ld\n", tv.tv_sec, tv.tv_usec); #endif break; } #ifdef VOID_TERM // One filter instance detected local termination case (MSGT_LOCALTERM): { int localTermTag; // filter instance local termination tag pvm_upkint(&localTermTag, 1, 1); verifyGlobalTermination(inst_tid, localTermTag); break; } #endif default: { fprintf(stderr, "Manager.c: error receiving EOW, unknown tag!!!\n"); } } //end switch message tag if((msgTag == MSGT_TEXIT) || (msgTag == MSGT_HDEL)) { // work should be sent again break; } } //end receiving eows } while(reconf == 1); //leave this loop if we will not reconfigure #ifdef BMI_FT gettimeofday(&tv, &tz); fprintf(fp, "4 %ld %ld\n", tv.tv_sec, tv.tv_usec); #endif printf("Manager.c: Work ended\n\n"); return 0; #endif } /// Finalize a Void pipeline. Only manager runs this. int finalizeDs(Layout *layout) { #ifdef NO_BARRIER #else int i; // Envia eof para todos os filtros // Primeiro envia se eh work (WORK) ou EOF (END_OF_FILTER) pvm_initsend(PvmDataRaw); int tipo_msg = MSGT_EOF; pvm_pkint(&tipo_msg, 1, 1); //sends the EOF message for all instances of the filter for(i = 0; i < layout->numFilters; i++) { FilterPlacement *pFilterP = &(layout->filters[i]->filterPlacement); #ifdef ATTACH if(layout->filters[i]->attach) continue;// this filter cant not receive a EOF because // it needs still runnig #endif pvm_mcast(pFilterP->tids, pFilterP->numInstances, 0); } destroyLayout(layout); pvm_exit(); return 0; #endif }
/* message handling routine */ void p_handle(int b) { int i,k,len,tid,tag; jobinfo *j; jobinfo tj; pvm_bufinfo(b,&len,&tag,&tid); switch (tag) { case TAG_HELLO: if (master) { wtid = realloc(wtid,sizeof(int) * ++nw); wtid[nw-1] = tid; for (i=1;i<nw;i++) { pvm_initsend(0); k = i << BASE_SHIFT; pvm_pkint(&k,1,1); pvm_pkint(&nw,1,1); pvm_pkint(wtid,nw,1); pvm_send(wtid[i],TAG_WORKERS); } } break; case TAG_WORKERS: pvm_upkint(&nextjid,1,1); nextjid <<= BASE_SHIFT; pvm_upkint(&nw,1,1); free(wtid); wtid = malloc(sizeof(int) * nw); pvm_upkint(wtid,nw,1); break; case TAG_KILL: die("Received TAG_KILL. Too lazy to do anything useful.\n"); case TAG_REQUEST: if (j = freejob()) { pvm_initsend(0); pkjobinfo_active(j); pvm_send(tid,TAG_JOB); add_stolen(tid,j->jid); free(j); } else if (!victim(tid)) { pvm_initsend(0); pvm_send(tid,TAG_NO_JOB); } break; case TAG_DONE: upkjobinfo_done(&tj); if (cj && cj->jid == tj.pjid) slow_absorb(cj,&tj.s); else if (!tj.jid && master) { j = malloc(sizeof(jobinfo)); *j = tj; add_jobarray(j); } else if ((i = find_jobarray(tj.pjid)) >= 0) { slow_absorb(ja[i],&tj.s); if (ja[i]->status == JOB_DONE && !(!ja[i]->jid && master)) { send_done(ja[i]); free(ja[i]); del_jobarray(i); } } else if (tj.tid = find_stolen(tj.pjid)) // if 0, job was cancelled send_done(&tj); break; case TAG_CANCEL: pvm_upkint(&k,1,1); if (cj && cj->jid == k) { cj->status = JOB_CANCELLED; if (p_head > 0) cancel_children(cj); p_head = 1000; add_stolen(0,cj->jid); } else if ((i = find_jobarray(k)) >= 0) { cancel_children(ja[i]); add_stolen(0,ja[i]->jid); free(ja[i]); del_jobarray(i); } else if (tid = find_stolen(k)) send_cancel(tid,k); break; case TAG_STAT: default: die("Warning: ignoring invalid message\n"); } }
// Construct an input stream by receiving one PVM message from processor
// fromProcNo.
//
// When bufSize is zero the constructor polls pvm_probe until a message with
// this stream's message type is pending, sizes the buffer to that message,
// and then receives it; otherwise the message is received straight into the
// pre-sized buffer. Any PVM failure, or a message larger than the buffer,
// is reported through FatalErrorIn and aborts.
IPstream::IPstream
(
    const int fromProcNo,
    const label bufSize,
    streamFormat format,
    versionNumber version
)
:
    Pstream(bufSize),
    Istream(format, version),
    fromProcNo_(fromProcNo),
    messageSize_(0)
{
    setOpened();
    setGood();

    int bufid, tag, tid;

    // If the buffer size is not specified then probe the incoming message
    if (!bufSize)
    {
        // Probe read buffer until message arrives. pvm_probe returns 0 while
        // nothing is pending and a negative code on error, so only 0 may
        // keep the loop spinning.
        while ((bufid = pvm_probe(procID(fromProcNo_), msgType())) == 0)
        {}

        if (bufid < 0)
        {
            FatalErrorIn("IPstream::IPstream(const int fromProcNo)")
                << "pvm_probe failed with error code " << bufid
                << ::abort;
        }

        // When the message arrives find its size
        if (pvm_bufinfo(bufid, &messageSize_, &tag, &tid) < 0)
        {
            FatalErrorIn("IPstream::IPstream(const int fromProcNo)")
                << "pvm_bufinfo cannot query incomming message"
                << ::abort;
        }

        // Resize buffer to message size
        buf_.setSize(messageSize_);
    }

    // Read message into buffer
    if
    (
        pvm_precv
        (
            procID(fromProcNo_),
            msgType(),
            buf_.begin(),
            buf_.size(),
            PVM_BYTE,
            &tid, &tag, &messageSize_
        ) != PvmOk
    )
    {
        FatalErrorIn("IPstream::IPstream(const int fromProcNo)")
            << "pvm_precv cannot receive incomming message"
            << ::abort;
    }

    // Check size of message read
    if (messageSize_ > buf_.size())
    {
        FatalErrorIn("IPstream::IPstream(const int fromProcNo)")
            << "buffer (" << buf_.size()
            << ") not large enough for incomming message ("
            << messageSize_ << ')'
            << ::abort;
    }
}
// Hook up to the pvm and spawn the slave processes. int PVMDemeGA::spawn(const char* slavename) { _mid = pvm_mytid(); if(_mid < 0) { cerr << "\n" << className() << ": spawn:\n"; cerr << " Bad ID for master task. Have you started the PVM?\n"; return _status = _mid; } struct pvmhostinfo* hostp; _status = pvm_config(&_nhosts, &_narch, &hostp); if(_status == PvmSysErr) { cerr << "\n" << className() << ": spawn:\n"; cerr << " PVM not responding. Have you started the PVM?\n"; return _status; } _Ntid = npop; _tid = new int [_Ntid]; // task IDs for the slaves char sn[32]; // PVM is not const-safe... strcpy(sn, slavename); _ntid = pvm_spawn(sn, (char**)0, 0, "", _Ntid, _tid); if(_ntid <= 0) { cerr << className() << ": spawn:\n Error spawning slaves.\n"; cerr << " Error codes of failed spawns are:\n"; for(int i=0; i<_Ntid; i++) { cerr << " slave "; cerr.width(3); cerr<<i<<": "<<_tid[i]<<"\n"; } pvm_exit(); return _status = -1; } else if(_ntid < _Ntid) { cerr << className() << ": spawn:\n "; cerr << " Spawned only "<<_ntid<<" of "<<_Ntid<<"\n"; cerr << " Error codes of failed spawns are:\n"; for(int i=0; i<_Ntid; i++) { if(_tid[i] < 0) { cerr << " slave "; cerr.width(3); cerr<<i<<": "<<_tid[i]<<"\n"; } } } else { cerr << className() << ": spawn:\n"; cerr << " Spawned " << _Ntid << " slave processes...\n"; } #ifdef DEBUG cerr << "waiting for response from slaves...\n"; #endif int flag = _ntid; while(flag > 0) { int bufid = pvm_recv(-1, -1); if(bufid >= 0) { int bytes, msgtag, tid; _status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); int which = tid2idx(tid); switch(msgtag) { case MSG_READY: #ifdef DEBUG cerr << " slave " << tid << " (" << which << ") is alive\n"; #endif flag--; break; default: cerr << className() << ": spawn:\n"; cerr << " unexpected msgtag: " << msgtag << "\n"; break; } } else { cerr << className() << ": spawn:\n"; cerr << " error from pvm_recv: " << bufid << "\n"; } } #ifdef DEBUG cerr << "slaves appear to be up and running.\n"; #endif return _status; }
int main (int argc, char **argv) { int r_bufid, info, bytes, msgtag, parent, endofprocess = 0; heur_prob *p = (heur_prob *) calloc(1, sizeof(heur_prob)); parent = receive(p); printf("\nWelcome, I am task %i\n\n", pvm_mytid()); while(!endofprocess){ printf("\nim gonna try to receive at parallel_process.\n"); PVM_FUNC(r_bufid, pvm_recv(-1, -1)); PVM_FUNC(info, pvm_bufinfo(r_bufid, &bytes, &msgtag, &parent)); printf("\nim still in parallel_process\n"); switch(msgtag){ case S_EXCHANGE: exchange(parent, p); break; case S_EXCHANGE2: exchange2(parent, p); break; case S_FARNEAR_INS: farnear_ins(parent, p); break; case S_FARTHEST_INS: farthest_ins(parent, p); break; case S_MST: mst(); break; case S_NEAREST_INS: nearest_ins(parent, p); break; case S_NEAR_CLUSTER: near_cluster(parent, p); break; case S_SAVINGS: savings(parent, p); break; case S_SAVINGS2: savings2(parent, p); break; case S_SAVINGS3: savings3(parent, p); break; case S_SWEEP: sweep(parent, p); break; case S_TSP_FI: tsp_fi(parent, p); break; case S_TSP_FINI: tsp_fini(parent, p); break; case S_TSP_NI: tsp_ni(parent, p); break; case STOP: endofprocess = 1; } } return 0; }
// To evolve the genetic algorithm, we loop through all of our populations and // tell each process to evolve its population for a certain number of // generations. Then allow the migrator to do its thing. Each process is // supposed to keep track of the statistics for its population, so we reap // those as well. void PVMDemeGA::step() { if(_mid == 0) return; #ifdef DEBUG cerr << "sending step command to slaves...\n"; #endif for(int j=0; j<_ntid; j++) { int nsteps = 10; _status = pvm_initsend(PvmDataDefault); _status = pvm_pkint(&nsteps, 1, 1); _status = pvm_send(_tid[j], MSG_STEP); } #ifdef DEBUG cerr << "waiting for slaves to step...\n"; #endif int flag = _ntid; while(flag > 0) { int bufid = pvm_recv(-1, -1); if(bufid >= 0) { int bytes, msgtag, tid; _status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); switch(msgtag) { case MSG_STEP_COMPLETE: flag--; #ifdef DEBUG cerr << " tid " << tid << " has finished step\n"; #endif break; default: cerr << className() << ": step:\n"; cerr << " unexpected msgtag: " << msgtag << "\n"; break; } } else { cerr << className() << ": step:\n"; cerr << " error from pvm_recv: " << bufid << "\n"; } } migrate(); // Now update the statistics and individuals in our local populations. We copy // all of the distributed individuals into our own populations then do the // statistics updates. Since we copy, we don't force any new evaluations. If // you don't need to keep the master up-to-date, then comment out this section // and just let the slaves run on their own. 
collect(); for(unsigned int jj=0; jj<npop; jj++) pstats[jj].update(*deme[jj]); stats.numsel = stats.numcro = stats.nummut = stats.numrep = stats.numeval=0; for(unsigned int kk=0; kk<npop; kk++) { pop->individual(kk).copy(deme[kk]->best()); stats.numsel += pstats[kk].numsel; stats.numcro += pstats[kk].numcro; stats.nummut += pstats[kk].nummut; stats.numrep += pstats[kk].numrep; stats.numeval += pstats[kk].numeval; } pop->touch(); stats.update(*pop); for(unsigned int ll=0; ll<npop; ll++) stats.numpeval += pstats[ll].numpeval; }
int main(int argc, char **argv) { int tid; int parent; struct pvmhostinfo *hostp; int nhost, narch; tid = pvm_mytid(); if(tid < 0) pvm_ferror("pvm_mytid", 1); parent = pvm_parent(); if(parent == PvmNoParent || parent == PvmParentNotSet) { /* Processus pere */ int nchildren, children[MAXCHILDREN]; int i, j, res, rc; int bytes, tag, from_tid; /* Ask PVM for information about the virtual machine, and display it to the user. */ pvm_config(&nhost, &narch, &hostp); printf("I found the following %d hosts...\n",nhost); for (i = 0; i < nhost; i++) printf("%d. %s \t(%s)\n",i,hostp[i].hi_name,hostp[i].hi_arch); rc = pvm_spawn("pvm_mandel", NULL, PvmTaskDefault, NULL, NUMCHILDREN, children); if(rc < 0) pvm_ferror("pvm_spawn", 1); printf("%d enfants\n", rc); nchildren = 0; for(i = 0; i < NUMCHILDREN; i++) { printf("Enfant %d, tid = %d\n", i, children[i]); if(children[i] >= 0) nchildren++; if(nchildren < 1) pvm_ferror("Pas d'enfants", 0); } for(i = -MAXX; i <= MAXX; i++) { for(j = -MAXY; j <= MAXY; j++) { rc = pvm_recv(-1,-1); if (rc < 0) { printf("An error occurred when trying to receive a message.\n"); break; } /* Find out who this message is from, and how big it is. 
*/ rc = pvm_bufinfo(rc,&bytes,&tag,&from_tid); /* printf("received message from %s of %d bytes, tag %d\n", get_host_by_tid(hostp,nhost,from_tid), bytes, tag); */ rc = pvm_upkint(&res, 1, 1); if(rc < 0) pvm_ferror("pvm_upkint", 1); cases[i + MAXX][j + MAXY] = res; } } dump_ppm("mandel.ppm", cases); printf("Fini.\n"); pvm_exit(); exit(0); } else if(parent >= 0) { /* On est l'un des fils */ double x, y; int i, j, res, rc; printf("Fils: %d\n", tid); for(i = -MAXX; i <= MAXX; i++) { for(j = -MAXY; j <= MAXY; j++) { x = 2 * i / (double)MAXX; y = 1.5 * j / (double)MAXY; res = mandel(x, y); rc = pvm_initsend(PvmDataDefault); if(rc < 0) pvm_ferror("pvm_initsend", 1); rc = pvm_pkint(&res, 1, 1); if(rc < 0) pvm_ferror("pvm_pkint", 1); rc = pvm_send(parent, 0); if(rc < 0) pvm_ferror("pvm_send", 1); } } printf("Fils %d termine.\n", tid); pvm_exit(); exit(0); } else pvm_ferror("pvm_parent", 1); assert(0); }
int main(int argc, char ** argv){ TAILQ_INIT(&head); int nproc, numt, i, nhost, narch; int num_tasks = 0; struct pvmhostinfo *hostp; struct stat s; FILE * codefile = fopen("./codes", "w"); if(argc != 2){ exit_prog("USAGE: ./taskgen directory\n",1); } else { stat(argv[1], &s); if(s.st_mode & S_IFDIR){ char path[PATH_MAX+1]; realpath(argv[1],path); printf("Loading graphs from %s...", path); fflush(stdout); load_graph_dir(path, &num_tasks); printf("done. Found %d tasks.\n", num_tasks); } else{ printf("%s is not a directory. Please check arguments.\n", argv[1]); exit_prog(NULL, 1); } } pvm_config(&nhost, &narch, &hostp); /*Set number of slaves to start */ nproc = nhost * 3; /*3 processes per host */ printf("Spawning %d worker tasks on %d machines...", nproc, nhost); int tids[nproc]; /*hold the task ids of the workers */ int flags = PvmTaskDefault; // flags += PvmTaskDebug; numt =pvm_spawn("worker", (char**)0, flags, "", nproc, tids); /*start up the workers */ if(numt < nproc){ /*Error Checking */ printf("\n Trouble spawing slaves. Error codes are:\n"); for(i = numt; i < nproc; i++){ printf("TID %d: %d\n", i, tids[i]); } for(i = 0; i < numt; i++){ pvm_kill(tids[i]); } exit_prog("Failure.\n",1); } printf("done. Connected.\n"); /* Main loop */ int bufid, bytes, msgtype, source; int sent_tasks, comp_tasks, found_codes = 0; int finished = 0; int percent_comp = 0; char * buf; while(!finished){ bufid = pvm_recv(-1, -1); /*Accept any message from any task BLOCKING CALL*/ pvm_bufinfo(bufid, &bytes, &msgtype, &source); switch(msgtype){ case MSGREQTASK: if(!TAILQ_EMPTY(&head)){ send_task(source); /*send out the message */ sent_tasks++; } break; case MSGCODE: buf = malloc(bytes); pvm_upkstr(buf); fprintf(codefile, "%s\n", buf); fflush(codefile); comp_tasks++; found_codes++; break; case MSGNOCODE: comp_tasks++; break; default: printf("Incorrect MSGTYPE received from task %d. 
Received: %d\n", source, msgtype); break; } if(((float)comp_tasks/num_tasks * 100) > percent_comp + 1){ printf("Tasks Complete: %d Tasks Sent: %d Percent Complete: %.2f\n", comp_tasks, sent_tasks, (float)comp_tasks/num_tasks * 100); percent_comp = (float)comp_tasks/num_tasks * 100; } if(comp_tasks == num_tasks){ finished = 1; } } printf("All tasks complete.\n"); exit_prog(NULL, 0); return 0; }
int main (int argc, char* argv[]) { if(argc !=4) { printf("usage : ./craquage p r m\n"); return EXIT_FAILURE; } //Initialisation des variables int nb_esclaves = atoi(argv[1]); int* tids = (int*) calloc(nb_esclaves, sizeof(int)); int longueur_mdp = atoi(argv[2]); char* mdp = (char*) calloc(strlen(argv[3])+1, sizeof(char)); strcpy(mdp, argv[3]); //declaration de type de tres long entiers (avec bibliotheque GMP) mpz_t debut_sequence, pas_reel, pas, fin_exec; mpz_init(debut_sequence); mpz_init(pas_reel); mpz_init(pas); mpz_init(fin_exec); //recuperation du chemin de l executable char* chemin = getcwd(NULL, 1000); strcat(chemin, "/craquage_esclave"); //creation des arguments pour l esclave char *argv_esclave[3]; argv_esclave[2]=NULL; argv_esclave[0] = (char*) calloc(strlen(argv[2])+1, sizeof(char)); strcpy(argv_esclave[0],argv[2]); argv_esclave[1] = (char*) calloc(strlen(argv[3])+1, sizeof(char)); strcpy(argv_esclave[1],argv[3]); //printf("strlen %lu, %lu\n", (long unsigned)strlen(argv[2]),(long unsigned) strlen(argv[3])); //printf("nb_esclaves %d\n", nb_esclaves); int i; int trouve = 0; int fini = 0; int size; int nb_envoi = 0; int nb_pas = nb_esclaves*longueur_mdp; int nb_changement = 0; char* envoi_char; int bufid, info, bytes, type, source; char * solution; pvm_catchout(stderr); struct timeval tv1, tv2; gettimeofday(&tv1, NULL); pvm_spawn(chemin, argv_esclave, PvmTaskDefault,"", nb_esclaves, tids); //calcul du pas, fin_exec (= fin execution) mpz_set_ui(debut_sequence, 0); mpz_ui_pow_ui(fin_exec, 15, longueur_mdp+1); mpz_sub_ui(fin_exec, fin_exec, 15); mpz_cdiv_q_ui(fin_exec, fin_exec, 14); mpz_set(pas, fin_exec); mpz_cdiv_q_ui(pas, pas, nb_pas); if(mpz_cmp_ui(pas, 0)==0) { mpz_set_ui(pas,1); } //gmp_printf("fin_exec: %Zd\npas:%Zd\ndebut_sequence:%Zd\n",fin_exec, pas, debut_sequence); //boucle principale while(!trouve && fini!=nb_esclaves) { //Attente de reception de donnees d un esclave bufid = pvm_recv( -1, -1 ); info = pvm_bufinfo( bufid, &bytes, &type, &source ); 
if (info < 0) { printf("Erreur de reception : %d\n", info); exit(1); } //selon le tag du message, demande de donnees ou solution trouvee switch(type) { case(0)://mot de passe trouve solution = calloc(bytes, sizeof(char)); pvm_upkstr(solution); printf("\nLa solution est : %s\n\n", solution); trouve = 1; break; case(1)://esclave veut plus de donnees //prendre en compte la fin des donnees dans le calcul du pas if(nb_changement <= 2 && nb_envoi>=(3*nb_pas/4)) { mpz_cdiv_q_ui(pas, pas, 2); nb_envoi = 0; nb_pas/=2; nb_changement++; } //gmp_printf("fin_exec: %Zd pas:%Zd debut_sequence:%Zd\n",fin_exec, pas, debut_sequence); if(mpz_cmp(debut_sequence, fin_exec)< 0){ mpz_sub(pas_reel, fin_exec, debut_sequence); if(mpz_cmp(pas, pas_reel)<0) { mpz_set(pas_reel, pas); } //envoi des donnes a l esclave pvm_initsend(PvmDataDefault); size = gmp_asprintf(&envoi_char, "%Zd", debut_sequence); pvm_pkint(&size, 1, 1); pvm_pkbyte(envoi_char,size+1, 1); free(envoi_char); size = gmp_asprintf(&envoi_char, "%Zd", pas_reel); pvm_pkint(&size, 1, 1); pvm_pkbyte(envoi_char,size+1 , 1); free(envoi_char); pvm_send(source,0); if(mpz_cmp(pas_reel,pas)!=0) { mpz_set(debut_sequence,fin_exec); } else { mpz_add(debut_sequence, debut_sequence,pas); } nb_envoi++; } else{ fini++ ; printf("Pas de solution pour %d esclave(s)\n", fini); } break; default: break; } } // suppression des esclave for(i=0; i<nb_esclaves;i++) { info = pvm_kill(tids[i]); //printf("Suppression de l esclave %d: retour de pvm_kill: %d\n",i ,info); } pvm_exit(); gettimeofday(&tv2, NULL); printf("%d %ld\n",longueur_mdp,(tv2.tv_sec-tv1.tv_sec)*1000 + (tv2.tv_usec-tv1.tv_usec)/1000); mpz_clear(debut_sequence); mpz_clear(pas_reel); mpz_clear(pas); mpz_clear(fin_exec); free(tids); free(mdp); free(argv_esclave[0]); free(argv_esclave[1]); return EXIT_SUCCESS; }
int main(int, char** argv) { int status = 0; int mytid = pvm_mytid(); int masterid = pvm_parent(); if(mytid < 0 || masterid < 0) { cerr << "\n" << argv[0] << ": Couldn't get slave/master IDs. Aborting.\n"; exit(1); } GA1DBinaryStringGenome genome(GENOME_LENGTH,GenomeEvaluator); GASteadyStateGA ga(genome); status = pvm_initsend(PvmDataDefault); status = pvm_send(masterid, MSG_READY); int done = 0; while(!done){ int bufid = pvm_recv(-1, -1); int ival; if(bufid >= 0) { int bytes, msgtag, tid; status = pvm_bufinfo(bufid, &bytes, &msgtag, &tid); switch(msgtag) { case MSG_DONE: done = 1; break; case MSG_SET_POPULATION_SIZE: ival = gaDefPopSize; status = pvm_upkint(&ival, 1, 1); ga.populationSize(ival); break; case MSG_INITIALIZE: ga.initialize(); break; case MSG_STEP: ival = 0; status = pvm_upkint(&ival, 1, 1); for(int i=0; i<ival; i++) ga.step(); ival = ga.generation(); status = pvm_initsend(PvmDataDefault); status = pvm_pkint(&ival, 1, 1); status = pvm_send(masterid, MSG_STEP_COMPLETE); break; case MSG_INCOMING_MIGRATION: RecvMigration(ga); break; case MSG_SEND_MIGRATION: { int toid = 0, count = 0; status = pvm_upkint(&toid, 1, 1); status = pvm_upkint(&count, 1, 1); SendMigration(toid, ga, count); } break; case MSG_SEND_POPULATION: SendPopulation(masterid, ga.population()); break; case MSG_SEND_STATISTICS: SendStatistics(masterid, ga.statistics()); break; default: cerr << argv[0] << ": unknown msgtag: " << msgtag << "\n"; break; } } else { cerr << argv[0] << ": error from pvm_recv: " << bufid << "\n"; } } pvm_exit(); return 0; }