/// Init a ds pipeline. /// \param confFile XML file configuration, NULL if we want to make things by hand /// \param argc argc received by main function that will be forwarded to filter processes /// \param argv argv received by main function that will be forwarded to filter processes /// \return Layout of the pipeline Layout *initDs(char *confFile, int argc, char **argv) { int err; //configura pvm pra enviar dado diretamente(IMPORTANTE!!!!) pvm_setopt(PvmRoute, PvmRouteDirect); //err == 0, OK err = pvm_start_pvmd(0, NULL, 0); switch (err) { case PvmSysErr: printf("Manager.c: error starting PVM, aborting\n"); exit(1); break; } //start random number generator srandom(getpid()); // config DS to use signals // signal(SIGSEGV, &captureSignal); // signal(SIGINT, &captureSignal); // signal(SIGILL, &captureSignal); // signal(SIGHUP, &captureSignal); // signal(SIGTERM, &captureSignal); if (I_AM_THE_MANAGER) { return initManager(confFile, argc, argv); } else { runFilter(argc, argv); exit(0); } }
/**
 * Construct the PVM manager.
 *
 * Loads the PVM library, selects direct routing, queries the configuration
 * of the virtual machine (stored in nhost, narch and hostp) and prints the
 * host names. The requested mode is stored in `mode`.
 *
 * When _mode is 1 (master) the slave argument list is created, output of
 * child tasks is captured on stdout, and the slave program name is taken
 * from the "-exec" command line option ("test" when the option is absent).
 *
 * timing_flag is set to 1 when "-pvmtime" is on the command line, else 0.
 *
 * The process is terminated if the library cannot be loaded, if the
 * configuration cannot be read, or if "-exec" has the wrong number of
 * arguments.
 */
adpvm_manager::adpvm_manager(int _mode)
{
  cout << "calling load library" << endl;
  if (load_adpvm_library() < 0)
  {
    cerr << "error loading pvm library" << endl;
    exit(1);
  }

  // Channel for communication: direct task-to-task routing.
  pvm_setopt(PvmRoute, PvmRouteDirect);

  // Get and display the configuration of the parallel machine.
  const int config_status = pvm_config(&nhost, &narch, &hostp);
  if (config_status < 0)
  {
    cerr << "error trying to get configuration of pvm (virtual machine)"
         << endl;
    if (config_status == PvmSysErr)
    {
      cerr << " PVM Daemon not responing -- maybe it is not started" << endl;
    }
    ad_exit(1);
  }

  printf("I found the following hosts in your virtual machine\n");
  for (int host = 0; host < nhost; ++host)
  {
    printf(" %s\n", hostp[host].hi_name);
  }

  mode = _mode;
  const bool is_master = (mode == 1);
  if (is_master)
  {
    slave_argv = new adpvm_slave_args(20, 20);

    const int catch_status = pvm_catchout(stdout);
    if (catch_status < 0)
    {
      cerr << "Error in pvm_catchout" << endl;
    }
    strcpy(*slave_argv, "progname");

    // "-exec <name>" selects the slave program; exactly one argument allowed.
    int exec_nopt;
    const int exec_pos =
      option_match(ad_comm::argc, ad_comm::argv, "-exec", exec_nopt);
    if (exec_pos > -1)
    {
      if (exec_nopt != 1)
      {
        cerr << "Wrong number of options to -exec -- must be 1"
                " you have " << exec_nopt << endl;
        ad_exit(1);
      }
      slave_names += ad_comm::argv[exec_pos + 1];
    }
    else
    {
      slave_names += "test";
    }
  }

  // "-pvmtime" switches timing on.
  timing_flag = 0;
  int time_nopt;
  const int time_pos =
    option_match(ad_comm::argc, ad_comm::argv, "-pvmtime", time_nopt);
  if (time_pos > -1)
  {
    timing_flag = 1;
  }
}
void BBSDirect::start() { char* client = 0; int tid, host_mytid; int i, n, ncpu, nncpu; struct pvmhostinfo* hostp; if (started_) { return; } BBSImpl::start(); mytid_ = pvm_mytid(); nrnmpi_myid = 0; if (mytid_ < 0) { perror("start"); } host_mytid = pvm_tidtohost(mytid_); tid = pvm_parent(); if (tid == PvmSysErr) { perror("start"); }else if (tid == PvmNoParent) { is_master_ = true; pvm_catchout(stdout); pvm_setopt(PvmRoute, PvmRouteDirect); pvm_config(&n, NULL, &hostp); nncpu = 0; for (i=0; i < n; ++i) { ncpu = hostp[i].hi_speed; if (ncpu%1000) { hoc_warning(hostp[i].hi_name, " speed in pvm configuration file is not a multiple of 1000. Assuming 1000."); ncpu = 1000; } nncpu += ncpu/1000; } nrnmpi_numprocs = nncpu; ncids = 0; }else{ // a worker, impossible assert(false); } if (nrnmpi_numprocs > 1 && tid == PvmNoParent) { char ** sargv; // args are workingdirectory specialOrNrniv -bbs_nhost nhost args sargv = new char*[nrn_global_argc + 4]; for (i=1; i < nrn_global_argc; ++i) { sargv[i+3] = nrn_global_argv[i]; } sargv[nrn_global_argc + 3] = 0; sargv[0] = rel_working_dir(); //printf("sargv[0]=|%s|\n", sargv[0]); sargv[1] = nrn_global_argv[0]; sargv[2] = "-bbs_nhost"; sargv[3] = new char[10]; sprintf(sargv[3], "%d", nrnmpi_numprocs); cids = new int[nrnmpi_numprocs-1]; if (nrn_global_argv[nrn_global_argc] != 0) { printf("argv not null terminated\n"); exit(1); } BBSDirectServer::server_->start(); bbs_sig_set(); bbs_handle(); //spawn according to number of cpu's (but master has one less) //printf("%d total number of cpus on %d machines\n", nncpu, n); int icid = 0; bool first = true; while (icid < nrnmpi_numprocs - 1) { for (i=0; i < n; ++i) { ncpu = hostp[i].hi_speed; if (ncpu%1000) { ncpu = 1000; } ncpu /= 1000; //printf("%d cpu for machine %d (%s)\n", ncpu, i, hostp[i].hi_name); if (first && hostp[i].hi_tid == host_mytid) { // spawn one fewer on master first time through --ncpu; } if (icid + ncpu >= nrnmpi_numprocs) { ncpu = nrnmpi_numprocs - icid - 1; } //printf("before 
spawn %d processes (icid=%d) for machine %d (%s)\n", ncpu, icid, i, hostp[i].hi_name); if (ncpu) { ncids = pvm_spawn("bbswork.sh", sargv, PvmTaskHost, hostp[i].hi_name, ncpu, cids + icid); if (ncids != ncpu) { fprintf(stderr, "Tried to spawn %d tasks, only %d succeeded on %s\n", ncpu, ncids, hostp[i].hi_name); hoc_execerror("Could not spawn all the requested tasks for", hostp[i].hi_name); } //printf("spawned %d for %s with cids starting at %d\n", ncpu, hostp[i].hi_name, icid); icid += ncpu; } if (icid >= nrnmpi_numprocs) { break; } } first = false; } ncids = icid; printf("spawned %d more %s on %d cpus on %d machines\n", ncids, nrn_global_argv[0], nncpu, n); delete [] sargv[3]; delete [] sargv; } }
/*
 * Establish the connection between the transmitting and the receiving task.
 *
 * Transmitter (p->tr non-zero): lists the tasks of the virtual machine,
 * requires exactly two of them, takes the one whose tid is not
 * p->prot.mytid as the peer, selects direct routing and sends the peer a
 * message (tag 1) containing its own tid.
 *
 * Receiver: blocks for a message from any task with any tag and unpacks the
 * sender's tid from it.
 *
 * In both cases p->prot.othertid holds the peer's tid afterwards.
 * Any failure prints a diagnostic and terminates the process with exit(-1).
 *
 * Returns 0 on success.
 */
int Establish(ArgStruct *p)
{
    /* Task information for the entire parallel machine (if trans) */
    struct pvmtaskinfo *taskp = NULL;
    int ntasks = 0;

    /* Received buffer (if receiver) */
    int buffer_id;

    if (p->tr) {
#ifdef DEBUG
        printf("this is the transmitter\n");
#endif
        /* ntasks and taskp are only meaningful when pvm_tasks succeeds */
        if (pvm_tasks(0, &ntasks, &taskp) < 0) {
            printf("Error, cannot get task list of parallel machine \n");
            exit(-1);
        }
        if (ntasks != 2) {
            printf("Error, too many processes in parallel machine \n");
            printf("Start a clean machine.  n=%d\n", ntasks);
            exit(-1);
        }

        /* Since there are two tasks, one is ours the other is the receiver */
        p->prot.othertid = -1;
        if (taskp[0].ti_tid == p->prot.mytid) {
            p->prot.othertid = taskp[1].ti_tid;
        }
        if (taskp[1].ti_tid == p->prot.mytid) {
            p->prot.othertid = taskp[0].ti_tid;
        }
        if (p->prot.othertid == -1) {
            printf("Error, cannot find other (receiving) task \n");
            printf("Id's:  %d %d  \n", taskp[0].ti_tid, taskp[1].ti_tid);
            /* There is no valid destination to send to */
            exit(-1);
        }

        /* Send the receiver a message.  Tell pvm to keep the channel open */
#ifdef DEBUG
        printf("The receiver tid is %d \n", p->prot.othertid);
#endif
        pvm_setopt(PvmRoute, PvmRouteDirect);
        pvm_initsend(PVMDATA);
        pvm_pkint(&p->prot.mytid, 1, 1);
        if (pvm_send(p->prot.othertid, 1) < 0) {
            printf("Error on send in transmitter\n");
            exit(-1);
        }
    } else {
#ifdef DEBUG
        printf("This is the receiver \n");
#endif
        /* Receive any message from any task */
        buffer_id = pvm_recv(-1, -1);
        if (buffer_id < 0) {
            printf("Error on receive in receiver\n");
            exit(-1);
        }
        pvm_upkint(&p->prot.othertid, 1, 1);
    }

    return 0;
}
/*--------------------------------------------------------------------------- ** MBUSCONNECT -- Connect the client to the message bus. In the case of ** a client we'll look first for a running Supervisor and notify them we've ** come online. The Client connection is complete when a MB_READY state ** is sent. */ int mbusConnect (char *whoAmI, char *group, int singleton) { int i, mytid, super, info, oldval; MBusPtr mbus = (MBusPtr) NULL; char msg[SZ_LINE]; pid_t pid = getpid(); /* Initialize host name. */ mbInitMBHost (); mbSetMBHost (); /* At this point we're only getting state information from the * application cache, we don't talk to the message bus yet. */ if (mbAppGet (APP_INIT)) { fprintf (stderr, "Application already initialized, resetting....\n"); mbInitApp(); /* FIXME: ....disconnect old client.....*/ } /*if (isSupervisor (whoAmI) ) {*/ if (MB_DEBUG) printf ("Re-Initializing default message bus...\n"); mbusInitialize (whoAmI, NULL); /* Attach this process to the message bus. */ if ((mytid = pvm_mytid()) < 0) { pvm_perror (whoAmI); exit (1); } oldval = pvm_setopt (PvmAutoErr, 0); /* disable libpvm msgs */ /*}*/ /* No MBus pointer found meaning we have no connection established * or context yet. If we're a SUPERVISOR, reinitialize. Otherwise, * now is the time to connect and set the state. */ mbus = mbAppGetMBus (); if (mbus == (MBusPtr) NULL) { /* Message bus not initialized by supervisor. * Attach this process to the message bus. */ if ((mytid = pvm_mytid()) < 0) { pvm_perror (whoAmI); exit (1); } oldval = pvm_setopt (PvmAutoErr, 0); /* disable libpvm msgs */ /* Allocate an message bus structure. We'll update the state from * the supervisor once connected and setup. */ if (MB_DEBUG) printf ("Allocating mbus struct ...\n"); mbus = (MBusPtr) calloc (1, sizeof (MBus)); } /********************** * CLIENT CODE **********************/ if (! isSupervisor (whoAmI)) { /* If we're a Client, look for a Supervisor. 
If we can't find a * Supervisor the simply return an error, individuals client may * handle it differently. */ for (i=MB_CONNECT_RETRYS; i; i--) { info = pvm_lookup (SUPERVISOR, -1, &super); if (info == PvmNoEntry) { fprintf (stderr, "Can't find Supervisor on msgbus, retrying\n"); sleep (1); } else { if (MB_DEBUG) { printf ("Supervisor on msgbus at tid=%d\n", super); printf ("Registering '%s' with VM....\n", whoAmI); } if (pvm_insert (whoAmI, -1, mytid) < 0) { fprintf (stderr, "Register of '%s' failed....\n", whoAmI); return (ERR); } else { /* Send the Supervisor a CONNECT message. */ memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", mytid, whoAmI, mbGetMBHost(), pid); mbusSend (SUPERVISOR, ANY, MB_CONNECT, msg); } break; } } if (i == 0) { if (MB_VERBOSE) fprintf (stderr, "Supervisor not on msgbus, returning\n"); mytid *= (-1); super = ERR; /*return (((-1) * mytid)); */ } /********************* * SUPERVISOR CODE **********************/ } else if (isSupervisor (whoAmI)) { /* If we're a Supervisor, check whether we're already registered. */ info = pvm_lookup (SUPERVISOR, -1, &super); if (info == PvmBadParam) { fprintf (stderr, "Supervisor lookup error, BadParam...\n"); return (ERR); } else if (info == PvmNoEntry) { /* Supervisor does not exist */ if (MB_DEBUG) fprintf (stderr, "Supervisor not found, registering...\n"); if (pvm_insert (SUPERVISOR, -1, (super = mytid)) < 0) { fprintf (stderr, "Supervisor register failed....\n"); return (ERR); } } else if (info >= 0) { if (MB_DEBUG) fprintf (stderr, "Supervisor already registered at tid=%d, validating..\n", super); /* Try sending a message to the registered tid to see if it is * alive. */ if (mbusPing (super, 500) == OK) { if (singleton) { /* tell other supervisor we're taking over.... 
*/ printf ("got a ping reply....\n"); } else { fprintf (stderr, "ERROR: Supervisor already registered at tid=%d\n", super); return (ERR); } } else { /* The registered supervisor didn't respond, so delete it * from the database and try again. */ extern int pvmreset(); if (MB_DEBUG) fprintf (stderr, "Cleaning up earlier super at %d(%d)...\n", info, super); /* FIXME: ....disconnect old supervisor.....*/ (void) pvmreset (mytid, 1, "", 0); if (pvm_delete (SUPERVISOR, info) != PvmOk) if (MB_DEBUG) fprintf (stderr, "Supervisor cleanup failed\n"); if ((info = pvm_kill (super)) != PvmOk) if (MB_DEBUG) fprintf (stderr, "Supervisor assasination failed\n"); /* goto lookup_; */ } } /* Broadcast a message to any running clients telling them we're * now running. */ memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", super, SUPERVISOR, mbGetMBHost(), pid); mbusBcast (CLIENT, msg, MB_CONNECT); } /* Save the Supervisor location and other bits about this client. */ mbAppSet (APP_TID, mytid); mbAppSet (APP_STID, super); mbAppSet (APP_FD, mbGetMBusFD()); mbAppSetName (whoAmI); mbAppSetMBus ((MBusPtr) mbus); /* Install the exit handler so we're sure to make a clean getaway. */ atexit (mbusExitHandler); if (MB_DEBUG) fprintf (stderr, "mbConnect: whoAmI='%s' group='%s' mbus = 0x%x\n", whoAmI, group, (int) mbus); /* Join a specific group if specified. A process may be part of * multiple groups and so we use the mbus routine the same as a * caller would. */ if (group) mbusJoinGroup (group); /* Return our tid. */ return (mytid); }