示例#1
0
文件: Manager.c 项目: barroca/Anthill
/// Init a ds pipeline.
///	\param confFile XML file configuration, NULL if we want to make things by hand
///	\param argc argc received by main function that will be forwarded to filter processes
///	\param argv argv received by main function that will be forwarded to filter processes
///	\return Layout of the pipeline
Layout *initDs(char *confFile, int argc, char **argv) {
    int err;

    //configura pvm pra enviar dado diretamente(IMPORTANTE!!!!)
    pvm_setopt(PvmRoute, PvmRouteDirect);

    //err == 0, OK
    err = pvm_start_pvmd(0, NULL, 0);
    switch (err) {
    case PvmSysErr:
        printf("Manager.c: error starting PVM, aborting\n");
        exit(1);
        break;
    }

    //start random number generator
    srandom(getpid());

    // config DS to use signals
//	signal(SIGSEGV, &captureSignal);
//	signal(SIGINT, &captureSignal);
//	signal(SIGILL, &captureSignal);
//	signal(SIGHUP, &captureSignal);
//	signal(SIGTERM, &captureSignal);

    if (I_AM_THE_MANAGER) {
        return initManager(confFile, argc, argv);
    } else {
        runFilter(argc, argv);
        exit(0);
    }
}
示例#2
0
adpvm_manager::adpvm_manager(int _mode)
{
  cout << "calling load library" << endl;
  if (load_adpvm_library() < 0)
  {
    cerr << "error loading pvm library" << endl;
    exit(1);
  }
  pvm_setopt(PvmRoute, PvmRouteDirect);  /* channel for communication */
  /* get and display configuration of the parallel machine */
  int status=pvm_config( &nhost, &narch, &hostp );  /* get configuration */
  if (status<0)
  {
    cerr << "error trying to get configuration of pvm (virtual machine)" << endl;
    if (status == PvmSysErr)
      cerr << " PVM Daemon not responing -- maybe it is not started" << endl;
    ad_exit(1);
  }
  printf("I found the following hosts in your virtual machine\n");
  int i;
  for (i = 0; i < nhost; i++)
  {
    printf("    %s\n", hostp[i].hi_name);
  }
  //id.allocate(0,nhost);
  mode=_mode;
  if (mode == 1)  // master
  {
    slave_argv = new adpvm_slave_args(20,20);
    int ierr=pvm_catchout(stdout);
    if (ierr<0)
    {
      cerr << "Error in pvm_catchout" << endl;
    }
    strcpy(*slave_argv,"progname");
    int on1,nopt1;
    if ( (on1=option_match(ad_comm::argc,ad_comm::argv,"-exec",nopt1))>-1)
    {
      if (nopt1 !=1)
      {
        cerr << "Wrong number of options to -exec -- must be 1"
          " you have " << nopt1 << endl;
        ad_exit(1);
      }
      slave_names+= ad_comm::argv[on1+1];
    }
    else
    {
      slave_names+="test";
    }
  }
  timing_flag=0;
  int on1,nopt1;
  if ( (on1=option_match(ad_comm::argc,ad_comm::argv,"-pvmtime",nopt1))>-1)
  {
    timing_flag=1;
  }
}
示例#3
0
void BBSDirect::start() {
	char* client = 0;
	int tid, host_mytid;
	int i, n, ncpu, nncpu;
	struct pvmhostinfo* hostp;
	if (started_) { return; }
	BBSImpl::start();
	mytid_ = pvm_mytid();
	nrnmpi_myid = 0;
	if (mytid_ < 0) { perror("start"); }
	host_mytid = pvm_tidtohost(mytid_);
	tid = pvm_parent();
	if (tid == PvmSysErr) {
		perror("start");
	}else if (tid == PvmNoParent) {
		is_master_ = true;
		pvm_catchout(stdout);
		pvm_setopt(PvmRoute, PvmRouteDirect);
		pvm_config(&n, NULL, &hostp);
		nncpu = 0;
		for (i=0; i < n; ++i) {
			ncpu = hostp[i].hi_speed;
			if (ncpu%1000) {
				hoc_warning(hostp[i].hi_name,
" speed in pvm configuration file is not a multiple of 1000. Assuming 1000.");
				ncpu = 1000;
			}
			nncpu += ncpu/1000;
		}
		nrnmpi_numprocs = nncpu;
		ncids = 0;
	}else{ // a worker, impossible
		assert(false);
	}
	if (nrnmpi_numprocs > 1 && tid == PvmNoParent) {
		char ** sargv;
	// args are workingdirectory specialOrNrniv -bbs_nhost nhost args
		sargv = new char*[nrn_global_argc + 4];
		for (i=1; i < nrn_global_argc; ++i) {
			sargv[i+3] = nrn_global_argv[i];
		}
		sargv[nrn_global_argc + 3] = 0;
		sargv[0] = rel_working_dir();
//printf("sargv[0]=|%s|\n", sargv[0]);
		sargv[1] = nrn_global_argv[0];
		sargv[2] = "-bbs_nhost";
		sargv[3] = new char[10];
		sprintf(sargv[3], "%d", nrnmpi_numprocs);
		cids = new int[nrnmpi_numprocs-1];
if (nrn_global_argv[nrn_global_argc] != 0) {
	printf("argv not null terminated\n");
	exit(1);
}

		BBSDirectServer::server_->start();
		bbs_sig_set();
		bbs_handle();

		//spawn according to number of cpu's (but master has one less)
//printf("%d total number of cpus on %d machines\n", nncpu, n);
		int icid = 0;
		bool first = true;
	    while (icid < nrnmpi_numprocs - 1) {
		for (i=0; i < n; ++i) {
			ncpu = hostp[i].hi_speed;
			if (ncpu%1000) {
				ncpu = 1000;
			}
			ncpu /= 1000;
//printf("%d cpu for machine %d (%s)\n", ncpu, i, hostp[i].hi_name);
			if (first && hostp[i].hi_tid == host_mytid) {
				// spawn one fewer on master first time through
				--ncpu;
			}
			if (icid + ncpu >= nrnmpi_numprocs) {
				ncpu = nrnmpi_numprocs - icid - 1;
			}
//printf("before spawn %d processes (icid=%d) for machine %d (%s)\n", ncpu, icid, i, hostp[i].hi_name);
			if (ncpu) {
				ncids = pvm_spawn("bbswork.sh", sargv,
					PvmTaskHost, hostp[i].hi_name, ncpu, cids + icid);
				if (ncids != ncpu) {
fprintf(stderr, "Tried to spawn %d tasks, only %d succeeded on %s\n", ncpu, ncids, hostp[i].hi_name);
hoc_execerror("Could not spawn all the requested tasks for", hostp[i].hi_name);
				}
//printf("spawned %d for %s with cids starting at %d\n", ncpu, hostp[i].hi_name, icid);
				icid += ncpu;
			}
			if (icid >= nrnmpi_numprocs) {
				break;
			}
		}
		first = false;
	    }
		ncids = icid;
printf("spawned %d more %s on %d cpus on %d machines\n", ncids, nrn_global_argv[0], nncpu, n);
		delete [] sargv[3];
		delete [] sargv;
	}
}
示例#4
0
int
Establish(ArgStruct *p)
{
    /* Task information for the entire parallel machine (if trans) */
    int                     tasks_status;
    struct pvmtaskinfo      *taskp;
    int                     ntasks;

        /* Received buffer (if receiver)  */
    int buffer_id;

    /*
    If we are the transmitting side, go find the other one and send
    it a message containing our tid. If we are the receiving side,
    just wait for a message.
    */
    if (p->tr) {
#ifdef DEBUG
	printf("this is the transmitter\n");
#endif
	tasks_status = pvm_tasks( 0, &ntasks, &taskp );
	if (ntasks != 2) {
	    printf("Error, too many processes in parallel machine \n");
	    printf("Start a clean machine.  n=%d\n", ntasks);
	    exit(-1);
	}

	/* Since there are two tasks, one is ours the other is the receiver */
	p->prot.othertid = -1;
	if (taskp[0].ti_tid == p->prot.mytid) {
	    p->prot.othertid = taskp[1].ti_tid;
	}
	if (taskp[1].ti_tid == p->prot.mytid) {
	    p->prot.othertid = taskp[0].ti_tid;
	}
	if (p->prot.othertid == -1) {
	    printf("Error, cannot find other (receiving) task \n");
	    printf("Id's:  %d %d  \n",taskp[0].ti_tid,taskp[1].ti_tid);
	}

	/* Send the receiver a message.  Tell pvm to keep the channel open */

#ifdef DEBUG
	printf("The receiver tid is %d \n",p->prot.othertid);
#endif
	pvm_setopt( PvmRoute, PvmRouteDirect );
	pvm_initsend( PVMDATA );
	pvm_pkint( &p->prot.mytid, 1, 1 );
	pvm_send( p->prot.othertid, 1 );
    } else {
#ifdef DEBUG
	printf("This is the receiver \n");
#endif

	/* Receive any message from any task */
	buffer_id = pvm_recv(-1, -1);

	if (buffer_id < 0) {
	    printf("Error on receive in receiver\n");
	    exit(-1);
	}
	pvm_upkint( &p->prot.othertid, 1, 1 );
    }
}
示例#5
0
/*---------------------------------------------------------------------------
**  MBUSCONNECT -- Connect the client to the message bus.  In the case of
**  a client we'll look first for a running Supervisor and notify them we've
**  come online.  The Client connection is complete when a MB_READY state
**  is sent.
*/
int
mbusConnect (char *whoAmI, char *group, int singleton)
{
    int     i, mytid, super, info, oldval;
    MBusPtr mbus = (MBusPtr) NULL;
    char    msg[SZ_LINE];
    pid_t   pid = getpid();


    /* Initialize host name.
    */
    mbInitMBHost ();
    mbSetMBHost ();


    /*  At this point we're only getting state information from the
     *  application cache, we don't talk to the message bus yet.
     */
    if (mbAppGet (APP_INIT)) {
	fprintf (stderr, "Application already initialized, resetting....\n");
        mbInitApp();

	/* FIXME:  ....disconnect old client.....*/
    }


    /*if (isSupervisor (whoAmI) ) {*/
        if (MB_DEBUG) printf ("Re-Initializing default message bus...\n");
        mbusInitialize (whoAmI, NULL);

	/*  Attach this process to the message bus.
	 */
    	if ((mytid = pvm_mytid()) < 0) {
	    pvm_perror (whoAmI);
	    exit (1);
    	}
    	oldval = pvm_setopt (PvmAutoErr, 0);  /* disable libpvm msgs */
    /*}*/

    /* No MBus pointer found meaning we have no connection established
     * or context yet.  If we're a SUPERVISOR, reinitialize.  Otherwise,
     * now is the time to connect and set the state.
     */
    mbus = mbAppGetMBus ();
    if (mbus == (MBusPtr) NULL) {
	/*  Message bus not initialized by supervisor.
    	 *  Attach this process to the message bus.
	 */
    	if ((mytid = pvm_mytid()) < 0) {
	    pvm_perror (whoAmI);
	    exit (1);
    	}
    	oldval = pvm_setopt (PvmAutoErr, 0);  /* disable libpvm msgs */

        /* Allocate an message bus structure.  We'll update the state from
	 * the supervisor once connected and setup.
         */
        if (MB_DEBUG) printf ("Allocating mbus struct ...\n");
        mbus = (MBusPtr) calloc (1, sizeof (MBus));
    }




    /**********************
     *  CLIENT CODE
     **********************/
    if (! isSupervisor (whoAmI)) {
        /*  If we're a Client, look for a Supervisor.  If we can't find a
         *  Supervisor the simply return an error, individuals client may 
         *  handle it differently.
         */
	for (i=MB_CONNECT_RETRYS; i; i--) {
            info = pvm_lookup (SUPERVISOR, -1, &super);
	    if (info == PvmNoEntry) {
	        fprintf (stderr, "Can't find Supervisor on msgbus, retrying\n");
		sleep (1);

	    } else {
	        if (MB_DEBUG) {
		    printf ("Supervisor on msgbus at tid=%d\n", super);
                    printf ("Registering '%s' with VM....\n", whoAmI);
	        }

                if (pvm_insert (whoAmI, -1, mytid) < 0) {
                    fprintf (stderr, "Register of '%s' failed....\n", whoAmI);
                    return (ERR);

                } else {
		    /* Send the Supervisor a CONNECT message. 
		     */
		    memset (msg, 0, SZ_LINE);
		    sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }",
			mytid, whoAmI, mbGetMBHost(), pid);
		    mbusSend (SUPERVISOR, ANY, MB_CONNECT, msg);
		}
		break;
	    }
	}
	if (i == 0) {
	    if (MB_VERBOSE)
	        fprintf (stderr, "Supervisor not on msgbus, returning\n");
	    mytid *= (-1);
	    super = ERR;
	    /*return (((-1) * mytid)); */
	}


    /*********************
     * SUPERVISOR CODE
     **********************/
    } else if (isSupervisor (whoAmI)) {
	/* If we're a Supervisor, check whether we're already registered.
	 */
        info = pvm_lookup (SUPERVISOR, -1, &super);
	if (info == PvmBadParam) {
	    fprintf (stderr, "Supervisor lookup error, BadParam...\n");
	    return (ERR);

	} else if (info == PvmNoEntry) {
            /*  Supervisor does not exist */
	    if (MB_DEBUG) 
		fprintf (stderr, "Supervisor not found, registering...\n");
            if (pvm_insert (SUPERVISOR, -1, (super = mytid)) < 0) {
                fprintf (stderr, "Supervisor register failed....\n");
                return (ERR);
            }

	} else if (info >= 0) {
	    if (MB_DEBUG)
		fprintf (stderr, 
		    "Supervisor already registered at tid=%d, validating..\n",
		    super);

	    /* Try sending a message to the registered tid to see if it is
	     * alive.
	     */
	    if (mbusPing (super, 500) == OK) {
		if (singleton) {
		    /* tell other supervisor we're taking over.... */
		    printf ("got a ping reply....\n");
		} else {
		    fprintf (stderr, 
		        "ERROR: Supervisor already registered at tid=%d\n",
			super);
	            return (ERR);
		}

	    } else {
		/* The registered supervisor didn't respond, so delete it
		 * from the database and try again.
		 */
		extern int pvmreset();

	        if (MB_DEBUG)
		    fprintf (stderr, "Cleaning up earlier super at %d(%d)...\n",
			info, super);

		/* FIXME:  ....disconnect old supervisor.....*/
		(void) pvmreset (mytid, 1, "", 0);
    		if (pvm_delete (SUPERVISOR, info) != PvmOk)
		    if (MB_DEBUG)
        	        fprintf (stderr, "Supervisor cleanup failed\n");
    		if ((info = pvm_kill (super)) != PvmOk)
		    if (MB_DEBUG)
        	        fprintf (stderr, "Supervisor assasination failed\n");
		/* goto lookup_; */
	    }
        }


	/* Broadcast a message to any running clients telling them we're
	 * now running.
	 */
	memset (msg, 0, SZ_LINE); 
	sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", 
	    super, SUPERVISOR, mbGetMBHost(), pid);

	mbusBcast (CLIENT, msg, MB_CONNECT);
    }

    /* Save the Supervisor location and other bits about this client.
     */
    mbAppSet (APP_TID, mytid);
    mbAppSet (APP_STID, super);
    mbAppSet (APP_FD, mbGetMBusFD());
    mbAppSetName (whoAmI);
    mbAppSetMBus ((MBusPtr) mbus);


    /* Install the exit handler so we're sure to make a clean getaway.
     */
    atexit (mbusExitHandler);
	        
    if (MB_DEBUG)
	fprintf (stderr, "mbConnect: whoAmI='%s' group='%s'  mbus = 0x%x\n", 
	    whoAmI, group, (int) mbus);


    /* Join a specific group if specified.  A process may be part of
     * multiple groups and so we use the mbus routine the same as a
     * caller would.
     */
    if (group)
	mbusJoinGroup (group);


    /*  Return our tid.
     */
    return (mytid);
}