/*--------------------------------------------------------------------------- ** MBECONNECTTOSUPER -- Establish a connection between the application ** running on 'mytid' with the Supervisor running at pid@host with ** 'supertid'. */ int mbConnectToSuper (int mytid, int supertid, char *host, int pid) { char msg[SZ_LINE]; char *whoAmI = mbAppGetName (); /* app name */ int mysuper = mbAppGet (APP_STID); /* current app's super */ int mypid = getpid(); int i, info, super = -1; if (mbAppGet(APP_STID) == mytid) { fprintf (stderr, "ERROR: Supervisor connecting to itelf.\n"); return (ERR); } /* Look for a Supervisor and send a connect message. */ for (i=MB_CONNECT_RETRYS; i; i--) { info = pvm_lookup (SUPERVISOR, -1, &super); if (info == PvmNoEntry || (mysuper != supertid)) { /* Supervisor not found, so connect to specified supertid. */ memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", abs(mytid), whoAmI, host, mypid); if (MB_DEBUG) printf ("Supervisor msg '%s'\n", msg); if (mbusSend (SUPERVISOR, ANY, MB_CONNECT, msg) != ERR) break; sleep (1); } else { if (MB_DEBUG) printf ("Supervisor already on msgbus at tid=%d\n", super); /* FIXME -- Need to disconnect from old Supervisor. memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", abs(mytid), whoAmI, host, mypid); if (mbusSend (SUPERVISOR, ANY, MB_DISCONNECT, msg) != ERR) break; */ break; } } if (super < 0) { if (MB_VERBOSE) fprintf(stderr,"Supervisor not on msgbus, returning\n"); super = ERR; } else { mbAppSet (APP_TID, abs (mytid)); mbAppSet (APP_STID, super); } return (super); }
/*---------------------------------------------------------------------------
**  MBINITAPP -- Initialize the application state structure.  Every integer
**  state field is zeroed, then the message bus pointer and the application
**  name are cleared.
*/
void
mbInitApp ()
{
    /* Integer fields of the client's static state, reset in this order. */
    const int zeroed[] = { APP_TID, APP_STID, APP_INIT, APP_FD };
    const int nzeroed  = (int) (sizeof (zeroed) / sizeof (zeroed[0]));
    int   k;

    for (k = 0; k < nzeroed; k++)
	mbAppSet (zeroed[k], 0);

    mbAppSetMBus ((MBusPtr) NULL);
    mbAppSetName (NULL);
}
/* Local Message Handler */ void myHandler (int from, int subject, char *msg) { int tid = 0, pid = 0, mytid = mbAppGet (APP_TID); char *me, who[128], host[128]; char *w = who, *h = host; double expID; extern smCache_t *smc; me = (char *)mbAppGetName(); switch (subject) { case MB_CONNECT: if (console) fprintf (stderr, "CONNECT on %s: %s\n", me, msg); mbParseConnectMsg (msg, &tid, &w, &h, &pid); /* If it's the supervisor connecting, and we don't already have an * established connection to the Super, set it up now. */ if (isSupervisor (who) && mbAppGet (APP_STID) < 0) { mytid = mbAppGet(APP_TID); if (mbConnectToSuper (mytid, tid, host, pid) == OK) mbAppSet (APP_TID, abs(mytid)); } /* When we get a CONNECT message, post a notifier so we're * alerted whent the task exits. mbAddTaskExitHandler (tid, myTaskExited); */ break; case MB_START: if (console) fprintf (stderr, "START on %s: %s\n", me, msg); if (strncmp (msg, "process", 7) == 0) { /* Begin processing an image given the ExpID. */ expID = (double) atof (&msg[8]); if (console) fprintf (stderr, "PXF processing ExpID %.6lf\n", expID); /* Process the pages. 
*/ if (!noop) pxfProcess (smc, expID); dhsTransferComplete (dca_tid, seqno); memset (buf, 0, SZ_BUF); sprintf (buf, "process pxf done %.6lf", expID); mbusSend (SUPERVISOR, ANY, MB_SET, buf); mbusSend (SUPERVISOR, ANY, MB_STATUS, "inactive"); mbusSend (SUPERVISOR, ANY, MB_STATUS, "Ready..."); mbusSend (SUPERVISOR, ANY, MB_ACK, ""); mbusSend (SUPERVISOR, ANY, MB_ACK, ""); mbusSend (SUPERVISOR, ANY, MB_ACK, ""); if (console) { fprintf (stderr, "\n"); fprintf (stderr, "**************************************\n"); fprintf (stderr, "PXF processing Done: ExpID %.6lf\n", expID); fprintf (stderr, "**************************************\n\n"); } } break; case MB_SET: if (console && strncmp (msg, "no-op", 5) != 0) fprintf (stderr, "SET on %s: %s\n", me, msg); if (strncmp (msg, "dca_tid", 7) == 0) { dca_tid = atoi (&msg[8]); } else if (strncmp (msg, "nbin", 4) == 0) { /* nbin = atoi (&msg[5]); */ } else if (strncmp (msg, "seqno", 5) == 0) { seqno = atoi (&msg[6]); } else if (strncmp (msg, "no-op", 5) == 0) { /* mbusSend (SUPERVISOR, ANY, MB_ACK, ""); */ ; } else if (strncmp (msg, "keyword add", 11) == 0) { char *sp, *op, *ip = &msg[12]; char buf[SZ_LINE]; /* printf ("Adding keyword monitor: '%s'\n", ip); */ while (*ip) { /* skip leading whitespace */ for (sp=ip; *ip && isspace(*sp); sp++) ip++; /* Copy the keyword to the list. */ bzero (buf, SZ_LINE); for (op=&buf[0]; *ip && !isspace (*ip); ) *op++ = *ip++; if (! keywList[NKeywords]) keywList[NKeywords] = malloc (SZ_LINE); strcpy (keywList[NKeywords++], buf); } } else if (strncmp (msg, "keyword del", 11) == 0) { char *key = &msg[12]; int i, j; for (i=0; i < NKeywords; i++) { if (strcmp (keywList[i], key) == 0) { for (j=i+1; j < NKeywords; ) strcpy (keywList[i++], keywList[j++]); NKeywords--; break; } } } break; case MB_EXITING: /* If it's the supervisor disconnecting, ..... */ if (console) fprintf (stderr, "DISCONNECT on %s: %s\n", me, msg); break; case MB_STATUS: /* Send a Status response.... 
*/ mbusSend (SUPERVISOR, ANY, MB_STATUS, "Ready..."); break; case MB_PING: /* Return an ACK/STATUS to sender .... */ mbusSend (SUPERVISOR, ANY, MB_ACK, ""); break; case MB_DONE: /* Exit for now... */ exit (0); break; case MB_ERR: /* No-op */ break; default: if (console) { fprintf (stderr, "DEFAULT recv:%d: ", subject); fprintf (stderr, " from:%d subj:%d\n msg='%s'\n", from, subject, msg); } if (strncmp (msg, "quit", 4) == 0) exit (0); } return; }
/*--------------------------------------------------------------------------- ** MBUSCONNECT -- Connect the client to the message bus. In the case of ** a client we'll look first for a running Supervisor and notify them we've ** come online. The Client connection is complete when a MB_READY state ** is sent. */ int mbusConnect (char *whoAmI, char *group, int singleton) { int i, mytid, super, info, oldval; MBusPtr mbus = (MBusPtr) NULL; char msg[SZ_LINE]; pid_t pid = getpid(); /* Initialize host name. */ mbInitMBHost (); mbSetMBHost (); /* At this point we're only getting state information from the * application cache, we don't talk to the message bus yet. */ if (mbAppGet (APP_INIT)) { fprintf (stderr, "Application already initialized, resetting....\n"); mbInitApp(); /* FIXME: ....disconnect old client.....*/ } /*if (isSupervisor (whoAmI) ) {*/ if (MB_DEBUG) printf ("Re-Initializing default message bus...\n"); mbusInitialize (whoAmI, NULL); /* Attach this process to the message bus. */ if ((mytid = pvm_mytid()) < 0) { pvm_perror (whoAmI); exit (1); } oldval = pvm_setopt (PvmAutoErr, 0); /* disable libpvm msgs */ /*}*/ /* No MBus pointer found meaning we have no connection established * or context yet. If we're a SUPERVISOR, reinitialize. Otherwise, * now is the time to connect and set the state. */ mbus = mbAppGetMBus (); if (mbus == (MBusPtr) NULL) { /* Message bus not initialized by supervisor. * Attach this process to the message bus. */ if ((mytid = pvm_mytid()) < 0) { pvm_perror (whoAmI); exit (1); } oldval = pvm_setopt (PvmAutoErr, 0); /* disable libpvm msgs */ /* Allocate an message bus structure. We'll update the state from * the supervisor once connected and setup. */ if (MB_DEBUG) printf ("Allocating mbus struct ...\n"); mbus = (MBusPtr) calloc (1, sizeof (MBus)); } /********************** * CLIENT CODE **********************/ if (! isSupervisor (whoAmI)) { /* If we're a Client, look for a Supervisor. 
If we can't find a * Supervisor the simply return an error, individuals client may * handle it differently. */ for (i=MB_CONNECT_RETRYS; i; i--) { info = pvm_lookup (SUPERVISOR, -1, &super); if (info == PvmNoEntry) { fprintf (stderr, "Can't find Supervisor on msgbus, retrying\n"); sleep (1); } else { if (MB_DEBUG) { printf ("Supervisor on msgbus at tid=%d\n", super); printf ("Registering '%s' with VM....\n", whoAmI); } if (pvm_insert (whoAmI, -1, mytid) < 0) { fprintf (stderr, "Register of '%s' failed....\n", whoAmI); return (ERR); } else { /* Send the Supervisor a CONNECT message. */ memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", mytid, whoAmI, mbGetMBHost(), pid); mbusSend (SUPERVISOR, ANY, MB_CONNECT, msg); } break; } } if (i == 0) { if (MB_VERBOSE) fprintf (stderr, "Supervisor not on msgbus, returning\n"); mytid *= (-1); super = ERR; /*return (((-1) * mytid)); */ } /********************* * SUPERVISOR CODE **********************/ } else if (isSupervisor (whoAmI)) { /* If we're a Supervisor, check whether we're already registered. */ info = pvm_lookup (SUPERVISOR, -1, &super); if (info == PvmBadParam) { fprintf (stderr, "Supervisor lookup error, BadParam...\n"); return (ERR); } else if (info == PvmNoEntry) { /* Supervisor does not exist */ if (MB_DEBUG) fprintf (stderr, "Supervisor not found, registering...\n"); if (pvm_insert (SUPERVISOR, -1, (super = mytid)) < 0) { fprintf (stderr, "Supervisor register failed....\n"); return (ERR); } } else if (info >= 0) { if (MB_DEBUG) fprintf (stderr, "Supervisor already registered at tid=%d, validating..\n", super); /* Try sending a message to the registered tid to see if it is * alive. */ if (mbusPing (super, 500) == OK) { if (singleton) { /* tell other supervisor we're taking over.... 
*/ printf ("got a ping reply....\n"); } else { fprintf (stderr, "ERROR: Supervisor already registered at tid=%d\n", super); return (ERR); } } else { /* The registered supervisor didn't respond, so delete it * from the database and try again. */ extern int pvmreset(); if (MB_DEBUG) fprintf (stderr, "Cleaning up earlier super at %d(%d)...\n", info, super); /* FIXME: ....disconnect old supervisor.....*/ (void) pvmreset (mytid, 1, "", 0); if (pvm_delete (SUPERVISOR, info) != PvmOk) if (MB_DEBUG) fprintf (stderr, "Supervisor cleanup failed\n"); if ((info = pvm_kill (super)) != PvmOk) if (MB_DEBUG) fprintf (stderr, "Supervisor assasination failed\n"); /* goto lookup_; */ } } /* Broadcast a message to any running clients telling them we're * now running. */ memset (msg, 0, SZ_LINE); sprintf (msg, "{ tid=%d who=%s host=%s pid=%d }", super, SUPERVISOR, mbGetMBHost(), pid); mbusBcast (CLIENT, msg, MB_CONNECT); } /* Save the Supervisor location and other bits about this client. */ mbAppSet (APP_TID, mytid); mbAppSet (APP_STID, super); mbAppSet (APP_FD, mbGetMBusFD()); mbAppSetName (whoAmI); mbAppSetMBus ((MBusPtr) mbus); /* Install the exit handler so we're sure to make a clean getaway. */ atexit (mbusExitHandler); if (MB_DEBUG) fprintf (stderr, "mbConnect: whoAmI='%s' group='%s' mbus = 0x%x\n", whoAmI, group, (int) mbus); /* Join a specific group if specified. A process may be part of * multiple groups and so we use the mbus routine the same as a * caller would. */ if (group) mbusJoinGroup (group); /* Return our tid. */ return (mytid); }