Ejemplo n.º 1
0
/*
 * Thread body that repeatedly asks another endpoint to run a debug round.
 *
 * While no round is in progress the thread waits on its endpoint with a
 * timeout of DEBUG_DELAY; a NULL result from endpoint_receive() is treated as
 * that wait expiring, at which point MSG_DEBUG_START (carrying this
 * endpoint's id) is sent to dbc->epid. While a round is in progress the
 * timeout passed is -1 and a NULL result is asserted not to happen; the round
 * ends when MSG_DEBUG_DONE arrives.
 *
 * MSG_KILL and any unrecognised tag are reported through fatal().
 *
 * arg is the debug_control* describing the target; it is released with free()
 * after the loop. Note that nothing inside the loop ever sets the exit flag.
 */
static void debug_control_thread(node *n, endpoint *endpt, void *arg)
{
  debug_control *ctl = (debug_control*)arg;
  int finished = 0;
  int round_active = 0;

  while (!finished) {
    message *incoming = endpoint_receive(endpt,round_active ? -1 : DEBUG_DELAY);

    if (NULL != incoming) {
      if (MSG_DEBUG_DONE == incoming->tag) {
        /* The peer reports that the round we requested is over */
        assert(round_active);
        round_active = 0;
      }
      else if (MSG_KILL == incoming->tag) {
        fatal("Node shutdown during chord test");
      }
      else {
        fatal("debug_control received invalid message: %d",incoming->tag);
      }
      message_free(incoming);
      continue;
    }

    /* Nothing arrived within DEBUG_DELAY: kick off the next round */
    assert(!round_active);
    endpoint_send(endpt,ctl->epid,MSG_DEBUG_START,&endpt->epid,sizeof(endpointid));
    round_active = 1;
  }

  free(ctl);
}
Ejemplo n.º 2
0
/*
 * Ends the debug round currently recorded in chk, if there is one: sends an
 * empty MSG_DEBUG_DONE to chk->caller and clears chk->indebug. Does nothing
 * when no round is in progress.
 */
static void check_abort(check *chk)
{
  if (!chk->indebug)
    return;

  endpoint_send(chk->endpt,chk->caller,MSG_DEBUG_DONE,NULL,0);
  chk->indebug = 0;
}
Ejemplo n.º 3
0
/*
 * Handles a start-GC request: copies the task id map out of the request into
 * a freshly allocated gcarg, starts a "gc" thread running gc_thread with that
 * gcarg as its argument, and acknowledges the request by sending an empty
 * MSG_STARTGC_RESPONSE to source.
 *
 * m->count gives the number of endpointid entries read from m->idmap.
 * The gcarg is handed to the new thread and not released here.
 * Note that the calloc() result is used without a NULL check.
 */
static void manager_startgc(node *n, endpoint *endpt, startgc_msg *m, endpointid source)
{
  size_t idmap_bytes = m->count*sizeof(endpointid);
  gcarg *gc_args = (gcarg*)calloc(1,sizeof(gcarg)+idmap_bytes);

  memcpy(gc_args->idmap,m->idmap,idmap_bytes);
  gc_args->ntasks = m->count;

  node_add_thread(n,"gc",gc_thread,gc_args,NULL);
  endpoint_send(endpt,source,MSG_STARTGC_RESPONSE,NULL,0);
}
Ejemplo n.º 4
0
/*
 * Asks the manager identified by managerid to start a new chord node by
 * sending it MSG_START_CHORD. The request names `initial` as the initial
 * node, this endpoint as the caller, and STABILIZE_DELAY as the stabilize
 * delay. The function does not wait for any reply.
 */
static void add_node(node *n, endpoint *endpt, endpointid managerid, endpointid initial)
{
  start_chord_msg request;

  request.stabilize_delay = STABILIZE_DELAY;
  request.caller = endpt->epid;
  request.initial = initial;

  endpoint_send(endpt,managerid,MSG_START_CHORD,&request,sizeof(start_chord_msg));
}
Ejemplo n.º 5
0
/*
 * Handler for DSM_MSGTYPE_STATE_QUERY: logs the query together with the name
 * of current_state and answers the querying client with a
 * DSM_MSGTYPE_STATE_CHANGE_IND whose state field is current_state.
 */
DSME_HANDLER(DSM_MSGTYPE_STATE_QUERY, client, msg)
{
  DSM_MSGTYPE_STATE_CHANGE_IND reply = DSME_MSG_INIT(DSM_MSGTYPE_STATE_CHANGE_IND);

  reply.state = current_state;

  dsme_log(LOG_DEBUG, PFIX"state_query, state: %s", state_name(current_state));

  endpoint_send(client, &reply);
}
Ejemplo n.º 6
0
/*
 * Answers a get-tasks request: obtains the ids of the node's "task"
 * endpoints via node_get_endpoints(), packs the count followed by the ids
 * into a get_tasks_response_msg, and sends it as MSG_GET_TASKS_RESPONSE to
 * m->sender. The response buffer is released once it has been sent.
 *
 * The calloc() result is used without a NULL check.
 * NOTE(review): the array returned through the node_get_endpoints() out
 * parameter is never released here; if that function allocates it, this is a
 * leak -- confirm ownership before adding a free().
 */
static void manager_get_tasks(node *n, endpoint *endpt, get_tasks_msg *m)
{
  endpointid *task_ids = NULL;
  int ntasks = node_get_endpoints(n,"task",&task_ids);
  int resp_size = sizeof(get_tasks_response_msg)+ntasks*sizeof(endpointid);
  get_tasks_response_msg *resp = (get_tasks_response_msg*)calloc(1,resp_size);

  memcpy(resp->tasks,task_ids,ntasks*sizeof(endpointid));
  resp->count = ntasks;

  endpoint_send(endpt,m->sender,MSG_GET_TASKS_RESPONSE,resp,resp_size);
  free(resp);
}
Ejemplo n.º 7
0
/*
 * Walks the `processes` list once, bumping each entry's ping counter.
 *
 * - If the counter has not just reached MAXPING, a DSM_MSGTYPE_PROCESSWD_PING
 *   carrying the entry's pid is sent to the entry's client.
 * - If the counter has just reached MAXPING and no kill timer is set for the
 *   entry, the process is sent SIGABRT and a timer of
 *   ABORT_GRACE_PERIOD_SECONDS is created with abort_timeout_func as its
 *   callback. Should timer creation fail, the process is sent SIGKILL right
 *   away and its entry is deleted and unlinked from the list.
 */
static void ping_all(void)
{
    GSList* cursor = processes;

    while (cursor) {
        /* Fetch the successor up front: the current link may be deleted below */
        GSList* following = g_slist_next(cursor);
        dsme_swwd_entry_t* entry = (dsme_swwd_entry_t *)(cursor->data);

        entry->pingcount++;

        if (entry->pingcount != MAXPING) {
            DSM_MSGTYPE_PROCESSWD_PING ping =
              DSME_MSG_INIT(DSM_MSGTYPE_PROCESSWD_PING);

            ping.pid = entry->pid;
            endpoint_send(entry->client, &ping);
            dsme_log(LOG_DEBUG, "sent ping to pid %i", entry->pid);
        } else if (entry->kill_timer == 0) {
            /* Pinged too many times and not already being dealt with */
            dsme_log(LOG_ERR, "process (pid: %i) not responding to processwd pings,"
                     " aborting it...", entry->pid);

            /* let the unresponsive process abort by itself first... */
            kill(entry->pid, SIGABRT);

            /* ...and arrange for the follow-up once the grace period ends */
            entry->kill_timer = dsme_create_timer(ABORT_GRACE_PERIOD_SECONDS,
                                                  abort_timeout_func,
                                                  GINT_TO_POINTER(entry->pid));

            if (entry->kill_timer == 0) {
                /* no timer available, so there is no grace period: kill now */
                dsme_log(LOG_ERR, "...kill due to timer failure: %s", strerror(errno));
                kill(entry->pid, SIGKILL);

                swwd_entry_delete(entry);
                processes = g_slist_delete_link(processes, cursor);
            }
        }

        cursor = following;
    }
}
Ejemplo n.º 8
0
/*
 * Requests a new chord node from the manager `managerid` and waits for it to
 * come up.
 *
 * Sends MSG_START_CHORD (initial node `initial`, caller = this endpoint,
 * stabilize delay STABILIZE_DELAY), then receives messages with a timeout
 * argument of 20000 until MSG_CHORD_STARTED arrives. On that message the
 * endpoint is linked to the new node's endpoint and the node description from
 * the message is returned.
 *
 * A NULL receive result, MSG_KILL, or any other tag is reported through
 * fatal().
 */
static chordnode start_one_chord(node *n, endpoint *endpt, endpointid initial, endpointid managerid)
{
  chordnode started;
  start_chord_msg request;
  int got_reply = 0;

  memset(&started,0,sizeof(chordnode));

  request.stabilize_delay = STABILIZE_DELAY;
  request.caller = endpt->epid;
  request.initial = initial;
  endpoint_send(endpt,managerid,MSG_START_CHORD,&request,sizeof(start_chord_msg));

  do {
    message *reply = endpoint_receive(endpt,20000);
    if (NULL == reply) {
      fatal("Timeout waiting for CHORD_STARTED message");
    }

    if (MSG_CHORD_STARTED == reply->tag) {
      chord_started_msg *info = (chord_started_msg*)reply->data;
      assert(sizeof(chord_started_msg) == reply->size);
      endpoint_link(endpt,info->cn.epid);
      started = info->cn;
      got_reply = 1;
    }
    else if (MSG_KILL == reply->tag) {
      fatal("Node shutdown during chord test");
    }
    else {
      fatal("start_one_chord received invalid message: %d",reply->tag);
    }

    message_free(reply);
  } while (!got_reply);

  return started;
}
Ejemplo n.º 9
0
/*
 * Coordinator thread for distributed garbage collection over the tasks listed
 * in the gcarg passed as arg (ga->idmap[0 .. ga->ntasks-1]).
 *
 * One collection cycle, as implemented below:
 *  1. While no cycle is running, wait for a message with a timeout of
 *     DISTGC_DELAY. A NULL result starts a cycle: MSG_STARTDISTGC (carrying
 *     this endpoint's id and a freshly incremented gciter) goes to every task.
 *  2. Once ga->ntasks MSG_STARTDISTGCACK messages have arrived, count[] is
 *     zeroed, MSG_MARKROOTS is sent to every task and each count[i] is set
 *     to 1.
 *  3. Every MSG_UPDATE adds the message's per-task counts to count[]. When all
 *     entries are zero afterwards, marking is considered done and MSG_SWEEP is
 *     sent to every task.
 *  4. Once ga->ntasks MSG_SWEEPACK messages have arrived, the cycle is over
 *     and the thread goes back to step 1.
 *
 * MSG_ENDPOINT_EXIT and MSG_KILL end the loop; any other tag is reported via
 * fatal(). On exit both the count array and the gcarg are freed.
 *
 * NOTE(review): count[i] presumably tracks outstanding mark work per task;
 * the code here only shows that it must reach zero for every task before
 * sweeping -- confirm the exact meaning against the task side.
 */
static void gc_thread(node *n, endpoint *endpt, void *arg)
{
  gcarg *ga = (gcarg*)arg;
  int done = 0;
  int i;
  int ingc = 0;            /* non-zero while a collection cycle is running */
  int *count = (int*)calloc(ga->ntasks,sizeof(int)); /* one counter per task; result is not NULL-checked */
  int mark_done = 0;       /* set once every count[] entry is zero after an update */
  int rem_startacks = 0;   /* MSG_STARTDISTGCACK messages still expected */
  int rem_sweepacks = 0;   /* MSG_SWEEPACK messages still expected */
  int gciter = 0;          /* cycle number, incremented at the start of each cycle */

  #ifdef DEBUG_DISTGC
  printf("gc_thread\n");
  for (i = 0; i < ga->ntasks; i++)
    printf("gc_thread: idmap[%d] = "EPID_FORMAT"\n",i,EPID_ARGS(ga->idmap[i]));
  #endif

  /* Link to every task endpoint before entering the message loop */
  for (i = 0; i < ga->ntasks; i++)
    endpoint_link(endpt,ga->idmap[i]);

  while (!done) {
    /* During a cycle the timeout argument is -1; otherwise DISTGC_DELAY */
    message *msg = endpoint_receive(endpt,ingc ? -1 : DISTGC_DELAY);
    if (NULL == msg) {
      /* No message arrived: begin a new collection cycle */
      startdistgc_msg sm;
      sm.gc = endpt->epid;
      sm.gciter = ++gciter;
      #ifdef DEBUG_DISTGC
      printf("Starting distributed garbage collection\n");
      #endif
      assert(!ingc);
      ingc = 1;

      for (i = 0; i < ga->ntasks; i++)
        endpoint_send(endpt,ga->idmap[i],MSG_STARTDISTGC,&sm,sizeof(sm));
      rem_startacks = ga->ntasks;

      /* There is no message to dispatch or free on this path */
      continue;
    }
    switch (msg->tag) {
    case MSG_STARTDISTGCACK: {
      assert(ingc);
      assert(!mark_done);
      assert(0 < rem_startacks);
      rem_startacks--;
      if (0 == rem_startacks) {
        /* Every task has acknowledged the start: begin the mark phase */
        #ifdef DEBUG_DISTGC
        printf("All tasks have received STARTDISTGC\n");
        #endif
        memset(count,0,ga->ntasks*sizeof(int));

        for (i = 0; i < ga->ntasks; i++) {
          endpoint_send(endpt,ga->idmap[i],MSG_MARKROOTS,NULL,0);
          /* Account for the MSG_MARKROOTS just sent to this task */
          count[i]++;
        }
      }
      break;
    }
    case MSG_UPDATE: {
      update_msg *m = (update_msg*)msg->data;
      /* The payload must carry exactly one int per task after the header */
      assert(sizeof(update_msg)+ga->ntasks*sizeof(int) == msg->size);
      assert(ingc);
      assert(!mark_done);
      /* Updates must belong to the cycle currently in progress */
      assert(m->gciter == gciter);

      for (i = 0; i < ga->ntasks; i++)
        count[i] += m->counts[i];

      #ifdef DEBUG_DISTGC
      printf("after update (gciter %d) from "EPID_FORMAT":",m->gciter,EPID_ARGS(msg->source));
      for (i = 0; i < ga->ntasks; i++)
        printf(" %d",count[i]);
      printf("\n");
      #endif

      /* Marking is complete only when every per-task counter is zero */
      mark_done = 1;
      for (i = 0; i < ga->ntasks; i++)
        if (count[i])
          mark_done = 0;

      if (mark_done) {
        #ifdef DEBUG_DISTGC
        printf("Mark done\n");
        #endif
        for (i = 0; i < ga->ntasks; i++)
          endpoint_send(endpt,ga->idmap[i],MSG_SWEEP,NULL,0);
        rem_sweepacks = ga->ntasks;
      }
      break;
    }
    case MSG_SWEEPACK: {
      assert(ingc);
      assert(mark_done);
      assert(0 < rem_sweepacks);
      rem_sweepacks--;
      if (0 == rem_sweepacks) {
        /* Every task has swept: the cycle is finished */
        #ifdef DEBUG_DISTGC
        printf("Distributed garbage collection completed\n");
        #endif
        ingc = 0;
        mark_done = 0;
        for (i = 0; i < ga->ntasks; i++) {
          assert(0 == count[i]);
        }
      }
      break;
    }
    case MSG_ENDPOINT_EXIT:
      /* Stop coordinating when an exit notification is received */
      done = 1;
      break;
    case MSG_KILL:
      done = 1;
      break;
    default:
      fatal("gc: unexpected message %d",msg->tag);
      break;
    }
    message_free(msg);
  }
  free(count);
  free(ga);
}
Ejemplo n.º 10
0
/*
 * Message loop of the manager endpoint. Receives messages (timeout argument
 * -1) and dispatches on the tag until MSG_KILL arrives:
 *
 *  MSG_NEWTASK     - payload is a newtask_msg header, bcsize bytes of
 *                    bytecode, then NUL-terminated argument strings. The
 *                    arguments are collected into an array, a task is created
 *                    with task_new(), and the new endpoint's localid is sent
 *                    back to the sender as MSG_NEWTASKRESP.
 *  MSG_START_CHORD - calls start_chord() with the fields of the request.
 *  MSG_STARTGC     - hands the request to manager_startgc().
 *  MSG_GET_TASKS   - hands the request to manager_get_tasks().
 *  MSG_KILL        - logs and leaves the loop.
 *
 * Any other tag is reported through fatal().
 *
 * Payload lengths are validated before the payload is interpreted. In
 * particular:
 *  - a negative bcsize is rejected explicitly, and the bytecode length is
 *    compared against the bytes remaining after the header, so the check
 *    cannot be defeated by a signed value wrapping in unsigned arithmetic;
 *  - for MSG_STARTGC the task count is checked against the bytes that follow
 *    the header, because manager_startgc() copies `count` endpointids out of
 *    the message. This assumes idmap is the trailing variable-length part of
 *    startgc_msg, as the "<=" size relation on this message suggests.
 *
 * arg is unused.
 */
static void manager_thread(node *n, endpoint *endpt, void *arg)
{
  int done = 0;
  while (!done) {
    message *msg = endpoint_receive(endpt,-1);
    switch (msg->tag) {
    case MSG_NEWTASK: {
      newtask_msg *ntmsg;
      endpointid epid;
      array *args = array_new(sizeof(char*),0);
      char *str;
      char *start;
      if (sizeof(newtask_msg) > msg->size)
        fatal("NEWTASK: invalid message size");
      ntmsg = (newtask_msg*)msg->data;
      /* Reject negative sizes first; then compare against what is left after
         the header rather than adding to sizeof, which could wrap */
      if ((0 > ntmsg->bcsize) ||
          (msg->size-sizeof(newtask_msg) < ntmsg->bcsize))
        fatal("NEWTASK: invalid bytecode size");

      /* The argument strings start right after the bytecode; record the
         start of each one as its terminating NUL is found */
      str = ((char*)msg->data)+sizeof(newtask_msg)+ntmsg->bcsize;
      start = str;
      while (str < ((char*)msg->data)+msg->size) {
        if ('\0' == *str) {
          array_append(args,&start,sizeof(char*));
          start = str+1;
        }
        str++;
      }
      assert(array_count(args) == ntmsg->argc);

      node_log(n,LOG_INFO,"NEWTASK pid = %d, groupsize = %d, bcsize = %d",
               ntmsg->tid,ntmsg->groupsize,ntmsg->bcsize);

      task_new(ntmsg->tid,ntmsg->groupsize,ntmsg->bcdata,ntmsg->bcsize,args,n,
               ntmsg->out_sockid,&epid);

      endpoint_send(endpt,msg->source,MSG_NEWTASKRESP,
                    &epid.localid,sizeof(int));
      array_free(args);
      break;
    }
    case MSG_START_CHORD: {
      start_chord_msg *m = (start_chord_msg*)msg->data;
      assert(sizeof(start_chord_msg) == msg->size);
      start_chord(n,0,m->initial,m->caller,m->stabilize_delay);
      break;
    }
    case MSG_STARTGC: {
      startgc_msg *sgm = (startgc_msg*)msg->data;
      if (sizeof(startgc_msg) > msg->size)
        fatal("STARTGC: invalid message size");
      /* manager_startgc() reads sgm->count endpointids from the message, so
         the payload must actually contain that many after the header */
      if ((0 > sgm->count) ||
          ((msg->size-sizeof(startgc_msg))/sizeof(endpointid) < sgm->count))
        fatal("STARTGC: invalid task count");
      manager_startgc(n,endpt,sgm,msg->source);
      break;
    }
    case MSG_GET_TASKS:
      assert(sizeof(get_tasks_msg) == msg->size);
      manager_get_tasks(n,endpt,(get_tasks_msg*)msg->data);
      break;
    case MSG_KILL:
      node_log(n,LOG_INFO,"Manager received KILL");
      done = 1;
      break;
    default:
      fatal("manager: unexpected message %d",msg->tag);
      break;
    }
    message_free(msg);
  }
}
Ejemplo n.º 11
0
/*
 * Advances the running debug round by one step. Must only be called while a
 * round is in progress (asserted).
 *
 * - If the current node still has lookups left (cur_lookup is below
 *   TEST_LOOKUPS_PER_NODE), a MSG_FIND_SUCCESSOR for a random id in
 *   [0, KEYSPACE) is sent to that node and the id is remembered in
 *   chk->lookup_id.
 * - Otherwise the lookup counter is reset and the round moves to the next
 *   node. If there is one, it is sent MSG_GET_TABLE and node_start is
 *   refreshed.
 * - If every node has been visited, one line of statistics is printed,
 *   MSG_DEBUG_DONE is sent to the caller, the round is marked finished and the
 *   iteration counter is incremented.
 */
static void check_check_next(check *chk)
{
  assert(chk->indebug);

  if (chk->cur_lookup < TEST_LOOKUPS_PER_NODE) {
    /* More lookups to issue against the current node */
    chordid target = rand()%KEYSPACE;
    find_successor_msg lookup;

    lookup.id = target;
    lookup.sender = chk->endpt->epid;
    lookup.hops = 0;
    lookup.payload = 0;
    endpoint_send(chk->endpt,chk->nodes[chk->cur_node].epid,
                  MSG_FIND_SUCCESSOR,&lookup,sizeof(find_successor_msg));
    chk->lookup_id = target;
    return;
  }

  /* Done with this node; move on to the next one */
  chk->cur_lookup = 0;
  chk->cur_node++;

  if (chk->cur_node < chk->ncount) {
    get_table_msg request;

    #ifdef CHORDTEST_TIMING
    struct timeval now;
    gettimeofday(&now,NULL);
    printf("check node %d took %d\n",chk->cur_node-1,timeval_diffms(chk->node_start,now));
    #endif

    request.sender = chk->endpt->epid;
    gettimeofday(&chk->node_start,NULL);
    endpoint_send(chk->endpt,chk->nodes[chk->cur_node].epid,MSG_GET_TABLE,
                  &request,sizeof(get_table_msg));
  }
  else {
    /* All nodes visited: report the statistics gathered during this round */
    struct timeval now;
    struct timeval elapsed;
    double seconds;
    double node_total = (double)chk->ncount;
    double lookup_total = (double)chk->total_lookups;

    gettimeofday(&now,NULL);
    elapsed = timeval_diff(chk->start,now);
    seconds = (double)elapsed.tv_sec + ((double)elapsed.tv_usec)/1000000.0;

    #ifdef CHORDTEST_TIMING
    printf("check round took %d\n",timeval_diffms(chk->debug_start,now));
    #endif

    printf("%.3f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %6d check %d\n",
           seconds,
           100.0*chk->incorrect_nodes/node_total,
           100.0*chk->incorrect_successor/node_total,
           100.0*chk->incorrect_links/node_total,
           100.0*chk->incorrect_fingers/(double)(MBITS*chk->ncount),
           100.0*chk->total_bad/lookup_total,
           chk->total_hops/lookup_total,
           100.0*chk->incorrect_succlist/(double)(NSUCCESSORS*chk->ncount),
           chk->ncount,
           chk->iterations);

    endpoint_send(chk->endpt,chk->caller,MSG_DEBUG_DONE,NULL,0);
    chk->indebug = 0;
    chk->iterations++;
  }
}
Ejemplo n.º 12
0
/*
 * Begins a debug round on behalf of `caller`. Must not be called while a
 * round is already in progress (asserted).
 *
 * Resets the per-round position and statistics in chk, records the caller and
 * the start time, and then does one of three things:
 *
 *  - If joins are still pending, prints a notice and ends the round
 *    immediately via check_abort().
 *  - On every DISRUPT_INTERVAL-th iteration (from iteration 1 on) the round is
 *    used to disturb the network instead of checking it. On even multiples,
 *    JOIN_COUNT add_node() requests are issued, each to a randomly chosen
 *    manager and all naming one randomly chosen existing node as the initial
 *    node. On odd multiples, MSG_KILL is sent to randomly chosen, distinct
 *    nodes. Either way the iteration counter is incremented and the round is
 *    ended via check_abort().
 *  - Otherwise checking starts by sending MSG_GET_TABLE to the first node.
 *
 * Robustness notes:
 *  - The number of nodes killed is min(KILL_COUNT, chk->ncount). Picking
 *    KILL_COUNT distinct indices out of fewer nodes can never succeed, and
 *    with no nodes at all the modulo would divide by zero.
 *  - Nodes are only added when there is at least one existing node and one
 *    manager to choose from, for the same division-by-zero reason.
 *  - If the index buffer cannot be allocated the kill is skipped with a
 *    message rather than dereferencing NULL.
 *
 * NOTE(review): the final branch still assumes chk->nodes[0] exists.
 */
static void check_debug_start(check *chk, endpointid *caller)
{
  assert(!chk->indebug);
  chk->indebug = 1;
  memcpy(&chk->caller,caller,sizeof(endpointid));
  chk->cur_node = 0;
  chk->cur_lookup = 0;

  chk->incorrect_nodes = 0;
  chk->incorrect_successor = 0;
  chk->incorrect_links = 0;
  chk->incorrect_fingers = 0;
  chk->incorrect_succlist = 0;
  chk->total_bad = 0;
  chk->total_hops = 0;
  chk->total_lookups = 0;

  gettimeofday(&chk->debug_start,NULL);

  if (0 < chk->pending_joins) {
    printf("skipping debug, due to %d pending joins\n",chk->pending_joins);
    check_abort(chk);
  }
  else if ((1 <= chk->iterations) && (0 == (chk->iterations % DISRUPT_INTERVAL))) {
    if (0 == (chk->iterations % (2*DISRUPT_INTERVAL))) {
      /* Add some new nodes; needs an existing node and a manager to pick */
      if ((0 < chk->ncount) && (0 < chk->nmanagers)) {
        endpointid initial = chk->nodes[rand()%chk->ncount].epid;
        int i;
        for (i = 0; i < JOIN_COUNT; i++) {
          add_node(chk->n,chk->endpt,chk->managerids[rand()%chk->nmanagers],initial);
          chk->pending_joins++;
        }
      }
    }
    else {
      /* Kill some existing nodes */
      int nkill = KILL_COUNT;
      int *killindices = NULL;
      int i;
      int j;

      /* Never ask for more distinct victims than there are nodes */
      if (chk->ncount < nkill)
        nkill = chk->ncount;

      if (0 < nkill)
        killindices = (int*)calloc(nkill,sizeof(int));

      if (NULL != killindices) {
        /* Draw random indices, retrying whenever one was already chosen */
        for (i = 0; i < nkill; i++) {
          int have;
          int index;
          do {
            have = 0;
            index = rand()%chk->ncount;

            for (j = 0; j < i; j++)
              if (killindices[j] == index)
                have = 1;
          } while (have);
          killindices[i] = index;
        }

        for (i = 0; i < nkill; i++) {
          endpoint_send(chk->endpt,chk->nodes[killindices[i]].epid,MSG_KILL,NULL,0);
        }

        free(killindices);
      }
      else if (0 < nkill) {
        printf("skipping node kill, out of memory\n");
      }
    }

    chk->iterations++;
    check_abort(chk);
  }
  else {
    get_table_msg gtm;
    gtm.sender = chk->endpt->epid;
    gettimeofday(&chk->node_start,NULL);
    endpoint_send(chk->endpt,chk->nodes[0].epid,MSG_GET_TABLE,&gtm,sizeof(gtm));
  }
}