Example #1
int exa_rdev_static_init(rdev_static_op_t op)
{
    EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_OP_INVALID, "static data already initialized");

    EXA_ASSERT_VERBOSE(op == RDEV_STATIC_CREATE || op == RDEV_STATIC_GET,
                       "invalid static init op: %d", op);

    init_op = op;

    return 0;
}
Example #2
void __serverd_perf_sensor_init(void)
{
    EXA_ASSERT_VERBOSE(eh != NULL, "Exaperf handle is nil");

    req_size_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_REQ_SIZE_READ",
                                                  NB_REPART, limits_nbd_server_req);
    req_size_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_REQ_SIZE_WRITE",
                                                   NB_REPART, limits_nbd_server_req);

    inter_arrival_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_INTERARRIVAL_READ",
                                                       NB_REPART_INTER, limits_inter);
    inter_arrival_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_INTERARRIVAL_WRITE",
                                                        NB_REPART_INTER, limits_inter);

    lba_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_LBA_READ",
                                             NB_REPART_LBA, limits_lba);
    lba_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_LBA_WRITE",
                                              NB_REPART_LBA, limits_lba);

    distance_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_DISTANCE_READ",
                                                  NB_REPART_DIST, limits_dist);
    distance_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_DISTANCE_WRITE",
                                                   NB_REPART_DIST, limits_dist);

    header_dur[__READ] = exaperf_duration_init(eh, "NBD_SERVER_HEADER_DUR_READ", true);
    header_dur[__WRITE] = exaperf_duration_init(eh, "NBD_SERVER_HEADER_DUR_WRITE", true);

    data_dur = exaperf_duration_init(eh, "NBD_SERVER_DATA_DUR_WRITE", true);
}
Example #3
int exa_select_out(exa_select_handle_t *h, fd_set *set)
{
#if WIN32
    int nb_sock;

    EXA_ASSERT_VERBOSE(h->magic == EXA_SELECT_MAGIC,
                       "Corrupted handle detected %d", h->magic);

    nb_sock = os_select(0 /* ignored */, NULL, set, NULL, &select_timeout);

    if (nb_sock == 0) /* timeout is reached */
    {
        /* reset set because there was actually no event on sockets */
        FD_ZERO(set);
        return -EFAULT;
    }

    return nb_sock > 0 ? 0 : nb_sock;
#else
    if (ioctl(h->fd, EXA_SELECT_OUT, set) == -1)
        return -errno;

    return 0;
#endif
}
Example #4
exa_select_handle_t *exa_select_new_handle(void)
{
    exa_select_handle_t *h = os_malloc(sizeof(exa_select_handle_t));
    EXA_ASSERT(h != NULL);
#if WIN32
    h->magic = EXA_SELECT_MAGIC;
#else
    h->fd = open(EXACOMMON_MODULE_PATH, O_RDWR);
    EXA_ASSERT_VERBOSE(h->fd >= 0, "Cannot bind to exa_common module: %s (%d)",
                       os_strerror(errno), -errno);

    EXA_ASSERT_VERBOSE(ioctl(h->fd, EXA_SELECT_MAL) != -1,
                       "Cannot register to exa_common module: %s (%d)",
                       os_strerror(errno), -errno);
#endif
    return h;
}
Example #5
static int
vrt_cmd_group_event(const struct VrtGroupEvent *event_msg)
{
    int retval = -EINVAL;
    struct vrt_group *group;

    group = vrt_get_group_from_uuid(&event_msg->group_uuid);
    if (!group)
    {
	exalog_debug("group " UUID_FMT " not found",
                     UUID_VAL(&event_msg->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }
    switch(event_msg->event)
    {
    case VRT_GROUP_RESUME:
	retval = vrt_group_resume(group);
	break;

    case VRT_GROUP_SUSPEND_METADATA_AND_REBUILD:
        vrt_group_metadata_thread_suspend(group);
        vrt_group_rebuild_thread_suspend(group);
        retval = 0;
	break;

    case VRT_GROUP_RESUME_METADATA_AND_REBUILD:
        vrt_group_metadata_thread_resume(group);
        vrt_group_rebuild_thread_resume(group);
        retval = 0;
	break;

    case VRT_GROUP_COMPUTESTATUS:
	retval = vrt_group_compute_status(group);
	break;

    case VRT_GROUP_WAIT_INITIALIZED_REQUESTS:
	vrt_group_wait_initialized_requests (group);
	retval = EXA_SUCCESS;
	break;

    case VRT_GROUP_POSTRESYNC:
	retval = vrt_group_post_resync(group);
	break;

    default :
	EXA_ASSERT_VERBOSE(0, "struct VrtGroupEvent: Unknown event type %d\n",
			   event_msg->event);
    }

    vrt_group_unref(group);

    return retval;
}
Example #6
void exa_rdev_static_clean(rdev_static_op_t op)
{
    /* Initialization not performed, nothing to clean */
    if (init_op == RDEV_STATIC_OP_INVALID)
        return;

    EXA_ASSERT_VERBOSE(op == RDEV_STATIC_RELEASE || op == RDEV_STATIC_DELETE,
                       "invalid static clean op: %d", op);

    if (op == RDEV_STATIC_DELETE)
    {
        EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_CREATE,
                           "deletion of static data by non-owner");
    }
    else /* RDEV_STATIC_RELEASE */
    {
        EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_GET,
                           "release of static data by owner");
    }

    init_op = RDEV_STATIC_OP_INVALID;
}
Example #7
void exa_select_delete_handle(exa_select_handle_t *h)
{
    if (h == NULL)
        return;

#if WIN32
    EXA_ASSERT_VERBOSE(h->magic == EXA_SELECT_MAGIC,
                       "Corrupted handle detected %d", h->magic);
#else
    close(h->fd);
#endif

    os_free(h);
}
Example #8
/* Handles device events; the REBUILD and RECOVER cases are not
   handled here yet. */
static int
vrt_cmd_device_event(const struct VrtDeviceEvent *event_msg)
{
    int retval = -EINVAL;
    struct vrt_group *group;
    struct vrt_realdev *rdev;

    group = vrt_get_group_from_uuid(&event_msg->group_uuid);
    if (!group)
    {
	exalog_debug("group " UUID_FMT " not found",
                     UUID_VAL(&event_msg->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    rdev = storage_get_rdev(group->storage, &event_msg->rdev_uuid);
    if (!rdev)
    {
	exalog_debug("rdev " UUID_FMT " not found", UUID_VAL(&event_msg->rdev_uuid));
	return -VRT_ERR_OLD_RDEVS_MISSING;
    }

    switch (event_msg->event)
    {
    case VRT_DEVICE_DOWN:
        retval = vrt_group_rdev_down(group, rdev);
        break;

    case VRT_DEVICE_UP:
        retval = vrt_group_rdev_up(group, rdev);
        break;

    case VRT_DEVICE_REINTEGRATE:
        retval = vrt_group_reintegrate_rdev(group, rdev);
        break;

    case VRT_DEVICE_POST_REINTEGRATE:
        retval = vrt_group_post_reintegrate_rdev(group, rdev);
        break;

    default:
        EXA_ASSERT_VERBOSE(0, "struct VrtDeviceEvent: Unknown event type %d\n",
                           event_msg->event);
    }

    vrt_group_unref(group);

    return retval;
}
Example #9
/*-------------------------------------------------------------------------*/
static void
close_connection(int connection_id)
{
  int cli_fd = connectlist[connection_id].fd;

  EXA_ASSERT_VERBOSE(cli_fd > 2, "fd = %d", cli_fd);

  FD_CLR(cli_fd, &setSocks);
  os_closesocket(cli_fd);
  exalog_debug("CONNECTION: %d  Socket %d closed state_work NOT_USED",
               connection_id, cli_fd);

  /* Do not reset uid and free field because there may be a command still
   * running on the worker thread. Those fields will be reset when it ends. */
  connectlist[connection_id].fd = -1;
}
Example #10
/**
 * Main loop.
 */
static void
loop(void)
{
  static struct timespec last_time;

  __trace("marking self as alive");

  os_get_monotonic_time(&last_time);

  /* We *always* see ourselves */
  mark_alive(self);
  do_ping = true;

  while (!csupd_quit())
    {
      struct timespec now;
      sup_ping_t ping;

      os_get_monotonic_time(&now);

      /* If the node was detected as frozen for more than half a ping_timeout,
       * we abort, because this behaviour is not acceptable (byzantine) */
      EXA_ASSERT_VERBOSE(
             difftime(now.tv_sec, last_time.tv_sec) <= (ping_timeout + 1) / 2,
             "Node frozen for %lu seconds. Aborting",
             (unsigned long)difftime(now.tv_sec, last_time.tv_sec));
      last_time = now;

      if (do_ping)
        {
          do_ping = false;

          check_admind();

          sup_pre_ping();
          sup_send_ping(&cluster, &self->view);
          sup_post_ping();
        }

      /* wait for an event and process it */
      if (sup_recv_ping(&ping))
        sup_process_ping(&ping);
    }

  sup_view_debug(&self->view);
  __trace("I am seen down, bye bye");
}
Example #11
/**
 * Deliver to Evmgr the membership calculated by Csupd.
 *
 * \param[in] gen    Generation number
 * \param[in] mship  Membership to deliver
 *
 * \return 0 if successful, negative error code otherwise
 */
int
sup_deliver(sup_gen_t gen, const exa_nodeset_t *mship)
{
  int ret;
  SupEventMshipChange msg;

  msg.any.type = EXAMSG_SUP_MSHIP_CHANGE;
  msg.gen = gen;
  exa_nodeset_copy(&msg.mship, mship);

  ret = examsgSendNoBlock(sup_mh, EXAMSG_ADMIND_EVMGR_ID, EXAMSG_LOCALHOST, &msg,
                          sizeof(SupEventMshipChange));
  EXA_ASSERT_VERBOSE(ret == sizeof(SupEventMshipChange),
                     "Unable to deliver membership to the evmgr (%d)", ret);

  return 0;
}
Example #12
void assembly_volume_map_sector_to_slot(const assembly_volume_t *av, uint64_t slot_size,
                                        uint64_t vsector,
                                        unsigned int *slot_index,
                                        uint64_t *offset_in_slot)
{
    uint64_t volume_slot_index;

    EXA_ASSERT_VERBOSE(vsector < av->total_slots_count * slot_size,
                       "vsector=%" PRIu64 ", av->total_slots_count=%" PRIu64 ", slot_size=%" PRIu64 "\n",
                       vsector, av->total_slots_count, slot_size);

    /* Compute the index of the slot in the volume slot array */
    volume_slot_index = vsector / slot_size;
    EXA_ASSERT(volume_slot_index < av->total_slots_count);

    /* Compute the offset in the slot */
    *offset_in_slot = vsector % slot_size;

    *slot_index = volume_slot_index;
}
Example #13
int vrt_node_get_upnode_id(void)
{
    int upnode_id = -1;
    exa_nodeid_t node;

    for (node = 0; node < EXA_MAX_NODES_NUMBER; node++)
    {
        if (exa_nodeset_contains(&nodes_up, node))
        {
            upnode_id++;
            if (node == vrt_node_id)
                return upnode_id;
        }
    }

    EXA_ASSERT_VERBOSE(false,
                       "Upnode ID not found. upnode_id=%d vrt_node_get_upnodes_count=%d vrt_node_id=%d",
                       upnode_id,
                       vrt_node_get_upnodes_count(),
                       vrt_node_id);
    return 0;
}
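The function maps a node id to its dense rank among the nodes currently up. For illustration (hypothetical membership): with nodes_up = {1, 4, 7} and vrt_node_id = 4, the loop counts node 1 (upnode_id = 0), then reaches node 4 (upnode_id = 1) and returns 1.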
Example #14
static void set_peer_socket(exa_nodeid_t node_id, const char *ip_addr, int sock)
{
    peer_t *peer;

    exalog_debug("setting socket of peer %"PRInodeid": %d '%s'", node_id, sock, ip_addr);

    os_thread_mutex_lock(&peers_lock);

    peer = &peers[node_id];

    EXA_ASSERT(peer->sock == -1);
    /* A node's IP address is not supposed to change during the lifetime of a
       cluster (i.e., the node id <-> IP address mapping is bijective), so we
       assert if the received IP doesn't match the one registered */
    EXA_ASSERT_VERBOSE(strcmp(ip_addr, peer->ip_addr) == 0,
                       "peer %"PRInodeid": received addr %s, expected %s",
                       node_id, ip_addr, peer->ip_addr);

    peer->sock = sock;

    os_thread_mutex_unlock(&peers_lock);
}
Example #15
void vrt_cmd_handle_message(const vrt_cmd_t *recv, vrt_reply_t *reply)
{

    EXA_ASSERT_VERBOSE(VRTRECV_TYPE_IS_VALID(recv->type),
                       "Data type %d is unknown.", recv->type);

    switch (recv->type)
    {
    case VRTRECV_NODE_SET_UPNODES:
        reply->retval = vrt_cmd_node_set_upnodes(&recv->d.vrt_set_nodes_status);
        break;

    case VRTRECV_DEVICE_EVENT:
        reply->retval = vrt_cmd_device_event(&recv->d.vrt_device_event);
        break;

    case VRTRECV_DEVICE_REPLACE:
        reply->retval = vrt_cmd_device_replace(&recv->d.vrt_device_replace);
        break;

    case VRTRECV_GET_VOLUME_STATUS:
        reply->retval = vrt_cmd_get_volume_status(&recv->d.vrt_get_volume_status);
        break;

    case VRTRECV_GROUP_ADD_RDEV:
        reply->retval = vrt_cmd_group_add_rdev(&recv->d.vrt_group_add_rdev);
        break;

    case VRTRECV_GROUP_BEGIN:
        reply->retval = vrt_cmd_group_begin(&recv->d.vrt_group_begin);
        break;

    case VRTRECV_GROUP_CREATE:
        reply->retval = vrt_cmd_group_create(&recv->d.vrt_group_create, &reply->group_create);
        break;

    case VRTRECV_GROUP_EVENT:
        reply->retval = vrt_cmd_group_event(&recv->d.vrt_group_event);
        break;

    case VRTRECV_GROUP_START:
        reply->retval = vrt_cmd_group_start(&recv->d.vrt_group_start);
        break;

    case VRTRECV_GROUP_STOP:
        reply->retval = vrt_cmd_group_stop(&recv->d.vrt_group_stop);
        break;

    case VRTRECV_GROUP_INSERT_RDEV:
        reply->retval = vrt_cmd_group_insert_rdev(&recv->d.vrt_group_insert_rdev);
        break;

    case VRTRECV_GROUP_STOPPABLE:
        reply->retval = vrt_cmd_group_stoppable(&recv->d.vrt_group_stoppable);
        break;

    case VRTRECV_GROUP_GOING_OFFLINE:
        reply->retval = vrt_cmd_group_going_offline(&recv->d.vrt_group_going_offline);
        break;

    case VRTRECV_GROUP_SYNC_SB:
        reply->retval = vrt_cmd_group_sync_sb(&recv->d.vrt_group_sync_sb);
        break;

    case VRTRECV_GROUP_FREEZE:
        reply->retval = vrt_cmd_group_freeze(&recv->d.vrt_group_freeze);
        break;

    case VRTRECV_GROUP_UNFREEZE:
        /* Group unfreeze has already been caught by the multiplexer thread */
        EXA_ASSERT(FALSE);
        break;

    case VRTRECV_VOLUME_CREATE:
        reply->retval = vrt_cmd_volume_create(&recv->d.vrt_volume_create);
        break;

    case VRTRECV_VOLUME_DELETE:
        reply->retval = vrt_cmd_volume_delete(&recv->d.vrt_volume_delete);
        break;

    case VRTRECV_VOLUME_RESIZE:
        reply->retval = vrt_cmd_volume_resize(&recv->d.vrt_volume_resize);
        break;

    case VRTRECV_VOLUME_START:
        reply->retval = vrt_cmd_volume_start(&recv->d.vrt_volume_start);
        break;

    case VRTRECV_VOLUME_STOP:
        reply->retval = vrt_cmd_volume_stop(&recv->d.vrt_volume_stop);
        break;

    case VRTRECV_PENDING_GROUP_CLEANUP:
        reply->retval = vrt_cmd_pending_group_cleanup();
        break;

    case VRTRECV_GROUP_RESET:
        reply->retval = vrt_cmd_group_reset(&recv->d.vrt_group_reset);
        break;

    case VRTRECV_GROUP_CHECK:
        reply->retval = vrt_cmd_group_check(&recv->d.vrt_group_check);
        break;

    case VRTRECV_GROUP_RESYNC:
        reply->retval = vrt_cmd_group_resync(&recv->d.vrt_group_resync);
        break;

    case VRTRECV_ASK_INFO:
    case VRTRECV_STATS:
        EXA_ASSERT_VERBOSE(FALSE,
                "Type %s (%d) should not be handled by this thread\n",
                recv->type == VRTRECV_STATS ? "stats" : "info", recv->type);
    }
}
Example #16
int adm_vrt_group_sync_sb(int thr_nb, struct adm_group *group)
{
  struct {
    bool group_is_started;
    bool can_write;
    bool have_disk_in_group;
  } info, reply;

  exa_nodeid_t nid;
  bool group_is_started_somewhere = false;
  int ret;
  int barrier_ret = EXA_SUCCESS;
  admwrk_request_t rpc;
  struct adm_disk *disk;
  int nb_nodes_with_writable_disks = 0;
  int nb_nodes_with_disks_in_group = 0;

  uint64_t old_sb_version, new_sb_version;

  COMPILE_TIME_ASSERT(sizeof(info) <= ADM_MAILBOX_PAYLOAD_PER_NODE);

  /* XXX maybe checking started is useless as administrable => started
   * and !administrable => return */
  info.group_is_started = group->started;
  info.can_write = false;
  info.have_disk_in_group = false;

  adm_group_for_each_disk(group, disk)
  {
      if (disk->node_id == adm_my_id)
      {
          info.have_disk_in_group = true;

          if (disk->up_in_vrt)
              info.can_write = true;
      }
  }

  admwrk_bcast(thr_nb, &rpc, EXAMSG_SERVICE_VRT_SB_SYNC, &info, sizeof(info));
  while (admwrk_get_bcast(&rpc, &nid, &reply, sizeof(reply), &ret))
  {
    if (ret == -ADMIND_ERR_NODE_DOWN)
    {
      barrier_ret = -ADMIND_ERR_NODE_DOWN;
      continue;
    }

    EXA_ASSERT(ret == EXA_SUCCESS);

    if (reply.can_write)
        nb_nodes_with_writable_disks++;

    if (reply.have_disk_in_group)
        nb_nodes_with_disks_in_group++;

    if (reply.group_is_started)
        group_is_started_somewhere = true;
  }

  if (barrier_ret != EXA_SUCCESS)
    return barrier_ret;

  /* do not write superblocks if the group is stopped on all nodes */
  if (!group_is_started_somewhere)
    return EXA_SUCCESS;

  if (nb_nodes_with_writable_disks < quotient_ceil64(nb_nodes_with_disks_in_group, 2))
      return -VRT_ERR_GROUP_NOT_ADMINISTRABLE;

  old_sb_version = sb_version_get_version(group->sb_version);
  new_sb_version = sb_version_new_version_prepare(group->sb_version);

  if (group->started)
  {
      ret = vrt_client_group_sync_sb(adm_wt_get_localmb(),
                                     &group->uuid, old_sb_version, new_sb_version);

      EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS || ret == -ADMIND_ERR_NODE_DOWN,
                         "Synchronization of superblocks failed for group '%s' "
                         "UUID=" UUID_FMT ": %s (%d)", group->name,
                         UUID_VAL(&group->uuid), exa_error_msg(ret), ret);
  }
  else
      ret = EXA_SUCCESS;

  barrier_ret = admwrk_barrier(thr_nb, ret, "VRT: Preparing superblocks version");
  if (barrier_ret != EXA_SUCCESS)
    return barrier_ret;

  sb_version_new_version_done(group->sb_version);

  barrier_ret = admwrk_barrier(thr_nb, EXA_SUCCESS, "VRT: Writing superblocks version");

  /* Commit anyway: if we are here, we are sure that the other nodes have
   * done the job too, even if they crashed meanwhile */
  sb_version_new_version_commit(group->sb_version);

  return barrier_ret;
}
Example #17
static void
local_exa_vldelete(int thr_nb, void *msg)
{
  struct adm_group *group;
  struct adm_volume *volume = NULL;
  int ret = EXA_SUCCESS; /* used for local function calls */
  int barrier_ret; /* used for barriers return values */
  int undo_ret;
  struct vldelete_info *info = msg;

  group = adm_group_get_group_by_name(info->group_name);
  if (group == NULL)
  {
    ret = -ADMIND_ERR_UNKNOWN_GROUPNAME;
    goto get_barrier;
  }

  volume = adm_group_get_volume_by_name(group, info->volume_name);
  if (volume == NULL)
  {
    ret = -ADMIND_ERR_UNKNOWN_VOLUMENAME;
    goto get_barrier;
  }

get_barrier:
  /*** Barrier: getting parameters ***/
  /* ret carries any lookup error from above (or EXA_SUCCESS) */
  barrier_ret = admwrk_barrier(thr_nb, ret, "Getting parameters");
  if (barrier_ret != EXA_SUCCESS)
    goto local_exa_vldelete_end_no_resume;

  ret = vrt_group_suspend_threads_barrier(thr_nb, &group->uuid);
  if (ret != EXA_SUCCESS)
      goto local_exa_vldelete_end;

  /*** Action: mark the transaction as in-progress ***/
  /* This is an in-memory operation, we assume it won't fail */
  volume->committed = false;
  ret = conf_save_synchronous();
  EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS, "%s", exa_error_msg(ret));

  /*** Barrier: mark the transaction as in-progress ***/
  barrier_ret = admwrk_barrier(thr_nb, ret, "Marking transaction as in-progress");
  if (barrier_ret == -ADMIND_ERR_NODE_DOWN)
    goto metadata_corruption;
  else if (barrier_ret != EXA_SUCCESS)
    goto local_exa_vldelete_end;

  /* XXX should errors be handled ? */
  lum_exports_remove_export_from_uuid(&volume->uuid);
  lum_exports_increment_version();
  lum_serialize_exports();

  /*** Action: delete the volume (in memory) through the VRT API ***/
  ret = vrt_client_volume_delete(adm_wt_get_localmb(), &group->uuid, &volume->uuid);

  /*** Barrier: delete the volume through the VRT API ***/
  barrier_ret = admwrk_barrier(thr_nb, ret, "Deleting volume");
  if (barrier_ret == -ADMIND_ERR_NODE_DOWN)
    goto metadata_corruption;
  else if (barrier_ret == -VRT_ERR_GROUP_NOT_ADMINISTRABLE)
    {
      /* Mark the transaction as committed, so that the volume is not
       * shown as "invalid" later.
       */
      volume->committed = true;
      undo_ret = conf_save_synchronous();
      EXA_ASSERT_VERBOSE(undo_ret == EXA_SUCCESS, "%s", exa_error_msg(undo_ret));
      goto local_exa_vldelete_end;
    }
  else if ((barrier_ret != EXA_SUCCESS) && !info->metadata_recovery)
    goto local_exa_vldelete_end;

  /*** Action: group sync SB (master) ***/
  ret = adm_vrt_group_sync_sb(thr_nb, group);

  /*** Barrier: group sync SB ***/
  barrier_ret = admwrk_barrier(thr_nb, ret, "Syncing metadata on disk");
  if (barrier_ret == -ADMIND_ERR_NODE_DOWN)
    goto metadata_corruption;
  else if (barrier_ret != EXA_SUCCESS)
    goto local_exa_vldelete_end;

  /* Delete the volume from the configuration */
  adm_group_remove_volume(volume);
  adm_volume_free(volume);
  ret = conf_save_synchronous();
  EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS, "%s", exa_error_msg(ret));

  barrier_ret = admwrk_barrier(thr_nb, ret, "Updating XML configuration");
  if (barrier_ret == -ADMIND_ERR_NODE_DOWN)
    goto metadata_corruption;

  goto local_exa_vldelete_end;

metadata_corruption:
  ret = -ADMIND_ERR_METADATA_CORRUPTION;

local_exa_vldelete_end:
  barrier_ret = vrt_group_resume_threads_barrier(thr_nb, &group->uuid);
  /* What to do if that fails... I don't know. */
  if (barrier_ret != 0)
    ret = barrier_ret;

local_exa_vldelete_end_no_resume:
  exalog_debug("local_exa_vldelete() = %s", exa_error_msg(ret));
  admwrk_ack(thr_nb, ret);
}
Example #18
static void
check_internal_msg(void)
{
  struct timeval timeout = { .tv_sec = 0, .tv_usec = EXAMSG_TIMEOUT };
  static Examsg msg;
  command_end_t *end;
  int i, ret;

  ret = examsgWaitTimeout(cli_mh, &timeout);

  if (ret < 0 && ret != -ETIME)
    {
      exalog_error("Message wait failed %s (%d)",
	           exa_error_msg(ret), ret);
      return;
    }

  if (ret == -ETIME)
    return;

  ret = examsgRecv(cli_mh, NULL, &msg, sizeof(msg));
  if (ret == 0)
    return;

  EXA_ASSERT_VERBOSE(ret > 0, "Message receive failed: %s (%d)",
                     exa_error_msg(ret), ret);

  /* The CLI server can only receive EXAMSG_ADM_CLUSTER_CMD_END messages for now */
  EXA_ASSERT(msg.any.type == EXAMSG_ADM_CLUSTER_CMD_END);

  end = (command_end_t *)msg.payload;
  for (i = 0; i < MAX_CONNECTION; i++)
    if (end->cuid == connectlist[i].uid)
      {
        cli_command_end_complete(connectlist[i].fd, &end->err_desc);
        connectlist[i].uid = CMD_UID_INVALID;
        break;
      }
}

static void
check_tcp_connection(void)
{
  static struct timeval timeout = { .tv_sec = 0, .tv_usec = 0 };
  fd_set setSave = setSocks;
  int ret, conn_id;

  do
    ret = os_select(FD_SETSIZE, &setSave, NULL, NULL, &timeout);
  while (ret == -EINTR);

  if (ret < 0)
    {
      /* FIXME should assert ? */
      exalog_debug("Select failed %m");
      return;
    }

  /* Check working sockets */
  for (conn_id = 0; conn_id < MAX_CONNECTION; ++conn_id)
    {
      int sock_fd = connectlist[conn_id].fd;
      if (sock_fd >= 0 && FD_ISSET(sock_fd, &setSave))
        handle_inputdata(conn_id, sock_fd);
    }

  /* Must be done at the end to make sure messages for current
   * working threads are processed first */
  if (FD_ISSET(listen_fd, &setSave))
    accept_new_client();
}

/*-------------------------------------------------------------------------*/
/** \brief Connection thread: wait for XML messages and pass the commands
 * to the worker thread.
 *
 * \param[in] data: unused.
 */
/*-------------------------------------------------------------------------*/
static void
cli_server(void *data)
{
  int i;

  /* Initialize exalog */
  exalog_as(EXAMSG_ADMIND_ID);
  exalog_debug("cli_server: started");

  /* Initialization */
  FD_ZERO(&setSocks);
  FD_SET(listen_fd, &setSocks);

  for (i = 0; i < MAX_CONNECTION; i++)
    {
      connectlist[i].fd  = -1;
      /* A command cannot be CMD_UID_INVALID, so CMD_UID_INVALID means here
       * no command running */
      connectlist[i].uid = CMD_UID_INVALID;
    }

  while (!stop)
    {
      check_tcp_connection();
      check_internal_msg();
    }

  os_closesocket(listen_fd);

  os_net_cleanup();

  examsgDelMbox(cli_mh, EXAMSG_ADMIND_CLISERVER_ID);
  examsgExit(cli_mh);
}

int
cli_server_start(void)
{
  listen_fd = listen_socket_port(ADMIND_SOCKET_PORT);
  if (listen_fd < 0)
    return listen_fd;

  cli_mh = examsgInit(EXAMSG_ADMIND_CLISERVER_ID);
  if (!cli_mh)
    return -EINVAL;

  /* The mailbox needs to be able to receive command end messages from the
   * event manager; as there can be at most MAX_CONNECTION client connections,
   * we can receive at most MAX_CONNECTION command end messages at a time. */
  examsgAddMbox(cli_mh, EXAMSG_ADMIND_CLISERVER_ID, MAX_CONNECTION,
                sizeof(command_end_t));

  stop = false;

  if (!exathread_create_named(&thr_xml_proto,
                              ADMIND_THREAD_STACK_SIZE + MIN_THREAD_STACK_SIZE,
                              &cli_server, NULL, "exa_adm_xml"))
      return -EXA_ERR_DEFAULT;

  return EXA_SUCCESS;
}
Example #19
/**
 * Process a connection that has incoming data.
 *
 * \param[in] conn_id  Connection id
 * \param[in] sock_fd  Connection socket
 */
static void
handle_inputdata(int conn_id, int sock_fd)
{
  char *buffer = NULL;
  void *data = NULL;
  size_t data_size;
  adm_command_code_t cmd_code;
  const struct AdmCommand *command;
  exa_uuid_t cluster_uuid;
  cl_error_desc_t err_desc;
  int retval;

  /* Receive the xml tree parsed in message */
  retval = receive(sock_fd, &buffer);
  if (retval < 0)
    {
      if (retval == -ECONNRESET || retval == -ECONNABORTED)
        exalog_debug("CONNECTION %d: An error occurred: %s (%d)",
                     conn_id, exa_error_msg(retval), retval);
      else
        exalog_error("Socket %d peer '%s': An error occurred: %s (%d)",
                     sock_fd, cli_peer_from_fd(sock_fd), exa_error_msg(retval),
                     retval);

      close_connection(conn_id);
      return;
    }

  /* Parse the tree we just received and get a newly allocated payload data */
  xml_command_parse(buffer, &cmd_code, &cluster_uuid,
                    &data, &data_size, &err_desc);

  /* buffer is now parsed, let's free it */
  os_free(buffer);

  if (err_desc.code != EXA_SUCCESS)
    {
      /* No need to free data buffer if parsing returned an error */
      exalog_error("Failed to parse command on socket %d (from peer '%s'): %s (%d)",
	           sock_fd, cli_peer_from_fd(sock_fd), err_desc.msg, err_desc.code);

      cli_command_end_complete(sock_fd, &err_desc);
      return;
    }

  command = adm_command_find(cmd_code);
  EXA_ASSERT_VERBOSE(command, "Missing implementation of command #%d", cmd_code);

  connectlist[conn_id].uid = get_new_cmd_uid();
  retval = send_command_to_evmgr(connectlist[conn_id].uid, command,
                                 &cluster_uuid, data, data_size);
  if (retval < 0)
    {
      if (retval == -EXA_ERR_ADM_BUSY)
        exalog_warning("Running command %s (request from %s) failed: %s",
                       adm_command_find(cmd_code)->msg,
                       cli_get_peername(connectlist[conn_id].uid),
                       exa_error_msg(retval));
      else
        exalog_error("Running command %s (request from %s) failed: %s (%d)",
                     adm_command_find(cmd_code)->msg,
		     cli_get_peername(connectlist[conn_id].uid),
                     exa_error_msg(retval), retval);

      set_error(&err_desc, retval, NULL);
      cli_command_end_complete(sock_fd, &err_desc);

      /* the command was not scheduled, reset the uid */
      connectlist[conn_id].uid = CMD_UID_INVALID;
    }

  os_free(data);
}
Example #20
void rebuild_helper_thread(void *p)
{
  ExamsgHandle mh;
  int err;

  exalog_as(EXAMSG_NBD_SERVER_ID);

  /* initialize examsg framework */
  mh = examsgInit(EXAMSG_NBD_LOCKING_ID);
  EXA_ASSERT(mh != NULL);

  err = examsgAddMbox(mh, EXAMSG_NBD_LOCKING_ID, 1, 5 * EXAMSG_MSG_MAX);
  EXA_ASSERT(err == 0);

  os_sem_post(&nbd_server.mailbox_sem);

  while (nbd_server.run)
  {
      device_t *device;
      ExamsgNbdLock nbd_lock_msg;
      ExamsgMID from;
      struct timeval timeout = { .tv_sec = 0, .tv_usec = 100000 };
      exa_nodeset_t dest_nodes;

      err = examsgWaitTimeout(mh, &timeout);
      /* On timeout, just loop again so that we can check whether the
       * thread must stop */
      if (err == -ETIME)
          continue;

      if (err != 0)
      {
          exalog_error("Locking thread encountered error %s (%d) while "
                       "waiting in event loop.", exa_error_msg(err), err);
          continue;
      }

      err = examsgRecv(mh, &from, &nbd_lock_msg, sizeof(nbd_lock_msg));

      /* No message */
      if (err == 0)
          continue;

      if (err < 0)
      {
          exalog_error("Locking thread encountered error %s (%d) while "
                       "receiving a messsage.", exa_error_msg(err), err);
	  continue;
      }

      switch (nbd_lock_msg.any.type)
      {
      case EXAMSG_NBD_LOCK:
          /* Find the device from its UUID */
          /* FIXME devices lock is not held... it should */
          device = find_device_from_uuid(&nbd_lock_msg.disk_uuid);
          if (device == NULL)
          {
              exalog_error("Unknown device with UUID " UUID_FMT, UUID_VAL(&nbd_lock_msg.disk_uuid));
              err = -CMD_EXP_ERR_UNKNOWN_DEVICE;
              break;
          }
          if (nbd_lock_msg.lock)
          {
              err = exa_disk_lock_zone(device, nbd_lock_msg.locked_zone_start,
                                       nbd_lock_msg.locked_zone_size);
              EXA_ASSERT_VERBOSE(err == 0, "Trying to lock too many zones "
                                 "(>%d). Last zone not successfully locked "
                                 "(start = %" PRId64 ", size = %" PRId64 ") "
                                 "on device UUID " UUID_FMT, NBMAX_DISK_LOCKED_ZONES,
                                 nbd_lock_msg.locked_zone_start,
                                 nbd_lock_msg.locked_zone_size,
                                 UUID_VAL(&nbd_lock_msg.disk_uuid));
          }
          else
          {
              err = exa_disk_unlock_zone(device, nbd_lock_msg.locked_zone_start,
                                         nbd_lock_msg.locked_zone_size);
              EXA_ASSERT_VERBOSE(err == 0, "Trying to unlock a zone that was "
                                 "never locked (zone start = %" PRId64 ", "
                                 "zone size = %" PRId64 ") on device"
                                 " UUID " UUID_FMT,
                                 nbd_lock_msg.locked_zone_start,
                                 nbd_lock_msg.locked_zone_size,
                                 UUID_VAL(&nbd_lock_msg.disk_uuid));
          }
          break;

      default:
          /* error */
          EXA_ASSERT_VERBOSE(false, "Locking thread got unknown message of"
                             " type %d", nbd_lock_msg.any.type);
          break;
      }

      exa_nodeset_single(&dest_nodes, from.netid.node);
      examsgAckReply(mh, (Examsg *)&nbd_lock_msg, err, from.id, &dest_nodes);
  }

  examsgDelMbox(mh, EXAMSG_NBD_LOCKING_ID);
  examsgExit(mh);
}

/**
 * Get the number of sectors of a device.
 *
 * \param[in]  device_path  Path of the device to examine
 * \param[out] nb_sectors   Number of usable sectors of the device
 *
 * \return EXA_SUCCESS on success, a negative error code otherwise
 */
static int get_nb_sectors(const char *device_path, uint64_t *nb_sectors)
{
  uint64_t device_size; /* in bytes */
  int retval;
  int fd;

  /* We need the read access to get the size. */
  if ((fd = os_disk_open_raw(device_path, OS_DISK_READ)) < 0)
  {
    exalog_error("cannot open device '%s'  error=%s ",
                 device_path, exa_error_msg(-fd));
    return -CMD_EXP_ERR_OPEN_DEVICE;
  }

  retval = os_disk_get_size(fd, &device_size);
  if (retval < 0)
  {
    exalog_error("os_disk_get_size() error=%s", exa_error_msg(retval));
    if (close(fd) != 0)
      exalog_error("can't EVEN close dev '%s'", device_path);
    return -EXA_ERR_IOCTL;
  }

  retval = close(fd);
  if (retval < 0)
  {
    retval = -errno;
    exalog_error("cannot close device '%s' error=%s ",
                 device_path, exa_error_msg(retval));
    return -CMD_EXP_ERR_CLOSE_DEVICE;
  }

  *nb_sectors = device_size / SECTOR_SIZE;

  /* remove the size of the reserved area for storing admind info */
  *nb_sectors -= RDEV_RESERVED_AREA_IN_SECTORS;

  /* Align the size on 1 KiB. This is the best we can do to get the same
   * device size on 2.4 and 2.6 kernels, since kernel 2.4 rounds device
   * sizes to 1 KiB. */
  *nb_sectors -= *nb_sectors % (1024 / SECTOR_SIZE);

  return EXA_SUCCESS;
}
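A worked example of the size computation, with illustrative figures (512-byte sectors and a reserved area of 2,048 sectors are assumptions; the real constants live elsewhere): a device of 10,000,000,512 bytes gives 19,531,251 sectors; subtracting the 2,048 reserved sectors leaves 19,529,203; and since 1024 / SECTOR_SIZE = 2, the final modulo drops the odd trailing sector, so *nb_sectors ends up as 19,529,202.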
Example #21
/**
 * Thread processing network events (coming from other nodes).
 *
 * This routine may set the network status to down as a side-effect
 * of calling network_recv(), and sets said status to up when the
 * network comes back.
 *
 * \param[in] dummy  Unused
 *
 * \return NULL
 */
static void
net_events_routine(void *dummy)
{
  int dest_mbox;
  ExamsgMID mid;
  size_t size;
  char *msg;
  int s;

  exalog_as(EXAMSG_CMSGD_ID);
  exalog_trace("network events routine started");

  while (!quit)
    {
      int status = network_status();
      bool retry;

      if (status == -ENETDOWN)
	{
	  network_waitup();
	  network_set_status(0);
	}

      do
	{
	  s = network_recv(net_mh, &mid, &msg, &size, &dest_mbox);
	  retry = (s < 0 && network_manageable(s) && s != -ENETDOWN);
	  if (retry)
	      os_sleep(1);
	}
      while (retry);

      /* Succeeded, the network status is ok */
      if (s > 0 && status != 0)
	network_set_status(0);

      if (s == 0 || s == -ENETDOWN)
	continue;

      EXA_ASSERT(s > 0);

      /* Ping from another node for keepalive */
      if (((ExamsgAny *)msg)->type == EXAMSG_PING)
	{
	  EXA_ASSERT(dest_mbox == EXAMSG_CMSGD_ID);
	  exalog_trace("received an EXAMSG_PING from %u:%s",
		       mid.netid.node, mid.host);
	  continue;
	}

      exalog_trace("delivering %" PRIzu " bytes to %d",
		   size, dest_mbox);

      s = examsgMboxSend(&mid, examsgOwner(net_mh), dest_mbox, msg, size);
      switch (s)
        {
        case -ENXIO:
          /* The mailbox does not exist (yet). This is not an error: csupd may
           * not be started yet and we receive an examsg for it.
           * XXX Doesn't sound too good to me, and we should at least check that
           * the destination is indeed csupd */
          break;

        case -ENOSPC:
          mailbox_full(dest_mbox, &mid, (Examsg *)msg);
          break;

        default:
          EXA_ASSERT_VERBOSE(s == size + sizeof(mid),
                             "Error %d delivering message to %d",
                             s, dest_mbox);
          break;
        }
    }
}

static void persistent_register_lun(pr_context_t *context,
                                    int session_id, lun_t lun,
                                    pr_key_t service_action_key,
                                    scsi_command_status_t *scsi_status)
{
    int id;
    bool remove_registration = true;
    pr_info_t *pr_info = &context->pr_info[lun];

    EXA_ASSERT(session_id < MAX_GLOBAL_SESSION);

    exalog_debug("iSCSI PR: session %i register to LUN %" PRIlun " with key %" PRIu64,
                 session_id, lun, service_action_key);
    SCSI_STATUS_OK(scsi_status, 0);

    if (service_action_key != 0)
    {
        pr_registration_t *registration = pr_info_add_registration(pr_info,
                                                                   session_id,
                                                                   service_action_key);
        /* FIXME: manage the error */
        EXA_ASSERT_VERBOSE(registration != NULL,
                           "No more free registration for LUN %" PRIlun
                           ", (session=%i key=%" PRIu64 ")",
                           lun, session_id, service_action_key);

        return;
    }

    /* spc3r23 5.6.10.3 removing lun reservation */

    if (is_lun_reserved(context, lun))
    {
        if (pr_info->reservation_type == PR_TYPE_EXCLUSIVE_ACCESS_ALL_REGISTRANTS
            || pr_info->reservation_type == PR_TYPE_WRITE_EXCLUSIVE_ALL_REGISTRANTS)
        {
            unsigned int i;

            /* if we are not a holder, don't remove reservation */
            remove_registration = persistent_is_holder(context, session_id, lun);

            for (i = 0; i < MAX_REGISTRATIONS; i++)
            {
                id = pr_info->registrations[i].session_id;

                if (id != SESSION_ID_NONE && id != session_id && context->session_id_used[id])
                {
                    /* This registered nexus was not the last one */
                    remove_registration = false;
                }
            }
        }
        else
        {
            remove_registration = persistent_is_holder(context, session_id, lun);

            if (pr_info->reservation_type == PR_TYPE_WRITE_EXCLUSIVE_REGISTRANTS_ONLY
                || pr_info->reservation_type == PR_TYPE_EXCLUSIVE_ACCESS_REGISTRANTS_ONLY)
            {
                unsigned int i;
                for (i = 0; i < MAX_REGISTRATIONS; i++)
                {
                    id = pr_info->registrations[i].session_id;

                    if (id != SESSION_ID_NONE && context->session_id_used[id] && id != session_id)
                    {
                        callback_send_sense_data(context, id, lun, SCSI_STATUS_CHECK_CONDITION,
                                                 SCSI_SENSE_UNIT_ATTENTION,
                                                 SCSI_SENSE_ASC_RESERVATIONS_RELEASED);
                    }
                }
            }
        }

        if (remove_registration)
        {
            pr_info->reservation_type = PR_TYPE_NONE;
            pr_info_clear_registrations(pr_info);
        }
    }

    pr_info_del_registration(pr_info, session_id);
}