示例#1
0
/**
 * Advance the sync tag of a rain1 group.
 *
 * Every device that is UP and carries the group's current tag is moved to the
 * new tag along with the group. Devices whose tag cannot be compared with the
 * new tag are reset to SYNC_TAG_BLANK.
 *
 * @param[in,out] rxg  The rain1 group whose sync tag is incremented
 */
static void rain1_update_sync_tag(rain1_group_t *rxg)
{
    sync_tag_t next_tag = sync_tag_inc(rxg->sync_tag);
    rain1_realdev_t *dev;
    int idx;

    foreach_rainx_rdev(rxg, dev, idx)
    {
        /* Only devices that are still UP and up to date with the group
         * follow it to the next tag. */
        if (!rdev_is_up(dev->rdev)
            || !sync_tag_is_equal(dev->sync_tag, rxg->sync_tag))
        {
            exalog_debug("NOT marking rdev "UUID_FMT" with new sync_tag.",
                        UUID_VAL(&dev->uuid));
        }
        else
        {
            exalog_debug("Marking rdev "UUID_FMT" with new sync_tag %"PRIsync_tag,
                        UUID_VAL(&dev->uuid), next_tag);
            dev->sync_tag = next_tag;
        }

        /* A device so outdated that its tag is no longer comparable with the
         * new one is treated as blank; otherwise tag wrapping could make a
         * very old device look up to date again. */
        if (!sync_tags_are_comparable(dev->sync_tag, next_tag))
            dev->sync_tag = SYNC_TAG_BLANK;
    }

    exalog_debug("Marking group with new sync_tag %"PRIsync_tag, next_tag);
    rxg->sync_tag = next_tag;
}
示例#2
0
/**
 * Stop an active group
 *
 * @param[in] params The parsed command array
 *
 * The real parameters passed in the array are:
 *  - UUID of the group to stop
 *
 * @return 0 on success, a negative error code on failure
 */
static int
vrt_cmd_group_stop(const struct VrtGroupStop *cmd)
{
    struct vrt_group *group;
    int ret;

    exalog_debug("stop group " UUID_FMT, UUID_VAL(&cmd->group_uuid));

    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if (!group)
    {
	exalog_debug("unknown group " UUID_FMT, UUID_VAL(&cmd->group_uuid));
	return -VRT_ERR_GROUP_NOT_STARTED;
    }

    /* removing group from group list prevent other user to be able to get
     * new references on this group */
    vrt_groups_list_del(group);

    ret = vrt_group_stop(group);
    if (ret != EXA_SUCCESS)
    {
	vrt_group_unref(group);
	return ret;
    }

    /* No need to call (and can't anyway) vrt_group_unref() because the
       group has been freed */

    return EXA_SUCCESS;
}
示例#3
0
/**
 * Start a volume.
 *
 * @param[in] cmd       : A struct VrtVolumeStart
 *
 * @return EXA_SUCCESS or a negative error code
 */
static int vrt_cmd_volume_start(const struct VrtVolumeStart *cmd)
{
    struct vrt_group *group;
    struct vrt_volume *volume;
    int error;

    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if (!group)
	return -VRT_ERR_GROUP_NOT_STARTED;

    volume = vrt_group_find_volume(group, &cmd->volume_uuid);
    if (! volume)
    {
	exalog_debug("volume '" UUID_FMT "' not found in group '" UUID_FMT "'",
                     UUID_VAL(&cmd->volume_uuid), UUID_VAL(&group->uuid));
	vrt_group_unref(group);
	return -VRT_ERR_UNKNOWN_VOLUME_UUID;
    }

    error = vrt_volume_start(volume);

    vrt_group_unref(group);

    return error;
}
示例#4
0
/**
 * Handle an event concerning one real device of a group.
 *
 * The REBUILD and RECOVER cases are not handled here.
 *
 * @param[in] event_msg  The event message; it carries the group UUID, the
 *                       rdev UUID and the event type
 *
 * @return the value returned by the handler of the event,
 *         -VRT_ERR_UNKNOWN_GROUP_UUID if the group is not found, or
 *         -VRT_ERR_OLD_RDEVS_MISSING if the rdev is not found in the group
 */
static int
vrt_cmd_device_event(const struct VrtDeviceEvent *event_msg)
{
    int retval = -EINVAL;
    struct vrt_group *group;
    struct vrt_realdev *rdev;

    group = vrt_get_group_from_uuid(&event_msg->group_uuid);
    if (!group)
    {
        exalog_debug("group " UUID_FMT " not found",
                     UUID_VAL(&event_msg->group_uuid));
        return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    rdev = storage_get_rdev(group->storage, &event_msg->rdev_uuid);
    if (!rdev)
    {
        exalog_debug("rdev " UUID_FMT " not found", UUID_VAL(&event_msg->rdev_uuid));
        /* The group lookup succeeded, so its reference must be released on
         * this early exit too, as on every other path. */
        vrt_group_unref(group);
        return -VRT_ERR_OLD_RDEVS_MISSING;
    }

    switch(event_msg->event)
    {
    case VRT_DEVICE_DOWN:
        retval = vrt_group_rdev_down(group, rdev);
        break;

    case VRT_DEVICE_UP:
        retval = vrt_group_rdev_up(group, rdev);
        break;

    case VRT_DEVICE_REINTEGRATE:
        retval = vrt_group_reintegrate_rdev(group, rdev);
        break;

    case VRT_DEVICE_POST_REINTEGRATE:
        retval = vrt_group_post_reintegrate_rdev(group, rdev);
        break;

    default :
        EXA_ASSERT_VERBOSE(0, "struct VrtDeviceEvent: Unknown event type %d\n",
                           event_msg->event);
    }

    vrt_group_unref(group);

    return retval;
}
示例#5
0
/**
 * Unfreeze a group: resume IO previously blocked by freeze
 *
 * @param[in] cmd       The msg containing parameters for freezing the group
 *
 * @return 0 on success, a negative error code on failure
 */
int
vrt_cmd_group_unfreeze(const struct VrtGroupUnfreeze *cmd)
{
    struct vrt_group *group;
    int i;

    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if(!group)
    {
	exalog_debug("Unknown group " UUID_FMT, UUID_VAL(&cmd->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    for (i = 0 ; i < NBMAX_VOLUMES_PER_GROUP ; i++)
    {
	struct vrt_volume *volume = group->volumes[i];

	if (! volume)
	    continue;

	volume->frozen = FALSE;
	wake_up_all(&volume->frozen_req_wq);
    }

    vrt_group_unref(group);

    return EXA_SUCCESS;
}
示例#6
0
/**
 * Freeze a group: block incoming IO and wait current IO to be finished
 *
 * @param[in] cmd       The msg containing parameters for freezing the group
 *
 * @return 0 on success, a negative error code on failure
 */
static int
vrt_cmd_group_freeze(const struct VrtGroupFreeze *cmd)
{
    struct vrt_group *group;
    int i;

    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if(!group)
    {
	exalog_debug("Unknown group " UUID_FMT, UUID_VAL(&cmd->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    for (i = 0 ; i < NBMAX_VOLUMES_PER_GROUP ; i++)
    {
	struct vrt_volume *volume = group->volumes[i];

	if (! volume)
	    continue;

	volume->frozen = TRUE;
	wait_event(volume->cmd_wq,
		   os_atomic_read(&volume->inprogress_request_count) == 0);
    }

    vrt_group_unref(group);

    return EXA_SUCCESS;
}
示例#7
0
/**
 * Create a volume in a given group and wipe it.
 *
 * @param[in] cmd  The creation command. It carries:
 *                  - the UUID of the group in which the volume is created
 *                  - the name of the volume to create
 *                  - the UUID of the volume to create
 *                  - the size of the volume to create (in KB)
 *
 * @return EXA_SUCCESS on success, a negative error code on failure
 */
static int
vrt_cmd_volume_create(const struct VrtVolumeCreate *cmd)
{
    vrt_group_t *grp;
    vrt_volume_t *vol;
    int rc;

    EXA_ASSERT(cmd->volume_size > 0);

    exalog_debug("create volume '%s': size %" PRIu64 " KB",
                 cmd->volume_name, cmd->volume_size);

    grp = vrt_get_group_from_uuid(&cmd->group_uuid);
    if (grp == NULL)
    {
        exalog_debug("Unknown group " UUID_FMT, UUID_VAL(&cmd->group_uuid));
        return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    /* !!! Sizes in 'cmd' are expressed in KB whereas the VRT internal
     * functions expect sectors. */
    rc = vrt_group_create_volume(grp, &vol, &cmd->volume_uuid,
                                 cmd->volume_name,
                                 KBYTES_2_SECTORS(cmd->volume_size));
    if (rc != EXA_SUCCESS)
    {
        exalog_error("Can't create volume '%s' in group '%s': %s(%d)",
                     cmd->volume_name, grp->name, exa_error_msg(rc), rc);
        goto out;
    }

    EXA_ASSERT(vol != NULL);

    /* Wipe the newly created volume.
     *
     * FIXME: This code is called from all the clients while it should be
     * done only once. To do so we should add a new RPC and trigger the
     * wiping from admind.
     *
     * Meanwhile only one node (the first one) performs the wipe; on the
     * other nodes rc is still EXA_SUCCESS here. */
    if (vrt_node_get_upnode_id() != 0)
        goto out;

    rc = vrt_group_wipe_volume(grp, vol);
    if (rc != EXA_SUCCESS)
    {
        exalog_error("Can't wipe volume '%s' in group '%s': %s(%d)",
                     vol->name, grp->name, exa_error_msg(rc), rc);
        /* Rollback volume creation */
        vrt_group_delete_volume(grp, vol);
    }

out:
    vrt_group_unref(grp);
    return rc;
}
示例#8
0
/**
 * Finalize the insertion of a new rdev in a group.
 *
 * @param[in] params The parsed command array
 *
 * The real parameters passed in the array are:
 *  - UUID of the group to insert the new rdev in
 *
 * @return 0 on success, a negative error code on failure
 */
static int vrt_cmd_group_insert_rdev(const struct VrtGroupInsertRdev *cmd)
{
    int ret;

    exalog_debug("finish adding rdev in group " UUID_FMT,
            UUID_VAL(&cmd->group_uuid));

    os_thread_mutex_lock(&pending_group_lock);
    if (!pending_group)
    {
	os_thread_mutex_unlock(&pending_group_lock);
	return -EPERM;
    }

    if (!uuid_is_equal(&cmd->group_uuid, &pending_group->uuid))
    {
	exalog_error("You are trying to insert an rdev into a group "
                     "while a dgcreate, dgstart or dgdiskadd is running.");
	os_thread_mutex_unlock(&pending_group_lock);
	return -EAGAIN;
    }

    ret = vrt_group_insert_rdev(pending_group, &cmd->uuid, &cmd->nbd_uuid,
                                cmd->node_id, cmd->spof_id, cmd->local,
                                cmd->old_sb_version, cmd->new_sb_version);

    os_free(pending_group);
    os_thread_mutex_unlock(&pending_group_lock);

    return ret;
}
示例#9
0
/**
 * Replace a real device of a group by another one.
 *
 * @param[in] cmd  The replacement command; it carries the group UUID, the
 *                 VRT UUID of the rdev to replace and the UUID of the new
 *                 rdev
 *
 * @return the result of vrt_group_rdev_replace(),
 *         -VRT_ERR_UNKNOWN_GROUP_UUID if the group is not found,
 *         -VRT_ERR_NO_SUCH_RDEV_IN_GROUP if the rdev is not in the group, or
 *         -VRT_ERR_DISK_REPLACEMENT_NOT_SUPPORTED if the group does not
 *         support device replacement
 */
static int
vrt_cmd_device_replace(const struct VrtDeviceReplace *cmd)
{
    struct vrt_group *grp;
    struct vrt_realdev *dev;
    int rc;

    grp = vrt_get_group_from_uuid(&cmd->group_uuid);
    if (!grp)
        return -VRT_ERR_UNKNOWN_GROUP_UUID;

    dev = storage_get_rdev(grp->storage, &cmd->vrt_uuid);
    if (!dev)
    {
        exalog_error("Cannot find vrt UUID " UUID_FMT " in group '%s'",
                     UUID_VAL(&cmd->vrt_uuid), grp->name);
        rc = -VRT_ERR_NO_SUCH_RDEV_IN_GROUP;
    }
    else if (!vrt_group_supports_device_replacement(grp))
    {
        exalog_error("Group '%s' (layout '%s') does not support disk replacement",
                     grp->name, grp->layout->name);
        rc = -VRT_ERR_DISK_REPLACEMENT_NOT_SUPPORTED;
    }
    else
    {
        rc = vrt_group_rdev_replace(grp, dev, &cmd->rdev_uuid);
    }

    /* Single release point for the reference obtained from the lookup. */
    vrt_group_unref(grp);

    return rc;
}
示例#10
0
/**
 * Open a real device: get its block device, open its superblock streams
 * and, if the device is marked up, record its real size.
 *
 * @param[in,out] rdev  The real device to open
 *
 * @return EXA_SUCCESS on success, -ENODEV if the block device cannot be
 *         obtained, or the error returned by the failing step
 */
int vrt_rdev_open(struct vrt_realdev *rdev)
{
    int rc;

    rdev->blockdevice = client_get_blockdevice(&rdev->nbd_uuid);
    if (!rdev->blockdevice)
    {
        exalog_error("Could not open device "UUID_FMT, UUID_VAL(&rdev->uuid));
        return -ENODEV;
    }

    rc = vrt_rdev_open_superblock_streams(rdev);
    if (rc != 0)
        return rc;

    /* The real size is only set for a device that is marked up. */
    if (!rdev->up)
        return EXA_SUCCESS;

    rc = vrt_rdev_set_real_size(rdev,
                                __get_block_aligned_bdev_size(rdev->blockdevice));

    return rc != 0 ? rc : EXA_SUCCESS;
}
示例#11
0
/**
 * Synchronize the broken field of each disk with the content of the table
 * of broken disks. Request an NBD recovery if the status of one or several
 * disks changed.
 *
 * Every disk of every node of the cluster is visited. Disks whose broken
 * flag already matches the table are left untouched.
 */
static void
rdev_update_disks(void)
{
  struct adm_node *node;

  adm_cluster_for_each_node(node)
  {
    struct adm_disk *disk;

    adm_node_for_each_disk(node, disk)
    {
      /* FIXME there is no check that the uuid in broken_disks are actually part
       * of the cluster */
      bool broken = broken_disk_table_contains(broken_disks, &disk->uuid);
      /* 'missing' is only used in the log message below; it is true when the
       * disk has local information and that information says it is not
       * reachable. */
      bool missing = disk->local != NULL && !disk->local->reachable;

      /* Nothing to do when the flag already reflects the table */
      if (broken == disk->broken)
        continue;

      exalog_info("%s:"UUID_FMT" (%s) is %s%s", node->name,
                  UUID_VAL(&disk->uuid), disk->path,
                  broken ? "broken" : "not broken",
                  missing ? ", missing" : "");

      disk->broken = broken;
      if (disk->broken)
      {
        /* Newly broken disk: stop it and flag the NBD service resources as
         * changed (down) */
        rdev_stop_disk(disk, node);
        inst_set_resources_changed_down(&adm_service_nbd);
      }
      else
        /* Disk no longer broken: flag the NBD service resources as changed
         * (up) */
        inst_set_resources_changed_up(&adm_service_nbd);
    }
  }
示例#12
0
/**
 * Replace the block device backing a real device.
 *
 * The replacement is refused when the rdev is OK, and when the new block
 * device is smaller (block-aligned size) than the current one.
 *
 * NOTE(review): if re-opening the superblock streams fails, rdev->blockdevice
 * already points to the new device while rdev->nbd_uuid still holds the old
 * UUID - confirm callers cope with that state.
 *
 * @param[in,out] rdev           The real device to update
 * @param[in]     new_rdev_uuid  UUID used to get the new block device
 *
 * @return EXA_SUCCESS on success, -VRT_ERR_CANT_DGDISKRECOVER if the rdev is
 *         OK, -ENODEV if the new block device cannot be obtained,
 *         -VRT_ERR_RDEV_TOO_SMALL if it is too small, or the error returned
 *         when re-opening the superblock streams
 */
int vrt_rdev_replace(struct vrt_realdev *rdev, const exa_uuid_t *new_rdev_uuid)
{
    blockdevice_t *replacement;
    int rc;

    if (rdev_is_ok(rdev))
    {
        exalog_error("Bad rdev status %d", rdev_get_compound_status(rdev));
        return -VRT_ERR_CANT_DGDISKRECOVER;
    }

    replacement = client_get_blockdevice(new_rdev_uuid);
    if (!replacement)
    {
        exalog_error("Could not open device "UUID_FMT, UUID_VAL(new_rdev_uuid));
        return -ENODEV;
    }

    /* The new device must be at least as large as the one it replaces. */
    if (__get_block_aligned_bdev_size(replacement)
            < __get_block_aligned_bdev_size(rdev->blockdevice))
    {
        return -VRT_ERR_RDEV_TOO_SMALL;
    }

    rdev->blockdevice = replacement;

    /* Close the superblock streams and open them again now that the block
     * device has changed. */
    vrt_rdev_close_superblock_streams(rdev);
    rc = vrt_rdev_open_superblock_streams(rdev);
    if (rc != 0)
        return rc;

    uuid_copy(&rdev->nbd_uuid, new_rdev_uuid);

    return EXA_SUCCESS;
}
示例#13
0
/**
 * Compute the status of a rain1 group from the state of its SPOF groups,
 * then clear the rebuild context of each of its rdevs.
 *
 * The status is EXA_GROUP_OK when no SPOF group has a defect,
 * EXA_GROUP_DEGRADED when exactly one has, EXA_GROUP_OFFLINE otherwise.
 * When WITH_MONITORING is defined, the monitoring client is notified of the
 * new status.
 *
 * @param[in,out] group  The group whose status is computed
 */
void rain1_compute_status(struct vrt_group *group)
{
    rain1_group_t *lg = RAIN1_GROUP(group);
    struct assembly_group *ag = &lg->assembly_group;
    size_t nb_not_corrected_spofs;
    rain1_realdev_t *lr;
    int i;

    /* Compute the status of the group.
     * This status depends on the number of SPOF groups that are not working
     * fine (down or outdated).
     */
    nb_not_corrected_spofs = 0;
    for (i = 0; i < group->storage->num_spof_groups; i++)
    {
	if (rain1_spof_group_has_defect(lg, &group->storage->spof_groups[i]))
	    nb_not_corrected_spofs++;
    }

    /* No defective SPOF group: the group is fully operational */
    if (nb_not_corrected_spofs == 0)
    {
	group->status = EXA_GROUP_OK;
	exalog_debug("Status of group '%s' is OK", group->name);
#ifdef WITH_MONITORING
	md_client_notify_diskgroup_ok(vrt_msg_handle, &group->uuid, group->name);
#endif

	/* FIXME: There could be no 'not_corrected' spof groups while an
	 * updating operation is on-going. In this case, we should have
	 * the group status set to EXA_GROUP_REBUILDING ... if we keep using
	 * EXA_GROUP_REBUILDING.
	 */
    }
    /* Exactly one defective SPOF group: the group is degraded */
    else if (nb_not_corrected_spofs == 1)
    {
	group->status = EXA_GROUP_DEGRADED;
	exalog_debug("Status of group '%s' is DEGRADED", group->name);
#ifdef WITH_MONITORING
	md_client_notify_diskgroup_degraded(vrt_msg_handle, &group->uuid, group->name);
#endif
    }
    /* More than one defective SPOF group: the group is offline */
    else
    {
	group->status = EXA_GROUP_OFFLINE;
	exalog_debug("Status of group '%s' is OFFLINE (%" PRIzu " SPOFs not corrected)",
		     group->name, nb_not_corrected_spofs);
#ifdef WITH_MONITORING
	md_client_notify_diskgroup_offline(vrt_msg_handle, &group->uuid, group->name);
#endif
    }

    exalog_debug("Clearing Rebuild context for each of the group "UUID_FMT" rdevs",
                UUID_VAL(&group->uuid));
    /* Cleanup previous rebuilding status */
    foreach_rainx_rdev(lg, lr, i)
    {
        rain1_rdev_clear_rebuild_context(lr);
    }
示例#14
0
/**
 * Command to register a real device in a group.
 * It must be called after group_begin() and before
 * group_create() or group_start().
 *
 * @param[in] params The parsed command array
 *
 * The real parameters passed in the array are:
 *  - UUID of the group in which the real device has to be added
 *  - UUID of the real device in the VRT
 *  - UUID of the real device in the NBD
 *  - Whether the disk is local or not
 *  - Whether the disk is UP or not
 *  - Whether the device properties must be loaded from disk
 *
 * @return EXA_SUCCESS on success, negative error code on failure.
 */
static int
vrt_cmd_group_add_rdev(const struct VrtGroupAddRdev *cmd)
{
    vrt_rdev_info_t *rdev_info;

    os_thread_mutex_lock(&pending_group_lock);
    if (!pending_group)
    {
	os_thread_mutex_unlock(&pending_group_lock);
	return -EPERM;
    }

    if (!uuid_is_equal(&cmd->group_uuid, &pending_group->uuid))
    {
	exalog_error("Failed to edit group " UUID_FMT
                     ", group " UUID_FMT " is already being edited.",
                     UUID_VAL(&cmd->group_uuid), UUID_VAL(&pending_group->uuid));
	os_thread_mutex_unlock(&pending_group_lock);
	return -EAGAIN;
    }

    if (pending_group->nb_rdevs == NBMAX_DISKS_PER_GROUP)
    {
	os_thread_mutex_unlock(&pending_group_lock);
	return -EAGAIN;
    }

    rdev_info = &pending_group->rdevs[pending_group->nb_rdevs];

    uuid_copy(&rdev_info->uuid,     &cmd->uuid);
    uuid_copy(&rdev_info->nbd_uuid, &cmd->nbd_uuid);

    rdev_info->node_id = cmd->node_id;
    rdev_info->spof_id = cmd->spof_id;
    rdev_info->local   = cmd->local;
    rdev_info->up      = cmd->up;

    pending_group->nb_rdevs++;

    os_thread_mutex_unlock(&pending_group_lock);

    return EXA_SUCCESS;
}
示例#15
0
static int
vrt_cmd_group_event(const struct VrtGroupEvent *event_msg)
{
    int retval = -EINVAL;
    struct vrt_group *group;

    group = vrt_get_group_from_uuid(&event_msg->group_uuid);
    if (!group)
    {
	exalog_debug("group " UUID_FMT " not found",
                     UUID_VAL(&event_msg->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }
    switch(event_msg->event)
    {
    case VRT_GROUP_RESUME:
	retval = vrt_group_resume(group);
	break;

    case VRT_GROUP_SUSPEND_METADATA_AND_REBUILD:
        vrt_group_metadata_thread_suspend(group);
        vrt_group_rebuild_thread_suspend(group);
        retval = 0;
	break;

    case VRT_GROUP_RESUME_METADATA_AND_REBUILD:
        vrt_group_metadata_thread_resume(group);
        vrt_group_rebuild_thread_resume(group);
        retval = 0;
	break;

    case VRT_GROUP_COMPUTESTATUS:
	retval = vrt_group_compute_status(group);
	break;

    case VRT_GROUP_WAIT_INITIALIZED_REQUESTS:
	vrt_group_wait_initialized_requests (group);
	retval = EXA_SUCCESS;
	break;

    case VRT_GROUP_POSTRESYNC:
	retval = vrt_group_post_resync(group);
	break;

    default :
	EXA_ASSERT_VERBOSE(0, "struct VrtGroupEvent: Unknown event type %d\n",
			   event_msg->event);
    }

    vrt_group_unref(group);

    return retval;
}
示例#16
0
/**
 * Allocate and initialize a new real device.
 *
 * @param node_id     ID of the node where the device is attached
 * @param spof_id     ID of the SPOF the device is recorded with
 * @param uuid        UUID of the real device in the VRT
 * @param nbd_uuid    UUID of the real device in the NBD
 * @param index       Index recorded in the real device
 * @param local       true if the rdev is local
 * @param up          true if Admind considers the device as UP
 *
 * @return            the new vrt_realdev, or NULL if the allocation failed
 */
struct vrt_realdev *vrt_rdev_new(exa_nodeid_t node_id,
                                 spof_id_t spof_id, const exa_uuid_t *uuid,
                                 const exa_uuid_t *nbd_uuid, int index,
                                 bool local, bool up)
{
    struct vrt_realdev *dev;

    exalog_debug("adding rdev " UUID_FMT ": status = %s",
                 UUID_VAL(uuid), up ? "UP" : "DOWN");

    dev = os_malloc(sizeof(*dev));
    if (!dev)
        return NULL;

    /* Start from an all-zero structure; the fields are then set explicitly */
    memset(dev, 0, sizeof(*dev));

    /* Identity */
    uuid_copy(&dev->uuid, uuid);
    uuid_copy(&dev->nbd_uuid, nbd_uuid);

    /* Location */
    dev->index   = index;
    dev->spof_id = spof_id;
    dev->node_id = node_id;
    dev->local   = local;

    /* Status */
    dev->up        = up;
    dev->corrupted = FALSE;
    dev->real_size = 0;

    /* Chunk information */
    dev->chunks.free_chunks        = NULL;
    dev->chunks.free_chunks_count  = 0;
    dev->chunks.total_chunks_count = 0;
    dev->chunks.chunk_size         = 0;

    /* Superblock streams: none is open yet */
    dev->raw_sb_stream = NULL;

    dev->sb_data_streams[0] = NULL;
    dev->sb_data_streams[1] = NULL;

    dev->checksum_sb_streams[0] = NULL;
    dev->checksum_sb_streams[1] = NULL;

    return dev;
}
示例#17
0
/**
 * Sync metadata on disk.
 *
 * @param[in] params The parsed command array
 *
 * The real parameters passed in the array are:
 *  - UUID of the group
 *
 * @return 0 on success, a negative error code on failure
 */
static int vrt_cmd_group_sync_sb(const struct VrtGroupSyncSb *cmd)
{
    struct vrt_group *group;
    int ret;

    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if(!group)
    {
	exalog_error("Unknown group " UUID_FMT, UUID_VAL(&cmd->group_uuid));
	return -VRT_ERR_UNKNOWN_GROUP_UUID;
    }

    ret = vrt_group_sync_sb(group, cmd->old_sb_version, cmd->new_sb_version);
    vrt_group_unref(group);

    return ret;
}
示例#18
0
/**
 * Record the real size of a real device.
 *
 * The rdev must be up and must have a block device (both are asserted).
 *
 * @param[in,out] rdev  The real device
 * @param[in]     size  The size to record; the error message reports it in
 *                      sectors
 *
 * @return EXA_SUCCESS on success, -VRT_ERR_RDEV_TOO_SMALL if the usable part
 *         of 'size' is not positive
 */
int vrt_rdev_set_real_size(struct vrt_realdev *rdev, uint64_t size)
{
    EXA_ASSERT(rdev->up);
    EXA_ASSERT(rdev->blockdevice != NULL);

    if (__usable_size(size) > 0)
    {
        rdev->real_size = size;
        return EXA_SUCCESS;
    }

    /* Nothing usable is left once the superblocks are accounted for */
    exalog_error("rdev " UUID_FMT " is too small (%" PRIu64
                 " sectors) to store the superblocks",
                 UUID_VAL(&rdev->uuid), size);

    return -VRT_ERR_RDEV_TOO_SMALL;
}
示例#19
0
/**
 * Begin group creation/starting command. Such a command must be
 * followed by several group_add_rdev() commands to add the real
 * devices in the group, and finally by a group_create() or
 * group_start() command.
 *
 * Any previous pending group description is discarded and replaced by a new
 * one built from the command.
 *
 * @param[in] cmd  The command. It carries:
 *                  - the name of the group to create or start
 *                  - its UUID
 *                  - the name of the layout to use
 *                  - the superblock version
 *
 * @return EXA_SUCCESS on success, -VRT_INFO_GROUP_ALREADY_STARTED if a group
 *         with this UUID is already started, -VRT_ERR_GROUPNAME_USED if the
 *         name is used by another group, -ENOMEM if the pending group
 *         description cannot be allocated
 */
static int
vrt_cmd_group_begin(const struct VrtGroupBegin *cmd)
{
    vrt_group_t *group;

    exalog_debug("begin group '%s': UUID='" UUID_FMT "' layout='%s'",
                 cmd->group_name, UUID_VAL(& cmd->group_uuid), cmd->layout);

    /* Check if the group is already started */
    group = vrt_get_group_from_uuid(&cmd->group_uuid);
    if(group)
    {
        EXA_ASSERT(strcmp(cmd->group_name, group->name)==0 );
        vrt_group_unref(group);
        return -VRT_INFO_GROUP_ALREADY_STARTED;
    }

    /* check if the group name is already used (this should not happen if
     * admind XML parsing is correct...
     */
    group = vrt_get_group_from_name(cmd->group_name);
    if (group != NULL)
    {
        vrt_group_unref(group);
        return -VRT_ERR_GROUPNAME_USED;
    }

    os_thread_mutex_lock(&pending_group_lock);
    if (pending_group)
        os_free(pending_group);

    pending_group = os_malloc(sizeof(vrt_group_info_t));
    if (pending_group == NULL)
    {
        /* Without this check the initialization below would dereference a
         * NULL pointer. pending_group stays NULL, i.e. "no pending group". */
        exalog_error("Failed to allocate the pending group '%s'",
                     cmd->group_name);
        os_thread_mutex_unlock(&pending_group_lock);
        return -ENOMEM;
    }

    vrt_group_info_init(pending_group);
    os_strlcpy(pending_group->name, cmd->group_name,
            sizeof(pending_group->name));
    os_strlcpy(pending_group->layout_name, cmd->layout,
            sizeof(pending_group->layout_name));
    uuid_copy(&pending_group->uuid, &cmd->group_uuid);
    pending_group->sb_version = cmd->sb_version;

    os_thread_mutex_unlock(&pending_group_lock);

    return EXA_SUCCESS;
}
示例#20
0
/**
 * Tells that an unusable device is now available again.
 *
 * The device is marked up and its real size is refreshed from its block
 * device. If the usable size turns out to be smaller than the size required
 * by its chunks, the device is flagged as corrupted and its real size is
 * reset to 0; otherwise the corrupted flag is cleared.
 *
 * @param[in,out] rdev  The real device which is now available again
 *
 * @return EXA_SUCCESS (also when the device ends up flagged as corrupted),
 *         or the error returned by vrt_rdev_set_real_size()
 */
int
vrt_rdev_up(struct vrt_realdev *rdev)
{
    uint64_t bdev_size;
    uint64_t needed_size;
    int rc;

    /* Admind can send an up message even if the device is not down */
    if (rdev_is_ok(rdev))
        return EXA_SUCCESS;

    rdev->up = TRUE;

    bdev_size = __get_block_aligned_bdev_size(rdev->blockdevice);
    rc = vrt_rdev_set_real_size(rdev, bdev_size);
    if (rc != EXA_SUCCESS)
        return rc;

    /* Check that the device is large enough for the size computed from its
     * chunks. This test is also done at group start (see vrt_group_start).
     *
     * FIXME: This kind of verification could be done by the service in
     * charge of the devices
     */
    needed_size = rdev_chunk_based_size(rdev);
    if (vrt_realdev_get_usable_size(rdev) >= needed_size)
    {
        rdev->corrupted = FALSE;
        return EXA_SUCCESS;
    }

    /* XXX Duplicate: same error in vrt_group_start() */
    exalog_error("Real size of device "UUID_FMT" is too small: %"PRIu64" < %"PRIu64,
                 UUID_VAL(&rdev->uuid), vrt_realdev_get_usable_size(rdev),
                 needed_size);
    rdev->corrupted = TRUE;
    rdev->real_size = 0;

    return EXA_SUCCESS;
}
示例#21
0
/**
 * Prepare an updating operation on a SPOF group.
 *
 * Each real device of the SPOF group that is OK but not up to date with the
 * group's sync tag gets its rebuild context initialized for an updating.
 *
 * @param[in] ag          assembly group (not used by this function)
 * @param[in] lg          rain1 group data
 * @param[in] spof_group  The SPOF group to update
 */
static void rain1_group_prepare_updating(const struct assembly_group *ag,
                                         const rain1_group_t *lg,
                                         const spof_group_t *spof_group)
{
    uint32_t pos;

    for (pos = 0; pos < spof_group->nb_realdevs; pos++)
    {
        struct vrt_realdev *dev = spof_group->realdevs[pos];
        struct rain1_realdev *rain_dev = RAIN1_REALDEV(lg, dev);

        /* Devices that are not OK, or already up to date, are left alone */
        if (!rdev_is_ok(dev) || rain1_rdev_is_uptodate(rain_dev, lg->sync_tag))
            continue;

        exalog_debug("New updating: update disk: index = %d, UUID = " UUID_FMT,
                     dev->index, UUID_VAL(&dev->uuid));

        rain1_rdev_init_rebuild_context(rain_dev, EXA_RDEV_REBUILD_UPDATING,
                                        lg->sync_tag);
    }
}
示例#22
0
/**
 * Stop exporting a device and release everything attached to it.
 *
 * The device is unregistered from the server's device table, its disk thread
 * is asked to exit and joined, then its queue, handle, semaphore and memory
 * are released.
 *
 * @param[in] uuid  UUID of the device to unexport
 *
 * @return EXA_SUCCESS on success, -CMD_EXP_ERR_UNKNOWN_DEVICE if no device
 *         matches the UUID
 */
int unexport_device(const exa_uuid_t *uuid)
{
  /* NOTE(review): the lookup is done before nbd_server.mutex_edevs is taken
   * - confirm this is safe against concurrent changes of the device table */
  device_t *dev = find_device_from_uuid(uuid);
  if (dev == NULL)
    {
      exalog_error("can not remove unknown device with UUID = " UUID_FMT, UUID_VAL(uuid));
      return -CMD_EXP_ERR_UNKNOWN_DEVICE;
    }

  os_thread_mutex_lock(&nbd_server.mutex_edevs);
  /* ask the thread to terminate */
  dev->exit_thread = true;

  /* prevent any new IO to be put in device IO list */
  nbd_server.devices[dev->dev_index] = NULL;
  os_thread_mutex_unlock(&nbd_server.mutex_edevs);

  /* now we can join, because with the nbd_close_list()
   * we can assume was the disk thread will reach a cancelation point */
  /* NOTE(review): nbd_close_list() is actually called after the join below,
   * not before as the comment above suggests - confirm the intended order */
  os_thread_join(nbd_server.td_pid[dev->dev_index]);

  /* close the list used to disk queue */
  nbd_close_list(&dev->disk_queue);

  /* get back all header in the kernel exa_rdev to the free list and close the device */
  if (dev->handle != NULL)
      exa_rdev_handle_free(dev->handle);

  /* close the semaphore used by the disk */
  os_sem_destroy(&dev->lock_sem_disk);

  /* free used memory for the device */
  os_free(dev);

  return EXA_SUCCESS;
}
示例#23
0
/**
 * Finalize the start of a group.
 *
 * @param[in] params The parsed command array
 *
 * The real parameters passed in the array are:
 *  - UUID of the group to start
 *
 * @return 0 on success, a negative error code on failure
 */
static int
vrt_cmd_group_start(const struct VrtGroupStart *cmd)
{
    vrt_group_t *group;
    int ret;

    exalog_debug("start group " UUID_FMT, UUID_VAL(&cmd->group_uuid));

    os_thread_mutex_lock(&pending_group_lock);
    if (!pending_group)
    {
	os_thread_mutex_unlock(&pending_group_lock);
	return -EPERM;
    }

    if (!uuid_is_equal(&cmd->group_uuid, &pending_group->uuid))
    {
	exalog_error("You are trying to create or start two groups at the same time.");
	os_thread_mutex_unlock(&pending_group_lock);
	return -EAGAIN;
    }

    ret = vrt_group_start(pending_group, &group);

    if (ret == EXA_SUCCESS)
    {
        ret = vrt_groups_list_add(group);
        if (ret != EXA_SUCCESS)
            vrt_group_stop(group);
    }

    os_free(pending_group);
    os_thread_mutex_unlock(&pending_group_lock);

    return ret;
}
示例#24
0
/**
 * Thread periodically testing the disks of the local node.
 *
 * At each round, every local disk that has an rdev request handle is tested.
 * When at least one disk newly switched to EXA_RDEV_STATUS_FAIL, an
 * INSTANCE_CHECK_DOWN event is sent to the event manager. The thread then
 * sleeps for DISK_CHECK_INTERVAL, until 'quit' is set.
 *
 * @param dummy  Unused
 */
static void disk_checking_thread(void *dummy)
{
  exalog_as(EXAMSG_RDEV_ID);

  while (!quit)
  {
    int failure_detected = false;
    struct adm_disk *disk;

    adm_node_lock_disk_removal();

    adm_node_for_each_disk(adm_myself(), disk)
    {
      int previous_state, current_state;

      /* Disks without a request handle are not tested */
      if (disk->local->rdev_req == NULL)
        continue;

      previous_state = disk->local->state;

      current_state = exa_rdev_test(disk->local->rdev_req,
                                    rdev_check_buffer, RDEV_SUPERBLOCK_SIZE);

      /* If exa_rdev_test returns an error, the disk is considered in failure
       * as we have no means to know what really happened. */
      if (current_state < 0)
      {
        exalog_error("testing rdev '%s' " UUID_FMT " failed: %s (%d)",
                     disk->path, UUID_VAL(&disk->uuid),
                     exa_error_msg(current_state), current_state);
        current_state = EXA_RDEV_STATUS_FAIL;
      }

      if (current_state == previous_state)
        continue;

      /* Only a transition to the FAIL state triggers a check down */
      if (current_state == EXA_RDEV_STATUS_FAIL)
        failure_detected = true;

      disk->local->state = current_state;
    }

    adm_node_unlock_disk_removal();

    if (quit)
      break;

    if (failure_detected)
    {
      instance_event_msg_t msg;
      int sent;

      msg.any.type = EXAMSG_EVMGR_INST_EVENT;
      msg.event.id = EXAMSG_RDEV_ID;
      msg.event.state = INSTANCE_CHECK_DOWN;
      msg.event.node_id = adm_myself()->id;

      exalog_info("... broadcasting action: rdev check down");

      sent = examsgSend(mh, EXAMSG_ADMIND_EVMGR_ID,
                        EXAMSG_ALLHOSTS, &msg, sizeof(msg));
      EXA_ASSERT(sent == sizeof(msg));
    }

    os_sleep(DISK_CHECK_INTERVAL);
  }
}
示例#25
0
/**
 * Thread handling the zone lock/unlock requests sent to the NBD server.
 *
 * It creates the EXAMSG_NBD_LOCKING_ID mailbox, posts
 * nbd_server.mailbox_sem once the mailbox exists, then loops while
 * nbd_server.run is set: each EXAMSG_NBD_LOCK message is applied to the
 * device matching its UUID and acknowledged to its sender with the resulting
 * error code.
 *
 * @param p  Unused
 */
void rebuild_helper_thread(void *p)
{
  ExamsgHandle mh;
  int err;

  exalog_as(EXAMSG_NBD_SERVER_ID);

  /* initialize examsg framework */
  mh = examsgInit(EXAMSG_NBD_LOCKING_ID);
  EXA_ASSERT(mh != NULL);

  err = examsgAddMbox(mh, EXAMSG_NBD_LOCKING_ID, 1, 5 * EXAMSG_MSG_MAX);
  EXA_ASSERT(err == 0);

  /* Signal that the mailbox is now available */
  os_sem_post(&nbd_server.mailbox_sem);

  while (nbd_server.run)
  {
      device_t *device;
      ExamsgNbdLock nbd_lock_msg;
      ExamsgMID from;
      /* Short timeout so that nbd_server.run is checked regularly */
      struct timeval timeout = { .tv_sec = 0, .tv_usec = 100000 };
      exa_nodeset_t dest_nodes;

      err = examsgWaitTimeout(mh, &timeout);
      /* Just in order to check stopping the thread is required*/
      if (err == -ETIME)
	  continue;

      if (err != 0)
      {
          exalog_error("Locking thread encountered error %s (%d) while "
                       "waiting in event loop.", exa_error_msg(err), err);
          continue;
      }

      err = examsgRecv(mh, &from, &nbd_lock_msg, sizeof(nbd_lock_msg));

      /* No message */
      if (err == 0)
	continue;

      if (err < 0)
      {
          exalog_error("Locking thread encountered error %s (%d) while "
                       "receiving a messsage.", exa_error_msg(err), err);
	  continue;
      }

      switch(nbd_lock_msg.any.type)
      {
      case EXAMSG_NBD_LOCK:
	  /* find device from name */
          /* FIXME devices lock is not held... it should */
          device = find_device_from_uuid(&nbd_lock_msg.disk_uuid);
	  if (device == NULL)
          {
              exalog_error("Unknown device with UUID " UUID_FMT, UUID_VAL(&nbd_lock_msg.disk_uuid));
              /* The error is sent back to the sender by the ack below */
              err = -CMD_EXP_ERR_UNKNOWN_DEVICE;
              break;
          }
          /* The 'lock' field tells whether the zone is locked or unlocked */
          if (nbd_lock_msg.lock)
          {
              err = exa_disk_lock_zone(device, nbd_lock_msg.locked_zone_start,
                                          nbd_lock_msg.locked_zone_size);
              EXA_ASSERT_VERBOSE(err == 0, "Trying to lock too many zone "
                                 "(>%d). Last zone not succesfully locked "
                                 "(start = %" PRId64 ", size = %" PRId64 " ) "
                                 "on device UUID " UUID_FMT, NBMAX_DISK_LOCKED_ZONES,
                                 nbd_lock_msg.locked_zone_start,
                                 nbd_lock_msg.locked_zone_size,
                                 UUID_VAL(&nbd_lock_msg.disk_uuid));
          }
          else
          {
              err = exa_disk_unlock_zone(device, nbd_lock_msg.locked_zone_start,
                                            nbd_lock_msg.locked_zone_size);
              EXA_ASSERT_VERBOSE(err == 0, "Trying to unlock a never locked "
                                 "zone (unlocked zone start =%" PRId64 ", "
                                 "unlocked zone size = %" PRId64 ") on device"
                                 " UUID " UUID_FMT,
                                 nbd_lock_msg.locked_zone_start,
                                 nbd_lock_msg.locked_zone_size,
                                 UUID_VAL(&nbd_lock_msg.disk_uuid));
          }
	  break;

	default:
	  /* error */
	  EXA_ASSERT_VERBOSE(false, "Locking thread got unknown message of"
                             " type %d ", nbd_lock_msg.any.type);
	  break;
	}

      /* Acknowledge the request to the node it came from, with the result */
      exa_nodeset_single(&dest_nodes, from.netid.node);
      examsgAckReply(mh, (Examsg *)&nbd_lock_msg, err, from.id, &dest_nodes);
    }

  examsgDelMbox(mh, EXAMSG_NBD_LOCKING_ID);
  examsgExit(mh);
}

/** get the usable number of sectors of a device
 *
 * The result is the size of the device in sectors, minus the area reserved
 * for storing admind info, rounded down to a multiple of 1K.
 *
 * \param[in]  device_path the device to get the number of sectors of
 * \param[out] nb_sectors  the usable number of sectors of the device; it is
 *                         only written on success
 * \return EXA_SUCCESS on success, -CMD_EXP_ERR_OPEN_DEVICE if the device
 *         cannot be opened, -EXA_ERR_IOCTL if its size cannot be read,
 *         -CMD_EXP_ERR_CLOSE_DEVICE if it cannot be closed, -EINVAL if the
 *         device is smaller than the reserved area
 */

static int get_nb_sectors(const char *device_path, uint64_t *nb_sectors)
{
  uint64_t device_size; /* in bytes */
  uint64_t sectors;
  int retval;
  int fd;

  /* We need the read access to get the size. */
  if ((fd = os_disk_open_raw(device_path, OS_DISK_READ)) < 0)
  {
    exalog_error("cannot open device '%s'  error=%s ",
                 device_path, exa_error_msg(-fd));
    return -CMD_EXP_ERR_OPEN_DEVICE;
  }

  retval = os_disk_get_size(fd, &device_size);
  if (retval < 0)
  {
    exalog_error("os_disk_get_size() error=%s", exa_error_msg(retval));
    if (close(fd) != 0)
      exalog_error("can't EVEN close dev '%s'", device_path);
    return -EXA_ERR_IOCTL;
  }

  retval = close(fd);
  if (retval < 0)
  {
    retval = -errno;
    exalog_error("cannot close device '%s' error=%s ",
                 device_path, exa_error_msg(retval));
    return -CMD_EXP_ERR_CLOSE_DEVICE;
  }

  sectors = device_size / SECTOR_SIZE;

  /* The sector count is unsigned: subtracting the reserved area from a
   * smaller device would wrap around and report a huge size. */
  if (sectors < RDEV_RESERVED_AREA_IN_SECTORS)
  {
    exalog_error("device '%s' is too small (%" PRIu64 " sectors) to hold "
                 "the reserved area", device_path, sectors);
    return -EINVAL;
  }

  /* remove the size of the reserved area for storing admind info */
  sectors -= RDEV_RESERVED_AREA_IN_SECTORS;

  /* Align the size on 1K
   * this is the best we can do to have the same size of devices on 2.4 and 2.6 kernels due to
   * the fact that kernel 2.4 rounds the size of devices with 1 K
   */
  sectors -= sectors % (1024 / SECTOR_SIZE);

  *nb_sectors = sectors;

  return EXA_SUCCESS;
}
示例#26
0
/**
 * Synchronize the superblocks version of a group with the other nodes.
 *
 * Each node broadcasts whether the group is started locally, whether it owns
 * at least one disk of the group and whether at least one of its disks is up
 * in the VRT. The replies are tallied; if the group is started somewhere and
 * enough nodes have writable disks, a new superblock version is prepared,
 * synced through the VRT client when the group is started locally, and then
 * marked done and committed around two cluster barriers.
 *
 * @param[in]     thr_nb  thread number passed to the broadcast and barriers
 * @param[in,out] group   group whose superblocks are synchronized
 *
 * @return EXA_SUCCESS when the group is stopped on every node or when the
 *         synchronization completed;
 *         -ADMIND_ERR_NODE_DOWN when a reply reported a node down;
 *         -VRT_ERR_GROUP_NOT_ADMINISTRABLE when the number of nodes with
 *         writable disks is below quotient_ceil64(nodes with disks, 2);
 *         otherwise the value returned by the failing barrier.
 */
int adm_vrt_group_sync_sb(int thr_nb, struct adm_group *group)
{
  /* Payload exchanged between nodes; the field order is left untouched. */
  struct {
    bool group_is_started;
    bool can_write;
    bool have_disk_in_group;
  } local_state, peer_state;

  admwrk_request_t request;
  struct adm_disk *cur_disk;
  exa_nodeid_t peer_id;
  uint64_t prev_version, next_version;
  bool started_on_some_node = false;
  int writer_nodes = 0;
  int member_nodes = 0;
  int status = EXA_SUCCESS;
  int ret;

  COMPILE_TIME_ASSERT(sizeof(local_state) <= ADM_MAILBOX_PAYLOAD_PER_NODE);

  /* XXX checking 'started' may be redundant: administrable implies started,
   * and a non-administrable group makes us return early anyway. */
  local_state.group_is_started = group->started;
  local_state.can_write = false;
  local_state.have_disk_in_group = false;

  /* Describe the disks of the group that belong to this node. */
  adm_group_for_each_disk(group, cur_disk)
  {
    const bool is_local = cur_disk->node_id == adm_my_id;

    if (is_local)
      local_state.have_disk_in_group = true;

    if (is_local && cur_disk->up_in_vrt)
      local_state.can_write = true;
  }

  /* Exchange the local view with the other nodes and tally their replies. */
  admwrk_bcast(thr_nb, &request, EXAMSG_SERVICE_VRT_SB_SYNC,
               &local_state, sizeof(local_state));
  while (admwrk_get_bcast(&request, &peer_id, &peer_state, sizeof(peer_state), &ret))
  {
    if (ret == -ADMIND_ERR_NODE_DOWN)
    {
      /* Remember the failure but keep draining the remaining replies. */
      status = -ADMIND_ERR_NODE_DOWN;
    }
    else
    {
      EXA_ASSERT(ret == EXA_SUCCESS);

      writer_nodes += peer_state.can_write ? 1 : 0;
      member_nodes += peer_state.have_disk_in_group ? 1 : 0;
      started_on_some_node = started_on_some_node || peer_state.group_is_started;
    }
  }

  if (status != EXA_SUCCESS)
    return status;

  /* Nothing to write when the group is stopped on every node. */
  if (!started_on_some_node)
    return EXA_SUCCESS;

  /* Too few nodes can write, compared to the nodes holding disks. */
  if (quotient_ceil64(member_nodes, 2) > writer_nodes)
    return -VRT_ERR_GROUP_NOT_ADMINISTRABLE;

  prev_version = sb_version_get_version(group->sb_version);
  next_version = sb_version_new_version_prepare(group->sb_version);

  /* Only a node on which the group is started actually syncs superblocks. */
  ret = EXA_SUCCESS;
  if (group->started)
  {
    ret = vrt_client_group_sync_sb(adm_wt_get_localmb(), &group->uuid,
                                   prev_version, next_version);

    EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS || ret == -ADMIND_ERR_NODE_DOWN,
                       "Synchronization of superblocks failed for group '%s' "
                       "UUID=" UUID_FMT ": %s (%d)", group->name,
                       UUID_VAL(&group->uuid), exa_error_msg(ret), ret);
  }

  status = admwrk_barrier(thr_nb, ret, "VRT: Preparing superblocks version");
  if (status != EXA_SUCCESS)
    return status;

  sb_version_new_version_done(group->sb_version);

  status = admwrk_barrier(thr_nb, EXA_SUCCESS, "VRT: Writing superblocks version");

  /* Commit whatever the barrier returned: having reached this point means
   * the other nodes did the job too, even if they crashed afterwards. */
  sb_version_new_version_commit(group->sb_version);

  return status;
}
示例#27
0
static void
get_info_from_params(const struct dgcreate_params *params,
		     struct dgcreate_info *info,
		     cl_error_desc_t *err_desc)
{
    xmlDocPtr config;
    xmlNodePtr diskgroup_ptr;
    xmlAttrPtr attr;
    int i;

    EXA_ASSERT(params);
    EXA_ASSERT(info);
    EXA_ASSERT(err_desc);

    config = params->config;
    memset(info, 0, sizeof(*info));

    diskgroup_ptr = xml_conf_xpath_singleton(config, "/Exanodes/diskgroup");

    uuid_generate(&info->uuid);
    /* 0 means that the slot width will be automagically computed */
    info->slot_width = 0;
    info->chunk_size = adm_cluster_get_param_int("default_chunk_size");
    info->su_size = adm_cluster_get_param_int("default_su_size");
    info->dirty_zone_size = adm_cluster_get_param_int("default_dirty_zone_size");
    info->blended_stripes = false;
    info->nb_disks = 0;
    info->nb_spare = VRT_DEFAULT_NB_SPARES;
    info->layout[0] = '\0';

    for (attr = diskgroup_ptr->properties; attr != NULL; attr = attr->next)
    {
	if (xmlStrEqual(attr->name, BAD_CAST("name")))
	    strlcpy(info->name, xml_get_prop(diskgroup_ptr, "name"), EXA_MAXSIZE_GROUPNAME + 1);
	else if (xmlStrEqual(attr->name, BAD_CAST("layout")))
	    strlcpy(info->layout, xml_get_prop(diskgroup_ptr, "layout"), EXA_MAXSIZE_LAYOUTNAME + 1);
	else if (xmlStrEqual(attr->name, BAD_CAST("slot_width")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "slot_width",
				  &info->slot_width, err_desc) != 0)
		return;
	    /* NOTE User can not give a zero value
	     * If slot_width is not provided, we pass zero
	     * to vrt so that it can calculate the proper slot_width
	     */
	    if (info->slot_width == 0)
	    {
		set_error(err_desc, -EXA_ERR_XML_GET,
			  "slot_width must be greater than zero");
		return;
	    }
	}
	else if (xmlStrEqual(attr->name, BAD_CAST("chunk_size")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "chunk_size",
				  &info->chunk_size, err_desc) != 0)
		return;
	}
	else if (xmlStrEqual(attr->name, BAD_CAST("su_size")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "su_size",
				  &info->su_size, err_desc) != 0)
		return;
	}
	else if (xmlStrEqual(attr->name, BAD_CAST("dirty_zone_size")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "dirty_zone_size",
				  &info->dirty_zone_size, err_desc) != 0)
		return;
	}
	else if (xmlStrEqual(attr->name, BAD_CAST("blended_stripes")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "blended_stripes",
				  &info->blended_stripes, err_desc) != 0)
		return;
	}
	else if (xmlStrEqual(attr->name, BAD_CAST("nb_spare")))
	{
	    if (xml_get_uint_prop(diskgroup_ptr, "nb_spare",
				  &info->nb_spare, err_desc) != 0)
		return;
	}
	else if (!xmlStrEqual(attr->name, BAD_CAST("cluster")))
	{
	    set_error(err_desc, -EXA_ERR_XML_GET,
		      "Unknown group property '%s'", (char *)attr->name);
	    return;
	}
    }

    /* Check the group name */
    if (info->name == NULL || info->name[0] == '\0')
    {
	set_error(err_desc, -EXA_ERR_INVALID_PARAM, NULL);
	return;
    }

    /* Check if a group with that name already exist */
    if (adm_group_get_group_by_name(info->name) != NULL)
    {
	set_error(err_desc, -VRT_ERR_GROUPNAME_USED, NULL);
	return;
    }

    if (info->layout[0] == '\0')
    {
	set_error(err_desc, -EXA_ERR_XML_GET, NULL);
	return;
    }

    if (params->alldisks)
    {
	struct adm_node *node;
	adm_cluster_for_each_node(node)
	{
	    struct adm_disk *disk;
	    adm_node_for_each_disk(node, disk)
	    {
		if (uuid_is_zero(&disk->group_uuid))
                {
                    if (disk->path[0] == '\0')
                    {
                        set_error(err_desc, -ADMIND_ERR_UNKNOWN_DISK,
                              "disk " UUID_FMT " is unknown", UUID_VAL(&disk->uuid));
                              return;
                    }

		    if (info->nb_disks >= NBMAX_DISKS_PER_GROUP)
		    {
			set_error(err_desc, -ADMIND_ERR_TOO_MANY_DISKS_IN_GROUP,
				  "too many disks in group (> %d)", NBMAX_DISKS_PER_GROUP);
			return;
		    }

		    uuid_copy(&info->disks[info->nb_disks], &disk->uuid);
		    info->nb_disks++;
		}
	    }
	}
    }
    else
    {