/**
 * Remember which kind of static-data initialization was performed.
 *
 * Asserts that no initialization is currently recorded (init_op is still
 * RDEV_STATIC_OP_INVALID) and that op is one of RDEV_STATIC_CREATE or
 * RDEV_STATIC_GET, then stores op in init_op.
 *
 * \param[in] op  Initialization operation to record
 *
 * \return 0 (the only value this function returns)
 */
int exa_rdev_static_init(rdev_static_op_t op)
{
    /* Double initialization is a programming error. */
    EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_OP_INVALID,
                       "static data already initialized");

    /* Only the two "init" flavours are accepted here. */
    EXA_ASSERT_VERBOSE(op == RDEV_STATIC_CREATE || op == RDEV_STATIC_GET,
                       "invalid static init op: %d", op);

    init_op = op;

    return 0;
}
void __serverd_perf_sensor_init(void) { EXA_ASSERT_VERBOSE(eh != NULL, "Exaperf handle is nil"); req_size_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_REQ_SIZE_READ", NB_REPART, limits_nbd_server_req); req_size_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_REQ_SIZE_WRITE", NB_REPART, limits_nbd_server_req); inter_arrival_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_INTERARRIVAL_READ", NB_REPART_INTER, limits_inter); inter_arrival_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_INTERARRIVAL_WRITE", NB_REPART_INTER, limits_inter); lba_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_LBA_READ", NB_REPART_LBA, limits_lba); lba_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_LBA_WRITE", NB_REPART_LBA, limits_lba); distance_repart[__READ] = exaperf_repart_init(eh, "NBD_SERVER_DISTANCE_READ", NB_REPART_DIST, limits_dist); distance_repart[__WRITE] = exaperf_repart_init(eh, "NBD_SERVER_DISTANCE_WRITE", NB_REPART_DIST, limits_dist); header_dur[__READ] = exaperf_duration_init(eh,"NBD_SERVER_HEADER_DUR_READ", true); header_dur[__WRITE] = exaperf_duration_init(eh, "NBD_SERVER_HEADER_DUR_WRITE", true); data_dur = exaperf_duration_init(eh, "NBD_SERVER_DATA_DUR_WRITE", true); }
/**
 * Wait for events on the sockets of 'set' through an exa_select handle.
 *
 * WIN32 build: checks the handle magic, then calls os_select() with 'set'
 * as its third argument and the file-level select_timeout. When os_select()
 * reports no ready socket (0), 'set' is cleared and -EFAULT is returned.
 *
 * Other builds: forwards 'set' to the EXA_SELECT_OUT ioctl on h->fd.
 *
 * \param[in]     h    Select handle
 * \param[in,out] set  Socket set handed to the underlying select mechanism
 *
 * \return 0 on success, a negative error code otherwise
 */
int exa_select_out(exa_select_handle_t *h, fd_set *set)
{
#if WIN32
    int nb_sock;

    EXA_ASSERT_VERBOSE(h->magic == EXA_SELECT_MAGIC,
                       "Corrupted handle detected %d", h->magic);

    nb_sock = os_select(0 /* ignored */, NULL, set, NULL, &select_timeout);

    if (nb_sock > 0)
        return 0;

    if (nb_sock < 0)
        return nb_sock;

    /* Timeout reached: no socket had any event, so hand back an empty set. */
    FD_ZERO(set);
    return -EFAULT;
#else
    return ioctl(h->fd, EXA_SELECT_OUT, set) == -1 ? -errno : 0;
#endif
}
exa_select_handle_t *exa_select_new_handle(void) { exa_select_handle_t *h = os_malloc(sizeof(exa_select_handle_t)); EXA_ASSERT(h != NULL); #if WIN32 h->magic = EXA_SELECT_MAGIC; #else h->fd = open(EXACOMMON_MODULE_PATH, O_RDWR); EXA_ASSERT_VERBOSE(h->fd >= 0, "Cannot bind to exa_common module: %s (%d)", os_strerror(errno), -errno); EXA_ASSERT_VERBOSE(ioctl(h->fd, EXA_SELECT_MAL) != -1, "Cannot register to exa_common module: %s (%d)", os_strerror(errno), -errno); #endif return h; }
static int vrt_cmd_group_event(const struct VrtGroupEvent *event_msg) { int retval = -EINVAL; struct vrt_group *group; group = vrt_get_group_from_uuid(&event_msg->group_uuid); if (!group) { exalog_debug("group " UUID_FMT " not found", UUID_VAL(&event_msg->group_uuid)); return -VRT_ERR_UNKNOWN_GROUP_UUID; } switch(event_msg->event) { case VRT_GROUP_RESUME: retval = vrt_group_resume(group); break; case VRT_GROUP_SUSPEND_METADATA_AND_REBUILD: vrt_group_metadata_thread_suspend(group); vrt_group_rebuild_thread_suspend(group); retval = 0; break; case VRT_GROUP_RESUME_METADATA_AND_REBUILD: vrt_group_metadata_thread_resume(group); vrt_group_rebuild_thread_resume(group); retval = 0; break; case VRT_GROUP_COMPUTESTATUS: retval = vrt_group_compute_status(group); break; case VRT_GROUP_WAIT_INITIALIZED_REQUESTS: vrt_group_wait_initialized_requests (group); retval = EXA_SUCCESS; break; case VRT_GROUP_POSTRESYNC: retval = vrt_group_post_resync(group); break; default : EXA_ASSERT_VERBOSE(0, "struct VrtGroupEvent: Unknown event type %d\n", event_msg->event); } vrt_group_unref(group); return retval; }
/**
 * Forget the static-data initialization recorded by exa_rdev_static_init().
 *
 * Does nothing when no initialization is recorded. Otherwise op must be
 * RDEV_STATIC_RELEASE or RDEV_STATIC_DELETE (asserted) and must pair with
 * the recorded initialization: DELETE with CREATE, RELEASE with GET
 * (asserted as well). init_op is then reset to RDEV_STATIC_OP_INVALID.
 *
 * \param[in] op  Cleanup operation being performed
 */
void exa_rdev_static_clean(rdev_static_op_t op)
{
    /* Initialization not performed, nothing to clean */
    if (init_op == RDEV_STATIC_OP_INVALID)
        return;

    EXA_ASSERT_VERBOSE(op == RDEV_STATIC_RELEASE || op == RDEV_STATIC_DELETE,
                       "invalid static clean op: %d", op);

    switch (op)
    {
    case RDEV_STATIC_DELETE:
        /* Only the creator of the static data may delete it. */
        EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_CREATE,
                           "deletion of static data by non-owner");
        break;

    default: /* RDEV_STATIC_RELEASE */
        /* A mere user of the static data releases it. */
        EXA_ASSERT_VERBOSE(init_op == RDEV_STATIC_GET,
                           "release of static data by owner");
        break;
    }

    init_op = RDEV_STATIC_OP_INVALID;
}
/**
 * Release an exa_select handle.
 *
 * A NULL handle is accepted and ignored. On WIN32 the handle magic is
 * verified (asserted); on other builds the handle's file descriptor is
 * closed. The handle memory is then freed with os_free().
 *
 * \param[in] h  Handle to delete, may be NULL
 */
void exa_select_delete_handle(exa_select_handle_t *h)
{
    if (h != NULL)
    {
#if WIN32
        EXA_ASSERT_VERBOSE(h->magic == EXA_SELECT_MAGIC,
                           "Corrupted handle detected %d", h->magic);
#else
        close(h->fd);
#endif
        os_free(h);
    }
}
/* parts to handle devices events, cases REBUILD and RECOVER are missing */ static int vrt_cmd_device_event(const struct VrtDeviceEvent *event_msg) { int retval = -EINVAL; struct vrt_group *group; struct vrt_realdev *rdev; group = vrt_get_group_from_uuid(&event_msg->group_uuid); if (!group) { exalog_debug("group " UUID_FMT " not found", UUID_VAL(&event_msg->group_uuid)); return -VRT_ERR_UNKNOWN_GROUP_UUID; } rdev = storage_get_rdev(group->storage, &event_msg->rdev_uuid); if (!rdev) { exalog_debug("rdev " UUID_FMT " not found", UUID_VAL(&event_msg->rdev_uuid)); return -VRT_ERR_OLD_RDEVS_MISSING; } switch(event_msg->event) { case VRT_DEVICE_DOWN: retval = vrt_group_rdev_down(group, rdev); break; case VRT_DEVICE_UP: retval = vrt_group_rdev_up(group, rdev); break; case VRT_DEVICE_REINTEGRATE: retval = vrt_group_reintegrate_rdev(group, rdev); break; case VRT_DEVICE_POST_REINTEGRATE: retval = vrt_group_post_reintegrate_rdev(group, rdev); break; default : EXA_ASSERT_VERBOSE(0, "struct VrtDeviceEvent: Unknown event type %d\n", event_msg->event); } vrt_group_unref(group); return retval; }
/*-------------------------------------------------------------------------*/
/**
 * Close the client socket of a connection slot.
 *
 * The socket is removed from setSocks, closed, and the slot's fd is set
 * to -1. The slot's uid is deliberately left untouched (see below).
 *
 * \param[in] connection_id  Index of the slot in connectlist
 */
/*-------------------------------------------------------------------------*/
static void close_connection(int connection_id)
{
    int cli_fd = connectlist[connection_id].fd;

    /* Asserts the descriptor is greater than 2 before closing it. */
    EXA_ASSERT_VERBOSE(cli_fd > 2 , "fd = %d", cli_fd);

    FD_CLR(cli_fd, &setSocks);
    os_closesocket(cli_fd);

    exalog_debug("CONNECTION: %d Socket %d closed state_work NOT_USED",
                 connection_id, cli_fd);

    /* Do not reset uid and free field because there may be a command still
     * running on the worker thread. Those fields will be reset when it ends.
     */
    connectlist[connection_id].fd = -1;
}
/** * Main loop. */ static void loop(void) { static struct timespec last_time; __trace("marking self as alive"); os_get_monotonic_time(&last_time); /* We *always* see ourself */ mark_alive(self); do_ping = true; while (!csupd_quit()) { struct timespec now; sup_ping_t ping; os_get_monotonic_time(&now); /* if the node was detected as frozen for more than half a ping_timeout * we abort because this behaviour is not acceptable (byzantine) */ EXA_ASSERT_VERBOSE( difftime(now.tv_sec, last_time.tv_sec) <= (ping_timeout + 1) / 2, "Node frozen during '%lu' seconds. Aborting", (unsigned long)difftime(now.tv_sec, last_time.tv_sec)); last_time = now; if (do_ping) { do_ping = false; check_admind(); sup_pre_ping(); sup_send_ping(&cluster, &self->view); sup_post_ping(); } /* wait for an event and process it */ if (sup_recv_ping(&ping)) sup_process_ping(&ping); } sup_view_debug(&self->view); __trace("I am seen down, bye bye"); }
/**
 * Deliver to Evmgr the membership calculated by Csupd.
 *
 * Builds an EXAMSG_SUP_MSHIP_CHANGE message carrying the generation and a
 * copy of the membership, and sends it without blocking to the local
 * event manager mailbox. A send that does not transfer the whole message
 * is asserted.
 *
 * \param[in] gen    Generation number
 * \param[in] mship  Membership to deliver
 *
 * \return 0 (delivery failures do not return: they trigger the assertion)
 */
int sup_deliver(sup_gen_t gen, const exa_nodeset_t *mship)
{
    SupEventMshipChange msg;
    int ret;

    /* Fill in the message */
    msg.any.type = EXAMSG_SUP_MSHIP_CHANGE;
    msg.gen = gen;
    exa_nodeset_copy(&msg.mship, mship);

    /* Hand it over to the event manager on this node */
    ret = examsgSendNoBlock(sup_mh, EXAMSG_ADMIND_EVMGR_ID, EXAMSG_LOCALHOST,
                            &msg, sizeof(msg));

    EXA_ASSERT_VERBOSE(ret == sizeof(SupEventMshipChange),
                       "Unable to deliver membership to the evmgr (%d)", ret);

    return 0;
}
/**
 * Translate a sector of an assembly volume into a (slot, offset) pair.
 *
 * The sector must lie inside the volume, i.e. below
 * total_slots_count * slot_size (asserted).
 *
 * \param[in]  av              Assembly volume
 * \param[in]  slot_size       Size of a slot, in the same unit as vsector
 * \param[in]  vsector         Sector to map
 * \param[out] slot_index      Index of the slot containing the sector
 * \param[out] offset_in_slot  Position of the sector inside that slot
 */
void assembly_volume_map_sector_to_slot(const assembly_volume_t *av,
                                        uint64_t slot_size,
                                        uint64_t vsector,
                                        unsigned int *slot_index,
                                        uint64_t *offset_in_slot)
{
    uint64_t volume_slot_index;

    EXA_ASSERT_VERBOSE(vsector < av->total_slots_count * slot_size,
                       "vsector=%" PRIu64 ", av->total_slots_count=%" PRIu64
                       ", slot_size=%" PRIu64 "\n",
                       vsector, av->total_slots_count, slot_size);

    /* Quotient: which slot of the volume slot array holds the sector */
    volume_slot_index = vsector / slot_size;
    EXA_ASSERT(volume_slot_index < av->total_slots_count);

    /* Remainder: where the sector sits inside that slot */
    *offset_in_slot = vsector % slot_size;

    *slot_index = volume_slot_index;
}
/**
 * Compute the rank of the local node among the nodes that are up.
 *
 * Node ids are scanned in increasing order; each id contained in nodes_up
 * increments the rank. The rank reached when the scan hits vrt_node_id is
 * returned (so the first up node has rank 0).
 *
 * \return the rank of the local node; if the local node is not part of
 *         nodes_up an assertion is raised (0 is returned past it)
 */
int vrt_node_get_upnode_id(void)
{
    exa_nodeid_t node;
    int upnode_id = -1;

    for (node = 0; node < EXA_MAX_NODES_NUMBER; node++)
    {
        /* Nodes that are down do not count in the ranking */
        if (!exa_nodeset_contains(&nodes_up, node))
            continue;

        upnode_id++;

        if (node == vrt_node_id)
            return upnode_id;
    }

    EXA_ASSERT_VERBOSE(false, "Upnode ID not found. upnode_id=%d vrt_node_get_upnodes_count=%d vrt_node_id=%d",
                       upnode_id, vrt_node_get_upnodes_count(), vrt_node_id);

    return 0;
}
/**
 * Attach a socket to a peer.
 *
 * Runs with peers_lock held. The peer must not already have a socket, and
 * the given IP address must be the one registered for the peer; both
 * conditions are asserted.
 *
 * \param[in] node_id  Id of the peer node (used as index in peers[])
 * \param[in] ip_addr  IP address the socket is connected to
 * \param[in] sock     Socket to attach
 */
static void set_peer_socket(exa_nodeid_t node_id, const char *ip_addr, int sock)
{
    peer_t *peer;

    exalog_debug("setting socket of peer %"PRInodeid": %d '%s'",
                 node_id, sock, ip_addr);

    os_thread_mutex_lock(&peers_lock);

    peer = &peers[node_id];

    /* The slot must be free */
    EXA_ASSERT(peer->sock == -1);

    /* A node's IP address is not supposed to change during the lifetime
       of a cluster (ie, the node id <-> IP address mapping is bijective),
       so we assert if the received IP doesn't match the one registered */
    EXA_ASSERT_VERBOSE(strcmp(ip_addr, peer->ip_addr) == 0,
                       "peer %"PRInodeid": received addr %s, expected %s",
                       node_id, ip_addr, peer->ip_addr);

    peer->sock = sock;

    os_thread_mutex_unlock(&peers_lock);
}
/**
 * Dispatch a received VRT command to its handler.
 *
 * The command type must be valid (asserted). For every regular command the
 * handler's return value is stored in reply->retval. Three types must never
 * reach this function and trigger an assertion: VRTRECV_GROUP_UNFREEZE,
 * VRTRECV_ASK_INFO and VRTRECV_STATS; reply->retval is not written for them.
 *
 * \param[in]  recv   Command to handle
 * \param[out] reply  Reply to fill in
 */
void vrt_cmd_handle_message(const vrt_cmd_t *recv, vrt_reply_t *reply)
{
    EXA_ASSERT_VERBOSE(VRTRECV_TYPE_IS_VALID(recv->type),
                       "Data type %d is unknown.", recv->type);

    switch (recv->type)
    {
    /* --- Node commands --- */
    case VRTRECV_NODE_SET_UPNODES:
        reply->retval = vrt_cmd_node_set_upnodes(&recv->d.vrt_set_nodes_status);
        break;

    /* --- Device commands --- */
    case VRTRECV_DEVICE_EVENT:
        reply->retval = vrt_cmd_device_event(&recv->d.vrt_device_event);
        break;
    case VRTRECV_DEVICE_REPLACE:
        reply->retval = vrt_cmd_device_replace(&recv->d.vrt_device_replace);
        break;

    /* --- Group commands --- */
    case VRTRECV_GROUP_ADD_RDEV:
        reply->retval = vrt_cmd_group_add_rdev(&recv->d.vrt_group_add_rdev);
        break;
    case VRTRECV_GROUP_BEGIN:
        reply->retval = vrt_cmd_group_begin(&recv->d.vrt_group_begin);
        break;
    case VRTRECV_GROUP_CREATE:
        reply->retval = vrt_cmd_group_create(&recv->d.vrt_group_create,
                                             &reply->group_create);
        break;
    case VRTRECV_GROUP_EVENT:
        reply->retval = vrt_cmd_group_event(&recv->d.vrt_group_event);
        break;
    case VRTRECV_GROUP_START:
        reply->retval = vrt_cmd_group_start(&recv->d.vrt_group_start);
        break;
    case VRTRECV_GROUP_STOP:
        reply->retval = vrt_cmd_group_stop(&recv->d.vrt_group_stop);
        break;
    case VRTRECV_GROUP_INSERT_RDEV:
        reply->retval = vrt_cmd_group_insert_rdev(&recv->d.vrt_group_insert_rdev);
        break;
    case VRTRECV_GROUP_STOPPABLE:
        reply->retval = vrt_cmd_group_stoppable(&recv->d.vrt_group_stoppable);
        break;
    case VRTRECV_GROUP_GOING_OFFLINE:
        reply->retval = vrt_cmd_group_going_offline(&recv->d.vrt_group_going_offline);
        break;
    case VRTRECV_GROUP_SYNC_SB:
        reply->retval = vrt_cmd_group_sync_sb(&recv->d.vrt_group_sync_sb);
        break;
    case VRTRECV_GROUP_FREEZE:
        reply->retval = vrt_cmd_group_freeze(&recv->d.vrt_group_freeze);
        break;
    case VRTRECV_GROUP_RESET:
        reply->retval = vrt_cmd_group_reset(& recv->d.vrt_group_reset);
        break;
    case VRTRECV_GROUP_CHECK:
        reply->retval = vrt_cmd_group_check(& recv->d.vrt_group_check);
        break;
    case VRTRECV_GROUP_RESYNC:
        reply->retval = vrt_cmd_group_resync(&recv->d.vrt_group_resync);
        break;
    case VRTRECV_PENDING_GROUP_CLEANUP:
        reply->retval = vrt_cmd_pending_group_cleanup();
        break;

    /* --- Volume commands --- */
    case VRTRECV_GET_VOLUME_STATUS:
        reply->retval = vrt_cmd_get_volume_status(&recv->d.vrt_get_volume_status);
        break;
    case VRTRECV_VOLUME_CREATE:
        reply->retval = vrt_cmd_volume_create(& recv->d.vrt_volume_create);
        break;
    case VRTRECV_VOLUME_DELETE:
        reply->retval = vrt_cmd_volume_delete(& recv->d.vrt_volume_delete);
        break;
    case VRTRECV_VOLUME_RESIZE:
        reply->retval = vrt_cmd_volume_resize(& recv->d.vrt_volume_resize);
        break;
    case VRTRECV_VOLUME_START:
        reply->retval = vrt_cmd_volume_start(& recv->d.vrt_volume_start);
        break;
    case VRTRECV_VOLUME_STOP:
        reply->retval = vrt_cmd_volume_stop(& recv->d.vrt_volume_stop);
        break;

    /* --- Commands that must never be dispatched from here --- */
    case VRTRECV_GROUP_UNFREEZE:
        /* group unfreeze has been caught by the multiplexer thread */
        EXA_ASSERT(FALSE);
        break;

    case VRTRECV_ASK_INFO:
    case VRTRECV_STATS:
        EXA_ASSERT_VERBOSE(FALSE, "Type %s (%d) Should not be handled by this thread\n",
                           recv->type == VRTRECV_STATS ? "stats" : "info",
                           recv->type);
    }
}
int adm_vrt_group_sync_sb(int thr_nb, struct adm_group *group) { struct { bool group_is_started; bool can_write; bool have_disk_in_group; } info, reply; exa_nodeid_t nid; bool group_is_started_somewhere = false; int ret; int barrier_ret = EXA_SUCCESS; admwrk_request_t rpc; struct adm_disk *disk; int nb_nodes_with_writable_disks = 0; int nb_nodes_with_disks_in_group = 0; uint64_t old_sb_version, new_sb_version; COMPILE_TIME_ASSERT(sizeof(info) <= ADM_MAILBOX_PAYLOAD_PER_NODE); /* XXX maybe checking started is useless as administrable => started * and !administrable => return */ info.group_is_started = group->started; info.can_write = false; info.have_disk_in_group = false; adm_group_for_each_disk(group, disk) { if (disk->node_id == adm_my_id) { info.have_disk_in_group = true; if (disk->up_in_vrt) info.can_write = true; } } admwrk_bcast(thr_nb, &rpc, EXAMSG_SERVICE_VRT_SB_SYNC, &info, sizeof(info)); while (admwrk_get_bcast(&rpc, &nid, &reply, sizeof(reply), &ret)) { if (ret == -ADMIND_ERR_NODE_DOWN) { barrier_ret = -ADMIND_ERR_NODE_DOWN; continue; } EXA_ASSERT(ret == EXA_SUCCESS); if (reply.can_write) nb_nodes_with_writable_disks++; if (reply.have_disk_in_group) nb_nodes_with_disks_in_group++; if (reply.group_is_started) group_is_started_somewhere = true; } if (barrier_ret != EXA_SUCCESS) return barrier_ret; /* do not write superblocks if the group is stopped on all nodes */ if (!group_is_started_somewhere) return EXA_SUCCESS; if (nb_nodes_with_writable_disks < quotient_ceil64(nb_nodes_with_disks_in_group, 2)) return -VRT_ERR_GROUP_NOT_ADMINISTRABLE; old_sb_version = sb_version_get_version(group->sb_version); new_sb_version = sb_version_new_version_prepare(group->sb_version); if (group->started) { ret = vrt_client_group_sync_sb(adm_wt_get_localmb(), &group->uuid, old_sb_version, new_sb_version); EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS || ret == -ADMIND_ERR_NODE_DOWN, "Synchronization of superblocks failed for group '%s' " "UUID=" UUID_FMT ": %s (%d)", group->name, 
UUID_VAL(&group->uuid), exa_error_msg(ret), ret); } else ret = EXA_SUCCESS; barrier_ret = admwrk_barrier(thr_nb, ret, "VRT: Preparing superblocks version"); if (barrier_ret != EXA_SUCCESS) return barrier_ret; sb_version_new_version_done(group->sb_version); barrier_ret = admwrk_barrier(thr_nb, EXA_SUCCESS, "VRT: Writing superblocks version"); /* Commit anyway, If we are here, we are sure that other nodes have done the * job too even if they crashed meanwhile */ sb_version_new_version_commit(group->sb_version); return barrier_ret; }
static void local_exa_vldelete (int thr_nb, void *msg) { struct adm_group *group; struct adm_volume *volume = NULL; int ret; /* used for local function calls */ int barrier_ret; /* used for barriers return values */ int undo_ret; struct vldelete_info *info = msg; group = adm_group_get_group_by_name(info->group_name); if (group == NULL) { ret = -ADMIND_ERR_UNKNOWN_GROUPNAME; goto get_barrier; } volume = adm_group_get_volume_by_name(group, info->volume_name); if (volume == NULL) { ret = -ADMIND_ERR_UNKNOWN_VOLUMENAME; goto get_barrier; } get_barrier: /*** Barrier: getting parameters ***/ ret = EXA_SUCCESS; barrier_ret = admwrk_barrier(thr_nb, ret, "Getting parameters"); if (barrier_ret != EXA_SUCCESS) goto local_exa_vldelete_end_no_resume; ret = vrt_group_suspend_threads_barrier(thr_nb, &group->uuid); if (ret != EXA_SUCCESS) goto local_exa_vldelete_end; /*** Action: mark the transaction as in-progress ***/ /* This is an in-memory operation, we assume it won't fail */ volume->committed = false; ret = conf_save_synchronous(); EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS, "%s", exa_error_msg(ret)); /*** Barrier: mark the transaction as in-progress ***/ barrier_ret = admwrk_barrier(thr_nb, ret, "Marking transaction as in-progress"); if (barrier_ret == -ADMIND_ERR_NODE_DOWN) goto metadata_corruption; else if (barrier_ret != EXA_SUCCESS) goto local_exa_vldelete_end; /* XXX should errors be handled ? 
*/ lum_exports_remove_export_from_uuid(&volume->uuid); lum_exports_increment_version(); lum_serialize_exports(); /*** Action: delete the volume (in memory) through the VRT API ***/ ret = vrt_client_volume_delete(adm_wt_get_localmb(), &group->uuid, &volume->uuid); /*** Barrier: delete the volume through the VRT API ***/ barrier_ret = admwrk_barrier(thr_nb, ret, "Deleting volume"); if (barrier_ret == -ADMIND_ERR_NODE_DOWN) goto metadata_corruption; else if (barrier_ret == -VRT_ERR_GROUP_NOT_ADMINISTRABLE) { /* Mark the transaction as committed, so that the volume is not * shown as "invalid" later. */ volume->committed = true; undo_ret = conf_save_synchronous(); EXA_ASSERT_VERBOSE(undo_ret == EXA_SUCCESS, "%s", exa_error_msg(undo_ret)); goto local_exa_vldelete_end; } else if ((barrier_ret != EXA_SUCCESS) && !info->metadata_recovery) goto local_exa_vldelete_end; /*** Action: group sync SB (master) ***/ ret = adm_vrt_group_sync_sb(thr_nb, group); /*** Barrier: group sync SB ***/ barrier_ret = admwrk_barrier(thr_nb, ret, "Syncing metadata on disk"); if (barrier_ret == -ADMIND_ERR_NODE_DOWN) goto metadata_corruption; else if (barrier_ret != EXA_SUCCESS) goto local_exa_vldelete_end; /* Delete the volume from the configuration */ adm_group_remove_volume(volume); adm_volume_free(volume); ret = conf_save_synchronous(); EXA_ASSERT_VERBOSE(ret == EXA_SUCCESS, "%s", exa_error_msg(ret)); barrier_ret = admwrk_barrier(thr_nb, ret, "Updating XML configuration"); if (barrier_ret == -ADMIND_ERR_NODE_DOWN) goto metadata_corruption; goto local_exa_vldelete_end; metadata_corruption: ret = -ADMIND_ERR_METADATA_CORRUPTION; local_exa_vldelete_end: barrier_ret = vrt_group_resume_threads_barrier(thr_nb, &group->uuid); /* What to do if that fails... I don't know. */ if (barrier_ret != 0) ret = barrier_ret; local_exa_vldelete_end_no_resume: exalog_debug("local_exa_vldelete() = %s", exa_error_msg(ret)); admwrk_ack(thr_nb, ret); }
static void check_internal_msg(void) { struct timeval timeout = { .tv_sec = 0, .tv_usec = EXAMSG_TIMEOUT }; static Examsg msg; command_end_t *end; int i, ret; ret = examsgWaitTimeout(cli_mh, &timeout); if (ret < 0 && ret != -ETIME) { exalog_error("Message wait failed %s (%d)", exa_error_msg(ret), ret); return; } if (ret == -ETIME) return; ret = examsgRecv(cli_mh, NULL, &msg, sizeof(msg)); if (ret == 0) return; EXA_ASSERT_VERBOSE(ret > 0, "Message receive failed: %s (%d)", exa_error_msg(ret), ret); if (ret < 0) exalog_error("Message receive failed: %s (%d)", exa_error_msg(ret), ret); /* The CLI server can only receive EXAMSG_ADM_CLUSTER_CMD_END messages for now */ EXA_ASSERT(msg.any.type == EXAMSG_ADM_CLUSTER_CMD_END); end = (command_end_t *)msg.payload; for (i = 0; i < MAX_CONNECTION; i++) if (end->cuid == connectlist[i].uid) { cli_command_end_complete(connectlist[i].fd, &end->err_desc); connectlist[i].uid = CMD_UID_INVALID; break; } EXA_ASSERT(i < MAX_CONNECTION); } static void check_tcp_connection(void) { static struct timeval timeout = { .tv_sec = 0, .tv_usec = 0 }; fd_set setSave = setSocks; int ret, conn_id; do ret = os_select(FD_SETSIZE, &setSave, NULL, NULL, &timeout); while (ret == -EINTR); if (ret < 0) { /* FIXME should assert ? */ exalog_debug("Select failed %m"); return; } /* Check working sockets */ for (conn_id = 0; conn_id < MAX_CONNECTION; ++conn_id) { int sock_fd = connectlist[conn_id].fd; if (sock_fd >= 0 && FD_ISSET(sock_fd, &setSave)) handle_inputdata(conn_id, sock_fd); } /* Must be done at the end to make sure messages for current * working threads are processed first */ if (FD_ISSET(listen_fd, &setSave)) accept_new_client(); } /*-------------------------------------------------------------------------*/ /** \brief Connection thread: wait on xml message and pass the command * to the work thread. * * \param[in] sock_xml: socket xml on which it receives commands. 
* */ /*-------------------------------------------------------------------------*/ static void cli_server(void *data) { int i; /* Initialize exalog */ exalog_as(EXAMSG_ADMIND_ID); exalog_debug("cli_server: started"); /* Initialization */ FD_ZERO(&setSocks); FD_SET(listen_fd, &setSocks); for (i = 0; i < MAX_CONNECTION; i++) { connectlist[i].fd = -1; /* A command cannot be CMD_UID_INVALID, so CMD_UID_INVALID means here * no command running */ connectlist[i].uid = CMD_UID_INVALID; } while (!stop) { check_tcp_connection(); check_internal_msg(); } os_closesocket(listen_fd); os_net_cleanup(); examsgDelMbox(cli_mh, EXAMSG_ADMIND_CLISERVER_ID); examsgExit(cli_mh); } int cli_server_start(void) { listen_fd = listen_socket_port(ADMIND_SOCKET_PORT); if (listen_fd < 0) return listen_fd; cli_mh = examsgInit(EXAMSG_ADMIND_CLISERVER_ID); if (!cli_mh) return -EINVAL; /* The mailbox needs to be able to receive command end messages from the * event manager; as there can be at most MAX_CONNECTION client connections * we can receive at the time at most 10 command end messages. */ examsgAddMbox(cli_mh, EXAMSG_ADMIND_CLISERVER_ID, MAX_CONNECTION, sizeof(command_end_t)); stop = false; if (!exathread_create_named(&thr_xml_proto, ADMIND_THREAD_STACK_SIZE+MIN_THREAD_STACK_SIZE, &cli_server, NULL, "exa_adm_xml")) return -EXA_ERR_DEFAULT; return EXA_SUCCESS; }
/** * Process a connection that has incoming data. * * \param[in] conn_id Connection id * \param[in] sock_fd Connection socket */ static void handle_inputdata(int conn_id, int sock_fd) { char *buffer = NULL; void *data = NULL; size_t data_size; adm_command_code_t cmd_code; const struct AdmCommand *command; exa_uuid_t cluster_uuid; cl_error_desc_t err_desc; int retval; /* Receive the xml tree parsed in message */ retval = receive(sock_fd, &buffer); if (retval < 0) { if (retval == -ECONNRESET || retval == -ECONNABORTED) exalog_debug("CONNECTION %d: An error occurred : %d [%s]", conn_id, retval, exa_error_msg(retval)); else exalog_error("Socket %d peer '%s': An error occurred : %s (%d)", sock_fd, cli_peer_from_fd(sock_fd), exa_error_msg(retval), retval); close_connection(conn_id); return; } /* Parse the tree we just received and get a newly allocated payload data */ xml_command_parse(buffer, &cmd_code, &cluster_uuid, &data, &data_size, &err_desc); /* buffer is now parsed, let's free it */ os_free(buffer); if (err_desc.code != EXA_SUCCESS) { /* No need to free data buffer if parsing returned an error */ exalog_error("Failed to parse command on socket %d (from peer '%s'): %s (%d)", sock_fd, cli_peer_from_fd(sock_fd), err_desc.msg, err_desc.code); cli_command_end_complete(sock_fd, &err_desc); return; } command = adm_command_find(cmd_code); EXA_ASSERT_VERBOSE(command, "Missing implementation of command #%d", cmd_code); connectlist[conn_id].uid = get_new_cmd_uid(); retval = send_command_to_evmgr(connectlist[conn_id].uid, command, &cluster_uuid, data, data_size); if (retval < 0) { if (retval == -EXA_ERR_ADM_BUSY) exalog_warning("Running command %s (request from %s) failed: %s", adm_command_find(cmd_code)->msg, cli_get_peername(connectlist[conn_id].uid), exa_error_msg(retval)); else exalog_error("Running command %s (request from %s) failed: %s (%d)", adm_command_find(cmd_code)->msg, cli_get_peername(connectlist[conn_id].uid), exa_error_msg(retval), retval); 
set_error(&err_desc, retval, NULL); cli_command_end_complete(sock_fd, &err_desc); /* the command was not scheduled, reset the uid */ connectlist[conn_id].uid = CMD_UID_INVALID; } os_free(data); }
void rebuild_helper_thread(void *p) { ExamsgHandle mh; int err; exalog_as(EXAMSG_NBD_SERVER_ID); /* initialize examsg framework */ mh = examsgInit(EXAMSG_NBD_LOCKING_ID); EXA_ASSERT(mh != NULL); err = examsgAddMbox(mh, EXAMSG_NBD_LOCKING_ID, 1, 5 * EXAMSG_MSG_MAX); EXA_ASSERT(err == 0); os_sem_post(&nbd_server.mailbox_sem); while (nbd_server.run) { device_t *device; ExamsgNbdLock nbd_lock_msg; ExamsgMID from; struct timeval timeout = { .tv_sec = 0, .tv_usec = 100000 }; exa_nodeset_t dest_nodes; err = examsgWaitTimeout(mh, &timeout); /* Just in order to check stopping the thread is required*/ if (err == -ETIME) continue; if (err != 0) { exalog_error("Locking thread encountered error %s (%d) while " "waiting in event loop.", exa_error_msg(err), err); continue; } err = examsgRecv(mh, &from, &nbd_lock_msg, sizeof(nbd_lock_msg)); /* No message */ if (err == 0) continue; if (err < 0) { exalog_error("Locking thread encountered error %s (%d) while " "receiving a messsage.", exa_error_msg(err), err); continue; } switch(nbd_lock_msg.any.type) { case EXAMSG_NBD_LOCK: /* find device from name */ /* FIXME devices lock is not held... it should */ device = find_device_from_uuid(&nbd_lock_msg.disk_uuid); if (device == NULL) { exalog_error("Unknown device with UUID " UUID_FMT, UUID_VAL(&nbd_lock_msg.disk_uuid)); err = -CMD_EXP_ERR_UNKNOWN_DEVICE; break; } if (nbd_lock_msg.lock) { err = exa_disk_lock_zone(device, nbd_lock_msg.locked_zone_start, nbd_lock_msg.locked_zone_size); EXA_ASSERT_VERBOSE(err == 0, "Trying to lock too many zone " "(>%d). 
Last zone not succesfully locked " "(start = %" PRId64 ", size = %" PRId64 " ) " "on device UUID " UUID_FMT, NBMAX_DISK_LOCKED_ZONES, nbd_lock_msg.locked_zone_start, nbd_lock_msg.locked_zone_size, UUID_VAL(&nbd_lock_msg.disk_uuid)); } else { err = exa_disk_unlock_zone(device, nbd_lock_msg.locked_zone_start, nbd_lock_msg.locked_zone_size); EXA_ASSERT_VERBOSE(err == 0, "Trying to unlock a never locked " "zone (unlocked zone start =%" PRId64 ", " "unlocked zone size = %" PRId64 ") on device" " UUID " UUID_FMT, nbd_lock_msg.locked_zone_start, nbd_lock_msg.locked_zone_size, UUID_VAL(&nbd_lock_msg.disk_uuid)); } break; default: /* error */ EXA_ASSERT_VERBOSE(false, "Locking thread got unknown message of" " type %d ", nbd_lock_msg.any.type); break; } exa_nodeset_single(&dest_nodes, from.netid.node); examsgAckReply(mh, (Examsg *)&nbd_lock_msg, err, from.id, &dest_nodes); } examsgDelMbox(mh, EXAMSG_NBD_LOCKING_ID); examsgExit(mh); } /** get the number of sector of the device * \param device_path the device to get the number of sector * \param nb_sectors64 the number of sectors of the device * \return nb_sectors the returned number of sector */ static int get_nb_sectors(const char *device_path, uint64_t *nb_sectors) { uint64_t device_size; /* in bytes */ int retval; int fd; /* We need the read access to get the size. 
*/ if ((fd = os_disk_open_raw(device_path, OS_DISK_READ)) < 0) { exalog_error("cannot open device '%s' error=%s ", device_path, exa_error_msg(-fd)); return -CMD_EXP_ERR_OPEN_DEVICE; } retval = os_disk_get_size(fd, &device_size); if (retval < 0) { exalog_error("os_disk_get_size() error=%s", exa_error_msg(retval)); if (close(fd) != 0) exalog_error("can't EVEN close dev '%s'", device_path); return -EXA_ERR_IOCTL; } retval = close(fd); if (retval < 0) { retval = -errno; exalog_error("cannot close device '%s' error=%s ", device_path, exa_error_msg(retval)); return -CMD_EXP_ERR_CLOSE_DEVICE; } *nb_sectors = device_size / SECTOR_SIZE; /* remove the size of the reserved area for storing admind info */ *nb_sectors -= RDEV_RESERVED_AREA_IN_SECTORS; /* Align the size on 1K * this is the best we can do to have the same size of devices on 2.4 and 2.6 kernels due to * the fact that kernel 2.4 rounds the size of devices with 1 K */ *nb_sectors -= *nb_sectors % (1024 / SECTOR_SIZE); return EXA_SUCCESS; }
/** * Thread processing network events (coming from other nodes). * * This routine may set the network status to down as a side-effect * of calling network_recv(), and sets said status to up when the * network comes back. * * \param[in] dummy Unused * * \return NULL */ static void net_events_routine(void *dummy) { int dest_mbox; ExamsgMID mid; size_t size; char *msg; int s; exalog_as(EXAMSG_CMSGD_ID); exalog_trace("network events routine started"); while (!quit) { int status = network_status(); bool retry; if (status == -ENETDOWN) { network_waitup(); network_set_status(0); } do { s = network_recv(net_mh, &mid, &msg, &size, &dest_mbox); retry = (s < 0 && network_manageable(s) && s != -ENETDOWN); if (retry) os_sleep(1); } while (retry); /* Succeeded, the network status is ok */ if (s > 0 && status != 0) network_set_status(0); if (s == 0 || s == -ENETDOWN) continue; EXA_ASSERT(s > 0); /* Ping from another node for keepalive */ if (((ExamsgAny *)msg)->type == EXAMSG_PING) { EXA_ASSERT(dest_mbox == EXAMSG_CMSGD_ID); exalog_trace("received an EXAMSG_PING from %u:%s", mid.netid.node, mid.host); continue; } exalog_trace("delivering %" PRIzu " bytes to %d", size, dest_mbox); s = examsgMboxSend(&mid, examsgOwner(net_mh), dest_mbox, msg, size); switch (s) { case -ENXIO: /* The mailbox does not exist (yet). This is not an error: csupd may * not be started yet and we receive an examsg for it. * XXX Doesn't sound too good to me, and we should at least check that * the destination is indeed csupd */ break; case -ENOSPC: mailbox_full(dest_mbox, &mid, (Examsg *)msg); break; default: EXA_ASSERT_VERBOSE(s == size + sizeof(mid), "Error %d delivering message to %d", s, dest_mbox); break; } } }
/**
 * Handle a persistent-reservation REGISTER for one LUN.
 *
 * The SCSI status is set to OK up front. Then:
 *  - a non-zero service action key registers the session on the LUN (a
 *    failure to obtain a registration is asserted) and nothing else is done;
 *  - a zero key unregisters the session. If the LUN is reserved, the
 *    reservation itself may be dropped first (type reset to PR_TYPE_NONE and
 *    all registrations cleared):
 *      * "all registrants" types: dropped when the session is a holder and
 *        no other in-use session is still registered;
 *      * other types: dropped when the session is a holder; for the
 *        "registrants only" types every other in-use registered session is
 *        additionally sent a "reservations released" unit attention.
 *
 * \param[in,out] context             Persistent reservation context
 * \param[in]     session_id          Session issuing the command
 * \param[in]     lun                 Target LUN
 * \param[in]     service_action_key  Key to register, 0 to unregister
 * \param[out]    scsi_status         SCSI status of the command
 */
static void persistent_register_lun(pr_context_t *context, int session_id,
                                    lun_t lun, pr_key_t service_action_key,
                                    scsi_command_status_t *scsi_status)
{
    pr_info_t *pr_info = &context->pr_info[lun];
    bool release_reservation = true;
    unsigned int idx;
    int other;

    EXA_ASSERT(session_id < MAX_GLOBAL_SESSION);

    exalog_debug("iSCSI PR: session %i register to LUN %" PRIlun " with key %" PRIu64,
                 session_id, lun, service_action_key);

    SCSI_STATUS_OK(scsi_status, 0);

    /* --- Registration --- */
    if (service_action_key != 0)
    {
        pr_registration_t *registration =
            pr_info_add_registration(pr_info, session_id, service_action_key);

        /* FIXME: manage the error */
        EXA_ASSERT_VERBOSE(registration != NULL,
                           "No more free registration for LUN %" PRIlun
                           ", (session=%i key=%" PRIu64 ")",
                           lun, session_id, service_action_key);
        return;
    }

    /* --- Unregistration --- */

    /* Without a reservation there is only the registration to remove */
    if (!is_lun_reserved(context, lun))
    {
        pr_info_del_registration(pr_info, session_id);
        return;
    }

    /* spc3r23 5.6.10.3 removing lun reservation */
    if (pr_info->reservation_type == PR_TYPE_EXCLUSIVE_ACCESS_ALL_REGISTRANTS
        || pr_info->reservation_type == PR_TYPE_WRITE_EXCLUSIVE_ALL_REGISTRANTS)
    {
        /* if we are not a holder, don't remove reservation */
        release_reservation = persistent_is_holder(context, session_id, lun);

        for (idx = 0; idx < MAX_REGISTRATIONS; idx++)
        {
            other = pr_info->registrations[idx].session_id;

            /* this registered nexus was not the last one */
            if (other != SESSION_ID_NONE && other != session_id
                && context->session_id_used[other])
                release_reservation = false;
        }
    }
    else
    {
        release_reservation = false;
        if (persistent_is_holder(context, session_id, lun))
            release_reservation = true;

        if (pr_info->reservation_type == PR_TYPE_WRITE_EXCLUSIVE_REGISTRANTS_ONLY
            || pr_info->reservation_type == PR_TYPE_EXCLUSIVE_ACCESS_REGISTRANTS_ONLY)
        {
            /* Tell every other in-use registered session */
            for (idx = 0; idx < MAX_REGISTRATIONS; idx++)
            {
                other = pr_info->registrations[idx].session_id;

                if (other != SESSION_ID_NONE
                    && context->session_id_used[other]
                    && other != session_id)
                {
                    callback_send_sense_data(context, other, lun,
                                             SCSI_STATUS_CHECK_CONDITION,
                                             SCSI_SENSE_UNIT_ATTENTION,
                                             SCSI_SENSE_ASC_RESERVATIONS_RELEASED);
                }
            }
        }
    }

    if (release_reservation)
    {
        pr_info->reservation_type = PR_TYPE_NONE;
        pr_info_clear_registrations(pr_info);
    }

    pr_info_del_registration(pr_info, session_id);
}