/*
 * translate_ids()
 *
 * Decides which uid/gid the server should use for a request, based on the
 * squash flags found in the export options (exp_flags) of file system fsid:
 *
 *  - TROVE_EXP_ALL_SQUASH: if iterate_all_squash_wildcards() returns 1 for
 *    client_addr, the ids are replaced with those from get_anon_ids().
 *  - TROVE_EXP_ROOT_SQUASH: same, using iterate_root_squash_wildcards(), but
 *    only when the incoming uid or gid is 0.
 *
 * Params:  fsid            - file system whose export options are consulted
 *          uid, gid        - ids carried by the request
 *          translated_uid,
 *          translated_gid  - out: ids to use (must not be NULL)
 *          client_addr     - address matched against the squash wildcards
 *
 * Returns 1 if it did some uid/gid squashing, 0 otherwise.  Whenever 0 is
 * returned the outputs hold the unmodified uid/gid, including the case where
 * no configuration exists for fsid.
 */
static int translate_ids(PVFS_fs_id fsid, PVFS_uid uid, PVFS_gid gid,
                         PVFS_uid *translated_uid, PVFS_gid *translated_gid,
                         PVFS_BMI_addr_t client_addr)
{
    int exp_flags = 0;
    struct server_configuration_s *serv_config = NULL;
    struct filesystem_configuration_s *fsconfig = NULL;

    serv_config = PINT_get_server_config();
    fsconfig = PINT_config_find_fs_id(serv_config, fsid);

    if (fsconfig == NULL)
    {
        /* unknown file system: nothing to squash, but never hand back
         * uninitialized ids to the caller
         */
        *translated_uid = uid;
        *translated_gid = gid;
        return 0;
    }
    exp_flags = fsconfig->exp_flags;

    /* If all squash was set */
    if ((exp_flags & TROVE_EXP_ALL_SQUASH) &&
        (iterate_all_squash_wildcards(fsconfig, client_addr) == 1))
    {
        get_anon_ids(fsconfig, translated_uid, translated_gid);
        gossip_debug(GOSSIP_SERVER_DEBUG,
            "Translated ids from <%u:%u> to <%u:%u>\n",
            uid, gid, *translated_uid, *translated_gid);
        return 1;
    }

    /* if only root squash was set translate uids for root alone */
    if ((exp_flags & TROVE_EXP_ROOT_SQUASH) &&
        (uid == 0 || gid == 0) &&
        (iterate_root_squash_wildcards(fsconfig, client_addr) == 1))
    {
        get_anon_ids(fsconfig, translated_uid, translated_gid);
        gossip_debug(GOSSIP_SERVER_DEBUG,
            "Translated ids from <%u:%u> to <%u:%u>\n",
            uid, gid, *translated_uid, *translated_gid);
        return 1;
    }

    /* no such translation required! */
    *translated_uid = uid;
    *translated_gid = gid;
    return 0;
}
/*
 * permit_operation()
 *
 * Checks a request against the read-only export option of file system fsid.
 *
 * Params:  fsid         - file system the request targets
 *          access_type  - PINT_SERVER_REQ_READONLY requests are always allowed
 *          client_addr  - address matched against the read-only wildcards
 *
 * Returns zero if this operation should be allowed, -EROFS if the file
 * system is exported read-only to this client and the request is not
 * read-only.  A file system with no configuration entry is allowed.
 */
static int permit_operation(PVFS_fs_id fsid,
                            enum PINT_server_req_access_type access_type,
                            PVFS_BMI_addr_t client_addr)
{
    struct filesystem_configuration_s *fs = NULL;
    int export_flags = 0;

    /* anything that doesn't modify state is okay */
    if (access_type == PINT_SERVER_REQ_READONLY)
    {
        return 0;
    }

    fs = PINT_config_find_fs_id(PINT_get_server_config(), fsid);
    if (!fs)
    {
        return 0;
    }

    /* cheap test to see if ReadOnly was even specified in the exportoptions */
    export_flags = fs->exp_flags;
    if ((export_flags & TROVE_EXP_READ_ONLY) == 0)
    {
        return 0;
    }

    /* Walk the read-only wildcards from the server configuration; only a
     * client address that matches one of them is denied.
     */
    if (iterate_ro_wildcards(fs, client_addr) != 1)
    {
        return 0;
    }

    gossip_debug(GOSSIP_SERVER_DEBUG,
        "Disallowing read-write operation on a read-only exported file-system\n");
    return -EROFS;
}
/*
 * small_io_start_job()
 *
 * State action that services a small I/O request: it runs the file request
 * through PINT_process_request() to obtain the datafile offsets/sizes and
 * then posts a single trove bstream list write (PVFS_IO_WRITE) or list read
 * (otherwise).  For reads the destination buffer is allocated with
 * BMI_memalloc() and stored in the response.
 *
 * Params:  smcb  - state machine control block; the current frame is the
 *                  server op
 *          js_p  - job status; error_code is set on every failure path
 *
 * Returns SM_ACTION_COMPLETE on any failure (with js_p->error_code set) and
 * for a read of an empty datafile; otherwise the return value of the trove
 * job post.
 *
 * The request state created here is released on every path out of the
 * function once it has been allocated.
 */
static PINT_sm_action small_io_start_job(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int ret;
    job_id_t tmp_id;
    PINT_Request_state *file_req_state = NULL;
    PINT_request_file_data fdata;
    PINT_Request_result result;
    struct filesystem_configuration_s *fs_config;
    struct server_configuration_s *server_config;

    memset(&s_op->resp.u.small_io, 0, sizeof(struct PVFS_servresp_small_io));

    /* set io type in response to io type in request.  This is
     * needed by the client so it knows how to decode the response
     * appropriately.
     */
    s_op->resp.u.small_io.io_type = s_op->req->u.small_io.io_type;

    if (s_op->req->u.small_io.io_type == PVFS_IO_READ &&
        s_op->ds_attr.u.datafile.b_size == 0)
    {
        /* nothing to read: complete immediately with no error */
        js_p->error_code = 0;
        return SM_ACTION_COMPLETE;
    }

    file_req_state = PINT_new_request_state(
        s_op->req->u.small_io.file_req);
    if (!file_req_state)
    {
        /* the SET_TARGET/SET_FINAL macros below would dereference it */
        gossip_err("small_io: Failed to allocate file request state\n");
        js_p->error_code = -PVFS_ENOMEM;
        return SM_ACTION_COMPLETE;
    }

    fdata.server_nr = s_op->req->u.small_io.server_nr;
    fdata.server_ct = s_op->req->u.small_io.server_ct;
    fdata.dist = s_op->req->u.small_io.dist;
    result.offset_array = s_op->u.small_io.offsets;
    result.size_array = s_op->u.small_io.sizes;
    result.segmax = IO_MAX_REGIONS;
    result.bytemax = s_op->req->u.small_io.aggregate_size;
    result.bytes = 0;
    result.segs = 0;

    PINT_REQUEST_STATE_SET_TARGET(file_req_state,
                                  s_op->req->u.small_io.file_req_offset);
    PINT_REQUEST_STATE_SET_FINAL(file_req_state,
                                 s_op->req->u.small_io.file_req_offset +
                                 s_op->req->u.small_io.aggregate_size);

    s_op->resp.u.small_io.bstream_size = s_op->ds_attr.u.datafile.b_size;
    fdata.fsize = s_op->ds_attr.u.datafile.b_size;
    /* only writes may extend the bstream */
    fdata.extend_flag =
        (s_op->req->u.small_io.io_type == PVFS_IO_READ) ? 0 : 1;

    /* calculate the offsets and sizes in the datafile for the read or write */
    ret = PINT_process_request(
        file_req_state,
        NULL,
        &fdata,
        &result,
        PINT_SERVER);
    if (ret < 0)
    {
        gossip_err("small_io: Failed to process file request\n");
        js_p->error_code = ret;
        goto complete_with_error;
    }

    /* figure out if the fs config has trove data sync turned on or off */
    server_config = get_server_config_struct();
    if (!server_config)
    {
        gossip_err("small_io: server config is NULL!\n");
        js_p->error_code = -PVFS_EINVAL;
        goto complete_with_error;
    }

    fs_config = PINT_config_find_fs_id(
        server_config, s_op->req->u.small_io.fs_id);
    if (!fs_config)
    {
        gossip_err("small_io: Failed to get filesystem "
                   "config from fs_id of: %d\n",
                   s_op->req->u.small_io.fs_id);
        js_p->error_code = -PVFS_EINVAL;
        goto complete_with_error;
    }

    if (s_op->req->u.small_io.io_type == PVFS_IO_WRITE)
    {
        ret = job_trove_bstream_write_list(
            s_op->req->u.small_io.fs_id,
            s_op->req->u.small_io.handle,
            (char **)&s_op->req->u.small_io.buffer,
            (TROVE_size *)&s_op->req->u.small_io.total_bytes,
            1,
            s_op->u.small_io.offsets,
            s_op->u.small_io.sizes,
            result.segs,
            &s_op->resp.u.small_io.result_size,
            (fs_config->trove_sync_data ? TROVE_SYNC : 0),
            NULL,
            smcb,
            0,
            js_p,
            &tmp_id,
            server_job_context,
            s_op->req->hints);
        if (ret < 0)
        {
            /* report the failure the same way the read path does instead
             * of returning the negative job code as the action result
             */
            gossip_err("small_io: Failed to post trove bstream write\n");
            js_p->error_code = ret;
            goto complete_with_error;
        }
    }
    else
    {
        /* allocate space for the read in the response buffer */
        s_op->resp.u.small_io.buffer = BMI_memalloc(
            s_op->addr, result.bytes, BMI_SEND);
        if (!s_op->resp.u.small_io.buffer)
        {
            js_p->error_code = -PVFS_ENOMEM;
            goto complete_with_error;
        }

        s_op->u.small_io.result_bytes = result.bytes;
        ret = job_trove_bstream_read_list(
            s_op->req->u.small_io.fs_id,
            s_op->req->u.small_io.handle,
            (char **)&s_op->resp.u.small_io.buffer,
            &s_op->u.small_io.result_bytes,
            1,
            s_op->u.small_io.offsets,
            s_op->u.small_io.sizes,
            result.segs,
            &s_op->resp.u.small_io.result_size,
            (fs_config->trove_sync_data ? TROVE_SYNC : 0),
            NULL,
            smcb,
            0,
            js_p,
            &tmp_id,
            server_job_context,
            s_op->req->hints);
        if (ret < 0)
        {
            gossip_err("small-io: Failed to post trove bstream read\n");
            js_p->error_code = ret;
            goto complete_with_error;
        }
    }

    PINT_free_request_state(file_req_state);
    return ret;

complete_with_error:
    /* js_p->error_code has been set by whoever jumped here */
    PINT_free_request_state(file_req_state);
    return SM_ACTION_COMPLETE;
}
int main(int argc, char **argv) { int ret = -1; /* all parameters read in from fs.conf */ struct server_configuration_s server_config; PINT_llist_p fs_configs; char *server_alias; /* make sure that the buffers we intend to use for reading keys and * values is at least large enough to hold the maximum size of xattr keys * and values */ if(DEF_KEY_SIZE < PVFS_REQ_LIMIT_KEY_LEN) { DEF_KEY_SIZE = PVFS_REQ_LIMIT_KEY_LEN; } if(DEF_DATA_SIZE < PVFS_REQ_LIMIT_VAL_LEN) { DEF_DATA_SIZE = PVFS_REQ_LIMIT_VAL_LEN; } if (parse_args(argc, argv, &opts)) { fprintf(stderr,"%s: error: argument parsing failed.\n", argv[0]); return -1; } if(opts.alias_set) { server_alias = opts.alias; } else { server_alias = PINT_util_guess_alias(); } ret = PINT_parse_config(&server_config, opts.fs_conf, server_alias); if(ret < 0) { gossip_err("Error: Please check your config files.\n"); if(!opts.alias_set) { free(server_alias); } return -1; } if(!opts.alias_set) { free(server_alias); } if(opts.all_set) { /* get all the collection ids from the fs config */ fs_configs = PINT_config_get_filesystems(&server_config); } else { /* get the collection id from the specified fs name */ PVFS_fs_id fs_id = PINT_config_get_fs_id_by_fs_name( &server_config, opts.fs); fs_configs = PINT_llist_new(); PINT_llist_add_to_head( fs_configs, (void *)PINT_config_find_fs_id(&server_config, fs_id)); } ret = PINT_llist_doall_arg(fs_configs, migrate_collection, &server_config); if(ret < 0) { PINT_config_release(&server_config); if(!opts.all_set) { PINT_llist_free(fs_configs, fs_config_dummy_free); } return(-1); } return 0; }
/*
 * Function: io_start_flow()
 *
 * Params:   struct PINT_smcb *smcb - state machine control block; the
 *                                    current frame is the server op
 *           job_status_s *js_p     - job status; error_code is set when the
 *                                    function fails before posting the flow
 *
 * Returns:  SM_ACTION_COMPLETE with js_p->error_code set if the server
 *           configuration, the flow descriptor or the distribution cannot
 *           be obtained; otherwise the return value of job_flow()
 *
 * Synopsis: sets up the migposter flow descriptor and posts it.  The flow
 *           is always a write into trove: the source endpoint is BMI
 *           (s_op->addr), the destination is the trove handle taken from
 *           the migposter response, the file request is PVFS_BYTE and the
 *           aggregate size is the dfsize given in the request.
 *
 */
static PINT_sm_action io_start_flow(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int err = -PVFS_EIO;
    job_id_t tmp_id;
    struct server_configuration_s *user_opts = get_server_config_struct();
    struct filesystem_configuration_s *fs_conf;

    gossip_debug(GOSSIP_SERVER_DEBUG, "IO start flow\n");

    if (!user_opts)
    {
        /* the flow timeout below is read from the server configuration */
        gossip_err("io_start_flow: server config is NULL!\n");
        js_p->error_code = -PVFS_EINVAL;
        return SM_ACTION_COMPLETE;
    }

    s_op->u.migposter.flow_desc = PINT_flow_alloc();
    if (!s_op->u.migposter.flow_desc)
    {
        js_p->error_code = -PVFS_ENOMEM;
        return SM_ACTION_COMPLETE;
    }

    /* the flow starts from an empty bstream on a single server */
    s_op->u.migposter.flow_desc->file_data.fsize = 0;
    s_op->u.migposter.flow_desc->file_data.dist =
        PINT_dist_create("simple_stripe");
    if (!s_op->u.migposter.flow_desc->file_data.dist)
    {
        /* The descriptor stays attached to s_op, exactly as it would after
         * a failed job_flow() post.
         * NOTE(review): confirm a later state releases flow_desc.
         */
        gossip_err("io_start_flow: failed to create distribution\n");
        js_p->error_code = -PVFS_ENOMEM;
        return SM_ACTION_COMPLETE;
    }
    s_op->u.migposter.flow_desc->file_data.server_nr = 0;
    s_op->u.migposter.flow_desc->file_data.server_ct = 1;

    /* on writes, we allow the bstream to be extended at EOF */
    gossip_debug(GOSSIP_SERVER_DEBUG, "io_start_flow() issuing flow to "
                 "write data.\n");
    s_op->u.migposter.flow_desc->file_data.extend_flag = 1;

    s_op->u.migposter.flow_desc->file_req = PVFS_BYTE;
    s_op->u.migposter.flow_desc->file_req_offset = 0;
    s_op->u.migposter.flow_desc->mem_req = NULL;
    s_op->u.migposter.flow_desc->aggregate_size =
        s_op->req->u.migposter.dfsize;
    s_op->u.migposter.flow_desc->tag = 0;
    s_op->u.migposter.flow_desc->user_ptr = NULL;
    s_op->u.migposter.flow_desc->type = FLOWPROTO_MULTIQUEUE;

    /* look the file system up with the migposter request's fs_id, the same
     * member used for the trove collection id below
     */
    fs_conf = PINT_config_find_fs_id(user_opts,
        s_op->req->u.migposter.fs_id);
    if (fs_conf)
    {
        /* pick up any buffer settings overrides from fs conf */
        s_op->u.migposter.flow_desc->buffer_size = fs_conf->fp_buffer_size;
        s_op->u.migposter.flow_desc->buffers_per_flow =
            fs_conf->fp_buffers_per_flow;
    }

    gossip_debug(GOSSIP_SERVER_DEBUG, "flow: fsize: %lld, "
        "server_nr: %d, server_ct: %d\n",
        lld(s_op->u.migposter.flow_desc->file_data.fsize),
        (int)s_op->u.migposter.flow_desc->file_data.server_nr,
        (int)s_op->u.migposter.flow_desc->file_data.server_ct);

    gossip_debug(GOSSIP_SERVER_DEBUG, "file_req_offset: %lld,"
        "aggregate_size: %lld, handle: %llu\n",
        lld(s_op->u.migposter.flow_desc->file_req_offset),
        lld(s_op->u.migposter.flow_desc->aggregate_size),
        llu(s_op->resp.u.migposter.handle));

    /* endpoints: data arrives over BMI and is written into trove */
    s_op->u.migposter.flow_desc->src.endpoint_id = BMI_ENDPOINT;
    s_op->u.migposter.flow_desc->src.u.bmi.address = s_op->addr;
    s_op->u.migposter.flow_desc->dest.endpoint_id = TROVE_ENDPOINT;
    s_op->u.migposter.flow_desc->dest.u.trove.handle =
        s_op->resp.u.migposter.handle;
    s_op->u.migposter.flow_desc->dest.u.trove.coll_id =
        s_op->req->u.migposter.fs_id;

    err = job_flow(s_op->u.migposter.flow_desc, smcb, 0, js_p, &tmp_id,
                   server_job_context, user_opts->server_job_flow_timeout,
                   NULL);

    return err;
}
/*
 * PVFS_mgmt_get_config()
 *
 * Retrieves the file system configuration text for *fsid by running the
 * PVFS_SERVER_GET_CONFIG client state machine and copies the result into
 * fs_buf.
 *
 * Params:  fsid         - file system to query
 *          addr         - address handed to PINT_cached_config_map_addr() to
 *                         name the config server
 *          fs_buf       - out: receives the configuration text; always
 *                         NUL-terminated on success and truncated to
 *                         fs_buf_size - 1 characters if necessary
 *          fs_buf_size  - size of fs_buf in bytes, must be > 0
 *
 * Returns 0 on success, a negative PVFS error on failure: -PVFS_EINVAL for
 * bad arguments, an unknown fsid or a missing config buffer, -PVFS_ENOMEM if
 * the state machine control block cannot be allocated, otherwise the error
 * reported by posting or waiting on the state machine.
 */
int PVFS_mgmt_get_config(
    const PVFS_fs_id *fsid,
    PVFS_BMI_addr_t *addr,
    char *fs_buf,
    int fs_buf_size)
{
    int ret = -PVFS_EINVAL;
    PINT_smcb *smcb = NULL;
    PINT_client_sm *sm_p = NULL;
    PVFS_error error = 0;
    PVFS_credentials creds;
    struct filesystem_configuration_s *cur_fs = NULL;
    PVFS_sys_op_id op_id;
    struct server_configuration_s *config = NULL;
    struct PVFS_sys_mntent mntent;
    int server_type = 0;

    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_mgmt_get_config entered\n");

    /* fs_buf[fs_buf_size - 1] is written below, so the size must be > 0 */
    if (!fsid || !addr || !fs_buf || fs_buf_size <= 0)
    {
        return -PVFS_EINVAL;
    }

    PVFS_util_gen_credentials(&creds);

    /* Gather everything that is read out of the server configuration while
     * the configuration is still held, and only then hand it back.  This is
     * done before the control block is allocated so the failure paths have
     * nothing else to undo.
     */
    memset(&mntent, 0, sizeof(mntent));

    config = PINT_get_server_config_struct(*fsid);
    if (!config)
    {
        return -PVFS_EINVAL;
    }

    mntent.the_pvfs_config_server =
        (char*)PINT_cached_config_map_addr(*fsid, *addr, &server_type);

    cur_fs = PINT_config_find_fs_id(config, *fsid);
    if (!cur_fs)
    {
        PINT_put_server_config_struct(config);
        return -PVFS_EINVAL;
    }

    mntent.encoding = cur_fs->encoding;
    mntent.flowproto = cur_fs->flowproto;
    mntent.fs_id = *fsid;
    mntent.pvfs_fs_name = cur_fs->file_system_name;

    PINT_put_server_config_struct(config);

    PINT_smcb_alloc(&smcb, PVFS_SERVER_GET_CONFIG,
                    sizeof(struct PINT_client_sm),
                    client_op_state_get_machine,
                    client_state_machine_terminate,
                    pint_client_sm_context);
    if (smcb == NULL)
    {
        return -PVFS_ENOMEM;
    }

    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);

    /* ask the state machine to leave the config buffer in place so it can
     * be copied out (and freed) below
     */
    sm_p->u.get_config.persist_config_buffers = 1;

    PINT_init_msgarray_params(sm_p, *fsid);

    PINT_init_sysint_credentials(sm_p->cred_p, &creds);

    /* NOTE(review): the state machine is still handed the config pointer
     * and the file system name after the put above, as before this change
     * - confirm those objects outlive the operation.
     */
    sm_p->u.get_config.config = config;
    sm_p->msgarray_op.msgpair.enc_type = mntent.encoding;
    sm_p->u.get_config.mntent = &mntent;

    PINT_msgpair_init(&sm_p->msgarray_op);

    ret = PINT_client_state_machine_post(
        smcb, &op_id, NULL);

    if (ret)
    {
        PVFS_perror_gossip("PINT_client_state_machine_post call", ret);
        error = ret;
    }
    else
    {
        ret = PVFS_mgmt_wait(op_id, "X-get_config", &error);
        if (ret)
        {
            PVFS_perror_gossip("PVFS_mgmt_wait call", ret);
            error = ret;
        }
    }

    if (error)
    {
        goto exit_path;
    }

    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_mgmt_get_config completed\n");

    if (!sm_p->u.get_config.fs_config_buf)
    {
        /* nothing was retrieved; do not pass NULL to strncpy */
        error = -PVFS_EINVAL;
        goto exit_path;
    }

    /* make sure strings will be null terminated after strncpy */
    fs_buf[fs_buf_size-1] = '\0';

    /* copy the retrieved configuration buffer into the return buffer */
    strncpy(fs_buf, sm_p->u.get_config.fs_config_buf, (fs_buf_size - 1));

  exit_path:

    if (sm_p && sm_p->u.get_config.persist_config_buffers)
    {
        free(sm_p->u.get_config.fs_config_buf);
        sm_p->u.get_config.fs_config_buf = NULL;
    }

    PINT_mgmt_release(op_id);
    return error;
}
/* msgpairarray_post()
 *
 * The following elements of the PINT_sm_msgpair_state
 * should be valid prior to this state (for each msgpair in array):
 * - req (unencoded request)
 * - svr_addr of each element in msg array
 *
 * This state performs the following operations for each msgpair,
 * one at a time:
 * (1) encodes request
 * (2) calculates maximum response size
 * (3) allocates BMI memory for response data (encoded)
 * (4) gets a session tag for the pair of messages
 * (5) posts the receive of the response
 * (6) posts the send of the request
 * (7) stores job ids for later matching
 *
 * Steps (1)-(3) are skipped for a msgpair that already has a response
 * buffer (encoded_resp_p != NULL); msgpairs marked complete are skipped
 * entirely.
 *
 * Returns SM_ACTION_COMPLETE with js_p->error_code set to the failure code
 * if encoding or the response allocation fails, SM_ACTION_COMPLETE with
 * js_p->error_code == MSGPAIRS_COMPLETE if nothing is left outstanding
 * (comp_ct reached 0), and SM_ACTION_DEFERRED otherwise.
 *
 * NOTE(review): must_loop_encodings and local_enc_and_alloc are not reset at
 * the top of each iteration, so a value set for one msgpair carries over to
 * the following ones - confirm this is intended.
 */
static PINT_sm_action msgpairarray_post(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int ret = -PVFS_EINVAL, i = 0, tmp = 0;
    struct server_configuration_s *server_config = NULL;
    PVFS_msg_tag_t session_tag;
    PINT_sm_msgpair_state *msg_p = NULL;
    struct filesystem_configuration_s *cur_fs = NULL;
    int must_loop_encodings = 0;
    int local_enc_and_alloc = 0;

    gossip_debug(
        GOSSIP_MSGPAIR_DEBUG, "%s: sm %p "
        "%d total message(s) with %d incomplete\n", __func__, smcb,
        mop->count * 2, mop->params.comp_ct);

    js_p->error_code = 0;

    assert(mop->count > 0);
    assert(mop->params.comp_ct >= 2);

    for (i = 0; i < mop->count; i++)
    {
        msg_p = &mop->msgarray[i];
        assert(msg_p);

        /*
          here we skip over the msgs that have already completed in
          the case of being in the retry code path when it's ok
        */
        if (msg_p->complete)
        {
            continue;
        }

        msg_p->op_status = 0;

        if (msg_p->encoded_resp_p == NULL)
        {
            if (msg_p->fs_id != PVFS_FS_ID_NULL)
            {
                server_config = PINT_server_config_mgr_get_config(
                    msg_p->fs_id);
                assert(server_config);

                cur_fs = PINT_config_find_fs_id(
                    server_config, msg_p->fs_id);
                assert(cur_fs);

                /* read the encoding while the configuration is still held,
                 * then hand the configuration back
                 */
                msg_p->enc_type = cur_fs->encoding;
                PINT_server_config_mgr_put_config(server_config);
            }

            if (!ENCODING_IS_VALID(msg_p->enc_type))
            {
                PRINT_ENCODING_ERROR("supported", msg_p->enc_type);
                must_loop_encodings = 1;
                msg_p->enc_type = (ENCODING_INVALID_MIN + 1);
            }
            else if (!ENCODING_IS_SUPPORTED(msg_p->enc_type))
            {
                PRINT_ENCODING_ERROR("supported", msg_p->enc_type);
                must_loop_encodings = 1;
                msg_p->enc_type = ENCODING_SUPPORTED_MIN;
            }

          try_next_encoding:
            assert(ENCODING_IS_VALID(msg_p->enc_type));

            ret = PINT_encode(&msg_p->req, PINT_ENCODE_REQ,
                              &msg_p->encoded_req, msg_p->svr_addr,
                              msg_p->enc_type);
            if (ret != 0)
            {
                if (must_loop_encodings)
                {
                    gossip_debug(GOSSIP_MSGPAIR_DEBUG, "Looping through "
                                 "encodings [%d/%d]\n", msg_p->enc_type,
                                 ENCODING_INVALID_MAX);

                    msg_p->enc_type++;
                    if (ENCODING_IS_VALID(msg_p->enc_type))
                    {
                        goto try_next_encoding;
                    }
                }
                gossip_lerr("msgpairarray_post: PINT_encode failed\n");
                js_p->error_code = ret;
                return SM_ACTION_COMPLETE;
            }

            /* calculate max response msg size and allocate space */
            msg_p->max_resp_sz = PINT_encode_calc_max_size(
                PINT_ENCODE_RESP, msg_p->req.op, msg_p->enc_type);

            msg_p->encoded_resp_p = BMI_memalloc(
                msg_p->svr_addr, msg_p->max_resp_sz, BMI_RECV);

            if (msg_p->encoded_resp_p == NULL)
            {
                /* the request was encoded just above; release it so a
                 * later pass (which re-encodes whenever encoded_resp_p is
                 * NULL) does not leak it
                 */
                PINT_encode_release(&msg_p->encoded_req, PINT_ENCODE_REQ);
                js_p->error_code = -PVFS_ENOMEM;
                return SM_ACTION_COMPLETE;
            }
            local_enc_and_alloc = 1;
        }

        session_tag = PINT_util_get_next_tag();

        gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d: "
                     "posting recv\n", __func__, smcb, i);

        /* post receive of response; job_id stored in recv_id */
        ret = job_bmi_recv(msg_p->svr_addr,
                           msg_p->encoded_resp_p,
                           msg_p->max_resp_sz,
                           session_tag,
                           BMI_PRE_ALLOC,
                           smcb,
                           i,
                           &msg_p->recv_status,
                           &msg_p->recv_id,
                           mop->params.job_context,
                           mop->params.job_timeout,
                           msg_p->req.hints);
        if (ret == 0)
        {
            /* perform a quick test to see if the recv failed before posting
             * the send; if it reports an error quickly then we can save the
             * confusion of sending a request for which we can't recv a
             * response
             */
            ret = job_test(msg_p->recv_id, &tmp, NULL,
                           &msg_p->recv_status, 0,
                           mop->params.job_context);
        }

        if ((ret < 0) || (ret == 1))
        {
            /* it is impossible for this recv to complete at this point
             * without errors; we haven't sent the request yet!
             */
            assert(ret < 0 || msg_p->recv_status.error_code != 0);
            if (ret < 0)
            {
                PVFS_perror_gossip("Post of receive failed", ret);
            }
            else
            {
                PVFS_perror_gossip("Receive immediately failed",
                                   msg_p->recv_status.error_code);
            }

            msg_p->recv_id = 0;
            msg_p->send_id = 0;

            /* mark send as bad too and don't post it */
            msg_p->send_status.error_code = msg_p->recv_status.error_code;
            msg_p->op_status = msg_p->recv_status.error_code;
            /* neither the recv nor the send of this pair is outstanding */
            mop->params.comp_ct -= 2;

            if (local_enc_and_alloc)
            {
                PINT_encode_release(&msg_p->encoded_req, PINT_ENCODE_REQ);
                BMI_memfree(msg_p->svr_addr,msg_p->encoded_resp_p,
                            msg_p->max_resp_sz, BMI_RECV);
                msg_p->encoded_resp_p = NULL;
                local_enc_and_alloc = 0;
            }

            /* continue to send other array entries if possible */
            continue;
        }

        /* if we reach here, the recv has been posted without failure, but
         * has not completed yet
         */
        assert(ret == 0);

        gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d: "
                     "posting send\n", __func__, smcb, i);

        /* post send of request; job_id stored in send_id */
        ret = job_bmi_send_list(msg_p->encoded_req.dest,
                                msg_p->encoded_req.buffer_list,
                                msg_p->encoded_req.size_list,
                                msg_p->encoded_req.list_count,
                                msg_p->encoded_req.total_size,
                                session_tag,
                                msg_p->encoded_req.buffer_type,
                                1,
                                smcb,
                                mop->count+i,
                                &msg_p->send_status,
                                &msg_p->send_id,
                                mop->params.job_context,
                                mop->params.job_timeout,
                                msg_p->req.hints);

        if ((ret < 0) ||
            ((ret == 1) && (msg_p->send_status.error_code != 0)))
        {
            if (ret < 0)
            {
                PVFS_perror_gossip("Post of send failed", ret);
            }
            else
            {
                PVFS_perror_gossip("Send immediately failed",
                                   msg_p->send_status.error_code);
            }

            gossip_err_unless_quiet("Send error: cancelling recv.\n");

            job_bmi_cancel(msg_p->recv_id, mop->params.job_context);

            /* we still have to wait for recv completion, so just decrement
             * comp_ct by one and keep going
             */
            msg_p->op_status = msg_p->send_status.error_code;
            msg_p->send_id = 0;
            mop->params.comp_ct--;
        }
        else if (ret == 1)
        {
            /* immediate completion */
            msg_p->send_id = 0;
            /* decrement our count, since send is already done. */
            mop->params.comp_ct--;
        }
        /* else: successful post, no immediate completion */
    }

    if (mop->params.comp_ct == 0)
    {
        /* everything is completed already (could happen in some failure
         * cases); jump straight to final completion function.
         */
        js_p->error_code = MSGPAIRS_COMPLETE;
        return SM_ACTION_COMPLETE;
    }

    /* we are still waiting on operations to complete, next state
     * transition will handle them
     */
    return SM_ACTION_DEFERRED;
}