/** Set up a mgc obd to process startup logs * * \param sb [in] super block of the mgc obd * * \retval 0 success, otherwise error code */ int lustre_start_mgc(struct super_block *sb) { struct obd_connect_data *data = NULL; struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *obd; struct obd_export *exp; struct obd_uuid *uuid; class_uuid_t uuidc; lnet_nid_t nid; char nidstr[LNET_NIDSTR_SIZE]; char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL; char *ptr; int rc = 0, i = 0, j; LASSERT(lsi->lsi_lmd); /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ ptr = lsi->lsi_lmd->lmd_dev; if (class_parse_nid(ptr, &nid, &ptr) == 0) i++; if (i == 0) { CERROR("No valid MGS nids found.\n"); return -EINVAL; } mutex_lock(&mgc_start_lock); libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)); mgcname = kasprintf(GFP_NOFS, "%s%s", LUSTRE_MGC_OBDNAME, nidstr); niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i); if (!mgcname || !niduuid) { rc = -ENOMEM; goto out_free; } mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : ""; data = kzalloc(sizeof(*data), GFP_NOFS); if (!data) { rc = -ENOMEM; goto out_free; } obd = class_name2obd(mgcname); if (obd && !obd->obd_stopping) { int recov_bk; rc = obd_set_info_async(NULL, obd->obd_self_export, strlen(KEY_MGSSEC), KEY_MGSSEC, strlen(mgssec), mgssec, NULL); if (rc) goto out_free; /* Re-using an existing MGC */ atomic_inc(&obd->u.cli.cl_mgc_refcount); /* IR compatibility check, only for clients */ if (lmd_is_client(lsi->lsi_lmd)) { int has_ir; int vallen = sizeof(*data); __u32 *flags = &lsi->lsi_lmd->lmd_flags; rc = obd_get_info(NULL, obd->obd_self_export, strlen(KEY_CONN_DATA), KEY_CONN_DATA, &vallen, data, NULL); LASSERT(rc == 0); has_ir = OCD_HAS_FLAG(data, IMP_RECOV); if (has_ir ^ !(*flags & LMD_FLG_NOIR)) { /* LMD_FLG_NOIR is for test purpose only */ LCONSOLE_WARN( "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n", has_ir ? "enabled" : "disabled"); if (has_ir) *flags &= ~LMD_FLG_NOIR; else *flags |= LMD_FLG_NOIR; } } recov_bk = 0; /* Try all connections, but only once (again). We don't want to block another target from starting (using its local copy of the log), but we do want to connect if at all possible. */ recov_bk++; CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname, recov_bk); rc = obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP, sizeof(recov_bk), &recov_bk, NULL); rc = 0; goto out; } CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname); /* Add the primary nids for the MGS */ i = 0; /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ ptr = lsi->lsi_lmd->lmd_dev; while (class_parse_nid(ptr, &nid, &ptr) == 0) { rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); i++; /* Stop at the first failover nid */ if (*ptr == ':') break; } if (i == 0) { CERROR("No valid MGS nids found.\n"); rc = -EINVAL; goto out_free; } lsi->lsi_lmd->lmd_mgs_failnodes = 1; /* Random uuid for MGC allows easier reconnects */ uuid = kzalloc(sizeof(*uuid), GFP_NOFS); if (!uuid) { rc = -ENOMEM; goto out_free; } ll_generate_random_uuid(uuidc); class_uuid_unparse(uuidc, uuid); /* Start the MGC */ rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, (char *)uuid->uuid, LUSTRE_MGS_OBDNAME, niduuid, NULL, NULL); kfree(uuid); if (rc) goto out_free; /* Add any failover MGS nids */ i = 1; while (ptr && ((*ptr == ':' || class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) { /* New failover node */ sprintf(niduuid, "%s_%x", mgcname, i); j = 0; while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { j++; rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); if (*ptr == ':') break; } if (j > 0) { rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, niduuid, NULL, NULL, NULL); i++; } else { /* at ":/fsname" */ break; } } lsi->lsi_lmd->lmd_mgs_failnodes = i; obd = class_name2obd(mgcname); if (!obd) { CERROR("Can't find mgcobd %s\n", mgcname); rc = -ENOTCONN; goto out_free; } rc = obd_set_info_async(NULL, obd->obd_self_export, strlen(KEY_MGSSEC), KEY_MGSSEC, strlen(mgssec), mgssec, NULL); if (rc) goto out_free; /* Keep a refcount of servers/clients who started with "mount", so we know when we can get rid of the mgc. */ atomic_set(&obd->u.cli.cl_mgc_refcount, 1); /* We connect to the MGS at setup, and don't disconnect until cleanup */ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT | OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | OBD_CONNECT_LVB_TYPE; #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0) data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB; #else #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab" #endif if (lmd_is_client(lsi->lsi_lmd) && lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR) data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV; data->ocd_version = LUSTRE_VERSION_CODE; rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL); if (rc) { CERROR("connect failed %d\n", rc); goto out; } obd->u.cli.cl_mgc_mgsexp = exp; out: /* Keep the mgc info in the sb. Note that many lsi's can point to the same mgc.*/ lsi->lsi_mgc = obd; out_free: mutex_unlock(&mgc_start_lock); kfree(data); kfree(mgcname); kfree(niduuid); return rc; }
/** Set up a mgc obd to process startup logs * * \param sb [in] super block of the mgc obd * * \retval 0 success, otherwise error code */ int lustre_start_mgc(struct super_block *sb) { struct obd_connect_data *data = NULL; struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *obd; struct obd_export *exp; struct obd_uuid *uuid; class_uuid_t uuidc; lnet_nid_t nid; char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL; char *ptr; int recov_bk; int rc = 0, i = 0, j, len; LASSERT(lsi->lsi_lmd); /* Find the first non-lo MGS nid for our MGC name */ if (IS_SERVER(lsi)) { /* mount -o mgsnode=nid */ ptr = lsi->lsi_lmd->lmd_mgs; if (lsi->lsi_lmd->lmd_mgs && (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) { i++; } else if (IS_MGS(lsi)) { lnet_process_id_t id; while ((rc = LNetGetId(i++, &id)) != -ENOENT) { if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) continue; nid = id.nid; i++; break; } } } else { /* client */ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ ptr = lsi->lsi_lmd->lmd_dev; if (class_parse_nid(ptr, &nid, &ptr) == 0) i++; } if (i == 0) { CERROR("No valid MGS nids found.\n"); return -EINVAL; } mutex_lock(&mgc_start_lock); len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1; OBD_ALLOC(mgcname, len); OBD_ALLOC(niduuid, len + 2); if (!mgcname || !niduuid) GOTO(out_free, rc = -ENOMEM); sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid)); mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : ""; OBD_ALLOC_PTR(data); if (data == NULL) GOTO(out_free, rc = -ENOMEM); obd = class_name2obd(mgcname); if (obd && !obd->obd_stopping) { rc = obd_set_info_async(NULL, obd->obd_self_export, strlen(KEY_MGSSEC), KEY_MGSSEC, strlen(mgssec), mgssec, NULL); if (rc) GOTO(out_free, rc); /* Re-using an existing MGC */ atomic_inc(&obd->u.cli.cl_mgc_refcount); /* IR compatibility check, only for clients */ if (lmd_is_client(lsi->lsi_lmd)) { int has_ir; int vallen = sizeof(*data); __u32 *flags = &lsi->lsi_lmd->lmd_flags; rc = obd_get_info(NULL, obd->obd_self_export, strlen(KEY_CONN_DATA), KEY_CONN_DATA, &vallen, data, NULL); LASSERT(rc == 0); has_ir = OCD_HAS_FLAG(data, IMP_RECOV); if (has_ir ^ !(*flags & LMD_FLG_NOIR)) { /* LMD_FLG_NOIR is for test purpose only */ LCONSOLE_WARN( "Trying to mount a client with IR setting " "not compatible with current mgc. " "Force to use current mgc setting that is " "IR %s.\n", has_ir ? "enabled" : "disabled"); if (has_ir) *flags &= ~LMD_FLG_NOIR; else *flags |= LMD_FLG_NOIR; } } recov_bk = 0; /* If we are restarting the MGS, don't try to keep the MGC's old connection, or registration will fail. */ if (IS_MGS(lsi)) { CDEBUG(D_MOUNT, "New MGS with live MGC\n"); recov_bk = 1; } /* Try all connections, but only once (again). We don't want to block another target from starting (using its local copy of the log), but we do want to connect if at all possible. */ recov_bk++; CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk); rc = obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP, sizeof(recov_bk), &recov_bk, NULL); GOTO(out, rc = 0); } CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname); /* Add the primary nids for the MGS */ i = 0; sprintf(niduuid, "%s_%x", mgcname, i); if (IS_SERVER(lsi)) { ptr = lsi->lsi_lmd->lmd_mgs; if (IS_MGS(lsi)) { /* Use local nids (including LO) */ lnet_process_id_t id; while ((rc = LNetGetId(i++, &id)) != -ENOENT) { rc = do_lcfg(mgcname, id.nid, LCFG_ADD_UUID, niduuid, 0,0,0); } } else { /* Use mgsnode= nids */ /* mount -o mgsnode=nid */ if (lsi->lsi_lmd->lmd_mgs) { ptr = lsi->lsi_lmd->lmd_mgs; } else if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) { CERROR("No MGS nids given.\n"); GOTO(out_free, rc = -EINVAL); } while (class_parse_nid(ptr, &nid, &ptr) == 0) { rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, 0,0,0); i++; } } } else { /* client */ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ ptr = lsi->lsi_lmd->lmd_dev; while (class_parse_nid(ptr, &nid, &ptr) == 0) { rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, 0,0,0); i++; /* Stop at the first failover nid */ if (*ptr == ':') break; } } if (i == 0) { CERROR("No valid MGS nids found.\n"); GOTO(out_free, rc = -EINVAL); } lsi->lsi_lmd->lmd_mgs_failnodes = 1; /* Random uuid for MGC allows easier reconnects */ OBD_ALLOC_PTR(uuid); ll_generate_random_uuid(uuidc); class_uuid_unparse(uuidc, uuid); /* Start the MGC */ rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, (char *)uuid->uuid, LUSTRE_MGS_OBDNAME, niduuid, 0, 0); OBD_FREE_PTR(uuid); if (rc) GOTO(out_free, rc); /* Add any failover MGS nids */ i = 1; while (ptr && ((*ptr == ':' || class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) { /* New failover node */ sprintf(niduuid, "%s_%x", mgcname, i); j = 0; while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { j++; rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, 0,0,0); if (*ptr == ':') break; } if (j > 0) { rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, niduuid, 0, 0, 0); i++; } else { /* at ":/fsname" */ break; } } lsi->lsi_lmd->lmd_mgs_failnodes = i; obd = class_name2obd(mgcname); if (!obd) { CERROR("Can't find mgcobd %s\n", mgcname); GOTO(out_free, rc = -ENOTCONN); } rc = obd_set_info_async(NULL, obd->obd_self_export, strlen(KEY_MGSSEC), KEY_MGSSEC, strlen(mgssec), mgssec, NULL); if (rc) GOTO(out_free, rc); /* Keep a refcount of servers/clients who started with "mount", so we know when we can get rid of the mgc. */ atomic_set(&obd->u.cli.cl_mgc_refcount, 1); /* Try all connections, but only once. */ recov_bk = 1; rc = obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP, sizeof(recov_bk), &recov_bk, NULL); if (rc) /* nonfatal */ CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc); /* We connect to the MGS at setup, and don't disconnect until cleanup */ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT | OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | OBD_CONNECT_LVB_TYPE; #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0) data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB; #else #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab" #endif if (lmd_is_client(lsi->lsi_lmd) && lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR) data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV; data->ocd_version = LUSTRE_VERSION_CODE; rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL); if (rc) { CERROR("connect failed %d\n", rc); GOTO(out, rc); } obd->u.cli.cl_mgc_mgsexp = exp; out: /* Keep the mgc info in the sb. Note that many lsi's can point to the same mgc.*/ lsi->lsi_mgc = obd; out_free: mutex_unlock(&mgc_start_lock); if (data) OBD_FREE_PTR(data); if (mgcname) OBD_FREE(mgcname, len); if (niduuid) OBD_FREE(niduuid, len + 2); return rc; }
/** * Match client and OST server connection feature flags. * * Compute the compatibility flags for a connection request based on * features mutually supported by client and server. * * The obd_export::exp_connect_data.ocd_connect_flags field in \a exp * must not be updated here, otherwise a partially initialized value may * be exposed. After the connection request is successfully processed, * the top-level tgt_connect() request handler atomically updates the export * connect flags from the obd_connect_data::ocd_connect_flags field of the * reply. \see tgt_connect(). * * \param[in] env execution environment * \param[in] exp the obd_export associated with this * client/target pair * \param[in] data stores data for this connect request * \param[in] new_connection is this connection new or not * * \retval 0 if success * \retval -EPROTO client and server feature requirements are * incompatible * \retval -EBADF OST index in connect request doesn't match * real OST index */ static int ofd_parse_connect_data(const struct lu_env *env, struct obd_export *exp, struct obd_connect_data *data, bool new_connection) { struct ofd_device *ofd = ofd_exp(exp); struct filter_export_data *fed = &exp->exp_filter_data; if (!data) RETURN(0); CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64 " ocd_version: %x ocd_grant: %d ocd_index: %u" " ocd_group %u\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, data->ocd_connect_flags, data->ocd_version, data->ocd_grant, data->ocd_index, data->ocd_group); if (fed->fed_group != 0 && fed->fed_group != data->ocd_group) { CWARN("!!! This export (nid %s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Please report to " "https://jira.hpdd.intel.com/\n", obd_export_nid2str(exp), fed->fed_group, data->ocd_group); RETURN(-EPROTO); } fed->fed_group = data->ocd_group; data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; data->ocd_version = LUSTRE_VERSION_CODE; /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */ if (data->ocd_connect_flags & OBD_CONNECT_MDS) CDEBUG(D_HA, "%s: Received MDS connection for group %u\n", exp->exp_obd->obd_name, data->ocd_group); else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN) RETURN(-EPROTO); if (ofd_grant_param_supp(exp)) { exp->exp_filter_data.fed_pagesize = data->ocd_blocksize; /* ocd_{blocksize,inodespace} are log2 values */ data->ocd_blocksize = ofd->ofd_blockbits; data->ocd_inodespace = ofd->ofd_dt_conf.ddp_inodespace; /* ocd_grant_extent is in 1K blocks */ data->ocd_grant_extent = ofd->ofd_dt_conf.ddp_grant_frag >> 10; } if (data->ocd_connect_flags & OBD_CONNECT_GRANT) data->ocd_grant = ofd_grant_connect(env, exp, data->ocd_grant, new_connection); if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd; int index = lsd->lsd_osd_index; if (index != data->ocd_index) { LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index" " %u doesn't match actual OST index" " %u in last_rcvd file, bad " "configuration?\n", obd_export_nid2str(exp), index, data->ocd_index); RETURN(-EBADF); } if (!(lsd->lsd_feature_compat & OBD_COMPAT_OST)) { /* this will only happen on the first connect */ lsd->lsd_feature_compat |= OBD_COMPAT_OST; /* sync is not needed here as lut_client_add will * set exp_need_sync flag */ tgt_server_data_update(env, &ofd->ofd_lut, 0); } } if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) { data->ocd_brw_size = 65536; } else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) { data->ocd_brw_size = min(data->ocd_brw_size, (__u32)DT_MAX_BRW_SIZE); if (data->ocd_brw_size == 0) { CERROR("%s: cli %s/%p ocd_connect_flags: "LPX64 " ocd_version: %x ocd_grant: %d ocd_index: %u " "ocd_brw_size is unexpectedly zero, " "network data corruption?" "Refusing connection of this client\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, data->ocd_connect_flags, data->ocd_version, data->ocd_grant, data->ocd_index); RETURN(-EPROTO); } } if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) { __u32 cksum_types = data->ocd_cksum_types; /* The client set in ocd_cksum_types the checksum types it * supports. We have to mask off the algorithms that we don't * support */ data->ocd_cksum_types &= cksum_types_supported_server(); if (unlikely(data->ocd_cksum_types == 0)) { CERROR("%s: Connect with checksum support but no " "ocd_cksum_types is set\n", exp->exp_obd->obd_name); RETURN(-EPROTO); } CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return " "%x\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), cksum_types, data->ocd_cksum_types); } else { /* This client does not support OBD_CONNECT_CKSUM * fall back to CRC32 */ CDEBUG(D_RPCTRACE, "%s: cli %s does not support " "OBD_CONNECT_CKSUM, CRC32 will be used\n", exp->exp_obd->obd_name, obd_export_nid2str(exp)); } if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes; if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { spin_lock(&exp->exp_obd->obd_dev_lock); list_del_init(&exp->exp_obd_chain_timed); spin_unlock(&exp->exp_obd->obd_dev_lock); } else { data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; } } RETURN(0); }