Exemple #1
0
static void
lcw_dump(struct lc_watchdog *lcw)
{
        ENTRY;
#if defined(HAVE_TASKLIST_LOCK)
        cfs_read_lock(&tasklist_lock);
#elif defined(HAVE_TASK_RCU)
        rcu_read_lock();
#else
        CERROR("unable to dump stack because of missing export\n");
        RETURN_EXIT;
#endif
       if (lcw->lcw_task == NULL) {
                LCONSOLE_WARN("Process " LPPID " was not found in the task "
                              "list; watchdog callback may be incomplete\n",
                              (int)lcw->lcw_pid);
        } else {
                libcfs_debug_dumpstack(lcw->lcw_task);
        }

#if defined(HAVE_TASKLIST_LOCK)
        cfs_read_unlock(&tasklist_lock);
#elif defined(HAVE_TASK_RCU)
        rcu_read_unlock();
#endif
        EXIT;
}
Exemple #2
0
static int mds_cmd_cleanup(struct obd_device *obd)
{
        struct mds_obd *mds = &obd->u.mds;
        struct lvfs_run_ctxt saved;
        int rc = 0;
        ENTRY;

        mds->mds_lov_exp = NULL;

        if (obd->obd_fail)
                LCONSOLE_WARN("%s: shutting down for failover; client state "
                              "will be preserved.\n", obd->obd_name);

        if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME)))
                RETURN(0);

        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);

        mds_lov_destroy_objids(obd);

        if (mds->mds_objects_dir != NULL) {
                l_dput(mds->mds_objects_dir);
                mds->mds_objects_dir = NULL;
        }

        dput(mds->mds_fid_de);
        LL_DQUOT_OFF(obd->u.obt.obt_sb);
        shrink_dcache_sb(mds->mds_obt.obt_sb);
        fsfilt_put_ops(obd->obd_fsops);

        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
        RETURN(rc);
}
Exemple #3
0
static void lcw_dump_stack(struct lc_watchdog *lcw)
{
        cfs_time_t      current_time;
        cfs_duration_t  delta_time;
        struct timeval  timediff;

        current_time = cfs_time_current();
        delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
        cfs_duration_usec(delta_time, &timediff);

        /*
         * Check to see if we should throttle the watchdog timer to avoid
         * too many dumps going to the console thus triggering an NMI.
         */
        delta_time = cfs_duration_sec(cfs_time_sub(current_time,
                                                   lcw_last_watchdog_time));

        if (delta_time < libcfs_watchdog_ratelimit &&
            lcw_recent_watchdog_count > 3) {
                LCONSOLE_WARN("Service thread pid %u was inactive for "
                              "%lu.%.02lus. Watchdog stack traces are limited "
                              "to 3 per %d seconds, skipping this one.\n",
                              (int)lcw->lcw_pid,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000,
                              libcfs_watchdog_ratelimit);
        } else {
                if (delta_time < libcfs_watchdog_ratelimit) {
                        lcw_recent_watchdog_count++;
                } else {
                        memcpy(&lcw_last_watchdog_time, &current_time,
                               sizeof(current_time));
                        lcw_recent_watchdog_count = 0;
                }

                LCONSOLE_WARN("Service thread pid %u was inactive for "
                              "%lu.%.02lus. The thread might be hung, or it "
                              "might only be slow and will resume later. "
                              "Dumping the stack trace for debugging purposes:"
                              "\n",
                              (int)lcw->lcw_pid,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000);
                lcw_dump(lcw);
        }
}
Exemple #4
0
/* this function ONLY returns valid dget'd dentries with an initialized inode
   or errors */
struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
                              struct vfsmount **mnt)
{
        char fid_name[32];
        unsigned long ino = fid->id;
        __u32 generation = fid->generation;
        struct inode *inode;
        struct dentry *result;

        if (ino == 0)
                RETURN(ERR_PTR(-ESTALE));

        snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);

        /* under ext3 this is neither supposed to return bad inodes
           nor NULL inodes. */
        result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
        if (IS_ERR(result))
                RETURN(result);

        inode = result->d_inode;
        if (!inode)
                RETURN(ERR_PTR(-ENOENT));

        if (inode->i_generation == 0 || inode->i_nlink == 0) {
                LCONSOLE_WARN("Found inode with zero generation or link -- this"
                              " may indicate disk corruption (inode: %lu/%u, "
                              "link %lu, count %d)\n", inode->i_ino,
                              inode->i_generation,(unsigned long)inode->i_nlink,
                              atomic_read(&inode->i_count));
                dput(result);
                RETURN(ERR_PTR(-ENOENT));
        }

        if (generation && inode->i_generation != generation) {
                /* we didn't find the right inode.. */
                CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
                       "count: %d, generation %u/%u\n", inode->i_ino,
                       (unsigned long)inode->i_nlink,
                       atomic_read(&inode->i_count), inode->i_generation,
                       generation);
                dput(result);
                RETURN(ERR_PTR(-ENOENT));
        }

        if (mnt) {
                *mnt = mds->mds_obt.obt_vfsmnt;
                mntget(*mnt);
        }

        RETURN(result);
}
Exemple #5
0
static int osp_precreate_timeout_condition(void *data)
{
	struct osp_device *d = data;

	LCONSOLE_WARN("%s: slow creates, last="DFID", next="DFID", "
		      "reserved="LPU64", syn_changes=%lu, "
		      "syn_rpc_in_progress=%d, status=%d\n",
		      d->opd_obd->obd_name, PFID(&d->opd_pre_last_created_fid),
		      PFID(&d->opd_pre_used_fid), d->opd_pre_reserved,
		      d->opd_syn_changes, d->opd_syn_rpc_in_progress,
		      d->opd_pre_status);

	return 1;
}
Exemple #6
0
int osd_oi_mod_init(void)
{
        if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)
                osd_oi_count = OSD_OI_FID_NR;

        if ((osd_oi_count & (osd_oi_count - 1)) != 0) {
                LCONSOLE_WARN("Round up oi_count %d to power2 %d\n",
                              osd_oi_count, size_roundup_power2(osd_oi_count));
                osd_oi_count = size_roundup_power2(osd_oi_count);
        }

	mutex_init(&oi_init_lock);
        return 0;
}
Exemple #7
0
static ssize_t
ldiskfs_osd_index_in_idif_seq_write(struct file *file,
				    const char __user *buffer,
				    size_t count, loff_t *off)
{
	struct lu_env env;
	struct seq_file *m = file->private_data;
	struct dt_device *dt = m->private;
	struct osd_device *dev = osd_dt_dev(dt);
	struct lu_target *tgt;
	__s64 val;
	int rc;

	LASSERT(dev != NULL);
	if (unlikely(dev->od_mnt == NULL))
		return -EINPROGRESS;

	rc = lprocfs_str_to_s64(buffer, count, &val);
	if (rc != 0)
		return rc;

	if (dev->od_index_in_idif) {
		if (val != 0)
			return count;

		LCONSOLE_WARN("%s: OST-index in IDIF has been enabled, "
			      "it cannot be reverted back.\n", osd_name(dev));
		return -EPERM;
	}

	if (val == 0)
		return count;

	rc = lu_env_init(&env, LCT_DT_THREAD);
	if (rc != 0)
		return rc;

	tgt = dev->od_dt_dev.dd_lu_dev.ld_site->ls_tgt;
	tgt->lut_lsd.lsd_feature_rocompat |= OBD_ROCOMPAT_IDX_IN_IDIF;
	rc = tgt_server_data_update(&env, tgt, 1);
	lu_env_fini(&env);
	if (rc < 0)
		return rc;

	LCONSOLE_INFO("%s: enable OST-index in IDIF successfully, "
		      "it cannot be reverted back.\n", osd_name(dev));

	dev->od_index_in_idif = 1;
	return count;
}
Exemple #8
0
static void
lcw_dump(struct lc_watchdog *lcw)
{
        ENTRY;
        rcu_read_lock();
       if (lcw->lcw_task == NULL) {
                LCONSOLE_WARN("Process " LPPID " was not found in the task "
                              "list; watchdog callback may be incomplete\n",
                              (int)lcw->lcw_pid);
        } else {
                libcfs_debug_dumpstack(lcw->lcw_task);
        }

        rcu_read_unlock();
        EXIT;
}
Exemple #9
0
static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
{
        cfs_time_t newtime = cfs_time_current();;

        if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
                struct timeval timediff;
                cfs_time_t delta_time = cfs_time_sub(newtime,
                                                     lcw->lcw_last_touched);
                cfs_duration_usec(delta_time, &timediff);

                LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
                              "This indicates the system was overloaded (too "
                              "many service threads, or there were not enough "
                              "hardware resources).\n",
                              lcw->lcw_pid,
                              message,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000);
        }
        lcw->lcw_last_touched = newtime;
}
Exemple #10
0
/* Ensure this is not a failover node that is connecting first*/
static int mgs_check_failover_reg(struct mgs_target_info *mti)
{
        lnet_nid_t nid;
        char *ptr;
        int i;

        ptr = mti->mti_params;
        while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
                while (class_parse_nid(ptr, &nid, &ptr) == 0) {
                        for (i = 0; i < mti->mti_nid_count; i++) {
                                if (nid == mti->mti_nids[i]) {
                                        LCONSOLE_WARN("Denying initial registra"
                                                      "tion attempt from nid %s"
                                                      ", specified as failover"
                                                      "\n",libcfs_nid2str(nid));
                                        return -EADDRNOTAVAIL;
                                }
                        }
                }
        }
        return 0;
}
Exemple #11
0
/** Set up a mgc obd to process startup logs
 *
 * \param sb [in] super block of the mgc obd
 *
 * \retval 0 success, otherwise error code
 */
int lustre_start_mgc(struct super_block *sb)
{
	struct obd_connect_data *data = NULL;
	struct lustre_sb_info *lsi = s2lsi(sb);
	struct obd_device *obd;
	struct obd_export *exp;
	struct obd_uuid *uuid;
	class_uuid_t uuidc;
	lnet_nid_t nid;
	char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
	char *ptr;
	int recov_bk;
	int rc = 0, i = 0, j, len;

	LASSERT(lsi->lsi_lmd);

	/* Find the first non-lo MGS nid for our MGC name */
	if (IS_SERVER(lsi)) {
		/* mount -o mgsnode=nid */
		ptr = lsi->lsi_lmd->lmd_mgs;
		if (lsi->lsi_lmd->lmd_mgs &&
		    (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
			i++;
		} else if (IS_MGS(lsi)) {
			lnet_process_id_t id;
			while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
				if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
					continue;
				nid = id.nid;
				i++;
				break;
			}
		}
	} else { /* client */
		/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
		ptr = lsi->lsi_lmd->lmd_dev;
		if (class_parse_nid(ptr, &nid, &ptr) == 0)
			i++;
	}
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		return -EINVAL;
	}

	mutex_lock(&mgc_start_lock);

	len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
	OBD_ALLOC(mgcname, len);
	OBD_ALLOC(niduuid, len + 2);
	if (!mgcname || !niduuid)
		GOTO(out_free, rc = -ENOMEM);
	sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));

	mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";

	OBD_ALLOC_PTR(data);
	if (data == NULL)
		GOTO(out_free, rc = -ENOMEM);

	obd = class_name2obd(mgcname);
	if (obd && !obd->obd_stopping) {
		rc = obd_set_info_async(NULL, obd->obd_self_export,
					strlen(KEY_MGSSEC), KEY_MGSSEC,
					strlen(mgssec), mgssec, NULL);
		if (rc)
			GOTO(out_free, rc);

		/* Re-using an existing MGC */
		atomic_inc(&obd->u.cli.cl_mgc_refcount);

		/* IR compatibility check, only for clients */
		if (lmd_is_client(lsi->lsi_lmd)) {
			int has_ir;
			int vallen = sizeof(*data);
			__u32 *flags = &lsi->lsi_lmd->lmd_flags;

			rc = obd_get_info(NULL, obd->obd_self_export,
					  strlen(KEY_CONN_DATA), KEY_CONN_DATA,
					  &vallen, data, NULL);
			LASSERT(rc == 0);
			has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
			if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
				/* LMD_FLG_NOIR is for test purpose only */
				LCONSOLE_WARN(
				    "Trying to mount a client with IR setting "
				    "not compatible with current mgc. "
				    "Force to use current mgc setting that is "
				    "IR %s.\n",
				    has_ir ? "enabled" : "disabled");
				if (has_ir)
					*flags &= ~LMD_FLG_NOIR;
				else
					*flags |= LMD_FLG_NOIR;
			}
		}

		recov_bk = 0;
		/* If we are restarting the MGS, don't try to keep the MGC's
		   old connection, or registration will fail. */
		if (IS_MGS(lsi)) {
			CDEBUG(D_MOUNT, "New MGS with live MGC\n");
			recov_bk = 1;
		}

		/* Try all connections, but only once (again).
		   We don't want to block another target from starting
		   (using its local copy of the log), but we do want to connect
		   if at all possible. */
		recov_bk++;
		CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
		rc = obd_set_info_async(NULL, obd->obd_self_export,
					sizeof(KEY_INIT_RECOV_BACKUP),
					KEY_INIT_RECOV_BACKUP,
					sizeof(recov_bk), &recov_bk, NULL);
		GOTO(out, rc = 0);
	}

	CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);

	/* Add the primary nids for the MGS */
	i = 0;
	sprintf(niduuid, "%s_%x", mgcname, i);
	if (IS_SERVER(lsi)) {
		ptr = lsi->lsi_lmd->lmd_mgs;
		if (IS_MGS(lsi)) {
			/* Use local nids (including LO) */
			lnet_process_id_t id;
			while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
				rc = do_lcfg(mgcname, id.nid,
					     LCFG_ADD_UUID, niduuid, 0,0,0);
			}
		} else {
			/* Use mgsnode= nids */
			/* mount -o mgsnode=nid */
			if (lsi->lsi_lmd->lmd_mgs) {
				ptr = lsi->lsi_lmd->lmd_mgs;
			} else if (class_find_param(ptr, PARAM_MGSNODE,
						    &ptr) != 0) {
				CERROR("No MGS nids given.\n");
				GOTO(out_free, rc = -EINVAL);
			}
			while (class_parse_nid(ptr, &nid, &ptr) == 0) {
				rc = do_lcfg(mgcname, nid,
					     LCFG_ADD_UUID, niduuid, 0,0,0);
				i++;
			}
		}
	} else { /* client */
		/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
		ptr = lsi->lsi_lmd->lmd_dev;
		while (class_parse_nid(ptr, &nid, &ptr) == 0) {
			rc = do_lcfg(mgcname, nid,
				     LCFG_ADD_UUID, niduuid, 0,0,0);
			i++;
			/* Stop at the first failover nid */
			if (*ptr == ':')
				break;
		}
	}
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		GOTO(out_free, rc = -EINVAL);
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = 1;

	/* Random uuid for MGC allows easier reconnects */
	OBD_ALLOC_PTR(uuid);
	ll_generate_random_uuid(uuidc);
	class_uuid_unparse(uuidc, uuid);

	/* Start the MGC */
	rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
				 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
				 niduuid, 0, 0);
	OBD_FREE_PTR(uuid);
	if (rc)
		GOTO(out_free, rc);

	/* Add any failover MGS nids */
	i = 1;
	while (ptr && ((*ptr == ':' ||
	       class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
		/* New failover node */
		sprintf(niduuid, "%s_%x", mgcname, i);
		j = 0;
		while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
			j++;
			rc = do_lcfg(mgcname, nid,
				     LCFG_ADD_UUID, niduuid, 0,0,0);
			if (*ptr == ':')
				break;
		}
		if (j > 0) {
			rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
				     niduuid, 0, 0, 0);
			i++;
		} else {
			/* at ":/fsname" */
			break;
		}
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = i;

	obd = class_name2obd(mgcname);
	if (!obd) {
		CERROR("Can't find mgcobd %s\n", mgcname);
		GOTO(out_free, rc = -ENOTCONN);
	}

	rc = obd_set_info_async(NULL, obd->obd_self_export,
				strlen(KEY_MGSSEC), KEY_MGSSEC,
				strlen(mgssec), mgssec, NULL);
	if (rc)
		GOTO(out_free, rc);

	/* Keep a refcount of servers/clients who started with "mount",
	   so we know when we can get rid of the mgc. */
	atomic_set(&obd->u.cli.cl_mgc_refcount, 1);

	/* Try all connections, but only once. */
	recov_bk = 1;
	rc = obd_set_info_async(NULL, obd->obd_self_export,
				sizeof(KEY_INIT_RECOV_BACKUP),
				KEY_INIT_RECOV_BACKUP,
				sizeof(recov_bk), &recov_bk, NULL);
	if (rc)
		/* nonfatal */
		CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);

	/* We connect to the MGS at setup, and don't disconnect until cleanup */
	data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
				  OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
				  OBD_CONNECT_LVB_TYPE;

#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
	data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
#else
#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
#endif

	if (lmd_is_client(lsi->lsi_lmd) &&
	    lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
		data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
	data->ocd_version = LUSTRE_VERSION_CODE;
	rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
	if (rc) {
		CERROR("connect failed %d\n", rc);
		GOTO(out, rc);
	}

	obd->u.cli.cl_mgc_mgsexp = exp;

out:
	/* Keep the mgc info in the sb. Note that many lsi's can point
	   to the same mgc.*/
	lsi->lsi_mgc = obd;
out_free:
	mutex_unlock(&mgc_start_lock);

	if (data)
		OBD_FREE_PTR(data);
	if (mgcname)
		OBD_FREE(mgcname, len);
	if (niduuid)
		OBD_FREE(niduuid, len + 2);
	return rc;
}
/* Retrieve object striping information.
 *
 * @lump is a pointer to an in-core struct with lmm_ost_count indicating
 * the maximum number of OST indices which will fit in the user buffer.
 * lmm_magic must be LOV_USER_MAGIC.
 *
 * If @size > 0, User specified limited buffer size, usually the buffer is from
 * ll_lov_setstripe(), and the buffer can only hold basic layout template info.
 */
int lov_getstripe(const struct lu_env *env, struct lov_object *obj,
		  struct lov_stripe_md *lsm, struct lov_user_md __user *lump,
		  size_t size)
{
	/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
	struct lov_mds_md *lmmk, *lmm;
	struct lov_user_md_v1 lum;
	size_t	lmmk_size;
	ssize_t	lmm_size, lum_size = 0;
	static bool printed;
	int	rc = 0;
	ENTRY;

	if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3 &&
	    lsm->lsm_magic != LOV_MAGIC_COMP_V1) {
		CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
		       lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
		GOTO(out, rc = -EIO);
	}

	if (!printed) {
		LCONSOLE_WARN("%s: using old ioctl(LL_IOC_LOV_GETSTRIPE) on "
			      DFID", use llapi_layout_get_by_path()\n",
			      current->comm,
			      PFID(&obj->lo_cl.co_lu.lo_header->loh_fid));
		printed = true;
	}

	lmmk_size = lov_comp_md_size(lsm);

	OBD_ALLOC_LARGE(lmmk, lmmk_size);
	if (lmmk == NULL)
		GOTO(out, rc = -ENOMEM);

	lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size);
	if (lmm_size < 0)
		GOTO(out_free, rc = lmm_size);

	if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
		if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) ||
		    lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_mds_md(lmmk);
			lustre_swab_lov_user_md_objects(
				(struct lov_user_ost_data *)lmmk->lmm_objects,
				lmmk->lmm_stripe_count);
		} else if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_COMP_V1)) {
			lustre_swab_lov_comp_md_v1(
					(struct lov_comp_md_v1 *)lmmk);
		}
	}

	/* Legacy appication passes limited buffer, we need to figure out
	 * the user buffer size by the passed in lmm_stripe_count. */
	if (copy_from_user(&lum, lump, sizeof(struct lov_user_md_v1)))
		GOTO(out_free, rc = -EFAULT);

	if (lum.lmm_magic == LOV_USER_MAGIC_V1 ||
	    lum.lmm_magic == LOV_USER_MAGIC_V3)
		lum_size = lov_user_md_size(lum.lmm_stripe_count,
					    lum.lmm_magic);

	if (lum_size != 0) {
		struct lov_mds_md *comp_md = lmmk;

		/* Legacy app (ADIO for instance) treats the layout as V1/V3
		 * blindly, we'd return a reasonable V1/V3 for them. */
		if (lmmk->lmm_magic == LOV_MAGIC_COMP_V1) {
			struct lov_comp_md_v1 *comp_v1;
			struct cl_object *cl_obj;
			struct cl_attr attr;
			int i;

			attr.cat_size = 0;
			cl_obj = cl_object_top(&obj->lo_cl);
			cl_object_attr_get(env, cl_obj, &attr);

			/* return the last instantiated component if file size
			 * is non-zero, otherwise, return the last component.*/
			comp_v1 = (struct lov_comp_md_v1 *)lmmk;
			i = attr.cat_size == 0 ? comp_v1->lcm_entry_count : 0;
			for (; i < comp_v1->lcm_entry_count; i++) {
				if (!(comp_v1->lcm_entries[i].lcme_flags &
						LCME_FL_INIT))
					break;
			}
			if (i > 0)
				i--;
			comp_md = (struct lov_mds_md *)((char *)comp_v1 +
					comp_v1->lcm_entries[i].lcme_offset);
		}

		lmm = comp_md;
		lmm_size = lum_size;
	} else {
		lmm = lmmk;
		lmm_size = lmmk_size;
	}
	/**
	 * User specified limited buffer size, usually the buffer is
	 * from ll_lov_setstripe(), and the buffer can only hold basic
	 * layout template info.
	 */
	if (size == 0 || size > lmm_size)
		size = lmm_size;
	if (copy_to_user(lump, lmm, size))
		GOTO(out_free, rc = -EFAULT);

out_free:
	OBD_FREE_LARGE(lmmk, lmmk_size);
out:
	RETURN(rc);
}
Exemple #13
0
/** Set up a mgc obd to process startup logs
 *
 * \param sb [in] super block of the mgc obd
 *
 * \retval 0 success, otherwise error code
 */
int lustre_start_mgc(struct super_block *sb)
{
	struct obd_connect_data *data = NULL;
	struct lustre_sb_info *lsi = s2lsi(sb);
	struct obd_device *obd;
	struct obd_export *exp;
	struct obd_uuid *uuid;
	class_uuid_t uuidc;
	lnet_nid_t nid;
	char nidstr[LNET_NIDSTR_SIZE];
	char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
	char *ptr;
	int rc = 0, i = 0, j;

	LASSERT(lsi->lsi_lmd);

	/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
	ptr = lsi->lsi_lmd->lmd_dev;
	if (class_parse_nid(ptr, &nid, &ptr) == 0)
		i++;
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		return -EINVAL;
	}

	mutex_lock(&mgc_start_lock);

	libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
	mgcname = kasprintf(GFP_NOFS,
			    "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
	niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
	if (!mgcname || !niduuid) {
		rc = -ENOMEM;
		goto out_free;
	}

	mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";

	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data) {
		rc = -ENOMEM;
		goto out_free;
	}

	obd = class_name2obd(mgcname);
	if (obd && !obd->obd_stopping) {
		int recov_bk;

		rc = obd_set_info_async(NULL, obd->obd_self_export,
					strlen(KEY_MGSSEC), KEY_MGSSEC,
					strlen(mgssec), mgssec, NULL);
		if (rc)
			goto out_free;

		/* Re-using an existing MGC */
		atomic_inc(&obd->u.cli.cl_mgc_refcount);

		/* IR compatibility check, only for clients */
		if (lmd_is_client(lsi->lsi_lmd)) {
			int has_ir;
			int vallen = sizeof(*data);
			__u32 *flags = &lsi->lsi_lmd->lmd_flags;

			rc = obd_get_info(NULL, obd->obd_self_export,
					  strlen(KEY_CONN_DATA), KEY_CONN_DATA,
					  &vallen, data, NULL);
			LASSERT(rc == 0);
			has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
			if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
				/* LMD_FLG_NOIR is for test purpose only */
				LCONSOLE_WARN(
					"Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
					has_ir ? "enabled" : "disabled");
				if (has_ir)
					*flags &= ~LMD_FLG_NOIR;
				else
					*flags |= LMD_FLG_NOIR;
			}
		}

		recov_bk = 0;

		/* Try all connections, but only once (again).
		   We don't want to block another target from starting
		   (using its local copy of the log), but we do want to connect
		   if at all possible. */
		recov_bk++;
		CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
		       recov_bk);
		rc = obd_set_info_async(NULL, obd->obd_self_export,
					sizeof(KEY_INIT_RECOV_BACKUP),
					KEY_INIT_RECOV_BACKUP,
					sizeof(recov_bk), &recov_bk, NULL);
		rc = 0;
		goto out;
	}

	CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);

	/* Add the primary nids for the MGS */
	i = 0;
	/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
	ptr = lsi->lsi_lmd->lmd_dev;
	while (class_parse_nid(ptr, &nid, &ptr) == 0) {
		rc = do_lcfg(mgcname, nid,
			     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
		i++;
		/* Stop at the first failover nid */
		if (*ptr == ':')
			break;
	}
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		rc = -EINVAL;
		goto out_free;
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = 1;

	/* Random uuid for MGC allows easier reconnects */
	uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
	if (!uuid) {
		rc = -ENOMEM;
		goto out_free;
	}

	ll_generate_random_uuid(uuidc);
	class_uuid_unparse(uuidc, uuid);

	/* Start the MGC */
	rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
				 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
				 niduuid, NULL, NULL);
	kfree(uuid);
	if (rc)
		goto out_free;

	/* Add any failover MGS nids */
	i = 1;
	while (ptr && ((*ptr == ':' ||
	       class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
		/* New failover node */
		sprintf(niduuid, "%s_%x", mgcname, i);
		j = 0;
		while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
			j++;
			rc = do_lcfg(mgcname, nid,
				     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
			if (*ptr == ':')
				break;
		}
		if (j > 0) {
			rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
				     niduuid, NULL, NULL, NULL);
			i++;
		} else {
			/* at ":/fsname" */
			break;
		}
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = i;

	obd = class_name2obd(mgcname);
	if (!obd) {
		CERROR("Can't find mgcobd %s\n", mgcname);
		rc = -ENOTCONN;
		goto out_free;
	}

	rc = obd_set_info_async(NULL, obd->obd_self_export,
				strlen(KEY_MGSSEC), KEY_MGSSEC,
				strlen(mgssec), mgssec, NULL);
	if (rc)
		goto out_free;

	/* Keep a refcount of servers/clients who started with "mount",
	   so we know when we can get rid of the mgc. */
	atomic_set(&obd->u.cli.cl_mgc_refcount, 1);

	/* We connect to the MGS at setup, and don't disconnect until cleanup */
	data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
				  OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
				  OBD_CONNECT_LVB_TYPE;

#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
	data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
#else
#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
#endif

	if (lmd_is_client(lsi->lsi_lmd) &&
	    lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
		data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
	data->ocd_version = LUSTRE_VERSION_CODE;
	rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
	if (rc) {
		CERROR("connect failed %d\n", rc);
		goto out;
	}

	obd->u.cli.cl_mgc_mgsexp = exp;

out:
	/* Keep the mgc info in the sb. Note that many lsi's can point
	   to the same mgc.*/
	lsi->lsi_mgc = obd;
out_free:
	mutex_unlock(&mgc_start_lock);

	kfree(data);
	kfree(mgcname);
	kfree(niduuid);
	return rc;
}
Exemple #14
0
/* Called whenever a target starts up.  Flags indicate first connect, etc. */
static int mgs_handle_target_reg(struct ptlrpc_request *req)
{
        struct obd_device *obd = req->rq_export->exp_obd;
        struct mgs_target_info *mti, *rep_mti;
        struct fs_db *fsdb;
        int opc;
        int rc = 0;
        ENTRY;

        mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_REG);

        mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);

        opc = mti->mti_flags & LDD_F_OPC_MASK;
        if (opc == LDD_F_OPC_READY) {
                CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
                       mti->mti_fsname, mti->mti_stripe_index);
                rc = mgs_ir_update(obd, mti);
                if (rc) {
                        LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
                        CERROR("Update IR return with %d(ignore and IR "
                               "disabled)\n", rc);
                }
                GOTO(out_nolock, rc);
        }

        /* Do not support unregistering right now. */
        if (opc != LDD_F_OPC_REG)
                GOTO(out_nolock, rc = -EINVAL);

        CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
               mti->mti_fsname, mti->mti_stripe_index);

        if (mti->mti_flags & LDD_F_NEED_INDEX)
                mti->mti_flags |= LDD_F_WRITECONF;

        if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
                                LDD_F_UPDATE))) {
                /* We're just here as a startup ping. */
                CDEBUG(D_MGS, "Server %s is running on %s\n",
                       mti->mti_svname, obd_export_nid2str(req->rq_export));
                rc = mgs_check_target(obd, mti);
                /* above will set appropriate mti flags */
                if (rc <= 0)
                        /* Nothing wrong, or fatal error */
                        GOTO(out_nolock, rc);
        } else {
                if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)
                    && (rc = mgs_check_failover_reg(mti)))
                        GOTO(out_nolock, rc);
        }

        OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);

        if (mti->mti_flags & LDD_F_WRITECONF) {
                if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
                    mti->mti_stripe_index == 0) {
                        rc = mgs_erase_logs(obd, mti->mti_fsname);
                        LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
                                      "request.  All servers must be restarted "
                                      "in order to regenerate the logs."
                                      "\n", obd->obd_name, mti->mti_fsname);
                } else if (mti->mti_flags &
                           (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
                        rc = mgs_erase_log(obd, mti->mti_svname);
                        LCONSOLE_WARN("%s: Regenerating %s log by user "
                                      "request.\n",
                                      obd->obd_name, mti->mti_svname);
                }
                mti->mti_flags |= LDD_F_UPDATE;
                /* Erased logs means start from scratch. */
                mti->mti_flags &= ~LDD_F_UPGRADE14;
        }

        rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
        if (rc) {
                CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
                GOTO(out_nolock, rc);
        }

        /*
         * Log writing contention is handled by the fsdb_mutex.
         *
         * It should be alright if someone was reading while we were
         * updating the logs - if we revoke at the end they will just update
         * from where they left off.
         */

        /* COMPAT_146 */
        if (mti->mti_flags & LDD_F_UPGRADE14) {
                rc = mgs_upgrade_sv_14(obd, mti, fsdb);
                if (rc) {
                        CERROR("Can't upgrade from 1.4 (%d)\n", rc);
                        GOTO(out, rc);
                }

                /* We're good to go */
                mti->mti_flags |= LDD_F_UPDATE;
        }
        /* end COMPAT_146 */

        if (mti->mti_flags & LDD_F_UPDATE) {
                CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
                       mti->mti_stripe_index);

                /* create or update the target log
                   and update the client/mdt logs */
                rc = mgs_write_log_target(obd, mti, fsdb);
                if (rc) {
                        CERROR("Failed to write %s log (%d)\n",
                               mti->mti_svname, rc);
                        GOTO(out, rc);
                }

                mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
                                    LDD_F_NEED_INDEX | LDD_F_WRITECONF |
                                    LDD_F_UPGRADE14);
                mti->mti_flags |= LDD_F_REWRITE_LDD;
        }

out:
        mgs_revoke_lock(obd, fsdb, CONFIG_T_CONFIG);

out_nolock:
        CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
               mti->mti_stripe_index, rc);
        req->rq_status = rc;
        if (rc)
                /* we need an error flag to tell the target what's going on,
                 * instead of just doing it by error code only. */
                mti->mti_flags |= LDD_F_ERROR;

        rc = req_capsule_server_pack(&req->rq_pill);
        if (rc)
                RETURN(rc);

        /* send back the whole mti in the reply */
        rep_mti = req_capsule_server_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
        *rep_mti = *mti;

        /* Flush logs to disk */
        fsfilt_sync(obd, obd->u.mgs.mgs_sb);
        RETURN(rc);
}