Example #1
/*
 * Construct the pathname of the state file and return a pointer to
 * caller.  Read the config file to get the mount point of the
 * filesystem and the pathname within fs.
 */
char *
cpr_build_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;

	if (cpr_get_config())
		return (NULL);

	switch (cf->cf_type) {
	case CFT_UFS:
		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
			cpr_err(CE_CONT, "Statefile path is too long.\n");
			return (NULL);
		}
		return (cpr_cprconfig_to_path());
	case CFT_ZVOL:
		/*FALLTHROUGH*/
	case CFT_SPEC:
		return (cf->cf_devfs);
	default:
		cpr_err(CE_PANIC, "invalid statefile type");
		/*NOTREACHED*/
		return (NULL);
	}
}
Example #2
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	DEBUG3(cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
	    "kas\n", (void *)CPR->c_mapping_area));

	return (0);
}
Example #3
int
i_cpr_reusefini(void)
{
	struct vnode *vp;
	cdef_t *cdef;
	size_t size;
	char *bufp;
	int rc;

	if (cpr_reusable_mode)
		cpr_reusable_mode = 0;

	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
		if (rc == EROFS) {
			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
			    "(uadmin %d %d)\nmust be done with / mounted "
			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
		}
		return (rc);
	}

	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));

	if (rc) {
		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
		    cpr_default_path, rc);
	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
		    "prom values for %s", cpr_default_path,
		    cpr_enumerate_promprops(&bufp, &size));
		kmem_free(bufp, size);
		rc = EINVAL;
	} else {
		/*
		 * clean up prom properties
		 */
		rc = cpr_update_nvram(cdef->props);
		if (rc == 0) {
			/*
			 * invalidate the disk copy and turn off reusable
			 */
			cdef->mini.magic = 0;
			cdef->mini.reusable = 0;
			if (rc = cpr_rdwr(UIO_WRITE, vp,
			    &cdef->mini, sizeof (cdef->mini))) {
				cpr_err(CE_WARN, "Failed writing %s, errno %d",
				    cpr_default_path, rc);
			}
		}
	}

	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	kmem_free(cdef, sizeof (*cdef));

	return (rc);
}
/*
 * write new or original values to nvram
 */
int
cpr_update_nvram(cprop_t *props)
{
	cprop_t *tail;
	pnode_t node;
	int len, rc;

	if (rc = cpr_get_options_node(&node))
		return (rc);

	if (cpr_show_props)
		prom_printf("\ncpr_show_props:\n");
	for (tail = props + CPR_MAXPROP; props < tail; props++) {
		if (cpr_show_props) {
			prom_printf("mod=%c, name \"%s\",\tvalue \"%s\"\n",
			    props->mod, props->name, props->value);
		}
		if (props->mod == PROP_NOMOD)
			continue;
		/*
		 * Note: When doing a prom_setprop you must include the
		 * trailing NUL in the length argument, but when calling
		 * prom_getproplen() the NUL is excluded from the count!
		 */
		len = strlen(props->value);
		rc = prom_setprop(node, props->name, props->value, len + 1);
		if (rc < 0 || prom_getproplen(node, props->name) != len) {
			cpr_err(CE_WARN, "cannot set nvram \"%s\" to \"%s\"",
			    props->name, props->value);
			return (ENXIO);
		}
	}

	return (0);
}
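/*
 * A self-contained, user-space illustration of the length convention
 * noted in cpr_update_nvram() above: setprop-style calls take
 * strlen(value) + 1 so the terminating NUL is stored, while
 * getproplen-style results exclude it.  This is a sketch for clarity
 * only; it does not touch the real prom interfaces.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *value = "true";
	int set_len = (int)strlen(value) + 1;	/* 5: NUL included */
	int get_len = (int)strlen(value);	/* 4: NUL excluded */

	/* mirrors the consistency check: getproplen must equal strlen */
	printf("setprop len %d, getproplen %d\n", set_len, get_len);
	return (0);
}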
Example #5
/*
 * Checks and makes sure all user threads are stopped
 */
static int
cpr_check_user_threads()
{
	kthread_id_t tp;
	int rc = 0;

	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
			continue;

		thread_lock(tp);
		/*
		 * make sure that we are off all the queues and in a stopped
		 * state.
		 */
		if (!CPR_ISTOPPED(tp)) {
			thread_unlock(tp);
			mutex_exit(&pidlock);

			if (count == CPR_UTSTOP_RETRY) {
				DEBUG1(errp("Suspend failed: cannot stop "
				    "uthread\n"));
				cpr_err(CE_WARN, "Suspend cannot stop "
				    "process %s (%p:%x).",
				    ttoproc(tp)->p_user.u_psargs, (void *)tp,
				    tp->t_state);
				cpr_err(CE_WARN, "Process may be waiting for"
				    " network request, please try again.");
			}

			DEBUG2(errp("can't stop t=%p state=%x pfg=%x "
			    "sched=%x\n", tp, tp->t_state, tp->t_proc_flag,
			    tp->t_schedflag));
			DEBUG2(errp("proc %p state=%x pid=%d\n",
			    ttoproc(tp), ttoproc(tp)->p_stat,
			    ttoproc(tp)->p_pidp->pid_id));
			return (1);
		}
		thread_unlock(tp);

	} while ((tp = tp->t_next) != curthread && rc == 0);

	mutex_exit(&pidlock);
	return (0);
}
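/*
 * The `count' tested above is a file-scope retry counter owned by the
 * stop-threads driver.  A minimal sketch of that driver, paraphrased
 * from the illumos cpr_stop_user_threads(); the backoff arithmetic and
 * helper details here are assumptions, not verbatim source.
 */
static int count;

int
cpr_stop_user_threads(void)
{
	count = 0;
	do {
		if (++count > CPR_UTSTOP_RETRY)
			return (ESRCH);
		/* direct user threads to stop; wait longer on each pass */
		if (cpr_stop_user(count * count * CPR_UTSTOP_WAIT))
			return (ESRCH);
	} while (cpr_check_user_threads() != 0);

	return (0);
}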
Example #6
/*
 * Stop kernel threads by using the callback mechanism.  If any thread
 * cannot be stopped, return failure.
 */
int
cpr_stop_kernel_threads(void)
{
	caddr_t	name;
	kthread_id_t tp;
	proc_t *p;

	callb_lock_table();	/* Note: we unlock the table in resume. */

	DEBUG1(errp("stopping kernel daemons..."));
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
		cpr_err(CE_WARN,
		    "Could not stop \"%s\" kernel thread.  "
		    "Please try again later.", name);
		return (EBUSY);
	}

	/*
	 * We think we stopped all the kernel threads.  Just in case
	 * someone is not playing by the rules, take a spin through
	 * the threadlist and see if we can account for everybody.
	 */
	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		p = ttoproc(tp);
		if (p->p_as != &kas)
			continue;

		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (! callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			cpr_err(CE_WARN,
			    "\"%s\" kernel thread not stopped.", name);
			return (EBUSY);
		}
	} while ((tp = tp->t_next) != curthread);
	mutex_exit(&pidlock);

	DEBUG1(errp("done\n"));
	return (0);
}
Example #7
/*
 * open cpr default file and display error
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int error;

	if (error = cpr_open(cpr_default_path, mode, vpp))
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, error);
	return (error);
}
static int
cpr_get_options_node(pnode_t *nodep)
{
	*nodep = prom_optionsnode();
	if (*nodep == OBP_NONODE || *nodep == OBP_BADNODE) {
		cpr_err(CE_WARN, "cannot get \"options\" node");
		return (ENOENT);
	}

	return (0);
}
Example #9
/*
 * Allocate bitmaps according to the phys_install list.
 */
static int
i_cpr_bitmap_setup(void)
{
	struct memlist *pmem;
	cbd_t *dp, *tail;
	void *space;
	size_t size;

	/*
	 * The number of bitmap descriptors will be the count of
	 * phys_install ranges plus 1 for a trailing NULL struct.
	 */
	cpr_nbitmaps = 1;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next)
		cpr_nbitmaps++;

	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
		return (EFBIG);
	}

	/* Alloc an array of bitmap descriptors. */
	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
	if (dp == NULL) {
		cpr_nbitmaps = 0;
		return (ENOMEM);
	}
	tail = dp + cpr_nbitmaps;

	CPR->c_bmda = dp;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		size = BITMAP_BYTES(pmem->ml_size);
		space = kmem_zalloc(size * 2, KM_NOSLEEP);
		if (space == NULL)
			return (ENOMEM);
		ASSERT(dp < tail);
		dp->cbd_magic = CPR_BITMAP_MAGIC;
		dp->cbd_spfn = mmu_btop(pmem->ml_address);
		dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
		dp->cbd_size = size;
		dp->cbd_reg_bitmap = (cpr_ptr)space;
		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
		dp++;
	}

	/* set magic for the last descriptor */
	ASSERT(dp == (tail - 1));
	dp->cbd_magic = CPR_BITMAP_MAGIC;

	return (0);
}
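/*
 * Sizing sketch for the bitmaps allocated above.  BITMAP_BYTES() is
 * assumed to map one bit per physical page (eight pages per byte);
 * the PAGESHIFT value below is an assumption for illustration only.
 */
#include <stdio.h>

#define	PAGESHIFT	13	/* assumed 8K pages */
#define	BITMAP_BYTES(size)	((size) >> (PAGESHIFT + 3))

int
main(void)
{
	unsigned long long range = 512ULL << 20;	/* a 512MB range */

	/* each range gets two bitmaps: regular and volatile */
	printf("one bitmap = %llu bytes, zalloc size = %llu bytes\n",
	    BITMAP_BYTES(range), 2 * BITMAP_BYTES(range));
	return (0);
}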
Example #10
/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
	    (uintptr_t)&i_cpr_end_jumpback -
	    (uintptr_t)i_cpr_resume_setup);

	/*
	 * Verify the jumpback code all falls in one page.
	 */
	if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
	    ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
}
Example #11
/*
 * reads config data into cprconfig
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}
Example #12
int
i_cpr_check_cprinfo(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc = 0;

	if (rc = cpr_open_deffile(FREAD, &vp)) {
		if (rc == ENOENT)
			cpr_err(CE_NOTE, "cprinfo file does not "
			    "exist.  You must run 'uadmin %d %d' "
			    "command while / is mounted writeable,\n"
			    "then reboot and run 'uadmin %d %d' "
			    "to create a reusable statefile",
			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
		return (rc);
	}

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
		    cpr_default_path, rc);
	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
		    "You must run 'uadmin %d %d' while / is mounted "
		    "writeable, then reboot and run 'uadmin %d %d' "
		    "to create a reusable statefile\n",
		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
		rc = EINVAL;
	}

	return (rc);
}
Example #13
/*
 * change cpu to online/offline
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	default:
		rc = EINVAL;	/* don't leave rc uninitialized */
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}
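/*
 * Hypothetical caller sketch for cpr_p_online(): force every CPU
 * except the current one offline before a checkpoint.  The circular
 * cpu_list walk follows the usual kernel idiom; error handling is
 * simplified and this is not the verbatim cpr MP code.
 */
static int
cpr_offline_other_cpus(void)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);
	cp = cpu_list;
	do {
		if (cp != CPU && cpr_p_online(cp, CPU_CPR_OFFLINE)) {
			mutex_exit(&cpu_lock);
			return (EBUSY);
		}
		cp = cp->cpu_next;
	} while (cp != cpu_list);
	mutex_exit(&cpu_lock);

	return (0);
}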
Example #14
/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken.  They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs, cachefs, and pcfs, which have disk state that could
 * get out of sync with the in-kernel data.)
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}
Example #15
/*
 * This routine releases any resources used during the checkpoint.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}
Example #16
/*
 * increase statefile size
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}
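/*
 * Worked example for the reservation loop above (numbers assumed):
 * growing a statefile from 0 to 32MB on a UFS with fs_bsize = 8192
 * issues 32MB / 8K = 4096 writes, one per logical block.  Every
 * block is therefore allocated before the dump starts, so ENOSPC
 * surfaces here rather than midway through the checkpoint.
 */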
Example #17
/*
 * write cdef_t to disk.  This contains the original values of prom
 * properties that we modify.  We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
		str = "fsync";
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}
Example #18
int
i_cpr_reuseinit(void)
{
	int rc = 0;

	if (rc = cpr_default_setup(1))
		return (rc);

	/*
	 * We need to validate the default file.
	 */
	rc = cpr_validate_definfo(1);
	if (rc == 0)
		cpr_reusable_mode = 1;
	else if (rc == EROFS) {
		cpr_err(CE_NOTE, "reuseinit must be performed "
		    "while / is mounted writeable");
	}

	(void) cpr_default_setup(0);

	return (rc);
}
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does.  (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them.  But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
Example #20
/*
 * Verify that the information in the configuration file regarding the
 * location for the statefile is still valid, depending on cf_type.
 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
 *	mounted on the same device as when pmconfig was last run,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block
 *      special file, it must have no file system mounted on it,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 */
static int
cpr_verify_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;
	static const char long_name[] = "Statefile pathname is too long.\n";
	static const char lookup_fmt[] = "Lookup failed for "
	    "cpr statefile device %s.\n";
	static const char path_chg_fmt[] = "Device path for statefile "
	    "has changed from %s to %s.\t%s\n";
	static const char rerun[] = "Please rerun pmconfig(1m).";
	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
	ufsvfs_t *ufsvfsp_save = ufsvfsp;
	int error;
	struct vnode *vp;
	char *slash, *tail, *longest;
	char *errstr;
	int found = 0;
	union {
		char un_devpath[OBP_MAXPATHLEN];
		char un_sfpath[MAXNAMELEN];
	} un;
#define	devpath	un.un_devpath
#define	sfpath	un.un_sfpath

	ASSERT(cprconfig_loaded);
	/*
	 * We need not worry about locking or the timing of releasing
	 * the vnode, since we are single-threaded now.
	 */

	switch (cf->cf_type) {
	case CFT_SPEC:
		error = i_devname_to_promname(cf->cf_devfs, devpath,
		    OBP_MAXPATHLEN);
		if (error || strcmp(devpath, cf->cf_dev_prom)) {
			cpr_err(CE_CONT, path_chg_fmt,
			    cf->cf_dev_prom, devpath, rerun);
			return (error ? error : EINVAL);
		}
		/*FALLTHROUGH*/
	case CFT_ZVOL:
		if (strlen(cf->cf_path) > sizeof (sfpath)) {
			cpr_err(CE_CONT, long_name);
			return (ENAMETOOLONG);
		}
		if ((error = lookupname(cf->cf_devfs,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
			return (error);
		}
		if (vp->v_type != VBLK)
			errstr = "statefile must be a block device";
		else if (vfs_devismounted(vp->v_rdev))
			errstr = "statefile device must not "
			    "have a file system mounted on it";
		else if (IS_SWAPVP(vp))
			errstr = "statefile device must not "
			    "be configured as swap file";
		else
			errstr = NULL;

		VN_RELE(vp);
		if (errstr) {
			cpr_err(CE_CONT, "%s.\n", errstr);
			return (ENOTSUP);
		}

		return (error);
	case CFT_UFS:
		break;		/* don't indent all the original code */
	default:
		cpr_err(CE_PANIC, "invalid cf_type");
	}

	/*
	 * The original code for UFS statefile
	 */
	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
		cpr_err(CE_CONT, long_name);
		return (ENAMETOOLONG);
	}

	bzero(sfpath, sizeof (sfpath));
	(void) strcpy(sfpath, cpr_cprconfig_to_path());

	if (*sfpath != '/') {
		cpr_err(CE_CONT, "Statefile pathname %s "
		    "must begin with a /\n", sfpath);
		return (EINVAL);
	}

	/*
	 * Find the longest prefix of the statefile pathname which
	 * is the mountpoint of a filesystem.  This string must
	 * match the cf_fs field we read from the config file.  Other-
	 * wise the user has changed things without running pmconfig.
	 */
	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';	  /* temporarily terminate the string */
		if ((error = lookupname(sfpath,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			*slash = '/';
			cpr_err(CE_CONT, "A directory in the "
			    "statefile path %s was not found.\n", sfpath);
			return (error);
		}

		vfs_list_read_lock();
		vfsp = rootvfs;
		do {
			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
				found = 1;
				break;
			}
			vfsp = vfsp->vfs_next;
		} while (vfsp != rootvfs);
		vfs_list_unlock();

		/*
		 * If we have found a filesystem mounted on the current
		 * path prefix, remember the end of the string in
		 * "longest".  If it happens to be the the exact fs
		 * saved in the configuration file, save the current
		 * ufsvfsp so we can make additional checks further down.
		 */
		if (found) {
			longest = slash;
			if (strcmp(cf->cf_fs, sfpath) == 0) {
				ufsvfsp_save = ufsvfsp;
				vfsp_save = vfsp;
			}
			found = 0;
		}

		VN_RELE(vp);
		*slash = '/';
		tail = slash + 1;
	}
	*longest = '\0';
	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "the statefile when pmconfig was run (%s) has "
		    "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
		return (EINVAL);
	}

	if ((error = lookupname(cf->cf_devfs,
	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
		return (error);
	}

	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "statefile no longer mounted on device %s. "
		    "See power.conf(4).", cf->cf_devfs);
		VN_RELE(vp);
		return (ENXIO);
	}
	VN_RELE(vp);

	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
	if (error || strcmp(devpath, cf->cf_dev_prom)) {
		cpr_err(CE_CONT, path_chg_fmt,
		    cf->cf_dev_prom, devpath, rerun);
		return (error ? error : EINVAL);
	}

	return (0);
}
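/*
 * User-space illustration of the longest-prefix walk above: each '/'
 * is temporarily replaced with a NUL so the prefix can be looked up,
 * then restored.  The mount-point test is a stub and the paths are
 * made up; the pointer bookkeeping matches the loop in
 * cpr_verify_statefile_path().
 */
#include <stdio.h>
#include <string.h>

static int
is_mountpoint(const char *path)
{
	/* stand-in for the vfs-list scan */
	return (strcmp(path, "/export") == 0);
}

int
main(void)
{
	char sfpath[] = "/export/home/.CPR";
	char *slash, *tail, *longest;

	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';		/* temporarily terminate */
		if (is_mountpoint(sfpath))
			longest = slash;	/* deepest match so far */
		*slash = '/';		/* restore */
		tail = slash + 1;
	}
	*longest = '\0';
	printf("filesystem prefix: %s\n", sfpath);	/* "/export" */
	return (0);
}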
Example #21
int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation.  If this is the first checkpoint, disk
	 * block allocation is done here; otherwise just the file size is
	 * checked.  If statefile allocation is being retried, C_VP will
	 * already be initialized.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing; the statefile needs to be
		 * pre-allocated since we can't and don't want to do allocation
		 * during checkpoint (too much of the OS is disabled).
		 *    - do a preliminary size check here; if it is too small,
		 *	allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk blocks allocation for the state file.
		 * Ask the file system prepare itself for the dump operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}
Example #22
int
cpr(int fcn, void *mdep)
{

#if defined(__sparc)
	static const char noswapstr[] = "reusable statefile requires "
	    "that no swap area be configured.\n";
	static const char blockstr[] = "reusable statefile must be "
	    "a block device.  See power.conf(4) and pmconfig(1M).\n";
	static const char normalfmt[] = "cannot run normal "
	    "checkpoint/resume when in reusable statefile mode. "
	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
	    "to exit reusable statefile mode.\n";
	static const char modefmt[] = "%s in reusable mode.\n";
#endif
	register int rc = 0;
	int cpr_sleeptype;

	/*
	 * First, reject commands that we don't (yet) support on this arch.
	 * This is easier to understand broken out like this than grotting
	 * through the second switch below.
	 */

	switch (fcn) {
#if defined(__sparc)
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		return (ENOTSUP);
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_SUSP_DEVICES:
		cpr_sleeptype = CPR_TODISK;
		break;
#endif
#if defined(__x86)
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_PRINT:
		return (ENOTSUP);
	/* The DEV_* values need to be removed after sys-suspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CPR_SUSP_DEVICES:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
	case AD_FORCE_SUSPEND_TO_RAM:
	case AD_DEVICE_SUSPEND_TO_RAM:
		cpr_sleeptype = CPR_TORAM;
		break;
#endif
	}
#if defined(__sparc)
	/*
	 * Need to know if we're in reusable mode, but we will likely have
	 * rebooted since REUSEINIT, so we have to get the info from the
	 * file system
	 */
	if (!cpr_reusable_mode)
		cpr_reusable_mode = cpr_get_reusable_mode();

	cpr_forget_cprconfig();
#endif

	switch (fcn) {

#if defined(__sparc)
	case AD_CPR_REUSEINIT:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_NOCOMPRESS:
	case AD_CPR_COMPRESS:
	case AD_CPR_FORCE:
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSABLE:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		if ((rc = cpr_reusable_mount_check()) != 0)
			return (rc);
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSEFINI:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		cpr_test_mode = 0;
		break;

	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 1;
		break;

	case AD_CPR_CHECK:
		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
			return (ENOTSUP);
		return (0);

	case AD_CPR_PRINT:
		CPR_STAT_EVENT_END("POST CPR DELAY");
		cpr_stat_event_print();
		return (0);
#endif

	case AD_CPR_DEBUG0:
		cpr_debug = 0;
		return (0);

	case AD_CPR_DEBUG1:
	case AD_CPR_DEBUG2:
	case AD_CPR_DEBUG3:
	case AD_CPR_DEBUG4:
	case AD_CPR_DEBUG5:
	case AD_CPR_DEBUG7:
	case AD_CPR_DEBUG8:
		cpr_debug |= CPR_DEBUG_BIT(fcn);
		return (0);

	case AD_CPR_DEBUG9:
		cpr_debug |= CPR_DEBUG6;
		return (0);

	/* The DEV_* values need to be removed after sys-suspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		cpr_test_point = LOOP_BACK_NONE;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
		cpr_test_point = LOOP_BACK_PASS;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
		cpr_test_point = LOOP_BACK_FAIL;
		break;

	case AD_FORCE_SUSPEND_TO_RAM:
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		break;

	case AD_DEVICE_SUSPEND_TO_RAM:
		if (mdep == NULL) {
			/* Didn't pass enough arguments */
			return (EINVAL);
		}
		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
		cpr_device = (major_t)atoi((char *)mdep);
		break;

	case AD_CPR_SUSP_DEVICES:
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
			cmn_err(CE_WARN,
			    "Some devices did not suspend "
			    "and may be unusable");
		(void) cpr_resume_devices(ddi_root_node(), 0);
		return (0);

	default:
		return (ENOTSUP);
	}

	if (!i_cpr_is_supported(cpr_sleeptype))
		return (ENOTSUP);

#if defined(__sparc)
	if ((cpr_sleeptype == CPR_TODISK &&
	    !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
		return (ENOTSUP);
#endif

	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
		ASSERT(i_cpr_is_supported(cpr_sleeptype));
		return (0);
	}

#if defined(__sparc)
	if (fcn == AD_CPR_REUSEINIT) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "already");
			mutex_exit(&cpr_slock);
			return (EBUSY);
		}
		rc = i_cpr_reuseinit();
		mutex_exit(&cpr_slock);
		return (rc);
	}

	if (fcn == AD_CPR_REUSEFINI) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (!cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "not");
			mutex_exit(&cpr_slock);
			return (EINVAL);
		}
		rc = i_cpr_reusefini();
		mutex_exit(&cpr_slock);
		return (rc);
	}
#endif

	/*
	 * acquire cpr serial lock and init cpr state structure.
	 */
	if (rc = cpr_init(fcn))
		return (rc);

#if defined(__sparc)
	if (fcn == AD_CPR_REUSABLE) {
		if ((rc = i_cpr_check_cprinfo()) != 0)  {
			mutex_exit(&cpr_slock);
			return (rc);
		}
	}
#endif

	/*
	 * Call the main cpr routine. If we are successful, we will be coming
	 * down from the resume side, otherwise we are still in suspend.
	 */
	cpr_err(CE_CONT, "System is being suspended");
	if (rc = cpr_main(cpr_sleeptype)) {
		CPR->c_flags |= C_ERROR;
		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
		cpr_err(CE_NOTE, "Suspend operation failed.");
	} else if (CPR->c_flags & C_SUSPENDING) {

		/*
		 * In the suspend to RAM case, by the time we get
		 * control back we're already resumed
		 */
		if (cpr_sleeptype == CPR_TORAM) {
			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
			cpr_done();
			return (rc);
		}

#if defined(__sparc)

		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
		/*
		 * Back from a successful checkpoint
		 */
		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
			mdboot(0, AD_BOOT, "", B_FALSE);
			/* NOTREACHED */
		}

		/* make sure there are no more changes to the device tree */
		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
		devtree_freeze();

		/*
		 * stop other cpus and raise our priority.  since there is only
		 * one active cpu after this, and our priority will be too high
		 * for us to be preempted, we're essentially single threaded
		 * from here on out.
		 */
		PMD(PMD_SX, ("cpr: stop other cpus\n"))
		i_cpr_stop_other_cpus();
		PMD(PMD_SX, ("cpr: spl6\n"))
		(void) spl6();

		/*
		 * try and reset leaf devices.  reset_leaves() should only
		 * be called when there are no other threads that could be
		 * accessing devices
		 */
		PMD(PMD_SX, ("cpr: reset leaves\n"))
		reset_leaves();

		/*
		 * If i_cpr_power_down() succeeds, it'll not return
		 *
		 * Drives with write-cache enabled need to flush
		 * their cache.
		 */
		if (fcn != AD_CPR_TESTHALT) {
			PMD(PMD_SX, ("cpr: power down\n"))
			(void) i_cpr_power_down(cpr_sleeptype);
		}
		ASSERT(cpr_sleeptype == CPR_TODISK);
		/* currently CPR_TODISK comes back via a boot path */
		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
		halt(NULL);
		/* NOTREACHED */
#endif
	}
	PMD(PMD_SX, ("cpr: cpr done\n"))
	cpr_done();
	return (rc);
}
Example #23
/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet. Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 * 	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}
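/*
 * Worked example of the estimate above, with invented figures:
 * ksize = 64MB of kernel pages, raw_data = 1MB of descriptors plus
 * bitmaps, nback = 16MB of unswappable user pages, and assuming
 * COMPRESS_PERCENT/INTEGRAL = 40/100 with UCOMP_RATE = 20
 * (compression ratio * 10):
 *
 *	size = (64MB * 40) / 100 + 1MB + (16MB * 10) / 20
 *	     = 25.6MB + 1MB + 8MB
 *	     = 34.6MB, rounded up to fs_bsize in the UFS case
 */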
Example #24
/*
 * Write necessary machine dependent information to cpr state file,
 * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
 */
int
i_cpr_write_machdep(vnode_t *vp)
{
	extern uint_t getpstate(), getwstate();
	extern uint_t i_cpr_tstack_size;
	const char ustr[] = ": unix-tte 2drop false ;";
	uintptr_t tinfo;
	label_t *ltp;
	cmd_t cmach;
	char *fmt;
	int rc;

	/*
	 * ustr[] is used as temporary forth words during
	 * slave startup sequence, see sfmmu_mp_startup()
	 */

	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
	cmach.md_size = sizeof (m_info) + sizeof (ustr);

	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
		cpr_err(CE_WARN, "Failed to write descriptor.");
		return (rc);
	}

	/*
	 * m_info is now cleared in i_cpr_dump_setup()
	 */
	m_info.ksb = (uint32_t)STACK_BIAS;
	m_info.kpstate = (uint16_t)getpstate();
	m_info.kwstate = (uint16_t)getwstate();
	CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
	    m_info.ksb, m_info.kpstate, m_info.kwstate);

	ltp = &ttolwp(curthread)->lwp_qsav;
	m_info.qsav_pc = (cpr_ext)ltp->val[0];
	m_info.qsav_sp = (cpr_ext)ltp->val[1];

	/*
	 * Set secondary context to INVALID_CONTEXT to force the HAT
	 * to re-setup the MMU registers and locked TTEs it needs for
	 * TLB miss handling.
	 */
	m_info.mmu_ctx_sec = INVALID_CONTEXT;
	m_info.mmu_ctx_pri = KCONTEXT;

	tinfo = (uintptr_t)curthread;
	m_info.thrp = (cpr_ptr)tinfo;

	tinfo = (uintptr_t)i_cpr_resume_setup;
	m_info.func = (cpr_ptr)tinfo;

	/*
	 * i_cpr_data_page is comprised of a 4K stack area and a few
	 * trailing data symbols; the page is shared by the prom and
	 * kernel during resume.  the stack size is recorded here
	 * and used by cprboot to set %sp
	 */
	tinfo = (uintptr_t)&i_cpr_data_page;
	m_info.tmp_stack = (cpr_ptr)tinfo;
	m_info.tmp_stacksize = i_cpr_tstack_size;

	m_info.test_mode = cpr_test_mode;

	i_cpr_save_cpu_info();

	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
		cpr_err(CE_WARN, "Failed to write machdep info.");
		return (rc);
	}

	fmt = "error writing %s forth info";
	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
		cpr_err(CE_WARN, fmt, "unix-tte");

	return (rc);
}
Example #25
/*
 * find prom phys pages and alloc space for a tmp copy
 */
static int
i_cpr_find_ppages(void)
{
	struct page *pp;
	struct memlist *pmem;
	pgcnt_t npages, pcnt, scnt, vcnt;
	pfn_t ppn, plast, *dst;
	int mapflag;

	cpr_clear_bitmaps();
	mapflag = REGULAR_BITMAP;

	/*
	 * there should be a page_t for each phys page used by the kernel;
	 * set a bit for each phys page not tracked by a page_t
	 */
	pcnt = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (page_numtopp_nolock(ppn))
				continue;
			(void) cpr_setbit(ppn, mapflag);
			pcnt++;
		}
	}
	memlist_read_unlock();

	/*
	 * clear bits for phys pages in each segment
	 */
	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

	/*
	 * set bits for phys pages referenced by the promvp vnode;
	 * these pages are mostly comprised of forthdebug words
	 */
	vcnt = 0;
	for (pp = promvp.v_pages; pp; ) {
		if (cpr_setbit(pp->p_offset, mapflag) == 0)
			vcnt++;
		pp = pp->p_vpnext;
		if (pp == promvp.v_pages)
			break;
	}

	/*
	 * the total number of prom pages is:
	 * (non-page_t pages - seg pages + vnode pages)
	 */
	ppage_count = pcnt - scnt + vcnt;
	CPR_DEBUG(CPR_DEBUG1,
	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
	    pcnt, scnt, vcnt, ppage_count);

	/*
	 * alloc array of pfn_t to store phys page list
	 */
	pphys_list_size = ppage_count * sizeof (pfn_t);
	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL) {
		cpr_err(CE_WARN, "cannot alloc pphys_list");
		return (ENOMEM);
	}

	/*
	 * phys pages referenced in the bitmap should be
	 * those used by the prom; scan bitmap and save
	 * a list of prom phys page numbers
	 */
	dst = pphys_list;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (cpr_isset(ppn, mapflag)) {
				ASSERT(dst < (pphys_list + ppage_count));
				*dst++ = ppn;
			}
		}
	}
	memlist_read_unlock();

	/*
	 * allocate space to store prom pages
	 */
	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		kmem_free(pphys_list, pphys_list_size);
		pphys_list = NULL;
		cpr_err(CE_WARN, "cannot alloc ppage_buf");
		return (ENOMEM);
	}

	return (0);
}
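/*
 * Accounting recap for i_cpr_find_ppages(), with assumed figures:
 * if 2000 physical pages lack a page_t (pcnt), kernel segments
 * account for 1800 of the bits cleared (scnt), and 300 pages hang
 * off promvp (vcnt), then ppage_count = 2000 - 1800 + 300 = 500
 * prom pages get listed in pphys_list and copied into ppage_buf.
 */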