/* * Construct the pathname of the state file and return a pointer to * caller. Read the config file to get the mount point of the * filesystem and the pathname within fs. */ char * cpr_build_statefile_path(void) { struct cprconfig *cf = &cprconfig; if (cpr_get_config()) return (NULL); switch (cf->cf_type) { case CFT_UFS: if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) { cpr_err(CE_CONT, "Statefile path is too long.\n"); return (NULL); } return (cpr_cprconfig_to_path()); case CFT_ZVOL: /*FALLTHROUGH*/ case CFT_SPEC: return (cf->cf_devfs); default: cpr_err(CE_PANIC, "invalid statefile type"); /*NOTREACHED*/ return (NULL); } }
/*
 * Initialize global CPR state for one suspend attempt driven by the
 * uadmin function code 'fcn'.  Takes cpr_slock (released later in
 * cpr_done()) so only one suspend/resume can run at a time.
 * Returns 0, EBUSY if a suspend is already in progress, or EAGAIN if
 * the kernelmap reservation fails.
 */
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	/* reset per-attempt bookkeeping */
	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	/* compression is on unless the caller explicitly opted out */
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	DEBUG3(cpr_err(CE_CONT,
	    "Reserved virtual range from 0x%p for writing " "kas\n",
	    (void *)CPR->c_mapping_area));

	return (0);
}
/*
 * Exit reusable statefile mode: clear the in-kernel flag, restore the
 * prom properties recorded in the cpr default file, then invalidate
 * the on-disk copy so a stale file cannot be reused.
 * Returns 0 or an errno; EROFS from the open means / is not mounted
 * writeable, which is reported to the user.
 */
int
i_cpr_reusefini(void)
{
	struct vnode *vp;
	cdef_t *cdef;
	size_t size;
	char *bufp;
	int rc;

	if (cpr_reusable_mode)
		cpr_reusable_mode = 0;

	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
		if (rc == EROFS) {
			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
			    "(uadmin %d %d)\nmust be done with / mounted "
			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
		}
		return (rc);
	}

	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));

	if (rc) {
		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
		    cpr_default_path, rc);
	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
		/*
		 * cpr_enumerate_promprops() hands back an allocated buffer
		 * in bufp/size; it is consumed by the message then freed.
		 */
		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
		    "prom values for %s", cpr_default_path,
		    cpr_enumerate_promprops(&bufp, &size));
		kmem_free(bufp, size);
		rc = EINVAL;
	} else {
		/*
		 * clean up prom properties
		 */
		rc = cpr_update_nvram(cdef->props);
		if (rc == 0) {
			/*
			 * invalidate the disk copy and turn off reusable
			 */
			cdef->mini.magic = 0;
			cdef->mini.reusable = 0;
			if (rc = cpr_rdwr(UIO_WRITE, vp, &cdef->mini,
			    sizeof (cdef->mini))) {
				cpr_err(CE_WARN,
				    "Failed writing %s, errno %d",
				    cpr_default_path, rc);
			}
		}
	}

	/* release the vnode and scratch buffer on every path */
	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	kmem_free(cdef, sizeof (*cdef));

	return (rc);
}
/*
 * write new or original values to nvram
 *
 * Walks all CPR_MAXPROP entries in 'props' and pushes each modified
 * property (mod != PROP_NOMOD) into the prom "options" node, verifying
 * every write by re-reading the stored property length.
 * Returns 0 on success, the error from cpr_get_options_node(), or
 * ENXIO if any nvram write fails verification.
 */
int
cpr_update_nvram(cprop_t *props)
{
	cprop_t *tail;
	pnode_t node;
	int len, rc;

	if (rc = cpr_get_options_node(&node))
		return (rc);

	if (cpr_show_props)
		prom_printf("\ncpr_show_props:\n");
	for (tail = props + CPR_MAXPROP; props < tail; props++) {
		if (cpr_show_props) {
			prom_printf("mod=%c, name \"%s\",\tvalue \"%s\"\n",
			    props->mod, props->name, props->value);
		}
		if (props->mod == PROP_NOMOD)
			continue;
		/*
		 * Note: When doing a prom_setprop you must include the
		 * trailing NULL in the length argument, but when calling
		 * prom_getproplen() the NULL is excluded from the count!
		 */
		len = strlen(props->value);
		rc = prom_setprop(node, props->name, props->value, len + 1);
		if (rc < 0 || prom_getproplen(node, props->name) != len) {
			cpr_err(CE_WARN, "cannot set nvram \"%s\" to \"%s\"",
			    props->name, props->value);
			return (ENXIO);
		}
	}

	return (0);
}
/*
 * Checks and makes sure all user threads are stopped
 *
 * Walks the whole thread list under pidlock, skipping kernel threads
 * (p_as == &kas) and zombies.  Returns 0 if every user thread is
 * CPR-stopped, 1 (with diagnostics) at the first thread that is not.
 *
 * NOTE(review): 'count' is not declared here — presumably a file-scope
 * retry counter maintained by the stop/retry loop elsewhere in this
 * file; confirm.  Also 'rc' is initialized to 0 and never changed, so
 * the "&& rc == 0" loop condition is vestigial.
 */
static int
cpr_check_user_threads()
{
	kthread_id_t tp;
	int rc = 0;

	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		/* skip kernel threads and zombies */
		if (ttoproc(tp)->p_as == &kas ||
		    ttoproc(tp)->p_stat == SZOMB)
			continue;

		thread_lock(tp);
		/*
		 * make sure that we are off all the queues and in a stopped
		 * state.
		 */
		if (!CPR_ISTOPPED(tp)) {
			thread_unlock(tp);
			mutex_exit(&pidlock);

			/* only warn loudly once retries are exhausted */
			if (count == CPR_UTSTOP_RETRY) {
				DEBUG1(errp("Suspend failed: cannt stop "
				    "uthread\n"));
				cpr_err(CE_WARN, "Suspend cannot stop "
				    "process %s (%p:%x).",
				    ttoproc(tp)->p_user.u_psargs,
				    (void *)tp, tp->t_state);
				cpr_err(CE_WARN, "Process may be waiting for"
				    " network request, please try again.");
			}

			DEBUG2(errp("cant stop t=%p state=%x pfg=%x sched=%x\n",
			    tp, tp->t_state, tp->t_proc_flag,
			    tp->t_schedflag));
			DEBUG2(errp("proc %p state=%x pid=%d\n",
			    ttoproc(tp), ttoproc(tp)->p_stat,
			    ttoproc(tp)->p_pidp->pid_id));
			return (1);
		}
		thread_unlock(tp);
	} while ((tp = tp->t_next) != curthread && rc == 0);
	mutex_exit(&pidlock);
	return (0);
}
/*
 * Stop kernel threads by using the callback mechanism.  If any thread
 * cannot be stopped, return failure.
 *
 * NOTE(review): the callb table stays locked on the EBUSY paths too;
 * per the inline comment it is unlocked in resume — confirm callers
 * of the failure path account for this.
 */
int
cpr_stop_kernel_threads(void)
{
	caddr_t	name;
	kthread_id_t tp;
	proc_t *p;

	callb_lock_table();	/* Note: we unlock the table in resume. */

	DEBUG1(errp("stopping kernel daemons..."));
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
		cpr_err(CE_WARN, "Could not stop \"%s\" kernel thread. "
		    "Please try again later.", name);
		return (EBUSY);
	}

	/*
	 * We think we stopped all the kernel threads.  Just in case
	 * someone is not playing by the rules, take a spin through
	 * the threadlist and see if we can account for everybody.
	 */
	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		p = ttoproc(tp);
		if (p->p_as != &kas)
			continue;

		/* interrupt threads are not stopped via the daemon class */
		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (! callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			cpr_err(CE_WARN,
			    "\"%s\" kernel thread not stopped.", name);
			return (EBUSY);
		}
	} while ((tp = tp->t_next) != curthread);
	mutex_exit(&pidlock);

	DEBUG1(errp("done\n"));
	return (0);
}
/*
 * Open the cpr default file with the requested mode; on failure log
 * the problem and hand the errno back to the caller.
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int rc;

	rc = cpr_open(cpr_default_path, mode, vpp);
	if (rc != 0) {
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, rc);
	}
	return (rc);
}
/*
 * Look up the prom "options" node and return it through 'nodep'.
 * Returns 0 on success, ENOENT (with a warning) if the node is absent.
 */
static int
cpr_get_options_node(pnode_t *nodep)
{
	pnode_t node = prom_optionsnode();

	*nodep = node;
	if (node != OBP_NONODE && node != OBP_BADNODE)
		return (0);

	cpr_err(CE_WARN, "cannot get \"options\" node");
	return (ENOENT);
}
/*
 * Allocate bitmaps according to the phys_install list.
 * One descriptor is created per physical memory range, each carrying a
 * regular and a volatile bitmap sized for that range; a final
 * magic-only descriptor terminates the array.
 * Returns 0, EFBIG (too many ranges) or ENOMEM.
 */
static int
i_cpr_bitmap_setup(void)
{
	struct memlist *pmem;
	cbd_t *dp, *tail;
	void *space;
	size_t size;

	/*
	 * The number of bitmap descriptors will be the count of
	 * phys_install ranges plus 1 for a trailing NULL struct.
	 */
	cpr_nbitmaps = 1;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next)
		cpr_nbitmaps++;

	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
		return (EFBIG);
	}

	/* Alloc an array of bitmap descriptors. */
	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
	if (dp == NULL) {
		cpr_nbitmaps = 0;
		return (ENOMEM);
	}
	tail = dp + cpr_nbitmaps;

	CPR->c_bmda = dp;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		size = BITMAP_BYTES(pmem->ml_size);
		/* one allocation backs both the regular and volatile maps */
		space = kmem_zalloc(size * 2, KM_NOSLEEP);
		if (space == NULL)
			/*
			 * NOTE(review): descriptors/bitmaps allocated so
			 * far are left for the caller to release —
			 * presumably via i_cpr_bitmap_cleanup(); confirm.
			 */
			return (ENOMEM);
		ASSERT(dp < tail);
		dp->cbd_magic = CPR_BITMAP_MAGIC;
		dp->cbd_spfn = mmu_btop(pmem->ml_address);
		dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
		dp->cbd_size = size;
		dp->cbd_reg_bitmap = (cpr_ptr)space;
		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
		dp++;
	}

	/* set magic for the last descriptor */
	ASSERT(dp == (tail - 1));
	dp->cbd_magic = CPR_BITMAP_MAGIC;

	return (0);
}
/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	uintptr_t jb_start = (uintptr_t)i_cpr_resume_setup;
	uintptr_t jb_end = (uintptr_t)&i_cpr_end_jumpback;

	CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n", jb_end - jb_start);

	/* the jumpback code must live entirely within a single page */
	if ((jb_end & MMU_PAGEMASK) != (jb_start & MMU_PAGEMASK))
		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
}
/*
 * reads config data into cprconfig
 *
 * Loads CPR_CONFIG into the global 'cprconfig' exactly once (guarded
 * by cprconfig_loaded) and validates its magic number.
 * Returns 0 on success, the open/read errno, or EINVAL on bad magic.
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	/* already cached from a previous call */
	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}
/*
 * Sanity-check the cprinfo (cpr default) file: it must exist and
 * carry the CPR_DEFAULT_MAGIC signature.  When the file is missing or
 * invalid, print the uadmin command sequence the user must run first.
 * Returns 0, the open/read errno, or EINVAL on bad magic.
 */
int
i_cpr_check_cprinfo(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc = 0;

	if (rc = cpr_open_deffile(FREAD, &vp)) {
		if (rc == ENOENT)
			cpr_err(CE_NOTE, "cprinfo file does not "
			    "exist. You must run 'uadmin %d %d' "
			    "command while / is mounted writeable,\n"
			    "then reboot and run 'uadmin %d %d' "
			    "to create a reusable statefile",
			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
		return (rc);
	}

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
		    cpr_default_path, rc);
	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
		    "You must run 'uadmin %d %d' while / is mounted "
		    "writeable, then reboot and run 'uadmin %d %d' "
		    "to create a reusable statefile\n",
		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
		rc = EINVAL;
	}

	return (rc);
}
/*
 * change cpu to online/offline
 *
 * 'state' selects the transition (CPU_CPR_ONLINE / CPU_CPR_OFFLINE).
 * Must be called with cpu_lock held.  Returns 0 on success, the errno
 * from cpu_online()/cpu_offline(), or EINVAL for an unknown state.
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	default:
		/*
		 * Fix: 'rc' was read uninitialized for any other state
		 * (the switch had no default); reject it explicitly.
		 */
		rc = EINVAL;
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}
/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken.  They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs, cachefs and pcfs which have disk state that could get
 * out of sync with the in-kernel data).
 *
 * Returns 0 when every writeable mount is of an allowed type, EINVAL
 * (with a message naming the offending fs type) otherwise.
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		/* read-only mounts are always acceptable */
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}
/*
 * This routine releases any resources used during the checkpoint:
 * stat bookkeeping, dump bitmaps, the compression/page buffers and
 * machine-dependent memory.  Finally drops cpr_slock, which was
 * acquired in cpr_init(), re-enabling future suspend attempts.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	/* release the single-suspend lock taken in cpr_init() */
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}
/*
 * increase statefile size
 *
 * Grows 'vp' to at least 'newsize' bytes by writing one filesystem
 * block at each block offset past the current EOF, which forces the
 * filesystem to allocate the disk space now instead of at dump time.
 * Returns 0 (including when the file is already large enough) or the
 * errno from vn_rdwr(); ENOSPC additionally reports how short we are.
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	/* sample the current size under the inode contents lock */
	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space;
	 * the buffer is made non-zero first — presumably so the
	 * writes cannot be stored as holes; confirm.
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}
/*
 * write cdef_t to disk.  This contains the original values of prom
 * properties that we modify.  We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 *
 * Returns 0 or the first errno from open/write/fsync; 'str' names the
 * failing operation for the warning and is only read when rc != 0.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
		str = "fsync";

	/* close/release regardless of success */
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}
/*
 * Enter reusable statefile mode: run the default-file setup, validate
 * the default info, and flag reusable mode on success.  EROFS from
 * validation means / is not mounted writeable and is reported.
 * The setup state is always torn down before returning.
 */
int
i_cpr_reuseinit(void)
{
	int rc;

	if ((rc = cpr_default_setup(1)) != 0)
		return (rc);

	/*
	 * We need to validate default file
	 */
	rc = cpr_validate_definfo(1);
	switch (rc) {
	case 0:
		cpr_reusable_mode = 1;
		break;
	case EROFS:
		cpr_err(CE_NOTE, "reuseinit must be performed "
		    "while / is mounted writeable");
		break;
	default:
		break;
	}

	(void) cpr_default_setup(0);
	return (rc);
}
/*
 * Top level routine to direct suspend/resume of a domain.
 *
 * The sequence is strictly ordered: quiesce devices and xenbus, bind
 * to vcpu 0, stop the other CPUs, then enter a no-locks window where
 * event channels, grant tables and time are suspended before calling
 * HYPERVISOR_suspend().  On return (possibly on different hardware),
 * everything is brought back in reverse order.  Any failure inside
 * the no-locks window crashes the domain via HYPERVISOR_shutdown().
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does.  (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	/* MFN of the start_info page, passed to HYPERVISOR_suspend() below */
	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them.  But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	/* upcalls must be masked before suspending, or we crash the domain */
	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/* unmap shared_info; it is re-mapped after resume */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
/* * Verify that the information in the configuration file regarding the * location for the statefile is still valid, depending on cf_type. * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be * mounted on the same device as when pmconfig was last run, * and the translation of that device to a node in the prom's * device tree must be the same as when pmconfig was last run. * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block * special file, it must have no file system mounted on it, * and the translation of that device to a node in the prom's * device tree must be the same as when pmconfig was last run. */ static int cpr_verify_statefile_path(void) { struct cprconfig *cf = &cprconfig; static const char long_name[] = "Statefile pathname is too long.\n"; static const char lookup_fmt[] = "Lookup failed for " "cpr statefile device %s.\n"; static const char path_chg_fmt[] = "Device path for statefile " "has changed from %s to %s.\t%s\n"; static const char rerun[] = "Please rerun pmconfig(1m)."; struct vfs *vfsp = NULL, *vfsp_save = rootvfs; ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data; ufsvfs_t *ufsvfsp_save = ufsvfsp; int error; struct vnode *vp; char *slash, *tail, *longest; char *errstr; int found = 0; union { char un_devpath[OBP_MAXPATHLEN]; char un_sfpath[MAXNAMELEN]; } un; #define devpath un.un_devpath #define sfpath un.un_sfpath ASSERT(cprconfig_loaded); /* * We need not worry about locking or the timing of releasing * the vnode, since we are single-threaded now. 
*/ switch (cf->cf_type) { case CFT_SPEC: error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN); if (error || strcmp(devpath, cf->cf_dev_prom)) { cpr_err(CE_CONT, path_chg_fmt, cf->cf_dev_prom, devpath, rerun); return (error); } /*FALLTHROUGH*/ case CFT_ZVOL: if (strlen(cf->cf_path) > sizeof (sfpath)) { cpr_err(CE_CONT, long_name); return (ENAMETOOLONG); } if ((error = lookupname(cf->cf_devfs, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs); return (error); } if (vp->v_type != VBLK) errstr = "statefile must be a block device"; else if (vfs_devismounted(vp->v_rdev)) errstr = "statefile device must not " "have a file system mounted on it"; else if (IS_SWAPVP(vp)) errstr = "statefile device must not " "be configured as swap file"; else errstr = NULL; VN_RELE(vp); if (errstr) { cpr_err(CE_CONT, "%s.\n", errstr); return (ENOTSUP); } return (error); case CFT_UFS: break; /* don't indent all the original code */ default: cpr_err(CE_PANIC, "invalid cf_type"); } /* * The original code for UFS statefile */ if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) { cpr_err(CE_CONT, long_name); return (ENAMETOOLONG); } bzero(sfpath, sizeof (sfpath)); (void) strcpy(sfpath, cpr_cprconfig_to_path()); if (*sfpath != '/') { cpr_err(CE_CONT, "Statefile pathname %s " "must begin with a /\n", sfpath); return (EINVAL); } /* * Find the longest prefix of the statefile pathname which * is the mountpoint of a filesystem. This string must * match the cf_fs field we read from the config file. Other- * wise the user has changed things without running pmconfig. 
*/ tail = longest = sfpath + 1; /* pt beyond the leading "/" */ while ((slash = strchr(tail, '/')) != NULL) { *slash = '\0'; /* temporarily terminate the string */ if ((error = lookupname(sfpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { *slash = '/'; cpr_err(CE_CONT, "A directory in the " "statefile path %s was not found.\n", sfpath); VN_RELE(vp); return (error); } vfs_list_read_lock(); vfsp = rootvfs; do { ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) { found = 1; break; } vfsp = vfsp->vfs_next; } while (vfsp != rootvfs); vfs_list_unlock(); /* * If we have found a filesystem mounted on the current * path prefix, remember the end of the string in * "longest". If it happens to be the the exact fs * saved in the configuration file, save the current * ufsvfsp so we can make additional checks further down. */ if (found) { longest = slash; if (strcmp(cf->cf_fs, sfpath) == 0) { ufsvfsp_save = ufsvfsp; vfsp_save = vfsp; } found = 0; } VN_RELE(vp); *slash = '/'; tail = slash + 1; } *longest = '\0'; if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) { cpr_err(CE_CONT, "Filesystem containing " "the statefile when pmconfig was run (%s) has " "changed to %s. %s\n", cf->cf_fs, sfpath, rerun); return (EINVAL); } if ((error = lookupname(cf->cf_devfs, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs); return (error); } if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) { cpr_err(CE_CONT, "Filesystem containing " "statefile no longer mounted on device %s. " "See power.conf(4).", cf->cf_devfs); VN_RELE(vp); return (ENXIO); } VN_RELE(vp); error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN); if (error || strcmp(devpath, cf->cf_dev_prom)) { cpr_err(CE_CONT, path_chg_fmt, cf->cf_dev_prom, devpath, rerun); return (error); } return (0); }
/*
 * Open (first pass) or re-validate (retry pass) the statefile and make
 * sure the space needed for the dump is actually reserved on disk.
 * 'alloc_retry' non-zero means C_VP is already set up from a previous
 * attempt and only needs its reservation redone.
 * Returns 0 or an errno from path building, open, validation, or the
 * filesystem DUMP_ALLOC request.
 */
int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation. If checkpoint the first time, disk blocks
	 * allocation will be done; otherwise, just do file size check.
	 * if statefile allocation is being retried, C_VP will be inited
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		/* drop the previous reservation before re-validating */
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing, the state file needs to
		 * be pre-allocated since we can't and don't want to do
		 * allocation during checkpoint (too much of the OS is
		 * disabled).
		 *    - do a preliminary size checking here, if it is too
		 *	small, allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();
		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk blocks allocation for the state file.
		 * Ask the file system prepare itself for the dump operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}
int cpr(int fcn, void *mdep) { #if defined(__sparc) static const char noswapstr[] = "reusable statefile requires " "that no swap area be configured.\n"; static const char blockstr[] = "reusable statefile must be " "a block device. See power.conf(4) and pmconfig(1M).\n"; static const char normalfmt[] = "cannot run normal " "checkpoint/resume when in reusable statefile mode. " "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) " "to exit reusable statefile mode.\n"; static const char modefmt[] = "%s in reusable mode.\n"; #endif register int rc = 0; int cpr_sleeptype; /* * First, reject commands that we don't (yet) support on this arch. * This is easier to understand broken out like this than grotting * through the second switch below. */ switch (fcn) { #if defined(__sparc) case AD_CHECK_SUSPEND_TO_RAM: case AD_SUSPEND_TO_RAM: return (ENOTSUP); case AD_CHECK_SUSPEND_TO_DISK: case AD_SUSPEND_TO_DISK: case AD_CPR_REUSEINIT: case AD_CPR_NOCOMPRESS: case AD_CPR_FORCE: case AD_CPR_REUSABLE: case AD_CPR_REUSEFINI: case AD_CPR_TESTZ: case AD_CPR_TESTNOZ: case AD_CPR_TESTHALT: case AD_CPR_SUSP_DEVICES: cpr_sleeptype = CPR_TODISK; break; #endif #if defined(__x86) case AD_CHECK_SUSPEND_TO_DISK: case AD_SUSPEND_TO_DISK: case AD_CPR_REUSEINIT: case AD_CPR_NOCOMPRESS: case AD_CPR_FORCE: case AD_CPR_REUSABLE: case AD_CPR_REUSEFINI: case AD_CPR_TESTZ: case AD_CPR_TESTNOZ: case AD_CPR_TESTHALT: case AD_CPR_PRINT: return (ENOTSUP); /* The DEV_* values need to be removed after sys-syspend is fixed */ case DEV_CHECK_SUSPEND_TO_RAM: case DEV_SUSPEND_TO_RAM: case AD_CPR_SUSP_DEVICES: case AD_CHECK_SUSPEND_TO_RAM: case AD_SUSPEND_TO_RAM: case AD_LOOPBACK_SUSPEND_TO_RAM_PASS: case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL: case AD_FORCE_SUSPEND_TO_RAM: case AD_DEVICE_SUSPEND_TO_RAM: cpr_sleeptype = CPR_TORAM; break; #endif } #if defined(__sparc) /* * Need to know if we're in reusable mode, but we will likely have * rebooted since REUSEINIT, so we have to get the info from the * file system */ if 
(!cpr_reusable_mode) cpr_reusable_mode = cpr_get_reusable_mode(); cpr_forget_cprconfig(); #endif switch (fcn) { #if defined(__sparc) case AD_CPR_REUSEINIT: if (!i_cpr_reusable_supported()) return (ENOTSUP); if (!cpr_statefile_is_spec()) { cpr_err(CE_CONT, blockstr); return (EINVAL); } if ((rc = cpr_check_spec_statefile()) != 0) return (rc); if (swapinfo) { cpr_err(CE_CONT, noswapstr); return (EINVAL); } cpr_test_mode = 0; break; case AD_CPR_NOCOMPRESS: case AD_CPR_COMPRESS: case AD_CPR_FORCE: if (cpr_reusable_mode) { cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI); return (ENOTSUP); } cpr_test_mode = 0; break; case AD_CPR_REUSABLE: if (!i_cpr_reusable_supported()) return (ENOTSUP); if (!cpr_statefile_is_spec()) { cpr_err(CE_CONT, blockstr); return (EINVAL); } if ((rc = cpr_check_spec_statefile()) != 0) return (rc); if (swapinfo) { cpr_err(CE_CONT, noswapstr); return (EINVAL); } if ((rc = cpr_reusable_mount_check()) != 0) return (rc); cpr_test_mode = 0; break; case AD_CPR_REUSEFINI: if (!i_cpr_reusable_supported()) return (ENOTSUP); cpr_test_mode = 0; break; case AD_CPR_TESTZ: case AD_CPR_TESTNOZ: case AD_CPR_TESTHALT: if (cpr_reusable_mode) { cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI); return (ENOTSUP); } cpr_test_mode = 1; break; case AD_CPR_CHECK: if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode) return (ENOTSUP); return (0); case AD_CPR_PRINT: CPR_STAT_EVENT_END("POST CPR DELAY"); cpr_stat_event_print(); return (0); #endif case AD_CPR_DEBUG0: cpr_debug = 0; return (0); case AD_CPR_DEBUG1: case AD_CPR_DEBUG2: case AD_CPR_DEBUG3: case AD_CPR_DEBUG4: case AD_CPR_DEBUG5: case AD_CPR_DEBUG7: case AD_CPR_DEBUG8: cpr_debug |= CPR_DEBUG_BIT(fcn); return (0); case AD_CPR_DEBUG9: cpr_debug |= CPR_DEBUG6; return (0); /* The DEV_* values need to be removed after sys-syspend is fixed */ case DEV_CHECK_SUSPEND_TO_RAM: case DEV_SUSPEND_TO_RAM: case AD_CHECK_SUSPEND_TO_RAM: case AD_SUSPEND_TO_RAM: cpr_test_point = LOOP_BACK_NONE; break; case 
AD_LOOPBACK_SUSPEND_TO_RAM_PASS: cpr_test_point = LOOP_BACK_PASS; break; case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL: cpr_test_point = LOOP_BACK_FAIL; break; case AD_FORCE_SUSPEND_TO_RAM: cpr_test_point = FORCE_SUSPEND_TO_RAM; break; case AD_DEVICE_SUSPEND_TO_RAM: if (mdep == NULL) { /* Didn't pass enough arguments */ return (EINVAL); } cpr_test_point = DEVICE_SUSPEND_TO_RAM; cpr_device = (major_t)atoi((char *)mdep); break; case AD_CPR_SUSP_DEVICES: cpr_test_point = FORCE_SUSPEND_TO_RAM; if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS) cmn_err(CE_WARN, "Some devices did not suspend " "and may be unusable"); (void) cpr_resume_devices(ddi_root_node(), 0); return (0); default: return (ENOTSUP); } if (!i_cpr_is_supported(cpr_sleeptype)) return (ENOTSUP); #if defined(__sparc) if ((cpr_sleeptype == CPR_TODISK && !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs))) return (ENOTSUP); #endif if (fcn == AD_CHECK_SUSPEND_TO_RAM || fcn == DEV_CHECK_SUSPEND_TO_RAM) { ASSERT(i_cpr_is_supported(cpr_sleeptype)); return (0); } #if defined(__sparc) if (fcn == AD_CPR_REUSEINIT) { if (mutex_tryenter(&cpr_slock) == 0) return (EBUSY); if (cpr_reusable_mode) { cpr_err(CE_CONT, modefmt, "already"); mutex_exit(&cpr_slock); return (EBUSY); } rc = i_cpr_reuseinit(); mutex_exit(&cpr_slock); return (rc); } if (fcn == AD_CPR_REUSEFINI) { if (mutex_tryenter(&cpr_slock) == 0) return (EBUSY); if (!cpr_reusable_mode) { cpr_err(CE_CONT, modefmt, "not"); mutex_exit(&cpr_slock); return (EINVAL); } rc = i_cpr_reusefini(); mutex_exit(&cpr_slock); return (rc); } #endif /* * acquire cpr serial lock and init cpr state structure. */ if (rc = cpr_init(fcn)) return (rc); #if defined(__sparc) if (fcn == AD_CPR_REUSABLE) { if ((rc = i_cpr_check_cprinfo()) != 0) { mutex_exit(&cpr_slock); return (rc); } } #endif /* * Call the main cpr routine. If we are successful, we will be coming * down from the resume side, otherwise we are still in suspend. 
*/ cpr_err(CE_CONT, "System is being suspended"); if (rc = cpr_main(cpr_sleeptype)) { CPR->c_flags |= C_ERROR; PMD(PMD_SX, ("cpr: Suspend operation failed.\n")) cpr_err(CE_NOTE, "Suspend operation failed."); } else if (CPR->c_flags & C_SUSPENDING) { /* * In the suspend to RAM case, by the time we get * control back we're already resumed */ if (cpr_sleeptype == CPR_TORAM) { PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n")) cpr_done(); return (rc); } #if defined(__sparc) PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n")) /* * Back from a successful checkpoint */ if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) { mdboot(0, AD_BOOT, "", B_FALSE); /* NOTREACHED */ } /* make sure there are no more changes to the device tree */ PMD(PMD_SX, ("cpr: dev tree freeze\n")) devtree_freeze(); /* * stop other cpus and raise our priority. since there is only * one active cpu after this, and our priority will be too high * for us to be preempted, we're essentially single threaded * from here on out. */ PMD(PMD_SX, ("cpr: stop other cpus\n")) i_cpr_stop_other_cpus(); PMD(PMD_SX, ("cpr: spl6\n")) (void) spl6(); /* * try and reset leaf devices. reset_leaves() should only * be called when there are no other threads that could be * accessing devices */ PMD(PMD_SX, ("cpr: reset leaves\n")) reset_leaves(); /* * If i_cpr_power_down() succeeds, it'll not return * * Drives with write-cache enabled need to flush * their cache. */ if (fcn != AD_CPR_TESTHALT) { PMD(PMD_SX, ("cpr: power down\n")) (void) i_cpr_power_down(cpr_sleeptype); } ASSERT(cpr_sleeptype == CPR_TODISK); /* currently CPR_TODISK comes back via a boot path */ CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n"); halt(NULL); /* NOTREACHED */ #endif } PMD(PMD_SX, ("cpr: cpr done\n")) cpr_done(); return (rc); }
/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet. Since the state file will not be removed,
 * allocation should only be done once.
 *
 * vp is the statefile vnode (VREG on ufs, or VBLK for a raw slice);
 * alloc_retry nonzero means a prior estimate proved too small and the
 * size is simply scaled up rather than re-derived.  Returns 0 when the
 * statefile is (or has been made) large enough, else an errno value.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			/* i_size read under i_contents to get stable value */
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		/* let framebuffer callbacks report their dvram usage */
		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		/* nback: bytes of anon pages that may need to be dumped */
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			/* assume kernel pages compress to COMPRESS_PERCENT */
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		/* give up after C_MAX_ALLOC_RETRY grow attempts */
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 *	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}
/*
 * Write necessary machine dependent information to cpr state file,
 * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
 *
 * Emits a cmd_t descriptor, then the m_info machdep record (stack bias,
 * processor/window state, quick-save pc/sp, MMU contexts, resume thread
 * and entry point, temp stack), and finally the forth words in ustr[].
 * Returns 0 on success, or the errno from the failing cpr_write().
 */
int
i_cpr_write_machdep(vnode_t *vp)
{
	extern uint_t getpstate(), getwstate();
	extern uint_t i_cpr_tstack_size;
	const char ustr[] = ": unix-tte 2drop false ;";
	uintptr_t tinfo;
	label_t *ltp;
	cmd_t cmach;
	char *fmt;
	int rc;

	/*
	 * ustr[] is used as temporary forth words during
	 * slave startup sequence, see sfmmu_mp_startup()
	 */

	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
	/* payload = m_info record plus the trailing forth string */
	cmach.md_size = sizeof (m_info) + sizeof (ustr);

	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
		cpr_err(CE_WARN, "Failed to write descriptor.");
		return (rc);
	}

	/*
	 * m_info is now cleared in i_cpr_dump_setup()
	 */
	m_info.ksb = (uint32_t)STACK_BIAS;
	m_info.kpstate = (uint16_t)getpstate();
	m_info.kwstate = (uint16_t)getwstate();
	CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
	    m_info.ksb, m_info.kpstate, m_info.kwstate);

	/* resume will longjmp through the lwp quick-save label */
	ltp = &ttolwp(curthread)->lwp_qsav;
	m_info.qsav_pc = (cpr_ext)ltp->val[0];
	m_info.qsav_sp = (cpr_ext)ltp->val[1];

	/*
	 * Set secondary context to INVALID_CONTEXT to force the HAT
	 * to re-setup the MMU registers and locked TTEs it needs for
	 * TLB miss handling.
	 */
	m_info.mmu_ctx_sec = INVALID_CONTEXT;
	m_info.mmu_ctx_pri = KCONTEXT;

	tinfo = (uintptr_t)curthread;
	m_info.thrp = (cpr_ptr)tinfo;

	tinfo = (uintptr_t)i_cpr_resume_setup;
	m_info.func = (cpr_ptr)tinfo;

	/*
	 * i_cpr_data_page is comprised of a 4K stack area and a few
	 * trailing data symbols; the page is shared by the prom and
	 * kernel during resume.  the stack size is recorded here
	 * and used by cprboot to set %sp
	 */
	tinfo = (uintptr_t)&i_cpr_data_page;
	m_info.tmp_stack = (cpr_ptr)tinfo;
	m_info.tmp_stacksize = i_cpr_tstack_size;

	m_info.test_mode = cpr_test_mode;

	i_cpr_save_cpu_info();

	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
		cpr_err(CE_WARN, "Failed to write machdep info.");
		return (rc);
	}

	fmt = "error writing %s forth info";
	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
		cpr_err(CE_WARN, fmt, "unix-tte");

	return (rc);
}
/* * find prom phys pages and alloc space for a tmp copy */ static int i_cpr_find_ppages(void) { struct page *pp; struct memlist *pmem; pgcnt_t npages, pcnt, scnt, vcnt; pfn_t ppn, plast, *dst; int mapflag; cpr_clear_bitmaps(); mapflag = REGULAR_BITMAP; /* * there should be a page_t for each phys page used by the kernel; * set a bit for each phys page not tracked by a page_t */ pcnt = 0; memlist_read_lock(); for (pmem = phys_install; pmem; pmem = pmem->ml_next) { npages = mmu_btop(pmem->ml_size); ppn = mmu_btop(pmem->ml_address); for (plast = ppn + npages; ppn < plast; ppn++) { if (page_numtopp_nolock(ppn)) continue; (void) cpr_setbit(ppn, mapflag); pcnt++; } } memlist_read_unlock(); /* * clear bits for phys pages in each segment */ scnt = cpr_count_seg_pages(mapflag, cpr_clrbit); /* * set bits for phys pages referenced by the promvp vnode; * these pages are mostly comprised of forthdebug words */ vcnt = 0; for (pp = promvp.v_pages; pp; ) { if (cpr_setbit(pp->p_offset, mapflag) == 0) vcnt++; pp = pp->p_vpnext; if (pp == promvp.v_pages) break; } /* * total number of prom pages are: * (non-page_t pages - seg pages + vnode pages) */ ppage_count = pcnt - scnt + vcnt; CPR_DEBUG(CPR_DEBUG1, "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n", pcnt, scnt, vcnt, ppage_count); /* * alloc array of pfn_t to store phys page list */ pphys_list_size = ppage_count * sizeof (pfn_t); pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP); if (pphys_list == NULL) { cpr_err(CE_WARN, "cannot alloc pphys_list"); return (ENOMEM); } /* * phys pages referenced in the bitmap should be * those used by the prom; scan bitmap and save * a list of prom phys page numbers */ dst = pphys_list; memlist_read_lock(); for (pmem = phys_install; pmem; pmem = pmem->ml_next) { npages = mmu_btop(pmem->ml_size); ppn = mmu_btop(pmem->ml_address); for (plast = ppn + npages; ppn < plast; ppn++) { if (cpr_isset(ppn, mapflag)) { ASSERT(dst < (pphys_list + ppage_count)); *dst++ = ppn; } } } memlist_read_unlock(); 
/* * allocate space to store prom pages */ ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP); if (ppage_buf == NULL) { kmem_free(pphys_list, pphys_list_size); pphys_list = NULL; cpr_err(CE_WARN, "cannot alloc ppage_buf"); return (ENOMEM); } return (0); }