/*===========================================================================*
 *				sef_cb_init_fresh			     *
 *===========================================================================*
 * Fresh-start initialization callback of the reincarnation server (RS).
 *
 * Work done, in order:
 *   - read the "rs_verbose" setting and the system timer frequency;
 *   - create a read grant on the public process table (rprocpub);
 *   - fetch the boot image table and check that it lists as many system
 *     services as boot_image_priv_table does;
 *   - reset every slot of the system process table;
 *   - step 1: fill in one slot per boot service (privileges, sys/dev
 *     properties, command, call masks, scheduling parameters, defaults);
 *   - step 2: let every boot service run and send it a fresh-init request;
 *   - step 3: collect the init-ready messages not caught during step 2;
 *   - step 4: obtain the pid of every boot service;
 *   - arm the periodic alarm (RS_DELTA_T);
 *   - if USE_LIVEUPDATE is set: clone RS' slot, fork a new RS instance and
 *     live update the old instance into the new one.
 *
 * Both parameters are unused (they are wrapped in UNUSED()).
 *
 * Returns OK. Every failure detected here ends in panic(), so no error code
 * is ever returned to the caller.
 */
static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *UNUSED(info))
{
/* Initialize the reincarnation server. */
  struct boot_image *ip;
  int s,i;
  int nr_image_srvs, nr_image_priv_srvs, nr_uncaught_init_srvs;
  struct rproc *rp;
  struct rprocpub *rpub;
  struct boot_image image[NR_BOOT_PROCS];
  struct boot_image_priv *boot_image_priv;
  struct boot_image_sys *boot_image_sys;
  struct boot_image_dev *boot_image_dev;
  int ipc_to;
  int *calls;
  /* Call lists handed to fill_call_mask(): "all calls" and "no calls". */
  int all_c[] = { ALL_C, NULL_C };
  int no_c[] = { NULL_C };

  /* See if we run in verbose mode. */
  env_parse("rs_verbose", "d", 0, &rs_verbose, 0, 1);

  /* Fetch the system timer frequency into system_hz. */
  if ((s = sys_getinfo(GET_HZ, &system_hz, sizeof(system_hz), 0, 0)) != OK)
      panic("Cannot get system timer frequency\n");

  /* Initialize the global init descriptor.
   * NOTE(review): sizeof(rprocpub) covers the whole table only if rprocpub
   * is visible here as an array rather than a pointer -- confirm against its
   * declaration.
   */
  rinit.rproctab_gid = cpf_grant_direct(ANY, (vir_bytes) rprocpub,
      sizeof(rprocpub), CPF_READ);
  if(!GRANT_VALID(rinit.rproctab_gid)) {
      panic("unable to create rprocpub table grant: %d", rinit.rproctab_gid);
  }

  /* Initialize some global variables. */
  rupdate.flags = 0;
  shutting_down = FALSE;

  /* Get a copy of the boot image table. */
  if ((s = sys_getimage(image)) != OK) {
      panic("unable to get copy of boot image table: %d", s);
  }

  /* Determine the number of system services in the boot image table. */
  nr_image_srvs = 0;
  for(i=0;i<NR_BOOT_PROCS;i++) {
      ip = &image[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(ip->endpoint))) {
          continue;
      }
      nr_image_srvs++;
  }

  /* Determine the number of entries in the boot image priv table and make sure
   * it matches the number of system services in the boot image table.
   * The priv table is terminated by an entry whose endpoint is NULL_BOOT_NR.
   */
  nr_image_priv_srvs = 0;
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }
      nr_image_priv_srvs++;
  }
  if(nr_image_srvs != nr_image_priv_srvs) {
      panic("boot image table and boot image priv table mismatch");
  }

  /* Reset the system process table: clear the flags, link each slot to the
   * public entry with the same index, and mark that entry as not in use.
   */
  for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
      rp->r_flags = 0;
      rp->r_pub = &rprocpub[rp - rproc];
      rp->r_pub->in_use = FALSE;
  }

  /* Initialize the system process table in 4 steps, each of them following
   * the appearance of system services in the boot image priv table.
   * - Step 1: set privileges, sys properties, and dev properties (if any)
   * for every system service.
   */
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding entries in other tables. */
      boot_image_info_lookup(boot_image_priv->endpoint, image,
          &ip, NULL, &boot_image_sys, &boot_image_dev);

      /* The slot index equals the entry's index in boot_image_priv_table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /*
       * Set privileges.
       */
      /* Get label.
       * NOTE(review): unbounded copy; relies on the boot label fitting in
       * rpub->label -- confirm the buffer sizes.
       */
      strcpy(rpub->label, boot_image_priv->label);

      /* Force a static priv id for system services in the boot image. */
      rp->r_priv.s_id = static_priv_id(
          _ENDPOINT_P(boot_image_priv->endpoint));

      /* Initialize privilege bitmaps and signal manager. */
      rp->r_priv.s_flags = boot_image_priv->flags;          /* priv flags */
      rp->r_priv.s_trap_mask= SRV_OR_USR(rp, SRV_T, USR_T); /* traps */
      ipc_to = SRV_OR_USR(rp, SRV_M, USR_M);                /* targets */
      fill_send_mask(&rp->r_priv.s_ipc_to, ipc_to == ALL_M);
      rp->r_priv.s_sig_mgr= SRV_OR_USR(rp, SRV_SM, USR_SM); /* sig mgr */
      rp->r_priv.s_bak_sig_mgr = NONE;                      /* backup sig mgr */

      /* Initialize kernel call mask bitmap. */
      calls = SRV_OR_USR(rp, SRV_KC, USR_KC) == ALL_C ? all_c : no_c;
      fill_call_mask(calls, NR_SYS_CALLS,
          rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE);

      /* Set the privilege structure. RS and VM are exceptions and are already
       * running.
       */
      if(boot_image_priv->endpoint != RS_PROC_NR &&
         boot_image_priv->endpoint != VM_PROC_NR) {
          if ((s = sys_privctl(ip->endpoint, SYS_PRIV_SET_SYS, &(rp->r_priv)))
              != OK) {
              panic("unable to set privilege structure: %d", s);
          }
      }

      /* Synch the privilege structure with the kernel. */
      if ((s = sys_getpriv(&(rp->r_priv), ip->endpoint)) != OK) {
          panic("unable to synch privilege structure: %d", s);
      }

      /*
       * Set sys properties.
       */
      rpub->sys_flags = boot_image_sys->flags;        /* sys flags */

      /*
       * Set dev properties.
       */
      rpub->dev_flags = boot_image_dev->flags;        /* device flags */
      rpub->dev_nr = boot_image_dev->dev_nr;          /* major device number */
      rpub->dev_style = boot_image_dev->dev_style;    /* device style */
      rpub->dev_style2 = boot_image_dev->dev_style2;  /* device style 2 */

      /* Build command settings. This will also set the process name. */
      strlcpy(rp->r_cmd, ip->proc_name, sizeof(rp->r_cmd));
      rp->r_script[0]= '\0';
      build_cmd_dep(rp);

      /* Initialize vm call mask bitmap. */
      calls = SRV_OR_USR(rp, SRV_VC, USR_VC) == ALL_C ? all_c : no_c;
      fill_call_mask(calls, NR_VM_CALLS, rpub->vm_call_mask, VM_RQ_BASE, TRUE);

      /* Scheduling parameters. */
      rp->r_scheduler = SRV_OR_USR(rp, SRV_SCH, USR_SCH);
      rp->r_priority = SRV_OR_USR(rp, SRV_Q, USR_Q);
      rp->r_quantum = SRV_OR_USR(rp, SRV_QT, USR_QT);

      /* Get some settings from the boot image table. */
      rpub->endpoint = ip->endpoint;

      /* Set some defaults. */
      rp->r_old_rp = NULL;            /* no old version yet */
      rp->r_new_rp = NULL;            /* no new version yet */
      rp->r_prev_rp = NULL;           /* no prev replica yet */
      rp->r_next_rp = NULL;           /* no next replica yet */
      rp->r_uid = 0;                  /* root */
      rp->r_check_tm = 0;             /* not checked yet */
      getticks(&rp->r_alive_tm);      /* currently alive */
      rp->r_stop_tm = 0;              /* not exiting yet */
      rp->r_restarts = 0;             /* no restarts so far */
      rp->r_period = 0;               /* no period yet */
      rp->r_exec = NULL;              /* no in-memory copy yet */
      rp->r_exec_len = 0;

      /* Mark as in use and active, and register the slot in rproc_ptr under
       * the process number of the service's endpoint.
       */
      rp->r_flags = RS_IN_USE | RS_ACTIVE;
      rproc_ptr[_ENDPOINT_P(rpub->endpoint)]= rp;
      rpub->in_use = TRUE;
  }

  /* - Step 2: allow every system service in the boot image to run.
   * nr_uncaught_init_srvs counts the services whose init-ready message is
   * not caught here and is therefore left for step 3.
   */
  nr_uncaught_init_srvs = 0;
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding slot in the system process table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /* RS/VM are already running as we speak: only request their
       * initialization, no scheduling or privilege setup is done for them.
       */
      if(boot_image_priv->endpoint == RS_PROC_NR ||
         boot_image_priv->endpoint == VM_PROC_NR) {
          if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) {
              panic("unable to initialize %d: %d", boot_image_priv->endpoint, s);
          }
          continue;
      }

      /* Allow the service to run. */
      if ((s = sched_init_proc(rp)) != OK) {
          panic("unable to initialize scheduling: %d", s);
      }
      if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
          panic("unable to initialize privileges: %d", s);
      }

      /* Initialize service. We assume every service will always get
       * back to us here at boot time. Only entries flagged SYS_PROC are
       * sent an init request.
       */
      if(boot_image_priv->flags & SYS_PROC) {
          if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) {
              panic("unable to initialize service: %d", s);
          }
          if(rpub->sys_flags & SF_SYNCH_BOOT) {
              /* Catch init ready message now to synchronize. */
              catch_boot_init_ready(rpub->endpoint);
          }
          else {
              /* Catch init ready message later. */
              nr_uncaught_init_srvs++;
          }
      }
  }

  /* - Step 3: let every system service complete initialization by
   * catching all the init ready messages left.
   */
  while(nr_uncaught_init_srvs) {
      catch_boot_init_ready(ANY);
      nr_uncaught_init_srvs--;
  }

  /* - Step 4: all the system services in the boot image are now running.
   * Complete the initialization of the system process table in collaboration
   * with other system services.
   */
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding slot in the system process table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /* Get pid from PM. */
      rp->r_pid = getnpid(rpub->endpoint);
      if(rp->r_pid == -1) {
          panic("unable to get pid");
      }
  }

  /* Set alarm to periodically check service status. */
  if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
      panic("couldn't set alarm: %d", s);

#if USE_LIVEUPDATE
  /* Now create a new RS instance and let the current
   * instance live update into the replica. Clone RS' own slot first.
   *
   * NOTE(review): pid, replica_rp, replica_pid and replica_endpoint are not
   * declared among this function's locals; presumably they are declared at
   * file scope -- confirm, otherwise this section cannot compile when
   * USE_LIVEUPDATE is enabled.
   */
  rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
  if((s = clone_slot(rp, &replica_rp)) != OK) {
      panic("unable to clone current RS instance: %d", s);
  }

  /* Fork a new RS instance with root:operator. */
  pid = srv_fork(0, 0);
  if(pid == -1) {
      panic("unable to fork a new RS instance");
  }
  /* Both instances execute the lines below; the one that got pid == 0 from
   * srv_fork() uses its own pid as the replica pid.
   */
  replica_pid = pid ? pid : getpid();
  replica_endpoint = getnprocnr(replica_pid);
  replica_rp->r_pid = replica_pid;
  replica_rp->r_pub->endpoint = replica_endpoint;

  if(pid == 0) {
      /* New RS instance running. */

      /* Live update the old instance into the new one. */
      s = update_service(&rp, &replica_rp, RS_SWAP);
      if(s != OK) {
          panic("unable to live update RS: %d", s);
      }
      cpf_reload();

      /* Clean up the old RS instance, the new instance will take over. */
      cleanup_service(rp);

      /* Ask VM to pin memory for the new RS instance. */
      if((s = vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN)) != OK) {
          panic("unable to pin memory for the new RS instance: %d", s);
      }
  }
  else {
      /* Old RS instance running. */

      /* Set up privileges for the new instance and let it run. */
      s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv));
      if(s != OK) {
          panic("unable to set privileges for the new RS instance: %d", s);
      }
      if ((s = sched_init_proc(replica_rp)) != OK) {
          panic("unable to initialize RS replica scheduling: %d", s);
      }
      /* Hand control to the replica; the old instance is not expected to
       * continue past this call (see NOT_REACHABLE below).
       */
      s = sys_privctl(replica_endpoint, SYS_PRIV_YIELD, NULL);
      if(s != OK) {
          panic("unable to yield control to the new RS instance: %d", s);
      }
      NOT_REACHABLE;
  }
#endif /* USE_LIVEUPDATE */

  return(OK);
}
int main(int argc, char **argv) { int err; struct capfs_upcall up; struct capfs_downcall down; struct capfs_dirent *dent = NULL; char *link_name = NULL; int opt = 0; int capfsd_log_level = CRITICAL_MSG | WARNING_MSG; char options[256]; struct cas_options cas_options = { doInstrumentation:0, use_sockets:0, }; #ifdef DEBUG capfsd_log_level |= INFO_MSG; capfsd_log_level |= DEBUG_MSG; #endif set_log_level(capfsd_log_level); /* capfsd must register a callback with the meta-data server at the time of mount */ check_for_registration = 1; while((opt = getopt(argc, argv, "dhsn:p:")) != EOF) { switch(opt){ case 's': cas_options.use_sockets = 1; break; case 'd': is_daemon = 0; break; case 'p': err = sscanf(optarg, "%x", &capfs_debug); if(err != 1){ usage(); exiterror("bad arguments"); exit(1); } break; case 'n': num_threads = atoi(optarg); break; case 'h': usage(); exit(0); case '?': default: usage(); exiterror("bad arguments"); exit(1); } } if (getuid() != 0 && geteuid() != 0) { exiterror("must be run as root"); exit(1); } if (setup_capfsdev() < 0) { exiterror("setup_capfsdev() failed"); exit(1); } if ((dev_fd = open_capfsdev()) < 0) { exiterror("open_capfsdev() failed"); exit(1); } startup(argc, argv); /* Initialize the plugin interface */ capfsd_plugin_init(); capfs_comm_init(); /* allocate a 64K, page-aligned buffer for small operations */ capfs_opt_io_size = ROUND_UP(CAPFS_OPT_IO_SIZE); if ((orig_iobuf = (char *) valloc(capfs_opt_io_size)) == NULL) { exiterror("calloc failed"); capfsd_plugin_cleanup(); exit(1); } memset(orig_iobuf, 0, capfs_opt_io_size); capfs_dent_size = ROUND_UP((FETCH_DENTRY_COUNT * sizeof(struct capfs_dirent))); /* allocate a suitably large dent buffer for getdents speed up */ if ((dent = (struct capfs_dirent *) valloc(capfs_dent_size)) == NULL) { exiterror("calloc failed"); capfsd_plugin_cleanup(); exit(1); } memset(dent, 0, capfs_dent_size); /* maximum size of a link target cannot be > 4096 */ capfs_link_size = ROUND_UP(4096); link_name = (char *) 
valloc(capfs_link_size); if(!link_name) { exiterror("calloc failed"); capfsd_plugin_cleanup(); exit(1); } memset(link_name, 0, capfs_link_size); fprintf(stderr, "------------ Starting client daemon servicing VFS requests using a thread pool [%d threads] ----------\n", num_threads); /* * Start up the local RPC service on both TCP/UDP * for callbacks. */ pmap_unset(CAPFS_CAPFSD, clientv1); if (setup_service(CAPFS_CAPFSD /* program number */, clientv1 /* version */, -1 /* both tcp/udp */, -1 /* any available port */, CAPFS_DISPATCH_FN(clientv1) /* dispatch routine */, &info) < 0) { exiterror("Could not setup local RPC service!\n"); capfsd_plugin_cleanup(); exit(1); } /* * Initialize the hash cache. * Note that we are using default values of cache sizes, * and this should probably be an exposed knob to the user. * CMGR_BSIZE is == CAPFS_MAXHASHLENGTH for SHA1-hash. So we dont need to set * that. We use environment variables to communicate the parameters * to the caches. */ snprintf(options, 256, "%d", CAPFS_CHUNK_SIZE); setenv("CMGR_CHUNK_SIZE", options, 1); snprintf(options, 256, "%d", CAPFS_HCACHE_COUNT); setenv("CMGR_BCOUNT", options, 1); init_hashes(); #if 0 /* * Initialize the client-side data cache. * Note that we are not using this layer * right now. It is getting fairly complicated already. */ snprintf(options, 256, "%d", CAPFS_DCACHE_BSIZE); setenv("CMGR_BSIZE", options, 1); snprintf(options, 256, "%d", CAPFS_DCACHE_COUNT); setenv("CMGR_BCOUNT", options, 1); #endif /* * Initialize the client-side data server communication * stuff. 
*/ clnt_init(&cas_options, num_threads, CAPFS_CHUNK_SIZE); /* loop forever, doing: * - read from device * - service request * - write back response */ for (;;) { struct timeval begin, end; err = read_capfsdev(dev_fd, &up, 30); if (err < 0) { /* cleanup the hash cache */ cleanup_hashes(); /* Cleanup the RPC service */ cleanup_service(&info); capfs_comm_shutdown(); close_capfsdev(dev_fd); /* cleanup the plugins */ capfsd_plugin_cleanup(); /* cleanup the client-side stuff */ clnt_finalize(); exiterror("read failed\n"); exit(1); } if (err == 0) { /* timed out */ capfs_comm_idle(); continue; } gettimeofday(&begin, NULL); /* the do_capfs_op() call does this already; can probably remove */ init_downcall(&down, &up); err = 0; switch (up.type) { /* all the easy operations */ case GETMETA_OP: case SETMETA_OP: case LOOKUP_OP: case CREATE_OP: case REMOVE_OP: case RENAME_OP: case SYMLINK_OP: case MKDIR_OP: case RMDIR_OP: case STATFS_OP: case HINT_OP: case FSYNC_OP: case LINK_OP: { PDEBUG(D_UPCALL, "read upcall; type = %d, name = %s\n", up.type, up.v1.fhname); err = do_capfs_op(&up, &down); if (err < 0) { PDEBUG(D_LIB, "do_capfs_op failed for type %d\n", up.type); } break; /* the more interesting ones */ } case GETDENTS_OP: /* need to pass location and size of buffer to do_capfs_op() */ up.xfer.ptr = dent; up.xfer.size = capfs_dent_size; err = do_capfs_op(&up, &down); if (err < 0) { PDEBUG(D_LIB, "do_capfs_op failed for getdents\n"); } break; case READLINK_OP: /* need to pass location and size of buffer to hold the target name */ up.xfer.ptr = link_name; up.xfer.size = capfs_link_size; err = do_capfs_op(&up, &down); if(err < 0) { PDEBUG(D_LIB, "do_capfs_op failed for readlink\n"); } break; case READ_OP: err = read_op(&up, &down); if (err < 0) { PDEBUG(D_LIB, "read_op failed\n"); } break; case WRITE_OP: err = write_op(&up, &down); if (err < 0) { PDEBUG(D_LIB, "do_capfs_op failed\n"); } break; /* things that aren't done yet */ default: err = -ENOSYS; break; } gettimeofday(&end, 
NULL); /* calculate the total time spent servicing this call */ if (end.tv_usec < begin.tv_usec) { end.tv_usec += 1000000; end.tv_sec--; } end.tv_sec -= begin.tv_sec; end.tv_usec -= begin.tv_usec; down.total_time = (end.tv_sec * 1000000 + end.tv_usec); down.error = err; switch(up.type) { case HINT_OP: /* this is a one shot hint, we don't want a response in case of HINT_OPEN/HINT_CLOSE */ if (up.u.hint.hint == HINT_CLOSE || up.u.hint.hint == HINT_OPEN) { err = 0; break; } /* fall through */ default: /* the default behavior is to write a response to the device */ err = write_capfsdev(dev_fd, &down, -1); if (err < 0) { /* cleanup the hash cache */ cleanup_hashes(); /* Cleanup the RPC service */ cleanup_service(&info); capfs_comm_shutdown(); close_capfsdev(dev_fd); /* Cleanup the plugins */ capfsd_plugin_cleanup(); /* cleanup the client-side stuff */ clnt_finalize(); exiterror("write failed"); exit(1); } break; } /* If we used a big I/O buffer, free it after we have successfully * returned the downcall. */ if (big_iobuf != NULL) { free(big_iobuf); big_iobuf = NULL; } } /* Not reached */ /* cleanup the hash cache */ cleanup_hashes(); /* Cleanup the RPC service */ cleanup_service(&info); capfs_comm_shutdown(); close_capfsdev(dev_fd); /* cleanup the plugins */ capfsd_plugin_cleanup(); /* cleanup the client-side stuff */ clnt_finalize(); exit(1); }