/*
 * Creates threads for the threadflows associated with a procflow.
 * The routine iterates through the list of threadflows in the
 * supplied procflow's pf_threads list. For each threadflow on
 * the list, it defines tf_instances number of cloned
 * threadflows, and then calls threadflow_createthread() for
 * each to create and start the actual operating system thread.
 * Note that each of the newly defined threadflows will be linked
 * into the procflows threadflow list, but at the head of the
 * list, so they will not become part of the supplied set. After
 * all the threads have been created, threadflow_init enters
 * a join loop for all the threads in the newly defined
 * threadflows. Once all the created threads have exited,
 * threadflow_init will return 0. If errors are encountered, it
 * will return a non zero value.
 */
int
threadflow_init(procflow_t *procflow)
{
	threadflow_t *threadflow = procflow->pf_threads;
	int ret = 0;

	(void) ipc_mutex_lock(&filebench_shm->shm_threadflow_lock);

	while (threadflow) {
		threadflow_t *newthread;
		int instances;
		int i;

		instances = avd_get_int(threadflow->tf_instances);
		filebench_log(LOG_VERBOSE,
		    "Starting %d %s threads",
		    instances, threadflow->tf_name);

		for (i = 1; i < instances; i++) {
			/* Create threads */
			newthread =
			    threadflow_define_common(procflow,
			    threadflow->tf_name, threadflow, i + 1);
			if (newthread == NULL) {
				/*
				 * BUG FIX: must release shm_threadflow_lock
				 * before bailing out, or every other user of
				 * the lock deadlocks.
				 */
				(void) ipc_mutex_unlock(
				    &filebench_shm->shm_threadflow_lock);
				return (-1);
			}
			ret |= threadflow_createthread(newthread);
		}

		newthread = threadflow_define_common(procflow,
		    threadflow->tf_name, threadflow, 1);

		if (newthread == NULL) {
			/* BUG FIX: same lock-leak as above */
			(void) ipc_mutex_unlock(
			    &filebench_shm->shm_threadflow_lock);
			return (-1);
		}

		/* Create each thread */
		ret |= threadflow_createthread(newthread);

		threadflow = threadflow->tf_next;
	}

	threadflow = procflow->pf_threads;

	(void) ipc_mutex_unlock(&filebench_shm->shm_threadflow_lock);

	while (threadflow) {
		/* wait for all threads to finish */
		if (threadflow->tf_tid) {
			void *status;

			/* each thread exits with &tf_abort (an int) */
			if (pthread_join(threadflow->tf_tid, &status) == 0)
				ret += *(int *)status;
		}
		threadflow = threadflow->tf_next;
	}

	procflow->pf_running = 0;

	return (ret);
}
/*
 * Composite flowop method. Does one pass through its list of
 * inner flowops per iteration, running each inner flowop
 * fo_iters times. Propagates DONE/NORSC/ERROR results from the
 * inner flowops; returns FILEBENCH_OK after a full clean pass.
 */
static int
flowop_composite(threadflow_t *threadflow, flowop_t *flowop)
{
	flowop_t *child;

	/* walk the list of sub flowops, starting at the head */
	for (child = flowop->fo_comp_fops; child != NULL;
	    child = child->fo_exec_next) {
		int iters, idx;

		/* Abort if asked */
		if (threadflow->tf_abort || filebench_shm->shm_f_abort)
			return (FILEBENCH_DONE);

		/* stamp the start time on first execution */
		if (child->fo_stats.fs_stime == 0)
			child->fo_stats.fs_stime = gethrtime();

		/* Execute the flowop for fo_iters times */
		iters = (int)avd_get_int(child->fo_iters);
		for (idx = 0; idx < iters; idx++) {
			int rc;

			filebench_log(LOG_DEBUG_SCRIPT,
			    "%s: executing flowop "
			    "%s-%d", threadflow->tf_name, child->fo_name,
			    child->fo_instance);

			rc = (*child->fo_func)(threadflow, child);
			switch (rc) {
			case FILEBENCH_DONE:
				/* all done */
				return (FILEBENCH_DONE);
			case FILEBENCH_NORSC:
				/* quit if inner flowop limit reached */
				return (FILEBENCH_NORSC);
			case FILEBENCH_ERROR:
				/* quit on inner flowop error */
				filebench_log(LOG_ERROR,
				    "inner flowop %s failed",
				    child->fo_name);
				return (FILEBENCH_ERROR);
			default:
				/* otherwise keep going */
				break;
			}
		}
	}

	/* finished with this pass */
	return (FILEBENCH_OK);
}
/* * The producer side of the event system. * Once eventgen_hz has been set by eventgen_setrate(), * the routine sends eventgen_hz events per second until * the program terminates. Events are posted by incrementing * filebench_shm->shm_eventgen_q by the number of generated * events then signalling the condition variable * filebench_shm->shm_eventgen_cv to indicate to event consumers * that more events are available. * * Eventgen_thread attempts to sleep for 10 event periods, * then, once awakened, determines how many periods actually * passed since sleeping, and issues a set of events equal * to the number of periods that it slept, thus keeping the * average rate at the requested rate. */ static void eventgen_thread(void) { hrtime_t last; last = gethrtime(); filebench_shm->shm_eventgen_enabled = FALSE; /* CONSTCOND */ while (1) { struct timespec sleeptime; hrtime_t delta; int count, rate; if (filebench_shm->shm_eventgen_hz == NULL) { (void) sleep(1); continue; } else { rate = avd_get_int(filebench_shm->shm_eventgen_hz); if (rate > 0) { filebench_shm->shm_eventgen_enabled = TRUE; } else { continue; } } /* Sleep for 10xperiod */ sleeptime.tv_sec = 0; sleeptime.tv_nsec = FB_SEC2NSEC / rate; sleeptime.tv_nsec *= 10; if (sleeptime.tv_nsec < 1000UL) sleeptime.tv_nsec = 1000UL; sleeptime.tv_sec = sleeptime.tv_nsec / FB_SEC2NSEC; if (sleeptime.tv_sec > 0) sleeptime.tv_nsec -= (sleeptime.tv_sec * FB_SEC2NSEC); (void) nanosleep(&sleeptime, NULL); delta = gethrtime() - last; last = gethrtime(); count = (rate * delta) / FB_SEC2NSEC; filebench_log(LOG_DEBUG_SCRIPT, "delta %llums count %d", (u_longlong_t)(delta / 1000000), count); /* Send 'count' events */ (void) ipc_mutex_lock(&filebench_shm->shm_eventgen_lock); /* Keep the producer with a max of 5 second depth */ if (filebench_shm->shm_eventgen_q < (5 * rate)) filebench_shm->shm_eventgen_q += count; (void) pthread_cond_signal(&filebench_shm->shm_eventgen_cv); (void) ipc_mutex_unlock(&filebench_shm->shm_eventgen_lock); } }
/*
 * Calls the flowop's initialization function, pointed to by
 * flowop->fo_init. Returns 0 on success, -1 if the init
 * routine reports failure.
 */
static int
flowop_initflow(flowop_t *flowop)
{
	/*
	 * Cache static copies of the value and working-set size, since
	 * either may be supplied from a random variable and must stay
	 * fixed for this flowop instance.
	 */
	if (!AVD_IS_STRING(flowop->fo_value))
		flowop->fo_constvalue = avd_get_int(flowop->fo_value);
	flowop->fo_constwss = avd_get_int(flowop->fo_wss);

	if ((*flowop->fo_init)(flowop) >= 0)
		return (0);

	filebench_log(LOG_ERROR, "flowop %s-%d init failed",
	    flowop->fo_name, flowop->fo_instance);
	return (-1);
}
/*
 * Produces a 32 bit random number, either from the supplied
 * variable (avd) or by reading the urandom "file". Shuts down
 * the run if the urandom read fails. The result is clipped to
 * [0, max - round] and optionally rounded down to a multiple
 * of "round". Returns 0 on success, -1 on failure
 * (max < round).
 */
int
filebench_randomno32(uint32_t *randp, uint32_t max, uint32_t round, avd_t avd)
{
	uint32_t random;
	uint32_t divisor;

	/* check for round value too large */
	if (max <= round) {
		*randp = 0;

		/* if it just fits, its ok, otherwise error */
		if (max == round)
			return (0);
		else
			return (-1);
	}

	if (avd) {
		/* get it from the variable */
		random = (uint32_t)avd_get_int(avd);
	} else {
		/* get it from urandom */
		if (read(urandomfd, &random,
		    sizeof (uint32_t)) != sizeof (uint32_t)) {
			filebench_log(LOG_ERROR,
			    "read /dev/urandom failed: %s", strerror(errno));
			filebench_shutdown(1);
		}
	}

	/* clip with max and optionally round */
	max -= round;
	/*
	 * BUG FIX: if max > FILEBENCH_RANDMAX32 the quotient below was 0,
	 * causing a divide-by-zero; clamp the divisor to 1 in that case
	 * (the subsequent random > max check still clips correctly).
	 */
	divisor = FILEBENCH_RANDMAX32 / max;
	if (divisor == 0)
		divisor = 1;
	random = random / divisor;

	if (round) {
		random = random / round;
		random *= round;
	}
	if (random > max)
		random = max;

	*randp = random;
	return (0);
}
/*
 * Sets the event generator rate to that supplied by
 * var_t *rate. A NULL rate is stored (disabling the
 * generator's rate source) but logged as an error.
 */
void
eventgen_setrate(avd_t rate)
{
	/* store first: a NULL rate intentionally clears the setting */
	filebench_shm->shm_eventgen_hz = rate;

	if (rate == NULL) {
		filebench_log(LOG_ERROR,
		    "eventgen_setrate() called without a rate");
		return;
	}

	if (AVD_IS_VAR(rate)) {
		filebench_log(LOG_VERBOSE,
		    "Eventgen rate taken from variable");
		return;
	}

	filebench_log(LOG_VERBOSE,
	    "Eventgen: %llu per second",
	    (u_longlong_t)avd_get_int(rate));
}
/*
 * Set the random seed in the supplied random object by loading
 * the low-order 48 bits of the seed value into the object's
 * rand48-style three-word state array.
 */
static void
rand_seed_set(randdist_t *rndp)
{
	union {
		uint64_t ll;
		uint16_t w[4];
	} seedval;
	int i;

	seedval.ll = (uint64_t)avd_get_int(rndp->rnd_seed);

	/* copy three 16-bit words; on big-endian hosts the
	 * low-order words are at the high array indices */
	for (i = 0; i < 3; i++) {
#ifdef _BIG_ENDIAN
		rndp->rnd_xi[i] = seedval.w[3 - i];
#else
		rndp->rnd_xi[i] = seedval.w[i];
#endif
	}
}
/*
 * Creates a thread for the supplied threadflow. If interprocess
 * shared memory is desired, then increments the amount of shared
 * memory needed by the amount specified in the threadflow's
 * tf_memsize parameter. The thread starts in routine
 * flowop_start() with a pointer to the threadflow supplied
 * as the argument.
 */
static int
threadflow_createthread(threadflow_t *threadflow)
{
	fbint_t memsize;

	memsize = avd_get_int(threadflow->tf_memsize);
	threadflow->tf_constmemsize = memsize;

	/*
	 * BUG FIX: %ld mismatched the 64-bit fbint_t argument (undefined
	 * behavior on 32-bit targets); use %llu with an explicit cast,
	 * matching the (u_longlong_t) convention used elsewhere in the file.
	 */
	filebench_log(LOG_DEBUG_SCRIPT, "Creating thread %s, memory = %llu",
	    threadflow->tf_name, (u_longlong_t)memsize);

	if (threadflow->tf_attrs & THREADFLOW_USEISM)
		filebench_shm->shm_required += memsize;

	if (pthread_create(&threadflow->tf_tid, NULL,
	    (void *(*)(void*))flowop_start, threadflow) != 0) {
		filebench_log(LOG_ERROR, "thread create failed");
		filebench_shutdown(1);
		return (FILEBENCH_ERROR);
	}

	return (FILEBENCH_OK);
}
static int posset_rnd_fill(struct posset *ps) { uint64_t pos; int i; if (!avd_get_int(ps->ps_rnd_max)) { filebench_log(LOG_ERROR, "posset_rnd_fill: maximum value " "for random posset is not specified"); return -1; } for (i = 0; i < avd_get_int(ps->ps_entries); i++) { if (!avd_get_int(ps->ps_rnd_seed)) { filebench_log(LOG_INFO, "seed was not specified," "using /dev/urandom when filling posset"); fb_urandom64(&pos, avd_get_int(ps->ps_rnd_max), POSSET_POS_ALIGNMENT, NULL); } else { /* XXX: this code below MUST eventually be moved to fb_random.c */ if (i == 0) init_random_my(); pos = ((double)rand() / RAND_MAX) * UINT64_MAX; //printf("pos value : %f\n",pos); pos = pos / (UINT64_MAX / (avd_get_int(ps->ps_rnd_max) + POSSET_POS_ALIGNMENT)); if (pos > avd_get_int(ps->ps_rnd_max)) pos = avd_get_int(ps->ps_rnd_max); pos = pos / POSSET_POS_ALIGNMENT; pos = pos * POSSET_POS_ALIGNMENT; } ps->ps_positions[i] = pos; } return 0; }
/*
 * The final initialization and main execution loop for the
 * worker threads. Sets threadflow and flowop start times,
 * waits for all process to start, then creates the runtime
 * flowops from those defined by the F language workload
 * script. It does some more initialization, then enters a
 * loop to repeatedly execute the flowops on the flowop list
 * until an abort condition is detected, at which time it exits.
 * This is the starting routine for the new worker thread
 * created by threadflow_createthread(), and is not currently
 * called from anywhere else.
 */
void
flowop_start(threadflow_t *threadflow)
{
	flowop_t *flowop;
	size_t memsize;
	int ret = FILEBENCH_OK;

	set_thread_ioprio(threadflow);

#ifdef HAVE_PROC_PID_LWP
	char procname[128];
	long ctl[2] = {PCSET, PR_MSACCT};
	int pfd;

	(void) snprintf(procname, sizeof (procname),
	    "/proc/%d/lwp/%d/lwpctl", (int)my_pid, _lwp_self());
	pfd = open(procname, O_WRONLY);
	/* BUG FIX: don't pwrite/close a failed (-1) file descriptor */
	if (pfd >= 0) {
		(void) pwrite(pfd, &ctl, sizeof (ctl), 0);
		(void) close(pfd);
	}
#endif

	/* zero the global control stats exactly once */
	(void) ipc_mutex_lock(&controlstats_lock);
	if (!controlstats_zeroed) {
		(void) memset(&controlstats, 0, sizeof (controlstats));
		controlstats_zeroed = 1;
	}
	(void) ipc_mutex_unlock(&controlstats_lock);

	flowop = threadflow->tf_thrd_fops;

	/* Hold the flowop find lock as reader to prevent lookups */
	(void) pthread_rwlock_rdlock(&filebench_shm->shm_flowop_find_lock);

	/* Create the runtime flowops from those defined by the script */
	(void) ipc_mutex_lock(&filebench_shm->shm_flowop_lock);
	if (flowop_create_runtime_flowops(threadflow,
	    &threadflow->tf_thrd_fops) != FILEBENCH_OK) {
		(void) ipc_mutex_unlock(&filebench_shm->shm_flowop_lock);
		filebench_shutdown(1);
		return;
	}
	(void) ipc_mutex_unlock(&filebench_shm->shm_flowop_lock);

	/* Release the find lock as reader to allow lookups */
	(void) pthread_rwlock_unlock(&filebench_shm->shm_flowop_find_lock);

	/* Set to the start of the new flowop list */
	flowop = threadflow->tf_thrd_fops;

	memsize = (size_t)threadflow->tf_constmemsize;

	/* If we are going to use ISM, allocate later */
	if (threadflow->tf_attrs & THREADFLOW_USEISM) {
		threadflow->tf_mem = ipc_ismmalloc(memsize);
	} else {
		threadflow->tf_mem = malloc(memsize);
	}

	/* BUG FIX: abort on allocation failure instead of NULL deref below */
	if (threadflow->tf_mem == NULL) {
		filebench_log(LOG_ERROR, "thread memory allocation failed");
		filebench_shutdown(1);
		return;
	}

	(void) memset(threadflow->tf_mem, 0, memsize);
	/* BUG FIX: %zu for size_t (was %d) */
	filebench_log(LOG_DEBUG_SCRIPT, "Thread allocated %zu bytes", memsize);

#ifdef HAVE_LWPS
	/* BUG FIX: %p for the pointer argument (was %zx) */
	filebench_log(LOG_DEBUG_SCRIPT, "Thread %p (%d) started",
	    (void *)threadflow, _lwp_self());
#endif

	/*
	 * Now we set tf_running flag to indicate to the main process
	 * that the worker thread is running. However, the thread is
	 * still not executing the workload, as it is blocked by the
	 * shm_run_lock. Main thread will release this lock when all
	 * threads set their tf_running flag to 1.
	 */
	threadflow->tf_abort = 0;
	threadflow->tf_running = 1;

	/*
	 * Block until all processes have started, acting like
	 * a barrier. The original filebench process initially
	 * holds the run_lock as a reader, preventing any of the
	 * threads from obtaining the writer lock, and hence
	 * passing this point. Once all processes and threads
	 * have been created, the original process unlocks
	 * run_lock, allowing each waiting thread to lock
	 * and then immediately unlock it, then begin running.
	 */
	(void) pthread_rwlock_wrlock(&filebench_shm->shm_run_lock);
	(void) pthread_rwlock_unlock(&filebench_shm->shm_run_lock);

	/* Main filebench worker loop */
	while (ret == FILEBENCH_OK) {
		int i, count;

		/* Abort if asked */
		if (threadflow->tf_abort || filebench_shm->shm_f_abort)
			break;

		/* Be quiet while stats are gathered */
		if (filebench_shm->shm_bequiet) {
			(void) sleep(1);
			continue;
		}

		/* Take it easy until everyone is ready to go */
		if (!filebench_shm->shm_procs_running) {
			(void) sleep(1);
			continue;
		}

		if (flowop == NULL) {
			filebench_log(LOG_ERROR, "flowop_read null flowop");
			return;
		}

		/* Execute the flowop for fo_iters times */
		count = (int)avd_get_int(flowop->fo_iters);
		for (i = 0; i < count; i++) {

			filebench_log(LOG_DEBUG_SCRIPT, "%s: executing flowop "
			    "%s-%d", threadflow->tf_name, flowop->fo_name,
			    flowop->fo_instance);

			ret = (*flowop->fo_func)(threadflow, flowop);

			/*
			 * Return value FILEBENCH_ERROR means "flowop
			 * failed, stop the filebench run"
			 */
			if (ret == FILEBENCH_ERROR) {
				filebench_log(LOG_ERROR,
				    "%s-%d: flowop %s-%d failed",
				    threadflow->tf_name,
				    threadflow->tf_instance,
				    flowop->fo_name,
				    flowop->fo_instance);
				(void) ipc_mutex_lock(&threadflow->tf_lock);
				threadflow->tf_abort = 1;
				filebench_shm->shm_f_abort =
				    FILEBENCH_ABORT_ERROR;
				(void) ipc_mutex_unlock(&threadflow->tf_lock);
				break;
			}

			/*
			 * Return value of FILEBENCH_NORSC means "stop
			 * the filebench run" if in "end on no work mode",
			 * otherwise it indicates an error
			 */
			if (ret == FILEBENCH_NORSC) {
				(void) ipc_mutex_lock(&threadflow->tf_lock);
				threadflow->tf_abort = FILEBENCH_DONE;
				if (filebench_shm->shm_rmode ==
				    FILEBENCH_MODE_Q1STDONE) {
					filebench_shm->shm_f_abort =
					    FILEBENCH_ABORT_RSRC;
				} else if (filebench_shm->shm_rmode !=
				    FILEBENCH_MODE_QALLDONE) {
					filebench_log(LOG_ERROR1,
					    "WARNING! Run stopped early:\n "
					    " flowop %s-%d could "
					    "not obtain a file. Please\n "
					    " reduce runtime, "
					    "increase fileset entries "
					    "($nfiles), or switch modes.",
					    flowop->fo_name,
					    flowop->fo_instance);
					filebench_shm->shm_f_abort =
					    FILEBENCH_ABORT_ERROR;
				}
				(void) ipc_mutex_unlock(&threadflow->tf_lock);
				break;
			}

			/*
			 * Return value of FILEBENCH_DONE means "stop
			 * the filebench run without error"
			 */
			if (ret == FILEBENCH_DONE) {
				(void) ipc_mutex_lock(&threadflow->tf_lock);
				threadflow->tf_abort = FILEBENCH_DONE;
				filebench_shm->shm_f_abort =
				    FILEBENCH_ABORT_DONE;
				(void) ipc_mutex_unlock(&threadflow->tf_lock);
				break;
			}

			/*
			 * If we get here and the return is something other
			 * than FILEBENCH_OK, it means a spurious code
			 * was returned, so treat as major error. This
			 * probably indicates a bug in the flowop.
			 */
			if (ret != FILEBENCH_OK) {
				filebench_log(LOG_ERROR,
				    "Flowop %s unexpected return value = %d\n",
				    flowop->fo_name, ret);
				filebench_shm->shm_f_abort =
				    FILEBENCH_ABORT_ERROR;
				break;
			}
		}

		/* advance to next flowop */
		flowop = flowop->fo_exec_next;

		/* but if at end of list, start over from the beginning */
		if (flowop == NULL) {
			flowop = threadflow->tf_thrd_fops;
			threadflow->tf_stats.fs_count++;
		}
	}

#ifdef HAVE_LWPS
	filebench_log(LOG_DEBUG_SCRIPT, "Thread %d exiting", _lwp_self());
#endif

	/* Tell flowops to destroy locally acquired state */
	flowop_destruct_all_flows(threadflow);

	pthread_exit(&threadflow->tf_abort);
}
/*
 * Puts the current event rate in the integer portion of the
 * supplied var_t. Returns a pointer to the var_t.
 */
var_t *
eventgen_ratevar(var_t *var)
{
	/* NOTE(review): assumes shm_eventgen_hz is non-NULL here —
	 * avd_get_int on a NULL avd is not checked; confirm callers */
	fbint_t hz = avd_get_int(filebench_shm->shm_eventgen_hz);

	VAR_SET_INT(var, hz);
	return (var);
}
/*
 * Allocates a posset in the shared IPC region, fills in its type
 * ("rnd" or "collection"), seed, max and entry count, generates
 * (or loads) its positions, and links it onto the global posset
 * list. Returns the new posset, or NULL on any failure.
 */
struct posset *
posset_alloc(avd_t name, avd_t type, avd_t seed, avd_t max, avd_t entries)
{
	struct posset *ps;
	const char *typestr;
	int ret;

	ps = (struct posset *)ipc_malloc(FILEBENCH_POSSET);
	if (!ps) {
		filebench_log(LOG_ERROR, "posset_alloc: "
		    "can't malloc posset in IPC region");
		return NULL;
	}

	/*
	 * Only "rnd" and "collection" posset types are supported.
	 * (Comment fixed: it previously claimed only "rnd" was.)
	 * avd_get_str(type) is fetched once rather than per comparison.
	 */
	typestr = avd_get_str(type);
	if (!strcmp(typestr, "rnd")) {
		ps->ps_type = avd_int_alloc(POSSET_TYPE_RND);
	} else if (!strcmp(typestr, "collection")) {
		ps->ps_type = avd_int_alloc(POSSET_TYPE_COLLECTION);
	} else {
		filebench_log(LOG_ERROR, "posset_alloc: wrong posset type");
		ipc_free(FILEBENCH_POSSET, (char *)ps);
		return NULL;
	}

	ps->ps_name = name;
	ps->ps_rnd_seed = seed;
	ps->ps_rnd_max = max;
	ps->ps_entries = entries;

	if (avd_get_int(ps->ps_entries) > POSSET_MAX_ENTRIES) {
		filebench_log(LOG_ERROR, "posset_alloc: the number of posset "
		    "entries is too high");
		ipc_free(FILEBENCH_POSSET, (char *)ps);
		return NULL;
	}

	/* depending on the posset type generate (or load) positions */
	switch (avd_get_int(ps->ps_type)) {
	case (POSSET_TYPE_RND):
		ret = posset_rnd_fill(ps);
		break;
	case (POSSET_TYPE_COLLECTION):
		ret = posset_collection_fill(ps);
		break;
	default:
		filebench_log(LOG_ERROR, "posset_alloc: wrong posset type");
		ipc_free(FILEBENCH_POSSET, (char *)ps);
		return NULL;
	}

	if (ret < 0) {
		filebench_log(LOG_ERROR,
		    "posset_alloc: could not fill posset");
		ipc_free(FILEBENCH_POSSET, (char *)ps);
		return NULL;
	}

	/*
	 * Prepend posset to the global list. The original code special-
	 * cased an empty list, but both branches reduced to the same
	 * prepend (ps_next gets NULL when the list head is NULL).
	 */
	(void)ipc_mutex_lock(&filebench_shm->shm_posset_lock);
	ps->ps_next = filebench_shm->shm_possetlist;
	filebench_shm->shm_possetlist = ps;
	(void)ipc_mutex_unlock(&filebench_shm->shm_posset_lock);

	return ps;
}
/*
 * Initializes a random distribution entity, converting avd_t
 * parameters to doubles, and converting the list of probability density
 * function table entries, if supplied, into a probablilty function table
 */
static void
randdist_init_one(randdist_t *rndp)
{
	probtabent_t *rdte_hdp, *ptep;
	double tablemean, tablemin;	/* tablemin set on first table pass */
	int pteidx;

	/* convert parameters to doubles */
	rndp->rnd_dbl_gamma = (double)avd_get_int(rndp->rnd_gamma) / 1000.0;
	if (rndp->rnd_mean != NULL)
		rndp->rnd_dbl_mean = (double)avd_get_int(rndp->rnd_mean);
	else
		rndp->rnd_dbl_mean = rndp->rnd_dbl_gamma;

	/* de-reference min and round amounts for later use */
	rndp->rnd_vint_min = avd_get_int(rndp->rnd_min);
	rndp->rnd_vint_round = avd_get_int(rndp->rnd_round);

	/* NOTE(review): "%llf" is a nonstandard length modifier for
	 * double; %f would suffice — confirm against filebench_log's
	 * format handling before changing. */
	filebench_log(LOG_DEBUG_IMPL,
	    "init random var %s: Mean = %6.0llf, Gamma = %6.3llf, Min = %llu",
	    rndp->rnd_var->var_name, rndp->rnd_dbl_mean, rndp->rnd_dbl_gamma,
	    (u_longlong_t)rndp->rnd_vint_min);

	/* initialize distribution to apply */
	switch (rndp->rnd_type & RAND_TYPE_MASK) {
	case RAND_TYPE_UNIFORM:
		rndp->rnd_get = rand_uniform_get;
		break;

	case RAND_TYPE_GAMMA:
		rndp->rnd_get = rand_gamma_get;
		break;

	case RAND_TYPE_TABLE:
		rndp->rnd_get = rand_table_get;
		break;

	default:
		filebench_log(LOG_DEBUG_IMPL, "Random Type not Specified");
		filebench_shutdown(1);
		return;
	}

	/* initialize source of random numbers */
	if (rndp->rnd_type & RAND_SRC_GENERATOR) {
		rndp->rnd_src = rand_src_rand48;
		rand_seed_set(rndp);
	} else {
		rndp->rnd_src = rand_src_urandom;
	}

	/* any random distribution table to convert? */
	if ((rdte_hdp = rndp->rnd_probtabs) == NULL)
		return;

	/* determine random distribution max and mins and initialize table */
	pteidx = 0;
	tablemean = 0.0;
	for (ptep = rdte_hdp; ptep; ptep = ptep->pte_next) {
		double dmin, dmax;
		int entcnt;

		dmax = (double)avd_get_int(ptep->pte_segmax);
		dmin = (double)avd_get_int(ptep->pte_segmin);

		/* initialize table minimum on first pass */
		if (pteidx == 0)
			tablemin = dmin;

		/* update table minimum */
		if (tablemin > dmin)
			tablemin = dmin;

		/* each percent of probability becomes one lookup entry */
		entcnt = (int)avd_get_int(ptep->pte_percent);
		tablemean += (((dmin + dmax)/2.0) * (double)entcnt);

		/* populate the lookup table */
		for (; entcnt > 0; entcnt--) {
			rndp->rnd_rft[pteidx].rf_base = dmin;
			rndp->rnd_rft[pteidx].rf_range = dmax - dmin;
			pteidx++;
		}
	}

	/* check to see if probability equals 100% (PF_TAB_SIZE entries) */
	if (pteidx != PF_TAB_SIZE)
		filebench_log(LOG_ERROR,
		    "Prob table only totals %d%%", pteidx);

	/* If table is not supplied with a mean value, set it to table mean */
	if (rndp->rnd_dbl_mean == 0.0)
		rndp->rnd_dbl_mean = (double)tablemean / (double)PF_TAB_SIZE;

	/* now normalize the entries for a min value of 0, mean of 1 */
	tablemean = (tablemean / 100.0) - tablemin;

	/* special case if really a constant value */
	if (tablemean == 0.0) {
		for (pteidx = 0; pteidx < PF_TAB_SIZE; pteidx++) {
			rndp->rnd_rft[pteidx].rf_base = 0.0;
			rndp->rnd_rft[pteidx].rf_range = 0.0;
		}
		return;
	}

	/* scale each entry so positions are relative to the table mean */
	for (pteidx = 0; pteidx < PF_TAB_SIZE; pteidx++) {

		rndp->rnd_rft[pteidx].rf_base =
		    ((rndp->rnd_rft[pteidx].rf_base - tablemin) / tablemean);
		rndp->rnd_rft[pteidx].rf_range =
		    (rndp->rnd_rft[pteidx].rf_range / tablemean);
	}
}