struct resmgr *resmgr_new(struct anetlist *a, struct db *db) { struct resmgr *r; int i; r = alloc_type(struct resmgr); r->a = a; r->db = db; r->prng = alloc_type0(mt_state); mts_seed32(r->prng, 0); r->n_control_sets = 0; r->control_sets = NULL; populate_control_sets(r, a); printf("Number of unique control sets:\t%d\n", r->n_control_sets); alloc_csr(&r->slices); r->slices_cs = alloc_size(r->n_control_sets*sizeof(struct resmgr_control_set_resources *)); for(i=0;i<r->n_control_sets;i++) alloc_csr(&r->slices_cs[i]); r->free_iobm = rtree_new_root(); r->free_iobs = rtree_new_root(); r->used_resources = rtree_new_root(); populate_resources(r); printf("Available SLICEXs:\t\t%d\n", r->slices.slicex[0]->count); printf("Available SLICELs:\t\t%d\n", r->slices.slicel[0]->count); printf("Available SLICEMs:\t\t%d\n", r->slices.slicem[0]->count); printf("Available IOBMs:\t\t%d\n", r->free_iobm->count); printf("Available IOBSs:\t\t%d\n", r->free_iobs->count); return r; }
/*
 * Write a generator state to a text stream.
 *
 * The output is a single long line: every word of the state vector,
 * written from the highest index down to index 0 and separated by
 * spaces, followed by the state pointer and a newline (625 numbers in
 * total, per the original description).  The format is compatible
 * with Richard J. Wagner's, although the details are different.
 *
 * An unseeded state is seeded with DEFAULT_SEED32_OLD first.  If the
 * state pointer is out of range, a diagnostic is printed to stderr and
 * the state is refreshed before anything is written.
 *
 * Returns 1 if every write succeeded, 0 as soon as one fails.
 */
int mts_savestate(
    FILE*		statefile,	/* File to save to */
    mt_state*		state)		/* State to be saved */
{
    int word;				/* Index of the next word to write */

    if (!state->initialized) {
        mts_seed32(state, DEFAULT_SEED32_OLD);
    }

    /* Repair a state pointer that lies outside [0, MT_STATE_SIZE]. */
    if (!(state->stateptr >= 0 && state->stateptr <= MT_STATE_SIZE)) {
        fprintf(stderr,
          "Mtwist internal: Trying to write invalid state pointer %d\n",
          state->stateptr);
        mts_refresh(state);
    }

    word = MT_STATE_SIZE;
    while (word-- > 0) {
        if (fprintf(statefile, "%" PRIu32 " ", state->statevec[word]) < 0) {
            return 0;
        }
    }

    return fprintf(statefile, "%d\n", state->stateptr) >= 0;
}
/*
 * Write a generator state to a text stream.
 *
 * The output is a single long line: every word of the state vector,
 * written from the highest index down to index 0 and separated by
 * spaces, followed by the state pointer and a newline (625 numbers in
 * total, per the original description).  The format is compatible
 * with Richard J. Wagner's, although the details are different.
 *
 * An unseeded state is seeded with DEFAULT_SEED32_OLD first.
 *
 * Returns 1 if every write succeeded, 0 as soon as one fails.
 */
int mts_savestate(
    FILE*		statefile,	/* File to save to */
    mt_state*		state)		/* State to be saved */
{
    int slot;				/* Index of the word being written */

    if (!state->initialized) {
        mts_seed32(state, DEFAULT_SEED32_OLD);
    }

    for (slot = MT_STATE_SIZE - 1; slot >= 0; slot--) {
        int written = fprintf(statefile, "%" PRIu32 " ",
          state->statevec[slot]);

        if (written < 0) {
            return 0;
        }
    }

    /* The state pointer closes the line. */
    return (fprintf(statefile, "%d\n", state->stateptr) < 0) ? 0 : 1;
}
/*
 * Generate 624 more random values.  This function is called when the
 * state vector has been exhausted.  It generates another batch of
 * pseudo-random values.  The performance of this function is critical
 * to the performance of the Mersenne Twist PRNG, so it has been
 * highly optimized.
 *
 * The state vector is rewritten in place, working from the highest
 * index down to index 0, and state->stateptr is set to MT_STATE_SIZE
 * when the pass is complete.  If the state has never been seeded, the
 * function seeds it with DEFAULT_SEED32_OLD and returns immediately.
 */
void mts_refresh(
    register mt_state*	state)		/* State for the PRNG */
{
    register int	i;		/* Index into the state */
    register uint32_t*
			state_ptr;	/* Next place to get from state */
    register uint32_t
			value1;		/* Scratch val picked up from state */
    register uint32_t
			value2;		/* Scratch val picked up from state */

    /*
     * Start by making sure a random seed has been set.  If not, set
     * one.
     */
    if (!state->initialized)
    {
        mts_seed32(state, DEFAULT_SEED32_OLD);
        return;				/* Seed32 calls us recursively */
    }

    /*
     * Now generate the new pseudorandom values by applying the
     * recurrence relation.  We use two loops and a final
     * 2-statement sequence so that we can handle the wraparound
     * explicitly, rather than having to use the relatively slow
     * modulus operator.
     *
     * In essence, the recurrence relation concatenates bits
     * chosen from the current random value (last time around)
     * with the immediately preceding one.  Then it
     * matrix-multiplies the concatenated bits with a value
     * RECURRENCE_OFFSET away and a constant matrix.  The matrix
     * multiplication reduces to a shift and two XORs.
     *
     * Some comments on the optimizations are in order:
     *
     * Strictly speaking, none of the optimizations should be
     * necessary.  All could conceivably be done by a really good
     * compiler.  However, the compilers available to me aren't quite
     * smart enough, so hand optimization needs to be done.
     *
     * Shawn Cokus was the first to achieve a major speedup.  In the
     * original code, the first value given to COMBINE_BITS (in my
     * characterization) was re-fetched from the state array, rather
     * than being carried in a scratch variable.  Cokus noticed that
     * the first argument to COMBINE_BITS could be saved in a register
     * in the previous loop iteration, getting rid of the need for an
     * expensive memory reference.
     *
     * Cokus also switched to using pointers to access the state
     * array and broke the original loop into two so that he could
     * avoid using the expensive modulus operator.  Cokus used three
     * pointers; Richard J. Wagner noticed that the offsets between
     * the three were constant, so that they could be collapsed into a
     * single pointer and constant-offset accesses.  This is clearly
     * faster on x86 architectures, and is the same cost on RISC
     * machines.  A secondary benefit is that Cokus' version was
     * register-starved on the x86, while Wagner's version was not.
     *
     * I made several smaller improvements to these observations.
     * First, I reversed the contents of the state vector.  In the
     * current version of the code, this change doesn't directly
     * affect the performance of the refresh loop, but it has the nice
     * side benefit that an all-zero state structure represents an
     * uninitialized generator.  It also slightly speeds up the
     * random-number routines, since they can compare the state
     * pointer against zero instead of against a constant (this makes
     * the biggest difference on RISC machines).
     *
     * Second, I returned to Matsumoto and Nishimura's original
     * technique of using a lookup table to decide whether to xor the
     * constant vector A (MATRIX_A in this code) with the newly
     * computed value.  Cokus and Wagner had used the ?: operator,
     * which requires a test and branch.  Modern machines don't like
     * branches, so the table lookup is faster.
     *
     * Third, in the Cokus and Wagner versions the loop ends with a
     * statement similar to "value1 = value2", which is necessary to
     * carry the fetched value into the next loop iteration.  I
     * recognized that if the loop were unrolled so that it generates
     * two values per iteration, a bit of variable renaming would get
     * rid of that assignment.  A nice side effect is that the
     * overhead of loop control becomes only half as large.
     *
     * It is possible to improve the code's performance somewhat
     * further.  In particular, since the second loop's loop count
     * factors into 2*2*3*3*11, it could be unrolled yet further.
     * That's easy to do, too: just change the "/ 2" into a division
     * by whatever factor you choose, and then use cut-and-paste to
     * duplicate the code in the body.  To remove a few more cycles,
     * fix the code to decrement state_ptr by the unrolling factor, and
     * adjust the various offsets appropriately.  However, the payoff
     * will be small.  At the moment, the x86 version of the loop is
     * 25 instructions, of which 3 are involved in loop control
     * (including the decrementing of state_ptr).  Further unrolling by
     * a factor of 2 would thus produce only about a 6% speedup.
     *
     * The logical extension of the unrolling
     * approach would be to remove the loops and create 624
     * appropriate copies of the body.  However, I think that doing
     * the latter is a bit excessive!
     *
     * I suspect that a superior optimization would be to simplify the
     * mathematical operations involved in the recurrence relation.
     * However, I have no idea whether such a simplification is
     * feasible.
     */

    /*
     * Begin at the last element of the state vector.  value1 and
     * value2 alternate as the carrier of the most recently fetched
     * word, so each word is read from memory only once.
     */
    state_ptr = &state->statevec[MT_STATE_SIZE - 1];
    value1 = *state_ptr;

    /*
     * First loop: two entries per iteration.  The partner word for
     * each entry is read RECURRENCE_OFFSET slots below it, so no
     * wraparound is involved here.
     */
    for (i = (MT_STATE_SIZE - RECURRENCE_OFFSET) / 2;  --i >= 0;  )
    {
        state_ptr -= 2;
        value2 = state_ptr[1];
        value1 = COMBINE_BITS(value1, value2);
        state_ptr[2] =
          MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 2], value1);
        value1 = state_ptr[0];
        value2 = COMBINE_BITS(value2, value1);
        state_ptr[1] =
          MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 1], value2);
    }

    /*
     * A single entry is processed on its own between the two unrolled
     * loops.  Presumably this is needed because
     * MT_STATE_SIZE - RECURRENCE_OFFSET is odd -- confirm against the
     * constant definitions.
     */
    value2 = *--state_ptr;
    value1 = COMBINE_BITS(value1, value2);
    state_ptr[1] =
      MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 1], value1);

    /*
     * Second loop: two entries per iteration.  The partner word is
     * now read MT_STATE_SIZE - RECURRENCE_OFFSET slots above the
     * entry, i.e. the offset has wrapped around the vector.
     */
    for (i = (RECURRENCE_OFFSET - 1) / 2;  --i >= 0;  )
    {
        state_ptr -= 2;
        value1 = state_ptr[1];
        value2 = COMBINE_BITS(value2, value1);
        state_ptr[2] =
          MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 2],
            value2);
        value2 = state_ptr[0];
        value1 = COMBINE_BITS(value1, value2);
        state_ptr[1] =
          MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 1],
            value1);
    }

    /*
     * The final entry in the table requires the "previous" value
     * to be gotten from the other end of the state vector, so it
     * must be handled specially.
     */
    value1 = COMBINE_BITS(value2, state->statevec[MT_STATE_SIZE - 1]);
    *state_ptr =
      MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET], value1);

    /*
     * Now that refresh is complete, reset the state pointer to allow more
     * pseudorandom values to be fetched from the state array.
     */
    state->stateptr = MT_STATE_SIZE;
}
/*
 * Initialize the default Mersenne Twist PRNG from a 32-bit seed.
 *
 * This is a convenience wrapper: it forwards the seed unchanged to
 * mts_seed32, passing the address of mt_default_state (declared
 * outside this block) as the state to initialize.
 *
 * See mts_seed32 for full commentary.
 */
void mt_seed32(
    uint32_t		seed)		/* 32-bit seed to start from */
{
    mts_seed32(&mt_default_state, seed);
}
/*
 * Initialize the default Mersenne Twist PRNG from a 32-bit seed.
 *
 * This is a convenience wrapper: it forwards the seed unchanged to
 * mts_seed32, passing the address of mt_default_state (declared
 * outside this block) as the state to initialize.
 *
 * NOTE(review): the parameter is declared unsigned long, which can be
 * wider than 32 bits on some platforms; how bits above the low 32 are
 * treated depends on mts_seed32 -- confirm against its definition.
 *
 * See mts_seed32 for full commentary.
 */
void mt_seed32(
    unsigned long	seed)		/* 32-bit seed to start from */
{
    mts_seed32(&mt_default_state, seed);
}
/**
   Refresh the state for next set of random numbers.

   Every word of state->statevec is rewritten in place, working from the
   highest index down to index 0, and state->stateptr is set to
   MT_STATE_SIZE when the pass is complete.  If the state has never been
   seeded, it is seeded with DEFAULT_SEED32_OLD and the function returns
   immediately.

   NOTE(review): the recurrence step is spelled MATRIX_MULTIPLX here, while
   another copy of this routine in the same source uses MATRIX_MULTIPLY.
   The macro definition is not visible from this block -- confirm that the
   name matches its definition.
*/
void mts_refresh(register mt_state* state /**< State for the PRNG */
    ){
    register int	i;		/* Index into the state */
    register mt_u32bit_t*
			state_ptr;	/* Next place to get from state */
    register mt_u32bit_t
			value1;		/* Scratch val picked up from state */
    register mt_u32bit_t
			value2;		/* Scratch val picked up from state */

    /*
     * Start by making sure a random seed has been set.  If not, set
     * one.
     */
    if (!state->initialized)
    {
        mts_seed32(state, DEFAULT_SEED32_OLD);
        return;				/* Seed32 calls us recursively */
    }

    /*
     * Begin at the last element of the state vector.  value1 and
     * value2 alternate as the carrier of the most recently fetched
     * word, so each word is read from memory only once.
     */
    state_ptr = &state->statevec[MT_STATE_SIZE - 1];
    value1 = *state_ptr;

    /*
     * First loop: two entries per iteration.  The partner word for
     * each entry is read RECURRENCE_OFFSET slots below it, so no
     * wraparound is involved here.
     */
    for (i = (MT_STATE_SIZE - RECURRENCE_OFFSET) / 2;  --i >= 0;  )
    {
        state_ptr -= 2;
        value2 = state_ptr[1];
        value1 = COMBINE_BITS(value1, value2);
        state_ptr[2] =
          MATRIX_MULTIPLX(state_ptr[-RECURRENCE_OFFSET + 2], value1);
        value1 = state_ptr[0];
        value2 = COMBINE_BITS(value2, value1);
        state_ptr[1] =
          MATRIX_MULTIPLX(state_ptr[-RECURRENCE_OFFSET + 1], value2);
    }

    /*
     * A single entry is processed on its own between the two unrolled
     * loops.  Presumably this is needed because
     * MT_STATE_SIZE - RECURRENCE_OFFSET is odd -- confirm against the
     * constant definitions.
     */
    value2 = *--state_ptr;
    value1 = COMBINE_BITS(value1, value2);
    state_ptr[1] =
      MATRIX_MULTIPLX(state_ptr[-RECURRENCE_OFFSET + 1], value1);

    /*
     * Second loop: two entries per iteration.  The partner word is
     * now read MT_STATE_SIZE - RECURRENCE_OFFSET slots above the
     * entry, i.e. the offset has wrapped around the vector.
     */
    for (i = (RECURRENCE_OFFSET - 1) / 2;  --i >= 0;  )
    {
        state_ptr -= 2;
        value1 = state_ptr[1];
        value2 = COMBINE_BITS(value2, value1);
        state_ptr[2] =
          MATRIX_MULTIPLX(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 2],
            value2);
        value2 = state_ptr[0];
        value1 = COMBINE_BITS(value1, value2);
        state_ptr[1] =
          MATRIX_MULTIPLX(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 1],
            value1);
    }

    /*
     * The final entry in the table requires the "previous" value
     * to be gotten from the other end of the state vector, so it
     * must be handled specially.
     */
    value1 = COMBINE_BITS(value2, state->statevec[MT_STATE_SIZE - 1]);
    *state_ptr =
      MATRIX_MULTIPLX(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET], value1);

    /*
     * Now that refresh is complete, reset the state pointer to allow more
     * pseudorandom values to be fetched from the state array.
     */
    state->stateptr = MT_STATE_SIZE;
}