void LayerNet::gen_init ( TrainingSet *tptr , // Training set to use struct LearnParams *lptr // User's general learning parameters ) { int i, istart, individual, best_individual, generation, n_cross ; int first_child, parent1, parent2, improved, crosspt, nchoices, *choices ; int initpop, popsize, gens, climb, nvars, chromsize, split, ind ; float pcross, pmutate, error, besterror, *errors, *fitness, worst ; float fquit, favor_best, fitfac, maxerr, minerr, avgerr, overinit ; SingularValueDecomp *sptr ; struct GenInitParams *gptr ; // User's genetic initialization parameters char *pool1, *pool2, *oldpop, *newpop, *popptr, *temppop, *best ; char msg[80] ; gptr = lptr->gp ; popsize = gptr->pool ; gens = gptr->gens ; climb = gptr->climb ; overinit = gptr->overinit ; pcross = gptr->pcross ; pmutate = gptr->pmutate ; fquit = lptr->quit_err ; favor_best = 3.1 ; fitfac = -20.0 ; /* -------------------------------------------------------------------------------- Do all scratch memory allocation. -------------------------------------------------------------------------------- */ /* Allocate the singular value decomposition object for REGRESS. */ if (nhid2 == 0) // One hidden layer nvars = nhid1 + 1 ; else // Two hidden layers nvars = nhid2 + 1 ; MEMTEXT ( "GEN_INIT: new SingularValueDecomp" ) ; sptr = new SingularValueDecomp ( tptr->ntrain , nvars , 1 ) ; if ((sptr == NULL) || ! sptr->ok) { memory_message("for genetic initialization. Try ANNEAL NOREGRESS."); neterr = 1.0 ; // Flag failure to LayerNet::learn which called us if (sptr != NULL) delete sptr ; return ; } chromsize = nhid1 * (nin+1) ; // Length of an individual's chromosome if (nhid2) // is the number of hidden weights chromsize += nhid2 * (nhid1+1) ; errors = fitness = NULL ; choices = NULL ; pool1 = pool2 = NULL ; MEMTEXT ( "GEN_INIT: errors, fitness, choices, best, pool1,pool2"); if (((errors = (float*) MALLOC ( popsize * sizeof(float))) == NULL) || ((fitness = (float*) MALLOC ( popsize * sizeof(float))) == NULL) || ((best = (char*) MALLOC( chromsize )) == NULL) || ((choices = (int*) MALLOC ( popsize * sizeof(int))) == NULL) || ((pool1 = (char*) MALLOC( popsize * chromsize )) == NULL) || ((pool2 = (char*) MALLOC( popsize * chromsize )) == NULL)) { if (errors != NULL) FREE ( errors ) ; if (fitness != NULL) FREE ( fitness ) ; if (choices != NULL) FREE ( choices ) ; if (pool1 != NULL) FREE ( pool1 ) ; if (pool2 != NULL) FREE ( pool2 ) ; delete sptr ; memory_message("for genetic initialization. Try ANNEAL NOREGRESS." ) ; neterr = 1.0 ; // Flag failure to LayerNet::learn which called us return ; } /* Generate initial population pool. We also preserve the best weights across all generations, as this is what we will ultimately return to the user. Its mean square error is besterror. */ besterror = 1.e30 ; // For saving best (across all individuals and gens) maxerr = avgerr = 0.0 ; // For progress display only best_individual = 0 ; // Safety only initpop = popsize * overinit ; // Overinitialization of initial population progress_message ( "\nGenerating initial population" ) ; for (ind=0 ; ind<initpop ; ind++) { // Try overinitialization times if (ind<popsize) // If still in pop size limit individual = ind ; // just use next avail space else { // Else we search entire pop worst = -1. ; // for the worst member for (i=0 ; i<popsize ; i++) { // which we will then replace if (errors[i] > worst) { worst = errors[i] ; individual = i ; } } avgerr -= worst ; // Exclude discards from average } popptr = pool1 + individual * chromsize ; // Build init pop in pool1 rand_ind ( popptr , chromsize ) ; // Randomly generate individual decode ( popptr , nin , nhid1 , nhid2 , // Convert genotype (chromosome) hid1_coefs , hid2_coefs ); // to phenotype (weights) error = regress ( tptr , sptr ) ; // Evaluate network error errors[individual] = error ; // and keep all errors if (error < besterror) { // Keep track of best besterror = error ; // as it is returned to user best_individual = individual ; // This is its index in pool1 } if (error > maxerr) // Max and average error are maxerr = error ; // for progress display only avgerr += error ; if (error <= fquit) break ; progress_message ( "." ) ; } sprintf (msg , "\nInitial pop: Min err=%7.4lf Max=%7.4lf Avg=%7.4lf", 100. * besterror, 100. * maxerr, 100.0 * avgerr / (float) popsize); progress_message ( msg ) ; /* The initial population has been built in pool1. Copy its best member to 'best' in case it never gets beat (unlikely but possible!). Also, we will need best if the climb option is true. */ popptr = pool1 + best_individual * chromsize ; // Point to best memcpy ( best , popptr , chromsize ) ; // and save it /* This is the main generation loop. There are two areas for population pool storage: pool1 and pool2. At any given time, oldpop will be set to one of them, and newpop to the other. This avoids a lot of copying. */ oldpop = pool1 ; // This is the initial population newpop = pool2 ; // The next generation is created here for (generation=0 ; generation<gens ; generation++) { if (error <= fquit) // We may have satisfied this in init pop break ; // So we test at start of generation loop error_to_fitness ( popsize , favor_best , fitfac , errors , fitness ) ; fitness_to_choices ( popsize , fitness , choices ) ; nchoices = popsize ; // Will count down as choices array emptied n_cross = pcross * popsize ; // Number crossing over first_child = 1 ; // Generating first of parent's 2 children? improved = 0 ; // Flags if we beat best if (climb) { // If we are to hill climb memcpy ( newpop , best , chromsize ) ; // start with best errors[0] = besterror ; // Record its error istart = 1 ; // and start children past it } else istart = 0 ; /* Generate the children */ maxerr = avgerr = 0.0 ; // For progress display only minerr = 1.0 ; // Ditto for (individual=istart ; individual<popsize ; individual++) { popptr = newpop + individual * chromsize ; // Will put this child here if (first_child) // If this is the first of 2 children, pick parents pick_parents ( &nchoices , choices , &parent1 , &parent2 ) ; if (n_cross-- > 0) // Do crossovers first reproduce ( oldpop + parent1 * chromsize , oldpop + parent2 * chromsize , first_child , chromsize , popptr , &crosspt , &split ) ; else if (first_child) // No more crossovers, so just copy parent memcpy ( popptr , oldpop + parent1 * chromsize , chromsize ) ; else memcpy ( popptr , oldpop + parent2 * chromsize , chromsize ); if (pmutate > 0.0) mutate ( popptr , chromsize , pmutate ) ; decode ( popptr , nin , nhid1 , nhid2 , hid1_coefs , hid2_coefs ) ; error = regress ( tptr , sptr ) ; // Evaluate child's error errors[individual] = error ; // and keep each if (error < besterror) { // Keep track of best besterror = error ; // It will be returned to user best_individual = individual ; // This is its index in newpop improved = 1 ; // Flag so we copy it later } if (error > maxerr) // Min, max and average error maxerr = error ; // for progress display only if (error < minerr) minerr = error ; avgerr += error ; if (error <= fquit) break ; first_child = ! first_child ; } // For all genes in population /* We finished generating all children. If we improved (one of these children beat the best so far) then copy that child to the best. Swap oldpop and newpop for the next generation. */ if (improved) { popptr = newpop + best_individual * chromsize ; // Point to best memcpy ( best , popptr , chromsize ) ; // and save it } temppop = oldpop ; // Switch old and new pops for next generation oldpop = newpop ; newpop = temppop ; sprintf(msg, "\nGeneration %3d: Min err=%7.4lf Max=%7.4lf Avg=%7.4lf", generation+1, 100. * minerr, 100. * maxerr, 100.0 * avgerr / (float) popsize ) ; progress_message ( msg ) ; } /* We are all done. */ decode ( best , nin , nhid1 , nhid2 , hid1_coefs , hid2_coefs ) ; besterror = regress ( tptr , sptr ) ; // Evaluate network error MEMTEXT ( "GEN_INIT: errors, fitness, choices, best, pool1,pool2"); FREE ( errors ) ; FREE ( fitness ) ; FREE ( choices ) ; FREE ( best ) ; FREE ( pool1 ) ; FREE ( pool2 ) ; MEMTEXT ( "GEN_INIT: delete sptr" ) ; delete sptr ; }
void LayerNet::anx_dd ( TrainingSet *tptr , struct LearnParams *lptr ) { int itry, n_escape, n_retry, bad_count, new_record, refined ; long seed ; double err, prev_err, best_err, start_of_loop_error, best_inner_error ; double initial_accuracy, final_accuracy ; char msg[80] ; LayerNet *worknet, *worknet2, *bestnet ; n_escape = n_retry = 0 ; /* Allocate scratch memory */ MEMTEXT ( "ANX_DD::learn new worknet, bestnet" ) ; worknet = new LayerNet ( model , outmod , outlin , nin , nhid1 , nhid2 , nout , 0 , 0 ) ; bestnet = new LayerNet ( model , outmod , outlin , nin , nhid1 , nhid2 , nout , 0 , 1 ) ; if ((worknet == NULL) || (! worknet->ok) || (bestnet == NULL) || (! bestnet->ok)) { memory_message ( "to learn" ) ; if (worknet != NULL) delete worknet ; if (bestnet != NULL) delete bestnet ; errtype = 0 ; return ; } if ((lptr->method == METHOD_AN2_CJ) || (lptr->method == METHOD_AN2_LM)) { worknet2 = new LayerNet ( model , outmod , outlin , nin , nhid1 , nhid2 , nout , 0 , 0 ) ; if ((worknet2 == NULL) || (! worknet2->ok)) { if (worknet2 != NULL) delete worknet2 ; delete worknet ; delete bestnet ; memory_message ( "to learn" ) ; errtype = 0 ; return ; } } else worknet2 = NULL ; /* Start by annealing around the starting weights. These will be zero if the net was just created. If it was restored or partially trained already, they will be meaningful. Anneal1 guarantees that it will not return all zero weights if there is at least one hidden layer, even if that means that the error exceeds the amount that could be attained by all zeros. */ best_err = best_inner_error = 1.e30 ; if ((lptr->method == METHOD_AN1_CJ) || (lptr->method == METHOD_AN1_LM)) anneal1 ( tptr , lptr , worknet , 1 , 0 ) ; else if ((lptr->method == METHOD_AN2_CJ) || (lptr->method == METHOD_AN2_LM)) anneal2 ( tptr , lptr , worknet , worknet2 , 1 ) ; /* Do direct descent optimization, finding local minimum. Then anneal to break out of it. If successful, loop back up to do direct descent again. Otherwise restart totally random. */ bad_count = 0 ; // Handles flat local mins refined = 0 ; // Did we ever refine to high resolution? Not yet. new_record = 0 ; // Refine every time a new inner error record set initial_accuracy = pow ( 10.0 , -lptr->cj_acc ) ; final_accuracy = initial_accuracy * pow ( 10.0 , -lptr->cj_refine ) ; for (itry=1 ; ; itry++) { if (neterr < best_err) { // Keep track of best copy_weights ( bestnet , this ) ; best_err = neterr ; } sprintf ( msg , "Try %d (best=%lf):", itry, best_err ) ; normal_message ( msg ) ; if (neterr <= lptr->quit_err) break ; start_of_loop_error = neterr ; if ((lptr->method == METHOD_AN1_CJ) || (lptr->method == METHOD_AN2_CJ)) err = conjgrad ( tptr , 32767 , initial_accuracy , lptr->quit_err , lptr->cj_progress ) ; else if ((lptr->method==METHOD_AN1_LM) || (lptr->method==METHOD_AN2_LM)) err = lev_marq ( tptr , 32767 , initial_accuracy , lptr->quit_err , lptr->cj_progress ) ; neterr = fabs ( err ) ; // err<0 if user pressed ESCape sprintf ( msg , " Gradient err=%lf", neterr ) ; progress_message ( msg ) ; if (neterr < best_err) { // Keep track of best copy_weights ( bestnet , this ) ; best_err = neterr ; } if (err <= lptr->quit_err) { // err<0 if user pressed ESCape if (err < -1.e29) // or insufficient memory printf ( "\nInsufficient memory for gradient learning." ) ; break ; } seed = flrand() - (long) (itry * 97) ; // Insure new seed for anneal sflrand ( seed ) ; prev_err = neterr ; // So we can see if anneal helped if ((lptr->method == METHOD_AN1_CJ) || (lptr->method == METHOD_AN1_LM)) anneal1 ( tptr , lptr , worknet , 0 , itry ) ; else if ((lptr->method==METHOD_AN2_CJ) || (lptr->method==METHOD_AN2_LM)) anneal2 ( tptr , lptr , worknet , worknet2 , 0 ) ; sprintf ( msg , " Anneal err=%lf", neterr ) ; progress_message ( msg ) ; if (neterr < best_err) { // Keep track of best copy_weights ( bestnet , this ) ; best_err = neterr ; } if (best_err <= lptr->quit_err) break ; if (neterr < best_inner_error) { // Keep track of best inner for refine best_inner_error = neterr ; new_record = 1 ; // Tells us to refine } if ((prev_err - neterr) > 1.e-7) { // Did we break out of local min? if ((start_of_loop_error - neterr) < 1.e-3) ++bad_count ; // Avoid many unprofitable iters else bad_count = 0 ; if (bad_count < 4) { ++n_escape ; // Count escapes from local min continue ; // Escaped, so gradient learn again } } /* After first few tries, and after each inprovement thereafter, refine to high resolution */ if ((itry-n_escape >= lptr->cj_pretries) && (new_record || ! refined)) { if (! refined) { // If refining the best of the pretries copy_weights ( this , bestnet ) ; // Get that net neterr = best_err ; } refined = 1 ; // Only force refine once new_record = 0 ; // Reset new inner error record flag progress_message ( " REFINING" ) ; if ((lptr->method == METHOD_AN1_CJ) || (lptr->method == METHOD_AN2_CJ)) err = conjgrad ( tptr , 0 , final_accuracy , lptr->quit_err , lptr->cj_progress ) ; else if ((lptr->method==METHOD_AN1_LM)|| (lptr->method==METHOD_AN2_LM)) err = lev_marq ( tptr , 0 , final_accuracy , lptr->quit_err , lptr->cj_progress ) ; neterr = fabs ( err ) ; // err<0 if user pressed ESCape sprintf ( msg , " Attained err=%lf", neterr ) ; progress_message ( msg ) ; if (neterr < best_err) { // Keep track of best copy_weights ( bestnet , this ) ; best_err = neterr ; } } if (++n_retry > lptr->retries) break ; progress_message ( " RESTART" ) ; zero_weights () ; // Failed to break out, so retry random seed = flrand() - (long) (itry * 773) ; // Insure new seed for anneal sflrand ( seed ) ; if ((lptr->method == METHOD_AN1_CJ) || (lptr->method == METHOD_AN1_LM)) anneal1 ( tptr , lptr , worknet , 1 , itry ) ; else if ((lptr->method==METHOD_AN2_CJ) || (lptr->method==METHOD_AN2_LM)) anneal2 ( tptr , lptr , worknet , worknet2 , 1 ) ; } FINISH: copy_weights ( this , bestnet ) ; neterr = best_err ; MEMTEXT ( "AN1_DD::learn delete worknet, bestnet" ) ; delete worknet ; delete bestnet ; sprintf ( msg , "%d successful escapes, %d retries", n_escape, n_retry ) ; normal_message ( msg ) ; return ; }
void LayerNet::anneal ( TrainingSet *tptr , // Training set to use struct LearnParams *lptr , // User's general learning parameters LayerNet *bestnet , // Work area used to keep best network int init // Use zero suffix (initialization) anneal parms? ) { int ntemps, niters, setback, reg, nvars, key, user_quit ; int i, iter, improved, ever_improved, itemp ; long seed, bestseed ; char msg[80] ; double tempmult, temp, fval, bestfval, starttemp, stoptemp, fquit ; SingularValueDecomp *sptr ; struct AnnealParams *aptr ; // User's annealing parameters aptr = lptr->ap ; /* The parameter 'init' is nonzero if we are initializing weights for learning. If zero we are attempting to break out of a local minimum. The main effect of this parameter is whether or not we use the zero suffix variables in the anneal parameters. A second effect is that regression is used only for initialization, not for escape. */ if (init) { ntemps = aptr->temps0 ; niters = aptr->iters0 ; setback = aptr->setback0 ; starttemp = aptr->start0 ; stoptemp = aptr->stop0 ; } else { ntemps = aptr->temps ; niters = aptr->iters ; setback = aptr->setback ; starttemp = aptr->start ; stoptemp = aptr->stop ; } /* Initialize other local parameters. Note that there is no sense using regression if there are no hidden layers. Also, regression is almost always counterproductive for local minimum escape. */ fquit = lptr->quit_err ; reg = init && nhid1 && (lptr->init != 1) ; /* Allocate the singular value decomposition object for REGRESS. Also allocate a work area for REGRESS to preserve matrix. */ if (reg) { if (nhid1 == 0) // No hidden layer nvars = nin + 1 ; else if (nhid2 == 0) // One hidden layer nvars = nhid1 + 1 ; else // Two hidden layers nvars = nhid2 + 1 ; MEMTEXT ( "ANNEAL: new SingularValueDecomp" ) ; sptr = new SingularValueDecomp ( tptr->ntrain , nvars , 1 ) ; if ((sptr == NULL) || ! sptr->ok) { memory_message ( "for annealing with regression. Try ANNEAL NOREGRESS."); if (sptr != NULL) delete sptr ; neterr = 1.0 ; // Flag failure to LayerNet::learn which called us return ; } } /* For every temperature, the center around which we will perturb is the best point so far. This is kept in 'bestnet', so initialize it to the user's starting estimate. Also, initialize 'bestfval', the best function value so far, to be the function value at that starting point. */ copy_weights ( bestnet , this ) ; // Current weights are best so far if (init) bestfval = 1.e30 ; // Force it to accept SOMETHING else bestfval = trial_error ( tptr ) ; /* This is the temperature reduction loop and the iteration within temperature loop. We use a slick trick to keep track of the best point at a given temperature. We certainly don't want to replace the best every time an improvement is had, as then we would be moving our center about, compromising the global nature of the algorithm. We could, of course, have a second work area in which we save the 'best so far for this temperature' point. But if there are a lot of variables, the usual case, this wastes memory. What we do is to save the seed of the random number generator which created the improvement. Then later, when we need to retrieve the best, simply set the random seed and regenerate it. This technique also saves a lot of copying time if many improvements are made for a single temperature. */ temp = starttemp ; tempmult = exp( log( stoptemp / starttemp ) / (ntemps-1)) ; ever_improved = 0 ; // Flags if improved at all user_quit = 0 ; // Flags user pressed ESCape for (itemp=0 ; itemp<ntemps ; itemp++) { // Temp reduction loop improved = 0 ; // Flags if this temp improved if (init) { sprintf ( msg , "\nANNEAL temp=%.2lf ", temp ) ; progress_message ( msg ) ; } for (iter=0 ; iter<niters ; iter++) { // Iters per temp loop seed = longrand () ; // Get a random seed slongrand ( seed ) ; // Brute force set it perturb (bestnet, this, temp, reg) ;// Randomly perturb about best if (reg) // If using regression, estimate fval = regress ( tptr , sptr ) ; // out weights now else // Otherwise just evaluate fval = trial_error ( tptr ) ; if (fval < bestfval) { // If this iteration improved bestfval = fval ; // then update the best so far bestseed = seed ; // and save seed to recreate it ever_improved = improved = 1 ; // Flag that we improved if (bestfval <= fquit) // If we reached the user's break ; // limit, we can quit iter -= setback ; // It often pays to keep going if (iter < 0) // at this temperature if we iter = 0 ; // are still improving } } // Loop: for all iters at a temp if (improved) { // If this temp saw improvement slongrand ( bestseed ) ; // set seed to what caused it perturb (bestnet, this, temp, reg) ;// and recreate that point copy_weights ( bestnet , this ) ; // which will become next center slongrand ( bestseed / 2 + 999 ) ; // Jog seed away from best if (init) { sprintf ( msg , " err=%.3lf%% ", 100.0 * bestfval ) ; progress_message ( msg ) ; } } if (bestfval <= fquit) // If we reached the user's break ; // limit, we can quit /*********************************************************************** if (kbhit()) { // Was a key pressed? key = getch () ; // Read it if so while (kbhit()) // Flush key buffer in case function key getch () ; // or key was held down if (key == 27) { // ESCape user_quit = 1 ; // Flags user that ESCape was pressed break ; } } ***********************************************************************/ if (user_quit) break ; temp *= tempmult ; // Reduce temp for next pass } // through this temperature loop /* The trials left this weight set and neterr in random condition. Make them equal to the best, which will be the original if we never improved. Also, if we improved and are using regression, recall that bestnet only contains the best hidden weights, as we did not bother to run regress when we updated bestnet. Do that now before returning. */ copy_weights ( this , bestnet ) ; // Return best weights in this net neterr = bestfval ; // Trials destroyed weights, err if (ever_improved && reg) neterr = regress ( tptr , sptr ) ; // regressed output weights if (reg) { MEMTEXT ( "ANNEAL: delete SingularValueDecomp" ) ; delete sptr ; } }
int LayerNet::ssg_core ( TrainingSet *tptr , // Training set to use struct LearnParams *lptr , // User's general learning parameters LayerNet *avgnet , // Work area used to keep average weights LayerNet *bestnet , // And the best so far double *work1 , // Gradient work vector double *work2 , // Ditto double *grad , // Ditto double *avg_grad , // Ditto int n_grad // Length of above vectors ) { int ntemps, niters, setback, reg, nvars, user_quit ; int i, iter, itemp, n_good, n_bad, use_grad ; char msg[80] ; double tempmult, temp, fval, bestfval, starttemp, stoptemp, fquit ; double avg_func, new_fac, gradlen, grad_weight, weight_used ; enum RandomDensity density ; SingularValueDecomp *sptr ; struct AnnealParams *aptr ; // User's annealing parameters aptr = lptr->ap ; ntemps = aptr->temps0 ; niters = aptr->iters0 ; setback = aptr->setback0 ; starttemp = aptr->start0 ; stoptemp = aptr->stop0 ; if (aptr->random0 == ANNEAL_GAUSSIAN) density = NormalDensity ; else if (aptr->random0 == ANNEAL_CAUCHY) density = CauchyDensity ; if (! (ntemps * niters)) return 0 ; /* Initialize other local parameters. Note that there is no sense using regression if there are no hidden layers. */ use_grad = (grad != NULL) ; fquit = lptr->quit_err ; reg = nhid1 ; /* Allocate the singular value decomposition object for REGRESS. Also allocate a work area for REGRESS to preserve matrix. */ if (reg) { // False if no hidden layers if (nhid2 == 0) // One hidden layer nvars = nhid1_n ; else // Two hidden layers nvars = nhid2_n ; i = (model == NETMOD_COMPLEX) ? 2 * tptr->ntrain : tptr->ntrain ; if (i < nvars) { warning_message ( "Too few training sets for regression." ) ; reg = 0 ; } else { MEMTEXT ( "SSG: new SingularValueDecomp" ) ; sptr = new SingularValueDecomp ( i , nvars , 1 ) ; if ((sptr == NULL) || ! sptr->ok) { memory_message ( "for SS(G) with regression. Using total randomization."); if (sptr != NULL) delete sptr ; reg = 0 ; } } } /* For the basic algorithm, we will keep the current 'average' network weight set in avgnet. This will be the moving center about which the perturbation is done. Although not directly related to the algorithm itself, we keep track of the best network ever found in bestnet. That is what the user will get at the end. */ copy_weights ( bestnet , this ) ; // Current weights are best so far copy_weights ( avgnet , this ) ; // Center of perturbation bestfval = trial_error ( tptr ) ; /* If this is being used to initialize the weights, make sure that they are not identically zero. Do this by setting bestfval huge so that SOMETHING is accepted later. */ if (nhid1) { i = nhid1 * nin_n ; while (i--) { if (fabs(hid1_coefs[i]) > 1.e-10) break ; } if (i < 0) bestfval = 1.e30 ; } /* Initialize by cumulating a bunch of points */ normal_message ( "Initializing..." ) ; avg_func = 0.0 ; // Mean function around center if (use_grad) { for (i=0 ; i<n_grad ; i++) // Zero the mean gradient avg_grad[i] = 0.0 ; } for (iter=0 ; iter<niters ; iter++) { // Initializing iterations perturb ( avgnet , this , starttemp , reg , density ) ; // Move point if (reg) // If using regression, estimate fval = regress ( tptr , sptr ) ; // out weights now, ignore fval if (use_grad) // Also need gradient? fval = gradient ( tptr , work1 , work2 , grad ) ; // fval redundant else if (! reg) // If reg we got fval from regress fval = trial_error ( tptr ) ; avg_func += fval ; // Cumulate mean function if (use_grad) { // Also need gradient? for (i=0 ; i<n_grad ; i++) // Cumulate mean gradient avg_grad[i] += grad[i] ; } if (fval < bestfval) { // If this iteration improved bestfval = fval ; // then update the best so far copy_weights ( bestnet , this ) ; // Keep the network if (bestfval <= fquit) // If we reached the user's goto FINISH ; // limit, we can quit } if ((user_quit = user_pressed_escape ()) != 0) goto FINISH ; } // Loop: for all initial iters avg_func /= niters ; // Mean of all points around avgnet new_fac = 1.0 / niters ; // Weight of each point sprintf ( msg , " avg=%.6lf best=%.6lf", avg_func, bestfval ) ; progress_message ( msg ) ; if (use_grad) { // Also need gradient? gradlen = 0.0 ; // Will cumulate grad length for (i=0 ; i<n_grad ; i++) { // Find gradient mean and length avg_grad[i] /= niters ; gradlen += avg_grad[i] * avg_grad[i] ; } gradlen = sqrt ( gradlen ) ; grad_weight = 0.5 ; } /* This is the temperature reduction loop and the iteration within temperature loop. */ temp = starttemp ; tempmult = exp( log( stoptemp / starttemp ) / (ntemps-1)) ; user_quit = 0 ; // Flags user pressed ESCape for (itemp=0 ; itemp<ntemps ; itemp++) { // Temp reduction loop n_good = n_bad = 0 ; // Counts better and worse sprintf ( msg , "Temp=%.3lf ", temp ) ; normal_message ( msg ) ; for (iter=0 ; iter<niters ; iter++) { // Iters per temp loop if ((n_bad >= 10) && ((double) n_good / (double) (n_good+n_bad) < 0.15)) break ; perturb ( avgnet , this , temp , reg , density ) ; // Randomly perturb about center if (use_grad) // Bias per gradient? weight_used = shift ( grad , this , grad_weight , reg ) ; if (reg) { // If using regression, estimate fval = regress ( tptr , sptr ) ; // out weights now if ((user_quit = user_pressed_escape ()) != 0) break ; if (fval >= avg_func) { // If this would raise mean ++n_bad ; // Count this bad point for user continue ; // Skip it and try again } } if (use_grad) // Need gradient, fval redundant fval = gradient ( tptr , work1 , work2 , grad ) ; else if (! reg) // If reg we got fval from regress fval = trial_error ( tptr ) ; if ((user_quit = user_pressed_escape ()) != 0) break ; if (fval >= avg_func) { // If this would raise mean ++n_bad ; // Count this bad point for user continue ; // Skip it and try again } ++n_good ; if (fval < bestfval) { // If this iteration improved bestfval = fval ; // then update the best so far copy_weights ( bestnet , this ) ; // Keep the network if (bestfval <= fquit) // If we reached the user's break ; // limit, we can quit iter -= setback ; // It often pays to keep going if (iter < 0) // at this temperature if we iter = 0 ; // are still improving } adjust ( avgnet , this , reg , new_fac ) ; // Move center slightly avg_func = new_fac * fval + (1.0 - new_fac) * avg_func ; if (use_grad) { grad_weight = new_fac * weight_used + (1.0 - new_fac) * grad_weight ; for (i=0 ; i<n_grad ; i++) // Adjust mean gradient avg_grad[i] = new_fac * grad[i] + (1.0 - new_fac) * avg_grad[i] ; } } // Loop: for all iters at a temp /* Iters within temp loop now complete */ sprintf ( msg , " %.3lf%% improved avg=%.5lf best=%.5lf", 100.0 * n_good / (double) (n_good+n_bad), avg_func, bestfval ) ; progress_message ( msg ) ; if (use_grad) { gradlen = 0.0 ; // Will cumulate grad length for (i=0 ; i<n_grad ; i++) // Find gradient length gradlen += avg_grad[i] * avg_grad[i] ; gradlen = sqrt ( gradlen ) ; sprintf ( msg , " grad=%.5lf", gradlen ) ; progress_message ( msg ) ; } if (bestfval <= fquit) // If we reached the user's break ; // limit, we can quit if (user_quit) break ; temp *= tempmult ; // Reduce temp for next pass } // through this temperature loop /* The trials left this weight set and neterr in random condition. Make them equal to the best, which will be the original if we never improved. */ FINISH: copy_weights ( this , bestnet ) ; // Return best weights in this net neterr = bestfval ; // Trials destroyed weights, err if (reg) { MEMTEXT ( "SSG: delete SingularValueDecomp" ) ; delete sptr ; } if (user_quit) return 1 ; else return 0 ; }