// Forward pass through this layer.
//
// pa: activations handed in by the caller (passed by value, as declared).
//
// Caches three intermediates on the layer for later use by backprop():
//   this->pa  - the input after addcol(pa, 1) (presumably a column of ones
//               for the bias term -- confirm against addcol's definition),
//   z         - the cached input multiplied by the weight matrix W,
//   a         - funcop(z, act), i.e. the activation `act` applied to z.
//
// Returns a copy of `a`.
mat Layer::forwardprop(const mat pa) {
    // The parameter shadows the member, hence the explicit this->.
    this->pa = addcol(pa, 1);

    z = this->pa * W;

    // Store the activation on the layer and hand the same value back.
    return a = funcop(z, act);
}
// Backward pass through this layer.
//
// d: delta received from the layer that follows this one. It must have the
//    same shape as addcol(funcop(z, actd), 1), because the two are combined
//    element-wise.
//
// Side effect: overwrites the member `grad` with pa.t() * delta + lambda * W,
// where pa and z are the values cached by the last forwardprop() call.
//
// Returns delta * W.t(), the delta to pass on to the preceding layer.
mat Layer::backprop(const mat d) {
    // Derivative of the activation (actd) evaluated at the cached z.
    const mat actdz = funcop(z, actd);

    // Element-wise product with the incoming delta; addcol() widens actdz
    // to the incoming shape, then column 0 is dropped again (presumably
    // the bias column -- confirm against addcol's definition).
    mat delta = d % addcol(actdz, 1);
    delta.shed_col(0);

    // Gradient for W, plus the regularization term lambda * W.
    grad = pa.t() * delta;
    grad += lambda * W;

    // Delta to throw to the next layer in the backward direction.
    return delta * W.t();
}
void direc ( void ) { /* Local Declarations */ int modr; /* logical */ double p, t, gtp, ytp, sum, tst, dmax, dmin, gtp1, told, dnorm; int i, j, k, ii, kadd, ksub, nn, nns; static int msgcg; if ( ipr >= 5 ) fprintf ( ioout, "DIREC ENTERED\n" ); update = 0; dfail = 1; drop = ( drop == 1 || nsuper == 0 ) ? 1 : 0; varmet = ( nsuper <= maxrm ) ? 1 : 0; conjgr = ( varmet == 0 ) ? 1 : 0; if ( jstfes == 1 ) restrt = 1; if ( drop == 1 ) goto w500; if ( conjgr == 1 ) goto w110; /* COMPLEMENTARY DFP VARIABLE METRIC ALGORITHM */ if ( nsear == 0 ) goto w60; if ( sbchng == 1 ) goto w50; if ( restrt == 1 ) goto w60; move = ( step > eps ) ? 1 : 0; /* msgcg CAUSES cg TO PRINT ON NEXT CALL */ if ( ipr3 >= 0 ) msgcg = 1; modr = 0; if ( move == 0 ) goto w70; ytp = 0.0; gtp = 0.0; gtp1 = 0.0; for ( i=1; i<=nsuper; i++ ) { p = d[i]; y[i] = gradf[i] - gradp[i]; ytp = y[i] * p + ytp; gtp = gradp[i] * p + gtp; gtp1 = gradf[i] * p + gtp1; } /* USE comdfp UPDATE ONLY IF gtp1/ gtp < 0.9(say). NOTE THAT gtp < 0 */ modr = ( gtp1 > 0.99 * gtp ) ? 
1 : 0; if ( modr == 0 && ipr >= 4 ) { fprintf ( ioout, "MODR FALSE, SKIP UPDATE OF HESSIAN\n" ); fprintf ( ioout, " GTP1 = %e GTP = %e\n", gtp1, gtp ); } goto w70; /* HESSIAN UPDATE WHEN BASIC VARIABLE HITS BOUND */ w50 : if ( nsupp > maxrm && ipr3 >= 0 ) fprintf ( ioout, "SWITCH TO VARIABLE METRIC\n" ); resetr(); goto w80; /* RESET HESSIAN */ w60 : resetr(); goto w80; /* HESSIAN UPDATE WHEN NO VARIABLE HITS BOUND */ w70 : if ( modr == 0 ) goto w80; kadd = 4; ksub = 4; comdfp( ytp, gtp, &kadd, &ksub ); update = 1; /* COMPUTE SEARCH DIRECTION, D */ w80 : for ( j=1; j<=nsuper; j++ ) d[j] = -gradf[j]; rtrsol( r, d, nsuper); /* COMPUTE CONDITION NUMBER OF DIAGONAL OF R */ dmin = plinfy; dmax = 0.0; k = 0; for ( i=1; i<=nsuper; i++ ) { k += i; t = fabs( r[k] ); if ( dmin > t ) dmin = t; if ( dmax < t ) dmax = t; } cond = plinfy; if ( dmin < eps ) goto w120; cond = ( dmax / dmin ) * ( dmax / dmin ); goto w140; /* CONJUGATE GRADIENT METHOD */ w110 : if ( uncon == 0 || sbchng == 1 ) restrt = 1; cg(&msgcg); /* CHECK IF DIRECTION IS DOWNHILL */ w120 : sum = 0.0; for ( i=1; i<=nsuper; i++ ) sum = sum + d[i]*gradf[i]; if ( sum < -eps ) goto w145; /* BAD DIRECTION. RESET */ if ( restrt == 1 ) goto w235; if ( ipr3 >= 2 ) fprintf ( ioout, "DIRECTION NOT DOWNHILL. 
RESET.\n" ); restrt = 1; if ( varmet == 1 ) goto w60; goto w110; w140 : if ( ipr3 < 5 ) goto w145; k = nsuper * ( nsuper + 1 ) / 2; for ( i=1; i<=k; i++ ) fprintf ( ioout, "R[%d] = %e\n", i, r[i] ); /* THIS CODE DECIDES IF ANY VARIABLES AT BOUNDS ARE TO BE */ /* RELEASED FROM THEM */ w145 : sum = 0.0; k = 0; for ( i=1; i<=nsuper; i++ ) { dnorm = gradf[i]; /*08/1991 - 11/1991*/ /* Changed line below FROM ( fabs(dnorm) < epstop ) TO */ /* ( fabs(dnorm) < epnewt ) */ /*08/1991 - 11/1991*/ if ( fabs(dnorm) < epnewt ) goto w150; k++; sum = sum + dnorm * dnorm; w150 : ; } if ( k == 0 ) goto w400; told = sqrt( sum ) / k; goto w501; w400 : /*08/1991 - 11/1991*/ /* commented out two lines below */ /*08/1991 - 11/1991*/ /* told = epstop; */ /* goto w501; */ w500 : /* RELEASE ALL POSSIBLE NONBASICS */ told = 0.0; w501 : nn = 10; nns = nsear; /*08/1991 - 11/1991*/ /* Changed line below by removing " && ( nns % nn ) != 0 " from if */ /*08/1991 - 11/1991*/ if ( told > epnewt ) goto w540; j = nsuper + 1; if ( j > n ) goto w540; for ( ii=j; ii<=n; ii++ ) { i = inbv[ii]; /* SKIP FIXED VARIABLES AND EQUALITY SLACKS */ if ( ub[i] == alb[i] ) goto w530; if ( i <= n ) goto w505; /* REGULAR VARIABLES AND INEQUALITY SLACKS */ w505 : tst = gradf[ii]; if ( iub[ii] == 1 ) goto w510; /* VARIABLE AT LOWER BOUND */ if ( tst >= -told ) goto w530; if ( i > n && tst >= -100 * told ) goto w530; goto w520; /* VARIABLE AT UPPER BOUND */ w510 : if ( tst <= told ) goto w530; if ( i > n && tst <= 100 * told ) goto w530; /* MAKE THIS VARIABLE SUPERBASIC */ w520 : nsuper++; inbv[ii] = inbv[nsuper]; inbv[nsuper] = i; gradf[ii] = gradf[nsuper]; gradf[nsuper] = tst; iub[ii] = iub[nsuper]; iub[nsuper] = 0; if ( varmet == 1 ) addcol(); d[nsuper] = -gradf[nsuper]; dfail = 0; w530 : ; } w540 : if ( drop == 0 ) goto w220; if ( dfail == 1 && restrt == 1 ) goto w237; if ( dfail == 1 ) fprintf ( ioout, "COULD NOT DROP ANY CONSTRAINT. 
TRY -VE GRADIENT DIRECTION.\n" ); /* UPDATE DIRECTION VECTOR */ for ( j=1; j<=nsuper; j++ ) d[j] = -gradf[j]; restrt = 1; if ( varmet == 1 ) resetr(); w215 : drop = 0; w220 : for ( i=1; i<=n; i++ ) gradp[i] = gradf[i]; nsupp = nsuper; dfail = 0; if ( ipr >= 5 ) fprintf ( ioout, "DIREC COMPLETED.\n" ); return; /* NEGATIVE GRADIENT DIRECTION NOT DOWNHILL */ w235 : for ( i=1; i<=nsuper; i++ ) if ( d[i] != 0.0 ) goto w239; /* DIRECTION VECTOR ZERO. TRY DROPPING A CONSTRAINT. */ drop = 1; if ( ipr >= 1 ) fprintf ( ioout, "DIRECTION VECTOR ZERO. TRY DROPPING A CONSTRAINT.\n" ); goto w500; w237 : fprintf ( ioout, "DIRECTION VECTOR ZERO AND NO CONSTRAINT COULD BE DROPPED\n"); fprintf ( ioout, "KUHN-TUCKER CONDITION IMPLIED.\n" ); return; w239 : fprintf ( ioout, "NEGATIVE GRADIENT DIRECTION NOT DOWNHILL.\n" ); fprintf ( ioout, "CHECK DERIVATIVES AND/OR TOLERANCES.\n" ); return; /* end of direc() */ }