/**
 * @brief Applies one mini-batch gradient-descent step to the network.
 *
 * For each of the batch_size training couples, back_propagation() fills a
 * freshly prepared pair of per-layer gradient arrays; these are summed layer
 * by layer. The sums are then scaled by n / batch_size and, multiplied by
 * eta, subtracted from the weights `w` and biases `b` of every layer.
 *
 * @param mini_batch  Training couples; passed to extract_X / extract_Y.
 * @param batch_size  Number of couples in mini_batch. When it is <= 0 the
 *                    function returns without touching the network.
 * @param eta         Step size multiplying the scaled gradient sums.
 * @param n           Numerator of the scale factor n / batch_size
 *                    (presumably the training-set size -- confirm with callers).
 */
void network::SGD(train_couple* mini_batch, int batch_size, double eta, int n) {
    // An empty batch carries no gradient, and batch_size == 0 would otherwise
    // be an integer division by zero in the scale factor below.
    if (batch_size <= 0) {
        return;
    }

    matrix* X = extract_X(mini_batch, batch_size);
    matrix* Y = extract_Y(mini_batch, batch_size);
    matrix* Gw_sum = prepare_Gw();
    matrix* Gb_sum = prepare_Gb();

    // Accumulate the per-sample gradients.
    for (int i = 0; i < batch_size; i++) {
        matrix* Gw = prepare_Gw();
        matrix* Gb = prepare_Gb();
        back_propagation(X[i], Y[i], Gw, Gb);
        for (int l = 0; l < layers_count; l++) {
            Gw_sum[l] = Gw_sum[l] + Gw[l];
            Gb_sum[l] = Gb_sum[l] + Gb[l];
        }
        delete [] Gw;
        delete [] Gb;
    }

    // The factor must be computed in floating point: with two ints,
    // n / batch_size truncates (and is 0 whenever n < batch_size), which
    // would zero out or distort every update.
    // NOTE(review): the original comment calls this step "averaging"; a plain
    // mean would use 1 / batch_size -- confirm the intended meaning of n.
    const double scale = static_cast<double>(n) / batch_size;

    // Scale the summed gradients, then take the descent step.
    for (int l = 0; l < layers_count; l++) {
        Gw_sum[l] = scale * Gw_sum[l];
        Gb_sum[l] = scale * Gb_sum[l];
        w[l] = w[l] - eta * Gw_sum[l];
        b[l] = b[l] - eta * Gb_sum[l];
    }

    delete [] X;
    delete [] Y;
    delete [] Gw_sum;
    delete [] Gb_sum;
}
/*
 * comb_XB_tp
 *
 * For every pair (site, tm) with site in [0, *r) and tm in [0, *T):
 *   1. extract_X       copies a slice of Xtp into a scratch buffer,
 *   2. extract_beta_t  copies a slice of betatp into a scratch buffer,
 *   3. MProd           multiplies the two scratch buffers,
 *   4. put_together1   stores the product into XB.
 *
 * Dimension notes kept from the original author:
 *   betatp => T x p ??
 *   Xtp    => N x p
 *   XB     => N x 1
 *
 * The three scratch buffers are allocated once, reused for all pairs and
 * released before returning.
 */
void comb_XB_tp(int *n, int *r, int *T, int *p, double *Xtp, double *betatp,
     int *constant, double *XB)
{
     int site, tm;
     int n_val = *n;
     int r_val = *r;
     int T_val = *T;
     int p_val = *p;

     /* Scratch space: X slice (n*p), beta slice (p), product (n). */
     double *X_slice    = (double *) malloc((size_t)((n_val*p_val)*sizeof(double)));
     double *beta_slice = (double *) malloc((size_t)((p_val)*sizeof(double)));
     double *XB_slice   = (double *) malloc((size_t)((n_val)*sizeof(double)));

     for(site=0; site<r_val; site++){
          for(tm=0; tm<T_val; tm++){
               /* author's note: nrT x p into n x p */
               extract_X(tm, site, n, r, T, p, Xtp, X_slice);
               /* author's note: p x T into p x 1 */
               extract_beta_t(tm, T, p, betatp, beta_slice);
               /* author's note: n x 1 */
               MProd(beta_slice, constant, p, X_slice, n, XB_slice);
               put_together1(site, tm, n, r, T, XB, XB_slice);
               /* Debugging aid used by the original author: dump X_slice,
                  beta_slice and XB_slice element by element with Rprintf. */
          }
     }

     free(X_slice);
     free(beta_slice);
     free(XB_slice);
     return;
}
/*
 * beta_gp -- posterior distribution for "theta".
 *
 * Outline of what the body does:
 *   - over all (site, tm) pairs with site in [0, *r) and tm in [0, *T) it
 *     accumulates two quantities with MProd / MAdd: a p x p matrix built from
 *     the X slice, Qeta and the transposed X slice, and a p x 1 vector built
 *     from the o slice, Qeta and the transposed X slice (dimensions as noted
 *     by the original author);
 *   - adds identity * (1 / prior_sig[0]) to the p x p accumulator and
 *     prior_mu[0] / prior_sig[0] to the first p entries of the vector;
 *   - passes the accumulator through MInv (same buffer as input and output),
 *     multiplies it with the vector via MProd, and hands both to mvrnormal,
 *     which writes into betap.
 *
 * All work buffers are allocated here and freed before returning.
 */
void beta_gp(int *n, int *r, int *T, int *rT, int *p, double *prior_mu,
     double *prior_sig, double *Qeta, double *X, double *o, int *constant,
     double *betap)
{
     int site, tm, k;
     int n_val = *n;
     int p_val = *p;
     int r_val = *r;
     int T_val = *T;
     int c_val = *constant;

     /* Accumulators. */
     double *prec_acc  = (double *) malloc((size_t)((p_val*p_val)*sizeof(double)));
     double *rhs_acc   = (double *) malloc((size_t)((p_val*c_val)*sizeof(double)));
     /* Per-iteration slices and scratch space. */
     double *o_slice   = (double *) malloc((size_t)((n_val*c_val)*sizeof(double)));
     double *X_slice   = (double *) malloc((size_t)((n_val*p_val)*sizeof(double)));
     double *X_slice_t = (double *) malloc((size_t)((n_val*p_val)*sizeof(double)));
     double *work      = (double *) malloc((size_t)((n_val*p_val)*sizeof(double)));
     double *XtQX      = (double *) malloc((size_t)((p_val*p_val)*sizeof(double)));
     double *XtQo      = (double *) malloc((size_t)((p_val*c_val)*sizeof(double)));
     /* Outputs of the inversion / mean computation. */
     double *det_out   = (double *) malloc((size_t)((c_val)*sizeof(double)));
     double *post_mean = (double *) malloc((size_t)((p_val*c_val)*sizeof(double)));
     double *eye       = (double *) malloc((size_t)((p_val*p_val)*sizeof(double)));

     /* Start both accumulators from zero. */
     for(k=0; k<p_val*p_val; k++){
          prec_acc[k] = 0.0;
     }
     for(k=0; k<p_val; k++){
          rhs_acc[k] = 0.0;
     }

     for(site=0; site<r_val; site++){
          for(tm=0; tm<T_val; tm++){
               extract_X(tm, site, n, r, T, p, X, X_slice);       /* n x p */
               MTranspose(X_slice, p, n, X_slice_t);              /* p x n */
               MProd(X_slice, p, n, Qeta, n, work);               /* n x p */
               MProd(work, p, n, X_slice_t, p, XtQX);             /* p x p */
               MAdd(prec_acc, p, p, XtQX, prec_acc);              /* p x p */

               extract_alt2(site, tm, n, rT, T, o, o_slice);      /* n x 1 */
               MProd(o_slice, constant, n, Qeta, n, work);        /* n x 1 */
               MProd(work, constant, n, X_slice_t, p, XtQo);      /* p x 1 */
               MAdd(rhs_acc, p, constant, XtQo, rhs_acc);         /* p x 1 */
          }
     }

     /* Prior contribution. */
     IdentityM(p, eye);
     for(k=0; k<p_val*p_val; k++){
          prec_acc[k] = prec_acc[k] + eye[k]*(1.0/prior_sig[0]);
     }
     for(k=0; k<p_val; k++){
          rhs_acc[k] = rhs_acc[k] + prior_mu[0]/prior_sig[0];
     }

     /* Invert, form the mean, then sample into betap. */
     MInv(prec_acc, prec_acc, p, det_out);
     MProd(rhs_acc, constant, p, prec_acc, p, post_mean);         /* p x 1 */
     mvrnormal(constant, post_mean, prec_acc, p, betap);

     free(eye);
     free(prec_acc);
     free(rhs_acc);
     free(o_slice);
     free(X_slice);
     free(X_slice_t);
     free(work);
     free(XtQX);
     free(XtQo);
     free(det_out);
     free(post_mean);
     return;
}