// Apply barrel distortion to an RGB image cube by distorting each of the
// three color slices independently with the same horizontal offset.
cube barrel_distort_rgb(const cube &F, double offset_x) {
    cube distorted(F.n_rows, F.n_cols, F.n_slices);
    for (uword channel = 0; channel < 3; ++channel) {
        distorted.slice(channel) = barrel_distort(F.slice(channel), offset_x);
    }
    return distorted;
}
// Gibbs step that re-samples every iGMRF basis matrix B_1,...,B_K, one scalar
// entry b_kij at a time, updating the running sum `gamma` in place.
//   y          : N x T matrix of standardized counts
//   B          : K-slice cube; B_k is the N x T matrix for iGMRF term k (updated in place)
//   kappa_star : K x M matrix of cluster location (precision) values
//   C          : field of K matrices, C_k = D_k^-1 * Omega_k (Omega_k = adjacency of term k)
//   gamma      : N x T running sum over terms, gamma = sum_k B_k (updated in place)
//   D          : K x T matrix; row k holds the T diagonal elements of precision Q_k
//   s          : length-N cluster assignments s(1),...,s(N)
//   tau_e      : observation (error) precision
SEXP move_B(const mat& y, cube& B, const mat& kappa_star, const field<mat>& C,
            //const field<sp_mat>& C,
            mat& gamma, const mat& D, const ucolvec& s, double tau_e)
{
BEGIN_RCPP
        // N x T matrix of standardized counts, y
        // B = (B_1,...,B_K), where B_k is N x T matrix for iGMRF term k
        // N x T matrix, gamma = sum_{k=1^K}(B_k)
        // K x T, D, where row k contains T x 1 diagonal elements of Q_k
        // sample cluster assignments, s(1), ..., s(N)
        // Q = (Q_1,...,Q_K), where Q_k is a T x T de-scaled iGMRF precision matrix
        // C = (C_1,...,C_K), where C_k = D_k^-1 * Omega_k,
        // where Omega_k is the T x T adjacency matrix for iGMRF term, k
        // D is a K x T matrix where row k contains T diagonal elements of Q_k
        // K x M matrix, kappa_star records locations for each iGMRF term
        int K = B.n_slices; int N = y.n_rows; int T = D.n_cols;
        colvec bbar_ki(T); bbar_ki.zeros();       // conditional prior mean for row i of term k
        rowvec gammatilde_ki(T); gammatilde_ki.zeros(); // gamma.row(i) with b_kij removed
        rowvec ytilde_ki(T); ytilde_ki.zeros();   // residual of y after the other terms
        rowvec d_k(T);                            // diagonal of Q_k
        vec zro(T); zro.zeros();
        double e_ij, phi_ij, bkij, h_ij;
        int k = 0, i = 0, j = 0; /* loop variables */
        for( k = 0; k < K; k++ ) /* over iGMRF terms */
        {
             d_k = D.row(k);
             for( i = 0; i < N; i++ ) /* over units */
             {
                  /* take out T x 1, b_ki, to be sampled from gamma */
                  //gammatilde_ki = gamma.row(i) - B.slice(k).row(i); /* 1 x T */
                  //ytilde_ki = y.row(i) - gammatilde_ki;
                  gammatilde_ki = gamma.row(i); /* 1 x T */
                  // mean of univariate iGMRF, b_kij = 1/d_kj * (omega_kj(-j) * b_ki(-j))
                  // NOTE(review): bbar_ki is computed once from the row's current
                  // values and is NOT refreshed as entries are re-sampled in the
                  // j-loop below (the commented per-j dot() would do that) — looks
                  // intentional but worth confirming against the model write-up.
                  bbar_ki = C(k,0) * B.slice(k).row(i).t(); /* T x 1 */
                  for( j = 0; j < T; j++ ) /* over time points */
                  {
                       // remove the current b_kij from the running sums
                       gammatilde_ki(j) -= B.slice(k)(i,j);
                       ytilde_ki(j) = y(i,j) - gammatilde_ki(j);
                       // mean of univariate iGMRF, b_kij = 1/d_kj * (omega_kj(-j) * b_ki(-j))
                       B.slice(k)(i,j) = 0;
                       //bbar_ki(j) = dot( C(k,0).row(j), B.slice(k).row(i) );
                       // Gaussian full conditional: precision phi_ij, mean e_ij/phi_ij
                       e_ij = tau_e*ytilde_ki(j) + d_k(j)*kappa_star(k,s(i)) * bbar_ki(j);
                       phi_ij = tau_e + d_k(j)*kappa_star(k,s(i));
                       h_ij = (e_ij / phi_ij);
                       bkij = rnorm( 1, (h_ij), sqrt(1/phi_ij) )[0];
                       B.slice(k)(i,j) = bkij; /* put back new sampled values for b_kij */
                       gammatilde_ki(j) += B.slice(k)(i,j);
                  } /* end loop j over time points */
                  /* put back new sampled values for b_ki */
                  //gamma.row(i) = gammatilde_ki + B.slice(k).row(i);
                  gamma.row(i) = gammatilde_ki;
             } /* end loop i over units */
        } /* end loop k over iGMRF terms */
END_RCPP
} /* end function ustep to sample B_1,...,B_K */
/**********************************************************************
* MSE Class
***********************************************************************/
// Mean-squared-error cost: 0.5 * mean((pred - y)^2) over all elements.
// `y` must arrive as a column cube (n x 1 x 1); `pred` is flattened to match.
double MSE::cost(cube pred, cube y){
    dbg_assert(y.n_cols == 1 && y.n_slices == 1);
    // Flatten both cubes to n_elem x 1 x 1 so slice(0) holds every value.
    pred.reshape(pred.n_elem,1,1);
    y.reshape(y.n_elem,1,1);
    // The previous element-wise accumulation loop computed the same quantity
    // into a local that was then discarded; keep only the vectorized form
    // that was actually returned.
    return 0.5 * mean(mean(square(pred.slice(0) - y.slice(0))));
}
// Slice-wise 3-D convolution of `body` with a 3-plane `kernel`.
// Each interior slice i is the sum of 2-D "same" convolutions of slices
// i-1, i, i+1 with kernel planes 0, 1, 2; the first and last slices are
// left as unmodified copies of `body`. An all-non-positive body
// short-circuits to zeros.
cube Utils::conv3(cube body, cube kernel) {
    if (body.max() <= 0) {
        return zeros(body.n_rows, body.n_cols, body.n_slices);
    }
    cube result(body); // boundary slices keep their original values
    for (uword s = 1; s + 1 < body.n_slices; ++s) {
        result.slice(s) = conv2(body.slice(s - 1), kernel.slice(0), "same")
                        + conv2(body.slice(s),     kernel.slice(1), "same")
                        + conv2(body.slice(s + 1), kernel.slice(2), "same");
    }
    return result;
}
// Gather an arbitrary (possibly non-contiguous) selection of slices from X,
// in the order listed in `index`, into a fresh cube.
cube noncont_slices(const cube & X, const uvec & index){
    const unsigned int count = index.n_elem;
    cube picked(X.n_rows, X.n_cols, count);
    for(unsigned int k = 0; k < count; ++k){
        picked.slice(k) = X.slice(index[k]);
    }
    return picked;
}
void lmMean(const cube& wX, const rowvec& vec_beta, mat& mean){ int n = wX.n_slices, P=wX.n_cols; mean = zeros(n,P); for(int i=0;i<n;i++){ mean.row(i) = vec_beta* wX.slice(i); } }
// Cube overload of conv2: convolve every slice of F with the 2-D kernel H.
cube conv2(const cube &F, const mat &H) {
    cube out(F.n_rows, F.n_cols, F.n_slices);
    for (uword s = 0; s < F.n_slices; ++s) {
        out.slice(s) = conv2(F.slice(s), H);
    }
    return out;
}
// Cube overload of imresize2: resize each slice of C to m x n.
cube imresize2(const cube &C, uword m, uword n) {
    cube resized(m, n, C.n_slices);
    for (uword s = 0; s < C.n_slices; ++s) {
        resized.slice(s) = imresize2(C.slice(s), m, n);
    }
    return resized;
}
//! compute (trial)point-to-(model)point Mahalanobis distance //! @param[in] index index of the points (in trial and model) to be compared //! @param[in] &trial reference to the trial //! @param[in] &model reference to the model //! @param[in] &variance reference to the model variance //! @return Mahalanobis distance between trial-point and model-point float Classifier::mahalanobisDist(int index,mat &trial,mat &model,cube &variance) { mat difference = trial.col(index) - model.col(index); mat distance = (difference.t() * (variance.slice(index)).i()) * difference; return distance(0,0); }
// Extract the slices listed in `index` from X, cropping each to the
// inclusive submatrix window [r_start..r_stop] x [c_start..c_stop].
cube noncont_slices(const cube & X, const uvec & index, int r_start, int c_start, int r_stop, int c_stop){
    const int winRows = r_stop - r_start + 1;
    const int winCols = c_stop - c_start + 1;
    cube cropped(winRows, winCols, index.n_elem);
    for(unsigned int k = 0; k < index.n_elem; ++k){
        cropped.slice(k) = X.slice(index[k]).submat(r_start, c_start, r_stop, c_stop);
    }
    return cropped;
}
// Pre-whiten response rows of `y` (and its centered copy) with a switching
// VAR(L) filter: for each kept time t in WTIME, subtract
// sum_l y(t-l-1) * Phi_l, where Phi_l is matphi1 or matphi2 depending on the
// stimulus indicator at t-l-1. With L == 0 the inputs pass through unchanged.
// Assumes every t in WTIME satisfies t >= L so t-l-1 stays in range —
// presumably guaranteed by the caller; TODO confirm.
void white(const mat& y, const mat& y_center, const uvec& sti, const cube& matphi1, const cube& matphi2, mat& wy, mat& wy_center, const uvec& WTIME){
    int L=matphi1.n_slices, wT = WTIME.n_elem, t=0;
    mat multiply;
    if(L==0){wy=y; wy_center = y_center;}
    else{
        // start from the retained rows, then subtract the lagged terms
        wy = y.rows(WTIME);
        wy_center = y_center.rows(WTIME);
        for(int wt=0; wt<wT ; wt++){
            t = WTIME[wt];
            for(int l=0; l<L; l++){
                // pick the regime-specific lag-l coefficient matrix
                multiply = (sti[t-l-1])?(matphi1.slice(l)):(matphi2.slice(l));
                wy.row(wt) -= y.row(t-l-1)*multiply;
                wy_center.row(wt) -= y_center.row(t-l-1)*multiply;
            }
        }
    }
}
// Backward pass of the convolutional layer: accumulate weight/bias gradients
// from the incoming delta and the previous layer's padded activations, then
// compute the delta to propagate further back (full correlation with the
// flipped kernels) and recurse into the previous layer.
cube ConvLayer::backward(cube delta) {
    // NOTE: delta may come from a linear layer, so restore this layer's
    // activation shape before using it.
    delta.reshape(_a.n_rows, _a.n_cols, _a.n_slices);
    if(!_prev) {
        dbg_print("null pointer to _prev in ConvLayer::backward");
        return zeros<cube>(0,0,0);
    }
    // Compute Weight Updates
    cube input = addPadding(_prev->getActivationCube(), _ksize);
    for(int i = 0; i < _units; i++){
        // dz_dw: gradient of unit i's kernel w.r.t. each input channel
        for(int j=0; j < input.n_slices; j++) {
            _dw(i).slice(j) = conv2d(input.slice(j), delta.slice(i));
        }
        // dz_db: bias gradient is the sum of unit i's delta map
        _db(0,0,i) = accu(delta.slice(i));
    }
    // Compute next delta (shape of the previous layer's activations)
    int nr = _prev->getActivationCube().n_rows;
    int nc = _prev->getActivationCube().n_cols;
    int ns = _prev->getActivationCube().n_slices;
    delta = addPadding(delta, _ksize);
    cube next_delta = zeros<cube>(nr,nc,ns);
    for(int i=0; i < ns; i++){
        for(int j=0; j < _units; j++) {
            // correlate padded delta with the 180-degree-rotated kernel
            next_delta.slice(i) += conv2d( delta.slice(j), fliplr(flipud(_w(j).slice(i))) );
        }
    }
    // _prev is known non-null here (checked above); recurse down the chain.
    if(_prev) { return(_prev->backward(next_delta)); }
    return(next_delta);
}
// Build the stacked design cube X(t) = [diag(x1_t); diag(x2_t); diag(1)]
// (3P x P per time point) and pre-whiten the retained slices (WTIME) with
// the switching VAR(L) filter, as in the response-side white() above.
// Note: the outer `t` is shadowed by the first loop's `t` and only used in
// the whitening loop below.
void white(const mat& x1, const mat& x2, const uvec& sti, const cube& matphi1, const cube& matphi2, cube& wX, const uvec& WTIME){
    int T = x1.n_rows, L = matphi1.n_slices, P = x1.n_cols, wT = WTIME.n_elem, t=0;
    mat multiply(P,P);
    cube X(3*P,P,T);
    for(int t=0;t<T;t++){
        // slope-1, slope-2 and intercept blocks stacked per time point
        X.slice(t)=join_cols(join_cols(diagmat(x1.row(t)),diagmat(x2.row(t))),diagmat(ones(1,P)));
    }
    wX = noncont_slices(X, WTIME);
    if(L>0){
        for(int wt=0; wt<wT ; wt++){
            t = WTIME[wt];
            for(int l=0; l<L; l++){
                // regime-specific lag-l coefficient matrix
                multiply = sti[t-l-1]?(matphi1.slice(l)):(matphi2.slice(l));
                wX.slice(wt) -= X.slice(t-l-1)*multiply;
            }
        }
    }
}
void lm2(const mat& wy, const cube& wX, const mat& Omega, rowvec& numer, mat& denom){ int P=wy.n_cols, n = wy.n_rows, k = wX.n_rows; denom.zeros(), numer.zeros(); mat twX(P,k); for(int t=0; t<n; t++){ twX = trans(wX.slice(t)); denom=denom+wX.slice(t)*Omega*twX; numer=numer+wy.row(t)*Omega*twX; } }
// Segmented variant of the design-side whitening: `End` marks segment
// boundaries in the raw timeline and `wEnd` the corresponding boundaries in
// the whitened timeline (each segment drops its first L time points). For
// each segment, copy the usable slices and subtract the switching VAR(L)
// lag terms. `t - L*(run+1)` maps a raw index into the whitened cube —
// presumably relies on wEnd[run] == End[run] - L*run; TODO confirm.
void white(const mat& x1, const mat& x2, const uvec& sti, const cube& matphi1, const cube& matphi2, cube& wX, const uvec& End, const uvec & wEnd){
    int T = x1.n_rows, L = matphi1.n_slices, P = x1.n_cols, m = End.n_elem-1;
    mat multiply(P,P);
    cube X(3*P,P,T);
    for(int t=0;t<T;t++){
        // stacked per-time design block: [diag(x1_t); diag(x2_t); diag(1)]
        X.slice(t)=join_cols(join_cols(diagmat(x1.row(t)),diagmat(x2.row(t))),diagmat(ones(1,P)));
    }
    for(int run=0;run<m;run++){
        wX.slices(wEnd[run],wEnd[run+1]-1) = X.slices(End[run]+L,End[run+1]-1);
        if(L>0){
            for(unsigned int t=End[run]+L;t<End[run+1];t++){
                for(int l=0;l<L;l++){
                    // regime-specific lag-l coefficient matrix
                    multiply = sti[t-l-1]?(matphi1.slice(l)):(matphi2.slice(l));
                    wX.slice(t-L*(run+1)) -= X.slice(t-l-1)*multiply;
                }
            }
        }
    }
}
// Segmented variant of the response-side whitening: within each segment
// [End[run], End[run+1]) the first L rows are dropped and the remaining rows
// have the switching VAR(L) lag terms subtracted, then the whitened rows are
// written into the output at [wEnd[run], wEnd[run+1]). With L == 0 the
// inputs pass through unchanged.
void white(const mat& y, const mat& y_center, const uvec& sti, const cube& matphi1, const cube& matphi2, mat& wwy, mat& wwy_center, const uvec& End, const uvec& wEnd){
    int L=matphi1.n_slices, m = End.n_elem-1;
    mat multiply, wy, wy_center;
    if(L==0){wwy=y; wwy_center = y_center;}
    else{
        for(int run=0;run< m ;run++){
            // rows of this segment that have a full set of L lags available
            wy = y.rows(End[run]+L,End[run+1]-1);
            wy_center = y_center.rows(End[run]+L,End[run+1]-1);
            for(unsigned int t=End[run]+L;t<End[run+1];t++){
                for(int l=0;l<L;l++){
                    // regime-specific lag-l coefficient matrix
                    multiply = (sti[t-l-1])?(matphi1.slice(l)):(matphi2.slice(l));
                    wy.row(t-End[run]-L) -= y.row(t-l-1)*multiply;
                    wy_center.row(t-End[run]-L) -= y_center.row(t-l-1)*multiply;
                }
            }
            wwy.rows(wEnd[run],wEnd[run+1]-1) = wy;
            wwy_center.rows(wEnd[run],wEnd[run+1]-1) = wy_center;
        }
    }
}
// Forward pass: max-pool each input slice (recording the argmax positions in
// _mask for the backward pass), then hand the pooled activations to the next
// layer if one is attached.
cube maxPoolLayer::forward (cube x)
{
    for(int s = 0; s < x.n_slices; ++s) {
        _a.slice(s) = maxDownSample(x.slice(s), _sample_size, _mask.slice(s));
    }
    return _next ? _next->forward(_a) : _a;
}
// Build the HRF-convolved design cube X(t) (J*P x P per time point): for each
// series p, column p stacks the two J-long halves of Hc's row t weighted by
// beta[p] and beta[P+p]. Then keep the WTIME slices and subtract the
// switching VAR(L) lag terms, as in the other WTIME white() overloads.
// Note: the outer `t` is shadowed by the first loop's `t`.
void white(const mat& Hc, const rowvec vec_beta, const uvec& sti, const cube& matphi1, const cube& matphi2, cube& wHX, const uvec& WTIME){
    int T = Hc.n_rows, J = Hc.n_cols/2, L = matphi1.n_slices, P = matphi1.n_rows, wT = WTIME.n_elem, t=0;
    mat multiply(P,P);
    cube X = zeros(J*P,P,T);
    for(int t=0;t<T;t++){
        for(int p=0; p<P;p++){
            // combine the two Hc halves with their beta weights for series p
            X.slice(t).col(p).rows(J*p, J*(p+1)-1) = trans(Hc.submat(t, 0, t, J-1))*vec_beta[p] + trans(Hc.submat(t, J, t, 2*J-1))*vec_beta[P+p];
        }
    }
    wHX = noncont_slices(X, WTIME);
    if(L>0){
        for(int wt=0; wt < wT; wt++){
            t = WTIME[wt];
            for(int l=0; l<L; l++){
                // regime-specific lag-l coefficient matrix
                multiply = sti[t-l-1]?(matphi1.slice(l)):(matphi2.slice(l));
                wHX.slice(wt) -= X.slice(t-l-1)*multiply;
            }
        }
    }
}
// Gibbs step for the cluster-specific precision locations kappa_star(k,m):
// for each cluster m and iGMRF term k, draw from the gamma full conditional
// with shape a1_mk and rate b1_mk built from the quadratic forms
// b_ki' Q_k b_ki of the members of cluster m, weighted by the inverse
// inclusion probabilities ipr (Horvitz-Thompson style).
//   kappa_star : K x M location matrix (updated in place)
//   B          : K-slice cube of N x T de-noised functions
//   Q          : K-slice cube of T x T de-scaled iGMRF precision matrices
//   s          : length-N cluster assignments; o : per-term order offsets
//   T, a, b    : time dimension and gamma prior shape/rate
SEXP move_kappastar_alt(mat& kappa_star, const cube& B, const cube& Q, const ucolvec& s, uvec& o, int T, int a, int b, const vec& ipr)
{
BEGIN_RCPP
        // K x M matrix, kappa_star, records location values
        // N x T matrices, B_1,...,B_K contains the de-noised functions
        // on posterior for kappa_star.
        int K = kappa_star.n_rows; /* number of iGMRF terms */
        int M = kappa_star.n_cols; /* number of clusters */
        int k = 0, m = 0, i = 0, count_m;
        double num_m;          // weighted (population-scaled) cluster size
        uvec pos_m;            // indices of units assigned to cluster m
        double a1_mk; /* posterior shape parameter */
        double b1_mk; /* posterior rate parameter */
        colvec b_ki(T);
        for(m = 0; m < M; m++)
        {
             pos_m = find( s == m ); /* s is vector length N */
             count_m = pos_m.n_elem;
             num_m = sum( 1/ipr(pos_m) );
             /* sample posterior for kappa_star(k,) for each iGMRF term, k = 1,...,K */
             for( k = 0; k < K; k++ )
             {
                  b1_mk = 0;
                  for( i = 0; i < count_m; i++ )
                  {
                       b_ki = B.slice(k).row(pos_m(i)).t(); /* T x 1 */
                       // accumulate 0.5 * b' Q_k b, inverse-probability weighted
                       b1_mk += 0.5*( as_scalar(b_ki.t()*symmatl(Q.slice(k))*b_ki) / ipr(pos_m(i)) );
                  } /* end loop i over num_m weighted units in cluster m */
                  b1_mk += b;
                  // rank deficiency of Q_k (order o(k)) reduces the effective d.o.f.
                  a1_mk = 0.5*num_m*(double(T)-double(o(k))) + a;
                  kappa_star(k,m) = rgamma(1, a1_mk, (1/b1_mk))[0];
             } /* end loop over K iGMRF terms */
        } /* end loop m over clusters */
        /* add a bumper */
END_RCPP
} /* end function move_kappastar() to sample cluster locations */
// Backward pass: route each delta entry back to the position that won the
// max during the forward pass (stored in _mask), then recurse into the
// previous layer if one is attached.
cube maxPoolLayer::backward (cube delta)
{
    cube routed(_mask);
    for(int s = 0; s < delta.n_slices; ++s) {
        routed.slice(s) = maxUpSample(delta.slice(s), _sample_size, _mask.slice(s));
    }
    return _prev ? _prev->backward(routed) : routed;
}
// Segmented variant of the HRF-design whitening: builds the same J*P x P
// per-time design cube as the WTIME overload, then processes each segment
// [End[run], End[run+1]) — copying the slices past the first L time points
// into [wEnd[run], wEnd[run+1]) and subtracting the switching VAR(L) lag
// terms. `t - L*(run+1)` maps raw to whitened indices — presumably relies on
// wEnd[run] == End[run] - L*run; TODO confirm.
void white(const mat& Hc, const rowvec vec_beta, const uvec& sti, const cube& matphi1, const cube& matphi2, cube& wHX, const uvec& End, const uvec & wEnd){
    int T = Hc.n_rows, J = Hc.n_cols/2, L = matphi1.n_slices, P = matphi1.n_rows, m = End.n_elem-1;
    mat multiply(P,P);
    cube X = zeros(J*P,P,T);
    for(int t=0;t<T;t++){
        for(int p=0; p<P;p++){
            // combine the two Hc halves with their beta weights for series p
            X.slice(t).col(p).rows(J*p, J*(p+1)-1) = trans(Hc.submat(t, 0, t, J-1))*vec_beta[p] + trans(Hc.submat(t, J, t, 2*J-1))*vec_beta[P+p];
        }
    }
    for(int run=0;run<m;run++){
        wHX.slices(wEnd[run],wEnd[run+1]-1) = X.slices(End[run]+L,End[run+1]-1);
        if(L>0){
            for(unsigned int t=End[run]+L;t<End[run+1];t++){
                for(int l=0;l<L;l++){
                    // regime-specific lag-l coefficient matrix
                    multiply = sti[t-l-1]?(matphi1.slice(l)):(matphi2.slice(l));
                    wHX.slice(t-L*(run+1)) -= X.slice(t-l-1)*multiply;
                }
            }
        }
    }
}
// Build the VAR-coefficient design cube wZ for the retained time points
// WTIME. Each slice is partitioned into 2L blocks of P*P rows: blocks
// 0..L-1 hold the regime-1 lag designs, blocks L..2L-1 the regime-2 lag
// designs. For lag l at time t, the active regime's block gets, per series p,
// the lagged row U(t-l) masked by whether series p includes lag l
// (vxicat.col(p) >= l); the inactive regime's block is zeroed.
void phi_design(mat& U, const uvec& sti, const int L, umat& vxicat, cube& wZ, const uvec& WTIME){
    int P=U.n_cols, wT = WTIME.n_elem, t;
    int Psq=P*P;
    wZ.zeros();
    for(int wt=0; wt<wT; wt++){
        t = WTIME[wt];
        for(int l=1;l<=L;l++){
            if(sti[t-l]){
                // stimulus on at t-l: fill regime-1 block, clear regime-2 block
                for(int p=0;p<P;p++){
                    uvec cond= (vxicat.col(p) >= l); // lag-inclusion mask for series p
                    wZ.slice(wt).col(p).subvec((l-1)*Psq+ p*P,(l-1)*Psq+(p+1)*P-1) = (cond)%trans(U.row(t-l));
                }
                wZ.slice(wt).rows((L+l-1)*Psq,(L+l)*Psq-1).zeros();
            }else{
                // stimulus off at t-l: clear regime-1 block, fill regime-2 block
                wZ.slice(wt).rows((l-1)*Psq,l*Psq-1).zeros();
                for(int p=0;p<P;p++){
                    uvec cond= (vxicat.col(p) >= l); // lag-inclusion mask for series p
                    wZ.slice(wt).col(p).subvec((L+l-1)*Psq+p*P,(L+l-1)*Psq+(p+1)*P-1)=cond%trans(U.row(t-l));
                }
            }
        }
    }
}
// update vector of cluster membership indicators, s(i),....,s(N)
// Neal-style (Algorithm 8 flavor) Dirichlet-process Gibbs step: for each unit
// i, remove it from its cluster (deleting the cluster if i was a singleton),
// compute the marginal weight q0i for opening a new cluster plus likelihood
// weights for every existing cluster, draw the new assignment, and if a new
// cluster is chosen sample its kappa_star locations. Cluster sizes are kept
// both raw (num) and Horvitz-Thompson population-scaled (Num, via 1/ipr).
SEXP clusterstep(const cube& B, mat& kappa_star, mat& B1, const uvec& o,
                 const field<mat>& C,
                 //const field<sp_mat>& C,
                 const mat& D, ucolvec& s,
                 ucolvec& num, unsigned int& M, double& conc,
                 int a, int b, const vec& ipr, colvec& Num)
{
BEGIN_RCPP
        // sample cluster assignments, s(1), ..., s(N)
        // B = (B_1,...,B_K), where B_k is N x T matrix for iGMRF term k
        // Q = (Q_1,...,Q_K), where Q_k is a T x T de-scaled iGMRF precision matrix
        // C = (C_1,...,C_K), where C_k = D_k^-1 * Omega_k,
        // where Omega_k is the T x T adjacency matrix for iGMRF term, k
        // D is a K x T matrix where row k contains T diagonal elements of Q_k
        // K x M matrix, kappa_star records locations for each iGMRF term
        // o = (o_1,...,o_k) is a vector where each entry denotes the order of term K.
        // e.g. RW(1) -> o = 2, RW(2) -> o = 3, seas(3) -> o = 3
        int N = B.slice(0).n_rows;
        int T = B.slice(0).n_cols;
        int K = C.n_rows;
        double sweights = 0;
        // zro is the zeros.T vector
        colvec zro(T); zro.zeros();
        uvec o_adjust = o;
        //o_adjust.zeros();
        // capture quadratic product for rate kernel of posterior gamma
        // posterior for kappa_star(k,i).
        // save B1 to latter (in another function) compute posterior for kappa_star
        // mat B1(K,N);
        double a1k; /* posterior shape for kappa_star(k,i) under 1 obs */
        B1.zeros();
        int i, j, k; unsigned int l;
        /* mat D_k(T,T), Omega_k(T,T); cube Q(T,T,K);
           for(k = 0; k < k; k++)
           {
                D_k.zeros(); D_k.diag() = D.row(k);
                Omega_k = D_k * C(k,0);
                Q.slice(k) = D_k - Omega_k;
           } // end loop K over iGMRF terms */
        for(i = 0; i < N; i++)
        {
             // check if _i assigned to singleton cluster
             // if so, remove the cluster associated to _i
             // and decrement the cluster labels for m > s(i)
             if(num(s(i)) == 1) /* remove singleton cluster */
             {
                  kappa_star.shed_col(s(i));
                  num.shed_row(s(i));
                  Num.shed_row(s(i));
                  M -= 1; /* decrement cluster count */
                  // decrement cluster tracking values by 1 for tossed cluster
                  s( find(s > s(i)) ) -= 1;
             } /* end cluster accounting adjustment for singleton cluster */
             else /* cluster contains more than one unit */
             {
                  num(s(i)) -= 1;
                  /* scale up num to population totals, Num, based on H-T
                     inverse probability estimator */
                  Num(s(i)) -= 1/ipr(i);
             } /* decrement non-singleton cluster count by one */
             // construct normalization constant, q0i, to sample s(i)
             // build loqq0 and exponentiate
             colvec bki(T), bbar_ki(T); /* T x 1, D_k^-1*Omega_k*b_ki = C(k,0)*b_ki */
             mat bbar_i(K,T); bbar_i.zeros();
             double logd_dk = 0; /* set of T 0 mean gaussian densities for term k */
             double logq0ki = 0, logq0i = 0, q0i = 0;
             // accumulate weight, q0i, for s(i) over K iGMRF terms
             for( k = 0; k < K; k++)
             {
                  logq0ki = 0; /* reset k-indexed log-like on each k */
                  //a1k = 0.5*(double(T)) + a;
                  a1k = 0.5*(double(T)-double(o_adjust(k))) + a;
                  bki = B.slice(k).row(i).t();
                  bbar_ki = C(k,0) * bki; /* T x 1 */
                  bbar_i.row(k) = bbar_ki.t();
                  // quadratic-form kernel 0.5 * sum_j d_kj (b_kij - bbar_kij)^2,
                  // cached in B1 for reuse by the kappa_star sampler
                  B1(k,i) = 0.5*dot( D.row(k), pow((bki-bbar_ki),2) ); /* no b */
                  logd_dk = 0; /* set of T gaussian densities for term k */
                  /* dmvn(zro|m,Q.slice(k),true) */
                  for( j = 0; j < T; j++ )
                  {
                       logd_dk += R::dnorm(0.0,0.0,double(1/sqrt(D(k,j))),true);
                  }
                  // log marginal likelihood of b_ki with kappa integrated out
                  logq0ki = logd_dk + lgamma(a1k) + a*log(b) -
                            lgamma(a) - a1k*trunc_log(B1(k,i)+b);
                  logq0i += logq0ki;
             } /* end loop k over iGMRF terms */
             q0i = trunc_exp(logq0i);
             // construct posterior sampling weights to sample s(i)
             colvec weights(M+1); weights.zeros();
             /* evaluate likelihood under kappa_star(k,i) */
             double lweights_l;
             for(l = 0; l < M; l++) /* cycle through all clusters for s(i) */
             {
                  s(i) = l; /* will compute likelihoods for every cluster */
                  lweights_l = 0; /* hold log densities for K computations */
                  for(k = 0; k < K; k++)
                  {
                       bki = B.slice(k).row(i).t();
                       for( j = 0; j < T; j++ )
                       {
                            /* effectively making assignment, s(i) = l */
                            lweights_l += trunc_log(R::dnorm(bki(j),bbar_i(k,j),
                                          double(1/sqrt(kappa_star(k,l)*D(k,j))),false));
                       } /* end loop j over time index */
                  } /* end loop k over iGMRF terms */
                  //if(lweights_l < -300){lweights_l = -300;}
                  weights(l) = trunc_exp(lweights_l);
                  // CRP prior weight: population-scaled cluster size
                  weights(l) *= double(Num(s(i)))/(double(N) - 1/ipr(i) + conc);
             } /* end loop l over existing or populated clusters */
             /* M+1 or new component sampled from F_{0} */
             weights(M) = conc/(double(N) - 1/ipr(i) + conc)*q0i;
             // normalize weights (fall back to uniform if all underflowed to 0)
             sweights = sum(weights);
             if(sweights == 0)
             {
                  weights.ones(); weights *= 1/(double(M)+1);
             }
             else
             {
                  weights /= sweights;
             }
             // conduct discrete posterior draw for s(j)
             unsigned long MplusOne = M + 1;
             s(i) = rdrawone(weights, MplusOne);
             // if new cluster chosen, generate new location
             if(s(i) == M)
             {
                  /* sample posterior of ksi_star[k,m] for 1 (vs. n_m) observation */
                  double a_star_k; /* shape for 1 obs */
                  double bstar_ki;
                  kappa_star.insert_cols(M,1); /* add K vector new location to kappa_star */
                  num.insert_rows(M,1);
                  Num.insert_rows(M,1);
                  for(k = 0; k < K; k++)
                  {
                       a_star_k = 0.5*(double(T) - double(o_adjust(k))) + a; /* shape for 1 obs */
                       bstar_ki = B1(k,i) + b; /* B1(k,i) is a scalar quadratic product */
                       /* bki = B.slice(k).row(i).t();
                          bstar_ki = 0.5*( as_scalar(bki.t()*symmatl(Q.slice(k))*bki) ) + b; */
                       kappa_star(k,M) = rgamma(1, a_star_k, (1/bstar_ki))[0];
                  }
                  num(M) = 1;
                  Num(M) = 1/ipr(i);
                  M = MplusOne;
             }
             else
             {
                  num(s(i)) += 1;
                  Num(s(i)) += 1/ipr(i);
             }
        } /* end loop i for cluster assignment to unit i = 1,...,N */
END_RCPP
} /* end function bstep for cluster assignments, s, and computing zb */
// Draw the same circle into every slice of I, using v(k) as the intensity
// for slice k (e.g. one value per color channel).
void draw_circle(cube &I, const vec &v, vec pt, double radius) {
    for (uword s = 0; s < v.n_elem; ++s) {
        draw_circle(I.slice(s), v(s), pt, radius);
    }
}
// Backward pass of the fully-connected layer: accumulate momentum-smoothed
// weight/bias gradients from the incoming delta, then propagate
// W' * delta to the previous layer.
cube LinearLayer::backward(cube delta){
    // we need to calculate two types
    // of gradients here:
    // 1. d(z) / d(theta)
    // 2. d(z) / d(a_prev_layer)
    // Phase 1: Consume
    // compute d(z) / d(theta)
    // NOTE(review): this guard reads `prev` while the tail of the function
    // uses `_prev` — confirm whether these are two distinct members or a
    // naming inconsistency.
    if(!prev){
        cout << __LINE__ << ": _prev is null, this should not happen." << endl;
        return zeros<cube>(0,0,0);
    }
    // delta is expected as a column cube (n x 1 x 1)
    dbg_assert(delta.n_cols == 1);
    dbg_assert(delta.n_slices == 1);
    dbg_print("backward at linear layer(" << _units << ")");
    mat delta_mat = delta.slice(0);
    cube prev_a = prev->getActivationCube();
    //mat dz_dw = reshape(prev_a, prev_a.n_elem, 1, 1);
    mat dz_dw = vectorise(prev_a); //TODO: this still looks jenky
    // Add bias term to dz_dw
    //mat bias = ones<mat>(dz_dw.n_cols, 1);
    //dz_dw.insert_rows(0, bias); // Add bias term
    // delta_weight = delta * d(z)/d(w)
    // dz_dw is [nsamples, prev_nunits]
    // delta is [nsamples, nunits] , delta is [nunits,1]
    // momentum-smoothed accumulation of the gradients
    _dw.slice(0) = (momentum * _dw.slice(0)) + delta_mat * trans(dz_dw);
    _db.slice(0) = (momentum * _db.slice(0)) + delta_mat;
    // Phase 2: Produce
    // compute grad = d(z) / d(a_prev_layer)
    // NOTE: gradi here equals to just theta, however
    // we should remove the bias column
    // mat grad = _w.tail_cols(_w.n_cols - 1);
    // compute delta_new = sum(delta * grad) over output units
    // _w [_units x prev->_units]
    // delta [_units x 1 ]
    // new delta [prev->_units x 1]
    mat new_delta = trans(_w.slice(0)) * delta_mat;
    // TODO: reduce the amount of mat copies.
    // as it stands, 5+ copies are performed...not good.
    cube next_delta = zeros<cube>(new_delta.n_rows, 1, 1);
    next_delta.slice(0) = new_delta;
    if(_prev) return(_prev->backward(next_delta));
    return(next_delta);
}
void HMM::_fwdback(mat init_state_distrib, mat _transmat, mat obslik, mat &alpha, mat &beta, mat& gamma, double &loglik, mat &xi_summed, cube &gamma2, cube &obslik2, bool fwd_only, bool compute_gamma2) {
    /*
     * Compute the posterior probs. in an HMM using the forwards backwards algo.
     *
     * Notation:
     * Y(t) = observation, Q(t) = hidden state, M(t) = mixture variable (for MOG outputs)
     * A(t) = discrete input (action) (for POMDP models)
     *
     * INPUT:
     * init_state_distrib(i) = Pr(Q(1) = i)
     * transmat(i,j) = Pr(Q(t) = j | Q(t-1)=i)
     * or transmat{a}(i,j) = Pr(Q(t) = j | Q(t-1)=i, A(t-1)=a) if there are discrete inputs
     * obslik(i,t) = Pr(Y(t)| Q(t)=i)
     */
    // These three are hard-wired here; `maximize` (Viterbi-style max-product)
    // is effectively dead code below.
    bool scaled = true;
    bool maximize = false;
    bool compute_xi = true;
    int Q = obslik.n_rows;
    int T = obslik.n_cols;
    // NOTE(review): mixmat is never assigned before being read in the
    // compute_gamma2 branch below (M = mixmat.n_cols on an empty mat) —
    // confirm it was meant to be a member or parameter.
    mat mixmat;
    mat act; // action sequence; all zeros here — could become an &act input (TODO)
    mat scale;
    if (obslik2.is_empty()) compute_gamma2 = false;
    act = zeros(1,T); // TODO this could be a colvec
    scale = ones(1,T);
    // single-action case: wrap the one transition matrix in a 1x1 field so
    // transmat(act(t)) indexing works uniformly
    field<mat> transmat(1,1);
    transmat(0,0) = _transmat;
    // scale(t) = Pr(O(t) | O(1:t-1)) = gamma21/c(t) as defined by Rabiner (1989).
    // Hence prod_t scale(t) = Pr(O(1)) Pr(O(2)|O(1)) Pr(O(3) | O(1:2)) ... = Pr(O(1), ... ,O(T))
    // or log P = sum_t log scale(t).
    // Rabiner suggests multiplying beta(t) by scale(t), but we can instead
    // normalise beta(t) - the constants will cancel when we compute gamma.
    if (compute_xi)
        xi_summed = zeros(Q,Q);
    //else
    //    xi_summed = [];
    //%%%%%%%%% Forwards %%%%%%%%%%
    //cout << "fwdback > Forwards" << endl;
    int t = 0;
    alpha.col(0) = vectorize(init_state_distrib) % obslik.col(t);
    if (scaled){
        std::pair<mat,double> _tmp = normaliseC(alpha.col(t));
        alpha.col(t) = _tmp.first;
        scale(t) = _tmp.second;
    }
    for(int t=1; t<T; t++) {
        mat trans;
        mat m;
        trans = transmat(act(t-1));
        if (maximize){
            // NOTE(review): max-product predecessor left unimplemented — `m`
            // stays empty if this branch is ever enabled.
            //m = max_mult(trans.t(), alpha.col(t-1)); // TODO max_mult
        } else {
            m = trans.t() * alpha.col(t-1);
        }
        alpha.col(t) = vectorize(m) % obslik.col(t);
        if (scaled) {
            std::pair<mat,double> _tmp = normaliseC(alpha.col(t));
            alpha.col(t) = _tmp.first;
            scale(t) = _tmp.second;
        }
        if (compute_xi && fwd_only) {// useful for online EM
            xi_summed = xi_summed + normalise((alpha.col(t-1) * obslik.col(t).t()) % trans);
        }
    }
    if (scaled) {
        uvec _s = find(scale);
        // If any scale factor is zero, its log is -inf and the total
        // log-likelihood diverges.
        // NOTE(review): find(scale) returns indices of NON-zero entries, so
        // is_empty() triggers only when ALL scales are zero — the original
        // (Italian) comment describes "at least one zero"; confirm intent.
        if ( _s.is_empty() ) {
            loglik = -std::numeric_limits<double>::max();
        } else {
            loglik = sum(sum(log(scale))); // nested arma::sum because sum(mat X) return a rowvec
        }
    } else {
        // NOTE(review): alpha.col(T) is out of bounds (valid columns are
        // 0..T-1); presumably alpha.col(T-1) was intended. Unreachable while
        // `scaled` is hard-wired true.
        loglik = log(sum(alpha.col(T)));
    }
    if (fwd_only) {
        gamma = alpha;
        return;
    }
    //%%%%%%%%% Backwards %%%%%%%%%%
    //cout << "fwdback > Backwards" << endl;
    int M;
    mat trans;
    mat denom;
    beta = zeros(Q,T);
    if (compute_gamma2) {
        M = mixmat.n_cols;
        gamma2 = zeros(Q,M,T);
    } else {
        //gamma2 = []
    }
    beta.col(T-1) = ones(Q,1);
    gamma.col(T-1) = normalise(alpha.col(T-1) % beta.col(T-1));
    t=T-1;
    if (compute_gamma2) {
        denom = obslik.col(t) + (obslik.col(t)==0); // replace 0s with 1s before dividing
        // NOTE(review): denom is multiplied in (%) rather than divided —
        // confirm against the reference implementation.
        gamma2.slice(t) = obslik2.slice(t) % mixmat % repmat(gamma.col(t), 1, M) % repmat(denom, 1, M);
    }
    for (int t=T-2; t>=0; t--) {
        // T-2 because there are some calls to t+1
        // and col(T) will generate the error Mat::col(): out of bounds
        // so we must assure the limit of col(T-1)
        mat b = beta.col(t+1) % obslik.col(t+1);
        trans = transmat(act(t));
        if (maximize){
            mat B = repmat(vectorize(b).t(), Q, 1);
            beta.col(t) = max(trans % B, 1);
        } else
            beta.col(t) = trans * b;
        if (scaled)
            beta.col(t) = normalise( beta.col(t) );
        gamma.col(t) = normalise(alpha.col(t) % beta.col(t));
        if (compute_xi){
            xi_summed = xi_summed + normalise((trans % (alpha.col(t) * b.t())));
        }
        if (compute_gamma2){
            // NOTE(review): `obslik(t)` indexes a single element (column-major
            // linear index), unlike the `obslik.col(t)` used above — likely a
            // typo for obslik.col(t)==0.
            denom = obslik.col(t) + (obslik(t)==0); // replace 0s with 1s before dividing
            gamma2.slice(t) = obslik2.slice(t) % mixmat % repmat(gamma.col(t), 1, M) % repmat(denom, 1, M);
        }
    }
}
// Draw the same axis-aligned rectangle into every slice of I, using v(k) as
// the intensity for slice k.
void draw_rect(cube &I, const vec &v, vec topleft, vec btmright) {
    for (uword s = 0; s < v.n_elem; ++s) {
        draw_rect(I.slice(s), v(s), topleft, btmright);
    }
}
// Draw the same line segment into every slice of I, using v(k) as the
// intensity for slice k.
void draw_line(cube &I, const vec &v, vec pt1, vec pt2) {
    for (uword s = 0; s < v.n_elem; ++s) {
        draw_line(I.slice(s), v(s), pt1, pt2);
    }
}
// RBM prediction pass: sweep the (user-sorted) test records, and for each
// user locate their training records with a forward-only cursor `j`, run one
// positive phase to get the hidden activations Hu, then score every test
// movie as the expected rating under the softmax visible distribution.
// Users with no training data fall back to a constant prior rating.
// Reads globals/members: ptr_train_data, ptr_test_data, F, K, BH, BV, W.
vector<float> predict_list(const record_array & rcd_array) {
    // predicting stage
    unsigned int j = 0;             // forward-only cursor into the training records
    unsigned int train_start = 0;
    unsigned int train_end = 0;
    unsigned int test_start = 0;
    unsigned int test_end = 0;
    unsigned int train_user = ptr_train_data->data[0].user;
    unsigned int test_user = ptr_test_data->data[0].user;
    vec Hu = zeros<vec>(F);         // hidden-unit activations for the current user
    vec Vum(K);                     // visible softmax over the K rating values
    ivec scores = linspace<ivec>(1, 5, 5);
    vector<float>results;
    results.resize(rcd_array.size);
    for (int i = 0; i < ptr_test_data->size; i++) {
        record r_test = ptr_test_data->data[i];
        // flush the accumulated block when the user changes or input ends
        if ((test_user != r_test.user) || i == ptr_test_data->size -1) {
            // make prediction of test_user for movies in the test set
            test_end = (i == ptr_test_data->size-1) ? (i + 1) : i;
            int u_size = test_end - test_start;
            // find train_start and train_end
            // record r_train = ptr_train_data->data[j];
            while (j < ptr_train_data->size) {
                record r_train = ptr_train_data->data[j];
                if (r_train.user < test_user) {
                    train_start = j + 1;
                } else if (r_train.user > test_user) {
                    break;
                }
                j++;
            }
            train_end = j;
            // NOTE(review): data[j-1] underflows if j is still 0 here —
            // presumably cannot happen with sorted non-empty data; confirm.
            if (ptr_train_data->data[j-1].user == test_user) {
                // positive phase to compute Hu
                Hu = BH;
                for (int f = 0; f < F; f++) {
                    for (int u = train_start; u < train_end; u++) {
                        record r_train = ptr_train_data->data[u];
                        unsigned int k = int(r_train.score) - 1;
                        double w = W(k, f, r_train.movie);
                        Hu(f) += w;
                    }
                }
                Hu = 1.0 / (1 + exp(-Hu)); // sigmoid
                // negative phase to predict score
                for (int u = test_start; u < test_end; u++) {
                    record r_test = ptr_test_data->data[u];
                    Vum = normalise( exp(BV.col(r_test.movie) + W.slice(r_test.movie) * Hu), 1);
                    results[u] = dot(Vum, scores); // expected rating in [1,5]
                }
            } else {
                // TODO: predict all movies to be the averaged movie rating
                double predict_score;
                for (int u = test_start; u < test_end; u++) {
                    predict_score = 3.6;
                    results[u] = predict_score;
                }
            }
            train_start = j;
            test_start = i;
            test_user = r_test.user;
        }
    }
    return results;
}
// Train the conditional RBM on one user's `size` rating records using CD_K
// steps of contrastive divergence, then apply the gradient updates to the
// shared parameters W (per-movie weights), BH (hidden bias) and BV
// (per-movie visible bias). Reads globals/members: K, F, W, BH, BV, lrate.
// `user_id` is currently unused.
void train(const record *data, unsigned int user_id, unsigned int size, int CD_K) {
    // initialization
    mat V0 = zeros<mat>(K, size); // data-clamped visible one-hot columns
    mat Vt = zeros<mat>(K, size); // reconstructed visible distribution
    vec H0 = zeros<vec>(F);       // data-driven hidden activations
    vec Ht = zeros<vec>(F);       // reconstruction-driven hidden activations
    // set up V0 and Vt based on the input data.
    for (int i = 0; i < size; i++) {
        record r = data[i];
        V0(int(r.score)-1, i) = 1; // score - 1 is the index
        Vt(int(r.score)-1, i) = 1;
    }
    /*
    /////////////////// set up H0 by V -> H //////////////////
    H0(j) = sigma( BH(j) + sum_ik ( W(k, j, r.movie) * V0(k, i) ))
    */
    H0 = BH;
    for (int i = 0; i < size; i++) {
        H0 += W.slice(data[i].movie).t() * V0.col(i);
    }
    H0 = 1.0 / (1 + exp(-H0)); // sigmoid
    /////////////////// Do the contrastive divergence ///////////////////
    for (int n = 0; n < CD_K; n++) {
        ////////////// positive phase: V -> H /////////
        Ht = BH;
        for (int i = 0; i < size; i ++) {
            Ht += W.slice(data[i].movie).t() * Vt.col(i);
        }
        Ht = 1.0 / (1 + exp(-Ht)); // sigmoid
        // negative phase: H -> V
        for (int i = 0; i < size; i++) {
            record r = data[i];
            Vt.col(i) = exp(BV.col(r.movie) + W.slice(r.movie) * Ht);
        }
        // Normalize Vt -> sum_k (Vt(k, i)) = 1  (softmax over rating values)
        Vt = normalise(Vt, 1);
    }
    // update W: positive minus negative phase statistics
    for (int i = 0; i < size; i++) {
        W.slice(data[i].movie) += lrate * (V0.col(i) * H0.t() - Vt.col(i) * Ht.t());
    }
    // update BH
    BH += lrate * (H0 - Ht);
    // update BV
    for (int i = 0; i < size; i++) {
        BV.col(data[i].movie) += lrate * (V0.col(i) - Vt.col(i));
    }
}