int notmain(void) { int i,j,k; Matrix_3x3 id; Matrix_3x3 invid; Vector vec(1,2,3); Vector res, res2; for(i=0;i<3;i++) for(j=0;j<3;j++) id.set(i,j,(float)((i*4)+j)); for(k=0;k<100;k++) { for(i=0;i<3;i++) //col for(j=0;j<3;j++) //lin { /* if(i<j) id.set(i,j,0.0); else*/ id.set(i,j, (float)( (((i*j*k)%7 + ((k+1)%3) + (i+j))+1) * (((k+i+j)%2)==0 ? -1 : 1) )); }; printf("\n\nTEST #%d\n",k); if(id.is_invertable()) { printf("Det==%g\n",id.det()); res=Vector(1.0,1.0,1.0); res=id.solve(res); printf("solve erg = %g,%g,%g\n",res[0],res[1],res[2]); res2=id*res; printf("solve test = %g,%g,%g\n",res2[0],res2[1],res2[2]); id.dump(" M"); invid=id.invert(); invid.dump("M^-1"); (id*invid).dump("mul"); }; }; id.dump("id"); printf("vec = "); dvec(vec); printf("\n"); res=id*vec; printf("res = "); dvec(res); printf("\n"); return 0; }
/*
 * resample_chain
 *
 * One sweep over all N tokens that redraws each token's topic z[i] and
 * updates only the document-topic counts Ndt: the token's current topic is
 * removed from Ndt, a topic is drawn with weight
 *   (Nwt[w][t] + beta) / (Nt[t] + W*beta) * (Ndt[d][t] + alpha),
 * and the drawn topic is added back to Ndt.  Nwt and Nt are only read.
 */
void resample_chain (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt)
{
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int tok;

  for (tok = 0; tok < N; tok++) {
    int *word_row = Nwt[w[tok]];
    int *doc_row = Ndt[d[tok]];
    double total = 0;
    double threshold, running;
    int k;

    /* take the token out of its document's counts */
    doc_row[z[tok]]--;

    /* unnormalised weight of every topic */
    for (k = 0; k < T; k++) {
      prob[k] = (word_row[k] + beta)/(Nt[k] + wbeta)*(doc_row[k] + alpha);
      total += prob[k];
    }

    /* inverse-CDF draw: walk the running sum until it reaches the threshold */
    threshold = drand48()*total;
    for (k = 0, running = prob[0]; threshold > running; running += prob[++k])
      ;

    z[tok] = k;
    doc_row[k]++;
  }

  free(prob);
}
/*
 * loglike
 *
 * Prints the log-likelihood of the N tokens and the corresponding
 * perplexity exp(-llike/N).
 *
 * The two probability tables and the per-document normalisers are
 * allocated on the first call only and kept in function-local statics;
 * note that the normalisers Nd[j] + T*alpha are also computed only on that
 * first call, while the tables are refilled on every call.
 */
void loglike (int N, int W, int D, int T, double alpha, double beta, int *w, int *d, int **Nwt, int **Ndt, int *Nt, int *Nd)
{
  static int ready = 0;
  static double **p_word_topic;   /* W x T */
  static double **p_topic_doc;    /* D x T */
  static double *doc_norm;        /* length D */
  double total;
  int word, doc, k, tok;

  if (!ready) {
    ready = 1;
    p_word_topic = dmat(W,T);
    p_topic_doc = dmat(D,T);
    doc_norm = dvec(D);
    for (doc = 0; doc < D; doc++)
      doc_norm[doc] = Nd[doc] + T*alpha;
  }

  for (k = 0; k < T; k++) {
    const double topic_norm = Nt[k] + W*beta;

    for (word = 0; word < W; word++)
      p_word_topic[word][k] = (Nwt[word][k]+beta) / topic_norm;

    for (doc = 0; doc < D; doc++)
      p_topic_doc[doc][k] = (Ndt[doc][k]+alpha)/ doc_norm[doc];
  }

  /* each token contributes log( sum_t p(w|t) p(t|d) ) */
  total = 0;
  for (tok = 0; tok < N; tok++)
    total += log(ddot(T, p_word_topic[w[tok]], p_topic_doc[d[tok]]));

  printf(">>> llike = %.6e ", total);
  printf("pplex = %.4f\n", exp(-total/N));
}
/*
 * pplex_d
 *
 * Returns the perplexity exp(-llike/N) of N tokens under real-valued
 * word-topic (Nwt, indexed [word][topic]) and document-topic
 * (Ndt, indexed [doc][topic]) tables.
 *
 * For every token i the word probability is
 *   sum_t Nwt[w[i]][t] * Ndt[d[i]][t] / zwt[t]   divided by   sum_t Ndt[d[i]][t],
 * where zwt[t] is the column sum of Nwt over all W words.
 *
 * The temporary column-sum buffer is released before returning (it used to
 * be leaked on every call).
 */
double pplex_d(int N, int W, int T, int *w, int *d, double **Nwt, double **Ndt)
{
  int i, t;
  double mypplex, llike=0, p1, p2, Z, pwd;
  double *zwt = dvec(T);

  /* per-topic normaliser: total mass of each topic over the vocabulary */
  for (t = 0; t < T; t++)
    for (zwt[t] = 0, i = 0; i < W; i++)
      zwt[t] += Nwt[i][t];

  for (i = 0; i < N; i++) {
    Z = pwd = 0;
    for (t = 0; t < T; t++) {
      p1 = Nwt[w[i]][t];
      p2 = Ndt[d[i]][t];
      Z += p2;
      pwd += p1 * p2 / zwt[t];
    }
    llike += log( pwd / Z );
  }

  mypplex = exp(-llike / N);

  /* same release convention the sampler functions use for dvec() buffers */
  free(zwt);
  return mypplex;
}
/**
 * Forward pass of the layer.
 *
 * For every filter, each entry of Steps selects a window of the input; the
 * window's frobenius() product with the filter is the excitation for that
 * position and its sigmoid() is the activation.  Both are folded to
 * OutShape[1] x OutShape[2] and stored in Excitations[f] / Activations[f].
 *
 * @param mat  input volume (taken by value)
 * @return     a copy of Activations
 *
 * Fix: the activation of position i is now computed from exc[i]; it used to
 * read exc[exc.size()-1], i.e. always the last slot of the buffer.
 */
dmatrix3 ConvLayer::think(dmatrix3 mat)
{
  dmatrix3 slab(mat.size(), dmatrix2(Fshape[1], dvec(Fshape[0])));
  ivec step(4);
  dvec exc(OutShape[1]*OutShape[2]);
  dvec act(OutShape[1]*OutShape[2]);
  ivec foldshape(2);
  foldshape[0] = OutShape[1];
  foldshape[1] = OutShape[2];

  // NOTE(review): mat is a by-value parameter, so this pointer refers to an
  // object that is destroyed when think() returns - confirm that Inputs is
  // never dereferenced afterwards.
  Inputs = &mat;

  for(int f=0;f<Filters.size();f++)
  {
    dmatrix3 filt = Filters[f];
    for(int i=0;i<Steps.size();i++)
    {
      step = Steps[i];
      slab = invert<real>(slice<real>(invert<real>(mat), step));
      exc[i] = frobenius(slab, filt); // This is the "convolve" step
      act[i] = sigmoid(exc[i]);       // activation of the same position
    }
    Excitations[f] = fold2<real>(exc, foldshape);
    Activations[f] = fold2<real>(act, foldshape);
  }
  return Activations;
}
/*
 * C-API wrapper that splits a multi-channel array into single-channel planes.
 *
 * Up to four destination arrays may be passed; null entries are skipped.
 * Destination k (if non-null) receives channel k of the source.  When the
 * number of non-null destinations equals the source channel count the work
 * is delegated to cv::split, otherwise to cv::mixChannels with explicit
 * (source channel, destination index) pairs.
 *
 * Local names that appear inside CV_Assert are left unchanged because the
 * macro embeds the expression text in its error message.
 */
CV_IMPL void cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 )
{
    void* const planes[] = { dstarr0, dstarr1, dstarr2, dstarr3 };
    cv::Mat src = cv::cvarrToMat(srcarr);

    // count the destinations that were actually supplied
    int nz = 0;
    for( int c = 0; c < 4; c++ )
    {
        if( planes[c] != 0 )
            nz++;
    }
    CV_Assert( nz > 0 );

    std::vector<cv::Mat> dvec(nz);
    std::vector<int> fromTo(nz*2);

    int j = 0;
    for( int i = 0; i < 4; i++ )
    {
        if( planes[i] == 0 )
            continue;

        dvec[j] = cv::cvarrToMat(planes[i]);
        CV_Assert( dvec[j].size() == src.size() );
        CV_Assert( dvec[j].depth() == src.depth() );
        CV_Assert( dvec[j].channels() == 1 );
        CV_Assert( i < src.channels() );

        // source channel i goes to packed destination slot j
        fromTo[j*2] = i;
        fromTo[j*2+1] = j;
        j++;
    }

    if( nz != src.channels() )
        cv::mixChannels( &src, 1, &dvec[0], nz, &fromTo[0], nz );
    else
        cv::split( src, dvec );
}
/**
 * Accumulates, for every error cell, sigmoid_p(excitation * error) over the
 * window that the matching Steps entry describes.
 *
 * The result has the same dimensions as Excitations and starts at zero.
 * Steps entries are read as [iBegin, iEnd, jBegin, jEnd]; the running step
 * index restarts at 0 for every slice z of Errors.
 */
dmatrix3 ConvLayer::backpropagation() const
{
  dmatrix3 outputs(Excitations.size(),
                   dmatrix2(Excitations[0].size(),
                            dvec(Excitations[0][0].size(), 0.0)));

  for(int z=0; z<Errors.size(); z++)
  {
    int stepIdx = 0;
    for(int y=0; y<Errors[0].size(); y++)
    {
      for(int x=0; x<Errors[0][0].size(); x++, stepIdx++)
      {
        const ivec& window = Steps[stepIdx];
        const int iBegin = window[0];
        const int iEnd   = window[1];
        const int jBegin = window[2];
        const int jEnd   = window[3];

        for(int i=iBegin; i<iEnd; i++)
        {
          for(int j=jBegin; j<jEnd; j++)
          {
            outputs[z][i][j] += sigmoid_p(Excitations[z][i][j] * Errors[z][y][x]);
          }
        }
      }
    }
  }
  return outputs;
}
/*
 * oversample_Ndt
 *
 * For every token, computes the topic weights
 *   (Nwt[w][t] + beta) / (Nt[t] + W*beta) * (Ndt[d][t] + alpha)
 * once and then draws a topic from them four times, incrementing the
 * document-topic count Ndt[d][t] for each draw.  The weights are NOT
 * recomputed between the draws of one token.  z, Nwt and Nt are not modified.
 */
void oversample_Ndt (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt)
{
  const int draws_per_token = 4;
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int tok;

  for (tok = 0; tok < N; tok++) {
    double total = 0;
    int k, draw;

    for (k = 0; k < T; k++) {
      prob[k] = (Nwt[w[tok]][k] + beta)/(Nt[k] + wbeta)*(Ndt[d[tok]][k] + alpha);
      total += prob[k];
    }

    for (draw = 0; draw < draws_per_token; draw++) {
      const double threshold = drand48()*total;
      double running = prob[0];

      /* inverse-CDF draw */
      k = 0;
      while (threshold > running) {
        k++;
        running += prob[k];
      }
      Ndt[d[tok]][k]++;
    }
  }

  free(prob);
}
/**
 * Builds the exponent vector of an expression with respect to `vars`.
 *
 * The expression is expanded first.  Then, walking the variables from the
 * last to the first, the degree in that variable is recorded and the
 * expression is replaced by the coefficient of that power before moving on
 * to the next variable.
 */
exp_vector_t degree_vector(ex e, const exvector& vars)
{
	e = e.expand();
	exp_vector_t dvec(vars.size());

	std::size_t idx = vars.size();
	while (idx > 0) {
		--idx;
		const int degree = e.degree(vars[idx]);
		e = e.coeff(vars[idx], degree);
		dvec[idx] = degree;
	}
	return dvec;
}
/**------------------------------------------------- * Make a quadrature given a Polynomial. * @param P :: A polynomial to use to make the quadrature. */ void MakeQuadrature::makeQuadrature(const Polynomial& P) { auto& r = P.getRoots(); auto& w = P.getWeights(); const size_t n = r.size(); auto quad = new Quadrature; quad->setRowCount( int(n) ); quad->addDoubleColumn("r", API::NumericColumn::X); auto& rc = quad->getDoubleData("r"); rc = r; quad->addDoubleColumn("w", API::NumericColumn::Y); auto& wc = quad->getDoubleData("w"); wc = w; FunctionDomain1DView domain( r ); FunctionValues values( domain ); std::vector<double> wgt; std::vector<double> wgtDeriv; P.weightFunction()->function( domain, values ); values.copyToStdVector( wgt ); P.weightDerivative()->function( domain, values ); values.copyToStdVector( wgtDeriv ); quad->addDoubleColumn("weight", API::NumericColumn::Y); auto& wgtc = quad->getDoubleData("weight"); wgtc = wgt; quad->addDoubleColumn("deriv", API::NumericColumn::Y); auto& derc = quad->getDoubleData("deriv"); derc = wgtDeriv; Quadrature::FuncVector fvec( n ); Quadrature::FuncVector dvec( n ); for(size_t i = 0; i < n; ++i) { std::string colInd = boost::lexical_cast<std::string>( i ); quad->addDoubleColumn("f"+colInd, API::NumericColumn::Y); fvec[i] = &quad->getDoubleData("f"+colInd); quad->addDoubleColumn("d"+colInd, API::NumericColumn::Y); dvec[i] = &quad->getDoubleData("d"+colInd); } P.calcPolyValues( fvec, dvec ); quad->init(); setClassProperty( "Quadrature", API::TableWorkspace_ptr( quad ) ); { const double startX = get("StartX"); const double endX = get("EndX"); ChebfunWorkspace_sptr cheb( new ChebfunWorkspace(chebfun( 100, startX, endX )) ); cheb->fun().fit( P ); setClassProperty("ChebWorkspace", cheb); } }
/**
 * Sets up a convolution layer.
 *
 * Stores the input shape, filter shape and stride, derives OutShape and
 * Steps from them via outshape() / calcsteps(), sizes Excitations,
 * Activations and Errors to OutShape[0] x OutShape[1] x OutShape[2] filled
 * with 0.0, and creates `filters` filters of
 * InShape[0] x Fshape[0] x Fshape[1] filled with 0.5.
 */
ConvLayer::ConvLayer(int filters, ivec inshape, ivec fshape, int stride, ConvNet* net)
{
  InShape = inshape;
  Stride = stride;
  Fshape = fshape;
  OutShape = outshape(InShape, Fshape, Stride, filters);
  Steps = calcsteps(InShape, Fshape, Stride, filters);

  // the three output-shaped buffers all start out as the same zero volume
  const dmatrix3 zeros(OutShape[0], dmatrix2(OutShape[1], dvec(OutShape[2], 0.0)));
  Excitations = zeros;
  Activations = zeros;
  Errors = zeros;

  Filters.assign(filters,
                 dmatrix3(InShape[0], dmatrix2(Fshape[0], dvec(Fshape[1], 0.5))));

  Brain = net;
}
// Demonstrates the generic count() helper on vectors of four element types.
// Every vector holds N copies of one value and count() is asked for that
// same value, so each printed line shows how many matches count() reports.
int main(int argc, char const *argv[])
{
    std::vector<int> ints(9, 8);
    std::cout << count(ints, 8) << std::endl;

    std::vector<double> reals(8, 9.9);
    std::cout << count(reals, 9.9) << std::endl;

    std::vector<char> chars(7, 'h');
    std::cout << count(chars, 'h') << std::endl;

    std::vector<std::string> words(6, "hey");
    std::cout << count(words, std::string("hey")) << std::endl;

    return 0;
}
int *dsort(int n, double *x) // { int *indx = ivec(n); int i; dcomp_vec = dvec(n); for (i = 0; i < n; i++) { dcomp_vec[i] = -x[i]; indx[i] = i; } qsort(indx,n,sizeof(int),dcomp); free(dcomp_vec); return indx; }
/*
 * sample_chain_with_prior
 *
 * One sweep over all N tokens, visited in the order given by `order`, with
 * real-valued count tables.  For each token its current topic is removed
 * from Nt, Nwt and Ndt, a new topic is drawn with weight
 *   Ndt[d][t] * (Nwt[w][t] + prior_Nwt[w][t]) / Nt[t],
 * and the counts are restored for the drawn topic.  Random numbers come
 * from sample_uniform().  W is not used.
 */
void sample_chain_with_prior (int N, int W, int T, int *w, int *d, int *z, double **Nwt, double **Ndt, double *Nt, int *order, double **prior_Nwt)
{
  double *prob = dvec(T);
  int pos;

  for (pos = 0; pos < N; pos++) {
    const int tok = order[ pos ];
    const int word = w[tok];
    const int doc = d[tok];
    double *word_row = Nwt[word];
    double *doc_row = Ndt[doc];
    double *prior_row = prior_Nwt[word];
    double total = 0;
    double threshold, running;
    int k;

    /* remove the token's current assignment from all counts */
    k = z[tok];
    Nt[k]--;
    word_row[k]--;
    doc_row[k]--;

    for (k = 0; k < T; k++) {
      prob[k] = doc_row[k] * (word_row[k] + prior_row[k]) / Nt[k];
      total += prob[k];
    }

    /* inverse-CDF draw */
    threshold = sample_uniform() * total;
    for (k = 0, running = prob[0]; threshold > running; running += prob[++k])
      ;

    z[tok] = k;
    word_row[k]++;
    doc_row[k]++;
    Nt[k]++;
  }

  free(prob);
}
/*------------------------------------------
 * sample_chain_alpha
 *
 * One sweep over all N tokens in the order given by `order`, using a
 * per-topic alpha[t].  Each token is removed from the counts, a topic is
 * drawn with weight
 *   (Ndt[d][t] + alpha[t]) * (Nwt[w][t] + beta) / (Nt[t] + W*beta),
 * and the counts are restored for the drawn topic.
 *------------------------------------------ */
void sample_chain_alpha (int N, int W, int T, double *alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt, int *order)
{
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int pos;

  for (pos = 0; pos < N; pos++) {
    const int tok = order[ pos ];
    int *word_row = Nwt[w[tok]];
    int *doc_row = Ndt[d[tok]];
    double total = 0;
    double threshold, running;
    int k;

    /* subtract the token's current topic from the counts */
    k = z[tok];
    Nt[k]--;
    word_row[k]--;
    doc_row[k]--;

    for (k = 0; k < T; k++) {
      prob[k] = (doc_row[k] + alpha[k]) * (word_row[k] + beta) / (Nt[k] + wbeta);
      total += prob[k];
    }

    /* draw a topic by walking the cumulative weights */
    threshold = drand48()*total;
    running = prob[0];
    k = 0;
    while (threshold > running) {
      k++;
      running += prob[k];
    }

    /* record the draw and add it back to the counts */
    z[tok] = k;
    word_row[k]++;
    doc_row[k]++;
    Nt[k]++;
  }

  free(prob);
}
void benchmark_convert_type () { const size_t size = 10000000; const S testval(1.0); std::vector<S> svec (size, testval); std::vector<D> dvec (size); std::cout << Strutil::format("Benchmark conversion of %6s -> %6s : ", TypeDesc(BaseTypeFromC<S>::value), TypeDesc(BaseTypeFromC<D>::value)); float time = time_trial (bind (do_convert_type<S,D>, OIIO::cref(svec), OIIO::ref(dvec)), ntrials, iterations) / iterations; std::cout << Strutil::format ("%7.1f Mvals/sec", (size/1.0e6)/time) << std::endl; D r = convert_type<S,D>(testval); OIIO_CHECK_EQUAL (dvec[size-1], r); }
/*------------------------------------------
 * sample_chain
 *
 * One sweep over all N tokens in the order given by `order`.  Each token is
 * removed from Nt, Nwt and Ndt, a topic is drawn with weight
 *   (Ndt[d][t] + alpha) * (Nwt[w][t] + beta) / (Nt[t] + W*beta),
 * and the counts are restored for the drawn topic.
 *------------------------------------------ */
void sample_chain (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt, int *order)
{
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int pos;

  for (pos = 0; pos < N; pos++) {
    const int tok = order[ pos ];
    int *word_row = Nwt[w[tok]];
    int *doc_row = Ndt[d[tok]];
    double total = 0;
    double threshold, running;
    int k;

    /* remove the token's current assignment */
    k = z[tok];
    Nt[k]--;
    word_row[k]--;
    doc_row[k]--;

    for (k = 0; k < T; k++) {
      prob[k] = (doc_row[k] + alpha) * (word_row[k] + beta) / (Nt[k] + wbeta);
      total += prob[k];
    }

    /* inverse-CDF draw */
    threshold = drand48()*total;
    for (k = 0, running = prob[0]; threshold > running; running += prob[++k])
      ;

    z[tok] = k;
    word_row[k]++;
    doc_row[k]++;
    Nt[k]++;
  }

  free(prob);
}
// Builds a multinomial model from a vector of category names.
// The names are converted with make_catdat_ptrs(); the number of levels is
// read from the first converted element (so `names` must not be empty), the
// probabilities start out uniform, the data are attached, and mle() is run
// before the observer is installed.
MM::MultinomialModel(const StringVec &names)
  : ParamPolicy(new VectorParams(1)),
    DataPolicy(new MS(1)),
    ConjPriorPolicy(),
    logp_current_(false)
{
  std::vector<Ptr<CD> > observations(make_catdat_ptrs(names));

  uint levels = observations[0]->nlevels();
  Vec uniform(levels, 1.0/levels);
  set_pi(uniform);

  set_data(observations);
  mle();
  set_observer();
}
// Handles one FragReadEndsMergeWorkPackage: merges the fragment read-end
// blocks described by the package's request, marking duplicate fragments
// through a callback bound to the request's duplicate bit vector, then
// forwards the collected metrics and reports the package as finished and
// returned.  The thread-pool interface argument is unused.
virtual void dispatch(libmaus2::parallel::SimpleThreadWorkPackage * P, libmaus2::parallel::SimpleThreadPoolInterfaceEnqueTermInterface & /* tpi */)
{
	FragReadEndsMergeWorkPackage * BP = dynamic_cast<FragReadEndsMergeWorkPackage *>(P);
	assert ( BP );

	ReadEndsBlockIndexSet mergeSet(*(BP->REQ.MI));
	libmaus2::bambam::DupSetCallbackSharedVector dupCallback(*(BP->REQ.dupbitvec));

	mergeSet.merge(
		BP->REQ.SMI,
		libmaus2::bambam::DupMarkBase::isDupFrag,
		libmaus2::bambam::DupMarkBase::markDuplicateFrags,
		dupCallback
	);

	addDuplicationMetricsInterface.addDuplicationMetrics(dupCallback.metrics);
	mergeFinishedInterface.fragReadEndsMergeWorkPackageFinished(BP);
	packageReturnInterface.fragReadEndsMergeWorkPackageReturn(BP);
}
/*------------------------------------------
 * sample_chain_rank
 *
 * One sweep over all N tokens in the order given by `order`, where every
 * token counts with the weight drank[d[i]] of its document instead of 1.
 * The token's weight is subtracted from Nt, Nwt and Ndt for its current
 * topic, a topic is drawn with weight
 *   (Nwt[w][t] + beta) / (Nt[t] + W*beta) * (Ndt[d][t] + alpha),
 * and the weight is added back for the drawn topic.
 *------------------------------------------ */
void sample_chain_rank (int N, int W, int T, double alpha, double beta, int *w, int *d, int *drank, int *z, int **Nwt, int **Ndt, int *Nt, int *order)
{
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int pos;

  for (pos = 0; pos < N; pos++) {
    const int tok = order[ pos ];
    const int word = w[tok];
    const int doc = d[tok];
    double total = 0;
    double threshold, running;
    int k;

    /* take the token's weighted contribution out of the counts */
    k = z[tok];
    Nt[k] -= drank[doc];
    Nwt[word][k] -= drank[doc];
    Ndt[doc][k] -= drank[doc];

    for (k = 0; k < T; k++) {
      prob[k] = (Nwt[word][k] + beta)/(Nt[k]+ wbeta)*(Ndt[doc][k]+ alpha);
      total += prob[k];
    }

    /* draw a topic by walking the cumulative weights */
    threshold = drand48()*total;
    running = prob[0];
    k = 0;
    while (threshold > running) {
      k++;
      running += prob[k];
    }

    /* record the draw and put the weighted contribution back */
    z[tok] = k;
    Nwt[word][k] += drank[doc];
    Ndt[doc][k] += drank[doc];
    Nt[k] += drank[doc];
  }

  free(prob);
}
int main() { FILE *fin,*fout; double **a,*b; int i; //open file// a =dmatrix(1,N,1,N); b =dvec(1,N); fin = fopen("input.dat","r"); if (fin ==NULL) { printf("Can't find file\n"); exit(1); } fout = fopen("output.dat","w"); if(fout == NULL) { printf("Can't make file\n"); exit(1); } input_matrix(a,'A',fin,fout); input_vec(b,'b',fin,fout); // printf("%lf",a[1][1]); b =simple_gauss(a,b); //output results// fprintf(fout,"Ax=bの計算結果は次の通り\n"); for(i = 1;i <= N; i++) { fprintf(fout,"%f\n",b[i]); } fclose(fin);fclose(fout); // free_dmatrix(a,1,N,1,N);free_dvec(b,1); return(0); }
/*
 * sample_chain0
 *
 * One sweep over all N tokens in their natural order 0..N-1.  Each token is
 * removed from Nt, Nwt and Ndt, a topic is drawn with weight
 *   (Ndt[d][t] + alpha) * (Nwt[w][t] + beta) / (Nt[t] + W*beta),
 * and the counts are restored for the drawn topic.
 */
void sample_chain0 (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt)
{
  const double wbeta = W*beta;
  double *prob = dvec(T);
  int tok;

  for (tok = 0; tok < N; tok++) {
    const int word = w[tok];
    const int doc = d[tok];
    double total = 0.0;
    double threshold, running;
    int k;

    /* subtract the token's current topic from the counts */
    k = z[tok];
    Nt[k]--;
    Nwt[word][k]--;
    Ndt[doc][k]--;

    for (k = 0; k < T; k++) {
      prob[k] = (Ndt[doc][k] + alpha) * (Nwt[word][k] + beta) / (Nt[k] + wbeta);
      total += prob[k];
    }

    /* inverse-CDF draw */
    threshold = drand48()*total;
    for (k = 0, running = prob[0]; threshold > running; running += prob[++k])
      ;

    /* record the draw and update the counts */
    z[tok] = k;
    Nwt[word][k]++;
    Ndt[doc][k]++;
    Nt[k]++;
  }

  free(prob);
}
// [[Rcpp::export]] SEXP hpbcpp(SEXP eta, SEXP beta, SEXP doc_ct, SEXP mu, SEXP siginv, SEXP sigmaentropy){ Rcpp::NumericVector etav(eta); arma::vec etas(etav.begin(), etav.size(), false); Rcpp::NumericMatrix betam(beta); arma::mat betas(betam.begin(), betam.nrow(), betam.ncol()); Rcpp::NumericVector doc_ctv(doc_ct); arma::vec doc_cts(doc_ctv.begin(), doc_ctv.size(), false); Rcpp::NumericVector muv(mu); arma::vec mus(muv.begin(), muv.size(), false); Rcpp::NumericMatrix siginvm(siginv); arma::mat siginvs(siginvm.begin(), siginvm.nrow(), siginvm.ncol(), false); Rcpp::NumericVector sigmaentropym(sigmaentropy); arma::vec entropy(sigmaentropym); //Performance Nots from 3/6/2015 // I tried a few different variants and benchmarked this one as roughly twice as // fast as the R code for a K=100 problem. Key to performance was not creating // too many objects and being selective in how things were flagged as triangular. // Some additional notes in the code below. // // Some things this doesn't have or I haven't tried // - I didn't tweak the arguments much. sigmaentropy is a double, and I'm still // passing beta in the same way. I tried doing a ", false" for beta but it didn't // change much so I left it the same as in gradient. // - I tried treating the factors for doc_cts and colSums(EB) as a diagonal matrix- much slower. // Haven't Tried/Done // - each_row() might be much slower (not sure but arma is column order). Maybe transpose in place? // - depending on costs there are some really minor calculations that could be precomputed: // - sum(doc_ct) // - sqrt(doc_ct) // More on passing by reference here: // - Hypothetically we could alter beta (because hessian is last thing we do) however down // the road we may want to explore treating nonPD hessians by optimization at which point // we would need it again. 
arma::colvec expeta(etas.size()+1); expeta.fill(1); int neta = etas.size(); for(int j=0; j <neta; j++){ expeta(j) = exp(etas(j)); } arma::vec theta = expeta/sum(expeta); //create a new version of the matrix so we can mess with it arma::mat EB(betam.begin(), betam.nrow(), betam.ncol()); //multiply each column by expeta EB.each_col() %= expeta; //this should be fastest as its column-major ordering //divide out by the column sums EB.each_row() %= arma::trans(sqrt(doc_cts))/sum(EB,0); //Combine the pieces of the Hessian which are matrices arma::mat hess = EB*EB.t() - sum(doc_cts)*(theta*theta.t()); //we don't need EB any more so we turn it into phi EB.each_row() %= arma::trans(sqrt(doc_cts)); //Now alter just the diagonal of the Hessian hess.diag() -= sum(EB,1) - sum(doc_cts)*theta; //Drop the last row and column hess.shed_row(neta); hess.shed_col(neta); //Now we can add in siginv hess = hess + siginvs; //At this point the Hessian is complete. //This next bit of code is from http://arma.sourceforge.net/docs.html#logging //It basically keeps arma from printing errors from chol to the console. std::ostream nullstream(0); arma::set_stream_err2(nullstream); //// //Invert via cholesky decomposition //// //Start by initializing an object arma::mat nu = arma::mat(hess.n_rows, hess.n_rows); //This version of chol generates a boolean which tells us if it failed. bool worked = arma::chol(nu,hess); if(!worked) { //It failed! Oh Nos. // So the matrix wasn't positive definite. In practice this means that it hasn't // converged probably along some minor aspect of the dimension. 
//Here we make it positive definite through diagonal dominance arma::vec dvec = hess.diag(); //find the magnitude of the diagonal arma::vec magnitudes = sum(abs(hess), 1) - abs(dvec); //iterate over each row and set the minimum value of the diagonal to be the magnitude of the other terms int Km1 = dvec.size(); for(int j=0; j < Km1; j++){ if(arma::as_scalar(dvec(j)) < arma::as_scalar(magnitudes(j))) dvec(j) = magnitudes(j); //enforce diagonal dominance } //overwrite the diagonal of the hessian with our new object hess.diag() = dvec; //that was sufficient to ensure positive definiteness so we now do cholesky nu = arma::chol(hess); } //compute 1/2 the determinant from the cholesky decomposition double detTerm = -sum(log(nu.diag())); //Now finish constructing nu nu = arma::inv(arma::trimatu(nu)); nu = nu * nu.t(); //trimatu doesn't do anything for multiplication so it would just be timesink to signal here. //Precompute the difference since we use it twice arma::vec diff = etas - mus; //Now generate the bound and make it a scalar double bound = arma::as_scalar(log(arma::trans(theta)*betas)*doc_cts + detTerm - .5*diff.t()*siginvs*diff - entropy); // Generate a return list that mimics the R output return Rcpp::List::create( Rcpp::Named("phis") = EB, Rcpp::Named("eta") = Rcpp::List::create(Rcpp::Named("lambda")=etas, Rcpp::Named("nu")=nu), Rcpp::Named("bound") = bound ); }
/*
 * ATMBP - iterative message update over words, topics and authors.
 *
 * Index conventions used by the code below:
 *   jcwd[di] .. jcwd[di+1]-1   non-zero entries i of document di,
 *                              irwd[i] = word index, srwd[i] = word count
 *   jcad[di] .. jcad[di+1]-1   authors of document di, irad[...] = author index
 *   phi[wi*J + j]              word-topic table (only read here)
 *   theta[ai*J + j]            author-topic table (accumulated, output)
 *   muz[i*J + j]               topic message of entry i
 *   mux[i*MA + a]              author message of entry i for the a-th author
 *                              of its document
 *
 * startcond == 1: theta is accumulated from the muz / mux passed in.
 * startcond == 0: every entry is assigned one random topic and one random
 *                 author of its document.
 * Then NN iterations recompute all messages and rebuild theta from them.
 *
 * NOTE(review): phitot, thetad, xprob and zprob are accumulated into
 * without being zeroed here, so dvec() is assumed to return zero-filled
 * memory - TODO confirm.  None of the four buffers is released in this
 * function; whether that is a leak depends on how dvec() allocates.
 */
void ATMBP(double ALPHA, double BETA, int W, int J, int D, int A, int MA, int NN, int OUTPUT, mwIndex *irwd, mwIndex *jcwd, double *srwd, mwIndex *irad, mwIndex *jcad, double *muz, double *mux, double *phi, double *theta, int startcond)
{
	int wi, di, ai, i, j, a, topic, iter;
	double xi, totprob, probs, WBETA = (double) (W*BETA), JALPHA = (double) (J*ALPHA);
	double *thetad, *phitot, *xprob, *zprob;

	/* per-topic total of phi over all words */
	phitot = dvec(J);
	for (wi=0; wi<W; wi++) {
		for (j=0; j<J; j++) {
			phitot[j] += phi[wi*J + j];
		}
	}

	thetad = dvec(A);   /* per-author total of theta */
	xprob = dvec(MA);   /* scratch: per-author message of the current entry */
	zprob = dvec(J);    /* scratch: per-topic message of the current entry */

	if (startcond==1) {
		/* start from previous state */
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				for (j=0; j<J; j++) {
					for (a=0; a<(jcad[di+1] - jcad[di]); a++) {
						ai = (int) irad[jcad[di] + a];
						theta[ai*J + j] += xi*muz[i*J + j]*mux[i*MA + a]; // increment theta count matrix
						thetad[ai] += xi*muz[i*J + j]*mux[i*MA + a];
					}
				}
			}
		}
	}

	if (startcond==0) {
		/* random initialization */
		if (OUTPUT==2) mexPrintf( "Starting Random initialization\n" );
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				// pick a random topic 0..J-1
				topic = (int) (J*drand());
				muz[i*J + topic] = (double) 1; // assign this word token to this topic
				/* pick a random position among the authors of document di */
				a = (int) ((jcad[di + 1] - jcad[di])*drand());
				ai = (int) irad[jcad[di] + a];
				// assign this word to this author
				mux[i*MA + a] = (double) 1;
				// update counts for this author
				theta[ai*J + topic] += xi; // increment theta count matrix
				thetad[ai] += xi;
			}
		}
	}

	for (iter=0; iter<NN; iter++) {

		/* progress report every 10th iteration */
		if (OUTPUT >=1) {
			if ((iter % 10)==0) mexPrintf( "\tIteration %d of %d\n" , iter , NN );
			if ((iter % 10)==0) mexEvalString("drawnow;");
		}

		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i]; // current word index
				xi = srwd[i]; // current word counts

				// message: reset the scratch accumulators for this entry
				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) xprob[a] = (double) 0;
				for (j=0; j<J; j++) zprob[j] = (double) 0;
				totprob = (double) 0;

				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) {
					ai = (int) irad[jcad[di] + a]; // current author index under consideration
					for (j=0; j<J; j++) {
						// probs contains the (unnormalized) probability of assigning this word token to topic j and author ai;
						// the entry's own contribution is subtracted from theta / thetad first
						probs = ((double) phi[wi*J + j] + (double) BETA) / ((double) phitot[j] + (double) WBETA) * ((double) theta[ai*J + j] - (double) xi*muz[i*J + j]*mux[i*MA + a] + (double) ALPHA) / ((double) thetad[ai] - (double) xi*mux[i*MA + a] + (double) JALPHA);
						xprob[a] += probs;
						zprob[j] += probs;
						totprob += probs;
					}
				}

				/* normalise: marginal over topics -> mux, marginal over authors -> muz */
				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) {
					mux[i*MA + a] = xprob[a]/totprob;
				}
				for (j=0; j<J; j++) {
					muz[i*J + j] = zprob[j]/totprob;
				}
			}
		}

		/* clear theta and thetad (phi and phitot are left untouched) */
		for (i=0; i<J*A; i++) theta[i] = (double) 0;
		for (i=0; i<A; i++) thetad[i] = (double) 0;

		// update parameters: rebuild theta / thetad from the new messages
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				for (j=0; j<J; j++) {
					for (a=0; a<(jcad[di+1] - jcad[di]); a++) {
						ai = (int) irad[jcad[di] + a];
						theta[ai*J + j] += xi*muz[i*J + j]*mux[i*MA + a]; // increment theta count matrix
						thetad[ai] += xi*muz[i*J + j]*mux[i*MA + a];
					}
				}
			}
		}
	}
}
/*
 * MEX gateway.
 *
 * Inputs:
 *   prhs[0]  WD      sparse double matrix, W x D (word counts per document)
 *   prhs[1]  AD      sparse double matrix, A x D (authors per document)
 *   prhs[2]  phi     J x W matrix
 *   prhs[3]  NN      number of iterations
 *   prhs[4]  ALPHA
 *   prhs[5]  BETA
 *   prhs[6]  SEED
 *   prhs[7]  OUTPUT  verbosity
 *   prhs[8]  MUZIN   (optional) J  x nzmax(WD) previous topic messages
 *   prhs[9]  MUXIN   (optional) MA x nzmax(WD) previous author messages
 * Outputs:
 *   plhs[0]  theta (J x A), plhs[1] muz (J x nzmax), plhs[2] mux (MA x nzmax)
 *
 * Fixes relative to the previous version:
 *   - the MUXIN copy loop iterated over `a` but indexed with the stale `i`,
 *     so mux was never initialised from MUXIN and one element at index
 *     J*nzmaxwd (possibly out of bounds) was written instead;
 *   - a warm start needs both MUZIN and MUXIN; passing exactly 9 inputs
 *     used to read prhs[9] past the end of the argument array;
 *   - plhs[1] / plhs[2] are only assigned when the caller asked for them.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
	double *srwd, *MUZIN = NULL, *MUXIN = NULL, *theta, *phi, *muz, *mux;
	double ALPHA,BETA;
	int W, J, D, A, MA = 0, NN, SEED, OUTPUT, nzmaxwd, i, a, startcond;
	mwIndex *irwd, *jcwd, *irad, *jcad;

	/* Check for proper number of arguments. */
	if (nrhs < 8) {
		mexErrMsgTxt("At least 8 input arguments required");
	} else if (nlhs < 1) {
		mexErrMsgTxt("At least 1 output arguments required");
	}

	/* a warm start is requested by passing MUZIN and MUXIN as inputs 9 and 10 */
	startcond = 0;
	if (nrhs > 8) {
		if (nrhs < 10) mexErrMsgTxt("MUZIN and MUXIN must be supplied together");
		startcond = 1;
	}

	/* dealing with sparse array WD */
	if (mxIsDouble(prhs[0]) != 1) mexErrMsgTxt("WD must be a double precision matrix");
	srwd = mxGetPr(prhs[0]);
	irwd = mxGetIr(prhs[0]);
	jcwd = mxGetJc(prhs[0]);
	nzmaxwd = (int) mxGetNzmax(prhs[0]);
	W = (int) mxGetM(prhs[0]);
	D = (int) mxGetN(prhs[0]);

	/* dealing with sparse array AD */
	if (mxIsDouble(prhs[1]) != 1) mexErrMsgTxt("AD must be a double precision matrix");
	irad = mxGetIr(prhs[1]);
	jcad = mxGetJc(prhs[1]);
	A = (int) mxGetM(prhs[1]);
	if ((int) mxGetN(prhs[1]) != D) mexErrMsgTxt("WD and AD must have the same number of columns");

	/* check that every document has some authors; MA ends up as the largest author count */
	for (i=0; i<D; i++) {
		if ((jcad[i + 1] - jcad[i]) == 0) mexErrMsgTxt("there are some documents without authors in AD matrix ");
		if ((jcad[i + 1] - jcad[i]) > NAMAX) mexErrMsgTxt("Too many authors in some documents ... reached the NAMAX limit");
		if ((jcad[i + 1] - jcad[i]) > MA) MA = (int) (jcad[i + 1] - jcad[i]);
	}

	phi = mxGetPr(prhs[2]);
	J = (int) mxGetM(prhs[2]);
	if (J<=0) mexErrMsgTxt("Number of topics must be greater than zero");
	if ((int) mxGetN(prhs[2]) != W) mexErrMsgTxt("Vocabulary mismatches");

	NN = (int) mxGetScalar(prhs[3]);
	if (NN<0) mexErrMsgTxt("Number of iterations must be greater than zero");

	ALPHA = (double) mxGetScalar(prhs[4]);
	if (ALPHA<0) mexErrMsgTxt("ALPHA must be greater than zero");

	BETA = (double) mxGetScalar(prhs[5]);
	if (BETA<0) mexErrMsgTxt("BETA must be greater than zero");

	SEED = (int) mxGetScalar(prhs[6]);

	OUTPUT = (int) mxGetScalar(prhs[7]);

	if (startcond == 1) {
		MUZIN = mxGetPr(prhs[8]);
		if (nzmaxwd != mxGetN(prhs[8])) mexErrMsgTxt("WD and MUZIN mismatch");
		if (J != mxGetM( prhs[ 8 ])) mexErrMsgTxt("J and MUZIN mismatch");

		MUXIN = mxGetPr(prhs[9]);
		if (nzmaxwd != mxGetN( prhs[9])) mexErrMsgTxt("WD and MUXIN mismatch");
		if (MA != mxGetM(prhs[9])) mexErrMsgTxt("MA and MUXIN mismatch");
	}

	/* set the seed of the random number generator */
	seedMT( 1 + SEED * 2 ); // seeding only works on uneven numbers

	/* allocate memory */
	muz = dvec(J*nzmaxwd);
	mux = dvec(MA*nzmaxwd);

	if (startcond == 1) {
		for (i=0; i<J*nzmaxwd; i++) muz[i] = (double) MUZIN[i];
		for (a=0; a<MA*nzmaxwd; a++) mux[a] = (double) MUXIN[a];
	}

	theta = dvec(J*A);

	/* run the model */
	ATMBP( ALPHA, BETA, W, J, D, A, MA, NN, OUTPUT, irwd, jcwd, srwd, irad, jcad, muz, mux, phi, theta, startcond );

	/* output */
	plhs[0] = mxCreateDoubleMatrix(J, A, mxREAL);
	mxSetPr(plhs[0], theta);

	/* only nlhs output slots exist; do not write beyond them */
	if (nlhs > 1) {
		plhs[1] = mxCreateDoubleMatrix(J, nzmaxwd, mxREAL);
		mxSetPr(plhs[1], muz);
	}

	if (nlhs > 2) {
		plhs[2] = mxCreateDoubleMatrix(MA, nzmaxwd, mxREAL);
		mxSetPr(plhs[2], mux);
	}
}
void hca_displaytopics(char *stem, char *resstem, int topword, enum ScoreType scoretype, int pmicount, int fullreport) { int w,k; uint32_t *termindk = NULL; uint32_t *indk = NULL; int Nk_tot = 0; double (*termtscore)(int) = NULL; double (*tscore)(int) = NULL; double sparsityword = 0; double sparsitydoc = 0; double underused = 0; uint32_t *top1cnt = NULL; FILE *fp; float *tpmi = NULL; char *topfile; char *repfile; uint32_t *psort; FILE *rp = NULL; float *gtvec = globalprop(); //#define XTRA // prints model topic probs after observed #ifdef XTRA double *gtavec = calloc(ddN.T,sizeof(gtavec[0])); #endif float *gpvec = calloc(ddN.W,sizeof(gpvec[0])); float *pvec = calloc(ddN.W,sizeof(pvec[0])); #ifdef KL float *dfvec = calloc(ddN.W,sizeof(dfvec[0])); #endif double *ngalpha = NULL; T_stats_t *termstats; #ifdef XTRA get_probs(gtavec); #endif if ( pmicount>topword ) pmicount = topword; if ( scoretype == ST_idf ) { tscore = idfscore; } else if ( scoretype == ST_phirat ) { tscore = phiratioscore; } else if ( scoretype == ST_phi ) { tscore = phiscore; } else if ( scoretype == ST_count ) { tscore = countscore; } else if ( scoretype == ST_cost ) { tscore = costscore; } else if ( scoretype == ST_Q ) { tscore = Qscore; lowerQ = 1.0/ddN.T; } if ( ddS.TwT==NULL && ddP.phi==NULL && scoretype == ST_phirat ) yap_quit("Cannot use '-orat' option with this model/settings.\n"); if ( ddP.PYalpha==H_NG ) { /* * provide an estimate of alpha */ ngalpha = dvec(ddN.T); get_probs(ngalpha); for (k=0; k<ddN.T; k++) { ddP.alphapr[k] = ngalpha[k]; } } /* * returns null if no relevant data file */ termstats = tstats_init(ddS.z, ddD.NdTcum, ddN.T, ddN.DT, stem); if ( termstats ) { if ( scoretype == ST_idf ) { termtscore = termidfscore; } else termtscore = termcountscore; } /* * first collect counts of each word/term, * and build gpvec (mean word probs) */ build_NwK(); if ( termstats ) build_termNwK(termstats); { /* * gpvec[] is normalised NwK[] */ double tot = 0; for (w=0; w<ddN.W; w++) tot += 
gpvec[w] = NwK[w]+0.1; for (w=0; w<ddN.W; w++) gpvec[w] /= tot; } if ( ddS.Nwt ) { for (k=0; k<ddN.T; k++) { Nk_tot += ddS.NWt[k]; } } psort = sorttops(gtvec, ddN.T); top1cnt = hca_top1cnt(); if ( !top1cnt ) yap_quit("Cannot allocate top1cnt in hca_displaytopics()\n"); if ( pmicount ) { tpmi = malloc(sizeof(*tpmi)*(ddN.T+1)); if ( !tpmi ) yap_quit("Cannot allocate tpmi in hca_displaytopics()\n"); } indk = malloc(sizeof(*indk)*ddN.W); if ( !indk ) yap_quit("Cannot allocate indk in hca_displaytopics()\n"); if ( termstats ) { termindk = malloc(sizeof(*indk)*termstats->K); if ( !termindk ) yap_quit("Cannot allocate termindk in hca_displaytopics()\n"); } data_df(stem); #ifdef KL for (w=0; w<ddN.W; w++) dfvec[w] = ddD.df[w]; #endif /* * two passes through, * first to build the top words and dump to file */ repfile = yap_makename(resstem,".topset"); topfile = yap_makename(resstem,".toplst"); fp = fopen(topfile,"w"); if ( !fp ) yap_sysquit("Cannot open file '%s' for write\n", topfile); yap_message("\n"); for (k=0; k<ddN.T; k++) { int cnt, termcnt = 0; tscorek = k; /* * build sorted word list */ cnt = buildindk(k, indk); topk(topword, cnt, indk, tscore); if ( cnt==0 ) continue; if ( termstats ) { termcnt = buildtermindk(k, termindk, termstats); topk(topword, termcnt, termindk, termtscore); } /* * dump words to file */ fprintf(fp,"%d: ", k); for (w=0; w<topword && w<cnt; w++) { fprintf(fp," %d", (int)indk[w]); } if ( termstats ) { for (w=0; w<topword && w<termcnt; w++) { fprintf(fp," %d", (int)termstats->Kmin+termindk[w]); } } fprintf(fp, "\n"); } if ( ddP.PYbeta && (ddP.phi==NULL || ddP.betapr) ) { int cnt; /* * dump root words */ tscorek = -1; cnt = buildindk(-1, indk); topk(topword, cnt, indk, (ddP.phi==NULL)?countscore:phiscore); fprintf(fp,"-1:"); for (w=0; w<topword && w<cnt; w++) { fprintf(fp," %d", (int)indk[w]); } fprintf(fp, "\n"); } fclose(fp); if ( verbose>1 ) yap_message("\n"); if ( pmicount ) { /* * compute PMI */ char *toppmifile; char *pmifile; double *tp; tp 
= dvec(ddN.T); pmifile=yap_makename(stem,".pmi"); toppmifile=yap_makename(resstem,".toppmi"); get_probs(tp); report_pmi(topfile, pmifile, toppmifile, ddN.T, ddN.W, 1, pmicount, tp, tpmi); free(toppmifile); free(pmifile); free(tp); } /* * now report words and diagnostics */ //ttop_open(topfile); if ( fullreport ) { rp = fopen(repfile,"w"); if ( !rp ) yap_sysquit("Cannot open file '%s' for write\n", repfile); fprintf(rp, "#topic index rank prop word-sparse doc-sparse eff-words eff-docs docs-bound top-one " "dist-unif dist-unigrm"); if ( PCTL_BURSTY() ) fprintf(rp, " burst-concent"); if ( ddN.tokens ) fprintf(rp, " ave-length"); fprintf(rp, " coher"); if ( pmicount ) fprintf(rp, " pmi"); fprintf(rp, "\n#word topic index rank"); if ( ddS.Nwt ) fprintf(rp, " count"); fprintf(rp, " prop cumm df coher\n"); } for (k=0; k<ddN.T; k++) { int cnt, termcnt = 0; int kk = psort[k]; uint32_t **dfmtx; if ( ddP.phi==NULL && ddS.NWt[kk]==0 ) continue; /* * grab word prob vec for later use */ if ( ddS.Nwt ) { int w; for (w=0; w<ddN.W; w++) pvec[w] = wordprob(w,kk); } else if ( ddP.phi ) fv_copy(pvec, ddP.phi[kk], ddN.W); else if ( ddS.phi ) fv_copy(pvec, ddS.phi[kk], ddN.W); /* * rebuild word list */ tscorek = kk; cnt = buildindk(kk, indk); topk(topword, cnt, indk, tscore); if ( topword<cnt ) cnt = topword; assert(cnt>0); if ( termstats ) { termcnt = buildtermindk(kk, termindk, termstats); topk(topword, termcnt, termindk, termtscore); if ( topword<termcnt ) termcnt = topword; } /* * df stats for topic returned as matrix */ dfmtx = hca_dfmtx(indk, cnt, kk); if ( ddS.Nwt && (ddS.NWt[kk]*ddN.T*100<Nk_tot || ddS.NWt[kk]<5 )) underused++; /* * print stats for topic * Mallet: tokens, doc_ent, ave-word-len, coher., * uni-dist, corp-dist, eff-no-words */ yap_message("Topic %d/%d", kk, k); { /* * compute diagnostics */ double prop = gtvec[kk]; float *dprop = docprop(kk); double spw = 0; double spd = ((double)nonzero_Ndt(kk))/((double)ddN.DT); #ifdef KL double ew = fv_kl(dfvec,pvec,ddN.W); 
#else double ew = exp(fv_entropy(pvec,ddN.W)); #endif double ud = fv_helldistunif(pvec,ddN.W); double pd = fv_helldist(pvec,gpvec,ddN.W); double sl = fv_avestrlen(pvec,ddN.tokens,ddN.W); double co = coherence(dfmtx, cnt); double ed = dprop?exp(fv_entropy(dprop,ddN.DT)):ddN.DT; #define MALLET_EW #ifdef MALLET_EW double ewp = dprop?(1.0/fv_expprob(pvec,ddN.W)):ddN.W; #endif double da = dprop?fv_bound(dprop,ddN.DT,1.0/sqrt((double)ddN.T)):0; sparsitydoc += spd; yap_message((ddN.T>200)?" p=%.3lf%%":" p=%.2lf%%",100*prop); #ifdef XTRA yap_message((ddN.T>200)?"/%.3lf%%":"/%.2lf%%",100*gtavec[kk]); #endif if ( ddS.Nwt ) { spw = ((double)nonzero_Nwt(kk))/((double)ddN.W); sparsityword += spw; yap_message(" ws=%.1lf%%", 100*(1-spw)); } yap_message(" ds=%.1lf%%", 100*(1-spd) ); #ifdef KL yap_message(" ew=%lf", ew); #else yap_message(" ew=%.0lf", ew); #endif #ifdef MALLET_EW yap_message(" ewp=%.1lf", ewp); #endif yap_message(" ed=%.1lf", ed); yap_message(" da=%.0lf", da+0.1); yap_message(" t1=%u", top1cnt[kk]); yap_message(" ud=%.3lf", ud); yap_message(" pd=%.3lf", pd); if ( PCTL_BURSTY() ) yap_message(" bd=%.3lf", ddP.bdk[kk]); if ( ddP.NGbeta ) { /* * approx. 
as sqrt(var(lambda_k)/lambda-normaliser */ double ngvar = sqrt(ddP.NGalpha[kk]) * (ngalpha[kk]/ddP.NGalpha[kk]); yap_message(" ng=%.4lf,%.4lf", ngalpha[kk], ngvar/ngalpha[kk]); if ( ddS.sparse ) yap_message(",%.4f", 1-((float)ddS.sparseD[kk])/ddN.DTused); if ( verbose>2 ) yap_message(" ngl=%.4lf,%.4lf, nga=%.4lf,%.4lf", ddP.NGalpha[kk]/ddP.NGbeta[kk], sqrt(ddP.NGalpha[kk]/ddP.NGbeta[kk]/ddP.NGbeta[kk]), ddP.NGalpha[kk], ddP.NGbeta[kk]); } if ( ddN.tokens ) yap_message(" sl=%.2lf", sl); yap_message(" co=%.3lf%%", co); if ( pmicount ) yap_message(" pmi=%.3f", tpmi[kk]); if ( fullreport ) { fprintf(rp,"topic %d %d", kk, k); fprintf(rp," %.6lf", prop); if ( ddS.Nwt ) { fprintf(rp," %.6lf", (1-spw)); } else { fprintf(rp," 0"); } fprintf(rp," %.6lf", (1-spd) ); #ifdef KL yap_message(" %lf", ew); #else fprintf(rp," %.2lf", ew); #endif #ifdef MALLET_EW fprintf(rp," %.2lf", ewp); #endif fprintf(rp," %.2lf", ed); fprintf(rp," %.0lf", da+0.1); fprintf(rp," %u", top1cnt[kk]); fprintf(rp," %.6lf", ud); fprintf(rp," %.6lf", pd); if ( PCTL_BURSTY() ) fprintf(rp," %.3lf", ddP.bdk[kk]); fprintf(rp," %.4lf", (ddN.tokens)?sl:0); fprintf(rp," %.6lf", co); if ( pmicount ) fprintf(rp," %.4f", tpmi[kk]); fprintf(rp,"\n"); } if ( dprop) free(dprop); } if ( verbose>1 ) { double pcumm = 0; /* * print top words: * Mallet: rank, count, prob, cumm, docs, coh */ yap_message("\ntopic %d/%d", kk, k); yap_message(" words="); for (w=0; w<cnt; w++) { if ( w>0 ) yap_message(","); if ( ddN.tokens ) yap_message("%s", ddN.tokens[indk[w]]); else yap_message("%d", indk[w]); if ( verbose>2 ) { if ( scoretype == ST_count ) yap_message("(%d)", (int)(tscore(indk[w])+0.2)); else yap_message("(%6lf)", tscore(indk[w])); } if ( fullreport ) { fprintf(rp, "word %d %d %d", kk, indk[w], w); if ( ddS.Nwt ) fprintf(rp, " %d", ddS.Nwt[indk[w]][kk]); pcumm += pvec[indk[w]]; fprintf(rp, " %.6f %.6f", pvec[indk[w]], pcumm); fprintf(rp, " %d", dfmtx[w][w]); fprintf(rp, " %.6f", coherence_word(dfmtx, cnt, w)); if ( 
ddN.tokens ) fprintf(rp, " %s", ddN.tokens[indk[w]]); fprintf(rp, "\n"); } } if ( termstats ) { yap_message(" terms="); for (w=0; w<termcnt; w++) { if ( w>0 ) yap_message(","); if ( ddN.tokens ) yap_message("%s", termstats->tokens[termindk[w]]); else yap_message("%d", termstats->Kmin+termindk[w]); if ( verbose>2 ) { if ( scoretype == ST_count ) yap_message("(%d)", (int)(termtscore(termindk[w])+0.2)); else yap_message("(%6lf)", termtscore(termindk[w])); } if ( fullreport ) { fprintf(rp, "term %d %d %d", kk, termindk[w], w); fprintf(rp, " %d", termstats->Nkt[termindk[w]][kk]); fprintf(rp, " %s", termstats->tokens[termindk[w]]); fprintf(rp, "\n"); } } } } yap_message("\n"); free(dfmtx[0]); free(dfmtx); } if ( verbose>1 && ddP.PYbeta ) { int cnt; double pcumm = 0; /* * print root words */ tscorek = -1; cnt = buildindk(-1,indk); /* this case gives bad results */ // if ( scoretype == ST_phirat ) topk(topword, cnt, indk, phiratioscore); topk(topword, cnt, indk, (ddP.phi==NULL)?countscore:phiscore); /* * cannot build df mtx for root because * it is latent w.r.t. 
topics */ yap_message("Topic root words="); if ( fullreport ) { int w; if ( ddP.phi && ddP.PYbeta!=H_PDP ) { for (w=0; w<ddN.W; w++) pvec[w] = ddS.phi[ddN.T][w]; } else { for (w=0; w<ddN.W; w++) pvec[w] = betabasewordprob(w); } #ifdef KL double ew = fv_kl(dfvec,pvec,ddN.W); #else double ew = exp(fv_entropy(pvec,ddN.W)); #endif double ud = fv_helldistunif(pvec,ddN.W); double pd = fv_helldist(pvec,gpvec,ddN.W); fprintf(rp,"topic -1 -1 0 0"); fprintf(rp," %.4lf", ew); fprintf(rp," %.6lf", ud); fprintf(rp," %.6lf", pd); fprintf(rp,"\n"); } for (w=0; w<topword && w<cnt; w++) { if ( w>0 ) yap_message(","); if ( ddN.tokens ) yap_message("%s", ddN.tokens[indk[w]]); else yap_message("%d", indk[w]); if ( verbose>2 && !ddP.phi ) yap_message("(%6lf)", countscore(indk[w])); if ( fullreport ) { fprintf(rp, "word %d %d %d", -1, indk[w], w); if ( ddS.TwT ) fprintf(rp, " %d", ddS.TwT[w]); pcumm += pvec[indk[w]]; fprintf(rp, " %.6f %.6f", pvec[indk[w]], pcumm); fprintf(rp, " 0 0"); if ( ddN.tokens ) fprintf(rp, " %s", ddN.tokens[indk[w]]); fprintf(rp, "\n"); } } yap_message("\nTopical words="); topk(topword, cnt, indk, phiinvratioscore); for (w=0; w<topword && w<cnt; w++) { if ( w>0 ) yap_message(","); if ( ddN.tokens ) yap_message("%s", ddN.tokens[indk[w]]); else yap_message("%d", indk[w]); } yap_message("\n"); } yap_message("\n"); if ( rp ) fclose(rp); if ( ddS.Nwt ) yap_message("Average topicXword sparsity = %.2lf%%\n", 100*(1-sparsityword/ddN.T) ); yap_message("Average docXtopic sparsity = %.2lf%%\n" "Underused topics = %.1lf%%\n", 100*(1-sparsitydoc/ddN.T), 100.0*underused/(double)ddN.T); if ( ddS.sparse && ddP.PYalpha==H_NG ) { double avesp = 0; // correct_docsp(); for (k=0; k<ddN.T; k++) { avesp += gtvec[k]; } // check gtvec[] sums to 1 assert(fabs(avesp-1.0)<0.00001); avesp = 0; for (k=0; k<ddN.T; k++) { avesp += gtvec[k]*((float)ddS.sparseD[k])/ddN.DTused; assert(ddS.sparseD[k]<=ddN.DTused); } assert(avesp<=1.0); assert(avesp>0.0); yap_message("IBP sparsity = %.2lf%%\n", 
100*(1-avesp)); } if ( pmicount ) yap_message("Average PMI = %.3f\n", tpmi[ddN.T]); /* * print */ if ( 1 ) { float **cmtx = hca_topmtx(); int t1, t2; int m1, m2; float mval; char *corfile = yap_makename(resstem,".topcor"); fp = fopen(corfile,"w"); if ( !fp ) yap_sysquit("Cannot open file '%s' for write\n", corfile); /* * print file */ for (t1=0; t1<ddN.T; t1++) { for (t2=0; t2<t1; t2++) if ( cmtx[t1][t2]>1.0e-7 ) fprintf(fp, "%d %d %0.6f\n", t1, t2, cmtx[t1][t2]); } fclose(fp); free(corfile); /* * display maximum */ m1 = 1; m2 = 0; mval = cmtx[1][0]; for (t1=0; t1<ddN.T; t1++) { for (t2=0; t2<t1; t2++) { if ( mval<cmtx[t1][t2] ) { mval = cmtx[t1][t2]; m1 = t1; m2 = t2; } } } yap_message("Maximum correlated topics (%d,%d) = %f\n", m1, m2, mval); free(cmtx[0]); free(cmtx); } /* * print burstiness report */ if ( PCTL_BURSTY() ) { int tottbl = 0; int totmlttbl = 0; int totmlt = 0; int i; for (i=0; i<ddN.NT; i++) { if ( Z_issetr(ddS.z[i]) ) { if ( M_multi(i) ) totmlttbl++; tottbl++; } if ( M_multi(i) ) totmlt++; } yap_message("Burst report: multis=%.2lf%%, tables=%.2lf%%, tbls-in-multis=%.2lf%%\n", 100.0*((double)ddM.dim_multiind)/ddN.N, 100.0*((double)tottbl)/ddN.NT, 100.0*((double)totmlttbl)/totmlt); } yap_message("\n"); free(topfile); if ( repfile ) free(repfile); if ( top1cnt ) free(top1cnt); free(indk); free(psort); if ( ngalpha ) free(ngalpha); if ( pmicount ) free(tpmi); if ( NwK ) { free(NwK); NwK = NULL; } #ifdef KL free(dfvec); #endif free(pvec); free(gtvec); free(gpvec); tstats_free(termstats); }
/**
 * Scans the BRANCHES table and, for every record smaller than the padded
 * size, creates a padded copy, re-points the index entries at the copy and
 * finally destroys the original (unpadded) records.
 *
 * The scan stops early as soon as a record whose size equals
 * `br->maxsize()+1` is seen; such a record is treated as already padded
 * (see the WARNING below about the assumed record layout).
 *
 * @return RCOK when the whole pass completes. Each storage-manager call is
 *         wrapped in W_DO, which presumably returns the error code to the
 *         caller on failure -- TODO confirm against the macro definition.
 *
 * NOTE(review): `handle`, `hsize`, `br_fid`, `br_idx_count` and
 * `_pbranch_man` are used below but are not declared inside this function,
 * `branch_man` is used both as an object (`branch_man->table()`) and as a
 * call (`branch_man()->...`), and `br_idx` (a std::vector reference) is used
 * as `br_idx+i` / `br_idx[i].fid(...)`. This looks like a partially finished
 * port away from `pin_i` -- confirm that this code is actually compiled.
 */
w_rc_t ShoreTPCBEnv::_pad_BRANCHES()
{
    ss_m* db = this->db();

    // lock the BRANCHES table
    branch_t* br = branch_man->table();
    std::vector<index_desc_t*>& br_idx = br->get_indexes();

    // lock the table and index(es) for exclusive access:
    // IX on the volume, then X on the primary index store and on every
    // store returned by get_indexes()
    W_DO(ss_m::lm->intent_vol_lock(br->primary_idx()->stid().vol, okvl_mode::IX));
    W_DO(ss_m::lm->intent_store_lock(br->primary_idx()->stid(), okvl_mode::X));
    for(size_t i=0; i < br_idx.size(); i++) {
        W_DO(ss_m::lm->intent_store_lock(br_idx[i]->stid(), okvl_mode::X));
    }

    // scratch buffer handed to rep_row_t below (used as the destination when
    // formatting index keys)
    guard<ats_char_t> pts = new ats_char_t(br->maxsize());

    // copy and pad all tuples smaller than 4k
    // WARNING: this code assumes that existing tuples are packed
    // densely so that all padded tuples are added after the last
    // unpadded one
    bool eof;

    // we know you can't fit two 4k records on a single page
    static int const PADDED_SIZE = 4096;

    // source of the padding bytes appended to each new record
    // NOTE(review): the buffer is not initialised here, so the padding
    // content is whatever `new char[]` returns
    array_guard_t<char> padding = new char[PADDED_SIZE];

    // rids of the original (unpadded) records, destroyed after the scan
    std::vector<rid_t> hit_list;
    {
        // NOTE(review): `iter` is a raw owning pointer; if any W_DO between
        // here and `delete iter` returns early (presumably on error), the
        // iterator is not deleted.
        table_scan_iter_impl<branch_t>* iter =
            new table_scan_iter_impl<branch_t>(branch_man->table());

        int count = 0;          // number of records visited so far
        table_row_t row(br);
        rep_row_t arep(pts);
        // a record of exactly this size is treated as already padded
        int psize = br->maxsize()+1;

        W_DO(iter->next(db, eof, row));
        while (!eof) {
            // figure out how big the old record is
            int bsize = row.size();

            if (bsize == psize) {
                TRACE(TRACE_ALWAYS, "-> Found padded BRANCH record. Stopping search (%d)\n", count);
                break;
            }
            else if (bsize > psize) {
                // too big... shrink it down to save on logging
                // handle->truncate_rec(bsize - psize);
                fprintf(stderr, "+");
                // CS: no more pin_i -> do nothing
            }
            else {
                // copy and pad the record (and mark the old one for deletion)
                rid_t new_rid;
                vec_t hvec(handle->hdr(), hsize);
                vec_t dvec(handle->body(), bsize);
                // the remaining PADDED_SIZE-bsize bytes come from `padding`
                vec_t pvec(padding, PADDED_SIZE-bsize);
                W_DO(db->create_rec(br_fid, hvec, PADDED_SIZE, dvec, new_rid));
                W_DO(db->append_rec(new_rid, pvec));

                // mark the old record for deletion
                hit_list.push_back(handle->rid());

                // update the index(es)
                vec_t rvec(&row._rid, sizeof(rid_t));       // old rid
                vec_t nrvec(&new_rid, sizeof(new_rid));     // new rid
                for(int i=0; i < br_idx_count; i++) {
                    int key_sz = branch_man()->format_key(br_idx+i, &row, arep);
                    vec_t kvec(arep._dest, key_sz);

                    // destroy the old mapping and replace it with the new
                    // one. If it turns out this is super-slow, we can
                    // look into probing the index with a cursor and
                    // updating it directly.
                    int pnum = _pbranch_man->get_pnum(&br_idx[i], &row);
                    stid_t fid = br_idx[i].fid(pnum);
                    W_DO(db->destroy_assoc(fid, kvec, rvec));

                    // now put the entry back with the new rid
                    W_DO(db->create_assoc(fid, kvec, nrvec));
                }
                fprintf(stderr, ".");
            }

            // next!
            count++;
            W_DO(iter->next(db, eof, row));
        }
        TRACE(TRACE_ALWAYS, "padded records added\n");

        delete iter;
    }

    // delete the old records
    int hlsize = hit_list.size();
    TRACE(TRACE_ALWAYS, "-> Deleting (%d) old BRANCH unpadded records\n", hlsize);
    for(int i=0; i < hlsize; i++) {
        W_DO(db->destroy_rec(hit_list[i]));
    }

    return (RCOK);
}
///////////////////////////////////////////////////////////////////////////////// // calculate the intersection of a sphere the given ray // the ray has an origin and a direction, ray = origin + t*direction // find the t parameter, return true if it is between 0.0 and 1.0, false // otherwise, write the results in following variables: // depth - t \in [0.0 1.0] // posX - x position of intersection point, nothing if no intersection // posY - y position of intersection point, nothing if no intersection // posZ - z position of intersection point, nothing if no intersection // normalX - x component of normal at intersection point, nothing if no intersection // normalX - y component of normal at intersection point, nothing if no intersection // normalX - z component of normal at intersection point, nothing if no intersection // // attention: a sphere has usually two intersection points make sure to return // the one that is closest to the ray's origin and still in the viewing frustum // ///////////////////////////////////////////////////////////////////////////////// bool Sphere::intersect(Ray ray, double *depth, double *posX, double *posY, double *posZ, double *normalX, double *normalY, double *normalZ) { //////////*********** START OF CODE TO CHANGE *******//////////// // from slides: // (cx + t * vx)^2 + (cy + t * vy)^2 + (cz + t * vy)^2 = r^2 // text: // (e+td−c)·(e+td−c)−R2 = 0 // (d·d)t^2 +2d·(e−c)t+(e−c)·(e−c)−R^2 = 0 // d: the direction vector of the ray // e: point at which the ray starts // c: center point of the sphere Vec3 dvec( ray.direction[0], ray.direction[1], ray.direction[2]); Vec3 evec( ray.origin[0], ray.origin[1], ray.origin[2]); Vec3 cvec( this->center[0], this->center[1], this->center[2]); // use the quadratic equation, since we have the form At^2 + Bt + C = 0. 
double a = dvec.dot(dvec); double b = dvec.scale(2).dot(evec.subtract(cvec)); Vec3 eMinusCvec = evec.subtract(cvec); double c = eMinusCvec.dot(eMinusCvec) - (this->radius * this->radius); // discriminant: b^2 - 4ac double discriminant = (b * b) - (4 * a * c); // From text: If the discriminant is negative, its square root // is imaginary and the line and sphere do not intersect. if (discriminant < 0) { //printf("No intersection with sphere - 1\n"); return false; } else { // there is at least one intersection point double t1 = (-b + sqrt(discriminant)) / (2 * a); double t2 = (-b - sqrt(discriminant)) / (2 * a); double tmin = fminf(t1, t2); double tmax = fmaxf(t1, t2); double t = 0; // t is set to either tmin or tmax (or the function returns false) if (tmin >= 0) { //} && tmin <= 1) { t = tmin; } else if (tmax >= 0) { //} && tmax <= 1) { t = tmax; } else { // return false if neither interestion point is within [0, 1] //printf("No intersection with sphere. t values (%f, %f)\n", t1, t2); return false; } *depth = t; // position: (e + td) Vec3 posvec = dvec.scale(t).add(evec); *posX = posvec[0]; *posY = posvec[1]; *posZ = posvec[2]; // normal: 2(p - c) Vec3 normalvec = posvec.subtract(cvec).scale(2); normalvec.normalize(); *normalX = normalvec[0]; *normalY = normalvec[1]; *normalZ = normalvec[2]; } //////////*********** END OF CODE TO CHANGE *******//////////// //printf("Sphere intersection found (%f, %f, %f) \n", *posX, *posY, *posZ); return true; }
///////////////////////////////////////////////////////////////////////////////// // calculate the intersection of a plane the given ray // the ray has an origin and a direction, ray = origin + t*direction // find the t parameter, return true if it is between 0.0 and 1.0, false // otherwise, write the results in following variables: // depth - t \in [0.0 1.0] // posX - x position of intersection point, nothing if no intersection // posY - y position of intersection point, nothing if no intersection // posZ - z position of intersection point, nothing if no intersection // normalX - x component of normal at intersection point, nothing if no intersection // normalX - y component of normal at intersection point, nothing if no intersection // normalX - z component of normal at intersection point, nothing if no intersection // ///////////////////////////////////////////////////////////////////////////////// bool Plane::intersect(Ray ray, double *depth, double *posX, double *posY, double *posZ, double *normalX, double *normalY, double *normalZ) { //////////*********** START OF CODE TO CHANGE *******//////////// Vec3 evec( ray.origin[0], ray.origin[1], ray.origin[2]); Vec3 nvec( this->params[0], this->params[1], this->params[2]); Vec3 dvec( ray.direction[0], ray.direction[1], ray.direction[2]); double d = this->params[3] * sqrt(nvec[0] * nvec[0] + nvec[1] * nvec[1] + nvec[2] * nvec[2]); double t = -1; double denom = dvec.dot(nvec); if (denom != 0) { t = (-d - (evec.dot(nvec))) / dvec.dot(nvec); } if (t <= 0) { return false; } else { *depth = t; *posX = (dvec[0] * t) + evec[0]; *posY = (dvec[1] * t) + evec[1]; *posZ = (dvec[2] * t) + evec[2]; if (denom > 0) { *normalX = -nvec[0]; *normalY = -nvec[1]; *normalZ = -nvec[2]; } else { *normalX = nvec[0]; *normalY = nvec[1]; *normalZ = nvec[2]; } } //////////*********** END OF CODE TO CHANGE *******//////////// //printf("dvec[0], dvec[1], dvec[2]: (%f, %f, %f) \n", dvec[0], dvec[1], dvec[2]); //printf("evec[0], evec[1], evec[2]: 
(%f, %f, %f) \n", evec[0], evec[1], evec[2]); //printf("Plane interesection at t:%f (%f, %f, %f)\n", t, *posX, *posY, *posZ); return true; }
/**
 * Post-initialisation hook of the TPC-C environment: pads WAREHOUSE records.
 *
 * When CFG_HACK is not defined the function returns RCOK immediately and
 * nothing below is executed.
 *
 * Otherwise it locks the warehouse file and every partition of every index
 * in EX mode, scans the file, and for each record whose body is shorter than
 * `maxsize()+1` creates a padded copy, re-points the index entries at the
 * copy and remembers the old rid. Bodies longer than that are truncated in
 * place. The scan ends at the first record whose body is exactly
 * `maxsize()+1` bytes (treated as already padded) or when the cursor is
 * exhausted. Finally all remembered old records are destroyed.
 *
 * @return RCOK on completion; any failing storage-manager call wrapped in
 *         W_DO presumably returns its error to the caller -- TODO confirm.
 */
w_rc_t ShoreTPCCEnv::_post_init_impl()
{
#ifndef CFG_HACK
    return (RCOK);
#endif

    TRACE (TRACE_ALWAYS, "Padding WAREHOUSES");
    ss_m* db = this->db();

    // descriptors of the WH table and its indexes
    warehouse_t* warehouse = warehouse_desc();
    index_desc_t* indexes = warehouse->indexes();
    int num_indexes = warehouse->index_count();
    stid_t table_fid = warehouse->fid();

    // take the table and every index partition in EX mode
    W_DO(db->lock(table_fid, EX));
    for (int ix = 0; ix < num_indexes; ++ix) {
        for (int part = 0; part < indexes[ix].get_partition_count(); ++part) {
            W_DO(db->lock(indexes[ix].fid(part), EX));
        }
    }

    // scratch space used by rep_row_t when formatting index keys
    guard<ats_char_t> key_buf = new ats_char_t(warehouse->maxsize());

    /* Copy and pad all tuples smaller than 4k.

       WARNING: this relies on the existing tuples being packed densely,
       so that every padded tuple lands after the last unpadded one.
    */
    bool eof;

    // two 4k records cannot share a single page
    static int const PADDED_SIZE = 4096;

    array_guard_t<char> filler = new char[PADDED_SIZE];
    std::vector<rid_t> stale_rids;   // originals to destroy after the scan

    {
        guard<warehouse_man_impl::table_iter> iter;
        {
            warehouse_man_impl::table_iter* raw_iter;
            W_DO(warehouse_man()->get_iter_for_file_scan(db, raw_iter));
            iter = raw_iter;
        }

        int visited = 0;
        table_row_t row(warehouse);
        rep_row_t arep(key_buf);
        const int padded_body = warehouse->maxsize()+1;

        W_DO(iter->next(db, eof, row));
        for (;;) {
            pin_i* handle = iter->cursor();
            if (!handle) {
                TRACE(TRACE_ALWAYS, " -> Reached EOF. Search complete (%d)\n", visited);
                break;
            }

            // sizes of the record currently under the cursor
            const int hdr_len = handle->hdr_size();
            const int body_len = handle->body_size();

            if (body_len == padded_body) {
                TRACE(TRACE_ALWAYS, " -> Found padded WH record. Stopping search (%d)\n", visited);
                break;
            }

            if (body_len > padded_body) {
                // oversized: cut it back in place to save on logging
                handle->truncate_rec(body_len - padded_body);
                fprintf(stderr, "+");
            }
            else {
                // undersized: write a padded copy, queue the original for removal
                rid_t new_rid;
                vec_t hdr_vec(handle->hdr(), hdr_len);
                vec_t body_vec(handle->body(), body_len);
                vec_t pad_vec(filler, PADDED_SIZE-body_len);
                W_DO(db->create_rec(table_fid, hdr_vec, PADDED_SIZE, body_vec, new_rid));
                W_DO(db->append_rec(new_rid, pad_vec));

                // on small databases the first padded record may land on
                // the page we are scanning, so refresh the pin if needed
                if (!handle->up_to_date()) {
                    handle->repin();
                }

                stale_rids.push_back(handle->rid());

                // re-point every index entry from the old rid to the new one
                vec_t old_rid_vec(&row._rid, sizeof(rid_t));
                vec_t new_rid_vec(&new_rid, sizeof(new_rid));
                for (int ix = 0; ix < num_indexes; ++ix) {
                    index_desc_t* cur = indexes + ix;

                    int key_sz = warehouse_man()->format_key(cur, &row, arep);
                    vec_t kvec(arep._dest, key_sz);

                    /* Drop the old mapping and insert the new one. Should
                       this prove too slow, probing the index with a cursor
                       and updating it directly is an alternative.
                    */
                    int pnum = _pwarehouse_man->get_pnum(cur, &row);
                    stid_t fid = cur->fid(pnum);

                    if (cur->is_mr()) {
                        W_DO(db->destroy_mr_assoc(fid, kvec, old_rid_vec));
                        // re-insert the key with the new rid
                        el_filler ef;
                        ef._el.put(new_rid_vec);
                        W_DO(db->create_mr_assoc(fid, kvec, ef));
                    }
                    else {
                        W_DO(db->destroy_assoc(fid, kvec, old_rid_vec));
                        // re-insert the key with the new rid
                        W_DO(db->create_assoc(fid, kvec, new_rid_vec));
                    }
                }
                fprintf(stderr, ".");
            }

            // advance to the next record
            ++visited;
            W_DO(iter->next(db, eof, row));
        }
        fprintf(stderr, "\n");

        // leaving this scope releases the iterator
    }

    // finally remove the original, unpadded records
    const int stale_count = static_cast<int>(stale_rids.size());
    TRACE(TRACE_ALWAYS, "-> Deleting (%d) old unpadded records\n", stale_count);
    for (int n = 0; n < stale_count; ++n) {
        W_DO(db->destroy_rec(stale_rids[n]));
    }

    return (RCOK);
}