Example #1
int notmain(void)
{
  int i,j,k;
  Matrix_3x3 id;
  Matrix_3x3 invid;
  Vector vec(1,2,3);
  Vector res, res2;

  for(i=0;i<3;i++) for(j=0;j<3;j++) id.set(i,j,(float)((i*4)+j));

  for(k=0;k<100;k++)
    {
      for(i=0;i<3;i++) //col
	for(j=0;j<3;j++) //lin
	  {
	    /*	    if(i<j)
	      id.set(i,j,0.0);
	      else*/
	      id.set(i,j, (float)(  (((i*j*k)%7 + ((k+1)%3) + (i+j))+1) * (((k+i+j)%2)==0 ? -1 : 1) ));
	  };
      printf("\n\nTEST #%d\n",k);
      if(id.is_invertable())
	{
	  printf("Det==%g\n",id.det());
	  res=Vector(1.0,1.0,1.0);
	  res=id.solve(res);
	  printf("solve erg = %g,%g,%g\n",res[0],res[1],res[2]);
	  res2=id*res;
	  printf("solve test = %g,%g,%g\n",res2[0],res2[1],res2[2]);

	  id.dump("   M");
	  invid=id.invert();
	  invid.dump("M^-1");
	  
	  (id*invid).dump("mul");
	};
    };


  id.dump("id");

  printf("vec = ");
  dvec(vec);
  printf("\n");

  res=id*vec;

  printf("res = ");
  dvec(res);
  printf("\n");

  return 0;
}
Example #2
void resample_chain (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt) //
{
	int i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;

	for (i = 0; i < N; i++) {

		t = z[i];
		Ndt[d[i]][t]--;
		totprob = 0;

		for (t = 0; t < T; t++) {
			prob[t] = (Nwt[w[i]][t] + beta)/(Nt[t] + wbeta)*(Ndt[d[i]][t] + alpha);
			totprob += prob[t];
		}

		U = drand48()*totprob;
		cumprob = prob[0];
		t = 0;

		// sample a topic t from the distribution
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;
		Ndt[d[i]][t]++;
	}

	free(prob);
}
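
Note: across these C examples, dvec(T) is paired with a later free(prob), which suggests a small helper that heap-allocates a zero-initialized array of T doubles, with ivec(n) as the integer analogue. A minimal sketch under that assumption (names and error handling are guesses from the call sites, not the original library):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helpers inferred from the calling pattern
 * "double *prob = dvec(T); ... free(prob);":
 * allocate a zero-initialized array that the caller releases with free(). */
static double *dvec(int n)
{
	double *v = (double *) calloc(n, sizeof(double));
	if (v == NULL) { fprintf(stderr, "dvec: cannot allocate %d doubles\n", n); exit(1); }
	return v;
}

static int *ivec(int n)
{
	int *v = (int *) calloc(n, sizeof(int));
	if (v == NULL) { fprintf(stderr, "ivec: cannot allocate %d ints\n", n); exit(1); }
	return v;
}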
Example #3
void loglike (int N, int W, int D, int T, double alpha, double beta, int *w, int *d, int **Nwt, int **Ndt, int *Nt, int *Nd) //
{
	int    i, j, t;
	double llike;
	static int init = 0;
	static double **prob_w_given_t;
	static double **prob_t_given_d;
	static double *Nd_;
	double Nt_;

	if (init==0) {
		init = 1;
		prob_w_given_t = dmat(W,T);
		prob_t_given_d = dmat(D,T);
		Nd_ = dvec(D);
		for (j = 0; j < D; j++) Nd_[j] = Nd[j] + T*alpha;
	}

	for (t = 0; t < T; t++) {
		Nt_ = Nt[t] + W*beta;
		for (i = 0; i < W; i++) prob_w_given_t[i][t] = (Nwt[i][t]+beta) / Nt_;
		for (j = 0; j < D; j++) prob_t_given_d[j][t] = (Ndt[j][t]+alpha)/ Nd_[j];
	}

	llike = 0;
	for (i = 0; i < N; i++)
		llike += log(ddot(T, prob_w_given_t[w[i]], prob_t_given_d[d[i]]));

	printf(">>> llike = %.6e    ", llike);
	printf("pplex = %.4f\n", exp(-llike/N));
}
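
loglike() additionally relies on dmat(n,m) and ddot(n,a,b), which from their usage look like a 2-D double-matrix allocator (indexed M[i][j]) and a plain dot product over length-n arrays. A minimal sketch consistent with that usage (the contiguous-block layout is an assumption):

#include <stdlib.h>

/* Hypothetical companions of dvec(), matching how loglike() uses them. */
static double **dmat(int n, int m)
{
	int i;
	double **M = (double **) calloc(n, sizeof(double *));
	double *block = (double *) calloc((size_t) n * m, sizeof(double));
	for (i = 0; i < n; i++) M[i] = block + (size_t) i * m; /* row pointers over one contiguous block */
	return M;
}

static double ddot(int n, const double *a, const double *b)
{
	double s = 0.0;
	int i;
	for (i = 0; i < n; i++) s += a[i] * b[i];
	return s;
}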
Example #4
double pplex_d(int N, int W, int T, int *w, int *d, double **Nwt, double **Ndt) //
{
	int i, t;
	double mypplex, llike=0, p1, p2, Z, pwd;
	double *zwt = dvec(T);

	for (t = 0; t < T; t++)
		for (zwt[t] = 0, i = 0; i < W; i++)
			zwt[t] += Nwt[i][t];

	for (i = 0; i < N; i++) {
		Z = pwd = 0;
		for (t = 0; t < T; t++) {
			p1 = Nwt[w[i]][t];
			p2 = Ndt[d[i]][t];
			Z += p2;
			pwd += p1 * p2 / zwt[t];
		}
		llike += log( pwd / Z );
	}

	mypplex = exp(-llike / N);

	return mypplex;
}
Example #5
dmatrix3 ConvLayer::think(dmatrix3 mat)
{
    dmatrix3 slab(mat.size(), dmatrix2(Fshape[1], dvec(Fshape[0])));
    ivec step(4);
    dvec exc(OutShape[1]*OutShape[2]);
    dvec act(OutShape[1]*OutShape[2]);
       
    ivec foldshape(2);
    foldshape[0] = OutShape[1];
    foldshape[1] = OutShape[2];
    
    Inputs = &mat;
    
    for(int f=0;f<Filters.size();f++) {
        dmatrix3 filt = Filters[f];
        for(int i=0;i<Steps.size();i++) {
            step = Steps[i];
            slab = invert<real>(slice<real>(invert<real>(mat), step));
            exc[i] = frobenius(slab, filt); // This is the "convolve" step
            act[i] = sigmoid(exc[i]); // activation of the i-th window
        }
        Excitations[f] = fold2<real>(exc, foldshape);
        Activations[f] = fold2<real>(act, foldshape);
    }
    return Activations;
}
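
In the C++ ConvLayer examples, dvec, ivec and dmatrix2/dmatrix3/dmatrix4 are value types constructed with (size, fill) arguments and indexed with operator[], so they are presumably aliases over nested std::vector. Plausible definitions consistent with the constructor calls above (the element type real is assumed to be double; the actual definitions may differ):

#include <vector>

// Assumed aliases; only their construction and indexing patterns are known
// from the examples.
typedef double                real;
typedef std::vector<real>     dvec;      // dvec(n, fill)
typedef std::vector<int>      ivec;      // ivec(n)
typedef std::vector<dvec>     dmatrix2;  // dmatrix2(rows, dvec(cols, fill))
typedef std::vector<dmatrix2> dmatrix3;
typedef std::vector<dmatrix3> dmatrix4;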
Example #6
CV_IMPL void
cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 )
{
    void* dptrs[] = { dstarr0, dstarr1, dstarr2, dstarr3 };
    cv::Mat src = cv::cvarrToMat(srcarr);
    int i, j, nz = 0;
    for( i = 0; i < 4; i++ )
        nz += dptrs[i] != 0;
    CV_Assert( nz > 0 );
    std::vector<cv::Mat> dvec(nz);
    std::vector<int> pairs(nz*2);

    for( i = j = 0; i < 4; i++ )
    {
        if( dptrs[i] != 0 )
        {
            dvec[j] = cv::cvarrToMat(dptrs[i]);
            CV_Assert( dvec[j].size() == src.size() );
            CV_Assert( dvec[j].depth() == src.depth() );
            CV_Assert( dvec[j].channels() == 1 );
            CV_Assert( i < src.channels() );
            pairs[j*2] = i;
            pairs[j*2+1] = j;
            j++;
        }
    }
    if( nz == src.channels() )
        cv::split( src, dvec );
    else
    {
        cv::mixChannels( &src, 1, &dvec[0], nz, &pairs[0], nz );
    }
}
Example #7
dmatrix3 ConvLayer::backpropagation() const
{
    dmatrix3 outputs(Excitations.size(), dmatrix2
                    (Excitations[0].size(), dvec
                    (Excitations[0][0].size(), 0.0)));
    ivec step;
    step.reserve(4);
    
    int index;
    
    for(int z=0;z<Errors.size();z++) {
        index = 0;
        for(int y=0;y<Errors[0].size();y++) {
            for(int x=0;x<Errors[0][0].size();x++, index++) {
                step = Steps[index];
                for(int i=step[0];i<step[1];i++) {
                    for(int j=step[2];j<step[3];j++) {
                        outputs[z][i][j] += sigmoid_p(
                                Excitations[z][i][j] *
                                Errors[z][y][x]);
                    }
                }
            }
        }
    }
    return outputs;
}
Example #8
void oversample_Ndt (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt) //
{
	int i, t, k, ntimes=4;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;

	for (i = 0; i < N; i++) {

		totprob = 0;
		for (t = 0; t < T; t++) {
			prob[t] = (Nwt[w[i]][t] + beta)/(Nt[t] + wbeta)*(Ndt[d[i]][t] + alpha);
			totprob += prob[t];
		}

		for (k = 0; k < ntimes; k++) {
			U = drand48()*totprob;
			cumprob = prob[0];
			t = 0;
			while (U>cumprob) {
				t++;
				cumprob += prob[t];
			}
			Ndt[d[i]][t]++;
		}
	}

	free(prob);
}
Example #9
exp_vector_t degree_vector(ex e, const exvector& vars)
{
	e = e.expand();
	exp_vector_t dvec(vars.size());
	for (std::size_t i = vars.size(); i-- != 0; ) {
		const int deg_i = e.degree(vars[i]);
		e = e.coeff(vars[i], deg_i);
		dvec[i] = deg_i;
	}
	return dvec;
}
Example #10
/**-------------------------------------------------
 * Make a quadrature given a Polynomial.
 * @param P :: A polynomial to use to make the quadrature.
 */
void MakeQuadrature::makeQuadrature(const Polynomial& P)
{
  auto& r = P.getRoots();
  auto& w = P.getWeights();
  const size_t n = r.size();

  auto quad = new Quadrature;
  quad->setRowCount( int(n) );
  quad->addDoubleColumn("r", API::NumericColumn::X);
  auto& rc = quad->getDoubleData("r");
  rc = r;
  quad->addDoubleColumn("w", API::NumericColumn::Y);
  auto& wc = quad->getDoubleData("w");
  wc = w;

  FunctionDomain1DView domain( r );
  FunctionValues values( domain );
  std::vector<double> wgt;
  std::vector<double> wgtDeriv;
  P.weightFunction()->function( domain, values );
  values.copyToStdVector( wgt );
  P.weightDerivative()->function( domain, values );
  values.copyToStdVector( wgtDeriv );

  quad->addDoubleColumn("weight", API::NumericColumn::Y);
  auto& wgtc = quad->getDoubleData("weight");
  wgtc = wgt;
  quad->addDoubleColumn("deriv", API::NumericColumn::Y);
  auto& derc = quad->getDoubleData("deriv");
  derc = wgtDeriv;

  Quadrature::FuncVector fvec( n );
  Quadrature::FuncVector dvec( n );
  for(size_t i = 0; i < n; ++i)
  {
    std::string colInd = boost::lexical_cast<std::string>( i );
    quad->addDoubleColumn("f"+colInd, API::NumericColumn::Y);
    fvec[i] = &quad->getDoubleData("f"+colInd);
    quad->addDoubleColumn("d"+colInd, API::NumericColumn::Y);
    dvec[i] = &quad->getDoubleData("d"+colInd);
  }
  P.calcPolyValues( fvec, dvec );
  quad->init();

  setClassProperty( "Quadrature", API::TableWorkspace_ptr( quad ) );
  {
    const double startX = get("StartX");
    const double endX = get("EndX");
    ChebfunWorkspace_sptr cheb( new ChebfunWorkspace(chebfun( 100, startX, endX )) );
    cheb->fun().fit( P );
    setClassProperty("ChebWorkspace", cheb);
  }
}
Example #11
ConvLayer::ConvLayer(int filters, ivec inshape, ivec fshape, int stride,
                     ConvNet* net)
{
    InShape  = inshape;
    Stride   = stride;
    Fshape   = fshape;
    OutShape = outshape(InShape, Fshape, Stride, filters);
    Steps    = calcsteps(InShape, Fshape, Stride, filters);
            
    dmatrix3 refE(OutShape[0], dmatrix2(OutShape[1], dvec(OutShape[2], 0.0)));
    refE.swap(Excitations);
    dmatrix3 refA(OutShape[0], dmatrix2(OutShape[1], dvec(OutShape[2], 0.0)));
    refA.swap(Activations);
    dmatrix3 refErr(OutShape[0],dmatrix2(OutShape[1],dvec(OutShape[2], 0.0)));
    refErr.swap(Errors);
    dmatrix4 flt(filters,dmatrix3(InShape[0],
                 dmatrix2(Fshape[0],dvec(Fshape[1], 0.5))));
    flt.swap(Filters);
    
    Brain = net;
}
Example #12
int main(int argc, char const *argv[])
{
	std::vector<int> ivec(9, 8);
	std::vector<double> dvec(8, 9.9);
	std::vector<char> cvec(7, 'h');
	std::cout << count(ivec, 8) << std::endl;
	std::cout << count(dvec, 9.9) << std::endl;
	std::cout << count(cvec, 'h') << std::endl;

	std::vector<std::string> svec(6, "hey");
	std::cout << count(svec, std::string("hey")) << std::endl;
	return 0;
}
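
main() above exercises a generic count over several vector element types; the actual exercise solution is not shown, so the following is only a minimal template consistent with those calls:

#include <cstddef>
#include <vector>

// Count how many elements of vec compare equal to value.
template <typename T>
std::size_t count(const std::vector<T> &vec, const T &value)
{
	std::size_t n = 0;
	for (std::size_t i = 0; i < vec.size(); ++i)
		if (vec[i] == value)
			++n;
	return n;
}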
Example #13
int *dsort(int n, double *x) //
{
	int *indx = ivec(n);
	int i;
	dcomp_vec = dvec(n);
	for (i = 0; i < n; i++) {
		dcomp_vec[i] = -x[i];
		indx[i] = i;
	}
	qsort(indx,n,sizeof(int),dcomp);
	free(dcomp_vec);
	return indx;
}
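
dsort() depends on a file-scope scratch array dcomp_vec and a qsort comparator dcomp that are not shown. Given the sign flip in dcomp_vec[i] = -x[i], the comparator presumably orders indices ascending by dcomp_vec, i.e. descending by the original x. A sketch of what those missing pieces might look like:

#include <stdlib.h>

/* Assumed companions of dsort(): compare two indices by the values cached
 * in dcomp_vec, so qsort() sorts the index array rather than the data. */
static double *dcomp_vec;

static int dcomp(const void *pa, const void *pb)
{
	int a = *(const int *) pa;
	int b = *(const int *) pb;
	if (dcomp_vec[a] < dcomp_vec[b]) return -1;
	if (dcomp_vec[a] > dcomp_vec[b]) return 1;
	return 0;
}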
Example #14
void sample_chain_with_prior (int N, int W, int T, int *w, int *d, int *z, double **Nwt, double **Ndt, double *Nt, int *order, double **prior_Nwt) //
{
	int ii, i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	int wid, did;
	double *word_vec;
	double *doc_vec;
	double *prior_word_vec;

	for (ii = 0; ii < N; ii++) {

		i = order[ ii ];

		wid = w[i];
		did = d[i];

		word_vec = Nwt[wid];
		doc_vec  = Ndt[did];
		prior_word_vec = prior_Nwt[wid];

		t = z[i];
		Nt[t]--;
		word_vec[t]--;
		doc_vec[t]--;
		totprob = 0;

		for (t = 0; t < T; t++) {
			prob[t] = doc_vec[t] * (word_vec[t] + prior_word_vec[t]) / Nt[t];
			totprob += prob[t];
		}

		// U = drand48()*totprob;
		U = sample_uniform() * totprob;
		cumprob = prob[0];
		t = 0;
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;
		word_vec[t]++;
		doc_vec[t]++;
		Nt[t]++;
	}

	free(prob);
}
Example #15
/*------------------------------------------
* sample_chain_alpha
*------------------------------------------ */
void sample_chain_alpha (int N, int W, int T, double *alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt, int *order) //
{
	int ii, i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;
	int wid, did;
	int *word_vec;
	int *doc_vec;

	for (ii = 0; ii < N; ii++) {

		i = order[ ii ];

		wid = w[i];
		did = d[i];

		word_vec = Nwt[wid];
		doc_vec  = Ndt[did];

		t = z[i];      // take the current topic assignment to word token i
		Nt[t]--;     // and subtract that from the counts
		word_vec[t]--;
		doc_vec[t]--;
		totprob = 0;

		for (t = 0; t < T; t++) {
			prob[t] = (doc_vec[t] + alpha[t]) * (word_vec[t] + beta) / (Nt[t] + wbeta);
			totprob += prob[t];
		}

		U = drand48()*totprob;
		cumprob = prob[0];
		t = 0;

		// sample a topic t from the distribution
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;      // assign current word token i to topic t
		word_vec[t]++; // and update counts
		doc_vec[t]++;
		Nt[t]++;
	}

	free(prob);
}
Example #16
void benchmark_convert_type ()
{
    const size_t size = 10000000;
    const S testval(1.0);
    std::vector<S> svec (size, testval);
    std::vector<D> dvec (size);
    std::cout << Strutil::format("Benchmark conversion of %6s -> %6s : ",
                                 TypeDesc(BaseTypeFromC<S>::value),
                                 TypeDesc(BaseTypeFromC<D>::value));
    float time = time_trial (bind (do_convert_type<S,D>, OIIO::cref(svec), OIIO::ref(dvec)),
                             ntrials, iterations) / iterations;
    std::cout << Strutil::format ("%7.1f Mvals/sec", (size/1.0e6)/time) << std::endl;
    D r = convert_type<S,D>(testval);
    OIIO_CHECK_EQUAL (dvec[size-1], r);
}
Example #17
/*------------------------------------------
* sample_chain
*------------------------------------------ */
void sample_chain (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt, int *order) //
{
	int ii, i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;
	int wid, did;
	int *word_vec;
	int *doc_vec;

	for (ii = 0; ii < N; ii++) {

		i = order[ ii ];

		wid = w[i];
		did = d[i];

		word_vec = Nwt[wid];
		doc_vec  = Ndt[did];

		t = z[i];
		Nt[t]--;
		word_vec[t]--;
		doc_vec[t]--;
		totprob = 0;

		for (t = 0; t < T; t++) {
			prob[t] = (doc_vec[t] + alpha) * (word_vec[t] + beta) / (Nt[t] + wbeta);
			totprob += prob[t];
		}

		U = drand48()*totprob;
		cumprob = prob[0];
		t = 0;
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;
		word_vec[t]++;
		doc_vec[t]++;
		Nt[t]++;
	}

	free(prob);
}
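
All of the sample_chain* variants share the same inner step: draw a topic from an unnormalized discrete distribution by scaling a uniform variate by the total mass and walking the cumulative sum. Isolated, that step looks roughly like the sketch below; the uniform01 argument stands in for drand48()/sample_uniform(), and the t < T-1 guard (absent in the originals) protects against floating-point round-off pushing the walk past the last bucket.

/* Minimal sketch of the inverse-CDF draw used by the samplers above;
 * prob[0..T-1] holds unnormalized weights that sum to totprob. */
static int sample_discrete(const double *prob, int T, double totprob, double uniform01)
{
	double U = uniform01 * totprob;
	double cumprob = prob[0];
	int t = 0;
	while (U > cumprob && t < T - 1) {
		t++;
		cumprob += prob[t];
	}
	return t;
}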
Example #18
  MM::MultinomialModel(const StringVec &names)
    : ParamPolicy(new VectorParams(1)),
      DataPolicy(new MS(1)),
      ConjPriorPolicy(),
      logp_current_(false)
  {
    std::vector<Ptr<CD> >
      dvec(make_catdat_ptrs(names));

    uint nlev= dvec[0]->nlevels();
    Vec probs(nlev, 1.0/nlev);
    set_pi(probs);

    set_data(dvec);
    mle();
    set_observer();
  }
Example #19
				virtual void dispatch(libmaus2::parallel::SimpleThreadWorkPackage * P, libmaus2::parallel::SimpleThreadPoolInterfaceEnqueTermInterface & /* tpi */)
				{
					FragReadEndsMergeWorkPackage * BP = dynamic_cast<FragReadEndsMergeWorkPackage *>(P);
					assert ( BP );

					ReadEndsBlockIndexSet fragindexset(*(BP->REQ.MI));
					libmaus2::bambam::DupSetCallbackSharedVector dvec(*(BP->REQ.dupbitvec));
							
					fragindexset.merge(
						BP->REQ.SMI,
						libmaus2::bambam::DupMarkBase::isDupFrag,
						libmaus2::bambam::DupMarkBase::markDuplicateFrags,dvec
					);
									
					addDuplicationMetricsInterface.addDuplicationMetrics(dvec.metrics);
					
					mergeFinishedInterface.fragReadEndsMergeWorkPackageFinished(BP);
					packageReturnInterface.fragReadEndsMergeWorkPackageReturn(BP);
				}
Example #20
/*------------------------------------------
* sample_chain_rank
*------------------------------------------ */
void sample_chain_rank (int N, int W, int T, double alpha, double beta, int *w, int *d, int *drank, int *z, int **Nwt, int **Ndt, int *Nt, int *order) //
{
	int ii, i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;

	for (ii = 0; ii < N; ii++) {

		i = order[ ii ];

		t = z[i];      // take the current topic assignment to word token i
		Nt[t] -= drank[d[i]];
		Nwt[w[i]][t] -= drank[d[i]];
		Ndt[d[i]][t] -= drank[d[i]];
		totprob = 0;

		for (t = 0; t < T; t++) {
			prob[t] = (Nwt[w[i]][t] + beta)/(Nt[t]+  wbeta)*(Ndt[d[i]][t]+  alpha);
			totprob += prob[t];
		}

		U = drand48()*totprob;
		cumprob = prob[0];
		t = 0;

		// sample a topic t from the distribution
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;      // assign current word token i to topic t
		Nwt[w[i]][t] += drank[d[i]];
		Ndt[d[i]][t] += drank[d[i]];
		Nt[t] += drank[d[i]];
	}

	free(prob);
}
Example #21
int main()
{
  FILE *fin,*fout;
  double **a,*b;
  int i;
  // allocate coefficient matrix A and right-hand side vector b
  a = dmatrix(1,N,1,N);
  b = dvec(1,N);

  // open input and output files
  fin = fopen("input.dat","r");
  if (fin ==NULL)
    {
      printf("Can't find file\n");
      exit(1);
    }
  fout = fopen("output.dat","w");
  if(fout == NULL)
    {
      printf("Can't make file\n");
      exit(1);
    }
  input_matrix(a,'A',fin,fout);
  input_vec(b,'b',fin,fout);
  //  printf("%lf",a[1][1]);
  b =simple_gauss(a,b);

  //output results//
  fprintf(fout,"Ax=bの計算結果は次の通り\n");
  for(i = 1;i <= N; i++)
    {
      fprintf(fout,"%f\n",b[i]);
    }
  
  fclose(fin);fclose(fout);

  //  free_dmatrix(a,1,N,1,N);free_dvec(b,1);
  return(0);
}
Example #22
void sample_chain0 (int N, int W, int T, double alpha, double beta, int *w, int *d, int *z, int **Nwt, int **Ndt, int *Nt) //
{
	int i, t;
	double totprob, U, cumprob;
	double *prob = dvec(T);
	double wbeta = W*beta;

	for (i = 0; i < N; i++) {

		t = z[i];      // take the current topic assignment to word token i
		Nt[t]--;     // and subtract that from the counts
		Nwt[w[i]][t]--;
		Ndt[d[i]][t]--;

		for (t = 0, totprob = 0.0; t < T; t++) {
			prob[t] = (Ndt[d[i]][t] + alpha) * (Nwt[w[i]][t] + beta) / (Nt[t] + wbeta);
			totprob += prob[t];
		}


		U = drand48()*totprob;
		cumprob = prob[0];
		t = 0;

		// sample a topic t from the distribution
		while (U>cumprob) {
			t++;
			cumprob += prob[t];
		}

		z[i] = t;      // assign current word token i to topic t
		Nwt[w[i]][t]++; // and update counts
		Ndt[d[i]][t]++;
		Nt[t]++;
	}

	free(prob);  
}
Example #23
// [[Rcpp::export]]
SEXP hpbcpp(SEXP eta,
            SEXP beta,
            SEXP doc_ct,
            SEXP mu,
            SEXP siginv,
            SEXP sigmaentropy){
 
   Rcpp::NumericVector etav(eta); 
   arma::vec etas(etav.begin(), etav.size(), false);
   Rcpp::NumericMatrix betam(beta);
   arma::mat betas(betam.begin(), betam.nrow(), betam.ncol());
   Rcpp::NumericVector doc_ctv(doc_ct);
   arma::vec doc_cts(doc_ctv.begin(), doc_ctv.size(), false);
   Rcpp::NumericVector muv(mu);
   arma::vec mus(muv.begin(), muv.size(), false);
   Rcpp::NumericMatrix siginvm(siginv);
   arma::mat siginvs(siginvm.begin(), siginvm.nrow(), siginvm.ncol(), false);
   Rcpp::NumericVector sigmaentropym(sigmaentropy);
   arma::vec entropy(sigmaentropym);

   //Performance Notes from 3/6/2015
   //  I tried a few different variants and benchmarked this one as roughly twice as
   //  fast as the R code for a K=100 problem.  Key to performance was not creating
   //  too many objects and being selective in how things were flagged as triangular.
   //  Some additional notes in the code below.
   //
   //  Some things this doesn't have or I haven't tried
   //  - I didn't tweak the arguments much.  sigmaentropy is a double, and I'm still
   //    passing beta in the same way.  I tried doing a ", false" for beta but it didn't
   //    change much so I left it the same as in gradient.  
   //  - I tried treating the factors for doc_cts and colSums(EB) as a diagonal matrix- much slower.
   
   //  Haven't Tried/Done
   //  - each_row() might be much slower (not sure but arma is column order).  Maybe transpose in place?
   //  - depending on costs there are some really minor calculations that could be precomputed: 
   //     - sum(doc_ct)
   //     - sqrt(doc_ct)
   
   //  More on passing by reference here:
   //  - Hypothetically we could alter beta (because hessian is last thing we do) however down
   //    the road we may want to explore treating nonPD hessians by optimization at which point
   //    we would need it again.
   
   arma::colvec expeta(etas.size()+1); 
   expeta.fill(1);
   int neta = etas.size(); 
   for(int j=0; j <neta;  j++){
     expeta(j) = exp(etas(j));
   }
   arma::vec theta = expeta/sum(expeta);

   //create a new version of the matrix so we can mess with it
   arma::mat EB(betam.begin(), betam.nrow(), betam.ncol());
   //multiply each column by expeta
   EB.each_col() %= expeta; //this should be fastest as its column-major ordering
  
   //divide out by the column sums
   EB.each_row() %= arma::trans(sqrt(doc_cts))/sum(EB,0);
    
   //Combine the pieces of the Hessian which are matrices
   arma::mat hess = EB*EB.t() - sum(doc_cts)*(theta*theta.t());
  
   //we don't need EB any more so we turn it into phi
   EB.each_row() %= arma::trans(sqrt(doc_cts));
   
   //Now alter just the diagonal of the Hessian
   hess.diag() -= sum(EB,1) - sum(doc_cts)*theta;
   //Drop the last row and column
   hess.shed_row(neta);
   hess.shed_col(neta);
   //Now we can add in siginv
   hess = hess + siginvs;
   //At this point the Hessian is complete.
   
   //This next bit of code is from http://arma.sourceforge.net/docs.html#logging
   //It basically keeps arma from printing errors from chol to the console.
   std::ostream nullstream(0);
   arma::set_stream_err2(nullstream);
   
   ////
   //Invert via cholesky decomposition
   ////
   //Start by initializing an object
   arma::mat nu = arma::mat(hess.n_rows, hess.n_rows);
   //This version of chol generates a boolean which tells us if it failed.
   bool worked = arma::chol(nu,hess);
   if(!worked) {
     //It failed!  Oh Nos.
     // So the matrix wasn't positive definite.  In practice this means that it hasn't
     // converged probably along some minor aspect of the dimension.
     
     //Here we make it positive definite through diagonal dominance
     arma::vec dvec = hess.diag();
     //find the magnitude of the diagonal 
     arma::vec magnitudes = sum(abs(hess), 1) - abs(dvec);
     //iterate over each row and set the minimum value of the diagonal to be the magnitude of the other terms
     int Km1 = dvec.size();
     for(int j=0; j < Km1;  j++){
       if(arma::as_scalar(dvec(j)) < arma::as_scalar(magnitudes(j))) dvec(j) = magnitudes(j); //enforce diagonal dominance 
     }
     //overwrite the diagonal of the hessian with our new object
     hess.diag() = dvec;
     //that was sufficient to ensure positive definiteness so we now do cholesky
     nu = arma::chol(hess);
   }
   //compute -1/2 the log-determinant from the cholesky decomposition
   double detTerm = -sum(log(nu.diag()));
   
   //Now finish constructing nu
   nu = arma::inv(arma::trimatu(nu));
   nu = nu * nu.t(); //trimatu doesn't do anything for multiplication so it would just be timesink to signal here.
   
   //Precompute the difference since we use it twice
   arma::vec diff = etas - mus;
   //Now generate the bound and make it a scalar
   double bound = arma::as_scalar(log(arma::trans(theta)*betas)*doc_cts + detTerm - .5*diff.t()*siginvs*diff - entropy); 
   
   // Generate a return list that mimics the R output
   return Rcpp::List::create(
        Rcpp::Named("phis") = EB,
        Rcpp::Named("eta") = Rcpp::List::create(Rcpp::Named("lambda")=etas, Rcpp::Named("nu")=nu),
        Rcpp::Named("bound") = bound
        );
}
Example #24
void ATMBP(double ALPHA, double BETA, int W, int J, int D, int A, int MA, int NN, int OUTPUT, 
	mwIndex *irwd, mwIndex *jcwd, double *srwd, mwIndex *irad, mwIndex *jcad, 
	double *muz, double *mux, double *phi, double *theta, int startcond) 
{ 		
	int wi, di, ai, i, j, a, topic, iter;
	double xi, totprob, probs, WBETA = (double) (W*BETA), JALPHA = (double) (J*ALPHA);
	double *thetad, *phitot, *xprob, *zprob;

	phitot = dvec(J);

	for (wi=0; wi<W; wi++) {
		for (j=0; j<J; j++) {
			phitot[j] += phi[wi*J + j];
		}
	}

	thetad = dvec(A);
	xprob = dvec(MA);
	zprob = dvec(J);

	if (startcond==1) {
		/* start from previous state */
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				for (j=0; j<J; j++) {
					for (a=0; a<(jcad[di+1] - jcad[di]); a++) {
						ai = (int) irad[jcad[di] + a];
						theta[ai*J + j] += xi*muz[i*J + j]*mux[i*MA + a]; // increment theta count matrix
						thetad[ai] += xi*muz[i*J + j]*mux[i*MA + a];
					}
				}
			}
		}
	}

	if (startcond==0) {
		/* random initialization */
		if (OUTPUT==2) mexPrintf( "Starting Random initialization\n" );
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				// pick a random topic 0..J-1
				topic = (int) (J*drand());
				muz[i*J + topic] = (double) 1; // assign this word token to this topic
				/* pick a random number between jcad[di + 1] and jcad[di] */
				a = (int) ((jcad[di + 1] - jcad[di])*drand());
				ai = (int) irad[jcad[di] + a]; // assign this word to this author
				mux[i*MA + a] = (double) 1;
				// update counts for this author
				theta[ai*J + topic] += xi; // increment theta count matrix
				thetad[ai] += xi;
			}
		}
	}

	for (iter=0; iter<NN; iter++) {

		if (OUTPUT >=1) {
			if ((iter % 10)==0) mexPrintf( "\tIteration %d of %d\n" , iter , NN );
			if ((iter % 10)==0) mexEvalString("drawnow;");
		}

		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i]; // current word index 
				xi = srwd[i]; // current word counts
				// message
				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) xprob[a] = (double) 0;
				for (j=0; j<J; j++) zprob[j] = (double) 0;
				totprob = (double) 0;
				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) {
					ai = (int) irad[jcad[di] + a]; // current author index under consideration					
					for (j=0; j<J; j++) {	  
						// probs contains the (unnormalized) probability of assigning this word token to topic j and author ai
						probs = ((double) phi[wi*J + j] + (double) BETA) / 
							((double) phitot[j] + (double) WBETA) *	             
							((double) theta[ai*J + j] - (double) xi*muz[i*J + j]*mux[i*MA + a] + (double) ALPHA) /
							((double) thetad[ai] - (double) xi*mux[i*MA + a] + (double) JALPHA);
						xprob[a] += probs;
						zprob[j] += probs;
						totprob += probs;
					}
				}
				for (a=0; a<(jcad[di + 1]-jcad[di]); a++) {
					mux[i*MA + a] = xprob[a]/totprob;
				}
				for (j=0; j<J; j++) {
					muz[i*J + j] = zprob[j]/totprob;
				}
			}
		}

		/* clear phi, theta, thetad and phitot */
		for (i=0; i<J*A; i++) theta[i] = (double) 0;
		for (i=0; i<A; i++) thetad[i] = (double) 0;

		// update parameters
		for (di=0; di<D; di++) {
			for (i=jcwd[di]; i<jcwd[di + 1]; i++) {
				wi = (int) irwd[i];
				xi = srwd[i];
				for (j=0; j<J; j++) {
					for (a=0; a<(jcad[di+1] - jcad[di]); a++) {
						ai = (int) irad[jcad[di] + a];
						theta[ai*J + j] += xi*muz[i*J + j]*mux[i*MA + a]; // increment theta count matrix
						thetad[ai] += xi*muz[i*J + j]*mux[i*MA + a];
					}
				}
			}
		}
	}
}
Example #25
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 
{
	double *srwd, *srad, *MUZIN, *MUXIN, *theta, *phi, *thetad, *muz, *mux;
	double ALPHA,BETA;	
	int W, J, D, A, MA = 0, NN, SEED, OUTPUT, nzmaxwd, nzmaxad, i, j, a, startcond;
	mwIndex *irwd, *jcwd, *irad, *jcad;

	/* Check for proper number of arguments. */
	if (nrhs < 8) {
		mexErrMsgTxt("At least 8 input arguments required");
	} else if (nlhs < 1) {
		mexErrMsgTxt("At least 1 output arguments required");
	}

	startcond = 0;
	if (nrhs > 8) startcond = 1;

	/* dealing with sparse array WD */
	if (mxIsDouble(prhs[0]) != 1) mexErrMsgTxt("WD must be a double precision matrix");
	srwd = mxGetPr(prhs[0]);
	irwd = mxGetIr(prhs[0]);
	jcwd = mxGetJc(prhs[0]);
	nzmaxwd = (int) mxGetNzmax(prhs[0]);
	W = (int) mxGetM(prhs[0]);
	D = (int) mxGetN(prhs[0]);

	/* dealing with sparse array AD */
	if (mxIsDouble(prhs[1]) != 1) mexErrMsgTxt("AD must be a double precision matrix");
	srad = mxGetPr(prhs[1]);
	irad = mxGetIr(prhs[1]);
	jcad = mxGetJc(prhs[1]);
	nzmaxad = (int) mxGetNzmax(prhs[1]);
	A = (int) mxGetM(prhs[1]);
	if ((int) mxGetN(prhs[1]) != D) mexErrMsgTxt("WD and AD must have the same number of columns");

	/* check that every document has some authors */
	for (i=0; i<D; i++) {
		if ((jcad[i + 1] - jcad[i]) == 0) mexErrMsgTxt("there are some documents without authors in AD matrix ");
		if ((jcad[i + 1] - jcad[i]) > NAMAX) mexErrMsgTxt("Too many authors in some documents ... reached the NAMAX limit");
		if ((jcad[i + 1] - jcad[i]) > MA) MA = (int) (jcad[i + 1] - jcad[i]);
	}

	phi = mxGetPr(prhs[2]);
	J = (int) mxGetM(prhs[2]);
	if (J<=0) mexErrMsgTxt("Number of topics must be greater than zero");
	if ((int) mxGetN(prhs[2]) != W) mexErrMsgTxt("Vocabulary mismatches");

	NN = (int) mxGetScalar(prhs[3]);
	if (NN<0) mexErrMsgTxt("Number of iterations must be greater than zero");

	ALPHA = (double) mxGetScalar(prhs[4]);
	if (ALPHA<0) mexErrMsgTxt("ALPHA must be greater than zero");

	BETA = (double) mxGetScalar(prhs[5]);
	if (BETA<0) mexErrMsgTxt("BETA must be greater than zero");

	SEED = (int) mxGetScalar(prhs[6]);
	// set the seed of the random number generator

	OUTPUT = (int) mxGetScalar(prhs[7]);

	if (startcond == 1) {
		MUZIN = mxGetPr(prhs[8]);
		if (nzmaxwd != mxGetN(prhs[8])) mexErrMsgTxt("WD and MUZIN mismatch");
		if (J != mxGetM( prhs[ 8 ])) mexErrMsgTxt("J and MUZIN mismatch");
		MUXIN = mxGetPr(prhs[9]);
		if (nzmaxwd != mxGetN( prhs[9])) mexErrMsgTxt("WD and MUXIN mismatch");
		if (MA != mxGetM(prhs[9])) mexErrMsgTxt("MA and MUXIN mismatch");
	}

	// seeding
	seedMT( 1 + SEED * 2 ); // seeding only works on uneven numbers

	/* allocate memory */
	muz  = dvec(J*nzmaxwd);
	mux  = dvec(MA*nzmaxwd);

	if (startcond == 1) {
		for (i=0; i<J*nzmaxwd; i++) muz[i] = (double) MUZIN[i]; 
		for (a=0; a<MA*nzmaxwd; a++) mux[i] = (double) MUXIN[i];
	}

	theta = dvec(J*A);

	/* run the model */
	ATMBP( ALPHA, BETA, W, J, D, A, MA, NN, OUTPUT, irwd, jcwd, srwd, irad, jcad, muz, mux, phi, theta, startcond );

	/* output */
	plhs[0] = mxCreateDoubleMatrix(J, A, mxREAL);
	mxSetPr(plhs[0], theta);

	plhs[1] = mxCreateDoubleMatrix(J, nzmaxwd, mxREAL);
	mxSetPr(plhs[1], muz);

	plhs[2] = mxCreateDoubleMatrix(MA, nzmaxwd, mxREAL);
	mxSetPr(plhs[2], mux);
}
Example #26
void hca_displaytopics(char *stem, char *resstem, int topword, 
                       enum ScoreType scoretype, int pmicount, int fullreport) {
  int w,k;
  uint32_t *termindk = NULL;
  uint32_t *indk = NULL;
  int Nk_tot = 0;
  double (*termtscore)(int) = NULL;
  double (*tscore)(int) = NULL;
  double sparsityword = 0;
  double sparsitydoc = 0;
  double underused = 0;
  uint32_t *top1cnt = NULL;
  FILE *fp;
  float *tpmi = NULL;
  char *topfile;
  char *repfile;
  uint32_t *psort;
  FILE *rp = NULL;
  float *gtvec = globalprop();
//#define XTRA // prints model topic probs after observed
#ifdef XTRA
  double *gtavec = calloc(ddN.T,sizeof(gtavec[0]));
#endif
  float *gpvec = calloc(ddN.W,sizeof(gpvec[0]));
  float *pvec = calloc(ddN.W,sizeof(pvec[0]));
#ifdef KL
  float *dfvec = calloc(ddN.W,sizeof(dfvec[0]));
#endif
  double *ngalpha = NULL;
  T_stats_t *termstats;
  
#ifdef XTRA
  get_probs(gtavec);
#endif

  if ( pmicount>topword )
    pmicount = topword;
  if ( scoretype == ST_idf ) {
    tscore = idfscore;
  } else if ( scoretype == ST_phirat ) {
    tscore = phiratioscore;
  } else if ( scoretype == ST_phi ) {
    tscore = phiscore;
  } else if ( scoretype == ST_count ) {
    tscore = countscore;
  } else if ( scoretype == ST_cost ) {
    tscore = costscore;
  } else if ( scoretype == ST_Q ) {
    tscore = Qscore;
    lowerQ = 1.0/ddN.T;
  }  

  if ( ddS.TwT==NULL && ddP.phi==NULL && scoretype == ST_phirat ) 
	yap_quit("Cannot use '-orat' option with this model/settings.\n");	

  if ( ddP.PYalpha==H_NG ) {
    /*
     *  provide an estimate of alpha
     */
    ngalpha = dvec(ddN.T);
    get_probs(ngalpha);
    for (k=0; k<ddN.T; k++) {
      ddP.alphapr[k] = ngalpha[k];
    }
  }

  /*
   *  returns null if no relevant data file
   */
  termstats = tstats_init(ddS.z, ddD.NdTcum, ddN.T, ddN.DT, stem);
  if ( termstats ) {
    if ( scoretype == ST_idf ) {
      termtscore = termidfscore;
    } else 
      termtscore = termcountscore;
  }  

  
  /*
   *  first collect counts of each word/term,
   *  and build gpvec (mean word probs)
   */
  build_NwK();
  if ( termstats )
    build_termNwK(termstats);
  {
    /*
     *  gpvec[] is normalised NwK[]
     */
    double tot = 0;
    for (w=0; w<ddN.W; w++)
      tot += gpvec[w] = NwK[w]+0.1; 
    for (w=0; w<ddN.W; w++)
      gpvec[w] /= tot;
  }
  if ( ddS.Nwt ) {
    for (k=0; k<ddN.T; k++) {
      Nk_tot += ddS.NWt[k];
    }
  } 
  
  psort = sorttops(gtvec, ddN.T);
  
  top1cnt = hca_top1cnt();
  if ( !top1cnt )
    yap_quit("Cannot allocate top1cnt in hca_displaytopics()\n");

  if ( pmicount ) {
    tpmi = malloc(sizeof(*tpmi)*(ddN.T+1));
    if ( !tpmi )
      yap_quit("Cannot allocate tpmi in hca_displaytopics()\n");
  }
  indk = malloc(sizeof(*indk)*ddN.W);
  if ( !indk )
    yap_quit("Cannot allocate indk in hca_displaytopics()\n");
  if ( termstats ) {
    termindk = malloc(sizeof(*indk)*termstats->K);
    if ( !termindk )
      yap_quit("Cannot allocate termindk in hca_displaytopics()\n");
  }

  
  data_df(stem);

#ifdef KL
  for (w=0; w<ddN.W; w++)
    dfvec[w] = ddD.df[w];
#endif
  
  /*
   *   two passes through, 
   *           first to build the top words and dump to file
   */
  repfile = yap_makename(resstem,".topset");
  topfile = yap_makename(resstem,".toplst");
  fp = fopen(topfile,"w");
  if ( !fp ) 
    yap_sysquit("Cannot open file '%s' for write\n", topfile);
  yap_message("\n");
  for (k=0; k<ddN.T; k++) {
    int cnt, termcnt = 0;
    tscorek = k;
    /*
     *    build sorted word list
     */
    cnt = buildindk(k, indk);
    topk(topword, cnt, indk, tscore);
    if ( cnt==0 )
      continue;
    if ( termstats ) {
      termcnt = buildtermindk(k, termindk, termstats);
      topk(topword, termcnt, termindk, termtscore);
    }
    /*
     *   dump words to file
     */
    fprintf(fp,"%d: ", k);
    for (w=0; w<topword && w<cnt; w++) {
      fprintf(fp," %d", (int)indk[w]);
    }
    if ( termstats ) {
      for (w=0; w<topword && w<termcnt; w++) {
	fprintf(fp," %d", (int)termstats->Kmin+termindk[w]);
      }
    }
    fprintf(fp, "\n");
  }
  if ( ddP.PYbeta && (ddP.phi==NULL || ddP.betapr)  ) {
    int cnt;
     /*
     *    dump root words
     */
    tscorek = -1;
    cnt = buildindk(-1, indk);
    topk(topword, cnt, indk, (ddP.phi==NULL)?countscore:phiscore);
    fprintf(fp,"-1:");
    for (w=0; w<topword && w<cnt; w++) {
      fprintf(fp," %d", (int)indk[w]);
    }
    fprintf(fp, "\n");
  }
  fclose(fp);
  if ( verbose>1 ) yap_message("\n");

  if ( pmicount ) {
    /*
     * compute PMI
     */
    char *toppmifile;
    char *pmifile;
    double *tp;
    tp = dvec(ddN.T);
    pmifile=yap_makename(stem,".pmi");
    toppmifile=yap_makename(resstem,".toppmi");
    get_probs(tp);
    report_pmi(topfile, pmifile, toppmifile, ddN.T, ddN.W, 1, 
               pmicount, tp, tpmi);
    free(toppmifile);
    free(pmifile);
    free(tp);
  }

  /*
   *   now report words and diagnostics
   */
  //ttop_open(topfile);
  if ( fullreport ) {
    rp = fopen(repfile,"w");
    if ( !rp ) 
      yap_sysquit("Cannot open file '%s' for write\n", repfile);
    fprintf(rp, "#topic index rank prop word-sparse doc-sparse eff-words eff-docs docs-bound top-one "
	    "dist-unif dist-unigrm");
    if ( PCTL_BURSTY() ) 
      fprintf(rp, " burst-concent");
    if ( ddN.tokens )  
      fprintf(rp, " ave-length");
    fprintf(rp, " coher");
    if ( pmicount ) 
      fprintf(rp, " pmi");
    fprintf(rp, "\n#word topic index rank");
    if ( ddS.Nwt )
      fprintf(rp, " count");
    fprintf(rp, " prop cumm df coher\n");
    
  }
  for (k=0; k<ddN.T; k++) {
    int cnt, termcnt = 0;
    int kk = psort[k];
    uint32_t **dfmtx;

    if ( ddP.phi==NULL && ddS.NWt[kk]==0 )
      continue;
    /*
     *   grab word prob vec for later use
     */
    if ( ddS.Nwt ) {
      int w;
      for (w=0; w<ddN.W; w++)
	pvec[w] = wordprob(w,kk);
    } else if ( ddP.phi ) 
      fv_copy(pvec, ddP.phi[kk], ddN.W);
    else if ( ddS.phi ) 
      fv_copy(pvec, ddS.phi[kk], ddN.W);

    /*
     *  rebuild word list
     */
    tscorek = kk;
    cnt = buildindk(kk, indk);
    topk(topword, cnt, indk, tscore);
    if ( topword<cnt )
      cnt = topword;
    assert(cnt>0);
    if ( termstats ) {
      termcnt = buildtermindk(kk, termindk, termstats);
      topk(topword, termcnt, termindk, termtscore);
      if ( topword<termcnt )
	termcnt = topword;
    }
    /*
     *     df stats for topic returned as matrix
     */
    dfmtx = hca_dfmtx(indk, cnt, kk);

    if ( ddS.Nwt && (ddS.NWt[kk]*ddN.T*100<Nk_tot || ddS.NWt[kk]<5 )) 
      underused++;
    /*
     *  print stats for topic
     *    Mallet:  tokens, doc_ent, ave-word-len, coher., 
     *             uni-dist, corp-dist, eff-no-words
     */
    yap_message("Topic %d/%d", kk, k);
    {
      /*
       *   compute diagnostics
       */
      double prop = gtvec[kk];
      float *dprop = docprop(kk);
      double spw = 0;
      double spd = ((double)nonzero_Ndt(kk))/((double)ddN.DT);
#ifdef KL
      double ew = fv_kl(dfvec,pvec,ddN.W);
#else
      double ew = exp(fv_entropy(pvec,ddN.W));
#endif
      double ud = fv_helldistunif(pvec,ddN.W);
      double pd = fv_helldist(pvec,gpvec,ddN.W);
      double sl = fv_avestrlen(pvec,ddN.tokens,ddN.W);
      double co = coherence(dfmtx, cnt);
      double ed = dprop?exp(fv_entropy(dprop,ddN.DT)):ddN.DT;
#define MALLET_EW
#ifdef MALLET_EW
      double ewp = dprop?(1.0/fv_expprob(pvec,ddN.W)):ddN.W;
#endif
      double da = dprop?fv_bound(dprop,ddN.DT,1.0/sqrt((double)ddN.T)):0;
      sparsitydoc += spd;
      yap_message((ddN.T>200)?" p=%.3lf%%":" p=%.2lf%%",100*prop);   
#ifdef XTRA
      yap_message((ddN.T>200)?"/%.3lf%%":"/%.2lf%%",100*gtavec[kk]);   
#endif
      if ( ddS.Nwt ) {
	spw = ((double)nonzero_Nwt(kk))/((double)ddN.W);
	sparsityword += spw;
	yap_message(" ws=%.1lf%%", 100*(1-spw));
      } 
      yap_message(" ds=%.1lf%%", 100*(1-spd) );
#ifdef KL
      yap_message(" ew=%lf", ew);
#else
      yap_message(" ew=%.0lf", ew);
#endif
#ifdef MALLET_EW
      yap_message(" ewp=%.1lf", ewp); 
#endif
      yap_message(" ed=%.1lf", ed); 
      yap_message(" da=%.0lf", da+0.1); 
      yap_message(" t1=%u", top1cnt[kk]); 
      yap_message(" ud=%.3lf", ud); 
      yap_message(" pd=%.3lf", pd); 
      if ( PCTL_BURSTY() ) 
	yap_message(" bd=%.3lf", ddP.bdk[kk]); 
      if ( ddP.NGbeta ) {
	/*
	 *   approx. as sqrt(var(lambda_k)/lambda-normaliser
	 */
	double ngvar = sqrt(ddP.NGalpha[kk])
	  * (ngalpha[kk]/ddP.NGalpha[kk]);
	yap_message(" ng=%.4lf,%.4lf", 
		    ngalpha[kk], ngvar/ngalpha[kk]);
	if ( ddS.sparse )
	    yap_message(",%.4f", 1-((float)ddS.sparseD[kk])/ddN.DTused);
	if ( verbose>2 )
	    yap_message(" ngl=%.4lf,%.4lf, nga=%.4lf,%.4lf", 
		    ddP.NGalpha[kk]/ddP.NGbeta[kk], 
		    sqrt(ddP.NGalpha[kk]/ddP.NGbeta[kk]/ddP.NGbeta[kk]),
		    ddP.NGalpha[kk], ddP.NGbeta[kk]); 
      }
      if ( ddN.tokens )  
	yap_message(" sl=%.2lf", sl); 
      yap_message(" co=%.3lf%%", co);
      if ( pmicount ) 
	yap_message(" pmi=%.3f", tpmi[kk]);
      if ( fullreport ) {
	fprintf(rp,"topic %d %d", kk, k);
	fprintf(rp," %.6lf", prop);   
	if ( ddS.Nwt ) {
	  fprintf(rp," %.6lf", (1-spw));
	} else {
	  fprintf(rp," 0");
	}
	fprintf(rp," %.6lf", (1-spd) );
#ifdef KL
	yap_message(" %lf", ew);
#else
	fprintf(rp," %.2lf", ew);
#endif
#ifdef MALLET_EW
	fprintf(rp," %.2lf", ewp); 
#endif
	fprintf(rp," %.2lf", ed); 
	fprintf(rp," %.0lf", da+0.1); 
	fprintf(rp," %u", top1cnt[kk]); 
	fprintf(rp," %.6lf", ud); 
	fprintf(rp," %.6lf", pd); 
	if ( PCTL_BURSTY() ) 
	  fprintf(rp," %.3lf", ddP.bdk[kk]); 
	fprintf(rp," %.4lf", (ddN.tokens)?sl:0); 
	fprintf(rp," %.6lf", co);
	if ( pmicount ) 
	  fprintf(rp," %.4f", tpmi[kk]);
	fprintf(rp,"\n");
      }
      if ( dprop) free(dprop);
    }
    if ( verbose>1 ) {
      double pcumm = 0;
      /*
       *   print top words:
       *     Mallet:   rank, count, prob, cumm, docs, coh
       */
      yap_message("\ntopic %d/%d", kk, k);
      yap_message(" words=");
      for (w=0; w<cnt; w++) {
	if ( w>0 ) yap_message(",");
	if ( ddN.tokens ) 
	  yap_message("%s", ddN.tokens[indk[w]]);
	else
	  yap_message("%d", indk[w]);
	if ( verbose>2 ) {
	  if ( scoretype == ST_count )
	    yap_message("(%d)", (int)(tscore(indk[w])+0.2));
	  else
	    yap_message("(%6lf)", tscore(indk[w]));
	}
	if ( fullreport ) {
	  fprintf(rp, "word %d %d %d", kk, indk[w], w);
	  if ( ddS.Nwt )
	    fprintf(rp, " %d", ddS.Nwt[indk[w]][kk]);
	  pcumm += pvec[indk[w]];
	  fprintf(rp, " %.6f %.6f", pvec[indk[w]], pcumm);
	  fprintf(rp, " %d", dfmtx[w][w]); 
	  fprintf(rp, " %.6f", coherence_word(dfmtx, cnt, w));
	  if ( ddN.tokens ) 
	    fprintf(rp, " %s", ddN.tokens[indk[w]]);
	  fprintf(rp, "\n");
	}
      }
      if ( termstats ) {
	yap_message(" terms=");
	for (w=0; w<termcnt; w++) {
	  if ( w>0 ) yap_message(",");
	  if ( ddN.tokens ) 
	    yap_message("%s", termstats->tokens[termindk[w]]);
	  else
	    yap_message("%d", termstats->Kmin+termindk[w]);
	  if ( verbose>2 ) {
	    if ( scoretype == ST_count )
	      yap_message("(%d)", (int)(termtscore(termindk[w])+0.2));
	    else
	      yap_message("(%6lf)", termtscore(termindk[w]));
	  }
	  if ( fullreport ) {
	    fprintf(rp, "term %d %d %d", kk, termindk[w], w);
	    fprintf(rp, " %d", termstats->Nkt[termindk[w]][kk]);
	    fprintf(rp, " %s", termstats->tokens[termindk[w]]);
	    fprintf(rp, "\n");
	  }
	}
      }
    }
    yap_message("\n");
    free(dfmtx[0]); free(dfmtx); 
  }
  if ( verbose>1 && ddP.PYbeta ) {
    int cnt;
    double pcumm = 0;
     /*
     *    print root words
     */
    tscorek = -1;
    cnt = buildindk(-1,indk);
    /*  this case gives bad results */
    // if ( scoretype == ST_phirat ) topk(topword, cnt, indk, phiratioscore);
    topk(topword, cnt, indk, (ddP.phi==NULL)?countscore:phiscore);
    /*
     *     cannot build df mtx for root because
     *     it is latent w.r.t. topics
     */
    yap_message("Topic root words=");
    if ( fullreport ) {
      int w;
      if ( ddP.phi && ddP.PYbeta!=H_PDP ) {
	for (w=0; w<ddN.W; w++)
	  pvec[w] = ddS.phi[ddN.T][w];
      } else {
	for (w=0; w<ddN.W; w++)
	  pvec[w] = betabasewordprob(w);
      }
#ifdef KL
      double ew = fv_kl(dfvec,pvec,ddN.W);
#else
      double ew = exp(fv_entropy(pvec,ddN.W));
#endif

      double ud = fv_helldistunif(pvec,ddN.W);
      double pd = fv_helldist(pvec,gpvec,ddN.W);
      fprintf(rp,"topic -1 -1 0 0");
      fprintf(rp," %.4lf", ew); 
      fprintf(rp," %.6lf", ud); 
      fprintf(rp," %.6lf", pd); 
      fprintf(rp,"\n");
    }
    for (w=0; w<topword && w<cnt; w++) {
      if ( w>0 ) yap_message(",");
      if ( ddN.tokens )
	yap_message("%s", ddN.tokens[indk[w]]);
      else
	yap_message("%d", indk[w]);
      if ( verbose>2 && !ddP.phi )
	yap_message("(%6lf)", countscore(indk[w]));
      if ( fullreport ) {
	fprintf(rp, "word %d %d %d", -1, indk[w], w);
	if ( ddS.TwT )
	  fprintf(rp, " %d", ddS.TwT[w]);
	pcumm += pvec[indk[w]];
	fprintf(rp, " %.6f %.6f", pvec[indk[w]], pcumm);
	fprintf(rp, " 0 0"); 
	if ( ddN.tokens ) 
	  fprintf(rp, " %s", ddN.tokens[indk[w]]);
	fprintf(rp, "\n");
      }
    }
    yap_message("\nTopical words=");
    topk(topword, cnt, indk, phiinvratioscore);
    for (w=0; w<topword && w<cnt; w++) {
      if ( w>0 ) yap_message(",");
      if ( ddN.tokens )
	yap_message("%s", ddN.tokens[indk[w]]);
      else
	yap_message("%d", indk[w]);
    }
    yap_message("\n");
  }  
  yap_message("\n");
  if ( rp )
    fclose(rp);
	     
  if ( ddS.Nwt )
    yap_message("Average topicXword sparsity = %.2lf%%\n",
                100*(1-sparsityword/ddN.T) );
  yap_message("Average docXtopic sparsity = %.2lf%%\n"
	      "Underused topics = %.1lf%%\n",
	      100*(1-sparsitydoc/ddN.T), 
	      100.0*underused/(double)ddN.T);
  if ( ddS.sparse && ddP.PYalpha==H_NG ) {
    double avesp = 0;
    // correct_docsp();
    for (k=0; k<ddN.T; k++) {
      avesp += gtvec[k];
    }
    // check gtvec[] sums to 1
    assert(fabs(avesp-1.0)<0.00001);
    avesp = 0;
    for (k=0; k<ddN.T; k++) {
        avesp += gtvec[k]*((float)ddS.sparseD[k])/ddN.DTused;
	assert(ddS.sparseD[k]<=ddN.DTused);
    }
    assert(avesp<=1.0);
    assert(avesp>0.0);
    yap_message("IBP sparsity = %.2lf%%\n", 100*(1-avesp));
  }
	
  if ( pmicount ) 
    yap_message("Average PMI = %.3f\n", tpmi[ddN.T]);

  /*
   *   print 
   */
  if ( 1 ) {
    float **cmtx = hca_topmtx();
    int t1, t2;
    int m1, m2;
    float mval;
    char *corfile = yap_makename(resstem,".topcor");
    fp = fopen(corfile,"w");
    if ( !fp ) 
      yap_sysquit("Cannot open file '%s' for write\n", corfile);
    /*
     *   print file
     */
    for (t1=0; t1<ddN.T; t1++) {
      for (t2=0; t2<t1; t2++) 
	 if ( cmtx[t1][t2]>1.0e-7 ) 
	  fprintf(fp, "%d %d %0.6f\n", t1, t2, cmtx[t1][t2]);
    }
    fclose(fp);
    free(corfile);
    /*
     *   display maximum
     */
    m1 = 1; m2 = 0;
    mval = cmtx[1][0];
    for (t1=0; t1<ddN.T; t1++) {
      for (t2=0; t2<t1; t2++) {
	if ( mval<cmtx[t1][t2] ) {
	  mval = cmtx[t1][t2];
	  m1 = t1;
	  m2 = t2;
	}
      }
    }
    yap_message("Maximum correlated topics (%d,%d) = %f\n", m1, m2, mval);
    free(cmtx[0]); free(cmtx);
  }

  /*
   *  print burstiness report
   */
  if ( PCTL_BURSTY() ) {
    int tottbl = 0;
    int totmlttbl = 0;
    int totmlt = 0;
    int i;
    for (i=0; i<ddN.NT; i++) {
      if ( Z_issetr(ddS.z[i]) ) {
	if ( M_multi(i) )
	  totmlttbl++;
	tottbl++;
      }
      if ( M_multi(i) )
	totmlt++;
    }
    yap_message("Burst report: multis=%.2lf%%, tables=%.2lf%%, tbls-in-multis=%.2lf%%\n",
		100.0*((double)ddM.dim_multiind)/ddN.N,
		100.0*((double)tottbl)/ddN.NT,
		100.0*((double)totmlttbl)/totmlt);
  }
  yap_message("\n");

  free(topfile);
  if ( repfile ) free(repfile);
  if ( top1cnt ) free(top1cnt);
  free(indk);
  free(psort);
  if ( ngalpha )
    free(ngalpha);
  if ( pmicount )
    free(tpmi);
  if ( NwK ) {
    free(NwK);
    NwK = NULL;
  }
#ifdef KL
  free(dfvec);
#endif
  free(pvec); 
  free(gtvec);
  free(gpvec);
  tstats_free(termstats);
}
Example #27
w_rc_t ShoreTPCBEnv::_pad_BRANCHES()
{
    ss_m* db = this->db();

    // lock the BRANCHES table
    branch_t* br = branch_man->table();
    std::vector<index_desc_t*>& br_idx = br->get_indexes();

    // lock the table and index(es) for exclusive access
    W_DO(ss_m::lm->intent_vol_lock(br->primary_idx()->stid().vol,
                okvl_mode::IX));
    W_DO(ss_m::lm->intent_store_lock(br->primary_idx()->stid(),
                okvl_mode::X));
    for(size_t i=0; i < br_idx.size(); i++) {
        W_DO(ss_m::lm->intent_store_lock(br_idx[i]->stid(), okvl_mode::X));
    }

    guard<ats_char_t> pts = new ats_char_t(br->maxsize());

    // copy and pad all tuples smaller than 4k

    // WARNING: this code assumes that existing tuples are packed
    // densely so that all padded tuples are added after the last
    // unpadded one

    bool eof;

    // we know you can't fit two 4k records on a single page
    static int const PADDED_SIZE = 4096;

    array_guard_t<char> padding = new char[PADDED_SIZE];
    std::vector<rid_t> hit_list;
    {
	table_scan_iter_impl<branch_t>* iter =
            new table_scan_iter_impl<branch_t>(branch_man->table());

	int count = 0;
	table_row_t row(br);
	rep_row_t arep(pts);
	int psize = br->maxsize()+1;

	W_DO(iter->next(db, eof, row));
	while (!eof) {
	    // figure out how big the old record is
	    int bsize = row.size();
	    if (bsize == psize) {
		TRACE(TRACE_ALWAYS,
                      "-> Found padded BRANCH record. Stopping search (%d)\n",
                      count);
		break;
	    }
	    else if (bsize > psize) {
		// too big... shrink it down to save on logging
		// handle->truncate_rec(bsize - psize);
                fprintf(stderr, "+");
                // CS: no more pin_i -> do nothing
	    }
	    else {
		// copy and pad the record (and mark the old one for deletion)
		rid_t new_rid;
		vec_t hvec(handle->hdr(), hsize);
		vec_t dvec(handle->body(), bsize);
		vec_t pvec(padding, PADDED_SIZE-bsize);
		W_DO(db->create_rec(br_fid, hvec, PADDED_SIZE, dvec, new_rid));
		W_DO(db->append_rec(new_rid, pvec));

                // mark the old record for deletion
		hit_list.push_back(handle->rid());

		// update the index(es)
		vec_t rvec(&row._rid, sizeof(rid_t));
		vec_t nrvec(&new_rid, sizeof(new_rid));
		for(int i=0; i < br_idx_count; i++) {
		    int key_sz = branch_man()->format_key(br_idx+i, &row, arep);
		    vec_t kvec(arep._dest, key_sz);

		    // destroy the old mapping and replace it with the new
                    // one.  If it turns out this is super-slow, we can
                    // look into probing the index with a cursor and
                    // updating it directly.
		    int pnum = _pbranch_man->get_pnum(&br_idx[i], &row);
		    stid_t fid = br_idx[i].fid(pnum);

                    W_DO(db->destroy_assoc(fid, kvec, rvec));
                    // now put the entry back with the new rid
                    W_DO(db->create_assoc(fid, kvec, nrvec));

		}
                fprintf(stderr, ".");
	    }

	    // next!
	    count++;
	    W_DO(iter->next(db, eof, row));
	}
        TRACE(TRACE_ALWAYS, "padded records added\n");

        delete iter;
    }

    // delete the old records
    int hlsize = hit_list.size();
    TRACE(TRACE_ALWAYS,
          "-> Deleting (%d) old BRANCH unpadded records\n",
          hlsize);
    for(int i=0; i < hlsize; i++) {
	W_DO(db->destroy_rec(hit_list[i]));
    }

    return (RCOK);
}
Example #28
/////////////////////////////////////////////////////////////////////////////////
//	calculate the intersection of a sphere with the given ray
//	the ray has an origin and a direction, ray = origin + t*direction
//	find the t parameter, return true if it is between 0.0 and 1.0, false 
//	otherwise, write the results in following variables:
//	depth	- t \in [0.0 1.0]
//	posX	- x position of intersection point, nothing if no intersection
//	posY	- y position of intersection point, nothing if no intersection
//	posZ	- z position of intersection point, nothing if no intersection
//	normalX	- x component of normal at intersection point, nothing if no intersection
//	normalY	- y component of normal at intersection point, nothing if no intersection
//	normalZ	- z component of normal at intersection point, nothing if no intersection
//
//	attention: a sphere usually has two intersection points; make sure to return 
//	the one that is closest to the ray's origin and still in the viewing frustum
//
/////////////////////////////////////////////////////////////////////////////////
bool 
Sphere::intersect(Ray ray, double *depth,	
				  double *posX, double *posY, double *posZ,
				  double *normalX, double *normalY, double *normalZ)

{
	//////////*********** START OF CODE TO CHANGE *******////////////

	// from slides:
	// (cx + t * vx)^2 + (cy + t * vy)^2 + (cz + t * vz)^2 = r^2

	// text:
	// (e+td−c)·(e+td−c)−R2 = 0
	// (d·d)t^2 +2d·(e−c)t+(e−c)·(e−c)−R^2 = 0

	// d: the direction vector of the ray
	// e: point at which the ray starts
	// c: center point of the sphere

	Vec3 dvec(	ray.direction[0],
				ray.direction[1],
				ray.direction[2]);

	Vec3 evec(	ray.origin[0],
				ray.origin[1],
				ray.origin[2]);

	Vec3 cvec(	this->center[0],
				this->center[1],
				this->center[2]);

	// use the quadratic equation, since we have the form At^2 + Bt + C = 0.

	double a = dvec.dot(dvec);
	double b = dvec.scale(2).dot(evec.subtract(cvec));

	Vec3 eMinusCvec = evec.subtract(cvec);
	double c = eMinusCvec.dot(eMinusCvec) - (this->radius * this->radius);

	// discriminant: b^2 - 4ac
	double discriminant = (b * b) - (4 * a * c);

	// From text: If the discriminant is negative, its square root 
	// is imaginary and the line and sphere do not intersect.
	if (discriminant < 0) {
		
		//printf("No intersection with sphere - 1\n");
		return false;

	} else {
		// there is at least one intersection point
		double t1 = (-b + sqrt(discriminant)) / (2 * a);
		double t2 = (-b - sqrt(discriminant)) / (2 * a);

		double tmin = fmin(t1, t2);
		double tmax = fmax(t1, t2);

		double t = 0; // t is set to either tmin or tmax (or the function returns false)

		if (tmin >= 0) { //} && tmin <= 1) {

			t = tmin;

		} else if (tmax >= 0) { //} && tmax <= 1) {

			t = tmax;

		} else {

			// return false if neither intersection point lies in front of the ray origin (t >= 0)
			//printf("No intersection with sphere. t values (%f, %f)\n", t1, t2);
			return false;

		}

		*depth = t;
		
		// position: (e + td)
		Vec3 posvec = dvec.scale(t).add(evec);
		*posX = posvec[0];
		*posY = posvec[1];
		*posZ = posvec[2];

		// normal: 2(p - c)
		Vec3 normalvec = posvec.subtract(cvec).scale(2);
		normalvec.normalize();
		*normalX = normalvec[0];
		*normalY = normalvec[1];
		*normalZ = normalvec[2];
	}

	//////////*********** END OF CODE TO CHANGE *******////////////
	//printf("Sphere intersection found (%f, %f, %f) \n", *posX, *posY, *posZ);
	return true;
}
Example #29
/////////////////////////////////////////////////////////////////////////////////
//	calculate the intersection of a plane with the given ray
//	the ray has an origin and a direction, ray = origin + t*direction
//	find the t parameter, return true if it is between 0.0 and 1.0, false 
//	otherwise, write the results in following variables:
//	depth	- t \in [0.0 1.0]
//	posX	- x position of intersection point, nothing if no intersection
//	posY	- y position of intersection point, nothing if no intersection
//	posZ	- z position of intersection point, nothing if no intersection
//	normalX	- x component of normal at intersection point, nothing if no intersection
//	normalY	- y component of normal at intersection point, nothing if no intersection
//	normalZ	- z component of normal at intersection point, nothing if no intersection
//
/////////////////////////////////////////////////////////////////////////////////
bool 
Plane::intersect(Ray ray, double *depth,
				 double *posX, double *posY, double *posZ,
				 double *normalX, double *normalY, double *normalZ)

{
	//////////*********** START OF CODE TO CHANGE *******////////////

	Vec3 evec(	ray.origin[0],
				ray.origin[1],
				ray.origin[2]);

	Vec3 nvec(	this->params[0],
				this->params[1],
				this->params[2]);

	Vec3 dvec(	ray.direction[0],
				ray.direction[1],
				ray.direction[2]);

	double d = this->params[3] * sqrt(nvec[0] * nvec[0] + nvec[1] * nvec[1] + nvec[2] * nvec[2]);
	double t = -1;
	double denom = dvec.dot(nvec);

	if (denom != 0) {

		t = (-d - (evec.dot(nvec))) / dvec.dot(nvec);

	}

	if (t <= 0) {

		return false;

	} else {

		*depth = t;
		*posX = (dvec[0] * t) + evec[0];
		*posY =	(dvec[1] * t) + evec[1];
		*posZ = (dvec[2] * t) + evec[2];

		if (denom > 0) {
			
			*normalX = -nvec[0];
			*normalY = -nvec[1];
			*normalZ = -nvec[2];

		} else {

			*normalX = nvec[0];
			*normalY = nvec[1];
			*normalZ = nvec[2];

		}
	}

	//////////*********** END OF CODE TO CHANGE *******////////////
	//printf("dvec[0], dvec[1], dvec[2]: (%f, %f, %f) \n", dvec[0], dvec[1], dvec[2]);
	//printf("evec[0], evec[1], evec[2]: (%f, %f, %f) \n", evec[0], evec[1], evec[2]);
	//printf("Plane interesection at t:%f (%f, %f, %f)\n", t, *posX, *posY, *posZ);
	return true;
}
Example #30
w_rc_t ShoreTPCCEnv::_post_init_impl()
{
#ifndef CFG_HACK
    return (RCOK);
#endif

    TRACE (TRACE_ALWAYS, "Padding WAREHOUSES");
    ss_m* db = this->db();

    // lock the WH table
    warehouse_t* wh = warehouse_desc();
    index_desc_t* idx = wh->indexes();
    int icount = wh->index_count();
    stid_t wh_fid = wh->fid();

    // lock the table and index(es) for exclusive access
    W_DO(db->lock(wh_fid, EX));
    for(int i=0; i < icount; i++) {
	for(int j=0; j < idx[i].get_partition_count(); j++)
	    W_DO(db->lock(idx[i].fid(j), EX));
    }

    guard<ats_char_t> pts = new ats_char_t(wh->maxsize());

    /* copy and pad all tuples smaller than 4k

       WARNING: this code assumes that existing tuples are packed
       densely so that all padded tuples are added after the last
       unpadded one
    */
    bool eof;
    static int const PADDED_SIZE = 4096; // we know you can't fit two 4k records on a single page
    array_guard_t<char> padding = new char[PADDED_SIZE];
    std::vector<rid_t> hit_list;
    {
	guard<warehouse_man_impl::table_iter> iter;
	{
	    warehouse_man_impl::table_iter* tmp;
	    W_DO(warehouse_man()->get_iter_for_file_scan(db, tmp));
	    iter = tmp;
	}

	int count = 0;
	table_row_t row(wh);
	rep_row_t arep(pts);
	int psize = wh->maxsize()+1;

	W_DO(iter->next(db, eof, row));
	while (1) {
	    pin_i* handle = iter->cursor();
	    if (!handle) {
		TRACE(TRACE_ALWAYS, " -> Reached EOF. Search complete (%d)\n", count);
		break;
	    }

	    // figure out how big the old record is
	    int hsize = handle->hdr_size();
	    int bsize = handle->body_size();
	    if (bsize == psize) {
		TRACE(TRACE_ALWAYS, " -> Found padded WH record. Stopping search (%d)\n", count);
		break;
	    }
	    else if (bsize > psize) {
		// too big... shrink it down to save on logging
		handle->truncate_rec(bsize - psize);
                fprintf(stderr, "+");
	    }
	    else {
		// copy and pad the record (and mark the old one for deletion)
		rid_t new_rid;
		vec_t hvec(handle->hdr(), hsize);
		vec_t dvec(handle->body(), bsize);
		vec_t pvec(padding, PADDED_SIZE-bsize);
		W_DO(db->create_rec(wh_fid, hvec, PADDED_SIZE, dvec, new_rid));
		W_DO(db->append_rec(new_rid, pvec));
                // for small databases, first padded record fits on this page
                if (not handle->up_to_date())
                    handle->repin();

                // mark the old record for deletion
		hit_list.push_back(handle->rid());

		// update the index(es)
		vec_t rvec(&row._rid, sizeof(rid_t));
		vec_t nrvec(&new_rid, sizeof(new_rid));
		for(int i=0; i < icount; i++) {
		    int key_sz = warehouse_man()->format_key(idx+i, &row, arep);
		    vec_t kvec(arep._dest, key_sz);

		    /* destroy the old mapping and replace it with the new
		       one.  If it turns out this is super-slow, we can
		       look into probing the index with a cursor and
		       updating it directly.
		    */
		    int pnum = _pwarehouse_man->get_pnum(&idx[i], &row);
		    stid_t fid = idx[i].fid(pnum);

		    if(idx[i].is_mr()) {
			W_DO(db->destroy_mr_assoc(fid, kvec, rvec));
			// now put the entry back with the new rid
			el_filler ef;
			ef._el.put(nrvec);
			W_DO(db->create_mr_assoc(fid, kvec, ef));
		    } else {
			W_DO(db->destroy_assoc(fid, kvec, rvec));
			// now put the entry back with the new rid
			W_DO(db->create_assoc(fid, kvec, nrvec));
		    }

		}
                fprintf(stderr, ".");
	    }

	    // next!
	    count++;
	    W_DO(iter->next(db, eof, row));
	}
        fprintf(stderr, "\n");

	// put the iter out of scope
    }

    // delete the old records
    int hlsize = hit_list.size();
    TRACE(TRACE_ALWAYS, "-> Deleting (%d) old unpadded records\n", hlsize);
    for(int i=0; i < hlsize; i++) {
	W_DO(db->destroy_rec(hit_list[i]));
    }

    return (RCOK);
}