Exemplo n.º 1
0
/**
 * Score a single sentence against a language model and print the results.
 *
 * The sentence is split into words with str2words(), scored with
 * calc_entropy(), and the cross-entropy (in bits), perplexity, LM score,
 * word count, OOV count and context-cue count are written to stdout.
 * Nothing is printed when the sentence contains no words.
 *
 * @param lm    Language model passed through to calc_entropy().
 * @param lmath Log-math object; its base converts the entropy to bits and
 *              it is used to exponentiate the entropy into a perplexity.
 * @param text  Sentence to evaluate. It is not modified: splitting is done
 *              on a private copy.
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
	char *textfoo;
	char **words;
	int32 n, ch, noovs, nccs, lscr;

	/* Split a private copy of the text into an array of strings. */
	textfoo = ckd_salloc(text);
	/* First pass with a NULL array only counts the words. */
	n = str2words(textfoo, NULL, 0);
	if (n < 0)
		E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
	if (n == 0) {
		/* Nothing to evaluate, but the copy must still be released. */
		ckd_free(textfoo);
		return;
	}
	words = ckd_calloc(n, sizeof(*words));
	str2words(textfoo, words, n);

	ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

	printf("input: %s\n", text);
	/* ch is in the log base of lmath; rescale it to log base 2. */
	printf("cross-entropy: %f bits\n",
	       ch * log(logmath_get_base(lmath)) / log(2));

	/* Calculate perplexity pplx = exp CH */
	printf("perplexity: %f\n", logmath_exp(lmath, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", n);
	printf("%d OOVs, %d context cues removed\n",
	      noovs, nccs);

	ckd_free(textfoo);
	ckd_free(words);
}
Exemplo n.º 2
0
/**
 * Score every line of a transcript file against a language model and print
 * aggregate statistics to stdout.
 *
 * Each non-empty line is split into words; a trailing word of the form
 * "(...)" is treated as an utterance ID and dropped. The per-line entropy
 * from calc_entropy() is weighted by the number of scored words on that
 * line (words minus context cues minus OOVs), converted to bits and
 * accumulated, then averaged over all scored words at the end.
 *
 * The function terminates the program through E_FATAL_SYSTEM if the file
 * cannot be opened.
 *
 * @param lm     Language model passed through to calc_entropy().
 * @param lmath  Log-math object; only its base is used here.
 * @param lsnfn  Path of the transcript file to read.
 */
static void
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
	FILE *fh;
	lineiter_t *litor;
	int32 nccs, noovs, nwords, lscr;
	float64 ch, log_to_log2;

	if ((fh = fopen(lsnfn, "r")) == NULL)
		E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

	/* We have to keep ch in floating-point to avoid overflows, so
	 * we might as well use log2. */
	log_to_log2 = log(logmath_get_base(lmath)) / log(2);
	lscr = nccs = noovs = nwords = 0;
	ch = 0.0;
	for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
		char **words;
		int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

		/* First pass with a NULL array only counts the words. */
		n = str2words(litor->buf, NULL, 0);
		if (n < 0)
			E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
		if (n == 0) /* Do nothing! */
			continue;
		words = ckd_calloc(n, sizeof(*words));
		str2words(litor->buf, words, n);

		/* Remove any utterance ID (FIXME: has to be a single "word") */
		if (words[n-1][0] == '('
		    && words[n-1][strlen(words[n-1])-1] == ')')
			n = n - 1;

		/* A line holding nothing but an utterance ID has no words
		 * left to score; treat it like an empty line. */
		if (n == 0) {
			ckd_free(words);
			continue;
		}

		tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
				      &tmp_noovs, &tmp_lscr);

		/* Weight the line's entropy by its number of scored words. */
		ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
		nccs += tmp_nccs;
		noovs += tmp_noovs;
		lscr += tmp_lscr;
		nwords += n;

		ckd_free(words);
	}

	/* The file was opened here, so it is closed here once iteration ends. */
	fclose(fh);

	/* NOTE(review): if no words were scored, the divisions below have a
	 * zero denominator and the report prints non-finite values. */
	ch /= (nwords - nccs - noovs);
	printf("cross-entropy: %f bits\n", ch);

	/* Calculate perplexity pplx = 2^CH, since ch is in bits */
	printf("perplexity: %f\n", pow(2.0, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", nwords);
	printf("%d OOVs (%.2f%%), %d context cues removed\n",
	       noovs, (double)noovs / nwords * 100, nccs);
}
Exemplo n.º 3
0
void compute_streamlines() 
{

	printf("generating seeds...\n"); 
	int num=0;
	int grid_res[3];
	float* vectors=get_grid_vec_data(grid_res);//get vec data at each grid point
	
										//load seeds from file
	int* donot_change=new int[grid_res[0]*grid_res[1]*grid_res[2]];
	memset(donot_change,0,sizeof(int)*grid_res[0]*grid_res[1]*grid_res[2]);
	float* new_vectors=new float[grid_res[0]*grid_res[1]*grid_res[2]*3];
	memset(new_vectors,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]*3);

	int nSeeds; 
	//set first seed as domain center
	sl_list.clear(); 
	seed_list.clear();
	
	//set boundary condition
	for(int z=0;z<grid_res[2];z++)
	for(int y=0;y<grid_res[1];y++)
	for(int x=0;x<grid_res[0];x++)
	{
		if( x==0||x==grid_res[0]-1||
			y==0||y==grid_res[1]-1||
			z==0||z==grid_res[2]-1)
		{
			int idx=x+y*grid_res[0]+z*grid_res[0]*grid_res[1];
			new_vectors[idx*3+0]=vectors[idx*3+0];
			new_vectors[idx*3+1]=vectors[idx*3+1];
			new_vectors[idx*3+2]=vectors[idx*3+2];
			donot_change[idx]=1;
		}
	}

	int* old_bin, *new_bin;
	old_bin=new int [grid_res[0]*grid_res[1]*grid_res[2]];
	new_bin=new int [grid_res[0]*grid_res[1]*grid_res[2]];

	for(int i=0;i<grid_res[0]*grid_res[1]*grid_res[2];i++)
	{
		VECTOR3 orif;
		orif.Set(vectors[i*3+0],vectors[i*3+1],vectors[i*3+2]);
		old_bin[i]=get_bin_number_3D(orif,theta, phi,binnum);
		new_bin[i]=0.0;
	}


	if(!entropies)
	{
		printf("calculating every point entropies\n");
		entropies=new float[grid_res[0]*grid_res[1]*grid_res[2]];
		calc_entropy( old_bin,grid_res, binnum, entropies);
		dumpEntropyField("entropies.bin",entropies, grid_res);

		printf("entropy calculation done\n");
		
	}		
	int selected_line_num=0;
	float entropy=8888;
	std::vector<VECTOR3> seedlist,selected_list;
	srand((unsigned)time(NULL));			// initialize random number generator
	std::vector<float> entropies;

	int x_min=0,x_max=0,y_min=0,y_max=0;
	int* occupied=new int[grid_res[0]*grid_res[1]*grid_res[2]];
	memset(occupied,0,sizeof(int)*grid_res[0]*grid_res[1]*grid_res[2]);
	
	float* kx=new float[grid_res[0]*grid_res[1]*grid_res[2]];
	float* ky=new float[grid_res[0]*grid_res[1]*grid_res[2]];
	float* kz=new float[grid_res[0]*grid_res[1]*grid_res[2]];

	float* b=new float[grid_res[0]*grid_res[1]*grid_res[2]];
	float* c1=new float[grid_res[0]*grid_res[1]*grid_res[2]];
	float* c2=new float[grid_res[0]*grid_res[1]*grid_res[2]];
	float* c3=new float[grid_res[0]*grid_res[1]*grid_res[2]];

	// ADD-BY-LEETEN 2009/11/10-BEGIN
	_FlowDiffusionInit(grid_res[0], grid_res[1], grid_res[2]);
	int get_bin_by_angle(float mytheta, float myphi, int binnum, float* theta, float* phi);

	static const int iNrOfThetas = 720;
	static const int iNrOfPhis = 360;
	static const double dNrOfThetas = double(iNrOfThetas);
	static const double dNrOfPhis	= double(iNrOfPhis);
	static int	ppiAngleMap[iNrOfThetas][iNrOfPhis];
	for(int t = 0; t < iNrOfThetas; t++)
		for(int p = 0; p < iNrOfPhis; p++)
		{
			float fTheta =	M_PI * 2.0f * float(t) / float(iNrOfThetas);
			float fPhi =	M_PI * float(p) / float(iNrOfPhis);
			int iBin = get_bin_by_angle(fTheta, fPhi, binnum, theta, phi);
			if( iBin >= 0 )
				ppiAngleMap[t][p] = iBin;
		}

	_FlowDiffusionSetAngleMap(&ppiAngleMap[0][0], iNrOfPhis, iNrOfThetas);
	// ADD-BY-LEETEN 2009/11/10-END
	//initial entropy
	float error=0.05;
	float target_entropy=-error*log2(error)-(1-error)*log2(1-error)+error*log2(359);
	printf("entropy_target=%f\n",target_entropy);
	std::vector<int> line_color;

	int *histo_puv=new int[binnum*binnum];
	int *histo_pv=new int[binnum];
	float* entropy_tmp=new float[binnum];
	float* pv=new float[binnum];
	int line_num_thres=NR_OF_STREAMLINES;//27;											//want to select top line_num_thres lines

	memset(kx,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(ky,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(kz,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(b,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(c1,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(c2,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	memset(c3,0,sizeof(float)*grid_res[0]*grid_res[1]*grid_res[2]);
	int round=0;

	std::vector<float> line_importance,entropy_list;

//	while(selected_line_num<line_num_thres )//&& entropy>.5)
	while(entropy>target_entropy)
	{
		VECTOR3 next_seed;
		std::vector<VECTOR3> seeds;
		printf("%d seeds selected,round=%d  entropy=%f\n",seed_list.size(), round, entropy);
		selectStreamlines_by_distribution(vectors,new_vectors, grid_res, occupied,seeds,
			theta, phi,old_bin,new_bin,0,0,round++,line_importance,entropy);


		//select new seed
		for(int i=0;i<seeds.size();i++)
			line_color.push_back(selected_line_num+i);
		selected_line_num+=seeds.size();

		//printf("calculate the bin number\r");
		for(int i=0;i<grid_res[0]*grid_res[1]*grid_res[2];i++)
		{
			VECTOR3 newf,orif;
			orif.Set(vectors[i*3+0],vectors[i*3+1],vectors[i*3+2]);
			newf.Set(new_vectors[i*3+0],new_vectors[i*3+1],new_vectors[i*3+2]);
			//if within error range, let it be teh same bin as the ori, otherwise select the bin num
			newf.Normalize();
			float dotv=dot(newf,orif);
			new_bin[i]=get_bin_number_3D(newf,theta, phi,binnum);
		}

		entropy=calcRelativeEntropy6_new(	vectors, new_vectors,  grid_res, VECTOR3(2,2,2),//do not count boundaries
									VECTOR3(grid_res[0]-2,grid_res[1]-2,grid_res[2]-2),theta,phi,old_bin,new_bin,0,binnum,histo_puv,histo_pv,
									pv,entropy_tmp);
	//	entropy_list.push_back(entropy);
		for(int i=0;i<seeds.size();i++)
		{
			selected_list.push_back(seeds[i]);
			//seedlist.push_back(seeds[i]);
			seed_list.push_back(seeds[i]);
		}
	
		//dumpEntropy(entropies,"entropy.bin");
		dumpSeeds(seed_list,"myseeds.seed");//crtical points excluded
		double dwStart= GetTickCount();

		//printf("streamline size=%d\n",sl_list.size());
		reconstruct_field_GVF_3D(new_vectors,vectors,grid_res,sl_list,donot_change,
						 kx,ky,kz,b,c1,c2,c3);//,importance);
		double elapsedTime= GetTickCount() - dwStart;
		printf("\n\n reconstruction time is %.3f milli-seconds.\n",elapsedTime); 	
		
		//clear the memory of sl_list;save memory for larger dataset
		/*std::list<vtListSeedTrace*>::iterator pIter; 
		pIter = sl_list.begin(); 
		
		for (; pIter!=sl_list.end(); pIter++) {
		
			vtListSeedTrace *trace = *pIter; 
			std::list<VECTOR3*>::iterator pnIter,pnIter2; 
			pnIter = trace->begin(); 
			for (; pnIter!= trace->end(); pnIter++) 
			 delete  *pnIter; 
			delete trace;
		}
		sl_list.clear();*/
		//dumpReconstruedField("r.vec", new_vectors, grid_res);
		
		unsigned char* dat=new unsigned char[grid_res[0]*grid_res[1]*grid_res[2]];
		for(int i=0;i<grid_res[0]*grid_res[1]*grid_res[2];i++)
			dat[i]=occupied[i]*255;
		/*FILE* test=fopen("impor.bin","wb");
		fwrite(grid_res,sizeof(int),3,test);
		fwrite(occupied,sizeof(unsigned char),grid_res[0]*grid_res[1]*grid_res[2],test);
		fclose(test);
		*/
		delete [] dat;

		for(int i=0;i<sl_list.size();i++)
		line_color.push_back(i);
		char filename[255];
		memset(filename,0,255);
		sprintf(filename,"streamlines%d.dat",round);
		save_streamlines_to_file_hand_tuning(filename,line_color,nSeeds);
		dumpEntropies(line_importance);
		//	getchar();
	//	printf("halted, save files now\n");
	}
	

	//dumpReconstruedField("r.vec", new_vectors, grid_res);
	dumpSeeds(seed_list,"myseeds.seed");//crtical points excluded
	delete [] histo_puv;
	delete [] histo_pv;
	delete [] pv;
	delete [] entropy_tmp;
	delete [] occupied;
	delete [] old_bin;
	delete [] new_bin;
	delete [] kx;
	delete [] ky;
	delete [] kz;
	delete [] b;
	delete [] c1;
	delete [] c2;
	delete [] c3;

	delete [] donot_change;
	delete [] vectors;
	delete [] new_vectors;
	#if	USE_CUDA	
	_FlowDiffusionFree();
	#endif
}