Ejemplo n.º 1
0
int main(int argc, char** argv)  {
	if(argc < 5)  {
		std::cerr << "usage: " << argv[0] << " evaluation_base shape num_shapes data_base [exp]" << std::endl;
		return 1;
	}
	int arg_num = 1;
	string eval_base = argv[arg_num++];
	string shape = argv[arg_num++];
	int num_shapes = atoi(argv[arg_num++]);

	string data_base = argv[arg_num++];
	string distmean_filename = data_base + "_distmean.txt";
	string distmax_filename = data_base + "_distmax.txt";
	string anglemean_filename = data_base + "_anglemean.txt";
	string anglemax_filename = data_base + "_anglemax.txt";

	string exp = "";
	if(argc == 6)
		exp = argv[5];

	FILE* distmean_file = fopen(distmean_filename.c_str(), "w");
	FILE* distmax_file = fopen(distmax_filename.c_str(), "w");
	FILE* anglemean_file = fopen(anglemean_filename.c_str(), "w");
	FILE* anglemax_file = fopen(anglemax_filename.c_str(), "w");

	vector<FILE*> all_files;
	all_files.push_back(distmean_file);
	all_files.push_back(distmax_file);
	all_files.push_back(anglemean_file);
	all_files.push_back(anglemax_file);

	vector<string> all_algorithms;
	all_algorithms.push_back("apss");
	all_algorithms.push_back("fourier");
	all_algorithms.push_back("imls");
	all_algorithms.push_back("mpu");
	all_algorithms.push_back("mpusmooth");
	all_algorithms.push_back("poisson");
	all_algorithms.push_back("rbf");
	all_algorithms.push_back("scattered");
	all_algorithms.push_back("spss");
	all_algorithms.push_back("wavelet");

	for(unsigned a = 0; a < all_algorithms.size(); a++)  {
		string algorithm = all_algorithms[a];

		vector<double> mean_dist;
		vector<double> max_dist;
		vector<double> mean_angle;
		vector<double> max_angle;

		vector<double> computation_time;

		for(int i = 0; i < num_shapes; i++)  {
			char* eval_num = new char[30];
			sprintf(eval_num, "%u", i);
			string base_alg_file = eval_base+"/"+shape+"/"+exp+"/"+algorithm+"/"+shape+"_"+eval_num;
			string stats_filename = base_alg_file+".recon";
			string dist_filename = base_alg_file+".dist";
			FILE* stats_file = fopen(stats_filename.c_str(), "rb");
			FILE* dist_file = fopen(dist_filename.c_str(), "rb");

			if(stats_file == 0 || dist_file == 0)  {
				cerr << "bad stat file or dist file! " << base_alg_file << endl;
				continue;
			}

			size_t num_read = 0;
			double shape_dist[8];
			num_read = fread(shape_dist, sizeof(double), 8, dist_file);
			double angle_dist[8];
			num_read = fread(angle_dist, sizeof(double), 8, dist_file);
			fclose(dist_file);
			fclose(stats_file);

			double pc_mean_dist = shape_dist[5] > shape_dist[6] ? shape_dist[5] : shape_dist[6];
			//double pc_mean_dist = shape_dist[7];
			double pc_max_dist = shape_dist[4];
			double pc_mean_angle = angle_dist[5] > angle_dist[6] ? angle_dist[5] : angle_dist[6];
			//double pc_mean_angle = angle_dist[7];
			double pc_max_angle = angle_dist[4];

			mean_dist.push_back(pc_mean_dist);
			max_dist.push_back(pc_max_dist);
			mean_angle.push_back(pc_mean_angle);
			max_angle.push_back(pc_max_angle);

			delete [] eval_num;
		}

		vector< vector<double> > distributions;
		distributions.push_back(mean_dist);
		distributions.push_back(max_dist);
		distributions.push_back(mean_angle);
		distributions.push_back(max_angle);

		for(int i = 0; i < 4; i++)  {
			vector<double> distribution = distributions[i];
			vector<Measure> full_distribution;
			for(unsigned d = 0; d < distribution.size(); d++)
				full_distribution.push_back(Measure(d, distribution[d]));
			FILE* dist_file = all_files[i];

			std::sort(full_distribution.begin(),full_distribution.end());
			double quartile_info[5];
			int quartile_indices[5];
			quartiles(&full_distribution, quartile_info, quartile_indices);
			for(int i = 0; i < 5; i++)
				fprintf(dist_file, "%.10f ", quartile_info[i]);
			for(int i = 0; i < 4; i++)
				fprintf(dist_file, "%u ", quartile_indices[i]);
			fprintf(dist_file, "%u\n", quartile_indices[4]);
		}
	}

	fclose(distmean_file);
	fclose(distmax_file);
	fclose(anglemean_file);
	fclose(anglemax_file);
}
Ejemplo n.º 2
0
/*
 * Computes box-plot statistics for one reconstruction and writes them, in
 * binary, to the given plot file.
 *
 * Usage: prog evaluation_base box_plot
 *
 * Reads <evaluation_base>_m2i.sdm, <evaluation_base>_i2m.sdm and
 * <evaluation_base>.recon. Correspondences containing NaN are counted and
 * skipped. The output file holds 16 doubles:
 *   5 distance quartile values (min, lq, median, uq, max),
 *   mean m2i distance, mean i2m distance, overall mean distance,
 *   5 angle quartile values (min, lq, median, uq, max),
 *   mean m2i angle, mean i2m angle, overall mean angle.
 *
 * Returns 1 on bad usage, an invalid .recon file, or if the plot file cannot
 * be created or fully written; 0 otherwise.
 */
int main(int argc, char **argv) {
	if(argc != 3)  {
		std::cerr << "usage: " << argv[0] << " evaluation_base box_plot" << std::endl;
		return 1;
	}
	int arg_num = 1;
	string eval_base = argv[arg_num++];
	string plot_filename = argv[arg_num++];

	string m2i_file = eval_base+"_m2i.sdm";
	string i2m_file = eval_base+"_i2m.sdm";
	string stats_file = eval_base+".recon";

	GlobalStats g_stats(stats_file);
	if(!g_stats.is_valid_file())  {
		cerr << stats_file << "did not execute!" << endl;
		return 1;
	}

	cout << "parsing..." << endl;
	ShortestDistanceMap m2i_map(m2i_file);
	ShortestDistanceMap i2m_map(i2m_file);
	cout << "... done parsing." << endl;

	// Distances and angles of both maps are pooled for the quartiles.
	vector<double> distances;
	vector<double> angles;

	// The mean_* variables hold running sums until the divisions further down.
	double mean_m2i_dist = 0, mean_m2i_angle = 0, mean_dist = 0, mean_angle = 0;
	int num_m2i = 0, num_m2i_nan = 0;
	for(int i = 0; i < m2i_map.num_correspondences(); i++)  {
		PointPair point_pair = m2i_map.getPointCorrespondence(i);
		NormalPair normal_pair = m2i_map.getNormalCorrespondence(i);
		if(point_pair.has_nan() || normal_pair.has_nan())  {
			num_m2i_nan++;
			continue;
		}
		num_m2i++;

		double next_dist = point_pair.distance();
		distances.push_back(next_dist);

		double next_angle = normal_pair.angle();
		angles.push_back(next_angle);

		// The angle can still be NaN although neither normal contains one.
		if(isnan(next_angle))
			cout << "bad angle: " << normal_pair.normal1 << " : " << normal_pair.normal2 << endl;

		mean_m2i_dist += next_dist;
		mean_dist += next_dist;
		mean_m2i_angle += next_angle;
		mean_angle += next_angle;
	}

	double mean_i2m_dist = 0, mean_i2m_angle = 0;
	int num_i2m = 0, num_i2m_nan = 0;
	for(int i = 0; i < i2m_map.num_correspondences(); i++)  {
		PointPair point_pair = i2m_map.getPointCorrespondence(i);
		NormalPair normal_pair = i2m_map.getNormalCorrespondence(i);
		if(point_pair.has_nan() || normal_pair.has_nan())  {
			num_i2m_nan++;
			continue;
		}
		num_i2m++;

		double next_dist = point_pair.distance();
		distances.push_back(next_dist);

		double next_angle = normal_pair.angle();
		angles.push_back(next_angle);

		if(isnan(next_angle))
			cout << "bad angle: " << normal_pair.normal1 << " : " << normal_pair.normal2 << endl;

		mean_i2m_dist += next_dist;
		mean_dist += next_dist;
		mean_i2m_angle += next_angle;
		mean_angle += next_angle;
	}

	std::sort(distances.begin(),distances.end());
	std::sort(angles.begin(),angles.end());

	// Turn the running sums into means. A direction without any valid
	// correspondence yields 0/0 = NaN here, which is written out as is.
	mean_m2i_dist /= ((double)num_m2i);
	mean_m2i_angle /= ((double)num_m2i);
	mean_i2m_dist /= ((double)num_i2m);
	mean_i2m_angle /= ((double)num_i2m);
	mean_dist /= (double(num_m2i+num_i2m));
	mean_angle /= (double(num_m2i+num_i2m));

	cout << "m2i nan: " << num_m2i_nan << " i2m nan: " << num_i2m_nan << endl;
	// Both counters are non-negative, so the conversion to size_t is lossless.
	const size_t num_valid = static_cast<size_t>(num_m2i+num_i2m);
	if(num_valid != distances.size() || num_valid != angles.size())
		cout << "ERROR: sizes don't match up!" << endl;

	// NOTE(review): both vectors are empty when no valid correspondence
	// exists -- confirm quartiles() copes with that.
	double min_dist, lq_dist, median_dist, uq_dist, max_dist;
	quartiles(&distances, min_dist, lq_dist, median_dist, uq_dist, max_dist);
	double min_angle, lq_angle, median_angle, uq_angle, max_angle;
	quartiles(&angles, min_angle, lq_angle, median_angle, uq_angle, max_angle);

	FILE* dist_file = fopen(plot_filename.c_str(), "wb");
	if(dist_file == 0)  {
		// fwrite on a null FILE* would crash; report the failure instead.
		cerr << "unable to open " << plot_filename << " for writing" << endl;
		return 1;
	}

	// Count the doubles actually written so a short write is detected.
	size_t num_written = 0;
	double dist_distribution[] = { min_dist, lq_dist, median_dist, uq_dist, max_dist };
	num_written += fwrite(dist_distribution, sizeof(double), 5, dist_file);
	num_written += fwrite(&mean_m2i_dist, sizeof(double), 1, dist_file);
	num_written += fwrite(&mean_i2m_dist, sizeof(double), 1, dist_file);
	num_written += fwrite(&mean_dist, sizeof(double), 1, dist_file);

	double angle_distribution[] = { min_angle, lq_angle, median_angle, uq_angle, max_angle };
	num_written += fwrite(angle_distribution, sizeof(double), 5, dist_file);
	num_written += fwrite(&mean_m2i_angle, sizeof(double), 1, dist_file);
	num_written += fwrite(&mean_i2m_angle, sizeof(double), 1, dist_file);
	num_written += fwrite(&mean_angle, sizeof(double), 1, dist_file);

	// fclose flushes buffered data, so its result matters as well.
	bool write_ok = (num_written == 16);
	if(fclose(dist_file) != 0)
		write_ok = false;
	if(!write_ok)  {
		cerr << "error while writing " << plot_filename << endl;
		return 1;
	}
	return 0;
}
/* Function for calculating the probability density, BY SOME
 * METRIC THAT'S STILL UNDER DEVELOPMENT.
 *
 * Approximated by drawing samples from pre-computed random
 * sequences at each point and comparing to the native sequence.
 *
 * Every energy is the dot product of the current parameter values
 * with a vector of term values (native, decoy, or random sequence).
 *
 * Returns the energy gap between the native sequence and its
 * competitor, divided by the spread quarts[2] - quarts[1] of the
 * random energies. With decoys, the competitor is the lower of the
 * lowest decoy energy and randomEngs[0]; the result is positive when
 * the native energy lies below it and negative otherwise. Without
 * decoys (all decoy energies zero, or none present) the competitor
 * is quarts[2].
 *
 * NOTE(review): assumes randSeqTermVals has at least one column per
 * parameter and that quartiles() returns at least three values --
 * confirm with the callers.
 */
float EnergyFuncProposal::calcProbDensity(){
	unsigned int i, j;
	float nativeEng = 0.0; 
	float density, dispersal, randomEng;
	vector<float> parameters;
	vector<float> randomEngs;
	vector<float> decoyEngs( decoyTermVals.size(), 0.0);
	vector<float> zeroes( decoyTermVals.size(), 0.0);
	vector<float> quarts;

	/* Convert current parameter set to floating point representation.
	 */
	for(i = 0; i < params.size(); i++){
		parameters.push_back( params[i]->toFloat() );
	}

	/* Calculate the native sequence energy under these
	 * parameters.
	 */
	for(i = 0; i < parameters.size(); i++){
		nativeEng += parameters[i]*natSeqTermVals[i];
	}

	/* And the respective decoy energies. Each term value is weighted
	 * by the parameter of its own term (index j), exactly as for the
	 * native and random energies.
	 */
	for(i = 0; i < decoyTermVals.size(); i++){
		for(j = 0; j < parameters.size(); j++){
			decoyEngs[i] += parameters[j]*decoyTermVals[i][j];
		}
	}

	/* Calculate the scaled version of the energy function for
	 * all random sequences. Note that random energy values are
	 * organized by column (one for each term in the function),
	 * not by row (one for each sequence).
	 */
	for(i = 0; i < randSeqTermVals[0].size(); i++){

		randomEng = 0.0;

		/* Get the scaled energy value for this random sequence.
		 */
		for(j = 0; j < parameters.size(); j++){
			randomEng += parameters[j]*randSeqTermVals[j][i];
		}

		randomEngs.push_back(randomEng);
	}

	/* Calculate the quartiles of the distribution of random
	 * energies.
	 */
	quarts = quartiles(randomEngs);
	dispersal = quarts[2] - quarts[1];

	/* Calculate the energy gap to the nearest explicit decoy
	 * conformation, or the energy gap to the middle of the distribution
	 * of random conformation.
	 */
	if(decoyEngs != zeroes){

		sort(decoyEngs.begin(), decoyEngs.end());

		/* Measure against whichever is lower: the best decoy or
		 * randomEngs[0]. The gap is positive while the native state
		 * is the most stable one and turns negative as soon as a
		 * decoy or the random state undercuts it. Using a single
		 * expression also covers ties between the three energies,
		 * so density is always assigned.
		 *
		 * NOTE(review): randomEngs is not sorted in this function, so
		 * randomEngs[0] is the first random sequence unless
		 * quartiles() sorts its argument in place -- confirm.
		 */
		float nearestEng = decoyEngs[0] < randomEngs[0] ? decoyEngs[0] : randomEngs[0];

		density = (nearestEng - nativeEng) / dispersal;
	}
	else{
		/* No decoy information: measure against quarts[2]. */
		density = -1.0*( (nativeEng - quarts[2]) / (quarts[2]-quarts[1]) );
	}

	return density;
}