Ejemplo n.º 1
0
/**
 *
 *  Main program:
 *   This program reads the following positional parameters from the
 *   command line and then computes the optical flow and the occlusion map:
 *   I_1         previous image to I0
 *   I0          first image
 *   I1          second image
 *   I0_Smoothed image for using with function g (defaults to I0)
 *   out         name of the output flow field
 *   outOcc      name of the output occlusion map
 *   nproc       number of threads to use (OpenMP library)
 *   lambda      data term weight parameter
 *   alpha       length term weight parameter (in the occlusion region)
 *   beta        negative divergence data term
 *   theta       tightness parameter
 *   nscales     number of scales in the pyramidal structure
 *   zfactor     downsampling factor for creating the scales
 *   nwarps      number of warps per scales
 *   epsilon     stopping criterion threshold for the iterative process
 *   verbose     switch on/off messages
 *
 *   Only I_1, I0 and I1 are mandatory. Omitted optional parameters take
 *   their PAR_DEFAULT_* value; the range-checked ones are also reset to
 *   that default (with a warning) when the given value is invalid.
 *
 *   Returns EXIT_FAILURE on a usage error, when the input images cannot be
 *   used (missing or different sizes) or when memory for the output
 *   buffers cannot be obtained; EXIT_SUCCESS otherwise.
 *
 */
int main(int argc, char *argv[])
{
	// The three input images are mandatory: program name + 3 arguments.
	if (argc < 4) {
		fprintf(stderr, "Usage: %s I_1 I0 I1 [I0_Smoothed out "
				//              0   1   2  3      4        5
				"outOcc nproc lambda alpha beta theta nscales zfactor nwarps epsilon "
				// 6      7      8   9    10    11        12     13     14     15
				"verbose  ]\n", *argv);
				// 16
		return EXIT_FAILURE;
	}
	// Variable Declaration
	double *u1=NULL, *u2=NULL; //flow field
	double *chi=NULL;          //occlusion map

	double *I_1=NULL, *I0=NULL, *I1=NULL;  // Previous (I_1), current (I0) and next image (I1)
	double *filtI0=NULL;                   //Filtered image used in function g

	int nx_1, ny_1, nx, ny, nx1, ny1, nxf, nyf; //Image sizes

	int status = EXIT_SUCCESS;

	//read the parameters
	int i = 1;
	char* image_1_name = argv[i]; i++; //1
	char* image1_name  = argv[i]; i++; //2
	char* image2_name  = argv[i]; i++; //3
	char* image1_Smooth_name = (argc>i) ? argv[i] : argv[2]; i++; //4 If there is no I0_Smoothed, then it will be I0
	const char* outfile = (argc>i)? argv[i]: PAR_DEFAULT_OUTFLOW;       i++; //5
	const char* outOccFile = (argc>i)? argv[i]: PAR_DEFAULT_OUT_OCC;       i++; //6
	int   nproc   = (argc>i)? atoi(argv[i]): PAR_DEFAULT_NPROC;   i++; //7
	double lambda  = (argc>i)? atof(argv[i]): PAR_DEFAULT_LAMBDA;  i++; //8
	double alpha   = (argc>i)? atof(argv[i]): PAR_DEFAULT_ALPHA;   i++; //9
	double betaW   = (argc>i)? atof(argv[i]): PAR_DEFAULT_BETA;    i++; //10
	double theta   = (argc>i)? atof(argv[i]): PAR_DEFAULT_THETA;   i++; //11
	int   nscales = (argc>i)? atoi(argv[i]): PAR_DEFAULT_NSCALES; i++; //12
	double zfactor = (argc>i)? atof(argv[i]): PAR_DEFAULT_ZFACTOR; i++; //13
	int   nwarps  = (argc>i)? atoi(argv[i]): PAR_DEFAULT_NWARPS;  i++; //14
	double epsilon = (argc>i)? atof(argv[i]): PAR_DEFAULT_EPSILON; i++; //15
	int   verbose = (argc>i)? atoi(argv[i]): PAR_DEFAULT_VERBOSE; i++; //16

	//check parameters
	if (nproc < 0) {
		nproc = PAR_DEFAULT_NPROC;
		fprintf(stderr, "warning: "
				"nproc changed to %d\n", nproc);
	}
	if (lambda <= 0) {
		lambda = PAR_DEFAULT_LAMBDA;
		fprintf(stderr, "warning: "
				"lambda changed to %g\n", lambda);
	}
	if (alpha <= 0) {
		alpha = PAR_DEFAULT_ALPHA;
		fprintf(stderr, "warning: "
				"alpha changed to %g\n", alpha);
	}
	if (betaW <= 0) {
		betaW = PAR_DEFAULT_BETA;
		fprintf(stderr, "warning: "
				"beta changed to %g\n", betaW);
	}
	if (theta <= 0) {
		theta = PAR_DEFAULT_THETA;
		if (verbose) fprintf(stderr, "warning: "
				"theta changed to %g\n", theta);
	}
	if (nscales <= 0) {
		nscales = PAR_DEFAULT_NSCALES;
		fprintf(stderr, "warning: "
				"nscales changed to %d\n", nscales);
	}
	if (zfactor <= 0 || zfactor >= 1) {
		zfactor = PAR_DEFAULT_ZFACTOR;
		fprintf(stderr, "warning: "
				"zfactor changed to %g\n", zfactor);
	}
	if (nwarps <= 0) {
		nwarps = PAR_DEFAULT_NWARPS;
		fprintf(stderr, "warning: "
				"nwarps changed to %d\n", nwarps);
	}
	if (epsilon <= 0) {
		epsilon = PAR_DEFAULT_EPSILON;
		fprintf(stderr, "warning: "
				"epsilon changed to %f\n", epsilon);
	}


#ifdef _OPENMP
	// nproc == 0 leaves the OpenMP default thread count untouched
	if (nproc > 0)
		omp_set_num_threads(nproc);
#endif // _OPENMP

	// read the input images
	I_1    = read_image(image_1_name, &nx_1, &ny_1);
	I0     = read_image(image1_name, &nx, &ny);
	I1     = read_image(image2_name, &nx1, &ny1);
	filtI0 = read_image(image1_Smooth_name, &nxf, &nyf);

	// read_image's failure behaviour is not visible from here; guard
	// against a NULL result before trusting the reported sizes.
	if (I_1 == NULL || I0 == NULL || I1 == NULL || filtI0 == NULL) {
		fprintf(stderr, "error: could not read the input images\n");
		status = EXIT_FAILURE;
	}
	else if (nx != nx_1 || nx != nx1 || nx != nxf ||
			ny != ny_1 || ny != ny1 || ny != nyf) {
		fprintf(stderr, "error: the input images must have the same size\n");
		status = EXIT_FAILURE;
	}
	else {
		// Pixel count in size_t so that the byte-size products below are
		// not computed in (overflow-prone) int arithmetic.
		const size_t npixels = (size_t)nx * (size_t)ny;

		//Set the number of scales according to the size of the
		//images.  The value N is computed to assure that the smaller
		//images of the pyramid don't have a size smaller than 16x16.
		//Images smaller than 16 pixels would give N <= 0, so at least
		//one scale is always kept.
		const double maxScales =
			floor(log((float)MIN(nx, ny)/16.0)/log(1./zfactor)) + 1;
		const int N = !(maxScales >= 1) ? 1 : (int)maxScales;

		if (N < nscales)
			nscales = N;

		if (verbose)
			fprintf(stderr,
					" nproc=%d   \n lambda=%f \n alpha=%f \n"
					" beta=%f \n theta=%f \n nscales=%d \n zfactor=%f\n nwarps=%d \n epsilon=%g\n",
					nproc, lambda, alpha, betaW, theta, nscales,
					zfactor, nwarps, epsilon);

		//allocate memory for the flow
		u1  = (double *)xmalloc(npixels * sizeof(double));
		u2  = (double *)xmalloc(npixels * sizeof(double));

		//and the occlusion map
		chi = (double *)xmalloc(npixels * sizeof(double));

		for (size_t p = 0; p < npixels; p++)
		{
			chi[p] = 0.0;
			u1[p]  = 0.0;
			u2[p]  = 0.0;
		}

		//compute the optical flow
		Dual_TVL1_optic_flow_multiscale(
				I_1, I0, I1, filtI0, u1, u2, chi, nx, ny, lambda, alpha, betaW,  theta,
				nscales, zfactor, nwarps, epsilon, verbose);

		// The save routines take float buffers: f interleaves (u1, u2)
		// per pixel, fOcc holds the occlusion map scaled by 255.
		float *f    = (float *)malloc(sizeof(float) * npixels * 2);
		float *fOcc = (float *)malloc(sizeof(float) * npixels);

		if (f == NULL || fOcc == NULL) {
			fprintf(stderr, "error: not enough memory to save the results\n");
			status = EXIT_FAILURE;
		}
		else {
			for (size_t p = 0; p < npixels; p++)
			{
				f[2*p]   = (float)u1[p];
				f[2*p+1] = (float)u2[p];
				fOcc[p]  = (float)chi[p]*255;
			}

			//save the optical flow
			iio_save_image_float_vec((char *)outfile, f, nx, ny, 2);

			//save the occlusions
			iio_save_image_float((char *)outOccFile, fOcc, nx, ny);
		}

		free(f);
		free(fOcc);
	}

	//delete allocated memory (free(NULL) is a no-op)
	free(I_1);
	free(I0);
	free(I1);
	free(u1);
	free(u2);
	free(filtI0);
	free(chi);

	return status;
}
// METHOD THAT RECEIVES POINT CLOUDS (OPEN MP)
//
// Pipeline, as implemented below:
//   1. downsample the input cloud with a voxel grid of leaf size
//      model->distanceStep;
//   2. compute normals on the downsampled cloud and drop points whose
//      normal contains a NaN;
//   3. sample reference points (extractReferencePointsUniform);
//   4. in parallel, for every reference point, pair it with every other
//      scene point, look the pair up in model->hashTable and vote in a
//      per-thread accumulator indexed by [model point id][alpha bin];
//   5. for every reference point keep the accumulator peak plus every cell
//      reaching accumulatorPeakThreshold * peak as pose hypotheses;
//   6. merge the per-thread hypotheses into bestPoses and cluster them.
//
// Returns the clusters produced by poseClustering, or an empty vector when
// no hypothesis received any vote.
std::vector<cluster> poseEstimationSV::poseEstimationCore_openmp(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud)
{
Tic();
	// One hypothesis list per thread; merged after the parallel loop so that
	// no synchronisation is needed while voting.
	std::vector <std::vector < pose > > bestPosesAux;
	bestPosesAux.resize(omp_get_num_procs());

	pcl::PointIndices normals_nan_indices;
	pcl::ExtractIndices<pcl::PointNormal> nan_extract;

	float alpha;
	unsigned int alphaBin,index;
	// Iterators
	pcl::PointCloud<pcl::PointNormal>::iterator si;	// scene paired point
	std::vector<pointPairSV>::iterator sameFeatureIt; // same key on hash table

	Eigen::Vector4f feature;
	Eigen::Vector3f _pointTwoTransformed;
	std::cout<< "\tCloud size: " << cloud->size() << std::endl;
	//////////////////////////////////////////////
	// Downsample point cloud using a voxelgrid //
	//////////////////////////////////////////////

	pcl::PointCloud<pcl::PointXYZ>::Ptr cloudDownsampled(new pcl::PointCloud<pcl::PointXYZ> ());
	// Create the filtering object
	pcl::VoxelGrid<pcl::PointXYZ> sor;
	sor.setInputCloud (cloud);
	sor.setLeafSize (model->distanceStep,model->distanceStep,model->distanceStep);
	sor.filter (*cloudDownsampled);
	std::cout<< "\tCloud size after downsampling: " << cloudDownsampled->size() << std::endl;

	// Compute point cloud normals (on the downsampled cloud)
	std::cout<< "\tCompute normals... ";
	cloudNormals=model->computeSceneNormals(cloudDownsampled);
	std::cout<< "Done" << std::endl;

	///////////////////////////////////////////////////////////////////////////////
	// Filter again to remove spurious NaN normals (and their associated points) //
	///////////////////////////////////////////////////////////////////////////////

	for (unsigned int i = 0; i < cloudNormals->points.size(); ++i)
	{
		if (isnan(cloudNormals->points[i].normal[0]) || isnan(cloudNormals->points[i].normal[1]) || isnan(cloudNormals->points[i].normal[2]))
		{
			normals_nan_indices.indices.push_back(i);
		}
	}

	// setNegative(true): keep everything that is NOT listed in the indices
	nan_extract.setInputCloud(cloudNormals);
	nan_extract.setIndices(boost::make_shared<pcl::PointIndices> (normals_nan_indices));
	nan_extract.setNegative(true);
	nan_extract.filter(*cloudWithNormalsDownSampled);
	std::cout<< "\tCloud size after removing NaN normals: " << cloudWithNormalsDownSampled->size() << std::endl;


	/////////////////////////////////////////////
	// Extract reference points from the scene //
	/////////////////////////////////////////////

	int numberOfPoints=(int) (cloudWithNormalsDownSampled->size () )*referencePointsPercentage;
	int totalPoints=(int) (cloudWithNormalsDownSampled->size ());
	std::cout << "\tUniform sample a set of " << numberOfPoints << "(" << referencePointsPercentage*100 <<  "%)... ";
	referencePointsIndices->indices.clear();
	extractReferencePointsUniform(referencePointsPercentage,totalPoints);
	std::cout << "Done" << std::endl;

	//////////////
	// Votation //
	//////////////

	std::cout<< "\tVotation... ";

	omp_set_num_threads(omp_get_num_procs());

	bestPoses.clear();
	#pragma omp parallel for private(alpha,alphaBin,alphaScene,sameFeatureIt,index,feature,si,_pointTwoTransformed)
	for(unsigned int sr=0; sr < referencePointsIndices->indices.size(); ++sr)
	{
		const int threadId=omp_get_thread_num();

		// Index of the current reference point inside the filtered scene cloud
		const int referenceIndex=referencePointsIndices->indices[sr];

		// Declared inside the loop body so that every thread works on its own
		// copy (as function-scope variables they were shared between threads).
		const Eigen::Vector3f scenePoint=cloudWithNormalsDownSampled->points[referenceIndex].getVector3fMap();
		const Eigen::Vector3f sceneNormal=cloudWithNormalsDownSampled->points[referenceIndex].getNormalVector3fMap();

		// This thread's private vote accumulator: [model point id][alpha bin]
		std::vector<std::vector<int> > &accumulator=accumulatorParallelAux[threadId];

		// Get transformation from scene frame to global frame
		Eigen::Vector3f cross=sceneNormal.cross (Eigen::Vector3f::UnitX ()). normalized();

		Eigen::Affine3f rotationSceneToGlobal;
		if(isnan(cross[0]))
		{
			// Normal parallel to the X axis: the cross product is null and
			// its normalisation yields NaN, so use a zero rotation instead.
			rotationSceneToGlobal=Eigen::AngleAxisf(0.0,Eigen::Vector3f::UnitX ());
		}
		else
			rotationSceneToGlobal=Eigen::AngleAxisf(acosf (sceneNormal.dot (Eigen::Vector3f::UnitX ())),cross);

		Eigen::Affine3f transformSceneToGlobal = Eigen::Translation3f ( rotationSceneToGlobal* ((-1)*scenePoint)) * rotationSceneToGlobal;

		//////////////////////
		// Choose best pose //
		//////////////////////

		// Reset pose accumulator
		for(std::vector<std::vector<int> >::iterator accumulatorIt=accumulator.begin();accumulatorIt < accumulator.end(); ++accumulatorIt)
		{
			std::fill(accumulatorIt->begin(),accumulatorIt->end(),0);
		}

		for(si=cloudWithNormalsDownSampled->begin(); si < cloudWithNormalsDownSampled->end();++si)
		{
			// if same point, skip point pair
			if( (cloudWithNormalsDownSampled->points[referenceIndex].x==si->x) && (cloudWithNormalsDownSampled->points[referenceIndex].y==si->y) && (cloudWithNormalsDownSampled->points[referenceIndex].z==si->z))
			{
				continue;
			}

			// Compute PPF between the reference point and the paired point.
			// The reference must be points[referenceIndex] (not points[sr]):
			// transformSceneToGlobal was built from that very point.
			pointPairSV PPF=pointPairSV(cloudWithNormalsDownSampled->points[referenceIndex],*si, transformSceneToGlobal);

			// Compute index
			index=PPF.getHash(*si,model->distanceStepInverted);

			// If distance between point pairs is bigger than the maximum for this model, skip point pair
			if(index>pointPairSV::maxHash)
			{
				continue;
			}

			// If there is no similar point pair features in the model, skip point pair and avoid computing the alpha
			if(model->hashTable[index].size()==0)
				continue;

			for(sameFeatureIt=model->hashTable[index].begin(); sameFeatureIt<model->hashTable[index].end(); ++sameFeatureIt)
			{
				// Vote on the reference point and angle (and object)
				alpha=sameFeatureIt->alpha-PPF.alpha; // alpha values between [-360,360]

				// alpha values should be between [-180,180] ANGLE_MAX = 2*PI
				if(alpha<(-PI))
					alpha=ANGLE_MAX+alpha;
				else if(alpha>(PI))
					alpha=alpha-ANGLE_MAX;

				alphaBin=static_cast<unsigned int> ( round((alpha+PI)*pointPair::angleStepInverted) ); // division is slower than multiplication

				// Rounding can land one past the last bin; wrap it to bin 0
				if(alphaBin>=pointPair::angleBins)
				{
					alphaBin=0;
				}

				accumulator[sameFeatureIt->id][alphaBin]+=sameFeatureIt->weight;
			}
		}

		// Choose best pose (highest peak on the accumulator[peak with more votes])

		int bestPoseAlpha=0;
		int bestPosePoint=0;
		int bestPoseVotes=0;

		for(size_t p=0; p < model->modelCloud->size(); ++p)
		{
			for(unsigned int a=0; a < pointPair::angleBins; ++a)
			{
				if(accumulator[p][a]>bestPoseVotes)
				{
					bestPoseVotes=accumulator[p][a];
					bestPosePoint=p;
					bestPoseAlpha=a;
				}
			}
		}

		// No candidate pose was found for this reference point
		if(bestPoseVotes==0)
			continue;

		// Compute and store transformation from model to scene
		bestPosesAux[threadId].push_back(pose( bestPoseVotes,model->modelToScene(bestPosePoint,transformSceneToGlobal,static_cast<float>(bestPoseAlpha)*pointPair::angleStep-PI) ));

		// Choose poses whose votes are a percentage above a given threshold of the best pose
		accumulator[bestPosePoint][bestPoseAlpha]=0; 	// This is more efficient than having an if condition to verify if we are considering the best pose again
		for(size_t p=0; p < model->modelCloud->size(); ++p)
		{
			for(unsigned int a=0; a < pointPair::angleBins; ++a)
			{
				if(accumulator[p][a]>=accumulatorPeakThreshold*bestPoseVotes)
				{
					// Store the hypothesis of THIS cell (its own votes, model
					// point and angle), not a duplicate of the best pose.
					bestPosesAux[threadId].push_back(pose( accumulator[p][a],model->modelToScene(p,transformSceneToGlobal,static_cast<float>(a)*pointPair::angleStep-PI) ));
				}
			}
		}
	}

	std::cout << "Done" << std::endl;

	// Merge the per-thread hypothesis lists
	for(size_t t=0; t<bestPosesAux.size(); ++t)
	{
		for(size_t j=0; j<bestPosesAux[t].size(); ++j)
			bestPoses.push_back(bestPosesAux[t][j]);
	}
	std::cout << "\thypothesis number: " << bestPoses.size() << std::endl << std::endl;

	if(bestPoses.size()==0)
	{
		clusters.clear();
		return clusters;
	}


	//////////////////////
	// Compute clusters //
	//////////////////////
Tac();
	std::cout << "\tCompute clusters... ";
Tic();
	clusters=poseClustering(bestPoses);
Tac();
	std::cout << "Done" << std::endl;

	return clusters;
}
void parallel_lu(int argc, char **argv, double **matrix, int dim, int block_dim, int rank2print, int doSerial, int numThreads) {
	omp_set_num_threads(numThreads);
  int procs;
	int rank;
	MPI_Comm_size(MPI_COMM_WORLD, &procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Status status;
	MPI_Request request;

	int num_rows = sqrt(procs);
	int num_cols = sqrt(procs);

	int dimSize[2] = {num_rows, num_cols};
	int periodic[2] = {0, 0};
	int myCoords[2];

	MPI_Comm comm2D;
	MPI_Cart_create(MPI_COMM_WORLD, 2, dimSize, periodic, 0, &comm2D);

	int myRow, myCol;
	MPI_Cart_coords(comm2D, rank, 2, myCoords);
	myRow = myCoords[0]; 
	myCol = myCoords[1];

	//Determine the neighbour rank numbers
	int rightRank;
	int leftRank = rank;
	int botRank;
	int topRank = rank;

	MPI_Cart_shift(comm2D, 1, 1, &leftRank, &rightRank);
	MPI_Cart_shift(comm2D, 0, 1, &topRank, &botRank);

	double **L = create_zero_matrix(dim);
	double *LBuffSend = (double*) malloc (block_dim * sizeof(double));
	double *LBuffRecv = (double*) malloc (block_dim * sizeof(double));
	double *PBuffSend = (double*) malloc (block_dim * sizeof(double));
	double *PBuffRecv = (double*) malloc (block_dim * sizeof(double));

	int i,j,k;
	// initialize buffers
	for (i=0;i<block_dim;i++) {
		LBuffSend[i] = LBuffRecv[i] = PBuffSend[i] = PBuffRecv[i] = 0;
	} 

	// initialize L diag
	for (i=0;i<dim;i++) {
		L[i][i] = 1.0;
	}

	int proc_per_row = dim/block_dim; 
	int col_start = (rank*block_dim) % dim;
	int col_end = col_start+block_dim-1;
	int row_start = (rank/proc_per_row)*block_dim;
	int row_end = row_start+block_dim-1;

	if(rank==rank2print) {
		printf("Rank %i\n", rank);
		printf("myRow of proc:%i\n", myRow);
		printf("myCol of proc:%i\n", myCol);
		printf("Right rank is: %i\n",rightRank);
		printf("Left rank is: %i\n",leftRank);
		printf("Top rank is: %i\n",topRank);
		printf("Bottom rank is: %i\n",botRank);
		printf("Col start %i\n", col_start);
		printf("Col end %i\n", col_end);
		printf("Row start %i\n", row_start);
		printf("Row end %i\n", row_end);
		//print_matrix(dim,matrix);
	}

	//Main computation loop
	for(k=0;k<dim;k++) {
		bool kInMyRows = k >= row_start && k <= row_end;
		bool kInTopRows = k <= row_end-block_dim;
		bool kInBotRows = k >= row_start+block_dim;
		
		bool kInMyCols = k>=col_start && k<=col_end;
		bool kInLeftCols = k <= col_end-block_dim;
		bool kInRightCols = k >= col_start+block_dim;

		//Send & recieve pivot row
		//Recieve PBuffRec from top
		if(topRank >= 0 && kInTopRows && !kInRightCols) {
			MPI_Recv(PBuffRecv, block_dim, MPI_DOUBLE, topRank, 0, MPI_COMM_WORLD, &status);
			if(rank==rank2print) {
				printf("Received pivot row from rank %i for k = %i: ",topRank,k);
				print_vector(block_dim,PBuffRecv);
			}
			//Place PBuffRecv in correct place of matrix
			for(j=col_start;j<=col_end;j++) {
				if(j>=k) {
					matrix[k][j] = PBuffRecv[j-col_start];
				}
			}
		}
		//send PBuffSend to bottom 
		if(botRank >= 0 && !kInRightCols) {
			if(kInMyRows) { //pivot row is generated from this process
				//Assemble PBuffSend
				for(j=col_start;j<=col_end;j++) {
					if(j>=k) {
						PBuffSend[j-col_start] = matrix[k][j];
					}
				}
				if(rank==rank2print) {
					printf("Sending pivot row to rank %i for k = %i (Creating): ",botRank,k);
					print_vector(block_dim,PBuffSend);
				}
			}
			else if(kInTopRows) { //pivot row is generated in a top process; just pass the recieved one along
				//Assemble PBuffSend
				for(j=col_start;j<=col_end;j++) {
					if(j>=k) {
						PBuffSend[j-col_start] = PBuffRecv[j-col_start];
					}
				}
				if(rank==rank2print) {
					printf("Sending pivot row to rank %i for k = %i (Passing): ",botRank,k);
					print_vector(block_dim,PBuffSend);
				}
			}
			MPI_Isend(PBuffSend, block_dim, MPI_DOUBLE, botRank, 0, MPI_COMM_WORLD, &request);
		} 

		//Calculate ratios
		
		if(kInMyCols) {
			for(i=row_start;i<=row_end;i++) {
				if (i>k) {
					L[i][k] = matrix[i][k]/matrix[k][k];
				}
			}
		}

		//Wait for PBuffSend to be usable
		if(botRank >= 0 && kInMyRows)
			MPI_Wait(&request, &status);

		if(rank==rank2print) {
			printf("L:\n");
			print_matrix_chunk(block_dim,row_start,col_start,L);
		}

		//Send & recieve ratios
		//Recieve LBuffRec from left
		if(leftRank >= 0 && kInLeftCols && !kInBotRows) {
			MPI_Recv(LBuffRecv, block_dim, MPI_DOUBLE, leftRank, 0, MPI_COMM_WORLD, &status);
			if(rank==rank2print) {
				printf("Recieved L from rank %i: ",leftRank);
				print_vector(block_dim,LBuffRecv);
			}
			//Place LBuffRecv in correct place of L[i][k]
			for(i=row_start;i<=row_end;i++) {
				if(i>k) {
					L[i][k] = LBuffRecv[i-row_start];
				}
			}
		}
		//send LBuffSend to right
		if(rightRank >= 0 && !kInBotRows) { 
			if(kInMyCols) {  //ratio is generated from this process
				//Assemble LBuffSend
				for(i=row_start;i<=row_end;i++) {
					if(i>k) {
						LBuffSend[i-row_start] = L[i][k];
					}
				}
				if(rank==rank2print) {
					printf("Sending L to rank %i for k = %i: (Creating)",rightRank,k);
					print_vector(block_dim,LBuffSend);
				}
			}
			else if(kInLeftCols) { //ratio is generated in a left process; just pass the recieved one along
				//Assemble LBuffSend
				for(i=row_start;i<=row_end;i++) {
					if(i>k) {
						LBuffSend[i-row_start] = LBuffRecv[i-row_start];
					}
				}
				if(rank==rank2print) {
					printf("Sending L to rank %i for k = %i (Passing): ",rightRank,k);
					print_vector(block_dim,LBuffSend);
				}
			}
			MPI_Isend(LBuffSend, block_dim, MPI_DOUBLE, rightRank, 0, MPI_COMM_WORLD, &request);
		}

		//Compute upper triangular matrix
    #pragma omp parallel for private(j,i) firstprivate(k,col_start,col_end) 
		for (j=col_start;j<=col_end;j++) {
			if (j>=k) {
				for (i=row_start;i<=row_end;i++) {
					if (i>k) {
						matrix[i][j] = matrix[i][j]-L[i][k]*matrix[k][j];
					}
				}
			}
		}

		//Wait for LBuffSend to be usable
		if(rightRank >= 0 && kInMyCols)
			MPI_Wait(&request, &status);

		if(rank==rank2print) {
			printf("U:\n");
			print_matrix_chunk(block_dim,row_start,col_start,matrix);
		}

	}

	/*
	double **L_chunk = create_zero_matrix(block_dim);
	double **U_chunk = create_zero_matrix(block_dim);
	// copy chunk data
	int r = 0;
	for(i=row_start;i<=row_end;i++) {
	int c = 0;
	for(j=col_start;j<=col_end;j++) {
	L_chunk[r][c] = L[i][j];
	U_chunk[r][c] = matrix[i][j];  
	c++;
	}
	r++;
	}*/

	if(rank2print == -1) {
		printf("Rank %i\n",rank);
		printf("L\n"); 
		print_matrix_chunk(block_dim,row_start,col_start,L);
		//print_matrix(block_dim,L_chunk);
		printf("U\n"); 
		print_matrix_chunk(block_dim,row_start,col_start,matrix);
		//print_matrix(block_dim,U_chunk);
	}

	/*if(rank != 0) {
	// send L and U chunks to process 0
	MPI_Isend(L_chunk,block_dim*block_dim,MPI_DOUBLE,0,rank*,MPI_COMM_WORLD,&request);
	} else {
	// receive L and U chunks from all processes
	}*/

	free_matrix(dim,L);
	free_matrix(dim,matrix);
}
Ejemplo n.º 4
0
int dt_init(int argc, char *argv[], const int init_gui)
{
  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#if !defined __APPLE__ && !defined __WIN32__
  _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler);
#endif

#ifndef __SSE2__
  fprintf(stderr, "[dt_init] unfortunately we depend on SSE2 instructions at this time.\n");
  fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n");
  return 1;
#endif

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */
#endif

  // we have to have our share dir in XDG_DATA_DIRS,
  // otherwise GTK+ won't find our logo for the about screen (and maybe other things)
  {
    const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS");
    gchar *new_xdg_data_dirs = NULL;
    gboolean set_env = TRUE;
    if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0')
    {
      // check if DARKTABLE_SHAREDIR is already in there
      gboolean found = FALSE;
      gchar **tokens = g_strsplit(xdg_data_dirs, ":", 0);
      // xdg_data_dirs is neither NULL nor empty => tokens != NULL
      for(char **iter = tokens; *iter != NULL; iter++)
        if(!strcmp(DARKTABLE_SHAREDIR, *iter))
        {
          found = TRUE;
          break;
        }
      g_strfreev(tokens);
      if(found)
        set_env = FALSE;
      else
        new_xdg_data_dirs = g_strjoin(":", DARKTABLE_SHAREDIR, xdg_data_dirs, NULL);
    }
    else
      new_xdg_data_dirs = g_strdup(DARKTABLE_SHAREDIR);

    if(set_env)
      g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1);
    g_free(new_xdg_data_dirs);
  }

  setlocale(LC_ALL, "");
  bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
  textdomain (GETTEXT_PACKAGE);


  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.progname = argv[0];

  // database
  gchar *dbfilename_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *images_to_load = NULL, *config_override = NULL;
  for(int k=1; k<argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help"))
      {
        return usage(argv[0]);
      }
      if(!strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
        printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2014 johannes hanika\n"PACKAGE_BUGREPORT"\n");
        return 1;
      }
      else if(!strcmp(argv[k], "--library"))
      {
        dbfilename_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--datadir"))
      {
        datadir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--moduledir"))
      {
        moduledir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--tmpdir"))
      {
        tmpdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--configdir"))
      {
        configdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--cachedir"))
      {
        cachedir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--localedir"))
      {
        bindtextdomain (GETTEXT_PACKAGE, argv[++k]);
      }
      else if(argv[k][1] == 'd' && argc > k+1)
      {
        if(!strcmp(argv[k+1], "all"))             darktable.unmuted = 0xffffffff;   // enable all debug information
        else if(!strcmp(argv[k+1], "cache"))      darktable.unmuted |= DT_DEBUG_CACHE;   // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k+1], "control"))    darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k+1], "dev"))        darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k+1], "fswatch"))    darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module
        else if(!strcmp(argv[k+1], "input"))      darktable.unmuted |= DT_DEBUG_INPUT; // input devices
        else if(!strcmp(argv[k+1], "camctl"))     darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k+1], "perf"))       darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k+1], "pwstorage"))  darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k+1], "opencl"))     darktable.unmuted |= DT_DEBUG_OPENCL;    // gpu accel via opencl
        else if(!strcmp(argv[k+1], "sql"))        darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k+1], "memory"))     darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k+1], "nan"))        darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else if(!strcmp(argv[k+1], "masks"))      darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff.
        else if(!strcmp(argv[k+1], "lua"))        darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console
        else return usage(argv[0]);
        k ++;
      }
      else if(argv[k][1] == 't' && argc > k+1)
      {
        darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k ++;
      }
      else if(!strcmp(argv[k], "--conf"))
      {
        gchar *keyval = g_strdup(argv[++k]), *c = keyval;
        while(*c != '=' && c < keyval + strlen(keyval)) c++;
        if(*c == '=' && *(c+1) != '\0')
        {
          *c++ = '\0';
          dt_conf_string_entry_t *entry = (dt_conf_string_entry_t*)g_malloc(sizeof(dt_conf_string_entry_t));
          entry->key = g_strdup(keyval);
          entry->value = g_strdup(c);
          config_override = g_slist_append(config_override, entry);
        }
        g_free(keyval);
      }
    }
#ifndef MAC_INTEGRATION
    else
    {
      images_to_load = g_slist_append(images_to_load, argv[k]);
    }
#endif
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#if !GLIB_CHECK_VERSION(2, 35, 0)
  g_type_init();
#endif

  // does not work, as gtk is not inited yet.
  // even if it were, it's a super bad idea to invoke gtk stuff from
  // a signal handler.
  /* check cput caps */
  // dt_check_cpu(argc,argv);

#ifdef HAVE_GEGL
  char geglpath[DT_MAX_PATH_LEN];
  char datadir[DT_MAX_PATH_LEN];
  dt_loc_get_datadir(datadir, DT_MAX_PATH_LEN);
  snprintf(geglpath, DT_MAX_PATH_LEN, "%s/gegl:/usr/lib/gegl-0.0", datadir);
  (void)setenv("GEGL_PATH", geglpath, 1);
  gegl_init(&argc, &argv);
#endif
#ifdef USE_LUA
  dt_lua_init_early(NULL);
#endif

  // thread-safe init:
  dt_exif_init();
  char datadir[DT_MAX_PATH_LEN];
  dt_loc_get_user_config_dir (datadir,DT_MAX_PATH_LEN);
  char filename[DT_MAX_PATH_LEN];
  snprintf(filename, DT_MAX_PATH_LEN, "%s/darktablerc", datadir);

  // initialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)malloc(sizeof(dt_conf_t));
  memset(darktable.conf, 0, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, filename, config_override);
  g_slist_free_full(config_override, g_free);

  // set the interface language
  const gchar* lang = dt_conf_get_string("ui_last/gui_language");
  if(lang != NULL && lang[0] != '\0')
  {
    if(setlocale(LC_ALL, lang) != NULL)
      gtk_disable_setlocale();
  }

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(!dt_database_get_lock_acquired(darktable.db))
  {
    // send the images to the other instance via dbus
    if(images_to_load)
    {
      GSList *p = images_to_load;

      // get a connection!
      GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL);

      while (p != NULL)
      {
        // make the filename absolute ...
        gchar *filename = dt_make_path_absolute((gchar*)p->data);
        if(filename == NULL) continue;
        // ... and send it to the running instance of darktable
        g_dbus_connection_call_sync(connection,
                                    "org.darktable.service",
                                    "/darktable",
                                    "org.darktable.service.Remote",
                                    "Open",
                                    g_variant_new ("(s)", filename),
                                    NULL,
                                    G_DBUS_CALL_FLAGS_NONE,
                                    -1,
                                    NULL,
                                    NULL);
        p = g_slist_next(p);
        g_free(filename);
      }

      g_slist_free(images_to_load);
      g_object_unref(connection);
    }

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Initialize the filesystem watcher
  darktable.fswatch=dt_fswatch_new();

#ifdef HAVE_GPHOTO2
  // Initialize the camera control
  darktable.camctl=dt_camctl_new();
#endif

  // get max lighttable thumbnail size:
  darktable.thumbnail_width  = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"),  200, 3000);
  darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000);
  // and make sure it can be mip-mapped all the way from mip4 to mip0
  darktable.thumbnail_width  /= 16;
  darktable.thumbnail_width  *= 16;
  darktable.thumbnail_height /= 16;
  darktable.thumbnail_height *= 16;

  // Initialize the password storage engine
  darktable.pwstorage=dt_pwstorage_new();

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)malloc(sizeof(dt_control_t));
  memset(darktable.control, 0, sizeof(dt_control_t));
  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
      dt_gui_presets_init(); // init preset db schema.
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection_listeners = NULL;
  darktable.collection = dt_collection_new(NULL);

  /* initialize selection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);
#endif

  darktable.opencl = (dt_opencl_t *)malloc(sizeof(dt_opencl_t));
  memset(darktable.opencl, 0, sizeof(dt_opencl_t));
#ifdef HAVE_OPENCL
  dt_opencl_init(darktable.opencl, argc, argv);
#endif

  darktable.blendop = (dt_blendop_t *)malloc(sizeof(dt_blendop_t));
  memset(darktable.blendop, 0, sizeof(dt_blendop_t));
  dt_develop_blend_init(darktable.blendop);

  darktable.points = (dt_points_t *)malloc(sizeof(dt_points_t));
  memset(darktable.points, 0, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)malloc(sizeof(dt_image_cache_t));
  memset(darktable.image_cache, 0, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)malloc(sizeof(dt_mipmap_cache_t));
  memset(darktable.mipmap_cache, 0, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)malloc(sizeof(dt_gui_gtk_t));
    memset(darktable.gui,0,sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1;
    dt_bauhaus_init();
  }
  else darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)malloc(sizeof(dt_view_manager_t));
  memset(darktable.view_manager, 0, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
    darktable.lib = (dt_lib_t *)malloc(sizeof(dt_lib_t));
    memset(darktable.lib, 0, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_control_load_config(darktable.control);
  }
  darktable.imageio = (dt_imageio_t *)malloc(sizeof(dt_imageio_t));
  memset(darktable.imageio, 0, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  if(init_gui)
  {
    // Loading the keybindings
    char keyfile[DT_MAX_PATH_LEN];

    // First dump the default keymapping
    snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // initialize undo struct
    darktable.undo = dt_undo_init();

    // load image(s) specified on cmdline
    int id = 0;
    if(images_to_load)
    {
      // If only one image is listed, attempt to load it in darkroom
      gboolean load_in_dr = (g_slist_next(images_to_load) == NULL);
      GSList *p = images_to_load;

      while (p != NULL)
      {
        // don't put these function calls into MAX(), the macro will evaluate
        // it twice (and happily deadlock, in this particular case)
        int newid = dt_load_from_string((gchar*)p->data, load_in_dr);
        id = MAX(id, newid);
        p = g_slist_next(p);
      }

      if (!load_in_dr || id == 0)
        dt_ctl_switch_mode_to(DT_LIBRARY);

      g_slist_free(images_to_load);
    }
    else
      dt_ctl_switch_mode_to(DT_LIBRARY);
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  dt_image_local_copy_synch();

  /* init lua last, since it's user made stuff it must be in the real environment */
#ifdef USE_LUA
  dt_lua_init(darktable.lua_state.state,init_gui);
#endif
  return 0;
}
Ejemplo n.º 5
0
/**
 * Program entry point: scores every descriptor of the query file against
 * every descriptor of the target (database) file and writes the results
 * to a "digest" output file.
 *
 * Command line:  <prog> <db file> <qry file> [<parameter file>]
 *   argv[1]  target/database descriptor file
 *   argv[2]  query descriptor file
 *   argv[3]  optional command file, read with read_cmd_file() only when
 *            exactly four arguments are given
 *
 * Output file naming:
 *   - if options.outname is non-empty after the defaults/command file were
 *     applied, that single file collects the results of all queries;
 *   - otherwise one "<query name>.struct_out" file is written per query;
 *   - with options.postprocess set, the name is always
 *     "<query name>_<first target name>.struct_out" and only the first
 *     usable query is processed.
 *
 * Returns 0 on success and 1 if an input file cannot be opened or the
 * command file cannot be read; every other failure terminates the process
 * through exit().
 */
int main(int argc, char * argv[]) {

    Descr qry_descr = {
        {0}
    };
    Descr tgt_descr = {
        {0}
    };
    clock_t CPU_time_begin, CPU_time_end;
    int retval, qry_done, tgt_done;
    int db_ctr, db_effective_ctr;
    int user_defined_name;
    FILE * qry_fptr = NULL, * tgt_fptr = NULL, * digest = NULL;

    int compare(Descr *descr1, Descr *descr2, Score * score, Score * score_hung);
    int read_cmd_file(char *filename);

    if (argc < 3) {
        fprintf(stderr,
                "Usage: %s <db file> <qry file> [<parameter file>].\n",
                argv[0]);
        exit(1);
    }
    if (!(qry_fptr = efopen(argv[2], "r"))) return 1;
    if (!(tgt_fptr = efopen(argv[1], "r"))) return 1;

    /* set defaults: */
    set_default_options();

    /* change them with the cmd file, if the cmd file given */
    if (argc == 4) {
        if (read_cmd_file(argv[3])) return 1;
    }

    /* read in the table of integral values */
    /* the array int_table in struct_table.c */
    if (read_integral_table(options.path)) {
        fprintf(stderr, "In data file  %s.\n\n", options.path);
        exit(1);
    }
    set_up_exp_table();

    /* non-zero when an output name was supplied through the options */
    user_defined_name = options.outname[0];


    /*********************************/
    /* loop over the query database :*/
    qry_done = 0;
    retval = -1;
    db_effective_ctr = 0;
    CPU_time_begin = clock();

    while (!qry_done) {
        /* 1: skip this entry, -1: no more queries, anything else: use it */
        retval = get_next_descr(qry_fptr, &qry_descr);
        if (retval == 1) {
            continue;
        } else if (retval == -1) {
            qry_done = 1;
            continue;
        }

        /* digest file for larger scale comparisons;
           when one is already open we keep writing into it */
        if (!digest) {
            if (!user_defined_name) {
                sprintf(options.outname, "%s.struct_out",
                        qry_descr.name);
            }

            /* output name in postprocessing consists of query and target
               name, so the first target has to be read here already */
            retval = get_next_descr(tgt_fptr, &tgt_descr);
            if (options.postprocess) {
                sprintf(options.outname, "%s_%s.struct_out", qry_descr.name, tgt_descr.name);
            }

            digest = efopen(options.outname, "w");
            if (!digest) exit(1);
            if (options.print_header) {
                fprintf(digest, "%% columns: \n");
                fprintf(digest, "%% query, target: structure names\n");
                fprintf(digest, "%% geom_z:  z score for the orientational match \n");
                fprintf(digest, "%% <dL>:    average length mismatch for matched SSEs \n");
                fprintf(digest, "%% T:       total score assigned to matched SSEs \n");
                fprintf(digest, "%% frac:    T divided by the number of matched SSEs \n");
                fprintf(digest, "%% GC_rmsd: RMSD btw geometric centers of matched SSEs (before postprocessing) \n");
                fprintf(digest, "%% A:       (after postprocessing) the alignment score \n");
                fprintf(digest, "%% aln_L:   (after postprocessing) the alignment length \n\n");
                fprintf(digest, "%% %6s%6s %6s %6s  %6s %6s %6s %6s %6s %6s \n",
                        "query ", "target ", "geom_z", "<dL>", "  T  ", "frac",
                        "GC_rmsd", "rmsd  ", "A  ", "aln_L  ");
            }
        }

        /* The targets are loaded into an array first so that the
           comparison below can run as a parallel for loop. */
        int tgt_counter = 0;
        int i;
        int *retval_array;
        Descr *tgt_descr_array;

        rewind(tgt_fptr);
        tgt_done = 0;

        /* first pass: count the entries of the target file */
        while (!tgt_done) {
            retval = get_next_descr(tgt_fptr, &tgt_descr);
            if (retval == 0 || retval == 1) {
                tgt_counter++;
            } else if (retval == -1) {
                tgt_done = 1;
            }
        }

        /* second pass: store every target together with its read status */
        rewind(tgt_fptr);
        tgt_descr_array = (Descr *) calloc(tgt_counter, sizeof(Descr));
        retval_array = (int *) calloc(tgt_counter, sizeof(int));
        /* A zero-length request may legitimately yield NULL; any other NULL
           is an allocation failure and must not be dereferenced below. */
        if (tgt_counter > 0 && (tgt_descr_array == NULL || retval_array == NULL)) {
            printf("malloc return NULL!\n");
            exit(1);
        }

        for (i = 0; i < tgt_counter; ++i) {
            retval = get_next_descr(tgt_fptr, &tgt_descr_array[i]);
            retval_array[i] = retval;
        }

        rewind(tgt_fptr);
        db_ctr = 0;
        db_effective_ctr = 0;
        if (!user_defined_name) CPU_time_begin = clock();
        retval = -1;

        /* postprocessing runs single threaded, the plain search on 6 threads */
        if (options.postprocess) omp_set_num_threads(1);
        else omp_set_num_threads(6);

        #pragma omp parallel
        {

            #pragma omp for
            for (i = 0; i < tgt_counter; ++i) {

                /* per-iteration locals; they intentionally shadow the
                   function-level retval and tgt_descr */
                int retval = retval_array[i];
                /* Two scores: one for Smith Waterman, another for Hungarian
                   in the database search phase. Zero-initialised because the
                   no-overlap report below passes 'score' on without it ever
                   having been filled in by compare(). */
                Score score = {0};
                Score score_hung = {0};
                Descr tgt_descr = tgt_descr_array[i];

                #pragma omp atomic
                db_ctr++;

                if (retval == 1) {
                    continue;
                } else if (retval == -1) {
                    /* the counting pass saw this entry, so end-of-input
                       here means the two passes disagree */
                    printf("Error!!!!\n");
                    exit(1);
                } else {

                    /* min number of elements */
                    int helix_overlap =
                            (qry_descr.no_of_helices < tgt_descr.no_of_helices) ?
                            qry_descr.no_of_helices : tgt_descr.no_of_helices;
                    int strand_overlap =
                            (qry_descr.no_of_strands < tgt_descr.no_of_strands) ?
                            qry_descr.no_of_strands : tgt_descr.no_of_strands;
                    /* initialised: it is handed to print_score() even when
                       no comparison was made */
                    double fraction_assigned = 0.0;
                    int query_size = qry_descr.no_of_strands + qry_descr.no_of_helices;
                    int target_size = tgt_descr.no_of_strands + tgt_descr.no_of_helices;

                    if (helix_overlap + strand_overlap >= options.min_no_SSEs) {
                        /* the assigned score is normalised by the smaller structure */
                        int smaller_size = (query_size > target_size) ? target_size : query_size;
                        Score *reported = NULL;

                        #pragma omp atomic
                        db_effective_ctr++;

                        /* here is the core of the operation: */
                        retval = compare(&tgt_descr, &qry_descr, &score, &score_hung);
                        if (retval) {
                            printf(" error comparing  db:%s   query:%s   \n",
                                    tgt_descr.name, qry_descr.name);
                            exit(retval);
                        }

                        /*
                         * Output score. Can be based:
                         * - only on SW alignment during the database search
                         * - only on Hungarian algorithm during the database search
                         * - on whichever of the two has the larger total score
                         * Any other setting prints nothing.
                         */
                        switch (options.score_out) {
                            case 0: /* SW */
                                reported = &score;
                                break;
                            case 1: /* Hungarian */
                                reported = &score_hung;
                                break;
                            case 2: /* either SW or Hungarian, depends on score */
                                reported = (score.total_assigned_score > score_hung.total_assigned_score)
                                        ? &score : &score_hung;
                                break;
                            default:
                                break;
                        }

                        if (reported) {
                            fraction_assigned = reported->total_assigned_score / smaller_size;
                            retval = print_score(digest, &qry_descr, &tgt_descr, reported, fraction_assigned, 1);
                        }

                        if (retval) {
                            printf("error in printing to output file\n");
                            exit(retval);
                        }

                    } else if (options.report_no_sse_overlap) {
                        retval = print_score(digest, &qry_descr, &tgt_descr, &score, fraction_assigned, 0);
                        if (retval) {
                            printf("error in printing to output file\n");
                            exit(retval);
                        }
                    }
                }
            }

        }

        /* release the per-query copy of the target database */
        for (i = 0; i < tgt_counter; ++i) {
            descr_shutdown ( &tgt_descr_array[i] );
        }
        free(tgt_descr_array);
        free(retval_array);

        if (!user_defined_name && db_effective_ctr) {
            CPU_time_end = clock();
            fprintf(digest, "done   CPU:  %10.3lf s\n", (double) (CPU_time_end - CPU_time_begin) / CLOCKS_PER_SEC);
            fflush(digest);
        }

        if (!user_defined_name) {
            fclose(digest);
            digest = NULL;
        } /* otherwise we keep writing into the same digest file */

        if (options.postprocess) qry_done = 1; /* for now, we postprocess only
						one pair of structures (not structure against database) */

    }

    if (digest) {
        CPU_time_end = clock();
        fprintf(digest, "done   CPU:  %10.3lf s\n", (double) (CPU_time_end - CPU_time_begin) / CLOCKS_PER_SEC);
        fflush(digest);
    }
    if (options.verbose) {
        printf("\n\nlooked at %d db entries.\n",
                db_effective_ctr);
        printf("the output written to %s.\n\n", options.outname);
    }

    /**************************************************/
    /* housekeeping, good for tracking memory leaks   */
    if (digest) fclose(digest);
    descr_shutdown(&qry_descr);
    descr_shutdown(&tgt_descr);

    fclose(qry_fptr);
    fclose(tgt_fptr);

    return 0;

}
Ejemplo n.º 6
0
//
//  benchmarking program
//
int main(int argc, char **argv) {
  if( find_option( argc, argv, "-h" ) >= 0 )
  {
    printf( "Options:\n" );
    printf( "-h to see this help\n" );
    printf( "-n <int> to set number of particles\n" );
    printf( "-o <filename> to specify the output file name\n" );
    printf( "-s <filename> to specify a summary file name\n" );
    printf( "-no turns off all correctness checks and particle output\n");
    printf( "-p <int> to set the (maximum) number of threads used\n");
    return 0;
  }

  const int n = read_int( argc, argv, "-n", 1000 );
  const bool fast = (find_option( argc, argv, "-no" ) != -1);
  const char *savename = read_string( argc, argv, "-o", NULL );
  const char *sumname = read_string( argc, argv, "-s", NULL );
  const int num_threads_override = read_int( argc, argv, "-p", 0);


  FILE *fsave = ((!fast) && savename) ? fopen( savename, "w" ) : NULL;
  FILE *fsum = sumname ? fopen ( sumname, "a" ) : NULL;

  const double size = set_size( n );
  // We need to set the size of a grid square so that the average number of
  // particles per grid square is constant.  The simulation already ensures
  // that the average number of particles in an arbitrary region is constant
  // and proportional to the area.  So this is just a constant.
  const double grid_square_size = sqrt(0.0005) + 0.000001;
  const int num_grid_squares_per_side = size / grid_square_size;
  printf("Using %d grid squares of side-length %f for %d particles.\n", num_grid_squares_per_side*num_grid_squares_per_side, grid_square_size, n);
  std::unique_ptr<std::vector<particle_t> > particles = init_particles(n);

  if (num_threads_override > 0) {
    omp_set_dynamic(0);   // fixed number of threads
    omp_set_num_threads(num_threads_override);  // assign number of threads
  }

  //
  //  simulate a number of time steps
  //
  double simulation_time = read_timer( );

  int max_num_threads = omp_get_max_threads();
  int num_actual_threads;

  // User-defined reductions aren't available in the version of OMP we're
  // using.  Instead, we accumulate per-thread stats in this global array
  // and reduce manually when we're done.
  Stats per_thread_stats[max_num_threads];

  // Shared across threads.
  std::unique_ptr<OmpThreadsafeGrid> old_grid(new OmpThreadsafeGrid(size, num_grid_squares_per_side));
  std::unique_ptr<OmpThreadsafeGrid> next_grid(new OmpThreadsafeGrid(size, num_grid_squares_per_side));


  #pragma omp parallel
  {
    #pragma omp atomic write
    num_actual_threads = omp_get_num_threads();   //get number of actual threads

    int thread_idx = omp_get_thread_num();    
    Stats thread_stats;
    for (int step = 0; step < 1000; step++) {
      // If this is the first step, we must initialize the grid here
      // without respecting cache locality.  Since we cannot use the existing
      // grid, we have to just divide the particles arbitrarily.  This
      // means that the subsequent code for simulating forces and movement
      // will have almost no cache locality on the first iteration: Each thread
      // has picked up an arbitrary subset of the particles to insert into the
      // grid, and then the threads are responsible for simulating a different,
      // mostly-disjoint subset of the particles.  On subsequent iterations,
      // only the particles that have moved will cause cache misses, so we
      // should have much better locality.  If we want to really optimize,
      // it may be worth rethinking how we store particles and communicate among
      // threads.  But at that point we might as well write distributed-memory
      // code.
      if (step == 0) {
        #pragma omp for
        for (int i = 0; i < n; i++) {
          next_grid->add((*particles)[i]);
        }
      }

      // Here we are building the grid that maps locations to sets of
      // particles.  This step does O(n) work, so it is a bottleneck if done
      // serially.  For performance comparisons, we have two versions of the
      // grid-formation code.  The second simply forms the grid serially, in a
      // single arbitrary thread.  The first is parallel and attempts
      // some cache locality.  Each thread is responsible for re-inserting
      // the grid elements that previously lay in its subgrid.  For that reason
      // we need to keep around the old grid while we are building the new one;
      // this is why we have old_grid and next_grid.

      // NOTE: We could instead re-insert each particle right after moving it.
      // This would be faster, but it would require us to think about
      // simultaneous parallel delete and add, while the current scheme needs
      // only support parallel add.  (Deleting the entire grid at once is an
      // O(1) operation, so we can do it in one thread with a barrier.)
      // (The actual simulation operations are read-only on the grid structure
      // and write to each particle only once, so we can simply use two
      // barriers to protect them.
      #pragma omp single
      {
        old_grid.swap(next_grid);
        next_grid.reset(new OmpThreadsafeGrid(size, num_grid_squares_per_side));
      }

      // Now insert each particle into the new grid.
      {
        std::unique_ptr<SimpleIterator<particle_t&> > particles_to_insert = old_grid->subgrid(thread_idx, num_actual_threads);
        while (particles_to_insert->hasNext()) {
          particle_t& p = particles_to_insert->next();
          next_grid->add(p);
        }
      }

      // Now we compute forces for particles.  Each thread handles its assigned
      // subgrid.  We first need a barrier to ensure that everyone sees all
      // the particles in next_grid.
      #pragma omp barrier

      {
        std::unique_ptr<SimpleIterator<particle_t&> > particles_to_force = next_grid->subgrid(thread_idx, num_actual_threads);
        while (particles_to_force->hasNext()) {
          particle_t& p = particles_to_force->next();
          p.ax = p.ay = 0;
          std::unique_ptr<SimpleIterator<particle_t&> > neighbors = next_grid->neighbor_iterator(p);
          while (neighbors->hasNext()) {
            particle_t& neighbor = neighbors->next();
            apply_force(p, neighbor, thread_stats);
          }
        }
      }

      // The barrier here ensures that no particle is moved before it is used
      // in apply_force above.
      #pragma omp barrier

      // Now we move each particle.
      std::unique_ptr<SimpleIterator<particle_t&> > particles_to_move = next_grid->subgrid(thread_idx, num_actual_threads);
      while (particles_to_move->hasNext()) {
        particle_t& p = particles_to_move->next();
        move(p);
      }

      // This barrier is probably unnecessary unless save() is going to happen.
      #pragma omp barrier

      if (!fast) {
        //
        //  save if necessary
        //
        #pragma omp master
        if( fsave && (step%SAVEFREQ) == 0 ) {
          save( fsave, n, (*particles).data() );
        }
      }

      // This barrier is probably unnecessary unless save() happened.
      #pragma omp barrier
    }

    #pragma omp critical
    per_thread_stats[thread_idx] = thread_stats;
  }
  simulation_time = read_timer( ) - simulation_time;

  // Could do a tree reduce here, but it seems unnecessary.
  Stats overall_stats;
  for (int thread_idx = 0; thread_idx < max_num_threads; thread_idx++) {
    overall_stats.aggregate_left(per_thread_stats[thread_idx]);
  }

  printf( "n = %d,threads = %d, simulation time = %g seconds", n,num_actual_threads, simulation_time);

  if (!fast) {
    //
    //  -the minimum distance absmin between 2 particles during the run of the simulation
    //  -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8
    //  -A simulation were particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05
    //
    //  -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting
    //
    printf( ", absmin = %lf, absavg = %lf", overall_stats.min, overall_stats.avg);
    if (overall_stats.min < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting");
    if (overall_stats.avg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting");
  }
  printf("\n");

  //
  // Printing summary data
  //
  if( fsum)
    fprintf(fsum,"%d %d %g\n",n,num_actual_threads, simulation_time);

  //
  // Clearing space
  //
  if( fsum )
    fclose( fsum );

  if( fsave )
    fclose( fsave );

  return 0;
}
Ejemplo n.º 7
0
int main(int argc, char ** argv)
{
  int      my_ID;           /* Thread ID                                         */
  int      vector_length;   /* length of vector loop containing the branch       */
  int      nfunc;           /* number of functions used in INS_HEAVY option      */
  int      rank;            /* matrix rank used in INS_HEAVY option              */
  double   branch_time,     /* timing parameters                                 */
           no_branch_time;
  double   ops;             /* double precision representation of integer ops    */
  int      iterations;      /* number of times the branch loop is carried out    */
  int      i, iter, aux;    /* dummies                                           */
  char     *branch_type;    /* string defining branching type                    */
  int      btype;           /* integer encoding branching type                   */
  int      total=0, 
           total_ref;       /* computed and stored verification values           */
  int      nthread_input;   /* thread parameters                                 */
  int      nthread; 
  int      num_error=0;     /* flag that signals that requested and obtained
                               numbers of threads are the same                  */

/**********************************************************************************
** process and test input parameters    
**********************************************************************************/

  printf("Parallel Research Kernels version %s\n", PRKVERSION);
  printf("OpenMP Branching Bonanza\n");

  if (argc != 5){
    printf("Usage:     %s <# threads> <# iterations> <vector length>", *argv);
    printf("<branching type>\n");
    printf("branching type: vector_go, vector_stop, no_vector, ins_heavy\n");
    exit(EXIT_FAILURE);
  }

  nthread_input = atoi(*++argv);
  if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) {
    printf("ERROR: Invalid number of threads: %d\n", nthread_input);
    exit(EXIT_FAILURE);
  }

  omp_set_num_threads(nthread_input);

  iterations = atoi(*++argv);
  if (iterations < 1 || iterations%2==1){
     printf("ERROR: Iterations must be positive and even : %d \n", iterations);
     exit(EXIT_FAILURE);
  }

  vector_length  = atoi(*++argv);
  if (vector_length < 1){
     printf("ERROR: loop length must be >= 1 : %d \n",vector_length);
     exit(EXIT_FAILURE);
  }

  branch_type = *++argv;
  if      (!strcmp(branch_type,"vector_stop")) btype = VECTOR_STOP;
  else if (!strcmp(branch_type,"vector_go"  )) btype = VECTOR_GO;
  else if (!strcmp(branch_type,"no_vector"  )) btype = NO_VECTOR;
  else if (!strcmp(branch_type,"ins_heavy"  )) btype = INS_HEAVY;
  else  {
    printf("Wrong branch type: %s; choose vector_stop, vector_go, ", branch_type);
    printf("no_vector, or ins_heavy\n");
    exit(EXIT_FAILURE);
  }

  #pragma omp parallel private(i, my_ID, iter, aux, nfunc, rank) reduction(+:total)
  {
  int * RESTRICT vector; int * RESTRICT index;
  int factor = -1;

  #pragma omp master
  {
  nthread = omp_get_num_threads();
  if (nthread != nthread_input) {
    num_error = 1;
    printf("ERROR: number of requested threads %d does not equal ",
           nthread_input);
    printf("number of spawned threads %d\n", nthread);
  } 
  else {
    printf("Number of threads          = %d\n", nthread_input);
    printf("Vector length              = %d\n", vector_length);
    printf("Number of iterations       = %d\n", iterations);
    printf("Branching type             = %s\n", branch_type);
#if RESTRICT_KEYWORD
    printf("No aliasing                = on\n");
#else
    printf("No aliasing                = off\n");
#endif
  }
  }
  bail_out(num_error);

  my_ID = omp_get_thread_num();

  vector = prk_malloc(vector_length*2*sizeof(int));
  if (!vector) {
    printf("ERROR: Thread %d failed to allocate space for vector\n", my_ID);
    num_error = 1;
  }

  bail_out(num_error);

  /* grab the second half of vector to store index array                         */
  index   = vector + vector_length;

  /* initialize the array with entries with varying signs; array "index" is only 
     used to obfuscate the compiler (i.e. it won't vectorize a loop containing
     indirect referencing). It functions as the identity operator.               */
  for (i=0; i<vector_length; i++) { 
    vector[i]  = 3 - (i&7);
    index[i]   = i;
  }

  #pragma omp barrier   
  #pragma omp master
  {   
  branch_time = wtime();
  }

  /* do actual branching */

  switch (btype) {

    case VECTOR_STOP:
      /* condition vector[index[i]]>0 inhibits vectorization                     */
      for (iter=0; iter<iterations; iter+=2) {
        #pragma vector always
        for (i=0; i<vector_length; i++) { 
          aux = -(3 - (i&7));
          if (vector[index[i]]>0) vector[i] -= 2*vector[i];
          else                    vector[i] -= 2*aux;
        }
        #pragma vector always
        for (i=0; i<vector_length; i++) { 
          aux = (3 - (i&7));
          if (vector[index[i]]>0) vector[i] -= 2*vector[i];
          else                    vector[i] -= 2*aux;
        }
      }
      break;

    case VECTOR_GO:
      /* condition aux>0 allows vectorization                                    */
      for (iter=0; iter<iterations; iter+=2) {
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = -(3 - (i&7));
          if (aux>0) vector[i] -= 2*vector[i];
          else       vector[i] -= 2*aux;
        }
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = (3 - (i&7));
          if (aux>0) vector[i] -= 2*vector[i];
          else       vector[i] -= 2*aux;
        }
      }
      break;

    case NO_VECTOR:
      /* condition aux>0 allows vectorization, but indirect indexing inbibits it */
      for (iter=0; iter<iterations; iter+=2) {
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = -(3 - (i&7));
          if (aux>0) vector[i] -= 2*vector[index[i]];
          else       vector[i] -= 2*aux;
        }
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = (3 - (i&7));
          if (aux>0) vector[i] -= 2*vector[index[i]];
          else       vector[i] -= 2*aux;
        }
      }
      break;

    case INS_HEAVY:
      fill_vec(vector, vector_length, iterations, WITH_BRANCHES, &nfunc, &rank);
    }
    #pragma omp master
    {
    branch_time = wtime() - branch_time;
    if (btype == INS_HEAVY) {
      printf("Number of matrix functions = %d\n", nfunc);
      printf("Matrix order               = %d\n", rank);
    }
    }

    /* do the whole thing once more, but now without branches                    */

    #pragma omp barrier
    #pragma omp master
    {   
    no_branch_time = wtime();
    }

    /* do actual branching */

    switch (btype) {

    case VECTOR_STOP:
    case VECTOR_GO:
      for (iter=0; iter<iterations; iter+=2) {
        #pragma vector always
        for (i=0; i<vector_length; i++) { 
          aux = -(3-(i&7)); 
          vector[i] -= (vector[i] + aux);
        }
        for (i=0; i<vector_length; i++) {
          aux = (3-(i&7)); 
          vector[i] -= (vector[i] + aux);
        }
      }
      break;

    case NO_VECTOR:
      for (iter=0; iter<iterations; iter+=2) {
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = -(3-(i&7));
          vector[i] -= (vector[index[i]]+aux); 
        }
        #pragma vector always
        for (i=0; i<vector_length; i++) {
          aux = (3-(i&7));
          vector[i] -= (vector[index[i]]+aux); 
        }
      }
      break;

    case INS_HEAVY:
      fill_vec(vector, vector_length, iterations, WITHOUT_BRANCHES, &nfunc, &rank);
    }

    #pragma omp master
    {
    no_branch_time = wtime() - no_branch_time;
    ops = (double)vector_length * (double)iterations * (double)nthread;
    if (btype == INS_HEAVY) ops *= rank*(rank*19 + 6);
    else                    ops *= 4;
    }

    for (total = 0, i=0; i<vector_length; i++) total += vector[i];
  } /* end of OPENMP parallel region                                             */

  /* compute verification values                                                 */
  total_ref = ((vector_length%8)*(vector_length%8-8) + vector_length)/2*nthread;

  if (total == total_ref) {
    printf("Solution validates\n");
    printf("Rate (Mops/s) with branches:    %lf time (s): %lf\n", 
           ops/(branch_time*1.e6), branch_time);
    printf("Rate (Mops/s) without branches: %lf time (s): %lf\n", 
           ops/(no_branch_time*1.e6), no_branch_time);
#if VERBOSE
    printf("Array sum = %d, reference value = %d\n", total, total_ref);
#endif     
  }
  else {
    printf("ERROR: array sum = %d, reference value = %d\n", total, total_ref);
  }

  exit(EXIT_SUCCESS);
}
Ejemplo n.º 8
0
int main(int argc, char* argv[]) 
{
	bool visualize = true;
	int threads = 8;
	int config = 0;
	real gravity = -9.81;			//acceleration due to gravity
	real timestep = .01;			//step size
	real time_to_run = 1;			//length of simulation
	real current_time = 0;

	int num_steps = time_to_run / timestep;
	int max_iteration = 15;
	int tolerance = 0;

	//=========================================================================================================
	// Create system
	//=========================================================================================================
	ChSystemParallel * system_gpu = new ChSystemParallel;

	//=========================================================================================================
	// Populate the system with bodies/constraints/forces/etc.
	//=========================================================================================================
	ChVector<> lpos(0, 0, 0);
	ChQuaternion<> quat(1, 0, 0, 0);
	real container_width = 5;		//width of area with particles
	real container_length = 25;		//length of area that roller will go over
	real container_thickness = .25;     	//thickness of container walls
	real container_height = 2;		//height of the outer walls
	real particle_radius = .58;

	// Create a material (will be used by both objects)
	ChSharedPtr<ChMaterialSurface> material;
	material = ChSharedPtr<ChMaterialSurface>(new ChMaterialSurface);
	material->SetFriction(0.4);

	// Create a ball
	ChSharedBodyPtr ball = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	InitObject(ball, 
		1, 				// mass
		ChVector<>(0, 10, 0), 		// position
		ChQuaternion<>(1, 0, 0, 0), 	// rotation
		material, 			// material
		true, 				// collide?
		false, 				// static?
		-15, -15);			// collision family
	ball->SetPos_dt(ChVector<>(0,0,10));
	AddCollisionGeometry(ball, SPHERE, particle_radius, lpos, quat);
	FinalizeObject(ball, (ChSystemParallel *) system_gpu);

	// Create a bin for the ball to fall into
	ChSharedBodyPtr bin = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	InitObject(bin, 
		1, 				// mass
		ChVector<>(0, 0, 0), 		// position
		ChQuaternion<>(1, 0, 0, 0), 	// rotation
		material, 			// material
		true, 				// collide?
		true, 				// static?
		-20, -20); 			// collision family
	AddCollisionGeometry(bin, BOX, ChVector<>(container_width, container_thickness, container_length), lpos, quat);
	AddCollisionGeometry(bin, BOX, Vector(container_thickness, container_height, container_length), Vector(-container_width + container_thickness, container_height, 0), quat);
	AddCollisionGeometry(bin, BOX, Vector(container_thickness, container_height, container_length), Vector(container_width - container_thickness, container_height, 0), quat);
	AddCollisionGeometry(bin, BOX, Vector(container_width, container_height, container_thickness), Vector(0, container_height, -container_length + container_thickness), quat);
	AddCollisionGeometry(bin, BOX, Vector(container_width, container_height, container_thickness), Vector(0, container_height, container_length - container_thickness), quat);
	FinalizeObject(bin, (ChSystemParallel *) system_gpu);

	//=========================================================================================================
	// Edit system settings
	//=========================================================================================================
	system_gpu->SetIntegrationType(ChSystem::INT_ANITESCU);
	system_gpu->SetParallelThreadNumber(threads);
	system_gpu->SetMaxiter(max_iteration);
	system_gpu->SetIterLCPmaxItersSpeed(max_iteration);
	system_gpu->SetTol(1e-3);
	system_gpu->SetTolSpeeds(1e-3);
	system_gpu->Set_G_acc(ChVector<>(0, gravity, 0));
	system_gpu->SetStep(timestep);

	((ChLcpSolverParallel *) (system_gpu->GetLcpSolverSpeed()))->SetMaxIteration(max_iteration);
	((ChLcpSolverParallel *) (system_gpu->GetLcpSolverSpeed()))->SetTolerance(0);
	((ChLcpSolverParallel *) (system_gpu->GetLcpSolverSpeed()))->SetCompliance(0, 0, 0);
	((ChLcpSolverParallel *) (system_gpu->GetLcpSolverSpeed()))->SetContactRecoverySpeed(300);
	((ChLcpSolverParallel *) (system_gpu->GetLcpSolverSpeed()))->SetSolverType(ACCELERATED_PROJECTED_GRADIENT_DESCENT);

	((ChCollisionSystemParallel *) (system_gpu->GetCollisionSystem()))->SetCollisionEnvelope(particle_radius * .05);
	((ChCollisionSystemParallel *) (system_gpu->GetCollisionSystem()))->setBinsPerAxis(R3(10, 10, 10));
	((ChCollisionSystemParallel *) (system_gpu->GetCollisionSystem()))->setBodyPerBin(100, 50);

	omp_set_num_threads(threads);

	//=========================================================================================================
	// Enter the time loop and render the simulation
	//=========================================================================================================
	if (visualize) {
		ChOpenGLManager * window_manager = new ChOpenGLManager();
		ChOpenGL openGLView(window_manager, system_gpu, 800, 600, 0, 0, "Test_Solvers");
		openGLView.render_camera->camera_pos = Vector(0, 5, -20);
		openGLView.render_camera->look_at = Vector(0, 0, 0);
		openGLView.SetCustomCallback(RunTimeStep);
		openGLView.StartSpinning(window_manager);
		window_manager->CallGlutMainLoop();
	}

	return 0;
}
Ejemplo n.º 9
0
/* Entry point: fixes the OpenMP thread count, then launches one sampler run. */
int main(){
  const int thread_count = 35;

  omp_set_num_threads(thread_count);

  /* Other drivers that were toggled here by hand in the past:
   *   run_vanilla_nolemma(400,0.05);
   *   run_vanilla(800,0.05);
   */
  run_sampler(100000,0.03,100);

  return 0;
}
Ejemplo n.º 10
0
// 1D shallow-water solver.
//
// Fills the global arrays P (P[0] = depth, P[1] = speed) and U (U[0] = depth,
// U[1] = depth*speed) with a step in depth at the middle of the domain, then
// advances NO_STEPS time steps inside one OpenMP parallel region. Every step:
//   1. each thread computes U_new for its own slice of interior cells,
//   2. each thread copies U_new back into U and recomputes P,
//   3. thread 0 applies reflective conditions to the two end cells.
// Results are written out by Save_Results().
int main(int argc, char** argv) {

	int tid;		// id of this thread inside the team
	int nthreads;		// size of the team that is actually running
	int i, j;
	float FR[2];		// flux across the surface between i and i+1
	float FL[2];		// flux across the surface between i-1 and i
	float speed;
	int index;
	int N_thread;		// number of cells handled by each thread
	printf("1D SW Eqn Solver\n");

	// Set the initial condition
	for (i = 0; i < N; i++) {
		if (i < 0.5*N) {
			P[0][i] = 1.0; // Water Depth
			P[1][i] = 0.0; // Water Speed
		} else {
			P[0][i] = 0.1;
			P[1][i] = 0.0;
		}
		// Compute U vector 
		U[0][i] = P[0][i];              // Depth = mass of fluid
		U[1][i] = P[0][i]*P[1][i];      // Momentum of fluid
	}

	omp_set_num_threads(2); // Request 2 threads for this
	#pragma omp parallel private(tid, nthreads, i, j, FL, FR, speed, index, N_thread) shared(U, P, U_new)
	{

	tid = omp_get_thread_num();
	// Split the cells over the team that really exists (rounding up), so the
	// whole domain is still updated when the runtime does not hand out exactly
	// the requested number of threads. Indices past the last interior cell are
	// rejected by the (i < N-1) test in the loops below.
	nthreads = omp_get_num_threads();
	N_thread = (N + nthreads - 1)/nthreads;
	printf("Thread %d up and running\n", tid);
	
	for (j = 0; j < NO_STEPS; j++) {


		// Compute U_new in all cells (except the ends)
		for (index = 0; index < N_thread; index++) {
			i = tid*N_thread + index;
			if ((i > 0) && (i < (N-1))) {
				// Left Flux first - the flux across the surface between i-1 and i 

				// Rusanov Flux
				speed = sqrtf(0.5*G*(P[0][i-1]+P[0][i]));
				FL[0] = 0.5*(P[0][i-1]*P[1][i-1] + P[0][i]*P[1][i]) - speed*(U[0][i] - U[0][i-1]);
				FL[1] = 0.5*(  (P[0][i-1]*P[1][i-1]*P[1][i-1] + 0.5*G*P[0][i-1]*P[0][i-1]) + (P[0][i]*P[1][i]*P[1][i] + 0.5*G*P[0][i]*P[0][i]) ) - speed*(U[1][i] - U[1][i-1]);
			

				// Right Flux next - the flux across the surface between i and i+1
	
				// Rusanov Flux
				speed = sqrtf(0.5*G*(P[0][i+1]+P[0][i]));
				FR[0] = 0.5*(P[0][i]*P[1][i] + P[0][i+1]*P[1][i+1]) - speed*(U[0][i+1] - U[0][i]);
				FR[1] = 0.5*(  (P[0][i]*P[1][i]*P[1][i] + 0.5*G*P[0][i]*P[0][i]) + (P[0][i+1]*P[1][i+1]*P[1][i+1] + 0.5*G*P[0][i+1]*P[0][i+1]) ) - speed*(U[1][i+1] - U[1][i]);
				
	
				// Now, compute the new U value
				U_new[0][i] = U[0][i] - (DT/DX)*(FR[0]-FL[0]);
				U_new[1][i] = U[1][i] - (DT/DX)*(FR[1]-FL[1]);
			}
			
			// We cannot update P, yet. Next loop.
		}
		// Every thread must be done reading U and P before anyone overwrites them
		#pragma omp barrier
		
		// Update U and P now
		for (index = 0; index < N_thread; index++) {
			i = tid*N_thread + index;
			if ( (i > 0) && (i < (N-1)) ) {
				U[0][i] = U_new[0][i];
				U[1][i] = U_new[1][i];
		
				P[0][i] = U[0][i];	
				P[1][i] = U[1][i]/U[0][i];	
			}
		}
		// The end-cell correction reads cells 1 and N-2, which may belong to other threads
		#pragma omp barrier
		
		if (tid == 0) {
			// Correct ends using reflective conditions
			P[0][0] = P[0][1];  
			P[1][0] = -P[1][1]; 
			
			P[0][N-1] = P[0][N-2];  
			P[1][N-1] = -P[1][N-2]; 

			// Keep U in the end cells consistent with P: the flux terms
			// for cells 1 and N-2 read U[.][0] and U[.][N-1], which would
			// otherwise stay frozen at their initial values.
			U[0][0] = P[0][0];
			U[1][0] = P[0][0]*P[1][0];

			U[0][N-1] = P[0][N-1];
			U[1][N-1] = P[0][N-1]*P[1][N-1];
		}
		// Nobody starts the next step until the end cells are in place
		#pragma omp barrier
		
	}
	} // end parallel section
	// Save the data
	Save_Results();	
	
	
	return 0;
}
Ejemplo n.º 11
0
// Forwards the requested thread count to the OpenMP runtime.
// omp_set_num_threads() takes an int, so the size_t argument is narrowed here.
// NOTE(review): n == 0 and values that do not fit in an int are passed through
// unchecked -- confirm that callers never supply them.
void FishModel::SetNumThreads(size_t n) {
  const int thread_count = static_cast<int>(n);
  omp_set_num_threads(thread_count);
}
int main (int argc, char *argv[])
{
	void inidat();
	float  ***array;        /* array for grid */
	int	taskid,                     /* this task's unique id */
		numtasks,                   /* number of tasks */
		averow,rows,offset,extra,   /* for sending rows of data */
		dest, source,               /* to - from for message send-receive */
		left,right,        /* neighbor tasks */
		msgtype,                    /* for message types */
		rc,start,end,               /* misc */
		i,x,y,z,it,size,t_sqrt;              /* loop variables */
	MPI_Status status;
   	MPI_Datatype dt,dt2; 
    MPI_Request req, req2,req3,req4,req5;
    double t1,t2;

/* First, find out my taskid and how many tasks are running */
   	MPI_Init(&argc,&argv);
   	MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
   	MPI_Comm_rank(MPI_COMM_WORLD,&taskid);

   	/*Set number of threads */
	omp_set_num_threads(atoi(argv[1])); // Use n threads for all consecutive parallel regions
	omp_set_nested(1);

	if (taskid == 0)
   	{
   		//printf("Grid size: X= %d  Y= %d  Time steps= %d\n",NXPROB,NYPROB,STEPS);
   		t1 = MPI_Wtime();
   	}
   	i = 0;
   	while(i*i < (NXPROB*NYPROB)/numtasks)
   		i++;
   	size = i;
   	i = 0;
   	while(i*i<numtasks)
   		i++;
   	t_sqrt = i;
   	MPI_Type_contiguous(size+2,MPI_FLOAT, &dt); 
	MPI_Type_commit(&dt);
	MPI_Type_vector(size+2,1,size+2,MPI_FLOAT,&dt2);
	MPI_Type_commit(&dt2); 
	array = malloc(2*sizeof(float**));
	for (i = 0;i<2;i++){
		array[i] = malloc((2+size)*sizeof(float*));
		array[i][0] = malloc(((2+size)*(2+size))*sizeof(float));
		for (x = 1;x<2+size;x++){
			array[i][x] = &(array[i][0][x*(2+size)]);
		}
	}
	for (z=0; z<2; z++){
		for (x=0; x<2+size; x++){
			for (y=0; y<2+size; y++){
				array[z][x][y] = 0.0;
			}
		}
	}
	z = 0;
	inidat(NXPROB,NYPROB,array[z],size*(taskid/t_sqrt),size*(taskid%t_sqrt),size);
	for (i = 1; i <= STEPS; i++)
	{
		if (taskid/t_sqrt != 0) //not first row
		{
			MPI_Isend(array[z][1],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(array[z][0],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req2);
		}
		if (taskid/t_sqrt != t_sqrt-1) //not last row
		{
			MPI_Isend(array[z][size],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(array[z][size+1],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req3);
		}
		if(taskid%t_sqrt != 0) //not last column
		{
			MPI_Isend(&array[z][0][1],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(&array[z][0][0],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req4);
		}
		if(taskid%t_sqrt != t_sqrt-1) //not last column
		{
			MPI_Isend(&array[z][0][size],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req);
			MPI_Irecv(&array[z][0][size+1],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req5);
		}
		inner_update(size,array[z],array[1-z]);
		if (taskid/t_sqrt != 0) 
			MPI_Wait(&req2,&status);
		if (taskid/t_sqrt != t_sqrt-1) 
			MPI_Wait(&req3,&status);
		if(taskid%t_sqrt != 0) 
			MPI_Wait(&req4,&status);
		if(taskid%t_sqrt != t_sqrt-1) 
			MPI_Wait(&req5,&status);
		outer_update(size,taskid,t_sqrt,array[z],array[1-z]);
		z = 1-z;
	}
	if (taskid == 0){
		t2 = MPI_Wtime();
		printf("MPI_Wtime measured: %1.2f\n", t2-t1);
	} 
	for (i = 0;i<2;i++){
		free(array[i][0]);
		free(array[i]);
	}
	free(array);
	MPI_Type_free(&dt);
	MPI_Type_free(&dt2);
	MPI_Finalize();
}
int main(int argc, char *argv[])
{

  //////////////////////////***Definitions***////////////////////////////////////////////////////////////////////
  int nthreads=16,chunk=CHUNKSIZE;
  /*Input args/files */

  FILE 	*fp1, /*Spectroscopic Galaxy File */
    *fp2; /*Imaging Galaxy File */

  char	*Gxy_Spectro,
    *Gxy_Imaging;


  int	N_Bins; /*Number of log bins */
  double	Start_Bin, /*Location of the edge of the smallest bin */
    Max_Separation, /*Maximum rp Separation */
    log_Bin_Size, /*Rp Bin Size in log*/
  Minimum_Redshift=1000.0, /*Used to calculated maximum serapartion to filter pairs.*/
  Maximum_Redshift=0;
   int	Normalization_Choice; /*Which normalization should be used for the imaging catalogue 1= Di 2=Ri */
  /* Spectroscopic Galaxy/Randoms Information */

  int	Spectro_Size=1E5; /*This is the assumed length of the galaxy file */
  double	*RA_s, /* Given */
    *Dec_s, /* Given */
    *Redshift_s, /*Given */
    *Weight_s, /*The Fiber Collision or Completeness Weight of The Galaxy/Randoms */	
    *Distance_s;

  double	*X_s,*Y_s,*Z_s; /*The cartesian elements to calculate cos_Theta*/
  double area_tot=4*PI;
//  fprintf(stderr,"ASSUMMING SPHERE GEOMETRY FOR NORMALIZATION CHOICE!!!!!!!!!!!!!\n");
  /* Imaging Galaxy/Randoms Information */

  int 	Imaging_Size=4E5; /*This is the assumed length of the imaging file */
  double  *RA_i, /* Given */
    *Dec_i; /* Given */



  double	*X_i,*Y_i,*Z_i;
  /* Wp calculation information */

  double	*DD,	/*This is not an int because the counts will be weights. It is the shape Nbins X NJackknife */
    Maximum_Dec_Separation, /*Filter by this dec difference */
    Distance_to_Near_Z=1646., /*distance to inner redshift bin */
    Distance_to_Far_Z=0., /*distance to inner redshift bin */
    cos_Theta,
    rp;




  int	bin;
  /*Random Counters and Such */
  int	i=0,j=0,k=0;
  int	Ngal_s=0; /*Number of Galaxies/Randoms in the Spectro Sample */
  int 	Ngal_i=0; /*Number of Galaxies/Randoms in the Imagin Sample */
  /* void gridlink1D(int np,double rmin,double rmax,double rcell,double *z,int *ngrid,int **gridinit,int **gridlist); */
  void gridlink1D_with_struct(int np,double dmin,double dmax,double rcell,double *x1,double *y1,double *z1,double *dec,int *ngrid,cellarray **lattice);

  struct timeval t0,t1;

  int nitems,nread;
  char buffer[MAXBUFSIZE];


  /*Read in Args */
  Gxy_Spectro=argv[1];
  Gxy_Imaging=argv[2];
  sscanf(argv[3],"%lf",&Start_Bin);
  sscanf(argv[4],"%lf",&Max_Separation);
  sscanf(argv[5],"%d",&N_Bins);
  sscanf(argv[6],"%d",&Normalization_Choice);
  if(argc > 6)
	sscanf(argv[7],"%lf",&area_tot) ;



  log_Bin_Size=(log10(Max_Separation)-log10(Start_Bin))/(N_Bins);
  //log_Bin_Size=(log10(Max_Separation)-log10(Start_Bin))/(N_Bins-1.);
  fprintf(stderr,"BOSS Wp > Log Bin size = %lf \n",log_Bin_Size);
  //////////////////////////////*Allocate the Arrays that are going to be used *//////////////////////////////////////////////


/* #ifdef USE_BINLOOKUP */
/*   int *binlookup=NULL; */
/*   const int NBINLOOKUP=5e4; */
/*   binlookup = my_calloc(sizeof(*binlookup),NBINLOOKUP+2); */
/* #ifdef AVOID_SQRT */
/*   setup_squared_bin_lookup(sdss_data_file,&rmin,&rmax,&nbin,NBINLOOKUP,&rupp,binlookup); */
/*   binfac=NBINLOOKUP/(rmax*rmax); */
/* #else */
/*   setup_bin_lookup(sdss_data_file,&rmin,&rmax,&nbin,NBINLOOKUP,&rupp,binlookup); */
/*   binfac=NBINLOOKUP/rmax; */
/* #endif */
/* #endif */



  /*Spectro Arrays*/
  //Variables in the file
  RA_s       = my_calloc(sizeof(*RA_s),Spectro_Size);
  Dec_s      = my_calloc(sizeof(*Dec_s),Spectro_Size);
  Redshift_s = my_calloc(sizeof(*Redshift_s),Spectro_Size);
  Weight_s   = my_calloc(sizeof(*Weight_s),Spectro_Size);





	

  /////////////////////////////* [ READ IN THE GALAXY FILES AND CONVERT REDSHIFTS TO MPC ] *////////////////////////////////////
  	
  /*Read in Spectro Sample*/
  gettimeofday(&t0,NULL);
  fp1 = my_fopen(Gxy_Spectro,"r") ;
  i=0;
  int flag=0,trash_d;
  nitems=5;
  /* while(fscanf(fp1,"%lf %lf %lf %lf %d",&RA_s[i],&Dec_s[i],&Redshift_s[i],&Weight_s[i],&Sector_s[i])!=EOF) { */
  while(fgets(buffer,MAXBUFSIZE,fp1)!=NULL) {
    nread=sscanf(buffer,"%lf %lf %lf %lf %d",&RA_s[i],&Dec_s[i],&Redshift_s[i],&Weight_s[i],&trash_d);
    if (nread == nitems) {
      if(Redshift_s[i] > 10.0) {
	Redshift_s[i]/=SPEED_OF_LIGHT;
	flag=1;
      }
      
      if(Redshift_s[i] < 0) {
	fprintf(stderr,"BOSS Wp > Warning! Redshift = %lf, NR = %d. Setting to nearly 0.\n",Redshift_s[i],i);
	Redshift_s[i]=0.00001;
      }
      i++;

      if(i==Spectro_Size) {
	fprintf(stderr,"Increasing memory allocation for the spectroscopic sample\n");
	Spectro_Size *= MEMORY_INCREASE_FAC;
	RA_s       = my_realloc(RA_s,sizeof(*RA_s),Spectro_Size,"RA_s");
	Dec_s      = my_realloc(Dec_s,sizeof(*Dec_s),Spectro_Size,"Dec_s");
	Redshift_s = my_realloc(Redshift_s,sizeof(*Redshift_s),Spectro_Size,"Redshift_s");
	Weight_s   = my_realloc(Weight_s,sizeof(*Weight_s),Spectro_Size,"Weight_s");

      }
    } else {
      fprintf(stderr,"WARNING: In spectroscopic sample line %d did not contain %d elements...skipping line\n",i,nitems);
    }
  }
  Ngal_s=i;
  fclose(fp1);
  gettimeofday(&t1,NULL);
  
  if(flag!=0)
    fprintf(stderr,"BOSS Wp > Warning! You gave me cz instead of redshift!\n"); 
	
  //Derived variables
  Distance_s = my_calloc(sizeof(*Distance_s),Ngal_s);
  X_s        = my_calloc(sizeof(*X_s),Ngal_s);
  Y_s        = my_calloc(sizeof(*Y_s),Ngal_s);
  Z_s        = my_calloc(sizeof(*Z_s),Ngal_s);



  if(Ngal_s >= Spectro_Size) {
    fprintf(stderr,"BOSS Wp > Something Terrible Has Happened: SPECTROSCOPIC FILE TOO LONG!!!");
    return EXIT_FAILURE;
    
  }
  fprintf(stderr,"BOSS Wp > There are %d Galaxies in the Spectro Sample. Time taken = %6.2lf sec\n",Ngal_s,ADD_DIFF_TIME(t0,t1));	


  /*Convert Redshift to Comoving Distance in MPC */
  /* Here I am using Simpsons' Numerical Integration Rule To 
   * convert the redshift of the galaxy into Megaparsecs.
   * The details of the integrals I am using is obviously
   * in Hogg's Distance Measures in Cosmology and you can
   * wikipedia Simpsons' Rule.  I am assuming WMAP7 Cosmology
   * throughout.  You can adjust all those parameters in the header.
   * I'm including an extra parameter (the equation of state of dark energy)
   * because I felt like it.
   */


  double mean_distance=0;	
  /*GSL Numerical Integration Crap */
  gsl_integration_workspace * w 
    = gsl_integration_workspace_alloc (1000);
  double result, error,redshift_gsl;
  gsl_function F;
  F.function = &f;
  F.params = &redshift_gsl;

  for(i=0;i<Ngal_s;i++) {
    gsl_integration_qags (&F, 0, Redshift_s[i], 0, 1e-7, 1000,
			  w, &result, &error);
    Distance_s[i]=result; 
   if(Redshift_s[i] < Minimum_Redshift) {
      Distance_to_Near_Z=Distance_s[i];		
      Minimum_Redshift=Redshift_s[i];
    }
    if(Redshift_s[i] > Maximum_Redshift){
	Distance_to_Far_Z=Distance_s[i];
	Maximum_Redshift=Redshift_s[i];
	}
    mean_distance+=Distance_s[i];
  }
  gsl_integration_workspace_free(w);
  
  fprintf(stderr,"BOSS Wp > Mean Distance = %lf\n",mean_distance/Ngal_s);	
  fprintf(stderr,"BOSS Wp > The Distance to the closest redshift is %lf\n",Distance_to_Near_Z);
  fprintf(stderr,"BOSS Wp > The Distance to the furthest redshift %lf is %lf\n",Maximum_Redshift,Distance_to_Far_Z);
  double dist_range=(Distance_to_Far_Z - Distance_to_Near_Z);
  double Volume1=4./3.*PI*pow(Distance_to_Far_Z,3);
  double Volume2=4./3.*PI*pow(Distance_to_Near_Z,3);
  
  double percentage_area=area_tot/(4.*PI);
  double Volume=(Volume1-Volume2)*percentage_area;
  fprintf(stderr,"BOSS Wp > Spherical Volume =%lf\n",Volume);
  fprintf(stderr,"BOSS Wp > Number Density of Spectro Gal =%17.16f\n",Ngal_s/Volume);
  //	fprintf(stderr,"The Maximum Separation you decided is %lf\n",Max_Separation);	

  Maximum_Dec_Separation=asin(Max_Separation/(2*Distance_to_Near_Z))*2.*RAD_TO_DEG*1.00002; //The maximum separation that can happen and let's multiply it by 20% more
  fprintf(stderr,"BOSS Wp > Maximum Dec Separation is %lf\n",Maximum_Dec_Separation);
  
  
  /*Read in Imaging File*/
  /*Imaging Arrays */
  RA_i     = my_calloc(sizeof(*RA_i),Imaging_Size);
  Dec_i    = my_calloc(sizeof(*Dec_i),Imaging_Size);

  

  nitems=3;
  gettimeofday(&t0,NULL);
  fp2=my_fopen(Gxy_Imaging,"r") ;
  i=0;
  while(fgets(buffer,MAXBUFSIZE,fp2)!=NULL) {
    nread = sscanf(buffer,"%lf %lf %d",&RA_i[i],&Dec_i[i],&trash_d);
    if(nread == nitems) {
      i++;
      if(i==Imaging_Size) {
	fprintf(stderr,"Increasing memory allocation for the imaging sample\n");
	Imaging_Size *= MEMORY_INCREASE_FAC;
	RA_i     = my_realloc(RA_i,sizeof(*RA_i),Imaging_Size,"RA_i");
	Dec_i    = my_realloc(Dec_i,sizeof(*Dec_i),Imaging_Size,"Dec_i");

      }
    } else {
      fprintf(stderr,"WARNING: line %d did not contain %d elements - skipping\n",i,nitems);
    }
  }
  fclose(fp2);
  gettimeofday(&t1,NULL);
  Ngal_i=i;
  if(Ngal_i >= Imaging_Size) {
    fprintf(stderr,"BOSS Wp > Something Terrible Has Happened: IMAGING FILE TOO LONG!!!\n");
    return EXIT_FAILURE;
  }

  X_i   = my_calloc(sizeof(*X_i),Ngal_i);
  Y_i   = my_calloc(sizeof(*Y_i),Ngal_i);
  Z_i   = my_calloc(sizeof(*Z_i),Ngal_i);


  fprintf(stderr,"BOSS Wp > There are %d Galaxies in the Imaging Sample. Time taken = %6.2lf sec\n",Ngal_i,ADD_DIFF_TIME(t0,t1));


  for(i=0;i<Ngal_s;i++) {
    X_s[i]=sin((90-Dec_s[i]) * DEG_TO_RAD)*cos(RA_s[i] * DEG_TO_RAD) ;
    Y_s[i]=sin((90-Dec_s[i]) * DEG_TO_RAD)*sin(RA_s[i] * DEG_TO_RAD) ;
    Z_s[i]=cos((90-Dec_s[i]) * DEG_TO_RAD) ;
  }



  for(i=0;i<Ngal_i;i++){
    X_i[i]=sin((90-Dec_i[i]) * DEG_TO_RAD)*cos(RA_i[i] * DEG_TO_RAD) ;
    Y_i[i]=sin((90-Dec_i[i]) * DEG_TO_RAD)*sin(RA_i[i] * DEG_TO_RAD) ;
    Z_i[i]=cos((90-Dec_i[i]) * DEG_TO_RAD) ;
  }

    /*
   *This is where the jackknife call is going to go.
   *It's going to take the map file,the number of jackknife samples and the observed sectors in the same order as the observed galaxies.
   *It will return the vector of jackknife ID's in the same order the sector list was given to it.
   *The jackknife ID corresponds to the *one* jackknife sample that galaxy doesn't belong in.

   */
  
  double number_density_of_imaging=Ngal_i/area_tot;
  double distance_squared=0.0,Normalization=0.0;

  if(Normalization_Choice==1) {

   for(i=0;i<Ngal_s;i++) {
      Normalization+=Weight_s[i];
    } 

 } else {  

    for(i=0;i<Ngal_s;i++){
      distance_squared+=1./SQR(Distance_s[i]);
      Normalization+=number_density_of_imaging*Weight_s[i]*1./SQR(Distance_s[i]);
    }

//	Normalization=number_density_of_imaging*1.204988;	 
	fprintf(stderr,"Distance Squared = %lf,Normalization =%lf\n",distance_squared,Normalization); 
  }
 


  

  //gridlink the spectroscopic sample
  /*---Gridlink-variables----------------*/
  int ngrid;/* *gridinit1D,*gridlist1D ; */
  double dmin=-90,dmax=90.0;//min/max dec
  double inv_dmax_diff = 1.0/(dmax-dmin);
  cellarray *lattice;
  
  ngrid=0 ;
  /* gridlink1D(Ngal_i,dmin,dmax,Max_Separation,Dec_i,&ngrid,&gridinit1D,&gridlist1D) ; */
  gridlink1D_with_struct(Ngal_i,dmin,dmax,Maximum_Dec_Separation,X_i,Y_i,Z_i,Dec_i,&ngrid,&lattice);
  fprintf(stderr,"gridlink1D done. ngrid= %d\n",ngrid) ;

  ////////////////////////////////////****Calculation of Wp****/////////////////////////////////////////////////////////////////////////
//  double rp_sqr=0.0;
  double max_sep_sqr = Max_Separation*Max_Separation;
  double start_bin_sqr = Start_Bin*Start_Bin;
  double inv_start_bin_sqr = 1.0/start_bin_sqr;
  double inv_log_bin_size = 1.0/log_Bin_Size;
  /* int icen,icell; */
  /* double *x1,*y1,*z1,*dec; */
  /* int *imaging; */
  cellarray *cellstruct __attribute__((aligned(ALIGNMENT)));

  int xx=0;
  for(i=0;i<ngrid;i++)
    xx+= lattice[i].nelements;

  if(xx!=Ngal_i) {
    fprintf(stderr,"ERROR: xx=%d is not equal to Ngal_i=%d\n",xx,Ngal_i);
    exit(EXIT_FAILURE);
  }
    
  /*Wp Measurement Arrays */


  DD    = my_calloc(sizeof(*DD),N_Bins);

  double DD_threads[N_Bins][nthreads];
  for(i=0;i<N_Bins;i++) {
    for(j=0;j<nthreads;j++) {
      DD_threads[i][j]=0.0;
    }
  }

  /* int ispectro=0,ii=0,p; */
  gettimeofday(&t0,NULL);
  omp_set_num_threads(nthreads);
  int counter=0;
  int interrupted=0; 
  init_my_progressbar(Ngal_s,&interrupted);
/* #pragma omp parallel shared(Dec_s,Weight_s,X_s,Y_s,Z_s,chunk) private(cos_Theta,ispectro,icen,icell,rp_sqr,bin,x1,y1,z1,imaging,cellstruct) */
#pragma omp parallel default(none) shared(interrupted,stderr,counter,Ngal_s,Dec_s,Weight_s,X_s,Y_s,Z_s,chunk,ngrid,dmin,inv_dmax_diff,Maximum_Dec_Separation,Distance_s,inv_start_bin_sqr,max_sep_sqr,inv_log_bin_size,start_bin_sqr,DD_threads,lattice) 
  {
    int tid = omp_get_thread_num();
#pragma omp for schedule(dynamic,chunk)
    for(int ispectro=0;ispectro<Ngal_s;ispectro++) {
      #pragma omp atomic
 	counter++;
      	if(tid==0){
			my_progressbar(counter,&interrupted);
      		}
      int icen = (int)(ngrid*(Dec_s[ispectro]-dmin)*inv_dmax_diff);
      if(icen<0) icen++;
      if(icen>=ngrid) icen = icen--;
      assert(icen >=0 && icen < ngrid && "icen needs to be in [0, ngrid)");
      for(int ii=-BIN_REFINE_FACTOR;ii<=BIN_REFINE_FACTOR;ii++) {
	int icell = icen + ii ;
	/* for(icell=0;icell<ngrid;icell++) { */ // This makes no difference in the output - so the logic is correct
	if(icell>=0 && icell<ngrid)  {
	  /*---Loop-over-particles-in-each-cell-----------------*/
	  cellarray *cellstruct=&(lattice[icell]);
	  double *x1 = cellstruct->x;
	  double *y1 = cellstruct->y;
	  double *z1 = cellstruct->z;
	  double *dec = cellstruct->dec;
	  int *imaging = cellstruct->index;
	  for(int p=0;p<cellstruct->nelements;p++) {
	    if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) {
	      double cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p];
	      /* rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */
	      double rp_sqr=2.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta); /* sin(arccos x) = sqrt(1-x^2) */
	      if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) {
		int bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size);
		//	      bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size)-1;
		/* bin=(int)floor((log10(sqrt(rp_sqr)/Start_Bin))/log_Bin_Size); */
		DD_threads[bin][tid]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin//
	      }
	    }
	  }
	}
      }
    }
  }
 
  finish_myprogressbar(&interrupted);
 
  for(i=0;i<N_Bins;i++) {
    for(j=0;j<nthreads;j++){
      DD[i]+=DD_threads[i][j];		
    }	
  }
 
  gettimeofday(&t1,NULL);
  fprintf(stderr,"Double loop time in main -> %6.2lf sec \n",ADD_DIFF_TIME(t0,t1));
  
  /* #ifndef USE_AVX */
	/* for(p=0;p<cellstruct->nelements;p++) { */
	/*   if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) { */
	/*     cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p]; */
	/*     rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */
	/*     if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */
	/*       bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */
	/*       /\* bin=(int)floor((log10(sqrt(rp_sqr)/Start_Bin))/log_Bin_Size); *\/ */
	/*       DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */
	/*       DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */
	/*       if(Jackknife_i[imaging[p]]!=Jackknife_s[ispectro]){ */
	/* 	DD[bin][Jackknife_i[imaging[p]]+1]+=Weight_s[ispectro]; */
	/*       } */
	/*     } */
	/*   } */
	/* } */
/* #else */
/* 	double dec_separation[NVECD]; */
/* 	double rp_sqr_array[NVECD],cos_theta_array[NVECD]; */
/* 	for(p=0;(p+NVECD)<cellstruct->nelements;p+=NVECD) { */
/* 	  #pragma vector always */
/* 	  for(int j=0;j<NVECD;j++) { */
/* 	    dec_separation[j]  = fabs(Dec_s[ispectro]-dec[p]); */
/* 	    cos_theta_array[j] = X_s[ispectro] * x1[p+j] + Y_s[ispectro] * y1[p+j] + Z_s[ispectro] * z1[p+j]; */
/* 	    rp_sqr_array[j]    = 4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_theta_array[j])*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */
/* 	  } */

/* 	  #pragma novector  */
/* 	  for(int j=0;j<NVECD;j++) { */
/* 	    rp_sqr = rp_sqr_array[j]; */
/* 	    if(dec_separation[j] <= Maximum_Dec_Separation) { */
/* 	      if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */
/* 		bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */
/* 		DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */
/* 		DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */
/* 		if(Jackknife_i[imaging[p+j]]!=Jackknife_s[ispectro]){ */
/* 		  DD[bin][Jackknife_i[imaging[p+j]]+1]+=Weight_s[ispectro]; */
/* 		} */
/* 	      } */
/* 	    } */
/* 	  } */
/* 	} */

/* 	//Now serially process the rest */
/* 	p = p > cellstruct->nelements ? p-NVECD:p; */
/* 	for(;p<cellstruct->nelements;p++){ 
/* 	  if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) { */
/* 	    cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p]; */
/* 	    rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */
/* 	    if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */
/* 	      bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */
/* 	      DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */
/* 	      DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */
/* 	      if(Jackknife_i[imaging[p]]!=Jackknife_s[ispectro]){ */
/* 		DD[bin][Jackknife_i[imaging[p]]+1]+=Weight_s[ispectro]; */
/* 	      } */
/* 	    } */
/* 	  } */
/* 	} */
/* #endif */

  /* for(int ispectro=0;ispectro<Ngal_s;ispectro++){ */
    /* for(int imaging=0;imaging<Ngal_i;imaging++){ */
    /*   if(fabs(Dec_s[ispectro]-Dec_i[imaging]) <= Maximum_Dec_Separation){ */
    /* 	cos_Theta=X_s[ispectro] * X_i[imaging] + Y_s[ispectro] * Y_i[imaging] + Z_s[ispectro] * Z_i[imaging]; */
    /* 	//rp=2.0*Distance_s[ispectro]*SQRT((1.0 - cos_Theta)/2.); /\* sin(arccos x) = sqrt(1-x^2) *\/ */
    /* 	rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */
    /* 	//fprintf(stderr,"distance = %lf,cos_Theta=%lf,rp = %lf\n",Distance_s[ispectro],cos_Theta,rp); */
    /* 	/\* if(rp < Max_Separation && rp>=Start_Bin){ *\/ */
    /* 	if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */
    /* 	  /\* bin=(int)floor((log10(rp/Start_Bin))/log_Bin_Size); *\/ */
    /* 	  bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */
    /* 	  DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */
    /* 	  DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */
    /* 	  if(Jackknife_i[imaging]!=Jackknife_s[ispectro]){ */
    /* 	    //						fprintf(fp3,"%d %lf %d %d %d %d \n",bin, rp,Jackknife_s[ispectro],Sector_s[ispectro],Jackknife_i[imaging],Sector_i[imaging]); */
    /* 	    DD[bin][Jackknife_i[imaging]+1]+=Weight_s[ispectro]; */
    /* 	  } */
    /* 	} */
    /*   } */
    /* } */
  /* } */



  for(i=0;i<N_Bins;i++) {
    //		fprintf(stderr,"%lf %e %e %e ",pow(10,(log_Bin_Size*(i)+log10(Start_Bin))),DD[i][0]/(Normalization),Mean[i],Error[i]);
    fprintf(stdout,"%lf %e %lf\n",pow(10,(log_Bin_Size*(i)+log10(Start_Bin))),DD[i]/(Normalization),DD[i]);


  }

	




  /* Free ALL the arrays */	
  free(RA_i);
  free(Dec_i);
  free(X_s);
  free(Y_s);
  free(Z_s);
  free(X_i);
  free(Y_i);
  free(Z_i);
  free(RA_s);
  free(Dec_s);
  free(Redshift_s);
  free(Distance_s); 

  free(Weight_s);
  free(DD);	




  for(i=0;i<ngrid;i++) {
    free(lattice[i].x);
    free(lattice[i].y);
    free(lattice[i].z);
    free(lattice[i].dec);
    free(lattice[i].index);
  }
  free(lattice);

  return 0;
}
Ejemplo n.º 14
0
/**
 * Parses the command line, validates the options and dispatches to the
 * layer-creation routine that matches the requested layer type.
 *
 * The target directory is DelimitPath(processpath) + <name>. If it already
 * exists it is only removed when --force is given.
 *
 * @param argc         argument count, forwarded to the option parser
 * @param argv         argument vector, forwarded to the option parser
 * @param qLogger      logger receiving all info/warning/error messages
 * @param processpath  base directory under which the layer directory lives
 * @return 4 when the command line cannot be parsed, ERROR_PARAMS for missing
 *         or inconsistent options, ERROR_LAYEREXISTS or
 *         ERROR_DELETE_PERMISSION when the target directory is in the way,
 *         ERROR_UNSUPPORTED for layer types without a creation routine here,
 *         otherwise whatever the selected _create*layer() call returns.
 */
int _start(int argc, char *argv[], boost::shared_ptr<Logger> qLogger, const std::string& processpath)
{
   bool bError = false;

   po::options_description desc("Program-Options");
   desc.add_options()
       ("name", po::value<std::string>(), "layer name (string)")
       ("lod", po::value<int>(), "desired level of detail (integer)")
       ("extent", po::value< std::vector<int64> >()->multitoken(), "tile boundary (tx0 ty0 tx1 ty1) for elevation/image data")
       ("boundary", po::value<std::vector<double> >()->multitoken(), "WGS84 boundary for point data or mapnik rendering")
       ("force", "[optional] force creation. (Warning: if this layer already exists it will be deleted)")
       ("numthreads", po::value<int>(), "[optional] force number of threads")
       ("type",  po::value<std::string>(), "[optional] layer type. This can be image, elevation, poi, point, geometry. image is default value.")
       ;

   po::variables_map vm;

   try
   {
      po::store(po::parse_command_line(argc, argv, desc), vm);
      po::notify(vm);
   }
   catch (const std::exception &ex)
   {
      // Unparsable command line: print the reason and the usage, then bail out.
      std::cout << "Error when parsing command line options:\n" << ex.what() << "\n\n";
      std::cout << desc << "\n";
      return 4;
   }

   std::string sLayerName;
   int nLod = 0;
   std::vector<int64> vecExtent;
   std::vector<double> vecBoundary;
   bool bForce = false;
   ELayerType eLayer = IMAGE_LAYER;

   // --name is mandatory and must not be empty.
   if (!vm.count("name"))
   {
      qLogger->Error("layer name is not specified!");
      bError = true;
   }
   else
   {
      sLayerName = vm["name"].as<std::string>();
      if (sLayerName.length() == 0)
      {
         qLogger->Error("layer name is empty!");
         bError = true;
      }
   }

   // --lod is mandatory for every layer type except mapnik.
   if (!vm.count("lod"))
   {
      // Only read --type when it was actually supplied; calling as<>() on a
      // missing option throws instead of reporting the missing lod.
      const bool bIsMapnik = vm.count("type") && vm["type"].as< std::string >() == "mapnik";
      if (!bIsMapnik)
      {
         qLogger->Error("lod not specified!");
         bError = true;
      }
   }
   else
   {
      nLod = vm["lod"].as<int>();
   }

   if (vm.count("force"))
   {
      bForce = true;
   }

   if (vm.count("extent"))
   {
      vecExtent = vm["extent"].as< std::vector<int64> >();
   }

   if (vm.count("boundary"))
   {
      vecBoundary = vm["boundary"].as< std::vector<double> >();
   }

   // Optional thread-count override; only values in [1, 64] are applied.
   if (vm.count("numthreads"))
   {
      int n = vm["numthreads"].as<int>();
      if (n>0 && n<65)
      {
         std::ostringstream oss; 
         oss << "Forcing number of threads to " << n;
         qLogger->Info(oss.str());
         omp_set_num_threads(n);
      }
      else
      {
         qLogger->Warn("Ignoring --numthreads: value must be between 1 and 64");
      }
   }

   // Map the textual layer type onto the enum; image is the default.
   if (vm.count("type"))
   {
      std::string sLayerType = vm["type"].as< std::string >();
      if (sLayerType == "image")
      {
         eLayer = IMAGE_LAYER;
      }
      else if (sLayerType == "imagepostprocessing")
      {
         eLayer = IMAGE_POSTPROCESSING_LAYER;
      }
      else if (sLayerType == "mapnik")
      {
         eLayer = MAPNIK_LAYER;
      }
      else if (sLayerType == "elevation")
      {
         eLayer = ELEVATION_LAYER;
      }
      else if (sLayerType == "poi")
      {
         eLayer = POI_LAYER;
      }
      else if (sLayerType == "point")
      {
         eLayer = POINT_LAYER;
      }
      else if (sLayerType == "geometry")
      {
         eLayer = GEOMETRY_LAYER;
      }
      else
      {
         // Tell the user which value was rejected instead of failing silently.
         qLogger->Error("Unknown layer type: " + sLayerType);
         bError = true;
      }
   }
   else
   {
       qLogger->Warn("It is highly recommended to use --type! Using default --type image");
   }

   // Point layers are described by a WGS84 boundary, all others by a tile extent.
   // NOTE(review): mapnik layers are created from vecBoundary below but are
   // validated against the extent here - confirm this is intended.
   if (eLayer == POINT_LAYER)
   {
      if (vecBoundary.size() != 6 )
      {
         qLogger->Error("boundary must be specified with 6 values (WGS84): lng0 lat0 elv0 lng1 lat1 elv1");
         bError = true;
      }
   }
   else
   {
      if (vecExtent.size() != 4 )
      {
         qLogger->Error("extent must be defined with 4 values (Tile Coords): x0 y0 x1 y1");
         bError = true;
      }
   }

   if (bError)
   {
      qLogger->Error("Wrong parameters!");
      std::ostringstream sstr;
      sstr << desc;
      qLogger->Info("\n" + sstr.str());

      return ERROR_PARAMS;
   }

   std::string sLayerPath = FilenameUtils::DelimitPath(processpath) + sLayerName;
   qLogger->Info("Target directory: " + sLayerPath);
   
   // An existing target directory is an error unless --force asks for its removal.
   // NOTE(review): the removal also happens for layer types that end in
   // ERROR_UNSUPPORTED below - confirm whether that is acceptable.
   if (FileSystem::DirExists(sLayerPath))
   {
      if (!bForce)
      {
         qLogger->Error("Layer already exists!!");
         qLogger->Error("the directory " + sLayerPath + " already exists. Please delete manually or choose another layer name or use the --force option");
         return ERROR_LAYEREXISTS;
      }

      qLogger->Info("Force option detected. Deleting already existing layer... this may take a while");
      if (!FileSystem::rm_all(sLayerPath))
      {
         qLogger->Error("Can't delete old layer (file permission).");
         return ERROR_DELETE_PERMISSION;
      }

      qLogger->Info("ok.. layer deleted.");
   }

   // Dispatch to the creation routine of the selected layer type.
   if (eLayer == IMAGE_LAYER)
   {
      return _createimagelayer(sLayerName, sLayerPath, nLod, vecExtent, qLogger, false);
   }
   if (eLayer == IMAGE_POSTPROCESSING_LAYER)
   {
      return _createimagelayer(sLayerName, sLayerPath, nLod, vecExtent, qLogger, true);
   }
   if (eLayer == MAPNIK_LAYER)
   {
      return _createmapniklayer(sLayerName, sLayerPath, vecBoundary, qLogger);
   }
   if (eLayer == ELEVATION_LAYER)
   {
      return _createelevationlayer(sLayerName, sLayerPath, nLod, vecExtent, qLogger);
   }
   if (eLayer == POINT_LAYER)
   {
      return _createpointlayer(sLayerName, sLayerPath, nLod, vecBoundary, qLogger);
   }

   // poi and geometry are accepted on the command line but have no creation routine here.
   return ERROR_UNSUPPORTED;
}
Ejemplo n.º 15
0
int main(int argc, char* argv[]) {
	int threads = 8;

	if (argc > 1) {
		threads = (atoi(argv[1]));
	}

	omp_set_num_threads(threads);
//=========================================================================================================
	ChSystemParallelDVI * system_gpu = new ChSystemParallelDVI;
	ChCollisionSystemParallel *mcollisionengine = new ChCollisionSystemParallel();
	system_gpu->SetIntegrationType(ChSystem::INT_ANITESCU);

//=========================================================================================================
	system_gpu->SetParallelThreadNumber(threads);
	system_gpu->SetMaxiter(max_iter);
	system_gpu->SetIterLCPmaxItersSpeed(max_iter);
	((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->SetMaxIteration(max_iter);
	system_gpu->SetTol(.1);
	system_gpu->SetTolSpeeds(.1);
	((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->SetTolerance(.1);
	((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->SetCompliance(0);
	((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->SetContactRecoverySpeed(10);
	((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->SetSolverType(ACCELERATED_PROJECTED_GRADIENT_DESCENT);
	((ChCollisionSystemParallel *) (system_gpu->GetCollisionSystem()))->SetCollisionEnvelope(particle_radius * .01);
	mcollisionengine->setBinsPerAxis(I3(50, 50, 50));
	mcollisionengine->setBodyPerBin(100, 50);
	system_gpu->Set_G_acc(ChVector<>(0, gravity, 0));
	system_gpu->SetStep(timestep);
	((ChSystemParallel*) system_gpu)->SetAABB(R3(-6, -3, -12), R3(6, 6, 12));
//=========================================================================================================
//cout << num_per_dir.x << " " << num_per_dir.y << " " << num_per_dir.z << " " << num_per_dir.x * num_per_dir.y * num_per_dir.z << endl;
//addPerturbedLayer(R3(0, -5 +container_thickness-particle_radius.y, 0), ELLIPSOID, particle_radius, num_per_dir, R3(.01, .01, .01), 10, 1, system_gpu);
//addHCPCube(num_per_dir.x, num_per_dir.y, num_per_dir.z, 1, particle_radius.x, 1, true, 0,  -6 +container_thickness+particle_radius.y, 0, 0, system_gpu);
//=========================================================================================================

	ChSharedBodyPtr L = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr R = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr F = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr B = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr Bottom = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr Top = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedBodyPtr Tube = ChSharedBodyPtr(new ChBody(new ChCollisionModelParallel));
	ChSharedPtr<ChMaterialSurface> material;
	material = ChSharedPtr<ChMaterialSurface>(new ChMaterialSurface);
	material->SetFriction(.1);
	material->SetRollingFriction(0);
	material->SetSpinningFriction(0);
	material->SetCompliance(0);
	material->SetCohesion(-100);

	Quaternion q;
	q.Q_from_AngX(-.1);

	InitObject(L, 100000, Vector(-container_size.x + container_thickness, container_height - container_thickness, 0), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);
	InitObject(R, 100000, Vector(container_size.x - container_thickness, container_height - container_thickness, 0), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);
	InitObject(F, 100000, Vector(0, container_height - container_thickness, -container_size.z + container_thickness), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);
	InitObject(B, 100000, Vector(0, container_height - container_thickness, container_size.z - container_thickness), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);
	InitObject(Bottom, 100000, Vector(0, container_height - container_size.y / 1.5, 0), q, material, true, true, -20, -20);
	InitObject(Top, 100000, Vector(0, container_height + container_size.y, 0), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);
	InitObject(Tube, 100000, Vector(container_size.x - container_thickness, container_height - container_thickness, 0), Quaternion(1, 0, 0, 0), material, true, true, -20, -20);

	AddCollisionGeometry(L, BOX, Vector(container_thickness, container_size.y, container_size.z), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(R, BOX, Vector(container_thickness, container_size.y, container_size.z), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(F, BOX, Vector(container_size.x, container_size.y, container_thickness), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(B, BOX, Vector(container_size.x, container_size.y, container_thickness), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(Bottom, BOX, Vector(container_size.x, container_thickness, container_size.z), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(Top, BOX, Vector(container_size.x, container_thickness, container_size.z), Vector(0, 0, 0), Quaternion(1, 0, 0, 0));

	AddCollisionGeometry(Tube, BOX, Vector(2, container_thickness / 6.0, 1), Vector(0, container_size.y / 2.0 + .6 + .4, 0), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(Tube, BOX, Vector(2, container_thickness / 6.0, 1), Vector(0, container_size.y / 2.0 - .6 + .4, 0), Quaternion(1, 0, 0, 0));

	AddCollisionGeometry(Tube, BOX, Vector(2, .6, container_thickness / 6.0), Vector(0, container_size.y / 2.0 + .4, -1), Quaternion(1, 0, 0, 0));
	AddCollisionGeometry(Tube, BOX, Vector(2, .6, container_thickness / 6.0), Vector(0, container_size.y / 2.0 + .4, 1), Quaternion(1, 0, 0, 0));

	FinalizeObject(L, (ChSystemParallel *) system_gpu);
	FinalizeObject(R, (ChSystemParallel *) system_gpu);
	FinalizeObject(F, (ChSystemParallel *) system_gpu);
	FinalizeObject(B, (ChSystemParallel *) system_gpu);
	FinalizeObject(Bottom, (ChSystemParallel *) system_gpu);
	//FinalizeObject(Top, (ChSystemParallel *) system_gpu);
	//FinalizeObject(Tube, (ChSystemParallel *) system_gpu);

	material_fiber = ChSharedPtr<ChMaterialSurface>(new ChMaterialSurface);
	material_fiber->SetFriction(.4);
	material_fiber->SetRollingFriction(1);
	material_fiber->SetSpinningFriction(1);
	material_fiber->SetCompliance(0);
	material_fiber->SetCohesion(0);

//=========================================================================================================
//Rendering specific stuff:
	ChOpenGLManager * window_manager = new ChOpenGLManager();
	ChOpenGL openGLView(window_manager, system_gpu, 800, 600, 0, 0, "Test_Solvers");
	//openGLView.render_camera->camera_position = glm::vec3(0, -5, -10);
		//openGLView.render_camera->camera_look_at = glm::vec3(0, -5, 0);
		//openGLView.render_camera->camera_scale = .1;
	openGLView.SetCustomCallback(RunTimeStep);
	openGLView.StartSpinning(window_manager);
	window_manager->CallGlutMainLoop();
//=========================================================================================================
	int file = 0;
	for (int i = 0; i < num_steps; i++) {
		system_gpu->DoStepDynamics(timestep);
		double TIME = system_gpu->GetChTime();
		double STEP = system_gpu->GetTimerStep();
		double BROD = system_gpu->GetTimerCollisionBroad();
		double NARR = system_gpu->GetTimerCollisionNarrow();
		double LCP = system_gpu->GetTimerLcp();
		double UPDT = system_gpu->GetTimerUpdate();
		double RESID = ((ChLcpSolverParallelDVI *) (system_gpu->GetLcpSolverSpeed()))->GetResidual();
		int BODS = system_gpu->GetNbodies();
		int CNTC = system_gpu->GetNcontacts();
		int REQ_ITS = ((ChLcpSolverParallelDVI*) (system_gpu->GetLcpSolverSpeed()))->GetTotalIterations();

		printf("%7.4f|%7.4f|%7.4f|%7.4f|%7.4f|%7.4f|%7d|%7d|%7d|%7.4f\n", TIME, STEP, BROD, NARR, LCP, UPDT, BODS, CNTC, REQ_ITS, RESID);

		int save_every = 1.0 / timestep / 60.0;     //save data every n steps
		if (i % save_every == 0) {
			stringstream ss;

			cout << "Frame: " << file << endl;
			ss << "data/fiber/" << "/" << file << ".txt";
			//DumpAllObjects(system_gpu, ss.str(), ",", true);
			DumpAllObjectsWithGeometryPovray(system_gpu, ss.str());
			//output.ExportData(ss.str());
			file++;
		}
		RunTimeStep(system_gpu, i);
	}

	//DumpObjects(system_gpu, "diagonal_impact_settled.txt", "\t");

}
Ejemplo n.º 16
0
// Estimate the per-position error rate of the reads and write it to pWriter.
//
// Draws n_samples random reads from the BWT, builds a multiple alignment of
// each read against its k-mer overlaps and, for every non-gap column of the
// read, counts the position and whether the read's base is called an error.
// The result is emitted as a JSON object "ErrorsPerBase" holding the two
// arrays "base_count" and "error_count", indexed by position in the read.
//
//   pWriter   - JSON writer receiving the output object
//   index_set - BWT index set used for sampling and overlap computation
void generate_errors_per_base(JSONWriter* pWriter, const BWTIndexSet& index_set)
{

    int n_samples = 100000;
    size_t k = 25;

    double max_error_rate = 0.95;
    size_t min_overlap = 50;
    
    std::vector<size_t> position_count;
    std::vector<size_t> error_count;

    Timer timer("test", true);
#if HAVE_OPENMP
        omp_set_num_threads(opt::numThreads);
        #pragma omp parallel for
#endif
    for(int i = 0; i < n_samples; ++i)
    {
        std::string s = BWTAlgorithms::sampleRandomString(index_set.pBWT);
        // The result of this call is discarded.
        // NOTE(review): presumably redundant with buildMultipleAlignment below;
        // confirm it has no needed side effect before removing it.
        KmerOverlaps::retrieveMatches(s, k, min_overlap, max_error_rate, 2, index_set);
        //KmerOverlaps::approximateMatch(s, min_overlap, max_error_rate, 2, 200, index_set);

        MultipleAlignment ma = 
            KmerOverlaps::buildMultipleAlignment(s, k, min_overlap, max_error_rate, 2, index_set);

        // Skip when there is insufficient depth to classify errors
        size_t ma_rows = ma.getNumRows();
        if(ma_rows <= 1)
            continue;

        // Error flag for each non-gap position of this sample, in read order.
        // Collected locally so the shared counters are locked once per sample
        // instead of once per alignment column.
        std::vector<size_t> sample_errors;

        size_t ma_cols = ma.getNumColumns();
        sample_errors.reserve(ma_cols);
        for(size_t j = 0; j < ma_cols; ++j)
        {
            char s_symbol = ma.getSymbol(0, j);

            // Skip gaps
            if(s_symbol == '-' || s_symbol == '\0')
                continue;
            
            SymbolCountVector scv = ma.getSymbolCountVector(j);
            int s_symbol_count = 0;
            char max_symbol = 0;
            int max_count = 0;

            // Find the support of the read's symbol and the consensus symbol.
            for(size_t idx = 0; idx < scv.size(); ++idx)
            {
                if(scv[idx].symbol == s_symbol)
                    s_symbol_count = scv[idx].count;
                if(scv[idx].count > max_count)
                {
                    max_count = scv[idx].count;
                    max_symbol = scv[idx].symbol;
                }
            }

            // Call an error at this position if the consensus symbol differs from the read
            //    and the support for the read symbol is less than 4 and the consensus symbol
            //    is strongly supported.
            bool is_error = s_symbol != max_symbol && s_symbol_count < 4 && max_count >= 3;
            sample_errors.push_back(is_error ? 1 : 0);
        }

        // Merge this sample's tallies into the shared per-position counters.
#if HAVE_OPENMP
        #pragma omp critical
#endif
        {
            if(sample_errors.size() > position_count.size())
            {
                position_count.resize(sample_errors.size());
                error_count.resize(sample_errors.size());
            }

            for(size_t position = 0; position < sample_errors.size(); ++position)
            {
                position_count[position]++;
                error_count[position] += sample_errors[position];
            }
        }
    }
    
    pWriter->String("ErrorsPerBase");
    pWriter->StartObject();
    
    pWriter->String("base_count");
    pWriter->StartArray();
    for(size_t i = 0; i < position_count.size(); ++i)
        pWriter->Int(position_count[i]);
    pWriter->EndArray();
    
    pWriter->String("error_count");
    pWriter->StartArray();
    for(size_t i = 0; i < position_count.size(); ++i)
        pWriter->Int(error_count[i]);
    pWriter->EndArray();

    pWriter->EndObject();
}
Ejemplo n.º 17
0
double integrateVegas(double * limits , int threads, double * params){
    //Setting the number of threads
     omp_set_num_threads(threads);
    //How many iterations to perform
    int iterations =15;
    //Which iteration to start sampling more
    int switchIteration = 7;
    //How many points to sample in total
    int samples = 100000;
    //How many points to sample after grid set up
    int samplesAfter = 5000000;
    //How many intervals for each dimension
    int intervals = 10;
    //How many subIntervals
    int subIntervals = 1000;
    //Parameter alpha controls convergence rate
    double alpha = 0.5;
    int seed = 40847516;
    //double to store volume integrated over
    double volume = 1.0;
    for(int i=0; i<dimensions; i++){
        volume*= (limits[(2*i)+1]-limits[2*i]);
    };
    //Number of boxes
    int numBoxes = intervals;
    for(int i=1; i<dimensions; i++){
        numBoxes *= intervals;
    }
    //CHANGE SEED WHEN YOU KNOW IT WORKS
    //Setting up one random number stream for each thread
    VSLStreamStatePtr * streams; 
    streams = ( VSLStreamStatePtr * )_mm_malloc(sizeof(VSLStreamStatePtr)*threads,64);
    for(int i=0; i<threads; i++){
        vslNewStream(&streams[i], VSL_BRNG_MT2203+i,seed);
    }
    //Arrays to store integral and uncertainty for each iteration
    double * integral = (double *)_mm_malloc(sizeof(double)*iterations,64);
    double * sigmas = (double *)_mm_malloc(sizeof(double)*iterations,64);
    for(int i=0; i<iterations; i++){
        integral[i] = 0;
        sigmas[i] = 0;
    }
    //Points per each box
    int pointsPerBox = samples/numBoxes;
    //Array storing the box limits (stores x limits then y limits and so on) intervals+1 to store all limits
    double * boxLimits = (double *)_mm_malloc(sizeof(double)*(intervals+1)*dimensions,64);
    //Array to store average function values for each box
    double * heights = (double *)_mm_malloc(sizeof(double)*dimensions*intervals,64);
    //Array storing values of m
    double * mValues = (double *)_mm_malloc(sizeof(double)*intervals,64);
    //Array storing widths of sub boxes
    double * subWidths = (double *) _mm_malloc(sizeof(double)*intervals,64);
    //Getting initial limits for the boxes 
    for(int i=0; i<dimensions; i++){
        double boxWidth = (limits[(2*i)+1]-limits[2*i])/intervals;
        //0th iteration
        boxLimits[i*(intervals+1)] = limits[2*i];
        for(int j=1; j<=intervals; j++){
            int x = (i*(intervals+1))+j;
            boxLimits[x] =  boxLimits[x-1]+boxWidth;
        }
    };
    //Pointer to store random generated  numbers
      double  randomNums[dimensions]__attribute__((aligned(64)));
      int  binNums[dimensions]__attribute__((aligned(64)));
    //Double to store p(x) denominator for monte carlo
    double prob;
    //Values to store integral and sigma for each thread so they can be reduced in OpenMp
    double integralTemp;
    double sigmaTemp;
    double heightsTemp[dimensions*intervals]__attribute__((aligned(64)));
    int threadNum;
#pragma omp parallel  default(none) private(sigmaTemp,integralTemp,binNums,randomNums,prob,threadNum,heightsTemp) shared(iterations,subIntervals,alpha,mValues,subWidths,streams,samples,boxLimits,intervals, integral, sigmas, heights, threads, volume, samplesAfter, switchIteration, params) 
    {
        for(int iter=0; iter<iterations; iter++){ 
            //Stepping up to more samples when grid calibrated
            if(iter==switchIteration){
                samples = samplesAfter;
            }
            //Performing  iterations
            for(int i=0; i<dimensions*intervals; i++){
                heightsTemp[i] = 0;
            }

            integralTemp = 0; 
            sigmaTemp = 0;
            //Getting chunk sizes for each thread
            threadNum = omp_get_thread_num();
            int seg = ceil((double)samples/threads);
            int lower = seg*threadNum;
            int upper = seg*(threadNum+1);
            if(upper > samples){
                upper = samples;
            };
            //Spliting monte carlo up
            for(int i=0; i<seg; i++){
                prob = 1;
                //Randomly choosing bins to sample from
                viRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,binNums,0,intervals);
                vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,randomNums,0,1);
                //Getting samples from bins
                for(int j=0; j<dimensions; j++){
                    int x = ((intervals+1)*j)+binNums[j];
                    randomNums[j] *= (boxLimits[x+1]-boxLimits[x]);
                    randomNums[j] += boxLimits[x];
                    prob *= 1.0/(intervals*(boxLimits[x+1]-boxLimits[x]));
                }
                //Performing evaluation of function and adding it to the total integral
                double eval = evaluate(randomNums,params);
                integralTemp += eval/prob;
                sigmaTemp += (eval*eval)/(prob*prob);
                //Calculating the values of f for bin resising
                for(int j=0; j<dimensions; j++){
                    int x = binNums[j]+(j*intervals);
                    //May need to initialize heights
                    // #pragma omp atomic
                    // printf("heightsTemp before=%f\n",heightsTemp[x]);
                    heightsTemp[x] += eval;
                    // printf("heightsTemp=%f x=%d eval=%f thread=%d\n",heightsTemp[x],x,eval,omp_get_thread_num());
                }

            } 
#pragma omp critical
            {
                integral[iter] += integralTemp;
                sigmas[iter] += sigmaTemp;
                for(int k=0; k<dimensions*intervals; k++){
                    // printf("heightTemp[k]=%f k=%d\n",heightsTemp[k],k);
                    heights[k] += heightsTemp[k];
                }
            }
#pragma omp barrier
#pragma omp single
            {
                //Calculating the values of sigma and the integral
                integral[iter] /= samples;
                sigmas[iter] /= samples;
                sigmas[iter] -= (integral[iter]*integral[iter]);
                sigmas[iter] /= (samples-1);
                 // printf("integral=%f\n",integral[iter]);

                //Readjusting the box widths based on the heights
                //Creating array to store values of m and their sum 
                int totalM=0; 
                //Doing for each dimension seperately
                for(int i=0; i<dimensions; i++){
                    double sum = 0;
                    //Getting the sum of f*delta x
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j ;
                        //May be bug with these indicies
                        sum += heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                    }
                    //Performing the rescaling 
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j;
                        double value = heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                        mValues[j] = ceil(subIntervals*pow((value-1)*(1.0/log(value)),alpha));
                        subWidths[j] = (boxLimits[x+1+i]-boxLimits[x+i])/mValues[j];
                        totalM += mValues[j];
                    }
                    int mPerInterval = totalM/intervals;
                    int mValueIterator = 0;
                    //Adjusting the intervals going from 1 to less than intervals to keep the edges at the limits
                    for(int j=1; j<intervals; j++){
                        double width = 0;
                        for(int y=0; y<mPerInterval; y++){
                            width += subWidths[mValueIterator];
                            mValues[mValueIterator]--;
                            if(mValues[mValueIterator]==0){
                                mValueIterator++;
                            }
                        }
                        //NEED TO SET BOX LIMITS NOW  
                        int x = j+(i*(intervals+1));
                        boxLimits[x] = boxLimits[x-1]+width;    
                    }
                    //Setting mvalues etc. (reseting memory allocated before the dimensions loop to 0)
                    totalM = 0;
                    for(int k=0; k<intervals; k++){
                        subWidths[k] = 0;
                        mValues[k] = 0;

                    }
                }

                //Setting heights to zero for next iteration
                for(int i=0; i<intervals*dimensions; i++ ){
                    heights[i] = 0;
                }
            }

            
        }
    }
    //All iterations done 
    //Free stuff
    
    _mm_free(subWidths);
    _mm_free(mValues);
    _mm_free(boxLimits);
    _mm_free(streams);
    _mm_free(heights);
   
    //Calculating the final value of the integral
    double denom = 0;
    double numerator =0;
    for(int i=7; i<iterations; i++){
        numerator += integral[i]*((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
        denom += ((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
         // printf("integral=%f sigma=%f\n",integral[i],sigmas[i]);
    }
    double output  = numerator/denom;
    //Calculating value of x^2 to check if result can be trusted
    double chisq = 0;
    for(int i=0; i<iterations; i++){
       chisq += (((integral[i]-output)*(integral[i]-output))/(sigmas[i]*sigmas[i]));
    }
    if(chisq>iterations){
        printf("Chisq value is %f, it should be not much greater than %d (iterations-1) Integral:%f Analytical Value=%f\n",chisq,iterations-1,output,normValue(params));
    }
      _mm_free(integral);
      _mm_free(sigmas);
    return output;
    
}
Ejemplo n.º 18
0
// Measure genome repetitiveness using the rate of k-mers that branch on
// both ends.
//
// For k = 16, 21, ..., 81 a fixed number of random strings is sampled from
// the BWT. The first k bases of each sample are tested if the k-mer occurs
// often enough, and a k-mer counts as a "double branch" when it has more
// than one valid neighbour in both the sense and antisense directions.
// One {k, num_kmers, num_branches} object per k is written into the
// "DoubleBranch" JSON array.
void generate_double_branch(JSONWriter* pWriter, const BWTIndexSet& index_set)
{
    int sample_count = 50000;
    size_t test_coverage_floor = 5;
    size_t branch_coverage_floor = 3;
    double branch_ratio_floor = 0.5f;

    pWriter->String("DoubleBranch");
    pWriter->StartArray();

    size_t k = 16;
    while(k < 86)
    {
        size_t branching_kmers = 0;
        size_t tested_kmers = 0;

#if HAVE_OPENMP
        omp_set_num_threads(opt::numThreads);
        #pragma omp parallel for
#endif
        for(int sample = 0; sample < sample_count; ++sample)
        {
            std::string read = BWTAlgorithms::sampleRandomString(index_set.pBWT);

            // Samples shorter than k cannot provide a k-mer
            if(read.size() < k)
                continue;

            std::string kmer = read.substr(0, k);

            // Only k-mers with enough occurrences are tested
            size_t occurrences = BWTAlgorithms::countSequenceOccurrences(kmer, index_set);
            if(occurrences < test_coverage_floor)
                continue;

            std::string sense_neighbors =
                get_valid_dbg_neighbors_coverage_and_ratio(kmer, index_set,
                                                           branch_coverage_floor,
                                                           branch_ratio_floor,
                                                           ED_SENSE);

            std::string antisense_neighbors =
                get_valid_dbg_neighbors_coverage_and_ratio(kmer, index_set,
                                                           branch_coverage_floor,
                                                           branch_ratio_floor,
                                                           ED_ANTISENSE);

            bool branches_both_ways = antisense_neighbors.size() > 1 &&
                                      sense_neighbors.size() > 1;

            // The counters are shared between threads
#if HAVE_OPENMP
            #pragma omp critical
#endif
            {
                if(branches_both_ways)
                    branching_kmers += 1;
                tested_kmers += 1;
            }
        }

        pWriter->StartObject();
        pWriter->String("k");
        pWriter->Int(k);
        pWriter->String("num_kmers");
        pWriter->Int(tested_kmers);
        pWriter->String("num_branches");
        pWriter->Int(branching_kmers);
        pWriter->EndObject();

        k += 5;
    }

    pWriter->EndArray();
}
Ejemplo n.º 19
0
// An old main(), including a serial bottleneck (presumably the grid that is
// rebuilt by a single thread at the start of every step).  I've left it here
// for now for benchmarking purposes.
//
// Options (see the -h text): -n <int> particles, -o <file> particle output,
// -s <file> summary output, -no disables checks and output, -p <int> caps
// the number of threads.  Runs 1000 time steps and always returns 0.
int bottlenecked_main(int argc, char **argv) {
    // Team size of the parallel region; written by exactly one thread below.
    int numthreads = 1;

    if( find_option( argc, argv, "-h" ) >= 0 )
    {
        printf( "Options:\n" );
        printf( "-h to see this help\n" );
        printf( "-n <int> to set number of particles\n" );
        printf( "-o <filename> to specify the output file name\n" );
        printf( "-s <filename> to specify a summary file name\n" );
        printf( "-no turns off all correctness checks and particle output\n");
        printf( "-p <int> to set the (maximum) number of threads used\n");
        return 0;
    }

    const int n = read_int( argc, argv, "-n", 1000 );
    const bool fast = (find_option( argc, argv, "-no" ) != -1);
    const char *savename = read_string( argc, argv, "-o", NULL );
    const char *sumname = read_string( argc, argv, "-s", NULL );
    const int num_threads_override = read_int( argc, argv, "-p", 0);

    // A file that cannot be opened yields NULL and is treated like "not requested".
    FILE *fsave = savename ? fopen( savename, "w" ) : NULL;
    FILE *fsum = sumname ? fopen ( sumname, "a" ) : NULL;
    const double size = set_size( n );
    // We need to set the size of a grid square so that the average number of
    // particles per grid square is constant.  The simulation already ensures
    // that the average number of particles in an arbitrary region is constant
    // and proportional to the area.  So this is just a constant.
    const double grid_square_size = sqrt(0.0005) + 0.000001;
    const int num_grid_squares_per_side = size / grid_square_size;
    printf("Using %d grid squares of side-length %f for %d particles.\n", num_grid_squares_per_side*num_grid_squares_per_side, grid_square_size, n);
    std::unique_ptr<std::vector<particle_t> > particles = init_particles(n);

    if (num_threads_override > 0) {
      omp_set_dynamic(0);
      omp_set_num_threads(num_threads_override);
    }

    //
    //  simulate a number of time steps
    //
    double simulation_time = read_timer( );

    int max_num_threads = omp_get_max_threads();

    // User-defined reductions aren't available in the version of OMP we're
    // using.  Instead, we accumulate per-thread stats in this array
    // and reduce manually when we're done.
    Stats per_thread_stats[max_num_threads];

    // Shared across threads.
    std::unique_ptr<Grid> g(new Grid(size, num_grid_squares_per_side));

    #pragma omp parallel
    {
    // Record the team size once.  Letting every thread store to the shared
    // variable (as the previous version did) is a data race.
    #pragma omp single
    numthreads = omp_get_num_threads();

    // Declared inside the parallel region, so every thread has its own copy;
    // no private() clause is needed.
    const int thread_idx = omp_get_thread_num();

    for (int step = 0; step < 1000; step++) {
      // One thread rebuilds the grid from the current particle positions;
      // the implicit barrier at the end of "single" makes it visible to all.
      #pragma omp single
      g.reset(new Grid(size, num_grid_squares_per_side, *particles));

      //TODO: Could improve data locality by blocking according to the block
      // structure of the grid.  That would require keeping track, dynamically,
      // of the locations of each particle.  It would be interesting to test
      // whether manually allocating sub-blocks (as in the distributed memory
      // code) to threads improves things further.
      #pragma omp for
      for (int i = 0; i < n; i++) {
        particle_t& p = (*particles)[i];
        p.ax = p.ay = 0;
        std::unique_ptr<SimpleIterator<particle_t&> > neighbors = (*g).neighbor_iterator(p);
        while (neighbors->hasNext()) {
          particle_t& neighbor = neighbors->next();
          apply_force(p, neighbor, per_thread_stats[thread_idx]);
        }
      }

      // There is an implicit barrier here, which is important for correctness.
      // (Technically, some asynchrony could be allowed: A thread's sub-block
      // can be moved once it receives force messages from its neighboring
      // sub-blocks.)

      //
      //  move particles
      //
      #pragma omp for
      for (int i = 0; i < n; i++) {
        move((*particles)[i]);
      }

      if (!fast) {
        //
        //  save if necessary
        //
        #pragma omp master
        if( fsave && (step%SAVEFREQ) == 0 ) {
          save( fsave, n, (*particles).data() );
        }
      }
    }
    }
    simulation_time = read_timer( ) - simulation_time;

    // Could do a tree reduce here, but it seems unnecessary.
    Stats overall_stats;
    for (int t = 0; t < max_num_threads; t++) {
      overall_stats.aggregate_left(per_thread_stats[t]);
    }

    printf( "n = %d,threads = %d, simulation time = %g seconds", n,numthreads, simulation_time);

    if (!fast) {
      //
      //  -the minimum distance absmin between 2 particles during the run of the simulation
      //  -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8
      //  -A simulation were particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05
      //
      //  -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting
      //
      printf( ", absmin = %lf, absavg = %lf", overall_stats.min, overall_stats.avg);
      if (overall_stats.min < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting");
      if (overall_stats.avg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting");
    }
    printf("\n");

    //
    // Printing summary data
    //
    if( fsum)
        fprintf(fsum,"%d %d %g\n",n,numthreads,simulation_time);

    //
    // Clearing space
    //
    if( fsum )
        fclose( fsum );

    if( fsave )
        fclose( fsave );

    return 0;
}
Ejemplo n.º 20
0
// Generate random walk lengths.
//
// For k = 16, 21, ..., 81 a fixed number of random strings is sampled from
// the BWT. Each sufficiently covered, not yet visited starting k-mer is
// extended one base at a time through randomly chosen valid neighbours
// until no neighbour remains or the maximum length is reached. The length
// of every walk is written to the "walk_lengths" array of that k inside the
// "RandomWalkLength" JSON array.
void generate_random_walk_length(JSONWriter* pWriter, const BWTIndexSet& index_set)
{
    int sample_count = 1000;
    size_t start_coverage_floor = 5;
    double neighbor_ratio_cutoff = 0.75f;
    size_t walk_limit = 30000;

    // A bloom filter marks the k-mers that have been visited.
    // New walks are not allowed to start at a marked k-mer.
    size_t filter_overcommit = 20;
    BloomFilter* visited = new BloomFilter;
    visited->initialize(sample_count * walk_limit * filter_overcommit, 3);

    pWriter->String("RandomWalkLength");
    pWriter->StartArray();

    size_t k = 16;
    while(k < 86)
    {
        pWriter->StartObject();
        pWriter->String("k");
        pWriter->Int(k);
        pWriter->String("walk_lengths");
        pWriter->StartArray();
#if HAVE_OPENMP
        omp_set_num_threads(opt::numThreads);
        #pragma omp parallel for
#endif
        for(int sample = 0; sample < sample_count; ++sample)
        {
            size_t steps = 0;
            std::string read = BWTAlgorithms::sampleRandomString(index_set.pBWT);
            if(read.size() < k)
                continue;

            std::string kmer = read.substr(0, k);

            // Skip starting points that were already visited ...
            if(visited->test(kmer.c_str(), k))
                continue;
            // ... or that are not covered well enough
            if(BWTAlgorithms::countSequenceOccurrences(kmer, index_set) < start_coverage_floor)
                continue;
            visited->add(kmer.c_str(), k);

            for(; steps < walk_limit; )
            {
                std::string candidates = get_valid_dbg_neighbors_ratio(kmer, index_set, neighbor_ratio_cutoff);
                if(candidates.empty())
                    break;

                // Slide the k-mer window by one randomly chosen base
                char next_base = candidates[rand() % candidates.size()];
                kmer.erase(0, 1);
                kmer.push_back(next_base);
                steps += 1;
                visited->add(kmer.c_str(), k);
            }
#if HAVE_OPENMP
        #pragma omp critical
#endif
            pWriter->Int(steps);
        }
        pWriter->EndArray();
        pWriter->EndObject();

        k += 5;
    }

    pWriter->EndArray();
    delete visited;
}
Ejemplo n.º 21
0
/**
 * Distributed matrix multiplication test driver (MPI ranks, OpenMP threads).
 *
 * Usage: <prog> MatrixSize NumberOfThreads   (both must be bigger than 1)
 *
 * Rank 0 builds A (diagonal matrix with A[i][i] = i) and B (identity),
 * broadcasts A, compresses B and sends one slice to every other rank
 * (presumably columns [i*nColumns, (i+1)*nColumns) -- the helper is defined
 * elsewhere), multiplies its own slice, then receives and joins the partial
 * results and prints C (or only its diagonal, depending on the size)
 * together with the elapsed time.
 * Every other rank receives A and its slice Bi, computes Ci = A x Bi and
 * returns the compressed result to rank 0.
 *
 * N is assumed to be a multiple of the number of MPI ranks, and N*N must fit
 * in an int because it is used as the MPI_Bcast element count.
 * mpi_id and mpi_size are not declared in this function; they come from
 * elsewhere in the file.
 */
int main(int argc, char **argv)
{
	int N;
	int nThreads;
	int nColumns;
	int i,j,k;
	double *A,*Bi,*C,*Ci;
	int BiRows, BiColumns;
	CompressedMatrix *cBi;
	CompressedMatrix *cCi;
	double elapsed;

	char printDebug;

	//************ Check Input **************/
	if(argc < 3){
		printf("Usage: %s MaxtrixSize NumberOfThreads\n" , argv[0] );
		exit(EXIT_FAILURE);
	}

	N = atoi(argv[1]);
	if( N <= 1){
		printf("MatrixSize must be bigger than 1!");
		exit(EXIT_FAILURE);
	}

	nThreads = atoi(argv[2]);
	if( nThreads <= 1){
		printf("NumberOfThreads must be bigger than 1!");
		exit(EXIT_FAILURE);
	}

	omp_set_num_threads(nThreads);
	omp_set_schedule(omp_sched_dynamic, N/10);

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &mpi_id);
	MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
	nColumns = N / mpi_size; //For the moment depend on N being a multiple the number of MPI nodes

	//************ Prepare Matrix **************/
	// Size is computed in size_t so that N*N cannot overflow int here.
	A = (double *) malloc( (size_t)N * (size_t)N * sizeof(double) );
	if((A == NULL) ){
	  printf("Running out of memory!\n");
	  // MPI is already initialised: abort the whole job instead of leaving
	  // the other ranks blocked in a collective.
	  MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
	  exit(EXIT_FAILURE);
	}

	if(mpi_id == 0)
	{
		printDebug = 0;

		if(printDebug) printf("[%d] Generating A ...",mpi_id);
		//Fill matrixes. Generate Identity like matrix for A and B , So C should result in an matrix with a single major diagonal
		for(i=0; i < N; i++ ){
		 for(j=0; j < N; j++){
			A[i+N*j] = (i==j)?i:0.0;
		 }
		}

		tick();

		if(printDebug) printf("[%d] Broadcasting A ...",mpi_id);
		MPI_Bcast( A, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);

		if(printDebug) printf("[%d] Generating B ...",mpi_id);
		double* B; CompressedMatrix* cB;
		B = (double *) malloc( (size_t)N * (size_t)N * sizeof(double) );
		if(B == NULL){
			// Previously unchecked: a failed allocation was dereferenced below.
			printf("Running out of memory!\n");
			MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
			exit(EXIT_FAILURE);
		}
		for(i=0; i < N; i++ ){
		 for(j=0; j < N; j++){
			B[j+N*i] = (i==j)?1.0:0.0;
		 }
		}

		if(printDebug) printf("[%d] Compressing and distributing Bi ...",mpi_id);
		cB = compressMatrix(B, N, N);
		for(i=1; i < mpi_size; i++){
			mpiSendCompressedMatrix(cB, i*nColumns, (i+1)*nColumns, i);
		}

		//Fake shorten cB so that uncompressing it yields only rank 0's slice
		free(B);
		cB->columns = nColumns;
		uncompressMatrix(cB, &Bi, &BiRows, &BiColumns);
		Ci = MatrixMultiply(A, N, N, Bi, nColumns);

		if(printDebug) printf("[%d] Ci = A x Bi ...", mpi_id);
		if(printDebug) printMatrix(Ci, N, nColumns);

		cCi = compressMatrix(Ci, N, nColumns);
		if(printDebug) printf("cCi ...\n");
		if(printDebug) printCompressedMatrix(cCi);

		MPI_Barrier(MPI_COMM_WORLD);

		if(printDebug) printf("[%d] Receiving Ci fragments ...\n", mpi_id);
		CompressedMatrix** Cii;
		Cii = (CompressedMatrix**) malloc(sizeof(CompressedMatrix*) * mpi_size);
		if(Cii == NULL){
			perror("malloc");
			MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
			exit(EXIT_FAILURE);
		}
		Cii[0] = cCi;
		for(i=1; i < mpi_size; i++){
			Cii[i] = mpiRecvCompressedMatrix(N,nColumns, i);
		}

		if(printDebug) printf("[%d] Joining Cii ...\n", mpi_id);
		CompressedMatrix *cC;
		cC = joinCompressedMatrices(Cii, mpi_size);
		if(printDebug) printCompressedMatrix(cC);

		elapsed =  tack();

		printf("[%d] C ...\n", mpi_id);
		// i and j receive the dimensions of the uncompressed result
		uncompressMatrix(cC, &C, &i,&j);
		if(i <= 20){
			printMatrix(C, i,j);
		} else {
			if(i < 1000){
				printf("C is too big, only printing first diagonal %d.\n[",j);
				for(k=0; (k < i) && (k < j); k++){
					printf("%3.2f ",C[k + k*j]);
				}
				printf("]\n");
			} else {
				printf("C is just too big!");
			}
		}

		printf("Took [%f] seconds!\n",elapsed);

	} else {
		printDebug = 0;

		if(printDebug) printf("[%d] Waiting for A ...",mpi_id);
		MPI_Bcast( A, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);

		if(printDebug) printf("[%d] Received A ...\n", mpi_id);
		if(printDebug) printMatrix(A, N, N);

		if(printDebug) printf("[%d] Waiting for Bi ...",mpi_id);
		cBi = mpiRecvCompressedMatrix(N, nColumns, 0);
		uncompressMatrix(cBi, &Bi, &BiRows, &BiColumns);

		if(printDebug) printf("[%d] Received Bi ...",mpi_id);
		if(printDebug) printMatrix(Bi,BiRows, BiColumns);

		assert( (BiRows == N) && "Number or Rows in Bi is not right!");
		assert( (BiColumns == nColumns) && "Number or Columns in Bi is not right!");

		Ci = MatrixMultiply(A, N, N, Bi, BiColumns);

		if(printDebug) printf("[%d] Ci = A x Bi ...", mpi_id);
		if(printDebug) printMatrix(Ci, N, nColumns);

		cCi = compressMatrix(Ci, N, nColumns);
		if(printDebug) printCompressedMatrix(cCi);

		MPI_Barrier(MPI_COMM_WORLD);

		if(printDebug) printf("[%d] Returning Ci ...\n", mpi_id);
		mpiSendCompressedMatrix(cCi, 0, nColumns, 0);

	}


	MPI_Finalize();
	// NxM = NxN * NxM
	exit(EXIT_SUCCESS);
}
Ejemplo n.º 22
0
// Estimate the PCR duplication rate by sampling read pairs.
//
// Reads 2*p and 2*p+1 of the index are treated as the two ends of pair p.
// For each sampled pair, all other pairs whose first read starts with the
// same k-mer as r1 and whose second read starts with the same k-mer as r2
// are collected; the sample counts as a duplicate when some other pair
// matches on both ends. The counts are written to the "PCRDuplicates" JSON
// object. When the index holds fewer than two strings nothing is sampled
// and both counts are zero.
void generate_duplication_rate(JSONWriter* pJSONWriter, const BWTIndexSet& index_set)
{
    int n_samples = 10000;
    size_t k = 50;

    size_t total_pairs = index_set.pBWT->getNumStrings() / 2;
    size_t num_pairs_checked = 0;
    size_t num_duplicates = 0;

    // rand() % total_pairs below is undefined for total_pairs == 0,
    // so skip the sampling loop entirely in that case.
    if(total_pairs == 0)
        n_samples = 0;

#if HAVE_OPENMP
    omp_set_num_threads(opt::numThreads);
    #pragma omp parallel for
#endif
    for(int i = 0; i < n_samples; ++i)
    {
        // Choose a read pair
        int64_t source_pair_idx = rand() % total_pairs;
        std::string r1 = BWTAlgorithms::extractString(index_set.pBWT, source_pair_idx * 2);
        std::string r2 = BWTAlgorithms::extractString(index_set.pBWT, source_pair_idx * 2 + 1);

        // Get the interval for $k1/$k2 which corresponds to the 
        // lexicographic rank of reads starting with those kmers
        std::string k1 = "$" + r1.substr(0, k);
        std::string k2 = "$" + r2.substr(0, k);

        BWTInterval i1 = BWTAlgorithms::findInterval(index_set.pBWT, k1);
        BWTInterval i2 = BWTAlgorithms::findInterval(index_set.pBWT, k2);

        std::vector<int64_t> pair_ids;

        // First reads (even ids) sharing the k-mer of r1
        for(int64_t j = i1.lower; j <= i1.upper; ++j)
        {
            int64_t read_id = index_set.pSSA->lookupLexoRank(j);
            if(read_id % 2 == 1)
                continue;
            
            int64_t pair_id = read_id % 2 == 0 ? read_id / 2 : (read_id - 1) / 2;
            if(pair_id != source_pair_idx)
                pair_ids.push_back(pair_id);
        }

        // Second reads (odd ids) sharing the k-mer of r2
        for(int64_t j = i2.lower; j <= i2.upper; ++j)
        {
            int64_t read_id = index_set.pSSA->lookupLexoRank(j);
            if(read_id % 2 == 0)
                continue;
            int64_t pair_id = read_id % 2 == 0 ? read_id / 2 : (read_id - 1) / 2;
            if(pair_id != source_pair_idx)
                pair_ids.push_back(pair_id);
        }

        // A pair id that shows up twice after sorting was found in both
        // intervals, i.e. it matches the sampled pair on both ends.
        std::sort(pair_ids.begin(), pair_ids.end());
        std::vector<int64_t>::iterator iter = 
            std::adjacent_find(pair_ids.begin(), pair_ids.end());
                                           
        bool has_duplicate = iter != pair_ids.end();
#if HAVE_OPENMP
        #pragma omp critical
#endif
        {
            num_pairs_checked += 1;
            num_duplicates += has_duplicate;
        }
    }

    pJSONWriter->String("PCRDuplicates");
    pJSONWriter->StartObject();
    pJSONWriter->String("num_duplicates");
    pJSONWriter->Int(num_duplicates);
    pJSONWriter->String("num_pairs");
    pJSONWriter->Int(num_pairs_checked);
    pJSONWriter->EndObject();
}
int main(int argc, char* argv[]) {
    ArgProcessor args(argc, argv);
    if(args.isArgSet("--help") ||
       (!(args.isArgSet("--reads") && args.isArgSet("--kmers")))) {
        cerr << usage(args) << endl << endl;
        exit(1);
    }
    string reads_fasta_file = args.getStringVal("--reads");
    string kmers_fasta_file = args.getStringVal("--kmers");
    bool is_DS = (! args.isArgSet("--SS"));
    if(args.isArgSet("--kmer_size")) {
        KMER_SIZE = args.getIntVal("--kmer_size");
        if(KMER_SIZE < 20) {
            cerr << "Error, min kmer size is 20";
            exit(2);
        }
    }
    if(args.isArgSet("--monitor")) {
        IRKE_COMMON::MONITOR = args.getIntVal("--monitor");
    }
    if(omp_get_max_threads() > MAX_THREADS) {
        omp_set_num_threads(MAX_THREADS);
    }
    KmerCounter kcounter (KMER_SIZE, is_DS);
    populate_kmer_counter(kcounter, kmers_fasta_file);
    Fasta_reader fasta_reader(reads_fasta_file);
    ofstream* filewriter = NULL;
    ofstream* covwriter = NULL;
    bool write_coverage_info = args.isArgSet("--capture_coverage_info");
    while (true) {
        Fasta_entry fe = fasta_reader.getNext();
        string sequence = fe.get_sequence();
        if(sequence == "") break;
        string header = fe.get_header();
        vector<unsigned int> kmer_coverage = compute_kmer_coverage(sequence, kcounter);
        unsigned int median_cov = median_coverage(kmer_coverage);
        float mean_cov = mean(kmer_coverage);
        float stdev = stDev(kmer_coverage);
        float pct_stdev_of_avg = stdev/mean_cov*100;
        stringstream stats_text;
        stats_text << median_cov << "\t"
                   << mean_cov << "\t"
                   << stdev << "\t"
                   << pct_stdev_of_avg << "\t"
                   << fe.get_accession();
        if(write_coverage_info) {
            // add the coverage info
            stats_text << "\t";
            for (int i = 0; i < kmer_coverage.size(); i++) {
                stats_text<< kmer_coverage[i];
                if(i != kmer_coverage.size() - 1) {
                    stats_text<< ",";
                }
            }
        }
        stats_text << endl;
        cout << stats_text.str();

        if (mean_cov < 0) {
            cerr << "ERROR, cannot have negative coverage!!" << endl;
            exit(1);
        }
        
    }
    return(0);
}
Ejemplo n.º 24
0
// Test driver for the FFT-based candidate search.
//
// Every DM series of the input buffer is cut into chunks of chunklen
// samples, extended by `overlap` samples on both sides (zero padded at the
// ends of the series). Each chunk is thresholded directly, then convolved
// with one kernel per downsampling factor and thresholded again. Surviving
// candidates are pruned and collected per thread, and finally written to
// "output.dat". Returns 0 on success and 1 on allocation or file errors.
//
// fftlen, chunklen, nsamp, ndms, downfactors, maxDownfact, threads, tsamp
// and threshold are not declared here; they come from elsewhere in the file.
// NOTE(review): chunk offsets assume chunklen >= overlap -- confirm.
int main()
{
	unsigned overlap = (fftlen - chunklen) / 2;

	// Create data. Zero-filled so that a run without the test file below
	// does not read uninitialised memory.
	float *input = (float *) calloc((size_t) nsamp * ndms, sizeof(float));
	if (input == NULL)
	{
		perror("calloc");
		return 1;
	}

    // LOAD FILE: FOR TESTING ONLY
//	FILE *fp = fopen("/home/lessju/Code/MDSM/src/prototypes/TestingCCode.dat", "rb");
//	printf("Read: %ld\n", fread(input, sizeof(float), nsamp, fp));
//	fclose(fp);

	// Initialise templating: count the downfactors not exceeding maxDownfact
	unsigned numDownFacts;
	for(numDownFacts = 0; numDownFacts < 12; numDownFacts++)
		if (downfactors[numDownFacts] > maxDownfact)
			break;

	// Allocate kernels
	fftwf_complex **kernels = (fftwf_complex **) malloc(numDownFacts * sizeof(fftwf_complex *));
	for(unsigned i = 0; i < numDownFacts; i++)
		kernels[i] = (fftwf_complex *) fftwf_malloc(fftlen / 2 * sizeof(fftwf_complex));

	// Create kernels
	for(unsigned i = 0; i < numDownFacts; i++)
		createFFTKernel(kernels[i], downfactors[i], fftlen);

	// Start timing
	struct timeval start, end;
	long mtime, seconds, useconds;
	gettimeofday(&start, NULL);

	// Set number of OpenMP threads
	omp_set_num_threads(threads);

	// Create candidate container. Slots start out NULL because the runtime
	// may start fewer than `threads` threads; unused slots are skipped below.
	std::vector<Candidate> **candidates = (std::vector<Candidate> **) calloc(threads, sizeof(std::vector<Candidate> *));
	if (candidates == NULL)
	{
		perror("calloc");
		return 1;
	}

	unsigned nchunks = nsamp / chunklen;

	#pragma omp parallel \
		shared(kernels, input, ndms, nsamp, fftlen, chunklen, numDownFacts, tsamp, \
			   overlap, downfactors, threshold, nchunks, candidates)
	{
		// Get thread details
		unsigned numThreads = omp_get_num_threads();
		unsigned threadId = omp_get_thread_num();

		// Allocate memory to be used in processing
		candidates[threadId] = new std::vector<Candidate>();

		float *chunk = (float *) fftwf_malloc(fftlen * sizeof(float)); // Store input chunk
		fftwf_complex *fftChunk = (fftwf_complex *)
				fftwf_malloc(fftlen / 2 * sizeof(fftwf_complex));      // Store FFT'ed input chunk
		fftwf_complex *convolvedChunk = (fftwf_complex *)
				fftwf_malloc(fftlen / 2 * sizeof(fftwf_complex));      // Store FFT'ed, convolved input chunk
		InitialCandidate *initialCands = (InitialCandidate *)
				malloc(fftlen * sizeof(InitialCandidate));             // Store initial Candidate list

		// Create FFTW plans (these calls are not thread safe, place in critical section)
		fftwf_plan chunkPlan, convPlan;
		#pragma omp critical
		{
			chunkPlan = fftwf_plan_dft_r2c_1d(fftlen, chunk, fftChunk, FFTW_ESTIMATE);
			convPlan  = fftwf_plan_dft_c2r_1d(fftlen, convolvedChunk, chunk, FFTW_ESTIMATE) ;
		}

		// DM series are split into contiguous blocks, one per thread. The
		// last thread also takes the remainder so that no series is dropped
		// when ndms is not a multiple of the team size.
		unsigned dmsPerThread = ndms / numThreads;
		unsigned firstDm = dmsPerThread * threadId;
		unsigned endDm = (threadId == numThreads - 1) ? ndms : firstDm + dmsPerThread;

		// Process all DM buffers associated with this thread
		for(unsigned d = firstDm; d < endDm; d++)
		{
			// Start of this DM series inside the input buffer
			size_t dmOffset = (size_t) d * nsamp;

			std::vector<Candidate> dmCandidates;

			// Process all data chunks
			for (unsigned c = 0; c < nchunks; c++)
			{
				if (c == 0)                // First chunk, we need to insert 0s at the beginning
				{
					memset(chunk, 0, overlap * sizeof(float));
					// Copy from this DM's series (the offset was missing before,
					// so every DM read the first chunk of DM 0)
					memcpy(chunk + overlap, input + dmOffset, (fftlen - overlap) * sizeof(float));
				}
				else
				{
					// First sample of the chunk including its leading overlap
					size_t beg = dmOffset + (size_t) c * chunklen - overlap;

					if (c == nchunks - 1) // Last chunk, insert 0s at the end
					{
						memset(chunk + fftlen - overlap, 0, overlap * sizeof(float));
						memcpy(chunk, input + beg, (fftlen - overlap) * sizeof(float));
					}
					else
						memcpy(chunk, input + beg, fftlen * sizeof(float));
				}

				// Search non-downsampled data first
				for(unsigned i = overlap; i < chunklen; i++)
					if (chunk[i] >= threshold)
					{
						Candidate newCand = { d, chunk[i], 25, c*chunklen+i, 1 };
						dmCandidates.push_back(newCand);
					}

				// FFT current chunk
				fftwf_execute(chunkPlan);

				// Loop over all downfactor levels
				for(unsigned s = 0; s < numDownFacts; s++)
				{
					// Reset initial Candidate List
					memset(initialCands, 0, fftlen * sizeof(InitialCandidate));

					// Perform convolution
					convolve(fftChunk, kernels[s], convolvedChunk, chunk, fftlen, convPlan);

					// Threshold results and build preliminary candidate list
					unsigned numCands = 0;
					for(unsigned i = overlap; i < chunklen; i++)
					{
						if (chunk[i] >= threshold)
						{
							initialCands[numCands].bin = i;
							initialCands[numCands].value = chunk[i];
							numCands++;
						}
					}

					if (numCands != 0)
					{
						// Prune candidate list
						pruneRelated(initialCands, downfactors[s], numCands);

						// Store candidate list. Entries whose value is 0 are
						// skipped (presumably zeroed by pruneRelated).
						for(unsigned k = 0; k < numCands; k++)
							if (initialCands[k].value != 0)
							{
								// Value and bin come from entry k itself; the
								// previous code indexed with the DM loop counter
								// and used the list position as the time bin.
								Candidate newCand = { d, initialCands[k].value, 5,
								                      c * chunklen + initialCands[k].bin, downfactors[s] };
								dmCandidates.push_back(newCand);
							}
					}
				}
			}

			// Remove redundant candidates across downsampling levels
			if (dmCandidates.size() > 0)
			{
				char *mask = (char *) malloc(dmCandidates.size() * sizeof(char));
				pruneRelatedDownfactors(dmCandidates, mask, numDownFacts);

				// Append to final candidate list. A dedicated index is used:
				// reusing the DM loop counter here corrupted the outer loop.
				for(size_t m = 0; m < dmCandidates.size(); m++)
					if (mask[m])
						candidates[threadId] -> push_back(dmCandidates[m]);

				free(mask);
			}
		}

		// Plan destruction is not thread safe either
		#pragma omp critical
		{
			fftwf_destroy_plan(chunkPlan);
			fftwf_destroy_plan(convPlan);
		}

		// Buffers from fftwf_malloc must be released with fftwf_free
		fftwf_free(convolvedChunk);
		fftwf_free(fftChunk);
		fftwf_free(chunk);
		free(initialCands);
	}

    gettimeofday(&end, NULL);
    seconds  = end.tv_sec  - start.tv_sec;
    useconds = end.tv_usec - start.tv_usec;

    mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
	printf("Processed everything in %ld ms\n", mtime);

	// Now write everything to disk...
	int status = 0;
	FILE *fp2 = fopen("output.dat", "w");
	if (fp2 == NULL)
	{
		perror("output.dat");
		status = 1;
	}

	for(unsigned i = 0; i < threads; i++)
	{
		// Slot of a thread that never ran
		if (candidates[i] == NULL)
			continue;

		if (fp2 != NULL)
			for(unsigned j = 0; j < candidates[i] -> size(); j++)
			{
				Candidate cand = candidates[i] -> at(j);
				fprintf(fp2, "%f,%f,%f,%ld,%d\n", cand.dm, cand.value, cand.time, cand.bin, cand.downfact);
			}

		delete candidates[i];
	}

	if (fp2 != NULL)
	{
		fflush(fp2);
		fclose(fp2);
	}

	// Release the remaining buffers
	free(candidates);
	for(unsigned i = 0; i < numDownFacts; i++)
		fftwf_free(kernels[i]);
	free(kernels);
	free(input);

	return status;
}
Ejemplo n.º 25
0
/**
 * K-means clustering of 2-D points, parallelised with OpenMP.
 *
 * Runs exactly iteration_n rounds. In each round every data point is
 * assigned to the centroid with the smallest squared Euclidean distance
 * (assignment step), then every centroid is replaced by the mean of the
 * points assigned to it (update step). Each thread accumulates its sums and
 * counts in its own slice of a scratch buffer, and the slices are combined
 * per class afterwards, so the assignment loop needs no locking.
 *
 * @param iteration_n  number of rounds to run (no convergence test is made)
 * @param class_n      number of classes / centroids
 * @param data_n       number of data points
 * @param centroids    in: starting centroids; out: centroids after the last round
 * @param data         input points (only read)
 * @param partitioned  out: class index chosen for each data point
 * @param num_threads  value handed to omp_set_num_threads()
 * @param local_size   unused
 * @param argc         unused
 * @param argv         unused
 * @return always 0
 *
 * NOTE(review): a class that receives no points has its sums divided by a
 * zero count; no special handling is done here. Point's member types are
 * not visible from this function, so confirm that this is acceptable.
 */
int kmeans(int iteration_n, int class_n, int data_n, Point* centroids, Point* data, int* partitioned, int num_threads, int local_size, int argc, char** argv)
{
    (void)local_size;
    (void)argc;
    (void)argv;

    // Fix the team size FIRST and only then size the per-thread scratch
    // buffers. Querying omp_get_max_threads() before omp_set_num_threads()
    // sized the buffers for the previous default, so a larger num_threads
    // made threads index past the end of tempCentroids / tempCount.
    omp_set_num_threads(num_threads);
    const int max_threads = omp_get_max_threads();

    // Number of data points in each class
    int* count = new int[class_n];
    // One slice of class_n partial sums / counts per thread
    Point* tempCentroids = new Point[max_threads * class_n];
    int* tempCount = new int[max_threads * class_n];

    // Iterate for the requested number of rounds
    for (int i = 0; i < iteration_n; i++) {
#pragma omp parallel
        {
            const int ithread = omp_get_thread_num();
#pragma omp single
            {
                // Reset every thread's partial sums; the implicit barrier at
                // the end of "single" keeps the team from racing ahead.
                memset(tempCentroids, 0, max_threads * class_n * sizeof(Point));
                memset(tempCount, 0, max_threads * class_n * sizeof(int));
            }
            // Assignment step
#pragma omp for
            for (int data_i = 0; data_i < data_n; ++data_i) {
                float min_dist = FLT_MAX;
                for (int class_i = 0; class_i < class_n; class_i++) {
                    float x = data[data_i].x - centroids[class_i].x;
                    float y = data[data_i].y - centroids[class_i].y;
                    float dist = x * x + y * y;
                    if (dist < min_dist) {
                        partitioned[data_i] = class_i;
                        min_dist = dist;
                    }
                }
                // Sum up and count data for each class in this thread's slice
                int index = ithread * class_n + partitioned[data_i];
                tempCentroids[index].x += data[data_i].x;
                tempCentroids[index].y += data[data_i].y;
                tempCount[index]++;
            }
            // Update step
#pragma omp single
            {
                // Clear sum buffer and class count
                memset(centroids, 0, class_n * sizeof(Point));
                memset(count, 0, class_n * sizeof(int));
            }
#pragma omp for
            for (int class_i = 0; class_i < class_n; ++class_i) {
                // Combine the partial results of all thread slices; slices of
                // threads that did not take part are still zero.
                for (int t = 0; t < max_threads; ++t) {
                    centroids[class_i].x += tempCentroids[t * class_n + class_i].x;
                    centroids[class_i].y += tempCentroids[t * class_n + class_i].y;
                    count[class_i] += tempCount[t * class_n + class_i];
                }
                // Divide the sum by the number of points in the class to get the mean
                centroids[class_i].x /= count[class_i];
                centroids[class_i].y /= count[class_i];
            }
        }
    }
    delete[] tempCount;
    delete[] tempCentroids;
    delete[] count;
    return 0;
}
/*
 * Finds the sub-matrix with the largest element sum in a square integer
 * matrix read from a text file, searching row ranges in parallel with OpenMP.
 *
 * Command line:
 *   argv[1]   input file
 *   -t <n>    optional, anywhere on the command line: number of OpenMP
 *             threads (default 2; non-positive values are ignored)
 *
 * Input format: first the dimension dim, then dim*dim whitespace-separated
 * integers in row-major order.
 *
 * Method: ps[i][j] holds the sum of column j over rows 0..i. For every row
 * range i..k the per-column sums are ps[k][j] - ps[i-1][j], and Kadane's
 * algorithm over the columns finds the best column range for that row range.
 *
 * Returns 0 on success, 1 on a usage or input error.
 */
int main(int argc, char* argv[]) {
    if(argc < 2) {
        usage(argv[0]);
        // usage() is defined elsewhere; if it returns, argv[1] is NULL here
        // and must not be handed to fopen().
        return 1;
    }

    int num_threads = 2;
    if(argc > 2) {
        for(int i = 0; i < argc; i++) {
            if(strcmp(argv[i], "-t") == 0 && argc > i+1) {
                // omp_set_num_threads() requires a positive value
                int requested = atoi(argv[i+1]);
                if(requested > 0) {
                    num_threads = requested;
                }
            }
        }
    }
    omp_set_num_threads(num_threads);

    // Open files
    FILE* input_file = fopen(argv[1], "r");
    if(input_file == NULL) {
        usage(argv[0]);
        // Same reasoning as above: never continue with a NULL stream.
        return 1;
    }

    // Read the matrix dimension. "%d" matches the int argument (the former
    // "%u" did not), and a missing or non-positive dimension is rejected
    // before it is used as a variable-length array bound.
    int dim = 0;
    if(fscanf(input_file, "%d", &dim) != 1 || dim <= 0) {
        fprintf(stderr, "%s: missing or invalid matrix dimension in %s\n", argv[0], argv[1]);
        fclose(input_file);
        return 1;
    }

    // Read the matrix; "%d" skips any leading whitespace (tabs, newlines)
    int mat[dim][dim];
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            int element = 0;
            if(fscanf(input_file, "%d", &element) != 1) {
                fprintf(stderr, "%s: could not read element [%d][%d] from %s\n", argv[0], i, j, argv[1]);
                fclose(input_file);
                return 1;
            }
            mat[i][j] = element;
        }
    }

#ifdef _PRINT_INFO
    // Print the matrix
    printf("Input matrix [%d]\n", dim);
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            printf("%d\t", mat[i][j]);
        }
        printf("\n");
    }
#endif

    // Algorithm based on information obtained here:
    // http://stackoverflow.com/questions/2643908/getting-the-submatrix-with-maximum-sum
    long alg_start = get_usecs();

    // Compute vertical prefix sum
    int ps[dim][dim];

    for (int j=0; j<dim; j++) {
        ps[0][j] = mat[0][j];
        for (int i=1; i<dim; i++) {
            ps[i][j] = ps[i-1][j] + mat[i][j];
        }
    }

#ifdef _PRINT_INFO
    // Print the matrix
    printf("Vertical prefix sum matrix [%d]\n", dim);
    for(int i=0; i<dim; i++) {
        for(int j=0; j<dim; j++) {
            printf("%d\t", ps[i][j]);
        }
        printf("\n");
    }
#endif

    // Best result so far, shared by all threads and updated under "critical"
    int max_sum = mat[0][0];
    int top = 0, left = 0, bottom = 0, right = 0;

    // Auxiliary variables; each thread works on its own private copy
    int sum[dim];   // sum[j]: best sum of a column run ending at column j
    int pos[dim];   // pos[j]: first column of that run
    int local_max;  // column index with the largest sum[j] for this row range

    #pragma omp parallel for private(sum, pos, local_max) schedule(static, 10)
    for (int i=0; i<dim; i++) {
        for (int k=i; k<dim; k++) {
            // Kadane over all columns with the i..k rows
            clear(sum, dim);
            clear(pos, dim);
            local_max = 0;

            // We keep track of the position of the max value over each Kadane execution
            // Notice that we do not keep track of the max value, but only its position
            sum[0] = ps[k][0] - (i==0 ? 0 : ps[i-1][0]);
            for (int j=1; j<dim; j++) {
                if (sum[j-1] > 0) {
                    // Extending the previous run is worthwhile
                    sum[j] = sum[j-1] + ps[k][j] - (i==0 ? 0 : ps[i-1][j]);
                    pos[j] = pos[j-1];
                }
                else {
                    // Start a new run at column j
                    sum[j] = ps[k][j] - (i==0 ? 0 : ps[i-1][j]);
                    pos[j] = j;
                }
                if (sum[j] > sum[local_max]) {
                    local_max = j;
                }
            } // Kadane ends here

            #pragma omp critical
            if (sum[local_max] > max_sum) {
                // sum[local_max] is the new max value
                // the corresponding submatrix goes from rows i..k.
                // and from columns pos[local_max]..local_max
                max_sum = sum[local_max];
                top = i;
                left = pos[local_max];
                bottom = k;
                right = local_max;
            }
        }
    }

    // Compose the output matrix
    int outmat_row_dim = bottom - top + 1;
    int outmat_col_dim = right - left + 1;
    int outmat[outmat_row_dim][outmat_col_dim];
    for(int i=top, k=0; i<=bottom; i++, k++) {
        for(int j=left, l=0; j<=right ; j++, l++) {
            outmat[k][l] = mat[i][j];
        }
    }


    long alg_end = get_usecs();

    // Print output matrix
    printf("Sub-matrix [%dX%d] with max sum = %d, top = %d, bottom = %d, left = %d, right = %d\n", outmat_row_dim, outmat_col_dim, max_sum, top, bottom, left, right);
#ifdef _PRINT_INFO
    for(int i=0; i<outmat_row_dim; i++) {
        for(int j=0; j<outmat_col_dim; j++) {
            printf("%d\t", outmat[i][j]);
        }
        printf("\n");
    }
#endif

    printf("%s,arg(%s),%s,%f sec, threads: %d\n", argv[0], argv[1], "CHECK_NOT_PERFORMED", ((double)(alg_end-alg_start))/1000000, num_threads);

    // Release resources
    fclose(input_file);

    return 0;
}
Ejemplo n.º 27
0
/*
 * Bloom-filter style spell checker.
 *
 * Usage: <program> <word>
 *
 * Loads the word list from "word_list.txt", marks one slot of a byte array
 * per (hash function, word) pair, and then looks up the slots of the word
 * given on the command line. The word is reported as misspelled as soon as
 * one of its slots is unmarked; if all slots are marked it is reported as
 * spelled correctly (which, as with any Bloom filter, can be a false
 * positive). The time for building the table and the lookup is printed.
 *
 * Exits with EXIT_FAILURE on a usage error, if the word list cannot be read
 * or if the table cannot be allocated; returns EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
	HashFunction hf[] = { RSHash, JSHash, ELFHash, BKDRHash, SDBMHash,
		DJBHash, DEKHash, BPHash, FNVHash, APHash,
		hash_div_701, hash_div_899, hash_mult_700, hash_mult_900
	};
	word_list *wl;
	char *word;
	char *bv;
	double start, end, diff;
	size_t wl_size;
	size_t bv_size;
	size_t num_hf;
	size_t i, j;
	unsigned int hash;
	int misspelled;

	/* Set number of threads to 4 */
	omp_set_num_threads(4);

	if (argc != 2) {
		printf("Please give word to spell check\n");
		exit(EXIT_FAILURE);
	}
	word = argv[1];

	/* load the word list */
	wl = create_word_list("word_list.txt");
	if (!wl) {
		fprintf(stderr, "Could not read word list\n");
		exit(EXIT_FAILURE);
	}
	wl_size = get_num_words(wl);

	start = omp_get_wtime();
	/* create the table: one byte per slot, zero-initialised by calloc */
	bv_size = 100000000;
	num_hf = sizeof(hf) / sizeof(HashFunction);
	bv = calloc(bv_size, sizeof(char));
	if (!bv) {
		fprintf(stderr, "Could not allocate bit vector\n");
		destroy_word_list(wl);
		exit(EXIT_FAILURE);
	}

	/* mark the slot of every word under every hash function */
	for (j = 0; j < num_hf; j++) {
		#pragma omp parallel for private(hash)
		for (i = 0; i < wl_size; i++) {
			hash = hf[j] (get_word(wl, i));
			hash %= bv_size;
			/*
			 * Two words can hash to the same slot, so two threads may
			 * store to the same byte at once; the atomic write makes
			 * that store well-defined instead of a data race.
			 */
			#pragma omp atomic write
			bv[hash] = 1;
		}
	}

	/* do the spell checking */
	misspelled = 0;
	for (j = 0; j < num_hf; j++) {
		hash = hf[j] (word);
		hash %= bv_size;
		if (bv[hash] == 0) {
			/* one unmarked slot settles it; no need to hash further */
			misspelled = 1;
			break;
		}
	}
	end = omp_get_wtime();
	diff = end - start;
	printf("Spell check time: %f\n", diff);

	/* tell the user the result */
	if (misspelled)
		printf("Word %s is misspelled\n", word);
	else
		printf("Word %s is spelled correctly\n", word);

	free(bv);
	destroy_word_list(wl);
	return EXIT_SUCCESS;
}
Ejemplo n.º 28
0
/*
 * Renders the Mandelbrot set over a rectangular region and writes one
 * unsigned byte per sample (the escape iteration count, capped at
 * UCHAR_MAX) to stdout, row by row. Progress messages go to stderr.
 *
 * Usage: <program> [[threads] yMin yMax xMin xMax dxy]
 *   no arguments  - 4 threads, region taken from the file-level
 *                   yMin/yMax/xMin/xMax/dxy variables
 *   threads       - number of OpenMP threads
 *   all six       - additionally overrides the region and the step dxy
 *
 * Returns 0 on success, -2 on a usage error and EXIT_FAILURE if the region
 * is invalid, the buffer cannot be allocated or the output cannot be
 * written.
 */
int main(int argc, char ** argv) {

    // enhanced usage, useful for testing
    if (argc != 1 && argc != 2 && argc != 7) {
        fprintf(stderr, "Usage: %s [[threads] yMin yMax xMin xMax dxy]\n", argv[0]);
        fprintf(stderr, "Either specify no args, or only threads, or all args.\n");
        return -2;
    }

    // determine amount of threads
    if (argc > 1)
        omp_set_num_threads(atoi(argv[1]));
    else
        omp_set_num_threads(4);

    // set constants if supplied
    if (argc == 7) {
        yMin = atof(argv[2]);
        yMax = atof(argv[3]);
        xMin = atof(argv[4]);
        xMax = atof(argv[5]);
        dxy  = atof(argv[6]);
    }

    double elapsed;
    timer_start();

    // The Mandelbrot calculation is to iterate the equation
    // z = z*z + c, where z and c are complex numbers, z is initially
    // zero, and c is the coordinate of the point being tested. If
    // the magnitude of z remains less than 2 for ever, then the point
    // c is in the Mandelbrot set. We write out the number of iterations
    // before the magnitude of z exceeds 2, or UCHAR_MAX, whichever is
    // smaller.

    // Validate the step before dividing by it. The negated comparison also
    // rejects NaN.
    if (!(dxy > 0.0)) {
        fprintf(stderr, "Invalid step: dxy must be greater than zero\n");
        return EXIT_FAILURE;
    }

    // Number of samples per axis. Check the range while still in floating
    // point: converting a negative-by-mistake or out-of-range value to int
    // would be undefined or yield a bogus allocation size.
    double samples_x = (xMax - xMin) / dxy;
    double samples_y = (yMax - yMin) / dxy;
    if (!(samples_x >= 0.0 && samples_x <= INT_MAX) ||
        !(samples_y >= 0.0 && samples_y <= INT_MAX)) {
        fprintf(stderr, "Invalid region: need xMin <= xMax, yMin <= yMax and a representable size\n");
        return EXIT_FAILURE;
    }
    int nx = (int) samples_x;
    int ny = (int) samples_y;

    // Compute the sample count in size_t; the int product nx * ny can
    // overflow for large images.
    size_t total = (size_t) nx * (size_t) ny;
    if (ny != 0 && total / (size_t) ny != (size_t) nx) {
        fprintf (stderr, "Couldn't malloc buffer!\n");
        return EXIT_FAILURE;
    }

    // malloc(0) may legitimately return NULL, so always request >= 1 byte
    unsigned char * buffer = malloc(total ? total : 1);
    if (buffer == NULL) {
      fprintf (stderr, "Couldn't malloc buffer!\n");
      return EXIT_FAILURE;
    }

    // do the calculations parallel; rows are independent, and everything
    // declared inside the loop body is private to the executing thread
    int i;
    #pragma omp parallel for
    for (i = 0; i < ny; i++) {
        const double cy = yMin - dxy + i * dxy;
        unsigned char * row = buffer + (size_t) i * (size_t) nx;
        for (int j = 0; j < nx; j++) {
            const double cx = xMin - dxy + j * dxy;
            double zx = 0.0;
            double zy = 0.0;
            unsigned char n = 0;

            while ((zx*zx + zy*zy < 4.0) && (n != UCHAR_MAX)) {
                const double new_zx = zx*zx - zy*zy + cx;
                zy = 2.0*zx*zy + cy;
                zx = new_zx;
                n++;
            }
            row[j] = n;
        }
    }

    elapsed = timer_end();

    fprintf (stderr, "Took %g seconds.\nNow writing file...\n", elapsed);
    if (fwrite(buffer, sizeof(unsigned char), total, stdout) != total) {
        fprintf (stderr, "Couldn't write image data!\n");
        free(buffer);
        return EXIT_FAILURE;
    }
    free(buffer);

    fprintf (stderr, "All done! To process the image: convert -depth 8 -size " \
             "%dx%d gray:output out.jpg\n", nx, ny);
    return 0;
}
Ejemplo n.º 29
0
/**
 * Command-line driver for PLSA clustering.
 *
 * Expects exactly ten arguments (see the usage text below), runs an iPLSA
 * model with one OpenMP thread per processor reported by
 * omp_get_num_procs(), and writes two text files to the current directory:
 *
 *   doc2topic_distribution.txt   one line per document: the media id from
 *                                the mid index file followed by p_d_z values
 *   topic2word_distribution.txt  one line per topic: "tag:value" pairs taken
 *                                from the value-sorted p_w_z column
 *
 * Returns 0 on success, 1 on a usage error and -1 if an output file cannot
 * be opened.
 */
int main(int argc, char * argv[])
{
	if(argc!= 11)
	{
		cout<<"usage: PLSACluster <inputfile>  <indexmidfile> <indextagfile> <crossfolds> <numTopics> <numIters> <anneal> <numBlocks> <top-k words> <pos>"<<endl;
		cout<<"./PLSACluster  data/inputtagsformat.txt  data/indexmediaid.txt  data/indextag.txt 10 200 200 100 8 50 0"<<endl;
		return 1;
	}

	char* inputfile=argv[1];		// input file
	char* indexmidfile=argv[2];		// mid inverted index table file
	char* indextagfile=argv[3];		// tag inverted index table file
	int crossfold=atoi(argv[4]);	// cross validation dataset  10(1:9)
	int numLS=atoi(argv[5]);		// topic number
	int numIters=atoi(argv[6]);		// iterate number
	int anneal=atoi(argv[7]);		// simulated annealing
	int numBlocks=atoi(argv[8]);	// block number
	int topk=atoi(argv[9]);			// number of tags in each topics
	int pos=atoi(argv[10]);

	// Use one thread per available processor
	int cpu_core_nums = omp_get_num_procs();
	omp_set_num_threads(cpu_core_nums);

	iPLSA * plsa;

	// NOTE(review): the literal arguments (1, 1, 0.552, 0.92) are iPLSA
	// constructor parameters whose meaning is not visible from this file.
	plsa=new iPLSA(inputfile,indexmidfile,indextagfile,crossfold, numLS, numIters, 1, 1, 0.552, anneal, 0.92, cpu_core_nums, numBlocks, pos);

	plsa->run();

	// Result matrices are indexed below as p_d_z[document][topic] and
	// p_w_z[word][topic]
	double ** p_d_z = plsa->get_p_d_z();
	double ** p_w_z = plsa->get_p_w_z();
	int document_num = plsa->numDocs();
	int topic_num = plsa->numCats();
	int word_num = plsa->numWords(); 
	int midcount = plsa->numDocs();

	// Lookup tables from internal index to media id / tag string
	vector<int>     index2mid(midcount);
	vector<string>  index2tag(word_num);
	ifstream in_inter(indexmidfile);
	ifstream in_inter2(indextagfile);
	loadmidinfo(in_inter,index2mid);
	loadtaginfo(in_inter2,index2tag);

	FILE *doc2topic_fp = fopen("doc2topic_distribution.txt","w");
	if(doc2topic_fp==NULL) return -1;

	for( int i = 0; i < document_num; ++i )
	{
		fprintf(doc2topic_fp, "%d ", index2mid[i]);
		// NOTE(review): starts at topic 1, so topic 0 is never written,
		// while the topic loop further down starts at 0 - confirm intent.
		for( int j = 1; j < topic_num; ++j )
		{
			fprintf(doc2topic_fp, "%f ", p_d_z[i][j]);
		}
		fprintf(doc2topic_fp, "\n");
	}
	// Finished with the first file: flush and release it
	fclose(doc2topic_fp);

	FILE *topic2word_fp = fopen("topic2word_distribution.txt","w");
	// Check the handle that was just opened (this used to re-test
	// doc2topic_fp, so a failed open went on to fprintf(NULL, ...))
	if(topic2word_fp==NULL) 
		return -1;
	for( int i = 0; i < topic_num; ++i )
	{
		// Collect this topic's value for every word, then sort by value
		map<int,double> wMap;
		for( int w = 0; w<word_num; w++ )
		{
			wMap[w] = p_w_z[w][i];
		}

		vector< pair<int, double> > wVector;
		sortMapByValue(wMap,wVector);
		// Never index past the sorted vector when topk is not smaller than
		// the number of entries.
		// NOTE(review): the loop starts at 1, so entry 0 of the sorted
		// vector is skipped - confirm against sortMapByValue's ordering.
		const int available = static_cast<int>(wVector.size());
		for( int w = 1; w<=topk && w<available; w++ )
		{
			fprintf(topic2word_fp, "%s:%f ",index2tag[wVector[w].first].c_str(), wVector[w].second); 
		}
		fprintf(topic2word_fp, "\n");
	}
	fclose(topic2word_fp);

	return 0;
}
Ejemplo n.º 30
0
/*
 * Class:     com_intel_analytics_bigdl_mkl_MKL
 * Method:    setNumThreads
 * Signature: (I)V
 *
 * JNI entry point that forwards the requested thread count to
 * omp_set_num_threads(). The JNI environment and the class handle are
 * not used.
 */
JNIEXPORT void JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_setNumThreads
  (JNIEnv * env, jclass cls, jint num_threads) {
  (void) env;
  (void) cls;

  const int thread_count = (int) num_threads;
  omp_set_num_threads(thread_count);
}