//---------------------------------------------------------
DVec& NDG2D::PoissonIPDGbc2D
(DVec& ubc, //[in] boundary data used on Dirichlet faces
 DVec& qbc  //[in] boundary data used on Neumann faces
)
//---------------------------------------------------------
{
  // function [bc] = PoissonIPDGbc2D(ubc, qbc)
  // Purpose: Assemble the boundary-condition contribution (a vector of
  //          length Np*K) for the interior-penalty DG (IPDG) Poisson
  //          discretization, in weak form.
  //
  // For every face (k1,f1) with a non-zero BCType entry:
  //   Dirichlet: bc_k1 += (gtau*mmE - Dn1'*mmE) * ubc(face nodes)
  //   Neumann:   bc_k1 += mmE * qbc(face nodes)
  // where mmE = sJ * (edge mass matrix columns of that face), Dn1 is the
  // normal derivative matrix and gtau = 10*(N+1)^2*Fscale is the penalty.
  //
  // Returns a reference to a DVec allocated with new and tagged OBJ_temp.
  // NOTE(review): presumably the array library releases OBJ_temp objects
  // once they are consumed by the caller -- confirm the ownership rule.

  // initialize parameters
  DVec faceR("faceR"), faceS("faceS");
  DMat V1D("V1D"), Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)");
  IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM");
  double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  int i=0,k1=0,f1=0,id=0;
  IVec i1_Nfp = Range(1,Nfp);
  double N1N1 = double((N+1)*(N+1));

  // build local face matrices
  // 1-based storage: slot 0 is unused, slots 1..Nfaces hold the faces.
  DMat massEdge[4]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1 (parameterized by r)
  Fm = Fmask(All,1); faceR = r(Fm); 
  V1D = Vandermonde1D(N, faceR);
  massEdge[1](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 2 (parameterized by r)
  Fm = Fmask(All,2); faceR = r(Fm); 
  V1D = Vandermonde1D(N, faceR);
  massEdge[2](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 3 (parameterized by s)
  Fm = Fmask(All,3); faceS = s(Fm); 
  V1D = Vandermonde1D(N, faceS); 
  massEdge[3](Fm,Fm) = inv(V1D*trans(V1D));
 
  // build DG right hand side
  DVec* pBC = new DVec(Np*K, "bc", OBJ_temp); 
  DVec& bc = (*pBC);  // reference, for syntax
  ////////////////////////////////////////////////////////////////

  umMSG(1, "\n ==> {OP} assembly [bc]: ");
  for (k1=1; k1<=K; ++k1)
  {
    // progress message every 100 elements
    if (! (k1%100)) { umMSG(1, "%d, ",k1); }

    // visit every face of element k1; only faces with a BC contribute
    for (f1=1; f1<=Nfaces; ++f1)
    {
      if (BCType(k1,f1))
      {
        Fm1 = Fmask(All,f1); 
        // indices of this face's nodes in the face-node ordered arrays
        fidM  = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp;
        // index of the first node of this face; geometry is sampled there
        id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;

        lnx = nx(id); lny = ny(id); 
        lsJ = sJ(id); hinv = Fscale(id);

        // physical derivative matrices from the first geometric factor
        // of element k1, then the normal derivative on this face
        Dx = rx(1,k1)*Dr + sx(1,k1)*Ds;  
        Dy = ry(1,k1)*Dr + sy(1,k1)*Ds;
        Dn1 = lnx*Dx + lny*Dy;

        // mmE(:,Fm1) = lsJ * massEdge(:,Fm1,f1)
        mmE_Fm1 = massEdge[f1](All,Fm1);  mmE_Fm1 *= lsJ;

        gtau = 10*N1N1*hinv; // set penalty scaling

        switch(BCType(k1,f1)){
          case BC_Dirichlet: 
            bc(Np*(k1-1)+Range(1,Np)) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1)*ubc(fidM);  
            break;
          case BC_Neuman:
            bc(Np*(k1-1)+Range(1,Np)) += mmE_Fm1*qbc(fidM);
            break;
          default:
            // any other non-zero BC tag is reported and contributes nothing
            std::cout<<"warning: boundary condition is incorrect"<<std::endl;
            break;
        }
      }
    }
  }
  return bc;
}
Beispiel #2
0
 // Builds an oclMat from *this using every row and the column span
 // Range(startcol, endcol).
 inline oclMat oclMat::colRange(int startcol, int endcol) const
 {
     const Range columns(startcol, endcol);
     return oclMat(*this, Range::all(), columns);
 }
Beispiel #3
0
void OpponentColorDescriptorExtractor::computeImpl( const Mat& bgrImage, std::vector<KeyPoint>& keypoints, Mat& descriptors ) const
{
    std::vector<Mat> opponentChannels;
    convertBGRImageToOpponentColorSpace( bgrImage, opponentChannels );

    const int N = 3; // channels count
    std::vector<KeyPoint> channelKeypoints[N];
    Mat channelDescriptors[N];
    std::vector<int> idxs[N];

    // Compute descriptors three times, once for each Opponent channel to concatenate into a single color descriptor
    int maxKeypointsCount = 0;
    for( int ci = 0; ci < N; ci++ )
    {
        channelKeypoints[ci].insert( channelKeypoints[ci].begin(), keypoints.begin(), keypoints.end() );
        // Use class_id member to get indices into initial keypoints vector
        for( size_t ki = 0; ki < channelKeypoints[ci].size(); ki++ )
            channelKeypoints[ci][ki].class_id = (int)ki;

        descriptorExtractor->compute( opponentChannels[ci], channelKeypoints[ci], channelDescriptors[ci] );
        idxs[ci].resize( channelKeypoints[ci].size() );
        for( size_t ki = 0; ki < channelKeypoints[ci].size(); ki++ )
        {
            idxs[ci][ki] = (int)ki;
        }
        std::sort( idxs[ci].begin(), idxs[ci].end(), KP_LessThan(channelKeypoints[ci]) );
        maxKeypointsCount = std::max( maxKeypointsCount, (int)channelKeypoints[ci].size());
    }

    std::vector<KeyPoint> outKeypoints;
    outKeypoints.reserve( keypoints.size() );

    int dSize = descriptorExtractor->descriptorSize();
    Mat mergedDescriptors( maxKeypointsCount, 3*dSize, descriptorExtractor->descriptorType() );
    int mergedCount = 0;
    // cp - current channel position
    size_t cp[] = {0, 0, 0};
    while( cp[0] < channelKeypoints[0].size() &&
           cp[1] < channelKeypoints[1].size() &&
           cp[2] < channelKeypoints[2].size() )
    {
        const int maxInitIdx = std::max( 0, std::max( channelKeypoints[0][idxs[0][cp[0]]].class_id,
                                                      std::max( channelKeypoints[1][idxs[1][cp[1]]].class_id,
                                                                channelKeypoints[2][idxs[2][cp[2]]].class_id ) ) );

        while( channelKeypoints[0][idxs[0][cp[0]]].class_id < maxInitIdx && cp[0] < channelKeypoints[0].size() ) { cp[0]++; }
        while( channelKeypoints[1][idxs[1][cp[1]]].class_id < maxInitIdx && cp[1] < channelKeypoints[1].size() ) { cp[1]++; }
        while( channelKeypoints[2][idxs[2][cp[2]]].class_id < maxInitIdx && cp[2] < channelKeypoints[2].size() ) { cp[2]++; }
        if( cp[0] >= channelKeypoints[0].size() || cp[1] >= channelKeypoints[1].size() || cp[2] >= channelKeypoints[2].size() )
            break;

        if( channelKeypoints[0][idxs[0][cp[0]]].class_id == maxInitIdx &&
            channelKeypoints[1][idxs[1][cp[1]]].class_id == maxInitIdx &&
            channelKeypoints[2][idxs[2][cp[2]]].class_id == maxInitIdx )
        {
            outKeypoints.push_back( keypoints[maxInitIdx] );
            // merge descriptors
            for( int ci = 0; ci < N; ci++ )
            {
                Mat dst = mergedDescriptors(Range(mergedCount, mergedCount+1), Range(ci*dSize, (ci+1)*dSize));
                channelDescriptors[ci].row( idxs[ci][cp[ci]] ).copyTo( dst );
                cp[ci]++;
            }
            mergedCount++;
        }
    }
    mergedDescriptors.rowRange(0, mergedCount).copyTo( descriptors );
    std::swap( outKeypoints, keypoints );
}
Beispiel #4
0
//\begin{>>PPP_2d_Jacobi_1d_Part.tex}{\subsubsection{runBenchmark}}
void PPP_2d_Jacobi_1d_Part::
runBenchmark(  )
//================================================================
// /Description:  Runs the 8-neighbour averaging sweep on a square
//  array: first mNumberOfWarmupIterations untimed sweeps, then
//  mNumberOfTimingIterations timed sweeps. The wall-clock duration
//  of each timed sweep is stored in mTimes[0][iteration] and printed.
//
// /Return Values: None.
//
// /Errors:
//   None.
//
// /Author:  BJM
// /Date:  24 August 2000
//\end{PPP_2d_Jacobi_1d_Part.tex}
//================================================================
{
  // Single-process defaults; the MPI queries are disabled.
  int numProcs = 1;
  int procRank = 0;

// MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
// MPI_Comm_rank(MPI_COMM_WORLD, &procRank);

  Partitioning_Type Partitioning(Range(0,numProcs-1));
  Partitioning.SpecifyInternalGhostBoundaryWidths(1,1);
  Partitioning.SpecifyDecompositionAxes(1);

  // Side of the square array holding all unknowns (truncated to int).
  const int sideLength = static_cast<int>(sqrt( mUnknownsPerProc * numProcs ));

  printf ("theArraySideLength = %d \n",sideLength);

  doubleArray U_old(sideLength,sideLength,Partitioning);
  U_old = 0.0;

  // Interior index ranges: everything except the outermost layer.
  Index I (1,sideLength-2);
  Index J (1,sideLength-2);

  // Warm-up sweeps (not timed).
  for (int warmup = 0; warmup < mNumberOfWarmupIterations; warmup++)
  {
    U_old(I,J) =
      (U_old(I+1,J+1) + U_old(I+1,J) + U_old(I+1,J-1) + U_old(I,J+1) +
       U_old(I,J-1) + U_old(I-1,J+1) + U_old(I-1,J) + U_old(I-1,J-1)) / 8.0;
  }

  // Timed sweeps: one wall-clock sample per sweep.
  for (int sample = 0; sample < mNumberOfTimingIterations; sample++)
  {
    const double sweepStart = Communication_Manager::Wall_Clock_Time();
    U_old(I,J) =
      (U_old(I+1,J+1) + U_old(I+1,J) + U_old(I+1,J-1) + U_old(I,J+1) +
       U_old(I,J-1) + U_old(I-1,J+1) + U_old(I-1,J) + U_old(I-1,J-1)) / 8.0;
    const double sweepEnd = Communication_Manager::Wall_Clock_Time();

    const double elapsed = sweepEnd - sweepStart;
    mTimes[0][sample] = elapsed;
    printf("time= %f\n",elapsed);
  }
}
Beispiel #5
0
 // Builds an oclMat from *this using every row and the one-column span
 // Range(x, x + 1).
 inline oclMat oclMat::col(int x) const
 {
     const Range oneColumn(x, x + 1);
     return oclMat(*this, Range::all(), oneColumn);
 }
Beispiel #6
0
// Returns the Range built from a vector constructed with 0 and a vector
// constructed with 1.0.
Range<Scalar,Dim> Range<Scalar,Dim>::unitRange()
{
    const Vector<Scalar,Dim> lowerCorner(0);
    const Vector<Scalar,Dim> upperCorner(1.0);
    return Range(lowerCorner, upperCorner);
}
Beispiel #7
0
void PortList::addRange(int startPort, int endPort)
{
    m_ranges << Range(startPort, endPort);
}
Beispiel #8
0
 //! Returns Range(max size_t, min size_t), i.e. first argument larger
 //! than the second.
 static Range Invalid() {
     const size_t largest = std::numeric_limits<size_t>::max();
     const size_t smallest = std::numeric_limits<size_t>::min();
     return Range(largest, smallest);
 }
Beispiel #9
0
 //! Returns a Range whose begin and end are both moved up by shift.
 Range operator + (const size_t& shift) const {
     const auto movedBegin = begin + shift;
     const auto movedEnd = end + shift;
     return Range(movedBegin, movedEnd);
 }
////////// main function /////////////
// Entry point of the ICP registration demo.
//
// Steps, as implemented below:
//   1. Load point-cloud CSV files "points%02d.csv". For index fileCount the
//      "data" cloud is file (fileCount+1); for fileCount >= 1 the "model"
//      cloud is file (fileCount), i.e. the previous data cloud.
//   2. For fileCount >= 1 run ICP (data against model) and keep the resulting
//      transform, correspondences, indices and distances.
//   3. Chain the transforms my_rt[fileCount], ..., my_rt[1] onto each data
//      cloud, then subtract the mean of the first cloud (shape_fixed).
//   4. Copy all shape_fixed coordinates into the allpoints buffer.
//   5. Accumulate per-file camera rotation/translation matrices.
//   6. Optionally (FILEOUTPUT) write result files, optionally (GLVIEW) open a
//      GLUT window.
//
// Returns 0 on success, -1 when an output file cannot be opened.
//
// NOTE(review): fileCount, rows, cols, fileTotal, model_rows, data_rows,
// filedir, dir, outdir, allpoints, cubeSize and cubeSize_ are not declared in
// this function; presumably they are file-scope globals -- confirm.
int main(int argc, char** argv){
	
	cubeSize_ = cubeSize/2.;

	int i=0, j=0, k=0;
	clock_t start_time_total,end_time_total;
	clock_t start_time[fileTotal];
	clock_t end_time[fileTotal];

	//char * filename[fileTotal];
	// for parallel execution
	char * model_filename[fileTotal];
	char * data_filename[fileTotal];

	//Mat shape[fileTotal];
	Mat shape_reg[fileTotal];
	//Mat shape_temp[fileTotal];
	Mat shape_fixed[fileTotal];

	// for parallel execution
	Mat model_shape[fileTotal];
	Mat data_shape[fileTotal];
	// shape_temp[f][s]: intermediate cloud of file f while chaining transforms
	Mat shape_temp[fileTotal][fileTotal];

	// per-file copies of the ICP results
	Mat my_model_corr[fileTotal];
	int myIndex[fileTotal][rows];
	float myDist[fileTotal][rows];
	RT<float> my_rt[fileTotal];

	Mat_<float> model_mean;

	// number of data rows in the model file
	//model_rows = 16128;
	// number of data rows in the data file
	//data_rows = 16128;
	
	start_time_total = clock();
	cout << "-------------" << endl;
	cout << "ICP Algorithm" << endl;
	cout << "-------------" << endl;

#pragma omp parallel for
	for(fileCount=0;fileCount<fileTotal;fileCount++)
	{

#pragma region // --- load the point-cloud CSV files into cv::Mat ---
		if(fileCount>=1){
			///model
			// CSV file name
			// NOTE(review): sizeof(char *) * 100 allocates 100 pointer-sized
			// units rather than 100 chars, and the buffer is never freed here.
			model_filename[fileCount] = (char *)malloc(sizeof(char *) * 100);
			//sprintf(model_filename[fileCount],"%s/%s/%d.csv",filedir,dir,fileCount);
			sprintf(model_filename[fileCount],"%s/%s/points%02d.csv",filedir,dir,fileCount);
			// number of data rows in the CSV file
			model_rows[fileCount] = rows;
			// read the CSV file
			model_shape[fileCount] = csvread(model_filename[fileCount], model_rows[fileCount], cols);
			// print the file name to the console
			//cout << "model point-cloud data file name " << model_filename[fileCount] << endl;
		}
		///data
		// CSV file name
		// NOTE(review): same allocation size / missing free as for model_filename.
		data_filename[fileCount] = (char *)malloc(sizeof(char *) * 100);
		//sprintf(data_filename[fileCount],"%s/%s/%d.csv",filedir,dir,(fileCount+1));
		sprintf(data_filename[fileCount],"%s/%s/points%02d.csv",filedir,dir,(fileCount+1));
		// number of data rows in the CSV file
		data_rows[fileCount] = rows;
		// read the CSV file
		data_shape[fileCount] = csvread(data_filename[fileCount], data_rows[fileCount], cols);
		// print the file name to the console
		cout << "点群データファイル名 " << data_filename[fileCount] << endl;
#pragma endregion 

		if(fileCount>=1){

#pragma region // --- registration by ICP ---
#if 1 // --- run ICP ---
			// start measuring execution time
			start_time[fileCount] = clock();
			cout << "\t標準ICP開始" << endl;
			//ICP with flann search and unit quaternion method
			//cout << "estimating [R/t] with kd-tree search + quaternion" << endl << endl;
			ClosestPointFlann model_shape_flann (model_shape[fileCount]);
			RT_L2 <float, SolveRot_eigen<float>> rt_solver;
			ICP <ClosestPointFlann> icp (model_shape_flann, rt_solver);

			icp.set(data_shape[fileCount]);
			icp.reg(100, 1.0e-6);

			// stop measuring execution time
			end_time[fileCount] = clock();
			//cout << "icp result : [R/t] =" << endl << (icp.rt) << endl << endl;
			cout << "\t" << data_filename[fileCount] << "  icp error =" << icp.dk << endl;
			cout << "\t" << data_filename[fileCount] << "  実行時間 = " << (float)(end_time[fileCount] - start_time[fileCount])/CLOCKS_PER_SEC << "秒" << endl << endl;
			
			// store the results in local variables
			//my_model_corr[fileCount] = Mat::zeros(rows, cols, CV_32F);
			my_model_corr[fileCount].create(rows, cols, CV_32F);
			icp.model_corr.copyTo(my_model_corr[fileCount]);
			icp.rt.copyTo(my_rt[fileCount]);
			for(int k=0;k<data_rows[fileCount];k++){
				myIndex[fileCount][k] = icp.index[k];
				myDist[fileCount][k] = icp.distance[k];
			}

#else // --- when ICP is not run ---
			shape_reg[fileCount] = data_shape[fileCount];
#endif
#pragma endregion
		}else{
			// the first cloud is the reference and is used as loaded
			shape_reg[fileCount] = data_shape[fileCount];
		}
	}

#pragma region // --- coordinate transformation ---
	// compute the mean (average reduction of the first cloud along dimension 0)
	reduce(shape_reg[0], model_mean, 0, CV_REDUCE_AVG);

#pragma omp parallel for private(i,j,k)
	for(fileCount=0;fileCount<fileTotal;fileCount++)
	{
		if(fileCount>=1){
			// apply the obtained rt to the data shape
			// but first initialize shape_temp
			for(k=0;k<fileTotal;k++)
			{
				shape_temp[fileCount][k] = cv::Mat::zeros(data_rows[fileCount], cols, CV_32F);
			}
			shape_temp[fileCount][fileCount] = data_shape[fileCount];
			// chain my_rt[fileCount], my_rt[fileCount-1], ..., my_rt[1];
			// the fully transformed cloud ends up in slot 0
			for(k=0;k<fileCount;k++)
			{
				shape_temp[fileCount][fileCount-(k+1)] = my_rt[(fileCount-k)].transform(shape_temp[fileCount][fileCount-k]);
			}
			shape_reg[fileCount] = shape_temp[fileCount][0];
		}

		// subtract the mean of the first cloud from every row
		shape_fixed[fileCount] = shape_reg[fileCount] - repeat(model_mean, shape_reg[fileCount].rows, 1);
		
		/*
		// allocate memory
		points[fileCount] = (GLfloat *)malloc(sizeof(float)*data_rows[fileCount]*cols);
		// put the coordinate values into GLpoints
		for(i=0;i<data_rows[fileCount];i++){
			for(j=0;j<cols;j++){
				points[fileCount][i*cols+j] = shape_fixed[fileCount].at<float>(i,j);
			}
		}*/
#pragma endregion
	}

#pragma region // --- pass the data to OpenGL ---
	
	// allocate memory
	allpoints = (GLfloat *)malloc(sizeof(float)*rows*fileTotal*cols);
	for(fileCount=0;fileCount<fileTotal;fileCount++)
	{
		// put the coordinate values into allpoints (file-major, then row, then column)
		for(int i=0;i<rows;i++){
			for(int j=0;j<cols;j++){
				allpoints[fileCount*rows*cols+i*cols+j] = shape_fixed[fileCount].at<float>(i,j);
			}
		}
	}
#pragma endregion

	
#pragma region // --- compute the camera RT ---
	Mat cameraRT[fileTotal];
	Mat cameraR[fileTotal];
	Mat cameraT[fileTotal];
	cameraRT[0] = Mat::eye(4,4,CV_32F);
	cameraR[0] = Mat::eye(3,3,CV_32F);
	cameraT[0] = Mat::zeros(1,3,CV_32F);
	for(i=1;i<fileTotal;i++){
		cameraRT[i] = Mat::eye(4,4,CV_32F);
		cameraR[i] = Mat::eye(3,3,CV_32F);
		cameraT[i] = Mat::zeros(1,3,CV_32F);
		
		// r: rows 0..2, columns 0..2 of my_rt[i]; t: row 3, columns 0..2
		Mat r = my_rt[i].operator()(Range(0,3),Range(0,3));
		cameraR[i] = cameraR[i-1]*r.t();
		Mat t = my_rt[i].operator()(Range(3,4),Range(0,3));
		cameraT[i] = t*cameraR[i-1].t() + cameraT[i-1];
		
		// copy the accumulated rotation into the upper-left 3x3 of cameraRT[i]
		cameraRT[i].at<float>(0,0) = cameraR[i].at<float>(0,0);
		cameraRT[i].at<float>(0,1) = cameraR[i].at<float>(0,1);
		cameraRT[i].at<float>(0,2) = cameraR[i].at<float>(0,2);
		cameraRT[i].at<float>(1,0) = cameraR[i].at<float>(1,0);
		cameraRT[i].at<float>(1,1) = cameraR[i].at<float>(1,1);
		cameraRT[i].at<float>(1,2) = cameraR[i].at<float>(1,2);
		cameraRT[i].at<float>(2,0) = cameraR[i].at<float>(2,0);
		cameraRT[i].at<float>(2,1) = cameraR[i].at<float>(2,1);
		cameraRT[i].at<float>(2,2) = cameraR[i].at<float>(2,2);
		
		// copy the accumulated translation into row 3 of cameraRT[i]
		cameraRT[i].at<float>(3,0) = cameraT[i].at<float>(0,0);
		cameraRT[i].at<float>(3,1) = cameraT[i].at<float>(0,1);
		cameraRT[i].at<float>(3,2) = cameraT[i].at<float>(0,2);
	}
#pragma endregion

// --- data output ---
#if FILEOUTPUT

	///////////////////////////////
	// write out all point clouds together
	// pcd
	// NOTE(review): the original comment names shape_fixed, but the loop
	// below writes shape_reg -- confirm which one is intended.
	//
	FILE *outfp;
	char outfilename[100];
	sprintf(outfilename,"%s/%s/result_xyz.pcd",outdir,dir);
	outfp = fopen(outfilename,"w");
	if(outfp == NULL){
		printf("%sファイルが開けません\n",outfilename);
		return -1;
	}
	// packed color constants; red and white are not used below
	int red = 255*256*256;
	int green = 255*256*256 + 255*256;
	int white = 255*256*256 + 255*256 + 255;
	fprintf(outfp,"# .PCD v.7 - Point Cloud Data file format\nVERSION .7\nFIELDS x y z rgb\nSIZE 4 4 4 4\nTYPE F F F F\nCOUNT 1 1 1 1\nWIDTH %d\nHEIGHT 1\nVIEWPOINT 0 0 0 1 0 0 0\nPOINTS %d\nDATA ascii\n", rows*fileTotal, rows*fileTotal);
	for(i=0;i<fileTotal;i++){
		for(j=0;j<data_rows[i];j++){
			// the last field varies with the file index i
			fprintf(outfp,"%f %f %f %d\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2), green+(int)floor(255.*(i+1)/fileTotal));
		}
	}
	fclose(outfp);

	///////////////////////////////
	// write out all point clouds together (shape_reg, space separated)
	// csv
	//
	sprintf(outfilename,"%s/%s/allpoints.csv",outdir,dir);
	outfp = fopen(outfilename,"w");
	if(outfp == NULL){
		printf("%sファイルが開けません\n",outfilename);
		return -1;
	}
	for(i=0;i<fileTotal;i++){
		for(j=0;j<data_rows[i];j++){
			fprintf(outfp,"%f %f %f\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2));
		}
	}
	fclose(outfp);

	///////////////////////////////
	// write out all point clouds together (shape_reg, comma separated)
	// result_xyz.csv
	//
	sprintf(outfilename,"%s/%s/result_xyz_icp.csv",outdir,dir);
	outfp = fopen(outfilename,"w");
	if(outfp == NULL){
		printf("%sファイルが開けません\n",outfilename);
		return -1;
	}
	for(i=0;i<fileTotal;i++){
		for(j=0;j<data_rows[i];j++){
			fprintf(outfp,"%f,%f,%f\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2));
		}
	}
	fclose(outfp);

	//////////////////////////////////
	// write out Corr (corresponding points), Index (element index of the
	// corresponding point) and Distance (distance between corresponding points)
	//
	FILE *outfp_corr;
	char outfilename_corr[100];

	// index/distance exist only for files that went through ICP (fileCount >= 1)
	for(fileCount=1;fileCount<fileTotal;fileCount++){

		/// Index file
		sprintf(outfilename_corr,"%s/%s/index%02d.csv",outdir,dir,(fileCount));
		outfp_corr = fopen(outfilename_corr,"w");
		if(outfp_corr == NULL){
			printf("%sファイルが開けません\n",outfilename_corr);
			return -1;
		}
		for(j=0;j<data_rows[fileCount];j++){
			fprintf(outfp_corr,"%d\n", myIndex[fileCount][j]);
		}
		fclose(outfp_corr);

		/// Distance file
		sprintf(outfilename_corr,"%s/%s/dist%02d.csv",outdir,dir,(fileCount));
		outfp_corr = fopen(outfilename_corr,"w");
		if(outfp_corr == NULL){
			printf("%sファイルが開けません\n",outfilename_corr);
			return -1;
		}
		for(j=0;j<data_rows[fileCount];j++){
			fprintf(outfp_corr,"%f\n", myDist[fileCount][j]);
		}
		fclose(outfp_corr);
	}
	
	for(fileCount=0;fileCount<fileTotal;fileCount++){
		
		if(fileCount<(fileTotal-1)){
			/// Corr point-cloud file
			sprintf(outfilename_corr,"%s/%s/corr%02d.csv",outdir,dir,(fileCount+1));
			outfp_corr = fopen(outfilename_corr,"w");
			if(outfp_corr == NULL){
				printf("%sファイルが開けません\n",outfilename_corr);
				return -1;
			}

			// rows of this cloud selected through the next file's ICP indices
			for(j=0;j<data_rows[fileCount];j++){
				//fprintf(outfp_corr,"%f %f %f\n", my_model_corr[fileCount].at<float>(j,0), my_model_corr[fileCount].at<float>(j,1), my_model_corr[fileCount].at<float>(j,2));
				fprintf(outfp_corr,"%f %f %f\n", shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],0), shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],1), shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],2));
			}
			fclose(outfp_corr);
		}else{
			/// Corr point-cloud file
			sprintf(outfilename_corr,"%s/%s/corr%02d.csv",outdir,dir,(fileCount+1));
			outfp_corr = fopen(outfilename_corr,"w");
			if(outfp_corr == NULL){
				printf("%sファイルが開けません\n",outfilename_corr);
				return -1;
			}

			// the last file has no successor, so its rows are written in order
			for(j=0;j<data_rows[fileCount];j++){
				//fprintf(outfp_corr,"%f %f %f\n", my_model_corr[fileCount].at<float>(j,0), my_model_corr[fileCount].at<float>(j,1), my_model_corr[fileCount].at<float>(j,2));
				fprintf(outfp_corr,"%f %f %f\n", shape_reg[fileCount].at<float>(j,0), shape_reg[fileCount].at<float>(j,1), shape_reg[fileCount].at<float>(j,2));
			}
			fclose(outfp_corr);
		}
	}

	/////////////////////
	// write out the RT
	//
	// assign the identity transform to my_rt[0]
	//Mat rt0 = (Mat_<float>(4,4) << 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1);
	Mat rt0 = Mat::eye(4,4,CV_32F);
	rt0.copyTo(my_rt[0]);
	// Open File Storage
	char rtfilename[100];
	sprintf(rtfilename,"%s/%s/rt.xml",outdir,dir);
	cv::FileStorage	cvfs(rtfilename,CV_STORAGE_WRITE);
	cv::WriteStructContext ws(cvfs, "mat_rt", CV_NODE_SEQ);	// create node
	// the sequence stores the accumulated cameraRT matrices, not my_rt
	for(int i=0; i<fileTotal; i++){
		cv::write(cvfs,"",cameraRT[i]);
	}
	cvfs.release();

#endif

// --- display with OpenGL ---
#if GLVIEW
	
	// --- GLUT initialize ---
	initFlag();
	initParam();

	//window1
	glutInit(&argc, argv);
	glutInitWindowPosition(0, 0);
	glutInitWindowSize(window_w, window_h);
	glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE );

	window1 = glutCreateWindow("Window1");
	glutMouseFunc(mouse);
	glutMotionFunc(drag);
	glutPassiveMotionFunc(passive);
	glutMouseWheelFunc ( MouseWheel ) ;// wheel callback
	glutDisplayFunc(disp);
	// NOTE(review): glutIdleFunc is registered twice; presumably the later
	// call (animate) replaces myGlutIdle -- confirm this is intended.
	glutIdleFunc(myGlutIdle);
	glutKeyboardFunc(glut_keyboard);
	glutIdleFunc(animate);
	glClearColor(0.0, 0.0, 0.0, 0.5); // background color

	glutMainLoop();
#endif
	
	// stop measuring execution time
	end_time_total = clock();
	cout << "-------------" << endl;
	cout << "    Finish   " << endl;
	cout << "-------------" << endl;
	cout << "プログラム実行時間 = " << (float)(end_time_total - start_time_total)/CLOCKS_PER_SEC << "秒" << endl << endl;
	
	//cvNamedWindow ("WaitKey", CV_WINDOW_AUTOSIZE);
	//cvWaitKey(0);
	return 0;
}
Beispiel #11
0
//! Builds the global range Range(0, global_size) and returns the result of
//! Partition(i, p) on it, i.e. the part of the range assigned to PE i when
//! the range is split among p PEs.
static inline Range CalculateLocalRange(
    size_t global_size, size_t p, size_t i) {
    Range whole(0, global_size);
    return whole.Partition(i, p);
}
Beispiel #12
0
void X_Comp::EvalProducts()
  {
  if (NJoins()>0)
    switch (SolveMode())
      {
      case PBMODE:
        DoBreak();
        break;
      case SSMODE:
        DoBreak();
        break;
      case DYNMODE:
        {
        int JoinId=0;
        double Press=Joins[JoinId].Pressure();

        int I[MaxIOList+1];
        FillIOIndexList(JoinId, I);

        flag dbg=0;//((Debug|dbgDerivs) & DBG_Derivs);

        if (SolveMode()==PBMODE)
          Press=GetPBInputPressure(JoinId);

        StkSpConduit SdLcl("SdLcl", chLINEID(), this);
        SpConduit &Sd = SdLcl();
        Sd.QZero();
        Sd.SetPress(Press);
        double  Qot=0.0;
        int NFeeds=0;
        for (int i, ic = 0; (i=I[ic])>=0; ic++)
          if (IO_In(i))
            {
            Sd.SelectModel(IOConduit(i), NFeeds==0);
            Sd.QAddF(*IOConduit(i), som_ALL, 1.0);
            NFeeds++;
            }
          else
            Qot += IOQmEst_Out(i);

        if (dbg)
          for (ic = 0; (i=I[ic])>=0; ic++)
            if (IO_In(i))
              {
              char t[128];
              sprintf(t, "%s.%s", FullObjTag(), IODesc_Self(i)->pName);
              dbgpln("i-Xfr %-12.12s : %14.6g, %14.6g | %14.6g",t,
                     IOConduit(i)->QMass(som_SL), IOConduit(i)->QMass(som_Vap), K_2_C(IOConduit(i)->Temp()));
              }

        double denom;
        double  P1 = IOP_Rmt (IOWithId_Self(ioid_In));
        double  P2 = IOP_Self(IOWithId_Self(ioid_In));

        // hss 9/3/98 - Use data base for specific heat ratio.
        P_SpecificHeatRatio = Max(1.1, Sd.CpCv());

        if( Sd.Temp() > 0.01 )
          Tin = Sd.Temp();

        // hss Calc Polytropic Efficiency
        /*double FlowMin  = pI->QVolume() * 60.0;
        double rpm      = Max(SpeedRatio, 10000.0);
        double EffTemp  = Efficiency.Zxy(FlowMin,rpm);
        if (EffTemp >= 0.6)
          P_PolytropicEff = EffTemp;
        else
          P_PolytropicEff = 0.6;*/

        denom = P_SpecificHeatRatio * P_PolytropicEff;
        if( fabs(denom) < 1.0e-30)
          denom = 1.0e-30;
        // hss Try a fix to prevent crash when P1 is negative
        if ((P1 > 0.0) && (P2 > P1))
          Tout = Tin*pow(P2/P1,(P_SpecificHeatRatio - 1.0)/denom);
        else
          Tout = Tin;      // end of kluge

        Sd.SetTemp(Tout);

        double Qin=Sd.QMass(som_ALL);
        // What Comes in must go out
        double Scl=Range(0.0, Qin/GTZ(Qot), 1000.0);
        for (ic = 0; (i=I[ic])>=0; ic++)
          if (IO_Out(i))
            IOConduit(i)->QSetM(Sd, som_ALL, IOQmEst_Out(i)*Scl, Press);//Joins[JoinId].Pressure());//PMax);
        }
        break;
      }
  };
Beispiel #13
0
void X_Comp::EvalDiscrete()
  {
  if (Control==CTSpeed)
    {
    SpConduit &C=*IOConduit(IOWithId_Self(ioid_In));
    //double Cflow = GEZ(IOFB(IOWithId_Self(ioid_In), 0)->GetQm());
    //double VFlow = GEZ(IOFB(IOWithId_Self(ioid_In), 0)->GetQv());
    double Qv=C.QVolume();
    double Qm=C.QMass();

    MaxSpeed=Max(100.0, MaxSpeed);
    MinSpeed=Range(0.0, MinSpeed, MaxSpeed);
    double RqdS=(P_Status ? Range(MinSpeed/MaxSpeed, SpdSpt,1.0) : 0);
    SpdFbk+=(RqdS-SpdFbk)*ICGetTimeInc()/Max(1.0, SpdTau);
    ActSpeed=SpdFbk*MaxSpeed;
    SpeedRatio=SpdFbk;

    double Hd=-166.466+1.581213*ActSpeed+(-17.5093-0.15871*ActSpeed)*Qv+(0.0003296+0.014441*ActSpeed)*Qv*Qv;
//    double SrgVolFlw=-0.96597+0.010344*ActSpeed;
    double SrgSpd=Range(166.0, ActSpeed, 275.0);
    QvSrg=7E-05*SrgSpd*SrgSpd - 0.0199*SrgSpd + 2.2831;
    QvIn=C.QVolume();

    //Qs=Range(0.0, Qm*SrgVolFlw/GTZ(Qm), 150.0);

    Pin  = Max(100.0,IOP_Rmt (IOWithId_Self(ioid_In)));
//    double PCalc=(7990.392-21.47525359*SpdFbk*266.67+26.75703018*Range(0.0,Cflow,100.0));
//    PCalc=Max(1000.0, PCalc);
//    double PRatio=7990/PCalc;

    //PressIn=GTZ(PressIn);
//    if ((Qv>1.0e-6) && (C.QMass()>1.0e-6))
//      { // Only Change if Direction is Forward
    double GammaIn=1.262;//Range(1.2, C.CpCv(), 1.5);
    double MoleWtFIn=C.MoleWt();//*Rho;
    double TempIn=C.Temp();
//      }


    double R=8.314/MoleWtFIn;
    double X=GammaIn/(GammaIn-1.0);
    // PHT_Isentropic:

    // Units of Hd Expected to be kNm/kg == kJ/kg;
    // See Perry P6-17 & WMC-KNS Acid Plant Curves.
    double dEfficiency=0.9;
    double P2=Pin*Pow(GEZ(Hd*dEfficiency/GTZ(X*R*TempIn)+1.0), X);
    double Bst=GEZ(P2-Pin);

    double TheBoostDmp=1.0-(1.0/Max(ICGetTimeInc(), SpdTau));
    TheBoost=TheBoostDmp*TheBoost+(1-TheBoostDmp)*GEZ(Bst);
    //TheBoost=TheBoostDmp*TheBoost+(1-TheBoostDmp)*GEZ(Pin*PRatio-Pin);

    }
  else
    {
    double Q     = IOQm_In(IOWithId_Self(ioid_In));
    double P1    = IOP_Rmt(IOWithId_Self(ioid_In));
    double P2    = IOP_Self(IOWithId_Self(ioid_In));
    double pwr   = Pwr_Curve(Q,P1,P2);
    double goodp = P2_Curve(Q,P1,P_MaxPower);
    double Qc    = Flw_Curve(P_MaxPower,P1,P_SetpointPressure);

    if( fabs(Q - Qprv)/Max(1.0e-60,Qprv) < 0.001 && ((P2 < P1) || (Q < 0.001)) )
      {
      X_SetpointPressure = 0.0;
      }
    else if( Q > 1.0e-50 && fabs(Q - Qprv)/Max(1.0e-60,Qprv) < 0.001 && P1 < P2 )
      {
      if( Q > Qc )
        {
        double newval = (P_SetpointPressure - goodp);
        double test   = (X_SetpointPressure*0.5) + (newval*0.5);
        if( P2-test < P1 )
          {// This will fail at unrealistically high flow
          X_SetpointPressure = (X_SetpointPressure*0.5) + 0.5*(P2-P1);
          }
        else
          {
          X_SetpointPressure = test;
          }
        //TRACE("EVAL DISCREET CONVERGED %f %f %f %f\n\n",Q,pwr,goodp,X_SetpointPressure);
        }
      else
        {
        X_SetpointPressure /= 2.0;
        }
      }
    Qprv = Q;
    }
  }
    // Test object detection network from Darknet framework.
    void testDarknetModel(const std::string& cfg, const std::string& weights,
                          const std::vector<std::vector<int> >& refClassIds,
                          const std::vector<std::vector<float> >& refConfidences,
                          const std::vector<std::vector<Rect2d> >& refBoxes,
                          double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4)
    {
        checkBackend();

        Mat img1 = imread(_tf("dog416.png"));
        Mat img2 = imread(_tf("street.png"));
        std::vector<Mat> samples(2);
        samples[0] = img1; samples[1] = img2;

        // determine test type, whether batch or single img
        int batch_size = refClassIds.size();
        CV_Assert(batch_size == 1 || batch_size == 2);
        samples.resize(batch_size);

        Mat inp = blobFromImages(samples, 1.0/255, Size(416, 416), Scalar(), true, false);

        Net net = readNet(findDataFile("dnn/" + cfg, false),
                          findDataFile("dnn/" + weights, false));
        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);
        net.setInput(inp);
        std::vector<Mat> outs;
        net.forward(outs, getOutputsNames(net));

        for (int b = 0; b < batch_size; ++b)
        {
            std::vector<int> classIds;
            std::vector<float> confidences;
            std::vector<Rect2d> boxes;
            for (int i = 0; i < outs.size(); ++i)
            {
                Mat out;
                if (batch_size > 1){
                    // get the sample slice from 3D matrix (batch, box, classes+5)
                    Range ranges[3] = {Range(b, b+1), Range::all(), Range::all()};
                    out = outs[i](ranges).reshape(1, outs[i].size[1]);
                }else{
                    out = outs[i];
                }
                for (int j = 0; j < out.rows; ++j)
                {
                    Mat scores = out.row(j).colRange(5, out.cols);
                    double confidence;
                    Point maxLoc;
                    minMaxLoc(scores, 0, &confidence, 0, &maxLoc);

                    if (confidence > confThreshold) {
                        float* detection = out.ptr<float>(j);
                        double centerX = detection[0];
                        double centerY = detection[1];
                        double width = detection[2];
                        double height = detection[3];
                        boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height,
                                            width, height));
                        confidences.push_back(confidence);
                        classIds.push_back(maxLoc.x);
                    }
                }
            }

            // here we need NMS of boxes
            std::vector<int> indices;
            NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

            std::vector<int> nms_classIds;
            std::vector<float> nms_confidences;
            std::vector<Rect2d> nms_boxes;

            for (size_t i = 0; i < indices.size(); ++i)
            {
                int idx = indices[i];
                Rect2d box = boxes[idx];
                float conf = confidences[idx];
                int class_id = classIds[idx];
                nms_boxes.push_back(box);
                nms_confidences.push_back(conf);
                nms_classIds.push_back(class_id);
            }

            normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds,
                             nms_confidences, nms_boxes, format("batch size %d, sample %d\n", batch_size, b).c_str(), confThreshold, scoreDiff, iouDiff);
        }
    }
Beispiel #15
0
void SubmatrixQueriesTest::multipleBenchmarksRowQueries(size_t n, bench_time_t *naiveTime, bench_time_t *queryTime, bench_time_t *cascadingTime, bench_time_t *simpleCascadingTime)
{
    bench_time_t clock1, clock2;
    for (size_t i = 0; i < n; i++) {        
        size_t row = rand() % (_testMatrix->rows());
        
        size_t c1,c2;
        c1 = rand() % (_testMatrix->cols());
        c2 = rand() % (_testMatrix->cols());
        
        Range c = Range(min(c1,c2),max(c1,c2));

        clock1 = now();
        _queryDS->columnTree()->cascadingMaxInRange(row, c);
        clock2 = now();
        
        *cascadingTime = add(*cascadingTime,diff(clock2, clock1));
    }
    for (size_t i = 0; i < n; i++) {

        size_t row = rand() % (_testMatrix->rows());
        
        size_t c1,c2;
        c1 = rand() % (_testMatrix->cols());
        c2 = rand() % (_testMatrix->cols());
        
        Range c = Range(min(c1,c2),max(c1,c2));
        
        clock1 = now();
        _queryDS->columnTree()->simpleCascadingMaxInRange(row, c);
        clock2 = now();
        
        *simpleCascadingTime = add(*simpleCascadingTime,diff(clock2, clock1));
    }
    for (size_t i = 0; i < n; i++) {
        size_t row = rand() % (_testMatrix->rows());
        
        size_t c1,c2;
        c1 = rand() % (_testMatrix->cols());
        c2 = rand() % (_testMatrix->cols());
        
        Range c = Range(min(c1,c2),max(c1,c2));
        
        clock1 = now();
        _queryDS->columnTree()->maxForRowInRange(row, c.min, c.max);
        clock2 = now();
        
        *queryTime = add(*queryTime,diff(clock2, clock1));
    }
    if(SubmatrixQueriesTest::benchmarkNaiveQueries){
        for (size_t i = 0; i < n; i++) {
            size_t row = rand() % (_testMatrix->rows());
            
            size_t c1,c2;
            c1 = rand() % (_testMatrix->cols());
            c2 = rand() % (_testMatrix->cols());
            
            Range c = Range(min(c1,c2),max(c1,c2));

            clock1 = now();
            SubmatrixQueriesTest::naiveMaximumInRow(_testMatrix, c, row);
            clock2 = now();
            
            *naiveTime = add(*naiveTime,diff(clock2, clock1));
        }
    }
}
Beispiel #16
0
 //! Returns the i-th of `parts` consecutive sub-ranges of this Range.
 //! The result runs from the begin of part i up to the begin of part i + 1,
 //! both taken from CalculateBeginOfPart. Requires i < parts.
 Range Partition(size_t i, size_t parts) const {
     assert(i < parts);
     const size_t following = i + 1;
     return Range(CalculateBeginOfPart(i, parts),
                  CalculateBeginOfPart(following, parts));
 }
Beispiel #17
0
void SubmatrixQueriesTest::multipleBenchmarksColQueries(size_t n, bench_time_t *naiveTime, bench_time_t *queryTime, bench_time_t *cascadingTime, bench_time_t *simpleCascadingTime)
{
    bench_time_t clock1, clock2;
    for (size_t i = 0; i < n; i++) {
        size_t col = rand() % (_testMatrix->cols());
        
        size_t r1,r2;
        r1 = rand() % (_testMatrix->rows());
        r2 = rand() % (_testMatrix->rows());
        
        Range r = Range(min(r1,r2),max(r1,r2));
        
        clock1 = now();
        _queryDS->columnTree()->cascadingMaxInRange(col, r);
        clock2 = now();

        *cascadingTime = add(*cascadingTime,diff(clock2, clock1));
    }
    for (size_t i = 0; i < n; i++) {
        
        size_t col = rand() % (_testMatrix->cols());
        
        size_t r1,r2;
        r1 = rand() % (_testMatrix->rows());
        r2 = rand() % (_testMatrix->rows());
        
        Range r = Range(min(r1,r2),max(r1,r2));
        
        clock1 = now();
        _queryDS->columnTree()->simpleCascadingMaxInRange(col, r);
        
        clock2 = now();

        *simpleCascadingTime = add(*simpleCascadingTime,diff(clock2, clock1));
    }
    for (size_t i = 0; i < n; i++) {
        size_t col = rand() % (_testMatrix->cols());
        
        size_t r1,r2;
        r1 = rand() % (_testMatrix->rows());
        r2 = rand() % (_testMatrix->rows());
        
        Range r = Range(min(r1,r2),max(r1,r2));
        
        clock1 = now();
        _queryDS->columnTree()->maxForRowInRange(col, r.min, r.max);
        clock2 = now();

        *queryTime = add(*queryTime,diff(clock2, clock1));
    }
    if(SubmatrixQueriesTest::benchmarkNaiveQueries){
        for (size_t i = 0; i < n; i++) {
            size_t col = rand() % (_testMatrix->cols());
            
            size_t r1,r2;
            r1 = rand() % (_testMatrix->rows());
            r2 = rand() % (_testMatrix->rows());
            
            Range r = Range(min(r1,r2),max(r1,r2));
            
            clock1 = now();
            SubmatrixQueriesTest::naiveMaximumInRow(_testMatrix, r, col);
            clock2 = now();

            *naiveTime = add(*naiveTime,diff(clock2, clock1));
        }
    }
}
Beispiel #18
0
/* Build the lookup tables that the pattern computation at the end of this function
   (my_pattern) relies on.

   keepcol: one flag per independent variable (InvOp), consumed in tape order. An
            independent variable whose flag is false is recorded as a constant tape point.

   Steps performed, in order:
     - size/clear the mark vectors (arg_mark_, op_mark_, user_region_mark_);
     - one reverse sweep over the tape that fills tp_ (tape point per operator index)
       and var2op_ (operator index that produced each variable) and calls markArgs;
     - user_region_: whether each tape point lies inside a UserOp ... UserOp region;
     - constant_tape_point_: whether each tape point is constant;
     - colpattern: one my_pattern(i) call per i in [0, Range()), after which the marks
       are reset.

   NOTE(review): keepcol is taken by value (copied on every call) and is indexed without
   a bounds check; it presumably needs one entry per InvOp on the tape -- confirm. */
void my_init(vector<bool> keepcol){
  Partial.extend(num_var_tape_ * 1);
  arg_mark_.resize(play_.op_arg_rec_.size());
  for(size_t i=0;i<arg_mark_.size();i++)arg_mark_[i]=false;
  /* Run a reverse test-sweep to store pointers once */
  tape_point tp;
  play_.reverse_start(tp.op, tp.op_arg, tp.op_index, tp.var_index);
  /* tp now describes the last operator, so op_index+1 / var_index+1 are the table sizes */
  tp_.resize(tp.op_index+1);
  var2op_.resize(tp.var_index+1);
  op_mark_.resize(tp.op_index+1);
  for(size_t i=0;i<op_mark_.size();i++)op_mark_[i]=0;
  user_region_mark_.resize(tp.op_index+1);
  for(size_t i=0;i<user_region_mark_.size();i++)user_region_mark_[i]=0;
  tp_[tp.op_index]=tp;
  /* 1. We need to be able to find out, for a given variable, what operator created 
     the variable. This is easiest done by looping through the _operators_ because for a 
     given op we have access to all the resulting variables it creates.
     2. We precompute a vector of "tape_points" so that instead of calling 
     "reverse_next", we simply get the next tape entry by tp_[i-1].
  */
  while(tp.op != BeginOp ){ /* tp.op_index is decremented by one in each iteration ... */
    // printTP(tp); /* For debugging */
    play_.reverse_next(tp.op, tp.op_arg, tp.op_index, tp.var_index);
    /* Csum is special case - see remarks in player.hpp and reverse_sweep.hpp */
    if(tp.op == CSumOp)play_.reverse_csum(tp.op, tp.op_arg, tp.op_index, tp.var_index);
    /* every result variable of this operator maps back to the operator's index */
    for(size_t i=0;i<NumRes(tp.op);i++)var2op_[tp.var_index-i]=tp.op_index;
    tp_[tp.op_index]=tp;
    markArgs(tp);
  }
  /* Lookup table: is tape_point within a UserOp region? */
  /* A UserOp entry toggles the "inside" state and is itself always flagged true. */
  bool user_within=false;
  user_region_.resize(tp_.size());
  for(size_t i=0;i<tp_.size();i++){
    if(tp_[i].op==UserOp){
      user_region_[i]=true;
      user_within=!user_within;	
    } else {
      user_region_[i]=user_within;
    }
  }

  /* Lookup table: is tape_point a constant (=only fixed effect dependent) ? */
  constant_tape_point_.resize(tp_.size());
  int indep_var_number=0; /* running index into keepcol, advanced once per InvOp */
  for(size_t i=0;i<tp_.size();i++){
    if(tp_[i].op==InvOp){ /* All independent variables are marked according to being
			     random or fixed effect */
      constant_tape_point_[i]=!keepcol[indep_var_number];
      indep_var_number++;
    } else { /* Mark operator as constant if _all_ arguments are constant */
      constant_tape_point_[i] = is_tape_point_constant(i);
    }

    //std::cout << constant_tape_point_[i] << " "; printTP(tp_[i]);

  }
  // std::cout << "Total:   " << constant_tape_point_.size() << "\n";
  // int sum=0; for(int i=0;i<constant_tape_point_.size();i++)sum+=constant_tape_point_[i];
  // std::cout << "Constant:" << sum << "\n";


  // Calculate pattern
  // Range() here is a call (no arguments) whose result is used as the number of patterns.
  int m=Range();
  colpattern.resize(m);
  for(int i=0;i<m;i++)my_pattern(i);
  for(size_t i=0;i<op_mark_.size();i++)op_mark_[i]=0; /* remember to reset marks */
  for(size_t i=0;i<user_region_mark_.size();i++)user_region_mark_[i]=0; /* remember to reset marks */
}
Beispiel #19
0
// Returns the overlap of two ranges given as (first, second) bounds, or NULL_RANGE when
// the later-starting range begins after the earlier-starting one has ended. Touching
// ranges (start == other's end) yield a range whose two bounds are equal.
Range getOverlapped(const Range& a, const Range& b){
    // Order the inputs by start so that `earlier` never starts after `later`.
    const bool swapped = a.first > b.first;
    const Range& earlier = swapped ? b : a;
    const Range& later   = swapped ? a : b;

    if(later.first > earlier.second) {
        return NULL_RANGE;
    }

    // The overlap starts where the later range starts and ends at the smaller end.
    if(earlier.second > later.second) {
        return Range(later.first, later.second);
    }
    return Range(later.first, earlier.second);
}
Beispiel #20
0
  void TaskManager :: CreateJob (const function<void(TaskInfo&)> & afunc,
                                 int antasks)
  {
    if (num_threads == 1 || !task_manager || func)
      {
        if (startup_function) (*startup_function)();
        
        TaskInfo ti;
        ti.ntasks = antasks;
        ti.thread_nr = 0; ti.nthreads = 1;
        // ti.node_nr = 0; ti.nnodes = 1;
        for (ti.task_nr = 0; ti.task_nr < antasks; ti.task_nr++)
          afunc(ti);

        if (cleanup_function) (*cleanup_function)();        
        return;
      }

    
    trace->StartJob(jobnr, afunc.target_type());

    func = &afunc;

    ntasks.store (antasks); // , memory_order_relaxed);
    ex = nullptr;


    nodedata[0]->start_cnt.store (0, memory_order_relaxed);

    jobnr++;
    
    for (int j = 0; j < num_nodes; j++)
      nodedata[j]->participate |= 1;

    if (startup_function) (*startup_function)();
    
    int thd = 0;
    int thds = GetNumThreads();
    int mynode = num_nodes * thd/thds;

    IntRange mytasks = Range(int(ntasks)).Split (mynode, num_nodes);
    NodeData & mynode_data = *(nodedata[mynode]);

    TaskInfo ti;
    ti.nthreads = thds;
    ti.thread_nr = thd;
    // ti.nnodes = num_nodes;
    // ti.node_nr = mynode;

    try
      {
        while (1)
          {
            int mytask = mynode_data.start_cnt++;
            if (mytask >= mytasks.Size()) break;
            
            ti.task_nr = mytasks.First()+mytask;
            ti.ntasks = ntasks;

            {
              RegionTracer t(ti.thread_nr, jobnr, RegionTracer::ID_JOB, ti.task_nr);
              (*func)(ti); 
            }
          }

      }
    catch (Exception e)
      {
        {
          lock_guard<mutex> guard(copyex_mutex);
          delete ex;
          ex = new Exception (e);
          mynode_data.start_cnt = mytasks.Size();
        }
      }

    if (cleanup_function) (*cleanup_function)();
    
    for (int j = 0; j < num_nodes; j++)
      if (workers_on_node[j])
        {
          while (complete[j] != jobnr)
            _mm_pause();
        }

    func = nullptr;
    if (ex)
      throw Exception (*ex);

    trace->StopJob();
  }
Beispiel #21
0
// Computes the shape that results from replacing the axes [srcRange.start, srcRange.end)
// of srcShape with the entries of maskShape, and writes it to dstShape.
//
// Meaning of a mask entry, as implemented below:
//   > 0  : the output dimension is that value;
//   == 0 : the output dimension is copied from srcShape at the same absolute position;
//   == -1: the output dimension is inferred so that total(dstShape) == total(srcShape);
//          at most one -1 is allowed.
// Any other value raises StsBadArg.
//
// srcShape   [in]  source shape
// maskShape  [in]  reshape mask
// srcRange   [in]  axes of srcShape to replace; Range::all() means every axis
// dstShape   [out] resulting shape (resized here)
//
// Raises via CV_Error / CV_Assert when the mask is inconsistent with the source shape.
static void computeShapeByReshapeMask(const MatShape &srcShape,
                                      const MatShape &maskShape,
                                      Range srcRange /*= Range::all()*/,
                                      MatShape& dstShape)
{
    int srcShapeSize = (int)srcShape.size();
    int maskShapeSize = (int)maskShape.size();

    if (srcRange == Range::all())
        srcRange = Range(0, srcShapeSize);
    else
    {
        // Keep the requested length while normalising the start.
        // clamp() is defined elsewhere; presumably it maps a (possibly negative) axis
        // index into [0, srcShapeSize) -- TODO confirm.
        int sz = srcRange.size();
        srcRange.start = clamp(srcRange.start, srcShapeSize);
        srcRange.end = srcRange.end == INT_MAX ? srcShapeSize : srcRange.start + sz;
    }

    bool explicitMask = !maskShape.empty();  // All mask values are positive.
    for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i)
    {
        explicitMask = maskShape[i] > 0;
    }
    // Working range of source shape is a range where area(src) == area(mask).
    if (explicitMask)
    {
        int maskTotal = total(maskShape);
        // Go from the end of mask until we collect required total.
        // Once a suffix [i, end) matches maskTotal, keep moving left while the total
        // stays equal, then fix srcRange.start at the last matching index.
        bool matched = false;
        for (int i = srcRange.end - 1; i >= srcRange.start; --i)
        {
            if (matched)
            {
                if (i == 0 || total(srcShape, i, srcRange.end) != maskTotal)
                {
                    srcRange.start = i + 1;
                    break;
                }
            }
            else
            {
                matched = total(srcShape, i, srcRange.end) == maskTotal;
            }
        }
        CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
    }

    CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize);
    int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize;
    dstShape.resize(dstShapeSize);

    // Axes before and after the replaced range are carried over unchanged.
    std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin());
    std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize);

    int inferDim = -1;  // absolute index of the dimension marked -1, or -1 if none
    for (int i = 0; i < maskShapeSize; i++)
    {
        if (maskShape[i] > 0)
        {
            dstShape[srcRange.start + i] = maskShape[i];
        }
        else if (maskShape[i] == 0)
        {
            if (srcRange.start + i >= srcShapeSize)
                CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i));
            dstShape[srcRange.start + i] = srcShape[srcRange.start + i];
        }
        else if (maskShape[i] == -1)
        {
            if (inferDim != -1)
                CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)");
            inferDim = srcRange.start + i;
            // placeholder 1 so that total(dstShape) below excludes the inferred dim
            dstShape[inferDim] = 1;
        }
        else
            CV_Error(Error::StsBadArg, "maskShape[i] >= -1");
    }

    size_t srcTotal = total(srcShape);
    size_t dstTotal = total(dstShape);

    if (inferDim != -1)
    {
        // NOTE(review): if a dimension copied from srcShape is 0, dstTotal is 0 and the
        // modulo below divides by zero -- confirm whether zero-sized dims can reach here.
        if (srcTotal % dstTotal != 0)
            CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1");

        dstShape[inferDim] = (int)(srcTotal / dstTotal);
    }
    else
    {
        CV_Assert(srcTotal == dstTotal);
    }
}
Beispiel #22
0
  void TaskManager :: Loop(int thd)
  {
    /*
    static Timer tADD("add entry counter");
    static Timer tCASready1("spin-CAS ready tick1");
    static Timer tCASready2("spin-CAS ready tick2");
    static Timer tCASyield("spin-CAS yield");
    static Timer tCAS1("spin-CAS wait");
    static Timer texit("exit zone");
    static Timer tdec("decrement");
    */
    thread_id = thd;

    int thds = GetNumThreads();

    int mynode = num_nodes * thd/thds;

    NodeData & mynode_data = *(nodedata[mynode]);



    TaskInfo ti;
    ti.nthreads = thds;
    ti.thread_nr = thd;
    // ti.nnodes = num_nodes;
    // ti.node_nr = mynode;

      
#ifdef USE_NUMA
    numa_run_on_node (mynode);
#endif
    active_workers++;
    workers_on_node[mynode]++;
    int jobdone = 0;


#ifdef USE_MKL
    auto mkl_max = mkl_get_max_threads();
    mkl_set_num_threads_local(1);
#endif

    
    while (!done)
      {
        if (complete[mynode] > jobdone)
          jobdone = complete[mynode];

        if (jobnr == jobdone)
          {
            // RegionTracer t(ti.thread_nr, tCASyield, ti.task_nr);            
            if(sleep)
              this_thread::sleep_for(chrono::microseconds(sleep_usecs));
            else
              {
#ifdef WIN32
                this_thread::yield();
#else  // WIN32
                sched_yield();
#endif // WIN32
              }
            continue;
          }

        {
          // RegionTracer t(ti.thread_nr, tADD, ti.task_nr);

          // non-atomic fast check ...
          if ( (mynode_data.participate & 1) == 0) continue;

          int oldval = mynode_data.participate += 2;
          if ( (oldval & 1) == 0)
            { // job not active, going out again
              mynode_data.participate -= 2;
              continue;
            }
        }

        if (startup_function) (*startup_function)();
        
        IntRange mytasks = Range(int(ntasks)).Split (mynode, num_nodes);
          
        try
          {
            
            while (1)
              {
                if (mynode_data.start_cnt >= mytasks.Size()) break;
		int mytask = mynode_data.start_cnt.fetch_add(1, memory_order_relaxed);
                if (mytask >= mytasks.Size()) break;
                
                ti.task_nr = mytasks.First()+mytask;
                ti.ntasks = ntasks;
                
                {
                  RegionTracer t(ti.thread_nr, jobnr, RegionTracer::ID_JOB, ti.task_nr);
                  (*func)(ti);
                }
              }

          }
        catch (Exception e)
          {
            {
              // cout << "got exception in TM" << endl; 
              lock_guard<mutex> guard(copyex_mutex);
              delete ex;
              ex = new Exception (e);
              mynode_data.start_cnt = mytasks.Size();
            }
          }

#ifndef __MIC__
        atomic_thread_fence (memory_order_release);     
#endif // __MIC__

        if (cleanup_function) (*cleanup_function)();

        jobdone = jobnr;

        mynode_data.participate-=2;

	{
	  int oldpart = 1;
	  if (mynode_data.participate.compare_exchange_strong (oldpart, 0))
	    {
              if (jobdone < jobnr.load())
                { // reopen gate
                  mynode_data.participate |= 1;                  
                }
              else
                {
                  if (mynode != 0)
                    mynode_data.start_cnt = 0;
                  complete[mynode] = jobnr.load(); 
                }
	    }	      
	}
      }
    

#ifdef USE_MKL
    mkl_set_num_threads_local(mkl_max);
#endif

    workers_on_node[mynode]--;
    active_workers--;
  }
Beispiel #23
0
 // Returns an oclMat built from *this restricted to the row range [y, y + 1)
 // and all columns.
 inline oclMat oclMat::row(int y) const
 {
     const Range singleRow(y, y + 1);
     const Range everyColumn = Range::all();
     return oclMat(*this, singleRow, everyColumn);
 }
Beispiel #24
0
// Preprocess() - build a segment tree for O(log n) queries
void regProp2::compSeeds(void)
{
	Datareg2& reg2 = (Datareg2&)data;
	int i, j;
	int xdim, ydim;
	float val[4];
	Range* _prop_x, *prop_x;
	Range prop_y;
	Range propagated;
	Range c_prop;
	Range responsibility, c_respons;
	Range delay;
	Range y_comp;
	float min_x, min_y, max_x, max_y;
	float min_in, max_in, min4, max4;
	int nseed;
	xdim = reg2.dim[0];
	ydim = reg2.dim[1];
	_prop_x = new Range[ydim];
	// proceed through the slices computing seeds
	nseed=0;
	// process the k'th slab
	for(i=0; i<xdim-1; i++)
		for(j=0; j<ydim-1; j++)
		{
			prop_x = &_prop_x[j];
			// load the voxel data
			reg2.getCellValues(i, j, val);
			min_x = MIN2(val[0], val[3]);
			max_x = MAX2(val[0], val[3]);
			min_y = MIN2(val[0], val[1]);
			max_y = MAX2(val[0], val[1]);
			// set the incoming values if on a border
			if(i==0)
			{
				prop_x->Set(min_x, max_x);
			}
			if(j==0)
			{
				prop_y.Set(min_y, max_y);
			}
			// merge incoming information
			y_comp = prop_y.Complement(min_y, max_y);
			propagated = prop_y + ((*prop_x)-y_comp);
			// compute complement of incoming ranges
			min_in = MIN2(min_x, min_y);
			max_in = MAX2(max_x, max_y);
			c_prop.Set(min_in,max_in);
			c_prop -= propagated;
			// compute responsibility ranges
			min4 = MIN2(min_in, val[2]);
			max4 = MAX2(max_in, val[2]);
			responsibility.Set(min4, max4);
			responsibility-=c_prop;
			c_respons = responsibility.Complement(min4, max4);
			// determine range which can be delayed
			delay.MakeEmpty();
			if(i < xdim-2)
				delay+=Range(MIN2(val[1], val[2]),
							 MAX2(val[1], val[2]));
			if(j < ydim-2)
				delay+=Range(MIN2(val[2], val[3]),
							 MAX2(val[2], val[3]));
			// test for propagation of entire responsibility range
			if(responsibility.Empty() || (!delay.Empty() &&
										  delay.MinAll() <= responsibility.MinAll() &&
										  delay.MaxAll() >= responsibility.MaxAll()))
			{
				// propagate first to the next x-slice
				if(i == xdim-2)
				{
					prop_x->MakeEmpty();
				}
				else
				{
					prop_x->Set(MIN2(val[1], val[2]), MAX2(val[1], val[2]));
					*prop_x-=c_respons;
				}
				c_respons += *prop_x;
				// all remaining propagated in y-dir
				if(j == ydim-2)
				{
					prop_y.MakeEmpty();
				}
				else
				{
					prop_y.Set(MIN2(val[2], val[3]), MAX2(val[2], val[3]));
					prop_y-= c_respons;
				}
			}
			else
			{
				// can't propagate all responsiblity, cell must be a seed
				seeds.AddSeed(reg2.index2cell(i,j), responsibility.MinAll(),
							  responsibility.MaxAll());
				nseed++;
				prop_y.MakeEmpty();
				prop_x->MakeEmpty();
			}
		}
	if(verbose)
	{
		printf("computed %d seeds\n", nseed);
	}
}
Beispiel #25
0
 // Returns an oclMat built from *this restricted to the row range
 // Range(startrow, endrow) and all columns.
 inline oclMat oclMat::rowRange(int startrow, int endrow) const
 {
     const Range selectedRows(startrow, endrow);
     const Range everyColumn = Range::all();
     return oclMat(*this, selectedRows, everyColumn);
 }
Beispiel #26
0
// Unit test for the double-valued Range type (min/max interval).
// Exercises: the all()/none() constants, includes(), add() of a value and of another
// Range, equality, empty() and intersects(). Every check goes through tassert;
// returns 0 when the end is reached.
int main(int argc, char **argv)
{
  // --- all() and none(): identity and bounds ---
  Range all = Range::all();
  Range none = Range();
  tassert(none == Range::none());
  tassert(!(all == none));

  tassert(all.min == -std::numeric_limits<double>::max());
  tassert(all.max == std::numeric_limits<double>::max());
  
  tassert(all.includes(-std::numeric_limits<double>::max()));
  tassert(all.includes(-1));
  tassert(all.includes(0));
  tassert(all.includes(1));
  tassert(all.includes(std::numeric_limits<double>::max()));
  
  // the empty range is stored inverted: min is +max, max is -max
  tassert(none.min == std::numeric_limits<double>::max());
  tassert(none.max == -std::numeric_limits<double>::max());

  tassert(!none.includes(-std::numeric_limits<double>::max()));
  tassert(!none.includes(-1));
  tassert(!none.includes(0));
  tassert(!none.includes(1));
  tassert(!none.includes(std::numeric_limits<double>::max()));

  // --- add(value): adding to all() changes nothing ---
  Range a = all;
  a.add(-100);
  tassert(a == Range::all());
  
  // --- add(value): adding to none() yields the single point ---
  Range b = none;
  b.add(-100);
  tassert(!(b == none));

  tassert(b.min == -100);
  tassert(b.max == -100);
  tassert(!b.includes(-std::numeric_limits<double>::max()));
  tassert(!b.includes(-101));
  tassert( b.includes(-100));
  tassert(!b.includes(-99));
  tassert(!b.includes(-1));
  tassert(!b.includes(0));
  tassert(!b.includes(1));
  tassert(!b.includes(std::numeric_limits<double>::max()));
  
  // --- add(value): a second value widens the range; both ends are inclusive ---
  Range c = b;
  c.add(1000);
  tassert(!(b == c));
  
  tassert(c.min == -100);
  tassert(c.max == 1000);
  tassert(!c.includes(-std::numeric_limits<double>::max()));
  tassert(!c.includes(-101));
  tassert( c.includes(-100));
  tassert( c.includes(-99));
  tassert( c.includes(-1));
  tassert( c.includes(0));
  tassert( c.includes(1));
  tassert( c.includes(999));
  tassert( c.includes(1000));
  tassert(!c.includes(1001));
  tassert(!c.includes(std::numeric_limits<double>::max()));
  
  // --- add(Range): none() is neutral, all() absorbs ---
  c.add(none);
  tassert(c == Range(-100, 1000));
  
  c.add(all);
  tassert(!(c == Range(-100, 1000)));
  tassert(c == Range::all());

  // --- empty(): a range with min > max is empty ---
  tassert(!Range(-100,100).empty());
  tassert(Range(100,-100).empty());

  // --- intersects(): shared end points count as intersecting ---
  tassert(!Range(100,200).intersects(Range(0,99.999)));
  tassert(Range(100,200).intersects(Range(0,100)));
  tassert(Range(100,200).intersects(Range(50,150)));
  tassert(Range(100,200).intersects(Range(100,200)));
  tassert(Range(100,200).intersects(Range(150,250)));
  tassert(Range(100,200).intersects(Range(200,300)));
  tassert(!Range(100,200).intersects(Range(200.01,300.01)));

  // single-point and inverted (empty) arguments
  tassert(Range(100,200).intersects(Range(150, 150)));
  tassert(Range(100,200).intersects(Range(150, 151)));
  tassert(!Range(100,200).intersects(Range(151, 150)));

  // none() intersects nothing, not even all()
  tassert(!Range::none().intersects(Range::all()));
  tassert(!Range::all().intersects(Range::none()));

  return 0;
}
Beispiel #27
0
// Builds the dimuon invariant-mass dataset, optionally fits it, draws the result and
// writes the opposite-sign histogram and dataset to a ROOT file in the current directory.
//
// Parameters:
//   FileName   input tree file.
//              NOTE(review): this argument is overwritten below from isPbPb before it
//              is ever read, so a caller-supplied value has no effect -- confirm intent.
//   oniamode   3: Z, 2: Upsilon, 1: J/Psi; selects binning, models and output file name.
//   isData     false for MC, true for data; selects the signal/background model ids.
//   isPbPb     false for pp, true for PbPb; selects input file, run range, output name.
//   doFit      run the fit and draw the pull distribution; forced off for oniamode == 3.
//   inExcStat  if true the excited states are fitted (passed on to the model builders).
void fit2015(
             TString FileName ="/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root", 
             int  oniamode  = 2,        // oniamode-> 3: Z,  2: Upsilon and 1: J/Psi
             bool isData    = true,     // isData = false for MC, true for Data
             bool isPbPb    = false,    // isPbPb = false for pp, true for PbPb
	     bool doFit = false ,
             bool inExcStat = true      // if inExcStat is true, then the excited states are fitted
             ) {

  InputOpt opt;
  SetOptions(&opt, isData, isPbPb, oniamode,inExcStat);

  // Input file is chosen purely from isPbPb (this replaces the FileName argument).
  if (isPbPb) {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/PbPbData/OniaTree_262548_262893.root";
  } else {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root";
  }
    
  // Number of mass bins per mode; stays 1 for any other oniamode value.
  int nbins = 1; //ceil((opt.dMuon->M->Max - opt.dMuon->M->Min)/binw);
  if (oniamode==1){
    nbins = 140;
  } else if (oniamode==2) {
    nbins = 70; 
  } else if (oniamode==3) {
    nbins = 40;
  } 
 
  RooWorkspace myws;
  TH1F* hDataOS =  new TH1F("hDataOS","hDataOS", nbins, opt.dMuon.M.Min, opt.dMuon.M.Max);
  makeWorkspace2015(myws, FileName, opt, hDataOS);

  // Objects looked up by name in the workspace after makeWorkspace2015 has run.
  RooRealVar* mass      = (RooRealVar*) myws.var("invariantMass"); 
  RooDataSet* dataOS_fit = (RooDataSet*) myws.data("dataOS");
  RooDataSet* dataSS_fit = (RooDataSet*) myws.data("dataSS");
  RooAbsPdf*  pdf = NULL;

  if (oniamode==3) { doFit=false; }
  if (doFit) {
    // Model ids handed to the buildModel* helpers (their meaning is defined there).
    int sigModel=0, bkgModel=0;  
    if (isData) {
      if (oniamode==1){
        sigModel = inExcStat ? 2 : 3;
        bkgModel = 1;
      } else {
        sigModel = inExcStat ? 1 : 3; // gaussian   
        bkgModel = 2;
      }      
    } else {
      if (oniamode==1){
        sigModel = inExcStat ? 2 : 3; // gaussian   
        bkgModel = 2;
      } else {
        sigModel = inExcStat ? 2 : 3; // gaussian   
        bkgModel = 3;
      }
    }

    if (opt.oniaMode==1) buildModelJpsi2015(myws, sigModel, bkgModel,inExcStat);
    else if (opt.oniaMode==2) buildModelUpsi2015(myws, sigModel, bkgModel,inExcStat);

    // NOTE(review): no model is built when opt.oniaMode is neither 1 nor 2; pdf is
    // then presumably null and the fitTo call below would dereference it -- confirm.
    pdf       =(RooAbsPdf*)  myws.pdf("pdf");
    // NOTE(review): fitObject is never read afterwards.
    RooFitResult* fitObject = pdf->fitTo(*dataOS_fit,Save(),Hesse(kTRUE),Extended(kTRUE)); // Fit
  }

  // Main frame: same-sign data in red, opposite-sign data in blue.
  RooPlot* frame = mass->frame(Bins(nbins),Range(opt.dMuon.M.Min, opt.dMuon.M.Max));  
  RooPlot* frame2 = NULL;
  dataSS_fit->plotOn(frame, Name("dataSS_FIT"), MarkerColor(kRed), LineColor(kRed), MarkerSize(1.2)); 
  dataOS_fit->plotOn(frame, Name("dataOS_FIT"), MarkerColor(kBlue), LineColor(kBlue), MarkerSize(1.2));
  

  if (doFit) {
     // Overlay the fitted pdf and put the pulls on a second frame.
     pdf->plotOn(frame,Name("thePdf"),Normalization(dataOS_fit->sumEntries(),RooAbsReal::NumEvent));
     RooHist *hpull = frame -> pullHist(0,0,true);
     hpull -> SetName("hpull");
     frame2 = mass->frame(Title("Pull Distribution"),Bins(nbins),Range(opt.dMuon.M.Min,opt.dMuon.M.Max));
     frame2 -> addPlotable(hpull,"PX");  
     } 
  drawPlot(frame,frame2, pdf, opt, doFit,inExcStat);

  // Output file name and run range depend on the collision system and the mode.
  // NOTE(review): FileName is assigned again here but not read afterwards, and
  // OutputFileName stays empty for oniamode outside 1..3.
  TString OutputFileName = "";
  if (isPbPb) {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/PbPbData/OniaTree_262548_262893.root";
    opt.RunNb.Start=262548;
    opt.RunNb.End=262893;
    if (oniamode==1) {OutputFileName = (TString)("JPSIPbPbDataset.root");}
    if (oniamode==2) {OutputFileName = (TString)("YPbPbDataset.root");}
    if (oniamode==3) {OutputFileName = (TString)("ZPbPbDataset.root");}
  } else {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root";
    opt.RunNb.Start=262163;
    opt.RunNb.End=262328;
    if (oniamode==1) {OutputFileName = (TString)("JPSIppDataset.root");}
    if (oniamode==2) {OutputFileName = (TString)("YppDataset.root");}
    if (oniamode==3) {OutputFileName = (TString)("ZppDataset.root");}
  }
  
  // Persist the opposite-sign histogram and dataset ("RECREATE" overwrites the file).
  TFile* oFile =  new TFile(OutputFileName,"RECREATE");
  oFile->cd();
  hDataOS->Write("hDataOS");
  dataOS_fit->Write("dataOS_FIT");
  oFile->Write();
  oFile->Close();

}
Beispiel #28
0
    // Chooses, among the four (R, t) candidates obtained from decomposeEssentialMat(E),
    // the one for which most point pairs pass the cheirality test, and returns that count.
    //
    //   E              essential matrix
    //   _points1/2     matching 2D points (same count and type); converted to CV_64F
    //   _cameraMatrix  3x3 single-channel matrix; fx, fy, cx, cy are read from it
    //   _R, _t         [out] selected 3x3 rotation and 3x1 translation (both required)
    //   _mask          [in/out] if non-empty on entry it is AND-ed into every candidate
    //                  mask; if needed, receives the mask of the selected candidate
    //
    // Candidates are tried in the order (R1, t), (R2, t), (R1, -t), (R2, -t); on equal
    // counts the earliest one wins.
    int recoverPose( InputArray E, InputArray _points1, InputArray _points2, InputArray _cameraMatrix,
                         OutputArray _R, OutputArray _t, InputOutputArray _mask)
    {
        Mat points1, points2, cameraMatrix;
        _points1.getMat().convertTo(points1, CV_64F);
        _points2.getMat().convertTo(points2, CV_64F);
        _cameraMatrix.getMat().convertTo(cameraMatrix, CV_64F);

        int npoints = points1.checkVector(2);
        CV_Assert( npoints >= 0 && points2.checkVector(2) == npoints &&
                                  points1.type() == points2.type());

        CV_Assert(cameraMatrix.rows == 3 && cameraMatrix.cols == 3 && cameraMatrix.channels() == 1);

        // Bring both point sets to an N x 2 single-channel layout.
        if (points1.channels() > 1)
        {
            points1 = points1.reshape(1, npoints);
            points2 = points2.reshape(1, npoints);
        }

        const double fx = cameraMatrix.at<double>(0,0);
        const double fy = cameraMatrix.at<double>(1,1);
        const double cx = cameraMatrix.at<double>(0,2);
        const double cy = cameraMatrix.at<double>(1,2);

        // Normalise the image coordinates, then switch to a 2 x N layout.
        Mat* pointSets[2] = { &points1, &points2 };
        for (int s = 0; s < 2; ++s)
        {
            Mat& pts = *pointSets[s];
            pts.col(0) = (pts.col(0) - cx) / fx;
            pts.col(1) = (pts.col(1) - cy) / fy;
        }
        points1 = points1.t();
        points2 = points2.t();

        Mat R1, R2, t;
        decomposeEssentialMat(E, R1, R2, t);

        // Reference projection [I | 0] and the four candidate projections [R | +-t].
        Mat P0 = Mat::eye(3, 4, R1.type());
        Mat candidates[4];
        for (int k = 0; k < 4; ++k)
        {
            const Mat& rot = (k % 2 == 0) ? R1 : R2;
            candidates[k].create(3, 4, R1.type());
            candidates[k](Range::all(), Range(0, 3)) = rot * 1.0;
            if (k < 2)
                candidates[k].col(3) = t * 1.0;
            else
                candidates[k].col(3) = -t * 1.0;
        }

        // Cheirality test per candidate. Points whose depth is not below `dist` in
        // either view are rejected as well, since the sign of the depth of very
        // distant points is unreliable.
        double dist = 50.0;
        Mat Q;
        Mat masks[4];
        for (int k = 0; k < 4; ++k)
        {
            triangulatePoints(P0, candidates[k], points1, points2, Q);
            Mat ok = Q.row(2).mul(Q.row(3)) > 0;
            Q.row(0) /= Q.row(3);
            Q.row(1) /= Q.row(3);
            Q.row(2) /= Q.row(3);
            Q.row(3) /= Q.row(3);
            ok = (Q.row(2) < dist) & ok;
            Q = candidates[k] * Q;
            ok = (Q.row(2) > 0) & ok;
            ok = (Q.row(2) < dist) & ok;
            masks[k] = ok.t();
        }

        // A caller-supplied mask restricts which points may count.
        Mat& mask1 = masks[0];
        if (!_mask.empty())
        {
            Mat mask = _mask.getMat();
            CV_Assert(mask.size() == mask1.size());
            for (int k = 0; k < 4; ++k)
                bitwise_and(mask, masks[k], masks[k]);
        }
        if (_mask.empty() && _mask.needed())
        {
            _mask.create(mask1.size(), CV_8U);
        }

        CV_Assert(_R.needed() && _t.needed());
        _R.create(3, 3, R1.type());
        _t.create(3, 1, t.type());

        // Count survivors; a strict '>' keeps the earliest candidate on ties.
        int good[4];
        for (int k = 0; k < 4; ++k)
            good[k] = countNonZero(masks[k]);

        int best = 0;
        for (int k = 1; k < 4; ++k)
        {
            if (good[k] > good[best])
                best = k;
        }

        // Candidates 2 and 3 use the negated translation.
        if (best >= 2)
            t = -t;
        const Mat& bestR = (best % 2 == 0) ? R1 : R2;
        bestR.copyTo(_R);
        t.copyTo(_t);
        if (_mask.needed()) masks[best].copyTo(_mask);
        return good[best];
    }
            // Computes a disparity map for a stereo pair using a census-style binary
            // descriptor followed by Hamming-distance block matching.
            //
            // leftarr, rightarr : input images; must have identical size and be CV_8UC1.
            // disparr           : disparity output. It is only read through getMat() here,
            //                     never create()d.
            //                     NOTE(review): the caller apparently has to pre-allocate it,
            //                     and the median / small-region filters below address it as
            //                     uint8_t - confirm against the callers.
            //
            // Raises (via CV_Error) StsUnmatchedSizes / StsUnsupportedFormat / StsOutOfRange
            // when the inputs or the matcher parameters fail the checks below.
            void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr)
            {
                int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
                Size leftsize = leftarr.size();

                // ---- argument and parameter validation -------------------------------
                if (leftarr.size() != rightarr.size())
                    CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size");

                if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
                    CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1");

                if (dtype != CV_16SC1 && dtype != CV_32FC1)
                    CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format");

                if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
                    params.preFilterType != PREFILTER_XSOBEL)
                    CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE");

                if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0)
                    CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255");

                if (params.preFilterCap < 1 || params.preFilterCap > 63)
                    CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63");

                if (params.kernelSize < 5 || params.kernelSize > 255 || params.kernelSize % 2 == 0 ||
                    params.kernelSize >= std::min(leftsize.width, leftsize.height))
                    CV_Error(Error::StsOutOfRange, "kernelSize must be odd, be within 5..255 and be not larger than image width or height");

                if (params.numDisparities <= 0 || params.numDisparities % 16 != 0)
                    CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16");

                if (params.textureThreshold < 0)
                    CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative");

                if (params.uniquenessRatio < 0)
                    CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative");

                // Value handed to filterSpeckles() as the replacement for removed speckles.
                int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;

                Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
                Mat disp0 = disparr.getMat();

                int width = left0.cols;
                int height = left0.rows;

                // ---- working buffers -------------------------------------------------
                // Mat::create() returns immediately when the requested size and type match
                // the current ones, so it is called on every invocation. Guarding on the
                // pixel count alone (as before) left the buffers wrongly shaped when the
                // image kept its area but changed its dimensions, or when numDisparities
                // was changed between two calls.
                previous_size = width * height;
                speckleX.create(height,width,CV_32SC4);
                speckleY.create(height,width,CV_32SC4);
                puss.create(height,width,CV_32SC4);

                censusImage[0].create(left0.rows,left0.cols,CV_32SC4);
                censusImage[1].create(left0.rows,left0.cols,CV_32SC4);

                partialSumsLR.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S);
                agregatedHammingLRCost.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S);
                hammingDistance.create(left0.rows, left0.cols * (params.numDisparities + 1),CV_16S);

                preFilteredImg0.create(left0.size(), CV_8U);
                preFilteredImg1.create(left0.size(), CV_8U);

                aux.create(height,width,CV_8UC1);

                Mat left = preFilteredImg0, right = preFilteredImg1;

                // ---- optional pre-filtering ------------------------------------------
                int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
                if(params.usePrefilter == true)
                {
                    // The invoker receives two scratch pointers, _buf and _buf + bufSize1,
                    // so the buffer has to hold at least 2 * bufSize1 bytes (assuming each
                    // image uses at most bufSize1 bytes of scratch - TODO confirm against
                    // PrefilterInvoker). Nothing in this method sized slidingSumBuf before.
                    const size_t prefilterBytes = 2 * (size_t)bufSize1;
                    if (slidingSumBuf.empty() || !slidingSumBuf.isContinuous() ||
                        slidingSumBuf.total() * slidingSumBuf.elemSize() < prefilterBytes)
                    {
                        slidingSumBuf.create(1, (int)prefilterBytes, CV_8U);
                    }
                    uchar *_buf = slidingSumBuf.ptr();

                    parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
                }
                else if(params.usePrefilter == false)
                {
                    // No pre-filter: match directly on the input images.
                    left = left0;
                    right = right0;
                }

                // ---- binary descriptor selected by params.kernelType -----------------
                if(params.kernelType == CV_SPARSE_CENSUS)
                {
                    censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_SPARSE_CENSUS);
                }
                else if(params.kernelType == CV_DENSE_CENSUS)
                {
                    // Previously passed CV_SPARSE_CENSUS here, which made the dense
                    // kernel indistinguishable from the sparse one.
                    censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_DENSE_CENSUS);
                }
                else if(params.kernelType == CV_CS_CENSUS)
                {
                    symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_CS_CENSUS);
                }
                else if(params.kernelType == CV_MODIFIED_CS_CENSUS)
                {
                    symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CS_CENSUS);
                }
                else if(params.kernelType == CV_MODIFIED_CENSUS_TRANSFORM)
                {
                    modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CENSUS_TRANSFORM,0);
                }
                else if(params.kernelType == CV_MEAN_VARIATION)
                {
                    // This variant additionally needs integral images and per-kernel means.
                    parSumsIntensityImage[0].create(left0.rows, left0.cols,CV_32SC4);
                    parSumsIntensityImage[1].create(left0.rows, left0.cols,CV_32SC4);
                    Integral[0].create(left0.rows,left0.cols,CV_32SC4);
                    Integral[1].create(left0.rows,left0.cols,CV_32SC4);
                    integral(left, parSumsIntensityImage[0],CV_32S);
                    integral(right, parSumsIntensityImage[1],CV_32S);
                    imageMeanKernelSize(parSumsIntensityImage[0], params.kernelSize,Integral[0]);
                    imageMeanKernelSize(parSumsIntensityImage[1], params.kernelSize, Integral[1]);
                    modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MEAN_VARIATION,0,Integral[0], Integral[1]);
                }
                else if(params.kernelType == CV_STAR_KERNEL)
                {
                    starCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1]);
                }
                // NOTE(review): an unrecognised kernelType falls through silently and the
                // matching below runs on whatever censusImage[] held before this call.

                // ---- matching, aggregation and disparity selection -------------------
                hammingDistanceBlockMatching(censusImage[0], censusImage[1], hammingDistance);
                costGathering(hammingDistance, partialSumsLR);
                blockAgregation(partialSumsLR, params.agregationWindowSize, agregatedHammingLRCost);
                dispartyMapFormation(agregatedHammingLRCost, disp0, 3);

                // Separable median smoothing: 1x9 pass into aux, then 9x1 pass back.
                Median1x9Filter<uint8_t>(disp0, aux);
                Median9x1Filter<uint8_t>(aux,disp0);

                // ---- speckle / small-region post-processing --------------------------
                if(params.regionRemoval == CV_SPECKLE_REMOVAL_AVG_ALGORITHM)
                {
                    smallRegionRemoval<uint8_t>(disp0,params.speckleWindowSize,disp0);
                }
                else if(params.regionRemoval == CV_SPECKLE_REMOVAL_ALGORITHM)
                {
                    if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
                        filterSpeckles(disp0, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
                }
            }
//---------------------------------------------------------
// Assemble the sparse operator that maps boundary data to its
// contribution to the right-hand side of the interior-penalty
// DG Poisson problem.
//
//   spOP [out] : loaded as a (Np*K) x (Nfp*Nfaces*K) sparse matrix.
//                Column (k-1)*Nfp*Nfaces + (f-1)*Nfp + j corresponds
//                to node j of face f of element k (1-based here,
//                converted to 0-based triplets before loading).
//
// Only faces with a non-zero BCType(k,f) contribute:
//   BC_Dirichlet : gtau*mmE(:,Fm1) - Dn1'*mmE(:,Fm1)
//   BC_Neuman    : mmE(:,Fm1)
// Any other non-zero type triggers a warning and contributes
// nothing (previously the block left over from the preceding
// face was stored for such faces).
//
// NOTE(review): geometric factors are sampled at the first node
// of each face (nx, ny, sJ, Fscale) and at node 1 of each element
// (rx, sx, ry, sy); this presumably assumes they are constant per
// face/element - confirm for curved elements.
//---------------------------------------------------------
void NDG2D::PoissonIPDGbc2D(
  CSd& spOP //[out] sparse operator 
  )
{
  // upper bound on the number of triplets to reserve
  int max_OP = (K*Np*Np*(1+Nfaces));

  //initialize parameters
  DVec faceR("faceR"), faceS("faceS");
  IVec Fm("Fm"), Fm1("Fm1");
  DMat V1D("V1D"); int i=0;

  // build local face matrices (1-based: slots 1..Nfaces are used)
  DMat massEdge[4]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1
  Fm = Fmask(All,1); faceR = r(Fm); 
  V1D = Vandermonde1D(N, faceR);
  massEdge[1](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 2
  Fm = Fmask(All,2); faceR = r(Fm); 
  V1D = Vandermonde1D(N, faceR);
  massEdge[2](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 3 (parameterised by s instead of r)
  Fm = Fmask(All,3); faceS = s(Fm); 
  V1D = Vandermonde1D(N, faceS); 
  massEdge[3](Fm,Fm) = inv(V1D*trans(V1D));

  //continue initialize parameters
  DMat Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)");
  double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  int k1=0,f1=0,id=0;
  double N1N1 = double((N+1)*(N+1));
  
  // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax}
  IVec OPi(max_OP),OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax;
  IMat rows1, cols1;  Index1D entries; DMat OP11(Np,Nfp, 0.0);

  // global node numbering:
  // "entries" is the window of the triplet buffers that receives the
  // next Np x Nfp block; "cols1" holds the column ids of the current
  // face and is advanced by Nfp for EVERY face, boundary or not.
  entries.reset(1,Np*Nfp); 
  cols1 = outer(Ones(Np), Range(1,Nfp));

  umMSG(1, "\n ==> {OP} assembly [bc]: ");
  for (k1=1; k1<=K; ++k1)
  {
    if (! (k1%100)) { umMSG(1, "%d, ",k1); }
    rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(Nfp));

    // Build element-to-element parts of operator
    for (f1=1; f1<=Nfaces; ++f1)
    {
      if (BCType(k1,f1))
      {   
        ////////////////////////added by Kevin ///////////////////////////////
        Fm1 = Fmask(All,f1); 

        // index of the first face node of (k1,f1) in the face arrays
        id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;

        lnx = nx(id); lny = ny(id); 
        lsJ = sJ(id); hinv = Fscale(id);

        // physical derivative matrices and normal derivative on this face
        Dx = rx(1,k1)*Dr + sx(1,k1)*Ds;  
        Dy = ry(1,k1)*Dr + sy(1,k1)*Ds;
        Dn1 = lnx*Dx + lny*Dy;

      //mmE = lsJ*massEdge(:,:,f1);
      //bc(All,k1) += (gtau*mmE(All,Fm1) - Dn1'*mmE(All,Fm1))*ubc(fidM);

        mmE_Fm1 = massEdge[f1](All,Fm1);  mmE_Fm1 *= lsJ;

        gtau = 10*N1N1*hinv; // set penalty scaling
        //bc(All,k1) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1) * ubc(fidM);

        // OP11 is reused across faces, so it must only be stored when
        // this face actually assigned it.
        bool knownBC = true;
        switch(BCType(k1,f1)){
          case BC_Dirichlet: 
            OP11 = gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1;  
            break;
          case BC_Neuman:
            OP11 = mmE_Fm1;
            break;
          default:
            std::cout<<"warning: boundary condition is incorrect"<<std::endl;
            knownBC = false;
            break;
        }

        if (knownBC)
        {
          OPi(entries)=rows1; OPj(entries)=cols1; OPx(entries)=OP11; 
          entries += (Np*Nfp);
        }
      }
      cols1 += Nfp;
    }
  }

  umMSG(1, "\n ==> {OPbc} to sparse\n");
  // "entries" sits one block past the last one written; shrink it to
  // cover exactly the triplets that were stored.
  entries.reset(1, entries.hi()-(Np*Nfp));

  // extract triplets from large buffers
  Ai=OPi(entries); Aj=OPj(entries); Ax=OPx(entries);

  // These arrays can be HUGE, so force deallocation
  OPi.Free(); OPj.Free(); OPx.Free();

  // return 0-based sparse result
  Ai -= 1; Aj -= 1;

  //-------------------------------------------------------
  // This operator is not symmetric, and will NOT be 
  // factorised, only used to create reference RHS's:
  //
  //    refrhsbcPR = spOP1 * bcPR;
  //    refrhsbcUx = spOP2 * bcUx;
  //    refrhsbcUy = spOP2 * bcUy;
  //
  // Load ALL elements (both upper and lower triangles):
  //-------------------------------------------------------
  spOP.load(Np*K, Nfp*Nfaces*K, Ai,Aj,Ax, sp_All,false, 1e-15,true);

  Ai.Free();  Aj.Free();  Ax.Free();
  umMSG(1, " ==> {OPbc} ready.\n");

#if (1)
  // check on original estimates for nnz
  umMSG(1, " ==> max_OP: %12d\n", max_OP);
  umMSG(1, " ==> nnz_OP: %12d\n", entries.hi());
#endif
}