//--------------------------------------------------------- DVec& NDG2D::PoissonIPDGbc2D (DVec& ubc, //[in] DVec& qbc //[in] ) //--------------------------------------------------------- { // function [OP] = PoissonIPDGbc2D() // Purpose: Set up the discrete Poisson matrix directly // using LDG. The operator is set up in the weak form // build DG derivative matrices int max_OP = (K*Np*Np*(1+Nfaces)); // initialize parameters DVec faceR("faceR"), faceS("faceS"); DMat V1D("V1D"), Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)"); IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM"); double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0; int i=0,k1=0,f1=0,id=0; IVec i1_Nfp = Range(1,Nfp); double N1N1 = double((N+1)*(N+1)); // build local face matrices DMat massEdge[4]; // = zeros(Np,Np,Nfaces); for (i=1; i<=Nfaces; ++i) { massEdge[i].resize(Np,Np); } // face mass matrix 1 Fm = Fmask(All,1); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[1](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 2 Fm = Fmask(All,2); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[2](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 3 Fm = Fmask(All,3); faceS = s(Fm); V1D = Vandermonde1D(N, faceS); massEdge[3](Fm,Fm) = inv(V1D*trans(V1D)); // build DG right hand side DVec* pBC = new DVec(Np*K, "bc", OBJ_temp); DVec& bc = (*pBC); // reference, for syntax //////////////////////////////////////////////////////////////// umMSG(1, "\n ==> {OP} assembly [bc]: "); for (k1=1; k1<=K; ++k1) { if (! 
(k1%100)) { umMSG(1, "%d, ",k1); } // rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(NGauss)); // Build element-to-element parts of operator for (f1=1; f1<=Nfaces; ++f1) { if (BCType(k1,f1)) { ////////////////////////added by Kevin /////////////////////////////// Fm1 = Fmask(All,f1); fidM = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp; id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces; lnx = nx(id); lny = ny(id); lsJ = sJ(id); hinv = Fscale(id); Dx = rx(1,k1)*Dr + sx(1,k1)*Ds; Dy = ry(1,k1)*Dr + sy(1,k1)*Ds; Dn1 = lnx*Dx + lny*Dy; //mmE = lsJ*massEdge(:,:,f1); //bc(All,k1) += (gtau*mmE(All,Fm1) - Dn1'*mmE(All,Fm1))*ubc(fidM); mmE_Fm1 = massEdge[f1](All,Fm1); mmE_Fm1 *= lsJ; gtau = 10*N1N1*hinv; // set penalty scaling //bc(All,k1) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1) * ubc(fidM); switch(BCType(k1,f1)){ case BC_Dirichlet: bc(Np*(k1-1)+Range(1,Np)) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1)*ubc(fidM); break; case BC_Neuman: bc(Np*(k1-1)+Range(1,Np)) += mmE_Fm1*qbc(fidM); break; default: std::cout<<"warning: boundary condition is incorrect"<<std::endl; } } } } return bc; }
// Returns an oclMat built from *this restricted to the column span
// Range(startcol, endcol), keeping all rows.
inline oclMat oclMat::colRange(int startcol, int endcol) const
{
    const Range everyRow = Range::all();
    const Range columnSpan(startcol, endcol);
    return oclMat(*this, everyRow, columnSpan);
}
void OpponentColorDescriptorExtractor::computeImpl( const Mat& bgrImage, std::vector<KeyPoint>& keypoints, Mat& descriptors ) const { std::vector<Mat> opponentChannels; convertBGRImageToOpponentColorSpace( bgrImage, opponentChannels ); const int N = 3; // channels count std::vector<KeyPoint> channelKeypoints[N]; Mat channelDescriptors[N]; std::vector<int> idxs[N]; // Compute descriptors three times, once for each Opponent channel to concatenate into a single color descriptor int maxKeypointsCount = 0; for( int ci = 0; ci < N; ci++ ) { channelKeypoints[ci].insert( channelKeypoints[ci].begin(), keypoints.begin(), keypoints.end() ); // Use class_id member to get indices into initial keypoints vector for( size_t ki = 0; ki < channelKeypoints[ci].size(); ki++ ) channelKeypoints[ci][ki].class_id = (int)ki; descriptorExtractor->compute( opponentChannels[ci], channelKeypoints[ci], channelDescriptors[ci] ); idxs[ci].resize( channelKeypoints[ci].size() ); for( size_t ki = 0; ki < channelKeypoints[ci].size(); ki++ ) { idxs[ci][ki] = (int)ki; } std::sort( idxs[ci].begin(), idxs[ci].end(), KP_LessThan(channelKeypoints[ci]) ); maxKeypointsCount = std::max( maxKeypointsCount, (int)channelKeypoints[ci].size()); } std::vector<KeyPoint> outKeypoints; outKeypoints.reserve( keypoints.size() ); int dSize = descriptorExtractor->descriptorSize(); Mat mergedDescriptors( maxKeypointsCount, 3*dSize, descriptorExtractor->descriptorType() ); int mergedCount = 0; // cp - current channel position size_t cp[] = {0, 0, 0}; while( cp[0] < channelKeypoints[0].size() && cp[1] < channelKeypoints[1].size() && cp[2] < channelKeypoints[2].size() ) { const int maxInitIdx = std::max( 0, std::max( channelKeypoints[0][idxs[0][cp[0]]].class_id, std::max( channelKeypoints[1][idxs[1][cp[1]]].class_id, channelKeypoints[2][idxs[2][cp[2]]].class_id ) ) ); while( channelKeypoints[0][idxs[0][cp[0]]].class_id < maxInitIdx && cp[0] < channelKeypoints[0].size() ) { cp[0]++; } while( 
channelKeypoints[1][idxs[1][cp[1]]].class_id < maxInitIdx && cp[1] < channelKeypoints[1].size() ) { cp[1]++; } while( channelKeypoints[2][idxs[2][cp[2]]].class_id < maxInitIdx && cp[2] < channelKeypoints[2].size() ) { cp[2]++; } if( cp[0] >= channelKeypoints[0].size() || cp[1] >= channelKeypoints[1].size() || cp[2] >= channelKeypoints[2].size() ) break; if( channelKeypoints[0][idxs[0][cp[0]]].class_id == maxInitIdx && channelKeypoints[1][idxs[1][cp[1]]].class_id == maxInitIdx && channelKeypoints[2][idxs[2][cp[2]]].class_id == maxInitIdx ) { outKeypoints.push_back( keypoints[maxInitIdx] ); // merge descriptors for( int ci = 0; ci < N; ci++ ) { Mat dst = mergedDescriptors(Range(mergedCount, mergedCount+1), Range(ci*dSize, (ci+1)*dSize)); channelDescriptors[ci].row( idxs[ci][cp[ci]] ).copyTo( dst ); cp[ci]++; } mergedCount++; } } mergedDescriptors.rowRange(0, mergedCount).copyTo( descriptors ); std::swap( outKeypoints, keypoints ); }
//\begin{>>PPP_2d_Jacobi_1d_Part.tex}{\subsubsection{runBenchmark}} void PPP_2d_Jacobi_1d_Part:: runBenchmark( ) //================================================================ // /Description: Executes the benchmarking code the specified // number of times and stores the times in the array. // // /Return Values: None. // // /Errors: // None. // // /Author: BJM // /Date: 24 August 2000 //\end{PPP_2d_Jacobi_1d_Part.tex} //================================================================ { double theTime,theStartTime,theEndTime; int theNumProcs = 1; int thisProcNum = 0; // MPI_Comm_size(MPI_COMM_WORLD, &theNumProcs); // MPI_Comm_rank(MPI_COMM_WORLD, &thisProcNum); Partitioning_Type Partitioning(Range(0,theNumProcs-1)); Partitioning.SpecifyInternalGhostBoundaryWidths(1,1); Partitioning.SpecifyDecompositionAxes(1); int theArraySideLength = sqrt( mUnknownsPerProc * theNumProcs ); printf ("theArraySideLength = %d \n",theArraySideLength); doubleArray U_old(theArraySideLength,theArraySideLength,Partitioning); U_old = 0.0; Index I (1,theArraySideLength-2); Index J (1,theArraySideLength-2); int i; // printf ("Warming up ... \n"); for(i=0;i< mNumberOfWarmupIterations;i++) { U_old(I,J) = (U_old(I+1,J+1) + U_old(I+1,J) + U_old(I+1,J-1) + U_old(I,J+1) + U_old(I,J-1) + U_old(I-1,J+1) + U_old(I-1,J) + U_old(I-1,J-1)) / 8.0; } // Now time the problem // printf ("Running timing loop ... \n"); for (i=0; i < mNumberOfTimingIterations; i++) { // printf ("Running timing loop iteration %d \n",i); theStartTime = Communication_Manager::Wall_Clock_Time(); // theStartTime = clock(); U_old(I,J) = (U_old(I+1,J+1) + U_old(I+1,J) + U_old(I+1,J-1) + U_old(I,J+1) + U_old(I,J-1) + U_old(I-1,J+1) + U_old(I-1,J) + U_old(I-1,J-1)) / 8.0; theEndTime = Communication_Manager::Wall_Clock_Time(); // theEndTime = clock(); theTime = theEndTime - theStartTime; mTimes[0][i] = theTime; printf("time= %f\n",theTime); }//end of loop over timed iterations }
// Returns an oclMat built from *this restricted to the one-column span
// Range(x, x + 1), keeping all rows.
inline oclMat oclMat::col(int x) const
{
    const Range everyRow = Range::all();
    const Range oneColumn(x, x + 1);
    return oclMat(*this, everyRow, oneColumn);
}
Range<Scalar,Dim> Range<Scalar,Dim>::unitRange()
{
    // Range spanned by a vector constructed from 0 and a vector constructed from 1.0.
    // NOTE(review): presumably these constructors fill every component -- confirm in Vector.
    const Vector<Scalar,Dim> lowerCorner(0);
    const Vector<Scalar,Dim> upperCorner(1.0);
    return Range(lowerCorner, upperCorner);
}
void PortList::addRange(int startPort, int endPort) { m_ranges << Range(startPort, endPort); }
//! return a Range constructed from (largest size_t, smallest size_t)
static Range Invalid() {
    typedef std::numeric_limits<size_t> SizeLimits;
    return Range(SizeLimits::max(), SizeLimits::min());
}
//! return shifted Range Range operator + (const size_t& shift) const { return Range(begin + shift, end + shift); }
//////////main関数///////////// int main(int argc, char** argv){ cubeSize_ = cubeSize/2.; int i=0, j=0, k=0; clock_t start_time_total,end_time_total; clock_t start_time[fileTotal]; clock_t end_time[fileTotal]; //char * filename[fileTotal]; //並列用 char * model_filename[fileTotal]; char * data_filename[fileTotal]; //Mat shape[fileTotal]; Mat shape_reg[fileTotal]; //Mat shape_temp[fileTotal]; Mat shape_fixed[fileTotal]; //並列用 Mat model_shape[fileTotal]; Mat data_shape[fileTotal]; Mat shape_temp[fileTotal][fileTotal]; Mat my_model_corr[fileTotal]; int myIndex[fileTotal][rows]; float myDist[fileTotal][rows]; RT<float> my_rt[fileTotal]; Mat_<float> model_mean; //modelファイルのデータ数 //model_rows = 16128; //dataファイルのデータ数 //data_rows = 16128; start_time_total = clock(); cout << "-------------" << endl; cout << "ICP Algorithm" << endl; cout << "-------------" << endl; #pragma omp parallel for for(fileCount=0;fileCount<fileTotal;fileCount++) { #pragma region // --- 点群のCSVファイルをcv::Matに取り込む --- if(fileCount>=1){ ///model //csvファイル名 model_filename[fileCount] = (char *)malloc(sizeof(char *) * 100); //sprintf(model_filename[fileCount],"%s/%s/%d.csv",filedir,dir,fileCount); sprintf(model_filename[fileCount],"%s/%s/points%02d.csv",filedir,dir,fileCount); //csvファイルのデータ数 model_rows[fileCount] = rows; //CSVファイル読み込み model_shape[fileCount] = csvread(model_filename[fileCount], model_rows[fileCount], cols); //コンソールにファイル名表示 //cout << "model点群データファイル名 " << model_filename[fileCount] << endl; } ///data //csvファイル名 data_filename[fileCount] = (char *)malloc(sizeof(char *) * 100); //sprintf(data_filename[fileCount],"%s/%s/%d.csv",filedir,dir,(fileCount+1)); sprintf(data_filename[fileCount],"%s/%s/points%02d.csv",filedir,dir,(fileCount+1)); //csvファイルのデータ数 data_rows[fileCount] = rows; //CSVファイル読み込み data_shape[fileCount] = csvread(data_filename[fileCount], data_rows[fileCount], cols); //コンソールにファイル名表示 cout << "点群データファイル名 " << data_filename[fileCount] << endl; #pragma endregion if(fileCount>=1){ #pragma region 
// --- ICPによるレジストレーション --- #if 1 // --- ICP実行する --- //実行時間計測開始 start_time[fileCount] = clock(); cout << "\t標準ICP開始" << endl; //ICP with flann search and unit quaternion method //cout << "kd-tree探索+クォータニオンにより[R/t]を推定します" << endl << endl; ClosestPointFlann model_shape_flann (model_shape[fileCount]); RT_L2 <float, SolveRot_eigen<float>> rt_solver; ICP <ClosestPointFlann> icp (model_shape_flann, rt_solver); icp.set(data_shape[fileCount]); icp.reg(100, 1.0e-6); //実行時間計測終了 end_time[fileCount] = clock(); //cout << "icp result : [R/t] =" << endl << (icp.rt) << endl << endl; cout << "\t" << data_filename[fileCount] << " icp error =" << icp.dk << endl; cout << "\t" << data_filename[fileCount] << " 実行時間 = " << (float)(end_time[fileCount] - start_time[fileCount])/CLOCKS_PER_SEC << "秒" << endl << endl; //データをローカル変数に格納 //my_model_corr[fileCount] = Mat::zeros(rows, cols, CV_32F); my_model_corr[fileCount].create(rows, cols, CV_32F); icp.model_corr.copyTo(my_model_corr[fileCount]); icp.rt.copyTo(my_rt[fileCount]); for(int k=0;k<data_rows[fileCount];k++){ myIndex[fileCount][k] = icp.index[k]; myDist[fileCount][k] = icp.distance[k]; } #else // --- ICP実行しない場合 --- shape_reg[fileCount] = data_shape[fileCount]; #endif #pragma endregion }else{ shape_reg[fileCount] = data_shape[fileCount]; } } #pragma region // --- 座標変換 --- //平均値の計算 reduce(shape_reg[0], model_mean, 0, CV_REDUCE_AVG); #pragma omp parallel for private(i,j,k) for(fileCount=0;fileCount<fileTotal;fileCount++) { if(fileCount>=1){ //得られたrtをdatashapeに適用 //その前にshape_tempの初期化 for(k=0;k<fileTotal;k++) { shape_temp[fileCount][k] = cv::Mat::zeros(data_rows[fileCount], cols, CV_32F); } shape_temp[fileCount][fileCount] = data_shape[fileCount]; for(k=0;k<fileCount;k++) { shape_temp[fileCount][fileCount-(k+1)] = my_rt[(fileCount-k)].transform(shape_temp[fileCount][fileCount-k]); } shape_reg[fileCount] = shape_temp[fileCount][0]; } shape_fixed[fileCount] = shape_reg[fileCount] - repeat(model_mean, shape_reg[fileCount].rows, 1); /* //メモリ割り当て 
points[fileCount] = (GLfloat *)malloc(sizeof(float)*data_rows[fileCount]*cols); //座標値をGLpointsに入れる for(i=0;i<data_rows[fileCount];i++){ for(j=0;j<cols;j++){ points[fileCount][i*cols+j] = shape_fixed[fileCount].at<float>(i,j); } }*/ #pragma endregion } #pragma region // --- OpenGLにデータ渡す --- //メモリ割り当て allpoints = (GLfloat *)malloc(sizeof(float)*rows*fileTotal*cols); for(fileCount=0;fileCount<fileTotal;fileCount++) { //座標値をallpointsに入れる for(int i=0;i<rows;i++){ for(int j=0;j<cols;j++){ allpoints[fileCount*rows*cols+i*cols+j] = shape_fixed[fileCount].at<float>(i,j); } } } #pragma endregion #pragma region // --- カメラRTの計算 --- Mat cameraRT[fileTotal]; Mat cameraR[fileTotal]; Mat cameraT[fileTotal]; cameraRT[0] = Mat::eye(4,4,CV_32F); cameraR[0] = Mat::eye(3,3,CV_32F); cameraT[0] = Mat::zeros(1,3,CV_32F); for(i=1;i<fileTotal;i++){ cameraRT[i] = Mat::eye(4,4,CV_32F); cameraR[i] = Mat::eye(3,3,CV_32F); cameraT[i] = Mat::zeros(1,3,CV_32F); Mat r = my_rt[i].operator()(Range(0,3),Range(0,3)); cameraR[i] = cameraR[i-1]*r.t(); Mat t = my_rt[i].operator()(Range(3,4),Range(0,3)); cameraT[i] = t*cameraR[i-1].t() + cameraT[i-1]; cameraRT[i].at<float>(0,0) = cameraR[i].at<float>(0,0); cameraRT[i].at<float>(0,1) = cameraR[i].at<float>(0,1); cameraRT[i].at<float>(0,2) = cameraR[i].at<float>(0,2); cameraRT[i].at<float>(1,0) = cameraR[i].at<float>(1,0); cameraRT[i].at<float>(1,1) = cameraR[i].at<float>(1,1); cameraRT[i].at<float>(1,2) = cameraR[i].at<float>(1,2); cameraRT[i].at<float>(2,0) = cameraR[i].at<float>(2,0); cameraRT[i].at<float>(2,1) = cameraR[i].at<float>(2,1); cameraRT[i].at<float>(2,2) = cameraR[i].at<float>(2,2); cameraRT[i].at<float>(3,0) = cameraT[i].at<float>(0,0); cameraRT[i].at<float>(3,1) = cameraT[i].at<float>(0,1); cameraRT[i].at<float>(3,2) = cameraT[i].at<float>(0,2); } #pragma endregion // --- データ出力 --- #if FILEOUTPUT /////////////////////////////// // 全ての点群(shape_fixed)をまとめて書き出し // pcd // FILE *outfp; char outfilename[100]; 
sprintf(outfilename,"%s/%s/result_xyz.pcd",outdir,dir); outfp = fopen(outfilename,"w"); if(outfp == NULL){ printf("%sファイルが開けません\n",outfilename); return -1; } int red = 255*256*256; int green = 255*256*256 + 255*256; int white = 255*256*256 + 255*256 + 255; fprintf(outfp,"# .PCD v.7 - Point Cloud Data file format\nVERSION .7\nFIELDS x y z rgb\nSIZE 4 4 4 4\nTYPE F F F F\nCOUNT 1 1 1 1\nWIDTH %d\nHEIGHT 1\nVIEWPOINT 0 0 0 1 0 0 0\nPOINTS %d\nDATA ascii\n", rows*fileTotal, rows*fileTotal); for(i=0;i<fileTotal;i++){ for(j=0;j<data_rows[i];j++){ fprintf(outfp,"%f %f %f %d\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2), green+(int)floor(255.*(i+1)/fileTotal)); } } fclose(outfp); /////////////////////////////// // 全ての点群(shape_fixed)をまとめて書き出し // csv // sprintf(outfilename,"%s/%s/allpoints.csv",outdir,dir); outfp = fopen(outfilename,"w"); if(outfp == NULL){ printf("%sファイルが開けません\n",outfilename); return -1; } for(i=0;i<fileTotal;i++){ for(j=0;j<data_rows[i];j++){ fprintf(outfp,"%f %f %f\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2)); } } fclose(outfp); /////////////////////////////// // 全ての点群(shape_fixed)をまとめて書き出し // result_xyz.csv // sprintf(outfilename,"%s/%s/result_xyz_icp.csv",outdir,dir); outfp = fopen(outfilename,"w"); if(outfp == NULL){ printf("%sファイルが開けません\n",outfilename); return -1; } for(i=0;i<fileTotal;i++){ for(j=0;j<data_rows[i];j++){ fprintf(outfp,"%f,%f,%f\n", shape_reg[i].at<float>(j,0), shape_reg[i].at<float>(j,1), shape_reg[i].at<float>(j,2)); } } fclose(outfp); ////////////////////////////////// // Corr(対応点), Index(対応点の要素番号), Distance(対応点間距離)の書き出し // FILE *outfp_corr; char outfilename_corr[100]; for(fileCount=1;fileCount<fileTotal;fileCount++){ ///Indexファイル sprintf(outfilename_corr,"%s/%s/index%02d.csv",outdir,dir,(fileCount)); outfp_corr = fopen(outfilename_corr,"w"); if(outfp_corr == NULL){ printf("%sファイルが開けません\n",outfilename_corr); return -1; } 
for(j=0;j<data_rows[fileCount];j++){ fprintf(outfp_corr,"%d\n", myIndex[fileCount][j]); } fclose(outfp_corr); ///Distanceファイル sprintf(outfilename_corr,"%s/%s/dist%02d.csv",outdir,dir,(fileCount)); outfp_corr = fopen(outfilename_corr,"w"); if(outfp_corr == NULL){ printf("%sファイルが開けません\n",outfilename_corr); return -1; } for(j=0;j<data_rows[fileCount];j++){ fprintf(outfp_corr,"%f\n", myDist[fileCount][j]); } fclose(outfp_corr); } for(fileCount=0;fileCount<fileTotal;fileCount++){ if(fileCount<(fileTotal-1)){ ///Corr点群ファイル sprintf(outfilename_corr,"%s/%s/corr%02d.csv",outdir,dir,(fileCount+1)); outfp_corr = fopen(outfilename_corr,"w"); if(outfp_corr == NULL){ printf("%sファイルが開けません\n",outfilename_corr); return -1; } for(j=0;j<data_rows[fileCount];j++){ //fprintf(outfp_corr,"%f %f %f\n", my_model_corr[fileCount].at<float>(j,0), my_model_corr[fileCount].at<float>(j,1), my_model_corr[fileCount].at<float>(j,2)); fprintf(outfp_corr,"%f %f %f\n", shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],0), shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],1), shape_reg[fileCount].at<float>(myIndex[fileCount+1][j],2)); } fclose(outfp_corr); }else{ ///Corr点群ファイル sprintf(outfilename_corr,"%s/%s/corr%02d.csv",outdir,dir,(fileCount+1)); outfp_corr = fopen(outfilename_corr,"w"); if(outfp_corr == NULL){ printf("%sファイルが開けません\n",outfilename_corr); return -1; } for(j=0;j<data_rows[fileCount];j++){ //fprintf(outfp_corr,"%f %f %f\n", my_model_corr[fileCount].at<float>(j,0), my_model_corr[fileCount].at<float>(j,1), my_model_corr[fileCount].at<float>(j,2)); fprintf(outfp_corr,"%f %f %f\n", shape_reg[fileCount].at<float>(j,0), shape_reg[fileCount].at<float>(j,1), shape_reg[fileCount].at<float>(j,2)); } fclose(outfp_corr); } } ///////////////////// // RTの書き出し // //my_rt[0]に恒等変換を代入 //Mat rt0 = (Mat_<float>(4,4) << 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); Mat rt0 = Mat::eye(4,4,CV_32F); rt0.copyTo(my_rt[0]); // Open File Storage char rtfilename[100]; 
sprintf(rtfilename,"%s/%s/rt.xml",outdir,dir); cv::FileStorage cvfs(rtfilename,CV_STORAGE_WRITE); cv::WriteStructContext ws(cvfs, "mat_rt", CV_NODE_SEQ); // create node for(int i=0; i<fileTotal; i++){ cv::write(cvfs,"",cameraRT[i]); } cvfs.release(); #endif //--- OpenGLで表示 --- #if GLVIEW // --- GLUT initialize --- initFlag(); initParam(); //window1 glutInit(&argc, argv); glutInitWindowPosition(0, 0); glutInitWindowSize(window_w, window_h); glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE ); window1 = glutCreateWindow("Window1"); glutMouseFunc(mouse); glutMotionFunc(drag); glutPassiveMotionFunc(passive); glutMouseWheelFunc ( MouseWheel ) ;//ホイールコールバック glutDisplayFunc(disp); glutIdleFunc(myGlutIdle); glutKeyboardFunc(glut_keyboard); glutIdleFunc(animate); glClearColor(0.0, 0.0, 0.0, 0.5); //背景色 glutMainLoop(); #endif //実行時間計測終了 end_time_total = clock(); cout << "-------------" << endl; cout << " Finish " << endl; cout << "-------------" << endl; cout << "プログラム実行時間 = " << (float)(end_time_total - start_time_total)/CLOCKS_PER_SEC << "秒" << endl << endl; //cvNamedWindow ("WaitKey", CV_WINDOW_AUTOSIZE); //cvWaitKey(0); return 0; }
//! given a global range [0,global_size) and p PEs to split the range, calculate //! the [local_begin,local_end) index range assigned to the PE i. static inline Range CalculateLocalRange( size_t global_size, size_t p, size_t i) { return Range(0, global_size).Partition(i, p); }
void X_Comp::EvalProducts() { if (NJoins()>0) switch (SolveMode()) { case PBMODE: DoBreak(); break; case SSMODE: DoBreak(); break; case DYNMODE: { int JoinId=0; double Press=Joins[JoinId].Pressure(); int I[MaxIOList+1]; FillIOIndexList(JoinId, I); flag dbg=0;//((Debug|dbgDerivs) & DBG_Derivs); if (SolveMode()==PBMODE) Press=GetPBInputPressure(JoinId); StkSpConduit SdLcl("SdLcl", chLINEID(), this); SpConduit &Sd = SdLcl(); Sd.QZero(); Sd.SetPress(Press); double Qot=0.0; int NFeeds=0; for (int i, ic = 0; (i=I[ic])>=0; ic++) if (IO_In(i)) { Sd.SelectModel(IOConduit(i), NFeeds==0); Sd.QAddF(*IOConduit(i), som_ALL, 1.0); NFeeds++; } else Qot += IOQmEst_Out(i); if (dbg) for (ic = 0; (i=I[ic])>=0; ic++) if (IO_In(i)) { char t[128]; sprintf(t, "%s.%s", FullObjTag(), IODesc_Self(i)->pName); dbgpln("i-Xfr %-12.12s : %14.6g, %14.6g | %14.6g",t, IOConduit(i)->QMass(som_SL), IOConduit(i)->QMass(som_Vap), K_2_C(IOConduit(i)->Temp())); } double denom; double P1 = IOP_Rmt (IOWithId_Self(ioid_In)); double P2 = IOP_Self(IOWithId_Self(ioid_In)); // hss 9/3/98 - Use data base for specific heat ratio. P_SpecificHeatRatio = Max(1.1, Sd.CpCv()); if( Sd.Temp() > 0.01 ) Tin = Sd.Temp(); // hss Calc Polytropic Efficiency /*double FlowMin = pI->QVolume() * 60.0; double rpm = Max(SpeedRatio, 10000.0); double EffTemp = Efficiency.Zxy(FlowMin,rpm); if (EffTemp >= 0.6) P_PolytropicEff = EffTemp; else P_PolytropicEff = 0.6;*/ denom = P_SpecificHeatRatio * P_PolytropicEff; if( fabs(denom) < 1.0e-30) denom = 1.0e-30; // hss Try a fix to prevent crash when P1 is negative if ((P1 > 0.0) && (P2 > P1)) Tout = Tin*pow(P2/P1,(P_SpecificHeatRatio - 1.0)/denom); else Tout = Tin; // end of kluge Sd.SetTemp(Tout); double Qin=Sd.QMass(som_ALL); // What Comes in must go out double Scl=Range(0.0, Qin/GTZ(Qot), 1000.0); for (ic = 0; (i=I[ic])>=0; ic++) if (IO_Out(i)) IOConduit(i)->QSetM(Sd, som_ALL, IOQmEst_Out(i)*Scl, Press);//Joins[JoinId].Pressure());//PMax); } break; } };
void X_Comp::EvalDiscrete() { if (Control==CTSpeed) { SpConduit &C=*IOConduit(IOWithId_Self(ioid_In)); //double Cflow = GEZ(IOFB(IOWithId_Self(ioid_In), 0)->GetQm()); //double VFlow = GEZ(IOFB(IOWithId_Self(ioid_In), 0)->GetQv()); double Qv=C.QVolume(); double Qm=C.QMass(); MaxSpeed=Max(100.0, MaxSpeed); MinSpeed=Range(0.0, MinSpeed, MaxSpeed); double RqdS=(P_Status ? Range(MinSpeed/MaxSpeed, SpdSpt,1.0) : 0); SpdFbk+=(RqdS-SpdFbk)*ICGetTimeInc()/Max(1.0, SpdTau); ActSpeed=SpdFbk*MaxSpeed; SpeedRatio=SpdFbk; double Hd=-166.466+1.581213*ActSpeed+(-17.5093-0.15871*ActSpeed)*Qv+(0.0003296+0.014441*ActSpeed)*Qv*Qv; // double SrgVolFlw=-0.96597+0.010344*ActSpeed; double SrgSpd=Range(166.0, ActSpeed, 275.0); QvSrg=7E-05*SrgSpd*SrgSpd - 0.0199*SrgSpd + 2.2831; QvIn=C.QVolume(); //Qs=Range(0.0, Qm*SrgVolFlw/GTZ(Qm), 150.0); Pin = Max(100.0,IOP_Rmt (IOWithId_Self(ioid_In))); // double PCalc=(7990.392-21.47525359*SpdFbk*266.67+26.75703018*Range(0.0,Cflow,100.0)); // PCalc=Max(1000.0, PCalc); // double PRatio=7990/PCalc; //PressIn=GTZ(PressIn); // if ((Qv>1.0e-6) && (C.QMass()>1.0e-6)) // { // Only Change if Direction is Forward double GammaIn=1.262;//Range(1.2, C.CpCv(), 1.5); double MoleWtFIn=C.MoleWt();//*Rho; double TempIn=C.Temp(); // } double R=8.314/MoleWtFIn; double X=GammaIn/(GammaIn-1.0); // PHT_Isentropic: // Units of Hd Expected to be kNm/kg == kJ/kg; // See Perry P6-17 & WMC-KNS Acid Plant Curves. 
double dEfficiency=0.9; double P2=Pin*Pow(GEZ(Hd*dEfficiency/GTZ(X*R*TempIn)+1.0), X); double Bst=GEZ(P2-Pin); double TheBoostDmp=1.0-(1.0/Max(ICGetTimeInc(), SpdTau)); TheBoost=TheBoostDmp*TheBoost+(1-TheBoostDmp)*GEZ(Bst); //TheBoost=TheBoostDmp*TheBoost+(1-TheBoostDmp)*GEZ(Pin*PRatio-Pin); } else { double Q = IOQm_In(IOWithId_Self(ioid_In)); double P1 = IOP_Rmt(IOWithId_Self(ioid_In)); double P2 = IOP_Self(IOWithId_Self(ioid_In)); double pwr = Pwr_Curve(Q,P1,P2); double goodp = P2_Curve(Q,P1,P_MaxPower); double Qc = Flw_Curve(P_MaxPower,P1,P_SetpointPressure); if( fabs(Q - Qprv)/Max(1.0e-60,Qprv) < 0.001 && ((P2 < P1) || (Q < 0.001)) ) { X_SetpointPressure = 0.0; } else if( Q > 1.0e-50 && fabs(Q - Qprv)/Max(1.0e-60,Qprv) < 0.001 && P1 < P2 ) { if( Q > Qc ) { double newval = (P_SetpointPressure - goodp); double test = (X_SetpointPressure*0.5) + (newval*0.5); if( P2-test < P1 ) {// This will fail at unrealistically high flow X_SetpointPressure = (X_SetpointPressure*0.5) + 0.5*(P2-P1); } else { X_SetpointPressure = test; } //TRACE("EVAL DISCREET CONVERGED %f %f %f %f\n\n",Q,pwr,goodp,X_SetpointPressure); } else { X_SetpointPressure /= 2.0; } } Qprv = Q; } }
// Test object detection network from Darknet framework. void testDarknetModel(const std::string& cfg, const std::string& weights, const std::vector<std::vector<int> >& refClassIds, const std::vector<std::vector<float> >& refConfidences, const std::vector<std::vector<Rect2d> >& refBoxes, double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4) { checkBackend(); Mat img1 = imread(_tf("dog416.png")); Mat img2 = imread(_tf("street.png")); std::vector<Mat> samples(2); samples[0] = img1; samples[1] = img2; // determine test type, whether batch or single img int batch_size = refClassIds.size(); CV_Assert(batch_size == 1 || batch_size == 2); samples.resize(batch_size); Mat inp = blobFromImages(samples, 1.0/255, Size(416, 416), Scalar(), true, false); Net net = readNet(findDataFile("dnn/" + cfg, false), findDataFile("dnn/" + weights, false)); net.setPreferableBackend(backend); net.setPreferableTarget(target); net.setInput(inp); std::vector<Mat> outs; net.forward(outs, getOutputsNames(net)); for (int b = 0; b < batch_size; ++b) { std::vector<int> classIds; std::vector<float> confidences; std::vector<Rect2d> boxes; for (int i = 0; i < outs.size(); ++i) { Mat out; if (batch_size > 1){ // get the sample slice from 3D matrix (batch, box, classes+5) Range ranges[3] = {Range(b, b+1), Range::all(), Range::all()}; out = outs[i](ranges).reshape(1, outs[i].size[1]); }else{ out = outs[i]; } for (int j = 0; j < out.rows; ++j) { Mat scores = out.row(j).colRange(5, out.cols); double confidence; Point maxLoc; minMaxLoc(scores, 0, &confidence, 0, &maxLoc); if (confidence > confThreshold) { float* detection = out.ptr<float>(j); double centerX = detection[0]; double centerY = detection[1]; double width = detection[2]; double height = detection[3]; boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height, width, height)); confidences.push_back(confidence); classIds.push_back(maxLoc.x); } } } // here we need NMS of boxes std::vector<int> indices; 
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); std::vector<int> nms_classIds; std::vector<float> nms_confidences; std::vector<Rect2d> nms_boxes; for (size_t i = 0; i < indices.size(); ++i) { int idx = indices[i]; Rect2d box = boxes[idx]; float conf = confidences[idx]; int class_id = classIds[idx]; nms_boxes.push_back(box); nms_confidences.push_back(conf); nms_classIds.push_back(class_id); } normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds, nms_confidences, nms_boxes, format("batch size %d, sample %d\n", batch_size, b).c_str(), confThreshold, scoreDiff, iouDiff); } }
void SubmatrixQueriesTest::multipleBenchmarksRowQueries(size_t n, bench_time_t *naiveTime, bench_time_t *queryTime, bench_time_t *cascadingTime, bench_time_t *simpleCascadingTime) { bench_time_t clock1, clock2; for (size_t i = 0; i < n; i++) { size_t row = rand() % (_testMatrix->rows()); size_t c1,c2; c1 = rand() % (_testMatrix->cols()); c2 = rand() % (_testMatrix->cols()); Range c = Range(min(c1,c2),max(c1,c2)); clock1 = now(); _queryDS->columnTree()->cascadingMaxInRange(row, c); clock2 = now(); *cascadingTime = add(*cascadingTime,diff(clock2, clock1)); } for (size_t i = 0; i < n; i++) { size_t row = rand() % (_testMatrix->rows()); size_t c1,c2; c1 = rand() % (_testMatrix->cols()); c2 = rand() % (_testMatrix->cols()); Range c = Range(min(c1,c2),max(c1,c2)); clock1 = now(); _queryDS->columnTree()->simpleCascadingMaxInRange(row, c); clock2 = now(); *simpleCascadingTime = add(*simpleCascadingTime,diff(clock2, clock1)); } for (size_t i = 0; i < n; i++) { size_t row = rand() % (_testMatrix->rows()); size_t c1,c2; c1 = rand() % (_testMatrix->cols()); c2 = rand() % (_testMatrix->cols()); Range c = Range(min(c1,c2),max(c1,c2)); clock1 = now(); _queryDS->columnTree()->maxForRowInRange(row, c.min, c.max); clock2 = now(); *queryTime = add(*queryTime,diff(clock2, clock1)); } if(SubmatrixQueriesTest::benchmarkNaiveQueries){ for (size_t i = 0; i < n; i++) { size_t row = rand() % (_testMatrix->rows()); size_t c1,c2; c1 = rand() % (_testMatrix->cols()); c2 = rand() % (_testMatrix->cols()); Range c = Range(min(c1,c2),max(c1,c2)); clock1 = now(); SubmatrixQueriesTest::naiveMaximumInRow(_testMatrix, c, row); clock2 = now(); *naiveTime = add(*naiveTime,diff(clock2, clock1)); } } }
//! calculate a partition range [begin,end) by taking the current Range //! splitting it into p parts and taking the i-th one. Range Partition(size_t i, size_t parts) const { assert(i < parts); return Range(CalculateBeginOfPart(i, parts), CalculateBeginOfPart(i + 1, parts)); }
void SubmatrixQueriesTest::multipleBenchmarksColQueries(size_t n, bench_time_t *naiveTime, bench_time_t *queryTime, bench_time_t *cascadingTime, bench_time_t *simpleCascadingTime) { bench_time_t clock1, clock2; for (size_t i = 0; i < n; i++) { size_t col = rand() % (_testMatrix->cols()); size_t r1,r2; r1 = rand() % (_testMatrix->rows()); r2 = rand() % (_testMatrix->rows()); Range r = Range(min(r1,r2),max(r1,r2)); clock1 = now(); _queryDS->columnTree()->cascadingMaxInRange(col, r); clock2 = now(); *cascadingTime = add(*cascadingTime,diff(clock2, clock1)); } for (size_t i = 0; i < n; i++) { size_t col = rand() % (_testMatrix->cols()); size_t r1,r2; r1 = rand() % (_testMatrix->rows()); r2 = rand() % (_testMatrix->rows()); Range r = Range(min(r1,r2),max(r1,r2)); clock1 = now(); _queryDS->columnTree()->simpleCascadingMaxInRange(col, r); clock2 = now(); *simpleCascadingTime = add(*simpleCascadingTime,diff(clock2, clock1)); } for (size_t i = 0; i < n; i++) { size_t col = rand() % (_testMatrix->cols()); size_t r1,r2; r1 = rand() % (_testMatrix->rows()); r2 = rand() % (_testMatrix->rows()); Range r = Range(min(r1,r2),max(r1,r2)); clock1 = now(); _queryDS->columnTree()->maxForRowInRange(col, r.min, r.max); clock2 = now(); *queryTime = add(*queryTime,diff(clock2, clock1)); } if(SubmatrixQueriesTest::benchmarkNaiveQueries){ for (size_t i = 0; i < n; i++) { size_t col = rand() % (_testMatrix->cols()); size_t r1,r2; r1 = rand() % (_testMatrix->rows()); r2 = rand() % (_testMatrix->rows()); Range r = Range(min(r1,r2),max(r1,r2)); clock1 = now(); SubmatrixQueriesTest::naiveMaximumInRow(_testMatrix, r, col); clock2 = now(); *naiveTime = add(*naiveTime,diff(clock2, clock1)); } } }
/* Precompute the lookup tables used for sparsity-pattern calculation.

   Steps, in order:
     1. Grow Partial and clear the argument marks.
     2. Walk the recorded tape once in reverse, storing every tape_point in
        tp_ (indexed by op_index) and filling var2op_ (variable index ->
        index of the operator that produced it).
     3. Build user_region_: whether each tape point lies inside a UserOp
        region.
     4. Build constant_tape_point_: whether each tape point is constant.
        For InvOp entries this is !keepcol[k], where k counts the InvOp
        entries in tape order; all other entries are decided by
        is_tape_point_constant().
     5. Compute the pattern for each of the Range() outputs via my_pattern()
        and reset the operator / user-region marks.

   keepcol is indexed by the running InvOp counter, so it needs at least as
   many entries as there are InvOp tape points (not checked here).
*/
void my_init(vector<bool> keepcol){
  Partial.extend(num_var_tape_ * 1);
  arg_mark_.resize(play_.op_arg_rec_.size());
  for(size_t i=0;i<arg_mark_.size();i++)arg_mark_[i]=false;
  /* Run a reverse test-sweep to store pointers once */
  tape_point tp;
  play_.reverse_start(tp.op, tp.op_arg, tp.op_index, tp.var_index);
  /* reverse_start leaves tp at the last tape entry, so op_index+1 and
     var_index+1 are used as the table sizes. */
  tp_.resize(tp.op_index+1);
  var2op_.resize(tp.var_index+1);
  op_mark_.resize(tp.op_index+1);
  for(size_t i=0;i<op_mark_.size();i++)op_mark_[i]=0;
  user_region_mark_.resize(tp.op_index+1);
  for(size_t i=0;i<user_region_mark_.size();i++)user_region_mark_[i]=0;
  tp_[tp.op_index]=tp;
  /* 1. We need to be able to find out, for a given variable, what operator
        created the variable. This is easiest done by looping through the
        _operators_ because for a given op we have access to all the
        resulting variables it creates.
     2. We precompute a vector of "tape_points" so that instead of calling
        "reverse_next", we simply get the next tape entry by tp_[i-1].
  */
  while(tp.op != BeginOp ){ /* tp.op_index is decremented by one in each iteration ... */
    // printTP(tp); /* For debugging */
    play_.reverse_next(tp.op, tp.op_arg, tp.op_index, tp.var_index);
    /* Csum is special case - see remarks in player.hpp and reverse_sweep.hpp */
    if(tp.op == CSumOp)play_.reverse_csum(tp.op, tp.op_arg, tp.op_index, tp.var_index);
    /* An operator with NumRes results owns the NumRes variable indices
       ending at tp.var_index. */
    for(size_t i=0;i<NumRes(tp.op);i++)var2op_[tp.var_index-i]=tp.op_index;
    tp_[tp.op_index]=tp;
    markArgs(tp);
  }
  /* Lookup table: is tape_point within a UserOp region? */
  bool user_within=false;
  user_region_.resize(tp_.size());
  for(size_t i=0;i<tp_.size();i++){
    if(tp_[i].op==UserOp){
      /* Each UserOp toggles the inside/outside state; the UserOp entries
         themselves always count as inside. */
      user_region_[i]=true;
      user_within=!user_within;
    } else {
      user_region_[i]=user_within;
    }
  }
  /* Lookup table: is tape_point a constant (=only fixed effect dependent) ? */
  constant_tape_point_.resize(tp_.size());
  int indep_var_number=0;
  for(size_t i=0;i<tp_.size();i++){
    if(tp_[i].op==InvOp){ /* All independent variables are marked according to being random or fixed effect */
      constant_tape_point_[i]=!keepcol[indep_var_number];
      indep_var_number++;
    } else { /* Mark operator as constant if _all_ arguments are constant */
      constant_tape_point_[i] = is_tape_point_constant(i);
    }
    //std::cout << constant_tape_point_[i] << " "; printTP(tp_[i]);
  }
  // std::cout << "Total: " << constant_tape_point_.size() << "\n";
  // int sum=0; for(int i=0;i<constant_tape_point_.size();i++)sum+=constant_tape_point_[i];
  // std::cout << "Constant:" << sum << "\n";
  // Calculate pattern
  int m=Range();
  colpattern.resize(m);
  for(int i=0;i<m;i++)my_pattern(i);
  for(size_t i=0;i<op_mark_.size();i++)op_mark_[i]=0; /* remember to reset marks */
  for(size_t i=0;i<user_region_mark_.size();i++)user_region_mark_[i]=0; /* remember to reset marks */
}
// Returns the overlap of two ranges given as (first, second) pairs, or
// NULL_RANGE when the range that starts later begins after the other one has
// ended. Ranges that merely touch (start == other end) count as overlapping.
// The argument order does not matter.
Range getOverlapped(const Range& a, const Range& b)
{
    // Sort the arguments by start point without recursing.
    const bool swapped = a.first > b.first;
    const Range& earlier = swapped ? b : a;
    const Range& later = swapped ? a : b;

    if (later.first > earlier.second) {
        return NULL_RANGE;
    }

    // The overlap begins where the later range begins and stops at the
    // smaller of the two end points.
    if (earlier.second > later.second) {
        return Range(later.first, later.second);
    }
    return Range(later.first, earlier.second);
}
// Runs afunc once for every task number in [0, antasks).
//
// Sequential path: when num_threads == 1, task_manager is null, or func is
// already set (presumably a job is already in flight, i.e. a nested call --
// TODO confirm), all tasks are executed one after another on the calling
// thread with thread_nr = 0 and nthreads = 1.
//
// Parallel path: publishes the job (func, ntasks, jobnr, participate bit 0 on
// every node), takes part in the work as thread 0, then spins until every
// node that has workers reports complete[j] == jobnr.
//
// An Exception thrown by a task on this thread is copied into `ex` and
// rethrown (as a copy) after the job has drained.
void TaskManager :: CreateJob (const function<void(TaskInfo&)> & afunc, int antasks)
{
  if (num_threads == 1 || !task_manager || func)
    {
      if (startup_function) (*startup_function)();

      TaskInfo ti;
      ti.ntasks = antasks;
      ti.thread_nr = 0; ti.nthreads = 1;
      // ti.node_nr = 0; ti.nnodes = 1;
      for (ti.task_nr = 0; ti.task_nr < antasks; ti.task_nr++)
        afunc(ti);

      // NOTE(review): not reached if afunc throws.
      if (cleanup_function) (*cleanup_function)();
      return;
    }

  trace->StartJob(jobnr, afunc.target_type());

  // Publish the job description.
  func = &afunc;
  ntasks.store (antasks); // , memory_order_relaxed);
  // NOTE(review): a previously stored exception object is not deleted here.
  ex = nullptr;

  // Only node 0's task counter is reset here.
  nodedata[0]->start_cnt.store (0, memory_order_relaxed);

  jobnr++;

  // Set bit 0 of every node's participate word.
  for (int j = 0; j < num_nodes; j++)
    nodedata[j]->participate |= 1;

  if (startup_function) (*startup_function)();

  // The calling thread takes part as thread 0; with thd == 0 this yields
  // mynode == 0.
  int thd = 0;
  int thds = GetNumThreads();
  int mynode = num_nodes * thd/thds;

  // Share of the task range assigned to this node.
  IntRange mytasks = Range(int(ntasks)).Split (mynode, num_nodes);
  NodeData & mynode_data = *(nodedata[mynode]);

  TaskInfo ti;
  ti.nthreads = thds;
  ti.thread_nr = thd;
  // ti.nnodes = num_nodes;
  // ti.node_nr = mynode;

  try
    {
      while (1)
        {
          // Claim the next unprocessed task of this node.
          int mytask = mynode_data.start_cnt++;
          if (mytask >= mytasks.Size()) break;

          ti.task_nr = mytasks.First()+mytask;
          ti.ntasks = ntasks;

          {
            RegionTracer t(ti.thread_nr, jobnr, RegionTracer::ID_JOB, ti.task_nr);
            (*func)(ti);
          }
        }
    }
  // NOTE(review): caught by value and stored as a plain Exception copy, so a
  // derived exception type would be sliced -- confirm this is acceptable.
  catch (Exception e)
    {
      {
        lock_guard<mutex> guard(copyex_mutex);
        delete ex;
        ex = new Exception (e);
        // Push the counter to the end so no further tasks of this node are
        // claimed.
        mynode_data.start_cnt = mytasks.Size();
      }
    }

  if (cleanup_function) (*cleanup_function)();

  // Busy-wait until every node that has workers has finished this job.
  for (int j = 0; j < num_nodes; j++)
    if (workers_on_node[j])
      {
        while (complete[j] != jobnr)
          _mm_pause();
      }

  func = nullptr;
  // The throw below leaves the function before trace->StopJob() is reached,
  // and `ex` itself is not deleted here.
  if (ex)
    throw Exception (*ex);

  trace->StopJob();
}
static void computeShapeByReshapeMask(const MatShape &srcShape, const MatShape &maskShape, Range srcRange /*= Range::all()*/, MatShape& dstShape) { int srcShapeSize = (int)srcShape.size(); int maskShapeSize = (int)maskShape.size(); if (srcRange == Range::all()) srcRange = Range(0, srcShapeSize); else { int sz = srcRange.size(); srcRange.start = clamp(srcRange.start, srcShapeSize); srcRange.end = srcRange.end == INT_MAX ? srcShapeSize : srcRange.start + sz; } bool explicitMask = !maskShape.empty(); // All mask values are positive. for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i) { explicitMask = maskShape[i] > 0; } // Working range of source shape is a range where area(src) == area(mask). if (explicitMask) { int maskTotal = total(maskShape); // Go from the end of mask until we collect required total. bool matched = false; for (int i = srcRange.end - 1; i >= srcRange.start; --i) { if (matched) { if (i == 0 || total(srcShape, i, srcRange.end) != maskTotal) { srcRange.start = i + 1; break; } } else { matched = total(srcShape, i, srcRange.end) == maskTotal; } } CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal); } CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize); int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize; dstShape.resize(dstShapeSize); std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin()); std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize); int inferDim = -1; for (int i = 0; i < maskShapeSize; i++) { if (maskShape[i] > 0) { dstShape[srcRange.start + i] = maskShape[i]; } else if (maskShape[i] == 0) { if (srcRange.start + i >= srcShapeSize) CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i)); dstShape[srcRange.start + i] = srcShape[srcRange.start + i]; } else if (maskShape[i] == -1) { if 
(inferDim != -1) CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)"); inferDim = srcRange.start + i; dstShape[inferDim] = 1; } else CV_Error(Error::StsBadArg, "maskShape[i] >= -1"); } size_t srcTotal = total(srcShape); size_t dstTotal = total(dstShape); if (inferDim != -1) { if (srcTotal % dstTotal != 0) CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1"); dstShape[inferDim] = (int)(srcTotal / dstTotal); } else { CV_Assert(srcTotal == dstTotal); } }
// Main loop of worker thread `thd`; runs until `done` becomes true.
//
// Each round the worker
//   1. idles (sleep or yield) while jobnr equals the last job it has seen
//      completed,
//   2. registers on its node by adding 2 to the node's `participate` word,
//      backing out again if bit 0 of that word is not set,
//   3. claims task numbers from the node's start_cnt until its node's share
//      of the tasks is used up, calling (*func)(ti) for each,
//   4. unregisters (subtracts 2) and, if it manages to swap `participate`
//      from exactly 1 to 0, either re-sets bit 0 (a newer job number was
//      published meanwhile) or marks the node complete for this job.
//
// An Exception thrown by a task is copied into `ex` under copyex_mutex and
// the node's remaining tasks are skipped.
void TaskManager :: Loop(int thd)
{
  /*
  static Timer tADD("add entry counter");
  static Timer tCASready1("spin-CAS ready tick1");
  static Timer tCASready2("spin-CAS ready tick2");
  static Timer tCASyield("spin-CAS yield");
  static Timer tCAS1("spin-CAS wait");
  static Timer texit("exit zone");
  static Timer tdec("decrement");
  */
  thread_id = thd;

  int thds = GetNumThreads();

  // Node index derived from the thread number.
  int mynode = num_nodes * thd/thds;

  NodeData & mynode_data = *(nodedata[mynode]);

  TaskInfo ti;
  ti.nthreads = thds;
  ti.thread_nr = thd;
  // ti.nnodes = num_nodes;
  // ti.node_nr = mynode;

#ifdef USE_NUMA
  numa_run_on_node (mynode);
#endif
  active_workers++;
  workers_on_node[mynode]++;
  int jobdone = 0;

#ifdef USE_MKL
  // Restrict MKL to one thread inside this worker; the previous maximum is
  // restored on exit.
  auto mkl_max = mkl_get_max_threads();
  mkl_set_num_threads_local(1);
#endif

  while (!done)
    {
      // Catch up with jobs that other workers of this node completed.
      if (complete[mynode] > jobdone)
        jobdone = complete[mynode];

      if (jobnr == jobdone)
        {
          // Nothing new to do: back off.
          // RegionTracer t(ti.thread_nr, tCASyield, ti.task_nr);
          if(sleep)
            this_thread::sleep_for(chrono::microseconds(sleep_usecs));
          else
            {
#ifdef WIN32
              this_thread::yield();
#else // WIN32
              sched_yield();
#endif // WIN32
            }
          continue;
        }

      {
        // RegionTracer t(ti.thread_nr, tADD, ti.task_nr);

        // non-atomic fast check ...
        if ( (mynode_data.participate & 1) == 0) continue;

        // Register: every worker inside the job contributes 2.
        int oldval = mynode_data.participate += 2;
        if ( (oldval & 1) == 0)
          { // job not active, going out again
            mynode_data.participate -= 2;
            continue;
          }
      }

      if (startup_function) (*startup_function)();

      // Share of the task range assigned to this node.
      IntRange mytasks = Range(int(ntasks)).Split (mynode, num_nodes);

      try
        {
          while (1)
            {
              // Cheap pre-check before the atomic increment.
              if (mynode_data.start_cnt >= mytasks.Size()) break;
              int mytask = mynode_data.start_cnt.fetch_add(1, memory_order_relaxed);
              if (mytask >= mytasks.Size()) break;

              ti.task_nr = mytasks.First()+mytask;
              ti.ntasks = ntasks;

              {
                RegionTracer t(ti.thread_nr, jobnr, RegionTracer::ID_JOB, ti.task_nr);
                (*func)(ti);
              }
            }
        }
      // NOTE(review): caught by value and stored as a plain Exception copy,
      // so a derived exception type would be sliced -- confirm.
      catch (Exception e)
        {
          {
            // cout << "got exception in TM" << endl;
            lock_guard<mutex> guard(copyex_mutex);
            delete ex;
            ex = new Exception (e);
            // Push the counter to the end so no further tasks of this node
            // are claimed.
            mynode_data.start_cnt = mytasks.Size();
          }
        }

#ifndef __MIC__
      atomic_thread_fence (memory_order_release);
#endif // __MIC__

      if (cleanup_function) (*cleanup_function)();

      jobdone = jobnr;

      // Unregister this worker.
      mynode_data.participate-=2;

      {
        // The exchange succeeds only when participate is exactly 1, i.e.
        // bit 0 is set and no worker is registered any more.
        int oldpart = 1;
        if (mynode_data.participate.compare_exchange_strong (oldpart, 0))
          {
            if (jobdone < jobnr.load())
              { // reopen gate
                mynode_data.participate |= 1;
              }
            else
              {
                // Node 0's counter is left untouched here.
                if (mynode != 0)
                  mynode_data.start_cnt = 0;
                complete[mynode] = jobnr.load();
              }
          }
      }
    }

#ifdef USE_MKL
  mkl_set_num_threads_local(mkl_max);
#endif

  workers_on_node[mynode]--;
  active_workers--;
}
// Returns an oclMat constructed from this matrix restricted to the single
// row y (row range [y, y + 1)) and all columns.
inline oclMat oclMat::row(int y) const
{
    const Range single_row(y, y + 1);
    return oclMat(*this, single_row, Range::all());
}
// Preprocess() - build a segment tree for O(log n) queries void regProp2::compSeeds(void) { Datareg2& reg2 = (Datareg2&)data; int i, j; int xdim, ydim; float val[4]; Range* _prop_x, *prop_x; Range prop_y; Range propagated; Range c_prop; Range responsibility, c_respons; Range delay; Range y_comp; float min_x, min_y, max_x, max_y; float min_in, max_in, min4, max4; int nseed; xdim = reg2.dim[0]; ydim = reg2.dim[1]; _prop_x = new Range[ydim]; // proceed through the slices computing seeds nseed=0; // process the k'th slab for(i=0; i<xdim-1; i++) for(j=0; j<ydim-1; j++) { prop_x = &_prop_x[j]; // load the voxel data reg2.getCellValues(i, j, val); min_x = MIN2(val[0], val[3]); max_x = MAX2(val[0], val[3]); min_y = MIN2(val[0], val[1]); max_y = MAX2(val[0], val[1]); // set the incoming values if on a border if(i==0) { prop_x->Set(min_x, max_x); } if(j==0) { prop_y.Set(min_y, max_y); } // merge incoming information y_comp = prop_y.Complement(min_y, max_y); propagated = prop_y + ((*prop_x)-y_comp); // compute complement of incoming ranges min_in = MIN2(min_x, min_y); max_in = MAX2(max_x, max_y); c_prop.Set(min_in,max_in); c_prop -= propagated; // compute responsibility ranges min4 = MIN2(min_in, val[2]); max4 = MAX2(max_in, val[2]); responsibility.Set(min4, max4); responsibility-=c_prop; c_respons = responsibility.Complement(min4, max4); // determine range which can be delayed delay.MakeEmpty(); if(i < xdim-2) delay+=Range(MIN2(val[1], val[2]), MAX2(val[1], val[2])); if(j < ydim-2) delay+=Range(MIN2(val[2], val[3]), MAX2(val[2], val[3])); // test for propagation of entire responsibility range if(responsibility.Empty() || (!delay.Empty() && delay.MinAll() <= responsibility.MinAll() && delay.MaxAll() >= responsibility.MaxAll())) { // propagate first to the next x-slice if(i == xdim-2) { prop_x->MakeEmpty(); } else { prop_x->Set(MIN2(val[1], val[2]), MAX2(val[1], val[2])); *prop_x-=c_respons; } c_respons += *prop_x; // all remaining propagated in y-dir if(j == ydim-2) { 
prop_y.MakeEmpty(); } else { prop_y.Set(MIN2(val[2], val[3]), MAX2(val[2], val[3])); prop_y-= c_respons; } } else { // can't propagate all responsiblity, cell must be a seed seeds.AddSeed(reg2.index2cell(i,j), responsibility.MinAll(), responsibility.MaxAll()); nseed++; prop_y.MakeEmpty(); prop_x->MakeEmpty(); } } if(verbose) { printf("computed %d seeds\n", nseed); } }
// Returns an oclMat constructed from this matrix restricted to the row range
// Range(startrow, endrow) and all columns.
inline oclMat oclMat::rowRange(int startrow, int endrow) const
{
    const Range selected_rows(startrow, endrow);
    return oclMat(*this, selected_rows, Range::all());
}
// Unit test for the Range value type (a [min, max] interval of doubles).
// Every expectation is checked with tassert; the program returns 0 at the
// end. argc and argv are unused.
int main(int argc, char **argv)
{
    Range all = Range::all();
    Range none = Range();

    // A default-constructed Range is the empty range.
    tassert(none == Range::none());
    tassert(!(all == none));

    // all(): spans the full finite double interval and includes its bounds.
    tassert(all.min == -std::numeric_limits<double>::max());
    tassert(all.max == std::numeric_limits<double>::max());
    tassert(all.includes(-std::numeric_limits<double>::max()));
    tassert(all.includes(-1));
    tassert(all.includes(0));
    tassert(all.includes(1));
    tassert(all.includes(std::numeric_limits<double>::max()));

    // none(): min/max are inverted, so nothing is included.
    tassert(none.min == std::numeric_limits<double>::max());
    tassert(none.max == -std::numeric_limits<double>::max());
    tassert(!none.includes(-std::numeric_limits<double>::max()));
    tassert(!none.includes(-1));
    tassert(!none.includes(0));
    tassert(!none.includes(1));
    tassert(!none.includes(std::numeric_limits<double>::max()));

    // Adding a value to the full range leaves it unchanged.
    Range a = all;
    a.add(-100);
    tassert(a == Range::all());

    // Adding a value to the empty range gives a single-point range.
    Range b = none;
    b.add(-100);
    tassert(!(b == none));
    tassert(b.min == -100);
    tassert(b.max == -100);
    tassert(!b.includes(-std::numeric_limits<double>::max()));
    tassert(!b.includes(-101));
    tassert( b.includes(-100));
    tassert(!b.includes(-99));
    tassert(!b.includes(-1));
    tassert(!b.includes(0));
    tassert(!b.includes(1));
    tassert(!b.includes(std::numeric_limits<double>::max()));

    // Adding a second value widens the range; both end points are included.
    Range c = b;
    c.add(1000);
    tassert(!(b == c));
    tassert(c.min == -100);
    tassert(c.max == 1000);
    tassert(!c.includes(-std::numeric_limits<double>::max()));
    tassert(!c.includes(-101));
    tassert( c.includes(-100));
    tassert( c.includes(-99));
    tassert( c.includes(-1));
    tassert( c.includes(0));
    tassert( c.includes(1));
    tassert( c.includes(999));
    tassert( c.includes(1000));
    tassert(!c.includes(1001));
    tassert(!c.includes(std::numeric_limits<double>::max()));

    // Adding a Range: the empty range changes nothing, the full range
    // swallows everything.
    c.add(none);
    tassert(c == Range(-100, 1000));
    c.add(all);
    tassert(!(c == Range(-100, 1000)));
    tassert(c == Range::all());

    // empty(): true when min > max.
    tassert(!Range(-100,100).empty());
    tassert(Range(100,-100).empty());

    // intersects(): touching end points count as an intersection; an
    // inverted (empty) range intersects nothing.
    tassert(!Range(100,200).intersects(Range(0,99.999)));
    tassert(Range(100,200).intersects(Range(0,100)));
    tassert(Range(100,200).intersects(Range(50,150)));
    tassert(Range(100,200).intersects(Range(100,200)));
    tassert(Range(100,200).intersects(Range(150,250)));
    tassert(Range(100,200).intersects(Range(200,300)));
    tassert(!Range(100,200).intersects(Range(200.01,300.01)));
    tassert(Range(100,200).intersects(Range(150, 150)));
    tassert(Range(100,200).intersects(Range(150, 151)));
    tassert(!Range(100,200).intersects(Range(151, 150)));
    tassert(!Range::none().intersects(Range::all()));
    tassert(!Range::all().intersects(Range::none()));

    return 0;
}
// Builds the dimuon invariant-mass workspace for one input tree, optionally
// fits the opposite-sign dataset, draws the result and writes the histogram
// and dataset to a ROOT file named after the particle and collision system.
//
// NOTE(review): the FileName argument is overwritten unconditionally by the
// isPbPb branch right after SetOptions(), so the value passed by the caller
// is never used -- confirm this is intended.
// NOTE(review): the fit is skipped for oniamode == 3. For other values `pdf`
// is whatever myws.pdf("pdf") returns and is dereferenced without a null
// check; a model is only built when opt.oniaMode is 1 or 2 (opt.oniaMode is
// presumably set from oniamode by SetOptions -- verify).
void fit2015(
             TString FileName ="/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root",
             int oniamode = 2,      // oniamode-> 3: Z, 2: Upsilon and 1: J/Psi
             bool isData = true,    // isData = false for MC, true for Data
             bool isPbPb = false,   // isPbPb = false for pp, true for PbPb
             bool doFit = false ,   // run the fit and draw the pull distribution
             bool inExcStat = true  // if inExcStat is true, then the excited states are fitted
             )
{
  InputOpt opt;
  SetOptions(&opt, isData, isPbPb, oniamode,inExcStat);

  // Input tree: chosen from isPbPb only.
  if (isPbPb) {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/PbPbData/OniaTree_262548_262893.root";
  } else {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root";
  }

  // Fixed number of mass bins per particle; stays 1 for any other oniamode.
  int nbins = 1; //ceil((opt.dMuon->M->Max - opt.dMuon->M->Min)/binw);
  if (oniamode==1){
    nbins = 140;
  } else if (oniamode==2) {
    nbins = 70;
  } else if (oniamode==3) {
    nbins = 40;
  }

  RooWorkspace myws;
  TH1F* hDataOS = new TH1F("hDataOS","hDataOS", nbins, opt.dMuon.M.Min, opt.dMuon.M.Max);
  makeWorkspace2015(myws, FileName, opt, hDataOS);

  // Objects looked up in the workspace by name after makeWorkspace2015().
  RooRealVar* mass = (RooRealVar*) myws.var("invariantMass");
  RooDataSet* dataOS_fit = (RooDataSet*) myws.data("dataOS");
  RooDataSet* dataSS_fit = (RooDataSet*) myws.data("dataSS");
  RooAbsPdf* pdf = NULL;

  // No fit is performed in Z mode.
  if (oniamode==3) { doFit=false; }

  if (doFit) {
    // Model indices handed to the build functions; their meaning is defined
    // there.
    int sigModel=0, bkgModel=0;
    if (isData) {
      if (oniamode==1){
        sigModel = inExcStat ? 2 : 3;
        bkgModel = 1;
      } else {
        sigModel = inExcStat ? 1 : 3; // gaussian
        bkgModel = 2;
      }
    } else {
      if (oniamode==1){
        sigModel = inExcStat ? 2 : 3; // gaussian
        bkgModel = 2;
      } else {
        sigModel = inExcStat ? 2 : 3; // gaussian
        bkgModel = 3;
      }
    }

    if (opt.oniaMode==1) buildModelJpsi2015(myws, sigModel, bkgModel,inExcStat);
    else if (opt.oniaMode==2) buildModelUpsi2015(myws, sigModel, bkgModel,inExcStat);

    pdf =(RooAbsPdf*) myws.pdf("pdf");
    // The returned fit result is not used afterwards.
    RooFitResult* fitObject = pdf->fitTo(*dataOS_fit,Save(),Hesse(kTRUE),Extended(kTRUE)); // Fit
  }

  // Same-sign (red) and opposite-sign (blue) data on one frame.
  RooPlot* frame = mass->frame(Bins(nbins),Range(opt.dMuon.M.Min, opt.dMuon.M.Max));
  RooPlot* frame2 = NULL;
  dataSS_fit->plotOn(frame, Name("dataSS_FIT"), MarkerColor(kRed), LineColor(kRed), MarkerSize(1.2));
  dataOS_fit->plotOn(frame, Name("dataOS_FIT"), MarkerColor(kBlue), LineColor(kBlue), MarkerSize(1.2));

  if (doFit) {
    pdf->plotOn(frame,Name("thePdf"),Normalization(dataOS_fit->sumEntries(),RooAbsReal::NumEvent));
    // Pull histogram goes on a second frame.
    RooHist *hpull = frame -> pullHist(0,0,true);
    hpull -> SetName("hpull");
    frame2 = mass->frame(Title("Pull Distribution"),Bins(nbins),Range(opt.dMuon.M.Min,opt.dMuon.M.Max));
    frame2 -> addPlotable(hpull,"PX");
  }

  // frame2 and pdf are NULL here when no fit was done.
  drawPlot(frame,frame2, pdf, opt, doFit,inExcStat);

  // Output file name and run-number range per collision system. FileName is
  // assigned again here but not read afterwards. OutputFileName stays empty
  // if oniamode is not 1, 2 or 3.
  TString OutputFileName = "";
  if (isPbPb) {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/PbPbData/OniaTree_262548_262893.root";
    opt.RunNb.Start=262548;
    opt.RunNb.End=262893;
    if (oniamode==1) {OutputFileName = (TString)("JPSIPbPbDataset.root");}
    if (oniamode==2) {OutputFileName = (TString)("YPbPbDataset.root");}
    if (oniamode==3) {OutputFileName = (TString)("ZPbPbDataset.root");}
  } else {
    FileName = "/afs/cern.ch/user/a/anstahll/work/public/ExpressStream2015/ppData/OniaTree_262163_262328.root";
    opt.RunNb.Start=262163;
    opt.RunNb.End=262328;
    if (oniamode==1) {OutputFileName = (TString)("JPSIppDataset.root");}
    if (oniamode==2) {OutputFileName = (TString)("YppDataset.root");}
    if (oniamode==3) {OutputFileName = (TString)("ZppDataset.root");}
  }

  // Write the opposite-sign histogram and dataset.
  TFile* oFile = new TFile(OutputFileName,"RECREATE");
  oFile->cd();
  hDataOS->Write("hDataOS");
  dataOS_fit->Write("dataOS_FIT");
  oFile->Write();
  oFile->Close();
}
// Recovers the relative rotation and translation from an essential matrix
// and two sets of corresponding image points.
//
//   E             - essential matrix, passed to decomposeEssentialMat().
//   _points1/2    - corresponding 2D points (same count and type).
//   _cameraMatrix - 3x3 single-channel camera matrix; fx, fy, cx, cy are read
//                   from it to normalise the points.
//   _R, _t        - [out] chosen rotation (3x3) and translation (3x1); both
//                   are required.
//   _mask         - [in/out] optional. If non-empty on input, only points
//                   set in it are counted; on output it receives the mask of
//                   the points that passed the check for the chosen pose.
//
// Returns the number of points that passed the cheirality check for the
// chosen pose. Of the four candidates (R1|t), (R2|t), (R1|-t), (R2|-t), the
// one with the most passing points wins; ties go to the earlier candidate.
int recoverPose( InputArray E, InputArray _points1, InputArray _points2, InputArray _cameraMatrix,
                 OutputArray _R, OutputArray _t, InputOutputArray _mask)
{
    Mat points1, points2, cameraMatrix;
    _points1.getMat().convertTo(points1, CV_64F);
    _points2.getMat().convertTo(points2, CV_64F);
    _cameraMatrix.getMat().convertTo(cameraMatrix, CV_64F);

    int npoints = points1.checkVector(2);
    CV_Assert( npoints >= 0 && points2.checkVector(2) == npoints &&
               points1.type() == points2.type());

    CV_Assert(cameraMatrix.rows == 3 && cameraMatrix.cols == 3 && cameraMatrix.channels() == 1);

    // Bring multi-channel point arrays into an npoints x 2 single-channel
    // layout.
    if (points1.channels() > 1)
    {
        points1 = points1.reshape(1, npoints);
        points2 = points2.reshape(1, npoints);
    }

    double fx = cameraMatrix.at<double>(0,0);
    double fy = cameraMatrix.at<double>(1,1);
    double cx = cameraMatrix.at<double>(0,2);
    double cy = cameraMatrix.at<double>(1,2);

    // Normalise the image coordinates with the camera intrinsics.
    points1.col(0) = (points1.col(0) - cx) / fx;
    points2.col(0) = (points2.col(0) - cx) / fx;
    points1.col(1) = (points1.col(1) - cy) / fy;
    points2.col(1) = (points2.col(1) - cy) / fy;

    // One point per column from here on.
    points1 = points1.t();
    points2 = points2.t();

    Mat R1, R2, t;
    decomposeEssentialMat(E, R1, R2, t);

    // P0 = [I | 0]; P1..P4 are the four candidate [R | +-t] matrices.
    Mat P0 = Mat::eye(3, 4, R1.type());
    Mat P1(3, 4, R1.type()), P2(3, 4, R1.type()), P3(3, 4, R1.type()), P4(3, 4, R1.type());
    P1(Range::all(), Range(0, 3)) = R1 * 1.0; P1.col(3) = t * 1.0;
    P2(Range::all(), Range(0, 3)) = R2 * 1.0; P2.col(3) = t * 1.0;
    P3(Range::all(), Range(0, 3)) = R1 * 1.0; P3.col(3) = -t * 1.0;
    P4(Range::all(), Range(0, 3)) = R2 * 1.0; P4.col(3) = -t * 1.0;

    // Do the cheirality check.
    // Notice here a threshold dist is used to filter
    // out far away points (i.e. infinite points) since
    // their depth may vary between positive and negative.
    double dist = 50.0;
    Mat Q;

    // Candidate 1: triangulate, then keep the points whose depth is positive
    // and below dist both before and after applying P1.
    triangulatePoints(P0, P1, points1, points2, Q);
    Mat mask1 = Q.row(2).mul(Q.row(3)) > 0;
    Q.row(0) /= Q.row(3);
    Q.row(1) /= Q.row(3);
    Q.row(2) /= Q.row(3);
    Q.row(3) /= Q.row(3);
    mask1 = (Q.row(2) < dist) & mask1;
    Q = P1 * Q;
    mask1 = (Q.row(2) > 0) & mask1;
    mask1 = (Q.row(2) < dist) & mask1;

    // Candidate 2: same check with P2.
    triangulatePoints(P0, P2, points1, points2, Q);
    Mat mask2 = Q.row(2).mul(Q.row(3)) > 0;
    Q.row(0) /= Q.row(3);
    Q.row(1) /= Q.row(3);
    Q.row(2) /= Q.row(3);
    Q.row(3) /= Q.row(3);
    mask2 = (Q.row(2) < dist) & mask2;
    Q = P2 * Q;
    mask2 = (Q.row(2) > 0) & mask2;
    mask2 = (Q.row(2) < dist) & mask2;

    // Candidate 3: same check with P3.
    triangulatePoints(P0, P3, points1, points2, Q);
    Mat mask3 = Q.row(2).mul(Q.row(3)) > 0;
    Q.row(0) /= Q.row(3);
    Q.row(1) /= Q.row(3);
    Q.row(2) /= Q.row(3);
    Q.row(3) /= Q.row(3);
    mask3 = (Q.row(2) < dist) & mask3;
    Q = P3 * Q;
    mask3 = (Q.row(2) > 0) & mask3;
    mask3 = (Q.row(2) < dist) & mask3;

    // Candidate 4: same check with P4.
    triangulatePoints(P0, P4, points1, points2, Q);
    Mat mask4 = Q.row(2).mul(Q.row(3)) > 0;
    Q.row(0) /= Q.row(3);
    Q.row(1) /= Q.row(3);
    Q.row(2) /= Q.row(3);
    Q.row(3) /= Q.row(3);
    mask4 = (Q.row(2) < dist) & mask4;
    Q = P4 * Q;
    mask4 = (Q.row(2) > 0) & mask4;
    mask4 = (Q.row(2) < dist) & mask4;

    // Transpose the row masks into column masks.
    mask1 = mask1.t();
    mask2 = mask2.t();
    mask3 = mask3.t();
    mask4 = mask4.t();

    // If _mask is given, then use it to filter outliers.
    if (!_mask.empty())
    {
        Mat mask = _mask.getMat();
        CV_Assert(mask.size() == mask1.size());
        bitwise_and(mask, mask1, mask1);
        bitwise_and(mask, mask2, mask2);
        bitwise_and(mask, mask3, mask3);
        bitwise_and(mask, mask4, mask4);
    }
    if (_mask.empty() && _mask.needed())
    {
        _mask.create(mask1.size(), CV_8U);
    }

    CV_Assert(_R.needed() && _t.needed());
    _R.create(3, 3, R1.type());
    _t.create(3, 1, t.type());

    int good1 = countNonZero(mask1);
    int good2 = countNonZero(mask2);
    int good3 = countNonZero(mask3);
    int good4 = countNonZero(mask4);

    // Pick the candidate with the most points that passed; on ties the
    // earlier candidate is chosen.
    if (good1 >= good2 && good1 >= good3 && good1 >= good4)
    {
        R1.copyTo(_R);
        t.copyTo(_t);
        if (_mask.needed()) mask1.copyTo(_mask);
        return good1;
    }
    else if (good2 >= good1 && good2 >= good3 && good2 >= good4)
    {
        R2.copyTo(_R);
        t.copyTo(_t);
        if (_mask.needed()) mask2.copyTo(_mask);
        return good2;
    }
    else if (good3 >= good1 && good3 >= good2 && good3 >= good4)
    {
        t = -t;
        R1.copyTo(_R);
        t.copyTo(_t);
        if (_mask.needed()) mask3.copyTo(_mask);
        return good3;
    }
    else
    {
        t = -t;
        R2.copyTo(_R);
        t.copyTo(_t);
        if (_mask.needed()) mask4.copyTo(_mask);
        return good4;
    }
}
void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr) { int dtype = disparr.fixedType() ? disparr.type() : params.dispType; Size leftsize = leftarr.size(); if (leftarr.size() != rightarr.size()) CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size"); if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1) CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1"); if (dtype != CV_16SC1 && dtype != CV_32FC1) CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format"); if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE && params.preFilterType != PREFILTER_XSOBEL) CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE"); if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0) CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255"); if (params.preFilterCap < 1 || params.preFilterCap > 63) CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63"); if (params.kernelSize < 5 || params.kernelSize > 255 || params.kernelSize % 2 == 0 || params.kernelSize >= std::min(leftsize.width, leftsize.height)) CV_Error(Error::StsOutOfRange, "kernelSize must be odd, be within 5..255 and be not larger than image width or height"); if (params.numDisparities <= 0 || params.numDisparities % 16 != 0) CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16"); if (params.textureThreshold < 0) CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative"); if (params.uniquenessRatio < 0) CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative"); int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); Mat disp0 = disparr.getMat(); int width = left0.cols; int height = left0.rows; if(previous_size != width * height) { previous_size = width * height; 
speckleX.create(height,width,CV_32SC4); speckleY.create(height,width,CV_32SC4); puss.create(height,width,CV_32SC4); censusImage[0].create(left0.rows,left0.cols,CV_32SC4); censusImage[1].create(left0.rows,left0.cols,CV_32SC4); partialSumsLR.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S); agregatedHammingLRCost.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S); hammingDistance.create(left0.rows, left0.cols * (params.numDisparities + 1),CV_16S); preFilteredImg0.create(left0.size(), CV_8U); preFilteredImg1.create(left0.size(), CV_8U); aux.create(height,width,CV_8UC1); } Mat left = preFilteredImg0, right = preFilteredImg1; int ndisp = params.numDisparities; int wsz = params.kernelSize; int bufSize0 = (int)((ndisp + 2)*sizeof(int)); bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int)); bufSize0 += (int)((height + wsz + 2)*sizeof(int)); bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256); int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256); if(params.usePrefilter == true) { uchar *_buf = slidingSumBuf.ptr(); parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, ¶ms), 1); } else if(params.usePrefilter == false) { left = left0; right = right0; } if(params.kernelType == CV_SPARSE_CENSUS) { censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_SPARSE_CENSUS); } else if(params.kernelType == CV_DENSE_CENSUS) { censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_SPARSE_CENSUS); } else if(params.kernelType == CV_CS_CENSUS) { symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_CS_CENSUS); } else if(params.kernelType == CV_MODIFIED_CS_CENSUS) { symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CS_CENSUS); } else if(params.kernelType == CV_MODIFIED_CENSUS_TRANSFORM) { 
modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CENSUS_TRANSFORM,0); } else if(params.kernelType == CV_MEAN_VARIATION) { parSumsIntensityImage[0].create(left0.rows, left0.cols,CV_32SC4); parSumsIntensityImage[1].create(left0.rows, left0.cols,CV_32SC4); Integral[0].create(left0.rows,left0.cols,CV_32SC4); Integral[1].create(left0.rows,left0.cols,CV_32SC4); integral(left, parSumsIntensityImage[0],CV_32S); integral(right, parSumsIntensityImage[1],CV_32S); imageMeanKernelSize(parSumsIntensityImage[0], params.kernelSize,Integral[0]); imageMeanKernelSize(parSumsIntensityImage[1], params.kernelSize, Integral[1]); modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MEAN_VARIATION,0,Integral[0], Integral[1]); } else if(params.kernelType == CV_STAR_KERNEL) { starCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1]); } hammingDistanceBlockMatching(censusImage[0], censusImage[1], hammingDistance); costGathering(hammingDistance, partialSumsLR); blockAgregation(partialSumsLR, params.agregationWindowSize, agregatedHammingLRCost); dispartyMapFormation(agregatedHammingLRCost, disp0, 3); Median1x9Filter<uint8_t>(disp0, aux); Median9x1Filter<uint8_t>(aux,disp0); if(params.regionRemoval == CV_SPECKLE_REMOVAL_AVG_ALGORITHM) { smallRegionRemoval<uint8_t>(disp0,params.speckleWindowSize,disp0); } else if(params.regionRemoval == CV_SPECKLE_REMOVAL_ALGORITHM) { if (params.speckleRange >= 0 && params.speckleWindowSize > 0) filterSpeckles(disp0, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); } }
// Assembles the sparse operator that maps boundary-face data to the
// boundary-condition part of the discrete Poisson right-hand side.
//
//   spOP - [out] sparse matrix of size (Np*K) x (Nfp*Nfaces*K), loaded from
//          the (i,j,x) triplets collected below.
//
// For every face (k1,f1) with a non-zero BCType an Np x Nfp block is stored:
//   BC_Dirichlet : gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1
//   BC_Neuman    : mmE_Fm1
// where mmE_Fm1 is the face's edge-mass columns scaled by sJ, Dn1 the normal
// derivative matrix of the element, and gtau = 10*(N+1)^2*Fscale.
//
// NOTE(review): for any other non-zero BCType only a warning is printed and
// the block OP11 left over from the previous face (or the initial zeros) is
// still stored -- confirm this is intended.
// NOTE(review): massEdge has 4 slots and is indexed 1..Nfaces, which assumes
// Nfaces <= 3 -- confirm.
void NDG2D::PoissonIPDGbc2D(
  CSd& spOP //[out] sparse operator
  )
{
  // function [OP] = PoissonIPDGbc2D()
  // Purpose: Set up the discrete Poisson matrix directly
  //          using LDG. The operator is set up in the weak form

  // build DG derivative matrices
  // (upper bound on the number of triplets; sizes the OPi/OPj/OPx buffers)
  int max_OP = (K*Np*Np*(1+Nfaces));

  //initialize parameters
  DVec faceR("faceR"), faceS("faceS");
  IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM");
  DMat V1D("V1D");
  int i=0;

  // build local face matrices
  DMat massEdge[4]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1
  Fm = Fmask(All,1); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[1](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 2
  Fm = Fmask(All,2); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[2](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 3 (built from the s coordinate of the face nodes)
  Fm = Fmask(All,3); faceS = s(Fm);
  V1D = Vandermonde1D(N, faceS);
  massEdge[3](Fm,Fm) = inv(V1D*trans(V1D));

  //continue initialize parameters
  DMat Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)");
  double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  int k1=0,f1=0,id=0;
  IVec i1_Nfp = Range(1,Nfp);
  double N1N1 = double((N+1)*(N+1));

  // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax}
  IVec OPi(max_OP),OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax;
  IMat rows1, cols1; Index1D entries;
  DMat OP11(Np,Nfp, 0.0);

  // global node numbering
  // (entries is the 1-based window into the triplet buffers for the next
  //  block; cols1 holds the column indices of the current face)
  entries.reset(1,Np*Nfp);
  cols1 = outer(Ones(Np), Range(1,Nfp));

  umMSG(1, "\n ==> {OP} assembly [bc]: ");
  for (k1=1; k1<=K; ++k1)
  {
    // progress output every 100 elements
    if (! (k1%100)) { umMSG(1, "%d, ",k1); }

    // row indices of element k1's Np nodes, repeated for each face node
    rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(Nfp));

    // Build element-to-element parts of operator
    for (f1=1; f1<=Nfaces; ++f1)
    {
      if (BCType(k1,f1))
      {
        ////////////////////////added by Kevin ///////////////////////////////
        Fm1 = Fmask(All,f1);
        fidM = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp;
        // index of the first node of face (k1,f1) in the face arrays
        id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;

        lnx = nx(id); lny = ny(id);
        lsJ = sJ(id); hinv = Fscale(id);

        // physical derivative matrices and the normal derivative
        Dx = rx(1,k1)*Dr + sx(1,k1)*Ds;
        Dy = ry(1,k1)*Dr + sy(1,k1)*Ds;
        Dn1 = lnx*Dx + lny*Dy;

        //mmE = lsJ*massEdge(:,:,f1);
        //bc(All,k1) += (gtau*mmE(All,Fm1) - Dn1'*mmE(All,Fm1))*ubc(fidM);

        mmE_Fm1 = massEdge[f1](All,Fm1); mmE_Fm1 *= lsJ;

        gtau = 10*N1N1*hinv; // set penalty scaling
        //bc(All,k1) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1) * ubc(fidM);

        switch(BCType(k1,f1)){
        case BC_Dirichlet:
          OP11 = gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1;
          break;
        case BC_Neuman:
          OP11 = mmE_Fm1;
          break;
        default:
          std::cout<<"warning: boundary condition is incorrect"<<std::endl;
        }

        // store the block's triplets and advance the buffer window
        OPi(entries)=rows1; OPj(entries)=cols1; OPx(entries)=OP11;
        entries += (Np*Nfp);
      }
      // the column window advances for every face, boundary or not
      cols1 += Nfp;
    }
  }
  umMSG(1, "\n ==> {OPbc} to sparse\n");

  // the window was advanced once past the last stored block
  entries.reset(1, entries.hi()-(Np*Nfp));

  // extract triplets from large buffers
  Ai=OPi(entries); Aj=OPj(entries); Ax=OPx(entries);

  // These arrays can be HUGE, so force deallocation
  OPi.Free(); OPj.Free(); OPx.Free();

  // return 0-based sparse result
  Ai -= 1; Aj -= 1;

  //-------------------------------------------------------
  // This operator is not symmetric, and will NOT be
  // factorised, only used to create reference RHS's:
  //
  //    refrhsbcPR = spOP1 * bcPR;
  //    refrhsbcUx = spOP2 * bcUx;
  //    refrhsbcUy = spOP2 * bcUy;
  //
  // Load ALL elements (both upper and lower triangles):
  //-------------------------------------------------------
  spOP.load(Np*K, Nfp*Nfaces*K, Ai,Aj,Ax, sp_All,false, 1e-15,true);

  Ai.Free(); Aj.Free(); Ax.Free();

  umMSG(1, " ==> {OPbc} ready.\n");

#if (1)
  // check on original estimates for nnz
  umMSG(1, " ==> max_OP: %12d\n", max_OP);
  umMSG(1, " ==> nnz_OP: %12d\n", entries.hi());
#endif
}