/**
 * @brief Restriction mat-vec for the shell matrix R when the fine vector must
 *        first be redistributed.
 *
 * Fetches the TransferOpData context attached to R, copies the entries of
 * @p f into the context's scratch vector (data->tmp) through scatterValues()
 * using the stored send/receive sizes and offsets, and then delegates the
 * actual restriction to restrictMatVecType1() with the scratch vector as the
 * fine-grid input.
 *
 * @param R  Shell matrix whose context is a TransferOpData.
 * @param f  Fine-grid input vector.
 * @param c  Coarse-grid output vector.
 * @return 0 on success, otherwise the PETSc error code of the failing call.
 */
PetscErrorCode restrictMatVecType2(Mat R, Vec f, Vec c) {
  TransferOpData *data;

  PetscFunctionBegin;
  iC(MatShellGetContext( R, (void **)&data));

  MPI_Comm comm = data->comm;
  Vec tmp = data->tmp;

  PetscInt tmpSz;
  PetscInt fSz;
  iC(VecGetLocalSize(tmp,&tmpSz));
  iC(VecGetLocalSize(f,&fSz));

  // NOTE(review): the return type of scatterValues is not visible here, so its
  // result is left unchecked exactly as before -- confirm whether it reports errors.
  scatterValues(f, tmp, fSz, tmpSz, data->sendSzR, data->sendOffR,
                data->recvSzR, data->recvOffR, comm);

  // Propagate a failure of the inner restriction instead of silently
  // returning success.
  iC(restrictMatVecType1(R, tmp, c));

  PetscFunctionReturn(0);
}
//--------------------------------------------- double CpxCrvletPrtd::globalenergy() { double lclsum = 0; vector< vector<int> >& c = _nx; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) if(_owners[s][w]==mpirank()) lclsum += energy(_blocks[s][w]); double glbsum = 0; iC( MPI_Reduce((void*)(&lclsum), (void*)(&glbsum), 1, MPI_DOUBLE, MPI_SUM, 0, PETSC_COMM_WORLD) ); return glbsum; }
// ---------------------------------------------------------------------- int Dense3d::evaluate(const DblNumVec& srcDen, DblNumVec& trgVal) { //----------------------------------- iA(srcDen.m()==srcDOF()*(*_srcPos).n()); iA(trgVal.m()==trgDOF()*(*_trgPos).n()); int dim = this->dim(); int srcDOF = this->srcDOF(); int trgDOF = this->trgDOF(); /* Number of sources */ int numSrc = (*_srcPos).n(); /* Number of targets */ int numTrg = (*_trgPos).n(); DblNumMat inter(trgDOF, numSrc*srcDOF); for(int i=0; i<numTrg; i++) { DblNumMat onePosMat(dim, 1, false, (*_trgPos).clmdata(i)); DblNumVec oneValVec(trgDOF, false,*i); iC( _knl.kernel((*_srcPos), (*_srcNor), onePosMat, inter) ); iC( dgemv(1.0, inter, srcDen, 0.0, oneValVec) ); } return 0; }
// -------------------------------------------------------- LoadReadoutMenu::LoadReadoutMenu( const std::string& aId, swatch::core::ActionableObject& aActionable ) : swatch::core::Command(aId, aActionable, xdata::UnsignedInteger()) { ::mp7::MP7Controller& lDriver = getActionable<MP7Processor>().driver(); const ::mp7::ReadoutCtrlNode& rc = lDriver.getReadout().getNode< ::mp7::ReadoutCtrlNode>("readout_control"); mBanks = rc.readNumBanks(); mModes = rc.readNumModes(); mCaptures = rc.readNumCaptures(); std::string bankStr, modeStr, capStr; for( uint32_t iB(0); iB < mBanks; ++iB ) { bankStr = "bank"+boost::lexical_cast<std::string>(iB)+":"; registerParameter(bankStr+"wordsPerBx", xdata::UnsignedInteger()); } for( uint32_t iM(0); iM < mModes; ++iM ) { modeStr = "mode"+boost::lexical_cast<std::string>(iM)+":"; registerParameter(modeStr+"eventSize", xdata::UnsignedInteger()); registerParameter(modeStr+"eventToTrigger", xdata::UnsignedInteger()); registerParameter(modeStr+"eventType", xdata::UnsignedInteger()); registerParameter(modeStr+"tokenDelay", xdata::UnsignedInteger()); for( uint32_t iC(0); iC < mCaptures; ++iC ) { capStr = modeStr+"capture"+boost::lexical_cast<std::string>(iC)+":"; registerParameter(capStr+"enable", xdata::Boolean()); registerParameter(capStr+"id", xdata::UnsignedInteger()); registerParameter(capStr+"bankId", xdata::UnsignedInteger()); registerParameter(capStr+"length", xdata::UnsignedInteger()); registerParameter(capStr+"delay", xdata::UnsignedInteger()); registerParameter(capStr+"readoutLength", xdata::UnsignedInteger()); } } }
// restrictMatVecType1: restriction mat-vec for the shell matrix R (fine vector f -> coarse vector c).
//
// Reads the TransferOpData context attached to R (dof, suppressed-DOF masks, coarse DA `dac`,
// fine DA `daf`, fine-touched flags), zeroes c, and applies INTERGRID_TRANSFER_LOOP_BLOCK to the
// coarse elements in three phases so that computation overlaps communication:
//   1. while the fine-grid ghost read (ReadFromGhostsBegin on f and on the fine-touched flags) is
//      in flight: at most fopCnt = (fop*cSz)/(100*dof) coarse INDEPENDENT elements, fop = 75;
//   2. after ReadFromGhostsEnd: all coarse W_DEPENDENT elements;
//   3. while the coarse-grid ghost write (WriteToGhostsBegin/End on c) is in flight: the remaining
//      INDEPENDENT elements, resuming via init<FROM_STORED>.
// Each phase has two textually identical loops that differ only in the macro argument
// (ITLB_SET_VALUE_SUPPRESSED_DOFS when either suppressed-DOF mask is non-null, otherwise
// ITLB_SET_VALUE_NO_SUPPRESSED_DOFS).
//
// NOTE(review): in phase 1 the loop condition compares daf->currWithInfo() with itself, which is
// always true; per the existing comment it is there so that the info is stored before loopCtr is
// tested -- do not "simplify" it away or reorder the conjuncts.
// NOTE(review): the fine DA is never advanced in the loop headers; presumably the
// INTERGRID_TRANSFER_LOOP_BLOCK macro does that -- confirm against the macro definition.
// NOTE(review): no return statement is visible; presumably PROF_MG_RESTRICT_BEGIN/END expand to
// PetscFunctionBegin / PetscFunctionReturn(0) -- confirm.
PetscErrorCode restrictMatVecType1(Mat R, Vec f, Vec c) { PROF_MG_RESTRICT_BEGIN TransferOpData *data; iC(MatShellGetContext( R, (void **)&data)); unsigned int dof = data->dof; //Overlap 75% of the independent computation with the first communication and //25% with the second communication. In the first communication, we exchange // fine grid ghosts. In the second, we exchange coarse grid ghosts (1/4 of //fine grid, assuming uniform refinement). So, the //first comm. is more expensive. unsigned int fop = 75; unsigned char* suppressedDOFc = data->suppressedDOFc; unsigned char* suppressedDOFf = data->suppressedDOFf; ot::DA * dac = data->dac; ot::DA * daf = data->daf; PetscInt cSz; iC(VecGetLocalSize(c,&cSz)); unsigned int fopCnt = (fop*cSz)/(100*dof); std::vector<ot::FineTouchedStatus >* fineTouchedFlags = data->fineTouchedFlags; ot::FineTouchedStatus* fineTouchedFlagsArr; PetscScalar *farr = NULL; PetscScalar *carr = NULL; daf->vecGetBuffer(f, farr, false, false, true, dof);//Read-only daf->vecGetBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, fineTouchedFlagsArr, false, false, true, 1);//read-only if(daf->iAmActive()) { daf->ReadFromGhostsBegin<PetscScalar>(farr, dof); //This communication can be avoided if we store it daf->ReadFromGhostsBegin<ot::FineTouchedStatus>(fineTouchedFlagsArr, 1); } VecZeroEntries(c); dac->vecGetBuffer(c, carr, false, false, false, dof);//Writable if(dac->iAmActive()) { //Note: If Coarse is Independent, then the corresponding Fine is also independent. //Hence, overlapping comm with comp is possible. //Order of the test condition is important. We want to store the info before checking loopCtr. 
unsigned int loopCtr = 0; if(suppressedDOFc || suppressedDOFf) { for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>(); ( (daf->currWithInfo() == daf->currWithInfo()) && (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) && (loopCtr < fopCnt) ); dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS); }//end Independent loop (overlapping with read from fine ghosts) } else { for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>(); ( (daf->currWithInfo() == daf->currWithInfo()) && (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) && (loopCtr < fopCnt) ); dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS); }//end Independent loop (overlapping with read from fine ghosts) } } if(daf->iAmActive()) { daf->ReadFromGhostsEnd<PetscScalar>(farr); daf->ReadFromGhostsEnd<ot::FineTouchedStatus>(fineTouchedFlagsArr); } if(dac->iAmActive()) { if(suppressedDOFc || suppressedDOFf) { for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>(); dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS); }//end dependent loop } else { for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>(); dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS); }//end dependent loop } } if(dac->iAmActive()) { dac->WriteToGhostsBegin<PetscScalar>(carr, dof); } if(dac->iAmActive()) { //Continue Independent loop from where we left off. 
if(suppressedDOFc || suppressedDOFf) { for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>(); dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS); }//end Independent loop (overlapping with write to coarse ghosts) } else { for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>(); dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) { INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS); }//end Independent loop (overlapping with write to coarse ghosts) } } if(dac->iAmActive()) { dac->WriteToGhostsEnd<PetscScalar>(carr, dof); } daf->vecRestoreBuffer(f, farr, false, false, true, dof);//Read-only dac->vecRestoreBuffer(c, carr, false, false, false, dof);//Writable daf->vecRestoreBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, fineTouchedFlagsArr, false, false, true, 1);//read-only #ifdef PETSC_USE_LOG PetscLogFlops(128*dof*(daf->getElementSize())); #endif PROF_MG_RESTRICT_END }//restrict-3
int main(int argc, char **argv) { PetscInitialize(&argc, &argv, "elas.opt", help); int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); int Ns = 32; unsigned int dof = 3; char problemName[PETSC_MAX_PATH_LEN]; char filename[PETSC_MAX_PATH_LEN]; double t0 = 0.0; double dt = 0.1; double t1 = 1.0; double beta = 0.000001; double gamma = 0.0; // percent of noise added ... // double dtratio = 1.0; DA da; // Underlying scalar DA - for scalar properties DA da3d; // Underlying vector DA - for vector properties Vec rho; // density - elemental scalar Vec lambda; // Lame parameter - lambda - elemental scalar Vec mu; // Lame parameter - mu - elemental scalar Vec fibers; // Fiber orientations - nodal vector (3-dof) Vec fibersElemental; // for IO. will be destroyed. - elemental vector (3-dof) std::vector<Vec> tau; // the scalar activation - nodal scalar // Initial conditions Vec initialDisplacement; Vec initialVelocity; timeInfo ti; // get Ns CHKERRQ ( PetscOptionsGetInt(0,"-Ns",&Ns,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-t0",&t0,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-t1",&t1,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-dt",&dt,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-beta",&beta,0) ); CHKERRQ ( PetscOptionsGetString(PETSC_NULL,"-pn",problemName,PETSC_MAX_PATH_LEN-1,PETSC_NULL)); if (!rank) { std::cout << "Problem size is " << Ns+1 << " spatially and NT = " << (int)ceil(1.0/dt) << std::endl; } // Time info for timestepping ti.start = t0; ti.stop = t1; ti.step = dt; // create DA CHKERRQ ( DACreate3d ( PETSC_COMM_WORLD, DA_NONPERIODIC, DA_STENCIL_BOX, Ns+1, Ns+1, Ns+1, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, 0, &da) ); CHKERRQ ( DACreate3d ( PETSC_COMM_WORLD, DA_NONPERIODIC, DA_STENCIL_BOX, Ns+1, Ns+1, Ns+1, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, dof, 1, 0, 0, 0, &da3d) ); elasMass *Mass = new elasMass(feMat::PETSC); // Mass Matrix elasStiffness *Stiffness = new elasStiffness(feMat::PETSC); // Stiffness matrix raleighDamping *Damping = new 
raleighDamping(feMat::PETSC); // Damping Matrix cardiacDynamic *Force = new cardiacDynamic(feVec::PETSC); // Force Vector // create vectors CHKERRQ( DACreateGlobalVector(da, &rho) ); CHKERRQ( DACreateGlobalVector(da, &mu) ); CHKERRQ( DACreateGlobalVector(da, &lambda) ); CHKERRQ( DACreateGlobalVector(da3d, &initialDisplacement) ); CHKERRQ( DACreateGlobalVector(da3d, &initialVelocity) ); // Set initial conditions CHKERRQ( VecSet ( initialDisplacement, 0.0) ); CHKERRQ( VecSet ( initialVelocity, 0.0) ); VecZeroEntries( mu ); VecZeroEntries( lambda ); VecZeroEntries( rho ); int x, y, z, m, n, p; int mx,my,mz, xne, yne, zne; CHKERRQ( DAGetCorners(da, &x, &y, &z, &m, &n, &p) ); CHKERRQ( DAGetInfo(da,0, &mx, &my, &mz, 0,0,0,0,0,0,0) ); if (x+m == mx) { xne=m-1; } else { xne=m; } if (y+n == my) { yne=n-1; } else { yne=n; } if (z+p == mz) { zne=p-1; } else { zne=p; } double acx,acy,acz; double hx = 1.0/((double)Ns); // SET MATERIAL PROPERTIES ... // @todo - Write routines to read/write in Parallel // allocate for temporary buffers ... unsigned int elemSize = Ns*Ns*Ns; // std::cout << "Elem size is " << elemSize << std::endl; unsigned int nodeSize = (Ns+1)*(Ns+1)*(Ns+1); unsigned char *tmp_mat = new unsigned char[elemSize]; double *tmp_tau = new double[dof*elemSize]; // generate filenames & read in the raw arrays first ... std::ifstream fin; sprintf(filename, "%s.%d.img", problemName, Ns);, std::ios::binary); *)tmp_mat, elemSize); fin.close(); // Set Elemental material properties PetscScalar ***muArray, ***lambdaArray, ***rhoArray; CHKERRQ(DAVecGetArray(da, mu, &muArray)); CHKERRQ(DAVecGetArray(da, lambda, &lambdaArray)); CHKERRQ(DAVecGetArray(da, rho, &rhoArray)); // assign material properties ... // myo, tissue // nu = 0.49, 0.45 // E = 10000, 1000 // rho = 1.0, 0.1 // std::cout << "Setting Elemental properties." << std::endl; // loop through all elements ... 
for (int k=z; k<z+zne; k++) { for (int j=y; j<y+yne; j++) { for (int i=x; i<x+xne; i++) { int indx = k*Ns*Ns + j*Ns + i; if ( tmp_mat[indx] ) { muArray[k][j][i] = 344.82; //3355.7; lambdaArray[k][j][i] = 3103.448;// 164429.53; rhoArray[k][j][i] = 1.0; } else { muArray[k][j][i] = 344.82; lambdaArray[k][j][i] = 3103.448; rhoArray[k][j][i] = 1.0; } } // end i } // end j } // end k // std::cout << "Finished Elemental loop." << std::endl; CHKERRQ( DAVecRestoreArray ( da, mu, &muArray ) ); CHKERRQ( DAVecRestoreArray ( da, lambda, &lambdaArray ) ); CHKERRQ( DAVecRestoreArray ( da, rho, &rhoArray ) ); // std::cout << "Finished restoring arrays" << std::endl; // delete temporary buffers delete [] tmp_mat; // Now set the activation ... unsigned int numSteps = (unsigned int)(ceil(( ti.stop - ti.start)/ti.step)); // tau = (Vec *) new char*[numSteps+1]; // std::cout << "Numsteps is " << numSteps << std::endl; Vec tauVec, tmpTau; CHKERRQ( DACreateGlobalVector(da3d, &tmpTau) ); #ifdef __DEBUG__ if (!rank) { std::cout << x << ", " << y << ", " << z << " + " << xne << ", " << yne << ", " << zne << std::endl; } #endif PetscScalar ***tauArray; double tauNorm; for (unsigned int t=0; t<numSteps+1; t++) { CHKERRQ( DACreateGlobalVector(da3d, &tauVec) ); CHKERRQ( VecSet( tmpTau, 0.0)); CHKERRQ(DAVecGetArray(da3d, tmpTau, &tauArray)); // std::cout << "Setting force vectors" << std::endl; sprintf(filename, "%s.%d.%.3d.fld", problemName, Ns, t); // std::cout << "Reading force file " << filename << std::endl;; *)tmp_tau, dof*elemSize*sizeof(double)); fin.close(); for (int k = z; k < z + zne ; k++) { for (int j = y; j < y + yne; j++) { for (int i = x; i < x + xne; i++) { int indx = dof*(k*Ns*Ns + j*Ns + i); tauArray[k][j][dof*i] = tmp_tau[indx]; tauArray[k][j][dof*i+1] = tmp_tau[indx+1]; tauArray[k][j][dof*i+2] = tmp_tau[indx+2]; } } } // std::cout << CYN"\tFinished elemental loop"NRM << std::endl; CHKERRQ( DAVecRestoreArray ( da3d, tmpTau, &tauArray ) ); // std::cout << "Converting to Nodal 
Vector" << std::endl; // VecNorm(tmpTau, NORM_2, &tauNorm); // tauNorm = tauNorm/pow(Ns,1.5); // std::cout << "Elemental Norm is " << tauNorm << std::endl; // std::cout << rank << " Converting to Nodal" << std::endl; elementToNode(da3d, tmpTau, tauVec); /* VecNorm(tauVec, NORM_2, &tauNorm); tauNorm = tauNorm/pow(Ns,1.5); std::cout << "Nodal Norm is " << tauNorm << std::endl; */ // std::cout << rank << " Done converting to Nodal Vector" << std::endl; tau.push_back(tauVec); } //if (!rank) { // std::cout << "Finished setting forces" << std::endl; // } // CHKERRQ( VecDestroy( tmpTau ) ); delete [] tmp_tau; // DONE - SET MATERIAL PROPERTIES ... // Setup Matrices and Force Vector ... Mass->setProblemDimensions(1.0, 1.0, 1.0); Mass->setDA(da3d); Mass->setDof(dof); Mass->setDensity(rho); Stiffness->setProblemDimensions(1.0, 1.0, 1.0); Stiffness->setDA(da3d); Stiffness->setDof(dof); Stiffness->setLame(lambda, mu); Damping->setAlpha(0.0); Damping->setBeta(0.00075); Damping->setMassMatrix(Mass); Damping->setStiffnessMatrix(Stiffness); Damping->setDA(da3d); Damping->setDof(dof); // Force Vector Force->setProblemDimensions(1.0,1.0,1.0); Force->setDA(da3d); // Force->setActivationVec(tau); // Force->setFiberOrientations(fibers); Force->setFDynamic(tau); Force->setTimeInfo(&ti); // Newmark time stepper ... 
newmark *ts = new newmark; ts->setMassMatrix(Mass); ts->setDampingMatrix(Damping); ts->setStiffnessMatrix(Stiffness); ts->damp(false); ts->setTimeFrames(1); ts->setForceVector(Force); ts->setInitialDisplacement(initialDisplacement); ts->setInitialVelocity(initialVelocity); ts->storeVec(true); ts->setTimeInfo(&ti); ts->setAdjoint(false); // set if adjoint or forward ts->init(); // initialize IMPORTANT if (!rank) std::cout << RED"Starting Newmark Solve"NRM << std::endl; ts->solve();// solve if (!rank) std::cout << GRN"Done Newmark"NRM << std::endl; std::vector<Vec> solvec = ts->getSolution(); /* Set very initial guess for the inverse problem*/ /* PetscRandom rctx; PetscRandomCreate(PETSC_COMM_WORLD,&rctx); PetscRandomSetFromOptions(rctx); VecSetRandom(guess,rctx); VecNorm(guess,NORM_INFINITY,&norm); PetscPrintf(0,"guess norm = %g\n",norm); */ double errnorm; double exsolnorm; Vec guess; Vec truth; Vec Err; concatenateVecs(solvec, guess); concatenateVecs(tau, truth); iC(VecNorm(truth, NORM_2, &exsolnorm)); /* std::cout << "Forward solver solution size is " << solvec.size() << std::endl; std::cout << "Forward solver solution norm is " << exsolnorm << std::endl; */ VecZeroEntries(guess); // Inverse solver set up hyperbolicInverse *hyperInv = new hyperbolicInverse; hyperInv->setForwardInitialConditions(initialDisplacement, initialVelocity); hyperInv->setTimeStepper(ts); // set the timestepper hyperInv->setInitialGuess(guess);// set the initial guess // hyperInv->setInitialGuess(truth);// set the initial guess hyperInv->setRegularizationParameter(beta); // set the regularization paramter hyperInv->setObservations(solvec); // set the data for the problem hyperInv->init(); // initialize the inverse solver hyperInv->solve(); // solve hyperInv->getCurrentControl(guess); // get the solution // see the error in the solution relative to the actual solution VecDuplicate(truth, &Err); iC(VecZeroEntries(Err)); iC(VecWAXPY(Err, -1.0, guess, truth)); iC(VecNorm(Err, NORM_2, 
&errnorm)); PetscPrintf(0,"errr in inverse = %g\n", errnorm/exsolnorm); PetscFinalize(); }
// -------------------------------------------------------- core::Command::State LoadReadoutMenu::code(const ::swatch::core::XParameterSet& aParams) { ::mp7::ReadoutMenu lMenu(mBanks, mModes, mCaptures); // Bank IDs std::string bankStr, modeStr, capStr; for( uint32_t iB(0); iB < mBanks; ++iB ) { bankStr = "bank"+boost::lexical_cast<std::string>(iB)+":"; const xdata::UnsignedInteger& bxOffset = aParams.get<xdata::UnsignedInteger>(bankStr+"wordsPerBx"); if ( !bxOffset.isNaN()) = bxOffset.value_; } for( uint32_t iM(0); iM < mModes; ++iM ) { modeStr = "mode"+boost::lexical_cast<std::string>(iM)+":"; const xdata::UnsignedInteger& eventSize = aParams.get<xdata::UnsignedInteger>(modeStr+"eventSize"); const xdata::UnsignedInteger& eventToTrigger = aParams.get<xdata::UnsignedInteger>(modeStr+"eventToTrigger"); const xdata::UnsignedInteger& eventType = aParams.get<xdata::UnsignedInteger>(modeStr+"eventType"); const xdata::UnsignedInteger& tokenDelay = aParams.get<xdata::UnsignedInteger>(modeStr+"tokenDelay"); ::mp7::ReadoutMenu::Mode& lMode = lMenu.mode(iM); if ( !eventSize.isNaN()) lMode.eventSize = eventSize.value_; if ( !eventToTrigger.isNaN()) lMode.eventToTrigger = eventToTrigger.value_; if ( !eventType.isNaN()) lMode.eventType = eventType.value_; if ( !tokenDelay.isNaN()) lMode.tokenDelay = tokenDelay.value_; LOG(swatch::logger::kWarning) << lMode; for( uint32_t iC(0); iC < mCaptures; ++iC ) { capStr = modeStr+"capture"+boost::lexical_cast<std::string>(iC)+":"; const xdata::Boolean& enable = aParams.get<xdata::Boolean>(capStr+"enable"); const xdata::UnsignedInteger& id = aParams.get<xdata::UnsignedInteger>(capStr+"id"); const xdata::UnsignedInteger& bankId = aParams.get<xdata::UnsignedInteger>(capStr+"bankId"); const xdata::UnsignedInteger& length = aParams.get<xdata::UnsignedInteger>(capStr+"length"); const xdata::UnsignedInteger& delay = aParams.get<xdata::UnsignedInteger>(capStr+"delay"); const xdata::UnsignedInteger& readoutLength = 
aParams.get<xdata::UnsignedInteger>(capStr+"readoutLength"); ::mp7::ReadoutMenu::Capture& lCapture = lMode[iC]; if ( !enable.isNaN()) lCapture.enable = enable.value_; if ( !id.isNaN()) = id.value_; if ( !bankId.isNaN()) lCapture.bankId = bankId.value_; if ( !length.isNaN()) lCapture.length = length.value_; if ( !delay.isNaN()) lCapture.delay = delay.value_; if ( !readoutLength.isNaN()) lCapture.readoutLength = readoutLength.value_; } } ::mp7::MP7Controller& driver = getActionable<MP7Processor>().driver(); const ::mp7::ReadoutCtrlNode& rc = driver.getReadout().getNode< ::mp7::ReadoutCtrlNode >("readout_control"); std::map<uint32_t,uint32_t> lEventSizes = driver.computeEventSizes(lMenu); for( uint32_t iM(0); iM < mModes; ++iM ) { ::mp7::ReadoutMenu::Mode& lMode = lMenu.mode(iM); if ( lMode.eventSize == 0xfffff ) continue; lMode.eventSize =; LOG(swatch::logger::kInfo) << "Mode " << iM << " event size set to " << lMode.eventSize; } LOG(swatch::logger::kInfo) << lMenu; rc.configureMenu(lMenu); return State::kDone; }
//------------------------------------------------------------------------------------ int fdct3d_inverse(int N1, int N2, int N3, int nbscales, int nbdstz_coarse, CpxCrvletPrtd& C, CpxNumTnsBlkd& W, CpxNumTnsBlkd& X) { //check the size of c,w, make sure it is okay time_t tm0, tm1; tm0 = time(NULL); int b = W.b(); int e = W.e(); int f = W.f(); int g = W.g(); int mpirank; MPI_Comm_rank(MPI_COMM_WORLD, &mpirank); iC( MPI_Barrier(MPI_COMM_WORLD) ); //iC( PetscPrintf(MPI_COMM_WORLD, "%d inverse 0\n", mpirank) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); //------------------------------------------- //1. fft on X X = W; BolNumTns newtnsexists(e,f,g); IntNumTns newtnsowners(e,f,g); fdct3d_partition_cpxnumtnsblkd_z(N1,N2,N3,b, newtnsexists,newtnsowners); //scatter x to contain z slices iC( X.scatter(newtnsexists) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x scatter %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //shift x's owner to z slices iC( X.shift(newtnsowners) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x shift %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //discard x's nonowners iC( X.discard() ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x discard %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //ifft iC( fdct3d_fft(X) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x fft %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //scale x with POU DblOffVec big1(N1); fdct3d_lowpass(2.0*N1/3, big1); DblOffVec big2(N2); fdct3d_lowpass(2.0*N2/3, big2); DblOffVec big3(N3); fdct3d_lowpass(2.0*N3/3, big3); IntNumTns& Xowners = X.owners(); for(int i=0; i<e; i++) for(int j=0; j<f; j++) for(int k=0; k<g; k++) { if(Xowners(i,j,k)==mpirank) { CpxNumTns& Xblk = X.block(i,j,k); int istt = i*b-N1/2; int jstt = j*b-N2/2; int kstt = k*b-N3/2; for(int ioff=0; ioff<b; ioff++) for(int joff=0; joff<b; joff++) for(int koff=0; koff<b; 
koff++) { double pou = big1(ioff+istt) * big2(joff+jstt)*big3(koff+kstt); Xblk(ioff, joff, koff) *= sqrt(1-pou*pou); } } } //------------------------------------------- //2. compute wedges int L = nbscales; //setup c, 1,2,3, 6*np/8 processors are computing. 0 processor contains also the center wedge vector< vector<bool> > newcrvexists; vector< vector<int > > newcrvowners; fdct3d_partition_cpxcrvletprtd(N1,N2,N3, nbscales, nbdstz_coarse, newcrvexists, newcrvowners); //vector< vector<double> > fxs, fys, fzs; //vector< vector<int > > nxs, nys, nzs; //fdct3d_param(N1,N2,N3, nbscales,nbdstz_coarse, fxs,fys,fzs, nxs,nys,nzs); //LEXING: might not be necessary //find out the required blocks from x for each processor fdct3d_dependency(N1,N2,N3,b, nbscales,nbdstz_coarse, newcrvowners, newtnsexists); //expand x according to c's request iC( X.expand(newtnsexists) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x expand %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //compute contribution to x from c { int s = 0; double L1 = 2.0*N1/3.0 / pow2(L-2-s); double L2 = 2.0*N2/3.0 / pow2(L-2-s); double L3 = 2.0*N3/3.0 / pow2(L-2-s); fdct3d_inverse_center(N1,N2,N3,b, L1,L2,L3, s, C, X); } for(int s=1; s<nbscales-1; s++) { double L1 = 2.0*N1/3.0 / pow2(L-2-s); double L2 = 2.0*N2/3.0 / pow2(L-2-s); double L3 = 2.0*N3/3.0 / pow2(L-2-s); int nd = nbdstz_coarse * pow2(s/2); fdct3d_inverse_angles(N1,N2,N3,b, L1,L2,L3, s, nd, C, X); } iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv c compute %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //combine x iC( X.combine() ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x combine %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //------------------------------------------- //3. 
ifft on X iC( fdct3d_ifft(X) ); iC( MPI_Barrier(MPI_COMM_WORLD) ); tm1 = time(NULL); //iC( PetscPrintf(MPI_COMM_WORLD, "inv x ifft %f\n", difftime(tm1,tm0)) ); tm0 = tm1; //done return 0; }
/**
 * @brief Fetches from their owners every block that is requested in
 *        @p newexists but not yet present locally.
 *
 * Steps:
 *   1. lay all blocks out in one conceptual global vector, grouped by owning
 *      process and ordered by (s,w) within each owner, and collect the global
 *      indices of the entries this process is missing;
 *   2. create the distributed vector and copy the locally owned blocks in;
 *   3. scatter the requested entries into a sequential local vector;
 *   4. allocate the newly received blocks, copy the data in and mark them as
 *      existing.
 *
 * Block data is moved as raw doubles; _sizes[s][w] is the number of doubles
 * transferred for block (s,w).
 *
 * @param newexists  newexists[s][w] == true requests block (s,w) on this process.
 * @return 0 on success (PETSc failures are handled by iC).
 */
int CpxCrvletPrtd::scatter(vector< vector<bool> >& newexists)
{
  //LEXING: usually only called once
  vector< vector<int> >& c = _nx;

  //1. the global vector
  //number of entries owned by each process
  vector<int> glblszs(mpisize(), 0);
  int glbnum = 0;
  for(int s=0; s<c.size(); s++)
    for(int w=0; w<c[s].size(); w++) {
      int pi = _owners[s][w];
      glblszs[pi] += _sizes[s][w];
      glbnum += _sizes[s][w];
    }
  //exclusive prefix sum: first global index of each process
  vector<int> glbaccs(mpisize(), 0);
  int tmp = 0;
  for(int pi=0; pi<mpisize(); pi++) {
    glbaccs[pi] = tmp;
    tmp += glblszs[pi];
  }
  //first global index of each block (copy of c only provides the shape)
  vector< vector<int> > glbstts(c); //not cleared, but okay
  for(int s=0; s<c.size(); s++)
    for(int w=0; w<c[s].size(); w++) {
      int pi = _owners[s][w];
      glbstts[s][w] = glbaccs[pi];
      glbaccs[pi] += _sizes[s][w];
    }

  //global indices of all entries this process still needs
  int lclsum = 0;
  vector<int> l2gmap;
  for(int s=0; s<c.size(); s++)
    for(int w=0; w<c[s].size(); w++) {
      if(newexists[s][w]==true && _exists[s][w]==false) {
        lclsum += _sizes[s][w];
        for(int g=0; g<_sizes[s][w]; g++)
          l2gmap.push_back( glbstts[s][w] + g );
      }
    }
  iA(l2gmap.size()==lclsum);

  IS lclis;
  iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) );
  //A process that requests nothing has an empty map; taking &l2gmap[0] there
  //is undefined behaviour, so pass a null pointer together with length 0.
  int* l2gptr = l2gmap.empty() ? static_cast<int*>(0) : &(l2gmap[0]);
  IS glbis;
  iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), l2gptr, &glbis) );
  l2gmap.clear(); //SAVE SPACE

  //2. allocate a global vector, and copy data
  Vec glbvec;
  iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) );
  double* glbarr;
  iC( VecGetArray(glbvec, &glbarr) );
  double* glbptr = glbarr;
  for(int s=0; s<c.size(); s++)
    for(int w=0; w<c[s].size(); w++) {
      int pi = _owners[s][w];
      if(pi==mpirank()) {
        double* tmpptr = (double*)(_blocks[s][w].data());
        for(int g=0; g<_sizes[s][w]; g++) {
          *glbptr = tmpptr[g];
          glbptr++;
        }
      }
    }
  iC( VecRestoreArray(glbvec, &glbarr) );

  Vec lclvec;
  iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) );

  //3. vec scatter
  VecScatter sc;
  iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) );
  iC( ISDestroy(lclis) );
  iC( ISDestroy(glbis) ); //SAVE SPACE

  iC( VecScatterBegin(glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) );
  iC( VecScatterEnd(  glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) );

  iC( VecScatterDestroy(sc) ); //SAVE SPACE
  iC( VecDestroy(glbvec) );

  //4. store
  //blocks are visited in the same (s,w) order used to build l2gmap
  double* lclarr;
  iC( VecGetArray(lclvec, &lclarr) );
  double* lclptr = lclarr;
  for(int s=0; s<c.size(); s++)
    for(int w=0; w<c[s].size(); w++) {
      if(newexists[s][w]==true && _exists[s][w]==false) {
        _blocks[s][w].resize(_nx[s][w], _ny[s][w], _nz[s][w]);
        double* tmpptr = (double*)(_blocks[s][w].data());
        for(int g=0; g<_sizes[s][w]; g++) {
          tmpptr[g] = *lclptr;
          lclptr++;
        }
        _exists[s][w] = true; //VERY IMPORTANT
      }
    }
  iC( VecRestoreArray(lclvec, &lclarr) );
  iC( VecDestroy(lclvec) );

  return 0;
}
int CpxCrvletPrtd::combine() { //LEXING: usually only called once vector< vector<int> >& c = _nx; //1. the global vector vector<int> glblszs(mpisize(), 0); int glbnum = 0; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glblszs[pi] += _sizes[s][w]; glbnum += _sizes[s][w]; } vector<int> glbaccs(mpisize(), 0); int tmp = 0; for(int pi=0; pi<mpisize(); pi++) { glbaccs[pi] = tmp; tmp += glblszs[pi]; } vector< vector<int> > glbstts(c); for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glbstts[s][w] = glbaccs[pi]; glbaccs[pi] += _sizes[s][w]; } int lclsum = 0; vector<int> l2gmap; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_exists[s][w]==true && _owners[s][w]!=mpirank()) { lclsum += _sizes[s][w]; for(int g=0; g<_sizes[s][w]; g++) l2gmap.push_back( glbstts[s][w] + g ); } } iA(l2gmap.size()==lclsum); IS lclis; iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) ); IS glbis; iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), &(l2gmap[0]), &glbis) ); l2gmap.clear(); //SAVE SPACE //2. allocate a global vector and a local vector, put data in local Vec glbvec; iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) ); Vec lclvec; iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) ); double* lclarr; iC( VecGetArray(lclvec, &lclarr) ); double* lclptr = lclarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_exists[s][w]==true && _owners[s][w]!=mpirank()) { double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { *lclptr = tmpptr[g]; lclptr++; } } } iC( VecRestoreArray(lclvec, &lclarr) ); //3. 
vec scatter VecScatter sc; iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) ); iC( ISDestroy(lclis) ); iC( ISDestroy(glbis) ); //SAVE SPACE iC( VecScatterBegin(glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) ); iC( VecScatterEnd( glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) ); iC( VecScatterDestroy(sc) ); //SAVE SPACE iC( VecDestroy(lclvec) ); //4. store double* glbarr; iC( VecGetArray(glbvec, &glbarr) ); double* glbptr = glbarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; if(pi==mpirank()) { double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { tmpptr[g] += *glbptr; glbptr++; //LEXING: += is very important } } } iC( VecRestoreArray(glbvec, &glbarr) ); iC( VecDestroy(glbvec) ); //IMPORTANT for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_owners[s][w]!=mpirank()) { _blocks[s][w].resize(0,0,0); _exists[s][w] = false; } } return 0; }
int main(int argc, char ** argv ) { int size, rank; bool incCorner = 1; unsigned int dim=3; unsigned int maxDepth=29; bool compressLut=true; std::vector<ot::TreeNode> balOct; double mgLoadFac = 2.0; unsigned int regLev = 2; PetscInitialize(&argc, &argv, "options", help); ot::RegisterEvents(); ot::DAMG_Initialize(MPI_COMM_WORLD); #ifdef PETSC_USE_LOG PetscClassId classid; PetscClassIdRegister("Dendro",&classid); PetscLogEventRegister("matProp",classid, &matPropEvent); PetscLogEventRegister("ODAmatDiag",classid, &Jac1DiagEvent); PetscLogEventRegister("ODAmatMult",classid, &Jac1MultEvent); PetscLogEventRegister("ODAmatDiagFinest",classid, &Jac1FinestDiagEvent); PetscLogEventRegister("ODAmatMultFinest",classid, &Jac1FinestMultEvent); PetscLogEventRegister("OMGmatDiag-2",classid, &Jac2DiagEvent); PetscLogEventRegister("OMGmatMult-2",classid, &Jac2MultEvent); PetscLogEventRegister("OMGmatDiagFinest-2",classid, &Jac2FinestDiagEvent); PetscLogEventRegister("OMGmatMultFinest-2",classid, &Jac2FinestMultEvent); PetscLogEventRegister("OMGmatDiag-3",classid, &Jac3DiagEvent); PetscLogEventRegister("OMGmatMult-3",classid, &Jac3MultEvent); PetscLogEventRegister("OMGmatDiagFinest-3",classid, &Jac3FinestDiagEvent); PetscLogEventRegister("OMGmatMultFinest-3",classid, &Jac3FinestMultEvent); int stages[1]; PetscLogStageRegister("Solve",&stages[0]); #endif MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if(argc > 1) { regLev = atoi(argv[1]); } if(argc > 2) { maxDepth = atoi(argv[2]); } if(argc > 3) { dim = atoi(argv[3]); } if(argc > 4) { incCorner = (bool)(atoi(argv[4]));} if(argc > 5) { compressLut = (bool)(atoi(argv[5]));} if(argc > 6) { mgLoadFac = atof(argv[6]); } #ifdef PETSC_USE_LOG PetscLogStagePush(stages[0]); #endif MPI_Barrier(MPI_COMM_WORLD); ot::DAMG *damg; int nlevels = 1; //number of multigrid levels unsigned int dof = 1; // degrees of freedom per node createRegularOctree(balOct, regLev, dim, maxDepth, MPI_COMM_WORLD); PetscInt nlevelsPetscInt = 
nlevels; //To keep the compilers happy when using 64-bit indices PetscOptionsGetInt(0, "-nlevels", &nlevelsPetscInt, 0); nlevels = nlevelsPetscInt; // Note: The user context for all levels will be set separately later. MPI_Barrier(MPI_COMM_WORLD); if(!rank) { std::cout<<" nlevels initial: "<<nlevels<<std::endl; } ot::DAMGCreateAndSetDA(PETSC_COMM_WORLD, nlevels, NULL, &damg, balOct, dof, mgLoadFac, compressLut, incCorner); if(!rank) { std::cout<<" nlevels final: "<<nlevels<<std::endl; } MPI_Barrier(MPI_COMM_WORLD); if(!rank) { std::cout << "Created DA for all levels."<< std::endl; } MPI_Barrier(MPI_COMM_WORLD); ot::PrintDAMG(damg); MPI_Barrier(MPI_COMM_WORLD); for(int i=0;i<nlevels;i++) { bool isRegOct = isRegularGrid(damg[i]->da); if(!rank) { std::cout<<"Level "<<i<<" is regular? "<<isRegOct<<std::endl; } }//end for i SetUserContexts(damg); if(!rank) { std::cout << "Set User Contexts all levels."<< std::endl; } MPI_Barrier(MPI_COMM_WORLD); PetscInt jacType = 1; PetscOptionsGetInt(0,"-jacType",&jacType,0); PetscInt rhsType = 1; PetscOptionsGetInt(0,"-rhsType",&rhsType,0); createLmatType2(LaplacianType2Stencil); createMmatType2(MassType2Stencil); if(jacType == 3) { createLmatType1(LaplacianType1Stencil); createMmatType1(MassType1Stencil); } createShFnMat(ShapeFnStencil); if(!rank) { std::cout << "Created Stencils."<< std::endl; } //Function handles PetscErrorCode (*ComputeRHSHandle)(ot::DAMG damg,Vec rhs) = NULL; PetscErrorCode (*CreateJacobianHandle)(ot::DAMG damg,Mat *B) = NULL; PetscErrorCode (*ComputeJacobianHandle)(ot::DAMG damg,Mat J, Mat B) = NULL; if(rhsType == 0) { ComputeRHSHandle = ComputeRHS0; } else if (rhsType == 1) { ComputeRHSHandle = ComputeRHS1; } else if (rhsType == 2) { ComputeRHSHandle = ComputeRHS2; } else if (rhsType == 3) { ComputeRHSHandle = ComputeRHS3; } else if (rhsType == 4) { ComputeRHSHandle = ComputeRHS4; } else if (rhsType == 5) { ComputeRHSHandle = ComputeRHS5; } else if (rhsType == 6) { ComputeRHSHandle = ComputeRHS6; } else if 
(rhsType == 7) { ComputeRHSHandle = ComputeRHS7; } else if (rhsType == 8) { ComputeRHSHandle = ComputeRHS8; } else { assert(false); } if(jacType == 1) { CreateJacobianHandle = CreateJacobian1; ComputeJacobianHandle = ComputeJacobian1; } else if (jacType == 2) { CreateJacobianHandle = CreateJacobian2; ComputeJacobianHandle = ComputeJacobian2; } else if (jacType == 3) { CreateJacobianHandle = CreateJacobian3; ComputeJacobianHandle = ComputeJacobian3; //Skip the finest and the coarsest levels. For the other levels, J and B //must be different for(int i = 1; i < (nlevels-1); i++) { ot::DAMGCreateJMatrix(damg[i], CreateJacobianHandle); } } else { assert(false); } //Global Function Handles for using KSP_Shell (will be used @ the coarsest grid if not all //processors are active on the coarsest grid) if (jacType == 1) { ot::getPrivateMatricesForKSP_Shell = getPrivateMatricesForKSP_Shell_Jac1; } else if (jacType == 2) { ot::getPrivateMatricesForKSP_Shell = getPrivateMatricesForKSP_Shell_Jac2; } else if (jacType == 3) { ot::getPrivateMatricesForKSP_Shell = getPrivateMatricesForKSP_Shell_Jac3; } else { assert(false); } ot::DAMGSetKSP(damg, CreateJacobianHandle, ComputeJacobianHandle, ComputeRHSHandle); if(!rank) { std::cout<<"Solving u-Lu=f"<<std::endl; } iC(ot::DAMGSolve(damg)); destroyLmatType2(LaplacianType2Stencil); destroyMmatType2(MassType2Stencil); if(jacType == 3) { destroyLmatType1(LaplacianType1Stencil); destroyMmatType1(MassType1Stencil); } destroyShFnMat(ShapeFnStencil); MPI_Barrier(MPI_COMM_WORLD); DestroyUserContexts(damg); if (!rank) { std::cout << GRN << "Destroyed User Contexts." << NRM << std::endl; } MPI_Barrier(MPI_COMM_WORLD); iC(DAMGDestroy(damg)); if (!rank) { std::cout << GRN << "Destroyed DAMG" << NRM << std::endl; } #ifdef PETSC_USE_LOG PetscLogStagePop(); #endif balOct.clear(); if (!rank) { std::cout << GRN << "Finalizing ..." << NRM << std::endl; } ot::DAMG_Finalize(); PetscFinalize(); }//end function