// ****************************************************************************
//  Method: avtADIOSBasicFileFormat::ComputeStartCount
//
//  Purpose:
//      In a parallel build, overwrites start/count with the portion of a
//      rectilinear array that this MPI rank is assigned by
//      avtDatabase's rectilinear decomposition.  In a serial build the
//      function does nothing and start/count are left untouched.
//
//  Arguments:
//      globalDims   Global extent of the array; entries [0..2] are read.
//      dim          Dimensionality handed to the decomposition helpers.
//      start        In/out: per-axis start index; entries [0..2] are
//                   read and written.
//      count        In/out: per-axis element count; entries [0..2] are
//                   read and written.
//
//  Notes:
//      All three axes are always processed, regardless of 'dim', so the
//      caller must pass arrays with at least three valid entries.
//
// ****************************************************************************

void
avtADIOSBasicFileFormat::ComputeStartCount(uint64_t *globalDims,
                                           int dim,
                                           uint64_t *start,
                                           uint64_t *count)
{
#if PARALLEL
    int domCount[3] = {0, 0, 0};

    // The avtDatabase helpers work on plain ints.  Convert explicitly:
    // brace-initialising an int from a uint64_t is a narrowing conversion,
    // which is ill-formed in C++11 list-initialisation.
    int s[3], c[3];
    for (int i = 0; i < 3; i++)
    {
        s[i] = static_cast<int>(start[i]);
        c[i] = static_cast<int>(count[i]);
    }

    avtDatabase::ComputeRectilinearDecomposition(dim, PAR_Size(),
                                                 globalDims[0],
                                                 globalDims[1],
                                                 globalDims[2],
                                                 &domCount[0],
                                                 &domCount[1],
                                                 &domCount[2]);

    // Determine this processor's logical domain (e.g. domain ijk) indices
    int domLogicalCoords[3] = {0, 0, 0};
    avtDatabase::ComputeDomainLogicalCoords(dim, domCount, PAR_Rank(),
                                            domLogicalCoords);

    // Compute domain bounds.
    for (int i = 0; i < 3; i++)
    {
        avtDatabase::ComputeDomainBounds(globalDims[i], domCount[i],
                                         domLogicalCoords[i],
                                         &(s[i]), &(c[i]));

        // Extend the block by one element along this axis unless the
        // extended block would reach the end of the global array.
        // NOTE(review): presumably this makes neighbouring domains share
        // a layer of values -- confirm against the reader's mesh logic.
        c[i]++;
        if (static_cast<uint64_t>(s[i] + c[i]) >= globalDims[i])
            c[i]--;

        start[i] = s[i];
        count[i] = c[i];
    }
#endif
}
// ****************************************************************************
//  Method: avtMemoryUsageQuery::PerformQuery
//
//  Purpose:
//      Reports the memory size of every engine process, in megabytes
//      (bytes / 1048576).  Each rank measures itself; in a parallel build
//      with more than one rank the values are gathered onto rank 0.
//
//  Arguments:
//      atts    Query attributes; receives the per-process values and the
//              result message.
//
//  Notes:
//      If avtMemory::GetMemorySize reports zero for either quantity the
//      query is treated as unsupported: an empty result and an explanatory
//      message are stored and no further work is done.
//
// ****************************************************************************

void
avtMemoryUsageQuery::PerformQuery(QueryAttributes *atts)
{
    // Ask the platform layer for this process's memory footprint.
    unsigned long totalBytes, residentBytes;
    avtMemory::GetMemorySize(totalBytes, residentBytes);

    const bool unsupported = (totalBytes == 0 || residentBytes == 0);
    if (unsupported)
    {
        memSizeVals.clear();
        atts->SetResultsValue(memSizeVals);
        atts->SetResultsMessage("The Memory Usage Query is not supported on "
                                "this platform");
        return;
    }

    // Bytes -> megabytes.  Left non-const because it is used as an MPI
    // send buffer below.
    double localUsageMB = static_cast<double>(totalBytes) / 1048576.0;

    const int numRanks = PAR_Size();
    const int myRank   = PAR_Rank();

    // One zeroed slot per rank, with our own slot filled in.
    memSizeVals.assign(numRanks, 0.0);
    memSizeVals[myRank] = localUsageMB;

#ifdef PARALLEL
    // Bring every rank's value to the root.
    if (numRanks > 1)
    {
        MPI_Gather(&localUsageMB, 1, MPI_DOUBLE,
                   &memSizeVals[0], 1, MPI_DOUBLE,
                   0, VISIT_MPI_COMM);
    }
#endif

    atts->SetResultsValue(memSizeVals);
    queryAtts = *atts;

    const std::string resultText = GetResultMessage();
    atts->SetResultsMessage(resultText);
}
void MPIXfer::SendInterruption(int mpiInterruptTag) { if (PAR_UIProcess()) { // Do a nonblocking send to all processes to do it quickly int size = PAR_Size(); unsigned char buf[1] = {255}; MPI_Request *request = new MPI_Request[size-1]; for (int i=1; i<size; i++) { MPI_Isend(buf, 1, MPI_CHAR, i, mpiInterruptTag, VISIT_MPI_COMM, &request[i-1]); } // Then wait for them all to read the command MPI_Status *status = new MPI_Status[size-1]; MPI_Waitall(size-1, request, status); delete [] request; delete [] status; } }
// ****************************************************************************
//  Method: avtGTCFileFormat::PopulateDatabaseMetaData
//
//  Purpose:
//      Registers the "particles" point mesh (3 spatial dimensions,
//      topological dimension 0) and one node-centered scalar for every
//      variable index from 3 up to nVars-1 that has a non-empty name.
//
//  Arguments:
//      md      The metadata object to populate.
//
//  Notes:
//      In a parallel build the mesh is advertised with one block per MPI
//      rank.
//      NOTE(review): indices 0-2 are skipped, presumably because they hold
//      the particle coordinates -- confirm against IndexToVarName.
//
// ****************************************************************************

void
avtGTCFileFormat::PopulateDatabaseMetaData(avtDatabaseMetaData *md)
{
    const std::string meshname("particles");

    // Describe the point mesh.
    avtMeshMetaData *pointMesh = new avtMeshMetaData;
    pointMesh->name = meshname;
    pointMesh->spatialDimension = 3;
    pointMesh->topologicalDimension = 0;
    pointMesh->meshType = AVT_POINT_MESH;
#ifdef PARALLEL
    pointMesh->numBlocks = PAR_Size();
#endif
    md->Add(pointMesh);

    // Describe the scalar variables defined on that mesh.
    for (int varIndex = 3; varIndex < nVars; ++varIndex)
    {
        const std::string varName = IndexToVarName(varIndex);
        if (varName == "")
            continue;

        AddScalarVarToMetaData(md, varName, meshname, AVT_NODECENT);
    }
}
bool avtGTCFileFormat::Initialize() { const char *mName = "avtGTCFileFormat::Initialize: "; if(initialized) return true; // Init HDF5 and turn off error message printing. H5open(); H5Eset_auto( NULL, NULL ); bool err = false; // Check for a valid GTC file if( H5Fis_hdf5( GetFilename() ) < 0 ) EXCEPTION1( InvalidFilesException, GetFilename() ); if ((fileHandle = H5Fopen(GetFilename(), H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) EXCEPTION1( InvalidFilesException, GetFilename() ); if ((particleHandle = H5Dopen(fileHandle, "particle_data")) < 0) { H5Fclose(fileHandle); EXCEPTION1( InvalidFilesException, GetFilename() ); } // At this point consider the file to truly be a GTC file. If // some other file NonCompliantExceptions will be thrown. // Continue as normal reporting NonCompliantExceptions //Check variable's size. hid_t dataspace = H5Dget_space(particleHandle); hsize_t dims[3]; hid_t sid = H5Dget_space(particleHandle); int ndims = H5Sget_simple_extent_dims(dataspace, dims, NULL); if(ndims < 0 || ndims > 2) { debug4 << mName << "Could not determine number of dimensions" << endl; H5Sclose(sid); H5Dclose(particleHandle); H5Fclose(fileHandle); EXCEPTION1( InvalidVariableException, "GTC Dataset Extents - Dataset 'particle_data' has an invalid extents"); } debug4 << mName << "Determining variable size" << endl; int val = H5Sget_simple_extent_dims(sid, dims, NULL); if(val < 0 || dims[1] < 3) { debug4 << mName << "Could not determine variable size" << endl; H5Sclose(sid); H5Dclose(particleHandle); H5Fclose(fileHandle); EXCEPTION1( InvalidVariableException, "GTC Dataset Extents - Dataset 'particle_data' has an insufficient number of variables"); } H5Sclose(dataspace); debug4 << mName << "variable size (" << dims[0] << ", " << dims[1] << ")" << endl; nTotalPoints = dims[0]; nVars = dims[1]; #ifdef PARALLEL nProcs = PAR_Size(); rank = PAR_Rank(); nPoints = nTotalPoints / nProcs; int remainder = nTotalPoints % nProcs; startOffset = rank * nPoints; if ( rank < remainder ) startOffset += 
rank; else startOffset += remainder; if ( rank < remainder ) nPoints++; #else nPoints = nTotalPoints; startOffset = 0; #endif initialized = true; return initialized; }
void avtZoneDumpFilter::PostExecute() { // skip dump if not enabled if(!atts.GetEnabled()) return; #ifdef PARALLEL // loop index int i; // get the number of processors and the current processor id int nprocs = PAR_Size(); int procid = PAR_Rank(); // get the number of zone infos to send int n_snd_zones = zones.size(); // size of each zone info int zinfo_size = ZoneInfo::PackedSize(); // calculate the total send message size int snd_msg_size = n_snd_zones * zinfo_size; // send buffer unsigned char *snd_msg = NULL; // vars for the root processor // size of the gather message int rcv_msg_size = 0; // holds size of the msg from each other processor int *rcv_count = NULL; // holds the displacement of the msg from each other processor int *rcv_disp = NULL; // receive buffer unsigned char *rcv_msg = NULL; if(procid == 0) { // allocate space for these for root proc only rcv_count = new int[nprocs]; rcv_disp = new int[nprocs]; } // gather message sizes from all procs to root proc MPI_Gather(&snd_msg_size,1, MPI_INT, rcv_count, 1, MPI_INT, 0, VISIT_MPI_COMM); // find message offsets and total rcv size if(procid == 0) { rcv_disp[0] = 0; rcv_msg_size = rcv_count[0]; for( i=1; i<nprocs;i++) { rcv_disp[i] = rcv_count[i-1] + rcv_disp[i-1]; rcv_msg_size += rcv_count[i]; } } // get total # of zone infos int nrcv_zones = rcv_msg_size / zinfo_size; // create msg to send to the root proc if(snd_msg_size > 0) { snd_msg= new unsigned char[snd_msg_size]; unsigned char *snd_msg_ptr = snd_msg; // pack zone infos for(i=0; i < n_snd_zones; i++) { zones[i].Pack(snd_msg_ptr); snd_msg_ptr+= zinfo_size; } } if(procid == 0 && rcv_msg_size > 0) { // create the rcv buffer for the root proc rcv_msg = new unsigned char[rcv_msg_size]; } // gather all zone infos MPI_Gatherv(snd_msg, snd_msg_size, MPI_UNSIGNED_CHAR, rcv_msg, rcv_count, rcv_disp, MPI_UNSIGNED_CHAR, 0,VISIT_MPI_COMM); if(procid == 0 ) { // unpack all rcvd zones std::vector<ZoneInfo> rcv_zones(nrcv_zones); unsigned char *rcv_msg_ptr = 
rcv_msg; for( i = 0; i < nrcv_zones; i++) { rcv_zones[i].Unpack(rcv_msg_ptr); rcv_msg_ptr += zinfo_size; } // save all zones SaveOutput(atts.GetOutputFile(),rcv_zones); } // cleanup if(snd_msg) delete[] snd_msg; if(rcv_msg) delete[] rcv_msg; if(rcv_count) delete[] rcv_count; if(rcv_disp) delete[] rcv_disp; #else // for serial case, simply dump out zones found during the exe pass. SaveOutput(atts.GetOutputFile(),zones); #endif }
// ****************************************************************************
//  Method: avtResampleFilter::ResampleInput
//
//  Purpose:
//      Resamples the input onto a rectilinear grid and installs the result
//      as this filter's output data tree.  The input is transformed into
//      image space, run through a sample point extractor, and the
//      resulting samples are copied into VTK arrays attached to a grid
//      created over the original bounds.
//
//  Notes:
//      This routine makes collective calls (UnifyMaximumValue, Collect).
//      The early "bypass" return issues a matching sequence so that all
//      processors stay in step; keep the two sequences consistent when
//      editing.
//
//      When distributed resampling is off, only processor 0 ends up with a
//      grid; the others get an empty data tree.
//
// ****************************************************************************

void
avtResampleFilter::ResampleInput(void)
{
    int  i, j, k;

    avtDataset_p output = GetTypedOutput();
    double bounds[6] = { 0, 0, 0, 0, 0, 0 };
    bool is3D = GetBounds(bounds);

    debug4 << "Resampling over space: " << bounds[0] << ", " << bounds[1]
           << ": " << bounds[2] << ", " << bounds[3] << ": " << bounds[4]
           << ", " << bounds[5] << endl;

    //
    // Our resampling leaves some invalid values in the data range.  The
    // easiest way to bypass this is to get the data range from the input and
    // pass it along (since resampling does not change it in theory).
    //
    double range[2];
    if (GetInput()->GetInfo().GetAttributes().ValidActiveVariable())
    {
        GetDataExtents(range);
        output->GetInfo().GetAttributes().GetDesiredDataExtents()->Set(range);
    }

    avtViewInfo view;
    double scale[3];
    CreateViewFromBounds(view, bounds, scale);

    //
    // What we want the width, height, and depth to be depends on the
    // attributes.
    //
    int width, height, depth;
    GetDimensions(width, height, depth, bounds, is3D);

    //
    // If there are no variables, then just create the mesh and exit.
    // Only processor 0 creates the grid; everyone else gets an empty tree.
    //
    bool thereAreNoVariables =
          (GetInput()->GetInfo().GetAttributes().GetNumberOfVariables() <= 0);
    if (thereAreNoVariables)
    {
        if (PAR_Rank() == 0)
        {
            vtkRectilinearGrid *rg = CreateGrid(bounds, width, height, depth,
                                      0, width, 0, height, cellCenteredOutput, is3D);
            avtDataTree_p tree = new avtDataTree(rg, 0);
            rg->Delete();
            SetOutputDataTree(tree);
        }
        else
        {
            //
            // Putting in a NULL data tree can lead to seg faults, etc.
            //
            avtDataTree_p dummy = new avtDataTree();
            SetOutputDataTree(dummy);
        }

        return;
    }

    //
    // World space is a right-handed coordinate system.  Image space (as used
    // in the sample point extractor) is a left-handed coordinate system.
    // This is because large X is at the right and large Y is at the top.
    // The z-buffer has the closest points at z=0, so Z is going away from the
    // screen ===> left handed coordinate system.  If we reflect across X,
    // then this will account for the difference between the coordinate
    // systems.
    //
    scale[0] *= -1.;

    //
    // We don't want an Update to go all the way up the pipeline, so make
    // a terminating source corresponding to our input.
    //
    avtDataset_p ds;
    avtDataObject_p dObj = GetInput();
    CopyTo(ds, dObj);
    avtSourceFromAVTDataset termsrc(ds);

    //
    // The sample point extractor expects everything to be in image space.
    //
    avtWorldSpaceToImageSpaceTransform trans(view, scale);
    trans.SetInput(termsrc.GetOutput());

    // Input with topological dimension 0 is sampled with the kernel-based
    // method.
    bool doKernel =
        (GetInput()->GetInfo().GetAttributes().GetTopologicalDimension() == 0);
    avtSamplePointExtractor extractor(width, height, depth);
    extractor.SendCellsMode(false);
    extractor.Set3DMode(is3D);
    extractor.SetInput(trans.GetOutput());
    if (doKernel)
        extractor.SetKernelBasedSampling(true);
    avtSamplePoints_p samples = extractor.GetTypedOutput();

    //
    // If the selection this filter exists to create has already been handled,
    // or if there are no pieces for this processor to process, then we can skip
    // execution. But, take care to emulate the same collective
    // calls other processors may make before returning.
    //
    if (GetInput()->GetInfo().GetAttributes().GetSelectionApplied(selID))
    {
        debug1 << "Bypassing Resample operator because database plugin "
                  "claims to have applied the selection already" << endl;

        SetOutputDataTree(GetInputDataTree());

        // we can save a lot of time if we know everyone can bypass
        if (UnifyMaximumValue(0) == 0)
            return;

        // here is some dummied up code to match collective calls below
        // NOTE(review): the real path below calls Collect once per array
        // (realVars, plus one when doKernel) with a count that includes
        // the component count, and skips Collect entirely when
        // doDistributedResample is set.  This emulation uses
        // GetNumberOfRealVariables() calls of width*height*depth each --
        // confirm the two sequences really match in those cases.
        int effectiveVars = samples->GetNumberOfRealVariables();
        double *ptrtmp = new double[width*height*depth];
        for (int jj = 0; jj < width*height*depth; jj++)
            ptrtmp[jj] = -FLT_MAX;
        for (i = 0 ; i < effectiveVars ; i++)
            Collect(ptrtmp, width*height*depth);
        delete [] ptrtmp;
        return;
    }
    else
    {
        // Matches the UnifyMaximumValue(0) issued by bypassing processors.
        UnifyMaximumValue(1);
    }

    //
    //
    // PROBLEM SIZED WORK OCCURS BEYOND THIS POINT
    // If you add (or remove) collective calls below this point, make sure to
    // put matching sequence into bypass code above
    //
    //

    avtSamplePointCommunicator communicator;
    avtImagePartition partition(width, height, PAR_Size(), PAR_Rank());
    communicator.SetImagePartition(&partition);

    // Distributed resampling is only ever enabled in parallel builds.
    bool doDistributedResample = false;
#ifdef PARALLEL
    doDistributedResample = atts.GetDistributedResample();
#endif

    if (doDistributedResample)
    {
        // Route the samples through the communicator.
        partition.SetShouldProduceOverlaps(true);
        avtDataObject_p dob;
        CopyTo(dob, samples);
        communicator.SetInput(dob);
        samples = communicator.GetTypedOutput();
    }

    // Always set up an arbitrator, even if user selected random.
    bool arbLessThan = !atts.GetUseArbitrator() || atts.GetArbitratorLessThan();
    std::string arbName = atts.GetArbitratorVarName();
    if (arbName == "default")
        arbName = primaryVariable;
    extractor.SetUpArbitrator(arbName, arbLessThan);

    //
    // Since this is Execute, forcing an update is okay...
    //
    samples->Update(GetGeneralContract());

    // Propagate any error raised while sampling to our own output.
    if (samples->GetInfo().GetValidity().HasErrorOccurred())
    {
        GetOutput()->GetInfo().GetValidity().ErrorOccurred();
        GetOutput()->GetInfo().GetValidity().SetErrorMessage(
                          samples->GetInfo().GetValidity().GetErrorMessage());
    }

    //
    // Create a rectilinear dataset that is stretched according to the
    // original bounds.
    //
    int width_start  = 0;
    int width_end    = width;
    int height_start = 0;
    int height_end   = height;
    if (doDistributedResample)
    {
        // Use only this processor's share of the image, widened by one
        // in each direction.
        partition.GetThisPartition(width_start, width_end, height_start,
                                   height_end);
        width_end += 1;
        height_end += 1;
    }

    //
    // If we have more processors than domains, we have to handle that
    // gracefully.  Communicate how many variables there are so that those
    // that don't have data can play well.
    //
    int realVars = samples->GetNumberOfRealVariables();
    int numArrays = realVars;
    if (doKernel)
        numArrays++;       // one extra single-component array, used below
                           // as the per-sample weight
    vtkDataArray **vars = new vtkDataArray*[numArrays];
    for (i = 0 ; i < numArrays ; i++)
    {
        vars[i] = vtkDoubleArray::New();
        if (doKernel && (i == numArrays-1))
            vars[i]->SetNumberOfComponents(1);
        else
        {
            vars[i]->SetNumberOfComponents(samples->GetVariableSize(i));
            vars[i]->SetName(samples->GetVariableName(i).c_str());
        }
    }

    if (doKernel)
        samples->GetVolume()->SetUseKernel(true);

    // A NULL partition is passed unless we are resampling in distributed
    // fashion.
    avtImagePartition *ip = NULL;
    if (doDistributedResample)
        ip = &partition;

    // We want all uncovered regions to get the default value.  That is
    // what the first argument of GetVariables is for.  But if the
    // default value is large, then it will screw up the collect call below,
    // which uses MPI_MAX for an all reduce.  So give uncovered regions very
    // small values now (-FLT_MAX) and then replace them later.
    double defaultPlaceholder = -FLT_MAX;
    samples->GetVolume()->GetVariables(defaultPlaceholder, vars,
                                       numArrays, ip);

    if (!doDistributedResample)
    {
        //
        // Collect will perform the parallel collection.  Does nothing in
        // serial.  This will only be valid on processor 0.
        //
        for (i = 0 ; i < numArrays ; i++)
        {
            double *ptr = (double *) vars[i]->GetVoidPointer(0);
            Collect(ptr, vars[i]->GetNumberOfComponents()*width*height*depth);
        }
    }

    // Now replace the -FLT_MAX's with the default value.  (See comment above.)
    for (i = 0 ; i < numArrays ; i++)
    {
        // Despite its name, numTups counts individual values
        // (components * tuples), matching the flat pointer walked below.
        int numTups = vars[i]->GetNumberOfComponents()
                    * vars[i]->GetNumberOfTuples();
        if (numTups > 0)
        {
            double *ptr = (double *) vars[i]->GetVoidPointer(0);
            for (j = 0 ; j < numTups ; j++)
                ptr[j] = (ptr[j] == defaultPlaceholder
                                 ? atts.GetDefaultVal()
                                 : ptr[j]);
        }
    }

    // Decide whether this processor produces a grid: always when doing
    // distributed resampling, otherwise only on processor 0 -- and never
    // when this processor's height range extends past the image height.
    bool iHaveData = false;
    if (doDistributedResample)
        iHaveData = true;
    if (PAR_Rank() == 0)
        iHaveData = true;
    if (height_end > height)
        iHaveData = false;
    if (iHaveData)
    {
        vtkRectilinearGrid *rg = CreateGrid(bounds, width, height, depth,
                                        width_start, width_end, height_start,
                                        height_end, cellCenteredOutput, is3D);

        if (doKernel)
        {
            // Divide every value by its sample's weight (the last array).
            // Samples whose weight is at or below the cutoff get the
            // default value instead.
            double min_weight = avtPointExtractor::GetMinimumWeightCutoff();
            vtkDataArray *weights = vars[numArrays-1];
            int numVals = weights->GetNumberOfTuples();
            for (i = 0 ; i < realVars ; i++)
            {
                for (j = 0 ; j < vars[i]->GetNumberOfComponents() ; j++)
                {
                    for (k = 0 ; k < numVals ; k++)
                    {
                        double weight = weights->GetTuple1(k);
                        if (weight <= min_weight)
                            vars[i]->SetComponent(k, j, atts.GetDefaultVal());
                        else
                            vars[i]->SetComponent(k, j,
                                         vars[i]->GetComponent(k, j) / weight);
                    }
                }
            }
        }

        //
        // Attach these variables to our rectilinear grid.  The primary
        // variable becomes the active vectors (3 components) or the
        // active scalars (1 component); everything else is simply added
        // as an array.  Cell data or point data is chosen according to
        // cellCenteredOutput.  The weight array is not attached.
        //
        for (i = 0 ; i < realVars ; i++)
        {
            const char *varname = vars[i]->GetName();
            if (strcmp(varname, primaryVariable) == 0)
            {
                if (vars[i]->GetNumberOfComponents() == 3)
                    if (cellCenteredOutput)
                        rg->GetCellData()->SetVectors(vars[i]);
                    else
                        rg->GetPointData()->SetVectors(vars[i]);
                else if (vars[i]->GetNumberOfComponents() == 1)
                {
                    if (cellCenteredOutput)
                    {
                        rg->GetCellData()->AddArray(vars[i]);
                        rg->GetCellData()->SetScalars(vars[i]);
                    }
                    else
                    {
                        rg->GetPointData()->AddArray(vars[i]);
                        rg->GetPointData()->SetScalars(vars[i]);
                    }
                }
                else
                {
                    if (cellCenteredOutput)
                        rg->GetCellData()->AddArray(vars[i]);
                    else
                        rg->GetPointData()->AddArray(vars[i]);
                }
            }
            else
            {
                if (cellCenteredOutput)
                    rg->GetCellData()->AddArray(vars[i]);
                else
                    rg->GetPointData()->AddArray(vars[i]);
            }
        }

        avtDataTree_p tree = new avtDataTree(rg, 0);
        rg->Delete();
        SetOutputDataTree(tree);
    }
    else
    {
        //
        // Putting in a NULL data tree can lead to seg faults, etc.
        //
        avtDataTree_p dummy = new avtDataTree();
        SetOutputDataTree(dummy);
    }

    // Drop our references to the arrays and free the pointer table.
    for (i = 0 ; i < numArrays ; i++)
    {
        vars[i]->Delete();
    }
    delete [] vars;
}
void avtStreamlineInfoQuery::PostExecute() { //Everyone communicate data to proc 0. #ifdef PARALLEL int nProcs = PAR_Size(); int *counts = new int[nProcs]; for (int i = 0; i < nProcs; i++) counts[i] = 0; counts[PAR_Rank()] = slData.size(); Collect(counts, nProcs); int tag = GetUniqueMessageTag(); MPI_Status stat; if (PAR_Rank() == 0) { for (int i = 1; i < nProcs; i++) { if (counts[i] > 0) { float *vals = new float[counts[i]]; void *ptr = (void *)&vals[0]; MPI_Recv(ptr, counts[i], MPI_FLOAT, i, tag, VISIT_MPI_COMM, &stat); for (int j = 0; j < counts[i]; j++) slData.push_back(vals[j]); delete [] vals; } } } else { if (slData.size() > 0) { void *ptr = (void *)&slData[0]; MPI_Send(ptr, slData.size(), MPI_FLOAT, 0, tag, VISIT_MPI_COMM); } } delete [] counts; #endif std::string msg; char str[128]; int i = 0, sz = slData.size(); int slIdx = 0; MapNode result_node; while (i < sz) { sprintf(str, "Streamline %d: Seed %f %f %f Arclength %f\n", slIdx, slData[i], slData[i+1], slData[i+2], slData[i+3]); MapNode sl_res_node; doubleVector sl_res_seed; sl_res_seed.push_back(slData[i]); sl_res_seed.push_back(slData[i+1]); sl_res_seed.push_back(slData[i+2]); sl_res_node["seed"] = sl_res_seed; sl_res_node["arclength"] = slData[i+3]; i+=4; msg += str; if (dumpSteps) { int numSteps = (int)slData[i++]; doubleVector sl_steps; for (int j = 0; j < numSteps; j++) { sprintf(str, " %f %f %f \n", slData[i], slData[i+1], slData[i+2]);// slData[i+3], slData[i+4]); sl_steps.push_back(slData[i]); sl_steps.push_back(slData[i+1]); sl_steps.push_back(slData[i+2]); i+=5; msg += str; } sl_res_node["steps"] = sl_steps; } sprintf(str, "streamline %d", slIdx); result_node[str] = sl_res_node; slIdx++; } SetResultMessage(msg.c_str()); SetXmlResult(result_node.ToXML()); }
bool avtLCSFilter::RectilinearGridIterativeCalc( std::vector<avtIntegralCurve*> &ics ) { //algorithm sends index to global datastructure as well as end points. //Send List of index into global array to rank 0 //Send end positions into global array to rank 0 size_t nics = ics.size(); //loop over all the intelgral curves and add it back to the //original list of seeds. intVector indices(nics); doubleVector points(nics*3); doubleVector times(nics); for(size_t i=0, j=0; i<nics; ++i, j+=3) { avtStreamlineIC * ic = (avtStreamlineIC *) ics[i]; indices[i] = ic->id; avtVector point = ic->GetEndPoint(); points[j+0] = point[0]; points[j+1] = point[1]; points[j+2] = point[2]; if( doPathlines ) times[i] = ic->GetTime() - seedTime0; else times[i] = ic->GetTime(); } int* all_indices = 0; int* index_counts = 0; double* all_points = 0; int *point_counts = 0; double* all_times = 0; int *time_counts = 0; Barrier(); CollectIntArraysOnRootProc(all_indices, index_counts, &indices.front(), (int)indices.size()); CollectDoubleArraysOnRootProc(all_points, point_counts, &points.front(), (int)points.size()); CollectDoubleArraysOnRootProc(all_times, time_counts, ×.front(), (int)times.size()); Barrier(); //root should now have index into global structure and all //matching end positions. if(PAR_Rank() != 0) { return true; } else { //variable name. std::string var = outVarRoot + outVarName; //now global grid has been created. 
if( fsle_ds == 0 ) fsle_ds = CreateIterativeCalcDataSet(); // Get the stored data arrays vtkDoubleArray *exponents = (vtkDoubleArray *) fsle_ds->GetPointData()->GetArray(var.c_str()); vtkDoubleArray *component = (vtkDoubleArray *) fsle_ds->GetPointData()->GetArray("component"); vtkDoubleArray *times = (vtkDoubleArray *) fsle_ds->GetPointData()->GetArray("times"); size_t nTuples = exponents->GetNumberOfTuples(); // Storage for the points and times std::vector<avtVector> remapPoints(nTuples); std::vector<double> remapTimes(nTuples); //update remapPoints with new value bounds from integral curves. int par_size = PAR_Size(); size_t total = 0; for(int i = 0; i < par_size; ++i) { if(index_counts[i]*3 != point_counts[i] || index_counts[i] != time_counts[i]) { EXCEPTION1(VisItException, "Index count does not the result count." ); } total += index_counts[i]; } for(size_t j=0, k=0; j<total; ++j, k+=3) { size_t index = all_indices[j]; if(nTuples <= index) { EXCEPTION1(VisItException, "More integral curves were generatated than " "grid points." ); } remapPoints[index].set( all_points[k+0], all_points[k+1], all_points[k+2]); remapTimes[index] = all_times[j]; } // Store the times for the exponent. for(size_t l=0; l<nTuples; ++l) times->SetTuple1(l, remapTimes[l]); //use static function in avtGradientExpression to calculate //gradients. since this function only does scalar, break our //vectors into scalar components and calculate one at a time. vtkDataArray* jacobian[3]; for(int i = 0; i < 3; ++i) { // Store the point component by component for(size_t l=0; l<nTuples; ++l) component->SetTuple1(l, remapPoints[l][i]); jacobian[i] = avtGradientExpression::CalculateGradient(fsle_ds, "component"); } for (size_t i = 0; i < nTuples; i++) component->SetTuple1(i, std::numeric_limits<double>::epsilon()); //now have the jacobian - 3 arrays with 3 components. 
ComputeLyapunovExponent(jacobian, component); jacobian[0]->Delete(); jacobian[1]->Delete(); jacobian[2]->Delete(); // Compute the FSLE ComputeFSLE( component, times, exponents ); bool haveAllExponents = true; // For each integral curve check it's mask value to see it // additional integration is required. // ARS - FIX ME not parallelized!!!!!!!! for(size_t i=0; i<ics.size(); ++i) { avtStreamlineIC * ic = (avtStreamlineIC *) ics[i]; int ms = ic->GetMaxSteps(); if( ms < maxSteps ) { ic->SetMaxSteps(ms+1); ic->status.ClearTerminationMet(); } size_t l = ic->id; // The curve id is the index into the VTK data. // Check to see if all exponents have been found. if( exponents->GetTuple1(l) == std::numeric_limits<double>::min() && ms < maxSteps ) haveAllExponents = false; } //cleanup. if (all_indices) delete [] all_indices; if (index_counts) delete [] index_counts; if (all_points) delete [] all_points; if (point_counts) delete [] point_counts; if (all_times) delete [] all_times; if (time_counts) delete [] time_counts; return haveAllExponents; } }
// **************************************************************************** // Function: VisIt_MPI_Bcast // // Purpose: A smarter broadcast that gives VisIt control over polling // behavior. MPI's Bcast method can wind up doing a 'spin-wait' eating cpu // resources. Our implementation, here, is both similar and different to // that. // // In our Bcast method, all processors (except for root) start by posting a // non-blocking receive. A non-blocking receive can be tested for completion // using MPI_Test. All processors (again except for root), enter a polling // loop calling MPI_Test to check to see if the receive completed. For the // first several seconds of inactivity, they poll as fast as they can (this // is equivalent to MPI's spin-wait behavior). However, after not too much // time, they begin to introduce delays, using nanosleep, into this polling // loop. The delays substantially reduce the load on the cpu. // // The broadcast itself is initiated at the root and proceeds in tree-like // fashion. The root sends a message to the highest power of 2 ranked // processor that is less than the communicator size. As that processor // completes its recieve, it exits from its polling loop and enters the // 're-send to other processors' phase of the broadcast. It begins to send // messages to other processors and, simultaneously, the root also continues // to send messages to other processors. More processors complete their // recieves and then send the message on to still other processors. This // continues in tree-like fashion based on the MPI rank of the processors // until everyone has finished sending messages to those they are responsible // for. As the process continues, all odd-numbered processors only ever // execute a receive. All even numbered processors execute the receive and // then a variable number of sends depending on their rank relative to the // next closest power-of-2. 
// // In the case of a 13 processor run, this is how the broadcast phase would // proceed... // // 1) P0->P8 // 2) PO->P4 P8->P12 // 3) P0->P2, P4->P6, P8->P10 // 4) P0->P1, P2->P3, P4->P5, P6->P7, P8->P9, P10->P11 // // The first thing each processor does is to compute who it will recieve // the message from. That is determined by zeroing the lowest-order '1' bit // in the binary expression of the processor's rank. The difference between // that processor and the executing processor is a power-of-2. The executing // processor then sends the message to all processors that are above it in // rank by all powers-of-2 between 1 and the difference less one power of 2. // // Programmer: Mark C. Miller // Creation: February 12, 2007 // // Modifications: // // Mark C. Miller, Wed Feb 14 14:36:11 PST 2007 // Added class statics to control behavior and fall back to MPI's Bcast // when we specify 0 sleep time. // **************************************************************************** int MPIXfer::VisIt_MPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { // // Fall back to MPI broadcast if zero sleep time is specified // if (nanoSecsOfSleeps <= 0) { static bool first = true; if (first) debug5 << "Using MPI's Bcast; not VisIt_MPI_Bcast" << endl; first = false; MPI_Bcast(buf, count, datatype, root, comm); return 2; } int rank = PAR_Rank(); int size = PAR_Size(); MPI_Status mpiStatus; // // Make proc 0 the root if it isn't already // if (root != 0) { if (rank == root) // only original root does this MPI_Send(buf, count, datatype, 0, UI_BCAST_TAG, comm); if (rank == 0) // only new root (zero) does this MPI_Recv(buf, count, datatype, root, UI_BCAST_TAG, comm, &mpiStatus); root = 0; // everyone does this } // // Compute who the executing proc. will recieve its message from. 
// int srcProc = 0; for (int i = 0; i < 31; i++) { int mask = 0x00000001; int bit = (rank >> i) & mask; if (bit == 1) { int mask1 = ~(0x00000001 << i); srcProc = (rank & mask1); break; } } // // Polling Phase // if (rank != 0) { // // Everyone posts a non-blocking recieve // MPI_Request bcastRecv; MPI_Irecv(buf, count, datatype, srcProc, UI_BCAST_TAG, comm, &bcastRecv); // // Main polling loop // double startedIdlingAt = TOA_THIS_LINE; int mpiFlag; bool first = true; while (true) { // non-blocking test for recv completion MPI_Test(&bcastRecv, &mpiFlag, &mpiStatus); if (mpiFlag == 1) break; // // Note: We could add logic here to deal with engine idle timeout // instead of using the alarm mechanism we currently use. // // // Insert nanosleeps into the polling loop as determined by // amount of time we've been sitting here in this loop // double idleTime = TOA_THIS_LINE - startedIdlingAt; if (idleTime > secsOfSpinBeforeSleeps) { if (first) debug5 << "VisIt_MPI_Bcast started using " << nanoSecsOfSleeps / 1.0e9 << " seconds of nanosleep" << endl; first = false; #if defined(_WIN32) SleepEx((DWORD)(nanoSecsOfSleeps/1e6), false); #else struct timespec ts = {0, nanoSecsOfSleeps}; nanosleep(&ts, 0); #endif } } } // // Send on to other processors phase // // // Determine highest rank proc above the executing proc // that it is responsible to send a message to. // int deltaProc = (rank - srcProc) >> 1; if (rank == 0) { deltaProc = 1; while ((deltaProc << 1) < PAR_Size()) deltaProc = deltaProc << 1; } // // Send message to other procs the executing proc is responsible for // while (deltaProc > 0) { if (rank + deltaProc < size) MPI_Send(buf, count, datatype, rank + deltaProc, UI_BCAST_TAG, comm); deltaProc = deltaProc >> 1; } return 0; }
void avtQueryOverTimeFilter::CreateFinalOutput() { if (ParallelizingOverTime()) { double *totalQRes; int *qResMsgs; CollectDoubleArraysOnRootProc(totalQRes, qResMsgs, &(qRes[0]), qRes.size()); double *totalTimes; int *timesMsgs; CollectDoubleArraysOnRootProc(totalTimes, timesMsgs, &(times[0]), times.size()); if (PAR_Rank() == 0) { int i; int nResults = 0; int maxIterations = 0; for (i = 0 ; i < PAR_Size() ; i++) { nResults += timesMsgs[i]; maxIterations = (timesMsgs[i] > maxIterations ? timesMsgs[i] : maxIterations); } std::vector<double> finalQRes(nResults, 0.); std::vector<double> finalTimes(nResults, 0.); int index = 0; for (int j = 0 ; j < maxIterations ; j++) { int loc = 0; for (i = 0 ; i < PAR_Size() ; i++) { if (timesMsgs[i] > j) { finalQRes[index] = totalQRes[loc+j]; finalTimes[index] = totalTimes[loc+j]; index++; } loc += timesMsgs[i]; } } qRes = finalQRes; times = finalTimes; delete [] totalQRes; delete [] qResMsgs; delete [] totalTimes; delete [] timesMsgs; } else { SetOutputDataTree(new avtDataTree()); finalOutputCreated = true; return; } } if (qRes.size() == 0) { debug4 << "Query failed at all timesteps" << endl; avtCallback::IssueWarning("Query failed at all timesteps"); avtDataTree_p dummy = new avtDataTree(); SetOutputDataTree(dummy); return; } if (useTimeForXAxis && qRes.size()/nResultsToStore != times.size()) { debug4 << "QueryOverTime ERROR, number of results (" << qRes.size() << ") does not equal number " << "of timesteps (" << times.size() << ")." << endl; avtCallback::IssueWarning( "\nQueryOverTime error, number of results does not equal " "number of timestates. Curve being created may be missing " "some values. Please contact a VisIt developer."); } else if (nResultsToStore > 1 && qRes.size() % nResultsToStore != 0) { debug4 << "QueryOverTime ERROR, number of results (" << qRes.size() << ") is not a multiple of " << nResultsToStore << "and therefore cannot generate x,y pairs." 
<< endl; avtCallback::IssueWarning( "\nQueryOverTime error, number of results is incorrect. " "Curve being created may be missing some values. " "Please contact a VisIt developer."); } if (skippedTimes.size() != 0) { std::ostringstream osm; osm << "\nQueryOverTime (" << atts.GetQueryAtts().GetName().c_str() << ") experienced\n" << "problems with the following timesteps and \n" << "skipped them while generating the curve:\n "; for (int j = 0; j < skippedTimes.size(); j++) osm << skippedTimes[j] << " "; osm << "\nLast message received: " << errorMessage.c_str() << ends; debug4 << osm.str() << endl; avtCallback::IssueWarning(osm.str().c_str()); } stringVector vars = atts.GetQueryAtts().GetVariables(); bool multiCurve = false; if (atts.GetQueryAtts().GetQueryInputParams().HasNumericEntry("curve_plot_type")) { multiCurve = (atts.GetQueryAtts().GetQueryInputParams().GetEntry("curve_plot_type")->ToInt() == 1); } avtDataTree_p tree = CreateTree(times, qRes, vars, multiCurve); SetOutputDataTree(tree); finalOutputCreated = true; }