/**
 * Evaluates the matrix of second derivatives of this DOF vector at every
 * quadrature point of one element.
 *
 * The second derivatives are first accumulated with respect to the
 * barycentric directions (D2Tmp) and then contracted with the barycentric
 * gradients of the element:
 *
 *   d2AtQPs[iq][i][j] = sum_{k,l} grdLambda[k][i] * grdLambda[l][j] * D2Tmp[k][l]
 *
 * \param elInfo    element whose local coefficients and grdLambda are used
 * \param quad      quadrature; used only when quadFast is not given
 * \param quadFast  optional fast quadrature; when given, its tabulated
 *                  second derivatives are used instead of evaluating the
 *                  basis functions
 * \param d2AtQPs   output, resized to the number of quadrature points
 *
 * At least one of quad / quadFast must be non-null (checked in debug builds
 * only).
 */
void DOFVectorBase<double>::getD2AtQPs(
    const ElInfo* elInfo,
    const Quadrature* quad,
    const FastQuadrature* quadFast,
    DenseVector<D2Type<double>::type>& d2AtQPs) const
{
  FUNCNAME("DOFVector<double>::getD2AtQPs()");

  TEST_EXIT_DBG(quad || quadFast)("neither quad nor quadFast defined\n");

  if (quad && quadFast)
  {
    TEST_EXIT_DBG(quad == quadFast->getQuadrature())
    ("quad != quadFast->quadrature\n");
  }

  TEST_EXIT_DBG(!quadFast || quadFast->getBasisFunctions() == feSpace->getBasisFcts())
  ("invalid basis functions");

  Element* el = elInfo->getElement();

  int dow = Global::getGeo(WORLD);
  int nPoints = quadFast ? quadFast->getQuadrature()->getNumPoints() : quad->getNumPoints();

  // Coefficients of this vector on the element.
  DenseVector<double> localVec(nBasFcts);
  getLocalVector(el, localVec);

  DimMat<double> D2Tmp(dim, dim, 0.0);
  int parts = Global::getGeo(PARTS, dim);
  const DimVec<WorldVector<double>>& grdLambda = elInfo->getGrdLambda();

  d2AtQPs.change_dim(nPoints);

  if (quadFast)
  {
    for (int iq = 0; iq < nPoints; iq++)
    {
      for (int k = 0; k < parts; k++)
        for (int l = 0; l < parts; l++)
          D2Tmp[k][l] = 0.0;

      // Barycentric second derivatives from the tabulated values.
      for (int i = 0; i < nBasFcts; i++)
      {
        for (int k = 0; k < parts; k++)
          for (int l = 0; l < parts; l++)
            D2Tmp[k][l] += localVec[i] * quadFast->getSecDer(iq, i, k, l);
      }

      // Contract with the barycentric gradients.
      for (int i = 0; i < dow; i++)
      {
        for (int j = 0; j < dow; j++)
        {
          d2AtQPs[iq][i][j] = 0.0;
          for (int k = 0; k < parts; k++)
            for (int l = 0; l < parts; l++)
              d2AtQPs[iq][i][j] += grdLambda[k][i] * grdLambda[l][j] * D2Tmp[k][l];
        }
      }
    }
  }
  else
  {
    const BasisFunction* basFcts = feSpace->getBasisFcts();
    DimMat<double> D2Phi(dim, dim);

    // This path is flagged as untested. Warn once per call instead of once
    // per (quadrature point, basis function) pair, which flooded the log.
    if (nPoints > 0 && nBasFcts > 0)
    {
      WARNING("not tested after index correction\n");
    }

    for (int iq = 0; iq < nPoints; iq++)
    {
      for (int k = 0; k < parts; k++)
        for (int l = 0; l < parts; l++)
          D2Tmp[k][l] = 0.0;

      // Barycentric second derivatives by evaluating each basis function.
      for (int i = 0; i < nBasFcts; i++)
      {
        (*(basFcts->getD2Phi(i)))(quad->getLambda(iq), D2Phi);

        for (int k = 0; k < parts; k++)
          for (int l = 0; l < parts; l++)
            D2Tmp[k][l] += localVec[i] * D2Phi[k][l];
      }

      // Contract with the barycentric gradients.
      for (int i = 0; i < dow; i++)
      {
        for (int j = 0; j < dow; j++)
        {
          d2AtQPs[iq][i][j] = 0.0;
          for (int k = 0; k < parts; k++)
            for (int l = 0; l < parts; l++)
              d2AtQPs[iq][i][j] += grdLambda[k][i] * grdLambda[l][j] * D2Tmp[k][l];
        }
      }
    }
  }
}
/*
 * Table-driven affine transform, "32ext" variant.
 *
 * The parameter list (PARAMS_EXT) and most locals (DECLAREVAR) come from
 * macros defined outside this block, as do CLIP, DIST_BITS, CALL_FUNC,
 * SAT and SAT8; comments below describe only what is visible here.
 *
 * For every destination row j in [yStart, yFinish] and every channel k the
 * kernel is applied in tiles of kh rows by kw columns through CALL_FUNC or,
 * for IMG_TYPE == 4 without SRC_EXTEND, through the c2/c3/c4 helpers.
 * When type < 4 the kernels are handed the temporary buffer `buff`, which
 * is afterwards stored to the destination through SAT/SAT8; when
 * type >= 4 they are handed a pointer into the destination row itself.
 *
 * Returns MLIB_FAILURE if the temporary buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
mlib_status
mlib_ImageAffineTable_32ext(
    PARAMS_EXT)
{
    DECLAREVAR;
    /* on-stack row buffer; replaced by a heap buffer below if too small */
    FP_TYPE buff_local[BUFF_SIZE], *buff = buff_local;
    /* value every buffer/destination entry is preset to before filtering */
    FP_TYPE sat_off = SAT_OFF;
    mlib_s32 sbits, x_mask;
    /* select the specialized multi-channel kernels; stay 0 unless enabled */
    mlib_s32 c2_flag = 0, c3_flag = 0;

#ifndef SRC_EXTEND
#if IMG_TYPE == 4
    mlib_s32 align = (mlib_s32)lineAddr[0] | ws->srcStride;

    /* even n, m multiple of 4, even channel count, 8-byte aligned source */
    c2_flag = ((n & 1) | (m & 3) | (nchan & 1) | (align & 7)) == 0;
    /* even n and m, exactly 3 channels, type == 1 */
    c3_flag = (n & 1) == 0 && (m & 1) == 0 && (nchan == 3) && (type == 1);
#endif /* IMG_TYPE == 4 */
#endif /* SRC_EXTEND */

    if (type < 4) {
#if IMG_TYPE == 4
        /* buffer holds b_step entries per pixel on this path */
        b_step = (nchan == 4) ? 2 : nchan;
        max_xsize *= b_step;
#ifdef MLIB_USE_FTOI_CLAMPING
        sat_off = -127.5;
#else /* MLIB_USE_FTOI_CLAMPING */
        sat_off = 0.5;
#endif /* MLIB_USE_FTOI_CLAMPING */
#endif /* IMG_TYPE == 4 */

        /* fall back to the heap when a row does not fit into buff_local */
        if (max_xsize > BUFF_SIZE) {
            buff = __mlib_malloc(max_xsize * sizeof (FP_TYPE));

            if (buff == NULL)
                return (MLIB_FAILURE);
        }
    }

    /* pick the filter tables matching the floating-point width */
#if FLT_BITS == 2
    filterX = table->dataH_f32;
    filterY = table->dataV_f32;
#else /* FLT_BITS == 2 */
    filterX = table->dataH_d64;
    filterY = table->dataV_d64;
#endif /* FLT_BITS == 2 */

    DIST_BITS();

    /* sbits: 1 for 2 channels, 2 for 4 channels, 0 otherwise */
#ifndef SRC_EXTEND
    switch (nchan) {
    case 1:
        sbits = 0;
        break;
    case 2:
        sbits = 1;
        break;
    case 3:
        sbits = 0;
        break;
    case 4:
        sbits = 2;
        break;
    default:
        sbits = 0;
        break;
    }

#else /* SRC_EXTEND */
    sbits = 0;
#endif /* SRC_EXTEND */

    /* mask clearing the low sbits bits; x_shift is reduced by the same amount */
    x_mask = ~((1 << sbits) - 1);
    x_shift -= sbits;

    /* publish shifts/masks to the work structure read by the kernels */
    ws->x_shift = x_shift;
    ws->x_mask = x_mask;
    ws->xf_shift = xf_shift;
    ws->xf_mask = xf_mask;
    ws->yf_shift = yf_shift;
    ws->yf_mask = yf_mask;

    for (j = yStart; j <= yFinish; j++) {
        /* remember the previous row's size: only the new tail needs presetting */
        old_size = size;

        /* NOTE(review): CLIP presumably sets size, x0, y0, xLeft and dstPixelPtr for row j - confirm */
        CLIP(CHAN1);

        if (type < 4) {
#if IMG_TYPE == 4
/*
 * u8 via F32 image
 */
            if (c2_flag || c3_flag)
                b_step = (nchan == 4) ? 2 : nchan;
            else
                b_step = 1;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
            /* entries below b_step * old_size were already reset after the last store */
            for (i = b_step * old_size; i < b_step * size; i++) {
                buff[i] = sat_off;
            }

#else /* IMG_TYPE == 4 */
/*
 * process by one channel
 */
            b_step = 1;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
            for (i = old_size; i < size; i++) {
                buff[i] = sat_off;
            }

#endif /* IMG_TYPE == 4 */
        } else {    /* mlib_f32 types */
            b_step = nchan;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
            /* no temporary buffer here: preset the destination row itself */
            for (i = 0; i < size * nchan; i++) {
                dstPixelPtr[i] = (DTYPE) sat_off;
            }
        }

        ws->b_step = b_step;

/*
 * move to kernel center
 */
        x0 -= ws->x_move;
        y0 -= ws->y_move;

        ws->size = size;
        ws->x0 = x0;
        ws->y0 = y0;

        for (k = 0; k < nchan; k++) {
#if IMG_TYPE < 4
            DTYPE *dPtr = dstPixelPtr + k;
#endif /* IMG_TYPE < 4 */

            /* c2 kernels handle two channels per pass: skip odd k */
            if (c2_flag && (k & 1))
                continue;

            /* c3 kernel handles all three channels in the k == 0 pass */
            if (c3_flag && k)
                continue;

            ws->k = k;

            /* type >= 4: hand the kernels the destination row for channel k */
            if (type >= 4) {
                buff = (void *)(dstPixelPtr + k);
            }

            for (l = 0; l < n; l += kh) {    /* kernel lines */
                /* rows per call: 4 if allowed, else 2, else the single remaining row */
                kh = n - l;

                if (kh >= 4 && (m & 3) == 0 && !(c2_flag | c3_flag))
                    kh = 4;
                else if (kh >= 2)
                    kh = 2;

                for (off = 0; off < m; off += kw) {
/* offset in current kernel line */
                    ws->x0 = x0 + (off << (x_shift + sbits));

                    /*
                     * columns per call: at most MAX_KER; a remainder
                     * between MAX_KER and 2 * MAX_KER is halved
                     */
                    kw = m - off;

                    if (kw > 2 * MAX_KER)
                        kw = MAX_KER;
                    else if (kw > MAX_KER)
                        kw = kw / 2;

#ifndef SRC_EXTEND
#if IMG_TYPE == 4

                    if (c3_flag) {
                        kw = 2;
                        FUNCNAME(c3_2_2) (buff, filterX + off, filterY + l, lineAddr + l, ws);
                        continue;
                    }

                    if (c2_flag) {
                        if (nchan == 2) {
                            FUNCNAME(c2_2_4) (buff, filterX + off, filterY + l, lineAddr + l, ws);
                        } else {
                            FUNCNAME(c4_2_4) (buff, filterX + off, filterY + l, lineAddr + l, ws);
                        }
                    } else
#endif /* IMG_TYPE == 4 */
#endif /* SRC_EXTEND */
                        /* generic path; also the else-branch of the c2_flag test above */
                        CALL_FUNC(32);
                }
            }

#if IMG_TYPE < 4
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
            /* store channel k through SAT and reset the buffer for the next pass */
            for (i = 0; i < size; i++) {
                FP_TYPE val = buff[i];

#if IMG_TYPE < 3 && defined(MLIB_USE_FTOI_CLAMPING)
                mlib_s32 ival;
#endif /* IMG_TYPE < 3 && defined(MLIB_USE_FTOI_CLAMPING) */
                SAT(dPtr[i * nchan], ival, val);
                buff[i] = sat_off;
            }

#endif /* IMG_TYPE < 4 */

#if IMG_TYPE == 4

            if (type == 1) {
                /* byte destination for this row, starting at column xLeft, channel k */
                mlib_u8 *dp = (mlib_u8 *)dstData + nchan * xLeft + k;

                if (c3_flag) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                    /* buffer is interleaved: 3 entries per pixel, stored contiguously */
                    for (i = 0; i < 3 * size; i++) {
                        FP_TYPE val = (FP_TYPE) buff[i];

#ifdef MLIB_USE_FTOI_CLAMPING
                        mlib_s32 ival;
#endif /* MLIB_USE_FTOI_CLAMPING */
                        SAT8(dp[i], ival, val);
                        buff[i] = sat_off;
                    }
                } else if (c2_flag) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                    /* buffer holds two entries per pixel: channels k and k + 1 */
                    for (i = 0; i < size; i++) {
                        FP_TYPE val0 = (FP_TYPE) buff[2 * i];
                        FP_TYPE val1 = (FP_TYPE) buff[2 * i + 1];

#ifdef MLIB_USE_FTOI_CLAMPING
                        mlib_s32 ival0, ival1;
#endif /* MLIB_USE_FTOI_CLAMPING */
                        SAT8(dp[i * nchan], ival0, val0);
                        SAT8(dp[i * nchan + 1], ival1, val1);
                        buff[2 * i] = sat_off;
                        buff[2 * i + 1] = sat_off;
                    }
                } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                    /* one buffer entry per pixel for channel k */
                    for (i = 0; i < size; i++) {
                        FP_TYPE val = (FP_TYPE) buff[i];

#ifdef MLIB_USE_FTOI_CLAMPING
                        mlib_s32 ival;
#endif /* MLIB_USE_FTOI_CLAMPING */
                        SAT8(dp[i * nchan], ival, val);
                        buff[i] = sat_off;
                    }
                }
            }
#endif /* IMG_TYPE == 4 */
        }
    }

    /* for type >= 4 buff points into the destination and must not be freed */
    if (type < 4) {
        if (buff != buff_local)
            __mlib_free(buff);
    }

    return (MLIB_SUCCESS);
}
/**
 * Builds the distributed ParMETIS mesh description for the macro elements
 * that elementInRank marks as belonging to this rank.
 *
 * Filled arrays:
 *  - eptr:     per-element offsets into eind (nElements + 1 entries)
 *  - eind:     vertex DOF indices of each element (mapped to global indices
 *              when mapLocalGlobal is given)
 *  - elmdist:  prefix sums of the element counts of all ranks
 *  - xyz:      world coordinates of the point with equal barycentric
 *              coordinates, only allocated when dim equals the world
 *              dimension, otherwise NULL
 *
 * \param mesh            mesh that is traversed on level 0
 * \param comm            communicator used to gather the element counts
 * \param elementInRank   element index -> "belongs to this rank"
 * \param mapLocalGlobal  optional local-to-global DOF map, may be NULL
 */
ParMetisMesh::ParMetisMesh(Mesh* mesh, MPI::Intracomm* comm,
                           std::map<int, bool>& elementInRank,
                           DofMap* mapLocalGlobal)
  : dim(mesh->getDim()),
    nElements(0),
    mpiComm(comm)
{
  FUNCNAME("ParMetisMesh::ParMetisMesh()");

  int nRanks = mpiComm->Get_size();
  int worldDim = Global::getGeo(WORLD);

  // First pass: count the elements owned by this rank.
  TraverseStack stack;
  int nOwned = 0;
  for (ElInfo* elInfo = stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL);
       elInfo;
       elInfo = stack.traverseNext(elInfo))
  {
    if (elementInRank[elInfo->getElement()->getIndex()])
      nOwned++;
  }
  nElements = nOwned;

  TEST_EXIT(nElements > 0)("No elements in ParMETIS mesh!\n");

  // allocate memory
  eptr = new int[nElements + 1];
  eind = new int[nElements * (mesh->getGeo(VERTEX))];
  elmdist = new int[nRanks + 1];
  elem_p2a = new int[nElements];

  if (dim == worldDim)
    xyz = new float[nElements * dim];
  else
    xyz = NULL;

  eptr[0] = 0;

  // gather element numbers and turn them into prefix sums (elmdist)
  mpiComm->Allgather(&nElements, 1, MPI_INT, elmdist + 1, 1, MPI_INT);
  elmdist[0] = 0;
  for (int rank = 2; rank <= nRanks; rank++)
    elmdist[rank] += elmdist[rank - 1];

  // Second pass: fill the distributed ParMETIS data.
  DimVec<double> bary(dim, 1.0 / mesh->getGeo(VERTEX));
  WorldVector<double> world;

  int localIndex = 0;   // ParMETIS index of the next owned element
  int nodeCounter = 0;  // running end offset written to eptr
  int eindPos = 0;      // next free slot in eind
  int xyzPos = 0;       // next free slot in xyz

  for (ElInfo* elInfo = stack.traverseFirst(mesh, 0,
                                            Mesh::CALL_EL_LEVEL | Mesh::FILL_COORDS);
       elInfo;
       elInfo = stack.traverseNext(elInfo))
  {
    Element* element = elInfo->getElement();
    int index = element->getIndex();

    // skip elements that are not in this rank's partition
    if (!elementInRank[index])
      continue;

    // remember the index mapping in both directions
    setParMetisIndex(index, localIndex);
    setAMDiSIndex(localIndex, index);

    // write eptr entry
    nodeCounter += mesh->getGeo(VERTEX);
    eptr[localIndex + 1] = nodeCounter;

    // write eind entries (element nodes)
    for (int v = 0; v < dim + 1; v++)
    {
      if (mapLocalGlobal)
        eind[eindPos] = (*mapLocalGlobal)[element->getDof(v, 0)].global;
      else
        eind[eindPos] = element->getDof(v, 0);
      eindPos++;
    }

    // write xyz element coordinates
    if (xyz)
    {
      elInfo->coordToWorld(bary, world);
      for (int c = 0; c < dim; c++)
      {
        xyz[xyzPos] = static_cast<float>(world[c]);
        xyzPos++;
      }
    }

    localIndex++;
  }
}
bool ParMetisPartitioner::distributePartitioning(int* part) { FUNCNAME("ParMetisPartitioner::distributePartitioning()"); int mpiSize = mpiComm->Get_size(); int mpiRank = mpiComm->Get_rank(); int nElements = parMetisMesh->getNumElements(); // nPartitionElements[i] is the number of elements for the i-th partition int* nPartitionElements = new int[mpiSize]; for (int i = 0; i < mpiSize; i++) nPartitionElements[i] = 0; for (int i = 0; i < nElements; i++) nPartitionElements[part[i]]++; // collect number of partition elements from all ranks for this rank int* nRankElements = new int[mpiSize]; mpiComm->Alltoall(nPartitionElements, 1, MPI_INT, nRankElements, 1, MPI_INT); // sum up partition elements over all ranks int* sumPartitionElements = new int[mpiSize]; mpiComm->Allreduce(nPartitionElements, sumPartitionElements, mpiSize, MPI_INT, MPI_SUM); // Test if there exists an empty partition bool emptyPartition = false; for (int i = 0; i < mpiSize; i++) emptyPartition |= (sumPartitionElements[i] == 0); if (emptyPartition) return false; // prepare distribution (fill partitionElements with AMDiS indices) int* bufferOffset = new int[mpiSize]; bufferOffset[0] = 0; for (int i = 1; i < mpiSize; i++) bufferOffset[i] = bufferOffset[i - 1] + nPartitionElements[i - 1]; int* partitionElements = new int[nElements]; int** partitionPtr = new int* [mpiSize]; for (int i = 0; i < mpiSize; i++) partitionPtr[i] = partitionElements + bufferOffset[i]; sendElements.clear(); for (int i = 0; i < nElements; i++) { int partition = part[i]; int amdisIndex = parMetisMesh->getAMDiSIndex(i); if (partition != mpiRank) sendElements[partition].push_back(amdisIndex); *(partitionPtr[partition]) = amdisIndex; ++(partitionPtr[partition]); } // all to all: partition elements to rank elements int* rankElements = new int[sumPartitionElements[mpiRank]]; int* recvBufferOffset = new int[mpiSize]; recvBufferOffset[0] = 0; for (int i = 1; i < mpiSize; i++) recvBufferOffset[i] = recvBufferOffset[i - 1] + nRankElements[i - 
1]; mpiComm->Alltoallv(partitionElements, nPartitionElements, bufferOffset, MPI_INT, rankElements, nRankElements, recvBufferOffset, MPI_INT); TEST_EXIT(elementInRank.size() != 0)("Should not happen!\n"); for (map<int, bool>::iterator it = elementInRank.begin(); it != elementInRank.end(); ++it) elementInRank[it->first] = false; // Create map which stores for each element index on macro level // if the element is in the partition of this rank. recvElements.clear(); for (int i = 0; i < mpiSize; i++) { int* rankStart = rankElements + recvBufferOffset[i]; int* rankEnd = rankStart + nRankElements[i]; for (int* rankPtr = rankStart; rankPtr < rankEnd; ++rankPtr) { elementInRank[*rankPtr] = true; if (i != mpiRank) recvElements[i].push_back(*rankPtr); } } delete parMetisMesh; parMetisMesh = NULL; delete [] rankElements; delete [] nPartitionElements; delete [] nRankElements; delete [] sumPartitionElements; delete [] partitionElements; delete [] partitionPtr; delete [] bufferOffset; delete [] recvBufferOffset; return true; }
/**
 * Thin wrapper around pixConvertTo8(): forwards pix and passes FALSE as
 * the second argument (Leptonica's colormap flag).
 *
 * \return whatever pixConvertTo8() returns
 */
Pix* convertTo8(Pix* pix)
{
  FUNCNAME("convertTo8");

  Pix* converted = pixConvertTo8(pix, FALSE);
  return converted;
}
bool ParMetisPartitioner::partition(map<int, double>& elemWeights, PartitionMode mode) { FUNCNAME("ParMetisPartitioner::partition()"); int mpiSize = mpiComm->Get_size(); // === Create parmetis mesh === if (parMetisMesh) delete parMetisMesh; TEST_EXIT_DBG(elementInRank.size() != 0)("Should not happen!\n"); parMetisMesh = new ParMetisMesh(mesh, mpiComm, elementInRank, mapLocalGlobal); int nElements = parMetisMesh->getNumElements(); // === Create weight array === vector<int> wgts(nElements); vector<float> floatWgts(nElements); unsigned int floatWgtsPos = 0; float maxWgt = 0.0; TraverseStack stack; ElInfo* elInfo = stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL); while (elInfo) { int index = elInfo->getElement()->getIndex(); if (elementInRank[index]) { // get weight float wgt = static_cast<float>(elemWeights[index]); maxWgt = std::max(wgt, maxWgt); // write float weight TEST_EXIT_DBG(floatWgtsPos < floatWgts.size())("Should not happen!\n"); floatWgts[floatWgtsPos++] = wgt; } elInfo = stack.traverseNext(elInfo); } TEST_EXIT_DBG(floatWgtsPos == floatWgts.size())("Should not happen!\n"); float tmp; mpiComm->Allreduce(&maxWgt, &tmp, 1, MPI_FLOAT, MPI_MAX); maxWgt = tmp; // === Create dual graph === ParMetisGraph parMetisGraph(parMetisMesh, mpiComm); // === Partitioning of dual graph === int wgtflag = 2; // weights at vertices only! int numflag = 0; // c numbering style! int ncon = 1; // one weight at each vertex! int nparts = mpiSize; // number of partitions vector<double> tpwgts(mpiSize); double ubvec = 1.05; int options[4] = {0, 0, 15, PARMETIS_PSR_COUPLED}; // default options int edgecut = -1; vector<int> part(nElements); // set tpwgts for (int i = 0; i < mpiSize; i++) tpwgts[i] = 1.0 / static_cast<double>(nparts); // float scale = 10000.0 / maxWgt; for (int i = 0; i < nElements; i++) wgts[i] = floatWgts[i]; // wgts[i] = static_cast<int>(floatWgts[i] * scale); // === Start ParMETIS. 
=== MPI_Comm tmpComm = MPI_Comm(*mpiComm); switch (mode) { case INITIAL: ParMETIS_V3_PartKway(parMetisMesh->getElementDist(), parMetisGraph.getXAdj(), parMetisGraph.getAdjncy(), &(wgts[0]), NULL, &wgtflag, &numflag, &ncon, &nparts, &(tpwgts[0]), &ubvec, options, &edgecut, &(part[0]), &tmpComm); break; case ADAPTIVE_REPART: { vector<int> vsize(nElements); for (int i = 0; i < nElements; i++) vsize[i] = static_cast<int>(floatWgts[i]); ParMETIS_V3_AdaptiveRepart(parMetisMesh->getElementDist(), parMetisGraph.getXAdj(), parMetisGraph.getAdjncy(), &(wgts[0]), NULL, &(vsize[0]), &wgtflag, &numflag, &ncon, &nparts, &(tpwgts[0]), &ubvec, &itr, options, &edgecut, &(part[0]), &tmpComm); } break; case REFINE_PART: ParMETIS_V3_RefineKway(parMetisMesh->getElementDist(), parMetisGraph.getXAdj(), parMetisGraph.getAdjncy(), &(wgts[0]), NULL, &wgtflag, &numflag, &ncon, &nparts, &(tpwgts[0]), &ubvec, options, &edgecut, &(part[0]), &tmpComm); break; default: ERROR_EXIT("unknown partitioning mode\n"); } // === Distribute new partition data. === return distributePartitioning(&(part[0])); }
/**
 * Inverts pix in place (pixInvert() with identical destination and source)
 * and returns the result of pixClone() on the same image.
 *
 * Note that the caller's image is modified.
 */
Pix* invert(Pix* pix)
{
  FUNCNAME("invert");

  // destination == source: the inversion is written back into pix
  pixInvert(pix, pix);

  Pix* handle = pixClone(pix);
  return handle;
}
/**
 * Runs a default-constructed PixEdgeDetector on pix.
 *
 * \return the image produced by PixEdgeDetector::makeEdges()
 */
Pix* edgeDetect(Pix* pix)
{
  FUNCNAME("edgeDetect");

  PixEdgeDetector detector;
  Pix* edges = detector.makeEdges(pix);
  return edges;
}
/**
 * Scales pix by a factor of 0.25 in both directions with pixScaleSmooth().
 *
 * \return the image returned by pixScaleSmooth()
 */
Pix* reduceGray4(Pix* pix)
{
  FUNCNAME("reduceGray4");

  // same factor horizontally and vertically
  const float factor = 0.25f;
  return pixScaleSmooth(pix, factor, factor);
}
/**
 * Scales pix by a factor of 0.5 in both directions with pixScaleSmooth().
 *
 * \return the image returned by pixScaleSmooth()
 */
Pix* reduceGray2(Pix* pix)
{
  FUNCNAME("reduceGray2");

  // same factor horizontally and vertically
  const float factor = 0.5f;
  return pixScaleSmooth(pix, factor, factor);
}
/**
 * Applies pixSavGolFilter() with the arguments (3, 2, 2) to pix and copies
 * the resolution of pix onto the filtered image.
 *
 * \return the filtered image as returned by pixSavGolFilter()
 */
Pix* savGol32(Pix* pix)
{
  FUNCNAME("savGol32");

  Pix* filtered = pixSavGolFilter(pix, 3, 2, 2);

  // carry the input's resolution over to the output
  pixCopyResolution(filtered, pix);

  return filtered;
}