Beispiel #1
0
  /**
   * Evaluates the second derivatives of this DOF vector at every quadrature
   * point of the element described by \p elInfo.
   *
   * For each quadrature point the local coefficients are combined with the
   * second derivatives of the basis functions (taken with respect to the
   * barycentric coordinates) and the result is then contracted twice with
   * elInfo->getGrdLambda() to obtain a dow x dow matrix.
   *
   * \param elInfo    element information; supplies the element and grdLambda.
   * \param quad      quadrature rule; used when \p quadFast is null.
   * \param quadFast  optional fast quadrature with tabulated second
   *                  derivatives; takes precedence over \p quad.
   * \param d2AtQPs   output; resized to the number of quadrature points.
   *
   * At least one of \p quad and \p quadFast must be given; if both are given
   * they must refer to the same quadrature (checked in debug builds only).
   */
  void DOFVectorBase<double>::getD2AtQPs( const ElInfo* elInfo,
                                          const Quadrature* quad,
                                          const FastQuadrature* quadFast,
                                          DenseVector<D2Type<double>::type>& d2AtQPs) const
  {
    FUNCNAME("DOFVector<double>::getD2AtQPs()");

    TEST_EXIT_DBG(quad || quadFast)("neither quad nor quadFast defined\n");

    if (quad && quadFast)
    {
      TEST_EXIT_DBG(quad == quadFast->getQuadrature())
      ("quad != quadFast->quadrature\n");
    }

    TEST_EXIT_DBG(!quadFast || quadFast->getBasisFunctions() == feSpace->getBasisFcts())
    ("invalid basis functions");

    const int worldDim = Global::getGeo(WORLD);
    const int nQuadPoints =
      quadFast ? quadFast->getQuadrature()->getNumPoints() : quad->getNumPoints();

    // Coefficients of this vector on the current element.
    DenseVector<double> coeffs(nBasFcts);
    getLocalVector(elInfo->getElement(), coeffs);

    // Accumulator for the second derivatives in barycentric coordinates.
    DimMat<double> baryD2(dim, dim, 0.0);
    const int nLambda = Global::getGeo(PARTS, dim);
    const DimVec<WorldVector<double>>& grdLambda = elInfo->getGrdLambda();

    // Resets the barycentric accumulator before a new quadrature point.
    auto clearBaryD2 = [&]()
    {
      for (int k = 0; k < nLambda; ++k)
        for (int l = 0; l < nLambda; ++l)
          baryD2[k][l] = 0.0;
    };

    // Contracts the barycentric accumulator with grdLambda on both sides and
    // stores the resulting matrix for quadrature point `iq`.
    auto storeWorldD2 = [&](int iq)
    {
      for (int row = 0; row < worldDim; ++row)
        for (int col = 0; col < worldDim; ++col)
        {
          double sum = 0.0;
          for (int k = 0; k < nLambda; ++k)
            for (int l = 0; l < nLambda; ++l)
              sum += grdLambda[k][row] * grdLambda[l][col] * baryD2[k][l];
          d2AtQPs[iq][row][col] = sum;
        }
    };

    d2AtQPs.change_dim(nQuadPoints);

    if (quadFast)
    {
      // Second derivatives of the basis functions are tabulated in quadFast.
      for (int iq = 0; iq < nQuadPoints; ++iq)
      {
        clearBaryD2();

        for (int b = 0; b < nBasFcts; ++b)
          for (int k = 0; k < nLambda; ++k)
            for (int l = 0; l < nLambda; ++l)
              baryD2[k][l] += coeffs[b] * quadFast->getSecDer(iq, b, k, l);

        storeWorldD2(iq);
      }
    }
    else
    {
      // No tabulated values: evaluate each basis function's second
      // derivative functor at the quadrature point.
      const BasisFunction* basFcts = feSpace->getBasisFcts();
      DimMat<double> phiD2(dim, dim);

      for (int iq = 0; iq < nQuadPoints; ++iq)
      {
        clearBaryD2();

        for (int b = 0; b < nBasFcts; ++b)
        {
          WARNING("not tested after index correction\n");
          (*(basFcts->getD2Phi(b)))(quad->getLambda(iq), phiD2);

          for (int k = 0; k < nLambda; ++k)
            for (int l = 0; l < nLambda; ++l)
              baryD2[k][l] += coeffs[b] * phiD2[k][l];
        }

        storeWorldD2(iq);
      }
    }
  }
/*
 * Table-driven affine-transform driver.
 *
 * For every destination row j in [yStart, yFinish] the function
 *   1. clips the row (CLIP macro),
 *   2. presets the accumulation area to sat_off,
 *   3. for every channel, walks the kernel in groups of kh rows and kw
 *      columns and calls the matching inner kernel routine,
 *   4. saturates the accumulated values into the destination row (only for
 *      the paths that accumulate in the intermediate buffer).
 *
 * Returns MLIB_FAILURE when the intermediate buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 *
 * NOTE(review): the parameters (PARAMS_EXT) and most locals used below
 * (j, i, k, l, off, kh, kw, size, old_size, b_step, x0, y0, ws, table, ...)
 * are presumably introduced by the PARAMS_EXT / DECLAREVAR / CLIP macros,
 * which are defined outside this block -- confirm against those macros.
 */
mlib_status
mlib_ImageAffineTable_32ext(
    PARAMS_EXT)
{
	DECLAREVAR;
	FP_TYPE buff_local[BUFF_SIZE], *buff = buff_local;
	FP_TYPE sat_off = SAT_OFF;
	mlib_s32 sbits, x_mask;
	/* selectors for the specialised 2/4-channel and 3-channel kernels */
	mlib_s32 c2_flag = 0, c3_flag = 0;

#ifndef SRC_EXTEND
#if IMG_TYPE == 4
	mlib_s32 align = (mlib_s32)lineAddr[0] | ws->srcStride;

	/*
	 * c2_flag: n even, m a multiple of 4, nchan even and both the first
	 * line address and the source stride multiples of 8.
	 * c3_flag: n and m even, exactly 3 channels and type == 1.
	 */
	c2_flag = ((n & 1) | (m & 3) | (nchan & 1) | (align & 7)) == 0;
	c3_flag = (n & 1) == 0 && (m & 1) == 0 && (nchan == 3) && (type == 1);
#endif /* IMG_TYPE == 4 */
#endif /* SRC_EXTEND */

	/*
	 * For type < 4 the result is accumulated in `buff`; switch from the
	 * on-stack array to a heap buffer when a row does not fit.
	 */
	if (type < 4) {
#if IMG_TYPE == 4
		b_step = (nchan == 4) ? 2 : nchan;
		max_xsize *= b_step;
#ifdef MLIB_USE_FTOI_CLAMPING
		sat_off = -127.5;
#else /* MLIB_USE_FTOI_CLAMPING */
		sat_off = 0.5;
#endif /* MLIB_USE_FTOI_CLAMPING */
#endif /* IMG_TYPE == 4 */

		if (max_xsize > BUFF_SIZE) {
			buff = __mlib_malloc(max_xsize * sizeof (FP_TYPE));

			if (buff == NULL)
				return (MLIB_FAILURE);
		}
	}
	/* pick the filter tables matching the floating-point width in use */
#if FLT_BITS == 2
	filterX = table->dataH_f32;
	filterY = table->dataV_f32;
#else /* FLT_BITS == 2 */
	filterX = table->dataH_d64;
	filterY = table->dataV_d64;
#endif /* FLT_BITS == 2 */

	DIST_BITS();

#ifndef SRC_EXTEND
	switch (nchan) {
	    case 1:
		    sbits = 0;
		    break;
	    case 2:
		    sbits = 1;
		    break;
	    case 3:
		    sbits = 0;
		    break;
	    case 4:
		    sbits = 2;
		    break;
	    default:
		    sbits = 0;
		    break;
	}

#else /* SRC_EXTEND */
	sbits = 0;
#endif /* SRC_EXTEND */
	/* mask that clears the low `sbits` bits; x_shift is reduced to match */
	x_mask = ~((1 << sbits) - 1);
	x_shift -= sbits;

	/* publish the shift/mask parameters to the kernels through `ws` */
	ws->x_shift = x_shift;
	ws->x_mask = x_mask;
	ws->xf_shift = xf_shift;
	ws->xf_mask = xf_mask;
	ws->yf_shift = yf_shift;
	ws->yf_mask = yf_mask;

	for (j = yStart; j <= yFinish; j++) {
		/*
		 * Remember the previous row's size: only the part of `buff`
		 * beyond it still needs presetting, because the saturation
		 * loops below reset every element they consume to sat_off.
		 */
		old_size = size;
		CLIP(CHAN1);

		if (type < 4) {
#if IMG_TYPE == 4
/*
 * u8 via F32 image
 */

			if (c2_flag || c3_flag)
				b_step = (nchan == 4) ? 2 : nchan;
			else
				b_step = 1;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
			for (i = b_step * old_size; i < b_step * size; i++) {
				buff[i] = sat_off;
			}

#else /* IMG_TYPE == 4 */
/*
 * process by one channel
 */
			b_step = 1;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
			for (i = old_size; i < size; i++) {
				buff[i] = sat_off;
			}

#endif /* IMG_TYPE == 4 */
		} else {
/* mlib_f32 types */
			/* accumulate directly in the destination row */
			b_step = nchan;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
			for (i = 0; i < size * nchan; i++) {
				dstPixelPtr[i] = (DTYPE) sat_off;
			}
		}

		ws->b_step = b_step;

/*
 * move to kernel center
 */
		x0 -= ws->x_move;
		y0 -= ws->y_move;

		ws->size = size;
		ws->x0 = x0;
		ws->y0 = y0;

		for (k = 0; k < nchan; k++) {
#if IMG_TYPE < 4
			DTYPE *dPtr = dstPixelPtr + k;
#endif /* IMG_TYPE < 4 */

			/*
			 * The c2 path is run for even k only and the c3 path
			 * for k == 0 only; the remaining channels are skipped
			 * here.
			 */
			if (c2_flag && (k & 1))
				continue;

			if (c3_flag && k)
				continue;

			ws->k = k;

			if (type >= 4) {
				/* accumulate in place, starting at channel k */
				buff = (void *)(dstPixelPtr + k);
			}

			for (l = 0; l < n; l += kh) {
/* kernel lines */
				kh = n - l;

				/*
				 * Take 4 lines per pass when at least 4 remain,
				 * m is a multiple of 4 and no c2/c3 kernel is
				 * active; otherwise 2 lines when at least 2
				 * remain; otherwise the single remaining line.
				 */
				if (kh >= 4 && (m & 3) == 0 &&
				    !(c2_flag | c3_flag))
					kh = 4;
				else if (kh >= 2)
					kh = 2;

				for (off = 0; off < m; off += kw) {
/* offset in current kernel line */

					/*
					 * x_shift was reduced by sbits above,
					 * so (x_shift + sbits) is the shift
					 * value before that reduction.
					 */
					ws->x0 =
					    x0 + (off << (x_shift + sbits));

					kw = m - off;

					/*
					 * Limit the column group: MAX_KER when
					 * more than 2 * MAX_KER remain, half of
					 * the remainder when more than MAX_KER
					 * remain, else everything that is left.
					 */
					if (kw > 2 * MAX_KER)
						kw = MAX_KER;
					else if (kw > MAX_KER)
						kw = kw / 2;

#ifndef SRC_EXTEND
#if IMG_TYPE == 4

					if (c3_flag) {
						/* c3 kernel: 2 columns per call */
						kw = 2;
						FUNCNAME(c3_2_2)
						    (buff, filterX + off,
						    filterY + l, lineAddr + l,
						    ws);
						continue;
					}

					if (c2_flag) {
						if (nchan == 2) {
							FUNCNAME(c2_2_4)
							    (buff,
							    filterX + off,
							    filterY + l,
							    lineAddr + l, ws);
						} else {
							FUNCNAME(c4_2_4)
							    (buff,
							    filterX + off,
							    filterY + l,
							    lineAddr + l, ws);
						}
					} else
#endif /* IMG_TYPE == 4 */
#endif /* SRC_EXTEND */

						/* generic kernel (also the `else` of c2_flag above) */
						CALL_FUNC(32);
				}
			}

#if IMG_TYPE < 4
			/*
			 * Saturate the accumulated channel into the interleaved
			 * destination and reset the buffer for the next use.
			 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
			for (i = 0; i < size; i++) {
				FP_TYPE val = buff[i];

#if IMG_TYPE < 3 && defined(MLIB_USE_FTOI_CLAMPING)
				mlib_s32 ival;
#endif /* IMG_TYPE < 3 && defined(MLIB_USE_FTOI_CLAMPING) */

				SAT(dPtr[i * nchan], ival, val);

				buff[i] = sat_off;
			}

#endif /* IMG_TYPE < 4 */

#if IMG_TYPE == 4

			if (type == 1) {
				/* 8-bit destination: start of this row, channel k */
				mlib_u8 *dp =
				    (mlib_u8 *)dstData + nchan * xLeft + k;

				if (c3_flag) {
					/* all 3 channels were accumulated interleaved */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
					for (i = 0; i < 3 * size; i++) {
						FP_TYPE val = (FP_TYPE) buff[i];

#ifdef MLIB_USE_FTOI_CLAMPING
						mlib_s32 ival;
#endif /* MLIB_USE_FTOI_CLAMPING */
						SAT8(dp[i], ival, val);

						buff[i] = sat_off;
					}
				} else if (c2_flag) {
					/* buff holds channel pairs (k, k + 1) per pixel */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
					for (i = 0; i < size; i++) {
						FP_TYPE val0 =
						    (FP_TYPE) buff[2 * i];
						FP_TYPE val1 =
						    (FP_TYPE) buff[2 * i + 1];
#ifdef MLIB_USE_FTOI_CLAMPING
						mlib_s32 ival0, ival1;
#endif /* MLIB_USE_FTOI_CLAMPING */
						SAT8(dp[i * nchan], ival0,
						    val0);
						SAT8(dp[i * nchan + 1], ival1,
						    val1);

						buff[2 * i] = sat_off;
						buff[2 * i + 1] = sat_off;
					}
				} else {
					/* one channel per pass */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
					for (i = 0; i < size; i++) {
						FP_TYPE val = (FP_TYPE) buff[i];

#ifdef MLIB_USE_FTOI_CLAMPING
						mlib_s32 ival;
#endif /* MLIB_USE_FTOI_CLAMPING */
						SAT8(dp[i * nchan], ival, val);

						buff[i] = sat_off;
					}
				}
			}
#endif /* IMG_TYPE == 4 */
		}
	}

	/*
	 * Release the heap buffer if one was allocated. For type >= 4 `buff`
	 * points into the destination image and must not be freed.
	 */
	if (type < 4) {
		if (buff != buff_local)
			__mlib_free(buff);
	}

	return (MLIB_SUCCESS);
}
    /**
     * Builds the per-rank ParMETIS mesh arrays (eptr, eind, elmdist and,
     * when the mesh dimension equals the world dimension, xyz) from the
     * level-0 elements of \p mesh that are flagged in \p elementInRank.
     *
     * \param mesh            mesh to traverse.
     * \param comm            communicator used to gather the element counts.
     * \param elementInRank   element index -> "belongs to this rank" flag.
     * \param mapLocalGlobal  optional DOF map; when given, the `global`
     *                        entry of each vertex DOF is written to eind,
     *                        otherwise the DOF itself.
     *
     * Aborts through TEST_EXIT when no element belongs to this rank.
     */
    ParMetisMesh::ParMetisMesh(Mesh* mesh, MPI::Intracomm* comm,
                               std::map<int, bool>& elementInRank,
                               DofMap* mapLocalGlobal)
      : dim(mesh->getDim()),
        nElements(0),
        mpiComm(comm)
    {
      FUNCNAME("ParMetisMesh::ParMetisMesh()");

      const int nRanks = mpiComm->Get_size();
      const int worldDim = Global::getGeo(WORLD);

      // First pass: count the elements owned by this rank.
      TraverseStack stack;
      int ownedCount = 0;
      for (ElInfo* elInfo = stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL);
           elInfo;
           elInfo = stack.traverseNext(elInfo))
      {
        if (elementInRank[elInfo->getElement()->getIndex()])
          ++ownedCount;
      }
      nElements = ownedCount;

      TEST_EXIT(nElements > 0)("No elements in ParMETIS mesh!\n");

      const int nVertices = mesh->getGeo(VERTEX);

      // Allocate the ParMETIS arrays.
      eptr = new int[nElements + 1];
      eind = new int[nElements * nVertices];
      elmdist = new int[nRanks + 1];
      elem_p2a = new int[nElements];
      // Coordinates are only stored when mesh and world dimension agree.
      xyz = (dim == worldDim) ? new float[nElements * dim] : NULL;

      eptr[0] = 0;

      // Gather the element count of every rank into elmdist[1..nRanks] and
      // turn the counts into a running sum, so that elmdist[r] is the index
      // of the first element of rank r.
      mpiComm->Allgather(&nElements, 1, MPI_INT, elmdist + 1, 1, MPI_INT);

      elmdist[0] = 0;
      for (int rank = 2; rank <= nRanks; ++rank)
        elmdist[rank] += elmdist[rank - 1];

      // Second pass: fill the element data.
      DimVec<double> bary(dim, 1.0 / nVertices);
      WorldVector<double> world;

      int localIdx = 0;     // running ParMETIS index of the current element
      int nodeCounter = 0;  // number of eind entries written so far (eptr value)
      int eindPos = 0;      // write cursor into eind
      int xyzPos = 0;       // write cursor into xyz

      for (ElInfo* elInfo =
             stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL | Mesh::FILL_COORDS);
           elInfo;
           elInfo = stack.traverseNext(elInfo))
      {
        Element* element = elInfo->getElement();
        const int index = element->getIndex();

        // Skip elements that do not belong to this rank.
        if (!elementInRank[index])
          continue;

        // Remember the index mapping in both directions.
        setParMetisIndex(index, localIdx);
        setAMDiSIndex(localIdx, index);

        // eptr entry: end offset of this element's nodes in eind.
        nodeCounter += nVertices;
        eptr[localIdx + 1] = nodeCounter;

        // eind entries: the element's vertex nodes.
        for (int v = 0; v <= dim; ++v)
        {
          if (mapLocalGlobal)
            eind[eindPos] = (*mapLocalGlobal)[element->getDof(v, 0)].global;
          else
            eind[eindPos] = element->getDof(v, 0);

          ++eindPos;
        }

        // xyz entries: world coordinates of the point with equal
        // barycentric weights.
        if (xyz)
        {
          elInfo->coordToWorld(bary, world);
          for (int c = 0; c < dim; ++c)
            xyz[xyzPos++] = static_cast<float>(world[c]);
        }

        ++localIdx;
      }
    }
    /**
     * Distributes a freshly computed partitioning to all ranks.
     *
     * \param part  for each local ParMETIS element i, the rank (partition)
     *              the element is assigned to; must hold
     *              parMetisMesh->getNumElements() entries, each in
     *              [0, mpiSize).
     *
     * \return false if at least one partition would receive no element at
     *         all (no data is exchanged and no member is modified in that
     *         case); true otherwise.
     *
     * On success the function
     *  - fills sendElements with the AMDiS indices this rank hands over to
     *    other ranks and recvElements with the indices it receives,
     *  - rewrites elementInRank so that exactly the elements now owned by
     *    this rank are flagged true,
     *  - deletes parMetisMesh and resets it to NULL.
     */
    bool ParMetisPartitioner::distributePartitioning(int* part)
    {
      FUNCNAME("ParMetisPartitioner::distributePartitioning()");

      const int mpiSize = mpiComm->Get_size();
      const int mpiRank = mpiComm->Get_rank();
      const int nElements = parMetisMesh->getNumElements();

      // All scratch buffers are std::vector so that every exit path,
      // including the early return below, releases them.

      // nPartitionElements[i] is the number of local elements assigned to
      // the i-th partition
      std::vector<int> nPartitionElements(mpiSize, 0);
      for (int i = 0; i < nElements; i++)
        nPartitionElements[part[i]]++;

      // nRankElements[i] is the number of elements rank i sends to this rank
      std::vector<int> nRankElements(mpiSize);
      mpiComm->Alltoall(nPartitionElements.data(), 1, MPI_INT,
                        nRankElements.data(), 1, MPI_INT);

      // sum up partition elements over all ranks
      std::vector<int> sumPartitionElements(mpiSize);
      mpiComm->Allreduce(nPartitionElements.data(), sumPartitionElements.data(),
                         mpiSize, MPI_INT, MPI_SUM);

      // An empty partition makes the partitioning unusable. The sums are
      // identical on all ranks, so every rank takes the same branch.
      for (int i = 0; i < mpiSize; i++)
        if (sumPartitionElements[i] == 0)
          return false;

      // prepare distribution: partitionElements holds the AMDiS indices
      // grouped by target partition, bufferOffset[i] is the start of group i
      std::vector<int> bufferOffset(mpiSize);
      bufferOffset[0] = 0;
      for (int i = 1; i < mpiSize; i++)
        bufferOffset[i] = bufferOffset[i - 1] + nPartitionElements[i - 1];

      std::vector<int> partitionElements(nElements);
      // next free slot of each group
      std::vector<int> writePos(bufferOffset);

      sendElements.clear();
      for (int i = 0; i < nElements; i++)
      {
        const int partition = part[i];
        const int amdisIndex = parMetisMesh->getAMDiSIndex(i);

        if (partition != mpiRank)
          sendElements[partition].push_back(amdisIndex);

        partitionElements[writePos[partition]++] = amdisIndex;
      }

      // all to all: partition elements to rank elements
      std::vector<int> rankElements(sumPartitionElements[mpiRank]);
      std::vector<int> recvBufferOffset(mpiSize);
      recvBufferOffset[0] = 0;
      for (int i = 1; i < mpiSize; i++)
        recvBufferOffset[i] = recvBufferOffset[i - 1] + nRankElements[i - 1];

      mpiComm->Alltoallv(partitionElements.data(),
                         nPartitionElements.data(),
                         bufferOffset.data(),
                         MPI_INT,
                         rankElements.data(),
                         nRankElements.data(),
                         recvBufferOffset.data(),
                         MPI_INT);

      TEST_EXIT(elementInRank.size() != 0)("Should not happen!\n");
      for (map<int, bool>::iterator it = elementInRank.begin();
           it != elementInRank.end(); ++it)
        it->second = false;

      // Create map which stores for each element index on macro level
      // if the element is in the partition of this rank.
      recvElements.clear();
      for (int i = 0; i < mpiSize; i++)
      {
        const int first = recvBufferOffset[i];
        const int last = first + nRankElements[i];
        for (int pos = first; pos < last; ++pos)
        {
          const int elIndex = rankElements[pos];
          elementInRank[elIndex] = true;
          if (i != mpiRank)
            recvElements[i].push_back(elIndex);
        }
      }

      delete parMetisMesh;
      parMetisMesh = NULL;

      return true;
    }
Beispiel #5
0
// Returns the result of pixConvertTo8() for `pix`, passing FALSE as the
// second argument (presumably Leptonica's colormap flag -- confirm).
Pix* convertTo8(Pix* pix) {
    FUNCNAME("convertTo8");
    Pix* const converted = pixConvertTo8(pix, FALSE);
    return converted;
}
    /**
     * Runs ParMETIS on the elements currently owned by this rank and
     * distributes the resulting partitioning.
     *
     * \param elemWeights  element index -> weight; looked up for every
     *                     level-0 element flagged in elementInRank.
     * \param mode         INITIAL (PartKway), ADAPTIVE_REPART
     *                     (AdaptiveRepart) or REFINE_PART (RefineKway);
     *                     any other value ends in ERROR_EXIT.
     *
     * \return the result of distributePartitioning() for the computed
     *         partition vector.
     */
    bool ParMetisPartitioner::partition(map<int, double>& elemWeights,
                                        PartitionMode mode)
    {
      FUNCNAME("ParMetisPartitioner::partition()");

      const int nRanks = mpiComm->Get_size();


      // === Create parmetis mesh ===

      // Deleting a null pointer is a no-op, so no guard is needed.
      delete parMetisMesh;

      TEST_EXIT_DBG(elementInRank.size() != 0)("Should not happen!\n");

      parMetisMesh = new ParMetisMesh(mesh, mpiComm, elementInRank, mapLocalGlobal);

      const int nElements = parMetisMesh->getNumElements();


      // === Collect the weights of the local elements ===

      vector<float> floatWgts(nElements);
      unsigned int nextWgt = 0;
      float maxWgt = 0.0;

      TraverseStack stack;
      for (ElInfo* elInfo = stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL);
           elInfo;
           elInfo = stack.traverseNext(elInfo))
      {
        const int index = elInfo->getElement()->getIndex();

        if (!elementInRank[index])
          continue;

        const float wgt = static_cast<float>(elemWeights[index]);
        maxWgt = std::max(wgt, maxWgt);

        TEST_EXIT_DBG(nextWgt < floatWgts.size())("Should not happen!\n");
        floatWgts[nextWgt] = wgt;
        ++nextWgt;
      }

      TEST_EXIT_DBG(nextWgt == floatWgts.size())("Should not happen!\n");

      // Global maximum weight. The value is not used afterwards, but the
      // reduction is a collective call and is therefore kept.
      float globalMaxWgt;
      mpiComm->Allreduce(&maxWgt, &globalMaxWgt, 1, MPI_FLOAT, MPI_MAX);
      maxWgt = globalMaxWgt;


      // === Create dual graph ===

      ParMetisGraph parMetisGraph(parMetisMesh, mpiComm);


      // === Set up the ParMETIS arguments ===

      int wgtflag = 2;      // weights at vertices only
      int numflag = 0;      // C-style numbering
      int ncon = 1;         // one weight per vertex
      int nparts = nRanks;  // one partition per rank
      double ubvec = 1.05;
      int options[4] = {0, 0, 15, PARMETIS_PSR_COUPLED};
      int edgecut = -1;

      // Every partition gets the same target weight.
      vector<double> tpwgts(nRanks, 1.0 / static_cast<double>(nparts));
      vector<int> part(nElements);

      // Integer vertex weights: the float weights converted to int without
      // any scaling.
      vector<int> wgts(nElements);
      for (int el = 0; el < nElements; ++el)
        wgts[el] = static_cast<int>(floatWgts[el]);


      // === Start ParMETIS. ===

      MPI_Comm tmpComm = MPI_Comm(*mpiComm);

      if (mode == INITIAL)
      {
        ParMETIS_V3_PartKway(parMetisMesh->getElementDist(),
                             parMetisGraph.getXAdj(),
                             parMetisGraph.getAdjncy(),
                             &wgts[0],
                             NULL,
                             &wgtflag,
                             &numflag,
                             &ncon,
                             &nparts,
                             &tpwgts[0],
                             &ubvec,
                             options,
                             &edgecut,
                             &part[0],
                             &tmpComm);
      }
      else if (mode == ADAPTIVE_REPART)
      {
        // The vertex sizes equal the integer vertex weights.
        vector<int> vsize(wgts);

        ParMETIS_V3_AdaptiveRepart(parMetisMesh->getElementDist(),
                                   parMetisGraph.getXAdj(),
                                   parMetisGraph.getAdjncy(),
                                   &wgts[0],
                                   NULL,
                                   &vsize[0],
                                   &wgtflag,
                                   &numflag,
                                   &ncon,
                                   &nparts,
                                   &tpwgts[0],
                                   &ubvec,
                                   &itr,
                                   options,
                                   &edgecut,
                                   &part[0],
                                   &tmpComm);
      }
      else if (mode == REFINE_PART)
      {
        ParMETIS_V3_RefineKway(parMetisMesh->getElementDist(),
                               parMetisGraph.getXAdj(),
                               parMetisGraph.getAdjncy(),
                               &wgts[0],
                               NULL,
                               &wgtflag,
                               &numflag,
                               &ncon,
                               &nparts,
                               &tpwgts[0],
                               &ubvec,
                               options,
                               &edgecut,
                               &part[0],
                               &tmpComm);
      }
      else
      {
        ERROR_EXIT("unknown partitioning mode\n");
      }


      // === Distribute new partition data. ===

      return distributePartitioning(&part[0]);
    }
Beispiel #7
0
// Inverts `pix` by calling pixInvert() with `pix` as both arguments, then
// returns pixClone(pix). Note that the input image itself is passed as the
// destination of the inversion.
Pix* invert(Pix* pix){
    FUNCNAME("invert");
    pixInvert(pix, pix);
    Pix* const handle = pixClone(pix);
    return handle;
}
Beispiel #8
0
// Runs a default-constructed PixEdgeDetector on `pix` and returns whatever
// its makeEdges() produces.
Pix* edgeDetect(Pix* pix){
    FUNCNAME("edgeDetect");
    PixEdgeDetector detector;
    Pix* const edges = detector.makeEdges(pix);
    return edges;
}
Beispiel #9
0
// Returns pixScaleSmooth() of `pix` with a scale factor of 0.25 in both
// directions.
Pix* reduceGray4(Pix* pix){
    FUNCNAME("reduceGray4");
    const double scale = 0.25;
    Pix* const reduced = pixScaleSmooth(pix, scale, scale);
    return reduced;
}
Beispiel #10
0
// Returns pixScaleSmooth() of `pix` with a scale factor of 0.5 in both
// directions.
Pix* reduceGray2(Pix* pix){
    FUNCNAME("reduceGray2");
    const double scale = 0.5;
    Pix* const reduced = pixScaleSmooth(pix, scale, scale);
    return reduced;
}
Beispiel #11
0
// Applies pixSavGolFilter(pix, 3, 2, 2) and copies the resolution of the
// input onto the filtered image before returning it.
// NOTE(review): the meaning of the arguments 3, 2, 2 is defined by
// pixSavGolFilter, which is not visible here -- confirm.
Pix* savGol32(Pix* pix){
    FUNCNAME("savGol32");
    Pix* const filtered = pixSavGolFilter(pix, 3, 2, 2);
    pixCopyResolution(filtered, pix);
    return filtered;
}