C++ (Cpp) saxpy示例

示例#1

0

显示文件

文件： blashelper.hpp 项目： BenJamesbabala/twostreamfusion

 static vl::Error
 axpy(vl::Context & context,
      ptrdiff_t n,
      type alpha,
      type const *x, ptrdiff_t incx,
      type *y, ptrdiff_t incy)
 {
   saxpy(&n,
         &alpha,
         (float*)x, &incx,
         (float*)y, &incy) ;
   return vl::vlSuccess ;
 }

示例#2

0

显示文件

文件： mrefine.c 项目： netsym/minissf

/*************************************************************************
* This function computes the initial id/ed
**************************************************************************/
void MocCompute2WayPartitionParams(CtrlType *ctrl, GraphType *graph)
{
    int i, j, /*k, l,*/ nvtxs, ncon, nbnd, mincut;
    idxtype *xadj, *adjncy, *adjwgt;
    float *nvwgt, *npwgts;
    idxtype *id, *ed, *where;
    idxtype *bndptr, *bndind;
    int me/*, other*/;

    nvtxs = graph->nvtxs;
    ncon = graph->ncon;
    xadj = graph->xadj;
    nvwgt = graph->nvwgt;
    adjncy = graph->adjncy;
    adjwgt = graph->adjwgt;

    where = graph->where;
    npwgts = sset(2*ncon, 0.0, graph->npwgts);
    id = idxset(nvtxs, 0, graph->id);
    ed = idxset(nvtxs, 0, graph->ed);
    bndptr = idxset(nvtxs, -1, graph->bndptr);
    bndind = graph->bndind;


    /*------------------------------------------------------------
    / Compute now the id/ed degrees
    /------------------------------------------------------------*/
    nbnd = mincut = 0;
    for (i=0; i<nvtxs; i++) {
        ASSERT(where[i] >= 0 && where[i] <= 1);
        me = where[i];
        saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);

        for (j=xadj[i]; j<xadj[i+1]; j++) {
            if (me == where[adjncy[j]])
                id[i] += adjwgt[j];
            else
                ed[i] += adjwgt[j];
        }

        if (ed[i] > 0 || xadj[i] == xadj[i+1]) {
            mincut += ed[i];
            bndptr[i] = nbnd;
            bndind[nbnd++] = i;
        }
    }

    graph->mincut = mincut/2;
    graph->nbnd = nbnd;

}

示例#3

0

显示文件

文件： poly_interp.hpp 项目： memmett/PFASST

        //! @{
        virtual void interpolate_initial(shared_ptr<ISweeper<time>> dst,
                                         shared_ptr<const ISweeper<time>> src) override
        {
          auto& fine = as_encap_sweeper(dst);
          auto& crse = as_encap_sweeper(src);

          auto crse_factory = crse.get_factory();
          auto fine_factory = fine.get_factory();

          auto crse_delta = crse_factory->create(solution);
          this->restrict(crse_delta, fine.get_start_state());
          crse_delta->saxpy(-1.0, crse.get_start_state());

          auto fine_delta = fine_factory->create(solution);
          this->interpolate(fine_delta, crse_delta);
          fine.get_start_state()->saxpy(-1.0, fine_delta);

          fine.reevaluate(true);
        }

示例#4

0

显示文件

文件： ch02.4.cpp 项目： gpu0/cpp-threads

int main()
{

	uint32_t num_cpus = std::thread::hardware_concurrency();
	std::vector<std::thread>threads(num_cpus);

	float *S = new float[num_cpus], a = 1.0f, b = 2.0f, *X = new float[num_cpus], *Y = new float[num_cpus];

	for(uint32_t i=0;i<num_cpus;i++){
		X[i] = 1.0f*i;
		Y[i] = 2.0f*(i+1);
		Saxpy saxpy(std::ref(S[i]), a, X[i], b, Y[i]);
		threads[i] = std::thread(saxpy);
	}

	for(uint32_t i=0;i<num_cpus;i++){
		threads[i].join();
		std::cout<<S[i]<<std::endl;
	}
}

示例#5

0

显示文件

文件： success_simd_03_parallel_saxpy.c 项目： drpicox/mcxx

int main (int argc, char * argv[])
{
    const int N = 16;
    const int iters = 1;

    float *x, *y, *z; 
    
    posix_memalign((void **)&x, VECTOR_SIZE, N*sizeof(float));
    posix_memalign((void **)&y, VECTOR_SIZE, N*sizeof(float));
    posix_memalign((void **)&z, VECTOR_SIZE, N*sizeof(float));
    
    float a = 0.93f;

    int i, j;

    for (i=0; i<N; i++)
    {
        x[i] = i+1;
        y[i] = i-1;
        z[i] = 0.0f;
    }

    for (i=0; i<iters; i++)
    {
        saxpy(x, y, z, a, N);
    }

    for (i=0; i<N; i++)
    {
        if (z[i] != (a * x[i] + y[i]))
        {
            printf("Error\n");
            return (1);
        }
    }

    printf("SUCCESS!\n");
    return 0;
}

示例#6

0

显示文件

文件： saxpy.c 项目： mfarrera/ompss-tests

int main(int argc, char* argv[]) {
    float  a, x[N], y[N];
	a=5;
	int i;
	for (i=0; i<N; ++i){
		x[i]=i;
		y[i]=i+2;
	}

    saxpy(N, a, x,  y);

#pragma omp taskwait
	
	//Check results	
	for (i=0; i<N; ++i){
		if (y[i]!=a*i+(i+2)){
			printf("Error when checking results, in position %d\n",i);
			return -1;
		}
	}
	printf("Results are correct\n");
    return 0;
}

示例#7

0

显示文件

文件： poly_interp.hpp 项目： memmett/PFASST

        virtual void interpolate(shared_ptr<ISweeper<time>> dst,
                                 shared_ptr<const ISweeper<time>> src,
                                 bool interp_initial) override
        {
          auto& fine = as_encap_sweeper(dst);
          auto& crse = as_encap_sweeper(src);

          if (tmat.rows() == 0) {
            tmat = pfasst::quadrature::compute_interp<time>(fine.get_nodes(), crse.get_nodes());
          }

          if (interp_initial) {
            this->interpolate_initial(dst, src);
          }

          size_t nfine = fine.get_nodes().size();
          size_t ncrse = crse.get_nodes().size();

          auto crse_factory = crse.get_factory();
          auto fine_factory = fine.get_factory();

          EncapVecT fine_state(nfine), fine_delta(ncrse);

          for (size_t m = 0; m < nfine; m++) { fine_state[m] = fine.get_state(m); }
          for (size_t m = 0; m < ncrse; m++) { fine_delta[m] = fine_factory->create(solution); }

          auto crse_delta = crse_factory->create(solution);
          for (size_t m = 0; m < ncrse; m++) {
            crse_delta->copy(crse.get_state(m));
            crse_delta->saxpy(-1.0, crse.get_saved_state(m));
            interpolate(fine_delta[m], crse_delta);
          }

          fine.get_state(0)->mat_apply(fine_state, 1.0, tmat, fine_delta, false);

          fine.reevaluate();
        }

示例#8

0

显示文件

文件： saxpy.cpp 项目： jaredhoberock/personal

 void operator()(float a, float *x, float *y, std::size_t n)
 {
   saxpy(a,x,y,n);
 }

示例#9

0

显示文件

文件： sgefa.c 项目： 8l/insieme

int sgefa ( float a[], int lda, int n, int ipvt[] )

/*******************************************************************************/
/*
  Purpose:

    SGEFA factors a matrix by gaussian elimination.

  Discussion:

    Matrix references which would, mathematically, be written A(I,J)
    must be written here as:
    * A[I+J*LDA], when the value is needed, or
    * A+I+J*LDA, when the address is needed.

  Modified:

    07 March 2008

  Author:

    FORTRAN77 original version by Cleve Moler.
    C version by John Burkardt.

  Reference:

    Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
    LINPACK User's Guide,
    SIAM, 1979,
    ISBN13: 978-0-898711-72-1,
    LC: QA214.L56.

  Parameters:

    Input/output, float A[LDA*N].  On input, the matrix to be factored.
    On output, an upper triangular matrix and the multipliers which were
    used to obtain it.  The factorization can be written A = L * U where
    L is a product of permutation and unit lower triangular matrices and
    U is upper triangular.

    Input, int LDA, the leading dimension of the matrix.

    Input, int N, the order of the matrix.

    Output, int IPVT[N], the pivot indices.

    Output, int SGEFA, indicates singularity.
    If 0, this is the normal value, and the algorithm succeeded.
    If K, then on the K-th elimination step, a zero pivot was encountered.
    The matrix is numerically not invertible.
*/
{
  int j;
  int info;
  int k;
  int kp1;
  int l;
  int nm1;
  float t;

  info = 0;

  for ( k = 1; k <= n - 1; k++ )
  {
/*
  Find l = pivot index.
*/
    l = isamax ( n-k+1, &a[k-1+(k-1)*lda], 1 ) + k - 1;
    ipvt[k-1] = l;
/*
  Zero pivot implies this column already triangularized.
*/
    if ( a[l-1+(k-1)*lda] != 0.0 )
    {
/*
  Interchange if necessary.
*/
      if ( l != k )
      {
        t                = a[l-1+(k-1)*lda];
        a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda];
        a[k-1+(k-1)*lda] = t;
      }
/*
  Compute multipliers.
*/
      t = - 1.0 / a[k-1+(k-1)*lda];
      sscal ( n-k, t, &a[k+(k-1)*lda], 1 );
/*
  Row elimination with column indexing.
*/
      for ( j = k + 1; j <= n; j++ )
      {
        t = a[l-1+(j-1)*lda];
        if (l != k)
        {
          a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda];
          a[k-1+(j-1)*lda] = t;
        }
        saxpy ( n-k, t, &a[k+(k-1)*lda], 1, &a[k+(j-1)*lda], 1 );
      }
    }
    else
    {
      info = k;
    }
  }
  ipvt[n-1] = n;

  if (a[n-1+(n-1)*lda] == 0.0 )
  {
    info = n - 1;
  }
  return info;
}

示例#10

0

显示文件

文件： host_data-1.c 项目： vinriviere/m68k-atari-mint-gcc

int
main()
{
#define N 8
  int i;
  float x_ref[N], y_ref[N];
  float x[N], y[N];
  cublasHandle_t h;
  float a = 2.0;

  for (i = 0; i < N; i++)
    {
      x[i] = x_ref[i] = 4.0 + i;
      y[i] = y_ref[i] = 3.0;
    }

  saxpy (N, a, x_ref, y_ref);

  cublasCreate (&h);

#pragma acc data copyin (x[0:N]) copy (y[0:N])
  {
#pragma acc host_data use_device (x, y)
    {
      cublasSaxpy (h, N, &a, x, 1, y, 1);
    }
  }

  validate_results (N, y, y_ref);

#pragma acc data create (x[0:N]) copyout (y[0:N])
  {
#pragma acc kernels
    for (i = 0; i < N; i++)
      y[i] = 3.0;

#pragma acc host_data use_device (x, y)
    {
      cublasSaxpy (h, N, &a, x, 1, y, 1);
    }
  }

  cublasDestroy (h);

  validate_results (N, y, y_ref);

  for (i = 0; i < N; i++)
    y[i] = 3.0;

  /* There's no need to use host_data here.  */
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
  {
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
    saxpy (N, a, x, y);
  }

  validate_results (N, y, y_ref);

  /* Exercise host_data with data transferred with acc enter data.  */

  for (i = 0; i < N; i++)
    y[i] = 3.0;

#pragma acc enter data copyin (x, a, y)
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
  {
    saxpy (N, a, x, y);
  }
#pragma acc exit data delete (x, a) copyout (y)

  validate_results (N, y, y_ref);

  return 0;
}

示例#11

0

显示文件

文件： ccgraph.c 项目： aceskpark/osfeo

/*************************************************************************
* This function creates the coarser graph
**************************************************************************/
void CreateCoarseGraphNoMask(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm)
{
  int i, j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize;
  idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj;
  idxtype *cmap, *htable;
  idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum;
  float *nvwgt, *cnvwgt;
  GraphType *cgraph;

  dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0);

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr));

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  vwgt = graph->vwgt;
  vsize = graph->vsize;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  cmap = graph->cmap;


  /* Initialize the coarser graph */
  cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize);
  cxadj = cgraph->xadj;
  cvwgt = cgraph->vwgt;
  cvsize = cgraph->vsize;
  cnvwgt = cgraph->nvwgt;
  cadjwgtsum = cgraph->adjwgtsum;
  cadjncy = cgraph->adjncy;
  cadjwgt = cgraph->adjwgt;


  htable = idxset(cnvtxs, -1, idxwspacemalloc(ctrl, cnvtxs));

  iend = xadj[nvtxs];
  auxadj = ctrl->wspace.auxcore; 
  memcpy(auxadj, adjncy, iend*sizeof(idxtype)); 
  for (i=0; i<iend; i++)
    auxadj[i] = cmap[auxadj[i]];

  cxadj[0] = cnvtxs = cnedges = 0;
  for (i=0; i<nvtxs; i++) {
    v = perm[i];
    if (cmap[v] != cnvtxs) 
      continue;

    u = match[v];
    if (ncon == 1)
      cvwgt[cnvtxs] = vwgt[v];
    else
      scopy(ncon, nvwgt+v*ncon, cnvwgt+cnvtxs*ncon);

    if (dovsize)
      cvsize[cnvtxs] = vsize[v];

    cadjwgtsum[cnvtxs] = adjwgtsum[v];
    nedges = 0;

    istart = xadj[v];
    iend = xadj[v+1];
    for (j=istart; j<iend; j++) {
      k = auxadj[j];
      if ((m = htable[k]) == -1) {
        cadjncy[nedges] = k;
        cadjwgt[nedges] = adjwgt[j];
        htable[k] = nedges++;
      }
      else {
        cadjwgt[m] += adjwgt[j];
      }
    }

    if (v != u) { 
      if (ncon == 1)
        cvwgt[cnvtxs] += vwgt[u];
      else
        saxpy(ncon, 1.0, nvwgt+u*ncon, 1, cnvwgt+cnvtxs*ncon, 1);

      if (dovsize)
        cvsize[cnvtxs] += vsize[u];

      cadjwgtsum[cnvtxs] += adjwgtsum[u];

      istart = xadj[u];
      iend = xadj[u+1];
      for (j=istart; j<iend; j++) {
        k = auxadj[j];
        if ((m = htable[k]) == -1) {
          cadjncy[nedges] = k;
          cadjwgt[nedges] = adjwgt[j];
          htable[k] = nedges++;
        }
        else {
          cadjwgt[m] += adjwgt[j];
        }
      }

      /* Remove the contracted adjacency weight */
      if ((j = htable[cnvtxs]) != -1) {
        ASSERT(cadjncy[j] == cnvtxs);
        cadjwgtsum[cnvtxs] -= cadjwgt[j];
        cadjncy[j] = cadjncy[--nedges];
        cadjwgt[j] = cadjwgt[nedges];
        htable[cnvtxs] = -1;
      }
    }

    ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d\n", cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt)));

    for (j=0; j<nedges; j++)
      htable[cadjncy[j]] = -1;  /* Zero out the htable */

    cnedges += nedges;
    cxadj[++cnvtxs] = cnedges;
    cadjncy += nedges;
    cadjwgt += nedges;
  }

  cgraph->nedges = cnedges;

  ReAdjustMemory(graph, cgraph, dovsize);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr));

  idxwspacefree(ctrl, cnvtxs);
}

示例#12

0

显示文件

文件： mkwayfmh.c 项目： Vignesh2208/Awlsim_Ins

/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void MCRandom_KWayEdgeRefineHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, 
       float *orgubvec, int npasses)
{
  int i, ii, iii, j, /*jj,*/ k, /*l,*/ pass, nvtxs, ncon, nmoves, nbnd, myndegrees, same; 
  int from, me, to, oldcut, gain;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *perm, *bndptr, *bndind;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, maxlb, minlb, ubvec[MAXNCON], tvec[MAXNCON];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndptr = graph->bndptr;
  bndind = graph->bndind;

  where = graph->where;
  npwgts = graph->npwgts;
  
  /* Setup the weight intervals of the various subdomains */
  minwgt =  fwspacemalloc(ctrl, nparts*ncon);
  maxwgt = fwspacemalloc(ctrl, nparts*ncon);

  /* See if the orgubvec consists of identical constraints */
  maxlb = minlb = orgubvec[0];
  for (i=1; i<ncon; i++) {
    minlb = (orgubvec[i] < minlb ? orgubvec[i] : minlb);
    maxlb = (orgubvec[i] > maxlb ? orgubvec[i] : maxlb);
  }
  same = (fabs(maxlb-minlb) < .01 ? 1 : 0);


  /* Let's not get very optimistic. Let Balancing do the work */
  ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec);
  for (i=0; i<ncon; i++)
    ubvec[i] = amax(ubvec[i], orgubvec[i]);

  if (!same) {
    for (i=0; i<nparts; i++) {
      for (j=0; j<ncon; j++) {
        maxwgt[i*ncon+j] = ubvec[j]/nparts;
        minwgt[i*ncon+j] = 1.0/(ubvec[j]*nparts);
      }
    }
  }
  else {
    maxlb = ubvec[0];
    for (i=1; i<ncon; i++) 
      maxlb = (ubvec[i] > maxlb ? ubvec[i] : maxlb);

    for (i=0; i<nparts; i++) {
      for (j=0; j<ncon; j++) {
        maxwgt[i*ncon+j] = maxlb/nparts;
        minwgt[i*ncon+j] = 1.0/(maxlb*nparts);
      }
    }
  }


  perm = idxwspacemalloc(ctrl, nvtxs);

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ",
            npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
            graph->nvtxs, graph->nbnd, graph->mincut);
    ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
    for (i=0; i<ncon; i++)
      printf("%.3f ", tvec[i]);
    printf("\n");
  }

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    for (nmoves=iii=0; iii<graph->nbnd; iii++) {
      ii = perm[iii];
      if (ii >= nbnd)
        continue;
      i = bndind[ii];

      myrinfo = graph->rinfo+i;

      if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */
        from = where[i];
        nvwgt = graph->nvwgt+i*ncon;

        if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) 
          continue;   /* This cannot be moved! */

        myedegrees = myrinfo->edegrees;
        myndegrees = myrinfo->ndegrees;

        for (k=0; k<myndegrees; k++) {
          to = myedegrees[k].pid;
          gain = myedegrees[k].ed - myrinfo->id; 
          if (gain >= 0 && 
              (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            break;
        }
        if (k == myndegrees)
          continue;  /* break out if you did not find a candidate */

        for (j=k+1; j<myndegrees; j++) {
          to = myedegrees[j].pid;
          if ((myedegrees[j].ed > myedegrees[k].ed &&
               (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || 
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) ||
              (myedegrees[j].ed == myedegrees[k].ed && 
               IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            k = j;
        }

        to = myedegrees[k].pid;

        if (myedegrees[k].ed-myrinfo->id == 0 
            && !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)
            && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon)) 
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to' 
        *======================================================================*/
        graph->mincut -= myedegrees[k].ed-myrinfo->id;

        IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

        /* Update where, weight, and ID/ED information of the vertex you moved */
        saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
        saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
        where[i] = to;
        myrinfo->ed += myrinfo->id-myedegrees[k].ed;
        SWAP(myrinfo->id, myedegrees[k].ed, j);
        if (myedegrees[k].ed == 0) 
          myedegrees[k] = myedegrees[--myrinfo->ndegrees];
        else
          myedegrees[k].pid = from;

        if (myrinfo->ed-myrinfo->id < 0)
          BNDDelete(nbnd, bndind, bndptr, i);

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = graph->rinfo+ii;
          if (myrinfo->edegrees == NULL) {
            myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
            ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
          }
          myedegrees = myrinfo->edegrees;

          ASSERT(CheckRInfo(myrinfo));

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
              BNDInsert(nbnd, bndind, bndptr, ii);
          }
          else if (me == to) {
            INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1)
              BNDDelete(nbnd, bndind, bndptr, ii);
          }

          /* Remove contribution from the .ed of 'from' */
          if (me != from) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == from) {
                if (myedegrees[k].ed == adjwgt[j])
                  myedegrees[k] = myedegrees[--myrinfo->ndegrees];
                else
                  myedegrees[k].ed -= adjwgt[j];
                break;
              }
            }
          }

          /* Add contribution to the .ed of 'to' */
          if (me != to) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == to) {
                myedegrees[k].ed += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              myedegrees[myrinfo->ndegrees].pid = to;
              myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
            }
          }

          ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
          ASSERT(CheckRInfo(myrinfo));

        }
        nmoves++;
      }
    }

    graph->nbnd = nbnd;

    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ",
              npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
              nbnd, nmoves, graph->mincut);
      ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
      for (i=0; i<ncon; i++)
        printf("%.3f ", tvec[i]);
      printf("\n");
    }

    if (graph->mincut == oldcut)
      break;
  }

  fwspacefree(ctrl, ncon*nparts);
  fwspacefree(ctrl, ncon*nparts);
  idxwspacefree(ctrl, nvtxs);
}

示例#13

0

显示文件

文件： sgefa.c 项目： 8l/insieme

void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )

/******************************************************************************/
/*
  Purpose:

    SGESL solves a real general linear system A * X = B.

  Discussion:

    SGESL can solve either of the systems A * X = B or A' * X = B.

    The system matrix must have been factored by SGECO or SGEFA.

    A division by zero will occur if the input factor contains a
    zero on the diagonal.  Technically this indicates singularity
    but it is often caused by improper arguments or improper
    setting of LDA.  It will not occur if the subroutines are
    called correctly and if SGECO has set 0.0 < RCOND
    or SGEFA has set INFO == 0.

  Modified:

    04 April 2006

  Author:

    FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
    C translation by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN: 0-89871-172-X

  Parameters:

    Input, float A[LDA*N], the output from SGECO or SGEFA.

    Input, int LDA, the leading dimension of A.

    Input, int N, the order of the matrix A.

    Input, int IPVT[N], the pivot vector from SGECO or SGEFA.

    Input/output, float B[N].
    On input, the right hand side vector.
    On output, the solution vector.

    Input, int JOB.
    0, solve A * X = B;
    nonzero, solve A' * X = B.
*/
{
  int k;
  int l;
  float t;
/*
  Solve A * X = B.
*/
  if ( job == 0 )
  {
    for ( k = 1; k <= n-1; k++ )
    {
      l = ipvt[k-1];
      t = b[l-1];

      if ( l != k )
      {
        b[l-1] = b[k-1];
        b[k-1] = t;
      }
      saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
    }

    for ( k = n; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
      t = -b[k-1];
      saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
    }
  }
/*
  Solve A' * X = B.
*/
  else
  {
    for ( k = 1; k <= n; k++ )
    {
      t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
      b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
    }

    for ( k = n-1; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
      l = ipvt[k-1];

      if ( l != k )
      {
        t = b[l-1];
        b[l-1] = b[k-1];
        b[k-1] = t;
      }
    }
  }
  return;
}

示例#14

0

显示文件

文件： diffutil.c 项目： davidheryanto/sc14

/*************************************************************************
* This function implements the CG solver used during the directed diffusion
**************************************************************************/
void ConjGrad2(MatrixType *A, floattype *b, floattype *x, floattype tol, floattype *workspace)
{
  int i, k, n;
  floattype *p, *r, *q, *z, *M;
  floattype alpha, beta, rho, rho_1 = -1.0, error, bnrm2, tmp;
  idxtype *rowptr, *colind;
  floattype *values;

  n = A->nrows;
  rowptr = A->rowptr;
  colind = A->colind;
  values = A->values;

  /* Initial Setup */
  p = workspace;
  r = workspace + n;
  q = workspace + 2*n;
  z = workspace + 3*n;
  M = workspace + 4*n;

  for (i=0; i<n; i++) {
    x[i] = 0.0;
    if (values[rowptr[i]] != 0.0)
      M[i] = 1.0/values[rowptr[i]];
    else
      M[i] = 0.0;
  }

  /* r = b - Ax */
  mvMult2(A, x, r);
  for (i=0; i<n; i++)
    r[i] = b[i]-r[i];

  bnrm2 = snorm2(n, b);
  if (bnrm2 > 0.0) {
    error = snorm2(n, r) / bnrm2;

    if (error > tol) {
      /* Begin Iterations */
      for (k=0; k<n; k++) {
        for (i=0; i<n; i++)
          z[i] = r[i]*M[i];

        rho = sdot(n, r, z);

        if (k == 0)
          scopy(n, z, p);
        else {
          if (rho_1 != 0.0)
            beta = rho/rho_1;
          else
            beta = 0.0;
          for (i=0; i<n; i++)
            p[i] = z[i] + beta*p[i];
        }

        mvMult2(A, p, q); /* q = A*p */

        tmp = sdot(n, p, q);
        if (tmp != 0.0)
          alpha = rho/tmp;
        else
          alpha = 0.0;
        saxpy(n, alpha, p, x);    /* x = x + alpha*p */
        saxpy(n, -alpha, q, r);   /* r = r - alpha*q */
        error = snorm2(n, r) / bnrm2;
        if (error < tol)
          break;

        rho_1 = rho;
      }
    }
  }
}

示例#15

0

显示文件

文件： sqr.c 项目： gwowen/seismicunix

void
sqrdc (float **x, int n, int p, float *qraux, int *jpvt,
	float *work, int job)
/*****************************************************************************
Use Householder transformations to compute the QR decomposition of an n by p
matrix x.  Column pivoting based on the 2-norms of the reduced columns may be
performed at the user's option.
******************************************************************************
Input:
x		matrix[p][n] to decompose (see notes below)
n		number of rows in the matrix x
p		number of columns in the matrix x
jpvt		array[p] controlling the pivot columns (see notes below)
job		=0 for no pivoting;
		=1 for pivoting

Output:
x		matrix[p][n] decomposed (see notes below)
qraux		array[p] containing information required to recover the
		orthogonal part of the decomposition
jpvt		array[p] with jpvt[k] containing the index of the original
		matrix that has been interchanged into the k-th column, if
		pivoting is requested.

Workspace:
work		array[p] of workspace
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix x
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of x are stored as follows:
x[0][0]    x[1][0]    x[2][0]   ... x[p-1][0]
x[0][1]    x[1][1]    x[2][1]   ... x[p-1][1]
x[0][2]    x[1][2]    x[2][2]   ... x[p-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
x[0][n-1]  x[1][n-1]  x[2][n-1] ... x[p-1][n-1]

After decomposition, x contains in its upper triangular matrix R of the QR
decomposition.  Below its diagonal x contains information from which the
orthogonal part of the decomposition can be recovered.  Note that if
pivoting has been requested, the decomposition is not that of the original
matrix x but that of x with its columns permuted as described by jpvt.

The selection of pivot columns is controlled by jpvt as follows.
The k-th column x[k] of x is placed in one of three classes according to
the value of jpvt[k].
	if jpvt[k] >  0, then x[k] is an initial column.
	if jpvt[k] == 0, then x[k] is a free column.
	if jpvt[k] <  0, then x[k] is a final column.
Before the decomposition is computed, initial columns are moved to the
beginning of the array x and final columns to the end.  Both initial and
final columns are frozen in place during the computation and only free
columns are moved.  At the k-th stage of the reduction, if x[k] is occupied
by a free column it is interchanged with the free column of largest reduced
norm.  jpvt is not referenced if job == 0.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 12/29/89
*****************************************************************************/
{
	int j,jp,l,lup,maxj,pl,pu,negj,swapj;
	float maxnrm,t,tt,ttt,nrmxl;
	
	pl = 0;
	pu = -1;
	
	/* if pivoting has been requested */
	if (job!=0) {
		
		/* rearrange columns according to jpvt */
		for (j=0; j<p; j++) {
			swapj = jpvt[j]>0;
			negj = jpvt[j]<0;
			jpvt[j] = j;
			if (negj) jpvt[j] = -j;
			if (swapj) {
				if (j!=pl) sswap(n,x[pl],1,x[j],1);
				jpvt[j] = jpvt[pl];
				jpvt[pl] = j;
				pl++;
			}
		}
		pu = p-1;
		for (j=p-1; j>=0; j--) {
			if (jpvt[j]<0) {
				jpvt[j] = -jpvt[j];
				if (j!=pu) {
					sswap(n,x[pu],1,x[j],1);
					jp = jpvt[pu];
					jpvt[pu] = jpvt[j];
					jpvt[j] = jp;
				}
				pu--;
			}
		}
	}
	
	/* compute the norms of the free columns */
	for (j=pl; j<=pu; j++) {
		qraux[j] = snrm2(n,x[j],1);
		work[j] = qraux[j];
	}
	
	/* perform the Householder reduction of x */
	lup = MIN(n,p);
	for (l=0; l<lup; l++) {
		if (l>=pl && l<pu) {
			
			/* 
			 * locate the column of largest norm and
			 * bring it into pivot position.
			 */
			maxnrm = 0.0;
			maxj = l;
			for (j=l; j<=pu; j++) {
			 	if (qraux[j]>maxnrm) {
					maxnrm = qraux[j];
					maxj = j;
				}
			}
			if (maxj!=l) {
				sswap(n,x[l],1,x[maxj],1);
				qraux[maxj] = qraux[l];
				work[maxj] = work[l];
				jp = jpvt[maxj];
				jpvt[maxj] = jpvt[l];
				jpvt[l] = jp;
			}
		}
		qraux[l] = 0.0;
		if (l!=n-1) {
		
			/*
			 * compute the Householder transformation
			 * for column l
			 */
			nrmxl = snrm2(n-l,&x[l][l],1);
			if (nrmxl!=0.0) {
				if (x[l][l]!=0.0)
					nrmxl = (x[l][l]>0.0) ?
						ABS(nrmxl) :
						-ABS(nrmxl);
				sscal(n-l,1.0/nrmxl,&x[l][l],1);
				x[l][l] += 1.0;
				
				/*
				 * apply the transformation to the remaining
				 * columns, updating the norms
				 */
				 for (j=l+1; j<p; j++) {
					 t = -sdot(n-l,&x[l][l],1,&x[j][l],1)/
					 	x[l][l];
					saxpy(n-l,t,&x[l][l],1,&x[j][l],1);
					if (j>=pl && j<=pu && qraux[j]!=0.0) {
						tt = ABS(x[j][l])/qraux[j];
						tt = 1.0-tt*tt;
						tt = MAX(tt,0.0);
						t = tt;
						ttt = qraux[j]/work[j];
						tt = 1.0+0.05*tt*ttt*ttt;
						if (tt!=1.0) {
							qraux[j] *= sqrt(t);
						} else {
							qraux[j] = snrm2(n-l-1,
								&x[j][l+1],1);
							work[j] = qraux[j];
						}
					}
				}
				
				/* save the transformation */
				qraux[l] = x[l][l];
				x[l][l] = -nrmxl;
			}
		}
	}
}

示例#16

0

显示文件

文件： mbalance.c 项目： iyer-arvind/gmsh

/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocGeneral2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  int qsizes[MAXNCON][2];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
    qsizes[i][0] = qsizes[i][1] = 0;
  }

  for (i=0; i<nvtxs; i++) {
    qnum[i] = samax(ncon, nvwgt+i*ncon);
    qsizes[qnum[i]][where[i]]++;
  }

/*
  printf("Weight Distribution:    \t");
  for (i=0; i<ncon; i++) 
    printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); 
  printf("\n");
*/

  for (from=0; from<2; from++) {
    for (j=0; j<ncon; j++) {
      if (qsizes[j][from] == 0) {
        for (i=0; i<nvtxs; i++) {
          if (where[i] != from)
            continue;

          k = samax2(ncon, nvwgt+i*ncon);
          if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) {
            qsizes[qnum[i]][from]--;
            qsizes[j][from]++;
            qnum[i] = j;
          }
        }
      }
    }
  }

/*
  printf("Weight Distribution (after):\t ");
  for (i=0; i<ncon; i++) 
    printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); 
  printf("\n");
*/



  for (i=0; i<ncon; i++) 
    mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
  minbal = origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  newcut = mincut = graph->mincut;
  mincutorder = -1;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
  }

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Insert all nodes in the priority queues */
  nbnd = graph->nbnd;
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (minbal < lbfactor)
      break;

    SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    if (newbal < minbal || (newbal == minbal && 
        (newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
      mincut = newcut;
      minbal = newbal;
      mincutorder = nswaps;
      for (i=0; i<ncon; i++)
        mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
      for (l=0; l<ncon; l++) 
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf(", %.3f LB: %.3f\n", minbal, newbal);
    }


    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1) 
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)  
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }



  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
  }

  graph->mincut = mincut;
  graph->nbnd = nbnd;


  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}

示例#17

0

显示文件

文件： main.cpp 项目： ViktorRoy94/Completed-projects

int main()
{
    const int n = 1000000;
    const int incx = 2;
    const int incy = 3;
    const float a = float(rand()) / RAND_MAX;

    float * x, * y, * dev_x, * dev_y, * y_verify;

    x = new float[n * incx];
	y = new float[n * incy];
	y_verify = new float[n * incy];
    
	cudaMalloc((void **) &dev_x, n * incx * sizeof(float));
    cudaMalloc((void **) &dev_y, n * incy * sizeof(float));

    for (int i = 0; i < n * incx; ++i)
        x[i] = float(rand()) / RAND_MAX;
    for (int i = 0; i < n * incy; ++i)
        y[i] = float(rand()) / RAND_MAX;

    cudaMemcpy(dev_x, x, n * incx * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_y, y, n * incy * sizeof(float), cudaMemcpyHostToDevice);

	clock_t start = clock();
    saxpy(n, a, x, incx, y, incy);
    clock_t finish = clock();

	std::cout.setf(std::ios_base::fixed);
    std::cout.precision(5);

    double cpu_time = (finish - start) /(double) CLOCKS_PER_SEC;
    std::cout << "CPU " << cpu_time << " seconds.\n";

	start = clock();
    saxpy_openmp(n, a, x, incx, y, incy);
    finish = clock();
	double cpu_openmp_time = (finish - start) /(double) CLOCKS_PER_SEC;
    std::cout << "CPU_openmp " << cpu_openmp_time << " seconds.\n";

	start = clock();
    saxpy_gpu(n, a, dev_x, incx, dev_y, incy);
    cudaThreadSynchronize();
    finish = clock();

    double gpu_time = (float) (finish - start) / (double)CLOCKS_PER_SEC;

    std::cout << "GPU " << gpu_time << " seconds.\n";

    cudaMemcpy(y_verify, dev_y, n * incy * sizeof(float), cudaMemcpyDeviceToHost);

    double diff = 0;
    for (int i = 0; i < n * incy; ++i)
        diff += (y[i] - y_verify[i]) * (y[i] - y_verify[i]);

    if (diff < 1e-4f)
        std::cout << "Correct.\n";
    else
        std::cout << "Correct.\n";
	
	////
	//// а теперь тоже самое для double
	////

	//double * xd, * yd, * dev_xd, * dev_yd, * y_verifyd;
	//const float ad = float(rand()) / RAND_MAX;

 //   xd = new double[n * incx];
	//yd = new double[n * incy];
	//y_verifyd = new double[n * incy];
 //   
	//cudaMalloc((void **) &dev_xd, n * incx * sizeof(double));
 //   cudaMalloc((void **) &dev_yd, n * incy * sizeof(double));

 //   for (int i = 0; i < n * incx; ++i)
 //       xd[i] = double(rand()) / RAND_MAX;
 //   for (int i = 0; i < n * incy; ++i)
 //       yd[i] = double(rand()) / RAND_MAX;

 //   cudaMemcpy(dev_xd, xd, n * incx * sizeof(double), cudaMemcpyHostToDevice);
 //   cudaMemcpy(dev_yd, yd, n * incy * sizeof(double), cudaMemcpyHostToDevice);

	//start = clock();
 //   for (int i = 0; i < iterations; ++i)
 //       daxpy(n, ad, xd, incx, yd, incy);
 //   finish = clock();

 //   cpu_time = (finish - start) / CLOCKS_PER_SEC;
 //   std::cout << "CPU " << cpu_time << " seconds.\n";

	//start = clock();
 //   for (int i = 0; i < iterations; ++i)
 //       daxpy_gpu(n, ad, dev_xd, incx, dev_yd, incy);
 //   
 //   cudaThreadSynchronize();
 //   finish = clock();

 //   gpu_time = (float) (finish - start) / CLOCKS_PER_SEC;

 //   std::cout << "GPU " << gpu_time << " seconds.\n";

 //   cudaMemcpy(y_verifyd, dev_yd, n * incy * sizeof(double), cudaMemcpyDeviceToHost);

	//diff = 0;
 //   for (int i = 0; i < n * incy; ++i)
 //       diff += (yd[i] - y_verifyd[i]) * (yd[i] - y_verifyd[i]);

 //   if (diff < 1e-1f)
 //       std::cout << "Correct.\n";
 //   else
 //       std::cout << "Incorrect.\n";


	system("pause");

	cudaFree(dev_x);
    cudaFree(dev_y);

	//cudaFree(dev_xd);
 //   cudaFree(dev_yd);
	
    delete [] x;
	delete [] y;
	delete [] y_verify;

	//delete [] xd;
	//delete [] yd;
	//delete [] y_verifyd;
}

示例#18

0

显示文件

文件： blast.c 项目： JOravetz/SeisUnix

main()
{
	int i,n=N;

	printf("isamax = %d\n",isamax(n,sx,1));
	printf("isamax = %d\n",isamax(n/2,sx,2));
	printf("isamax = %d\n",isamax(n,sy,1));

	printf("sasum = %g\n",sasum(n,sx,1));
	printf("sasum = %g\n",sasum(n/2,sx,2));
	printf("sasum = %g\n",sasum(n,sy,1));

	printf("snrm2 = %g\n",snrm2(n,sx,1));
	printf("snrm2 = %g\n",snrm2(n/2,sx,2));
	printf("snrm2 = %g\n",snrm2(n,sy,1));

	printf("sdot = %g\n",sdot(n,sx,1,sy,1));
	printf("sdot = %g\n",sdot(n/2,sx,2,sy,2));
	printf("sdot = %g\n",sdot(n/2,sx,-2,sy,2));
	printf("sdot = %g\n",sdot(n,sy,1,sy,1));

	printf("sscal\n");
	sscal(n,2.0,sx,1);
	pvec(n,sx);
	sscal(n,0.5,sx,1);
	pvec(n,sx);
	sscal(n/2,2.0,sx,2);
	pvec(n,sx);
	sscal(n/2,0.5,sx,2);
	pvec(n,sx);

	printf("sswap\n");
	sswap(n,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n,sy,1,sx,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);

	printf("saxpy\n");
	saxpy(n,2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n,-2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);

	printf("scopy\n");
	scopy(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,2,sy+1,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx,2,sy,1);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,-2,sy+n/2,-1);
	pvec(n,sx); pvec(n,sy);
}

示例#19

0

显示文件

文件： mkwayrefine.c 项目： cran/BigQuic

/*************************************************************************
* This function computes the initial id/ed 
**************************************************************************/
void MocComputeKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts)
{
  int i, j, k, l, nvtxs, ncon, nbnd, mincut, me, other;
  idxtype *xadj, *adjncy, *adjwgt, *where, *bndind, *bndptr;
  RInfoType *rinfo, *myrinfo;
  EDegreeType *myedegrees;
  float *nvwgt, *npwgts;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  where = graph->where;
  npwgts = sset(ncon*nparts, 0.0, graph->npwgts);
  bndind = graph->bndind;
  bndptr = idxset(nvtxs, -1, graph->bndptr);
  rinfo = graph->rinfo;


  /*------------------------------------------------------------
  / Compute now the id/ed degrees
  /------------------------------------------------------------*/
  ctrl->wspace.cdegree = 0;
  nbnd = mincut = 0;
  for (i=0; i<nvtxs; i++) {
    me = where[i];
    saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);

    myrinfo = rinfo+i;
    myrinfo->id = myrinfo->ed = myrinfo->ndegrees = 0;
    myrinfo->edegrees = NULL;

    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (me != where[adjncy[j]])
        myrinfo->ed += adjwgt[j];
    }
    myrinfo->id = graph->adjwgtsum[i] - myrinfo->ed;

    if (myrinfo->ed > 0) 
      mincut += myrinfo->ed;

    if (myrinfo->ed-myrinfo->id >= 0)
      BNDInsert(nbnd, bndind, bndptr, i);

    /* Time to compute the particular external degrees */
    if (myrinfo->ed > 0) { 
      myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
      ctrl->wspace.cdegree += xadj[i+1]-xadj[i];

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        other = where[adjncy[j]];
        if (me != other) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == other) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = other;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }
      }

      ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
    }
  }

  graph->mincut = mincut/2;
  graph->nbnd = nbnd;

}

示例#20

0

显示文件

文件： sqr.c 项目： gwowen/seismicunix

void
sqrsl (float **x, int n, int k, float *qraux,
	float *y, float *qy, float *qty,
	float *b, float *rsd, float *xb, int job, int *info)
/*****************************************************************************
Use the output of sqrdc to compute coordinate transformations, projections,
and least squares solutions.  For k <= MIN(n,p), let xk be the matrix
	xk = (x[jpvt[0]], x[jpvt[1]], ..., x[jpvt[k-1]])
formed from columns jpvt[0], jpvt[1], ..., jpvt[k-1] of the original
n by p matrix x that was input to sqrdc.  (If no pivoting was done, xk
consists of the first k columns of x in their original order.)  sqrdc
produces a factored orthogonal matrix Q and an upper triangular matrix R
such that
	xk = Q * (R)
	         (0)
This information is contained in coded form in the arrays x and qraux.
******************************************************************************
Input:
x		matrix[p][n] containing output of sqrdc.
n		number of rows in the matrix xk; same as in sqrdc.
k		number of columns in the matrix xk; k must not be greater
		than MIN(n,p), where p is the same as in sqrdc.
qraux		array[p] containing auxiliary output from sqrdc.
y		array[n] to be manipulated by sqrsl.
job		specifies what is to be computed.  job has the decimal
		expansion ABCDE, with the following meaning:
		if A != 0, compute qy.
		if B, C, D, or E != 0, compute qty.
		if C != 0, compute b.
		if D != 0, compute rsd.
		if E != 0, compute xb.
		Note that a request to compute b, rsd, or xb automatically
		triggers the computation of qty, for which an array must
		be provided.

Output:
qy		array[n] containing Qy, if its computation has been
		requested.
qty		array[n] containing Q'y, if its computation has
		been requested.  Here Q' denotes the transpose of Q.
b		array[k] containing solution of the least squares problem:
			minimize norm2(y - xk*b),
		if its computation has been requested.  (Note that if
		pivoting was requested in sqrdc, the j-th component of
		b will be associated with column jpvt[j] of the original
		matrix x that was input into sqrdc.)
rsd		array[n] containing the least squares residual y - xk*b,
		if its computation has been requested.  rsd is also the
		orthogonal projection of y onto the orthogonal complement
		of the column space of xk.
xb		array[n] containing the least squares approximation xk*b,
		if its computation has been requested.  xb is also the
		orthogonal projection of y onto the column space of x.
info		=0 unless the computation of b has been requested and R
		is exactly singular.  In this case, info is the index of
		the first zero diagonal element of R and b is left
		unaltered.
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix x
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of x are stored as follows:
x[0][0]    x[1][0]    x[2][0]   ... x[k-1][0]
x[0][1]    x[1][1]    x[2][1]   ... x[k-1][1]
x[0][2]    x[1][2]    x[2][2]   ... x[k-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
x[0][n-1]  x[1][n-1]  x[2][n-1] ... x[k-1][n-1]

The parameters qy, qty, b, rsd, and xb are not referenced if their
computation is not requested and in this case can be replaced by NULL
pointers in the calling program.  To save storage, the user may in
some cases use the same array for different parameters in the calling
sequence.  A frequently occuring example is when one wishes to compute
any of b, rsd, or xb and does not need y or qty.  In this case one may
equivalence y, qty, and one of b, rsd, or xb, while providing separate
arrays for anything else that is to be computed.  Thus the calling
sequence
	sqrsl(x,n,k,qraux,y,NULL,y,b,y,NULL,110,&info)
will result in the computation of b and rsd, with rsd overwriting y.
More generally, each item in the following list contains groups of
permissible equivalences for a single calling sequence.
	1. (y,qty,b) (rsd) (xb) (qy)
	2. (y,qty,rsd) (b) (xb) (qy)
	3. (y,qty,xb) (b) (rsd) (qy)
	4. (y,qy) (qty,b) (rsd) (xb)
	5. (y,qy) (qty,rsd) (b) (xb)
	6. (y,qy) (qty,xb) (b) (rsd)
In any group the value returned in the array allocated to the group
corresponds to the last member of the group.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 12/29/89
*****************************************************************************/
{
	int i,j,ju,cb,cqy,cqty,cr,cxb;
	float t,temp;
	
	/* set info flag */
	*info = 0;
	
	/* determine what is to be computed */
	cqy = job/10000!=0;
	cqty = job%10000!=0;
	cb = (job%1000)/100!=0;
	cr = (job%100)/10!=0;
	cxb = job%10!=0;
	ju = MIN(k,n-1);
	
	/* special action when n=1 */
	if (ju==0) {
		if (cqy) qy[0] = y[0];
		if (cqty) qty[0] = y[0];
		if (cxb) xb[0] = y[0];
		if (cb) {
			if (x[0][0]==0.0)
				*info = 1;
			else
				b[0] = y[0]/x[0][0];
		}
		if (cr) rsd[0] = 0.0;
		return;
	}
	
	/* set up to compute Qy or Q'y */
	if (cqy) scopy(n,y,1,qy,1);
	if (cqty) scopy(n,y,1,qty,1);
	if (cqy) {
	
		/* compute Qy */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qy[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qy[j],1);
				x[j][j] = temp;
			}
		}
	}
	if (cqty) {
		
		/* compute Q'y */
		for (j=0; j<ju; j++) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qty[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qty[j],1);
				x[j][j] = temp;
			}
		}
	}
	
	/* set up to compute b, rsd, or xb */
	if (cb) scopy(k,qty,1,b,1);
	if (cxb) scopy(k,qty,1,xb,1);
	if (cr && k<n) scopy(n-k,&qty[k],1,&rsd[k],1);
	if (cxb && k<n)
		for (i=k; i<n; i++)
			xb[i] = 0.0;
	if (cr)
		for (i=0; i<k; i++)
			rsd[i] = 0.0;
	if (cb) {
	
		/* compute b */
		for (j=k-1; j>=0; j--) {
			if (x[j][j]==0.0) {
				*info = j;
				break;
			}
			b[j] /= x[j][j];
			if (j!=0) {
				t = -b[j];
				saxpy(j,t,x[j],1,b,1);
			}
		}
	}
	if (cr || cxb) {
	
		/* compute rsd or xb as requested */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				if (cr) {
					t = -sdot(n-j,&x[j][j],1,&rsd[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&rsd[j],1);
				}
				if (cxb) {
					t = -sdot(n-j,&x[j][j],1,&xb[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&xb[j],1);
				}
				x[j][j] = temp;
			}
		}
	}
}

示例#21

0

显示文件

文件： mfm.c 项目： cran/BigQuic

/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, float *tpwgts, int npasses)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, initcut, newcut, mincutorder;
  float rtpwgts[2];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 25), 150);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

  rtpwgts[0] = origbal*tpwgts[0];
  rtpwgts[1] = origbal*tpwgts[1];

/*
  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
  }
*/

  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    for (i=0; i<ncon; i++) { 
      PQueueReset(&parts[i][0]);
      PQueueReset(&parts[i][1]);
    }

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    for (i=0; i<ncon; i++)
      mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
    minbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    ASSERT(ComputeCut(graph, where) == graph->mincut);
    ASSERT(CheckBnd(graph));

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->nbnd;
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      ASSERT(ed[i] > 0 || id[i] == 0);
      ASSERT(bndptr[i] != -1);
      PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
      to = (from+1)%2;

      if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
        break;
      ASSERT(bndptr[higain] != -1);

      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

      newcut -= (ed[higain]-id[higain]);
      newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

      if ((newcut < mincut && newbal-origbal <= .00001) || 
          (newcut == mincut && (newbal < minbal || 
                                (newbal == minbal && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
        mincut = newcut;
        minbal = newbal;
        mincutorder = nswaps;
        for (i=0; i<ncon; i++)
          mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
        saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

/*
      if (ctrl->dbglvl&DBG_MOVEINFO) {
        printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
        for (l=0; l<ncon; l++) 
          printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
        printf(", %.3f LB: %.3f\n", minbal, newbal);
      }
*/


      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
        BNDDelete(nbnd, bndind,  bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1) 
              PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]);
          }
        }
      }

    }


    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind,  bndptr, higain);

      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

/*
    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
      for (l=0; l<ncon; l++)
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
    }
*/

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}

示例#22

0

显示文件

/*************************************************************************
* This function balances two partitions by moving the highest gain 
* (including negative gain) vertices to the other domain.
* It is used only when tha unbalance is due to non contigous
* subdomains. That is, the are no boundary vertices.
* It moves vertices from the domain that is overweight to the one that 
* is underweight.
**************************************************************************/
void MocInit2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts)
{
  int i, ii, j, k, l, kwgt, nvtxs, nbnd, ncon, nswaps, from, to, pass, me, cnum, tmp;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *perm, *qnum;
  float *nvwgt, *npwgts;
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  nvwgt = graph->nvwgt;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  /* This is called for initial partitioning so we know from where to pick nodes */
  from = 1;
  to = (from+1)%2;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], 
           graph->nvtxs, graph->nbnd, graph->mincut, 
           Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
  }

  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
  }

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));
  ASSERT(CheckGraph(graph));

  /* Compute the queues in which each vertex will be assigned to */
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  /* Insert the nodes of the proper partition in the appropriate priority queue */
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    if (where[i] == from) {
      if (ed[i] > 0)
        PQueueInsert(&parts[qnum[i]][0], i, ed[i]-id[i]);
      else
        PQueueInsert(&parts[qnum[i]][1], i, ed[i]-id[i]);
    }
  }


  mincut = graph->mincut;
  nbnd = graph->nbnd;
  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (AreAnyVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, tpwgts[from]))
      break;

    if ((cnum = SelectQueueOneWay(ncon, npwgts, tpwgts, from, parts)) == -1)
      break;

    if ((higain = PQueueGetMax(&parts[cnum][0])) == -1)
      higain = PQueueGetMax(&parts[cnum][1]);

    mincut -= (ed[higain]-id[higain]);
    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

    where[higain] = to;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). [%5d] %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], mincut);
      for (l=0; l<ncon; l++) 
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf(", LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
      if (ed[higain] == 0 && id[higain] > 0)
        printf("\t Pulled from the interior!\n");
    }


    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (where[k] == from) {
        if (ed[k] > 0 && bndptr[k] == -1) {  /* It moves in boundary */
          PQueueDelete(&parts[qnum[k]][1], k, oldgain);
          PQueueInsert(&parts[qnum[k]][0], k, ed[k]-id[k]);
        }
        else { /* It must be in the boundary already */
          if (bndptr[k] == -1)
            printf("What you thought was wrong!\n");
          PQueueUpdate(&parts[qnum[k]][0], k, oldgain, ed[k]-id[k]);
        }
      }

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1) 
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)  
        BNDInsert(nbnd, bndind, bndptr, k);
    }

    ASSERTP(ComputeCut(graph, where) == mincut, ("%d != %d\n", ComputeCut(graph, where), mincut));

  }

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("\tMincut: %6d, NBND: %6d, NPwgts: ", mincut, nbnd);
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf(", LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
  }

  graph->mincut = mincut;
  graph->nbnd = nbnd;

  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}

示例#23

0

显示文件

文件： sge.c 项目： gwowen/seismicunix

void
sgeco (float **a, int n, int *ipvt, float *rcond, float *z)
/*****************************************************************************
Gaussian elimination to obtain the LU factorization and
condition number of a matrix.
******************************************************************************
Input:
a		matrix[n][n] to be factored (see notes below)
n		dimension of a

Output:
a		matrix[n][n] factored (see notes below)
ipvt		indices of pivot permutations (see notes below)
rcond		reciprocal condition number (see notes below)

Workspace:
z		array[n]
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix a
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of a are stored as follows:
a[0][0]    a[1][0]    a[2][0]   ... a[n-1][0]
a[0][1]    a[1][1]    a[2][1]   ... a[n-1][1]
a[0][2]    a[1][2]    a[2][2]   ... a[n-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
a[0][n-1]  a[1][n-1]  a[2][n-1] ... a[n-1][n-1]

Both the factored matrix a and the pivot indices ipvt are required
to solve linear systems of equations via sgesl.

Given the reciprocal of the condition number, rcond, and the float
epsilon, FLT_EPSILON, the number of significant decimal digits, nsdd,
in the solution of a linear system of equations may be estimated by:
	nsdd = (int)log10(rcond/FLT_EPSILON)
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 10/01/89
*****************************************************************************/
{
	int info,j,k,kp1,l;
	float ek,t,wk,wkm,anorm,s,sm,ynorm;

	/* compute 1-norm of a */
	for (j=0,anorm=0.0; j<n; j++) {
		t = sasum(n,a[j],1);
		anorm = (t>anorm)?t:anorm;
	}

	/* factor */
	sgefa(a,n,ipvt,&info);

	/* rcond = 1/(norm(a)*(estimate of norm(inverse(a)))).
	 * estimate = norm(z)/norm(y) where Az = y and A'y = e.
	 * A' is the transpose of A.  The components of e are
	 * chosen to cause maximum local growth in the elements of
	 * w where U'w = e.  The vectors are frequently rescaled
	 * to avoid overflow
	 */

	/* solve U'w = e */
	ek = 1.0;
	for (j=0; j<n; j++)
		z[j] = 0.0;
	for (k=0; k<n; k++) {
		if (z[k]!=0.0) ek = (z[k]>0.0)?-ABS(ek):ABS(ek);
		if (ABS(ek-z[k])>ABS(a[k][k])) {
			s = ABS(a[k][k])/ABS(ek-z[k]);
			sscal(n,s,z,1);
			ek *= s;
		}
		wk = ek-z[k];
		wkm = -ek-z[k];
		s = ABS(wk);
		sm = ABS(wkm);
		if (a[k][k]==0.0) {
			wk = 1.0;
			wkm = 1.0;
		} else {
			wk = wk/a[k][k];
			wkm = wkm/a[k][k];
		}
		kp1 = k+1;
		if (kp1<n) {
			for (j=kp1; j<n; j++) {
				t = z[j]+wkm*a[j][k];
				sm += ABS(t);
				z[j] += wk*a[j][k];
				s += ABS(z[j]);
			}
			if (s<sm) {
				t = wkm-wk;
				wk = wkm;
				for (j=kp1; j<n; j++)
					z[j] += t*a[j][k];
			}
		}
		z[k] = wk;
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);

	/* solve L'y = w */
	for (k=n-1; k>=0; k--) {
		if (k<n-1) z[k] += sdot(n-k-1,&a[k][k+1],1,&z[k+1],1);
		if (ABS(z[k])>1.0) {
			s = 1.0/ABS(z[k]);
			sscal(n,s,z,1);
		}
		l = ipvt[k];
		t = z[l];
		z[l] = z[k];
		z[k] = t;
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);

	ynorm = 1.0;

	/* solve Lv = y */
	for (k=0; k<n; k++) {
		l = ipvt[k];
		t = z[l];
		z[l] = z[k];
		z[k] = t;
		if (k<n-1) saxpy(n-k-1,t,&a[k][k+1],1,&z[k+1],1);
		if (ABS(z[k])>1.0) {
			s = 1.0/ABS(z[k]);
			sscal(n,s,z,1);
			ynorm *= s;
		}
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);
	ynorm *= s;

	/* solve Uz = v */
	for (k=n-1; k>=0; k--) {
		if (ABS(z[k])>ABS(a[k][k])) {
			s = ABS(a[k][k])/ABS(z[k]);
			sscal(n,s,z,1);
			ynorm *= s;
		}
		if (a[k][k]!=0.0) 
			z[k] /= a[k][k];
		else
			z[k] = 1.0;
		t = -z[k];
		saxpy(k,t,a[k],1,z,1);
	}

	/* make znorm = 1.0 */
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);
	ynorm *= s;

	if (anorm!=0.0) 
		*rcond = ynorm/anorm;
	else
		*rcond = 0.0;
}

示例#24

0

显示文件

文件： mbalance2.c 项目： askhl/octopus-dfrt2

/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON];
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  float *maxwgt, *minwgt, tvec[MAXNCON];


  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Setup the weight intervals of the two subdomains */
  minwgt = fwspacemalloc(ctrl, 2*ncon);
  maxwgt = fwspacemalloc(ctrl, 2*ncon);

  for (i=0; i<2; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j];
      minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]);
    }
  }


  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal);
  for (i=0; i<ncon; i++) 
    minbal[i] = origbal[i];

  newcut = mincut = graph->mincut;
  mincutorder = -1;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1], 
            graph->nvtxs, graph->nbnd, graph->mincut);
    for (i=0; i<ncon; i++)
      printf("%.3f ", origbal[i]);
    printf("[B]\n");
  }

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Insert all nodes in the priority queues */
  nbnd = graph->nbnd;
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
  }


  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (AreAllBelow(ncon, minbal, ubvec))
      break;

    SelectQueue3(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt);
    to = (from+1)%2;

    if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, newbal);

    if (IsBetter2wayBalance(ncon, newbal, minbal, ubvec) || 
        (IsBetter2wayBalance(ncon, newbal, origbal, ubvec) && newcut < mincut)) {
      mincut = newcut;
      for (i=0; i<ncon; i++) 
        minbal[i] = newbal[i];
      mincutorder = nswaps;
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
      for (i=0; i<ncon; i++) 
        printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);

      Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
      printf(", LB: ");
      for (i=0; i<ncon; i++) 
        printf("%.3f ", tvec[i]);
      if (mincutorder == nswaps)
        printf(" *\n");
      else
        printf("\n");
    }


    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1) 
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)  
        BNDInsert(nbnd, bndind, bndptr, k);
    }
   
  }



  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (i=0; i<nswaps; i++)
    moved[swaps[i]] = -1;  /* reset moved array */
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
    for (i=0; i<ncon; i++)
      printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);
    printf("], LB: ");
    Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
    for (i=0; i<ncon; i++) 
      printf("%.3f ", tvec[i]);
    printf("\n");
  }

  graph->mincut = mincut;
  graph->nbnd = nbnd;


  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  fwspacefree(ctrl, 2*ncon);
  fwspacefree(ctrl, 2*ncon);

}

示例#25

0

显示文件

文件： sge.c 项目： gwowen/seismicunix

void
sgesl (float **a, int n, int *ipvt, float *b, int job)
/*****************************************************************************
solve linear system Ax = b or A'x = b after LU factorization
******************************************************************************
Input:
a		matrix[n][n] that has been LU factored (see notes below)
n		dimension of a
ipvt		indices of pivot permutations (see notes below)
b		right-hand-side vector[n]
job		=0 to solve Ax = b
		=1 to solve A'x = b

Output:
b		solution vector[n]
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix a
is actually a pointer to an array of pointers to floats, as declared
above and used below.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 10/01/89
*****************************************************************************/
{
	int k,l,nm1;
	float t;

	nm1 = n-1;

	/* if solving Ax = b */
	if (job==0) {

		/* first solve Ly = b */
		for (k=0; k<nm1; k++) {
			l = ipvt[k];
			t = b[l];
			if (l!=k) {
				b[l] = b[k];
				b[k] = t;
			}
			saxpy(n-k-1,t,&a[k][k+1],1,&b[k+1],1);
		}

		/* now solve Ux = y */
		for (k=n-1; k>=0; k--) {
			b[k] /= a[k][k];
			t = -b[k];
			saxpy(k,t,a[k],1,b,1);
		}

	/* else, if solving A'x = b */
	} else {

		/* first solve U'y = b */
		for (k=0; k<n; k++) {
			t = sdot(k,a[k],1,b,1);
			b[k] = (b[k]-t)/a[k][k];
		}

		/* now solve L'x = y */
		for (k=n-2; k>=0; k--) {
			b[k] += sdot(n-k-1,&a[k][k+1],1,&b[k+1],1);
			l = ipvt[k];
			if (l!=k) {
				t = b[l];
				b[l] = b[k];
				b[k] = t;
			}
		}
	}
}

示例#26

0

显示文件

文件： mkwayfmh.c 项目： Vignesh2208/Awlsim_Ins

/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void MCGreedy_KWayEdgeBalanceHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, 
       float *ubvec, int npasses)
{
  int i, ii, /*iii,*/ j, /*jj,*/ k, /*l,*/ pass, nvtxs, ncon, nbnd, myndegrees, oldgain, gain, nmoves; 
  int from, me, to, oldcut;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *perm, *bndptr, *bndind, *moved;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  PQueueType queue;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, tvec[MAXNCON];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;

  where = graph->where;
  npwgts = graph->npwgts;
  
  /* Setup the weight intervals of the various subdomains */
  minwgt =  fwspacemalloc(ctrl, ncon*nparts);
  maxwgt = fwspacemalloc(ctrl, ncon*nparts);

  for (i=0; i<nparts; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = ubvec[j]/nparts;
      minwgt[i*ncon+j] = 1.0/(ubvec[j]*nparts);
    }
  }

  perm = idxwspacemalloc(ctrl, nvtxs);
  moved = idxwspacemalloc(ctrl, nvtxs);

  PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]);

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ",
            npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
            graph->nvtxs, graph->nbnd, graph->mincut);
    ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
    for (i=0; i<ncon; i++)
      printf("%.3f ", tvec[i]);
    printf("[B]\n");
  }


  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    /* Check to see if things are out of balance, given the tolerance */
    if (MocIsHBalanced(ncon, nparts, npwgts, ubvec))
      break;

    PQueueReset(&queue);
    idxset(nvtxs, -1, moved);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      PQueueInsert(&queue, i, graph->rinfo[i].ed - graph->rinfo[i].id);
      moved[i] = 2;
    }

    nmoves = 0;
    for (;;) {
      if ((i = PQueueGetMax(&queue)) == -1) 
        break;
      moved[i] = 1;

      myrinfo = graph->rinfo+i;
      from = where[i];
      nvwgt = graph->nvwgt+i*ncon;

      if (AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon))
        continue;   /* This cannot be moved! */

      myedegrees = myrinfo->edegrees;
      myndegrees = myrinfo->ndegrees;

      for (k=0; k<myndegrees; k++) {
        to = myedegrees[k].pid;
        if (IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))
          break;
      }
      if (k == myndegrees) 
        continue;  /* break out if you did not find a candidate */

      for (j=k+1; j<myndegrees; j++) {
        to = myedegrees[j].pid;
        if (IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec)) 
          k = j;
      }

      to = myedegrees[k].pid;

      j = 0;
      if (!AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, maxwgt+from*ncon))
        j++;
      if (myedegrees[k].ed-myrinfo->id >= 0)
        j++;
      if (!AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) &&
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon))
        j++;
      if (j == 0)
        continue;

/* DELETE
      if (myedegrees[k].ed-myrinfo->id < 0 && 
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, maxwgt+from*ncon) &&
          AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) &&
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon))
        continue;
*/
      /*=====================================================================
      * If we got here, we can now move the vertex from 'from' to 'to' 
      *======================================================================*/
      graph->mincut -= myedegrees[k].ed-myrinfo->id;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

      /* Update where, weight, and ID/ED information of the vertex you moved */
      saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
      where[i] = to;
      myrinfo->ed += myrinfo->id-myedegrees[k].ed;
      SWAP(myrinfo->id, myedegrees[k].ed, j);
      if (myedegrees[k].ed == 0) 
        myedegrees[k] = myedegrees[--myrinfo->ndegrees];
      else
        myedegrees[k].pid = from;

      if (myrinfo->ed == 0)
        BNDDelete(nbnd, bndind, bndptr, i);

      /* Update the degrees of adjacent vertices */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        ii = adjncy[j];
        me = where[ii];

        myrinfo = graph->rinfo+ii;
        if (myrinfo->edegrees == NULL) {
          myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
          ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
        }
        myedegrees = myrinfo->edegrees;

        ASSERT(CheckRInfo(myrinfo));

        oldgain = (myrinfo->ed-myrinfo->id);

        if (me == from) {
          INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

          if (myrinfo->ed > 0 && bndptr[ii] == -1)
            BNDInsert(nbnd, bndind, bndptr, ii);
        }
        else if (me == to) {
          INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

          if (myrinfo->ed == 0 && bndptr[ii] != -1)
            BNDDelete(nbnd, bndind, bndptr, ii);
        }

        /* Remove contribution from the .ed of 'from' */
        if (me != from) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == from) {
              if (myedegrees[k].ed == adjwgt[j])
                myedegrees[k] = myedegrees[--myrinfo->ndegrees];
              else
                myedegrees[k].ed -= adjwgt[j];
              break;
            }
          }
        }

        /* Add contribution to the .ed of 'to' */
        if (me != to) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == to) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = to;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }


        /* Update the queue */
        if (me == to || me == from) { 
          gain = myrinfo->ed-myrinfo->id;
          if (moved[ii] == 2) {
            if (myrinfo->ed > 0)
              PQueueUpdate(&queue, ii, oldgain, gain);
            else {
              PQueueDelete(&queue, ii, oldgain);
              moved[ii] = -1;
            }
          }
          else if (moved[ii] == -1 && myrinfo->ed > 0) {
            PQueueInsert(&queue, ii, gain);
            moved[ii] = 2;
          }
        } 

        ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
        ASSERT(CheckRInfo(myrinfo));
      }
      nmoves++;
    }

    graph->nbnd = nbnd;

    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ",
              npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
              nbnd, nmoves, graph->mincut);
      ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
      for (i=0; i<ncon; i++)
        printf("%.3f ", tvec[i]);
      printf("\n");
    }

    if (nmoves == 0)
      break;
  }

  PQueueFree(ctrl, &queue);

  fwspacefree(ctrl, ncon*nparts);
  fwspacefree(ctrl, ncon*nparts);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}

示例#27

0

显示文件

文件： sge.c 项目： gwowen/seismicunix

void
sgefa (float **a, int n, int *ipvt, int *info)
/*****************************************************************************
Gaussian elimination to obtain the LU factorization of a matrix
******************************************************************************
Input:
a		matrix[n][n] to be factored (see notes below)
n		dimension of a

Output:
a		matrix[n][n] factored (see notes below)
ipvt		indices of pivot permutations (see notes below)
info		index of last zero pivot (or -1 if no zero pivots)
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix a
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of a are stored as follows:
a[0][0]    a[1][0]    a[2][0]   ... a[n-1][0]
a[0][1]    a[1][1]    a[2][1]   ... a[n-1][1]
a[0][2]    a[1][2]    a[2][2]   ... a[n-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
a[0][n-1]  a[1][n-1]  a[2][n-1] ... a[n-1][n-1]

Both the factored matrix a and the pivot indices ipvt are required
to solve linear systems of equations via sgesl.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 10/01/89
*****************************************************************************/
{
	int j,k,kp1,l,nm1;
	float t;

	*info = -1;
	nm1 = n-1;
	for (k=0; k<nm1; k++) {
		kp1 = k+1;

		/* find l = pivot index */
		l = k+isamax(n-k,&a[k][k],1);
		ipvt[k] = l;

		/* zero pivot implies this column already triangularized */
		if (a[k][l]==0.0) {
			*info = k;
			continue;
		}

		/* if necessary, interchange */
		if (l!=k) {
			t = a[k][l];
			a[k][l] = a[k][k];
			a[k][k] = t;
		}

		/* compute multipliers */
		t = -1.0/a[k][k];
		sscal(n-k-1,t,&a[k][k+1],1);

		/* row elimination with column indexing */
		for (j=kp1; j<n; j++) {
			t = a[j][l];
			if (l!=k) {
				a[j][l] = a[j][k];
				a[j][k] = t;
			}
			saxpy(n-k-1,t,&a[k][k+1],1,&a[j][k+1],1);
		}
	}
	ipvt[n-1] = n-1;
	if (a[n-1][n-1]==0.0) *info = n-1;
}

示例#28

0

显示文件

文件： ccgraph.c 项目： aceskpark/osfeo

/*************************************************************************
* This function creates the coarser graph
**************************************************************************/
void CreateCoarseGraph(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm)
{
  int i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, mask, dovsize;
  idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj;
  idxtype *cmap, *htable;
  idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum;
  float *nvwgt, *cnvwgt;
  GraphType *cgraph;

  dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0);

  mask = HTLENGTH;
  if (cnvtxs < 8*mask || graph->nedges/graph->nvtxs > 15) { 
    CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match, perm);
    return;
  }

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr));

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  vwgt = graph->vwgt;
  vsize = graph->vsize;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  cmap = graph->cmap;

  /* Initialize the coarser graph */
  cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize);
  cxadj = cgraph->xadj;
  cvwgt = cgraph->vwgt;
  cvsize = cgraph->vsize;
  cnvwgt = cgraph->nvwgt;
  cadjwgtsum = cgraph->adjwgtsum;
  cadjncy = cgraph->adjncy;
  cadjwgt = cgraph->adjwgt;


  iend = xadj[nvtxs];
  auxadj = ctrl->wspace.auxcore; 
  memcpy(auxadj, adjncy, iend*sizeof(idxtype)); 
  for (i=0; i<iend; i++)
    auxadj[i] = cmap[auxadj[i]];

  htable = idxset(mask+1, -1, idxwspacemalloc(ctrl, mask+1)); 

  cxadj[0] = cnvtxs = cnedges = 0;
  for (i=0; i<nvtxs; i++) {
    v = perm[i];
    if (cmap[v] != cnvtxs) 
      continue;

    u = match[v];
    if (ncon == 1)
      cvwgt[cnvtxs] = vwgt[v];
    else
      scopy(ncon, nvwgt+v*ncon, cnvwgt+cnvtxs*ncon);

    if (dovsize)
      cvsize[cnvtxs] = vsize[v];

    cadjwgtsum[cnvtxs] = adjwgtsum[v];
    nedges = 0;

    istart = xadj[v];
    iend = xadj[v+1];
    for (j=istart; j<iend; j++) {
      k = auxadj[j];
      kk = k&mask;
      if ((m = htable[kk]) == -1) {
        cadjncy[nedges] = k;
        cadjwgt[nedges] = adjwgt[j];
        htable[kk] = nedges++;
      }
      else if (cadjncy[m] == k) {
        cadjwgt[m] += adjwgt[j];
      }
      else {
        for (jj=0; jj<nedges; jj++) {
          if (cadjncy[jj] == k) {
            cadjwgt[jj] += adjwgt[j];
            break;
          }
        }
        if (jj == nedges) {
          cadjncy[nedges] = k;
          cadjwgt[nedges++] = adjwgt[j];
        }
      }
    }

    if (v != u) { 
      if (ncon == 1)
        cvwgt[cnvtxs] += vwgt[u];
      else
        saxpy(ncon, 1.0, nvwgt+u*ncon, 1, cnvwgt+cnvtxs*ncon, 1);

      if (dovsize)
        cvsize[cnvtxs] += vsize[u];

      cadjwgtsum[cnvtxs] += adjwgtsum[u];

      istart = xadj[u];
      iend = xadj[u+1];
      for (j=istart; j<iend; j++) {
        k = auxadj[j];
        kk = k&mask;
        if ((m = htable[kk]) == -1) {
          cadjncy[nedges] = k;
          cadjwgt[nedges] = adjwgt[j];
          htable[kk] = nedges++;
        }
        else if (cadjncy[m] == k) {
          cadjwgt[m] += adjwgt[j];
        }
        else {
          for (jj=0; jj<nedges; jj++) {
            if (cadjncy[jj] == k) {
              cadjwgt[jj] += adjwgt[j];
              break;
            }
          }
          if (jj == nedges) {
            cadjncy[nedges] = k;
            cadjwgt[nedges++] = adjwgt[j];
          }
        }
      }

      /* Remove the contracted adjacency weight */
      jj = htable[cnvtxs&mask];
      if (jj >= 0 && cadjncy[jj] != cnvtxs) {
        for (jj=0; jj<nedges; jj++) {
          if (cadjncy[jj] == cnvtxs) 
            break;
        }
      }
      if (jj >= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */
        cadjwgtsum[cnvtxs] -= cadjwgt[jj];
        cadjncy[jj] = cadjncy[--nedges];
        cadjwgt[jj] = cadjwgt[nedges];
      }
    }

    ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d %d %d %d\n", cnvtxs, cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt), adjwgtsum[u], adjwgtsum[v]));

    for (j=0; j<nedges; j++)
      htable[cadjncy[j]&mask] = -1;  /* Zero out the htable */
    htable[cnvtxs&mask] = -1;

    cnedges += nedges;
    cxadj[++cnvtxs] = cnedges;
    cadjncy += nedges;
    cadjwgt += nedges;
  }

  cgraph->nedges = cnedges;

  ReAdjustMemory(graph, cgraph, dovsize);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr));

  idxwspacefree(ctrl, mask+1);

}

示例#29

0

显示文件

文件： context-3.c 项目： 0day-ci/gcc

int
main (int argc, char **argv)
{
    cublasStatus_t s;
    cublasHandle_t h;
    CUcontext pctx;
    CUresult r;
    int i;
    const int N = 256;
    float *h_X, *h_Y1, *h_Y2;
    float *d_X,*d_Y;
    float alpha = 2.0f;
    float error_norm;
    float ref_norm;

    /* Test 3 - OpenACC creates, cuBLAS shares.  */

    acc_set_device_num (0, acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    h_X = (float *) malloc (N * sizeof (float));
    if (h_X == 0)
    {
        fprintf (stderr, "malloc failed: for h_X\n");
        exit (EXIT_FAILURE);
    }

    h_Y1 = (float *) malloc (N * sizeof (float));
    if (h_Y1 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y1\n");
        exit (EXIT_FAILURE);
    }

    h_Y2 = (float *) malloc (N * sizeof (float));
    if (h_Y2 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y2\n");
        exit (EXIT_FAILURE);
    }

    for (i = 0; i < N; i++)
    {
        h_X[i] = rand () / (float) RAND_MAX;
        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
    }

    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
    if (d_X == NULL)
    {
        fprintf (stderr, "copyin error h_X\n");
        exit (EXIT_FAILURE);
    }

    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
    if (d_Y == NULL)
    {
        fprintf (stderr, "copyin error h_Y1\n");
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    s = cublasCreate (&h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasCreate failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));

    context_check (pctx);

    saxpy (N, alpha, h_X, h_Y2);

    error_norm = 0;
    ref_norm = 0;

    for (i = 0; i < N; ++i)
    {
        float diff;

        diff = h_Y1[i] - h_Y2[i];
        error_norm += diff * diff;
        ref_norm += h_Y2[i] * h_Y2[i];
    }

    error_norm = (float) sqrt ((double) error_norm);
    ref_norm = (float) sqrt ((double) ref_norm);

    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
    {
        fprintf (stderr, "math error\n");
        exit (EXIT_FAILURE);
    }

    free (h_X);
    free (h_Y1);
    free (h_Y2);

    acc_free (d_X);
    acc_free (d_Y);

    context_check (pctx);

    s = cublasDestroy (h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasDestroy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_shutdown (acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    if (pctx)
    {
        fprintf (stderr, "Unexpected context\n");
        exit (EXIT_FAILURE);
    }

    return EXIT_SUCCESS;
}