Example #1
0
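// Parse the RPAL production  T -> Ta ( ',' Ta )* .  Each comma-separated Ta
// is read in turn; if more than one was read, an n-ary "tau" node is built
// over the n subtrees (e.g. three Ta's yield build("tau", 3)).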
void RpalParser::T()
{
  pushProc("T()");
  Ta();
  int n = 1;
  while (_nt == ",")
  {
    read_token(",");
    Ta();
    n++;
  }
  if (n > 1)
  {
    build("tau", n);
  }
  popProc("T()");
}
// One relaxation pass, run per worker thread: for each region assigned to
// this thread, rebuild the 8x8 normal equations from its point matches
// (partners evaluated with their current solutions in Xin) and solve for an
// updated homography written to Xout.
static void* _OnePass_HFromH( void* ithr )
{
	CThrdat	&me = vthr[(int)(long)ithr];

	int	i1[5] = { 0, 1, 2, 6, 7 },
		i2[5] = { 3, 4, 5, 6, 7 };

	for( int i = me.r0; i < me.rlim; ++i ) {

		const RGN&	R = vRgn[i];

		if( R.itr < 0 )
			continue;

		int	nc = myc[i].size();

		if( nc < 4 )
			continue;

		double	*RHS = &(*Xout)[R.itr * 8];
		double	LHS[8*8];
		THmgphy	Ta( &(*Xin)[R.itr * 8] );
		THmgphy	Tb;
		int		lastb	= -1;	// cache Tb

		memset( RHS, 0, 8   * sizeof(double) );
		memset( LHS, 0, 8*8 * sizeof(double) );

		for( int j = 0; j < nc; ++j ) {

			const Constraint&	C = vAllC[myc[i][j]];
			Point				A, B;

			// Mixing old and new solutions is related to
			// "successive over relaxation" methods in other
			// iterative solution schemes. Experimentally,
			// I like w = 0.9 (same layer), 0.9 (down).
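			// Concretely, the target for this region's local point is a
			// blend of where the partner's current transform places the
			// matching point (B) and where this region's own current
			// transform places it (A):
			//   B' = w*B + (1 - w)*A
			// B' then becomes the right-hand side of the rows below.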

			if( C.r1 == i ) {

				int	bitr = vRgn[C.r2].itr;

				if( bitr < 0 )
					continue;

				if( C.r2 != lastb ) {
					Tb.CopyIn( &(*Xin)[bitr * 8] );
					lastb = C.r2;
				}
				Tb.Transform( B = C.p2 );
				Ta.Transform( A = C.p1 );
				B.x = w * B.x + (1 - w) * A.x;
				B.y = w * B.y + (1 - w) * A.y;
				A = C.p1;	// rows below use the raw local point
			}
			else {

				int	bitr = vRgn[C.r1].itr;

				if( bitr < 0 )
					continue;

				if( C.r1 != lastb ) {
					Tb.CopyIn( &(*Xin)[bitr * 8] );
					lastb = C.r1;
				}
				Tb.Transform( B = C.p1 );
				Ta.Transform( A = C.p2 );
				B.x = w * B.x + (1 - w) * A.x;
				B.y = w * B.y + (1 - w) * A.y;
				A = C.p2;	// rows below use the raw local point
			}

			double	v[5] = { A.x, A.y, 1.0, -A.x*B.x, -A.y*B.x };

			AddConstraint_Quick( LHS, RHS, 8, 5, i1, v, B.x );

			v[3] = -A.x*B.y;
			v[4] = -A.y*B.y;

			AddConstraint_Quick( LHS, RHS, 8, 5, i2, v, B.y );
		}

		// For the first EDITDELAY passes, accept the quick solve as-is.

		if( gpass < EDITDELAY ) {
			Solve_Quick( LHS, RHS, 8 );
			continue;
		}

		// Afterward, fall back to a same-layer-only fit whenever the
		// solve fails or the squareness tolerance is exceeded.

		if( !Solve_Quick( LHS, RHS, 8 ) ||
			THmgphy( RHS ).Squareness() > SQRTOL ) {

			HFromH_SLOnly( RHS, i, (int)(long)ithr );
		}
	}

	return NULL;
}
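Each match above contributes two linearized rows of the 8-parameter homography fit (the ninth coefficient is fixed at 1):

	h0*A.x + h1*A.y + h2 - h6*A.x*B.x - h7*A.y*B.x = B.x
	h3*A.x + h4*A.y + h5 - h6*A.x*B.y - h7*A.y*B.y = B.y

which is exactly what the i1/i2 index sets and the v[] vectors encode. As a minimal sketch only, and assuming AddConstraint_Quick accumulates normal equations (LHS += a*a^T, RHS += a*rhs over the listed columns) rather than anything more elaborate, such a sparse row could be folded in like this:

// Hypothetical stand-in for AddConstraint_Quick under the assumption that it
// accumulates normal equations for one sparse constraint row.
static void AccumSparseRow_Sketch(
	double			*LHS,	// n x n normal matrix, row-major
	double			*RHS,	// length-n right-hand side
	int				n,		// number of unknowns (8 here)
	int				nnz,	// nonzeros in this row
	const int		*cols,	// their column indices (i1 or i2 above)
	const double	*v,		// their values
	double			rhs )	// target value (B.x or B.y above)
{
	for( int p = 0; p < nnz; ++p ) {

		RHS[cols[p]] += v[p] * rhs;

		for( int q = 0; q < nnz; ++q )
			LHS[cols[p]*n + cols[q]] += v[p] * v[q];
	}
}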
Example #3
0
TA::Tensor<float> SchwarzScreen::norm_estimate(
    madness::World &world, std::vector<gaussian::Basis> const &bs_array,
    TA::Pmap const &pmap, bool replicate) const {
  const auto ndims = bs_array.size();
  auto trange = gaussian::detail::create_trange(bs_array);
  auto norms = TA::Tensor<float>(trange.tiles_range(), 0.0);

  if (ndims == 3) {
    // Three-index case: tile ord = (a, b, c) gets the estimate
    // sqrt(Ta(a) * Tbc(b, c)).
    auto const &Ta = Qbra_->Qtile();
    auto const &Tbc = Qket_->Qtile();
    auto ord = 0ul;
    for (auto a = 0l; a < Ta.size(); ++a) {
      const float a_val = Ta(a);
      for (auto b = 0l; b < Tbc.rows(); ++b) {
        for (auto c = 0l; c < Tbc.cols(); ++c, ++ord) {
          if (pmap.is_local(ord)){
            norms[ord] = std::sqrt(a_val * Tbc(b, c));
          }
        }
      }
    }
  } else if (ndims == 4) {
    // Four-index case: tile (a, b, c, d) gets the estimate
    // sqrt(Tab(a, b) * Tcd(c, d)).
    auto const &Tab = Qbra_->Qtile();
    auto const &Tcd = Qket_->Qtile();
    auto ord = 0ul;
    for (auto a = 0l; a < Tab.rows(); ++a) {
      for (auto b = 0l; b < Tab.cols(); ++b) {
        const float ab = Tab(a, b);
        for (auto c = 0l; c < Tcd.rows(); ++c) {
          for (auto d = 0l; d < Tcd.cols(); ++d, ++ord) {
            if (pmap.is_local(ord)) {
              norms[ord] = std::sqrt(ab * Tcd(c, d));
            }
          }
        }
      }
    }
  } else {
    // Any other dimensionality: defer to the generic base-class estimate.
    norms = Screener::norm_estimate(world, bs_array, pmap);
  }
  world.gop.fence();

  // If we want to replicate and the tensor has more elements than an int can
  // hold, we have to do the sum in multiple pieces, because MPI_Isend can only
  // send an int-sized count of elements in a single message (see the
  // standalone sketch after this function).
  if (replicate) {  // construct the sum
    // First get the size as a 64-bit int; if that overflows, the tensor
    // probably wasn't going to fit on one node anyway (as of 2017; maybe one
    // day I'll be wrong).
    int64_t size = norms.size();

    const int64_t int_max = std::numeric_limits<int>::max();
    if (size < int_max) {  // If size fits into an int then life is easy
      world.gop.sum(norms.data(), size);
    } else {
      // Testing on NewRiver gave failures when trying to write in chunks of
      // both int_max and int_max/2. For now I'll be conservative and just
      // write in small chunks. Writing in chunks of int_max/10 is slow, but
      // it worked on NR.
      const int64_t write_size = int_max / 10;
      auto i = 0;
      while (size > write_size) {
        const auto next_ptr = norms.data() + i * write_size;
        world.gop.sum(next_ptr, write_size);
        size -= write_size;
        ++i;
      }

      // get the remaining elements
      world.gop.sum(norms.data() + i * write_size, size);
    }
  }
  world.gop.fence();

  return norms;
}
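The chunked reduction above works around the int-sized count parameter of MPI calls. A standalone sketch of the same idea against raw MPI (a hypothetical helper, not the madness::World interface used above):

#include <mpi.h>

#include <algorithm>
#include <cstdint>
#include <limits>

// Sum a large float buffer across all ranks, in pieces small enough for
// MPI's int count argument; mirrors the conservative int_max/10 chunking
// chosen above.
void allreduce_sum_chunked(float *data, int64_t n) {
  const int64_t chunk = std::numeric_limits<int>::max() / 10;
  for (int64_t off = 0; off < n; off += chunk) {
    const int cnt = static_cast<int>(std::min(chunk, n - off));
    MPI_Allreduce(MPI_IN_PLACE, data + off, cnt, MPI_FLOAT, MPI_SUM,
                  MPI_COMM_WORLD);
  }
}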
// Fallback fit for region i that uses only matches whose partner lies in the
// same layer (same z). Regions that still cannot be solved are recorded in
// the thread's Rkil list; the rest are noted in Rslo.
static void HFromH_SLOnly( double *RHS, int i, int ithr )
{
	const RGN&	R = vRgn[i];

	int	i1[5] = { 0, 1, 2, 6, 7 },
		i2[5] = { 3, 4, 5, 6, 7 };

	int	nc = myc[i].size();

	double	LHS[8*8];
	THmgphy	Ta( &(*Xin)[R.itr * 8] );
	THmgphy	Tb;
	int		lastb	= -1,	// cache Tb
			nSLc	= 0;

	memset( RHS, 0, 8   * sizeof(double) );
	memset( LHS, 0, 8*8 * sizeof(double) );

	for( int j = 0; j < nc; ++j ) {

		const Constraint&	C = vAllC[myc[i][j]];
		Point				A, B;

		// Mixing old and new solutions is related to
		// "successive over relaxation" methods in other
		// iterative solution schemes. Experimentally,
		// I like w = 0.9 (same layer), 0.9 (down).

		if( C.r1 == i ) {

			if( vRgn[C.r2].z != R.z )
				continue;

			int	bitr = vRgn[C.r2].itr;

			if( bitr < 0 )
				continue;

			if( C.r2 != lastb ) {
				Tb.CopyIn( &(*Xin)[bitr * 8] );
				lastb = C.r2;
			}
			Tb.Transform( B = C.p2 );
			Ta.Transform( A = C.p1 );
			B.x = w * B.x + (1 - w) * A.x;
			B.y = w * B.y + (1 - w) * A.y;
			A = C.p1;
		}
		else {

			if( vRgn[C.r1].z != R.z )
				continue;

			int	bitr = vRgn[C.r1].itr;

			if( bitr < 0 )
				continue;

			if( C.r1 != lastb ) {
				Tb.CopyIn( &(*Xin)[bitr * 8] );
				lastb = C.r1;
			}
			Tb.Transform( B = C.p1 );
			Ta.Transform( A = C.p2 );
			B.x = w * B.x + (1 - w) * A.x;
			B.y = w * B.y + (1 - w) * A.y;
			A = C.p2;
		}

		++nSLc;

		double	v[5] = { A.x, A.y, 1.0, -A.x*B.x, -A.y*B.x };

		AddConstraint_Quick( LHS, RHS, 8, 5, i1, v, B.x );

		v[3] = -A.x*B.y;
		v[4] = -A.y*B.y;

		AddConstraint_Quick( LHS, RHS, 8, 5, i2, v, B.y );
	}

	// Fewer than 4 same-layer matches, a failed solve, or a squareness
	// violation: record the region in Rkil; otherwise note in Rslo that
	// it was solved from same-layer matches only.

	if( nSLc < 4 ||
		!Solve_Quick( LHS, RHS, 8 ) ||
		THmgphy( RHS ).Squareness() > SQRTOL ) {

		vthr[ithr].Rkil.push_back( i );
	}
	else
		vthr[ithr].Rslo.push_back( i );
}