Ejemplo n.º 1
0
// Gather one (size,time) sample per message of the given source_kind, binned by locality.
//
// Parameters:
//   event_sorted_history    - event histories indexed as [thread][kind]; only the first thread of each
//                             rank (index rank*threads_per_rank) is scanned as a message source
//   ranks_per_node          - used to decide whether two ranks share a node
//   threads_per_rank        - number of history entries per rank (must be > 1)
//   source_kind             - one of request_send_kind, response_send_kind, output_send_kind
//   steps                   - number of dependency hops to follow (1 or 2; 2 requires request_send_kind)
//   slice_compression_ratio - 37 ratios indexed by section.sum(), applied to response sizes only
//
// Returns a table with keys "same-rank", "same-node", and "different", each holding (size,time) pairs.
// Sizes are estimates; times are clamped to be nonnegative.
static Hashtable<string,Array<const Vector<float,2>>>
message_statistics(const vector<vector<Array<const history_t>>>& event_sorted_history,
                   const int ranks_per_node, const int threads_per_rank,
                   const time_kind_t source_kind, const int steps, RawArray<const double> slice_compression_ratio) {
  GEODE_ASSERT(ranks_per_node>=1);
  GEODE_ASSERT(threads_per_rank>1);
  GEODE_ASSERT(vec(request_send_kind,response_send_kind,output_send_kind).contains(source_kind));
  const int ranks = CHECK_CAST_INT(event_sorted_history.size())/threads_per_rank;
  GEODE_ASSERT((int)event_sorted_history.size()==ranks*threads_per_rank);
  GEODE_ASSERT(slice_compression_ratio.size()==37);
  GEODE_ASSERT(steps==1 || steps==2);

  // One bin per locality class: 0 = same rank, 1 = same node, 2 = different nodes
  Vector<Array<Vector<float,2>>,3> data;

  // Visit every message sent from the first thread of each rank
  for (int source_rank=0;source_rank<ranks;source_rank++) {
    const int source_thread = source_rank*threads_per_rank;
    for (const history_t& source : event_sorted_history[source_thread][source_kind]) {
      // Follow the dependency chain (direction 1) to the matching event, one or two hops away
      auto deps = event_dependencies(event_sorted_history,1,source_thread,source_kind,source);
      GEODE_ASSERT(deps.size()==1);
      if (steps==2) {
        GEODE_ASSERT(source_kind == request_send_kind);
        deps = event_dependencies(event_sorted_history,1,deps[0].x,deps[0].y,deps[0].z);
        GEODE_ASSERT(deps.size()==1);
      }
      const int target_thread = deps[0].x;
      const int target_rank = target_thread/threads_per_rank;
      GEODE_ASSERT(target_thread==target_rank*threads_per_rank);
      const history_t& target = deps[0].z;

      // Estimated message size: requests are a fixed 8, everything else is a (possibly compressed) block
      double size = 8;
      if (source_kind != request_send_kind) {
        const section_t section = parse_section(source.event);
        const Vector<uint8_t,4> block = parse_block(source.event);
        double compression_ratio = 1;
        if (source_kind == response_send_kind) {
          compression_ratio = slice_compression_ratio[section.sum()];
          GEODE_ASSERT(0<compression_ratio && compression_ratio<1);
        }
        size = sizeof(Vector<super_t,2>)*block_shape(section.shape(),block).product()*compression_ratio;
      }

      // Start-to-start message time, never negative
      const double time = max(0,target.start.seconds()-source.start.seconds());

      // Pick the locality bin and record the sample
      int type = 2;
      if (source_rank==target_rank)
        type = 0;
      else if (source_rank/ranks_per_node==target_rank/ranks_per_node)
        type = 1;
      data[type].append(Vector<float,2>(size,time));
    }
  }

  // Package the bins under readable names for Python
  Hashtable<string,Array<const Vector<float,2>>> table;
  table["same-rank"] = data[0];
  table["same-node"] = data[1];
  table["different"] = data[2];
  return table;
}
Ejemplo n.º 2
0
// Compute rank-to-rank bandwidth estimates localized in time (dimensions: epoch,src,dst)
//
// Parameters:
//   event_sorted_history - event histories indexed as [thread][kind]; only the first thread of each rank
//                          (index threads*rank) is scanned for receive events
//   threads              - number of history entries per rank (must be > 1)
//   dt_seconds           - length of one epoch in seconds
//
// Returns an array indexed (epoch,source_rank,target_rank).  Each entry accumulates the estimated message
// size attributed to that epoch, divided by dt_seconds.  Summary statistics are printed to cout.
static Array<double,3> estimate_bandwidth(const vector<vector<Array<const history_t>>>& event_sorted_history,
                                          const int threads, const double dt_seconds) {
  Log::Scope scope("estimate bandwidth");
  GEODE_ASSERT(threads>1);
  const int ranks = CHECK_CAST_INT(event_sorted_history.size())/threads;
  GEODE_ASSERT((int)event_sorted_history.size()==ranks*threads);
  // Epoch length in the same units as the .us time fields (printed below as 1e-6*elapsed seconds)
  const double dt = 1e6*dt_seconds;
  // Count how many epochs we need
  int64_t elapsed = 0;
  for (auto& thread : event_sorted_history)
    for (auto& events : thread)
      if (events.size())
        elapsed = max(elapsed,events.back().end.us);
  const int epochs = int(ceil(elapsed/dt)); // Last epoch is incomplete
  // Statistics: responses, outputs, total (indices 0, 1, 2)
  Vector<uint64_t,3> messages;
  Vector<double,3> total_data, total_time, max_time;
  int64_t max_time_travel = 0;
  // Hard-coded compression estimate applied to response messages only
  const double compression_ratio = .35;
  // Traverse each large message, accumulating total data sent
  Array<double,3> bandwidths(epochs,ranks,ranks);
  for (const int target_rank : range(ranks))
    for (const int kind : vec(response_recv_kind,output_recv_kind))
      for (const history_t& target : event_sorted_history[threads*target_rank][kind]) {
        // Walk backwards (direction -1) from the receive to the single event it depends on
        const auto deps = event_dependencies(event_sorted_history,-1,threads*target_rank,kind,target);
        GEODE_ASSERT(deps.size()==1);
        const int source_thread = deps[0].x;
        const int source_rank = source_thread/threads;
        GEODE_ASSERT(source_thread==source_rank*threads);
        const history_t& source = deps[0].z;
        // Statistics slot: false (0) = response, true (1) = output
        const bool which = kind==output_recv_kind;
        messages[which]++;
        // Estimate message size
        const section_t section = parse_section(source.event);
        const Vector<uint8_t,4> block = parse_block(source.event);
        const double data_size = sizeof(Vector<super_t,2>)*block_shape(section.shape(),block).product()*(kind==response_recv_kind?compression_ratio:1);
        total_data[which] += data_size;
        // Distribute data amongst all overlapped epochs
        // time_travel > 0 means the receive ended before the source event started
        const int64_t time_travel = source.start.us - target.end.us;
        max_time_travel = max(max_time_travel,time_travel);
        // Message interval measured in epochs
        Box<double> box(source.start.us/dt,target.end.us/dt);
        // Give tiny (or inverted) intervals a minimum width so the rate below stays finite
        if (box.size()<=1e-7)
          box = Box<double>(box.center()).thickened(.5e-7);
        total_time[which] += box.size();
        max_time[which] = max(max_time[which],box.size());
        const double rate = data_size/box.size();
        for (const int epoch : range(max(0,int(box.min)),min(epochs,int(box.max)+1)))
          bandwidths(epoch,source_rank,target_rank) += rate*Box<double>::intersect(box,Box<double>(epoch,epoch+1)).size();
      }

  // Rescale
  bandwidths /= dt_seconds;

  // Print statistics
  cout << "dt = "<<dt_seconds<<" s"<<endl;
  cout << "elapsed = "<<1e-6*elapsed<<" s"<<endl;
  cout << "ranks = "<<ranks<<endl;
  // Fill in the "total" slot; this presumably relies on slot 2 still being zero here - TODO confirm
  // that Vector default-initializes to zero
  messages[2] = messages.sum();
  total_data[2] = total_data.sum();
  total_time[2] = total_time.sum();
  max_time[2] = max_time.max();
  for (int i=0;i<3;i++) {
    cout << (i==0?"responses:":i==1?"outputs:":"total:") << endl;
    cout << "  messages = "<<messages[i]<<endl;
    cout << "  total data = "<<total_data[i]<<endl;
    // Times were accumulated in epoch units, so scale by dt_seconds for printing
    cout << "  total time = "<<dt_seconds*total_time[i]<<endl;
    // NOTE(review): divides by messages[i], which may be zero if no message of this kind was seen
    cout << "  average time = "<<dt_seconds*total_time[i]/messages[i]<<endl;
    cout << "  max time = "<<dt_seconds*max_time[i]<<endl;
    cout << "  average bandwidth = "<<total_data[i]/(1e-6*elapsed)<<endl;
    cout << "  average bandwidth / ranks = "<<total_data[i]/(1e-6*elapsed*ranks)<<endl;
  }
  cout << "max time travel = "<<1e-6*max_time_travel<<endl;
  cout << "bandwidth array stats:"<<endl;
  const double sum = bandwidths.sum();
  cout << "  sum = "<<sum<<endl;
  // NOTE(review): epochs is zero when there are no events, which makes these averages non-finite
  cout << "  average rank bandwidth = "<<sum/epochs/ranks<<endl;
  cout << "  average rank-to-rank bandwidth = "<<sum/epochs/sqr(ranks)<<endl;

  // All done
  return bandwidths;
}
Ejemplo n.º 3
0
// Convert a 128-bit unsigned integer to its decimal string.  Only values that fit in 64 bits are
// supported; anything larger trips the assertion.
string GEODE_UNUSED str(uint128_t n) {
  uint64_t lo(n);
  GEODE_ASSERT(lo==n);
  // lo is unsigned, so use an unsigned conversion: "%lld" would print values >= 2^63 as negative.
  // The cast makes the argument type match the conversion on every platform.
  return format("%llu",(unsigned long long)lo);
}
Ejemplo n.º 4
0
// List the (kind,event) pairs adjacent to the given (kind,event) in the dependency graph.
//
// Parameters:
//   direction - must be +1 or -1.  From the cases below, +1 follows the pipeline forward
//               (e.g. request_send -> response_send -> response_recv) and -1 walks it backward.
//   kind      - the kind of the given event
//   event     - the encoded event; its section, block, dimensions, and ekind are parsed out below
//
// Returns an empty array for (direction,kind) combinations that have no case below.
static Array<Tuple<time_kind_t,event_t>> dependencies(const int direction, const time_kind_t kind, const event_t event) {
  GEODE_ASSERT(abs(direction)==1);
  // The switch below keys on direction*kind, so +kind and -kind must be distinguishable
  static_assert(compress_kind==0,"Verify that -kind != kind for kinds we care about");

  // Parse event
  const section_t section = parse_section(event);
  const auto block = parse_block(event);
  // The dimensions byte packs a symmetry in its high bits and a dimension index in its low two bits
  const uint8_t dimensions = parse_dimensions(event),
                parent_to_child_symmetry = dimensions>>2,
                dimension = dimensions&3;
  const auto ekind = event&ekind_mask;

  // See mpi/graph for summarized explanation
  Array<Tuple<time_kind_t,event_t>> deps;
  // Positive cases are forward dependencies, negative cases are backward dependencies
  switch (direction*kind) {
    case -allocate_line_kind: {
      // Nothing precedes line allocation
      GEODE_ASSERT(ekind==line_ekind);
      break; }
    case  response_recv_kind:
    case -request_send_kind: {
      // Block-lines event -> the corresponding line event in the parent section
      GEODE_ASSERT(ekind==block_lines_ekind);
      const auto other_kind = kind==response_recv_kind ? schedule_kind : allocate_line_kind;
      const auto parent_section = section.parent(dimension).transform(symmetry_t::invert_global(parent_to_child_symmetry));
      const auto permutation = section_t::quadrant_permutation(parent_to_child_symmetry);
      const uint8_t parent_dimension = permutation.find(dimension);
      const auto block_base = Vector<uint8_t,4>(block.subset(permutation)).remove_index(parent_dimension);
      deps.append(tuple(other_kind,line_event(parent_section,parent_dimension,block_base)));
      break; }
    case  request_send_kind: {
      GEODE_ASSERT(ekind==block_lines_ekind);
      deps.append(tuple(response_send_kind,event));
      break; }
    case -response_send_kind:
    case  response_send_kind: {
      GEODE_ASSERT(ekind==block_lines_ekind);
      deps.append(tuple(direction<0?request_send_kind:response_recv_kind,event));
      break; }
    case -response_recv_kind: {
      GEODE_ASSERT(ekind==block_lines_ekind);
      deps.append(tuple(response_send_kind,event));
      break; }
    case  allocate_line_kind:
    case -schedule_kind: {
      // Line event -> one block-lines event in the child section per block along child_dimension
      GEODE_ASSERT(ekind==line_ekind);
      // NOTE(review): sections with sum 35 get no child dependencies; presumably they have no
      // child slice to read from - confirm
      if (section.sum()!=35) {
        const auto other_kind = kind==allocate_line_kind ? request_send_kind : response_recv_kind;
        const auto child_section = section.child(dimension).standardize<8>();
        const auto permutation = section_t::quadrant_permutation(symmetry_t::invert_global(child_section.y));
        const uint8_t child_dimension = permutation.find(dimension);
        // Note: this local shadows the outer uint8_t named dimensions
        const dimensions_t dimensions(child_section.y,child_dimension);
        auto child_block = Vector<uint8_t,4>(block.slice<0,3>().insert(0,dimension).subset(permutation));
        for (const uint8_t b : range(section_blocks(child_section.x)[child_dimension])) {
          child_block[child_dimension] = b;
          deps.append(tuple(other_kind,block_lines_event(child_section.x,dimensions,child_block)));
        }
      }
      break; }
    case  schedule_kind: {
      GEODE_ASSERT(ekind==line_ekind);
      deps.append(tuple(compute_kind,event)); // Corresponds to many different microline compute events
      break; }
    case -compute_kind: // Note: all microline compute events have the same line event
    case  compute_kind: {
      GEODE_ASSERT(ekind==line_ekind);
      deps.append(tuple(direction<0?schedule_kind:wakeup_kind,event));
      break; }
    case -wakeup_kind: {
      GEODE_ASSERT(ekind==line_ekind);
      deps.append(tuple(compute_kind,event)); // Corresponds to many different microline compute events
      break; }
    case  wakeup_kind: {
      // Line event -> one output send per block along the line's dimension
      GEODE_ASSERT(ekind==line_ekind);
      const auto block_base = block.slice<0,3>();
      for (const uint8_t b : range(section_blocks(section)[dimension]))
        deps.append(tuple(output_send_kind,block_line_event(section,dimension,block_base.insert(b,dimension))));
      break; }
    case -output_send_kind:
    case  output_send_kind: {
      GEODE_ASSERT(ekind==block_line_ekind);
      if (direction<0)
        // Backward: collapse the block-line event to the line event that woke it up
        deps.append(tuple(wakeup_kind,line_event(section,dimension,block.remove_index(dimension))));
      else
        deps.append(tuple(output_recv_kind,event));
      break; }
    case -output_recv_kind:
    case  output_recv_kind: {
      GEODE_ASSERT(ekind==block_line_ekind);
      deps.append(tuple(direction<0?output_send_kind:snappy_kind,event));
      break; }
    case -snappy_kind:
    case  snappy_kind: {
      // Nothing follows snappy_kind in the forward direction
      GEODE_ASSERT(ekind==block_line_ekind);
      if (direction<0)
        deps.append(tuple(output_recv_kind,event));
      break; }
    default:
      // Any other kind has no dependencies in either direction
      break;
  }
  return deps;
}
Ejemplo n.º 5
0
// NumpyArrayType specialization for Rotation<TV>, backed by the static type object pointer t.
template<class TV> struct NumpyArrayType<Rotation<TV>> {
  static PyTypeObject* t;

  // Return t with its reference count incremented.  Asserts that t has been set.
  static PyTypeObject* type() {
    GEODE_ASSERT(t);
    Py_INCREF(t);
    return t;
  }
};
Ejemplo n.º 6
0
// Return a read-only flat view of the data stored for the block with the given local id.
// Asserts that the block has no missing dimensions.
RawArray<const Vector<super_t,2>> readable_block_store_t::get_raw_flat(local_id_t local_id) const {
  const auto& info = block_info.get(local_id);
  GEODE_ASSERT(!info.missing_dimensions);
  // info.nodes selects this block's part of the shared all_data buffer
  const auto& nodes = info.nodes;
  return all_data.slice(nodes);
}
Ejemplo n.º 7
0
// Decode a raw byte blob into a compact_blob_t by copying its bytes.
// Asserts that the blob has exactly sizeof(compact_blob_t) bytes; the failure message includes the
// expected size, the actual size, and the blob contents.
compact_blob_t supertensor_index_t::block_location(RawArray<const uint8_t> blob) {
  compact_blob_t b;
  // Both sizes are cast to int so the arguments match the %d conversions (sizeof yields size_t)
  GEODE_ASSERT(blob.size()==sizeof(b),format("expected size %d, got size %d, data %s",(int)sizeof(b),(int)blob.size(),str(blob)));
  // Copy rather than reinterpret the pointer, so alignment of blob.data() does not matter
  memcpy(&b,blob.data(),sizeof(b));
  return b;
}
Ejemplo n.º 8
0
 // Evaluate on an NdArray input: check the shape against xshape, then forward to the overload that
 // takes the data viewed as an (n+3) x d array.
 T operator()(NdArray<const T> x) const {
   GEODE_ASSERT(x.shape==xshape);
   return this->operator()(x.flat.reshape(n+3,d));
 }
Ejemplo n.º 9
0
// Add the damping gradient to the given matrix.  Not implemented for SurfacePins: after checking
// that the matrix size matches mass.size(), this always hits GEODE_NOT_IMPLEMENTED.
void SurfacePins::add_damping_gradient(SolidMatrix<TV>& matrix) const {
  GEODE_ASSERT(matrix.size()==mass.size());
  GEODE_NOT_IMPLEMENTED();
}
Ejemplo n.º 10
0
// Build an index over the given sections: keep a reference to them and precompute section_offset
// via make_offsets.
supertensor_index_t::supertensor_index_t(const sections_t& sections)
  : sections(ref(sections))
  , section_offset(make_offsets(sections)) {
  // Make sure we have a complete slice
  // i.e. the sections reached from the root section_t() at this slice must equal the ones we were given
  GEODE_ASSERT(descendent_sections(section_t(),sections.slice).at(sections.slice)->sections==sections.sections);
}
Ejemplo n.º 11
0
// Python-facing wrapper around snap_divs for testing.  The input is validated first: values must have
// at least one row, and its last row must not be all zeros.
static Array<Quantized> snap_divs_test(RawArray<mp_limb_t,2> values, const bool take_sqrt) {
  GEODE_ASSERT(values.m && !values.back().contains_only(0));
  // One output per row of values, excluding the last row
  const auto outputs = values.m-1;
  Array<Quantized> result(outputs);
  snap_divs(result,values,take_sqrt);
  return result;
}
Ejemplo n.º 12
0
// Evaluate a ratio of polynomials at the points X, resolving a zero denominator by adding
// successive levels of perturbation.
//
// Parameters:
//   result    - receives the quantized quotients via snap_divs (r = result.size() of them)
//   ratio     - callback that fills r+1 rows of limbs from the evaluation points; row r is treated
//               as the denominator and rows 0..r-1 as numerators
//   degree    - degree bound used to size the precision and the interpolation
//   X         - the input points; each supplies value() and seed()
//   take_sqrt - forwarded to snap_divs
//
// Returns true if the first nonzero denominator term found is positive.
// Throws OverflowError if a numerator term is nonzero where the denominator term is zero.
template<class PerturbedT> bool perturbed_ratio(RawArray<Quantized> result, void(*const ratio)(RawArray<mp_limb_t,2>,RawArray<const Vector<Exact<1>,PerturbedT::m>>), const int degree, RawArray<const PerturbedT> X, const bool take_sqrt) {
  const int m = PerturbedT::m;
  typedef Vector<Exact<1>,m> EV;
  const int n = X.size();
  const int r = result.size();

  if (verbose)
    cout << "perturbed_ratio:\n  degree = "<<degree<<"\n  X = "<<X<<endl;

  // Check if the ratio is nonsingular before perturbation
  const auto Z = GEODE_RAW_ALLOCA(n,EV);
  const int precision = degree*Exact<1>::ratio;
  {
    for (int i=0;i<n;i++)
      Z[i] = EV(to_exact(X[i].value()));
    const auto R = GEODE_RAW_ALLOCA((r+1)*precision,mp_limb_t).reshape(r+1,precision);
    ratio(R,Z);
    // Row r is the denominator: if it is nonzero, no perturbation is needed
    if (const int sign = mpz_sign(R[r])) {
      snap_divs(result,R,take_sqrt);
      return sign>0;
    }
  }

  // Check the first perturbation level with specialized code
  vector<Vector<ExactInt,m>> Y(n); // perturbations
  {
    // Compute the first level of perturbations
    for (int i=0;i<n;i++)
      Y[i] = perturbation<m>(1,X[i].seed());
    if (verbose)
      cout << "  Y = "<<Y<<endl;

    // Evaluate polynomial at epsilon = 1, ..., degree
    // Extra limbs leave room for the scaling applied by the interpolation routine below
    const int scaled_precision = precision+factorial_limbs(degree);
    const auto values = GEODE_RAW_ALLOCA(degree*(r+1)*scaled_precision,mp_limb_t).reshape(degree,r+1,scaled_precision);
    for (int j=0;j<degree;j++) {
      for (int i=0;i<n;i++)
        Z[i] = EV(to_exact(X[i].value())+(j+1)*Y[i]);
      ratio(values[j],Z);
      if (verbose)
        cout << "  ratio("<<Z<<") = "<<mpz_str(values[j])<<endl;
    }

    // Find interpolating polynomials, overriding the input with the result.
    for (int k=0;k<=r;k++) {
      scaled_univariate_in_place_interpolating_polynomial(values.sub<1>(k));
      if (verbose)
        cout << "  coefs "<<k<<" = "<<mpz_str(values.sub<1>(k))<<endl;
    }

    // Find the largest (lowest degree) nonzero denominator coefficient.  If we detect an infinity during this process, explode.
    for (int j=0;j<degree;j++) {
      if (const int sign = mpz_sign(values(j,r))) { // We found a nonzero, now compute the rounded ratio
        snap_divs(result,values[j],take_sqrt);
        return sign>0;
      } else
        // Denominator coefficient is zero here, so every numerator coefficient must be zero too
        for (int k=0;k<r;k++)
          if (mpz_nonzero(values(j,k)))
            throw OverflowError(format("perturbed_ratio: infinite result in l'Hopital expansion: %s/0",mpz_str(values(j,k))));
    }
  }

  {
    // Add one perturbation level after another until we hit a nonzero denominator.  Our current implementation duplicates
    // work from one iteration to the next for simplicity, which is fine since the first iteration suffices almost always.
    for (int d=2;;d++) {
      // Compute the next level of perturbations
      // Y is laid out level by level: level v of point i lives at Y[v*n+i]
      Y.resize(d*n);
      for (int i=0;i<n;i++)
        Y[(d-1)*n+i] = perturbation<m>(d,X[i].seed());

      // Evaluate polynomial at every point in an "easy corner"
      const auto lambda = monomials(degree,d);
      const Array<mp_limb_t,3> values(lambda.m,r+1,precision,uninit);
      for (int j=0;j<lambda.m;j++) {
        for (int i=0;i<n;i++)
          Z[i] = EV(to_exact(X[i].value())+lambda(j,0)*Y[i]);
        for (int v=1;v<d;v++)
          for (int i=0;i<n;i++)
            Z[i] += EV(lambda(j,v)*Y[v*n+i]);
        ratio(values[j],Z);
      }

      // Find interpolating polynomials, overriding the input with the result.
      for (int k=0;k<=r;k++)
        in_place_interpolating_polynomial(degree,lambda,values.sub<1>(k));

      // Find the largest nonzero denominator coefficient
      int sign = 0;
      int nonzero = -1;
      for (int j=0;j<lambda.m;j++)
        if (const int s = mpz_sign(values(j,r))) {
          if (check) // Verify that a term which used to be zero doesn't become nonzero
            GEODE_ASSERT(lambda(j,d-1));
          if (nonzero<0 || monomial_less(lambda[nonzero],lambda[j])) {
            sign = s;
            nonzero = j;
          }
        }

      // Verify that numerator coefficients are zero for all large monomials
      // NOTE(review): this message formats the coefficient with str, while the first-level check above
      // uses mpz_str - confirm whether the difference is intended
      for (int j=0;j<lambda.m;j++)
        if (nonzero<0 || monomial_less(lambda[nonzero],lambda[j]))
          for (int k=0;k<r;k++)
            if (mpz_nonzero(values(j,k)))
              throw OverflowError(format("perturbed_ratio: infinite result in l'Hopital expansion: %s/0",str(values(j,k))));

      // If we found a nonzero, compute the result
      if (nonzero >= 0) {
        snap_divs(result,values[nonzero],take_sqrt);
        return sign>0;
      }

      // If we get through two levels without fixing the degeneracy, run a fast, strict identity test to make sure we weren't handed an impossible problem.
      if (d==2)
        assert_last_nonzero(ratio,values[0],X,"perturbed_ratio: identically zero denominator");
    }
  }
}
Ejemplo n.º 13
0
// Determine the sign of a polynomial predicate at the points X, resolving an exact zero by adding
// successive levels of perturbation.
//
// Parameters:
//   predicate - callback that writes the predicate value as limbs given the evaluation points
//   degree    - degree bound used to size the precision and the interpolation
//   X         - the input points; each supplies value() and seed()
//
// Returns true if the first nonzero term found is positive.  The loop over perturbation levels only
// ends once a nonzero term is found.
template<class PerturbedT> bool perturbed_sign(void(*const predicate)(RawArray<mp_limb_t>,RawArray<const Vector<Exact<1>,PerturbedT::m>>),
                                                      const int degree, RawArray<const PerturbedT> X) {
  const int m = PerturbedT::m;
  typedef Vector<Exact<1>,m> EV;
  if (check)
    GEODE_WARNING("Expensive consistency checking enabled");

  const int n = X.size();
  if (verbose)
    cout << "perturbed_sign:\n  degree = "<<degree<<"\n  X = "<<X<<endl;

  // Check if the predicate is nonsingular without perturbation
  const auto Z = GEODE_RAW_ALLOCA(n,EV);
  const int precision = degree*Exact<1>::ratio;
  {
    for (int i=0;i<n;i++)
      Z[i] = EV(to_exact(X[i].value()));
    const auto R = GEODE_RAW_ALLOCA(precision,mp_limb_t);
    predicate(R,Z);
    if (const int sign = mpz_sign(R))
      return sign>0;
  }

  // Check the first perturbation level with specialized code
  vector<Vector<ExactInt,m>> Y(n); // perturbations
  {
    // Compute the first level of perturbations
    for (int i=0;i<n;i++)
      Y[i] = perturbation<m>(1,X[i].seed());
    if (verbose)
      cout << "  Y = "<<Y<<endl;

    // Evaluate polynomial at epsilon = 1, ..., degree
    // Extra limbs leave room for the scaling applied by the interpolation routine below
    const int scaled_precision = precision+factorial_limbs(degree);
    const auto values = GEODE_RAW_ALLOCA(degree*scaled_precision,mp_limb_t).reshape(degree,scaled_precision);
    // Zero the buffer first; presumably predicate does not write every limb of the wider rows - TODO confirm
    memset(values.data(),0,sizeof(mp_limb_t)*values.flat.size());
    for (int j=0;j<degree;j++) {
      for (int i=0;i<n;i++)
        Z[i] = EV(to_exact(X[i].value())+(j+1)*Y[i]);
      predicate(values[j],Z);
      if (verbose)
        cout << "  predicate("<<Z<<") = "<<mpz_str(values[j])<<endl;
    }

    // Find an interpolating polynomial, overriding the input with the result.
    scaled_univariate_in_place_interpolating_polynomial(values);
    if (verbose)
      cout << "  coefs = "<<mpz_str(values)<<endl;

    // Compute sign
    // The first nonzero coefficient in index order decides the sign
    for (int j=0;j<degree;j++)
      if (const int sign = mpz_sign(values[j]))
        return sign>0;
  }

  {
    // Add one perturbation level after another until we hit a nonzero polynomial.  Our current implementation duplicates
    // work from one iteration to the next for simplicity, which is fine since the first iteration suffices almost always.
    for (int d=2;;d++) {
      if (verbose)
        cout << "  level "<<d<<endl;
      // Compute the next level of perturbations
      // Y is laid out level by level: level v of point i lives at Y[v*n+i]
      Y.resize(d*n);
      for (int i=0;i<n;i++)
        Y[(d-1)*n+i] = perturbation<m>(d,X[i].seed());

      // Evaluate polynomial at every point in an "easy corner"
      const auto lambda = monomials(degree,d);
      const Array<mp_limb_t,2> values(lambda.m,precision,uninit);
      for (int j=0;j<lambda.m;j++) {
        for (int i=0;i<n;i++)
          Z[i] = EV(to_exact(X[i].value())+lambda(j,0)*Y[i]);
        for (int v=1;v<d;v++)
          for (int i=0;i<n;i++)
            Z[i] += EV(lambda(j,v)*Y[v*n+i]);
        predicate(values[j],Z);
      }

      // Find an interpolating polynomial, overriding the input with the result.
      in_place_interpolating_polynomial(degree,lambda,values);

      // Compute sign
      // Among nonzero coefficients, keep the one whose monomial is greatest under monomial_less
      int sign = 0;
      int sign_j = -1;
      for (int j=0;j<lambda.m;j++)
        if (const int s = mpz_sign(values[j])) {
          if (check) // Verify that a term which used to be zero doesn't become nonzero
            GEODE_ASSERT(lambda(j,d-1));
          if (!sign || monomial_less(lambda[sign_j],lambda[j])) {
            sign = s;
            sign_j = j;
          }
        }

      // If we find a nonzero sign, we're done!
      if (sign)
        return sign>0;

      // If we get through two levels without fixing the degeneracy, run a fast, strict identity test to make sure we weren't handed an impossible problem.
      if (d==2)
        assert_last_nonzero(predicate,values[0],X,"perturbed_sign: identically zero predicate");
    }
  }
}
Ejemplo n.º 14
0
  // Compute the gradient with respect to the (n+3) x d array x by central finite differences of U,
  // writing the result into grad (same shape as x).
  //
  // For each of the n intervals and each quadrature sample, U is evaluated at 4*d perturbed points:
  // for dimension a, slots 4a and 4a+1 shift position by -/+ smallx[a], and slots 4a+2 and 4a+3
  // shift velocity by -/+ smallv[a].
  //
  // NOTE(review): T_INFO, SPLINE_INFO, and X_INFO are macros defined outside this block; presumably
  // they introduce t1, dt, a0..a3, b0..b3, and x0..x3 used below - confirm against their definitions.
  void gradient(RawArray<const T,2> x, RawArray<T,2> grad) const {
    // Temporary arrays and views
    GEODE_ASSERT(x.sizes()==vec(n+3,d) && grad.sizes()==x.sizes());
    const auto sx = smallx.flat.raw(),
               sv = smallv.flat.raw();

    // Collect quadrature points
    const int e = 4*d; // evaluations per quadrature point
    Array<T,3> tq(    n,quads,e,uninit);
    Array<T,4> xq(vec(n,quads,e,d),uninit);
    Array<T,4> vq(vec(n,quads,e,d),uninit);
    for (int i=0;i<n;i++) {
      T_INFO(i)
      for (int q=0;q<quads;q++) {
        const T s = samples[q],
                t = t1+dt*s;
        for (int j=0;j<e;j++)
          tq(i,q,j) = t;
        SPLINE_INFO(s)
        for (int a=0;a<d;a++) {
          X_INFO(i,a)
          // Unperturbed position and velocity; these locals shadow the parameter x from here on
          const T x = a0*x0+a1*x1+a2*x2+a3*x3,
                  v = b0*x0+b1*x1+b2*x2+b3*x3;
          for (int j=0;j<e;j++) {
            xq(i,q,j,a) = x;
            vq(i,q,j,a) = v;
          }
        }
        // Apply one-sided shifts so that each group of four slots forms two central-difference pairs
        for (int a=0;a<d;a++) {
          xq(i,q,4*a  ,a) -= sx[a];
          xq(i,q,4*a+1,a) += sx[a];
          vq(i,q,4*a+2,a) -= sv[a];
          vq(i,q,4*a+3,a) += sv[a];
        }
      }
    }

    // Compute energies
    const auto Uq_ = U(tq.reshape_own(n*quads*e),NdArray<const T>(q2shape,xq.flat),NdArray<const T>(q2shape,vq.flat));
    GEODE_ASSERT(Uq_.size()==n*quads*e);
    const auto Uq = Uq_.reshape(n,quads,e);

    // Accumulate
    grad.fill(0);
    // Precompute 1/(2*step) for the position and velocity differences in each dimension
    const auto inv_2s = GEODE_RAW_ALLOCA(d,Vector<T,2>);
    for (int a=0;a<d;a++)
      inv_2s[a] = vec(.5/sx[a],.5/sv[a]);
    for (int i=0;i<n;i++) {
      T_INFO(i)
      for (int q=0;q<quads;q++) {
        const T s = samples[q],
                w = dt*weights[q];
        SPLINE_INFO(s)
        for (int a=0;a<d;a++) {
          // Weighted central differences of U with respect to position (wx) and velocity (wv)
          const T wx = w*inv_2s[a].x*(Uq(i,q,4*a+1)-Uq(i,q,4*a  )),
                  wv = w*inv_2s[a].y*(Uq(i,q,4*a+3)-Uq(i,q,4*a+2));
          // Distribute onto the four rows i..i+3 using the same coefficients that built x and v
          grad(i  ,a) += a0*wx+b0*wv;
          grad(i+1,a) += a1*wx+b1*wv;
          grad(i+2,a) += a2*wx+b2*wv;
          grad(i+3,a) += a3*wx+b3*wv;
        }
      }
    }
  }
Ejemplo n.º 15
0
// Apply the loop subdivision matrix to per-node values X on the coarse mesh, returning the
// corresponding per-node values on the fine mesh.  X must have one entry per coarse node.
template<class TV> Array<TV> TriangleSubdivision::loop_subdivide(RawArray<const TV> X) const {
  GEODE_ASSERT(X.size()==coarse_mesh->nodes());
  const auto fine_nodes = fine_mesh->nodes();
  // Left uninitialized on purpose: multiply writes the whole output
  Array<TV> fine_X(fine_nodes,uninit);
  const auto matrix = loop_matrix();
  matrix->multiply(X,fine_X);
  return fine_X;
}
Ejemplo n.º 16
0
  // Intended to compute second derivatives with respect to the (n+3) x d array x into hess, which has
  // shape (n+3,4,d,d), by finite differences of U.
  //
  // NOTE(review): this block looks unfinished.  It references d4 and grad, neither of which is declared
  // in this block (the output parameter is hess, which is never written), and the accumulation section
  // matches the one in gradient().  Confirm whether this code is compiled at all before relying on it.
  //
  // NOTE(review): T_INFO, SPLINE_INFO, and X_INFO are macros defined outside this block; presumably
  // they introduce t1, dt, a0..a3, b0..b3, and x0..x3 used below - confirm against their definitions.
  void hessian(RawArray<const T,2> x, RawArray<T,4> hess) const {
    // Temporary arrays and views
    GEODE_ASSERT(x.sizes()==vec(n+3,d) && hess.sizes()==vec(n+3,4,d,d));
    const auto sx = smallx.flat.raw(),
               sv = smallv.flat.raw();

    // Collect quadrature points
    // NOTE(review): the loops below advance j 8 times per b and 12 times per (b,c) pair with c>b,
    // i.e. 1+8*d+6*d*(d-1) slots in total, which is fewer than e when d>1 - confirm the intended layout
    const int e = 1+8*d+8*d*(d-1);
    Array<T,3> tq(    n,quads,e,uninit);
    Array<T,4> xq(vec(n,quads,e,d),uninit);
    Array<T,4> vq(vec(n,quads,e,d),uninit);
    for (int i=0;i<n;i++) {
      T_INFO(i)
      for (int q=0;q<quads;q++) {
        const T s = samples[q],
                t = t1+dt*s;
        for (int j=0;j<e;j++)
          tq(i,q,j) = t;
        SPLINE_INFO(s)
        for (int a=0;a<d;a++) {
          X_INFO(i,a)
          // Unperturbed position and velocity; these locals shadow the parameter x from here on
          const T x = a0*x0+a1*x1+a2*x2+a3*x3,
                  v = b0*x0+b1*x1+b2*x2+b3*x3;
          for (int j=0;j<e;j++) {
            xq(i,q,j,a) = x;
            vq(i,q,j,a) = v;
          }
          // Slot 0 stays unperturbed; perturbed slots start at 1
          // NOTE(review): every shift below is applied to component a using step sizes indexed by b
          // and c, so all components are shifted in every slot - confirm this is intended
          int j = 1;
          for (int b=0;b<d;b++) {
            const T xb = sx[b],
                    vb = sv[b];
            // Single shifts: x-, x+, v-, v+
            xq(i,q,j++,a) -= xb;
            xq(i,q,j++,a) += xb;
            vq(i,q,j++,a) -= vb;
            vq(i,q,j++,a) += vb;
            // Combined position/velocity shifts: (x-,v-), (x-,v+), (x+,v-), (x+,v+)
            xq(i,q,j  ,a) -= xb;
            vq(i,q,j++,a) -= vb;
            xq(i,q,j  ,a) -= xb;
            vq(i,q,j++,a) += vb;
            xq(i,q,j  ,a) += xb;
            vq(i,q,j++,a) -= vb;
            xq(i,q,j  ,a) += xb;
            vq(i,q,j++,a) += vb;
            for (int c=b+1;c<d;c++) {
              const T xc = sx[c],
                      vc = sv[c];
              // Shifts combining the step sizes for b and c, first in position and then in velocity
              xq(i,q,j++,a) -= xb+xc;
              xq(i,q,j++,a) -= xb-xc;
              xq(i,q,j++,a) += xb-xc;
              xq(i,q,j++,a) += xb+xc;
              vq(i,q,j++,a) -= vb+vc;
              vq(i,q,j++,a) -= vb-vc;
              vq(i,q,j++,a) += vb-vc;
              vq(i,q,j++,a) += vb+vc;

              // NOTE(review): this group uses only b (never c), and the first vq shift has no
              // matching xq shift in the same slot - it looks incomplete
              vq(i,q,j++,a) -= sv[b];
              xq(i,q,j  ,a) -= sx[b];
              vq(i,q,j++,a) += sv[b];
              xq(i,q,j  ,a) += sx[b];
              vq(i,q,j++,a) -= sv[b];
              xq(i,q,j  ,a) += sx[b];
              vq(i,q,j++,a) += sv[b];
            }
          }
        }
      }
    }

    // Compute energies
    // NOTE(review): d4 is not declared in this block, while the arrays above have e slots per
    // quadrature point
    const auto Uq_ = U(tq.reshape_own(n*quads*d4),NdArray<const T>(q2shape,xq.flat),NdArray<const T>(q2shape,vq.flat));
    GEODE_ASSERT(Uq_.size()==n*quads*d4);
    const auto Uq = Uq_.reshape(n,quads,d4);

    // Accumulate
    // NOTE(review): grad is not declared in this block and hess is never written; the code below
    // computes first-derivative terms using the 4*b slot layout from gradient()
    grad.fill(0);
    const auto inv_2s = GEODE_RAW_ALLOCA(d,Vector<T,2>);
    for (int a=0;a<d;a++)
      inv_2s[a] = vec(.5/sx[a],.5/sv[a]);
    for (int i=0;i<n;i++) {
      T_INFO(i)
      for (int q=0;q<quads;q++) {
        const T s = samples[q],
                w = dt*weights[q];
        SPLINE_INFO(s)
        for (int b=0;b<d;b++) {
          const T wx = w*inv_2s[b].x*(Uq(i,q,4*b+1)-Uq(i,q,4*b  )),
                  wv = w*inv_2s[b].y*(Uq(i,q,4*b+3)-Uq(i,q,4*b+2));
          grad(i  ,b) += a0*wx+b0*wv;
          grad(i+1,b) += a1*wx+b1*wv;
          grad(i+2,b) += a2*wx+b2*wv;
          grad(i+3,b) += a3*wx+b3*wv;
        }
      }
    }
  }
Ejemplo n.º 17
0
// Return the frozen bytes that store holds for the block with the given local id.
// Asserts that the block has no missing dimensions.
RawArray<const uint8_t> readable_block_store_t::get_compressed(local_id_t local_id) const {
  const auto& info = block_info(local_id);
  GEODE_ASSERT(!info.missing_dimensions);
  // The store is keyed by the block's flat id
  const auto& flat_id = info.flat_id;
  return store.get_frozen(flat_id);
}
Ejemplo n.º 18
0
 // NdArray convenience overload: check the input shape against xshape, allocate an output of the same
 // shape, and fill it via the overload that takes both arrays viewed as (n+3) x d.
 NdArray<T> gradient(NdArray<const T> x) const {
   GEODE_ASSERT(x.shape==xshape);
   NdArray<T> grad(xshape,uninit);
   this->gradient(x.flat.reshape(n+3,d),grad.flat.reshape(n+3,d));
   return grad;
 }
Ejemplo n.º 19
0
// NumpyDescr specialization for Rotation<TV>, backed by the static descriptor pointer d.
template<class TV> struct NumpyDescr<Rotation<TV>> {
  static PyArray_Descr* d;

  // Return d with its reference count incremented.  Asserts that d has been set.
  static PyArray_Descr* descr() {
    GEODE_ASSERT(d);
    Py_INCREF(d);
    return d;
  }
};