Пример #1
0
void  
PenaltyMP_FE::determineTangent(void)
{
    // first determine [C] = [-I [Ccr]]
    C->Zero();
    const Matrix &constraint = theMP->getConstraint();
    int noRows = constraint.noRows();
    int noCols = constraint.noCols();
    
    for (int j=0; j<noRows; j++)
	(*C)(j,j) = -1.0;
    
    for (int i=0; i<noRows; i++)
	for (int j=0; j<noCols; j++)
	    (*C)(i,j+noRows) = constraint(i,j);
    

    // now form the tangent: [K] = alpha * [C]^t[C]
    // *(tang) = (*C)^(*C);
    // *(tang) *= alpha;

	// THIS IS A WORKAROUND UNTIL WE GET addMatrixTransposeProduct() IN
	// THE Matrix CLASS OR UNROLL THIS COMPUTATION
	int rows = C->noRows();
	int cols = C->noCols();
	Matrix CT(cols,rows);
	const Matrix &Cref = *C;
	// Fill in the transpose of C
	for (int k = 0; k < cols; k++)
		for (int l = 0; l < rows; l++)
			CT(k,l) = Cref(l,k);
	// Compute alpha*(C^*C)
	tang->addMatrixProduct(0.0, CT, Cref, alpha);
}
Пример #2
0
void EllipsoidalIntegrator::updateQ( Tmatrix<double> C, Tmatrix<Interval> R ){

	// Build the transpose of C explicitly.
	Tmatrix<double> Ctrans(nx,nx);
	for( int row=0; row<nx; row++ ){
		for( int col=0; col<nx; col++ ){
			Ctrans(row,col) = C(col,row);
		}
	}

	// Q <- C * Q * C^T
	Q = C*Q*Ctrans;

	// Square root of the summed, regularised diagonal ratios Q_ii/(Q_ii+1e-8).
	double trQ = 0.0;
	for( int i=0; i<nx; i++ ){
		trQ += Q(i,i)/(Q(i,i)+1e-8);
	}
	trQ = ::sqrt(trQ);

	// For every component: the larger absolute bound of R(i), divided by
	// sqrt(Q_ii+1e-8).  kappa starts at trQ and accumulates these values.
	Vector sqrR(nx);
	double kappa = trQ;
	for( int i=0; i<nx; i++ ){
		sqrR(i) = acadoMax(::fabs(R(i).l()),::fabs(R(i).u()))/::sqrt(Q(i,i)+1e-8);
		kappa += sqrR(i);
	}

	// Rescale Q, then add the squared, rescaled bound of R(i) (plus EPS) to
	// each diagonal entry.
	Q *= kappa/(trQ+EPS);
	for( int i=0; i<nx; i++ ){
		double bound = acadoMax(::fabs(R(i).l()),::fabs(R(i).u()));
		bound *= ::sqrt(kappa/(sqrR(i)+EPS));
		Q(i,i) += bound*bound+EPS;
	}
}
Пример #3
0
RTPDestBox::RTPDestBox( QWidget *_parent, const char *_mux )
    : VirtualDestBox( _parent ), mux( qfu(_mux) )
{
    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via RTP."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Row 1: destination address */
    QLabel *addressCaption = new QLabel( qtr("Address"), this );
    RTPEdit = new QLineEdit( this );
    grid->addWidget( addressCaption, 1, 0, 1, 1 );
    grid->addWidget( RTPEdit, 1, 1, 1, 1 );

    /* Row 2: base port, restricted to 1..65535, initially 5004 */
    QLabel *portCaption = new QLabel( qtr("Base port"), this );
    RTPPort = new QSpinBox( this );
    RTPPort->setMaximumSize( QSize( 90, 16777215 ) );
    RTPPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    RTPPort->setRange( 1, 65535 );
    RTPPort->setValue( 5004 );
    grid->addWidget( portCaption, 2, 0, 1, 1 );
    grid->addWidget( RTPPort, 2, 1, 1, 1 );

    /* Row 3: stream name */
    QLabel *nameCaption = new QLabel( qtr("Stream name"), this );
    SAPName = new QLineEdit( this );
    grid->addWidget( nameCaption, 3, 0, 1, 1 );
    grid->addWidget( SAPName, 3, 1, 1, 1 );

    /* Hook the editable widgets up through the CT / CS helper macros */
    CT( RTPEdit );
    CS( RTPPort );
    CT( SAPName );
}
Пример #4
0
std::ostream & operator<<(std::ostream & str, const DensePolynomial<D, CT> & polynomial)
{
    // Terms are written from the highest degree (D-1) down to the constant
    // term; coefficients equal to a value-initialised CT are skipped.
    bool wroteTerm = false;

    for(int degree = D-1; degree >= 0; --degree)
    {
        if(!(polynomial.coeffientAt(degree) != CT()))
            continue; // zero coefficient, nothing to print

        // separator between consecutive printed terms
        if(wroteTerm)
            str << " + ";
        wroteTerm = true;

        str << polynomial.coeffientAt(degree);

        // the constant term carries no power suffix
        if(degree > 0)
            str << "{x^" << degree << "}";
    }

    // every coefficient was zero: print a single zero value instead
    if(!wroteTerm)
        str << CT();

    return str;
}
Пример #5
0
	/// @brief Gets a rotated angle in degrees.
	/// @details Example use: a character slowly aiming towards the mouse position.
	/// @param mStart Angle to start from.
	/// @param mEnd Target angle.
	/// @param mSpeed Rotation speed.
	/// @return Returns the rotated angle in degrees.
	template<typename T1, typename T2, typename T3> inline auto getRotatedDeg(const T1& mStart, const T2& mEnd, const T3& mSpeed) noexcept
	{
		using CT = Common<T1, T2, T3>;
		CT diff{getCycledValue(wrapDeg(mEnd) - wrapDeg(mStart), -CT(180), CT(180))};
		if(diff < -mSpeed) return mStart - mSpeed;
		if(diff > mSpeed) return mStart + mSpeed;
		return mEnd;
	}
			/**
			\brief Run one predict-then-correct iteration of the tracker loop.

			The predictor fills the temporary space from the current space; the corrector then fills the tentative space from the prediction at time current_time + delta_t.  When either stage reports a failure other than GoingToInfinity, the next stepsize is set to step_size_fail_factor times the current stepsize and UpdateStepsize() is called before returning.

			\return SuccessCode::Success if both stages succeeded (the current space is then overwritten with the corrected point); otherwise the non-success code reported by the failing stage.
			*/
			SuccessCode TrackerIteration() const override
			{
				static_assert(std::is_same<	typename Eigen::NumTraits<RT>::Real, 
			              				typename Eigen::NumTraits<CT>::Real>::value,
			              				"underlying complex type and the type for comparisons must match");

				this->NotifyObservers(NewStep<EmitterType >(*this));

				// working vectors: the point we are at, and the predictor's output
				Vec<CT>& space_now = std::get<Vec<CT> >(this->current_space_);
				Vec<CT>& space_predicted = std::get<Vec<CT> >(this->temporary_space_);

				CT t_now = CT(this->current_time_);
				CT dt = CT(this->delta_t_);

				// ---- predictor stage ----
				const SuccessCode predict_result = Predict(space_predicted, space_now, t_now, dt);
				if (predict_result != SuccessCode::Success)
				{
					this->NotifyObservers(FirstStepPredictorMatrixSolveFailure<EmitterType >(*this));

					// shrink the step and let the stepsize logic react
					this->next_stepsize_ = this->stepping_config_.step_size_fail_factor*this->current_stepsize_;
					UpdateStepsize();

					return predict_result;
				}

				this->NotifyObservers(SuccessfulPredict<EmitterType , CT>(*this, space_predicted));

				// ---- corrector stage ----
				Vec<CT>& space_corrected = std::get<Vec<CT> >(this->tentative_space_);
				CT t_next = t_now + dt;

				const SuccessCode correct_result = Correct(space_corrected,
														   space_predicted,
														   t_next);

				// no corrective action is possible when heading to infinity
				if (correct_result == SuccessCode::GoingToInfinity)
					return correct_result;

				if (correct_result != SuccessCode::Success)
				{
					this->NotifyObservers(CorrectorMatrixSolveFailure<EmitterType >(*this));

					this->next_stepsize_ = this->stepping_config_.step_size_fail_factor*this->current_stepsize_;
					UpdateStepsize();

					return correct_result;
				}

				this->NotifyObservers(SuccessfulCorrect<EmitterType , CT>(*this, space_corrected));

				// accept the step: the corrected point becomes the current point
				space_now = space_corrected;
				return SuccessCode::Success;
			}
Пример #7
0
MMSHDestBox::MMSHDestBox( QWidget *_parent ) : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network "
             "via the mms protocol." ), this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Captions for the address (row 1) and port (row 2) fields */
    QLabel *addressCaption = new QLabel( qtr("Address"), this );
    QLabel *portCaption = new QLabel( qtr("Port"), this );
    grid->addWidget( addressCaption, 1, 0, 1, 1 );
    grid->addWidget( portCaption, 2, 0, 1, 1 );

    /* Address field, pre-filled with 0.0.0.0 */
    MMSHEdit = new QLineEdit( this );
    MMSHEdit->setText( "0.0.0.0" );

    /* Port field, restricted to 1..65535, initially 8080 */
    MMSHPort = new QSpinBox( this );
    MMSHPort->setMaximumSize( QSize( 90, 16777215 ) );
    MMSHPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    MMSHPort->setRange( 1, 65535 );
    MMSHPort->setValue( 8080 );

    grid->addWidget( MMSHEdit, 1, 1, 1, 1 );
    grid->addWidget( MMSHPort, 2, 1, 1, 1 );

    /* Hook the editable widgets up through the CS / CT helper macros */
    CS( MMSHPort );
    CT( MMSHEdit );
}
Пример #8
0
UDPDestBox::UDPDestBox( QWidget *_parent ) : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via UDP."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Captions for the address (row 1) and port (row 2) fields */
    QLabel *addressCaption = new QLabel( qtr("Address"), this );
    QLabel *portCaption = new QLabel( qtr("Port"), this );
    grid->addWidget( addressCaption, 1, 0, 1, 1 );
    grid->addWidget( portCaption, 2, 0, 1, 1 );

    /* Address field, initially empty */
    UDPEdit = new QLineEdit( this );

    /* Port field, restricted to 1..65535, initially 1234 */
    UDPPort = new QSpinBox( this );
    UDPPort->setMaximumSize( QSize( 90, 16777215 ) );
    UDPPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    UDPPort->setRange( 1, 65535 );
    UDPPort->setValue( 1234 );

    grid->addWidget( UDPEdit, 1, 1, 1, 1 );
    grid->addWidget( UDPPort, 2, 1, 1, 1 );

    /* Hook the editable widgets up through the CS / CT helper macros */
    CS( UDPPort );
    CT( UDPEdit );
}
Пример #9
0
CT test_vrt_lon_sph(CT lon1r,
                    CT lat1r,
                    CT lon2r,
                    CT lat2r)
{
    typedef bg::model::point<CT, 2,
            bg::cs::spherical_equatorial<bg::radian> > sph_point;

    // Azimuth from the first endpoint towards the second (spherical formula).
    CT start_azimuth = bg::formula::spherical_azimuth(lon1r, lat1r, lon2r, lat2r);

    // The envelope of the segment supplies the extreme latitude it reaches.
    bg::model::segment<sph_point> seg(sph_point(lon1r, lat1r),
                                      sph_point(lon2r, lat2r));
    bg::model::box<sph_point> envelope_box;
    bg::envelope(seg, envelope_box);

    // Take the upper box latitude when the latitudes sum to a positive
    // value, the lower one otherwise.
    CT lat_sum = lat1r + lat2r;
    CT vertex_lat = (lat_sum > CT(0))
        ? bg::get_as_radian<bg::max_corner, 1>(envelope_box)
        : bg::get_as_radian<bg::min_corner, 1>(envelope_box);

    bg::strategy::azimuth::spherical<> azimuth_strategy;

    typedef bg::formula::vertex_longitude
            <CT, bg::spherical_equatorial_tag> vertex_lon_formula;

    return vertex_lon_formula::apply(lon1r, lat1r,
                                     lon2r, lat2r,
                                     vertex_lat,
                                     start_azimuth,
                                     azimuth_strategy);
}
Пример #10
0
RTSPDestBox::RTSPDestBox( QWidget *_parent ) : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via "
             "RTSP." ), this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Captions: the path goes on row 2, the port on row 1 */
    QLabel *pathCaption = new QLabel( qtr("Path"), this );
    QLabel *portCaption = new QLabel( qtr("Port"), this );
    grid->addWidget( pathCaption, 2, 0, 1, 1 );
    grid->addWidget( portCaption, 1, 0, 1, 1 );

    /* Path field, pre-filled with "/" */
    RTSPEdit = new QLineEdit( this );
    RTSPEdit->setText( "/" );

    /* Port field, restricted to 1..65535, initially 8554 */
    RTSPPort = new QSpinBox( this );
    RTSPPort->setMaximumSize( QSize( 90, 16777215 ) );
    RTSPPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    RTSPPort->setRange( 1, 65535 );
    RTSPPort->setValue( 8554 );

    grid->addWidget( RTSPEdit, 2, 1, 1, 1 );
    grid->addWidget( RTSPPort, 1, 1, 1, 1 );

    /* Hook the editable widgets up through the CS / CT helper macros */
    CS( RTSPPort );
    CT( RTSPEdit );
}
Пример #11
0
/* FileDest Box: destination panel that writes the stream to a file */
FileDestBox::FileDestBox( QWidget *_parent, intf_thread_t * _p_intf ) : VirtualDestBox( _parent )
{
    p_intf = _p_intf;

    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
         qtr( "This module writes the transcoded stream to a file."), this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Row 1: caption (column 0), file name field (column 4), browse button (column 5) */
    QLabel *nameCaption = new QLabel( qtr( "Filename"), this );
    grid->addWidget( nameCaption, 1, 0, 1, 1 );

    fileEdit = new QLineEdit( this );
    grid->addWidget( fileEdit, 1, 4, 1, 1 );

    QPushButton *browseButton = new QPushButton( qtr( "Browse..." ), this );
    browseButton->setSizePolicy( QSizePolicy::Maximum, QSizePolicy::Fixed );
    grid->addWidget( browseButton, 1, 5, 1, 1 );

    /* Hook the widgets up: CT for the text field, BUTTONACT ties the
     * button to fileBrowse() */
    CT( fileEdit );
    BUTTONACT( browseButton, fileBrowse() );
}
Пример #12
0
void ada_read_sys(PolySys& sys)
{
	int fail;
	std::cout << "testing reading and writing a system" << std::endl;
	//fail = syscon_read_system();
	std::cout << "the system is .." << std::endl;
	fail = syscon_write_system();

	// Get variable names
	int s_dim = 80;
	char *s = (char*) calloc(80,sizeof(char));
	fail = syscon_string_of_symbols(&s_dim, s);
	string* x_names;
	var_name(s, s_dim, x_names);

	int dim = 4;
	int i = 1;
	double c[2];
	int d[dim];

	int n_eq = 0;
	fail = syscon_number_of_polynomials(&n_eq);

	sys.n_eq = n_eq;
	sys.dim  = dim;
	sys.eq_space = new PolyEq[n_eq];
	sys.pos_var = x_names;

	PolyEq* tmp_eq = sys.eq_space;

	for(int i=1; i<n_eq+1; i++){
		int nt;
		fail = syscon_number_of_terms(i,&nt);
		//std::cout << "  #terms in polynomial " << i << " : " << nt << std::endl;
		tmp_eq->n_mon = nt;
		tmp_eq->dim = dim;
		for(int j=1; j<=nt; j++)
		{
			fail = syscon_retrieve_term(i,j,dim,d,c);
			//std::cout << c[0] << " " << c[1] << std::endl;
			//for (int k=0; k<n; k++) std::cout << " " << d[k];
			//std::cout << std::endl;
			bool constant_term = true;
			for (int k=0; k<dim; k++){
				if(d[k]!=0){
					constant_term = false;
				}
			}

			if(constant_term==true){
				tmp_eq->n_mon--;
				tmp_eq->constant += CT(c[0],c[1]);
				//std::cout << "constant " << c[0] \
				          << " " << c[1] << std::endl;
			}
			else{
Пример #13
0
ICEDestBox::ICEDestBox( QWidget *_parent ) : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    /* Row 0: module description, spanning every column */
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to an Icecast server."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    /* Captions for the address (row 1) and port (row 2) fields */
    QLabel *addressCaption = new QLabel( qtr("Address"), this );
    QLabel *portCaption = new QLabel( qtr("Port"), this );
    grid->addWidget( addressCaption, 1, 0, 1, 1 );
    grid->addWidget( portCaption, 2, 0, 1, 1 );

    /* Address field, initially empty */
    ICEEdit = new QLineEdit( this );

    /* Port field, restricted to 1..65535, initially 8000 */
    ICEPort = new QSpinBox( this );
    ICEPort->setMaximumSize( QSize( 90, 16777215 ) );
    ICEPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    ICEPort->setRange( 1, 65535 );
    ICEPort->setValue( 8000 );

    grid->addWidget( ICEEdit, 1, 1, 1, 1 );
    grid->addWidget( ICEPort, 2, 1, 1, 1 );

    /* Rows 3 and 4: mount point and "login:pass" fields; each field
     * spans the remaining columns */
    QLabel *mountCaption = new QLabel( qtr( "Mount Point" ), this );
    QLabel *loginCaption = new QLabel( qtr( "Login:pass" ), this );
    ICEMountEdit = new QLineEdit( this );
    ICEPassEdit = new QLineEdit( this );
    grid->addWidget( mountCaption, 3, 0, 1, 1 );
    grid->addWidget( ICEMountEdit, 3, 1, 1, -1 );
    grid->addWidget( loginCaption, 4, 0, 1, 1 );
    grid->addWidget( ICEPassEdit, 4, 1, 1, -1 );

    /* Hook the editable widgets up through the CS / CT helper macros */
    CS( ICEPort );
    CT( ICEEdit );
    CT( ICEMountEdit );
    CT( ICEPassEdit );
}
Пример #14
0
Logic al1_bldrep(int cos)
  {
  int low, slow, col, scol, i;

  repsiz = 0;
  if (cos <= 1 || cos >= nextdf || COL1(cos) < 0)
    { return(TRUE); }

  low = slow = cos;
  while (low > 1)
    {
    scol = 0;
    for (col = 1; col <= ncol; col++)
      {
      if ((i = CT(low,col)) > 0)
        {
        if (i < slow)				/* Lower row number found */
          {
          slow = i;
          scol = col;
          }
        else if (i == slow && scol != 0)	/* Same row & slow < low */
          {					/* ... earlier column? */
          if (invcol[col] < invcol[scol])
            { scol = col; }
          }
        }
      }

    /* Add it (increases repsiz); note the column inversion!  Failure sets 
    repsiz to 0 */

    if (!al1_addrep(invcol[scol]))
      { return(FALSE); }

    low = slow;
    }

  /* Reverse representative (note: inversion already done) */

  for (i = 1; i <= repsiz/2; i++) 
    {
    col  = currrep[i-1]; 
    scol = currrep[repsiz-i];

    currrep[i-1]      = scol; 
    currrep[repsiz-i] = col;
    }

  return(TRUE);
  }
Пример #15
0
/**
 * HTTP POST test entry point.
 *
 * Creates an HTTP POST connection, sets the request headers (including a
 * Content-Length computed from the form data), writes the body, finishes the
 * request, prints the Content-Length response header and then echoes the
 * response body to the console while counting the bytes read.
 *
 * Any failed wait on the connection, or an unusable Content-Length response
 * header, ends in Freeze(0).
 *
 * @return 0 (only reached if Freeze() returns).
 */
extern "C" int MAMain() {
    InitConsole();
    gConsoleLogging = 1;

    // Form-encoded request body; size excludes the terminating NUL.
    static const char data[] = "userid=joe&password=guessme";
    int size = sizeof(data) - 1;
    char buffer[64];

    printf("HTTP POST test\n");
    Handle http = maHttpCreate("http://msdev.mine.nu:8080/testing/posttest.php", HTTP_POST);
    CT(http);

    // NOTE(review): "terue" looks like a typo for "true"; it is sent as-is
    // because the receiving test script may depend on it — confirm.
    maHttpSetRequestHeader(http, "X-MoSync-test", "terue");

    // Content-Length must be passed as a decimal string.
    _itoa(size, buffer, 10);
    maHttpSetRequestHeader(http, "Content-Length", buffer);

    maHttpSetRequestHeader(http, "Content-Type", "application/x-www-form-urlencoded");

    printf("write\n");
    maConnWrite(http, data, size);
    if(waitConn(http) < 0)
        Freeze(0);

    printf("finish\n");
    maHttpFinish(http);
    if(waitConn(http) < 0)
        Freeze(0);

    // Reject a missing header or one that would not fit into buffer.
    int res = maHttpGetResponseHeader(http, "Content-Length", buffer, sizeof(buffer));
    if(res <= 0 || res >= (int)sizeof(buffer)) {
        printf("CLerr %i\n", res);
        Freeze(0);
    }
    printf("Content-Length: %s\n", buffer);

    // Read the response body in chunks; one byte of buffer is reserved for
    // the terminating NUL.  A negative wait result ends the loop.
    res = 0;
    while(true) {
        maConnRead(http, buffer, sizeof(buffer)-1);
        size = waitConn(http);
        if(size < 0)
            break;
        res += size;
        buffer[size] = 0;
        // The body comes from the network: never use it as a format string,
        // since any '%' in it would be interpreted as a conversion.
        printf("%s", buffer);
    }
    printf("Bytes read: %i\n", res);
    Freeze(0);
    return 0;
}
Пример #16
0
/* Trace the current representative (currrep[0..repsiz-1]) through the table, 
starting at coset cos.  Returns the coset reached, or 0 if the representative 
is empty, if COL1() of a coset on the path is negative, or if a table entry on 
the path is 0. */

int al1_trrep(int cos)
  {
  int pos;

  if (repsiz == 0)
    { return(0); }

  pos = 0;
  while (pos < repsiz)
    {
    if (COL1(cos) < 0)
      { return(0); }

    cos = CT(cos,currrep[pos]);
    if (cos == 0)
      { return(0); }

    pos++;
    }

  return(cos);
  }
Пример #17
0
int main() {

  initComm();

  Scene scene;
  SceneConfig::enableIK = false;

  PR2Manager pr2m(scene);
  KinectTransformer kinectTrans(pr2m.pr2->robot);
  kinectTrans.calibrate(btTransform::getIdentity());
  CoordinateTransformer CT(kinectTrans.getWFC());
  
  FakeKinect fk(scene.env->osg, CT.worldFromCamEigen);

  scene.startViewer();
  scene.step(0);

  while (true) {
    fk.sendMessage();
  }

}
Пример #18
0
CT test_vrt_lon_geo(CT lon1r,
                    CT lat1r,
                    CT lon2r,
                    CT lat2r)
{
    typedef bg::model::point<CT, 2, bg::cs::geographic<bg::radian> > geo_point;
    typedef FormulaPolicy<CT, false, true, false, false, false> azimuth_formula;

    // WGS84
    bg::srs::spheroid<CT> wgs84(6378137.0, 6356752.3142451793);

    // Azimuth reported by the formula policy for the two endpoints.
    CT start_azimuth = azimuth_formula::apply(lon1r, lat1r, lon2r, lat2r, wgs84).azimuth;

    // The envelope of the segment supplies the extreme latitude it reaches.
    bg::model::segment<geo_point> seg(geo_point(lon1r, lat1r),
                                      geo_point(lon2r, lat2r));
    bg::model::box<geo_point> envelope_box;
    bg::envelope(seg, envelope_box);

    // Take the upper box latitude when the latitudes sum to a positive
    // value, the lower one otherwise.
    CT lat_sum = lat1r + lat2r;
    CT vertex_lat = (lat_sum > CT(0))
        ? bg::get_as_radian<bg::max_corner, 1>(envelope_box)
        : bg::get_as_radian<bg::min_corner, 1>(envelope_box);

    bg::strategy::azimuth::geographic<> azimuth_strategy;

    typedef bg::formula::vertex_longitude
            <CT, bg::geographic_tag> vertex_lon_formula;

    return vertex_lon_formula::apply(lon1r, lat1r,
                                     lon2r, lat2r,
                                     vertex_lat,
                                     start_azimuth,
                                     azimuth_strategy);

}
Пример #19
0
Logic al2_normal(int cos)
  {
  int s, *beg, *end, *pi, next;

  if (cos < 1 || cos >= nextdf || COL1(cos) < 0)
    { return(FALSE); }

  for (s = 1; s <= nsgpg; s++)
    {
    beg = &(subggen[subgindex[s]]);
    end = beg-1 + subglength[s];

    next = cos;
    for (pi = beg; pi <= end; pi++)
      {
      if ((next = CT(next,*pi)) == 0 || COL1(next) < 0)
        { return(FALSE); }
      }
    if (next != cos)
      { return(FALSE); }
    }

  return(TRUE);
  }
Пример #20
0
HTTPDestBox::HTTPDestBox( QWidget *_parent ) : VirtualDestBox( _parent )
{
    /* Module description shown through the existing label */
    label->setText( qtr( "This module outputs the transcoded stream to a network via HTTP.") );

    /* Captions: the path goes on row 2, the port on row 1 */
    QLabel *pathCaption = new QLabel( qtr("Path"), this );
    QLabel *portCaption = new QLabel( qtr("Port"), this );
    layout->addWidget( pathCaption, 2, 0, 1, 1 );
    layout->addWidget( portCaption, 1, 0, 1, 1 );

    /* Path field, pre-filled with "/" */
    HTTPEdit = new QLineEdit( this );
    HTTPEdit->setText( "/" );

    /* Port field, restricted to 1..65535, initially 8080 */
    HTTPPort = new QSpinBox( this );
    HTTPPort->setMaximumSize( QSize( 90, 16777215 ) );
    HTTPPort->setAlignment( Qt::AlignRight | Qt::AlignTrailing | Qt::AlignVCenter );
    HTTPPort->setRange( 1, 65535 );
    HTTPPort->setValue( 8080 );

    layout->addWidget( HTTPEdit, 2, 1, 1, 1 );
    layout->addWidget( HTTPPort, 1, 1, 1, 1 );

    /* Hook the editable widgets up through the CS / CT helper macros */
    CS( HTTPPort );
    CT( HTTPEdit );
}
Пример #21
0
// SymmLLC: blocked distributed update of C from A and B.
//
// C is first scaled by beta; then, for each diagonal block A11 of A (walking
// down the diagonal), the products of alpha with two panels of A and the
// matching row block B1 of B are accumulated into overlapping views of C.
// NOTE(review): the name and the LOWER trapezoid masks suggest this is the
// symmetric product C := alpha A B + beta C with only the lower triangle of
// A referenced — confirm against the caller.
//
// alpha : scalar applied to each accumulated product
// A, B  : read-only inputs (only locked views of them are taken)
// beta  : scalar applied to C before accumulation
// C     : updated in place
//
// In non-RELEASE builds the call is pushed on the call stack and a
// std::logic_error is thrown if A, B and C do not share one grid.
inline void
SymmLLC
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmLLC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<T> 
        BT(g),  B0(g),
        BB(g),  B1(g),
                B2(g);
    DistMatrix<T> 
        CT(g),  C0(g),  CAbove(g),
        CB(g),  C1(g),  CBelow(g),
                C2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> AColPan_MC_STAR(g);
    DistMatrix<T,STAR,MC  > ARowPan_STAR_MC(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);

    // This alignment is set once, outside the loop; the two A-panel
    // alignments are set and freed on every iteration.
    B1Trans_MR_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    // Iterate until the bottom part of C has been consumed.
    while( CB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        // ARowPan = [A10 A11], AColPan = [A11; A21]
        LockedView1x2( ARowPan, A10, A11 );
        LockedView2x1
        ( AColPan, A11,
                   A21 );

        // CAbove = [C0; C1], CBelow = [C1; C2] (both views include C1)
        View2x1
        ( CAbove, C0,
                  C1 );
        View2x1
        ( CBelow, C1,
                  C2 );

        AColPan_MC_STAR.AlignWith( CBelow );
        ARowPan_STAR_MC.AlignWith( CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR = AColPan;
        ARowPan_STAR_MC = ARowPan;
        // NOTE(review): presumably these masks keep the lower trapezoid of
        // each panel, with offset 0 for the column panel and -1 for the row
        // panel so that the diagonal of A11 is used only once — confirm
        // against MakeTrapezoidal.
        MakeTrapezoidal( LEFT,  LOWER,  0, AColPan_MC_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC );

        B1Trans_MR_STAR.TransposeFrom( B1 );

        // CBelow += alpha * AColPan * (B1Trans)^T
        LocalGemm
        ( NORMAL, TRANSPOSE, 
          alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow );

        // CAbove += alpha * (ARowPan)^T * (B1Trans)^T
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, 
          alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR.FreeAlignments();
        ARowPan_STAR_MC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #22
0
/* Compiles every snippet of the table below with both compilers stored under
   MPSL ("c_compiler" and "a_compiler"), decompiles the two results and
   reports through _do_test() whether the decompiled outputs compare equal.
   The C compiler is left selected as MPSL.compiler on exit. */
void test_compiler_diffs(void)
{
    mpdm_t mpsl, c_comp, a_comp;
    int idx;
    struct {
        wchar_t *code;
        int line;
    } compiler_tests[] = {
//        CT(L"local a = 1, b = 2, c, d = 3;"),
//        CT(L"local a = 1;"),
//        CT(L"local v; 1;"),
//        CT(L"local x, y, z; 2;"),
        CT(L"a |= 6;"),
        CT(L"a %= 6;"),
        CT(L"a /= 6;"),
        CT(L"a *= 6;"),
        CT(L"a -= 6;"),
        CT(L"a += 6;"),
        CT(L"for (n = 0; n < 100; n = n + 1) { print('kill'); } 1234;"),
        CT(L"for (;;) { print('kill'); } 1234;"),
        CT(L"foreach (v, i, [1, 2, 3]) { print(e); } 666;"),
        CT(L"eol = driver == 'win32' && 'crlf' || 'lf';"),
        CT(L"255 $ '%x';"),
        CT(L"foreach (e, [1, 2, 3]) { print(e); } 666;"),
//        CT(L"global v; 1;"),
//        CT(L"global x, y, z; 2;"),
        CT(L"sub pi { 3.1416; } 100;"),
        CT(L"sub pi () { 3.1416; } 200;"),
        CT(L"sub by2(v) { v * 2; } 250;"),
        CT(L"sub mul(v1, v2) { v1 * v2; } 300;"),
        CT(L"mul = sub (v1, v2) { v1 * v2; }; 123;"),
        CT(L"by2 = sub (e) { e * 2; }; 100;"),
        CT(L"pi = sub { 3.14; }; 6;"),
        CT(L"f->write('hi', string(1 + 3), eol); 1;"),
        CT(L"f->read(); 1;"),
        CT(L"while (1) 2;"),
        CT(L"while (1) { 2; 3; }"),
        CT(L"while (a < 10) { a = a + 1; }"),
        CT(L"if (a == 1) { b = 2 + 4; c = 3 * 2; } else { d = 3; e = d / 2; }"),
        CT(L"if (1) 2; else 3;"),
        CT(L"if (1) { 2; 3; }"),
        CT(L"if (1) { 2; }"),
        CT(L"if (1) 2;"),
        CT(L"if (2 + 3) 4 + 5;"),
        CT(L"if (a == 1) b = 2;"),
        CT(L"list[0];"),
        CT(L"list[1] = 1;"),
        CT(L"MPSL['OPCODE'];"),
        CT(L"q = 1 + 2 * 3;"),
        CT(L"q.q = 1 * 2 + 3; q2 = [];"),
        CT(L"q = 100;"),
        CT(L"MPSL.CORE.random;"),
        CT(L"1 + 2; [1, 2] ; {};"),
        CT(L"a + 1;"),
        CT(L"1 * (2 + 3);"),
        CT(L"{};"),
        CT(L"{a: 1};"),
        CT(L"{c: 2, d: 3};"),
        CT(L"{'e' => 4, 'f' => 5};"),
        CT(L"{g: 6, 'h' => 7};"),
        CT(L"{a: 2 * 3, b: 5 + 8};"),
        CT(L"[];"),
        CT(L"[1, 2, 3];"),
        CT(L"[1, 2 + 3, 4];"),
        CT(L"MPSL.CORE.random();"),
        CT(L"bool(1, 2) + 666;"),
        CT(L"1 ; 2 ; 3;"),
        CT(L"random();"),
        CT(L"5 != '5';"),
        CT(L"10 == '10';"),
        CT(L"10 > 1 + 2;"),
        CT(L"1 + 2 * 3;"),
        CT(L"1 * 2 + 3;"),
        CT(L"1.2 + 3.4;"),
        CT(L"/* test test */"),
        CT(L"1;"),
        CT(L"!1;"),
        CT(L"'abcde';"),
        CT(L"3.14;"),
        CT(NULL)
    };

    printf("\nComparing the output of the two compilers:\n");

    /* fetch the MPSL namespace and both compiler entries */
    mpsl   = mpdm_get_wcs(mpdm_root(), L"MPSL");
    c_comp = mpdm_get_wcs(mpsl, L"c_compiler");
    a_comp = mpdm_get_wcs(mpsl, L"a_compiler");

    /* the table is terminated by an entry whose code is NULL */
    idx = 0;
    while (compiler_tests[idx].code != NULL) {
        mpdm_t src, exec_c, exec_a, text_c, text_a;

        src = MPDM_S(compiler_tests[idx].code);
        mpdm_ref(src);

        /* first pass: "c_compiler" selected */
        mpdm_set_wcs(mpsl, c_comp, L"compiler");
        exec_c = mpsl_compile(src, NULL);
        text_c = mpsl_decompile(exec_c);

        /* second pass: "a_compiler" selected */
        mpdm_set_wcs(mpsl, a_comp, L"compiler");
        exec_a = mpsl_compile(src, NULL);
        text_a = mpsl_decompile(exec_a);

/*        printf("%ls\n", mpdm_string(text_c));
        printf("%ls\n", mpdm_string(text_a));*/

        _do_test("compiler output equal",
            mpdm_cmp(text_c, text_a) == 0, compiler_tests[idx].line);

        mpdm_unref(src);
        idx++;
    }

    /* leave "c_compiler" selected */
    mpdm_set_wcs(mpsl, c_comp, L"compiler");
}
Пример #23
0
 // Variadic constructor taking any argument list by forwarding reference and
 // passing the arguments on, value category preserved, to CT(...).
 // NOTE(review): the statement in the body is an expression, so it looks like
 // it builds and immediately discards a temporary CT instead of initialising
 // *this; if delegation was intended it would need the mem-initializer form
 // `: CT(std::forward<T>(t)...)`. It may also resolve back to this same
 // constructor. Confirm against the enclosing class and its other overloads.
 explicit CT(T&&... t) {        // explicit: prevents unexpected implicit type conversions
     CT(std::forward<T>(t)...); // std::forward<T>(t) is equivalent to static_cast<T&&>(t)
 }
Пример #24
0
// GemmTTB: blocked distributed update of C from A and B, where both A and B
// are used in (conjugate-)transposed form.
//
// C is first scaled by beta.  A is then traversed by column panels A1 and C
// by the matching row blocks C1; for every panel, alpha times the oriented A1
// times the oriented B is formed locally, summed and redistributed, and added
// to C1.
//
// orientationOfA, orientationOfB : must not be NORMAL (checked in non-RELEASE
//                                  builds); ADJOINT selects AdjointFrom for
//                                  the A panel, anything else TransposeFrom
// alpha : scalar applied to each product
// A, B  : read-only inputs
// beta  : scalar applied to C before accumulation
// C     : updated in place
//
// In non-RELEASE builds a std::logic_error is thrown when the grids differ,
// when an orientation is NORMAL, or when the dimensions do not conform
// (A.Width() == C.Height(), B.Height() == C.Width(), A.Height() == B.Width()).
inline void
GemmTTB
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTB expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > D1_STAR_MC(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    // These alignments are set once, outside the loop; only D1 is re-aligned
    // (to C1) and freed on every iteration.
    A1_VR_STAR.AlignWith( B );
    A1AdjOrTrans_STAR_MR.AlignWith( B );
    D1_STAR_MC.AlignWith( B );

    // Start the algorithm 
    Scale( beta, C );
    LockedPartitionRight( A, AL, AR, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    // Iterate until the right part of A has been consumed.
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/     AR,
          A0, /**/ A1, A2 );
 
        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        // The local product buffer is given C1's dimensions and zeroed.
        D1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), D1_STAR_MC );
        //--------------------------------------------------------------------//
        // Redistribute the panel, then (conjugate-)transpose it as requested.
        A1_VR_STAR = A1;
        if( orientationOfA == ADJOINT )
            A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR );
        else
            A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR );
 
        // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H]
        //           = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC]
        LocalGemm
        ( NORMAL, orientationOfB, 
          alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC );

        // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows
        D1_MR_MC.SumScatterFrom( D1_STAR_MC );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #25
0
void YieldStrength<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Debug tracing switch; it stays off unless the line below is re-enabled.
  bool print = false;
  //if (typeid(ScalarT) == typeid(RealType)) print = true;

  if (print)
    std::cout << " *** YieldStrength *** " << std::endl;

  int numCells = workset.numCells;

  // Stage 1: base value at every quadrature point, either the constant or
  // the value of exp_rf_kl evaluated at the point's coordinates.
  for (int cell = 0; cell < numCells; ++cell) {
    for (int qp = 0; qp < numQPs; ++qp) {
      if (is_constant) {
        yieldStrength(cell,qp) = constant_value;
      }
      else {
        Teuchos::Array<MeshScalarT> point(numDims);
        for (int dim = 0; dim < numDims; dim++)
          point[dim] = Sacado::ScalarValue<MeshScalarT>::eval(coordVec(cell,qp,dim));
        yieldStrength(cell,qp) = exp_rf_kl->evaluate(point, rv);
      }
    }
  }

  // Stage 2: thermal correction, linear in (Temperature - refTemp).
  if (isThermoElastic) {
    for (int cell = 0; cell < numCells; ++cell) {
      for (int qp = 0; qp < numQPs; ++qp) {
        yieldStrength(cell,qp) -= dYdT_value * (Temperature(cell,qp) - refTemp);

        if (!print)
          continue;

        std::cout << "    Y   : " << yieldStrength(cell,qp) << std::endl;
        std::cout << "    temp: " << Temperature(cell,qp) << std::endl;
        std::cout << "    dYdT: " << dYdT_value << std::endl;
        std::cout << "    refT: " << refTemp << std::endl;
      }
    }
  }

  // Stage 3: correction proportional to the change of CL with respect to
  // the value stored in the workset's state array under CLname.
  if (isDiffuseDeformation) {

    Albany::MDArray CLold = (*workset.stateArrayPtr)[CLname];

    for (int cell = 0; cell < numCells; ++cell) {
      for (int qp = 0; qp < numQPs; ++qp) {
        // yieldStrength(cell,qp) = constant_value*( 1.0 + (zeta-1.0)*CL(cell,qp)   );
        yieldStrength(cell,qp) -= constant_value*(zeta-1.0)*(CL(cell,qp) -CLold(cell,qp)  );

        if (!print)
          continue;

        std::cout << "    Y   : " << yieldStrength(cell,qp) << std::endl;
        std::cout << "    CT  : " << CT(cell,qp) << std::endl;
        std::cout << "   zeta : " << zeta << std::endl;
      }
    }
  }
}
Пример #26
0
/* For every column whose coset 1 image is defined, trace each subgroup
generator word from that image.  A trace that completes without returning to
its starting coset is reported on fop as a conjugate lying outside the
subgroup.  When build is set, the conjugating word is also collected and,
at the end, appended to genlst as a new subgroup generator. */
void al2_normcl(Logic build)
  {
  int col, s, j, k, l;
  int first, next;
  int *beg, *end, *pi;
  int traced;
  Logic found = FALSE;
  Wlist *list = NULL;
  Wlelt *lelt;

  for (col = 1; col <= ncol; col++)
    {
    /* starting coset for this column; skip when it is not available */
    first = CT(1,invcol[col]);
    if (first == 0 || COL1(first) < 0)
      { continue; }

    for (s = 1; s <= nsgpg; s++)
      {
      beg = &subggen[subgindex[s]];
      end = beg-1 + subglength[s];

      /* follow the generator word through the table */
      traced = 1;
      next   = first;
      for (pi = beg; pi <= end; pi++)
        {
        next = CT(next,*pi);
        if (next == 0 || COL1(next) < 0)
          {
          traced = 0;			/* trace incomplete */
          break;
          }
        }

      /* nothing to report if the trace broke off or closed on first */
      if (!traced || next == first)
        { continue; }

      found = TRUE;			/* a conjugate outside the subgrp */
      k     = colgen[col];		/* (signed) generator number */

      if (galpha)
        {
        fprintf(fop, "Conjugate by grp gen'r \"%c\" of",
                     (k > 0) ? algen[k] : toupper(algen[-k]));
        fprintf(fop, " subgrp gen'r \"");
        for (pi = beg; pi <= end; pi++)
          {
          l = colgen[*pi];
          fprintf(fop, "%c", (l > 0) ? algen[l] : toupper(algen[-l]));
          }
        }
      else
        {
        fprintf(fop, "Conjugate by grp gen'r \"%d\" of", k);
        fprintf(fop, " subgrp gen'r \"");
        for (pi = beg; pi <= end; pi++)
          { fprintf(fop, " %d", colgen[*pi]); }
        }
      fprintf(fop, "\" not in subgrp\n");

      if (!build)
        { continue; }

      /* the list of new generators is created on first use */
      if (list == NULL && (list = al1_newwl()) == NULL)
        { al2_continue("unable to create new subgrp gen'r list"); }

      lelt = al1_newelt();
      if (lelt == NULL)
        {
        al1_emptywl(list);
        free(list);
        al2_continue("unable to create subgrp gen'r list elt");
        }

      /* word slots 1..len are used: -k, the generator word, then k */
      lelt->len  = subglength[s] + 2;
      lelt->word = (int*)malloc((lelt->len+1)*sizeof(int));
      if (lelt->word == NULL)
        {
        al1_emptywl(list);
        free(list);
        free(lelt);
        al2_continue("unable to create subgrp gen'r list elt word");
        }
      lelt->exp = 1;

      lelt->word[1] = -k;
      j = 2;
      for (pi = beg; pi <= end; pi++)
        { lelt->word[j++] = colgen[*pi]; }
      lelt->word[lelt->len] = k;

      al1_addwl(list,lelt);
      }
    }

  if (!found)
    { fprintf(fop, "* All (traceable) conjugates in subgroup\n"); }

  /* No list means nothing was collected, so the generators are unchanged. */
  if (list == NULL)
    { return; }

  /* Append the collected generators and update the enumeration status. */
  al1_concatwl(genlst,list);

  nsgpg = genlst->len;

  okcont  = FALSE;
  tabinfo = tabindex = FALSE;

  fprintf(fop, "* Subgroup generators have been augmented\n");
  }
Пример #27
0
/* Writes to fop, for each of the ndgen generators, the cycles traced out by
that generator's column of the table over cosets 1..nalive.  Entries are
negated while a column is processed to mark cosets already visited, and are
negated back once the column is finished. */
void al2_cycles(void)
  {
  int i, j, k, kn, t, length;
  Logic id;

  for (j = 1; j <= ndgen; j++)
    {
    k  = gencol[ndgen+j];	/* column used for generator j */
    id = TRUE;			/* stays TRUE if no proper cycle shows up */

    /* left-hand side; length tracks the width of the current line */
    if (galpha)
      {
      fprintf(fop, "%c = ", algen[j]);
      length = 4;
      }
    else
      {
      fprintf(fop, "%d = ", j);
      length = al2_outlen(j) + 3;
      }

    for (i = 1; i <= nalive; i++)
      {
      t = CT(i, k);

      if (t == i)		/* one-cycle: mark it and move on */
        {
        CT(i, k) = -i;
        continue;
        }
      if (t < 0)		/* already part of an earlier cycle */
        { continue; }

      id = FALSE;

      /* open a new cycle at coset i, wrapping the line if needed */
      length += al2_outlen(i) + 1;
      if (length >= LLL)
        {
        fprintf(fop, "\n  (%d", i);
        length = al2_outlen(i) + 3;
        }
      else
        { fprintf(fop, "(%d", i); }

      CT(i, k) = -t;		/* mark coset i as used */

      /* walk the images until an already-marked coset is reached */
      for (kn = t; CT(kn,k) > 0; kn = t)
        {
        length += al2_outlen(kn) + 1;
        if (length >= LLL)
          {
          fprintf(fop, ",\n  %d", kn);
          length = al2_outlen(kn) + 2;
          }
        else
          { fprintf(fop, ",%d", kn); }

        t = CT(kn, k);
        CT(kn, k) = -t;
        }

      fprintf(fop, ")");	/* cycle closed */
      length++;
      }

    fprintf(fop, "%s", id ? "identity\n" : "\n");

    /* undo the marking: flip every entry of this column back */
    for (i = 1; i <= nalive; i++)
      { CT(i, k) = -CT(i, k); }
    }
  }
Пример #28
0
// Times the CPU and GPU evaluation of cpu_inst_hom on n_path synthetic
// points and returns the largest discrepancy reported by eval_compare.
//
//   workspace_cpu : only its n_predictor member is read here.
//   cpu_inst_hom  : supplies n_eq and dim, initialises the per-path
//                   workspaces and performs the CPU evaluation.
//   sol0, t       : not used by this routine.
//   Classic_Sys   : evaluated only when the (disabled) classic check is on.
//   n_path        : number of paths; values <= 0 select the default 1000.
//
// Every path gets n_predictor+1 points whose coordinate x_idx is
// CT(x_idx+1,0.0), a t value of CT(1,0.0), and one randomly selected
// predictor slot that is the one actually evaluated.
T1 eval_test_classic
 ( Workspace& workspace_cpu, CPUInstHom& cpu_inst_hom, CT* sol0, CT t,
   PolySys& Classic_Sys, int n_path )
{
   struct timeval start, end;
   long seconds, useconds;
   // Zero-initialised: the classic timing is reported at the end even though
   // the classic check is switched off, which used to read an indeterminate
   // value.
   double timeMS_classic = 0.0;
   double timeMS_cpu = 0.0;
   double timeMS_gpu = 0.0;

   int n_eq = cpu_inst_hom.n_eq;
   int dim = cpu_inst_hom.dim;

   if(n_path<=0)
   {
      std::cout << "Default number of path" << std::endl;
      n_path = 1000;
   }
   int n_predictor = workspace_cpu.n_predictor;
   std::cout << "n_path = " << n_path << std::endl;

   // Synthetic solution points, laid out path-major, then predictor slot.
   CT* sol = new CT[n_path*dim*(n_predictor+1)];
   CT* sol_tmp = sol;
   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      for(int pred_idx=0; pred_idx<n_predictor+1; pred_idx++)
      {
         for(int x_idx=0; x_idx<dim; x_idx++)
         {
            // The draw is unused but kept so that the random stream, and
            // hence the slots picked for x_t_idx below, stay as before.
            (void)rand();
            // sol_tmp[x_idx] = CT(sin(tmp),cos(tmp));
            sol_tmp[x_idx] = CT(x_idx+1,0.0);
            // sol_tmp[x_idx] = CT(1,0.0);
         }
         sol_tmp += dim;
      }
   }

   CT* t_mult = new CT[n_path*(n_predictor+1)];
   for(int sol_idx=0; sol_idx<n_path*(n_predictor+1); sol_idx++)
   {
      (void)rand();   // unused draw, kept for the same reason as above
      // t_mult[sol_idx] = CT(r,0.0);
      t_mult[sol_idx] = CT(1,0.0);
   }

   // Predictor slot evaluated for each path.
   int* x_t_idx = new int[n_path];
   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      x_t_idx[sol_idx] = rand()%(n_predictor+1);
   }

   std::cout << "----- CPU Evaluation ----" << std::endl;
   Workspace* workspace_cpu_all = new Workspace[n_path];
   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      cpu_inst_hom.init_workspace(workspace_cpu_all[sol_idx]);
   }
   gettimeofday(&start, NULL);
   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      CT* tmp_sol = sol+sol_idx*dim*(n_predictor+1)+dim*x_t_idx[sol_idx];
      CT* t_tmp = t_mult+sol_idx*(n_predictor+1)+x_t_idx[sol_idx];
      cpu_inst_hom.eval(workspace_cpu_all[sol_idx], tmp_sol, *t_tmp);
   }
   gettimeofday(&end, NULL);
   seconds  = end.tv_sec  - start.tv_sec;
   useconds = end.tv_usec - start.tv_usec;
   timeMS_cpu = seconds*1000 + useconds/1000.0;

   // Optional cross-check against the classic evaluator; off by default.
   bool classic_check = false;
   if(classic_check)
   {
      std::cout << "----- Class Evaluation ----" << std::endl;
      // One slab per path: n_eq function values followed by n_eq rows of
      // dim derivative values.
      CT* workspace_classic = new CT[n_path*n_eq*(dim+1)];
      CT** f_val = new CT*[n_path];
      CT* tmp_workspace = workspace_classic;
      CT*** deri_val = new CT**[n_path];
      CT** deri_space = new CT*[n_path*n_eq];

      for(int sol_idx=0; sol_idx<n_path; sol_idx++)
      {
         f_val[sol_idx] = tmp_workspace;
         tmp_workspace += n_eq;
         deri_val[sol_idx] = deri_space + sol_idx*n_eq;
         for(int i=0; i<n_eq; i++)
         {
            deri_val[sol_idx][i] = tmp_workspace;
            tmp_workspace += dim;
         }
      }
      gettimeofday(&start, NULL);
      for(int sol_idx=0; sol_idx<n_path; sol_idx++)
      {
         CT* tmp_sol = sol+sol_idx*dim*(n_predictor+1)+dim*x_t_idx[sol_idx];
         Classic_Sys.eval(tmp_sol, f_val[sol_idx], deri_val[sol_idx]);
      }
      gettimeofday(&end, NULL);
      seconds  = end.tv_sec  - start.tv_sec;
      useconds = end.tv_usec - start.tv_usec;
      timeMS_classic = seconds*1000 + useconds/1000.0;

      // Check two CPU method
      std::cout << "----- Classic Evaluation Check ----" << std::endl;
      for(int sol_idx=0; sol_idx<n_path; sol_idx++)
      {
         err_check_class_workspace(deri_val[sol_idx],f_val[sol_idx],
            workspace_cpu_all[sol_idx].matrix, n_eq, dim);
      }
      delete[] workspace_classic;
      delete[] f_val;
      delete[] deri_val;
      delete[] deri_space;
   }

   std::cout << "----- GPU Evaluation ----" << std::endl;
   // Filled in by GPU_Eval; released with delete[] further down.
   // NOTE(review): presumably GPU_Eval allocates these with new[] - confirm.
   CT** gpu_workspace_all = NULL;
   CT** gpu_matrix_all = NULL;
   gettimeofday(&start, NULL);
   GPU_Eval(cpu_inst_hom,sol,t_mult,gpu_workspace_all,gpu_matrix_all,n_path,
      x_t_idx, n_predictor);
   gettimeofday(&end, NULL);
   seconds  = end.tv_sec  - start.tv_sec;
   useconds = end.tv_usec - start.tv_usec;
   timeMS_gpu = seconds*1000 + useconds/1000.0;

   std::cout << "----- CPU vs GPU Evaluation Check----" << std::endl;
   T1 err = 0;
   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      // std::cout << "sol_idx = " << sol_idx << std::endl;
      T1 err_tmp = eval_compare(cpu_inst_hom,gpu_workspace_all[sol_idx],
         gpu_matrix_all[sol_idx],workspace_cpu_all[sol_idx].all,
         workspace_cpu_all[sol_idx].matrix);
      if(err_tmp > err)
      {
         err = err_tmp;
      }
      // std::cout << "err = " << err_tmp << std::endl;
   }

   delete[] x_t_idx;
   delete[] t_mult;
   delete[] sol;
   // The per-path CPU workspaces were previously never released.
   delete[] workspace_cpu_all;

   for(int sol_idx=0; sol_idx<n_path; sol_idx++)
   {
      delete[] gpu_workspace_all[sol_idx];
      delete[] gpu_matrix_all[sol_idx];
   }
   delete[] gpu_workspace_all;
   delete[] gpu_matrix_all;

   std::cout << "err = " << err << std::endl;
   std::cout << "Classic Eval time " << timeMS_classic << std::endl;
   std::cout << "CPU     Eval time " << timeMS_cpu << std::endl;
   std::cout << "GPU     Eval time " << timeMS_gpu << std::endl;

   return err;
}
Пример #29
0
// Blocked update of C from A and B, marching over row panels of A and C.
// C is first scaled by beta; then, for every row panel A1 of A with the
// matching row panel C1 of C, a transposed product
//     D1^T := alpha B^T A1^T
// is formed locally and accumulated into C1 through
// TransposeSumScatterUpdate.
// NOTE(review): taken together with the conformality check this looks like
// C := alpha A B + beta C - confirm against the library documentation.
//
// Unless RELEASE is defined, the routine throws std::logic_error when the
// three matrices do not share a grid or when their dimensions do not conform.
inline void 
GemmNNB
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    // A is m x k, B is k x n and C is m x n
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    // (top/bottom split of A and C, plus the three-way split used per step)
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC> A1_STAR_MC(g);
    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);

    // Both temporaries are aligned with B once, ahead of the loop, since
    // every LocalGemm below pairs them with B.
    A1_STAR_MC.AlignWith( B );
    D1Trans_MR_STAR.AlignWith( B );

    // Start the algorithm
    Scale( beta, C );
    // NOTE(review): the trailing 0 presumably makes the top parts empty at
    // the start - confirm.
    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    // Iterate until no rows of A are left in the bottom part
    while( AB.Height() > 0 )
    {
        // Expose the current panels A1 and C1
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        // The buffer holds the transpose of the C1 contribution, hence the
        // swapped width/height arguments.
        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]

        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR );

        // Accumulate the transposed contribution into C1 with weight 1
        C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//

        // Fold the processed panels into the top parts and advance
        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
 
        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}                     
Пример #30
0
// Blocked update of C from A and B in which every block C11 of C is formed
// from one row panel A1 of A and one column panel B1 of B: the panels are
// redistributed, multiplied with LocalGemm into a [*,*] buffer and then
// accumulated into C11 through SumScatterUpdate.  C is scaled by beta first.
// NOTE(review): taken together with the conformality check this looks like
// C := alpha A B + beta C - confirm against the library documentation.
//
// The loop nest is ordered by shape: when A has more rows than B has columns
// the outer loop walks row panels of A, otherwise it walks column panels of
// B.  In either case the panel of the outer loop is redistributed only once
// per outer iteration.
//
// Unless RELEASE is defined, the routine throws std::logic_error when the
// three matrices do not share a grid or when their dimensions do not conform.
inline void 
GemmNNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    // A is m x k, B is k x n and C is m x n
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Case 1: outer loop over row panels of A, inner loop over column
    // panels of B.
    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        // Iterate until no rows of A are left in the bottom part
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            // Redistribute the row panel once; it is reused for every
            // column panel of B in the inner loop.
            A1_STAR_VC = A1; 
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            // Sweep left to right over B and over the row panel C1
            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                // Local product into the [*,*] buffer, then accumulation
                // into C11 with weight 1
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }
            // Release the alignment so that it can be set again for the
            // next row panel.
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    // Case 2: outer loop over column panels of B, inner loop over row
    // panels of A.
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> 
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        // Iterate until no columns of B are left in the right part
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            // Redistribute the column panel once; it is reused for every
            // row panel of A in the inner loop.
            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            // Sweep top to bottom over A and over the column panel C1
            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                // Local product into the [*,*] buffer, then accumulation
                // into C11 with weight 1
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }
            // Release the alignment so that it can be set again for the
            // next column panel.
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 ); 

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}