void PenaltyMP_FE::determineTangent(void) { // first determine [C] = [-I [Ccr]] C->Zero(); const Matrix &constraint = theMP->getConstraint(); int noRows = constraint.noRows(); int noCols = constraint.noCols(); for (int j=0; j<noRows; j++) (*C)(j,j) = -1.0; for (int i=0; i<noRows; i++) for (int j=0; j<noCols; j++) (*C)(i,j+noRows) = constraint(i,j); // now form the tangent: [K] = alpha * [C]^t[C] // *(tang) = (*C)^(*C); // *(tang) *= alpha; // THIS IS A WORKAROUND UNTIL WE GET addMatrixTransposeProduct() IN // THE Matrix CLASS OR UNROLL THIS COMPUTATION int rows = C->noRows(); int cols = C->noCols(); Matrix CT(cols,rows); const Matrix &Cref = *C; // Fill in the transpose of C for (int k = 0; k < cols; k++) for (int l = 0; l < rows; l++) CT(k,l) = Cref(l,k); // Compute alpha*(C^*C) tang->addMatrixProduct(0.0, CT, Cref, alpha); }
// Updates the member matrix Q: first Q <- C*Q*C^T, then Q is rescaled and its
// diagonal is inflated using the absolute bounds of the interval entries R(i).
void EllipsoidalIntegrator::updateQ( Tmatrix<double> C, Tmatrix<Interval> R ){

	// Explicit transpose of C.
	Tmatrix<double> Ctrans(nx,nx);
	for( int row=0; row<nx; row++ )
		for( int col=0; col<nx; col++ )
			Ctrans(row,col) = C(col,row);

	Q = C*Q*Ctrans;

	// Regularised diagonal sum of Q and the per-component scaled bound of R.
	double trQ = 0.0;
	Vector sqrR(nx);
	for( int k=0; k<nx; k++ ){
		trQ += Q(k,k)/(Q(k,k)+1e-8);
		sqrR(k) = acadoMax(::fabs(R(k).l()),::fabs(R(k).u()))/::sqrt(Q(k,k)+1e-8);
	}
	trQ = ::sqrt(trQ);

	// kappa = sqrt-sum above plus all scaled bounds, accumulated in index order.
	double kappa = trQ;
	for( int k=0; k<nx; k++ )
		kappa += sqrR(k);

	Q *= kappa/(trQ+EPS);

	// Inflate the diagonal; EPS keeps every added term strictly positive.
	for( int k=0; k<nx; k++ ){
		const double bound  = acadoMax(::fabs(R(k).l()),::fabs(R(k).u()));
		const double scaled = bound * ::sqrt(kappa/(sqrR(k)+EPS));
		Q(k,k) += scaled*scaled+EPS;
	}
}
// Destination box for RTP output: address, base port and stream name fields.
RTPDestBox::RTPDestBox( QWidget *_parent, const char *_mux )
    : VirtualDestBox( _parent ), mux( qfu(_mux) )
{
    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via RTP."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Row 1: destination address.
    QLabel *addressLabel = new QLabel( qtr("Address"), this );
    RTPEdit = new QLineEdit( this );
    grid->addWidget( addressLabel, 1, 0, 1, 1 );
    grid->addWidget( RTPEdit, 1, 1, 1, 1 );

    // Row 2: base port, limited to 1..65535, initial value 5004.
    QLabel *portLabel = new QLabel( qtr("Base port"), this );
    RTPPort = new QSpinBox( this );
    RTPPort->setMaximumSize( QSize( 90, 16777215 ) );
    RTPPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    RTPPort->setMinimum( 1 );
    RTPPort->setMaximum( 65535 );
    RTPPort->setValue( 5004 );
    grid->addWidget( portLabel, 2, 0, 1, 1 );
    grid->addWidget( RTPPort, 2, 1, 1, 1 );

    // Row 3: stream name.
    QLabel *nameLabel = new QLabel( qtr("Stream name"), this );
    SAPName = new QLineEdit( this );
    grid->addWidget( nameLabel, 3, 0, 1, 1 );
    grid->addWidget( SAPName, 3, 1, 1, 1 );

    // Hook the editable widgets up through the file's CT/CS helper macros.
    CT( RTPEdit );
    CS( RTPPort );
    CT( SAPName );
}
std::ostream & operator<<(std::ostream & str, const DensePolynomial<D, CT> & polynomial) { bool first = true; for(int degree = D-1; degree >= 0; --degree) { // a non-zero coeff if(polynomial.coeffientAt(degree) != CT()) { if(first) first = false; else str << " + "; str << polynomial.coeffientAt(degree); if(degree != 0) str << "{x^" << degree << "}"; } } if(first) str << CT(); return str; }
/// @brief Gets a rotated angle in degrees. /// @details Example use: a character slowly aiming towards the mouse position. /// @param mStart Angle to start from. /// @param mEnd Target angle. /// @param mSpeed Rotation speed. /// @return Returns the rotated angle in degrees. template<typename T1, typename T2, typename T3> inline auto getRotatedDeg(const T1& mStart, const T2& mEnd, const T3& mSpeed) noexcept { using CT = Common<T1, T2, T3>; CT diff{getCycledValue(wrapDeg(mEnd) - wrapDeg(mStart), -CT(180), CT(180))}; if(diff < -mSpeed) return mStart - mSpeed; if(diff > mSpeed) return mStart + mSpeed; return mEnd; }
/**
\brief Perform a single predict-then-correct step of the tracker loop.

Observers are notified at the start of the step, after a successful predict and after a successful
correct.  When the predictor or corrector reports a failure (other than GoingToInfinity), the next
stepsize is set to the configured fail factor times the current stepsize and UpdateStepsize() is called.

\return SuccessCode::Success if the step completed; otherwise the code returned by the predictor or corrector.
*/
SuccessCode TrackerIteration() const override
{
	static_assert(std::is_same<	typename Eigen::NumTraits<RT>::Real,
								typename Eigen::NumTraits<CT>::Real>::value,
								"underlying complex type and the type for comparisons must match");

	this->NotifyObservers(NewStep<EmitterType >(*this));

	// Working storage: `predicted` is filled by Predict, `current` is what we ultimately update.
	Vec<CT>& predicted = std::get<Vec<CT> >(this->temporary_space_);
	Vec<CT>& current = std::get<Vec<CT> >(this->current_space_);
	CT time_now = CT(this->current_time_);
	CT time_step = CT(this->delta_t_);

	// ---- predict ----
	SuccessCode predict_result = Predict(predicted, current, time_now, time_step);
	if (predict_result != SuccessCode::Success)
	{
		this->NotifyObservers(FirstStepPredictorMatrixSolveFailure<EmitterType >(*this));
		this->next_stepsize_ = this->stepping_config_.step_size_fail_factor*this->current_stepsize_;
		UpdateStepsize();
		return predict_result;
	}
	this->NotifyObservers(SuccessfulPredict<EmitterType , CT>(*this, predicted));

	// ---- correct ----
	Vec<CT>& corrected = std::get<Vec<CT> >(this->tentative_space_); // filled by Correct
	CT time_next = time_now + time_step;

	SuccessCode correct_result = Correct(corrected, predicted, time_next);

	// Going to infinity: nothing can be done here, just report it.
	if (correct_result == SuccessCode::GoingToInfinity)
		return correct_result;

	if (correct_result != SuccessCode::Success)
	{
		this->NotifyObservers(CorrectorMatrixSolveFailure<EmitterType >(*this));
		this->next_stepsize_ = this->stepping_config_.step_size_fail_factor*this->current_stepsize_;
		UpdateStepsize();
		return correct_result;
	}
	this->NotifyObservers(SuccessfulCorrect<EmitterType , CT>(*this, corrected));

	// Accept the step: the tentative point becomes the current point.
	current = corrected;
	return SuccessCode::Success;
}
// Destination box for MMS output: address (preset to "0.0.0.0") and port.
MMSHDestBox::MMSHDestBox( QWidget *_parent )
    : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network "
             "via the mms protocol." ),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Captions for rows 1 and 2.
    QLabel *addressLabel = new QLabel( qtr("Address"), this );
    QLabel *portLabel = new QLabel( qtr("Port"), this );
    grid->addWidget( addressLabel, 1, 0, 1, 1 );
    grid->addWidget( portLabel, 2, 0, 1, 1 );

    // Input widgets: address line edit and a port spin box (1..65535, initially 8080).
    MMSHEdit = new QLineEdit( this );
    MMSHEdit->setText( "0.0.0.0" );
    MMSHPort = new QSpinBox( this );
    MMSHPort->setMaximumSize( QSize( 90, 16777215 ) );
    MMSHPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    MMSHPort->setMinimum( 1 );
    MMSHPort->setMaximum( 65535 );
    MMSHPort->setValue( 8080 );
    grid->addWidget( MMSHEdit, 1, 1, 1, 1 );
    grid->addWidget( MMSHPort, 2, 1, 1, 1 );

    // Hook the editable widgets up through the file's CS/CT helper macros.
    CS( MMSHPort );
    CT( MMSHEdit );
}
// Destination box for UDP output: address and port fields.
UDPDestBox::UDPDestBox( QWidget *_parent )
    : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via UDP."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Captions for rows 1 and 2.
    QLabel *addressLabel = new QLabel( qtr("Address"), this );
    QLabel *portLabel = new QLabel( qtr("Port"), this );
    grid->addWidget( addressLabel, 1, 0, 1, 1 );
    grid->addWidget( portLabel, 2, 0, 1, 1 );

    // Input widgets: address line edit and a port spin box (1..65535, initially 1234).
    UDPEdit = new QLineEdit( this );
    UDPPort = new QSpinBox( this );
    UDPPort->setMaximumSize( QSize( 90, 16777215 ) );
    UDPPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    UDPPort->setMinimum( 1 );
    UDPPort->setMaximum( 65535 );
    UDPPort->setValue( 1234 );
    grid->addWidget( UDPEdit, 1, 1, 1, 1 );
    grid->addWidget( UDPPort, 2, 1, 1, 1 );

    // Hook the editable widgets up through the file's CS/CT helper macros.
    CS( UDPPort );
    CT( UDPEdit );
}
// Computes the vertex longitude of the spherical segment (lon1r,lat1r)-(lon2r,lat2r),
// all in radians.  The vertex latitude is read from the segment's envelope: the
// max corner when lat1r + lat2r > 0, the min corner otherwise.
CT test_vrt_lon_sph(CT lon1r, CT lat1r, CT lon2r, CT lat2r)
{
    typedef bg::model::point<CT, 2,
                             bg::cs::spherical_equatorial<bg::radian> > point;

    // Azimuth at the first endpoint.
    CT a1 = bg::formula::spherical_azimuth(lon1r, lat1r, lon2r, lat2r);

    // Envelope of the segment.
    bg::model::segment<point> segment(point(lon1r, lat1r),
                                      point(lon2r, lat2r));
    bg::model::box<point> box;
    bg::envelope(segment, box);

    // Pick the envelope latitude according to the sign of the latitude sum.
    CT lat_sum = lat1r + lat2r;
    CT vertex_lat;
    if (lat_sum > CT(0))
        vertex_lat = bg::get_as_radian<bg::max_corner, 1>(box);
    else
        vertex_lat = bg::get_as_radian<bg::min_corner, 1>(box);

    bg::strategy::azimuth::spherical<> azimuth;

    return bg::formula::vertex_longitude<CT, bg::spherical_equatorial_tag>
        ::apply(lon1r, lat1r, lon2r, lat2r, vertex_lat, a1, azimuth);
}
// Destination box for RTSP output: port (row 1) and path (row 2, preset to "/").
RTSPDestBox::RTSPDestBox( QWidget *_parent )
    : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to a network via "
             "RTSP." ),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Captions: the path caption sits on row 2, the port caption on row 1.
    QLabel *pathLabel = new QLabel( qtr("Path"), this );
    QLabel *portLabel = new QLabel( qtr("Port"), this );
    grid->addWidget( pathLabel, 2, 0, 1, 1 );
    grid->addWidget( portLabel, 1, 0, 1, 1 );

    // Input widgets: path line edit and a port spin box (1..65535, initially 8554).
    RTSPEdit = new QLineEdit( this );
    RTSPEdit->setText( "/" );
    RTSPPort = new QSpinBox( this );
    RTSPPort->setMaximumSize( QSize( 90, 16777215 ) );
    RTSPPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    RTSPPort->setMinimum( 1 );
    RTSPPort->setMaximum( 65535 );
    RTSPPort->setValue( 8554 );
    grid->addWidget( RTSPEdit, 2, 1, 1, 1 );
    grid->addWidget( RTSPPort, 1, 1, 1, 1 );

    // Hook the editable widgets up through the file's CS/CT helper macros.
    CS( RTSPPort );
    CT( RTSPEdit );
}
/* FileDest Box: file name field plus a "Browse..." button */
FileDestBox::FileDestBox( QWidget *_parent, intf_thread_t * _p_intf )
    : VirtualDestBox( _parent )
{
    p_intf = _p_intf;

    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module writes the transcoded stream to a file."), this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Row 1: caption, file name edit (column 4) and browse button (column 5).
    QLabel *nameLabel = new QLabel( qtr( "Filename"), this );
    grid->addWidget( nameLabel, 1, 0, 1, 1 );

    fileEdit = new QLineEdit( this );
    grid->addWidget( fileEdit, 1, 4, 1, 1 );

    QPushButton *fileSelectButton = new QPushButton( qtr( "Browse..." ), this );
    QSizePolicy buttonPolicy( QSizePolicy::Maximum, QSizePolicy::Fixed );
    fileSelectButton->setSizePolicy( buttonPolicy );
    grid->addWidget( fileSelectButton, 1, 5, 1, 1 );

    // Hook the widgets up through the file's CT/BUTTONACT helper macros.
    CT( fileEdit );
    BUTTONACT( fileSelectButton, fileBrowse() );
}
void ada_read_sys(PolySys& sys) { int fail; std::cout << "testing reading and writing a system" << std::endl; //fail = syscon_read_system(); std::cout << "the system is .." << std::endl; fail = syscon_write_system(); // Get variable names int s_dim = 80; char *s = (char*) calloc(80,sizeof(char)); fail = syscon_string_of_symbols(&s_dim, s); string* x_names; var_name(s, s_dim, x_names); int dim = 4; int i = 1; double c[2]; int d[dim]; int n_eq = 0; fail = syscon_number_of_polynomials(&n_eq); sys.n_eq = n_eq; sys.dim = dim; sys.eq_space = new PolyEq[n_eq]; sys.pos_var = x_names; PolyEq* tmp_eq = sys.eq_space; for(int i=1; i<n_eq+1; i++){ int nt; fail = syscon_number_of_terms(i,&nt); //std::cout << " #terms in polynomial " << i << " : " << nt << std::endl; tmp_eq->n_mon = nt; tmp_eq->dim = dim; for(int j=1; j<=nt; j++) { fail = syscon_retrieve_term(i,j,dim,d,c); //std::cout << c[0] << " " << c[1] << std::endl; //for (int k=0; k<n; k++) std::cout << " " << d[k]; //std::cout << std::endl; bool constant_term = true; for (int k=0; k<dim; k++){ if(d[k]!=0){ constant_term = false; } } if(constant_term==true){ tmp_eq->n_mon--; tmp_eq->constant += CT(c[0],c[1]); //std::cout << "constant " << c[0] \ << " " << c[1] << std::endl; } else{
// Destination box for Icecast output: address, port, mount point and login:pass.
ICEDestBox::ICEDestBox( QWidget *_parent )
    : VirtualDestBox( _parent )
{
    QGridLayout *grid = new QGridLayout( this );

    // Row 0: description spanning all columns.
    QLabel *description = new QLabel(
        qtr( "This module outputs the transcoded stream to an Icecast server."),
        this );
    grid->addWidget( description, 0, 0, 1, -1 );

    // Captions for rows 1 and 2.
    QLabel *addressLabel = new QLabel( qtr("Address"), this );
    QLabel *portLabel = new QLabel( qtr("Port"), this );
    grid->addWidget( addressLabel, 1, 0, 1, 1 );
    grid->addWidget( portLabel, 2, 0, 1, 1 );

    // Address line edit and port spin box (1..65535, initially 8000).
    ICEEdit = new QLineEdit( this );
    ICEPort = new QSpinBox( this );
    ICEPort->setMaximumSize( QSize( 90, 16777215 ) );
    ICEPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    ICEPort->setMinimum( 1 );
    ICEPort->setMaximum( 65535 );
    ICEPort->setValue( 8000 );
    grid->addWidget( ICEEdit, 1, 1, 1, 1 );
    grid->addWidget( ICEPort, 2, 1, 1, 1 );

    // Rows 3 and 4: mount point and credentials, each spanning the remaining columns.
    QLabel *mountLabel = new QLabel( qtr( "Mount Point" ), this );
    QLabel *loginLabel = new QLabel( qtr( "Login:pass" ), this );
    ICEMountEdit = new QLineEdit( this );
    ICEPassEdit = new QLineEdit( this );
    grid->addWidget( mountLabel, 3, 0, 1, 1 );
    grid->addWidget( ICEMountEdit, 3, 1, 1, -1 );
    grid->addWidget( loginLabel, 4, 0, 1, 1 );
    grid->addWidget( ICEPassEdit, 4, 1, 1, -1 );

    // Hook the editable widgets up through the file's CS/CT helper macros.
    CS( ICEPort );
    CT( ICEEdit );
    CT( ICEMountEdit );
    CT( ICEPassEdit );
}
Logic al1_bldrep(int cos) { int low, slow, col, scol, i; repsiz = 0; if (cos <= 1 || cos >= nextdf || COL1(cos) < 0) { return(TRUE); } low = slow = cos; while (low > 1) { scol = 0; for (col = 1; col <= ncol; col++) { if ((i = CT(low,col)) > 0) { if (i < slow) /* Lower row number found */ { slow = i; scol = col; } else if (i == slow && scol != 0) /* Same row & slow < low */ { /* ... earlier column? */ if (invcol[col] < invcol[scol]) { scol = col; } } } } /* Add it (increases repsiz); note the column inversion! Failure sets repsiz to 0 */ if (!al1_addrep(invcol[scol])) { return(FALSE); } low = slow; } /* Reverse representative (note: inversion already done) */ for (i = 1; i <= repsiz/2; i++) { col = currrep[i-1]; scol = currrep[repsiz-i]; currrep[i-1] = scol; currrep[repsiz-i] = col; } return(TRUE); }
extern "C" int MAMain() { InitConsole(); gConsoleLogging = 1; static const char data[] = "userid=joe&password=guessme"; int size = sizeof(data) - 1; char buffer[64]; printf("HTTP POST test\n"); Handle http = maHttpCreate("http://msdev.mine.nu:8080/testing/posttest.php", HTTP_POST); CT(http); maHttpSetRequestHeader(http, "X-MoSync-test", "terue"); _itoa(size, buffer, 10); maHttpSetRequestHeader(http, "Content-Length", buffer); maHttpSetRequestHeader(http, "Content-Type", "application/x-www-form-urlencoded"); printf("write\n"); maConnWrite(http, data, size); if(waitConn(http) < 0) Freeze(0); printf("finish\n"); maHttpFinish(http); if(waitConn(http) < 0) Freeze(0); int res = maHttpGetResponseHeader(http, "Content-Length", buffer, sizeof(buffer)); if(res <= 0 || res >= (int)sizeof(buffer)) { printf("CLerr %i\n", res); Freeze(0); } printf("Content-Length: %s\n", buffer); res = 0; while(true) { maConnRead(http, buffer, sizeof(buffer)-1); size = waitConn(http); if(size < 0) break; res += size; buffer[size] = 0; printf(buffer); } printf("Bytes read: %i\n", res); Freeze(0); return 0; }
/* Trace the word held in currrep[0..repsiz-1] through the coset table,
   starting at coset `cos`.  Returns the coset reached, or 0 if the word is
   empty, a coset on the path has COL1 < 0, or a table entry is 0. */
int al1_trrep(int cos)
  {
  int pos;

  if (repsiz == 0)
    { return(0); }

  for (pos = 0; pos < repsiz; pos++)
    {
    if (COL1(cos) < 0)
      { return(0); }

    cos = CT(cos,currrep[pos]);
    if (cos == 0)
      { return(0); }
    }

  return(cos);
  }
int main() { initComm(); Scene scene; SceneConfig::enableIK = false; PR2Manager pr2m(scene); KinectTransformer kinectTrans(pr2m.pr2->robot); kinectTrans.calibrate(btTransform::getIdentity()); CoordinateTransformer CT(kinectTrans.getWFC()); FakeKinect fk(scene.env->osg, CT.worldFromCamEigen); scene.startViewer(); scene.step(0); while (true) { fk.sendMessage(); } }
// Computes the vertex longitude of the geographic segment (lon1r,lat1r)-(lon2r,lat2r),
// all in radians, on a spheroid built from the constants below.  The vertex
// latitude is read from the segment's envelope: the max corner when
// lat1r + lat2r > 0, the min corner otherwise.
CT test_vrt_lon_geo(CT lon1r, CT lat1r, CT lon2r, CT lat2r)
{
    typedef FormulaPolicy<CT, false, true, false, false, false> formula;
    typedef bg::model::point<CT, 2, bg::cs::geographic<bg::radian> > geo_point;

    // WGS84
    bg::srs::spheroid<CT> spheroid(6378137.0, 6356752.3142451793);

    // Azimuth at the first endpoint.
    CT a1 = formula::apply(lon1r, lat1r, lon2r, lat2r, spheroid).azimuth;

    // Envelope of the segment.
    bg::model::segment<geo_point> segment(geo_point(lon1r, lat1r),
                                          geo_point(lon2r, lat2r));
    bg::model::box<geo_point> box;
    bg::envelope(segment, box);

    // Pick the envelope latitude according to the sign of the latitude sum.
    CT lat_sum = lat1r + lat2r;
    CT vertex_lat;
    if (lat_sum > CT(0))
        vertex_lat = bg::get_as_radian<bg::max_corner, 1>(box);
    else
        vertex_lat = bg::get_as_radian<bg::min_corner, 1>(box);

    bg::strategy::azimuth::geographic<> azimuth_geographic;

    return bg::formula::vertex_longitude<CT, bg::geographic_tag>
        ::apply(lon1r, lat1r, lon2r, lat2r, vertex_lat, a1, azimuth_geographic);
}
Logic al2_normal(int cos) { int s, *beg, *end, *pi, next; if (cos < 1 || cos >= nextdf || COL1(cos) < 0) { return(FALSE); } for (s = 1; s <= nsgpg; s++) { beg = &(subggen[subgindex[s]]); end = beg-1 + subglength[s]; next = cos; for (pi = beg; pi <= end; pi++) { if ((next = CT(next,*pi)) == 0 || COL1(next) < 0) { return(FALSE); } } if (next != cos) { return(FALSE); } } return(TRUE); }
// Destination box for HTTP output: port (row 1) and path (row 2, preset to "/").
// Uses the `label` and `layout` names already in scope rather than creating its own.
HTTPDestBox::HTTPDestBox( QWidget *_parent )
    : VirtualDestBox( _parent )
{
    label->setText(
        qtr( "This module outputs the transcoded stream to a network via HTTP.") );

    // Captions: the path caption sits on row 2, the port caption on row 1.
    QLabel *pathLabel = new QLabel( qtr("Path"), this );
    QLabel *portLabel = new QLabel( qtr("Port"), this );
    layout->addWidget( pathLabel, 2, 0, 1, 1 );
    layout->addWidget( portLabel, 1, 0, 1, 1 );

    // Input widgets: path line edit and a port spin box (1..65535, initially 8080).
    HTTPEdit = new QLineEdit( this );
    HTTPEdit->setText( "/" );
    HTTPPort = new QSpinBox( this );
    HTTPPort->setMaximumSize( QSize( 90, 16777215 ) );
    HTTPPort->setAlignment( Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter );
    HTTPPort->setMinimum( 1 );
    HTTPPort->setMaximum( 65535 );
    HTTPPort->setValue( 8080 );
    layout->addWidget( HTTPEdit, 2, 1, 1, 1 );
    layout->addWidget( HTTPPort, 1, 1, 1, 1 );

    // Hook the editable widgets up through the file's CS/CT helper macros.
    CS( HTTPPort );
    CT( HTTPEdit );
}
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// 
AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
/* Compiles every snippet in the table with both compilers stored under
   MPSL.c_compiler and MPSL.a_compiler, decompiles the results and checks that
   the two decompiled outputs compare equal.  MPSL.compiler is set back to the
   c_compiler value at the end. */
void test_compiler_diffs(void)
{
    mpdm_t r, cc, ac;
    int n;

    /* snippets to compile; the table ends with a NULL code entry */
    struct {
        wchar_t *code;
        int line;
    } compiler_tests[] = {
//        CT(L"local a = 1, b = 2, c, d = 3;"),
//        CT(L"local a = 1;"),
//        CT(L"local v; 1;"),
//        CT(L"local x, y, z; 2;"),
        CT(L"a |= 6;"),
        CT(L"a %= 6;"),
        CT(L"a /= 6;"),
        CT(L"a *= 6;"),
        CT(L"a -= 6;"),
        CT(L"a += 6;"),
        CT(L"for (n = 0; n < 100; n = n + 1) { print('kill'); } 1234;"),
        CT(L"for (;;) { print('kill'); } 1234;"),
        CT(L"foreach (v, i, [1, 2, 3]) { print(e); } 666;"),
        CT(L"eol = driver == 'win32' && 'crlf' || 'lf';"),
        CT(L"255 $ '%x';"),
        CT(L"foreach (e, [1, 2, 3]) { print(e); } 666;"),
//        CT(L"global v; 1;"),
//        CT(L"global x, y, z; 2;"),
        CT(L"sub pi { 3.1416; } 100;"),
        CT(L"sub pi () { 3.1416; } 200;"),
        CT(L"sub by2(v) { v * 2; } 250;"),
        CT(L"sub mul(v1, v2) { v1 * v2; } 300;"),
        CT(L"mul = sub (v1, v2) { v1 * v2; }; 123;"),
        CT(L"by2 = sub (e) { e * 2; }; 100;"),
        CT(L"pi = sub { 3.14; }; 6;"),
        CT(L"f->write('hi', string(1 + 3), eol); 1;"),
        CT(L"f->read(); 1;"),
        CT(L"while (1) 2;"),
        CT(L"while (1) { 2; 3; }"),
        CT(L"while (a < 10) { a = a + 1; }"),
        CT(L"if (a == 1) { b = 2 + 4; c = 3 * 2; } else { d = 3; e = d / 2; }"),
        CT(L"if (1) 2; else 3;"),
        CT(L"if (1) { 2; 3; }"),
        CT(L"if (1) { 2; }"),
        CT(L"if (1) 2;"),
        CT(L"if (2 + 3) 4 + 5;"),
        CT(L"if (a == 1) b = 2;"),
        CT(L"list[0];"),
        CT(L"list[1] = 1;"),
        CT(L"MPSL['OPCODE'];"),
        CT(L"q = 1 + 2 * 3;"),
        CT(L"q.q = 1 * 2 + 3; q2 = [];"),
        CT(L"q = 100;"),
        CT(L"MPSL.CORE.random;"),
        CT(L"1 + 2; [1, 2] ; {};"),
        CT(L"a + 1;"),
        CT(L"1 * (2 + 3);"),
        CT(L"{};"),
        CT(L"{a: 1};"),
        CT(L"{c: 2, d: 3};"),
        CT(L"{'e' => 4, 'f' => 5};"),
        CT(L"{g: 6, 'h' => 7};"),
        CT(L"{a: 2 * 3, b: 5 + 8};"),
        CT(L"[];"),
        CT(L"[1, 2, 3];"),
        CT(L"[1, 2 + 3, 4];"),
        CT(L"MPSL.CORE.random();"),
        CT(L"bool(1, 2) + 666;"),
        CT(L"1 ; 2 ; 3;"),
        CT(L"random();"),
        CT(L"5 != '5';"),
        CT(L"10 == '10';"),
        CT(L"10 > 1 + 2;"),
        CT(L"1 + 2 * 3;"),
        CT(L"1 * 2 + 3;"),
        CT(L"1.2 + 3.4;"),
        CT(L"/* test test */"),
        CT(L"1;"),
        CT(L"!1;"),
        CT(L"'abcde';"),
        CT(L"3.14;"),
        CT(NULL)
    };

    printf("\nComparing the output of the two compilers:\n");

    /* fetch both compiler values from the MPSL object */
    r = mpdm_get_wcs(mpdm_root(), L"MPSL");
    cc = mpdm_get_wcs(r, L"c_compiler");
    ac = mpdm_get_wcs(r, L"a_compiler");

    n = 0;
    while (compiler_tests[n].code) {
        mpdm_t src, prg_c, prg_a, out_c, out_a;

        src = MPDM_S(compiler_tests[n].code);
        mpdm_ref(src);

        /* first pass: c_compiler */
        mpdm_set_wcs(r, cc, L"compiler");
        prg_c = mpsl_compile(src, NULL);
        out_c = mpsl_decompile(prg_c);

        /* second pass: a_compiler */
        mpdm_set_wcs(r, ac, L"compiler");
        prg_a = mpsl_compile(src, NULL);
        out_a = mpsl_decompile(prg_a);

/*        printf("%ls\n", mpdm_string(out_c));
        printf("%ls\n", mpdm_string(out_a));*/

        _do_test("compiler output equal", mpdm_cmp(out_c, out_a) == 0,
                 compiler_tests[n].line);

        mpdm_unref(src);
        n++;
    }

    /* leave c_compiler selected */
    mpdm_set_wcs(r, cc, L"compiler");
}
// Variadic forwarding constructor; `explicit` prevents it from acting as an
// unexpected implicit conversion.
explicit CT(T&&... t) {
    // static_cast<T&&> is what std::forward<T> expands to.
    // NOTE(review): this statement builds a temporary CT from the forwarded
    // arguments and discards it; nothing here initialises *this. Confirm
    // that this is the intended behaviour.
    CT(static_cast<T&&>(t)...);
}
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), 
D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
// Fills yieldStrength(cell,qp) for every cell and quadrature point of the workset:
//  - base value: constant_value, or exp_rf_kl evaluated at the point's coordinates;
//  - if isThermoElastic: subtract dYdT_value * (Temperature - refTemp);
//  - if isDiffuseDeformation: subtract constant_value*(zeta-1)*(CL - CLold),
//    where CLold is read from the workset state array under CLname.
void YieldStrength<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Debug output switch; disabled unless the line below is re-enabled.
  bool print = false;
  //if (typeid(ScalarT) == typeid(RealType)) print = true;

  if (print)
    std::cout << " *** YieldStrength *** " << std::endl;

  int numCells = workset.numCells;

  // --- base yield strength ---
  if (!is_constant) {
    for (int c=0; c < numCells; ++c) {
      for (int q=0; q < numQPs; ++q) {
        Teuchos::Array<MeshScalarT> point(numDims);
        for (int d=0; d<numDims; d++)
          point[d] = Sacado::ScalarValue<MeshScalarT>::eval(coordVec(c,q,d));
        yieldStrength(c,q) = exp_rf_kl->evaluate(point, rv);
      }
    }
  }
  else {
    for (int c=0; c < numCells; ++c)
      for (int q=0; q < numQPs; ++q)
        yieldStrength(c,q) = constant_value;
  }

  // --- temperature correction ---
  if (isThermoElastic) {
    for (int c=0; c < numCells; ++c) {
      for (int q=0; q < numQPs; ++q) {
        yieldStrength(c,q) -= dYdT_value * (Temperature(c,q) - refTemp);
        if (print) {
          std::cout << " Y : " << yieldStrength(c,q) << std::endl;
          std::cout << " temp: " << Temperature(c,q) << std::endl;
          std::cout << " dYdT: " << dYdT_value << std::endl;
          std::cout << " refT: " << refTemp << std::endl;
        }
      }
    }
  }

  // --- concentration correction ---
  if (isDiffuseDeformation) {
    Albany::MDArray CLold = (*workset.stateArrayPtr)[CLname];

    for (int c=0; c < numCells; ++c) {
      for (int q=0; q < numQPs; ++q) {
        // yieldStrength(c,q) = constant_value*( 1.0 + (zeta-1.0)*CL(c,q) );
        yieldStrength(c,q) -=
          constant_value*(zeta-1.0)*(CL(c,q) -CLold(c,q) );
        if (print) {
          std::cout << " Y : " << yieldStrength(c,q) << std::endl;
          std::cout << " CT : " << CT(c,q) << std::endl;
          std::cout << " zeta : " << zeta << std::endl;
        }
      }
    }
  }
}
/* Test the subgroup generators for closure under conjugation by the group
   generators, using the coset table.

   For each column `col` (1..ncol) the coset first = CT(1,invcol[col]) is
   looked up; each subgroup generator word is then traced from `first`.  If a
   trace completes but does not return to `first`, a message naming the group
   generator and the subgroup generator is written to fop.  Incomplete traces
   (a 0 table entry, or a coset with COL1 < 0) are skipped silently.

   build: when true, every reported conjugate is also stored as a new word
          (-k, generator word, k) in a freshly allocated word list, which is
          appended to genlst at the end; nsgpg is then updated and the
          okcont / tabinfo / tabindex flags are cleared.

   Allocation failures are reported through al2_continue() after freeing what
   was allocated so far.
   NOTE(review): the code after each al2_continue() call assumes that the
   call does not return -- confirm. */
void al2_normcl(Logic build)
  {
  int col, first, next, s, *beg, *end, *pi, j,k,l;
  Logic found;
  Wlist *list;
  Wlelt *lelt;

  found = FALSE;
  list = NULL;

  for (col = 1; col <= ncol; col++)      /* all `significant' gen'rs */
    {
    if ((first = CT(1,invcol[col])) == 0 || COL1(first) < 0)
      { continue; }                      /* trace incomplete, next col */

    for (s = 1; s <= nsgpg; s++)         /* all (original) subgrp gens */
      {
      /* [beg,end] delimits the columns of subgroup generator s */
      beg = &subggen[subgindex[s]];
      end = beg-1 + subglength[s];

      next = first;
      for (pi = beg; pi <= end; pi++)
        {
        if ((next = CT(next,*pi)) == 0 || COL1(next) < 0)
          { goto next_s; }               /* trace incomplete, next gen */
        }

      if (next == first)
        { continue; }                    /* closes, next gen */

      /* At this point the trace of generator s from `first` completed but
         did not get back to `first`, so this conjugate is reported as not
         being in the subgroup. */

      found = TRUE;                      /* at least 1 conjugate not in sgp */

      k = colgen[col];                   /* (signed) generator number */
      if (!galpha)
        {
        /* numeric generators: print signed generator numbers */
        fprintf(fop, "Conjugate by grp gen'r \"%d\" of", k);
        fprintf(fop, " subgrp gen'r \"");
        for (pi = beg; pi <= end; pi++)
          { fprintf(fop, " %d", colgen[*pi]); }
        }
      else
        {
        /* alphabetic generators: negative numbers print as upper case */
        fprintf(fop, "Conjugate by grp gen'r \"%c\" of", (k > 0) ? algen[k] : toupper(algen[-k]));
        fprintf(fop, " subgrp gen'r \"");
        for (pi = beg; pi <= end; pi++)
          {
          if ((l = colgen[*pi]) > 0)
            { fprintf(fop, "%c", algen[l]); }
          else
            { fprintf(fop, "%c", toupper(algen[-l])); }
          }
        }
      fprintf(fop, "\" not in subgrp\n");

      if (build)
        {
        /* the word list is created lazily, on the first reported conjugate */
        if (list == NULL)
          {
          if ((list = al1_newwl()) == NULL)
            { al2_continue("unable to create new subgrp gen'r list"); }
          }
        if ((lelt = al1_newelt()) == NULL)
          {
          al1_emptywl(list);
          free(list);
          al2_continue("unable to create subgrp gen'r list elt");
          }
        lelt->len = subglength[s] + 2;   /* gen'r + col/col^-1 */
        /* word[] is indexed from 1, hence len+1 entries */
        if ((lelt->word = (int*)malloc((lelt->len+1)*sizeof(int))) == NULL)
          {
          al1_emptywl(list);
          free(list);
          free(lelt);
          al2_continue("unable to create subgrp gen'r list elt word");
          }
        /* new word = -k, the generator word, k */
        lelt->exp = 1;
        lelt->word[1] = -k;
        for (pi = beg, j = 2; pi <= end; pi++, j++)
          { lelt->word[j] = colgen[*pi]; }
        lelt->word[lelt->len] = k;
        al1_addwl(list,lelt);
        }

      next_s:                            /* target for incomplete traces */
        ;
      }
    }

  if (!found)
    { fprintf(fop, "* All (traceable) conjugates in subgroup\n"); }

  /* If list != NULL then a list holding at least one new subgrp gen'r was
     created above (so found is TRUE).  Append the new gen'rs to genlst and
     update the enumeration status. */

  if (list != NULL)
    {
    al1_concatwl(genlst,list);
    nsgpg = genlst->len;
    okcont = FALSE;
    tabinfo = tabindex = FALSE;
    fprintf(fop, "* Subgroup generators have been augmented\n");
    }
  }
void al2_cycles(void) { int i, j, k, kn, t, length; Logic id; for (j = 1; j <= ndgen; j++) { k = gencol[ndgen+j]; /* find the column k for generator j */ id = TRUE; /* assume action is the identity */ if (!galpha) /* print lhs & record its length */ { fprintf(fop, "%d = ", j); length = al2_outlen(j) + 3; } else { fprintf(fop, "%c = ", algen[j]); length = 4; } for (i = 1; i <= nalive; i++) { if (CT(i, k) == i) /* skip if i is a one-cycle */ { CT(i, k) = -i; continue; } /* have we used coset i in previous cycle? */ if (CT((kn = i), k) < 0) { continue; } id = FALSE; /* action of generator not identity */ /* no, trace out this cycle */ length += al2_outlen(kn) + 1; if (length < LLL) { fprintf(fop, "(%d", kn); } else { fprintf(fop, "\n (%d", kn); length = al2_outlen(kn) + 3; } t = CT(kn, k); CT(kn, k) = -t; /* mark this coset as used */ kn = t; while (CT(kn,k) > 0) { length += al2_outlen(kn) + 1; if (length < LLL) { fprintf(fop, ",%d", kn); } else { fprintf(fop, ",\n %d", kn); length = al2_outlen(kn) + 2; } t = CT(kn, k); CT(kn, k) = -t; kn = t; } /* we have reached the end of the cycle */ fprintf(fop, ")"); length++; } if (id) { fprintf(fop, "identity\n"); } else { fprintf(fop, "\n"); } /* change all the (negative) values in this column back to positive */ for (i = 1; i <= nalive; i++) { CT(i, k) = -CT(i, k); } } }
/**
 * Times homotopy evaluation on the CPU and on the GPU for a batch of
 * synthetic points and returns the largest CPU/GPU discrepancy.
 *
 * Every path gets (n_predictor+1) points of dimension dim, filled with
 * (x_idx+1, 0), a t-multiplier of (1, 0), and a randomly chosen predictor
 * slot x_t_idx that selects which point is evaluated.  The optional
 * "classic" evaluation through Classic_Sys is compiled in but disabled by
 * the local classic_check flag.
 *
 * @param workspace_cpu  only its n_predictor member is read
 * @param cpu_inst_hom   homotopy instance; provides n_eq, dim, the CPU
 *                       evaluator and the input for GPU_Eval
 * @param sol0           unused
 * @param t              unused
 * @param Classic_Sys    evaluator used only when classic_check is enabled
 * @param n_path         number of paths; values <= 0 select 1000
 * @return the maximum value returned by eval_compare over all paths
 *
 * Fixes relative to the previous version:
 *  - timeMS_classic is initialised, so the final report no longer reads an
 *    indeterminate value when the classic evaluation is skipped;
 *  - the per-path CPU workspace array is released before returning.
 */
T1 eval_test_classic
 ( Workspace& workspace_cpu, CPUInstHom& cpu_inst_hom, CT* sol0, CT t,
   PolySys& Classic_Sys, int n_path )
{
    struct timeval start, end;
    long seconds, useconds;
    double timeMS_classic = 0.0;   // stays 0 unless classic_check is enabled
    double timeMS_cpu;
    double timeMS_gpu;

    int n_eq = cpu_inst_hom.n_eq;
    int dim = cpu_inst_hom.dim;

    if(n_path<=0)
    {
        std::cout << "Default number of path" << std::endl;
        n_path = 1000;
    }

    int n_predictor = workspace_cpu.n_predictor;

    std::cout << "n_path = " << n_path << std::endl;

    // Test points: n_path blocks of (n_predictor+1) points of size dim.
    CT* sol = new CT[n_path*dim*(n_predictor+1)];
    CT* sol_tmp = sol;
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        for(int pred_idx=0; pred_idx<n_predictor+1; pred_idx++)
        {
            for(int x_idx=0; x_idx<dim; x_idx++)
            {
                // r/tmp feed only the commented-out random initialisation;
                // the rand() call is kept so the random sequence consumed
                // below (x_t_idx) is unchanged.
                int r = rand();
                T1 tmp = T1(r);
                // sol_tmp[x_idx] = CT(sin(tmp),cos(tmp));
                sol_tmp[x_idx] = CT(x_idx+1,0.0);
                // sol_tmp[x_idx] = CT(1,0.0);
            }
            sol_tmp += dim;
        }
    }

    // One t-multiplier per (path, predictor slot).
    CT* t_mult = new CT[n_path*(n_predictor+1)];
    for(int sol_idx=0; sol_idx<n_path*(n_predictor+1); sol_idx++)
    {
        // r feeds only the commented-out alternative; rand() is kept to
        // preserve the random sequence.
        double r = 1.0*rand()/RAND_MAX;
        // t_mult[sol_idx] = CT(r,0.0);
        t_mult[sol_idx] = CT(1,0.0);
    }

    // Predictor slot evaluated for each path.
    int* x_t_idx = new int[n_path];
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        x_t_idx[sol_idx] = rand()%(n_predictor+1);
    }

    std::cout << "----- CPU Evaluation ----" << std::endl;
    Workspace* workspace_cpu_all = new Workspace[n_path];
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        cpu_inst_hom.init_workspace(workspace_cpu_all[sol_idx]);
    }

    gettimeofday(&start, NULL);
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        CT* tmp_sol = sol+sol_idx*dim*(n_predictor+1)+dim*x_t_idx[sol_idx];
        CT* t_tmp = t_mult+sol_idx*(n_predictor+1)+x_t_idx[sol_idx];
        cpu_inst_hom.eval(workspace_cpu_all[sol_idx], tmp_sol, *t_tmp);
    }
    gettimeofday(&end, NULL);
    seconds = end.tv_sec - start.tv_sec;
    useconds = end.tv_usec - start.tv_usec;
    timeMS_cpu = seconds*1000 + useconds/1000.0;

    bool classic_check = false;
    if(classic_check)
    {
        std::cout << "----- Class Evaluation ----" << std::endl;
        // Per path: n_eq function values followed by n_eq rows of dim
        // derivative values, all carved out of one allocation.
        CT* workspace_classic = new CT[n_path*n_eq*(dim+1)];
        CT** f_val = new CT*[n_path];
        CT* tmp_workspace = workspace_classic;
        CT*** deri_val = new CT**[n_path];
        CT** deri_space = new CT*[n_path*n_eq];
        for(int sol_idx=0; sol_idx<n_path; sol_idx++)
        {
            f_val[sol_idx] = tmp_workspace;
            tmp_workspace += n_eq;
            deri_val[sol_idx] = deri_space + sol_idx*n_eq;
            for(int i=0; i<n_eq; i++)
            {
                deri_val[sol_idx][i] = tmp_workspace;
                tmp_workspace += dim;
            }
        }

        gettimeofday(&start, NULL);
        for(int sol_idx=0; sol_idx<n_path; sol_idx++)
        {
            CT* tmp_sol = sol+sol_idx*dim*(n_predictor+1)+dim*x_t_idx[sol_idx];
            Classic_Sys.eval(tmp_sol, f_val[sol_idx], deri_val[sol_idx]);
        }
        gettimeofday(&end, NULL);
        seconds = end.tv_sec - start.tv_sec;
        useconds = end.tv_usec - start.tv_usec;
        timeMS_classic = seconds*1000 + useconds/1000.0;

        // Check two CPU method
        std::cout << "----- Classic Evaluation Check ----" << std::endl;
        for(int sol_idx=0; sol_idx<n_path; sol_idx++)
        {
            err_check_class_workspace(deri_val[sol_idx],f_val[sol_idx],
                                      workspace_cpu_all[sol_idx].matrix,
                                      n_eq, dim);
        }

        delete[] workspace_classic;
        delete[] f_val;
        delete[] deri_val;
        delete[] deri_space;
    }

    std::cout << "----- GPU Evaluation ----" << std::endl;
    // Filled in by GPU_Eval; released below.
    CT** gpu_workspace_all = NULL;
    CT** gpu_matrix_all = NULL;

    gettimeofday(&start, NULL);
    GPU_Eval(cpu_inst_hom,sol,t_mult,gpu_workspace_all,gpu_matrix_all,n_path,
             x_t_idx, n_predictor);
    gettimeofday(&end, NULL);
    seconds = end.tv_sec - start.tv_sec;
    useconds = end.tv_usec - start.tv_usec;
    timeMS_gpu = seconds*1000 + useconds/1000.0;

    std::cout << "----- CPU vs GPU Evaluation Check----" << std::endl;
    T1 err = 0;
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        // std::cout << "sol_idx = " << sol_idx << std::endl;
        T1 err_tmp = eval_compare(cpu_inst_hom,gpu_workspace_all[sol_idx],
                                  gpu_matrix_all[sol_idx],
                                  workspace_cpu_all[sol_idx].all,
                                  workspace_cpu_all[sol_idx].matrix);
        if(err_tmp > err)
        {
            err = err_tmp;
        }
        // std::cout << "err = " << err_tmp << std::endl;
    }

    delete[] x_t_idx;
    delete[] t_mult;
    delete[] sol;
    for(int sol_idx=0; sol_idx<n_path; sol_idx++)
    {
        delete[] gpu_workspace_all[sol_idx];
        delete[] gpu_matrix_all[sol_idx];
    }
    delete[] gpu_workspace_all;
    delete[] gpu_matrix_all;
    // Last use of the CPU workspaces was the comparison loop above.
    delete[] workspace_cpu_all;

    std::cout << "err = " << err << std::endl;
    std::cout << "Classic Eval time " << timeMS_classic << std::endl;
    std::cout << "CPU Eval time " << timeMS_cpu << std::endl;
    std::cout << "GPU Eval time " << timeMS_gpu << std::endl;

    return err;
}
// Normal/Normal GEMM variant: scales C by beta and then accumulates
// alpha A B into C, one panel of A (A1) and matching panel of C (C1) at a
// time.
//
// For each panel the product is formed in transposed form,
//   D1^T[MR,*] := alpha B^T A1^T,
// using a [*,MC] redistribution of A1 and B as stored, and then added into
// C1 through TransposeSumScatterUpdate with a scale of T(1).
//
// Unless RELEASE is defined, the routine pushes/pops a call-stack entry and
// throws std::logic_error if the three matrices are on different grids or
// their dimensions are nonconformal.
//
// NOTE(review): PartitionDown/RepartitionDown presumably split by rows
// (AT/AB = top/bottom, A1 = current row panel) - confirm against the
// partitioning routines.
inline void
GemmNNB
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    // Conformality: C must be A.Height() x B.Width(), and the inner
    // dimensions of A and B must agree.
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNB: \n"
            << " A ~ " << A.Height() << " x " << A.Width() << "\n"
            << " B ~ " << B.Height() << " x " << B.Width() << "\n"
            << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC> A1_STAR_MC(g);
    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);

    // Both temporaries are aligned with B, the operand used as-is in the
    // local multiply below.
    A1_STAR_MC.AlignWith( B );
    D1Trans_MR_STAR.AlignWith( B );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    // Iterate until the not-yet-processed part of A (AB) is empty.
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        // The accumulator holds the transpose of the C1 update, hence the
        // swapped Width/Height arguments.
        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]

        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR );

        // Fold the transposed partial product into C1 (scale T(1)).
        C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Normal/Normal GEMM variant built from block products: scales C by beta
// and then, for every pair of an A panel (A1) and a B panel (B1), forms
// alpha A1 B1 in a [*,*] temporary and adds it into the matching block C11
// of C through SumScatterUpdate with a scale of T(1).
//
// Two loop orders are provided, selected by A.Height() > B.Width():
//   - true:  outer loop over panels of A, inner loop over panels of B,
//            using [*,VC] / [VC,*] redistributions;
//   - false: outer loop over panels of B, inner loop over panels of A,
//            using [*,VR] / [VR,*] redistributions.
// In both cases the operand of the outer loop is redistributed once per
// outer iteration and the inner operand once per inner iteration.
//
// Unless RELEASE is defined, the routine pushes/pops a call-stack entry and
// throws std::logic_error if the three matrices are on different grids or
// their dimensions are nonconformal.
//
// NOTE(review): "Down" partitions presumably split by rows and "Right"
// partitions by columns - confirm against the partitioning routines.
inline void
GemmNNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    // Conformality: C must be A.Height() x B.Width(), and the inner
    // dimensions of A and B must agree.
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n"
            << " A ~ " << A.Height() << " x " << A.Width() << "\n"
            << " B ~ " << B.Height() << " x " << B.Width() << "\n"
            << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g),
                      BR(g), B1(g),
                             B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        // Outer loop: walk A (and C) downwards until AB is empty.
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            // Redistribute the current A panel once per outer iteration
            // and align the B temporary with it.
            A1_STAR_VC = A1;
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            // Inner loop: walk B (and the C1 panel) rightwards until BR
            // is empty.
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                // C11[*,*] := alpha A1[*,VC] B1[VC,*], then added into C11.
                LocalGemm
                ( NORMAL, NORMAL,
                  alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }
            // Release the alignment set at the top of this iteration so it
            // can be set again for the next A panel.
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g),
                      BR(g), B1(g),
                             B2(g);
        DistMatrix<T>
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        // Outer loop: walk B (and C) rightwards until BR is empty.
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            // Redistribute the current B panel once per outer iteration
            // and align the A temporary with it.
            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            // Inner loop: walk A (and the C1 panel) downwards until AB is
            // empty.
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                // C11[*,*] := alpha A1[*,VR] B1[VR,*], then added into C11.
                LocalGemm
                ( NORMAL, NORMAL,
                  alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }
            // Release the alignment set at the top of this iteration so it
            // can be set again for the next B panel.
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 );

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}