コード例 #1
0
ファイル: tty.c プロジェクト: sisoftrg/qico
int tty_get(byte *buf,size_t size,int *timeout)
{
	int rc,i;
	time_t t;
	fd_set rfds,efds;
	struct timeval tv;
	if(tty_hangedup)return RCDO;
	FD_ZERO(&rfds);FD_ZERO(&efds);
	FD_SET(STDIN_FILENO,&rfds);
	FD_SET(STDIN_FILENO,&efds);
	tv.tv_sec=*timeout;
	tv.tv_usec=0;
	t=time(NULL);
	rc=selectmy(STDIN_FILENO+1,&rfds,&efds,&tv);
	if(rc<0) {
		if(tty_hangedup)return RCDO;
		else return ERROR;
	}
	*timeout-=(time(NULL)-t);
	if(!rc)return TIMEOUT;
	if(FD_ISSET(STDIN_FILENO,&efds))return ERROR;
	rc=read(STDIN_FILENO,buf,size);
	if(rc<1) {
		if(tty_hangedup||errno==EPIPE)return RCDO;
		else return(errno!=EAGAIN&&errno!=EINTR)?ERROR:TIMEOUT;
	}
#if DEBUG_SLEEP==1
	if(is_ip)usleep(1000);
#endif
#ifdef NEED_DEBUG
	for(i=0;i<rc;i++)DEBUG(('M',9,"tty_get: '%c' (%d)",C0(buf[i]),buf[i]));
#endif
	return rc;
}
コード例 #2
0
ファイル: math.cpp プロジェクト: MichaelStevens/VirtualBot
bool intersectsRect(Vector2 A, Vector2 B, double x, double y, double width, double height) {
    Vector2 C0(x, y);
    Vector2 D0(x + width, y);

    Vector2 C1(x + width, y);
    Vector2 D1(x + width, y + height);

    Vector2 C2(x + width, y + height);
    Vector2 D2(x, y + height);

    Vector2 C3(x, y + height);
    Vector2 D3(x, y);

    bool I0, I1, I2, I3;

    Vector2 buf;

    I0 = intersects(A, B, C0, D0, buf);
    I1 = intersects(A, B, C1, D1, buf);
    I2 = intersects(A, B, C2, D2, buf);
    I3 = intersects(A, B, C3, D3, buf);
    if(I0 || I1 || I2 || I3) {
        return false;
    }
    return true;
}
コード例 #3
0
ファイル: ofApp.cpp プロジェクト: ChongChongS/ColorMixer
//--------------------------------------------------------------
void ofApp::setup(){  
  ofSetFrameRate(60.0f);

  for(int i=0;i<12;i++)
  {
    randColorDrop();
  }
  Attractor.reset(new ColorDropAttractor());
  Merger.reset(new ColorDropMerger());
  Dragger.reset(new ColorDropDragger());
  
  ofPtr<ColorHole> H;
  ofFloatColor C0(ofFloatColor::yellow);
  C0.setBrightness(0.7f);
  H.reset(new ColorHole(
    ofVec2f(200,200),
    C0,2600));
  Holes.push_back(H);
  ofFloatColor C1(ofFloatColor::cyan);
  C1.setBrightness(0.7f);
  H.reset(new ColorHole(
    ofVec2f(750,240),
    ofFloatColor(0.8f,0.0f,0.8f,1),3000));
  Holes.push_back(H);
  ofFloatColor C2(ofFloatColor::violet);
  C2.setBrightness(0.7f);
  H.reset(new ColorHole(
    ofVec2f(450,500),
    ofFloatColor(0.0f,0.9f,0.9f,1),3800));
  Holes.push_back(H);
}
コード例 #4
0
ファイル: ARIB8CharDecode.cpp プロジェクト: Velmy/EDCB
BOOL CARIB8CharDecode::Analyze( const BYTE* pbSrc, DWORD dwSrcSize, DWORD* pdwReadSize )
{
	if( pbSrc == NULL || dwSrcSize == 0 || pdwReadSize == NULL){
		return FALSE;
	}
	BOOL bRet = TRUE;
	DWORD dwReadSize = 0;

	while( dwReadSize < dwSrcSize ){
		DWORD dwReadBuff = 0;
		//1バイト目チェック
		if( pbSrc[dwReadSize] <= 0x20 ){
			//C0制御コード
			bRet = C0( pbSrc+dwReadSize, &dwReadBuff );
			dwReadSize += dwReadBuff;
			if( bRet == FALSE ){
				return FALSE;
			}else if( bRet == 2 ){
				bRet = TRUE;
				break;
			}
		}else if( pbSrc[dwReadSize] > 0x20 && pbSrc[dwReadSize] < 0x7F ){
			//GL符号領域
			if( GL( pbSrc+dwReadSize, &dwReadBuff ) == FALSE ){
				return FALSE;
			}
			dwReadSize += dwReadBuff;
		}else if( pbSrc[dwReadSize] >= 0x7F && pbSrc[dwReadSize] <= 0xA0 ){
			//C1制御コード
			bRet = C1( pbSrc+dwReadSize, &dwReadBuff );
			dwReadSize += dwReadBuff;
			if( bRet == FALSE ){
				return FALSE;
			}else if( bRet == 2 ){
				bRet = TRUE;
				break;
			}
		}else if( pbSrc[dwReadSize] > 0xA0 && pbSrc[dwReadSize] < 0xFF ){
			//GR符号領域
			if( GR( pbSrc+dwReadSize, &dwReadBuff ) == FALSE ){
				return FALSE;
			}
			dwReadSize += dwReadBuff;
		}
	}

	*pdwReadSize = dwReadSize;
	return bRet;
}
コード例 #5
0
ファイル: CharacterController.cpp プロジェクト: gbaumgart/vt
static bool SweepCapsuleMesh(const SweepTest* sweep_test, const SweptVolume* volume, const TouchedGeom* geom, const NxExtendedVec3& center, const NxVec3& dir, SweptContact& impact)
{
	ASSERT(volume->GetType()==SWEPT_CAPSULE);
	ASSERT(geom->mType==TOUCHED_MESH);
	const SweptCapsule* SC = static_cast<const SweptCapsule*>(volume);
	const TouchedMesh* TM = static_cast<const TouchedMesh*>(geom);

	NxU32 NbTris = TM->mNbTris;
	if(!NbTris)	return false;

	// Fetch triangle data for current mesh (the stream may contain triangles from multiple meshes)
	const NxTriangle* T		= &sweep_test->mWorldTriangles[TM->mIndexWorldTriangles];
//	const NxTriangle* ET	= &sweep_test->mWorldEdgeNormals[TM->mIndexWorldEdgeNormals];
	const NxU32* EdgeFlags	= &sweep_test->mEdgeFlags[TM->mIndexEdgeFlags];

	NxVec3 C0(float(center.x - TM->mOffset.x), float(center.y - TM->mOffset.y), float(center.z - TM->mOffset.z));

	// PT: this only really works when the CCT collides with a single mesh, but that's the most common case. When it doesn't, there's just no speedup but it still works.
	NxU32 CachedIndex = sweep_test->mCachedTriIndex[sweep_test->mCachedTriIndexIndex];
	if(CachedIndex>=NbTris)	CachedIndex=0;

	NxVec3 Hit, Normal;
	float t;
	NxU32 Index;
	if(gUtilLib->NxSweepCapsuleTriangles(sweep_test->mUpDirection, NbTris, T, EdgeFlags,
							C0, SC->mRadius, SC->mHeight,
							dir, impact.mDistance,
							Hit, Normal, t, Index, &CachedIndex))
	{
		if(t>=impact.mDistance)			return false;

		impact.mDistance	= t;
		impact.mWorldNormal	= Normal;
		impact.mWorldPos.x	= Hit.x + TM->mOffset.x;
		impact.mWorldPos.y	= Hit.y + TM->mOffset.y;
		impact.mWorldPos.z	= Hit.z + TM->mOffset.z;

		// Returned index is only between 0 and NbTris, i.e. it indexes the array of cached triangles, not the original mesh.
		assert(Index<NbTris);
		sweep_test->mCachedTriIndex[sweep_test->mCachedTriIndexIndex] = Index;

		// The CCT loop will use the index from the start of the cache...
		impact.mIndex = Index + TM->mIndexWorldTriangles;

		return true;
	}
	return false;
}
コード例 #6
0
ファイル: main.cpp プロジェクト: CCJY/coliru
int main()
{
    C0().f0();
    
    C1().f0();
    C1().f1();
    
    C2().f0();
    C2().f1();
    C2().f2();
    
    C3().f0();
    C3().f1();
    C3().f2();
    C3().f3();
}
コード例 #7
0
ファイル: tty.c プロジェクト: sisoftrg/qico
int tty_put(byte *buf,size_t size)
{
	int rc;
	if(tty_hangedup)return RCDO;
	rc=write(STDOUT_FILENO,buf,size);
	if(rc!=size) {
		if(tty_hangedup||errno==EPIPE)return RCDO;
		    else return ERROR;
	}
#if DEBUG_SLEEP==1
	if(is_ip)usleep(1000);
#endif
#ifdef NEED_DEBUG
	for(rc=0;rc<size;rc++)DEBUG(('M',9,"tty_put: '%c' (%d)",C0(buf[rc]),buf[rc]));
#endif
	return OK;
}
コード例 #8
0
//戻り値がFALSEのとき*pdwReadSizeは不定、TRUEのとき*pdwReadSize<=dwSrcSize (C0 C1ほかメソッドも同様)
BOOL CARIB8CharDecode::Analyze( const BYTE* pbSrc, DWORD dwSrcSize, DWORD* pdwReadSize )
{
	if( dwSrcSize == 0 ){
		return FALSE;
	}
	DWORD dwReadSize = 0;

	while( dwReadSize < dwSrcSize ){
		BOOL bRet = FALSE;
		DWORD dwReadBuff = 0;
		BOOL bHoldCharacter = m_bRPC;

		//1バイト目チェック
		if( pbSrc[dwReadSize] <= 0x20 ){
			//C0制御コード
			bRet = C0( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff );
		}else if( pbSrc[dwReadSize] < 0x7F ){
			//GL符号領域
			bRet = GL_GR( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff, m_GL );
		}else if( pbSrc[dwReadSize] <= 0xA0 ){
			//C1制御コード
			bRet = C1( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff );
		}else if( pbSrc[dwReadSize] < 0xFF ){
			//GR符号領域
			bRet = GL_GR( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff, m_GR );
		}
		if( !bRet ){
			return FALSE;
		}
		if( m_bSpacing && bHoldCharacter ){
			if( m_wRPC != 0 && --m_wRPC == 0 ){
				//文字繰り返し終了
				m_bRPC = FALSE;
				dwReadSize += dwReadBuff;
			}
		}else{
			dwReadSize += dwReadBuff;
		}
	}

	*pdwReadSize = dwReadSize;
	ASSERT( dwReadSize == dwSrcSize );
	return TRUE;
}
コード例 #9
0
ファイル: modem.c プロジェクト: askovpen/qico
int modem_sendstr(char *cmd)
{
	int rc=1;
	if(!cmd) return 1;
	DEBUG(('M',1,">> %s",cmd));
	while(*cmd && rc>0) {
		switch(*cmd) {
		case '|': rc=write(STDOUT_FILENO, "\r", 1);usleep(300000L);break;
		case '~': sleep(1);rc=1;break;
		case '\'': usleep(200000L);rc=1;break;
		case '^': rc=tty_setdtr(1); break;
		case 'v': rc=tty_setdtr(0); break;
		default: rc=write(STDOUT_FILENO, cmd, 1);
		DEBUG(('M',4,">>> %c",C0(*cmd)));
		}
		cmd++;
	}
	if(rc>0) DEBUG(('M',4,"modem_sendstr: sent"));
	    else DEBUG(('M',3,"modem_sendstr: error, rc=%d, errno=%d",rc,errno));
	return rc;
}
コード例 #10
0
ファイル: gtc_integer.cpp プロジェクト: hrehfeld/glm_working
	int test()
	{
		int Error = 0;

		int A0(glm::log2(10.f));
		glm::ivec1 B0(glm::log2(glm::vec1(10.f)));
		glm::ivec2 C0(glm::log2(glm::vec2(10.f)));
		glm::ivec3 D0(glm::log2(glm::vec3(10.f)));
		glm::ivec4 E0(glm::log2(glm::vec4(10.f)));

		int A1 = glm::log2(int(10.f));
		glm::ivec1 B1 = glm::log2(glm::ivec1(10.f));
		glm::ivec2 C1 = glm::log2(glm::ivec2(10.f));
		glm::ivec3 D1 = glm::log2(glm::ivec3(10.f));
		glm::ivec4 E1 = glm::log2(glm::ivec4(10.f));

		Error += A0 == A1 ? 0 : 1;
		Error += glm::all(glm::equal(B0, B1)) ? 0 : 1;
		Error += glm::all(glm::equal(C0, C1)) ? 0 : 1;
		Error += glm::all(glm::equal(D0, D1)) ? 0 : 1;
		Error += glm::all(glm::equal(E0, E1)) ? 0 : 1;

		return Error;
	}
コード例 #11
0
	int test()
	{
		int Error = 0;

		int A0 = static_cast<int>(glm::log2(16.f));
		glm::ivec1 B0(glm::log2(glm::vec1(16.f)));
		glm::ivec2 C0(glm::log2(glm::vec2(16.f)));
		glm::ivec3 D0(glm::log2(glm::vec3(16.f)));
		glm::ivec4 E0(glm::log2(glm::vec4(16.f)));

		int A1 = glm::log2(int(16));
		glm::ivec1 B1 = glm::log2(glm::ivec1(16));
		glm::ivec2 C1 = glm::log2(glm::ivec2(16));
		glm::ivec3 D1 = glm::log2(glm::ivec3(16));
		glm::ivec4 E1 = glm::log2(glm::ivec4(16));

		Error += A0 == A1 ? 0 : 1;
		Error += glm::all(glm::equal(B0, B1)) ? 0 : 1;
		Error += glm::all(glm::equal(C0, C1)) ? 0 : 1;
		Error += glm::all(glm::equal(D0, D1)) ? 0 : 1;
		Error += glm::all(glm::equal(E0, E1)) ? 0 : 1;

		glm::uint64 A2 = glm::log2(glm::uint64(16));
		glm::u64vec1 B2 = glm::log2(glm::u64vec1(16));
		glm::u64vec2 C2 = glm::log2(glm::u64vec2(16));
		glm::u64vec3 D2 = glm::log2(glm::u64vec3(16));
		glm::u64vec4 E2 = glm::log2(glm::u64vec4(16));

		Error += A2 == glm::uint64(4) ? 0 : 1;
		Error += glm::all(glm::equal(B2, glm::u64vec1(4))) ? 0 : 1;
		Error += glm::all(glm::equal(C2, glm::u64vec2(4))) ? 0 : 1;
		Error += glm::all(glm::equal(D2, glm::u64vec3(4))) ? 0 : 1;
		Error += glm::all(glm::equal(E2, glm::u64vec4(4))) ? 0 : 1;

		return Error;
	}
コード例 #12
0
ファイル: LoopToolsWrapper.cpp プロジェクト: shehu0/HEPfit
complex LoopToolsWrapper::PV_C0(const double p2,
                                const double m02, const double m12, const double m22) const
{
    std::complex<double> C0val = C0(0.0, 0.0, p2, m02, m12, m22);
    return complex( C0val.real(), C0val.imag(), false );
}
コード例 #13
0
ファイル: TT.hpp プロジェクト: certik/Elemental
inline void
GemmTTA
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTA expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTA: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    B1_STAR_MC.AlignWith( A ); 
    D1_MR_STAR.AlignWith( A );  

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( BB.Height() > 0 )
    {
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionRight
        ( CL, /**/     CR,
          C0, /**/ C1, C2 );

        D1.AlignWith( C1 );  
        Zeros( C1.Height(), C1.Width(), D1_MR_STAR );
        //--------------------------------------------------------------------//
        B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR]

        // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T
        //           = alpha (A^T)[MR,MC] (B1^T)[MC,*]
        LocalGemm
        ( orientationOfA, orientationOfB, 
          alpha, A, B1_STAR_MC, T(0), D1_MR_STAR );

        // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols
        D1_MR_MC.SumScatterFrom( D1_MR_STAR );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();
        
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 ); 
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #14
0
ファイル: TT.hpp プロジェクト: certik/Elemental
inline void
GemmTTB
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTB expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > D1_STAR_MC(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    A1_VR_STAR.AlignWith( B );
    A1AdjOrTrans_STAR_MR.AlignWith( B );
    D1_STAR_MC.AlignWith( B );

    // Start the algorithm 
    Scale( beta, C );
    LockedPartitionRight( A, AL, AR, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/     AR,
          A0, /**/ A1, A2 );
 
        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        D1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), D1_STAR_MC );
        //--------------------------------------------------------------------//
        A1_VR_STAR = A1;
        if( orientationOfA == ADJOINT )
            A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR );
        else
            A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR );
 
        // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H]
        //           = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC]
        LocalGemm
        ( NORMAL, orientationOfB, 
          alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC );

        // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows
        D1_MR_MC.SumScatterFrom( D1_STAR_MC );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #15
0
ファイル: cxx_main.cpp プロジェクト: gitter-badger/quinoa
int main(int argc, char *argv[]) {

  int i, returnierr=0;

#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  // Uncomment to debug in parallel int tmp; if (Comm.MyPID()==0) cin >> tmp; Comm.Barrier();

  bool verbose = false;
  bool veryVerbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  // Check if we should print lots of results to standard out
  if (argc>2) if (argv[2][0]=='-' && argv[2][1]=='v') veryVerbose = true;

  if (verbose && Comm.MyPID()==0)
    std::cout << Epetra_Version() << std::endl << std::endl;

  if (!verbose) Comm.SetTracebackMode(0); // This should shut down any error traceback reporting

  if (verbose) std::cout << Comm << std::endl << std::flush;

  bool verbose1 = verbose;
  if (verbose) verbose = (Comm.MyPID()==0);

  bool veryVerbose1 = veryVerbose;
  if (veryVerbose) veryVerbose = (Comm.MyPID()==0);

  int NumMyElements = 100;
  if (veryVerbose1) NumMyElements = 10;
  NumMyElements += Comm.MyPID();
  int MaxNumMyElements = NumMyElements+Comm.NumProc()-1;
  int * ElementSizeList = new int[NumMyElements];
  long long * MyGlobalElements = new long long[NumMyElements];

  for (i = 0; i<NumMyElements; i++) {
    MyGlobalElements[i] = (Comm.MyPID()*MaxNumMyElements+i)*2;
    ElementSizeList[i] = i%6 + 2; // elementsizes go from 2 to 7
  }

  Epetra_BlockMap Map(-1LL, NumMyElements, MyGlobalElements, ElementSizeList,
		      0, Comm);

  delete [] ElementSizeList;
  delete [] MyGlobalElements;

  Epetra_MapColoring C0(Map);

  int * elementColors = new int[NumMyElements];

  int maxcolor = 24;
  int * colorCount = new int[maxcolor];
  int ** colorLIDs = new int*[maxcolor];
  for (i=0; i<maxcolor; i++) colorCount[i] = 0;
  for (i=0; i<maxcolor; i++) colorLIDs[i] = 0;

  int defaultColor = C0.DefaultColor();
  for (i=0; i<Map.NumMyElements(); i++) {
    assert(C0[i]==defaultColor);
    assert(C0(Map.GID64(i))==defaultColor);
    if (i%2==0) C0[i] = i%6+5+i%14; // cycle through 5...23 on even elements
    else C0(Map.GID64(i)) = i%5+1; // cycle through 1...5 on odd elements
    elementColors[i] = C0[i]; // Record color of ith element for use below
    colorCount[C0[i]]++; // Count how many of each color for checking below
  }
  
  if (veryVerbose)
    std::cout << "Original Map Coloring using element-by-element definitions" << std::endl;
  if (veryVerbose1)
    std::cout <<  C0 << std::endl;

  int numColors = 0;
  for (i=0; i<maxcolor; i++) 
    if (colorCount[i]>0) {
      numColors++;
      colorLIDs[i] = new int[colorCount[i]];
    }
  for (i=0; i<maxcolor; i++) colorCount[i] = 0;
  for (i=0; i<Map.NumMyElements(); i++) colorLIDs[C0[i]][colorCount[C0[i]]++] = i;

  

  int newDefaultColor = -1;
  Epetra_MapColoring C1(Map, elementColors, newDefaultColor);
  if (veryVerbose)
    std::cout << "Same Map Coloring using one-time construction" << std::endl;
  if (veryVerbose1)
    std::cout <<  C1 << std::endl;
  assert(C1.DefaultColor()==newDefaultColor);
  for (i=0; i<Map.NumMyElements(); i++) assert(C1[i]==C0[i]);

  Epetra_MapColoring C2(C1);
  if (veryVerbose)
    std::cout << "Same Map Coloring using copy constructor" << std::endl;
  if (veryVerbose1)
    std::cout <<  C1 << std::endl;
  for (i=0; i<Map.NumMyElements(); i++) assert(C2[i]==C0[i]);
  assert(C2.DefaultColor()==newDefaultColor);

  assert(numColors==C2.NumColors());

  for (i=0; i<maxcolor; i++) {
    int curNumElementsWithColor = C2.NumElementsWithColor(i);
    assert(colorCount[i]==curNumElementsWithColor);
    int * curColorLIDList = C2.ColorLIDList(i);
    if (curNumElementsWithColor==0) {
      assert(curColorLIDList==0);
    }
    else
      for (int j=0; j<curNumElementsWithColor; j++) assert(curColorLIDList[j]==colorLIDs[i][j]);
  }
  int curColor = 1;
  Epetra_Map * Map1 = C2.GenerateMap(curColor);
  Epetra_BlockMap * Map2 = C2.GenerateBlockMap(curColor);

  assert(Map1->NumMyElements()==colorCount[curColor]);
  assert(Map2->NumMyElements()==colorCount[curColor]);

  for (i=0; i<Map1->NumMyElements(); i++) {
    assert(Map1->GID64(i)==Map.GID64(colorLIDs[curColor][i]));
    assert(Map2->GID64(i)==Map.GID64(colorLIDs[curColor][i]));
    assert(Map2->ElementSize(i)==Map.ElementSize(colorLIDs[curColor][i]));
  }

  // Now test data redistribution capabilities


  Epetra_Map ContiguousMap(-1LL, Map.NumMyElements(), Map.IndexBase64(), Comm);
  // This vector contains the element sizes for the original map.
  Epetra_IntVector elementSizes(Copy, ContiguousMap, Map.ElementSizeList());
  Epetra_LongLongVector elementIDs(Copy, ContiguousMap, Map.MyGlobalElements64());
  Epetra_IntVector elementColorValues(Copy, ContiguousMap, C2.ElementColors());


  long long NumMyElements0 = 0;
  if (Comm.MyPID()==0) NumMyElements0 = Map.NumGlobalElements64();
  Epetra_Map CMap0(-1LL, NumMyElements0, Map.IndexBase64(), Comm);
  Epetra_Import importer(CMap0, ContiguousMap);
  Epetra_IntVector elementSizes0(CMap0);
  Epetra_LongLongVector elementIDs0(CMap0);
  Epetra_IntVector elementColorValues0(CMap0);
  elementSizes0.Import(elementSizes, importer, Insert);
  elementIDs0.Import(elementIDs, importer, Insert);
  elementColorValues0.Import(elementColorValues, importer, Insert);

  Epetra_BlockMap MapOnPE0(-1LL,NumMyElements0, elementIDs0.Values(), 
			   elementSizes0.Values(), Map.IndexBase64(), Comm);

  Epetra_Import importer1(MapOnPE0, Map);
  Epetra_MapColoring ColoringOnPE0(MapOnPE0);
  ColoringOnPE0.Import(C2, importer1, Insert);

  for (i=0; i<MapOnPE0.NumMyElements(); i++)
    assert(ColoringOnPE0[i]==elementColorValues0[i]);

  if (veryVerbose)
    std::cout << "Same Map Coloring on PE 0 only" << std::endl;
  if (veryVerbose1)
    std::cout <<  ColoringOnPE0 << std::endl;
  Epetra_MapColoring C3(Map);
  C3.Export(ColoringOnPE0, importer1, Insert);
  for (i=0; i<Map.NumMyElements(); i++) assert(C3[i]==C2[i]);
  if (veryVerbose)
    std::cout << "Same Map Coloring after Import/Export exercise" << std::endl;
  if (veryVerbose1)
    std::cout <<  ColoringOnPE0 << std::endl;
   
  
  if (verbose) std::cout << "Checked OK\n\n" << std::endl;

  if (verbose1) {
    if (verbose) std::cout << "Test ostream << operator" << std::endl << std::flush;
    std::cout << C0 << std::endl;
  }
	

  delete [] elementColors;
  for (i=0; i<maxcolor; i++) if (colorLIDs[i]!=0) delete [] colorLIDs[i];
  delete [] colorLIDs;
  delete [] colorCount;

  delete Map1;
  delete Map2;


#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return returnierr;
}
コード例 #16
0
ファイル: NNTN.hpp プロジェクト: jimgoo/Elemental
inline void
Trr2kNNTN
( UpperOrLower uplo,
  Orientation orientationOfC,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kNNTN");
    if( E.Height() != E.Width()  || A.Width()  != C.Height() ||
        A.Height() != E.Height() || C.Width()  != E.Height() ||
        B.Width()  != E.Width()  || D.Width()  != E.Width()  ||
        A.Width()  != B.Height() || C.Height() != D.Height() )
        throw std::logic_error("Nonconformal Trr2kNNTN");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);
    DistMatrix<T> DT(g),  D0(g),
                  DB(g),  D1(g),
                          D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,STAR,MC  > C1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1Trans_MR_STAR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionDown
    ( C, CT,
         CB, 0 );
    LockedPartitionDown
    ( D, DT,
         DB, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );
        LockedRepartitionDown
        ( DT,  D0,
         /**/ /**/
               D1,
          DB,  D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_STAR_MC = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1Trans_MR_STAR.TransposeFrom( D1 );
        LocalTrr2k
        ( uplo, TRANSPOSE, orientationOfC, TRANSPOSE,
          alpha, A1_MC_STAR, B1Trans_MR_STAR, 
                 C1_STAR_MC, D1Trans_MR_STAR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( DT,  D0,
               D1,
         /**/ /**/
          DB,  D2 );
        SlideLockedPartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #17
0
ファイル: LL.hpp プロジェクト: jimgoo/Elemental
inline void
SymmLLA
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmLLA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    DistMatrix<T> 
        BL(g), BR(g),
        B0(g), B1(g), B2(g);

    DistMatrix<T>
        CL(g), CR(g),
        C0(g), C1(g), C2(g);

    DistMatrix<T,MC,STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
    DistMatrix<T> Z1(g);
    DistMatrix<T,MC,STAR> Z1_MC_STAR(g);
    DistMatrix<T,MR,STAR> Z1_MR_STAR(g);
    DistMatrix<T,MR,MC  > Z1_MR_MC(g);

    B1_MC_STAR.AlignWith( A );
    B1_VR_STAR.AlignWith( A );
    B1Trans_STAR_MR.AlignWith( A );
    Z1_MC_STAR.AlignWith( A );
    Z1_MR_STAR.AlignWith( A );

    Scale( beta, C );
    LockedPartitionRight
    ( B, BL, BR, 0 );
    PartitionRight
    ( C, CL, CR, 0 );
    while( CL.Width() < C.Width() )
    {
        LockedRepartitionRight 
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        Z1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), Z1_MC_STAR );
        Zeros( C1.Height(), C1.Width(), Z1_MR_STAR );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1;
        B1_VR_STAR = B1_MC_STAR;
        B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR );
        LocalSymmetricAccumulateLL
        ( TRANSPOSE, 
          alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR );

        Z1_MR_MC.SumScatterFrom( Z1_MR_STAR );
        Z1 = Z1_MR_MC;
        Z1.SumScatterUpdate( T(1), Z1_MC_STAR );
        Axpy( T(1), Z1, C1 );
        //--------------------------------------------------------------------//
        Z1.FreeAlignments();

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #18
0
ファイル: NNNT.hpp プロジェクト: mcg1969/Elemental
void Trr2kNNNT
( UpperOrLower uplo,
  Orientation orientationOfD,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kNNNT");
    if( E.Height() != E.Width()  || A.Width()  != C.Width()  ||
        A.Height() != E.Height() || C.Height() != E.Height() ||
        B.Width()  != E.Width()  || D.Height() != E.Width()  ||
        A.Width()  != B.Height() || C.Width()  != D.Width() )
        throw std::logic_error("Nonconformal Trr2kNNNT");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);
    DistMatrix<T> DL(g), DR(g),
                  D0(g), D1(g), D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,MC,  STAR> C1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> D1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > D1AdjOrTrans_STAR_MR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_MC_STAR.AlignWith( E );
    D1_VR_STAR.AlignWith( E );
    D1AdjOrTrans_STAR_MR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionRight( C, CL, CR, 0 );
    LockedPartitionRight( D, DL, DR, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_MC_STAR = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1_VR_STAR = D1;
        if( orientationOfD == ADJOINT )
            D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR );
        else
            D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR );
        LocalTrr2k
        ( uplo, TRANSPOSE, 
          alpha, A1_MC_STAR, B1Trans_MR_STAR, 
                 C1_MC_STAR, D1AdjOrTrans_STAR_MR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( DL,     /**/ DR,
          D0, D1, /**/ D2 );
        SlideLockedPartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #19
0
ファイル: GemmTN.hpp プロジェクト: ahmadia/elemental
inline void
internal::GemmTNA
( Orientation orientationOfA,
  T alpha, const DistMatrix<T,MC,MR>& A,
           const DistMatrix<T,MC,MR>& B,
  T beta,        DistMatrix<T,MC,MR>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTNA");    
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL )
        throw std::logic_error("GemmTNA assumes A is (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Height() != B.Height()   )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTNA: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T,MC,MR> BL(g), BR(g),
                        B0(g), B1(g), B2(g);

    DistMatrix<T,MC,MR> CL(g), CR(g),
                        C0(g), C1(g), C2(g);

    // Temporary distributions
    DistMatrix<T,MC,STAR> B1_MC_STAR(g);
    DistMatrix<T,MR,STAR> D1_MR_STAR(g);
    DistMatrix<T,MR,MC  > D1_MR_MC(g);
    DistMatrix<T,MC,MR  > D1(g);

    // Start the algorithm
    Scal( beta, C );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( BR.Width() > 0 )
    {
        LockedRepartitionRight
        ( BL, /**/     BR,
          B0, /**/ B1, B2 );
 
        RepartitionRight
        ( CL, /**/     CR,
          C0, /**/ C1, C2 );

        B1_MC_STAR.AlignWith( A );
        D1_MR_STAR.AlignWith( A );
        D1_MR_STAR.ResizeTo( C1.Height(), C1.Width() );
        D1.AlignWith( C1 );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1; // B1[MC,*] <- B1[MC,MR]

        // D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*]
        //           = alpha (A1^T)[MR,MC] B1[MC,*]
        internal::LocalGemm
        ( orientationOfA, NORMAL, alpha, A, B1_MC_STAR, (T)0, D1_MR_STAR );

        // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols
        D1_MR_MC.SumScatterFrom( D1_MR_STAR );
        D1 = D1_MR_MC; 
        Axpy( (T)1, D1, C1 );
        //--------------------------------------------------------------------//
        B1_MC_STAR.FreeAlignments();
        D1_MR_STAR.FreeAlignments();
        D1.FreeAlignments();

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #20
0
Func ColorMgather(Func stBasis, float angle, uint8_t * orders, Expr filterthreshold, Expr divisionthreshold, Expr divisionthreshold2) {
    uint8_t x_order = orders[0];
    uint8_t y_order = orders[1];
    uint8_t t_order = orders[2];
    uint8_t c_order = orders[3];

    Func X("X"),Y("Y"),T("T"),Xrg("Xrg"),Yrg("Yrg"),Trg("Trg");
    uint8_t max_order = x_order;
    // std::vector<Expr>Xk_expr (max_order,cast<float>(0.0f));
    // std::vector<Expr>Yk_expr (max_order,cast<float>(0.0f));
    // std::vector<Expr>Tk_expr (max_order,cast<float>(0.0f));

    uint8_t Xk_uI[max_order];
    uint8_t Yk_uI[max_order];
    uint8_t Tk_uI[max_order];
    Func Xk[max_order]; Func Yk[max_order]; Func Tk[max_order];

    // Expr Xk[max_order],Yk[max_order],Tk[max_order];

    for (int iO=0; iO < x_order; iO++) {
        Xk[iO](x,y,t) = Expr(0.0f);
        Yk[iO](x,y,t) = Expr(0.0f);
        Tk[iO](x,y,t) = Expr(0.0f);
        Xk_uI[iO] = 0;
        Yk_uI[iO] = 0;
        Tk_uI[iO] = 0;
    }

    int k = 0;
    for (int iXo = 0; iXo < x_order; iXo++) // x_order
        for (int iYo = 0; iYo < y_order; iYo++) // y_oder
            for (int iTo = 0; iTo < t_order; iTo++) // t_order
                for (int iCo = 0; iCo < c_order; iCo ++ ) // c_order: index of color channel
                {
                    if ((iYo+iTo+iCo == 0 || iYo+iTo+iCo == 1)
                        && ((iXo+iYo+iTo+iCo+1) < (x_order + 1))) {
                        X = ColorMgetfilter(stBasis, angle, iXo+1, iYo, iTo, iCo);
                        Y = ColorMgetfilter(stBasis, angle, iXo, iYo+1, iTo, iCo);
                        T = ColorMgetfilter(stBasis, angle, iXo, iYo, iTo+1, iCo);

                        Xrg = ColorMgetfilter(stBasis, angle, iXo+1, iYo, iTo, iCo+1);
                        Yrg = ColorMgetfilter(stBasis, angle, iXo, iYo+1, iTo, iCo+1);
                        Trg = ColorMgetfilter(stBasis, angle, iXo, iYo, iTo+1, iCo+1);

                        k = iXo + iYo + iTo + iCo;
                        Xk[k](x,y,t) += X(x,y,t) + Xrg(x,y,t);
                        Yk[k](x,y,t) += Y(x,y,t) + Yrg(x,y,t);
                        Tk[k](x,y,t) += T(x,y,t) + Trg(x,y,t);

                        Xk[k].update(Xk_uI[k]); Xk_uI[k]++;
                        Yk[k].update(Yk_uI[k]); Yk_uI[k]++;
                        Tk[k].update(Tk_uI[k]); Tk_uI[k]++;
                    }
                }

    // Scheduling
    for (int iO = 0; iO <= k; iO++) {
        Xk[iO].compute_root();
        Yk[iO].compute_root();
        Tk[iO].compute_root();
    }

    std::vector<Expr> st_expr(6,cast<float>(0.0f));
    for (int iK=0; iK <= k; iK++) {
        st_expr[0] += Xk[iK](x,y,t)*Tk[iK](x,y,t);
        st_expr[1] += Tk[iK](x,y,t)*Tk[iK](x,y,t);
        st_expr[2] += Xk[iK](x,y,t)*Xk[iK](x,y,t);
        st_expr[3] += Yk[iK](x,y,t)*Tk[iK](x,y,t);
        st_expr[4] += Yk[iK](x,y,t)*Yk[iK](x,y,t);
        st_expr[5] += Xk[iK](x,y,t)*Yk[iK](x,y,t);
    }

    Func st("st"); st(x,y,t) = Tuple(st_expr);
    st.compute_root();

    Expr x_clamped = clamp(x,0,width-1);
    Expr y_clamped = clamp(y,0,height-1);
    Func st_clamped("st_clamped"); st_clamped(x,y,t) = st(x_clamped,y_clamped,t);

    // float win = 7.0;
    // Image<float> meanfilter(7,7,"meanfilter_data");
    // meanfilter(x,y) = Expr(1.0f/(win*win));

    // RDom rMF(meanfilter);
    uint8_t win = 7;
    RDom rMF(0,win,0,win);

    Func st_filtered[6];
    for (uint8_t iPc=0; iPc<6; iPc++) {
    // iPc: index of product component
        // Apply average filter
        st_filtered[iPc](x,y,t) = sum(rMF,st_clamped(x + rMF.x,y + rMF.y,t)[iPc]/Expr(float(win*win)),"mean_filter");
        st_filtered[iPc].compute_root();
    }
    // Tuple st_tuple = Tuple(st_expr);

    // 4 debug
    // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(st_filtered[0](x,y,t),st_filtered[1](x,y,t),st_filtered[2](x,y,t),st_filtered[3](x,y,t),st_filtered[4](x,y,t),st_filtered[5](x,y,t));
    // return tmpOut;

    Tuple pbx = Tuple(st_filtered[2](x,y,t),st_filtered[5](x,y,t),st_filtered[0](x,y,t));
    Tuple pby = Tuple(st_filtered[5](x,y,t),st_filtered[4](x,y,t),st_filtered[3](x,y,t));
    Tuple pbt = Tuple(st_filtered[0](x,y,t),st_filtered[3](x,y,t),st_filtered[1](x,y,t));

    Func pbxy("pbxy"); pbxy = cross(pby,pbx); pbxy.compute_root();
    Func pbxt("pbxt"); pbxt = cross(pbx,pbt); pbxt.compute_root();
    Func pbyt("pbyt"); pbyt = cross(pby,pbt); pbyt.compute_root();

    Func pbxyd("pbxyd"); pbxyd = dot(pby,pbx); pbxyd.compute_root();
    Func pbxtd("pbxtd"); pbxtd = dot(pbx,pbt); pbxtd.compute_root();
    Func pbytd("pbytd"); pbytd = dot(pby,pbt); pbytd.compute_root();

    // 4 debug
    // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(pbxy(x,y,t)[0],pbxt(x,y,t)[0],pbyt(x,y,t)[0],pbxyd(x,y,t),pbxtd(x,y,t),pbytd(x,y,t));
    // return tmpOut;

    Func yt_xy("yt_xy"); yt_xy = dot(pbyt(x,y,t),pbxy(x,y,t)); yt_xy.compute_root();
    Func xt_yt("xt_yt"); xt_yt = dot(pbxt(x,y,t),pbyt(x,y,t)); xt_yt.compute_root();
    Func xt_xy("xt_xy"); xt_xy = dot(pbxt(x,y,t),pbxy(x,y,t)); xt_xy.compute_root();
    Func yt_yt("yt_yt"); yt_yt = dot(pbyt(x,y,t),pbyt(x,y,t)); yt_yt.compute_root();
    Func xt_xt("xt_xt"); xt_xt = dot(pbxt(x,y,t),pbxt(x,y,t)); xt_xt.compute_root();
    Func xy_xy("xy_xy"); xy_xy = dot(pbxy(x,y,t),pbxy(x,y,t)); xy_xy.compute_root();

    Tuple Tk_tuple = Tuple(Tk[0](x,y,t),Tk[1](x,y,t),Tk[2](x,y,t),
                           Tk[3](x,y,t),Tk[4](x,y,t));
    Func Tkd("Tkd"); Tkd = dot(Tk_tuple,Tk_tuple); Tkd.compute_root();

    // Expr Dimen = pbxyd/xy_xy;
    Expr kill(1.0f);

    Func Oxy; Oxy(x,y,t) = Mdefdiv(st_filtered[5](x,y,t) - Mdefdivang(yt_xy(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*st_filtered[3](x,y,t)*kill,st_filtered[4](x,y,t),divisionthreshold);
    Oxy.compute_root();

    Func Oyx; Oyx(x,y,t) = Mdefdiv(st_filtered[5](x,y,t) + Mdefdivang(xt_xy(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*st_filtered[0](x,y,t)*kill,st_filtered[2](x,y,t),divisionthreshold);
    Oyx.compute_root();

    Func C0; C0(x,y,t) = st_filtered[3](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill;
    C0.compute_root();

    Func M0; M0(x,y,t) = Mdefdiv(st_filtered[0](x,y,t) + C0(x,y,t), st_filtered[1](x,y,t)*pow(Mdefdivang(xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f)),divisionthreshold);
    M0.compute_root();

    Func C1; C1(x,y,t) = st_filtered[5](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_xy(x,y,t),xy_xy(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill;
    C1.compute_root();

    Func P1; P1(x,y,t) = pow(Mdefdivang(xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill + 1.0f;
    P1.compute_root();

    // 4 debug
    // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(Oxy(x,y,t),Oyx(x,y,t),C0(x,y,t),M0(x,y,t),C1(x,y,t),P1(x,y,t));
    // return tmpOut;


    Func Q1; Q1(x,y,t) = st_filtered[2](x,y,t) * (pow(Oyx(x,y,t),Expr(2.0f))+Expr(1.0f));
    Q1.compute_root();

    Func M1; M1(x,y,t) = Mdefdiv(((st_filtered[0](x,y,t)-C1(x,y,t))*P1(x,y,t)),Q1(x,y,t),divisionthreshold);
    M1.compute_root();

    Func C2; C2(x,y,t) = st_filtered[0](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill;
    C2.compute_root();

    Func M2; M2(x,y,t) = Mdefdiv(st_filtered[3](x,y,t)+C2(x,y,t),st_filtered[1](x,y,t)*(pow(Mdefdivang(xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill+Expr(1.0f)),divisionthreshold);
    M2.compute_root();

    Func C3; C3(x,y,t) = st_filtered[5](x,y,t) * Mdefdivang(yt_xy(x,y,t),xy_xy(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill;
    C3.compute_root();

    Func P3; P3(x,y,t) = pow(Mdefdivang(xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill + Expr(1.0f);
    P3.compute_root();

    Func Q3; Q3(x,y,t) = st_filtered[4](x,y,t) * (pow(Oxy(x,y,t),Expr(2.0f))+Expr(1.0f));
    Q3.compute_root();

    Func M3; M3(x,y,t) = Mdefdiv(((st_filtered[3](x,y,t)-C3(x,y,t))*P3(x,y,t)),Q3(x,y,t),divisionthreshold);
    M3.compute_root();

    Func basisAtAngle;
    basisAtAngle(x,y,t) = Tuple(M0(x,y,t),M1(x,y,t),M2(x,y,t),M3(x,y,t),Tkd(x,y,t));

    return basisAtAngle;

// Func hsv2rgb(Func colorImage) { // Took this function
//     Var x, y, c, t;
//     Func output;
//     output(x,y,c,t) = cast <float> (0.0f);
//     Expr fR, fG, fB; // R,G & B values
//     Expr fH = (colorImage(x,y,0,t)); //H value [0-360)
//     Expr fS = (colorImage(x,y,1,t)); //S value
//     Expr fV = (colorImage(x,y,2,t)); //V value

// //Conversion (I took the one on Wikipedia)
//     // https://fr.wikipedia.org/wiki/Teinte_Saturation_Valeur#Conversion_de_TSV_vers_RVB
//     Expr fHi = floor(fH / Expr(60.0f));
//     Expr fF = fH / 60.0f - fHi;
//     Expr fL = fV * (1 - fS);
//     Expr fM = fV * (1 - fF * fS) ;
//     Expr fN = fV * (1 - (1 - fF) * fS);

//     fR = select((0 == fHi),fV,
//                 (1 == fHi),fM,
//                 (2 == fHi),fL,
//                 (3 == fHi),fL,
//                 (4 == fHi),fN,
//                 (5 == fHi),fV,
//                 0.0f);

//     fG = select((0 == fHi),fN,
//                 (1 == fHi),fV,
//                 (2 == fHi),fV,
//                 (3 == fHi),fM,
//                 (4 == fHi),fL,
//                 (5 == fHi),fL,
//                 0.0f);

//     fB = select((0 == fHi),fL,
//                 (1 == fHi),fL,
//                 (2 == fHi),fN,
//                 (3 == fHi),fV,
//                 (4 == fHi),fV,
//                 (5 == fHi),fM,
//                 0.0f);

//     output(x,y,0,t) = fR;
//     output(x,y,1,t) = fG;
//     output(x,y,2,t) = fB;
//     return output;

// }

// Func angle2rgb (Func v) {
//     Var x, y, c, t;
//     Func ov, a;
//     ov(x,y,c,t) = cast <float> (0.0f);
//     Expr pi2(2*M_PI);
//     a(x,y,c,t) = v(x,y,c,t) / pi2;
//     ov(x,y,0,t) = a(x,y,c,t);
//     ov(x,y,1,t) = 1;
//     ov(x,y,2,t) = 1;
//     return ov;
// }

// Func outputvelocity(Func Blur, Func Speed, Func Angle, int border, Expr speedthreshold, Expr filterthreshold) {
//     extern Expr width;
//     extern Expr height;

//     Func Blur3, Speed3;
//     Blur3(x,y,c,t) = cast <float> (0.0f);
//     Speed3(x,y,c,t) = cast <float> (0.0f);

// //Scale the grey level images
//     Blur(x,y,0,t) = (Blur(x,y,0,t) - minimum(Blur(x,y,0,t))) / (maximum(Blur(x,y,0,t)) - minimum(Blur(x,y,0,t)));
//     //Concatenation along the third dimension
//     Blur3(x,y,0,t) = Blur(x,y,0,t);
//     Blur3(x,y,1,t) = Blur(x,y,0,t);
//     Blur3(x,y,2,t) = Blur(x,y,0,t);

// //Speed scaled to 1
//     //Concatenation along the third dimension
//     Speed3(x,y,1,t) = Speed(x,y,0,t);
//     Speed3(x,y,2,t) = Speed(x,y,0,t);

// //Use the log speed to visualise speed
//     Func LogSpeed;
//     LogSpeed(x,y,c,t) = fast_log(Speed3(x,y,c,t) + Expr(0.0000001f))/fast_log(Expr(10.0f));
//     LogSpeed(x,y,c,t) = (LogSpeed(x,y,c,t) - minimum(LogSpeed(x,y,c,t))) / (maximum(LogSpeed(x,y,c,t)) - minimum(LogSpeed(x,y,c,t)));

// //Make a colour image
//     // uint16_t rows = height;
//     // uint16_t cols = width;
//     // int depth = Angle.channels();

// //Do it the HSV way
//     Func colorImage;
//     colorImage(x,y,0,t) = Angle(x,y,0,t);

// //Do hsv to rgb
//     Func colorImage1;
//     colorImage1 = hsv2rgb(colorImage);

// // Assume the border equals to the size of spatial filter
// //Make the border
//     // int bir = rows + 2 * border;
//     // int bic = cols + 2 * border;
//     Expr orows = height / Expr(2);
//     Expr ocols = width / Expr(2);

// //Rotation matrix
//     int ph = 0;
//     Func mb, sb;
//    // if (rx < border - 1 || rx >= rows+border -1 || ry < border - 1 || ry >= cols+border - 1) {
//     Expr co1 = x - orows;
//     Expr co2 = - (y - ocols);
//     Expr cosPh(cos(ph));
//     Expr sinPh(sin(ph));
//     Expr rco1 = cosPh * co1 - sinPh * co2; //Using rotation matrix
//     Expr rco2 = sinPh * co1 + cosPh * co2;
//     // Expr justPi (M_PI);
//     mb(x,y,c,t) =
//         select (((x < (border - 1)) ||
//                   (x >= (height+border -1)) ||
//                   (y < (border - 1)) ||
//                   (y >= (width+border - 1))),
//                 atan2(rco1,rco2) + Expr(M_PI),mb(x,y,c,t));
//     sb(x,y,c,t) =
//          select (((x < (border - 1)) ||
//                   (x >= (height+border -1)) ||
//                   (y < (border - 1) ) ||
//                   (y >= (width+border - 1))),
//                   1, sb (x,y,c,t));

//     Func cb;
//     cb = angle2rgb(mb);

// //Get the old data
//     // Expr pi2(2*M_PI);
//     colorImage1(x,y,0,t)=colorImage(x,y,0,t) * Expr(2*M_PI);
//     colorImage1=angle2rgb(colorImage1);
//     colorImage1(x,y,c,t)=select(abs(Speed3(x,y,c,t))<speedthreshold,Expr(0.0f),colorImage1(x,y,c,t));
//     Func colorImage2;
//     colorImage2(x,y,c,t) = colorImage1(x,y,c,t) * Speed(x,y,c,t);

// //Put the data in the border
//     RDom bordx (border,rows + border);
//     RDom bordy (border,cols + border);
//     Func ang1, ang2;
//     ang1 (x,y,c,t) = cast <float> (0.0f);
//     ang2 (x,y,c,t) = cast <float> (0.0f);

//     cb(bordx, bordy,c,t) = colorImage1(x,y,c,t);
//     ang1 = cb;
//     cb(bordx, bordy,c,t) = colorImage2(x,y,c,t);
//     ang2 = cb;
//     sb(bordx, bordy,c,t) = Speed3(x,y,c,t);
//     Speed3 = sb;
//     sb(bordx, bordy,c,t) = Blur3(x,y,c,t);
//     Blur3 = sb;

//     // Func I;
//     // I (x,y,c,t) = Blur3(x,y,c,t) + Speed3(x,y - height,c,t) + ang1(x - width,y,c,t) + ang2(x - width,y - height,c,t);
//     //I = cat(2,cat(1,Blur,Speed),cat(1,ang1,ang2));
//     return I;
// }
}
コード例 #21
0
ファイル: NTTN.hpp プロジェクト: khalid-hasanov/Elemental
void Trr2kNTTN
( UpperOrLower uplo,
  Orientation orientationOfB, Orientation orientationOfC,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Trr2kNTTN");
    if( E.Height() != E.Width()  || A.Width()  != C.Height() ||
        A.Height() != E.Height() || C.Width()  != E.Height() ||
        B.Height() != E.Width()  || D.Width()  != E.Width()  ||
        A.Width()  != B.Width()  || C.Height() != D.Height() )
        LogicError("Nonconformal Trr2kNTTN");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);

    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);
    DistMatrix<T> DT(g),  D0(g),
                  DB(g),  D1(g),
                          D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > B1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > C1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);

    A1_MC_STAR.AlignWith( E );
    B1_VR_STAR.AlignWith( E );
    B1AdjOrTrans_STAR_MR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1Trans_MR_STAR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    LockedPartitionDown
    ( C, CT,
         CB, 0 );
    LockedPartitionDown
    ( D, DT,
         DB, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );
        LockedRepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );
        LockedRepartitionDown
        ( DT,  D0,
         /**/ /**/
               D1,
          DB,  D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_STAR_MC = C1;
        B1_VR_STAR = B1;
        if( orientationOfB == ADJOINT )
            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
        else
            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );
        D1Trans_MR_STAR.TransposeFrom( D1 );
        LocalTrr2k 
        ( uplo, orientationOfC, TRANSPOSE,
          alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR,
                 C1_STAR_MC, D1Trans_MR_STAR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );
        SlideLockedPartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
        SlideLockedPartitionDown
        ( DT,  D0,
               D1,
         /**/ /**/
          DB,  D2 );
    }
}
コード例 #22
0
ファイル: RU.hpp プロジェクト: certik/Elemental
inline void
HemmRUC
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmRUC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error("{A,B,C} must be distributed on the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g),
                  CLeft(g), CRight(g);

    // Temporary distributions
    DistMatrix<T,MC,STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> AColPan_VR_STAR(g);
    DistMatrix<T,STAR,MR  > AColPanAdj_STAR_MR(g);
    DistMatrix<T,MR,  STAR> ARowPanAdj_MR_STAR(g);

    B1_MC_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( CR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        ARowPan.LockedView1x2( A11, A12 );
        AColPan.LockedView2x1
        ( A01,
          A11 );

        CLeft.View1x2( C0, C1 );
        CRight.View1x2( C1, C2 );

        AColPan_VR_STAR.AlignWith( CLeft );
        AColPanAdj_STAR_MR.AlignWith( CLeft );
        ARowPanAdj_MR_STAR.AlignWith( CRight );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1;

        AColPan_VR_STAR = AColPan;
        AColPanAdj_STAR_MR.AdjointFrom( AColPan_VR_STAR );
        ARowPanAdj_MR_STAR.AdjointFrom( ARowPan );
        MakeTrapezoidal( LEFT,  LOWER,  0, ARowPanAdj_MR_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, AColPanAdj_STAR_MR );

        LocalGemm
        ( NORMAL, ADJOINT, 
          alpha, B1_MC_STAR, ARowPanAdj_MR_STAR, T(1), CRight );

        LocalGemm
        ( NORMAL, NORMAL,
          alpha, B1_MC_STAR, AColPanAdj_STAR_MR, T(1), CLeft );
        //--------------------------------------------------------------------//
        AColPan_VR_STAR.FreeAlignments();
        AColPanAdj_STAR_MR.FreeAlignments();
        ARowPanAdj_MR_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #23
0
ファイル: TTTT.hpp プロジェクト: jimgoo/Elemental
inline void
Trr2kTTTT
( UpperOrLower uplo,
  Orientation orientationOfA, Orientation orientationOfB,
  Orientation orientationOfC, Orientation orientationOfD, 
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kTTTT");
    if( E.Height() != E.Width()  || A.Height() != C.Height() ||
        A.Width()  != E.Height() || C.Width()  != E.Height() ||
        B.Height() != E.Width()  || D.Height() != E.Width()  ||
        A.Height() != B.Width()  || C.Height() != D.Width() )
        throw std::logic_error("Nonconformal Trr2kTTTT");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);

    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);
    DistMatrix<T> DL(g), DR(g),
                  D0(g), D1(g), D2(g);

    DistMatrix<T,STAR,MC  > A1_STAR_MC(g);
    DistMatrix<T,VR,  STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > B1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > C1_STAR_MC(g);
    DistMatrix<T,VR,  STAR> D1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > D1AdjOrTrans_STAR_MR(g);

    A1_STAR_MC.AlignWith( E );
    B1_VR_STAR.AlignWith( E );
    B1AdjOrTrans_STAR_MR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1_VR_STAR.AlignWith( E );
    D1AdjOrTrans_STAR_MR.AlignWith( E );

    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    LockedPartitionDown
    ( C, CT,
         CB, 0 );
    LockedPartitionRight( D, DL, DR, 0 );
    while( AT.Height() < A.Height() )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );
        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );
        LockedRepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );
        LockedRepartitionRight
        ( DL, /**/ DR,
          D0, /**/ D1, D2 );

        //--------------------------------------------------------------------//
        A1_STAR_MC = A1;
        C1_STAR_MC = C1;
        B1_VR_STAR = B1;
        D1_VR_STAR = D1;
        if( orientationOfB == ADJOINT )
            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
        else
            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );
        if( orientationOfD == ADJOINT )
            D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR );
        else
            D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR );
        LocalTrr2k
        ( uplo, orientationOfA, orientationOfC,
          alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR,
                 C1_STAR_MC, D1AdjOrTrans_STAR_MR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( DL,     /**/ DR,
          D0, D1, /**/ D2 );
        SlideLockedPartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );
        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #24
0
ファイル: RU.hpp プロジェクト: certik/Elemental
inline void
HemmRUA
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmRUA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    DistMatrix<T>
        BT(g),  B0(g),
        BB(g),  B1(g),
                B2(g);
    DistMatrix<T>
        CT(g),  C0(g),
        CB(g),  C1(g),
                C2(g);

    DistMatrix<T,MR,  STAR> B1Adj_MR_STAR(g);
    DistMatrix<T,VC,  STAR> B1Adj_VC_STAR(g);
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);
    DistMatrix<T,MC,  STAR> Z1Adj_MC_STAR(g);
    DistMatrix<T,MR,  STAR> Z1Adj_MR_STAR(g);
    DistMatrix<T,MR,  MC  > Z1Adj_MR_MC(g);
    DistMatrix<T> Z1Adj(g);

    B1Adj_MR_STAR.AlignWith( A );
    B1Adj_VC_STAR.AlignWith( A );
    B1_STAR_MC.AlignWith( A );
    Z1Adj_MC_STAR.AlignWith( A );
    Z1Adj_MR_STAR.AlignWith( A );

    Matrix<T> Z1Local;

    Scale( beta, C );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( CT.Height() < C.Height() )
    {
        LockedRepartitionDown
        ( BT,  B0, 
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        Z1Adj_MR_MC.AlignWith( C1 );
        Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR );
        Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR );
        //--------------------------------------------------------------------//
        B1Adj_MR_STAR.AdjointFrom( B1 );
        B1Adj_VC_STAR = B1Adj_MR_STAR;
        B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR );
        LocalSymmetricAccumulateRU
        ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR, 
          Z1Adj_MC_STAR, Z1Adj_MR_STAR );

        Z1Adj.SumScatterFrom( Z1Adj_MC_STAR );
        Z1Adj_MR_MC = Z1Adj;
        Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR );
        Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local );
        Axpy( T(1), Z1Local, C1.LocalMatrix() );
        //--------------------------------------------------------------------//
        Z1Adj_MR_MC.FreeAlignments();

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #25
0
ファイル: NN.hpp プロジェクト: jimgoo/Elemental
inline void
GemmNNA
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNA: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);

    // Temporary distributions
    DistMatrix<T,VR,STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
    DistMatrix<T,MC,STAR> D1_MC_STAR(g);

    B1_VR_STAR.AlignWith( A );
    B1Trans_STAR_MR.AlignWith( A );
    D1_MC_STAR.AlignWith( A );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( BR.Width() > 0 )
    {
        LockedRepartitionRight
        ( BL, /**/     BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/     CR,
          C0, /**/ C1, C2 );

        Zeros( C1.Height(), C1.Width(), D1_MC_STAR );
        //--------------------------------------------------------------------//
        B1_VR_STAR = B1;
        B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR );

        // D1[MC,*] := alpha A[MC,MR] B1[MR,*]
        LocalGemm
        ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR );

        // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows
        C1.SumScatterUpdate( T(1), D1_MC_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #26
0
ファイル: NN.hpp プロジェクト: jimgoo/Elemental
inline void 
GemmNNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            A1_STAR_VC = A1; 
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> 
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 ); 

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #27
0
ファイル: wp_block.c プロジェクト: NSGod/openbsd
void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
	{
	int	r;
	const u8 *p=inp;
	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
	GO_FOR_MMX(ctx,inp,n);
#endif
							do {
#ifdef OPENSSL_SMALL_FOOTPRINT
	u64	L[8];
	int	i;

	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
	for (r=0;r<ROUNDS;r++)
		{
		for (i=0;i<8;i++)
			{
			L[i]  = i ? 0 : RC[r];
			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
			}
		memcpy (K.q,L,64);
		for (i=0;i<8;i++)
			{
			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
			}
		memcpy (S.q,L,64);
		}
	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
#else
	u64	L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef __STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		memcpy (S.c,p,64);
		S.q[0] ^= (K.q[0] = H->q[0]);
		S.q[1] ^= (K.q[1] = H->q[1]);
		S.q[2] ^= (K.q[2] = H->q[2]);
		S.q[3] ^= (K.q[3] = H->q[3]);
		S.q[4] ^= (K.q[4] = H->q[4]);
		S.q[5] ^= (K.q[5] = H->q[5]);
		S.q[6] ^= (K.q[6] = H->q[6]);
		S.q[7] ^= (K.q[7] = H->q[7]);
		}
	else
#endif
		{
		const u64 *pa = (const u64*)p;
		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
		}

	for(r=0;r<ROUNDS;r++)
		{
#ifdef SMALL_REGISTER_BANK
		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
		L0 ^= RC[r];

		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
		}

#ifdef __STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		int i;
		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
		}
	else
#endif
		{
		const u64 *pa=(const u64 *)p;
		H->q[0] ^= S.q[0] ^ pa[0];
		H->q[1] ^= S.q[1] ^ pa[1];
		H->q[2] ^= S.q[2] ^ pa[2];
		H->q[3] ^= S.q[3] ^ pa[3];
		H->q[4] ^= S.q[4] ^ pa[4];
		H->q[5] ^= S.q[5] ^ pa[5];
		H->q[6] ^= S.q[6] ^ pa[6];
		H->q[7] ^= S.q[7] ^ pa[7];
		}
#endif
							p += 64;
							} while(--n);
	}
コード例 #28
0
ファイル: NN.hpp プロジェクト: jimgoo/Elemental
inline void 
GemmNNB
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC> A1_STAR_MC(g);
    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);

    A1_STAR_MC.AlignWith( B );
    D1Trans_MR_STAR.AlignWith( B );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]

        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR );

        C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
 
        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}                     
コード例 #29
0
ファイル: LL.hpp プロジェクト: jimgoo/Elemental
inline void
SymmLLC
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmLLC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<T> 
        BT(g),  B0(g),
        BB(g),  B1(g),
                B2(g);
    DistMatrix<T> 
        CT(g),  C0(g),  CAbove(g),
        CB(g),  C1(g),  CBelow(g),
                C2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> AColPan_MC_STAR(g);
    DistMatrix<T,STAR,MC  > ARowPan_STAR_MC(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);

    B1Trans_MR_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( CB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        LockedView1x2( ARowPan, A10, A11 );
        LockedView2x1
        ( AColPan, A11,
                   A21 );

        View2x1
        ( CAbove, C0,
                  C1 );
        View2x1
        ( CBelow, C1,
                  C2 );

        AColPan_MC_STAR.AlignWith( CBelow );
        ARowPan_STAR_MC.AlignWith( CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR = AColPan;
        ARowPan_STAR_MC = ARowPan;
        MakeTrapezoidal( LEFT,  LOWER,  0, AColPan_MC_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC );

        B1Trans_MR_STAR.TransposeFrom( B1 );

        LocalGemm
        ( NORMAL, TRANSPOSE, 
          alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow );

        LocalGemm
        ( TRANSPOSE, TRANSPOSE, 
          alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR.FreeAlignments();
        ARowPan_STAR_MC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #30
0
ファイル: UT_EKF.cpp プロジェクト: iefiac/vxl_util
void test_EKF_remove_feature_landmark_noise_ptz_data()
{
    vcl_string ptz_file("/Users/jimmy/Desktop/images/33_slam_data/ptz_145420_145719.txt");
    vcl_vector<PTZData> ptzs;
    bool isRead = readPTZCameraFile(ptz_file.c_str(), ptzs, 1);
    assert(isRead);
    
    vcl_vector<double> gdPans;
    vcl_vector<double> gdTilts;
    vcl_vector<double> gdZooms;
    vcl_vector<double> observedPans;
    vcl_vector<double> observedTilts;
    vcl_vector<double> observedZooms;
    
    vcl_vector<vcl_vector<vgl_point_2d<double> > > observedImagePts;
    
    const int width = 1280;
    const int height = 720;
    vnl_random rnd;
    double delta = 2.0;
    vgl_point_2d<double> pp(width/2, height/2);
    
    PTZKeypointDynamicEKF ptzDynamicEKF;
    
    //  const int M = (int)firstFeatures.size();
    
    vnl_vector<double> C0(6, 0);
    C0[0] = ptzs[0].pan;
    C0[1] = ptzs[0].tilt;
    C0[2] = ptzs[0].fl;
    
    vnl_matrix<double> CP0(6, 6, 0);
    CP0(0, 0) = 0.01;  CP0(1, 1) = 0.01;  CP0(2, 2) = 10;
    CP0(3, 3) = 0.001; CP0(4, 4) = 0.001; CP0(5, 5) = 0.1;
    
    vnl_matrix<double> CQ0(6, 6, 0);
    CQ0(0, 0) = 0.00000004; CQ0(1, 1) = 0.00000004; CQ0(2, 2) = 0.0000004;
    CQ0(3, 3) = 0.00000001; CQ0(4, 4) = 0.00000001; CQ0(5, 5) = 0.0000001;
    
    // construct feature a database
    vcl_vector<vgl_point_2d<double> > courtPoints = DisneyWorldBasketballCourt::getCalibPoints();
    vcl_list<VxlEKFFeaturePoint> featureDatabase;
    for (int i = 0; i<courtPoints.size(); i++) {
        vgl_point_3d<double> p(courtPoints[i].x(), courtPoints[i].y(), 0);
        vgl_point_2d<double> q(0, 0);
        
        VxlEKFFeaturePoint feat(p, q);
        feat.id_ = i;
        featureDatabase.push_back(feat);
    }
    
    
    // init camera and feature from ground truth of data set
    VxlEKFCamera camera(C0, CP0, CQ0);
    vcl_list<VxlEKFFeaturePoint> features;
    
    for (int i = 0; i<courtPoints.size(); i++) {
        vgl_homg_point_3d<double> p(courtPoints[i].x(), courtPoints[i].y(), 0, 1.0);
        if (camera.is_behind_camera(p)) {
            continue;
        }
        vgl_point_2d<double> q= camera.project(p);
        if (vgl_inside_image(q, width, height, 10)) {
            VxlEKFFeaturePoint feat(p, q);
            feat.id_ = i;
            features.push_back(feat);
        }
    }
    printf("initiate feature number is %lu\n", features.size());
    vnl_vector<double> Xk;
    vnl_matrix<double> Pk;
    bool isInit = ptzDynamicEKF.updateCameraFeature(camera, features, Xk, Pk);
    assert(isInit);
    
    vcl_vector<double> smoothedPans;
    vcl_vector<double> smoothedTilts;
    vcl_vector<double> smoothedZooms;
    
    for (int i = 1; i<ptzs.size(); i++) {  // ptzs.size()
        gdPans.push_back(ptzs[i].pan);
        gdTilts.push_back(ptzs[i].tilt);
        gdZooms.push_back(ptzs[i].fl);
        
        vpgl_perspective_camera<double> gdCamera;
        bool isCamera = VxlPTZCamera::PTZToCamera(ptzs[i].fl, ptzs[i].pan, ptzs[i].tilt, gdCamera);
        assert(isCamera);
        
        // remove features
        vcl_list<VxlEKFFeaturePoint>::iterator it = features.begin();
        while (it != features.end()) {
            vgl_point_3d<double> p = it->worldPt();
            if (gdCamera.is_behind_camera(vgl_homg_point_3d<double>(p.x(), p.y(), p.z(), 1.0)))
            {
                it = features.erase(it);
                printf("erase a feature\n");
                continue;
            }
            vgl_point_2d<double> q = gdCamera.project(p);
            if (!vgl_inside_image(q, width, height, 10)) {
                it = features.erase(it);
                printf("erase a feature\n");
                continue;
            }
            it++;
        }
        printf("feature number is %lu\n", features.size());
        
        // add features
        for (vcl_list<VxlEKFFeaturePoint>::iterator it = featureDatabase.begin(); it != featureDatabase.end(); it++) {
            vgl_point_3d<double> p = it->worldPt();
            if (gdCamera.is_behind_camera(vgl_homg_point_3d<double>(p.x(), p.y(), p.z(), 1.0)))
            {
                continue;
            }
            vgl_point_2d<double> q = gdCamera.project(p);
            if (vgl_inside_image(q, width, height, 10))
            {
                vcl_list<VxlEKFFeaturePoint>::iterator findIt = std::find(features.begin(), features.end(), *it);
                // cannot find same featuere (defined by id) in the features
                if (findIt == features.end()) {
                    VxlEKFFeaturePoint feat(p, q);
                    feat.id_ = it->id_;
                    features.push_back(feat);
                    
                    printf("add a new feature with id %d\n", (int)feat.id_);
                }
            }
            
        }
        
        vcl_vector<vgl_point_2d<double> > worldPts;
        vcl_vector<vgl_point_2d<double> > imagePts;
        // add noise to current observation
        for (vcl_list<VxlEKFFeaturePoint>::iterator it = features.begin(); it != features.end(); it++) {
            vgl_point_3d<double> p = it->worldPt();
            vgl_point_2d<double> q = gdCamera.project(p);
            double x = q.x();
            double y = q.y();
            x += delta * rnd.normal();
            y += delta * rnd.normal();
            it->setImagePoint(x, y);
            
            worldPts.push_back(vgl_point_2d<double>(p.x(), p.y()));
            imagePts.push_back(it->imagePt());
        }
        vpgl_perspective_camera<double> initCamera;
        vpgl_perspective_camera<double> finalCamera;
        
        bool isInit = VpglPlus::init_calib(worldPts, imagePts, pp, initCamera);
        if (!isInit) {
            printf("initiate camera error\n");
            continue;
        }
        bool isFinal = VpglPlus::optimize_perspective_camera(worldPts, imagePts, initCamera, finalCamera);
        if (!isFinal) {
            printf("final camera error\n");
            continue;
        }
        //
        double pan = 0;
        double tilt = 0;
        double zoom = 0;
        bool isPTZ = VxlPTZCamera::CameraToPTZ(finalCamera, pan, tilt, zoom);
        assert(isPTZ);
        observedPans.push_back(pan);
        observedTilts.push_back(tilt);
        observedZooms.push_back(zoom);
        
        printf("observed  pan tilt focal length is %f %f %f\n", pan, tilt, zoom);
        ptzDynamicEKF.updateCameraFeature(camera, features, Xk, Pk);
        printf("gd        pan tilt focal length is %f %f %f\n\n", ptzs[i].pan, ptzs[i].tilt, ptzs[i].fl);
        
        smoothedPans.push_back(Xk[0]);
        smoothedTilts.push_back(Xk[1]);
        smoothedZooms.push_back(Xk[2]);
    }
    
    //save
    
    vnl_vector<double> gdPanVec(&gdPans[0], (int)gdPans.size());
    vnl_vector<double> observedPanVec(&observedPans[0], (int)observedPans.size());
    vnl_vector<double> smoothedPanVec(&smoothedPans[0], (int)smoothedPans.size());
    
    vnl_vector<double> gdZoomVec(&gdZooms[0], (int)gdZooms.size());
    vnl_vector<double> observedZoomVec(&observedZooms[0], (int)observedZooms.size());
    vnl_vector<double> smoothedZoomVec(&smoothedZooms[0], (int)smoothedZooms.size());
    
    vnl_vector<double> gdTiltVec(&gdTilts[0], (int)gdTilts.size());
    vnl_vector<double> observedTiltVec(&observedTilts[0], (int)observedTilts.size());
    vnl_vector<double> smoothedTiltVec(&smoothedTilts[0], (int)observedTilts.size());
    
    
    vcl_string save_file("EKF_dynamic_ptz.mat");
    vnl_matlab_filewrite awriter(save_file.c_str());
    
    awriter.write(gdPanVec, "gdPan");
    awriter.write(observedPanVec, "observedPan");
    awriter.write(smoothedPanVec, "sm_pan");
    awriter.write(gdZoomVec, "gdZoom");
    awriter.write(observedZoomVec, "observedZoom");
    awriter.write(smoothedZoomVec, "smoothedZoom");
    awriter.write(gdTiltVec, "gdTilt");
    awriter.write(vnl_vector<double>(&observedTilts[0], (int)observedTilts.size()), "observedTilt");
    awriter.write(vnl_vector<double>(&smoothedTilts[0], (int)observedTilts.size()), "smoothedTilt");
    
    printf("save to %s\n", save_file.c_str());
}