/*
 * Wait up to *timeout seconds for input on stdin and read at most `size`
 * bytes into `buf`.
 *
 * On return *timeout has been reduced by the whole seconds spent waiting.
 *
 * Returns the number of bytes read (> 0) on success, otherwise:
 *   RCDO    - tty_hangedup is set, or read() failed with EPIPE
 *   TIMEOUT - nothing became readable, or read() failed with EAGAIN/EINTR
 *   ERROR   - selectmy() failed, an exceptional condition was reported on
 *             stdin, or read() failed for any other reason
 *
 * NOTE(review): when read() returns 0 errno has not been set by that call, so
 * the ERROR/TIMEOUT/RCDO choice is made from a stale errno - confirm intended.
 */
int tty_get(byte *buf,size_t size,int *timeout)
{
    fd_set readable, exceptional;
    struct timeval wait;
    time_t started;
    int n;
#ifdef NEED_DEBUG
    int idx;
#endif

    if(tty_hangedup)
        return RCDO;

    FD_ZERO(&readable);
    FD_ZERO(&exceptional);
    FD_SET(STDIN_FILENO, &readable);
    FD_SET(STDIN_FILENO, &exceptional);

    wait.tv_sec = *timeout;
    wait.tv_usec = 0;

    started = time(NULL);
    n = selectmy(STDIN_FILENO + 1, &readable, &exceptional, &wait);
    if(n < 0)
        return tty_hangedup ? RCDO : ERROR;

    /* Charge the elapsed wall-clock seconds against the caller's budget. */
    *timeout -= (time(NULL) - started);

    if(n == 0)
        return TIMEOUT;
    if(FD_ISSET(STDIN_FILENO, &exceptional))
        return ERROR;

    n = read(STDIN_FILENO, buf, size);
    if(n < 1) {
        if(tty_hangedup || errno == EPIPE)
            return RCDO;
        if(errno == EAGAIN || errno == EINTR)
            return TIMEOUT;
        return ERROR;
    }

#if DEBUG_SLEEP==1
    if(is_ip)
        usleep(1000);
#endif
#ifdef NEED_DEBUG
    for(idx = 0; idx < n; idx++)
        DEBUG(('M',9,"tty_get: '%c' (%d)",C0(buf[idx]),buf[idx]));
#endif
    return n;
}
// Tests the segment A-B against each of the four edges of the rectangle with
// corner (x, y) and extents (width, height), using intersects().
//
// Returns false when the segment crosses at least one edge and true when it
// crosses none. All four edges are always tested; the point written by
// intersects() into its last argument is discarded.
//
// NOTE(review): the result reads inverted relative to the function name
// ("intersectsRect" yields false on an intersection). Callers are not visible
// here, so the behaviour is kept as-is - confirm against call sites.
bool intersectsRect(Vector2 A, Vector2 B, double x, double y, double width, double height)
{
    // Corners in the order the edges are walked:
    // (x,y) -> (x+w,y) -> (x+w,y+h) -> (x,y+h) -> back to (x,y).
    const double cornerX[4] = { x, x + width, x + width, x };
    const double cornerY[4] = { y, y, y + height, y + height };

    Vector2 hitPoint;
    bool anyEdgeHit = false;

    for(int edge = 0; edge < 4; ++edge)
    {
        const int next = (edge + 1) % 4;
        Vector2 from(cornerX[edge], cornerY[edge]);
        Vector2 to(cornerX[next], cornerY[next]);

        // No early exit: every edge is evaluated, as in a plain sequence of calls.
        if(intersects(A, B, from, to, hitPoint))
        {
            anyEdgeHit = true;
        }
    }

    return !anyEdgeHit;
}
//-------------------------------------------------------------- void ofApp::setup(){ ofSetFrameRate(60.0f); for(int i=0;i<12;i++) { randColorDrop(); } Attractor.reset(new ColorDropAttractor()); Merger.reset(new ColorDropMerger()); Dragger.reset(new ColorDropDragger()); ofPtr<ColorHole> H; ofFloatColor C0(ofFloatColor::yellow); C0.setBrightness(0.7f); H.reset(new ColorHole( ofVec2f(200,200), C0,2600)); Holes.push_back(H); ofFloatColor C1(ofFloatColor::cyan); C1.setBrightness(0.7f); H.reset(new ColorHole( ofVec2f(750,240), ofFloatColor(0.8f,0.0f,0.8f,1),3000)); Holes.push_back(H); ofFloatColor C2(ofFloatColor::violet); C2.setBrightness(0.7f); H.reset(new ColorHole( ofVec2f(450,500), ofFloatColor(0.0f,0.9f,0.9f,1),3800)); Holes.push_back(H); }
// Walks the byte stream pbSrc[0 .. dwSrcSize) and dispatches on each lead byte
// to the C0, GL, C1 or GR handler, advancing by the byte count each handler
// reports.
//
// Parameters:
//   pbSrc       - input bytes (must not be NULL)
//   dwSrcSize   - number of bytes available (must be non-zero)
//   pdwReadSize - receives the number of bytes consumed (must not be NULL);
//                 written only when the function does not return FALSE
//
// Returns FALSE on invalid arguments, when a handler fails, or when a lead
// byte of 0xFF (which belongs to none of the four ranges) is met. Otherwise
// returns TRUE. A C0/C1 handler returning 2 ends the scan early with TRUE.
BOOL CARIB8CharDecode::Analyze( const BYTE* pbSrc, DWORD dwSrcSize, DWORD* pdwReadSize )
{
	if( pbSrc == NULL || dwSrcSize == 0 || pdwReadSize == NULL){
		return FALSE;
	}

	BOOL bRet = TRUE;
	DWORD dwReadSize = 0;

	while( dwReadSize < dwSrcSize ){
		DWORD dwReadBuff = 0;
		// Check the first byte
		if( pbSrc[dwReadSize] <= 0x20 ){
			// C0 control code
			bRet = C0( pbSrc+dwReadSize, &dwReadBuff );
			dwReadSize += dwReadBuff;
			if( bRet == FALSE ){
				return FALSE;
			}else if( bRet == 2 ){
				bRet = TRUE;
				break;
			}
		}else if( pbSrc[dwReadSize] > 0x20 && pbSrc[dwReadSize] < 0x7F ){
			// GL code area
			if( GL( pbSrc+dwReadSize, &dwReadBuff ) == FALSE ){
				return FALSE;
			}
			dwReadSize += dwReadBuff;
		}else if( pbSrc[dwReadSize] >= 0x7F && pbSrc[dwReadSize] <= 0xA0 ){
			// C1 control code
			bRet = C1( pbSrc+dwReadSize, &dwReadBuff );
			dwReadSize += dwReadBuff;
			if( bRet == FALSE ){
				return FALSE;
			}else if( bRet == 2 ){
				bRet = TRUE;
				break;
			}
		}else if( pbSrc[dwReadSize] > 0xA0 && pbSrc[dwReadSize] < 0xFF ){
			// GR code area
			if( GR( pbSrc+dwReadSize, &dwReadBuff ) == FALSE ){
				return FALSE;
			}
			dwReadSize += dwReadBuff;
		}else{
			// 0xFF is outside every range above. Without this branch the
			// read position would never advance and the loop would spin
			// forever on the same byte, so treat it as a decode failure.
			return FALSE;
		}
	}

	*pdwReadSize = dwReadSize;
	return bRet;
}
static bool SweepCapsuleMesh(const SweepTest* sweep_test, const SweptVolume* volume, const TouchedGeom* geom, const NxExtendedVec3& center, const NxVec3& dir, SweptContact& impact) { ASSERT(volume->GetType()==SWEPT_CAPSULE); ASSERT(geom->mType==TOUCHED_MESH); const SweptCapsule* SC = static_cast<const SweptCapsule*>(volume); const TouchedMesh* TM = static_cast<const TouchedMesh*>(geom); NxU32 NbTris = TM->mNbTris; if(!NbTris) return false; // Fetch triangle data for current mesh (the stream may contain triangles from multiple meshes) const NxTriangle* T = &sweep_test->mWorldTriangles[TM->mIndexWorldTriangles]; // const NxTriangle* ET = &sweep_test->mWorldEdgeNormals[TM->mIndexWorldEdgeNormals]; const NxU32* EdgeFlags = &sweep_test->mEdgeFlags[TM->mIndexEdgeFlags]; NxVec3 C0(float(center.x - TM->mOffset.x), float(center.y - TM->mOffset.y), float(center.z - TM->mOffset.z)); // PT: this only really works when the CCT collides with a single mesh, but that's the most common case. When it doesn't, there's just no speedup but it still works. NxU32 CachedIndex = sweep_test->mCachedTriIndex[sweep_test->mCachedTriIndexIndex]; if(CachedIndex>=NbTris) CachedIndex=0; NxVec3 Hit, Normal; float t; NxU32 Index; if(gUtilLib->NxSweepCapsuleTriangles(sweep_test->mUpDirection, NbTris, T, EdgeFlags, C0, SC->mRadius, SC->mHeight, dir, impact.mDistance, Hit, Normal, t, Index, &CachedIndex)) { if(t>=impact.mDistance) return false; impact.mDistance = t; impact.mWorldNormal = Normal; impact.mWorldPos.x = Hit.x + TM->mOffset.x; impact.mWorldPos.y = Hit.y + TM->mOffset.y; impact.mWorldPos.z = Hit.z + TM->mOffset.z; // Returned index is only between 0 and NbTris, i.e. it indexes the array of cached triangles, not the original mesh. assert(Index<NbTris); sweep_test->mCachedTriIndex[sweep_test->mCachedTriIndexIndex] = Index; // The CCT loop will use the index from the start of the cache... impact.mIndex = Index + TM->mIndexWorldTriangles; return true; } return false; }
// Driver: evaluates a fresh `Ck()` expression for every call and invokes
// f0 .. fk on it, for k = 0..3, in ascending order.
int main()
{
    // C0: f0 only
    C0().f0();

    // C1: f0, f1
    C1().f0();
    C1().f1();

    // C2: f0 .. f2
    C2().f0();
    C2().f1();
    C2().f2();

    // C3: f0 .. f3
    C3().f0();
    C3().f1();
    C3().f2();
    C3().f3();

    return 0;
}
/*
 * Write `size` bytes from `buf` to stdout.
 *
 * write() is allowed to transfer fewer bytes than requested; a short count is
 * not an error and leaves errno untouched. The remainder is therefore written
 * in further write() calls, and errno is consulted only when write() actually
 * fails (returns -1).
 *
 * Returns:
 *   OK    - all bytes were written (also when size is 0)
 *   RCDO  - tty_hangedup is set (checked on entry, after a short write and on
 *           failure), or write() failed with EPIPE
 *   ERROR - write() failed for any other reason, or made no progress
 */
int tty_put(byte *buf,size_t size)
{
    size_t done = 0;
#ifdef NEED_DEBUG
    size_t i;
#endif

    if(tty_hangedup)
        return RCDO;

    while(done < size) {
        ssize_t rc = write(STDOUT_FILENO, buf + done, size - done);

        if(rc <= 0) {
            /* errno is meaningful only for rc == -1. */
            if(tty_hangedup || (rc < 0 && errno == EPIPE))
                return RCDO;
            return ERROR;
        }

        done += (size_t)rc;

        /* A short write may have been cut off by a hangup. */
        if(done < size && tty_hangedup)
            return RCDO;
    }

#if DEBUG_SLEEP==1
    if(is_ip)
        usleep(1000);
#endif
#ifdef NEED_DEBUG
    for(i = 0; i < size; i++)
        DEBUG(('M',9,"tty_put: '%c' (%d)",C0(buf[i]),buf[i]));
#endif
    return OK;
}
//戻り値がFALSEのとき*pdwReadSizeは不定、TRUEのとき*pdwReadSize<=dwSrcSize (C0 C1ほかメソッドも同様) BOOL CARIB8CharDecode::Analyze( const BYTE* pbSrc, DWORD dwSrcSize, DWORD* pdwReadSize ) { if( dwSrcSize == 0 ){ return FALSE; } DWORD dwReadSize = 0; while( dwReadSize < dwSrcSize ){ BOOL bRet = FALSE; DWORD dwReadBuff = 0; BOOL bHoldCharacter = m_bRPC; //1バイト目チェック if( pbSrc[dwReadSize] <= 0x20 ){ //C0制御コード bRet = C0( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff ); }else if( pbSrc[dwReadSize] < 0x7F ){ //GL符号領域 bRet = GL_GR( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff, m_GL ); }else if( pbSrc[dwReadSize] <= 0xA0 ){ //C1制御コード bRet = C1( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff ); }else if( pbSrc[dwReadSize] < 0xFF ){ //GR符号領域 bRet = GL_GR( pbSrc+dwReadSize, dwSrcSize-dwReadSize, &dwReadBuff, m_GR ); } if( !bRet ){ return FALSE; } if( m_bSpacing && bHoldCharacter ){ if( m_wRPC != 0 && --m_wRPC == 0 ){ //文字繰り返し終了 m_bRPC = FALSE; dwReadSize += dwReadBuff; } }else{ dwReadSize += dwReadBuff; } } *pdwReadSize = dwReadSize; ASSERT( dwReadSize == dwSrcSize ); return TRUE; }
/*
 * Send a command string to stdout one character at a time, interpreting a few
 * characters as control actions instead of sending them:
 *
 *   '|'  - write "\r", then pause 300 ms
 *   '~'  - pause 1 second
 *   '\'' - pause 200 ms
 *   '^'  - tty_setdtr(1)
 *   'v'  - tty_setdtr(0)
 *
 * Any other character is written as-is. Processing stops at the first step
 * whose result is <= 0.
 *
 * Returns 1 when cmd is NULL, otherwise the result of the last step performed
 * (1 for an empty string).
 */
int modem_sendstr(char *cmd)
{
    int rc = 1;

    if(!cmd)
        return 1;

    DEBUG(('M',1,">> %s",cmd));

    for(; *cmd && rc > 0; cmd++) {
        switch(*cmd) {
        case '|':
            rc = write(STDOUT_FILENO, "\r", 1);
            usleep(300000L);
            break;
        case '~':
            sleep(1);
            rc = 1;
            break;
        case '\'':
            usleep(200000L);
            rc = 1;
            break;
        case '^':
            rc = tty_setdtr(1);
            break;
        case 'v':
            rc = tty_setdtr(0);
            break;
        default:
            rc = write(STDOUT_FILENO, cmd, 1);
            DEBUG(('M',4,">>> %c",C0(*cmd)));
            break;
        }
    }

    if(rc > 0) {
        DEBUG(('M',4,"modem_sendstr: sent"));
    } else {
        DEBUG(('M',3,"modem_sendstr: error, rc=%d, errno=%d",rc,errno));
    }
    return rc;
}
int test() { int Error = 0; int A0(glm::log2(10.f)); glm::ivec1 B0(glm::log2(glm::vec1(10.f))); glm::ivec2 C0(glm::log2(glm::vec2(10.f))); glm::ivec3 D0(glm::log2(glm::vec3(10.f))); glm::ivec4 E0(glm::log2(glm::vec4(10.f))); int A1 = glm::log2(int(10.f)); glm::ivec1 B1 = glm::log2(glm::ivec1(10.f)); glm::ivec2 C1 = glm::log2(glm::ivec2(10.f)); glm::ivec3 D1 = glm::log2(glm::ivec3(10.f)); glm::ivec4 E1 = glm::log2(glm::ivec4(10.f)); Error += A0 == A1 ? 0 : 1; Error += glm::all(glm::equal(B0, B1)) ? 0 : 1; Error += glm::all(glm::equal(C0, C1)) ? 0 : 1; Error += glm::all(glm::equal(D0, D1)) ? 0 : 1; Error += glm::all(glm::equal(E0, E1)) ? 0 : 1; return Error; }
int test() { int Error = 0; int A0 = static_cast<int>(glm::log2(16.f)); glm::ivec1 B0(glm::log2(glm::vec1(16.f))); glm::ivec2 C0(glm::log2(glm::vec2(16.f))); glm::ivec3 D0(glm::log2(glm::vec3(16.f))); glm::ivec4 E0(glm::log2(glm::vec4(16.f))); int A1 = glm::log2(int(16)); glm::ivec1 B1 = glm::log2(glm::ivec1(16)); glm::ivec2 C1 = glm::log2(glm::ivec2(16)); glm::ivec3 D1 = glm::log2(glm::ivec3(16)); glm::ivec4 E1 = glm::log2(glm::ivec4(16)); Error += A0 == A1 ? 0 : 1; Error += glm::all(glm::equal(B0, B1)) ? 0 : 1; Error += glm::all(glm::equal(C0, C1)) ? 0 : 1; Error += glm::all(glm::equal(D0, D1)) ? 0 : 1; Error += glm::all(glm::equal(E0, E1)) ? 0 : 1; glm::uint64 A2 = glm::log2(glm::uint64(16)); glm::u64vec1 B2 = glm::log2(glm::u64vec1(16)); glm::u64vec2 C2 = glm::log2(glm::u64vec2(16)); glm::u64vec3 D2 = glm::log2(glm::u64vec3(16)); glm::u64vec4 E2 = glm::log2(glm::u64vec4(16)); Error += A2 == glm::uint64(4) ? 0 : 1; Error += glm::all(glm::equal(B2, glm::u64vec1(4))) ? 0 : 1; Error += glm::all(glm::equal(C2, glm::u64vec2(4))) ? 0 : 1; Error += glm::all(glm::equal(D2, glm::u64vec3(4))) ? 0 : 1; Error += glm::all(glm::equal(E2, glm::u64vec4(4))) ? 0 : 1; return Error; }
// Evaluates C0 with its first two arguments fixed to 0.0 and the remaining
// ones taken from (p2, m02, m12, m22), then repackages the real and imaginary
// parts into the project's `complex` type (third constructor argument: false).
complex LoopToolsWrapper::PV_C0(const double p2, const double m02, const double m12, const double m22) const
{
    const std::complex<double> loopValue = C0(0.0, 0.0, p2, m02, m12, m22);

    const double re = loopValue.real();
    const double im = loopValue.imag();
    return complex( re, im, false );
}
inline void GemmTTA ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTA expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MR, STAR> D1_MR_STAR(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); B1_STAR_MC.AlignWith( A ); D1_MR_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionRight( C, CL, CR, 0 ); while( BB.Height() > 0 ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_MR_STAR ); //--------------------------------------------------------------------// B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR] // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T // = alpha (A^T)[MR,MC] (B1^T)[MC,*] LocalGemm ( orientationOfA, orientationOfB, alpha, A, B1_STAR_MC, T(0), D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); 
//--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), 
D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
int main(int argc, char *argv[]) { int i, returnierr=0; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif // Uncomment to debug in parallel int tmp; if (Comm.MyPID()==0) cin >> tmp; Comm.Barrier(); bool verbose = false; bool veryVerbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; // Check if we should print lots of results to standard out if (argc>2) if (argv[2][0]=='-' && argv[2][1]=='v') veryVerbose = true; if (verbose && Comm.MyPID()==0) std::cout << Epetra_Version() << std::endl << std::endl; if (!verbose) Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) std::cout << Comm << std::endl << std::flush; bool verbose1 = verbose; if (verbose) verbose = (Comm.MyPID()==0); bool veryVerbose1 = veryVerbose; if (veryVerbose) veryVerbose = (Comm.MyPID()==0); int NumMyElements = 100; if (veryVerbose1) NumMyElements = 10; NumMyElements += Comm.MyPID(); int MaxNumMyElements = NumMyElements+Comm.NumProc()-1; int * ElementSizeList = new int[NumMyElements]; long long * MyGlobalElements = new long long[NumMyElements]; for (i = 0; i<NumMyElements; i++) { MyGlobalElements[i] = (Comm.MyPID()*MaxNumMyElements+i)*2; ElementSizeList[i] = i%6 + 2; // elementsizes go from 2 to 7 } Epetra_BlockMap Map(-1LL, NumMyElements, MyGlobalElements, ElementSizeList, 0, Comm); delete [] ElementSizeList; delete [] MyGlobalElements; Epetra_MapColoring C0(Map); int * elementColors = new int[NumMyElements]; int maxcolor = 24; int * colorCount = new int[maxcolor]; int ** colorLIDs = new int*[maxcolor]; for (i=0; i<maxcolor; i++) colorCount[i] = 0; for (i=0; i<maxcolor; i++) colorLIDs[i] = 0; int defaultColor = C0.DefaultColor(); for (i=0; i<Map.NumMyElements(); i++) { assert(C0[i]==defaultColor); assert(C0(Map.GID64(i))==defaultColor); if (i%2==0) C0[i] = i%6+5+i%14; // cycle through 5...23 on 
even elements else C0(Map.GID64(i)) = i%5+1; // cycle through 1...5 on odd elements elementColors[i] = C0[i]; // Record color of ith element for use below colorCount[C0[i]]++; // Count how many of each color for checking below } if (veryVerbose) std::cout << "Original Map Coloring using element-by-element definitions" << std::endl; if (veryVerbose1) std::cout << C0 << std::endl; int numColors = 0; for (i=0; i<maxcolor; i++) if (colorCount[i]>0) { numColors++; colorLIDs[i] = new int[colorCount[i]]; } for (i=0; i<maxcolor; i++) colorCount[i] = 0; for (i=0; i<Map.NumMyElements(); i++) colorLIDs[C0[i]][colorCount[C0[i]]++] = i; int newDefaultColor = -1; Epetra_MapColoring C1(Map, elementColors, newDefaultColor); if (veryVerbose) std::cout << "Same Map Coloring using one-time construction" << std::endl; if (veryVerbose1) std::cout << C1 << std::endl; assert(C1.DefaultColor()==newDefaultColor); for (i=0; i<Map.NumMyElements(); i++) assert(C1[i]==C0[i]); Epetra_MapColoring C2(C1); if (veryVerbose) std::cout << "Same Map Coloring using copy constructor" << std::endl; if (veryVerbose1) std::cout << C1 << std::endl; for (i=0; i<Map.NumMyElements(); i++) assert(C2[i]==C0[i]); assert(C2.DefaultColor()==newDefaultColor); assert(numColors==C2.NumColors()); for (i=0; i<maxcolor; i++) { int curNumElementsWithColor = C2.NumElementsWithColor(i); assert(colorCount[i]==curNumElementsWithColor); int * curColorLIDList = C2.ColorLIDList(i); if (curNumElementsWithColor==0) { assert(curColorLIDList==0); } else for (int j=0; j<curNumElementsWithColor; j++) assert(curColorLIDList[j]==colorLIDs[i][j]); } int curColor = 1; Epetra_Map * Map1 = C2.GenerateMap(curColor); Epetra_BlockMap * Map2 = C2.GenerateBlockMap(curColor); assert(Map1->NumMyElements()==colorCount[curColor]); assert(Map2->NumMyElements()==colorCount[curColor]); for (i=0; i<Map1->NumMyElements(); i++) { assert(Map1->GID64(i)==Map.GID64(colorLIDs[curColor][i])); assert(Map2->GID64(i)==Map.GID64(colorLIDs[curColor][i])); 
assert(Map2->ElementSize(i)==Map.ElementSize(colorLIDs[curColor][i])); } // Now test data redistribution capabilities Epetra_Map ContiguousMap(-1LL, Map.NumMyElements(), Map.IndexBase64(), Comm); // This vector contains the element sizes for the original map. Epetra_IntVector elementSizes(Copy, ContiguousMap, Map.ElementSizeList()); Epetra_LongLongVector elementIDs(Copy, ContiguousMap, Map.MyGlobalElements64()); Epetra_IntVector elementColorValues(Copy, ContiguousMap, C2.ElementColors()); long long NumMyElements0 = 0; if (Comm.MyPID()==0) NumMyElements0 = Map.NumGlobalElements64(); Epetra_Map CMap0(-1LL, NumMyElements0, Map.IndexBase64(), Comm); Epetra_Import importer(CMap0, ContiguousMap); Epetra_IntVector elementSizes0(CMap0); Epetra_LongLongVector elementIDs0(CMap0); Epetra_IntVector elementColorValues0(CMap0); elementSizes0.Import(elementSizes, importer, Insert); elementIDs0.Import(elementIDs, importer, Insert); elementColorValues0.Import(elementColorValues, importer, Insert); Epetra_BlockMap MapOnPE0(-1LL,NumMyElements0, elementIDs0.Values(), elementSizes0.Values(), Map.IndexBase64(), Comm); Epetra_Import importer1(MapOnPE0, Map); Epetra_MapColoring ColoringOnPE0(MapOnPE0); ColoringOnPE0.Import(C2, importer1, Insert); for (i=0; i<MapOnPE0.NumMyElements(); i++) assert(ColoringOnPE0[i]==elementColorValues0[i]); if (veryVerbose) std::cout << "Same Map Coloring on PE 0 only" << std::endl; if (veryVerbose1) std::cout << ColoringOnPE0 << std::endl; Epetra_MapColoring C3(Map); C3.Export(ColoringOnPE0, importer1, Insert); for (i=0; i<Map.NumMyElements(); i++) assert(C3[i]==C2[i]); if (veryVerbose) std::cout << "Same Map Coloring after Import/Export exercise" << std::endl; if (veryVerbose1) std::cout << ColoringOnPE0 << std::endl; if (verbose) std::cout << "Checked OK\n\n" << std::endl; if (verbose1) { if (verbose) std::cout << "Test ostream << operator" << std::endl << std::flush; std::cout << C0 << std::endl; } delete [] elementColors; for (i=0; i<maxcolor; i++) if 
(colorLIDs[i]!=0) delete [] colorLIDs[i]; delete [] colorLIDs; delete [] colorCount; delete Map1; delete Map2; #ifdef EPETRA_MPI MPI_Finalize(); #endif return returnierr; }
inline void Trr2kNNTN ( UpperOrLower uplo, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Width() != E.Width() || D.Width() != E.Width() || A.Width() != B.Height() || C.Height() != D.Height() ) throw std::logic_error("Nonconformal Trr2kNNTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, TRANSPOSE, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ 
/**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SymmLLA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T> Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Scale( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Z1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), Z1_MC_STAR ); Zeros( C1.Height(), C1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalSymmetricAccumulateLL ( TRANSPOSE, alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( T(1), Z1_MC_STAR ); Axpy( T(1), Z1, C1 ); //--------------------------------------------------------------------// Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// 
SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::GemmTNA ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNA assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T,MC,MR> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,MR,STAR> D1_MR_STAR(g); DistMatrix<T,MR,MC > D1_MR_MC(g); DistMatrix<T,MC,MR > D1(g); // Start the algorithm Scal( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_MC_STAR.AlignWith( A ); D1_MR_STAR.AlignWith( A ); D1_MR_STAR.ResizeTo( C1.Height(), C1.Width() ); D1.AlignWith( C1 ); //--------------------------------------------------------------------// B1_MC_STAR = B1; // B1[MC,*] <- B1[MC,MR] // D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*] // = alpha (A1^T)[MR,MC] B1[MC,*] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A, B1_MC_STAR, (T)0, D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( (T)1, D1, C1 ); //--------------------------------------------------------------------// 
B1_MC_STAR.FreeAlignments(); D1_MR_STAR.FreeAlignments(); D1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
Func ColorMgather(Func stBasis, float angle, uint8_t * orders, Expr filterthreshold, Expr divisionthreshold, Expr divisionthreshold2) { uint8_t x_order = orders[0]; uint8_t y_order = orders[1]; uint8_t t_order = orders[2]; uint8_t c_order = orders[3]; Func X("X"),Y("Y"),T("T"),Xrg("Xrg"),Yrg("Yrg"),Trg("Trg"); uint8_t max_order = x_order; // std::vector<Expr>Xk_expr (max_order,cast<float>(0.0f)); // std::vector<Expr>Yk_expr (max_order,cast<float>(0.0f)); // std::vector<Expr>Tk_expr (max_order,cast<float>(0.0f)); uint8_t Xk_uI[max_order]; uint8_t Yk_uI[max_order]; uint8_t Tk_uI[max_order]; Func Xk[max_order]; Func Yk[max_order]; Func Tk[max_order]; // Expr Xk[max_order],Yk[max_order],Tk[max_order]; for (int iO=0; iO < x_order; iO++) { Xk[iO](x,y,t) = Expr(0.0f); Yk[iO](x,y,t) = Expr(0.0f); Tk[iO](x,y,t) = Expr(0.0f); Xk_uI[iO] = 0; Yk_uI[iO] = 0; Tk_uI[iO] = 0; } int k = 0; for (int iXo = 0; iXo < x_order; iXo++) // x_order for (int iYo = 0; iYo < y_order; iYo++) // y_oder for (int iTo = 0; iTo < t_order; iTo++) // t_order for (int iCo = 0; iCo < c_order; iCo ++ ) // c_order: index of color channel { if ((iYo+iTo+iCo == 0 || iYo+iTo+iCo == 1) && ((iXo+iYo+iTo+iCo+1) < (x_order + 1))) { X = ColorMgetfilter(stBasis, angle, iXo+1, iYo, iTo, iCo); Y = ColorMgetfilter(stBasis, angle, iXo, iYo+1, iTo, iCo); T = ColorMgetfilter(stBasis, angle, iXo, iYo, iTo+1, iCo); Xrg = ColorMgetfilter(stBasis, angle, iXo+1, iYo, iTo, iCo+1); Yrg = ColorMgetfilter(stBasis, angle, iXo, iYo+1, iTo, iCo+1); Trg = ColorMgetfilter(stBasis, angle, iXo, iYo, iTo+1, iCo+1); k = iXo + iYo + iTo + iCo; Xk[k](x,y,t) += X(x,y,t) + Xrg(x,y,t); Yk[k](x,y,t) += Y(x,y,t) + Yrg(x,y,t); Tk[k](x,y,t) += T(x,y,t) + Trg(x,y,t); Xk[k].update(Xk_uI[k]); Xk_uI[k]++; Yk[k].update(Yk_uI[k]); Yk_uI[k]++; Tk[k].update(Tk_uI[k]); Tk_uI[k]++; } } // Scheduling for (int iO = 0; iO <= k; iO++) { Xk[iO].compute_root(); Yk[iO].compute_root(); Tk[iO].compute_root(); } std::vector<Expr> st_expr(6,cast<float>(0.0f)); for 
(int iK=0; iK <= k; iK++) { st_expr[0] += Xk[iK](x,y,t)*Tk[iK](x,y,t); st_expr[1] += Tk[iK](x,y,t)*Tk[iK](x,y,t); st_expr[2] += Xk[iK](x,y,t)*Xk[iK](x,y,t); st_expr[3] += Yk[iK](x,y,t)*Tk[iK](x,y,t); st_expr[4] += Yk[iK](x,y,t)*Yk[iK](x,y,t); st_expr[5] += Xk[iK](x,y,t)*Yk[iK](x,y,t); } Func st("st"); st(x,y,t) = Tuple(st_expr); st.compute_root(); Expr x_clamped = clamp(x,0,width-1); Expr y_clamped = clamp(y,0,height-1); Func st_clamped("st_clamped"); st_clamped(x,y,t) = st(x_clamped,y_clamped,t); // float win = 7.0; // Image<float> meanfilter(7,7,"meanfilter_data"); // meanfilter(x,y) = Expr(1.0f/(win*win)); // RDom rMF(meanfilter); uint8_t win = 7; RDom rMF(0,win,0,win); Func st_filtered[6]; for (uint8_t iPc=0; iPc<6; iPc++) { // iPc: index of product component // Apply average filter st_filtered[iPc](x,y,t) = sum(rMF,st_clamped(x + rMF.x,y + rMF.y,t)[iPc]/Expr(float(win*win)),"mean_filter"); st_filtered[iPc].compute_root(); } // Tuple st_tuple = Tuple(st_expr); // 4 debug // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(st_filtered[0](x,y,t),st_filtered[1](x,y,t),st_filtered[2](x,y,t),st_filtered[3](x,y,t),st_filtered[4](x,y,t),st_filtered[5](x,y,t)); // return tmpOut; Tuple pbx = Tuple(st_filtered[2](x,y,t),st_filtered[5](x,y,t),st_filtered[0](x,y,t)); Tuple pby = Tuple(st_filtered[5](x,y,t),st_filtered[4](x,y,t),st_filtered[3](x,y,t)); Tuple pbt = Tuple(st_filtered[0](x,y,t),st_filtered[3](x,y,t),st_filtered[1](x,y,t)); Func pbxy("pbxy"); pbxy = cross(pby,pbx); pbxy.compute_root(); Func pbxt("pbxt"); pbxt = cross(pbx,pbt); pbxt.compute_root(); Func pbyt("pbyt"); pbyt = cross(pby,pbt); pbyt.compute_root(); Func pbxyd("pbxyd"); pbxyd = dot(pby,pbx); pbxyd.compute_root(); Func pbxtd("pbxtd"); pbxtd = dot(pbx,pbt); pbxtd.compute_root(); Func pbytd("pbytd"); pbytd = dot(pby,pbt); pbytd.compute_root(); // 4 debug // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(pbxy(x,y,t)[0],pbxt(x,y,t)[0],pbyt(x,y,t)[0],pbxyd(x,y,t),pbxtd(x,y,t),pbytd(x,y,t)); // return tmpOut; Func 
yt_xy("yt_xy"); yt_xy = dot(pbyt(x,y,t),pbxy(x,y,t)); yt_xy.compute_root(); Func xt_yt("xt_yt"); xt_yt = dot(pbxt(x,y,t),pbyt(x,y,t)); xt_yt.compute_root(); Func xt_xy("xt_xy"); xt_xy = dot(pbxt(x,y,t),pbxy(x,y,t)); xt_xy.compute_root(); Func yt_yt("yt_yt"); yt_yt = dot(pbyt(x,y,t),pbyt(x,y,t)); yt_yt.compute_root(); Func xt_xt("xt_xt"); xt_xt = dot(pbxt(x,y,t),pbxt(x,y,t)); xt_xt.compute_root(); Func xy_xy("xy_xy"); xy_xy = dot(pbxy(x,y,t),pbxy(x,y,t)); xy_xy.compute_root(); Tuple Tk_tuple = Tuple(Tk[0](x,y,t),Tk[1](x,y,t),Tk[2](x,y,t), Tk[3](x,y,t),Tk[4](x,y,t)); Func Tkd("Tkd"); Tkd = dot(Tk_tuple,Tk_tuple); Tkd.compute_root(); // Expr Dimen = pbxyd/xy_xy; Expr kill(1.0f); Func Oxy; Oxy(x,y,t) = Mdefdiv(st_filtered[5](x,y,t) - Mdefdivang(yt_xy(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*st_filtered[3](x,y,t)*kill,st_filtered[4](x,y,t),divisionthreshold); Oxy.compute_root(); Func Oyx; Oyx(x,y,t) = Mdefdiv(st_filtered[5](x,y,t) + Mdefdivang(xt_xy(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*st_filtered[0](x,y,t)*kill,st_filtered[2](x,y,t),divisionthreshold); Oyx.compute_root(); Func C0; C0(x,y,t) = st_filtered[3](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill; C0.compute_root(); Func M0; M0(x,y,t) = Mdefdiv(st_filtered[0](x,y,t) + C0(x,y,t), st_filtered[1](x,y,t)*pow(Mdefdivang(xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f)),divisionthreshold); M0.compute_root(); Func C1; C1(x,y,t) = st_filtered[5](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_xy(x,y,t),xy_xy(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill; C1.compute_root(); Func P1; P1(x,y,t) = pow(Mdefdivang(xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill + 1.0f; P1.compute_root(); // 4 debug // Func tmpOut("tmpOut"); tmpOut(x,y,t) = Tuple(Oxy(x,y,t),Oyx(x,y,t),C0(x,y,t),M0(x,y,t),C1(x,y,t),P1(x,y,t)); // return tmpOut; Func Q1; Q1(x,y,t) = st_filtered[2](x,y,t) * (pow(Oyx(x,y,t),Expr(2.0f))+Expr(1.0f)); 
Q1.compute_root(); Func M1; M1(x,y,t) = Mdefdiv(((st_filtered[0](x,y,t)-C1(x,y,t))*P1(x,y,t)),Q1(x,y,t),divisionthreshold); M1.compute_root(); Func C2; C2(x,y,t) = st_filtered[0](x,y,t) * Mdefdivang(Expr(-1.0f)*xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill; C2.compute_root(); Func M2; M2(x,y,t) = Mdefdiv(st_filtered[3](x,y,t)+C2(x,y,t),st_filtered[1](x,y,t)*(pow(Mdefdivang(xt_yt(x,y,t),xt_xt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill+Expr(1.0f)),divisionthreshold); M2.compute_root(); Func C3; C3(x,y,t) = st_filtered[5](x,y,t) * Mdefdivang(yt_xy(x,y,t),xy_xy(x,y,t),pbxyd(x,y,t),divisionthreshold2)*kill; C3.compute_root(); Func P3; P3(x,y,t) = pow(Mdefdivang(xt_yt(x,y,t),yt_yt(x,y,t),pbxyd(x,y,t),divisionthreshold2),Expr(2.0f))*kill + Expr(1.0f); P3.compute_root(); Func Q3; Q3(x,y,t) = st_filtered[4](x,y,t) * (pow(Oxy(x,y,t),Expr(2.0f))+Expr(1.0f)); Q3.compute_root(); Func M3; M3(x,y,t) = Mdefdiv(((st_filtered[3](x,y,t)-C3(x,y,t))*P3(x,y,t)),Q3(x,y,t),divisionthreshold); M3.compute_root(); Func basisAtAngle; basisAtAngle(x,y,t) = Tuple(M0(x,y,t),M1(x,y,t),M2(x,y,t),M3(x,y,t),Tkd(x,y,t)); return basisAtAngle; // Func hsv2rgb(Func colorImage) { // Took this function // Var x, y, c, t; // Func output; // output(x,y,c,t) = cast <float> (0.0f); // Expr fR, fG, fB; // R,G & B values // Expr fH = (colorImage(x,y,0,t)); //H value [0-360) // Expr fS = (colorImage(x,y,1,t)); //S value // Expr fV = (colorImage(x,y,2,t)); //V value // //Conversion (I took the one on Wikipedia) // // https://fr.wikipedia.org/wiki/Teinte_Saturation_Valeur#Conversion_de_TSV_vers_RVB // Expr fHi = floor(fH / Expr(60.0f)); // Expr fF = fH / 60.0f - fHi; // Expr fL = fV * (1 - fS); // Expr fM = fV * (1 - fF * fS) ; // Expr fN = fV * (1 - (1 - fF) * fS); // fR = select((0 == fHi),fV, // (1 == fHi),fM, // (2 == fHi),fL, // (3 == fHi),fL, // (4 == fHi),fN, // (5 == fHi),fV, // 0.0f); // fG = select((0 == fHi),fN, // (1 == fHi),fV, // (2 == fHi),fV, // (3 == fHi),fM, // (4 
== fHi),fL, // (5 == fHi),fL, // 0.0f); // fB = select((0 == fHi),fL, // (1 == fHi),fL, // (2 == fHi),fN, // (3 == fHi),fV, // (4 == fHi),fV, // (5 == fHi),fM, // 0.0f); // output(x,y,0,t) = fR; // output(x,y,1,t) = fG; // output(x,y,2,t) = fB; // return output; // } // Func angle2rgb (Func v) { // Var x, y, c, t; // Func ov, a; // ov(x,y,c,t) = cast <float> (0.0f); // Expr pi2(2*M_PI); // a(x,y,c,t) = v(x,y,c,t) / pi2; // ov(x,y,0,t) = a(x,y,c,t); // ov(x,y,1,t) = 1; // ov(x,y,2,t) = 1; // return ov; // } // Func outputvelocity(Func Blur, Func Speed, Func Angle, int border, Expr speedthreshold, Expr filterthreshold) { // extern Expr width; // extern Expr height; // Func Blur3, Speed3; // Blur3(x,y,c,t) = cast <float> (0.0f); // Speed3(x,y,c,t) = cast <float> (0.0f); // //Scale the grey level images // Blur(x,y,0,t) = (Blur(x,y,0,t) - minimum(Blur(x,y,0,t))) / (maximum(Blur(x,y,0,t)) - minimum(Blur(x,y,0,t))); // //Concatenation along the third dimension // Blur3(x,y,0,t) = Blur(x,y,0,t); // Blur3(x,y,1,t) = Blur(x,y,0,t); // Blur3(x,y,2,t) = Blur(x,y,0,t); // //Speed scaled to 1 // //Concatenation along the third dimension // Speed3(x,y,1,t) = Speed(x,y,0,t); // Speed3(x,y,2,t) = Speed(x,y,0,t); // //Use the log speed to visualise speed // Func LogSpeed; // LogSpeed(x,y,c,t) = fast_log(Speed3(x,y,c,t) + Expr(0.0000001f))/fast_log(Expr(10.0f)); // LogSpeed(x,y,c,t) = (LogSpeed(x,y,c,t) - minimum(LogSpeed(x,y,c,t))) / (maximum(LogSpeed(x,y,c,t)) - minimum(LogSpeed(x,y,c,t))); // //Make a colour image // // uint16_t rows = height; // // uint16_t cols = width; // // int depth = Angle.channels(); // //Do it the HSV way // Func colorImage; // colorImage(x,y,0,t) = Angle(x,y,0,t); // //Do hsv to rgb // Func colorImage1; // colorImage1 = hsv2rgb(colorImage); // // Assume the border equals to the size of spatial filter // //Make the border // // int bir = rows + 2 * border; // // int bic = cols + 2 * border; // Expr orows = height / Expr(2); // Expr ocols = width / 
Expr(2); // //Rotation matrix // int ph = 0; // Func mb, sb; // // if (rx < border - 1 || rx >= rows+border -1 || ry < border - 1 || ry >= cols+border - 1) { // Expr co1 = x - orows; // Expr co2 = - (y - ocols); // Expr cosPh(cos(ph)); // Expr sinPh(sin(ph)); // Expr rco1 = cosPh * co1 - sinPh * co2; //Using rotation matrix // Expr rco2 = sinPh * co1 + cosPh * co2; // // Expr justPi (M_PI); // mb(x,y,c,t) = // select (((x < (border - 1)) || // (x >= (height+border -1)) || // (y < (border - 1)) || // (y >= (width+border - 1))), // atan2(rco1,rco2) + Expr(M_PI),mb(x,y,c,t)); // sb(x,y,c,t) = // select (((x < (border - 1)) || // (x >= (height+border -1)) || // (y < (border - 1) ) || // (y >= (width+border - 1))), // 1, sb (x,y,c,t)); // Func cb; // cb = angle2rgb(mb); // //Get the old data // // Expr pi2(2*M_PI); // colorImage1(x,y,0,t)=colorImage(x,y,0,t) * Expr(2*M_PI); // colorImage1=angle2rgb(colorImage1); // colorImage1(x,y,c,t)=select(abs(Speed3(x,y,c,t))<speedthreshold,Expr(0.0f),colorImage1(x,y,c,t)); // Func colorImage2; // colorImage2(x,y,c,t) = colorImage1(x,y,c,t) * Speed(x,y,c,t); // //Put the data in the border // RDom bordx (border,rows + border); // RDom bordy (border,cols + border); // Func ang1, ang2; // ang1 (x,y,c,t) = cast <float> (0.0f); // ang2 (x,y,c,t) = cast <float> (0.0f); // cb(bordx, bordy,c,t) = colorImage1(x,y,c,t); // ang1 = cb; // cb(bordx, bordy,c,t) = colorImage2(x,y,c,t); // ang2 = cb; // sb(bordx, bordy,c,t) = Speed3(x,y,c,t); // Speed3 = sb; // sb(bordx, bordy,c,t) = Blur3(x,y,c,t); // Blur3 = sb; // // Func I; // // I (x,y,c,t) = Blur3(x,y,c,t) + Speed3(x,y - height,c,t) + ang1(x - width,y,c,t) + ang2(x - width,y - height,c,t); // //I = cat(2,cat(1,Blur,Speed),cat(1,ang1,ang2)); // return I; // } }
void Trr2kNTTN ( UpperOrLower uplo, Orientation orientationOfB, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE CallStackEntry entry("internal::Trr2kNTTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Width() != E.Width() || A.Width() != B.Width() || C.Height() != D.Height() ) LogicError("Nonconformal Trr2kNTTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); 
//--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); } }
inline void HemmRUC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error("{A,B,C} must be distributed on the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g), CLeft(g), CRight(g); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> AColPan_VR_STAR(g); DistMatrix<T,STAR,MR > AColPanAdj_STAR_MR(g); DistMatrix<T,MR, STAR> ARowPanAdj_MR_STAR(g); B1_MC_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( CR.Width() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); ARowPan.LockedView1x2( A11, A12 ); AColPan.LockedView2x1 ( A01, A11 ); CLeft.View1x2( C0, C1 ); CRight.View1x2( C1, C2 ); AColPan_VR_STAR.AlignWith( CLeft ); AColPanAdj_STAR_MR.AlignWith( CLeft ); ARowPanAdj_MR_STAR.AlignWith( CRight ); //--------------------------------------------------------------------// B1_MC_STAR = B1; AColPan_VR_STAR = AColPan; AColPanAdj_STAR_MR.AdjointFrom( AColPan_VR_STAR ); ARowPanAdj_MR_STAR.AdjointFrom( ARowPan ); MakeTrapezoidal( LEFT, LOWER, 0, ARowPanAdj_MR_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, AColPanAdj_STAR_MR ); LocalGemm ( NORMAL, ADJOINT, alpha, B1_MC_STAR, ARowPanAdj_MR_STAR, T(1), CRight ); LocalGemm ( NORMAL, NORMAL, alpha, B1_MC_STAR, AColPanAdj_STAR_MR, 
T(1), CLeft ); //--------------------------------------------------------------------// AColPan_VR_STAR.FreeAlignments(); AColPanAdj_STAR_MR.FreeAlignments(); ARowPanAdj_MR_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Trr2kTTTT ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kTTTT"); if( E.Height() != E.Width() || A.Height() != C.Height() || A.Width() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Height() != E.Width() || A.Height() != B.Width() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kTTTT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AT.Height() < A.Height() ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; D1_VR_STAR = D1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); if( orientationOfD == ADJOINT ) 
D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, orientationOfA, orientationOfC, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HemmRUA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); DistMatrix<T,VC, STAR> B1Adj_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Adj_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Adj_MR_STAR(g); DistMatrix<T,MR, MC > Z1Adj_MR_MC(g); DistMatrix<T> Z1Adj(g); B1Adj_MR_STAR.AlignWith( A ); B1Adj_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Adj_MC_STAR.AlignWith( A ); Z1Adj_MR_STAR.AlignWith( A ); Matrix<T> Z1Local; Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Z1Adj_MR_MC.AlignWith( C1 ); Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR ); Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR ); //--------------------------------------------------------------------// B1Adj_MR_STAR.AdjointFrom( B1 ); B1Adj_VC_STAR = B1Adj_MR_STAR; B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR ); LocalSymmetricAccumulateRU ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR, Z1Adj_MC_STAR, Z1Adj_MR_STAR ); Z1Adj.SumScatterFrom( Z1Adj_MC_STAR ); Z1Adj_MR_MC = Z1Adj; Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR ); Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local ); Axpy( T(1), Z1Local, C1.LocalMatrix() ); //--------------------------------------------------------------------// Z1Adj_MR_MC.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Zeros( C1.Height(), C1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// B1_VR_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); // D1[MC,*] := alpha A[MC,MR] B1[MR,*] LocalGemm ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows C1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNDot ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNDot"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNDot: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); if( A.Height() > B.Width() ) { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g); // Temporary distributions DistMatrix<T,STAR,VC> A1_STAR_VC(g); DistMatrix<T,VC,STAR> B1_VC_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_STAR_VC = A1; B1_VC_STAR.AlignWith( A1_STAR_VC ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C1, C1L, C1R, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// B1_VC_STAR = B1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( C1L, /**/ C1R, C10, C11, 
/**/ C12 ); } B1_VC_STAR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } } else { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g); // Temporary distributions DistMatrix<T,STAR,VR> A1_STAR_VR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_VR_STAR = B1; A1_STAR_VR.AlignWith( B1_VR_STAR ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C1, C1T, C1B, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// A1_STAR_VR = A1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 ); } A1_STAR_VR.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } } #ifndef RELEASE PopCallStack(); #endif }
/*
 * whirlpool_block - fold n consecutive 64-byte blocks, starting at inp, into
 * the chaining value held in ctx->H.
 *
 *   ctx  hash context; only ctx->H.q is touched here, through the local
 *        union pointer H (viewed as eight u64 words q[] or 64 bytes c[]).
 *   inp  input data; 64 bytes are consumed per loop iteration (p += 64).
 *   n    number of 64-byte blocks.  The loop is do { ... } while(--n), so n
 *        must be at least 1: with n == 0 one block is still processed and
 *        the size_t counter wraps around.
 *
 * Work done per block:
 *   1. K = H and S = H ^ block.
 *   2. For r = 0 .. ROUNDS-1: K is replaced by the XOR of the C0..C7 terms
 *      taken over K's eight words, with RC[r] additionally XORed into word 0;
 *      S is then replaced by the same combination taken over S, XORed with
 *      the new K.
 *   3. H ^= S ^ block.
 *
 * Compile-time variants (they differ in code shape only; each output word is
 * the XOR of the same set of terms):
 *   GO_FOR_MMX               if defined, the macro is expanded with
 *                            (ctx,inp,n) before the loop; what it does is
 *                            not visible in this block.
 *   OPENSSL_SMALL_FOOTPRINT  byte-wise setup/feed-forward and a rolled loop
 *                            over the eight words using a temporary L[8].
 *   SMALL_REGISTER_BANK      unrolled, each word L0..L7 built in a single
 *                            expression.
 *   (default)                unrolled, the terms are accumulated into L0..L7
 *                            one source word of K (or S) at a time.
 *   __STRICT_ALIGNMENT       when p is not 8-byte aligned ((size_t)p & 7),
 *                            the block is fetched with memcpy and fed forward
 *                            byte by byte instead of being read through a
 *                            const u64 pointer.  Without this macro p is
 *                            always read through the u64 cast.
 *
 * C0..C7, RC and ROUNDS are defined outside this block.
 */
void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n) { int r; const u8 *p=inp; union { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q; #ifdef GO_FOR_MMX GO_FOR_MMX(ctx,inp,n); #endif do { #ifdef OPENSSL_SMALL_FOOTPRINT u64 L[8]; int i; for (i=0;i<64;i++) S.c[i] = (K.c[i] = H->c[i]) ^ p[i]; for (r=0;r<ROUNDS;r++) { for (i=0;i<8;i++) { L[i] = i ? 0 : RC[r]; L[i] ^= C0(K,i) ^ C1(K,(i-1)&7) ^ C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^ C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^ C6(K,(i-6)&7) ^ C7(K,(i-7)&7); } memcpy (K.q,L,64); for (i=0;i<8;i++) { L[i] ^= C0(S,i) ^ C1(S,(i-1)&7) ^ C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^ C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^ C6(S,(i-6)&7) ^ C7(S,(i-7)&7); } memcpy (S.q,L,64); } for (i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i]; #else u64 L0,L1,L2,L3,L4,L5,L6,L7; #ifdef __STRICT_ALIGNMENT if ((size_t)p & 7) { memcpy (S.c,p,64); S.q[0] ^= (K.q[0] = H->q[0]); S.q[1] ^= (K.q[1] = H->q[1]); S.q[2] ^= (K.q[2] = H->q[2]); S.q[3] ^= (K.q[3] = H->q[3]); S.q[4] ^= (K.q[4] = H->q[4]); S.q[5] ^= (K.q[5] = H->q[5]); S.q[6] ^= (K.q[6] = H->q[6]); S.q[7] ^= (K.q[7] = H->q[7]); } else #endif { const u64 *pa = (const u64*)p; S.q[0] = (K.q[0] = H->q[0]) ^ pa[0]; S.q[1] = (K.q[1] = H->q[1]) ^ pa[1]; S.q[2] = (K.q[2] = H->q[2]) ^ pa[2]; S.q[3] = (K.q[3] = H->q[3]) ^ pa[3]; S.q[4] = (K.q[4] = H->q[4]) ^ pa[4]; S.q[5] = (K.q[5] = H->q[5]) ^ pa[5]; S.q[6] = (K.q[6] = H->q[6]) ^ pa[6]; S.q[7] = (K.q[7] = H->q[7]) ^ pa[7]; } for(r=0;r<ROUNDS;r++) { #ifdef SMALL_REGISTER_BANK L0 = C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^ C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r]; L1 = C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^ C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2); L2 = C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^ C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3); L3 = C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^ C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4); L4 = C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^ C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5); L5 = C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^ C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6); L6 = C0(K,6) 
^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^ C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7); L7 = C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^ C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0); K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3; K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7; L0 ^= C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^ C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1); L1 ^= C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^ C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2); L2 ^= C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^ C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3); L3 ^= C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^ C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4); L4 ^= C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^ C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5); L5 ^= C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^ C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6); L6 ^= C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^ C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7); L7 ^= C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^ C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0); S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3; S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7; #else L0 = C0(K,0); L1 = C1(K,0); L2 = C2(K,0); L3 = C3(K,0); L4 = C4(K,0); L5 = C5(K,0); L6 = C6(K,0); L7 = C7(K,0); L0 ^= RC[r]; L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1); L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1); L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2); L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2); L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3); L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3); L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4); L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4); L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5); L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5); L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6); L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6); L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7); L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); 
L6 ^= C7(K,7); K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3; K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7; L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0); L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0); L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1); L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1); L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2); L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2); L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3); L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3); L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4); L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4); L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5); L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5); L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6); L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6); L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7); L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7); S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3; S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7; #endif } #ifdef __STRICT_ALIGNMENT if ((size_t)p & 7) { int i; for(i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i]; } else #endif { const u64 *pa=(const u64 *)p; H->q[0] ^= S.q[0] ^ pa[0]; H->q[1] ^= S.q[1] ^ pa[1]; H->q[2] ^= S.q[2] ^ pa[2]; H->q[3] ^= S.q[3] ^ pa[3]; H->q[4] ^= S.q[4] ^ pa[4]; H->q[5] ^= S.q[5] ^ pa[5]; H->q[6] ^= S.q[6] ^ pa[6]; H->q[7] ^= S.q[7] ^ pa[7]; } #endif p += 64; } while(--n); }
inline void GemmNNB ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ] LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR ); C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// 
AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
void test_EKF_remove_feature_landmark_noise_ptz_data() { vcl_string ptz_file("/Users/jimmy/Desktop/images/33_slam_data/ptz_145420_145719.txt"); vcl_vector<PTZData> ptzs; bool isRead = readPTZCameraFile(ptz_file.c_str(), ptzs, 1); assert(isRead); vcl_vector<double> gdPans; vcl_vector<double> gdTilts; vcl_vector<double> gdZooms; vcl_vector<double> observedPans; vcl_vector<double> observedTilts; vcl_vector<double> observedZooms; vcl_vector<vcl_vector<vgl_point_2d<double> > > observedImagePts; const int width = 1280; const int height = 720; vnl_random rnd; double delta = 2.0; vgl_point_2d<double> pp(width/2, height/2); PTZKeypointDynamicEKF ptzDynamicEKF; // const int M = (int)firstFeatures.size(); vnl_vector<double> C0(6, 0); C0[0] = ptzs[0].pan; C0[1] = ptzs[0].tilt; C0[2] = ptzs[0].fl; vnl_matrix<double> CP0(6, 6, 0); CP0(0, 0) = 0.01; CP0(1, 1) = 0.01; CP0(2, 2) = 10; CP0(3, 3) = 0.001; CP0(4, 4) = 0.001; CP0(5, 5) = 0.1; vnl_matrix<double> CQ0(6, 6, 0); CQ0(0, 0) = 0.00000004; CQ0(1, 1) = 0.00000004; CQ0(2, 2) = 0.0000004; CQ0(3, 3) = 0.00000001; CQ0(4, 4) = 0.00000001; CQ0(5, 5) = 0.0000001; // construct feature a database vcl_vector<vgl_point_2d<double> > courtPoints = DisneyWorldBasketballCourt::getCalibPoints(); vcl_list<VxlEKFFeaturePoint> featureDatabase; for (int i = 0; i<courtPoints.size(); i++) { vgl_point_3d<double> p(courtPoints[i].x(), courtPoints[i].y(), 0); vgl_point_2d<double> q(0, 0); VxlEKFFeaturePoint feat(p, q); feat.id_ = i; featureDatabase.push_back(feat); } // init camera and feature from ground truth of data set VxlEKFCamera camera(C0, CP0, CQ0); vcl_list<VxlEKFFeaturePoint> features; for (int i = 0; i<courtPoints.size(); i++) { vgl_homg_point_3d<double> p(courtPoints[i].x(), courtPoints[i].y(), 0, 1.0); if (camera.is_behind_camera(p)) { continue; } vgl_point_2d<double> q= camera.project(p); if (vgl_inside_image(q, width, height, 10)) { VxlEKFFeaturePoint feat(p, q); feat.id_ = i; features.push_back(feat); } } printf("initiate feature number 
is %lu\n", features.size()); vnl_vector<double> Xk; vnl_matrix<double> Pk; bool isInit = ptzDynamicEKF.updateCameraFeature(camera, features, Xk, Pk); assert(isInit); vcl_vector<double> smoothedPans; vcl_vector<double> smoothedTilts; vcl_vector<double> smoothedZooms; for (int i = 1; i<ptzs.size(); i++) { // ptzs.size() gdPans.push_back(ptzs[i].pan); gdTilts.push_back(ptzs[i].tilt); gdZooms.push_back(ptzs[i].fl); vpgl_perspective_camera<double> gdCamera; bool isCamera = VxlPTZCamera::PTZToCamera(ptzs[i].fl, ptzs[i].pan, ptzs[i].tilt, gdCamera); assert(isCamera); // remove features vcl_list<VxlEKFFeaturePoint>::iterator it = features.begin(); while (it != features.end()) { vgl_point_3d<double> p = it->worldPt(); if (gdCamera.is_behind_camera(vgl_homg_point_3d<double>(p.x(), p.y(), p.z(), 1.0))) { it = features.erase(it); printf("erase a feature\n"); continue; } vgl_point_2d<double> q = gdCamera.project(p); if (!vgl_inside_image(q, width, height, 10)) { it = features.erase(it); printf("erase a feature\n"); continue; } it++; } printf("feature number is %lu\n", features.size()); // add features for (vcl_list<VxlEKFFeaturePoint>::iterator it = featureDatabase.begin(); it != featureDatabase.end(); it++) { vgl_point_3d<double> p = it->worldPt(); if (gdCamera.is_behind_camera(vgl_homg_point_3d<double>(p.x(), p.y(), p.z(), 1.0))) { continue; } vgl_point_2d<double> q = gdCamera.project(p); if (vgl_inside_image(q, width, height, 10)) { vcl_list<VxlEKFFeaturePoint>::iterator findIt = std::find(features.begin(), features.end(), *it); // cannot find same featuere (defined by id) in the features if (findIt == features.end()) { VxlEKFFeaturePoint feat(p, q); feat.id_ = it->id_; features.push_back(feat); printf("add a new feature with id %d\n", (int)feat.id_); } } } vcl_vector<vgl_point_2d<double> > worldPts; vcl_vector<vgl_point_2d<double> > imagePts; // add noise to current observation for (vcl_list<VxlEKFFeaturePoint>::iterator it = features.begin(); it != features.end(); it++) { 
vgl_point_3d<double> p = it->worldPt(); vgl_point_2d<double> q = gdCamera.project(p); double x = q.x(); double y = q.y(); x += delta * rnd.normal(); y += delta * rnd.normal(); it->setImagePoint(x, y); worldPts.push_back(vgl_point_2d<double>(p.x(), p.y())); imagePts.push_back(it->imagePt()); } vpgl_perspective_camera<double> initCamera; vpgl_perspective_camera<double> finalCamera; bool isInit = VpglPlus::init_calib(worldPts, imagePts, pp, initCamera); if (!isInit) { printf("initiate camera error\n"); continue; } bool isFinal = VpglPlus::optimize_perspective_camera(worldPts, imagePts, initCamera, finalCamera); if (!isFinal) { printf("final camera error\n"); continue; } // double pan = 0; double tilt = 0; double zoom = 0; bool isPTZ = VxlPTZCamera::CameraToPTZ(finalCamera, pan, tilt, zoom); assert(isPTZ); observedPans.push_back(pan); observedTilts.push_back(tilt); observedZooms.push_back(zoom); printf("observed pan tilt focal length is %f %f %f\n", pan, tilt, zoom); ptzDynamicEKF.updateCameraFeature(camera, features, Xk, Pk); printf("gd pan tilt focal length is %f %f %f\n\n", ptzs[i].pan, ptzs[i].tilt, ptzs[i].fl); smoothedPans.push_back(Xk[0]); smoothedTilts.push_back(Xk[1]); smoothedZooms.push_back(Xk[2]); } //save vnl_vector<double> gdPanVec(&gdPans[0], (int)gdPans.size()); vnl_vector<double> observedPanVec(&observedPans[0], (int)observedPans.size()); vnl_vector<double> smoothedPanVec(&smoothedPans[0], (int)smoothedPans.size()); vnl_vector<double> gdZoomVec(&gdZooms[0], (int)gdZooms.size()); vnl_vector<double> observedZoomVec(&observedZooms[0], (int)observedZooms.size()); vnl_vector<double> smoothedZoomVec(&smoothedZooms[0], (int)smoothedZooms.size()); vnl_vector<double> gdTiltVec(&gdTilts[0], (int)gdTilts.size()); vnl_vector<double> observedTiltVec(&observedTilts[0], (int)observedTilts.size()); vnl_vector<double> smoothedTiltVec(&smoothedTilts[0], (int)observedTilts.size()); vcl_string save_file("EKF_dynamic_ptz.mat"); vnl_matlab_filewrite 
awriter(save_file.c_str()); awriter.write(gdPanVec, "gdPan"); awriter.write(observedPanVec, "observedPan"); awriter.write(smoothedPanVec, "sm_pan"); awriter.write(gdZoomVec, "gdZoom"); awriter.write(observedZoomVec, "observedZoom"); awriter.write(smoothedZoomVec, "smoothedZoom"); awriter.write(gdTiltVec, "gdTilt"); awriter.write(vnl_vector<double>(&observedTilts[0], (int)observedTilts.size()), "observedTilt"); awriter.write(vnl_vector<double>(&smoothedTilts[0], (int)observedTilts.size()), "smoothedTilt"); printf("save to %s\n", save_file.c_str()); }