void EdgeBoxGenerator::prepDataStructs( arrayf &E )
{
  int c, r, i;

  // initialize step sizes
  _scStep=sqrt(1/_alpha);
  _arStep=(1+_alpha)/(2*_alpha);
  _rcStepRatio=(1-_alpha)/(1+_alpha);

  // create _scaleNorm
  _scaleNorm.resize(10000);
  for( i=0; i<10000; i++ ) _scaleNorm[i]=pow(1.f/i,_kappa);

  // create _segIImg
  arrayf E1; E1.init(h,w);
  for( i=0; i<_segCnt; i++ ) if( _segMag[i]>0 ) {
    E1.val(_segC[i],_segR[i]) = _segMag[i]; }
  _segIImg.init(h+1,w+1);
  for( c=1; c<w; c++ ) for( r=1; r<h; r++ ) {
    _segIImg.val(c+1,r+1) = E1.val(c,r) + _segIImg.val(c,r+1) +
      _segIImg.val(c+1,r) - _segIImg.val(c,r); }

  // create _magIImg
  _magIImg.init(h+1,w+1);
  for( c=1; c<w; c++ ) for( r=1; r<h; r++ ) {
    float e = E.val(c,r) > _edgeMinMag ? E.val(c,r) : 0;
    _magIImg.val(c+1,r+1) = e + _magIImg.val(c,r+1) +
      _magIImg.val(c+1,r) - _magIImg.val(c,r); }

  // create remaining data structures
  _hIdxs.resize(h); _hIdxImg.init(h,w);
  for( r=0; r<h; r++ ) {
    int s=0, s1; _hIdxs[r].push_back(s);
    for( c=0; c<w; c++ ) {
      s1 = _segIds.val(c,r);
      if( s1!=s ) { s=s1; _hIdxs[r].push_back(s); }
      _hIdxImg.val(c,r) = int(_hIdxs[r].size())-1;
    }
  }
  _vIdxs.resize(w); _vIdxImg.init(h,w);
  for( c=0; c<w; c++ ) {
    int s=0; _vIdxs[c].push_back(s);
    for( r=0; r<h; r++ ) {
      int s1 = _segIds.val(c,r);
      if( s1!=s ) { s=s1; _vIdxs[c].push_back(s); }
      _vIdxImg.val(c,r) = int(_vIdxs[c].size())-1;
    }
  }

  // initialize scoreBox() data structures
  int n=_segCnt+1;
  _sWts.init(n,1); _sDone.init(n,1); _sMap.init(n,1); _sIds.init(n,1);
  for( i=0; i<n; i++ ) _sDone.val(0,i)=-1;
  _sId=0;
}
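// Note: _segIImg and _magIImg above follow the standard summed-area-table
// recurrence, so the total magnitude inside any axis-aligned box can later be
// read back with four lookups. The sketch below is a hedged illustration of
// that lookup only; the free function and the name boxSum are not part of the
// class API.
static float boxSum( arrayf &ii, int c0, int r0, int c1, int r1 ) {
  // sum over columns c0..c1 and rows r0..r1 (inclusive), assuming the table was
  // filled as ii(c+1,r+1) = cumulative sum up to column c and row r;
  return ii.val(c1+1,r1+1) - ii.val(c0,r1+1)
       - ii.val(c1+1,r0)   + ii.val(c0,r0);
}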
void EdgeBoxGenerator::clusterEdges( arrayf &E, arrayf &O, arrayf &V )
{
  int c, r, cd, rd, i, j; h=E._h; w=E._w;

  // greedily merge connected edge pixels into clusters (create _segIds)
  _segIds.init(h,w); _segCnt=1;
  for( c=0; c<w; c++ ) for( r=0; r<h; r++ ) {
    if( c==0 || r==0 || c==w-1 || r==h-1 || E.val(c,r)<=_edgeMinMag )
      _segIds.val(c,r)=-1; else _segIds.val(c,r)=0;
  }
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ ) {
    if(_segIds.val(c,r)!=0) continue;
    float sumv=0; int c0=c, r0=r; vectorf vs; vectori cs, rs;
    while( sumv < _edgeMergeThr ) {
      _segIds.val(c0,r0)=_segCnt;
      float o0 = O.val(c0,r0), o1, v; bool found;
      for( cd=-1; cd<=1; cd++ ) for( rd=-1; rd<=1; rd++ ) {
        if( _segIds.val(c0+cd,r0+rd)!=0 ) continue; found=false;
        for( i=0; i<cs.size(); i++ )
          if( cs[i]==c0+cd && rs[i]==r0+rd ) { found=true; break; }
        if( found ) continue;
        o1=O.val(c0+cd,r0+rd); v=fabs(o1-o0)/PI; if(v>.5) v=1-v;
        vs.push_back(v); cs.push_back(c0+cd); rs.push_back(r0+rd);
      }
      float minv=1000; j=0;
      for( i=0; i<vs.size(); i++ ) if( vs[i]<minv ) {
        minv=vs[i]; c0=cs[i]; r0=rs[i]; j=i; }
      sumv+=minv; if(minv<1000) vs[j]=1000;
    }
    _segCnt++;
  }

  // merge or remove small segments
  _segMag.resize(_segCnt,0);
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ )
    if( (j=_segIds.val(c,r))>0 ) _segMag[j]+=E.val(c,r);
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ )
    if( (j=_segIds.val(c,r))>0 && _segMag[j]<=_clusterMinMag)
      _segIds.val(c,r)=0;
  i=1; while(i>0) {
    i=0;
    for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ ) {
      if( _segIds.val(c,r)!=0 ) continue;
      float o0=O.val(c,r), o1, v, minv=1000; j=0;
      for( cd=-1; cd<=1; cd++ ) for( rd=-1; rd<=1; rd++ ) {
        if( _segIds.val(c+cd,r+rd)<=0 ) continue;
        o1=O.val(c+cd,r+rd); v=fabs(o1-o0)/PI; if(v>.5) v=1-v;
        if( v<minv ) { minv=v; j=_segIds.val(c+cd,r+rd); }
      }
      _segIds.val(c,r)=j; if(j>0) i++;
    }
  }

  // compactify representation
  _segMag.assign(_segCnt,0); vectori map(_segCnt,0); _segCnt=1;
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ )
    if( (j=_segIds.val(c,r))>0 ) _segMag[j]+=E.val(c,r);
  for( i=0; i<_segMag.size(); i++ ) if( _segMag[i]>0 ) map[i]=_segCnt++;
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ )
    if( (j=_segIds.val(c,r))>0 ) _segIds.val(c,r)=map[j];

  // compute positional means and recompute _segMag
  _segMag.assign(_segCnt,0);
  vectorf meanX(_segCnt,0), meanY(_segCnt,0);
  vectorf meanOx(_segCnt,0), meanOy(_segCnt,0), meanO(_segCnt,0);
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ ) {
    j=_segIds.val(c,r); if(j<=0) continue;
    float m=E.val(c,r), o=O.val(c,r); _segMag[j]+=m;
    meanOx[j]+=m*cos(2*o); meanOy[j]+=m*sin(2*o);
    meanX[j]+=m*c; meanY[j]+=m*r;
  }
  for( i=0; i<_segCnt; i++ ) if( _segMag[i]>0 ) {
    float m=_segMag[i];
    meanX[i]/=m; meanY[i]/=m;
    meanO[i]=atan2(meanOy[i]/m,meanOx[i]/m)/2;
  }

  // compute segment affinities
  _segAff.resize(_segCnt); _segAffIdx.resize(_segCnt);
  for(i=0; i<_segCnt; i++) _segAff[i].resize(0);
  for(i=0; i<_segCnt; i++) _segAffIdx[i].resize(0);
  const int rad = 2;
  for( c=rad; c<w-rad; c++ ) for( r=rad; r<h-rad; r++ ) {
    int s0=_segIds.val(c,r); if( s0<=0 ) continue;
    for( cd=-rad; cd<=rad; cd++ ) for( rd=-rad; rd<=rad; rd++ ) {
      int s1=_segIds.val(c+cd,r+rd); if(s1<=s0) continue;
      bool found = false;
      for(i=0;i<_segAffIdx[s0].size();i++)
        if(_segAffIdx[s0][i] == s1) { found=true; break; }
      if( found ) continue;
      float o=atan2(meanY[s0]-meanY[s1],meanX[s0]-meanX[s1])+PI/2;
      float a=fabs(cos(meanO[s0]-o)*cos(meanO[s1]-o)); a=pow(a,_gamma);
      _segAff[s0].push_back(a); _segAffIdx[s0].push_back(s1);
      _segAff[s1].push_back(a); _segAffIdx[s1].push_back(s0);
    }
  }

  // compute _segC and _segR
  _segC.resize(_segCnt); _segR.resize(_segCnt);
  for( c=1; c<w-1; c++ ) for( r=1; r<h-1; r++ )
    if( (j=_segIds.val(c,r))>0 ) { _segC[j]=c; _segR[j]=r; }

  // optionally create visualization (assumes V was allocated with 3*w*h floats)
  if( V._x ) for( c=0; c<w; c++ ) for( r=0; r<h; r++ ) {
    i=_segIds.val(c,r);
    V.val(c+w*0,r) = i<=0 ? 1 : ((123*i + 128)%255)/255.0f;
    V.val(c+w*1,r) = i<=0 ? 1 : ((7*i + 3)%255)/255.0f;
    V.val(c+w*2,r) = i<=0 ? 1 : ((174*i + 80)%255)/255.0f;
  }
}
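// Note: both the greedy merge and the small-segment reassignment in
// clusterEdges compare edge orientations with the same wrapped distance:
// orientations live in [0,PI), so the normalized difference is folded back to
// [0,0.5]. The free function below is a hedged illustration of that metric
// only; the name orientDist is not part of the class.
static float orientDist( float o0, float o1 ) {
  float v = fabs(o1-o0)/PI;   // normalized absolute orientation difference
  if( v > .5f ) v = 1-v;      // fold: orientation is periodic with period PI
  return v;                   // 0 = parallel edges, 0.5 = perpendicular edges
}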
ErrorCode GPUODESolverFixedStepIterative::SimulateODE( const matrixf& A, const matrixf& B,
    const matrixf& C, const matrixf& D, double tStart, double tEnd, double tStep,
    const vectorf& x0, vectorf& tVect, matrixf& xVect)
{
    try{
        InitializeSolverData( A, B, tStart, tEnd, tStep, x0);
        BuildOutputObjects(C, D);
    } catch(std::exception& except){
        return ParODE_NotEnoughResources;
    }

    // fill the time vector
    tVect.resize(_nTotalStepsSimulation);
    for(int ii = 0; ii < _nTotalStepsSimulation - _nSteps; ii++)
        tVect[ii] = tStart - (_nTotalStepsSimulation - _nSteps - ii) * tStep;
    tVect[ _nTotalStepsSimulation - _nSteps ] = tStart;
    for(int ii = 1; ii < _nSteps; ii++)
        tVect[(_nTotalStepsSimulation - _nSteps) + ii] =
            tVect[(_nTotalStepsSimulation - _nSteps) + ii - 1] + tStep;

    xVect.resize(_nOutputSize, _nTotalStepsSimulation);

    try{
        if(_nTotalStepsSimulation != _nSteps){
            matrixf xVectInit;
            matrixf uVectInit;
            GetInitialStatesAndInputs(xVectInit, uVectInit);
            // compute outVectInit
            if(_bZeroC == 1 && _bZeroD == 1){
                // copy the state into the output
                for(int ii = 0; ii < _nTotalStepsSimulation - _nSteps; ii++)
                    for(int jj = 0; jj < _nOutputSize; jj++)
                        xVect(jj, ii) = xVectInit(jj, ii);
            }
            else{
                for(int ii = 0; ii < _nTotalStepsSimulation - _nSteps; ii++)
                    for(int jj = 0; jj < _nOutputSize; jj++){
                        fType val = 0.0;
                        if(!_bZeroC)
                            for(int kk = 0; kk < _nSystemSize; kk++)
                                val += C(jj, kk) * xVectInit(kk, ii);
                        if(!_bZeroD)
                            for(int kk = 0; kk < _nInputs; kk++)
                                val += D(jj, kk) * uVectInit(kk, ii);
                        xVect(jj, ii) = val;
                    }
            }
        }
    } catch(exception& e){
        std::cerr << "Exception caught : " << e.what() << std::endl;
        return ParODE_InvalidSolverType;
    }

    // compute the total number of steps per batch
    unsigned long NSB = 0; // number of steps per batch
    unsigned long maxStepsPerDevice = 0;
    unsigned int nDevices = _pGPUM->GetNumberOfDevices();
    assert(_nStepsPerDevice.size() == nDevices);
    for(int ii = 0; ii < _nStepsPerDevice.size(); ii++){
        NSB += _nStepsPerDevice[ii];
        if(maxStepsPerDevice < _nStepsPerDevice[ii])
            maxStepsPerDevice = _nStepsPerDevice[ii];
    }
    int nB;

    // accumulators for computing the global scan
    fType* accMatrix = new fType[_nMatrixSize * _nMatrixSize];
    fType* accVector = new fType[_nMatrixSize];
    // initialize the accumulator vector with the initial value X0
    // initialize the accumulator matrix with identity
    for(int ii = 0; ii < _nMatrixSize; ii++){
        accVector[ii] = _X0(ii);
        for(int jj = 0; jj < _nMatrixSize; jj++)
            accMatrix[ii * _nMatrixSize + jj] = _M0(ii, jj);
    }
    long globalResultTIndex(_nTotalStepsSimulation - _nSteps); // global simulation time index

    try{
        for(nB = 0; nB < (_nSteps - 1) / NSB + 1; nB++){
            // test if this is the last iteration
            if( nB == (_nSteps - 1) / NSB ){
                long nStepsLeft = _nSteps - nB * NSB;
                // distribute the steps left
                int ii;
                for(ii = 0; ii < _nStepsPerDevice.size(); ii++){
                    if(nStepsLeft <= _nStepsPerDevice[ii])
                        break;
                    nStepsLeft -= _nStepsPerDevice[ii];
                }
                assert(ii < _nStepsPerDevice.size());
                // re-adjust to a power of two
                int nStepsii(1);
                while(nStepsii < nStepsLeft)
                    nStepsii <<= 1;
                _nStepsPerDevice[ii++] = nStepsii;
                for(; ii < _nStepsPerDevice.size(); ii++)
                    _nStepsPerDevice[ii] = 0;
            }

            unsigned int nDevices = _nStepsPerDevice.size();
            vector<int> nSource(nDevices, 0), nDest(nDevices, 1);
            double tStartBatch = tStart + nB * NSB * tStep;
            vector<int> nElementStart( _nStepsPerDevice.size(), 0 );
            for(int ii = 1; ii < nElementStart.size(); ii++)
                nElementStart[ii] = nElementStart[ii-1] + _nStepsPerDevice[ii-1];
            for(int nDI = 0; nDI < nDevices; nDI++)
                InitializeBatchData(_BufferMatrices[nSource[nDI]][nDI],
                    _BufferVectors[nSource[nDI]][nDI], nDI,
                    tStartBatch + nElementStart[nDI] * tStep, tStep, _nStepsPerDevice[nDI]);

            // up-sweep
            for(int d = 1; d < maxStepsPerDevice; d <<= 1 ){
                for(int nDI = 0; nDI < nDevices; nDI++)
                    if(d < _nStepsPerDevice[nDI]){
                        // set parameters for MM upsweep
                        vector<size_t> szLocal(2), szGlobal(2);
                        szLocal[0] = szLocal[1] = _nLocalSizeMM;
                        szGlobal[0] = szLocal[0] * _nStepsPerDevice[nDI] / (2 * d);
                        szGlobal[1] = szLocal[1];
                        _vectGPUKernels[nDI * _nKernels]->GetKernel<MMKernelScanUpsweep>()->Launch(
                            szLocal, szGlobal, 0,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferMatrices[nDest[nDI]][nDI],
                            d, _nMatrixSize);
                        // set parameters for MVV upsweep
                        szLocal[0] = _nLocalSizeMVVColumn;
                        szLocal[1] = _nLocalSizeMVVRow;
                        szGlobal[0] = szLocal[0] * _nStepsPerDevice[nDI] / (2 * d);
                        szGlobal[1] = szLocal[1];
                        _vectGPUKernels[nDI * _nKernels + 1]->GetKernel<MVVKernelScanUpsweep>()->Launch(
                            szLocal, szGlobal, 1,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferVectors[nSource[nDI]][nDI],
                            _BufferVectors[nDest[nDI]][nDI], d, _nMatrixSize);
                        // copy the left leaf
                        // set parameters for copy kernel for the matrix component
                        int nMSZ = _nMatrixSize * _nMatrixSize;
                        vector<size_t> szLocalCopy(1), szGlobalCopy(1);
                        szLocalCopy[0] = min(nMSZ, (int)(_pGPUM->GetDeviceAndContext(nDI)->MaxWorkgroupSize()));
                        szGlobalCopy[0] = szLocalCopy[0] * _nStepsPerDevice[nDI] / (2 * d);
                        _vectGPUKernels[nDI * _nKernels + 2]->GetKernel<LeftLeafCopyKernelScanUpsweep>()->Launch(
                            szLocalCopy, szGlobalCopy, 0,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferMatrices[nDest[nDI]][nDI],
                            d, nMSZ);
                        // set parameters for copy kernel for the vector component
                        szLocalCopy[0] = _nLocalSizeMM;
                        szGlobalCopy[0] = szLocalCopy[0] * _nStepsPerDevice[nDI] / (2 * d);
                        _vectGPUKernels[nDI * _nKernels + 2]->GetKernel<LeftLeafCopyKernelScanUpsweep>()->Launch(
                            szLocalCopy, szGlobalCopy, 1,
                            _BufferVectors[nSource[nDI]][nDI], _BufferVectors[nDest[nDI]][nDI],
                            d, _nMatrixSize);
                    }
                for(int nDI = 0; nDI < _nStepsPerDevice.size(); nDI++)
                    if(d < _nStepsPerDevice[nDI]){
                        // synchronize both queues for device nDI
                        _pGPUM->GetDeviceAndContext(nDI)->GetQueue(0)->Synchronize();
                        _pGPUM->GetDeviceAndContext(nDI)->GetQueue(1)->Synchronize();
                        // swap source and destination
                        nSource[nDI] = nDest[nDI];
                        nDest[nDI] = (nSource[nDI] + 1) % 2;
                    }
            }

            // copy the reduction data from the devices to the host memory
            // temporary buffers for copying data
            fType* bufferMatrix = new fType[_nMatrixSize * _nMatrixSize];
            fType* bufferVector = new fType[_nMatrixSize];
            fType* tempMatrix = new fType[_nMatrixSize * _nMatrixSize];
            fType* tempVector = new fType[_nMatrixSize];
            for(int nDI = 0; nDI < _nStepsPerDevice.size(); nDI++){
                // copy the reduction result
                _BufferMatrices[nSource[nDI]][nDI]->MemRead( (void*)bufferMatrix,
                    _BufferMatrices[nSource[nDI]][nDI]->GetContext()->GetQueue(0),
                    _nMatrixSize * _nMatrixSize * (_nStepsPerDevice[nDI] - 1) * sizeof(fType),
                    _nMatrixSize * _nMatrixSize * sizeof(fType));
                _BufferVectors[nSource[nDI]][nDI]->MemRead( (void*)bufferVector,
                    _BufferVectors[nSource[nDI]][nDI]->GetContext()->GetQueue(0),
                    _nMatrixSize * (_nStepsPerDevice[nDI] - 1) * sizeof(fType),
                    _nMatrixSize * sizeof(fType));
                // wait for the memcopy to complete
                _BufferMatrices[nSource[nDI]][nDI]->GetContext()->GetQueue(0)->Synchronize();
                // save the accumulator into the temporary buffers
                memcpy(tempMatrix, accMatrix, _nMatrixSize * _nMatrixSize * sizeof(fType));
                memcpy(tempVector, accVector, _nMatrixSize * sizeof(fType));
                // compute the new accumulator matrix
                for(int row = 0; row < _nMatrixSize; row++)
                    for(int column = 0; column < _nMatrixSize; column++){
                        fType resProd = 0.0;
                        for(int kk = 0; kk < _nMatrixSize; kk++)
                            resProd += bufferMatrix[row * _nMatrixSize + kk] * tempMatrix[kk * _nMatrixSize + column];
                        accMatrix[row * _nMatrixSize + column] = resProd;
                    }
                // compute the new accumulator vector
                for(int row = 0; row < _nMatrixSize; row++){
                    fType resVal(0.0);
                    for(int column = 0; column < _nMatrixSize; column++)
                        resVal += bufferMatrix[row * _nMatrixSize + column] * tempVector[column];
                    accVector[row] = resVal + bufferVector[row];
                }
                // write the old accumulators back into the device reduction buffers
                _BufferMatrices[nSource[nDI]][nDI]->MemWrite(tempMatrix,
                    _BufferMatrices[nSource[nDI]][nDI]->GetContext()->GetQueue(0),
                    _nMatrixSize * _nMatrixSize * (_nStepsPerDevice[nDI] - 1) * sizeof(fType),
                    _nMatrixSize * _nMatrixSize * sizeof(fType));
                _BufferVectors[nSource[nDI]][nDI]->MemWrite(tempVector,
                    _BufferVectors[nSource[nDI]][nDI]->GetContext()->GetQueue(0),
                    _nMatrixSize * (_nStepsPerDevice[nDI] - 1) * sizeof(fType),
                    _nMatrixSize * sizeof(fType));
                // wait for the memcpy to finish
                _BufferVectors[nSource[nDI]][nDI]->GetContext()->GetQueue(0)->Synchronize();
            }
            delete [] bufferMatrix;
            delete [] bufferVector;
            delete [] tempMatrix;
            delete [] tempVector;

            // downsweep stage
            for(int d = maxStepsPerDevice / 2; d > 0; d >>= 1 ){
                for(int nDI = 0; nDI < nDevices; nDI++)
                    if(d < _nStepsPerDevice[nDI]){
                        // set parameters for MM kernel downsweep
                        vector<size_t> szLocal(2), szGlobal(2);
                        szLocal[0] = szLocal[1] = _nLocalSizeMM;
                        szGlobal[0] = szLocal[0] * _nStepsPerDevice[nDI] / (2 * d);
                        szGlobal[1] = szLocal[1];
                        _vectGPUKernels[nDI * _nKernels + 3]->GetKernel<MMKernelScanDownsweep>()->Launch(
                            szLocal, szGlobal, 0,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferMatrices[nDest[nDI]][nDI],
                            d, _nMatrixSize);
                        // call the MVV kernel for downsweep; queue 1
                        szLocal[0] = _nLocalSizeMVVColumn;
                        szLocal[1] = _nLocalSizeMVVRow;
                        szGlobal[0] = szLocal[0] * _nStepsPerDevice[nDI] / (2 * d);
                        szGlobal[1] = szLocal[1];
                        _vectGPUKernels[nDI * _nKernels + 4]->GetKernel<MVVKernelScanDownsweep>()->Launch(
                            szLocal, szGlobal, 1,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferVectors[nSource[nDI]][nDI],
                            _BufferVectors[nDest[nDI]][nDI], d, _nMatrixSize);
                        // set parameters for the matrix copy downsweep
                        // call the copy kernel for downsweep; queue 0 (copy the matrix component)
                        vector<size_t> szLocalCopy(1), szGlobalCopy(1);
                        int nMSZ = _nMatrixSize * _nMatrixSize;
                        szLocalCopy[0] = min(nMSZ, (int)(_pGPUM->GetDeviceAndContext(nDI)->MaxWorkgroupSize()));
                        szGlobalCopy[0] = szLocalCopy[0] * _nStepsPerDevice[nDI] / (2 * d);
                        _vectGPUKernels[nDI * _nKernels + 5]->GetKernel<RootCopyKernelScanDownsweep>()->Launch(
                            szLocalCopy, szGlobalCopy, 0,
                            _BufferMatrices[nSource[nDI]][nDI], _BufferMatrices[nDest[nDI]][nDI],
                            d, nMSZ);
                        // set parameters for the vector copy downsweep
                        // call the copy kernel for downsweep; queue 1 (copy the vector component)
                        szLocalCopy[0] = _nLocalSizeMM;
                        szGlobalCopy[0] = szLocalCopy[0] * _nStepsPerDevice[nDI] / (2 * d);
                        _vectGPUKernels[nDI * _nKernels + 5]->GetKernel<RootCopyKernelScanDownsweep>()->Launch(
                            szLocalCopy, szGlobalCopy, 1,
                            _BufferVectors[nSource[nDI]][nDI], _BufferVectors[nDest[nDI]][nDI],
                            d, _nMatrixSize);
                    }
                // synchronize all devices
                for(int nDI = 0; nDI < _nStepsPerDevice.size(); nDI++)
                    if(d < _nStepsPerDevice[nDI]){
                        // synchronize both queues for device nDI
                        _pGPUM->GetDeviceAndContext(nDI)->GetQueue(0)->Synchronize();
                        _pGPUM->GetDeviceAndContext(nDI)->GetQueue(1)->Synchronize();
                        // swap source and destination
                        nSource[nDI] = nDest[nDI];
                        nDest[nDI] = (nSource[nDI] + 1) % 2;
                    }
            }

            // the result is in nSource[nDI] for each device
            // copy the result from the device memory to the result vector
            for(int nDI = 0; nDI < nDevices; nDI++)
                if(_nStepsPerDevice[nDI] > 0){
                    fType* buffer;
                    int bufferStride;
                    if(_bZeroC == 1 && _bZeroD == 1){
                        buffer = new fType[ _nStepsPerDevice[nDI] * _nMatrixSize];
                        _BufferVectors[nSource[nDI]][nDI]->MemRead( (void*)buffer,
                            _BufferVectors[nSource[nDI]][nDI]->GetContext()->GetQueue(0),
                            0, _nStepsPerDevice[nDI] * _nMatrixSize * sizeof(fType));
                        bufferStride = _nMatrixSize;
                    }
                    else{
                        // compute the output Y
                        // set parameters for the SystemOutput kernel
                        vector<size_t> szLocalOutputKernel(2), szGlobalOutputKernel(2);
                        szLocalOutputKernel[0] = _nLocalSizeMVVColumn;
                        szLocalOutputKernel[1] = _nLocalSizeMVVRow;
                        szGlobalOutputKernel[0] = _nStepsPerDevice[nDI] * _nLocalSizeMVVColumn; // this should actually be nElementsLocal - 1
                        szGlobalOutputKernel[1] = szLocalOutputKernel[1];
                        _vectGPUKernels[nDI * _nKernels + 8]->GetKernel<SystemOutput>()->Launch(
                            szLocalOutputKernel, szGlobalOutputKernel, 0,
                            _BufferC[nDI], _BufferD[nDI], _BufferVectors[nSource[nDI]][nDI],
                            _vectUBuffers[nDI], _BufferYVectors[nDI],
                            _nStateStride, _nStateOffset, _nInputStride, _nInputOffset,
                            _nSystemSize, _nInputs, _nOutputSize, _bZeroC, _bZeroD);
                        _pGPUM->GetDeviceAndContext(nDI)->GetQueue(0)->Synchronize();
                        // copy the result from the GPU memory to the CPU memory
                        buffer = new fType[ _nStepsPerDevice[nDI] * _nOutputSize];
                        _BufferYVectors[nDI]->MemRead((void*)buffer,
                            _BufferYVectors[nDI]->GetContext()->GetQueue(0),
                            0, _nStepsPerDevice[nDI] * _nOutputSize * sizeof(fType));
                        bufferStride = _nOutputSize;
                    }
                    for(int ii = 0; ii < _nStepsPerDevice[nDI]; ii++)
                        if(globalResultTIndex + ii < xVect.size2())
                            for(int jj = 0; jj < _nOutputSize; jj++)
                                xVect(jj, globalResultTIndex + ii) = buffer[(ii + 1) * bufferStride - _nOutputSize + jj];
                    // we assume that for state vectors that include more than one state
                    // the current 'original' state is stored at the end of the vector
                    delete [] buffer;
                    globalResultTIndex += _nStepsPerDevice[nDI];
                }
        }
    } catch(exception& except){
        delete [] accMatrix;
        delete [] accVector;
        DeleteGPUFSIObjects();
        CleanSolverData();
        return ParODE_NotEnoughResources;
    }

    delete [] accMatrix;
    delete [] accVector;
    CleanSolverData();
    DeleteGPUFSIObjects();
    return ParODE_OK;
}
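// Note: the up-sweep/down-sweep above is a Blelloch-style scan over affine
// steps x_{k+1} = M_k * x_k + v_k. Composing two steps,
// (M2, v2) o (M1, v1) = (M2*M1, M2*v1 + v2), is associative, which is what
// allows the per-step (matrix, vector) pairs to be combined in parallel and
// then stitched across batches through accMatrix/accVector on the host (the
// accumulator update in the batch loop performs exactly this composition).
// Below is a hedged CPU reference of that composition; the free function and
// the name composeAffine are illustrative only and not part of the solver API.
// n is the matrix dimension (_nMatrixSize); Mout/vout must not alias inputs.
static void composeAffine( const fType* M2, const fType* v2,
                           const fType* M1, const fType* v1,
                           fType* Mout, fType* vout, int n )
{
    for(int r = 0; r < n; r++){
        fType acc = 0.0;
        for(int c = 0; c < n; c++){
            fType s = 0.0;
            for(int k = 0; k < n; k++)
                s += M2[r*n + k] * M1[k*n + c];
            Mout[r*n + c] = s;            // Mout = M2 * M1
            acc += M2[r*n + c] * v1[c];   // acc  = (M2 * v1)[r]
        }
        vout[r] = acc + v2[r];            // vout = M2 * v1 + v2
    }
}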
//! set the root score of the detection
void setScore(float confidence)
{
    if (confidence_.size() == 0)
        confidence_.resize(1);
    confidence_[0] = confidence;
}