/**
 * Builds a permuted copy of pIn using the generic get()/set() element
 * accessors.
 *
 * The source is read linearly (index 0 .. getSize()-1). A per-dimension
 * counter tracks the current multi-index; after every copied element the
 * destination index is advanced by offset[j], and when dimension j wraps
 * around, maxOffset[j] is subtracted and the carry moves on to dimension
 * j + 1.
 *
 * @param pIn      source object; only read through getDims(),
 *                 getDimsArray(), getSize(), get() and clone().
 * @param dimsVect permutation description, forwarded as-is to
 *                 computeOffsets().
 * @return the object returned by pIn->clone(), with its elements rewritten
 *         in permuted order.
 */
T* doPermute(T* pIn, const std::vector<int>& dimsVect)
{
    const int iDims = pIn->getDims();
    int* piDimsArray = pIn->getDimsArray();

    // Scratch buffers live in vectors so they are released automatically,
    // including when computeOffsets() or clone() throws.
    std::vector<int> offset(iDims);
    std::vector<int> maxOffset(iDims);
    std::vector<int> index(iDims, 0);

    // computeOffsets() is expected to fill offset[] and maxOffset[]; both are
    // read below without any other initialisation.
    computeOffsets(iDims, piDimsArray, dimsVect, offset.data(), maxOffset.data());

    T* pOut = pIn->clone();

    const int iSize = pIn->getSize();
    for (int iSource = 0, iDest = 0; iSource < iSize; iSource++)
    {
        pOut->set(iDest, pIn->get(iSource));

        // Increment the multi-index with carry, keeping iDest in step.
        for (int j = 0; j < iDims; j++)
        {
            ++index[j];
            iDest += offset[j];
            if (index[j] < piDimsArray[j])
            {
                break;
            }

            // Dimension j overflowed: rewind it and carry into j + 1.
            iDest -= maxOffset[j];
            index[j] = 0;
        }
    }

    return pOut;
}
void PtexReader::readFaceData(FilePos pos, FaceDataHeader fdh, Res res, int levelid, FaceData*& face) { // keep new face local until fully initialized FaceData* volatile newface = 0; seek(pos); switch (fdh.encoding()) { case enc_constant: { ConstantFace* pf = new ConstantFace((void**)&face, _cache, _pixelsize); readBlock(pf->data(), _pixelsize); if (levelid==0 && _premultiply && _header.hasAlpha()) PtexUtils::multalpha(pf->data(), 1, _header.datatype, _header.nchannels, _header.alphachan); newface = pf; } break; case enc_tiled: { Res tileres; readBlock(&tileres, sizeof(tileres)); uint32_t tileheadersize; readBlock(&tileheadersize, sizeof(tileheadersize)); TiledFace* tf = new TiledFace((void**)&face, _cache, res, tileres, levelid, this); readZipBlock(&tf->_fdh[0], tileheadersize, FaceDataHeaderSize * tf->_ntiles); computeOffsets(tell(), tf->_ntiles, &tf->_fdh[0], &tf->_offsets[0]); newface = tf; } break; case enc_zipped: case enc_diffzipped: { int uw = res.u(), vw = res.v(); int npixels = uw * vw; int unpackedSize = _pixelsize * npixels; PackedFace* pf = new PackedFace((void**)&face, _cache, res, _pixelsize, unpackedSize); bool useMalloc = unpackedSize > AllocaMax; void* tmp = useMalloc ? malloc(unpackedSize) : alloca(unpackedSize); readZipBlock(tmp, fdh.blocksize(), unpackedSize); if (fdh.encoding() == enc_diffzipped) PtexUtils::decodeDifference(tmp, unpackedSize, _header.datatype); PtexUtils::interleave(tmp, uw * DataSize(_header.datatype), uw, vw, pf->data(), uw * _pixelsize, _header.datatype, _header.nchannels); if (levelid==0 && _premultiply && _header.hasAlpha()) PtexUtils::multalpha(pf->data(), npixels, _header.datatype, _header.nchannels, _header.alphachan); newface = pf; if (useMalloc) free(tmp); } break; } face = newface; }
/*
 * Calls computeOffsets() once per segment of `line`, i.e. for every pair of
 * consecutive coordinates (pts[i-1], pts[i]).
 *
 * A line with fewer than two coordinates violates the precondition checked
 * by the assert. When asserts are compiled out, such a line yields no
 * computeOffsets() call instead of a size_t underflow in the loop bound.
 */
/*private*/
void
OffsetPointGenerator::extractPoints(const LineString* line)
{
    const CoordinateSequence& pts = *(line->getCoordinatesRO());
    assert(pts.size() > 1);

    // Index by segment END point so no "size() - 1" is needed: with an
    // unsigned size that subtraction wraps around for an empty sequence.
    const size_t n = pts.size();
    for (size_t i = 1; i < n; ++i) {
        computeOffsets(pts[i - 1], pts[i]);
    }
}
/*
 * Convolves the irows x icols image `im` with the krows x kcols kernel
 * `kern` by delegating to ispc::convolve2D.
 *
 * A zero-initialised buffer of irows*icols floats is allocated for the
 * result, and a krows*kcols offset table is built with computeOffsets() and
 * handed to the ISPC routine.
 *
 * Returns a newly allocated esvmArr2_f whose `arr` member is the result
 * buffer.
 * NOTE(review): this function does not set output->rows / output->cols
 * itself; presumably ispc::convolve2D does — confirm.
 */
esvmArr2_f *convolve2D(float im[], const int irows, const int icols,
                       float kern[], const int krows, const int kcols)
{
    esvmArr2_f *output = (esvmArr2_f *)esvmMalloc(sizeof(esvmArr2_f));
    output->arr = (float *)esvmCalloc(irows*icols,sizeof(float));

    int *offset = (int *)esvmMalloc(krows*kcols*sizeof(int));
    computeOffsets(offset,krows,kcols,irows,icols);

    ispc::convolve2D(im, irows, icols, kern, krows, kcols,
                     (ispc::esvmArr2_f *)output,offset);

    // The offset table is only an input to the kernel call above and is not
    // reachable from the returned object, so release it here instead of
    // leaking it on every call.
    free(offset);

    return output;
}
/// Stores the box, EBISBox, ghost vector and destination variable index in
/// the corresponding members, then calls computeOffsets() with the source
/// VoFs and their stencils. The constructor body is timed under the label
/// "EBStenVarCoef::EBStenVarCoef" via CH_TIME.
EBStenVarCoef::EBStenVarCoef(const Vector<VolIndex>&       a_srcVofs,
                             const BaseIVFAB<VoFStencil>&  a_vofStencil,
                             const Box&                    a_box,
                             const EBISBox&                a_ebisBox,
                             const IntVect&                a_ghostVect,
                             int                           a_varDest)
  : m_box(a_box),
    m_ebisBox(a_ebisBox),
    m_ghostVect(a_ghostVect),
    m_destVar(a_varDest)
{
  CH_TIME("EBStenVarCoef::EBStenVarCoef");

  computeOffsets(a_srcVofs, a_vofStencil);
}
void PtexReader::readLevel(int levelid, Level*& level) { // temporarily release cache lock so other threads can proceed _cache->cachelock.unlock(); // get read lock and make sure we still need to read AutoMutex locker(readlock); if (level) { // another thread must have read it while we were waiting _cache->cachelock.lock(); // make sure it's still there now that we have the lock if (level) { level->ref(); return; } _cache->cachelock.unlock(); } // go ahead and read the level LevelInfo& l = _levelinfo[levelid]; // keep new level local until finished Level* volatile newlevel = new Level((void**)&level, _cache, l.nfaces); seek(_levelpos[levelid]); readZipBlock(&newlevel->fdh[0], l.levelheadersize, FaceDataHeaderSize * l.nfaces); computeOffsets(tell(), l.nfaces, &newlevel->fdh[0], &newlevel->offsets[0]); // apply edits (if any) to level 0 if (levelid == 0) { for (size_t i = 0, size = _faceedits.size(); i < size; i++) { FaceEdit& e = _faceedits[i]; newlevel->fdh[e.faceid] = e.fdh; newlevel->offsets[e.faceid] = e.pos; } } // don't assign to result until level data is fully initialized _cache->cachelock.lock(); level = newlevel; // clean up unused data _cache->purgeData(); }
esvmArr2_f *convolvePyramids(const esvmHogPyr *feats, const esvmHogPyr *whogs, const bool enablePadding,const int userTasks) { esvmArr2_i *offsets = (esvmArr2_i *)esvmMalloc(sizeof(esvmArr2_i)*feats->num*whogs->num); esvmArr2_f *outputs = (esvmArr2_f *)esvmMalloc(sizeof(esvmArr2_f)*feats->num*whogs->num); #pragma omp parallel for for(int i=0;i<feats->num;i++) { for(int j=0;j<whogs->num;j++) { const int frows = feats->hogs[i]->rows; const int fcols = feats->hogs[i]->cols; const int fbins = feats->hogs[i]->bins; const int wrows = whogs->hogs[j]->rows; const int wcols = whogs->hogs[j]->cols; offsets[i*whogs->num+j].arr = (int *)esvmMalloc(wrows*wcols*sizeof(int)); offsets[i*whogs->num+j].rows = wrows; offsets[i*whogs->num+j].cols = wcols; outputs[i*whogs->num+j].arr = (float *)esvmMalloc(frows*fcols*fbins*sizeof(float)); outputs[i*whogs->num+j].rows = frows; outputs[i*whogs->num+j].cols = fcols; computeOffsets((offsets[i*whogs->num+j].arr),wrows,wcols,frows,fcols); } } ispc::convolvePyramids((ispc::esvmHogPyr *)feats,(ispc::esvmHogPyr *)whogs,(ispc::esvmArr2_i *)offsets,enablePadding,(ispc::esvmArr2_f *)outputs,userTasks); if(__unlikely(enablePadding==true)) { #pragma omp parallel for for(int i=0;i<feats->num;i++) { for(int j=0;j<whogs->num;j++) { const int frows = feats->hogs[i]->rows; const int fcols = feats->hogs[i]->cols; outputs[i*whogs->num+j].arr = (float *)realloc((void *)outputs[i*whogs->num+j].arr,frows*fcols*sizeof(float)); } } } else { #pragma omp parallel for for(int i=0;i<feats->num;i++) { for(int j=0;j<whogs->num;j++) { const int frows = feats->hogs[i]->rows; const int fcols = feats->hogs[i]->cols; const int wrows = whogs->hogs[j]->rows; const int wcols = whogs->hogs[j]->cols; const int apronRows = (wrows/2); const int apronCols = (wcols/2); const int rowEnd = frows-apronRows+(1^(wrows&1)); const int colEnd = fcols - apronCols+(1^(wcols&1)); const int urows = rowEnd-apronRows; const int ucols = colEnd-apronCols; float *uout = (float 
*)esvmMalloc(urows*ucols*sizeof(float)); float *output = outputs[i*whogs->num+j].arr; for(int ii=0;ii<urows;ii++) { float *tmp = uout+ii*ucols; float *tmpout = output+(ii+apronRows)*fcols+apronCols; for(int jj=0;jj<ucols;jj++) { (*tmp++) = (*tmpout++); } } free(output); outputs[i*whogs->num+j].arr = uout; outputs[i*whogs->num+j].rows = urows; outputs[i*whogs->num+j].cols = ucols; } } } return outputs; }
esvmArr2_f *ompConvolve3D(const esvmHog *feat, const esvmHog *whog, const bool enablePadding, const int userTasks) { if(__unlikely(feat==NULL||whog==NULL)) { fprintf(stderr,"convolve3D:: got NULL features or NULL weights\n"); return NULL; } const int frows = feat->rows; const int fcols = feat->cols; const int fbins = feat->bins; const int wrows = whog->rows; const int wcols = whog->cols; const int wbins = whog->bins; if(__unlikely(wbins!=fbins || frows<wrows || fcols<wcols)) { fprintf(stderr,"convolve3D:: dimensions of feature and weights don't match. feat(%d,%d,%d) ; weight (%d,%d,%d)\n",frows,fcols,fbins,wrows,wcols,wbins); return NULL; } const int apronRows = floor(wrows/2); const int apronCols = floor(wcols/2); float *kern = whog->feature; float *output = (float *)esvmCalloc(frows*fcols*fbins,sizeof(float)); if(__unlikely(output==NULL)) { fprintf(stderr,"convolve3D:: Not enough memory for output array. Needed %ld bytes\n",frows*fcols*fbins*sizeof(float)); return NULL; } int *offset = (int *)esvmMalloc(wrows*wcols*sizeof(int)); if(__unlikely(offset==NULL)) { fprintf(stderr,"convolve3D:: Not enough memory for offset array. 
Needed %ld bytes\n",wrows*wcols*sizeof(int)); return NULL; } computeOffsets(offset,wrows,wcols,frows,fcols); const int dim1 = frows*fcols; for(int i=0;i<fbins;i++) { conv2DValid(feat->feature+i*dim1, frows, fcols, kern+i*wrows*wcols, wrows, wcols, apronRows, apronCols, offset, output+i*dim1, frows, fcols,1,0); } mergeConv(output, frows, fcols, fbins); //cleanup free(offset); if(enablePadding==true) { //shrink the output output= (float *)realloc((void *)output,frows*fcols*sizeof(float)); esvmArr2_f *soln = (esvmArr2_f *)esvmMalloc(sizeof(esvmArr2_f)); soln->arr = output; soln->rows = frows; soln->cols = fcols; return soln; } else { const int rowEnd = frows-apronRows+(1^(wrows&1)); const int colEnd = fcols - apronCols+(1^(wcols&1)); const int urows = rowEnd-apronRows; const int ucols = colEnd-apronCols; float *uout = (float *)esvmMalloc(urows*ucols*sizeof(float)); for(int i=0;i<urows;i++) { float *tmp = uout+i*ucols; float *tmpout = output+(i+apronRows)*fcols+apronCols; for(int j=0;j<ucols;j++) { (*tmp++) = (*tmpout++); } } free(output); esvmArr2_f *soln = (esvmArr2_f *)esvmMalloc(sizeof(esvmArr2_f)); soln->arr = uout; soln->rows = urows; soln->cols = ucols; return soln; } }
/// Runs the fix-up passes in a fixed order: computeOffsets() first, then the
/// string pool, name group pool and miscellaneous offset fix-ups.
void Builder::fixup()
{
    computeOffsets();

    fixupStringPoolOffsets();
    fixupNameGroupPoolOffsets();
    fixupMiscellaneousOffsets();
}
/**
 * Builds a permuted copy of pIn by writing directly through the raw data
 * pointers returned by get() (and getImg() when pIn->isComplex() is true).
 *
 * The source buffer is read linearly. A per-dimension counter tracks the
 * current multi-index; after every copied element the destination pointer
 * is advanced by offset[j], and when dimension j wraps around, maxOffset[j]
 * is subtracted and the carry moves on to dimension j + 1. In the complex
 * case the imaginary pointers are moved in lockstep with the real ones.
 *
 * @param pIn      source object; its data is only read.
 * @param dimsVect permutation description, forwarded as-is to
 *                 computeOffsets().
 * @return the object returned by pIn->clone(), with its elements rewritten
 *         in permuted order.
 */
T *doNativePermute(T *pIn, const std::vector<int>& dimsVect)
{
    const int iDims = pIn->getDims();
    int* piDimsArray = pIn->getDimsArray();

    // Scratch buffers live in vectors so they are released automatically,
    // including when computeOffsets() or clone() throws.
    std::vector<int> index(iDims, 0);
    std::vector<int> offset(iDims);
    std::vector<int> maxOffset(iDims);

    // computeOffsets() is expected to fill offset[] and maxOffset[]; both are
    // read below without any other initialisation.
    computeOffsets(iDims, piDimsArray, dimsVect, offset.data(), maxOffset.data());

    T* pOut = pIn->clone();
    typename T::type* pout = pOut->get();

    // Source range, computed once instead of on every loop test.
    typename T::type* const pinBegin = pIn->get();
    typename T::type* const pinEnd = pinBegin + pIn->getSize();

    if (pIn->isComplex())
    {
        typename T::type* poutImg = pOut->getImg();
        typename T::type* pinImg = pIn->getImg();

        for (typename T::type* pin = pinBegin; pin < pinEnd; pin++, pinImg++)
        {
            *pout = *pin;
            *poutImg = *pinImg;

            // Increment the multi-index with carry; real and imaginary
            // destinations move together.
            for (int j = 0; j < iDims; j++)
            {
                ++index[j];
                pout += offset[j];
                poutImg += offset[j];
                if (index[j] < piDimsArray[j])
                {
                    break;
                }

                pout -= maxOffset[j];
                poutImg -= maxOffset[j];
                index[j] = 0;
            }
        }
    }
    else
    {
        for (typename T::type* pin = pinBegin; pin < pinEnd; pin++)
        {
            *pout = *pin;

            // Increment the multi-index with carry.
            for (int j = 0; j < iDims; j++)
            {
                ++index[j];
                pout += offset[j];
                if (index[j] < piDimsArray[j])
                {
                    break;
                }

                pout -= maxOffset[j];
                index[j] = 0;
            }
        }
    }

    return pOut;
}