예제 #1
0
void BaseAnimationState::handleScreenChanged() {
#ifndef BACKEND_8BIT
	const int screenW = _sys->getOverlayWidth();
	const int screenH = _sys->getOverlayHeight();

	int newScale = MIN(screenW / _movieWidth, screenH / _movieHeight);

	assert(newScale >= 1);
	if (newScale > 3)
		newScale = 3;

	if (newScale != _movieScale) {
		// HACK: Since frames generally do not cover the entire screen,
		//       We need to undraw the old frame. This is a very hacky
		//       way of doing that.
		OverlayColor *buf = (OverlayColor *)calloc(screenW * screenH, sizeof(OverlayColor));
		_sys->copyRectToOverlay(buf, screenW, 0, 0, screenW, screenH);
		free(buf);

		free(_overlay);
		_movieScale = newScale;
		_overlay = (OverlayColor *)calloc(_movieScale * _movieWidth * _movieScale * _movieHeight, sizeof(OverlayColor));
	}

	buildLookup();
#endif
}
예제 #2
0
파일: seqparser.cpp 프로젝트: minego/wTerm
/**
 * Constructs a parser: allocates room for MAX_NUM_VALUES integer values,
 * clears the sequence pointer and saved position, then calls reset() and
 * buildLookup() in that order.
 */
ControlSeqParser::ControlSeqParser()
{
	// These three assignments do not depend on one another.
	m_values = static_cast<int *>(malloc(MAX_NUM_VALUES * sizeof(int)));
	m_savedPos = 0;
	m_seq = NULL;

	// Must run after the members above have been set.
	reset();
	buildLookup();
}
예제 #3
0
/**
 * Drives the MPEG2 decoder until one frame has been drawn.
 *
 * @return true once a frame with a display buffer was drawn (and _frameNum
 *         incremented); false when a read from the stream returned zero
 *         bytes before that happened, or when USE_MPEG2 is not defined.
 */
bool BaseAnimationState::decodeFrame() {
#ifdef USE_MPEG2
	static byte chunk[BUFFER_SIZE];
	// Start non-zero so the loop keeps running until a read returns 0 bytes.
	size_t bytesRead = (size_t) -1;

	do {
		const mpeg2_state_t state = mpeg2_parse(_mpegDecoder);
		const mpeg2_sequence_t *seq = _mpegInfo->sequence;

		if (state == STATE_BUFFER) {
			// The decoder wants more input: hand it the next chunk of the file.
			bytesRead = _mpegFile->read(chunk, BUFFER_SIZE);
			mpeg2_buffer(_mpegDecoder, chunk, chunk + bytesRead);
		} else if (state == STATE_SLICE || state == STATE_END) {
			// A picture may be ready; draw it only if a display buffer exists.
			if (_mpegInfo->display_fbuf) {
				checkPaletteSwitch();
				drawYUV(seq->width, seq->height, _mpegInfo->display_fbuf->buf);
#ifdef BACKEND_8BIT
				buildLookup(_palNum + 1, _lutCalcNum);
#endif

				_frameNum++;
				return true;
			}
		}
		// Every other decoder state is ignored.
	} while (bytesRead);
#endif
	return false;
}
예제 #4
0
/**
 * Constructs a parser in the ST_START state with no current sequence,
 * UTF-8 mode selected, no pending UTF-8 bytes and VT52 mode off.
 */
ControlSeqParser::ControlSeqParser()
	: m_state(ST_START),
	  m_seq(NULL),
	  m_mode(MODE_UTF8),
	  m_utf8_remlen(0),
	  m_vt52(false)
{
	// All members listed above are initialised before buildLookup() runs.
	buildLookup();
}
// [E,ind,segs] = mexFunction(model,I,chns,chnsSs) - helper for edgesDetect.m
//
// MEX entry point. Reads the forest arrays and options from pr[0] (the model
// struct), the image from pr[1] and the feature channels from pr[2], then:
//   1. walks every evaluated tree for every patch and records the index of the
//      node where the walk stops in `ind` (pl[1]);
//   2. accumulates per-patch edge votes into the edge map `E` (pl[0]), either
//      directly (sharpen==0) or after re-labelling boundary pixels by local
//      colour (sharpen>0);
//   3. when a third output is requested (nl>2), also returns the per-patch
//      segment masks in pl[2].
//
// NOTE(review): nr is never checked, although pr[0], pr[1] and pr[2] are all
// dereferenced; none of the mxGetField results are checked for NULL either.
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
{
	// get inputs
	mxArray *model = (mxArray*) pr[0];
	float *I = (float*) mxGetData(pr[1]);
	float *feats =  (float*) mxGetData(pr[2]);

	// extract relevant fields from model and options
	float *thrs = (float*) mxGetData(mxGetField(model,0,"thrs"));
	uint32 *fids = (uint32*) mxGetData(mxGetField(model,0,"fids"));
	uint32 *child = (uint32*) mxGetData(mxGetField(model,0,"child"));
	uint8 *segs = (uint8*) mxGetData(mxGetField(pr[0],0,"segs"));
	uint8 *nSegs = (uint8*) mxGetData(mxGetField(pr[0],0,"nSegs"));
	uint16 *eBins = (uint16*) mxGetData(mxGetField(model,0,"eBins"));
	uint32 *eBnds = (uint32*) mxGetData(mxGetField(model,0,"eBnds"));
	mxArray *opts = mxGetField(model,0,"opts");
	const int shrink = (int) mxGetScalar(mxGetField(opts,0,"shrink"));
	const int imWidth = (int) mxGetScalar(mxGetField(opts,0,"imWidth"));
	const int gtWidth = (int) mxGetScalar(mxGetField(opts,0,"gtWidth"));
	const int nChns = (int) mxGetScalar(mxGetField(opts,0,"nChns"));
	// nCells and nTotFtrs are read here but not used again in this function.
	const int nCells = (int) mxGetScalar(mxGetField(opts,0,"nCells"));
	const uint32 nTotFtrs = (uint32) mxGetScalar(mxGetField(opts,0,"nTotFtrs"));
	const int stride = (int) mxGetScalar(mxGetField(opts,0,"stride"));
	const int nTreesEval = (int) mxGetScalar(mxGetField(opts,0,"nTreesEval"));
	int sharpen = (int) mxGetScalar(mxGetField(opts,0,"sharpen"));
	int nThreads = (int) mxGetScalar(mxGetField(opts,0,"nThreads"));
	// number of eBnds entries per thrs entry: (numel(eBnds)-1)/numel(thrs)
	const int nBnds = int(mxGetNumberOfElements(mxGetField(model,0,"eBnds"))-1)/
		int(mxGetNumberOfElements(mxGetField(model,0,"thrs")));
	const char *msgSharpen="Model supports sharpening of at most %i pixels!\n";
	// clamp the requested sharpening to what the model's eBnds layout allows
	if( sharpen>nBnds-1 ) 
	{ 
		sharpen=nBnds-1; 
		mexPrintf(msgSharpen,sharpen); 
	}

	// get dimensions and constants
	const mwSize *imgSize = mxGetDimensions(pr[1]);
	const int h = (int) imgSize[0];
	const int w = (int) imgSize[1];
	// Z is the size of the third image dimension, or 1 for a 2-D input
	const int Z = mxGetNumberOfDimensions(pr[1])<=2 ? 1 : imgSize[2];
	const mwSize *fidsSize = mxGetDimensions(mxGetField(model,0,"fids"));
	const int nTreeNodes = (int) fidsSize[0];
	const int nTrees = (int) fidsSize[1];
	// h1 x w1: number of patch positions; h2 x w2: size of the output edge map
	const int h1 = (int) ceil(double(h-imWidth)/stride);
	const int w1 = (int) ceil(double(w-imWidth)/stride);
	const int h2 = h1*stride+gtWidth;
	const int w2 = w1*stride+gtWidth;
	const int imgDims[3] = {h,w,Z};
	const int chnDims[3] = {h/shrink,w/shrink,nChns};
	const int indDims[3] = {h1,w1,nTreesEval};
	const int outDims[3] = {h2,w2,1};
	const int segDims[5] = {gtWidth,gtWidth,h1,w1,nTreesEval};

	// construct lookup tables
	// (buildLookup is defined elsewhere; its results are released with
	//  delete [] at the end of this function)
	uint32 *iids, *eids, *cids;
	iids = buildLookup( (int*)imgDims, gtWidth );
	eids = buildLookup( (int*)outDims, gtWidth );
	// the second argument is a double here and is converted implicitly
	cids = buildLookup( (int*)chnDims,  floor((double)2/shrink + 0.5));
	//cids = buildLookup( (int*)chnDims, 1 );
	// create outputs
	pl[0] = mxCreateNumericArray(3,outDims,mxSINGLE_CLASS,mxREAL);
	float *E = (float*) mxGetData(pl[0]);
	pl[1] = mxCreateNumericArray(3,indDims,mxUINT32_CLASS,mxREAL);
	uint32 *ind = (uint32*) mxGetData(pl[1]);
	if(nl>2) pl[2] = mxCreateNumericArray(5,segDims,mxUINT8_CLASS,mxREAL);
	// segsOut is left uninitialised when nl<=2; every later use is guarded by nl>2
	uint8 *segsOut; if(nl>2) segsOut = (uint8*) mxGetData(pl[2]);

	// apply forest to all patches and store leaf inds
#ifdef USEOMP
	nThreads = min(nThreads,omp_get_max_threads());
#pragma omp parallel for num_threads(nThreads)
#endif
	for( int c=0; c<w1; c++ ) 
		for( int t=0; t<nTreesEval; t++ ) 
		{
			// rows are visited in two passes: even rows (r0=0), then odd rows (r0=1)
			for( int r0=0; r0<2; r0++ ) 
				for( int r=r0; r<h1; r+=2 ) 
				{
					// offset of this patch within one channel of feats
					int o = (r*stride/shrink) + (c*stride/shrink)*h/shrink;
					// select tree to evaluate
					int t1 = ((r+c)%2*nTreesEval+t)%nTrees; 
					uint32 k = t1*nTreeNodes;
					// descend until a node whose child entry is 0
					while( child[k] ) 
				  {
					  // compute feature (either channel or self-similarity feature)
					  uint32 f = fids[k]; 
					  float ftr;
					  ftr = feats[cids[f]+o]; 
					  // compare ftr to threshold and move left or right accordingly
					  if( ftr < thrs[k] ) 
						  k = child[k]-1; 
					  else 
						  k = child[k];
					  // child[] values are relative to the tree; re-add the tree's base offset
					  k += t1*nTreeNodes;
				  }
					// store leaf index and update edge maps
					ind[ r + c*h1 + t*h1*w1 ] = k;
				}
		}

		// NOTE: despite the deeper indentation, everything from here to the
		// end of the function is at function scope, after the loop above.

		// compute edge maps (avoiding collisions from parallel executions)
		if( !sharpen ) 
			// columns are processed in gtWidth/stride interleaved groups; only
			// columns belonging to the same group run in parallel
			for( int c0=0; c0<gtWidth/stride; c0++ )
			{
#ifdef USEOMP
#pragma omp parallel for num_threads(nThreads)
#endif
				for( int c=c0; c<w1; c+=gtWidth/stride )
				{
					for( int r=0; r<h1; r++ )
						for( int t=0; t<nTreesEval; t++ ) 
				  {
					  uint32 k = ind[ r + c*h1 + t*h1*w1 ];
					  // top-left corner of this patch in the output edge map
					  float *E1 = E + (r*stride) + (c*stride)*h2;
					  // [b0,b1) is this leaf's range of entries in eBins
					  int b0=eBnds[k*nBnds], b1=eBnds[k*nBnds+1]; 
					  if(b0==b1) 
						  continue;
					  for( int b=b0; b<b1; b++ ) 
						  E1[eids[eBins[b]]]++;
					  // copy the leaf's gtWidth x gtWidth segment mask to the third output
					  if(nl>2) 
						  memcpy(segsOut+(r+c*h1+t*h1*w1)*gtWidth*gtWidth,
						  segs+k*gtWidth*gtWidth,gtWidth*gtWidth*sizeof(uint8));
				  }
				}
			}

			// computed sharpened edge maps, snapping to local color values
			if( sharpen )
			{
				// compute neighbors array
				// N holds 4 neighbour indices per patch pixel (previous/next
				// column, previous/next row); at the patch border the pixel is
				// its own neighbour.
				// NOTE(review): N has room for 4096 pixels, so g*g must not
				// exceed 4096; nothing here checks that.
				const int g=gtWidth; uint16 N[4096*4];
				for( int c=0; c<g; c++ ) 
					for( int r=0; r<g; r++ )
					{
						int i=c*g+r; 
						uint16 *N1=N+i*4;
						N1[0] = c>0 ? i-g : i; N1[1] = c<g-1 ? i+g : i;
						N1[2] = r>0 ? i-1 : i; N1[3] = r<g-1 ? i+1 : i;
					}
					// NOTE: the loop below follows the neighbour loop above; it
					// is not nested inside it, whatever the indentation suggests.
#ifdef USEOMP
#pragma omp parallel for num_threads(nThreads)
#endif
					for( int c=0; c<w1; c++ ) 
						for( int r=0; r<h1; r++ ) 
						{
							for( int t=0; t<nTreesEval; t++ ) 
							{
								// get current segment and copy into S
								uint32 k = ind[ r + c*h1 + t*h1*w1 ];
								int m = nSegs[k]; 
								// leaves with a single segment are skipped entirely
								if( m==1 ) 
									continue;
								// S points at scratch storage when segs is not returned,
								// otherwise directly at this patch's slot in the third output.
								// NOTE(review): S0 holds 4096 bytes; g*g is not checked against it.
								uint8 S0[4096], *S=(nl<=2) ? S0 : segsOut+(r+c*h1+t*h1*w1)*g*g;
								memcpy(S,segs+k*g*g, g*g*sizeof(uint8));
								// compute color model for each segment using every other pixel
								// NOTE(review): ns (100), mus (1000) and, further down, vs (10)
								// are fixed-size; m, m*Z and Z are not checked against them.
								int ci, ri, s, z; 
								float ns[100], mus[1000];
								// I1 points at the first image pixel under the g x g window
								// centred in this imWidth-wide patch
								const float *I1 = I+(c*stride+(imWidth-g)/2)*h+r*stride+(imWidth-g)/2;
								for( s=0; s<m; s++ ) 
								{ 
									ns[s]=0; 
									for( z=0; z<Z; z++ ) 
										mus[s*Z+z]=0; 
								}
								// accumulate per-segment pixel counts and per-channel sums
								for( ci=0; ci<g; ci+=2 ) 
									for( ri=0; ri<g; ri+=2 )
									{
										s = S[ci*g+ri]; 
										ns[s]++;
										for( z=0; z<Z; z++ ) 
											mus[s*Z+z]+=I1[z*h*w+ci*h+ri];
									}
									// NOTE: this runs after the ci/ri loops, not inside them.
									// Turn sums into means; a segment with no sampled pixel
									// has ns[s]==0 and is divided by zero here.
									for(s=0; s<m; s++) 
										for( z=0; z<Z; z++ ) 
											mus[s*Z+z]/=ns[s];
									// update segment S according to local color values
									int b0=eBnds[k*nBnds], b1=eBnds[k*nBnds+sharpen];
									for( int b=b0; b<b1; b++ )
									{
										float vs[10], d, e, eBest=1e10f;
										int i, sBest=-1, ss[4];
										// labels of the four neighbours of pixel eBins[b]
										for( i=0; i<4; i++ ) 
											ss[i]=S[N[eBins[b]*4+i]];
										// image value of pixel eBins[b] in each channel
										for( z=0; z<Z; z++ ) 
											vs[z]=I1[iids[eBins[b]]+z*h*w];
										// choose the neighbouring label whose mean is closest
										// (squared distance) to this pixel's value
										for( i=0; i<4; i++ )
										{
											s=ss[i]; 
											if(s==sBest) 
												continue;
											e=0; 
											for( z=0; z<Z; z++ ) 
											{ 
												d=mus[s*Z+z]-vs[z]; 
												e+=d*d; 
											}
											if( e<eBest )
											{ 
												eBest=e; 
												sBest=s;
											}
										}
										S[eBins[b]]=sBest;
									}
									// convert mask to edge maps (examining expanded set of pixels)
									float *E1 = E + c*stride*h2 + r*stride; b1=eBnds[k*nBnds+sharpen+1];
									for( int b=b0; b<b1; b++ ) 
									{
										int i=eBins[b]; 
										uint8 s=S[i]; 
										uint16 *N1=N+i*4;
										// a pixel votes for an edge when any neighbour has a different label
										if( s!=S[N1[0]] || s!=S[N1[1]] || s!=S[N1[2]] || s!=S[N1[3]] )
											E1[eids[i]]++;
									}
							}
						}
			}

			// free memory
			delete [] iids; delete [] eids;
			delete [] cids;
}
예제 #6
0
/**
 * Encodes a set of data with DC's version of huffman encoding..
 * @todo Use real streams maybe? or something else than string (operator[] contains a compare, slow...)
 */
void CryptoManager::encodeHuffman(const string& is, string& os) {
	
	// We might as well expect this much data as huffman encoding doesn't go very far...
	os.reserve(is.size());
	if(is.length() == 0) {
		os.append("HE3\x0d");
		
		// Nada...
		os.append(7, '\0');
		return;
	}
	// First, we count all characters
	u_int8_t csum = 0;
	int count[256];
	memset(count, 0, sizeof(count));
	int chars = countChars(is, count, csum);

	// Next, we create a set of nodes and add it to a list, removing all characters that never occur.
	
	list<Node*> nodes;

	int i;
	for(i=0; i<256; i++) {
		if(count[i] > 0) {
			nodes.push_back(new Node(i, count[i]));
		}
	}

	nodes.sort(greaterNode());
#ifdef _DEBUG
	for(list<Node*>::iterator it = nodes.begin(); it != nodes.end(); ++it) dcdebug("%.02x:%d, ", (*it)->chr, (*it)->weight);
	dcdebug("\n");
#endif
	
	walkTree(nodes);
	dcassert(nodes.size() == 1);

	Node* root = nodes.front();
	vector<u_int8_t> lookup[256];
	
	// Build a lookup table for fast character lookups
	buildLookup(lookup, root);
	delete root;

	// Reserve some memory to avoid all those copies when appending...
	os.reserve(is.size() * 3 / 4);

	os.append("HE3\x0d");
	
	// Checksum
	os.append(1, csum);
	string::size_type sz = is.size();
	os.append((char*)&sz, 4);

	// Character count
	os.append((char*)&chars, 2);

	// The characters and their bitlengths
	for(i=0; i<256; i++) {
		if(count[i] > 0) {
			os.append(1, (u_int8_t)i);
			os.append(1, (u_int8_t)lookup[i].size());
		}
	}
	
	BitOutputStream bos(os);
	// The tree itself, ie the bits of each character
	for(i=0; i<256; i++) {
		if(count[i] > 0) {
			bos.put(lookup[i]);
		}
	}
	
	dcdebug("u_int8_ts: %d\n", os.size());
	bos.skipToByte();

	for(string::size_type j=0; j<is.size(); j++) {
		dcassert(lookup[(u_int8_t)is[j]].size() != 0);
		bos.put(lookup[(u_int8_t)is[j]]);
	}
	bos.skipToByte();
}
예제 #7
0
/**
 * Prepares playback of the cutscene called @a name.
 *
 * With BACKEND_8BIT defined, the palettes are read from "<name>.pal" and the
 * first lookup table is built; otherwise the lookup table is built, the
 * scaled overlay buffer is allocated and the overlay is shown. In both cases
 * "<name>.mp2" is then opened and an MPEG2 decoder is created.
 *
 * @param name Base name of the cutscene files (no extension).
 * @return true on success; false if a required file could not be opened, the
 *         decoder could not be allocated, or USE_MPEG2 is not defined.
 */
bool BaseAnimationState::init(const char *name) {
#ifdef USE_MPEG2
	char tempFile[512];

	_mpegDecoder = NULL;
	_mpegFile = NULL;

#ifdef BACKEND_8BIT

	uint i, p;

	// Load lookup palettes
	// Bounded write: an over-long name is truncated rather than overrunning tempFile.
	snprintf(tempFile, sizeof(tempFile), "%s.pal", name);

	Common::File f;

	if (!f.open(tempFile)) {
		warning("Cutscene: %s palette missing", tempFile);
		return false;
	}

	// Each record is: end (uint16 LE), cnt (uint16 LE), then cnt RGB triplets.
	// NOTE(review): neither p nor cnt is checked against the size of the
	// destination arrays, which are declared outside this function.
	p = 0;
	while (!f.eos()) {
		_palettes[p].end = f.readUint16LE();
		_palettes[p].cnt = f.readUint16LE();

		for (i = 0; i < _palettes[p].cnt; i++) {
			_palettes[p].pal[4 * i] = f.readByte();
			_palettes[p].pal[4 * i + 1] = f.readByte();
			_palettes[p].pal[4 * i + 2] = f.readByte();
			_palettes[p].pal[4 * i + 3] = 0;
		}
		// Zero the entries the file did not supply, up to 256 colours.
		for (; i < 256; i++) {
			_palettes[p].pal[4 * i] = 0;
			_palettes[p].pal[4 * i + 1] = 0;
			_palettes[p].pal[4 * i + 2] = 0;
			_palettes[p].pal[4 * i + 3] = 0;
		}

		p++;
	}

	f.close();

	_palNum = 0;
	_maxPalNum = p;
	setPalette(_palettes[_palNum].pal);
	// Both table pointers target table 0 while it is built; _lut2 is then
	// moved to table 1.
	_lut = _lut2 = _yuvLookup[0];
	_curPal = -1;
	_cr = 0;
	buildLookup(_palNum, 256);
	_lut2 = _yuvLookup[1];
	_lutCalcNum = (BITDEPTH + _palettes[_palNum].end + 2) / (_palettes[_palNum].end + 2);
#else
	buildLookup();
	_overlay = (OverlayColor *)calloc(_movieScale * _movieWidth * _movieScale * _movieHeight, sizeof(OverlayColor));
	_sys->showOverlay();
#endif

	// Open MPEG2 stream
	_mpegFile = new Common::File();
	snprintf(tempFile, sizeof(tempFile), "%s.mp2", name);
	if (!_mpegFile->open(tempFile)) {
		warning("Cutscene: Could not open %s", tempFile);
		return false;
	}

	// Load and configure decoder
	_mpegDecoder = mpeg2_init();
	if (_mpegDecoder == NULL) {
		warning("Cutscene: Could not allocate an MPEG2 decoder");
		return false;
	}

	_mpegInfo = mpeg2_info(_mpegDecoder);
	_frameNum = 0;

	return true;
#else /* USE_MPEG2 */
	return false;
#endif
}
예제 #8
0
// ind = mexFunction(model,chns,chnsSs)
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
{
  // get inputs
  mxArray *model = (mxArray*) pr[0];
  float *chns = (float*) mxGetData(pr[1]);
  float *chnsSs = (float*) mxGetData(pr[2]);

  // extract relevant fields from model and options
  float *thrs = (float*) mxGetData(mxGetField(model,0,"thrs"));
  uint32 *fids = (uint32*) mxGetData(mxGetField(model,0,"fids"));
  uint32 *child = (uint32*) mxGetData(mxGetField(model,0,"child"));
  mxArray *opts = mxGetField(model,0,"opts");
  const int shrink = (int) mxGetScalar(mxGetField(opts,0,"shrink"));
  const int imWidth = (int) mxGetScalar(mxGetField(opts,0,"imWidth"));
  const int nChns = (int) mxGetScalar(mxGetField(opts,0,"nChns"));
  const int nCells = (int) mxGetScalar(mxGetField(opts,0,"nCells"));
  const uint32 nChnFtrs = (uint32) mxGetScalar(mxGetField(opts,0,"nChnFtrs"));
  const int stride = (int) mxGetScalar(mxGetField(opts,0,"stride"));
  const int nTreesEval = (int) mxGetScalar(mxGetField(opts,0,"nTreesEval"));
  int nThreads = (int) mxGetScalar(mxGetField(opts,0,"nThreads"));

  // get dimensions and constants
  const mwSize *chnsSize = mxGetDimensions(pr[1]);
  const int h = (int) chnsSize[0]*shrink;
  const int w = (int) chnsSize[1]*shrink;
  const mwSize *fidsSize = mxGetDimensions(mxGetField(model,0,"fids"));
  const int nTreeNodes = (int) fidsSize[0];
  const int nTrees = (int) fidsSize[1];
  const int h1 = (int) ceil(double(h-imWidth)/stride);
  const int w1 = (int) ceil(double(w-imWidth)/stride);
  const int chnDims[3] = {h/shrink,w/shrink,nChns};
  const int indDims[3] = {h1,w1,nTreesEval};

  // construct lookup tables
  uint32 *cids, *cids1, *cids2;
  cids = buildLookup( (int*)chnDims, imWidth/shrink );
  buildLookupSs( cids1, cids2, (int*)chnDims, imWidth/shrink, nCells );

  // create output
  pl[0] = mxCreateNumericArray(3,indDims,mxUINT32_CLASS,mxREAL);
  uint32 *ind = (uint32*) mxGetData(pl[0]);

  // apply forest to all patches and store leaf inds
  #ifdef USEOMP
  nThreads = min(nThreads,omp_get_max_threads());
  #pragma omp parallel for num_threads(nThreads)
  #endif
  for( int c=0; c<w1; c++ ) for( int t=0; t<nTreesEval; t++ ) {
    for( int r0=0; r0<2; r0++ ) for( int r=r0; r<h1; r+=2 ) {
      int o = (r*stride/shrink) + (c*stride/shrink)*h/shrink;
      // select tree to evaluate
      int t1 = ((r+c)%2*nTreesEval+t)%nTrees; uint32 k = t1*nTreeNodes;
      while( child[k] ) {
        // compute feature (either channel or self-similarity feature)
        uint32 f = fids[k]; float ftr;
        if( f<nChnFtrs ) ftr = chns[cids[f]+o]; else
          ftr = chnsSs[cids1[f-nChnFtrs]+o]-chnsSs[cids2[f-nChnFtrs]+o];
        // compare ftr to threshold and move left or right accordingly
        if( ftr < thrs[k] ) k = child[k]-1; else k = child[k];
        k += t1*nTreeNodes;
      }
      // store leaf index
      ind[ r + c*h1 + t*h1*w1 ] = k;
    }
  }

  delete [] cids; delete [] cids1; delete [] cids2;
}
// [E,ind] = mexFunction(model,chns,chnsSsi, chnsNormal) - helper for edgesDetect.m
//
// Walks the decision forest in `model` for every patch position, using
// channel features (chns), self-similarity features (chnsSs) and normal
// features (chnsNormal). Returns the stopping node of each walk in `ind`
// (pl[1]) and accumulates the per-node edge-bin votes into `E` (pl[0]).
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
{
  // ---- inputs ----
  mxArray *model = (mxArray*) pr[0];
  float *chns = (float*) mxGetData(pr[1]);
  float *chnsSs = (float*) mxGetData(pr[2]);
  float *chnsNormal = (float*) mxGetData(pr[3]);

  // ---- forest arrays held in the model struct ----
  float *thrs = (float*) mxGetData(mxGetField(model,0,"thrs"));
  uint32 *fids = (uint32*) mxGetData(mxGetField(model,0,"fids"));
  uint32 *child = (uint32*) mxGetData(mxGetField(model,0,"child"));
  uint16 *eBins = (uint16*) mxGetData(mxGetField(model,0,"eBins"));
  uint32 *eBnds = (uint32*) mxGetData(mxGetField(model,0,"eBnds"));

  // ---- options ----
  mxArray *opts = mxGetField(model,0,"opts");
  const int shrink = (int) mxGetScalar(mxGetField(opts,0,"shrink"));
  const int imWidth = (int) mxGetScalar(mxGetField(opts,0,"imWidth"));
  const int gtWidth = (int) mxGetScalar(mxGetField(opts,0,"gtWidth"));
  const int nChns = (int) mxGetScalar(mxGetField(opts,0,"nChns"));
  const int nCells = (int) mxGetScalar(mxGetField(opts,0,"nCells"));

  // number of unordered pairs among nNormalCells*nNormalCells cells
  const int nNormalCells = (int) mxGetScalar(mxGetField(opts,0,"nNormalCells"));
  const int nCellsSq = nNormalCells*nNormalCells;
  const int nNormalFtrs = nCellsSq*(nCellsSq-1)/2;

  const uint32 nChnFtrs = (uint32) mxGetScalar(mxGetField(opts,0,"nChnFtrs"));
  const uint32 nSimFtrs = (uint32) mxGetScalar(mxGetField(opts,0,"nSimFtrs"));
  // read from the options but not used further in this function
  const uint32 nNormalFtrsAll = (uint32) mxGetScalar(mxGetField(opts,0,"nNormalFtrs"));
  const int nEdgeBins = (int) mxGetScalar(mxGetField(opts,0,"nEdgeBins"));
  const int stride = (int) mxGetScalar(mxGetField(opts,0,"stride"));
  const int nTreesEval = (int) mxGetScalar(mxGetField(opts,0,"nTreesEval"));
  int nThreads = (int) mxGetScalar(mxGetField(opts,0,"nThreads"));

  // ---- sizes ----
  // The image size is recovered from the channel size times shrink.
  const mwSize *chnsSize = mxGetDimensions(pr[1]);
  const int h = (int) chnsSize[0]*shrink;
  const int w = (int) chnsSize[1]*shrink;
  const mwSize *fidsSize = mxGetDimensions(mxGetField(model,0,"fids"));
  const int nTreeNodes = (int) fidsSize[0];
  const int nTrees = (int) fidsSize[1];
  // h1 x w1: number of patch positions; h2 x w2: size of one output plane
  const int h1 = (int) ceil(double(h-imWidth)/stride);
  const int w1 = (int) ceil(double(w-imWidth)/stride);
  const int h2 = h1*stride+gtWidth;
  const int w2 = w1*stride+gtWidth;
  const int chnDims[3] = {h/shrink,w/shrink,nChns};
  const int normalDims[3] = {h/shrink,w/shrink,nNormalFtrs};
  const int indDims[3] = {h1,w1,nTreesEval};
  const int outDims[3] = {h2,w2,nEdgeBins};

  // ---- lookup tables (released with delete [] below) ----
  uint32 *eids, *cids, *cids1, *cids2;
  eids = buildLookup( (int*)outDims, gtWidth );
  cids = buildLookup( (int*)chnDims, imWidth/shrink );
  buildLookupSs( cids1, cids2, (int*)chnDims, imWidth/shrink, nCells );
  uint32 *nids;
  nids = buildLookup( (int*)normalDims, 1);

  // ---- outputs ----
  pl[0] = mxCreateNumericArray(3,outDims,mxSINGLE_CLASS,mxREAL);
  float *E = (float*) mxGetData(pl[0]);
  pl[1] = mxCreateNumericArray(3,indDims,mxUINT32_CLASS,mxREAL);
  uint32 *ind = (uint32*) mxGetData(pl[1]);

  // ---- forest traversal ----
  // This loop runs serially (no OpenMP pragma is applied to it).
  for( int col=0; col<w1; col++ ) {
    for( int t=0; t<nTreesEval; t++ ) {
      // even rows first (parity 0), then odd rows (parity 1)
      for( int parity=0; parity<2; parity++ ) {
        for( int row=parity; row<h1; row+=2 ) {
          // offset of this patch within one channel
          const int off = (row*stride/shrink) + (col*stride/shrink)*h/shrink;
          // tree chosen for this (row, col, t) and the index of its first node
          const int tree = ((row+col)%2*nTreesEval+t)%nTrees;
          const uint32 base = tree*nTreeNodes;

          uint32 k = base;
          while( child[k] ) {
            // the feature id selects one of three feature families
            const uint32 f = fids[k];
            float ftr;
            if( f<nChnFtrs ) {
              // plain channel feature
              ftr = chns[cids[f]+off];
            } else if( f<nChnFtrs+nSimFtrs ) {
              // self-similarity feature: difference of two entries
              const uint32 fs = f-nChnFtrs;
              ftr = chnsSs[cids1[fs]+off]-chnsSs[cids2[fs]+off];
            } else {
              // normal self-similarity feature; the index is reduced by
              // repeated subtraction until it is below nNormalFtrs
              uint32 ff = f-nChnFtrs-nSimFtrs;
              while( ff >= nNormalFtrs ) {
                ff -= nNormalFtrs;
              }
              ftr = chnsNormal[nids[ff]+off];
            }
            // below the threshold go to child-1, otherwise to child;
            // child values are tree-relative, so the base is added back
            k = ( ftr < thrs[k] ) ? child[k]-1 : child[k];
            k += base;
          }

          ind[ row + col*h1 + t*h1*w1 ] = k;
        }
      }
    }
  }

  // ---- edge-map accumulation ----
  // Columns are split into gtWidth/stride interleaved groups; only columns of
  // the same group are processed in parallel (avoiding collisions from
  // parallel executions).
  const int colStep = gtWidth/stride;
  for( int group=0; group<colStep; group++ ) {
    #ifdef USEOMP
    #pragma omp parallel for num_threads(nThreads)
    #endif
    for( int col=group; col<w1; col+=colStep ) {
      for( int row=0; row<h1; row++ ) {
        for( int t=0; t<nTreesEval; t++ ) {
          const uint32 k = ind[ row + col*h1 + t*h1*w1 ];
          // top-left corner of this patch in the output
          float *patch = E + (row*stride) + (col*stride)*h2;
          // [first,last) is this node's range of entries in eBins
          const int first = eBnds[k];
          const int last = eBnds[k+1];
          if( first==last ) {
            continue;
          }
          for( int b=first; b<last; b++ ) {
            patch[eids[eBins[b]]]++;
          }
        }
      }
    }
  }

  delete [] eids;
  delete [] cids;
  delete [] cids1;
  delete [] cids2;
  delete [] nids;
}