void ProjectorLensDistortionOperation::executePixel(float output[4], int x, int y, void *data)
{
  float inputValue[4];
  const float height = this->getHeight();
  const float width = this->getWidth();
  const float v = (y + 0.5f) / height;
  const float u = (x + 0.5f) / width;
  MemoryBuffer *inputBuffer = (MemoryBuffer *)data;
  inputBuffer->readBilinear(inputValue, (u * width + this->m_kr2) - 0.5f, v * height - 0.5f);
  output[0] = inputValue[0];
  inputBuffer->read(inputValue, x, y);
  output[1] = inputValue[1];
  inputBuffer->readBilinear(inputValue, (u * width - this->m_kr2) - 0.5f, v * height - 0.5f);
  output[2] = inputValue[2];
  output[3] = 1.0f;
}
void GlareGhostOperation::generateGlare(float *data, MemoryBuffer *inputTile, NodeGlare *settings)
{
	const int qt = 1 << settings->quality;
	const float s1 = 4.0f / (float)qt, s2 = 2.0f * s1;
	int x, y, n, p, np;
	fRGB c, tc, cm[64];
	float sc, isc, u, v, sm, s, t, ofs, scalef[64];
	const float cmo = 1.0f - settings->colmod;

	MemoryBuffer *gbuf = inputTile->duplicate();
	MemoryBuffer *tbuf1 = inputTile->duplicate();

	bool breaked = false;

	FastGaussianBlurOperation::IIR_gauss(tbuf1, s1, 0, 3);
	if (!breaked) FastGaussianBlurOperation::IIR_gauss(tbuf1, s1, 1, 3);
	if (isBreaked()) breaked = true;
	if (!breaked) FastGaussianBlurOperation::IIR_gauss(tbuf1, s1, 2, 3);

	MemoryBuffer *tbuf2 = tbuf1->duplicate();

	if (isBreaked()) breaked = true;
	if (!breaked) FastGaussianBlurOperation::IIR_gauss(tbuf2, s2, 0, 3);
	if (isBreaked()) breaked = true;
	if (!breaked) FastGaussianBlurOperation::IIR_gauss(tbuf2, s2, 1, 3);
	if (isBreaked()) breaked = true;
	if (!breaked) FastGaussianBlurOperation::IIR_gauss(tbuf2, s2, 2, 3);

	ofs = (settings->iter & 1) ? 0.5f : 0.0f;
	for (x = 0; x < (settings->iter * 4); x++) {
		y = x & 3;
		cm[x][0] = cm[x][1] = cm[x][2] = 1;
		if (y == 1) fRGB_rgbmult(cm[x], 1.0f, cmo, cmo);
		if (y == 2) fRGB_rgbmult(cm[x], cmo, cmo, 1.0f);
		if (y == 3) fRGB_rgbmult(cm[x], cmo, 1.0f, cmo);
		scalef[x] = 2.1f * (1.0f - (x + ofs) / (float)(settings->iter * 4));
		if (x & 1) scalef[x] = -0.99f / scalef[x];
	}

	sc = 2.13;
	isc = -0.97;
	for (y = 0; y < gbuf->getHeight() && (!breaked); y++) {
		v = ((float)y + 0.5f) / (float)gbuf->getHeight();
		for (x = 0; x < gbuf->getWidth(); x++) {
			u = ((float)x + 0.5f) / (float)gbuf->getWidth();
			s = (u - 0.5f) * sc + 0.5f, t = (v - 0.5f) * sc + 0.5f;
			tbuf1->readBilinear(c, s * gbuf->getWidth(), t * gbuf->getHeight());
			sm = smoothMask(s, t);
			mul_v3_fl(c, sm);
			s = (u - 0.5f) * isc + 0.5f, t = (v - 0.5f) * isc + 0.5f;
			tbuf2->readBilinear(tc, s * gbuf->getWidth() - 0.5f, t * gbuf->getHeight() - 0.5f);
			sm = smoothMask(s, t);
			madd_v3_v3fl(c, tc, sm);

			gbuf->writePixel(x, y, c);
		}
		if (isBreaked()) breaked = true;

	}

	memset(tbuf1->getBuffer(), 0, tbuf1->getWidth() * tbuf1->getHeight() * COM_NUM_CHANNELS_COLOR * sizeof(float));
	for (n = 1; n < settings->iter && (!breaked); n++) {
		for (y = 0; y < gbuf->getHeight() && (!breaked); y++) {
			v = ((float)y + 0.5f) / (float)gbuf->getHeight();
			for (x = 0; x < gbuf->getWidth(); x++) {
				u = ((float)x + 0.5f) / (float)gbuf->getWidth();
				tc[0] = tc[1] = tc[2] = 0.0f;
				for (p = 0; p < 4; p++) {
					np = (n << 2) + p;
					s = (u - 0.5f) * scalef[np] + 0.5f;
					t = (v - 0.5f) * scalef[np] + 0.5f;
					gbuf->readBilinear(c, s * gbuf->getWidth() - 0.5f, t * gbuf->getHeight() - 0.5f);
					mul_v3_v3(c, cm[np]);
					sm = smoothMask(s, t) * 0.25f;
					madd_v3_v3fl(tc, c, sm);
				}
				tbuf1->addPixel(x, y, tc);
			}
			if (isBreaked()) breaked = true;
		}
		memcpy(gbuf->getBuffer(), tbuf1->getBuffer(), tbuf1->getWidth() * tbuf1->getHeight() * COM_NUM_CHANNELS_COLOR * sizeof(float));
	}
	memcpy(data, gbuf->getBuffer(), gbuf->getWidth() * gbuf->getHeight() * COM_NUM_CHANNELS_COLOR * sizeof(float));

	delete gbuf;
	delete tbuf1;
	delete tbuf2;
}
/**
 * Generate the streaks glare for \a inputTile and accumulate it into \a data.
 *
 * \a data is cleared first. Then, for every streak direction (angles stepping
 * by 360 / settings->streaks degrees, offset by settings->angle_ofs),
 * settings->iter passes are run over a working copy of the tile. Each pass
 * samples the source at one, two and three steps along the direction, with the
 * step length growing by a factor of four per pass, and blends the colour-
 * modulated samples into a destination buffer that then becomes the next
 * pass's source. After the last pass the streak is added to \a data scaled by
 * 1 / (6 - settings->iter), and the working buffers are reset for the next
 * direction.
 *
 * isBreaked() is polled after every row; once it returns true all remaining
 * loops stop, although the partially processed streak is still added.
 *
 * \param data       destination; width * height * 4 floats are written, with
 *                   every fourth component set to 1.0.
 * \param inputTile  source buffer; read and duplicated, not written here.
 * \param settings   `streaks`, `angle_ofs`, `iter`, `fade` and `colmod` are read.
 */
void GlareStreaksOperation::generateGlare(float *data, MemoryBuffer *inputTile, NodeGlare *settings)
{
	int x, y, n;
	float c1[4], c2[4], c3[4], c4[4];
	float a, ang = DEG2RADF(360.0f) / (float)settings->streaks;

	const int size = inputTile->getWidth() * inputTile->getHeight();
	const int size4 = size * 4;

	// weight of one finished streak in the output; it depends on the settings
	// only, so it is computed once rather than once per streak direction
	// NOTE(review): iter == 6 would make this a division by zero; presumably
	// the allowed range of `iter` excludes it, confirm against the caller
	const float factor = 1.0f / (float)(6 - settings->iter);

	bool breaked = false;

	MemoryBuffer *tsrc = inputTile->duplicate();
	MemoryBuffer *tdst = new MemoryBuffer(COM_DT_COLOR, inputTile->getRect());
	tdst->clear();
	memset(data, 0, size4 * sizeof(float));

	for (a = 0.0f; a < DEG2RADF(360.0f) && (!breaked); a += ang) {
		const float an = a + settings->angle_ofs;
		const float vx = cos((double)an), vy = sin((double)an);
		for (n = 0; n < settings->iter && (!breaked); ++n) {
			// step length along the streak direction: 1, 4, 16, ...
			const float p4 = pow(4.0, (double)n);
			const float vxp = vx * p4, vyp = vy * p4;
			// per-sample attenuation, stronger the longer the step
			const float wt = pow((double)settings->fade, (double)p4);
			const float cmo = 1.0f - (float)pow((double)settings->colmod, (double)n + 1);  // colormodulation amount relative to current pass
			float *tdstcol = tdst->getBuffer();
			for (y = 0; y < tsrc->getHeight() && (!breaked); ++y) {
				for (x = 0; x < tsrc->getWidth(); ++x, tdstcol += 4) {
					// first pass no offset, always same for every pass, exact copy,
					// otherwise results in uneven brightness, only need once
					if (n == 0) tsrc->read(c1, x, y); else c1[0] = c1[1] = c1[2] = 0;
					tsrc->readBilinear(c2, x + vxp, y + vyp);
					tsrc->readBilinear(c3, x + vxp * 2.0f, y + vyp * 2.0f);
					tsrc->readBilinear(c4, x + vxp * 3.0f, y + vyp * 3.0f);
					// modulate color to look vaguely similar to a color spectrum
					c2[1] *= cmo;
					c2[2] *= cmo;

					c3[0] *= cmo;
					c3[1] *= cmo;

					c4[0] *= cmo;
					c4[2] *= cmo;

					// blend with what is already in the destination; the nested
					// form applies wt once, twice and three times to c2, c3, c4
					tdstcol[0] = 0.5f * (tdstcol[0] + c1[0] + wt * (c2[0] + wt * (c3[0] + wt * c4[0])));
					tdstcol[1] = 0.5f * (tdstcol[1] + c1[1] + wt * (c2[1] + wt * (c3[1] + wt * c4[1])));
					tdstcol[2] = 0.5f * (tdstcol[2] + c1[2] + wt * (c2[2] + wt * (c3[2] + wt * c4[2])));
					tdstcol[3] = 1.0f;
				}
				if (isBreaked()) {
					breaked = true;
				}
			}
			// the result of this pass is the source of the next one
			memcpy(tsrc->getBuffer(), tdst->getBuffer(), sizeof(float) * size4);
		}

		// add the finished streak to the output
		float *sourcebuffer = tsrc->getBuffer();
		for (int i = 0; i < size4; i += 4) {
			madd_v3_v3fl(&data[i], &sourcebuffer[i], factor);
			data[i + 3] =  1.0f;
		}

		// reset both working buffers for the next direction
		tdst->clear();
		memcpy(tsrc->getBuffer(), inputTile->getBuffer(), sizeof(float) * size4);
	}

	delete tsrc;
	delete tdst;
}