コード例 #1
0
ファイル: scanline.c プロジェクト: jihnsius/d2r
// This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
// best on 64 bit RISC processors.
// WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
// have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
// endian machine.
// If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
// Unfortunately, it won't compile the whole source, so simply compile everything, change the
// compiler to ccc, remove scanline.o and compile again.
// Please send comments/suggestions to [email protected].
void c_fp_tmap_scanline_per()
{
    ubyte          *dest;
    uint            c;
    int             x, j, index = fx_xleft + (bytes_per_row * fx_y);
    double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
    u_int64_t       destlong;

    u = f2db(fx_u);
    v = f2db(fx_v) * 64.0;
    z = f2db(fx_z);
    l = f2db(fx_l);
    dudx = f2db(fx_du_dx);
    dvdx = f2db(fx_dv_dx) * 64.0;
    dzdx = f2db(fx_dz_dx);
    dldx = f2db(fx_dl_dx);

    rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself

    dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
    x = fx_xright - fx_xleft + 1;

    if (!Transparency_on) {
        if (x >= 8) {
            if ((j = (size_t) dest & 7) != 0) {
                j = 8 - j;

                while (j > 0) {
                    if (++index >= SWIDTH*SHEIGHT) return;
                    *dest++ =
                        gr_fade_table[((int) fabs(l)) * 256 +
                                      (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                    (((int) (u * rec_z)) & 63)]];
                    l += dldx;
                    u += dudx;
                    v += dvdx;
                    z += dzdx;
                    rec_z = 1.0 / z;
                    x--;
                    j--;
                }
            }

            j = x;
            while (j >= 8) {
                destlong =
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]];
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 8;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 16;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 24;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 32;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 40;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 48;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
                                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                                      (((int) (u * rec_z)) & 63)]] << 56;
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;

                *((u_int64_t *) dest) = destlong;
                dest += 8;
                x -= 8;
                j -= 8;
                index += 8;
                if (index+8 >= SWIDTH*SHEIGHT) return;
            }
        }
        while (x-- > 0) {
            if (++index >= SWIDTH*SHEIGHT) return;
            *dest++ =
                gr_fade_table[((int) fabs(l)) * 256 +
                              (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
            l += dldx;
            u += dudx;
            v += dvdx;
            z += dzdx;
            rec_z = 1.0 / z;
        }
    } else {
        if (x >= 8) {
            if ((j = (size_t) dest & 7) != 0) {
                j = 8 - j;

                while (j > 0) {
                    if (++index >= SWIDTH*SHEIGHT) return;
                    c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                    if (c != 255)
                        *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
                    dest++;
                    l += dldx;
                    u += dudx;
                    v += dvdx;
                    z += dzdx;
                    rec_z = 1.0 / z;
                    x--;
                    j--;
                }
            }

            j = x;
            while (j >= 8) {
                destlong = *((u_int64_t *) dest);
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~(u_int64_t)0xFF;
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 8);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 16);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 24);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 32);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 40);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 48);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 56);
                    destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
                }
                l += dldx;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;

                *((u_int64_t *) dest) = destlong;
                dest += 8;
                x -= 8;
                j -= 8;
                index += 8;
                if (index+8 >= SWIDTH*SHEIGHT) return;
            }
        }
        while (x-- > 0) {
            if (++index >= SWIDTH*SHEIGHT) return;
            c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
            if (c != 255)
                *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
            dest++;
            l += dldx;
            u += dudx;
            v += dvdx;
            z += dzdx;
            rec_z = 1.0 / z;
        }
    }
}
コード例 #2
0
ファイル: scanline.c プロジェクト: jihnsius/d2r
// Used for energy centers. See comments for c_tmap_scanline_per().
void c_fp_tmap_scanline_per_nolight()
{
    ubyte	       *dest;
    uint		c;
    int		x, j, index = fx_xleft + (bytes_per_row * fx_y);
    double		u, v, z, dudx, dvdx, dzdx, rec_z;
    u_int64_t	destlong;

    u = f2db(fx_u);
    v = f2db(fx_v) * 64.0;
    z = f2db(fx_z);
    dudx = f2db(fx_du_dx);
    dvdx = f2db(fx_dv_dx) * 64.0;
    dzdx = f2db(fx_dz_dx);

    rec_z = 1.0 / z;

    dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));

    x = fx_xright - fx_xleft + 1;
    if (!Transparency_on) {
        if (x >= 8) {
            if ((j = (size_t) dest & 7) != 0) {
                j = 8 - j;

                while (j > 0) {
                    if (++index >= SWIDTH*SHEIGHT) return;
                    *dest++ =
                        (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                      (((int) (u * rec_z)) & 63)];
                    u += dudx;
                    v += dvdx;
                    z += dzdx;
                    rec_z = 1.0 / z;
                    x--;
                    j--;
                }
            }

            while (j >= 8) {
                destlong =
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)];
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 8;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 16;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 24;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 32;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 40;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 48;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                destlong |=
                    (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                       (((int) (u * rec_z)) & 63)] << 56;
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;

                *((u_int64_t *) dest) = destlong;
                dest += 8;
                x -= 8;
                j -= 8;
                index +=8;
                if (index+8 >= SWIDTH*SHEIGHT) return;
            }
        }
        while (x-- > 0) {
            if (++index >= SWIDTH*SHEIGHT) return;
            *dest++ =
                (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                   (((int) (u * rec_z)) & 63)];
            u += dudx;
            v += dvdx;
            z += dzdx;
            rec_z = 1.0 / z;
        }
    } else {
        x = fx_xright - fx_xleft + 1;

        if (x >= 8) {
            if ((j = (size_t) dest & 7) != 0) {
                j = 8 - j;

                while (j > 0) {
                    if (++index >= SWIDTH*SHEIGHT) return;
                    c =
                        (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                      (((int) (u * rec_z)) & 63)];
                    if (c != 255)
                        *dest = c;
                    dest++;
                    u += dudx;
                    v += dvdx;
                    z += dzdx;
                    rec_z = 1.0 / z;
                    x--;
                    j--;
                }
            }

            j = x;
            while (j >= 8) {
                destlong = *((u_int64_t *) dest);
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~(u_int64_t)0xFF;
                    destlong |= (u_int64_t) c;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 8);
                    destlong |= (u_int64_t) c << 8;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 16);
                    destlong |= (u_int64_t) c << 16;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 24);
                    destlong |= (u_int64_t) c << 24;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 32);
                    destlong |= (u_int64_t) c << 32;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 40);
                    destlong |= (u_int64_t) c << 40;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 48);
                    destlong |= (u_int64_t) c << 48;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;
                c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                                  (((int) (u * rec_z)) & 63)];
                if (c != 255) {
                    destlong &= ~((u_int64_t)0xFF << 56);
                    destlong |= (u_int64_t) c << 56;
                }
                u += dudx;
                v += dvdx;
                z += dzdx;
                rec_z = 1.0 / z;

                *((u_int64_t *) dest) = destlong;
                dest += 8;
                x -= 8;
                j -= 8;
                index += 8;
                if (index+8 >= SWIDTH*SHEIGHT) return;
            }
        }
        while (x-- > 0) {
            if (++index >= SWIDTH*SHEIGHT) return;
            c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
                              (((int) (u * rec_z)) & 63)];
            if (c != 255)
                *dest = c;
            dest++;
            u += dudx;
            v += dvdx;
            z += dzdx;
            rec_z = 1.0 / z;
        }
    }
}
コード例 #3
0
ファイル: scanline.c プロジェクト: btb/d1x
// This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
// best on 64 bit RISC processors.
// WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
// have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
// endian machine.
// If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
// Unfortunately, it won't compile the whole source, so simply compile everything, change the
// compiler to ccc, remove scanline.o and compile again.
// Please send comments/suggestions to [email protected].
// Per-pixel building blocks for c_fp_tmap_scanline_per(). They expand in place and work
// directly on that function's locals, so the 8-pixel groups stay fully unrolled.

// Texel at the exactly divided position (u/z, v/z); used for the unaligned head and the tail.
#define FP_LIN_EXACT_TEXEL() \
	(texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)])

// Texel at the linearly interpolated position inside the current 8-pixel group.
#define FP_LIN_TEXEL() \
	(texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)])

// Fade-table entry for `texel`; bits 8..14 of l select the 256-entry light level.
#define FP_LIN_SHADE(texel) \
	(fadetable[(l & 0x7f00) + (texel)])

// One-pixel step with an exact reciprocal (head and tail loops).
#define FP_LIN_EXACT_STEP() \
	do { \
		l += dldx; \
		u += dudx; \
		v += dvdx; \
		z += dzdx; \
		rec_z = 1.0 / z; \
	} while (0)

// One-pixel step of the interpolated texture position inside a group.
#define FP_LIN_ADVANCE() \
	do { \
		ubyz += du1; \
		vbyz += dv1; \
	} while (0)

// Given ubyz0/vbyz0 (u/z and v/z at the first pixel of a group), move u, v, z eight pixels
// ahead, evaluate u/z and v/z there and derive the per-pixel increments for the group.
#define FP_LIN_NEXT_GROUP() \
	do { \
		u += dudx8; \
		v += dvdx8; \
		z += dzdx8; \
		rec_z = 1.0 / z; \
		ubyz8 = u * rec_z; \
		vbyz8 = v * rec_z; \
		du1 = (ubyz8 - ubyz0) / 8.0; \
		dv1 = (vbyz8 - vbyz0) / 8.0; \
		ubyz = ubyz0; \
		vbyz = vbyz0; \
	} while (0)

// Shade one pixel into byte lane `shift` of destlong (which must start out as 0) and step
// the light value.
#define FP_LIN_OPAQUE(shift) \
	do { \
		destlong |= (u_int64_t) FP_LIN_SHADE((uint) FP_LIN_TEXEL()) << (shift); \
		l += dldx; \
	} while (0)

// Like FP_LIN_OPAQUE, but a TRANSPARENCY_COLOR texel leaves the byte already held in that
// lane of destlong (the current frame buffer content) untouched.
#define FP_LIN_KEYED(shift) \
	do { \
		c = FP_LIN_TEXEL(); \
		if (c != TRANSPARENCY_COLOR) { \
			destlong &= ~((u_int64_t) 0xFF << (shift)); \
			destlong |= (u_int64_t) FP_LIN_SHADE(c) << (shift); \
		} \
		l += dldx; \
	} while (0)

// Draws one perspective-corrected, lit, textured scanline.
//
// Takes no arguments; everything comes from globals. The span covers columns
// fx_xleft..fx_xright of row fx_y in write_buffer. fx_u/fx_v/fx_z/fx_l are the start values
// and fx_du_dx/fx_dv_dx/fx_dz_dx/fx_dl_dx the per-pixel increments. Texels come from pixptr
// and are shaded through gr_fade_table. When Transparency_on is set, texels equal to
// TRANSPARENCY_COLOR are not drawn.
//
// Spans of 8 or more pixels are drawn in three phases: single pixels with an exact divide
// until dest is 8-byte aligned, then groups of 8 pixels assembled in a 64-bit word with u/z
// and v/z interpolated linearly across the group, then the remaining pixels with an exact
// divide again. Shift 0 is the first pixel of a group, which only lands at the lowest
// address on little-endian machines.
void c_fp_tmap_scanline_per()
{
	ubyte	       *dest;
	ubyte		c;
	int		x;
	double		u, v, z, dudx, dvdx, dzdx, rec_z;
	double		ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1;
	double		dudx8, dvdx8, dzdx8;
	fix		l, dldx;
	u_int64_t	destlong;

	// give dumb compilers a chance to put these global pointers into registers or at least have
	// nicer names :)
	ubyte	       *texmap = pixptr, *fadetable = gr_fade_table;

#ifdef CYCLECOUNT
	// Zeroed so that a span whose group loop never runs yields a time of 0, which the
	// statistics below ignore, instead of reading uninitialised values.
	unsigned long	start = 0, stop = 0, time;
	static unsigned long sum, count;
#endif

	// v is pre-scaled by 64 to avoid the multiplication when accessing the 64x64 texture array
	u = f2db(fx_u);
	v = f2db(fx_v) * 64.0;
	z = f2db(fx_z);
	l = fx_l >> 8;

	dudx = f2db(fx_du_dx);
	dvdx = f2db(fx_dv_dx) * 64.0;
	dzdx = f2db(fx_dz_dx);
	dldx = fx_dl_dx >> 8;

	dudx8 = dudx * 8.0;
	dvdx8 = dvdx * 8.0;
	dzdx8 = dzdx * 8.0;

	rec_z = 1.0 / z;	// multiplication is often faster than division

	dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
	x = fx_xright - fx_xleft + 1;

	if (!Transparency_on) {
		if (x >= 8) {
			// draw till we are on a 8-byte aligned address
			for ( ; (size_t) dest & 7; --x) {
				*dest++ = FP_LIN_SHADE((uint) FP_LIN_EXACT_TEXEL());
				FP_LIN_EXACT_STEP();
			}

			// Now draw 8 pixels at once, interpolating 1/z linearly. Artifacts of the
			// interpolation aren't really noticeable; many games even interpolate over 16
			// pixels.

			// The group setup is done once before the loop and then at the end of each
			// iteration instead of simply at the start of the loop, because the scheduler
			// can then interleave it with the texture accesses. Silly, but gains a few fps.
			ubyz0 = u * rec_z;
			vbyz0 = v * rec_z;
			FP_LIN_NEXT_GROUP();

			// This loop is the "hot spot" of the game; it takes about 70% of the time. The
			// major weak point are the many integer casts, which have to go through memory
			// on processors < 21264. But when using integers, one needs to compensate for
			// inexactness, and the code ends up being not really faster.
			for ( ; x >= 8; x -= 8) {
#ifdef CYCLECOUNT
				start = virtcc();
#endif
				destlong = 0;
				FP_LIN_OPAQUE(0);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(8);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(16);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(24);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(32);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(40);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(48);
				FP_LIN_ADVANCE();
				FP_LIN_OPAQUE(56);

				// The far end of this group is the near end of the next one.
				ubyz0 = ubyz8;
				vbyz0 = vbyz8;
				FP_LIN_NEXT_GROUP();

				*((u_int64_t *) dest) = destlong;
				dest += 8;
#ifdef CYCLECOUNT
				stop = virtcc();
#endif
			}
			// compensate for being calculated once too often
			u -= dudx8;
			v -= dvdx8;
			z -= dzdx8;
#ifdef CYCLECOUNT
			time = stop - start;
			if (time > 10 && time < 900) {
				sum += time;
				++count;
				if (count % 10000 == 1)
					printf("%f %lu\n", (double) sum / (double) count, time);
			}
#endif
		}

		// Draw the last few (<8) pixels.
		rec_z = 1.0 / z;
		for ( ; x > 0; x--) {
			*dest++ = FP_LIN_SHADE((uint) FP_LIN_EXACT_TEXEL());
			FP_LIN_EXACT_STEP();
		}
	} else {		// Transparency_on
		if (x >= 8) {
			// draw till we are on a 8-byte aligned address
			for ( ; (size_t) dest & 7; --x) {
				c = (uint) FP_LIN_EXACT_TEXEL();
				if (c != TRANSPARENCY_COLOR)
					*dest = FP_LIN_SHADE(c);
				dest++;
				FP_LIN_EXACT_STEP();
			}

			ubyz0 = u * rec_z;
			vbyz0 = v * rec_z;
			FP_LIN_NEXT_GROUP();

			// Read 8 existing bytes, replace the non-transparent ones, write them back.
			for ( ; x >= 8; x -= 8) {
				destlong = *((u_int64_t *) dest);

				FP_LIN_KEYED(0);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(8);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(16);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(24);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(32);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(40);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(48);
				FP_LIN_ADVANCE();
				FP_LIN_KEYED(56);

				*((u_int64_t *) dest) = destlong;
				dest += 8;

				// The far end of this group is the near end of the next one.
				ubyz0 = ubyz8;
				vbyz0 = vbyz8;
				FP_LIN_NEXT_GROUP();
			}
			// compensate for being calculated once too often
			u -= dudx8;
			v -= dvdx8;
			z -= dzdx8;
		}

		// Draw the last few (<8) pixels.
		rec_z = 1.0 / z;
		for ( ; x > 0; x--) {
			c = (uint) FP_LIN_EXACT_TEXEL();
			if (c != TRANSPARENCY_COLOR)
				*dest = FP_LIN_SHADE(c);
			dest++;
			FP_LIN_EXACT_STEP();
		}
	}
}

#undef FP_LIN_KEYED
#undef FP_LIN_OPAQUE
#undef FP_LIN_NEXT_GROUP
#undef FP_LIN_ADVANCE
#undef FP_LIN_EXACT_STEP
#undef FP_LIN_SHADE
#undef FP_LIN_TEXEL
#undef FP_LIN_EXACT_TEXEL
コード例 #4
0
ファイル: lightmap.c プロジェクト: paud/d2x-xl
// Builds the light map textures for a run of segments, starting at segNum.
//
// segNum	index of the first segment to process. When it is <= 0 the existing
//			light maps are destroyed (DestroyLightMaps) and the light source list
//			is rebuilt (InitLightData); if InitLightData reports failure the
//			function returns without doing anything else.
//
// The end of the run (lastSeg) is set by INIT_PROGRESS_LOOP, which is defined
// elsewhere -- presumably segNum + PROGRESS_INCR clamped to
// gameData.segs.nSegments; confirm against the macro.
//
// Default path (LMAP_REND2TEX == 0), for each of the 6 sides of every segment:
//   1. create and bind an Xs x Ys texture,
//   2. compute a world position for every texel from the side's corner vertices,
//   3. add the contribution of every entry of lightData whose range (extended
//      by the side radius) reaches the side,
//   4. scale down any texel whose brightest channel exceeds 1.0 and upload the
//      texels one by one with glTexSubImage2D.
//
// NOTE(review): this function #defines Xs, Ys and pixelOffset and never
// #undefs them, so they stay visible in the rest of the translation unit.
void ComputeLightMaps (int segNum)
{
	tSegment		*segP;
	tSide			*sideP;
	tLightMap	*lmapP;
	int			sideNum, lastSeg, mapNum;
	short			sideVerts [4];

#if 1
#	define		Xs 8
#	define		Ys 8
#else
	int			Xs = 8, Ys = 8;
#endif
	int			x, y, xy;
	int			v0, v1, v2, v3;
	GLfloat		*pTexColor;
	GLfloat		texColor [Xs][Ys][4];	// RGBA accumulator, one entry per light map texel

#if 1
#	define		pixelOffset 0.0
#else
	double		pixelOffset = 0; //0.5
#endif
	int			l, s, nMethod, sideRad;
	GLfloat		tempBright = 0;
	vmsVector	OffsetU, OffsetV, pixelPos [Xs][Ys], *pPixelPos, rayVec, faceNorm, sidePos;
	double		brightPrct, pixelDist;
	double		delta;
	// f_offset [n] = n / (Xs - 1): fractional position of texel n along an edge
	double		f_offset [8] = {
						0.0 / (Xs - 1), 1.0 / (Xs - 1), 2.0 / (Xs - 1), 3.0 / (Xs - 1),
						4.0 / (Xs - 1), 5.0 / (Xs - 1), 6.0 / (Xs - 1), 7.0 / (Xs - 1)
						};
#if LMAP_REND2TEX
	ubyte			brightMap [512];
	ubyte			lightMap [512*3];
	tUVL			lMapUVL [4];
	fix			nDist, nMinDist;
	GLuint		lightMapId;
	int			bStart;
	int			i;		// corner index; the loops below used it without any declaration
#endif

if (segNum <= 0) {
	// first call of a run: throw away old data and rebuild the light list
	DestroyLightMaps ();
	if (!InitLightData ())
		return;
#if LMAP_REND2TEX
	// NOTE(review): brightMap and lMapUVL are locals, yet they are only
	// initialized here, i.e. not on calls with segNum > 0 -- confirm intent.
	InitBrightMap (brightMap);
	memset (&lMapUVL, 0, sizeof (lMapUVL));
#endif
	}
INIT_PROGRESS_LOOP (segNum, lastSeg, gameData.segs.nSegments);
// Go through each side of each segment and create a light map for it.
// mapNum runs in step with (segment, side): 6 light maps per segment.
for (mapNum = 6 * segNum, segP = gameData.segs.segments + segNum;
	  segNum < lastSeg;
	  segNum++, segP++) {
	for (sideNum = 0, sideP = segP->sides; sideNum < 6; sideNum++, mapNum++, sideP++) {
#if TEXTURE_CHECK
		if ((segP->children [sideNum] >= 0) && !IS_WALL (WallNumS (sideP)))
			continue; 	//skip open sides
#endif
		GetSideVerts (sideVerts, segNum, sideNum);
#if LMAP_REND2TEX
		OglCreateFBuffer (&lightMaps [mapNum].fbuffer, 64, 64);
		OglEnableFBuffer (&lightMaps [mapNum].fbuffer);
#else
		lightMaps [mapNum].handle = EmptyTexture (Xs, Ys);
		OGL_BINDTEX (lightMaps [mapNum].handle);
		glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
		glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);

		// nMethod != 0: the side is split along the corner 0 - corner 2 diagonal
		// (or is a plain quad); otherwise the other diagonal is used.
		nMethod = (sideP->nType == SIDE_IS_QUAD) || (sideP->nType == SIDE_IS_TRI_02);
		pPixelPos = &pixelPos [0][0];
		for (x = 0; x < Xs; x++) {
			for (y = 0; y < Ys; y++, pPixelPos++) {
				if (nMethod) {
					v0 = sideVerts [0];
					v2 = sideVerts [2];
					if (x >= y)	{
						v1 = sideVerts [1];
						// Calculate this texel's place in the world: walk the
						// fraction f_offset [x] along edge v0->v1 and f_offset [y]
						// along edge v1->v2, then add corner v0.
						FindOffset (&OffsetU, gameData.segs.vertices [v0], gameData.segs.vertices [v1], f_offset [x]);
						FindOffset (&OffsetV, gameData.segs.vertices [v1], gameData.segs.vertices [v2], f_offset [y]);
						VmVecAdd (pPixelPos, &OffsetU, &OffsetV);
						VmVecInc (pPixelPos, gameData.segs.vertices + v0);  //This should be the real world position of the pixel.
						//Find Normal
						VmVecNormal (&faceNorm, gameData.segs.vertices + v0, gameData.segs.vertices + v2, gameData.segs.vertices + v1);
						}
					else {
						// Same idea for the other triangle; notice y/x and
						// OffsetU/OffsetV are swapped from above.
						v3 = sideVerts [3];
						FindOffset (&OffsetV, gameData.segs.vertices [v0], gameData.segs.vertices [v3], f_offset [y]);
						FindOffset (&OffsetU, gameData.segs.vertices [v3], gameData.segs.vertices [v2], f_offset [x]);
						VmVecAdd (pPixelPos, &OffsetU, &OffsetV);
						VmVecInc (pPixelPos, gameData.segs.vertices + v0);  //This should be the real world position of the pixel.
						VmVecNormal (&faceNorm, gameData.segs.vertices + v0, gameData.segs.vertices + v3, gameData.segs.vertices + v2);
						}
					}
				else {
					// Any remaining side type (presumably SIDE_IS_TRI_13 -- confirm;
					// SIDE_IS_TRI_02 is already handled by the branch above).
					// NOTE(review): faceNorm is not computed on this path.
					v1 = sideVerts [1];
					v3 = sideVerts [3];
					if (Xs - x >= y) {
						v0 = sideVerts [0];
						FindOffset (&OffsetU, gameData.segs.vertices [v0], gameData.segs.vertices [v1], f_offset [x]);
						FindOffset (&OffsetV, gameData.segs.vertices [v0], gameData.segs.vertices [v3], f_offset [y]);
						VmVecAdd (pPixelPos, &OffsetU, &OffsetV);
						VmVecInc (pPixelPos, gameData.segs.vertices + v0);  //This should be the real world position of the pixel.
						}
					else {
						v2 = sideVerts [2];
						//Not certain this is correct, may need to subtract something
						FindOffset (&OffsetV, gameData.segs.vertices [v2], gameData.segs.vertices [v1], f_offset [Xs - 1 - y]);
						FindOffset (&OffsetU, gameData.segs.vertices [v2], gameData.segs.vertices [v3], f_offset [Xs - 1 - x]);
						VmVecAdd (pPixelPos, &OffsetU, &OffsetV);
						VmVecInc (pPixelPos, gameData.segs.vertices + v2);  //This should be the real world position of the pixel.
						}
					}
				}
			}
#endif
		//Calculate LightVal
		//Next iterate through all the lights and add the light to the pixel every iteration.
		sideRad = (int) (SideRad (segNum, sideNum) + 0.5);	// +0.5: round to nearest
		// sidePos: average of the four corner texel positions, used for the
		// quick "is this light close enough" test below.
		// NOTE(review): with LMAP_REND2TEX set, pixelPos has not been filled in
		// at this point (the loop that fills it is in the #else branch above).
		VmVecAvg4 (
			&sidePos,
			&pixelPos [0][0],
			&pixelPos [Xs-1][0],
			&pixelPos [Xs-1][Ys-1],
			&pixelPos [0][Ys-1]);
#if 1
		// clear all texels to RGB = 0, then set every alpha component to 1
		pTexColor = texColor [0][0] + 3;
		memset (texColor, 0, sizeof (texColor));
		for (xy = Xs * Ys; xy; xy--, pTexColor += 4)
			*pTexColor = 1;
#else
		pTexColor = texColor [0][0];
		for (x = 0; x < Xs; x++) {
			for (y = 0; y < Ys; y++, pTexColor += 4) {
				pTexColor [0] =
				pTexColor [1] =
				pTexColor [2] = 0;
				pTexColor [3] = 1;
				}
			}
#endif
#if LMAP_REND2TEX
		bStart = 1;
#endif
		for (l = 0, lmapP = lightData; l < numLightMaps; l++, lmapP++) {
#if LMAP_REND2TEX
			nMinDist = 0x7FFFFFFF;
			// get the distances of all 4 tSide corners to the light source center
			// scaled by the light source range
			for (i = 0; i < 4; i++) {
				int svi = sideVerts [i];
				sidePos.x = gameData.segs.vertices [svi].x;
				sidePos.y = gameData.segs.vertices [svi].y;
				sidePos.z = gameData.segs.vertices [svi].z;
				nDist = f2i (VmVecDist (&sidePos, &lmapP->pos));	// calc distance
				if (nMinDist > nDist)
					nMinDist = nDist;
				lMapUVL [i].u = F1_0 * (double) nDist / (double) lmapP->range;	// scale distance
				}
			if ((lmapP->color [0] + lmapP->color [1] + lmapP->color [2] < 3) &&
				(nMinDist < lmapP->range + sideRad)) {
				// create and initialize an OpenGL texture for the lightmap
				// NOTE(review): the image is uploaded before lightMapId is bound,
				// and lMapUVL [i].v is never assigned in this function -- confirm.
				InitLightMap (lightMap, brightMap, lmapP->color);
				glGenTextures (1, &lightMapId);
				glTexImage1D (GL_TEXTURE_1D, 0, GL_RGB, 512, 1, GL_RGB, GL_UNSIGNED_BYTE, lightMap);
				OglActiveTexture (GL_TEXTURE0_ARB);
				glEnable (GL_TEXTURE_1D);
				glEnable (GL_BLEND);
				glBlendFunc (GL_ONE, bStart ? GL_ZERO : GL_ONE);
				// If processing first light, set the lightmap, else modify it
				glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, bStart ? GL_REPLACE : GL_ADD);
				glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_COLOR, GL_RGBA);
				glBindTexture (GL_TEXTURE_1D, lightMapId);
				// extend the lightmap to the texture edges
				glTexParameteri (GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP);
				glTexParameteri (GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP);
				glBegin (GL_QUADS);
				glColor4f (1.0f, 1.0f, 1.0f, 1.0f);
				for (i = 0; i < 4; i++) {
					glMultiTexCoord2f (GL_TEXTURE0_ARB, f2fl (lMapUVL [i].u), f2fl (lMapUVL [i].v));
					glVertex3f (f2fl (gameData.segs.vertices [sideVerts [i]].x),
									f2fl (gameData.segs.vertices [sideVerts [i]].y),
								   f2fl (gameData.segs.vertices [sideVerts [i]].z));
					}
				glEnd ();
				glDisable (GL_BLEND);
				glDisable (GL_TEXTURE_1D);
				glDeleteTextures (1, &lightMapId);
				bStart = 0;
				}
#else
			// coarse rejection: skip lights that cannot reach any part of the side
			if (f2i (VmVecDist (&sidePos, &lmapP->pos)) < lmapP->range + sideRad) {
				pPixelPos = &pixelPos [0][0];
				pTexColor = texColor [0][0];
#if 1
				for (xy = Xs * Ys; xy; xy--, pPixelPos++, pTexColor += 4) {
#else
				for (x = 0; x < Xs; x++)
					for (y = 0; y < Ys; y++, pPixelPos++, pTexColor += 4) {
#endif
						// per-texel range test
						pixelDist = f2i (VmVecDist (pPixelPos, &lmapP->pos));
						if (pixelDist >= lmapP->range)
							continue;
						//Find angle to this light.
						VmVecSub (&rayVec, &lmapP->pos, pPixelPos);
						delta = f2db (VmVecDeltaAng (&lmapP->dir, &rayVec, NULL));
						if (delta < 0)
							delta = -delta;
						// quadratic falloff with distance
						brightPrct = 1 - (pixelDist / lmapP->range);
						brightPrct *= brightPrct; //square result
						// small angle between light direction and ray: quarter the
						// contribution (threshold as in the original code)
						if (delta < 0.245)
							brightPrct /= 4;
						pTexColor [0] += (GLfloat) (brightPrct * lmapP->color [0]);
						pTexColor [1] += (GLfloat) (brightPrct * lmapP->color [1]);
						pTexColor [2] += (GLfloat) (brightPrct * lmapP->color [2]);
						}
				}
#endif
			}
#if LMAP_REND2TEX
		// keep the rendered texture as the light map, release the rest of the buffer
		lightMaps [mapNum].handle = lightMaps [mapNum].fbuffer.texId;
		lightMaps [mapNum].fbuffer.texId = 0;
		OglDestroyFBuffer (&lightMaps [mapNum].fbuffer);
#else
		pPixelPos = &pixelPos [0][0];
		pTexColor = texColor [0][0];
		for (x = 0; x < Xs; x++)
			for (y = 0; y < Ys; y++, pPixelPos++, pTexColor += 4) {
				// find the brightest of the three color channels ...
				tempBright = 0;
				for (s = 0; s < 3; s++)
					if (pTexColor [s] > tempBright)
						tempBright = pTexColor [s];
				// ... and scale the texel so that no channel exceeds 1.0
				if (tempBright > 1.0)
					for (s = 0; s < 3; s++)
						pTexColor [s] /= tempBright;
				glTexSubImage2D (GL_TEXTURE_2D, 0, x, y, 1, 1, GL_RGBA, GL_FLOAT, pTexColor);
				}
#endif
		}
	}
}

//------------------------------------------------------------------------------

// Tells whether light map data exists: 1 if lightData is non-NULL, else 0.
int HaveLightMaps (void)
{
if (lightData == NULL)
	return 0;
return 1;
}

//------------------------------------------------------------------------------

// Index of the next segment to be passed to ComputeLightMaps by the poll
// callback below.
static int segNum = 0;

// Poll callback that computes the light maps in slices of PROGRESS_INCR
// segments per call.
//
// nItems, cItem	not used here
// m					menu items; item 0 has its value incremented and its rebuild
//						flag set after each slice (presumably a progress gauge -- confirm)
// key				set to 0 after a slice was processed, or to -2 once segNum has
//						reached gameData.segs.nSegments (presumably tells the caller
//						to stop polling -- confirm)
static void CreateLightMapsPoll (int nItems, tMenuItem *m, int *key, int cItem)
{
GrPaletteStepLoad (NULL);
if (segNum >= gameData.segs.nSegments) {
	// nothing left to compute
	*key = -2;
	GrPaletteStepLoad (NULL);
	return;
	}
ComputeLightMaps (segNum);
segNum += PROGRESS_INCR;
m->value++;
m->rebuild = 1;
*key = 0;
GrPaletteStepLoad (NULL);
}
コード例 #5
0
ファイル: scanline.c プロジェクト: btb/d1x
// Used for energy centers. See comments for c_tmap_scanline_per().
//
// Draws one perspective-correct, unlit texture scanline into write_buffer.
//
// Inputs are the file's globals: fx_u, fx_v, fx_z and their per-pixel deltas
// fx_du_dx, fx_dv_dx, fx_dz_dx (converted with f2db), the span fx_xleft ..
// fx_xright on row fx_y, the texture pixptr and the flag Transparency_on.
// The texel index is (v & (64 * 63)) + (u & 63) with v pre-multiplied by 64,
// i.e. the texture is addressed as 64 texels per row and wraps in both
// directions.
//
// Spans of 8 or more pixels are drawn in three phases:
//   1. single pixels, dividing by z for each one, until dest is 8-byte aligned,
//   2. groups of 8 pixels: u/z and v/z are computed exactly at both ends of the
//      group and interpolated linearly in between; the 8 texels are packed
//      into one 64-bit word that is stored with a single write,
//   3. the remaining (< 8) pixels, again one at a time.
// With Transparency_on set, texels equal to TRANSPARENCY_COLOR leave the
// destination pixel untouched.
//
// NOLIGHT_PIXEL_SHIFT (k) is the bit position of pixel k (0 = lowest address)
// inside the 64-bit word. The original code always used 8 * k, which reverses
// each group of 8 pixels on big-endian machines; the order is now selected at
// compile time. Compilers that define neither macro tested below keep the
// little-endian layout, exactly as before.
#if defined(WORDS_BIGENDIAN) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#	define NOLIGHT_PIXEL_SHIFT(k)	(56 - 8 * (k))
#else
#	define NOLIGHT_PIXEL_SHIFT(k)	(8 * (k))
#endif

// Texel index for texture coordinates uz, vz (vz already scaled by 64).
#define NOLIGHT_TEXEL_INDEX(uz, vz)	((((int) (vz)) & (64 * 63)) + (((int) (uz)) & 63))

void c_fp_tmap_scanline_per_nolight()
{
	ubyte          *dest;
	ubyte           c;
	int             x, k;
	double          u, v, z, dudx, dvdx, dzdx, rec_z;
	double          ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1;
	double          dudx8, dvdx8, dzdx8;
	u_int64_t       destlong;

	ubyte          *texmap = pixptr;

	u = f2db(fx_u);
	v = f2db(fx_v) * 64.0;		// pre-scale v by the texture row length
	z = f2db(fx_z);

	dudx = f2db(fx_du_dx);
	dvdx = f2db(fx_dv_dx) * 64.0;
	dzdx = f2db(fx_dz_dx);

	// deltas for a whole group of 8 pixels
	dudx8 = dudx * 8.0;
	dvdx8 = dvdx * 8.0;
	dzdx8 = dzdx * 8.0;

	rec_z = 1.0 / z;

	dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
	x = fx_xright - fx_xleft + 1;	// number of pixels to draw

	if (!Transparency_on) { // I'm not sure this is ever used (energy texture is transparent)
		if (x >= 8) {
			// Phase 1: at most 7 single pixels until dest is 8-byte aligned.
			for ( ; (size_t) dest & 7; --x) {
				*dest++ = texmap[NOLIGHT_TEXEL_INDEX(u * rec_z, v * rec_z)];
				u += dudx;
				v += dvdx;
				z += dzdx;
				rec_z = 1.0 / z;
			}

			// Exact u/z, v/z at the start of the first group ...
			ubyz0 = u * rec_z;
			vbyz0 = v * rec_z;

			// ... and 8 pixels further on.
			u += dudx8;
			v += dvdx8;
			z += dzdx8;
			rec_z = 1.0 / z;
			ubyz8 = u * rec_z;
			vbyz8 = v * rec_z;

			// Linear step between the two exact values.
			du1 = (ubyz8 - ubyz0) / 8.0;
			dv1 = (vbyz8 - vbyz0) / 8.0;
			ubyz = ubyz0;
			vbyz = vbyz0;

			// Phase 2: whole groups of 8 pixels.
			for ( ; x >= 8; x -= 8) {
				destlong = 0;
				for (k = 0; k < 8; k++) {
					destlong |= (u_int64_t) texmap[NOLIGHT_TEXEL_INDEX(ubyz, vbyz)]
							<< NOLIGHT_PIXEL_SHIFT(k);
					// the step after the last texel is harmless: ubyz/vbyz
					// are reloaded from the exact values below
					ubyz += du1;
					vbyz += dv1;
				}
				*((u_int64_t *) dest) = destlong;
				dest += 8;

				// The end of this group is the start of the next one.
				ubyz0 = ubyz8;
				vbyz0 = vbyz8;

				u += dudx8;
				v += dvdx8;
				z += dzdx8;
				rec_z = 1.0 / z;
				ubyz8 = u * rec_z;
				vbyz8 = v * rec_z;

				du1 = (ubyz8 - ubyz0) / 8.0;
				dv1 = (vbyz8 - vbyz0) / 8.0;
				ubyz = ubyz0;
				vbyz = vbyz0;
			}

			// u, v, z are one group ahead of dest here; step back.
			u -= dudx8;
			v -= dvdx8;
			z -= dzdx8;
		}

		// Phase 3 (or the whole span if it is shorter than 8 pixels).
		rec_z = 1.0 / z;
		for ( ; x > 0; x--) {
			*dest++ = texmap[NOLIGHT_TEXEL_INDEX(u * rec_z, v * rec_z)];
			u += dudx;
			v += dvdx;
			z += dzdx;
			rec_z = 1.0 / z;
		}
	} else {		// Transparency_on
		if (x >= 8) {
			// Phase 1: at most 7 single pixels until dest is 8-byte aligned.
			for ( ; (size_t) dest & 7; --x) {
				c = texmap[NOLIGHT_TEXEL_INDEX(u * rec_z, v * rec_z)];
				if (c != TRANSPARENCY_COLOR)
					*dest = c;
				dest++;
				u += dudx;
				v += dvdx;
				z += dzdx;
				rec_z = 1.0 / z;
			}

			// Exact u/z, v/z at the start of the first group ...
			ubyz0 = u * rec_z;
			vbyz0 = v * rec_z;

			// ... and 8 pixels further on.
			u += dudx8;
			v += dvdx8;
			z += dzdx8;
			rec_z = 1.0 / z;
			ubyz8 = u * rec_z;
			vbyz8 = v * rec_z;

			// Linear step between the two exact values.
			du1 = (ubyz8 - ubyz0) / 8.0;
			dv1 = (vbyz8 - vbyz0) / 8.0;
			ubyz = ubyz0;
			vbyz = vbyz0;

			// Phase 2: whole groups of 8 pixels, read-modify-write so that
			// transparent texels keep what is already in the buffer.
			for ( ; x >= 8; x -= 8) {
				destlong = *((u_int64_t *) dest);
				for (k = 0; k < 8; k++) {
					c = texmap[NOLIGHT_TEXEL_INDEX(ubyz, vbyz)];
					if (c != TRANSPARENCY_COLOR) {
						destlong &= ~((u_int64_t) 0xFF << NOLIGHT_PIXEL_SHIFT(k));
						destlong |= (u_int64_t) c << NOLIGHT_PIXEL_SHIFT(k);
					}
					// the step after the last texel is harmless: ubyz/vbyz
					// are reloaded from the exact values below
					ubyz += du1;
					vbyz += dv1;
				}
				*((u_int64_t *) dest) = destlong;
				dest += 8;

				// The end of this group is the start of the next one.
				ubyz0 = ubyz8;
				vbyz0 = vbyz8;

				u += dudx8;
				v += dvdx8;
				z += dzdx8;
				rec_z = 1.0 / z;
				ubyz8 = u * rec_z;
				vbyz8 = v * rec_z;

				du1 = (ubyz8 - ubyz0) / 8.0;
				dv1 = (vbyz8 - vbyz0) / 8.0;
				ubyz = ubyz0;
				vbyz = vbyz0;
			}

			// u, v, z are one group ahead of dest here; step back.
			u -= dudx8;
			v -= dvdx8;
			z -= dzdx8;
		}

		// Phase 3 (or the whole span if it is shorter than 8 pixels).
		rec_z = 1.0 / z;
		for ( ; x > 0; x--) {
			c = texmap[NOLIGHT_TEXEL_INDEX(u * rec_z, v * rec_z)];
			if (c != TRANSPARENCY_COLOR)
				*dest = c;
			dest++;
			u += dudx;
			v += dvdx;
			z += dzdx;
			rec_z = 1.0 / z;
		}
	}
}

#undef NOLIGHT_TEXEL_INDEX
#undef NOLIGHT_PIXEL_SHIFT