// This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works // best on 64 bit RISC processors. // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big // endian machine. // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more. // Unfortunately, it won't compile the whole source, so simply compile everything, change the // compiler to ccc, remove scanline.o and compile again. // Please send comments/suggestions to [email protected]. void c_fp_tmap_scanline_per() { ubyte *dest; uint c; int x, j, index = fx_xleft + (bytes_per_row * fx_y); double u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z; u_int64_t destlong; u = f2db(fx_u); v = f2db(fx_v) * 64.0; z = f2db(fx_z); l = f2db(fx_l); dudx = f2db(fx_du_dx); dvdx = f2db(fx_dv_dx) * 64.0; dzdx = f2db(fx_dz_dx); dldx = f2db(fx_dl_dx); rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); x = fx_xright - fx_xleft + 1; if (!Transparency_on) { if (x >= 8) { if ((j = (size_t) dest & 7) != 0) { j = 8 - j; while (j > 0) { if (++index >= SWIDTH*SHEIGHT) return; *dest++ = gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; x--; j--; } } j = x; while (j >= 8) { destlong = (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 8; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 16; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 24; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 32; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 40; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 48; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]] << 56; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; *((u_int64_t *) dest) = destlong; dest += 8; x -= 8; j -= 8; index += 8; if (index+8 >= SWIDTH*SHEIGHT) return; } } while (x-- > 0) { if (++index >= SWIDTH*SHEIGHT) return; *dest++ = gr_fade_table[((int) fabs(l)) * 256 + (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } else { if (x >= 8) { if ((j = (size_t) dest & 7) != 0) { j = 8 - j; while (j > 0) { if (++index >= SWIDTH*SHEIGHT) return; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) *dest = gr_fade_table[((int) fabs(l)) * 256 + c]; dest++; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; x--; j--; } } j = x; while (j >= 8) { destlong = *((u_int64_t *) dest); c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~(u_int64_t)0xFF; destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c]; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 8); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 16); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 24); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 32); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 40); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 48); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 56); destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56; } l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; *((u_int64_t *) dest) = destlong; dest += 8; x -= 8; j -= 8; index += 8; if (index+8 >= SWIDTH*SHEIGHT) return; } } while (x-- > 0) { if (++index >= SWIDTH*SHEIGHT) return; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) *dest = gr_fade_table[((int) fabs(l)) * 256 + c]; dest++; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } }
// Used for energy centers. See comments for c_tmap_scanline_per(). void c_fp_tmap_scanline_per_nolight() { ubyte *dest; uint c; int x, j, index = fx_xleft + (bytes_per_row * fx_y); double u, v, z, dudx, dvdx, dzdx, rec_z; u_int64_t destlong; u = f2db(fx_u); v = f2db(fx_v) * 64.0; z = f2db(fx_z); dudx = f2db(fx_du_dx); dvdx = f2db(fx_dv_dx) * 64.0; dzdx = f2db(fx_dz_dx); rec_z = 1.0 / z; dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); x = fx_xright - fx_xleft + 1; if (!Transparency_on) { if (x >= 8) { if ((j = (size_t) dest & 7) != 0) { j = 8 - j; while (j > 0) { if (++index >= SWIDTH*SHEIGHT) return; *dest++ = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; x--; j--; } } while (j >= 8) { destlong = (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 8; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 16; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 24; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 32; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 40; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 48; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; destlong |= (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)] << 56; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; *((u_int64_t *) dest) = destlong; dest += 8; x -= 8; j -= 8; index +=8; if (index+8 >= SWIDTH*SHEIGHT) return; } } while (x-- > 0) { if (++index >= SWIDTH*SHEIGHT) return; *dest++ = (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } else { x = fx_xright - fx_xleft + 1; if (x >= 8) { if ((j = (size_t) dest & 7) != 0) { j = 8 - j; while (j > 0) { if (++index >= SWIDTH*SHEIGHT) return; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) *dest = c; dest++; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; x--; j--; } } j = x; while (j >= 8) { destlong = *((u_int64_t *) dest); c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~(u_int64_t)0xFF; destlong |= (u_int64_t) c; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 8); destlong |= (u_int64_t) c << 8; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 16); destlong |= (u_int64_t) c << 16; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 24); destlong |= (u_int64_t) c << 24; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 32); destlong |= (u_int64_t) c << 32; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 40); destlong |= (u_int64_t) c << 40; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 48); destlong |= (u_int64_t) c << 48; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) { destlong &= ~((u_int64_t)0xFF << 56); destlong |= (u_int64_t) c << 56; } u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; *((u_int64_t *) dest) = destlong; dest += 8; x -= 8; j -= 8; index += 8; if (index+8 >= SWIDTH*SHEIGHT) return; } } while (x-- > 0) { if (++index >= SWIDTH*SHEIGHT) return; c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != 255) *dest = c; dest++; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } }
// This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works // best on 64 bit RISC processors. // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big // endian machine. // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more. // Unfortunately, it won't compile the whole source, so simply compile everything, change the // compiler to ccc, remove scanline.o and compile again. // Please send comments/suggestions to [email protected]. void c_fp_tmap_scanline_per() { ubyte *dest; ubyte c; int x; double u, v, z, dudx, dvdx, dzdx, rec_z; double ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1; double dudx8, dvdx8, dzdx8; fix l, dldx; u_int64_t destlong;//, destmask; // give dumb compilers a chance to put these global pointers into registers or at least have // nicer names :) ubyte *texmap = pixptr, *fadetable = gr_fade_table; #ifdef CYCLECOUNT unsigned long start, stop, time; static unsigned long sum, count; #endif // v is pre-scaled by 64 to avoid the multiplication when accessing the 64x64 texture array u = f2db(fx_u); v = f2db(fx_v) * 64.0; z = f2db(fx_z); l = fx_l >> 8; dudx = f2db(fx_du_dx); dvdx = f2db(fx_dv_dx) * 64.0; dzdx = f2db(fx_dz_dx); dldx = fx_dl_dx >> 8; dudx8 = dudx * 8.0; dvdx8 = dvdx * 8.0; dzdx8 = dzdx * 8.0; rec_z = 1.0 / z; // multiplication is often faster than division dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); x = fx_xright - fx_xleft + 1; if (!Transparency_on) { if (x >= 8) { // draw till we are on a 8-byte aligned address for ( ; (size_t) dest & 7; --x) { *dest++ = fadetable[(l & 0x7f00) + (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } // Now draw 8 pixels at once, interpolating 1/z linearly. Artifacts of the // interpolation aren't really noticeable; many games even interpolate over 16 // pixels. // We do these calculations once before and then at the end of the loop instead // of simply at the start of the loop, because he scheduler can then interleave // them with the texture accesses. Silly, but gains a few fps. ubyz0 = u * rec_z; vbyz0 = v * rec_z; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; // This loop is the "hot spot" of the game; it takes about 70% of the time. The // major weak point are the many integer casts, which have to go through memory // on processors < 21264. But when using integers, one needs to compensate for // inexactness, and the code ends up being not really faster. for ( ; x >= 8; x -= 8) { #ifdef CYCLECOUNT start = virtcc(); #endif destlong = (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]]; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 8; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 16; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 24; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 32; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 40; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 48; l += dldx; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) fadetable[(l & 0x7f00) + (uint) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]] << 56; l += dldx; ubyz0 = ubyz8; vbyz0 = vbyz8; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; *((u_int64_t *) dest) = destlong; dest += 8; #ifdef CYCLECOUNT stop = virtcc(); #endif } // compensate for being calculated once too often u -= dudx8; v -= dvdx8; z -= dzdx8; #ifdef CYCLECOUNT time = stop - start; if (time > 10 && time < 900) { sum += time; ++count; if (count % 10000 == 1) printf("%f %d\n", (double) sum / (double) count, time); } #endif } // Draw the last few (<8) pixels. rec_z = 1.0 / z; for ( ; x > 0; x--) { *dest++ = fadetable[(l & 0x7f00) + (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } else { // Transparency_on if (x >= 8) { for ( ; (size_t) dest & 7; --x) { c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != TRANSPARENCY_COLOR) *dest = fadetable[(l & 0x7f00) + c]; dest++; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } ubyz0 = u * rec_z; vbyz0 = v * rec_z; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; for ( ; x >= 8; x -= 8) { destlong = *((u_int64_t *) dest); c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c]; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 8); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 8; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 16); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 16; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 24); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 24; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 32); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 32; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 40); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 40; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 48); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 48; } l += dldx; ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 56); destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 56; } l += dldx; *((u_int64_t *) dest) = destlong; dest += 8; ubyz0 = ubyz8; vbyz0 = vbyz8; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; } u -= dudx8; v -= dvdx8; z -= dzdx8; } rec_z = 1.0 / z; for ( ; x > 0; x--) { c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != TRANSPARENCY_COLOR) *dest = fadetable[(l & 0x7f00) + c]; dest++; l += dldx; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } }
void ComputeLightMaps (int segNum) { tSegment *segP; tSide *sideP; tLightMap *lmapP; int sideNum, lastSeg, mapNum; short sideVerts [4]; #if 1 # define Xs 8 # define Ys 8 #else int Xs = 8, Ys = 8; #endif int x, y, xy; int v0, v1, v2, v3; GLfloat *pTexColor;// = {0.0, 0.0, 0.0, 1.0}; GLfloat texColor [Xs][Ys][4]; #if 1 # define pixelOffset 0.0 #else double pixelOffset = 0; //0.5 #endif int l, s, nMethod, sideRad; GLfloat tempBright = 0; vmsVector OffsetU, OffsetV, pixelPos [Xs][Ys], *pPixelPos, rayVec, faceNorm, sidePos; double brightPrct, pixelDist; double delta; double f_offset [8] = { 0.0 / (Xs - 1), 1.0 / (Xs - 1), 2.0 / (Xs - 1), 3.0 / (Xs - 1), 4.0 / (Xs - 1), 5.0 / (Xs - 1), 6.0 / (Xs - 1), 7.0 / (Xs - 1) }; #if LMAP_REND2TEX ubyte brightMap [512]; ubyte lightMap [512*3]; tUVL lMapUVL [4]; fix nDist, nMinDist; GLuint lightMapId; int bStart; #endif if (segNum <= 0) { DestroyLightMaps (); if (!InitLightData ()) return; #if LMAP_REND2TEX InitBrightMap (brightMap); memset (&lMapUVL, 0, sizeof (lMapUVL)); #endif } INIT_PROGRESS_LOOP (segNum, lastSeg, gameData.segs.nSegments); //Next Go through each surface and create a lightmap for it. for (mapNum = 6 * segNum, segP = gameData.segs.segments + segNum; segNum < lastSeg; segNum++, segP++) { for (sideNum = 0, sideP = segP->sides; sideNum < 6; sideNum++, mapNum++, sideP++) { #if TEXTURE_CHECK if ((segP->children [sideNum] >= 0) && !IS_WALL (WallNumS (sideP))) continue; //skip open sides #endif GetSideVerts (sideVerts, segNum, sideNum); #if LMAP_REND2TEX OglCreateFBuffer (&lightMaps [mapNum].fbuffer, 64, 64); OglEnableFBuffer (&lightMaps [mapNum].fbuffer); #else lightMaps [mapNum].handle = EmptyTexture (Xs, Ys); OGL_BINDTEX (lightMaps [mapNum].handle); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); nMethod = (sideP->nType == SIDE_IS_QUAD) || (sideP->nType == SIDE_IS_TRI_02); pPixelPos = &pixelPos [0][0]; for (x = 0; x < Xs; x++) { for (y = 0; y < Ys; y++, pPixelPos++) { if (nMethod) { v0 = sideVerts [0]; v2 = sideVerts [2]; if (x >= y) { v1 = sideVerts [1]; //Next calculate this pixel's place in the world (tricky stuff) FindOffset (&OffsetU, gameData.segs.vertices [v0], gameData.segs.vertices [v1], f_offset [x]); //(((double) x) + pixelOffset) / (Xs - 1)); //took me forever to figure out this should be an inverse thingy FindOffset (&OffsetV, gameData.segs.vertices [v1], gameData.segs.vertices [v2], f_offset [y]); //(((double) y) + pixelOffset) / (Ys - 1)); VmVecAdd (pPixelPos, &OffsetU, &OffsetV); VmVecInc (pPixelPos, gameData.segs.vertices + v0); //This should be the real world position of the pixel. //Find Normal VmVecNormal (&faceNorm, gameData.segs.vertices + v0, gameData.segs.vertices + v2, gameData.segs.vertices + v1); } else { //Next calculate this pixel's place in the world (tricky stuff) v3 = sideVerts [3]; FindOffset (&OffsetV, gameData.segs.vertices [v0], gameData.segs.vertices [v3], f_offset [y]); //(((double) y) + pixelOffset) / (Xs - 1)); //Notice y/x and OffsetU/OffsetV are swapped from above FindOffset (&OffsetU, gameData.segs.vertices [v3], gameData.segs.vertices [v2], f_offset [x]); //(((double) x) + pixelOffset) / (Ys - 1)); VmVecAdd (pPixelPos, &OffsetU, &OffsetV); VmVecInc (pPixelPos, gameData.segs.vertices + v0); //This should be the real world position of the pixel. VmVecNormal (&faceNorm, gameData.segs.vertices + v0, gameData.segs.vertices + v3, gameData.segs.vertices + v2); } } else {//SIDE_IS_TRI_02 v1 = sideVerts [1]; v3 = sideVerts [3]; if (Xs - x >= y) { v0 = sideVerts [0]; FindOffset (&OffsetU, gameData.segs.vertices [v0], gameData.segs.vertices [v1], f_offset [x]); //(((double) x) + pixelOffset) / (Xs - 1)); FindOffset (&OffsetV, gameData.segs.vertices [v0], gameData.segs.vertices [v3], f_offset [y]); //(((double) y) + pixelOffset) / (Xs - 1)); VmVecAdd (pPixelPos, &OffsetU, &OffsetV); VmVecInc (pPixelPos, gameData.segs.vertices + v0); //This should be the real world position of the pixel. } else { v2 = sideVerts [2]; //Not certain this is correct, may need to subtract something FindOffset (&OffsetV, gameData.segs.vertices [v2], gameData.segs.vertices [v1], f_offset [Xs - 1 - y]); //((double) ((Xs - 1) - y) + pixelOffset) / (Xs - 1)); FindOffset (&OffsetU, gameData.segs.vertices [v2], gameData.segs.vertices [v3], f_offset [Xs - 1 - x]); //((double) ((Xs - 1) - x) + pixelOffset) / (Xs - 1)); VmVecAdd (pPixelPos, &OffsetU, &OffsetV); VmVecInc (pPixelPos, gameData.segs.vertices + v2); //This should be the real world position of the pixel. } } } } #endif //Calculate LightVal //Next iterate through all the lights and add the light to the pixel every iteration. sideRad = (int) (SideRad (segNum, sideNum) + 0.5); VmVecAvg4 ( &sidePos, &pixelPos [0][0], &pixelPos [Xs-1][0], &pixelPos [Xs-1][Ys-1], &pixelPos [0][Ys-1]); #if 1 pTexColor = texColor [0][0] + 3; memset (texColor, 0, sizeof (texColor)); for (xy = Xs * Ys; xy; xy--, pTexColor += 4) *pTexColor = 1; #else pTexColor = texColor [0][0]; for (x = 0; x < Xs; x++) { for (y = 0; y < Ys; y++, pTexColor += 4) { pTexColor [0] = pTexColor [1] = pTexColor [2] = 0; pTexColor [3] = 1; } } #endif #if LMAP_REND2TEX bStart = 1; #endif for (l = 0, lmapP = lightData; l < numLightMaps; l++, lmapP++) { #if LMAP_REND2TEX nMinDist = 0x7FFFFFFF; // get the distances of all 4 tSide corners to the light source center // scaled by the light source range for (i = 0; i < 4; i++) { int svi = sideVerts [i]; sidePos.x = gameData.segs.vertices [svi].x; sidePos.y = gameData.segs.vertices [svi].y; sidePos.z = gameData.segs.vertices [svi].z; nDist = f2i (VmVecDist (&sidePos, &lmapP->pos)); // calc distance if (nMinDist > nDist) nMinDist = nDist; lMapUVL [i].u = F1_0 * (double) nDist / (double) lmapP->range; // scale distance } if ((lmapP->color [0] + lmapP->color [1] + lmapP->color [2] < 3) && (nMinDist < lmapP->range + sideRad)) { // create and initialize an OpenGL texture for the lightmap InitLightMap (lightMap, brightMap, lmapP->color); glGenTextures (1, &lightMapId); glTexImage1D (GL_TEXTURE_1D, 0, GL_RGB, 512, 1, GL_RGB, GL_UNSIGNED_BYTE, lightMap); OglActiveTexture (GL_TEXTURE0_ARB); glEnable (GL_TEXTURE_1D); glEnable (GL_BLEND); glBlendFunc (GL_ONE, bStart ? GL_ZERO : GL_ONE); // If processing first light, set the lightmap, else modify it glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, bStart ? GL_REPLACE : GL_ADD); glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_COLOR, GL_RGBA); glBindTexture (GL_TEXTURE_1D, lightMapId); // extend the lightmap to the texture edges glTexParameteri (GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP); glTexParameteri (GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP); glBegin (GL_QUADS); glColor4f (1.0f, 1.0f, 1.0f, 1.0f); for (i = 0; i < 4; i++) { glMultiTexCoord2f (GL_TEXTURE0_ARB, f2fl (lMapUVL [i].u), f2fl (lMapUVL [i].v)); glVertex3f (f2fl (gameData.segs.vertices [sideVerts [i]].x), f2fl (gameData.segs.vertices [sideVerts [i]].y), f2fl (gameData.segs.vertices [sideVerts [i]].z)); } glEnd (); glDisable (GL_BLEND); glDisable (GL_TEXTURE_1D); glDeleteTextures (1, &lightMapId); bStart = 0; } #else if (f2i (VmVecDist (&sidePos, &lmapP->pos)) < lmapP->range + sideRad) { pPixelPos = &pixelPos [0][0]; pTexColor = texColor [0][0]; #if 1 for (xy = Xs * Ys; xy; xy--, pPixelPos++, pTexColor += 4) { #else for (x = 0; x < Xs; x++) for (y = 0; y < Ys; y++, pPixelPos++, pTexColor += 4) { #endif //Find angle to this light. pixelDist = f2i (VmVecDist (pPixelPos, &lmapP->pos)); if (pixelDist >= lmapP->range) continue; VmVecSub (&rayVec, &lmapP->pos, pPixelPos); delta = f2db (VmVecDeltaAng (&lmapP->dir, &rayVec, NULL)); if (delta < 0) delta = -delta; brightPrct = 1 - (pixelDist / lmapP->range); brightPrct *= brightPrct; //square result if (delta < 0.245) brightPrct /= 4; pTexColor [0] += (GLfloat) (brightPrct * lmapP->color [0]); pTexColor [1] += (GLfloat) (brightPrct * lmapP->color [1]); pTexColor [2] += (GLfloat) (brightPrct * lmapP->color [2]); } } #endif } #if LMAP_REND2TEX lightMaps [mapNum].handle = lightMaps [mapNum].fbuffer.texId; lightMaps [mapNum].fbuffer.texId = 0; OglDestroyFBuffer (&lightMaps [mapNum].fbuffer); #else pPixelPos = &pixelPos [0][0]; pTexColor = texColor [0][0]; for (x = 0; x < Xs; x++) for (y = 0; y < Ys; y++, pPixelPos++, pTexColor += 4) { tempBright = 0; for (s = 0; s < 3; s++) if (pTexColor [s] > tempBright) tempBright = pTexColor [s]; if (tempBright > 1.0) for (s = 0; s < 3; s++) pTexColor [s] /= tempBright; glTexSubImage2D (GL_TEXTURE_2D, 0, x, y, 1, 1, GL_RGBA, GL_FLOAT, pTexColor); } #endif } } } //------------------------------------------------------------------------------ int HaveLightMaps (void) { return (lightData != NULL); } //------------------------------------------------------------------------------ static int segNum = 0; static void CreateLightMapsPoll (int nItems, tMenuItem *m, int *key, int cItem) { GrPaletteStepLoad (NULL); if (segNum < gameData.segs.nSegments) { ComputeLightMaps (segNum); segNum += PROGRESS_INCR; } else { *key = -2; GrPaletteStepLoad (NULL); return; } m [0].value++; m [0].rebuild = 1; *key = 0; GrPaletteStepLoad (NULL); return; }
// Used for energy centers. See comments for c_tmap_scanline_per(). void c_fp_tmap_scanline_per_nolight() { ubyte *dest; ubyte c; int x; double u, v, z, dudx, dvdx, dzdx, rec_z; double ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1; double dudx8, dvdx8, dzdx8; u_int64_t destlong;//, destmask; ubyte *texmap = pixptr;//, *fadetable = gr_fade_table; u = f2db(fx_u); v = f2db(fx_v) * 64.0; z = f2db(fx_z); dudx = f2db(fx_du_dx); dvdx = f2db(fx_dv_dx) * 64.0; dzdx = f2db(fx_dz_dx); dudx8 = dudx * 8.0; dvdx8 = dvdx * 8.0; dzdx8 = dzdx * 8.0; rec_z = 1.0 / z; dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); x = fx_xright - fx_xleft + 1; if (!Transparency_on) { // I'm not sure this is ever used (energy texture is transparent) if (x >= 8) { for ( ; (size_t) dest & 7; --x) { *dest++ = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } ubyz0 = u * rec_z; vbyz0 = v * rec_z; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; for ( ; x >= 8; x -= 8) { destlong = (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 8; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 16; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 24; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 32; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 40; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 48; ubyz += du1; vbyz += dv1; destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)] << 56; ubyz0 = ubyz8; vbyz0 = vbyz8; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; *((u_int64_t *) dest) = destlong; dest += 8; } u -= dudx8; v -= dvdx8; z -= dzdx8; } rec_z = 1.0 / z; for ( ; x > 0; x--) { *dest++ = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } else { // Transparency_on if (x >= 8) { for ( ; (size_t) dest & 7; --x) { c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != TRANSPARENCY_COLOR) *dest = c; dest++; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } ubyz0 = u * rec_z; vbyz0 = v * rec_z; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; for ( ; x >= 8; x -= 8) { destlong = *((u_int64_t *) dest); c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF); destlong |= (u_int64_t) c; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 8); destlong |= (u_int64_t) c << 8; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 16); destlong |= (u_int64_t) c << 16; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 24); destlong |= (u_int64_t) c << 24; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 32); destlong |= (u_int64_t) c << 32; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 40); destlong |= (u_int64_t) c << 40; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 48); destlong |= (u_int64_t) c << 48; } ubyz += du1; vbyz += dv1; c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; if (c != TRANSPARENCY_COLOR) { destlong &= ~((u_int64_t) 0xFF << 56); destlong |= (u_int64_t) c << 56; } *((u_int64_t *) dest) = destlong; dest += 8; ubyz0 = ubyz8; vbyz0 = vbyz8; u += dudx8; v += dvdx8; z += dzdx8; rec_z = 1.0 / z; ubyz8 = u * rec_z; vbyz8 = v * rec_z; du1 = (ubyz8 - ubyz0) / 8.0; dv1 = (vbyz8 - vbyz0) / 8.0; ubyz = ubyz0; vbyz = vbyz0; } u -= dudx8; v -= dvdx8; z -= dzdx8; } rec_z = 1.0 / z; for ( ; x > 0; x--) { c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; if (c != TRANSPARENCY_COLOR) *dest = c; dest++; u += dudx; v += dvdx; z += dzdx; rec_z = 1.0 / z; } } }