void S_PaintChannelFrom16_altivec( portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE], int snd_vol, channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) { int data, aoff, boff; int leftvol, rightvol; int i, j; portable_samplepair_t *samp; sndBuffer *chunk; short *samples; float ooff, fdata[2], fdiv, fleftvol, frightvol; if (sc->soundChannels <= 0) { return; } samp = &paintbuffer[ bufferOffset ]; if (ch->doppler) { sampleOffset = sampleOffset*ch->oldDopplerScale; } if ( sc->soundChannels == 2 ) { sampleOffset *= sc->soundChannels; if ( sampleOffset & 1 ) { sampleOffset &= ~1; } } chunk = sc->soundData; while (sampleOffset>=SND_CHUNK_SIZE) { chunk = chunk->next; sampleOffset -= SND_CHUNK_SIZE; if (!chunk) { chunk = sc->soundData; } } if (!ch->doppler || ch->dopplerScale==1.0f) { vector signed short volume_vec; vector unsigned int volume_shift; int vectorCount, samplesLeft, chunkSamplesLeft; leftvol = ch->leftvol*snd_vol; rightvol = ch->rightvol*snd_vol; samples = chunk->sndChunk; ((short *)&volume_vec)[0] = leftvol; ((short *)&volume_vec)[1] = leftvol; ((short *)&volume_vec)[4] = leftvol; ((short *)&volume_vec)[5] = leftvol; ((short *)&volume_vec)[2] = rightvol; ((short *)&volume_vec)[3] = rightvol; ((short *)&volume_vec)[6] = rightvol; ((short *)&volume_vec)[7] = rightvol; volume_shift = vec_splat_u32(8); i = 0; while(i < count) { /* Try to align destination to 16-byte boundary */ while(i < count && (((unsigned long)&samp[i] & 0x1f) || ((count-i) < 8) || ((SND_CHUNK_SIZE - sampleOffset) < 8))) { data = samples[sampleOffset++]; samp[i].left += (data * leftvol)>>8; if ( sc->soundChannels == 2 ) { data = samples[sampleOffset++]; } samp[i].right += (data * rightvol)>>8; if (sampleOffset == SND_CHUNK_SIZE) { chunk = chunk->next; samples = chunk->sndChunk; sampleOffset = 0; } i++; } /* Destination is now aligned. Process as many 8-sample chunks as we can before we run out of room from the current sound chunk. We do 8 per loop to avoid extra source data reads. */ samplesLeft = count - i; chunkSamplesLeft = SND_CHUNK_SIZE - sampleOffset; if(samplesLeft > chunkSamplesLeft) samplesLeft = chunkSamplesLeft; vectorCount = samplesLeft / 8; if(vectorCount) { vector unsigned char tmp; vector short s0, s1, sampleData0, sampleData1; vector signed int merge0, merge1; vector signed int d0, d1, d2, d3; vector unsigned char samplePermute0 = VECCONST_UINT8(0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7); vector unsigned char samplePermute1 = VECCONST_UINT8(8, 9, 12, 13, 8, 9, 12, 13, 10, 11, 14, 15, 10, 11, 14, 15); vector unsigned char loadPermute0, loadPermute1; // Rather than permute the vectors after we load them to do the sample // replication and rearrangement, we permute the alignment vector so // we do everything in one step below and avoid data shuffling. tmp = vec_lvsl(0,&samples[sampleOffset]); loadPermute0 = vec_perm(tmp,tmp,samplePermute0); loadPermute1 = vec_perm(tmp,tmp,samplePermute1); s0 = *(vector short *)&samples[sampleOffset]; while(vectorCount) { /* Load up source (16-bit) sample data */ s1 = *(vector short *)&samples[sampleOffset+7]; /* Load up destination sample data */ d0 = *(vector signed int *)&samp[i]; d1 = *(vector signed int *)&samp[i+2]; d2 = *(vector signed int *)&samp[i+4]; d3 = *(vector signed int *)&samp[i+6]; sampleData0 = vec_perm(s0,s1,loadPermute0); sampleData1 = vec_perm(s0,s1,loadPermute1); merge0 = vec_mule(sampleData0,volume_vec); merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ merge1 = vec_mulo(sampleData0,volume_vec); merge1 = vec_sra(merge1,volume_shift); d0 = vec_add(merge0,d0); d1 = vec_add(merge1,d1); merge0 = vec_mule(sampleData1,volume_vec); merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ merge1 = vec_mulo(sampleData1,volume_vec); merge1 = vec_sra(merge1,volume_shift); d2 = vec_add(merge0,d2); d3 = vec_add(merge1,d3); /* Store destination sample data */ *(vector signed int *)&samp[i] = d0; *(vector signed int *)&samp[i+2] = d1; *(vector signed int *)&samp[i+4] = d2; *(vector signed int *)&samp[i+6] = d3; i += 8; vectorCount--; s0 = s1; sampleOffset += 8; } if (sampleOffset == SND_CHUNK_SIZE) { chunk = chunk->next; samples = chunk->sndChunk; sampleOffset = 0; } } } } else {
static void ProjectDlightTexture_altivec( void ) { int i, l; vec_t origin0, origin1, origin2; float texCoords0, texCoords1; vector float floatColorVec0, floatColorVec1; vector float modulateVec, colorVec, zero; vector short colorShort; vector signed int colorInt; vector unsigned char floatColorVecPerm, modulatePerm, colorChar; vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff); float *texCoords; byte *colors; byte clipBits[SHADER_MAX_VERTEXES]; float texCoordsArray[SHADER_MAX_VERTEXES][2]; byte colorArray[SHADER_MAX_VERTEXES][4]; unsigned hitIndexes[SHADER_MAX_INDEXES]; int numIndexes; float scale; float radius; vec3_t floatColor; float modulate = 0.0f; if ( !backEnd.refdef.num_dlights ) { return; } // There has to be a better way to do this so that floatColor // and/or modulate are already 16-byte aligned. floatColorVecPerm = vec_lvsl(0,(float *)floatColor); modulatePerm = vec_lvsl(0,(float *)&modulate); modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0); zero = (vector float)vec_splat_s8(0); for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) { dlight_t *dl; if ( !( tess.dlightBits & ( 1 << l ) ) ) { continue; // this surface definately doesn't have any of this light } texCoords = texCoordsArray[0]; colors = colorArray[0]; dl = &backEnd.refdef.dlights[l]; origin0 = dl->transformed[0]; origin1 = dl->transformed[1]; origin2 = dl->transformed[2]; radius = dl->radius; scale = 1.0f / radius; if(r_greyscale->integer) { float luminance; luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; floatColor[0] = floatColor[1] = floatColor[2] = luminance; } else if(r_greyscale->value) { float luminance; luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value); floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value); floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value); } else { floatColor[0] = dl->color[0] * 255.0f; floatColor[1] = dl->color[1] * 255.0f; floatColor[2] = dl->color[2] * 255.0f; } floatColorVec0 = vec_ld(0, floatColor); floatColorVec1 = vec_ld(11, floatColor); floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm); for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) { int clip = 0; vec_t dist0, dist1, dist2; dist0 = origin0 - tess.xyz[i][0]; dist1 = origin1 - tess.xyz[i][1]; dist2 = origin2 - tess.xyz[i][2]; backEnd.pc.c_dlightVertexes++; texCoords0 = 0.5f + dist0 * scale; texCoords1 = 0.5f + dist1 * scale; if( !r_dlightBacks->integer && // dist . tess.normal[i] ( dist0 * tess.normal[i][0] + dist1 * tess.normal[i][1] + dist2 * tess.normal[i][2] ) < 0.0f ) { clip = 63; } else { if ( texCoords0 < 0.0f ) { clip |= 1; } else if ( texCoords0 > 1.0f ) { clip |= 2; } if ( texCoords1 < 0.0f ) { clip |= 4; } else if ( texCoords1 > 1.0f ) { clip |= 8; } texCoords[0] = texCoords0; texCoords[1] = texCoords1; // modulate the strength based on the height and color if ( dist2 > radius ) { clip |= 16; modulate = 0.0f; } else if ( dist2 < -radius ) { clip |= 32; modulate = 0.0f; } else { dist2 = Q_fabs(dist2); if ( dist2 < radius * 0.5f ) { modulate = 1.0f; } else { modulate = 2.0f * (radius - dist2) * scale; } } } clipBits[i] = clip; modulateVec = vec_ld(0,(float *)&modulate); modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm); colorVec = vec_madd(floatColorVec0,modulateVec,zero); colorInt = vec_cts(colorVec,0); // RGBx colorShort = vec_pack(colorInt,colorInt); // RGBxRGBx colorChar = vec_packsu(colorShort,colorShort); // RGBxRGBxRGBxRGBx colorChar = vec_sel(colorChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors); // store color } // build a list of triangles that need light numIndexes = 0; for ( i = 0 ; i < tess.numIndexes ; i += 3 ) { int a, b, c; a = tess.indexes[i]; b = tess.indexes[i+1]; c = tess.indexes[i+2]; if ( clipBits[a] & clipBits[b] & clipBits[c] ) { continue; // not lighted } hitIndexes[numIndexes] = a; hitIndexes[numIndexes+1] = b; hitIndexes[numIndexes+2] = c; numIndexes += 3; } if ( !numIndexes ) { continue; } qglEnableClientState( GL_TEXTURE_COORD_ARRAY ); qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] ); qglEnableClientState( GL_COLOR_ARRAY ); qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray ); GL_Bind( tr.dlightImage ); // include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light // where they aren't rendered if ( dl->additive ) { GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); } else { GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); } R_DrawElements( numIndexes, hitIndexes ); backEnd.pc.c_totalIndexes += numIndexes; backEnd.pc.c_dlightIndexes += numIndexes; } }
static void ProjectDlightTexture_altivec( void ) { int i, l; vec_t origin0, origin1, origin2; float texCoords0, texCoords1; vector float floatColorVec0, floatColorVec1; vector float modulateVec, colorVec, zero; vector short colorShort; vector signed int colorInt; vector unsigned char floatColorVecPerm, modulatePerm, colorChar; vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff); float *texCoords; byte *colors; int *intColors; byte clipBits[SHADER_MAX_VERTEXES]; float texCoordsArray[SHADER_MAX_VERTEXES][2]; byte colorArray[SHADER_MAX_VERTEXES][4]; glIndex_t hitIndexes[SHADER_MAX_INDEXES]; int numIndexes; float scale; float radius; float radiusInverseCubed; float intensity, remainder; vec3_t floatColor; float modulate = 0.0f; qboolean vertexLight; if ( !backEnd.refdef.num_dlights ) { return; } // There has to be a better way to do this so that floatColor // and/or modulate are already 16-byte aligned. floatColorVecPerm = vec_lvsl(0,(float *)floatColor); modulatePerm = vec_lvsl(0,(float *)&modulate); modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0); zero = (vector float)vec_splat_s8(0); for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) { dlight_t *dl; if ( !( tess.dlightBits & ( 1 << l ) ) ) { continue; // this surface definately doesn't have any of this light } // clear colors Com_Memset( colorArray, 0, sizeof( colorArray ) ); texCoords = texCoordsArray[0]; colors = colorArray[0]; dl = &backEnd.refdef.dlights[l]; origin0 = dl->transformed[0]; origin1 = dl->transformed[1]; origin2 = dl->transformed[2]; radius = dl->radius; scale = 1.0f / radius; radiusInverseCubed = dl->radiusInverseCubed; intensity = dl->intensity; vertexLight = ( ( dl->flags & REF_DIRECTED_DLIGHT ) || ( dl->flags & REF_VERTEX_DLIGHT ) ); // directional lights have max intensity and washout remainder intensity if ( dl->flags & REF_DIRECTED_DLIGHT ) { remainder = intensity * 0.125; } else { remainder = 0.0f; } if(r_greyscale->integer) { float luminance; luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; floatColor[0] = floatColor[1] = floatColor[2] = luminance; } else if(r_greyscale->value) { float luminance; luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value); floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value); floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value); } else { floatColor[0] = dl->color[0] * 255.0f; floatColor[1] = dl->color[1] * 255.0f; floatColor[2] = dl->color[2] * 255.0f; } floatColorVec0 = vec_ld(0, floatColor); floatColorVec1 = vec_ld(11, floatColor); floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm); for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) { int clip = 0; vec_t dist0, dist1, dist2; dist0 = origin0 - tess.xyz[i][0]; dist1 = origin1 - tess.xyz[i][1]; dist2 = origin2 - tess.xyz[i][2]; backEnd.pc.c_dlightVertexes++; // directional dlight, origin is a directional normal if ( dl->flags & REF_DIRECTED_DLIGHT ) { // twosided surfaces use absolute value of the calculated lighting modulate = intensity * DotProduct( dl->origin, tess.normal[ i ] ); if ( tess.shader->cullType == CT_TWO_SIDED ) { modulate = fabs( modulate ); } modulate += remainder; } // spherical vertex lit dlight else if ( dl->flags & REF_VERTEX_DLIGHT ) { vec3_t dir; dir[ 0 ] = radius - fabs( dist0 ); if ( dir[ 0 ] <= 0.0f ) { continue; } dir[ 1 ] = radius - fabs( dist1 ); if ( dir[ 1 ] <= 0.0f ) { continue; } dir[ 2 ] = radius - fabs( dist2 ); if ( dir[ 2 ] <= 0.0f ) { continue; } modulate = intensity * dir[ 0 ] * dir[ 1 ] * dir[ 2 ] * radiusInverseCubed; } // vertical cylinder dlight else { texCoords0 = 0.5f + dist0 * scale; texCoords1 = 0.5f + dist1 * scale; if( !r_dlightBacks->integer && // dist . tess.normal[i] ( dist0 * tess.normal[i][0] + dist1 * tess.normal[i][1] + dist2 * tess.normal[i][2] ) < 0.0f ) { clip = 63; } else { if ( texCoords0 < 0.0f ) { clip |= 1; } else if ( texCoords0 > 1.0f ) { clip |= 2; } if ( texCoords1 < 0.0f ) { clip |= 4; } else if ( texCoords1 > 1.0f ) { clip |= 8; } texCoords[0] = texCoords0; texCoords[1] = texCoords1; // modulate the strength based on the height and color if ( dist2 > radius ) { clip |= 16; modulate = 0.0f; } else if ( dist2 < -radius ) { clip |= 32; modulate = 0.0f; } else { dist2 = Q_fabs(dist2); if ( dist2 < radius * 0.5f ) { modulate = intensity; } else { modulate = intensity * 2.0f * (radius - dist2) * scale; } } } } clipBits[i] = clip; // optimizations if ( vertexLight && modulate < ( 1.0f / 128.0f ) ) { continue; } else if ( modulate > 1.0f ) { modulate = 1.0f; } // ZTM: FIXME: should probably clamp to 0-255 range before converting to char, // but I don't know how to do altvec stuff or if it's even used anymore modulateVec = vec_ld(0,(float *)&modulate); modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm); colorVec = vec_madd(floatColorVec0,modulateVec,zero); colorInt = vec_cts(colorVec,0); // RGBx colorShort = vec_pack(colorInt,colorInt); // RGBxRGBx colorChar = vec_packsu(colorShort,colorShort); // RGBxRGBxRGBxRGBx colorChar = vec_sel(colorChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors); // store color } // build a list of triangles that need light intColors = (int*) colorArray; numIndexes = 0; for ( i = 0 ; i < tess.numIndexes ; i += 3 ) { int a, b, c; a = tess.indexes[i]; b = tess.indexes[i+1]; c = tess.indexes[i+2]; if ( vertexLight ) { if ( !( intColors[ a ] | intColors[ b ] | intColors[ c ] ) ) { continue; } } else { if ( clipBits[a] & clipBits[b] & clipBits[c] ) { continue; // not lighted } } hitIndexes[numIndexes] = a; hitIndexes[numIndexes+1] = b; hitIndexes[numIndexes+2] = c; numIndexes += 3; } if ( !numIndexes ) { continue; } if ( !vertexLight ) { qglEnableClientState( GL_TEXTURE_COORD_ARRAY ); qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] ); } else { qglDisableClientState( GL_TEXTURE_COORD_ARRAY ); } qglEnableClientState( GL_COLOR_ARRAY ); qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray ); if ( dl->dlshader ) { shader_t *dls = dl->dlshader; for ( i = 0; i < dls->numUnfoggedPasses; i++ ) { shaderStage_t *stage = dls->stages[i]; R_BindAnimatedImage( &dls->stages[i]->bundle[0] ); GL_State( stage->stateBits | GLS_DEPTHFUNC_EQUAL ); R_DrawElements( numIndexes, hitIndexes ); backEnd.pc.c_totalIndexes += numIndexes; backEnd.pc.c_dlightIndexes += numIndexes; } } else { R_FogOff(); if ( !vertexLight ) { GL_Bind( tr.dlightImage ); } else { GL_Bind( tr.whiteImage ); } // include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light // where they aren't rendered if ( dl->flags & REF_ADDITIVE_DLIGHT ) { GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); } else { GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); } R_DrawElements( numIndexes, hitIndexes ); backEnd.pc.c_totalIndexes += numIndexes; backEnd.pc.c_dlightIndexes += numIndexes; RB_FogOn(); } } }