int signal_energy_nodc(int *input,unsigned int length) { int i; int temp,temp2; register __m64 mm0,mm1,mm2,mm3; __m64 *in = (__m64 *)input; #ifdef MAIN short *printb; #endif mm0 = _m_pxor(mm0,mm0); mm3 = _m_pxor(mm3,mm3); for (i=0;i<length>>1;i++) { mm1 = in[i]; mm2 = mm1; mm1 = _m_pmaddwd(mm1,mm1);// SIMD complex multiplication mm1 = _m_psradi(mm1,shift); mm0 = _m_paddd(mm0,mm1); // temp2 = mm0; // printf("%d %d\n",((int *)&in[i])[0],((int *)&in[i])[1]); // printb = (short *)&mm2; // printf("mm2 %d : %d %d %d %d\n",i,printb[0],printb[1],printb[2],printb[3]); } /* #ifdef MAIN printb = (short *)&mm3; printf("%d %d %d %d\n",printb[0],printb[1],printb[2],printb[3]); #endif */ mm1 = mm0; mm0 = _m_psrlqi(mm0,32); mm0 = _m_paddd(mm0,mm1); temp = _m_to_int(mm0); temp/=length; temp<<=shift; // this is the average of x^2 #ifdef MAIN printf("E x^2 = %d\n",temp); #endif _mm_empty(); _m_empty(); return((temp>0)?temp:1); }
// Tests a point, passed as two packed halves (v0XY, v0Z1), against the three
// entries of edgePlanes. Returns false as soon as one plane yields a value
// whose low 32 bits, read as an int, are negative; returns true otherwise.
// NOTE(review): presumably the lanes hold floats, so the integer comparison
// is a sign-bit test on the plane distance -- confirm against the pf* helpers.
// No femms() is issued here; that is left to the caller.
bool BTri::isAbove3DNow(v2sf v0XY, v2sf v0Z1) const {
	for (int edge = 0; edge < 3; ++edge){
		// View the plane as two packed halves: [0] and [1].
		v2sf *halves = (v2sf *) &edgePlanes[edge];
		v2sf lowHalf  = halves[0];
		v2sf highHalf = halves[1];

		// Lane-wise products, then two accumulate steps to reduce them.
		v2sf sum = pfacc(pfmul(lowHalf, v0XY), pfmul(highHalf, v0Z1));
		sum = pfacc(sum, sum);

		if (_m_to_int(sum) < 0){
			return false;
		}
	}
	return true;
}
bool BSP::isInOpenSpace3DNow(const vec3 &pos) const { if (top != NULL){ SSENode *node = sseTop; femms(); v2sf posXY, posZ1; posXY = *(v2sf *) &pos.x; posZ1.m64_f32[0] = pos.z; posZ1.m64_f32[1] = 1.0f; while (true){ v2sf planeXY = ((v2sf *) &node->tri.plane)[0]; v2sf planeZD = ((v2sf *) &node->tri.plane)[1]; v2sf dotXY = pfmul(planeXY, posXY); v2sf dotZD = pfmul(planeZD, posZ1); v2sf dot = pfacc(dotXY, dotZD); dot = pfacc(dot, dot); int d = _m_to_int(dot); if (d > 0){ if (node->front){ node = node->front; } else { femms(); return true; } } else { if (node->back){ node = node->back; } else { femms(); return false; } } } } return false; }
bool BNode::intersects3DNow(const vec4 &v0, const vec4 &v1, const vec4 &dir) const { v2sf planeXY = ((v2sf *) &tri.plane)[0]; v2sf planeZD = ((v2sf *) &tri.plane)[1]; v2sf v0XY = ((v2sf *) &v0)[0]; v2sf v0Z1 = ((v2sf *) &v0)[1]; v2sf dotXY = pfmul(planeXY, v0XY); v2sf dotZD = pfmul(planeZD, v0Z1); v2sf dotD = pfacc(dotXY, dotZD); dotD = pfacc(dotD, dotD); int d = _m_to_int(dotD); if (d > 0){ if (front != NULL && front->intersects3DNow(v0, v1, dir)) return true; v2sf dotXY = pfmul(planeXY, ((v2sf *) &v1)[0]); v2sf dotZD = pfmul(planeZD, ((v2sf *) &v1)[1]); v2sf dot = pfacc(dotXY, dotZD); dot = pfacc(dot, dot); int d = _m_to_int(dot); if (d < 0){ v2sf dirXY = ((v2sf *) &dir)[0]; v2sf dirZ0 = ((v2sf *) &dir)[1]; v2sf dotXY = pfmul(planeXY, dirXY); v2sf dotZ0 = pfmul(planeZD, dirZ0); v2sf dot = pfacc(dotXY, dotZ0); dot = pfacc(dot, dot); dot = pfrcp(dot); dot = pfmul(dot, dotD); dirXY = pfmul(dirXY, dot); dirZ0 = pfmul(dirZ0, dot); v0XY = pfsub(v0XY, dirXY); v0Z1 = pfsub(v0Z1, dirZ0); if (tri.isAbove3DNow(v0XY, v0Z1)){ return true; } if (back != NULL && back->intersects3DNow(v0, v1, dir)) return true; } } else { if (back != NULL && back->intersects3DNow(v0, v1, dir)) return true; v2sf dotXY = pfmul(planeXY, ((v2sf *) &v1)[0]); v2sf dotZD = pfmul(planeZD, ((v2sf *) &v1)[1]); v2sf dot = pfacc(dotXY, dotZD); dot = pfacc(dot, dot); int d = _m_to_int(dot); if (d > 0){ v2sf dirXY = ((v2sf *) &dir)[0]; v2sf dirZ0 = ((v2sf *) &dir)[1]; v2sf dotXY = pfmul(planeXY, dirXY); v2sf dotZ0 = pfmul(planeZD, dirZ0); v2sf dot = pfacc(dotXY, dotZ0); dot = pfacc(dot, dot); dot = pfrcp(dot); dot = pfmul(dot, dotD); dirXY = pfmul(dirXY, dot); dirZ0 = pfmul(dirZ0, dot); v0XY = pfsub(v0XY, dirXY); v0Z1 = pfsub(v0Z1, dirZ0); if (tri.isAbove3DNow(v0XY, v0Z1)){ return true; } if (front != NULL && front->intersects3DNow(v0, v1, dir)) return true; } } return false; }
/* Codegen test case: returns the low 32 bits of the MMX value `a`.
 * The directive comment inside the body is consumed by the test tooling
 * and is kept verbatim. */
int test91(__m64 a)
{
  // CHECK: movd
  const int low32 = _m_to_int(a);
  return low32;
}
// Minimal program exercising three MMX-register intrinsics: it builds an
// all-zero __m64, runs it through _mm_shuffle_pi16 with selector 0, and
// returns the low 32 bits of the result as the exit status.
// NOTE(review): looks like a compiler/CPU feature probe -- confirm with the
// build scripts that compile it.
int main(int, char**)
{
    const __m64 zero = _mm_setzero_si64();
    const __m64 shuffled = _mm_shuffle_pi16(zero, 0);
    return _m_to_int(shuffled);
}
/* Codegen test case: returns the low 32 bits of the MMX value `a`.
 * The directive comments inside the body are consumed by the test tooling
 * and are kept verbatim, one directive per line. */
int test_m_to_int(__m64 a)
{
  // CHECK-LABEL: test_m_to_int
  // CHECK: extractelement <2 x i32>
  const int low32 = _m_to_int(a);
  return low32;
}
int signal_energy(int *input,unsigned int length) { int i; int temp,temp2; register __m64 mm0,mm1,mm2,mm3; __m64 *in = (__m64 *)input; #ifdef MAIN short *printb; #endif mm0 = _m_pxor(mm0,mm0); mm3 = _m_pxor(mm3,mm3); for (i=0;i<length>>1;i++) { mm1 = in[i]; mm2 = mm1; mm1 = _m_pmaddwd(mm1,mm1); mm1 = _m_psradi(mm1,shift);// shift any 32 bits blocs of the word by the value shift mm0 = _m_paddd(mm0,mm1);// add the two 64 bits words 4 bytes by 4 bytes // temp2 = mm0; // printf("%d %d\n",((int *)&temp2)[0],((int *)&temp2)[1]); // printb = (short *)&mm2; // printf("mm2 %d : %d %d %d %d\n",i,printb[0],printb[1],printb[2],printb[3]); mm2 = _m_psrawi(mm2,shift_DC); mm3 = _m_paddw(mm3,mm2);// add the two 64 bits words 2 bytes by 2 bytes // printb = (short *)&mm3; // printf("mm3 %d : %d %d %d %d\n",i,printb[0],printb[1],printb[2],printb[3]); } /* #ifdef MAIN printb = (short *)&mm3; printf("%d %d %d %d\n",printb[0],printb[1],printb[2],printb[3]); #endif */ mm1 = mm0; mm0 = _m_psrlqi(mm0,32); mm0 = _m_paddd(mm0,mm1); temp = _m_to_int(mm0); temp/=length; temp<<=shift; // this is the average of x^2 // now remove the DC component mm2 = _m_psrlqi(mm3,32); mm2 = _m_paddw(mm2,mm3); mm2 = _m_pmaddwd(mm2,mm2); temp2 = _m_to_int(mm2); temp2/=(length*length); temp2<<=(2*shift_DC); #ifdef MAIN printf("E x^2 = %d\n",temp); #endif temp -= temp2; #ifdef MAIN printf("(E x)^2=%d\n",temp2); #endif _mm_empty(); _m_empty(); return((temp>0)?temp:1); }