/*
 * MRIScomputeDistanceMap() - distance from one reference vertex to every
 * vertex of a surface, measured along a great circle.
 *
 * Every vertex position (x, y, z) is used as a radius vector.  The radius is
 * taken from the reference vertex, and the distance to vertex vno is
 *
 *     circumference * angle / (2 * pi)
 *
 * where circumference = 2 * pi * |ref| and angle = |Vector3Angle(ref, vno)|.
 * NOTE(review): this is only a surface distance if mris is a sphere centred
 * on the origin and Vector3Angle() returns radians - confirm with callers.
 *
 * Parameters:
 *   mris           surface whose vertices are measured.
 *   mri_distance   destination; voxel (vno, 0, 0) receives the distance for
 *                  vertex vno.  When NULL, an nvertices x 1 x 1 MRI_FLOAT
 *                  volume is allocated here and ownership passes to the
 *                  caller.
 *   ref_vertex_no  index of the reference vertex, 0 <= index < nvertices.
 *
 * Returns mri_distance, or NULL when mris is NULL, ref_vertex_no is out of
 * range, or the output volume could not be allocated.  Nothing is allocated
 * or written on the invalid-argument paths.
 */
MRI *
MRIScomputeDistanceMap(MRI_SURFACE *mris, MRI *mri_distance, int ref_vertex_no)
{
  int     vno ;
  VERTEX  *v ;
  double  circumference, angle, distance ;
  VECTOR  *v1, *v2 ;

  /* validate before touching mris->vertices[] or allocating anything */
  if (mris == NULL || ref_vertex_no < 0 || ref_vertex_no >= mris->nvertices)
    return(NULL) ;

  if (mri_distance == NULL)
  {
    mri_distance = MRIalloc(mris->nvertices, 1, 1, MRI_FLOAT) ;
    if (mri_distance == NULL)
      return(NULL) ;
  }

  v1 = VectorAlloc(3, MATRIX_REAL) ;
  v2 = VectorAlloc(3, MATRIX_REAL) ;

  v = &mris->vertices[ref_vertex_no] ;
  VECTOR_LOAD(v1, v->x, v->y, v->z) ;  /* radius vector of the reference */
  circumference = M_PI * 2.0 * V3_LEN(v1) ;

  for (vno = 0 ; vno < mris->nvertices ; vno++)
  {
    v = &mris->vertices[vno] ;
    if (vno == Gdiag_no)
      DiagBreak() ;                    /* debugger hook for one vertex */

    VECTOR_LOAD(v2, v->x, v->y, v->z) ;  /* radius vector of this vertex */
    angle = fabs(Vector3Angle(v1, v2)) ;
    distance = circumference * angle / (2.0 * M_PI) ;
    MRIsetVoxVal(mri_distance, vno, 0, 0, 0, distance) ;
  }

  VectorFree(&v1) ;
  VectorFree(&v2) ;
  return(mri_distance) ;
}
/*
 * computeCurvature() - estimate a curvature value at a list of vertices.
 *
 * For each listed vertex:
 *   1. the normal is the area-weighted mean of the normals of its faces;
 *   2. two unit vectors e1, e2 perpendicular to that normal are built
 *      (the tangent plane);
 *   3. every neighbour offset d is split into a height y = d . n and a
 *      squared tangential radius r2 = (d . e1)^2 + (d . e2)^2, and the
 *      1-d quadratic y = a * r^2 is fitted by least squares, giving
 *      a = sum(y * r2) / sum(r2 * r2).  The stored value is 2 * a.
 *
 * Parameters:
 *   vertex   vertex table; reads fnum, f[], vnum, v[], x, y, z.
 *   nvt      number of vertices (not used here).
 *   face     face table; reads nx, ny, nz, area.
 *   nfc      number of faces (not used here).
 *   ref_tab  indices into vertex[] of the vertices to process.
 *   nb       number of entries in ref_tab and curv.
 *   curv     output; curv[n] is the value for vertex ref_tab[n].
 *
 * Degenerate vertices - total incident face area of zero, or no neighbour
 * with a non-zero tangential offset - have no defined normal or fit.  They
 * get curv[n] = 0 instead of the NaN/Inf a division by zero would produce.
 */
static void
computeCurvature(VerTex *vertex, int nvt, FaCe *face, int nfc,
                 int *ref_tab, int nb, float *curv)
{
  int     n, m, reference;
  VECTOR  *v_n, *v_e1, *v_e2, *v;
  float   nx, ny, nz, area, dx, dy, dz, y, r2, u1, u2, YR2, R4;

  v_n  = VectorAlloc(3, MATRIX_REAL);
  v_e1 = VectorAlloc(3, MATRIX_REAL);
  v_e2 = VectorAlloc(3, MATRIX_REAL);
  v    = VectorAlloc(3, MATRIX_REAL);

  for (n = 0; n < nb; n++)
  {
    reference = ref_tab[n];

    /* area-weighted average of the incident face normals */
    nx = ny = nz = area = 0;
    for (m = 0; m < vertex[reference].fnum; m++)
    {
      const FaCe *fc = &face[vertex[reference].f[m]];

      nx   += fc->nx * fc->area;
      ny   += fc->ny * fc->area;
      nz   += fc->nz * fc->area;
      area += fc->area;
    }
    if (area == 0)
    {
      /* no usable faces: the normal is undefined */
      curv[n] = 0;
      continue;
    }
    nx /= area;
    ny /= area;
    nz /= area;
    VECTOR_LOAD(v_n, nx, ny, nz);

    /*
     * Tangent plane: cross the normal with a permuted copy of itself.  If
     * the permutation happens to be parallel to the normal, flip the sign
     * of one non-zero component and try again.  Braces keep the if/else
     * chain valid whether or not VECTOR_LOAD expands with its own ';'.
     */
    VECTOR_LOAD(v, ny, nz, nx);
    V3_CROSS_PRODUCT(v_n, v, v_e1);
    if ((V3_LEN_IS_ZERO(v_e1)))
    {
      if (nz != 0)
      {
        VECTOR_LOAD(v, ny, -nz, nx);
      }
      else if (ny != 0)
      {
        VECTOR_LOAD(v, -ny, nz, nx);
      }
      else
      {
        VECTOR_LOAD(v, ny, nz, -nx);
      }
      V3_CROSS_PRODUCT(v_n, v, v_e1);
    }
    V3_CROSS_PRODUCT(v_n, v_e1, v_e2);
    V3_NORMALIZE(v_e1, v_e1);
    V3_NORMALIZE(v_e2, v_e2);

    /* least-squares fit of y = a * r^2 over the neighbours; curv = 2 * a */
    YR2 = 0;
    R4  = 0;
    for (m = 0; m < vertex[reference].vnum; m++)
    {
      dx = vertex[vertex[reference].v[m]].x - vertex[reference].x;
      dy = vertex[vertex[reference].v[m]].y - vertex[reference].y;
      dz = vertex[vertex[reference].v[m]].z - vertex[reference].z;
      VECTOR_LOAD(v, dx, dy, dz);

      y  = V3_DOT(v, v_n);     /* height above the tangent plane */
      u1 = V3_DOT(v_e1, v);    /* in-plane coordinates */
      u2 = V3_DOT(v_e2, v);
      r2 = u1 * u1 + u2 * u2;

      YR2 += y * r2;
      R4  += r2 * r2;
    }

    if (R4 == 0)
      curv[n] = 0;             /* no neighbour off the normal axis: no fit */
    else
      curv[n] = 2 * YR2 / R4;
  }

  VectorFree(&v);
  VectorFree(&v_n);
  VectorFree(&v_e1);
  VectorFree(&v_e2);
}
/*
 * evaluateParsimonyIterativeFast() - parsimony score across the pair of
 * nodes named by tr->ti[1] and tr->ti[2].
 *
 * When tr->ti[0] is greater than 4 the traversal descriptor holds pending
 * node updates, so newviewParsimonyIterativeFast() is run first.  The score
 * starts as the sum of the two nodes' stored scores.  Then, for every
 * partition and every vector-wide chunk of its parsimony vectors, the
 * per-state vectors of both nodes are ANDed, the results ORed over all
 * states, and the bits left clear (no state shared by both nodes) are
 * counted and added.
 *
 * Returns the accumulated score.  As soon as the running score reaches
 * tr->bestParsimony the function returns that partial value instead of
 * finishing the scan.
 */
static unsigned int evaluateParsimonyIterativeFast(tree *tr)
{
  INT_TYPE
    onesMask = SET_ALL_BITS_ONE;

  size_t
    pSlot = (size_t)tr->ti[1],
    qSlot = (size_t)tr->ti[2];

  unsigned int
    cutoff = tr->bestParsimony,
    score;

  int
    model;

  if(tr->ti[0] > 4)
    newviewParsimonyIterativeFast(tr);

  score = tr->parsimonyScore[pSlot] + tr->parsimonyScore[qSlot];

  for(model = 0; model < tr->NumberOfModels; model++)
    {
      size_t
        nStates = tr->partitionData[model].states,
        stride  = tr->partitionData[model].parsimonyLength,
        s,
        col;

      parsimonyNumber
        *vect = tr->partitionData[model].parsVect;

      switch(nStates)
        {
        case 2:
          {
            parsimonyNumber
              *qVec[2],
              *pVec[2];

            for(s = 0; s < 2; s++)
              {
                qVec[s] = &(vect[(stride * 2 * qSlot) + stride * s]);
                pVec[s] = &(vect[(stride * 2 * pSlot) + stride * s]);
              }

            for(col = 0; col < stride; col += INTS_PER_VECTOR)
              {
                INT_TYPE
                  both0,
                  both1,
                  shared;

                both0 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[0][col])), VECTOR_LOAD((CAST)(&pVec[0][col])));
                both1 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[1][col])), VECTOR_LOAD((CAST)(&pVec[1][col])));

                shared = VECTOR_BIT_OR(both0, both1);

                /* invert: set bits now mark sites with no common state */
                shared = VECTOR_AND_NOT(shared, onesMask);

                score += evaluatePopcount(shared, tr->bits_in_16bits);

                if(score >= cutoff)
                  return score;
              }
          }
          break;

        case 4:
          {
            parsimonyNumber
              *qVec[4],
              *pVec[4];

            for(s = 0; s < 4; s++)
              {
                qVec[s] = &(vect[(stride * 4 * qSlot) + stride * s]);
                pVec[s] = &(vect[(stride * 4 * pSlot) + stride * s]);
              }

            for(col = 0; col < stride; col += INTS_PER_VECTOR)
              {
                INT_TYPE
                  both0,
                  both1,
                  both2,
                  both3,
                  shared;

                both0 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[0][col])), VECTOR_LOAD((CAST)(&pVec[0][col])));
                both1 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[1][col])), VECTOR_LOAD((CAST)(&pVec[1][col])));
                both2 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[2][col])), VECTOR_LOAD((CAST)(&pVec[2][col])));
                both3 = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[3][col])), VECTOR_LOAD((CAST)(&pVec[3][col])));

                shared = VECTOR_BIT_OR(VECTOR_BIT_OR(both0, both1), VECTOR_BIT_OR(both2, both3));

                shared = VECTOR_AND_NOT(shared, onesMask);

                score += evaluatePopcount(shared, tr->bits_in_16bits);

                if(score >= cutoff)
                  return score;
              }
          }
          break;

        case 20:
          {
            parsimonyNumber
              *qVec[20],
              *pVec[20];

            for(s = 0; s < 20; s++)
              {
                qVec[s] = &(vect[(stride * 20 * qSlot) + stride * s]);
                pVec[s] = &(vect[(stride * 20 * pSlot) + stride * s]);
              }

            for(col = 0; col < stride; col += INTS_PER_VECTOR)
              {
                INT_TYPE
                  both,
                  shared = SET_ALL_BITS_ZERO;

                for(s = 0; s < 20; s++)
                  {
                    both = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[s][col])), VECTOR_LOAD((CAST)(&pVec[s][col])));
                    shared = VECTOR_BIT_OR(both, shared);
                  }

                shared = VECTOR_AND_NOT(shared, onesMask);

                score += evaluatePopcount(shared, tr->bits_in_16bits);

                if(score >= cutoff)
                  return score;
              }
          }
          break;

        default:
          {
            /* generic path: any state count up to 32 */
            parsimonyNumber
              *qVec[32],
              *pVec[32];

            assert(nStates <= 32);

            for(s = 0; s < nStates; s++)
              {
                qVec[s] = &(vect[(stride * nStates * qSlot) + stride * s]);
                pVec[s] = &(vect[(stride * nStates * pSlot) + stride * s]);
              }

            for(col = 0; col < stride; col += INTS_PER_VECTOR)
              {
                INT_TYPE
                  both,
                  shared = SET_ALL_BITS_ZERO;

                for(s = 0; s < nStates; s++)
                  {
                    both = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&qVec[s][col])), VECTOR_LOAD((CAST)(&pVec[s][col])));
                    shared = VECTOR_BIT_OR(both, shared);
                  }

                shared = VECTOR_AND_NOT(shared, onesMask);

                score += evaluatePopcount(shared, tr->bits_in_16bits);

                if(score >= cutoff)
                  return score;
              }
          }
        }
    }

  return score;
}
/*
 * newviewParsimonyIterativeFast() - recompute the parsimony state vectors
 * and scores of the nodes listed in the traversal descriptor tr->ti.
 *
 * tr->ti[0] is the number of used entries.  Starting at index 4, each group
 * of four entries supplies a node to write (first entry) and the two nodes
 * to read (second and third entries); the fourth entry is not read here.
 *
 * For every partition and every vector-wide chunk, with L and R the two
 * input vectors for a state:
 *     inter = L & R              state present in both inputs
 *     uni   = L | R              state present in either input
 *     any   = OR of inter over all states
 *     out   = inter | (~any & uni)
 * so the written node keeps the intersection where one exists and the union
 * otherwise.  The bits of ~any are counted as the score contribution.
 *
 * The written node's entry in tr->parsimonyScore becomes that count plus the
 * stored scores of the two input nodes.
 */
static void newviewParsimonyIterativeFast(tree *tr)
{
  INT_TYPE
    onesMask = SET_ALL_BITS_ONE;

  int
    *ti = tr->ti,
    count = ti[0],
    model,
    index;

  for(index = 4; index < count; index += 4)
    {
      size_t
        parentSlot = (size_t)ti[index],
        leftSlot   = (size_t)ti[index + 1],
        rightSlot  = (size_t)ti[index + 2];

      unsigned int
        totalScore = 0;

      for(model = 0; model < tr->NumberOfModels; model++)
        {
          size_t
            nStates = tr->partitionData[model].states,
            stride  = tr->partitionData[model].parsimonyLength,
            s;

          unsigned int
            col;

          parsimonyNumber
            *vect = tr->partitionData[model].parsVect;

          switch(nStates)
            {
            case 2:
              {
                parsimonyNumber
                  *leftVec[2],
                  *rightVec[2],
                  *parentVec[2];

                for(s = 0; s < 2; s++)
                  {
                    leftVec[s]   = &(vect[(stride * 2 * leftSlot) + stride * s]);
                    rightVec[s]  = &(vect[(stride * 2 * rightSlot) + stride * s]);
                    parentVec[s] = &(vect[(stride * 2 * parentSlot) + stride * s]);
                  }

                for(col = 0; col < stride; col += INTS_PER_VECTOR)
                  {
                    INT_TYPE
                      lhs, rhs,
                      inter0, inter1,
                      uni0, uni1,
                      any;

                    lhs = VECTOR_LOAD((CAST)(&leftVec[0][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[0][col]));
                    inter0 = VECTOR_BIT_AND(lhs, rhs);
                    uni0   = VECTOR_BIT_OR(lhs, rhs);

                    lhs = VECTOR_LOAD((CAST)(&leftVec[1][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[1][col]));
                    inter1 = VECTOR_BIT_AND(lhs, rhs);
                    uni1   = VECTOR_BIT_OR(lhs, rhs);

                    any = VECTOR_BIT_OR(inter0, inter1);

                    /* all loads are done before the first store */
                    VECTOR_STORE((CAST)(&parentVec[0][col]), VECTOR_BIT_OR(inter0, VECTOR_AND_NOT(any, uni0)));
                    VECTOR_STORE((CAST)(&parentVec[1][col]), VECTOR_BIT_OR(inter1, VECTOR_AND_NOT(any, uni1)));

                    any = VECTOR_AND_NOT(any, onesMask);

                    totalScore += populationCount(any);
                  }
              }
              break;

            case 4:
              {
                parsimonyNumber
                  *leftVec[4],
                  *rightVec[4],
                  *parentVec[4];

                for(s = 0; s < 4; s++)
                  {
                    leftVec[s]   = &(vect[(stride * 4 * leftSlot) + stride * s]);
                    rightVec[s]  = &(vect[(stride * 4 * rightSlot) + stride * s]);
                    parentVec[s] = &(vect[(stride * 4 * parentSlot) + stride * s]);
                  }

                for(col = 0; col < stride; col += INTS_PER_VECTOR)
                  {
                    INT_TYPE
                      lhs, rhs,
                      inter0, inter1, inter2, inter3,
                      uni0, uni1, uni2, uni3,
                      any;

                    lhs = VECTOR_LOAD((CAST)(&leftVec[0][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[0][col]));
                    inter0 = VECTOR_BIT_AND(lhs, rhs);
                    uni0   = VECTOR_BIT_OR(lhs, rhs);

                    lhs = VECTOR_LOAD((CAST)(&leftVec[1][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[1][col]));
                    inter1 = VECTOR_BIT_AND(lhs, rhs);
                    uni1   = VECTOR_BIT_OR(lhs, rhs);

                    lhs = VECTOR_LOAD((CAST)(&leftVec[2][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[2][col]));
                    inter2 = VECTOR_BIT_AND(lhs, rhs);
                    uni2   = VECTOR_BIT_OR(lhs, rhs);

                    lhs = VECTOR_LOAD((CAST)(&leftVec[3][col]));
                    rhs = VECTOR_LOAD((CAST)(&rightVec[3][col]));
                    inter3 = VECTOR_BIT_AND(lhs, rhs);
                    uni3   = VECTOR_BIT_OR(lhs, rhs);

                    any = VECTOR_BIT_OR(VECTOR_BIT_OR(inter0, inter1), VECTOR_BIT_OR(inter2, inter3));

                    VECTOR_STORE((CAST)(&parentVec[0][col]), VECTOR_BIT_OR(inter0, VECTOR_AND_NOT(any, uni0)));
                    VECTOR_STORE((CAST)(&parentVec[1][col]), VECTOR_BIT_OR(inter1, VECTOR_AND_NOT(any, uni1)));
                    VECTOR_STORE((CAST)(&parentVec[2][col]), VECTOR_BIT_OR(inter2, VECTOR_AND_NOT(any, uni2)));
                    VECTOR_STORE((CAST)(&parentVec[3][col]), VECTOR_BIT_OR(inter3, VECTOR_AND_NOT(any, uni3)));

                    any = VECTOR_AND_NOT(any, onesMask);

                    totalScore += populationCount(any);
                  }
              }
              break;

            case 20:
              {
                parsimonyNumber
                  *leftVec[20],
                  *rightVec[20],
                  *parentVec[20];

                for(s = 0; s < 20; s++)
                  {
                    leftVec[s]   = &(vect[(stride * 20 * leftSlot) + stride * s]);
                    rightVec[s]  = &(vect[(stride * 20 * rightSlot) + stride * s]);
                    parentVec[s] = &(vect[(stride * 20 * parentSlot) + stride * s]);
                  }

                for(col = 0; col < stride; col += INTS_PER_VECTOR)
                  {
                    size_t
                      j;

                    INT_TYPE
                      lhs, rhs,
                      any = SET_ALL_BITS_ZERO,
                      inter[20],
                      uni[20];

                    for(j = 0; j < 20; j++)
                      {
                        lhs = VECTOR_LOAD((CAST)(&leftVec[j][col]));
                        rhs = VECTOR_LOAD((CAST)(&rightVec[j][col]));
                        inter[j] = VECTOR_BIT_AND(lhs, rhs);
                        uni[j]   = VECTOR_BIT_OR(lhs, rhs);

                        any = VECTOR_BIT_OR(any, inter[j]);
                      }

                    for(j = 0; j < 20; j++)
                      VECTOR_STORE((CAST)(&parentVec[j][col]), VECTOR_BIT_OR(inter[j], VECTOR_AND_NOT(any, uni[j])));

                    any = VECTOR_AND_NOT(any, onesMask);

                    totalScore += populationCount(any);
                  }
              }
              break;

            default:
              {
                /* generic path: any state count up to 32 */
                parsimonyNumber
                  *leftVec[32],
                  *rightVec[32],
                  *parentVec[32];

                assert(nStates <= 32);

                for(s = 0; s < nStates; s++)
                  {
                    leftVec[s]   = &(vect[(stride * nStates * leftSlot) + stride * s]);
                    rightVec[s]  = &(vect[(stride * nStates * rightSlot) + stride * s]);
                    parentVec[s] = &(vect[(stride * nStates * parentSlot) + stride * s]);
                  }

                for(col = 0; col < stride; col += INTS_PER_VECTOR)
                  {
                    size_t
                      j;

                    INT_TYPE
                      lhs, rhs,
                      any = SET_ALL_BITS_ZERO,
                      inter[32],
                      uni[32];

                    for(j = 0; j < nStates; j++)
                      {
                        lhs = VECTOR_LOAD((CAST)(&leftVec[j][col]));
                        rhs = VECTOR_LOAD((CAST)(&rightVec[j][col]));
                        inter[j] = VECTOR_BIT_AND(lhs, rhs);
                        uni[j]   = VECTOR_BIT_OR(lhs, rhs);

                        any = VECTOR_BIT_OR(any, inter[j]);
                      }

                    for(j = 0; j < nStates; j++)
                      VECTOR_STORE((CAST)(&parentVec[j][col]), VECTOR_BIT_OR(inter[j], VECTOR_AND_NOT(any, uni[j])));

                    any = VECTOR_AND_NOT(any, onesMask);

                    totalScore += populationCount(any);
                  }
              }
            }
        }

      tr->parsimonyScore[parentSlot] = totalScore + tr->parsimonyScore[rightSlot] + tr->parsimonyScore[leftSlot];
    }
}
// Decodes 16 interleaved frames with 8-bit fixed-point SIMD arithmetic.
//
// Intrinsic_fix     : input values, 16 frames of NOEUD bytes stored one frame
//                     after the other (frame z, value i at [z * NOEUD + i]).
// Rprime_fix        : output buffer with the same frame-major layout.
// nombre_iterations : number of passes of the main loop; the loop condition is
//                     `nombre_iterations--`, so callers must pass a value >= 0.
//
// Always returns 1 (true); no convergence test is active in this version.
//
// Each pass walks the message array once, in sections of DEG_k_COMPUTATIONS
// groups of DEG_k messages (k = 1 .. NB_DEGRES). All sections share the same
// read/write cursors, so their order matters.
// NOTE(review): the class name and the min1/min2/offset arithmetic suggest an
// offset min-sum LDPC check-node update - confirm against the macro header.
bool CDecoder_OMS_fixed_SSE::decode_8bits(char Intrinsic_fix[], char Rprime_fix[], int nombre_iterations)
{
    ////////////////////////////////////////////////////////////////////////////
    //
    // Initialise the working memory: clear every stored message.
    //
    const TYPE zero = VECTOR_ZERO;
    for (int i=0; i<MESSAGE; i++){
        var_mesgs[i] = zero;
    }
    //
    ////////////////////////////////////////////////////////////////////////////

    ////////////////////////////////////////////////////////////////////////////
    //
    // Interleave the input data so that SIMD processing can be used: after
    // this step the 16 bytes starting at 16*i hold value i of each frame.
    //
    if( NOEUD%16 == 0 ){
        uchar_transpose_sse((TYPE*)Intrinsic_fix, (TYPE*)var_nodes, NOEUD);
    }else{
        // Scalar fallback when NOEUD is not a multiple of 16.
        char *ptrVar = (char*) var_nodes;
        for (int i=0; i<NOEUD; i++){
            for (int z=0; z<16; z++){
                ptrVar[16 * i + z] = Intrinsic_fix[z * NOEUD + i];
            }
        }
    }
    //
    ////////////////////////////////////////////////////////////////////////////

    // (disabled early-stop flag) unsigned int arret = 0;

    while ( nombre_iterations-- ) {
        // Read and write cursors over the message array; both restart at the
        // beginning on every pass.
        TYPE *p_msg1r = var_mesgs;
        TYPE *p_msg1w = var_mesgs;
#if PETIT == 1
        // PETIT == 1: the table holds direct pointers to the node vectors.
        TYPE **p_indice_nod1 = p_vn_adr;
        TYPE **p_indice_nod2 = p_vn_adr;
#else
        // Otherwise the table holds 16-bit indices into var_nodes.
        const unsigned short *p_indice_nod1 = PosNoeudsVariable;
        const unsigned short *p_indice_nod2 = PosNoeudsVariable;
#endif
        // (disabled early-stop flag) arret = 0;

        const TYPE min_var = VECTOR_SET1( vSAT_NEG_VAR );
        const TYPE max_msg = VECTOR_SET1( vSAT_POS_MSG );

        //
        // Section 1: groups of DEG_1 messages.
        //
        for (int i=0; i<DEG_1_COMPUTATIONS; i++){
            // IACA_START (analysis marker, disabled)
            TYPE tab_vContr[DEG_1];                 // contributions kept for the second loop
            TYPE sign = VECTOR_ZERO;                // running XOR of the sign bits
            TYPE min1 = VECTOR_SET1(vSAT_POS_VAR);  // updated through VECTOR_MIN_1
            TYPE min2 = min1;                       // updated through VECTOR_MIN_2

            // Sign masks; the correction mask XORed into `sign` after the
            // first loop is 0xC0 for an odd DEG_1 and 0x40 for an even one.
            // NOTE(review): the meaning of bit 0x40 depends on
            // VECTOR_invSIGN2, which is defined elsewhere - confirm.
#if (DEG_1 & 0x01) == 1
            const unsigned char sign8 = 0x80;
            const unsigned char isign8 = 0xC0;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8 = VECTOR_SET1( isign8 );
#else
            const unsigned char sign8 = 0x80;
            const unsigned char isign8b = 0x40;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8b = VECTOR_SET1( isign8b );
#endif

#if PETIT == 1
#if MANUAL_PREFETCH == 1
            // Prefetch what the next group will read.
            _mm_prefetch((const char*)(p_indice_nod1[DEG_1]), _MM_HINT_T0);
            _mm_prefetch((const char*)(&p_msg1r[DEG_1]), _MM_HINT_T0);
#endif
#endif

            // First loop: contribution = node value - stored message
            // (saturating macro), then accumulate sign and minima.
#pragma unroll(DEG_1)
            for(int j=0; j<DEG_1; j++){
#if PETIT == 1
                TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 );
#else
                TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]);
#endif
                TYPE vMessg = VECTOR_LOAD(p_msg1r);
                TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var);
                TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8);
                sign = VECTOR_XOR(sign, cSign);
                // NOTE(review): this section clamps AFTER taking the absolute
                // value; sections 2..5 clamp BEFORE it. The two forms differ
                // for negative contributions below -max_msg - confirm which
                // one is intended.
                TYPE vAbs = VECTOR_MIN( VECTOR_ABS( vContr), max_msg);
                tab_vContr[j] = vContr;
                TYPE vTemp = min1;
                min1 = VECTOR_MIN_1(vAbs, min1);
                min2 = VECTOR_MIN_2(vAbs, vTemp, min2);
                p_indice_nod1 += 1;
                p_msg1r += 1;
            }

#if PETIT == 1
#if MANUAL_PREFETCH == 1
            for(int j=0 ; j<DEG_1 ; j++){
                _mm_prefetch((const char*)(p_indice_nod1[j]), _MM_HINT_T0);
            }
            _mm_prefetch((const char*)(p_indice_nod1[DEG_1]), _MM_HINT_T0);
#endif
#endif

            // Subtract the offset and saturate straight to the message range.
            TYPE cste_1 = VECTOR_MIN(VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg);
            TYPE cste_2 = VECTOR_MIN(VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg);

#if (DEG_1 & 0x01) == 1
            sign = VECTOR_XOR(sign, misign8);
#else
            sign = VECTOR_XOR(sign, misign8b);
#endif

            // Second loop: build the new message for each edge, store it,
            // and write contribution + new message back to the node.
#pragma unroll(DEG_1)
            for(int j=0 ; j<DEG_1 ; j++) {
                TYPE vContr = tab_vContr[j];
                TYPE vAbs = VECTOR_MIN(VECTOR_ABS(vContr), max_msg );
                // Chooses between cste_1 and cste_2 from vAbs and min1
                // (presumably cste_1 where vAbs == min1 - confirm macro).
                TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2);
                TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8));
                TYPE v2St = VECTOR_invSIGN2(vRes, vSig);
                TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var);
                VECTOR_STORE( p_msg1w, v2St);
#if PETIT == 1
                VECTOR_STORE( *p_indice_nod2, v2Sr);
#else
                VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr);
#endif
                p_msg1w += 1;
                p_indice_nod2 += 1;
            }
            // (disabled early-stop update) arret = arret || VECTOR_XOR_REDUCE( sign );
            // IACA_END (analysis marker, disabled)
        }

        /////////////////////////////////////////////////////////////////////////////////
        //
        // Section 2: same processing for groups of DEG_2 messages, except
        // that the magnitude is clamped before VECTOR_ABS.
        //
#if NB_DEGRES >= 2
        for (int i=0; i<DEG_2_COMPUTATIONS; i++){
#if (DEG_2 & 0x01) == 1
            const unsigned char sign8 = 0x80;
            const unsigned char isign8 = 0xC0;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8 = VECTOR_SET1( isign8 );
#else
            const unsigned char sign8 = 0x80;
            const unsigned char isign8b = 0x40;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8b = VECTOR_SET1( isign8b );
#endif

            TYPE tab_vContr[DEG_2];
            TYPE sign = zero;
            TYPE min1 = VECTOR_SET1(vSAT_POS_VAR);
            TYPE min2 = min1;

#pragma unroll(DEG_2)
            for(int j=0 ; j<DEG_2 ; j++) {
#if PETIT == 1
                TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 );
#else
                TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]);
#endif
                TYPE vMessg = VECTOR_LOAD( p_msg1r );
                TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var);
                TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8);
                sign = VECTOR_XOR (sign, cSign);
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                tab_vContr[j] = vContr;
                TYPE vTemp = min1;
                min1 = VECTOR_MIN_1(vAbs, min1 );
                min2 = VECTOR_MIN_2(vAbs, vTemp, min2);
                p_indice_nod1 += 1;
                p_msg1r += 1;
            }

            // Subtract the offset and saturate straight to the message range.
            TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg);
            TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg);

#if (DEG_2 & 0x01) == 1
            sign = VECTOR_XOR(sign, misign8);
#else
            sign = VECTOR_XOR(sign, misign8b);
#endif

#pragma unroll(DEG_2)
            for(int j=0 ; j<DEG_2 ; j++) {
                TYPE vContr = tab_vContr[j];
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2);
                TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8));
                TYPE v2St = VECTOR_invSIGN2(vRes, vSig);
                TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var);
                VECTOR_STORE( p_msg1w, v2St);
#if PETIT == 1
                VECTOR_STORE( *p_indice_nod2, v2Sr);
#else
                VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr);
#endif
                p_msg1w += 1;
                p_indice_nod2 += 1;
            }
            // (disabled early-stop update) arret = arret || VECTOR_XOR_REDUCE( sign );
        }
#endif

        /////////////////////////////////////////////////////////////////////////////////
        //
        // Section 3: groups of DEG_3 messages (no unroll pragma).
        //
#if NB_DEGRES >= 3
        for (int i=0; i<DEG_3_COMPUTATIONS; i++){
#if (DEG_3 & 0x01) == 1
            const unsigned char sign8 = 0x80;
            const unsigned char isign8 = 0xC0;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8 = VECTOR_SET1( isign8 );
#else
            const unsigned char sign8 = 0x80;
            const unsigned char isign8b = 0x40;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8b = VECTOR_SET1( isign8b );
#endif

            TYPE tab_vContr[DEG_3];
            TYPE sign = zero;
            TYPE min1 = VECTOR_SET1(vSAT_POS_VAR);
            TYPE min2 = min1;

            for(int j=0 ; j<DEG_3 ; j++) {
#if PETIT == 1
                TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 );
#else
                TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]);
#endif
                TYPE vMessg = VECTOR_LOAD( p_msg1r );
                TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var);
                TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8);
                sign = VECTOR_XOR (sign, cSign);
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                tab_vContr[j] = vContr;
                TYPE vTemp = min1;
                min1 = VECTOR_MIN_1(vAbs, min1 );
                min2 = VECTOR_MIN_2(vAbs, vTemp, min2);
                p_indice_nod1 += 1;
                p_msg1r += 1;
            }

            // Subtract the offset and saturate straight to the message range.
            TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg);
            TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg);

#if (DEG_3 & 0x01) == 1
            sign = VECTOR_XOR(sign, misign8);
#else
            sign = VECTOR_XOR(sign, misign8b);
#endif

            for(int j=0 ; j<DEG_3 ; j++) {
                TYPE vContr = tab_vContr[j];
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2);
                TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8));
                TYPE v2St = VECTOR_invSIGN2(vRes, vSig);
                TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var);
                VECTOR_STORE( p_msg1w, v2St);
#if PETIT == 1
                VECTOR_STORE( *p_indice_nod2, v2Sr);
#else
                VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr);
#endif
                p_msg1w += 1;
                p_indice_nod2 += 1;
            }
            // (disabled early-stop update) arret = arret || VECTOR_XOR_REDUCE( sign );
        }
#endif

        /////////////////////////////////////////////////////////////////////////////////
        //
        // Section 4: groups of DEG_4 messages.
        //
#if NB_DEGRES >= 4
        for (int i=0; i<DEG_4_COMPUTATIONS; i++){
#if (DEG_4 & 0x01) == 1
            const unsigned char sign8 = 0x80;
            const unsigned char isign8 = 0xC0;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8 = VECTOR_SET1( isign8 );
#else
            const unsigned char sign8 = 0x80;
            const unsigned char isign8b = 0x40;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8b = VECTOR_SET1( isign8b );
#endif

            TYPE tab_vContr[DEG_4];
            TYPE sign = zero;
            TYPE min1 = VECTOR_SET1(vSAT_POS_VAR);
            TYPE min2 = min1;

            for(int j=0 ; j<DEG_4 ; j++) {
#if PETIT == 1
                TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 );
#else
                TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]);
#endif
                TYPE vMessg = VECTOR_LOAD( p_msg1r );
                TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var);
                TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8);
                sign = VECTOR_XOR (sign, cSign);
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                tab_vContr[j] = vContr;
                TYPE vTemp = min1;
                min1 = VECTOR_MIN_1(vAbs, min1 );
                min2 = VECTOR_MIN_2(vAbs, vTemp, min2);
                p_indice_nod1 += 1;
                p_msg1r += 1;
            }

            // Subtract the offset and saturate straight to the message range.
            TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg);
            TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg);

#if (DEG_4 & 0x01) == 1
            sign = VECTOR_XOR(sign, misign8);
#else
            sign = VECTOR_XOR(sign, misign8b);
#endif

            for(int j=0 ; j<DEG_4 ; j++) {
                TYPE vContr = tab_vContr[j];
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2);
                TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8));
                TYPE v2St = VECTOR_invSIGN2(vRes, vSig);
                TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var);
                VECTOR_STORE( p_msg1w, v2St);
#if PETIT == 1
                VECTOR_STORE( *p_indice_nod2, v2Sr);
#else
                VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr);
#endif
                p_msg1w += 1;
                p_indice_nod2 += 1;
            }
            // (disabled early-stop update) arret = arret || VECTOR_XOR_REDUCE( sign );
        }
#endif

        /////////////////////////////////////////////////////////////////////////////////
        //
        // Section 5: groups of DEG_5 messages.
        //
#if NB_DEGRES >= 5
        for (int i=0; i<DEG_5_COMPUTATIONS; i++){
#if (DEG_5 & 0x01) == 1
            const unsigned char sign8 = 0x80;
            const unsigned char isign8 = 0xC0;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8 = VECTOR_SET1( isign8 );
#else
            const unsigned char sign8 = 0x80;
            const unsigned char isign8b = 0x40;
            const TYPE msign8 = VECTOR_SET1( sign8 );
            const TYPE misign8b = VECTOR_SET1( isign8b );
#endif

            TYPE tab_vContr[DEG_5];
            TYPE sign = zero;
            TYPE min1 = VECTOR_SET1(vSAT_POS_VAR);
            TYPE min2 = min1;

            for(int j=0 ; j<DEG_5 ; j++) {
#if PETIT == 1
                TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 );
#else
                TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]);
#endif
                TYPE vMessg = VECTOR_LOAD( p_msg1r );
                TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var);
                TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8);
                sign = VECTOR_XOR (sign, cSign);
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                tab_vContr[j] = vContr;
                TYPE vTemp = min1;
                min1 = VECTOR_MIN_1(vAbs, min1 );
                min2 = VECTOR_MIN_2(vAbs, vTemp, min2);
                p_indice_nod1 += 1;
                p_msg1r += 1;
            }

            // Subtract the offset and saturate straight to the message range.
            TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg);
            TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg);

#if (DEG_5 & 0x01) == 1
            sign = VECTOR_XOR(sign, misign8);
#else
            sign = VECTOR_XOR(sign, misign8b);
#endif

            for(int j=0 ; j<DEG_5 ; j++) {
                TYPE vContr = tab_vContr[j];
                TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) );
                TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2);
                TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8));
                TYPE v2St = VECTOR_invSIGN2(vRes, vSig);
                TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var);
                VECTOR_STORE( p_msg1w, v2St);
#if PETIT == 1
                VECTOR_STORE( *p_indice_nod2, v2Sr);
#else
                VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr);
#endif
                p_msg1w += 1;
                p_indice_nod2 += 1;
            }
            // (disabled early-stop update) arret = arret || VECTOR_XOR_REDUCE( sign );
        }
#endif

        /////////////////////////////////////////////////////////////////////////////////
        // Only five sections are written out. A configuration with more
        // prints a message and terminates the process from inside the first
        // pass - note the exit status is 0.
#if NB_DEGRES > 5
        printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n");
        exit( 0 );
#endif
        /////////////////////////////////////////////////////////////////////////////////
        //
        // Stop criterion (disabled):
        //
        //   if( (arret == 0) && (fast_stop == 1) ){
        //       break;
        //   }
    }

    ////////////////////////////////////////////////////////////////////////////
    //
    // Put the output data back into frame-major order for the rest of the
    // processing chain.
    //
    if( NOEUD%16 == 0 ){
        uchar_itranspose_sse((TYPE*)var_nodes, (TYPE*)Rprime_fix, NOEUD);
    }else{
        // Scalar fallback: de-interleave and store 1 where the node value is
        // strictly positive, 0 otherwise.
        // NOTE(review): whether uchar_itranspose_sse applies the same
        // thresholding is not visible here - confirm both paths agree.
        char* ptr = (char*) var_nodes;
        for (int i=0; i<NOEUD; i+=1){
            for (int j=0; j<16; j+=1){
                Rprime_fix[j*NOEUD +i] = (ptr[16*i+j] > 0);
            }
        }
    }
    //
    ////////////////////////////////////////////////////////////////////////////
    return 1;
}
/*
 * update_histograms() - accumulate joint (volume distance, surface distance)
 * counts for each vertex of an average surface.
 *
 * For every vertex of mris_avg that is not marked, the closest vertex of
 * mris is looked up through a spatial hash table built on mris.  That vertex
 * is then paired with every other vertex of mris, and for each pair:
 *   - volume_dist is the Euclidean distance between the orig coordinates;
 *     pairs whose rounded volume_dist falls outside [0, nbins) are skipped;
 *   - surface_dist is circumference * angle / (2 * pi), with the angle taken
 *     between the two (cx, cy, cz) vectors, clamped to
 *     [1, nbins * MAX_SURFACE_SCALE];
 *   - histograms[vno_avg][nint(volume_dist)][nint(surface_dist)] is
 *     incremented.
 * The circumference uses the length of the (cx, cy, cz) vector of vertex 0
 * of mris, so mris must have at least one vertex.
 *
 * Parameters:
 *   mris        subject surface supplying orig and cx/cy/cz coordinates.
 *   mris_avg    average surface; its marks are cleared on entry and every
 *               vertex that receives counts is marked.
 *   histograms  one 2-d histogram per mris_avg vertex, updated in place.
 *   nbins       number of volume-distance bins.
 *
 * Returns NO_ERROR.
 *
 * Only one hash table is built and it is released before returning; no
 * table is created on mris_avg, since no lookup into mris_avg is performed.
 *
 * NOTE(review): the surface bin index can reach nbins * MAX_SURFACE_SCALE
 * itself, so each histogram row needs nbins * MAX_SURFACE_SCALE + 1 entries.
 * Confirm against the code that allocates histograms.
 */
static int
update_histograms(MRI_SURFACE *mris, MRI_SURFACE *mris_avg, float ***histograms, int nbins)
{
  int    vno, vno2, vno_avg, vbin ;
  double volume_dist, surface_dist, circumference, angle ;
  VERTEX *v1, *v2 ;
  VECTOR *vec1, *vec2 ;
  MHT    *mht ;
  float  **histogram, min_dist ;

  vec1 = VectorAlloc(3, MATRIX_REAL) ;
  vec2 = VectorAlloc(3, MATRIX_REAL) ;

  v1 = &mris->vertices[0] ;
  VECTOR_LOAD(vec1, v1->cx, v1->cy, v1->cz) ;  /* radius vector */
  circumference = M_PI * 2.0 * V3_LEN(vec1) ;

  MRISclearMarks(mris_avg) ;

  /* hash table on the subject surface for the closest-vertex lookups */
  mht = MHTfillVertexTableRes(mris, NULL, CURRENT_VERTICES, 2.0) ;

  for (vno_avg = 0 ; vno_avg < mris_avg->nvertices ; vno_avg++)
  {
    if (mris_avg->vertices[vno_avg].marked > 0)
      continue ;

    if ((vno_avg % 1000) == 0)
    {
      printf("\r%d of %d ", vno_avg, mris_avg->nvertices) ;
      fflush(stdout) ;
    }

    vno = MHTfindClosestVertexNo(mht, mris, &mris_avg->vertices[vno_avg], &min_dist) ;
    if (vno < 0)
      continue ;                       /* nothing close enough in the table */

    v1 = &mris->vertices[vno] ;
    VECTOR_LOAD(vec1, v1->cx, v1->cy, v1->cz) ;  /* radius vector */

    if (vno_avg == Gdiag_no)
      DiagBreak() ;                    /* debugger hook for one vertex */

    histogram = histograms[vno_avg] ;  /* which histogram to increment */
    mris_avg->vertices[vno_avg].marked = 1 ;

    for (vno2 = 0 ; vno2 < mris->nvertices ; vno2++)
    {
      if (vno2 == vno)
        continue ;

      v2 = &mris->vertices[vno2] ;
      VECTOR_LOAD(vec2, v2->cx, v2->cy, v2->cz) ;  /* radius vector */

      volume_dist = sqrt(SQR(v1->origx-v2->origx)+SQR(v1->origy-v2->origy)+SQR(v1->origz-v2->origz)) ;
      vbin = nint(volume_dist) ;
      if (vbin >= nbins || vbin < 0)
        continue ;

      angle = fabs(Vector3Angle(vec1, vec2)) ;
      surface_dist = circumference * angle / (2.0 * M_PI) ;
      if (surface_dist > nbins*MAX_SURFACE_SCALE)
        surface_dist = nbins*MAX_SURFACE_SCALE ;
      if (surface_dist < 1)
        surface_dist = 1 ;

      histogram[vbin][nint(surface_dist)]++ ;
    }
  }

  MHTfree(&mht) ;
  printf("\n") ;
  VectorFree(&vec1) ;
  VectorFree(&vec2) ;
  return(NO_ERROR) ;
}
bool CDecoder_OMS_fixed_NEON16_v3::decode_8bits(signed char Intrinsic_fix[], signed char Rprime_fix[], int nombre_iterations) { //////////////////////////////////////////////////////////////////////////// // // Initilisation des espaces memoire // // for (int i=0; i<MESSAGE; i++){ // var_mesgs[i] = VECTOR_ZERO; // } // //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// // // ENTRELACEMENT DES DONNEES D'ENTREE POUR POUVOIR EXPLOITER LE MODE SIMD // if( NOEUD%16 == 0 ){ uchar_transpose_neon((trans_TYPE*)Intrinsic_fix, (trans_TYPE*)var_nodes, NOEUD); }else{ signed char* ptrVar = (signed char*) var_nodes; for (int i=0; i<NOEUD; i++){ for (int z=0; z<16; z++){ ptrVar[16 * i + z] = Intrinsic_fix[z * NOEUD + i]; } } } // //////////////////////////////////////////////////////////////////////////// nombre_iterations--; if( 1 ) { TYPE *p_msg1w = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; //const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vContr = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); //#ifdef _PREFETCH_ // if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); //#endif TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, 
VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vContr = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); //#ifdef _PREFETCH_ // if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); //#endif TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_2; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; 
p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } // // // ON REPREND LE TRAITEMENT NORMAL DE L'INFORMATION // // while (nombre_iterations-- != 1) { TYPE *p_msg1r = var_mesgs; TYPE *p_msg1w = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; // const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( 
&var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_2; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. 
YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } { TYPE *p_msg1r = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; // const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ 
__builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } //////////////////////////////////////////////////////////////////////////// // // ON REMET EN FORME LES DONNEES DE SORTIE POUR LA SUITE DU PROCESS // if( NOEUD%16 == 0 ){ uchar_itranspose_neon((trans_TYPE*)var_nodes, (trans_TYPE*)Rprime_fix, NOEUD); }else{ signed char* ptr = (signed char*) var_nodes; for (int i=0; i<NOEUD; i+=1){ for (int j=0; j<16; j+=1){ Rprime_fix[j*NOEUD +i] = (ptr[16*i+j] > 0); } } } // //////////////////////////////////////////////////////////////////////////// return 0; }