bool CDecoder_OMS_fixed_SSE::decode_8bits(char Intrinsic_fix[], char Rprime_fix[], int nombre_iterations) { //////////////////////////////////////////////////////////////////////////// // // Initilisation des espaces memoire // const TYPE zero = VECTOR_ZERO; for (int i=0; i<MESSAGE; i++){ var_mesgs[i] = zero; } // //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// // // ENTRELACEMENT DES DONNEES D'ENTREE POUR POUVOIR EXPLOITER LE MODE SIMD // if( NOEUD%16 == 0 ){ uchar_transpose_sse((TYPE*)Intrinsic_fix, (TYPE*)var_nodes, NOEUD); }else{ char *ptrVar = (char*) var_nodes; for (int i=0; i<NOEUD; i++){ for (int z=0; z<16; z++){ ptrVar[16 * i + z] = Intrinsic_fix[z * NOEUD + i]; } } } // //////////////////////////////////////////////////////////////////////////// // unsigned int arret = 0; while ( nombre_iterations-- ) { TYPE *p_msg1r = var_mesgs; TYPE *p_msg1w = var_mesgs; #if PETIT == 1 TYPE **p_indice_nod1 = p_vn_adr; TYPE **p_indice_nod2 = p_vn_adr; #else const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; #endif // arret = 0; const TYPE min_var = VECTOR_SET1( vSAT_NEG_VAR ); const TYPE max_msg = VECTOR_SET1( vSAT_POS_MSG ); for (int i=0; i<DEG_1_COMPUTATIONS; i++){ //IACA_START TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #if (DEG_1 & 0x01) == 1 const unsigned char sign8 = 0x80; const unsigned char isign8 = 0xC0; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8 = VECTOR_SET1( isign8 ); #else const unsigned char sign8 = 0x80; const unsigned char isign8b = 0x40; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8b = VECTOR_SET1( isign8b ); #endif #if PETIT == 1 #if MANUAL_PREFETCH == 1 _mm_prefetch((const char*)(p_indice_nod1[DEG_1]), _MM_HINT_T0); _mm_prefetch((const char*)(&p_msg1r[DEG_1]), _MM_HINT_T0); #endif #endif #pragma unroll(DEG_1) for(int j=0; j<DEG_1; j++){ #if PETIT == 1 TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 ); #else TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); #endif TYPE vMessg = VECTOR_LOAD(p_msg1r); TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_MIN( VECTOR_ABS( vContr), max_msg); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #if PETIT == 1 #if MANUAL_PREFETCH == 1 for(int j=0 ; j<DEG_1 ; j++){ _mm_prefetch((const char*)(p_indice_nod1[j]), _MM_HINT_T0); } _mm_prefetch((const char*)(p_indice_nod1[DEG_1]), _MM_HINT_T0); #endif #endif TYPE cste_1 = VECTOR_MIN(VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG TYPE cste_2 = VECTOR_MIN(VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG #if (DEG_1 & 0x01) == 1 sign = VECTOR_XOR(sign, misign8); #else sign = VECTOR_XOR(sign, misign8b); #endif #pragma unroll(DEG_1) for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_MIN(VECTOR_ABS(vContr), max_msg ); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var); VECTOR_STORE( p_msg1w, v2St); #if PETIT == 1 VECTOR_STORE( *p_indice_nod2, v2Sr); #else VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); #endif p_msg1w += 1; p_indice_nod2 += 1; } // arret = arret || VECTOR_XOR_REDUCE( sign ); //IACA_END } ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ #if (DEG_2 & 0x01) == 1 const unsigned char sign8 = 0x80; const unsigned char isign8 = 0xC0; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8 = VECTOR_SET1( isign8 ); #else const unsigned char sign8 = 0x80; const unsigned char isign8b = 0x40; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8b = VECTOR_SET1( isign8b ); #endif TYPE tab_vContr[DEG_2]; TYPE sign = zero; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #pragma unroll(DEG_2) for(int j=0 ; j<DEG_2 ; j++) { #if PETIT == 1 TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 ); #else TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); #endif TYPE vMessg = VECTOR_LOAD( p_msg1r ); TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8); sign = VECTOR_XOR (sign, cSign); TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1 ); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG #if (DEG_2 & 0x01) == 1 sign = VECTOR_XOR(sign, misign8); #else sign = VECTOR_XOR(sign, misign8b); #endif #pragma unroll(DEG_2) for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var); VECTOR_STORE( p_msg1w, v2St); #if PETIT == 1 VECTOR_STORE( *p_indice_nod2, v2Sr); #else VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); #endif p_msg1w += 1; p_indice_nod2 += 1; } // arret = arret || VECTOR_XOR_REDUCE( sign ); } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 3 for (int i=0; i<DEG_3_COMPUTATIONS; i++){ #if (DEG_3 & 0x01) == 1 const unsigned char sign8 = 0x80; const unsigned char isign8 = 0xC0; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8 = VECTOR_SET1( isign8 ); #else const unsigned char sign8 = 0x80; const unsigned char isign8b = 0x40; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8b = VECTOR_SET1( isign8b ); #endif TYPE tab_vContr[DEG_3]; TYPE sign = zero; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; for(int j=0 ; j<DEG_3 ; j++) { #if PETIT == 1 TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 ); #else TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); #endif TYPE vMessg = VECTOR_LOAD( p_msg1r ); TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8); sign = VECTOR_XOR (sign, cSign); TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1 ); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG #if (DEG_3 & 0x01) == 1 sign = VECTOR_XOR(sign, misign8); #else sign = VECTOR_XOR(sign, misign8b); #endif for(int j=0 ; j<DEG_3 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var); VECTOR_STORE( p_msg1w, v2St); #if PETIT == 1 VECTOR_STORE( *p_indice_nod2, v2Sr); #else VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); #endif p_msg1w += 1; p_indice_nod2 += 1; } // arret = arret || VECTOR_XOR_REDUCE( sign ); } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 4 for (int i=0; i<DEG_4_COMPUTATIONS; i++){ #if (DEG_4 & 0x01) == 1 const unsigned char sign8 = 0x80; const unsigned char isign8 = 0xC0; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8 = VECTOR_SET1( isign8 ); #else const unsigned char sign8 = 0x80; const unsigned char isign8b = 0x40; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8b = VECTOR_SET1( isign8b ); #endif TYPE tab_vContr[DEG_4]; TYPE sign = zero; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; for(int j=0 ; j<DEG_4 ; j++) { #if PETIT == 1 TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 ); #else TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); #endif TYPE vMessg = VECTOR_LOAD( p_msg1r ); TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8); sign = VECTOR_XOR (sign, cSign); TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1 ); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG #if (DEG_4 & 0x01) == 1 sign = VECTOR_XOR(sign, misign8); #else sign = VECTOR_XOR(sign, misign8b); #endif for(int j=0 ; j<DEG_4 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var); VECTOR_STORE( p_msg1w, v2St); #if PETIT == 1 VECTOR_STORE( *p_indice_nod2, v2Sr); #else VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); #endif p_msg1w += 1; p_indice_nod2 += 1; } // arret = arret || VECTOR_XOR_REDUCE( sign ); } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 5 for (int i=0; i<DEG_5_COMPUTATIONS; i++){ #if (DEG_5 & 0x01) == 1 const unsigned char sign8 = 0x80; const unsigned char isign8 = 0xC0; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8 = VECTOR_SET1( isign8 ); #else const unsigned char sign8 = 0x80; const unsigned char isign8b = 0x40; const TYPE msign8 = VECTOR_SET1( sign8 ); const TYPE misign8b = VECTOR_SET1( isign8b ); #endif TYPE tab_vContr[DEG_5]; TYPE sign = zero; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; for(int j=0 ; j<DEG_5 ; j++) { #if PETIT == 1 TYPE vNoeud = VECTOR_LOAD( *p_indice_nod1 ); #else TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); #endif TYPE vMessg = VECTOR_LOAD( p_msg1r ); TYPE vContr = VECTOR_SUB_AND_SATURATE_VAR_8bits(vNoeud, vMessg, min_var); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr, msign8); sign = VECTOR_XOR (sign, cSign); TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1 ); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); // ON SATURE DIREECTEMENT AU FORMAT MSG #if (DEG_5 & 0x01) == 1 sign = VECTOR_XOR(sign, misign8); #else sign = VECTOR_XOR(sign, misign8b); #endif for(int j=0 ; j<DEG_5 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS ( VECTOR_MIN(vContr, max_msg) ); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr, msign8)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD_AND_SATURATE_VAR_8bits(vContr, v2St, min_var); VECTOR_STORE( p_msg1w, v2St); #if PETIT == 1 VECTOR_STORE( *p_indice_nod2, v2Sr); #else VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); #endif p_msg1w += 1; p_indice_nod2 += 1; } // arret = arret || VECTOR_XOR_REDUCE( sign ); } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 5 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif ///////////////////////////////////////////////////////////////////////////////// // // GESTION DU CRITERE D'ARRET // // if( (arret == 0) && (fast_stop == 1) ){ // break; // } } //////////////////////////////////////////////////////////////////////////// // // ON REMET EN FORME LES DONNEES DE SORTIE POUR LA SUITE DU PROCESS // if( NOEUD%16 == 0 ){ uchar_itranspose_sse((TYPE*)var_nodes, (TYPE*)Rprime_fix, NOEUD); }else{ char* ptr = (char*) var_nodes; for (int i=0; i<NOEUD; i+=1){ for (int j=0; j<16; j+=1){ Rprime_fix[j*NOEUD +i] = (ptr[16*i+j] > 0); } } } // //////////////////////////////////////////////////////////////////////////// return 1; }
/** * Returns the intersections of two vectors of circles. * @ret: the number of intersection [0,1,2] */ static inline VECTOR __attribute__((__always_inline__,__gnu_inline__,__nonnull__,__artificial__)) circle_get_intersection_ps(const VECTOR p1x, const VECTOR p1y, const VECTOR p2x, const VECTOR p2y, const VECTOR r1, const VECTOR r2, VECTOR *restrict retx, VECTOR *restrict rety) { VECTOR d = distance(p1x, p1y, p2x, p2y); // no solutions, the circles are separate || the circles are coincident || no solutions because one circle is contained within the other // => infinite number of solutions possible VECTOR one_sol = VECTOR_GE(r1 + r2, d); one_sol = VECTOR_AND(one_sol, VECTOR_LE(VECTOR_ABS(r1 - r2), d)); one_sol = VECTOR_AND(one_sol, VECTOR_NE(VECTOR_ZERO(), d)); VECTOR a = (r1*r1 - r2*r2 + d*d) / (d + d); VECTOR v = r1*r1 - a*a; VECTOR h = VECTOR_SQRT(v); VECTOR dx = (p2x - p1x) / d; VECTOR dy = (p2y - p1y) / d; VECTOR p3x = p1x + a * dx; VECTOR p3y = p1y + a * dy; dx *= h; dy *= h; VECTOR p4x = p3x + dy; VECTOR p4y = p3y - dx;
bool CDecoder_OMS_fixed_NEON16_v3::decode_8bits(signed char Intrinsic_fix[], signed char Rprime_fix[], int nombre_iterations) { //////////////////////////////////////////////////////////////////////////// // // Initilisation des espaces memoire // // for (int i=0; i<MESSAGE; i++){ // var_mesgs[i] = VECTOR_ZERO; // } // //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// // // ENTRELACEMENT DES DONNEES D'ENTREE POUR POUVOIR EXPLOITER LE MODE SIMD // if( NOEUD%16 == 0 ){ uchar_transpose_neon((trans_TYPE*)Intrinsic_fix, (trans_TYPE*)var_nodes, NOEUD); }else{ signed char* ptrVar = (signed char*) var_nodes; for (int i=0; i<NOEUD; i++){ for (int z=0; z<16; z++){ ptrVar[16 * i + z] = Intrinsic_fix[z * NOEUD + i]; } } } // //////////////////////////////////////////////////////////////////////////// nombre_iterations--; if( 1 ) { TYPE *p_msg1w = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; //const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vContr = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); //#ifdef _PREFETCH_ // if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); //#endif TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vContr = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); //#ifdef _PREFETCH_ // if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); //#endif TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_2; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } // // // ON REPREND LE TRAITEMENT NORMAL DE L'INFORMATION // // while (nombre_iterations-- != 1) { TYPE *p_msg1r = var_mesgs; TYPE *p_msg1w = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; // const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_2; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( p_msg1w, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_msg1w += 1; p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } { TYPE *p_msg1r = var_mesgs; const unsigned short *p_indice_nod1 = PosNoeudsVariable; const unsigned short *p_indice_nod2 = PosNoeudsVariable; // const TYPE min_var = VECTOR_SET1( -127 ); const TYPE max_msg = VECTOR_SET1( 31 ); #if NB_DEGRES >= 1 for (int i=0; i<DEG_1_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_1]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_1, 0, 3); #endif for(int j=0; j<DEG_1; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_1, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_1 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES >= 2 for (int i=0; i<DEG_2_COMPUTATIONS; i++){ TYPE tab_vContr[DEG_2]; TYPE sign = VECTOR_ZERO; TYPE min1 = VECTOR_SET1(vSAT_POS_VAR); TYPE min2 = min1; #ifdef _PREFETCH_ __builtin_prefetch (p_indice_nod1 + DEG_2, 0, 3); #endif for(int j=0; j<DEG_2; j++){ TYPE vNoeud = VECTOR_LOAD(&var_nodes[(*p_indice_nod1)]); TYPE vMessg = VECTOR_LOAD(p_msg1r); #ifdef _PREFETCH_ if( (j & 0x01) == 0 ) __builtin_prefetch (p_msg1r+DEG_2, 0, 0); #endif TYPE vContr = VECTOR_SUB(vNoeud, vMessg); TYPE cSign = VECTOR_GET_SIGN_BIT(vContr); sign = VECTOR_XOR(sign, cSign); TYPE vAbs = VECTOR_ABS( vContr ); tab_vContr[j] = vContr; TYPE vTemp = min1; min1 = VECTOR_MIN_1(vAbs, min1); min2 = VECTOR_MIN_2(vAbs, vTemp, min2); p_indice_nod1 += 1; p_msg1r += 1; } #ifdef _PREFETCH_ for(int j=0; j<DEG_1; j++){ __builtin_prefetch (&var_nodes[p_indice_nod1[j]], 0, 3); } #endif TYPE cste_1 = VECTOR_MIN( VECTOR_SBU(min2, VECTOR_SET1(offset)), max_msg); TYPE cste_2 = VECTOR_MIN( VECTOR_SBU(min1, VECTOR_SET1(offset)), max_msg); for(int j=0 ; j<DEG_2 ; j++) { TYPE vContr = tab_vContr[j]; TYPE vAbs = VECTOR_ABS (vContr); TYPE vRes = VECTOR_CMOV (vAbs, min1, cste_1, cste_2); vRes = VECTOR_MIN(vRes, max_msg); // BLG TYPE vSig = VECTOR_XOR (sign, VECTOR_GET_SIGN_BIT(vContr)); TYPE v2St = VECTOR_invSIGN2(vRes, vSig); TYPE v2Sr = VECTOR_ADD(vContr, v2St); VECTOR_STORE( &var_nodes[(*p_indice_nod2)], v2Sr); p_indice_nod2 += 1; } } #endif ///////////////////////////////////////////////////////////////////////////////// #if NB_DEGRES > 2 printf("The number of DEGREE(Cn) IS HIGHER THAN 5. YOU NEED TO PERFORM A COPY PASTE IN SOURCE CODE...\n"); exit( 0 ); #endif } //////////////////////////////////////////////////////////////////////////// // // ON REMET EN FORME LES DONNEES DE SORTIE POUR LA SUITE DU PROCESS // if( NOEUD%16 == 0 ){ uchar_itranspose_neon((trans_TYPE*)var_nodes, (trans_TYPE*)Rprime_fix, NOEUD); }else{ signed char* ptr = (signed char*) var_nodes; for (int i=0; i<NOEUD; i+=1){ for (int j=0; j<16; j+=1){ Rprime_fix[j*NOEUD +i] = (ptr[16*i+j] > 0); } } } // //////////////////////////////////////////////////////////////////////////// return 0; }