int main(int argc, char* argv[]) { int p; srand( 0 ); printf("(II) LDPC DECODER - Flooding scheduled decoder\n"); printf("(II) MANIPULATION DE DONNEES (IEEE-754 - %ld bits)\n", (long int)8*sizeof(int)); printf("(II) GENEREE : %s - %s\n", __DATE__, __TIME__); double Eb_N0; double MinSignalSurBruit = 0.50; double MaxSignalSurBruit = 3.01; double PasSignalSurBruit = 0.10; int NOMBRE_ITERATIONS = 20; int STOP_TIMER_SECOND = -1; bool QPSK_CHANNEL = false; bool Es_N0 = false; // FALSE => MODE Eb_N0 int NB_THREAD_ON_GPU = 1024;; int FRAME_ERROR_LIMIT = 200; char defDecoder[] = "fMS"; const char* type = defDecoder; cudaSetDevice(0); cudaDeviceSynchronize(); cudaThreadSynchronize(); // // ON VA PARSER LES ARGUMENTS DE LIGNE DE COMMANDE // for (p=1; p<argc; p++) { if( strcmp(argv[p], "-min") == 0 ){ MinSignalSurBruit = atof( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-max") == 0 ){ MaxSignalSurBruit = atof( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-pas") == 0 ){ PasSignalSurBruit = atof( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-timer") == 0 ){ STOP_TIMER_SECOND = atoi( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-iter") == 0 ){ NOMBRE_ITERATIONS = atoi( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-fer") == 0 ){ FRAME_ERROR_LIMIT = atoi( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-qef") == 0 ){ BER_SIMULATION_LIMIT = true; BIT_ERROR_LIMIT = ( atof( argv[p+1] ) ); p += 1; }else if( strcmp(argv[p], "-bpsk") == 0 ){ QPSK_CHANNEL = false; }else if( strcmp(argv[p], "-qpsk") == 0 ){ QPSK_CHANNEL = true; }else if( strcmp(argv[p], "-Eb/N0") == 0 ){ Es_N0 = false; }else if( strcmp(argv[p], "-Es/N0") == 0 ){ Es_N0 = true; }else if( strcmp(argv[p], "-n") == 0 ){ NB_THREAD_ON_GPU = atoi( argv[p+1] ); p += 1; }else if( strcmp(argv[p], "-fMS") == 0 ){ type = "fMS"; }else if( strcmp(argv[p], "-xMS") == 0 ){ type = "xMS"; }else if( strcmp(argv[p], "-MS") == 0 ){ type = "MS"; }else if( strcmp(argv[p], "-OMS") == 0 ){ type = "OMS"; }else if( strcmp(argv[p], "-NMS") == 0 ){ type = "NMS"; }else if( strcmp(argv[p], "-2NMS") == 0 ){ type = "2NMS"; }else if( strcmp(argv[p], "-info") == 0 ){ show_info(); exit( 0 ); }else{ printf("(EE) Unknown argument (%d) => [%s]\n", p, argv[p]); exit(0); } } double rendement = (double)(NmoinsK)/(double)(_N); printf("(II) Code LDPC (N, K) : (%d,%d)\n", _N, _K); printf("(II) Rendement du code : %.3f\n", rendement); printf("(II) # ITERATIONs du CODE : %d\n", NOMBRE_ITERATIONS); printf("(II) FER LIMIT FOR SIMU : %d\n", FRAME_ERROR_LIMIT); printf("(II) SIMULATION RANGE : [%.2f, %.2f], STEP = %.2f\n", MinSignalSurBruit, MaxSignalSurBruit, PasSignalSurBruit); printf("(II) MODE EVALUATION : %s\n", ((Es_N0)?"Es/N0":"Eb/N0") ); printf("(II) MIN-SUM ALGORITHM : %s\n", type ); printf("(II) FAST STOP MODE : %d\n", QUICK_STOP); CTimer simu_timer(true); // // ON CREE AUTANT DE TRAMES QUE L'ON A DE THREADS // CTrame simu_data(_N, _K, NB_THREAD_ON_GPU); CGPUDecoder* decoder; if( strcmp(type, "fMS") == 0 ){ decoder = new CGPU_Decoder_MS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M ); }else if( strcmp(type, "MS") == 0 ){ decoder = new CGPU_Decoder_MS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M ); }else if( strcmp(type, "OMS") == 0 ){ decoder = new CGPU_Decoder_OMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M ); }else if( strcmp(type, "NMS") == 0 ){ decoder = new CGPU_Decoder_NMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M ); }else if( strcmp(type, "2NMS") == 0 ){ decoder = new CGPU_Decoder_2NMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M ); }else if( strcmp(type, "xMS") == 0 ){ decoder = new CGPU_Decoder_MS_SIMD_v2( NB_THREAD_ON_GPU, _N, _K, _M ); }else{ printf("(EE) Requested decoder does not exist !\n"); exit( 0 ); } decoder->initialize(); CChanel_AWGN_SIMD noise(&simu_data, 4, QPSK_CHANNEL, Es_N0); Eb_N0 = MinSignalSurBruit; int temps = 0, fdecoding = 0; while (Eb_N0 <= MaxSignalSurBruit){ // // ON CREE UN OBJET POUR LA MESURE DU TEMPS DE SIMULATION (REMISE A ZERO POUR CHAQUE Eb/N0) // CTimer temps_ecoule(true); CTimer term_refresh(true); noise.configure( Eb_N0 ); CErrorAnalyzer errCounters(&simu_data, FRAME_ERROR_LIMIT, false, false); CErrorAnalyzer errCounter (&simu_data, FRAME_ERROR_LIMIT, true, true); // // ON CREE L'OBJET EN CHARGE DES INFORMATIONS DANS LE TERMINAL UTILISATEUR // CTerminal terminal(&errCounters, &temps_ecoule, Eb_N0); // // ON GENERE LA PREMIERE TRAME BRUITEE // noise.generate(); errCounter.store_enc_bits(); while( 1 ){ // // ON LANCE LE TRAITEMENT SUR PLUSIEURS THREAD... // CTimer essai(true); decoder->decode( simu_data.get_t_noise_data(), simu_data.get_t_decode_data(), NOMBRE_ITERATIONS ); temps += essai.get_time_ms(); fdecoding += 1; #pragma omp sections { #pragma omp section { noise.generate(); } #pragma omp section { errCounter.generate(); } } // // ON COMPTE LE NOMBRE D'ERREURS DANS LA TRAME DECODE // errCounters.reset_internals(); errCounters.accumulate( &errCounter ); // // ON compare le Frame Error avec la limite imposee par l'utilisateur. Si on depasse // alors on affiche les resultats sur Eb/N0 courant. // if ( errCounters.fe_limit_achieved() == true ){ break; } // // AFFICHAGE A L'ECRAN DE L'EVOLUTION DE LA SIMULATION SI NECESSAIRE // if( term_refresh.get_time_sec() >= 1 ){ term_refresh.reset(); terminal.temp_report(); } if( (simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1) ){ printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) TIME CONTRAINT.\n"); printf("(II) PERFORMANCE EVALUATION WAS PERFORMED ON %d RUNS, TOTAL TIME = %dms\n", fdecoding, temps); temps /= fdecoding; printf("(II) + TIME / RUN = %dms\n", temps); int workL = 4 * NB_THREAD_ON_GPU; int kbits = workL * _N / temps ; float mbits = ((float)kbits) / 1000.0; printf("(II) + DECODER LATENCY (ms) = %d\n", temps); printf("(II) + DECODER THROUGHPUT (Mbps)= %.1f\n", mbits); printf("(II) + (%.2fdB, %dThd : %dCw, %dits) THROUGHPUT = %.1f\n", Eb_N0, NB_THREAD_ON_GPU, workL, NOMBRE_ITERATIONS, mbits); cout << endl << "Temps = " << temps << "ms : " << kbits; cout << "kb/s : " << ((float)temps/NB_THREAD_ON_GPU) << "ms/frame" << endl << endl; break; } } terminal.final_report(); if( (simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1) ){ break; } Eb_N0 = Eb_N0 + PasSignalSurBruit; if( BER_SIMULATION_LIMIT == true ){ if( errCounters.ber_value() < BIT_ERROR_LIMIT ){ printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT.\n"); break; } } } //printf("(II) Simulation is now terminated !\n"); //delete decoder; //printf("(II) Simulation is now terminated !\n"); return 0; }
int main(int argc, char* argv[]) { srand(0); printf("(II) LDPC DECODER - Scheduled Fixed-point decoder (SSE version - 16 frames)\n"); printf("(II) MANIPULATION DE DONNEES (Fixed Point, %d bits)\n", 8 * sizeof (int)); printf("(II) GENEREE : %s - %s\n", __DATE__, __TIME__); param_simulation p_simulation; p_simulation.snr_min = 0.5; p_simulation.snr_max = 3.0; p_simulation.snr_pas = 0.1; p_simulation.fe_limit = 200; p_simulation.channel_type = 0; p_simulation.norm_channel = false; p_simulation.ber_limit_value = 1.0e-10; p_simulation.fer_limit_value = 1.0e-10; p_simulation.llr_optimization = false; p_simulation.real_encoder = false; p_simulation.qpsk_channel = false; p_simulation.Es_N0 = false; p_simulation.worst_case_fer = false; param_decoder p_decoder; p_decoder.early_term = false; p_decoder.nb_iters = 30; p_decoder.nms_factor_fixed = 24; p_decoder.nms_factor_float = 0.75; p_decoder.oms_offset_fixed = 1; p_decoder.oms_offset_float = 0.15; int NOMBRE_ITERATIONS = 30; int STOP_TIMER_SECOND = -1; int nb_frames = 16; // DUE TO SIMD MODE // // ON CONFIGURE LE NOMBRE DE THREAD A UTILISER PAR DEFAUT // int NUM_ACTIVE_THREADS = 1; #ifndef __clang__ omp_set_num_threads(NUM_ACTIVE_THREADS); #endif // // ON VA PARSER LES ARGUMENTS DE LIGNE DE COMMANDE // for (int p = 1; p < argc; p++) { // // REGLAGE DES PARAMETRES DE SIMULATION // if (strcmp(argv[p], "-min") == 0) { p_simulation.snr_min = atof(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-max") == 0) { p_simulation.snr_max = atof(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-pas") == 0) { p_simulation.snr_pas = atof(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-fer") == 0) { p_simulation.fe_limit = atoi(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-wc_fer") == 0) { p_simulation.worst_case_fer = true; } else if (strcmp(argv[p], "-timer") == 0) { STOP_TIMER_SECOND = atoi(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-qef") == 0) { p_simulation.ber_limit = true; p_simulation.ber_limit_value = (atof(argv[p + 1])); p += 1; } else if (strcmp(argv[p], "-tfer") == 0) { p_simulation.fer_limit = true; p_simulation.fer_limit_value = (atof(argv[p + 1])); p += 1; } else if (strcmp(argv[p], "-bpsk") == 0) { p_simulation.qpsk_channel = false; } else if (strcmp(argv[p], "-qpsk") == 0) { p_simulation.qpsk_channel = true; } else if (strcmp(argv[p], "-Eb/N0") == 0) { p_simulation.Es_N0 = false; } else if (strcmp(argv[p], "-Es/N0") == 0) { p_simulation.Es_N0 = true; } else if (strcmp(argv[p], "-norm-channel") == 0) { p_simulation.norm_channel = true; } else if (strcmp(argv[p], "-awgn_jego") == 0) { p_simulation.channel_type = 0; } else if (strcmp(argv[p], "-awgn") == 0) { p_simulation.channel_type = 1; } else if (strcmp(argv[p], "-Rayleigh_Fading") == 0) { p_simulation.channel_type = 2; } else if (strcmp(argv[p], "-no-channel") == 0) { p_simulation.channel_type = -1; // // REGLAGE DES DU MODELE DU CANAL // } else if (strcmp(argv[p], "-ollr") == 0) { p_simulation.llr_optimization = true; } else if (strcmp(argv[p], "-encoder") == 0) { p_simulation.real_encoder = true; #ifndef __clang__ } else if (strcmp(argv[p], "-thread") == 0) { int nThreads = atoi(argv[p + 1]); if (nThreads > 4) { printf("(WW) Number of thread can be higher than 4 => Using 4 threads."); NUM_ACTIVE_THREADS = 4; } else if (nThreads < 1) { printf("(WW) Number of thread can be lower than 1 => Using 1 thread."); NUM_ACTIVE_THREADS = 1; } else { NUM_ACTIVE_THREADS = nThreads; } omp_set_num_threads(NUM_ACTIVE_THREADS); p += 1; #endif // // INITIALISATION ALEATOIRE DU GENERATEUR ALEATOIRE // } else if (strcmp(argv[p], "-random") == 0) { printf("(II) Random Generator REAL initialization\n"); srand(time(NULL)); } else if (strcmp(argv[p], "-iter") == 0) { NOMBRE_ITERATIONS = atoi(argv[p + 1]); p_decoder.nb_iters = atoi(argv[p + 1]); p += 1; // // SPECIFICATION DU FORMAT DE CODAGE DES DONNEES EN MODE FIXED-POINT // } else if (strcmp(argv[p], "-var") == 0) { vSAT_NEG_VAR = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1); vSAT_POS_VAR = ( (0x0001 << (atoi(argv[p + 1]) - 1)) - 1); BITS_VAR = atoi(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-msg") == 0) { vSAT_NEG_MSG = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1); vSAT_POS_MSG = ( (0x0001 << (atoi(argv[p + 1]) - 1)) - 1); BITS_MSG = atoi(argv[p + 1]); p += 1; } else if (strcmp(argv[p], "-llr") == 0) { vSAT_NEG_LLR = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1); vSAT_POS_LLR = ((0x0001 << (atoi(argv[p + 1]) - 1)) - 1); BITS_LLR = atoi(argv[p + 1]); vFRAQ_LLR = BITS_LLR / 2; FACTEUR_BETA = (0x0001 << (vFRAQ_LLR)); p += 1; } else if (strcmp(argv[p], "-fraq") == 0) { vFRAQ_LLR = atoi(argv[p + 1]); FACTEUR_BETA = (0x0001 << (vFRAQ_LLR)); p += 1; } else { printf("(EE) Unknown argument (%d) => [%s]\n", p, argv[p]); exit(0); } } double rendement = (float) (INFORMATION) / (float) (NOEUD); printf("(II) NUMBER OF // THREAD : %d\n", NUM_ACTIVE_THREADS); printf("(II) Code LDPC (N, N-K, K, M): (%d, %d, %d, %d)\n", NOEUD, PARITE, INFORMATION, MESSAGE); printf("(II) Rendement du code : %.3f\n", rendement); printf("(II) # ITERATIONs du CODE : %d\n", NOMBRE_ITERATIONS); printf("(II) FER LIMIT FOR SIMU : %d\n", p_simulation.fe_limit); printf("(II) SIMULATION RANGE : [%.2f, %.2f], STEP = %.2f\n", p_simulation.snr_min, p_simulation.snr_max, p_simulation.snr_pas); printf("(II) FAST STOP MODE : %d\n", p_decoder.early_term); printf("(II) LLR DATA Q(%d,%d) : %d bits [%d, %d]\n", (BITS_LLR - vFRAQ_LLR), (vFRAQ_LLR), BITS_LLR, vSAT_NEG_LLR, vSAT_POS_LLR); printf("(II) MESSAGE Q(%d,%d) : %d bits [%d, %d]\n", (BITS_MSG - vFRAQ_LLR), (vFRAQ_LLR), BITS_MSG, vSAT_NEG_MSG, vSAT_POS_MSG); printf("(II) VARIABLE Q(%d,%d) : %d bits [%d, %d]\n", (BITS_VAR - vFRAQ_LLR), (vFRAQ_LLR), BITS_VAR, vSAT_NEG_VAR, vSAT_POS_VAR); printf("(II) OFFSET FACTOR : %f\n", p_decoder.oms_offset_float); CTimer simu_timer(true); // // ALLOCATION DYNAMIQUE DES DONNESS NECESSAIRES A LA SIMULATION DU SYSTEME // CTrame* simu_data[MAX_THREADS]; for(int i=0; i<4; i++) { simu_data[i] = new CTrame(NOEUD, PARITE, nb_frames); } CDecoder* decoder[MAX_THREADS]; for(int i=0; i<4; i++) { decoder[i] = CreateDecoder(p_decoder, vSAT_NEG_VAR, vSAT_POS_VAR, vSAT_NEG_MSG, vSAT_POS_MSG/*, msOffset, msFactor, OFFSET_FACTOR, NORMALIZED_FACTOR*/); } Encoder *encoder[MAX_THREADS]; for(int i=0; i<4; i++) { encoder[i] = EncoderLibrary(p_simulation.real_encoder, simu_data[i]); } CChanel* noise[MAX_THREADS]; for(int i=0; i<4; i++) { noise[i] = CreateChannel(simu_data[i], p_simulation.qpsk_channel, p_simulation.Es_N0); noise[i]->setNormalize( p_simulation.norm_channel ); } // // ON CREE L'OBJET EN CHARGE DE LA CONVERSION EN VIRGULE FIXE DE L'INFORMATION DU CANAL // CFixConversion* conv_fp[MAX_THREADS]; CErrorAnalyzer* errCounter[MAX_THREADS]; double Eb_N0 = p_simulation.snr_min; while (Eb_N0 <= p_simulation.snr_max) { // // ON CREE LE CANAL DE COMMUNICATION (BRUIT GAUSSIEN) // for(int i=0; i<4; i++){ noise[i]->configure(Eb_N0); } for(int i=0; i<4; i++){ decoder[i]->setSigmaChannel(noise[i]->get_SigB()); } // if (p_simulation.llr_optimization == 0) { for(int i=0; i<4; i++){ conv_fp[i] = new CFastFixConversion(simu_data[i], FACTEUR_BETA, vSAT_NEG_LLR, vSAT_POS_LLR); } // } else { // for(int i=0; i<4; i++){ // conv_fp[i] = new COptimFixConversion(simu_data[i], noise[i]->get_R(), vSAT_NEG_LLR, vSAT_POS_LLR); // } // } bool auto_fe_mode = false; CErrorAnalyzer errCounters (simu_data[0], p_simulation.fe_limit, auto_fe_mode, p_simulation.worst_case_fer); for(int i=0; i<4; i++){ errCounter[i] = new CErrorAnalyzer(simu_data[i], p_simulation.fe_limit, auto_fe_mode, p_simulation.worst_case_fer); } // ON GENERE LA PREMIERE TRAME BRUITEE for(int i=0; i<4; i++){ encoder[i]->encode(); } for(int i=0; i<4; i++){ noise[i]->generate(); } for(int i=0; i<4; i++){ conv_fp[i]->generate(); } for(int i=0; i<4; i++){ errCounter[i]->store_enc_bits(); } // // ON CREE UN OBJET POUR LA MESURE DU TEMPS DE SIMULATION (REMISE A ZERO POUR CHAQUE Eb/N0) // CTimer temps_ecoule(true); // // ON CREE L'OBJET EN CHARGE DES INFORMATIONS DANS LE TERMINAL UTILISATEUR // CTerminal terminal(&errCounters, &temps_ecoule, Eb_N0); CTimer timer[MAX_THREADS]; long int etime[MAX_THREADS] = {0, 0, 0, 0}; while (1) { const int maxLoopF = 32768 / nb_frames; int loopf = (8 * NUM_ACTIVE_THREADS) * (64800 / NOEUD); loopf = loopf > maxLoopF ? maxLoopF: loopf; loopf = 32; int d1[maxLoopF], d2[maxLoopF], d3[maxLoopF], d4[maxLoopF]; int f1[maxLoopF], f2[maxLoopF], f3[maxLoopF], f4[maxLoopF]; #pragma omp parallel sections //num_threads(NUM_ACTIVE_THREADS) { #pragma omp section { for (int q = 0; q < loopf; q++) { float *f_llr = simu_data[0]->get_t_noise_data(); // [NOEUD]; signed char *i_llr = (signed char*)simu_data[0]->get_t_fpoint_data(); // [NOEUD]; signed char *o_llr = (signed char*)simu_data[0]->get_t_decode_data(); // [NOEUD]; timer[0].start(); decoder[0]->decode(f_llr, o_llr, NOMBRE_ITERATIONS); decoder[0]->decode(i_llr, o_llr, NOMBRE_ITERATIONS); timer[0].stop(); etime[0] += timer[0].get_time_us(); encoder[0]->encode(); noise[0]->generate(); // ON GENERE LE BRUIT DU CANAL conv_fp[0]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE int q1 = errCounter[0]->nb_be(); int fr = errCounter[0]->nb_fe(); errCounter[0]->generate(); d1[q] = errCounter[0]->nb_be() - q1; f1[q] = errCounter[0]->nb_fe() - fr; errCounter[0]->store_enc_bits(); } } #pragma omp section { for (int q = 0; q < loopf; q++) { float *f_llr = simu_data[1]->get_t_noise_data(); // [NOEUD]; signed char *i_llr = (signed char*)simu_data[1]->get_t_fpoint_data(); // [NOEUD]; signed char *o_llr = (signed char*)simu_data[1]->get_t_decode_data(); // [NOEUD]; timer[1].start(); decoder[1]->decode(f_llr, o_llr, NOMBRE_ITERATIONS); decoder[1]->decode(i_llr, o_llr, NOMBRE_ITERATIONS); timer[1].stop(); etime[1] += timer[1].get_time_us(); encoder[1]->encode(); noise[1]->generate(); // ON GENERE LE BRUIT DU CANAL conv_fp[1]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE int fr = errCounter[1]->nb_fe(); int q2 = errCounter[1]->nb_be(); errCounter[1]->generate(); d2[q] = errCounter[1]->nb_be() - q2; f2[q] = errCounter[1]->nb_fe() - fr; errCounter[1]->store_enc_bits(); } } #pragma omp section { for (int q = 0; q < loopf; q++) { float *f_llr = simu_data[2]->get_t_noise_data(); // [NOEUD]; signed char *i_llr = (signed char*)simu_data[2]->get_t_fpoint_data(); // [NOEUD]; signed char *o_llr = (signed char*)simu_data[2]->get_t_decode_data(); // [NOEUD]; timer[2].start(); decoder[2]->decode(f_llr, o_llr, NOMBRE_ITERATIONS); decoder[2]->decode(i_llr, o_llr, NOMBRE_ITERATIONS); timer[2].stop(); etime[2] += timer[2].get_time_us(); encoder[2]->encode(); noise[2]->generate(); // ON GENERE LE BRUIT DU CANAL conv_fp[2]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE int q3 = errCounter[2]->nb_be(); int fr = errCounter[2]->nb_fe(); errCounter[2]->generate(); d3[q] = errCounter[2]->nb_be() - q3; f3[q] = errCounter[2]->nb_fe() - fr; errCounter[2]->store_enc_bits(); } } #pragma omp section { for (int q = 0; q < loopf; q++) { float *f_llr = simu_data[3]->get_t_noise_data(); // [NOEUD]; signed char *i_llr = (signed char*)simu_data[3]->get_t_fpoint_data(); // [NOEUD]; signed char *o_llr = (signed char*)simu_data[3]->get_t_decode_data(); // [NOEUD]; timer[3].start(); decoder[3]->decode(f_llr, o_llr, NOMBRE_ITERATIONS); decoder[3]->decode(i_llr, o_llr, NOMBRE_ITERATIONS); timer[3].stop(); etime[3] += timer[3].get_time_us(); encoder[3]->encode(); noise[3]->generate(); // ON GENERE LE BRUIT DU CANAL conv_fp[3]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE int q4 = errCounter[3]->nb_be(); int fr = errCounter[3]->nb_fe(); errCounter[3]->generate(); d4[q] = errCounter[3]->nb_be() - q4; f4[q] = errCounter[3]->nb_fe() - fr; errCounter[3]->store_enc_bits(); } } } // // ON COMPTE LE NOMBRE D'ERREURS DANS LA TRAME DECODE // for (int q = 0; q < loopf; q++) { int diff = ((f1[q] - 1) > 0) ? (f1[q] - 1) : 0; errCounters.generate(d1[q] - diff); for (int z = 1; z < nb_frames; z++) errCounters.generate(f1[q] > z ? 1 : 0); diff = ((f2[q] - 1) > 0) ? (f2[q] - 1) : 0; errCounters.generate(d2[q] - diff); for (int z = 1; z < nb_frames; z++) errCounters.generate(f2[q] > z ? 1 : 0); diff = ((f3[q] - 1) > 0) ? (f3[q] - 1) : 0; errCounters.generate(d3[q] - diff); for (int z = 1; z < nb_frames; z++) errCounters.generate(f3[q] > z ? 1 : 0); diff = ((f4[q] - 1) > 0) ? (f4[q] - 1) : 0; errCounters.generate(d4[q] - diff); for (int z = 1; z < nb_frames; z++) errCounters.generate(f4[q] > z ? 1 : 0); } // errCounter.store_enc_bits(); // // ON compare le Frame Error avec la limite imposee par l'utilisateur. Si on depasse // alors on affiche les resultats sur Eb/N0 courant. // if (errCounters.fe_limit_achieved() == true) { break; } // // ON REGARDE SI L'UTILISATEUR A LIMITE LE TEMPS DE SIMULATION... // if ((simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1)) { break; } // // AFFICHAGE A L'ECRAN DE L'EVOLUTION DE LA SIMULATION SI NECESSAIRE // //if( (errCounter.nb_processed_frames() % 50) == 0 ) //{ terminal.temp_report(); //} //printf("loop\n"); } terminal.final_report(); if (STOP_TIMER_SECOND != -1) { printf("(PERF) H. LAYERED %d fixed, %dx%d LDPC code, %d its, %d threads, %d early stop\n", nb_frames, NOEUD, PARITE, NOMBRE_ITERATIONS, NUM_ACTIVE_THREADS, p_decoder.early_term); float sum = 0.0; for (int z = 0; z < NUM_ACTIVE_THREADS; z++) { float nf = (errCounters.nb_processed_frames() / 4); // 4 car 4 threads... float nb = ((nf) * (1000000.0 / etime[z]) * NOEUD) / 1000.0 / 1000.0; printf("(PERF) Kernel Execution time = %ld us for %.0f frames => %1.3f Mbps\n", etime[z], nf, nb); sum += nb; } // float latency = 2.0 * (1.0 / sum) * nb_frames * 1000.0; // en us float latenc1 = etime[0] * nb_frames / (errCounters.nb_processed_frames()/4); // en us printf("(PERF) SNR = %.2f, ITERS = %d, LATENCY = %1.3f us\n", Eb_N0, NOMBRE_ITERATIONS, latenc1); // printf("(PERF) SNR = %.2f, ITERS = %d, LATENCY = %1.3f us\n", Eb_N0, NOMBRE_ITERATIONS, latency); printf("(PERF) SNR = %.2f, ITERS = %d, THROUGHPUT = %1.3f Mbps\n", Eb_N0, NOMBRE_ITERATIONS, sum); printf("(PERF) Total Kernel throughput = %1.3f Mbps\n", sum); } Eb_N0 = Eb_N0 + p_simulation.snr_pas; if ((simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1)) { printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) TIME CONTRAINT.\n"); break; } if (p_simulation.ber_limit == true) { if (errCounters.ber_value() < p_simulation.ber_limit_value) { printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT (on BER).\n"); break; } } if (p_simulation.fer_limit == true) { if (errCounters.fer_value() < p_simulation.fer_limit_value) { printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT (on FER).\n"); break; } } } //////////////////////////////////////////////////////////////////////////////// // // // SECOND EVALUATION OF THE THROUGHPUT WITHOUT ENCODED FRAME REGENERATION // // if( 0 ) { int exec = 0; const int t_eval = STOP_TIMER_SECOND; // // ONE THREAD MODE // if (NUM_ACTIVE_THREADS == 1) { CTimer t_Timer1(true); while (t_Timer1.get_time_sec() < t_eval) { for (int qq = 0; qq < 20; qq++) { // to limit timer runtime impact on performances (for very small LDPC codes) // Indeed, depending on OS and CTimer implementations, time read can be long... decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[0]->get_t_decode_data(), NOMBRE_ITERATIONS); exec += 1; } } t_Timer1.stop(); float debit = _N * ((exec * nb_frames ) / ((float) t_Timer1.get_time_sec())); debit /= 1000000.0f; printf("(PERF1) LDPC decoder air throughput = %1.6f Mbps\n", debit); } // // TWO THREAD MODE // if (NUM_ACTIVE_THREADS == 2) { exec = 0; omp_set_num_threads(2); CTimer t_Timer2(true); while (t_Timer2.get_time_sec() < t_eval) { const int looper = 20; #pragma omp parallel sections { #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS); } } exec += 2 * looper; } t_Timer2.stop(); // for each decoder run, we decoded nb_frames codewords (depending on the SIMD width) float debit = _N * ((exec * nb_frames) / ((float) t_Timer2.get_time_sec())); debit /= 1000000.0f; printf("(PERF2) LDPC decoder air throughput = %1.3f Mbps\n", debit); } // // THREE THREAD MODE // if (NUM_ACTIVE_THREADS == 3) { exec = 0; omp_set_num_threads(3); CTimer t_Timer3(true); while (t_Timer3.get_time_sec() < t_eval) { const int looper = 20; #pragma omp parallel sections { #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[2]->decode(simu_data[2]->get_t_noise_data(), simu_data[3]->get_t_decode_data(), NOMBRE_ITERATIONS); } } exec += 4 * looper; } t_Timer3.stop(); float debit = _N * ((exec * nb_frames) / ((float) t_Timer3.get_time_sec())); debit /= 1000000.0f; printf("(PERF4) LDPC decoder air throughput = %1.3f Mbps\n", debit); } // // FOUR THREAD MODE // if (NUM_ACTIVE_THREADS == 4) { exec = 0; omp_set_num_threads(4); CTimer t_Timer3(true); while (t_Timer3.get_time_sec() < t_eval) { const int looper = 20; #pragma omp parallel sections { #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[2]->decode(simu_data[2]->get_t_noise_data(), simu_data[3]->get_t_decode_data(), NOMBRE_ITERATIONS); } #pragma omp section { for (int qq = 0; qq < looper; qq++) decoder[3]->decode(simu_data[3]->get_t_noise_data(), simu_data[4]->get_t_decode_data(), NOMBRE_ITERATIONS); } } exec += 4 * looper; } t_Timer3.stop(); float debit = _N * ((exec * nb_frames) / ((float) t_Timer3.get_time_sec())); debit /= 1000000.0f; printf("(PERF4) LDPC decoder air throughput = %1.3f Mbps\n", debit); } exit(0); } // ON FAIT LE MENAGE PARMIS TOUS LES OBJETS CREES DYNAMIQUEMENT... for(int i=0; i<4; i++){ delete simu_data[i]; delete noise[i]; delete decoder[i]; delete encoder[i]; delete errCounter[i]; delete conv_fp[i]; } return 1; }