int main(int argc, char* argv[])
{
	int p;
    srand( 0 );
	printf("(II) LDPC DECODER - Flooding scheduled decoder\n");
	printf("(II) MANIPULATION DE DONNEES (IEEE-754 - %ld bits)\n", (long int)8*sizeof(int));
	printf("(II) GENEREE : %s - %s\n", __DATE__, __TIME__);

	double Eb_N0;
	double MinSignalSurBruit  = 0.50;
	double MaxSignalSurBruit  = 3.01;
	double PasSignalSurBruit  = 0.10;
    int    NOMBRE_ITERATIONS  = 20;
	int    STOP_TIMER_SECOND  = -1;
	bool   QPSK_CHANNEL       = false;
    bool   Es_N0              = false; // FALSE => MODE Eb_N0
    int    NB_THREAD_ON_GPU   = 1024;;
	int    FRAME_ERROR_LIMIT  =  200;

	char  defDecoder[] = "fMS";
    const char* type = defDecoder;

    cudaSetDevice(0);
    cudaDeviceSynchronize();
    cudaThreadSynchronize();

	//
	// ON VA PARSER LES ARGUMENTS DE LIGNE DE COMMANDE
	//
	for (p=1; p<argc; p++) {
		if( strcmp(argv[p], "-min") == 0 ){
			MinSignalSurBruit = atof( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-max") == 0 ){
			MaxSignalSurBruit = atof( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-pas") == 0 ){
			PasSignalSurBruit = atof( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-timer") == 0 ){
			STOP_TIMER_SECOND = atoi( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-iter") == 0 ){
			NOMBRE_ITERATIONS = atoi( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-fer") == 0 ){
			FRAME_ERROR_LIMIT = atoi( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-qef") == 0 ){
			BER_SIMULATION_LIMIT =  true;
			BIT_ERROR_LIMIT      = ( atof( argv[p+1] ) );
			p += 1;

		}else if( strcmp(argv[p], "-bpsk") == 0 ){
			QPSK_CHANNEL = false;

		}else if( strcmp(argv[p], "-qpsk") == 0 ){
			QPSK_CHANNEL = true;

		}else if( strcmp(argv[p], "-Eb/N0") == 0 ){
			Es_N0 = false;

		}else if( strcmp(argv[p], "-Es/N0") == 0 ){
			Es_N0 = true;

		}else if( strcmp(argv[p], "-n") == 0 ){
			NB_THREAD_ON_GPU = atoi( argv[p+1] );
			p += 1;

		}else if( strcmp(argv[p], "-fMS") == 0 ){
			type      = "fMS";

		}else if( strcmp(argv[p], "-xMS") == 0 ){
			type      = "xMS";

		}else if( strcmp(argv[p], "-MS") == 0 ){
			type      = "MS";

		}else if( strcmp(argv[p], "-OMS") == 0 ){
			type      = "OMS";

		}else if( strcmp(argv[p], "-NMS") == 0 ){
			type      = "NMS";

		}else if( strcmp(argv[p], "-2NMS") == 0 ){
			type      = "2NMS";

		}else if( strcmp(argv[p], "-info") == 0 ){
			show_info();
			exit( 0 );

		}else{
			printf("(EE) Unknown argument (%d) => [%s]\n", p, argv[p]);
			exit(0);
		}
	}

	double rendement = (double)(NmoinsK)/(double)(_N);
	printf("(II) Code LDPC (N, K)     : (%d,%d)\n", _N, _K);
	printf("(II) Rendement du code    : %.3f\n", rendement);
	printf("(II) # ITERATIONs du CODE : %d\n", NOMBRE_ITERATIONS);
    printf("(II) FER LIMIT FOR SIMU   : %d\n", FRAME_ERROR_LIMIT);
	printf("(II) SIMULATION  RANGE    : [%.2f, %.2f], STEP = %.2f\n", MinSignalSurBruit,  MaxSignalSurBruit, PasSignalSurBruit);
	printf("(II) MODE EVALUATION      : %s\n", ((Es_N0)?"Es/N0":"Eb/N0") );
	printf("(II) MIN-SUM ALGORITHM    : %s\n", type );
	printf("(II) FAST STOP MODE       : %d\n", QUICK_STOP);

	CTimer simu_timer(true);


	//
	// ON CREE AUTANT DE TRAMES QUE L'ON A DE THREADS
	//
	CTrame simu_data(_N, _K, NB_THREAD_ON_GPU);

	CGPUDecoder* decoder;
	if( strcmp(type, "fMS") == 0 ){
			decoder = new CGPU_Decoder_MS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M );
	}else if( strcmp(type, "MS") == 0 ){
		decoder = new CGPU_Decoder_MS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M );
	}else if( strcmp(type, "OMS") == 0 ){
		decoder = new CGPU_Decoder_OMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M );
	}else if( strcmp(type, "NMS") == 0 ){
		decoder = new CGPU_Decoder_NMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M );
	}else if( strcmp(type, "2NMS") == 0 ){
		decoder = new CGPU_Decoder_2NMS_SIMD( NB_THREAD_ON_GPU, _N, _K, _M );
	}else if( strcmp(type, "xMS") == 0 ){
		decoder = new CGPU_Decoder_MS_SIMD_v2( NB_THREAD_ON_GPU, _N, _K, _M );
	}else{
		printf("(EE) Requested decoder does not exist !\n");
		exit( 0 );
	}
	decoder->initialize();


	CChanel_AWGN_SIMD noise(&simu_data, 4, QPSK_CHANNEL, Es_N0);


	Eb_N0 = MinSignalSurBruit;
	int temps = 0, fdecoding = 0;
	while (Eb_N0 <= MaxSignalSurBruit){

        //
        // ON CREE UN OBJET POUR LA MESURE DU TEMPS DE SIMULATION (REMISE A ZERO POUR CHAQUE Eb/N0)
        //
        CTimer temps_ecoule(true);
        CTimer term_refresh(true);

		noise.configure( Eb_N0 );

        CErrorAnalyzer errCounters(&simu_data, FRAME_ERROR_LIMIT, false, false);
        CErrorAnalyzer errCounter (&simu_data, FRAME_ERROR_LIMIT, true,  true);

        //
        // ON CREE L'OBJET EN CHARGE DES INFORMATIONS DANS LE TERMINAL UTILISATEUR
        //
		CTerminal terminal(&errCounters, &temps_ecoule, Eb_N0);

        //
        // ON GENERE LA PREMIERE TRAME BRUITEE
        //
        noise.generate();
        errCounter.store_enc_bits();

		while( 1 ){

			//
			//	ON LANCE LE TRAITEMENT SUR PLUSIEURS THREAD...
			//
			CTimer essai(true);
			decoder->decode( simu_data.get_t_noise_data(), simu_data.get_t_decode_data(), NOMBRE_ITERATIONS );
			temps += essai.get_time_ms();
			fdecoding += 1;
			#pragma omp sections
			{
				#pragma omp section
				{
					noise.generate();
				}
				#pragma omp section
				{
					errCounter.generate();
				}
			}

            //
			// ON COMPTE LE NOMBRE D'ERREURS DANS LA TRAME DECODE
            //
			errCounters.reset_internals();
			errCounters.accumulate( &errCounter );

            //
            // ON compare le Frame Error avec la limite imposee par l'utilisateur. Si on depasse
            // alors on affiche les resultats sur Eb/N0 courant.
            //
			if ( errCounters.fe_limit_achieved() == true ){
               break;
            }

            //
            // AFFICHAGE A L'ECRAN DE L'EVOLUTION DE LA SIMULATION SI NECESSAIRE
            //
			if( term_refresh.get_time_sec() >= 1 ){
				term_refresh.reset();
	           	terminal.temp_report();
			}

			if( (simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1) ){
        		printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) TIME CONTRAINT.\n");
        		printf("(II) PERFORMANCE EVALUATION WAS PERFORMED ON %d RUNS, TOTAL TIME = %dms\n", fdecoding, temps);
				temps /= fdecoding;
        		printf("(II) + TIME / RUN = %dms\n", temps);
        		int   workL = 4 * NB_THREAD_ON_GPU;
        		int   kbits = workL * _N / temps ;
        		float mbits = ((float)kbits) / 1000.0;
        		printf("(II) + DECODER LATENCY (ms)     = %d\n", temps);
        		printf("(II) + DECODER THROUGHPUT (Mbps)= %.1f\n", mbits);
        		printf("(II) + (%.2fdB, %dThd : %dCw, %dits) THROUGHPUT = %.1f\n", Eb_N0, NB_THREAD_ON_GPU, workL, NOMBRE_ITERATIONS, mbits);
				cout << endl << "Temps = " << temps << "ms : " << kbits;
				cout << "kb/s : " << ((float)temps/NB_THREAD_ON_GPU) << "ms/frame" << endl << endl;
        		break;
			}
		}

		terminal.final_report();

        if( (simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1) ){
        	break;
        }

		Eb_N0 = Eb_N0 + PasSignalSurBruit;

        if( BER_SIMULATION_LIMIT == true ){
        	if( errCounters.ber_value() < BIT_ERROR_LIMIT ){
        		printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT.\n");
        		break;
        	}
        }
	}
    //printf("(II) Simulation is now terminated !\n");
	//delete decoder;
	//printf("(II) Simulation is now terminated !\n");
	return 0;
}
Exemplo n.º 2
0
void* Worker::run() 
{
	// Service loop: take the next work item from the queue, process it, release it.
	int round = 0;
	while (true)
	{
		printf("thread %lu, loop %d - waiting for item...\n",  (long unsigned int)self(), round);
		WorkItem* item = m_queue.remove();
		printf("thread %lu, loop %d - got one item\n",  (long unsigned int)self(), round);
		{
			// Accumulated decode time (only used by the disabled code below)
			// and number of passes through the processing loop.
			int temps = 0;
			int fdecoding = 0;

			//
			// Timers for this work item.
			//
			CTimer temps_ecoule(true);
			CTimer term_refresh(true);

			// Two error analyzers built on the item's data.
			CErrorAnalyzer errCounters(item->getData(), frameErrorLimit, false, false);
			CErrorAnalyzer errCounter (item->getData(), frameErrorLimit, true,  true);

			//
			// Object in charge of the reports printed in the user terminal.
			//
			CTerminal terminal(&errCounters, &temps_ecoule, item->getNoise()->getEb_N0());

			//
			// Generate the first noisy frame.
			//
			item->getNoise()->generate();
			errCounter.store_enc_bits();

			// NOTE: every exit condition below is currently commented out, so this
			// loop has no active break and the code after it is not reached.
			for (;;)
			{
				//
				// Decode step (the decode call itself is disabled).
				//
				CTimer essai(true);
//				decoder->decode( item->getData()->get_t_noise_data(), item->getData()->get_t_decode_data(), numberIter );
//				temps += essai.get_time_ms();
				++fdecoding;

				// Both sections are empty while their contents stay disabled.
				#pragma omp sections
				{
					#pragma omp section
					{
//						item->getNoise()->generate();
					}
					#pragma omp section
					{
//						errCounter.generate();
					}
				}

				//
				// Disabled: count the errors in the decoded frame.
				//
//				errCounters.reset_internals();
//				errCounters.accumulate( &errCounter );

				//
				// Disabled: stop once the frame-error limit set by the user is reached.
				//
//				if ( errCounters.fe_limit_achieved() == true ){
//					break;
//				}

				//
				// Disabled: refresh the progress display at most once per second.
				//
//				if( term_refresh.get_time_sec() >= 1 ){
//					term_refresh.reset();
//					terminal.temp_report();
//				}

				//
				// Disabled: stop on the user time limit and print the throughput summary.
				//
//				if( (simTotalTimer->get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1) )
//				{
//					printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) TIME CONTRAINT.\n");
//					printf("(II) PERFORMANCE EVALUATION WAS PERFORMED ON %d RUNS, TOTAL TIME = %dms\n", fdecoding, temps);
//					temps /= fdecoding;
//					printf("(II) + TIME / RUN = %dms\n", temps);
//					int   workL = 4 * numberThreadOnGpu;
//					int   kbits = workL * _N / temps ;
//					float mbits = ((float)kbits) / 1000.0;
//					printf("(II) + DECODER LATENCY (ms)     = %d\n", temps);
//					printf("(II) + DECODER THROUGHPUT (Mbps)= %.1f\n", mbits);
//					printf("(II) + (%.2fdB, %dThd : %dCw, %dits) THROUGHPUT = %.1f\n", item->getNoise()->getEb_N0(), numberThreadOnGpu, workL, numberIter, mbits);
//					cout << endl << "Temps = " << temps << "ms : " << kbits;
//					cout << "kb/s : " << ((float)temps/numberThreadOnGpu) << "ms/frame" << endl << endl;
//					break;
//				}
			}

//			terminal.final_report();
		}
		delete item;
		round++;
	}
	return NULL;
}
Exemplo n.º 3
0
int main(int argc, char* argv[]) {
    srand(0);
    printf("(II) LDPC DECODER - Scheduled Fixed-point decoder (SSE version - 16 frames)\n");
    printf("(II) MANIPULATION DE DONNEES (Fixed Point, %d bits)\n", 8 * sizeof (int));
    printf("(II) GENEREE : %s - %s\n", __DATE__, __TIME__);


    param_simulation p_simulation;
    p_simulation.snr_min      = 0.5;
    p_simulation.snr_max      = 3.0;
    p_simulation.snr_pas      = 0.1;
    p_simulation.fe_limit     = 200;
    p_simulation.channel_type = 0;
    p_simulation.norm_channel = false;
    p_simulation.ber_limit_value = 1.0e-10;
    p_simulation.fer_limit_value = 1.0e-10;

    p_simulation.llr_optimization = false;
    p_simulation.real_encoder     = false;
    p_simulation.qpsk_channel     = false;
    p_simulation.Es_N0            = false;
    p_simulation.worst_case_fer   = false;

    param_decoder p_decoder;
    p_decoder.early_term = false;
    p_decoder.nb_iters   = 30;

    p_decoder.nms_factor_fixed = 24;
    p_decoder.nms_factor_float = 0.75;

    p_decoder.oms_offset_fixed = 1;
    p_decoder.oms_offset_float = 0.15;

    int    NOMBRE_ITERATIONS = 30;
    int    STOP_TIMER_SECOND = -1;

    int    nb_frames	= 16; // DUE TO SIMD MODE

    //
    // ON CONFIGURE LE NOMBRE DE THREAD A UTILISER PAR DEFAUT
    //
    int NUM_ACTIVE_THREADS = 1;
#ifndef __clang__
    omp_set_num_threads(NUM_ACTIVE_THREADS);
#endif

    //
    // ON VA PARSER LES ARGUMENTS DE LIGNE DE COMMANDE
    //
    for (int p = 1; p < argc; p++) {

        //
        // REGLAGE DES PARAMETRES DE SIMULATION
        //
        if (strcmp(argv[p], "-min") == 0) {
            p_simulation.snr_min = atof(argv[p + 1]);
            p += 1;

        } else if (strcmp(argv[p], "-max") == 0) {
            p_simulation.snr_max = atof(argv[p + 1]);
            p += 1;

        } else if (strcmp(argv[p], "-pas") == 0) {
            p_simulation.snr_pas = atof(argv[p + 1]);
            p += 1;

        } else if (strcmp(argv[p], "-fer") == 0) {
            p_simulation.fe_limit = atoi(argv[p + 1]);
            p += 1;

        } else if (strcmp(argv[p], "-wc_fer") == 0) {
            p_simulation.worst_case_fer = true;

        } else if (strcmp(argv[p], "-timer") == 0) {
            STOP_TIMER_SECOND = atoi(argv[p + 1]);
            p += 1;

        } else if (strcmp(argv[p], "-qef") == 0) {
            p_simulation.ber_limit       = true;
            p_simulation.ber_limit_value = (atof(argv[p + 1]));
            p += 1;

        } else if (strcmp(argv[p], "-tfer") == 0) {
            p_simulation.fer_limit       = true;
            p_simulation.fer_limit_value = (atof(argv[p + 1]));
            p += 1;

        } else if (strcmp(argv[p], "-bpsk") == 0) {
            p_simulation.qpsk_channel = false;

        } else if (strcmp(argv[p], "-qpsk") == 0) {
            p_simulation.qpsk_channel = true;

        } else if (strcmp(argv[p], "-Eb/N0") == 0) {
            p_simulation.Es_N0 = false;

        } else if (strcmp(argv[p], "-Es/N0") == 0) {
            p_simulation.Es_N0 = true;

        } else if (strcmp(argv[p], "-norm-channel") == 0) {
            p_simulation.norm_channel = true;

        } else if (strcmp(argv[p], "-awgn_jego") == 0) {
            p_simulation.channel_type = 0;

        } else if (strcmp(argv[p], "-awgn") == 0) {
            p_simulation.channel_type = 1;

        } else if (strcmp(argv[p], "-Rayleigh_Fading") == 0) {
            p_simulation.channel_type = 2;

        } else if (strcmp(argv[p], "-no-channel") == 0) {
            p_simulation.channel_type = -1;


        //
        // REGLAGE DES DU MODELE DU CANAL
        //
        } else if (strcmp(argv[p], "-ollr") == 0) {
            p_simulation.llr_optimization = true;

        } else if (strcmp(argv[p], "-encoder") == 0) {
            p_simulation.real_encoder = true;

#ifndef __clang__
        } else if (strcmp(argv[p], "-thread") == 0) {
            int nThreads = atoi(argv[p + 1]);
            if (nThreads > 4) {
                printf("(WW) Number of thread can be higher than 4 => Using 4 threads.");
                NUM_ACTIVE_THREADS = 4;
            } else if (nThreads < 1) {
                printf("(WW) Number of thread can be lower than 1 => Using 1 thread.");
                NUM_ACTIVE_THREADS = 1;
            } else {
                NUM_ACTIVE_THREADS = nThreads;
            }
            omp_set_num_threads(NUM_ACTIVE_THREADS);
            p += 1;
#endif

        //
        // INITIALISATION ALEATOIRE DU GENERATEUR ALEATOIRE
        //
        } else if (strcmp(argv[p], "-random") == 0) {
            printf("(II) Random Generator REAL initialization\n");
            srand(time(NULL));

        } else if (strcmp(argv[p], "-iter") == 0) {
            NOMBRE_ITERATIONS      = atoi(argv[p + 1]);
            p_decoder.nb_iters = atoi(argv[p + 1]);
            p += 1;

        //
        // SPECIFICATION DU FORMAT DE CODAGE DES DONNEES EN MODE FIXED-POINT
        //
        } else if (strcmp(argv[p], "-var") == 0) {
            vSAT_NEG_VAR = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1);
            vSAT_POS_VAR = ( (0x0001 << (atoi(argv[p + 1]) - 1)) - 1);
            BITS_VAR     = atoi(argv[p + 1]);
            p           += 1;

        } else if (strcmp(argv[p], "-msg") == 0) {
            vSAT_NEG_MSG = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1);
            vSAT_POS_MSG = ( (0x0001 << (atoi(argv[p + 1]) - 1)) - 1);
            BITS_MSG     = atoi(argv[p + 1]);
            p           += 1;

        } else if (strcmp(argv[p], "-llr") == 0) {
            vSAT_NEG_LLR = (-(0x0001 << (atoi(argv[p + 1]) - 1)) + 1);
            vSAT_POS_LLR = ((0x0001 << (atoi(argv[p + 1]) - 1)) - 1);
            BITS_LLR     = atoi(argv[p + 1]);
            vFRAQ_LLR    = BITS_LLR / 2;
            FACTEUR_BETA = (0x0001 << (vFRAQ_LLR));
            p           += 1;

        } else if (strcmp(argv[p], "-fraq") == 0) {
            vFRAQ_LLR    = atoi(argv[p + 1]);
            FACTEUR_BETA = (0x0001 << (vFRAQ_LLR));
            p           += 1;

        } else {
            printf("(EE) Unknown argument (%d) => [%s]\n", p, argv[p]);
            exit(0);
        }
    }

    double rendement = (float) (INFORMATION) / (float) (NOEUD);
    printf("(II) NUMBER OF // THREAD     : %d\n", NUM_ACTIVE_THREADS);
    printf("(II) Code LDPC (N, N-K, K, M): (%d, %d, %d, %d)\n", NOEUD, PARITE, INFORMATION, MESSAGE);
    printf("(II) Rendement du code       : %.3f\n", rendement);
    printf("(II) # ITERATIONs du CODE : %d\n", NOMBRE_ITERATIONS);
    printf("(II) FER LIMIT FOR SIMU   : %d\n", p_simulation.fe_limit);
    printf("(II) SIMULATION  RANGE    : [%.2f, %.2f], STEP = %.2f\n", p_simulation.snr_min, p_simulation.snr_max, p_simulation.snr_pas);
    printf("(II) FAST STOP MODE       : %d\n", p_decoder.early_term);

    printf("(II) LLR DATA    Q(%d,%d)   : %d bits [%d, %d]\n", (BITS_LLR - vFRAQ_LLR), (vFRAQ_LLR), BITS_LLR, vSAT_NEG_LLR, vSAT_POS_LLR);
    printf("(II) MESSAGE     Q(%d,%d)   : %d bits [%d, %d]\n", (BITS_MSG - vFRAQ_LLR), (vFRAQ_LLR), BITS_MSG, vSAT_NEG_MSG, vSAT_POS_MSG);
    printf("(II) VARIABLE    Q(%d,%d)   : %d bits [%d, %d]\n", (BITS_VAR - vFRAQ_LLR), (vFRAQ_LLR), BITS_VAR, vSAT_NEG_VAR, vSAT_POS_VAR);
    printf("(II) OFFSET FACTOR        : %f\n", p_decoder.oms_offset_float);


    CTimer simu_timer(true);

    //
    // ALLOCATION DYNAMIQUE DES DONNESS NECESSAIRES A LA SIMULATION DU SYSTEME
    //
    CTrame* simu_data[MAX_THREADS];
    for(int i=0; i<4; i++)
    {
        simu_data[i] = new CTrame(NOEUD, PARITE, nb_frames);
    }

    CDecoder* decoder[MAX_THREADS];
    for(int i=0; i<4; i++)
    {
        decoder[i] = CreateDecoder(p_decoder, vSAT_NEG_VAR, vSAT_POS_VAR, vSAT_NEG_MSG, vSAT_POS_MSG/*, msOffset, msFactor, OFFSET_FACTOR, NORMALIZED_FACTOR*/);
    }

    Encoder *encoder[MAX_THREADS];
    for(int i=0; i<4; i++)
    {
        encoder[i] = EncoderLibrary(p_simulation.real_encoder, simu_data[i]);
    }

    CChanel* noise[MAX_THREADS];
    for(int i=0; i<4; i++)
    {
        noise[i] = CreateChannel(simu_data[i], p_simulation.qpsk_channel, p_simulation.Es_N0);
        noise[i]->setNormalize( p_simulation.norm_channel );
    }

    //
    // ON CREE L'OBJET EN CHARGE DE LA CONVERSION EN VIRGULE FIXE DE L'INFORMATION DU CANAL
    //
    CFixConversion* conv_fp[MAX_THREADS];
    CErrorAnalyzer* errCounter[MAX_THREADS];

    double Eb_N0 = p_simulation.snr_min;
    while (Eb_N0 <= p_simulation.snr_max) {

        //
        // ON CREE LE CANAL DE COMMUNICATION (BRUIT GAUSSIEN)
        //

        for(int i=0; i<4; i++){
            noise[i]->configure(Eb_N0);
        }

        for(int i=0; i<4; i++){
            decoder[i]->setSigmaChannel(noise[i]->get_SigB());
        }


//        if (p_simulation.llr_optimization == 0) {
            for(int i=0; i<4; i++){
                conv_fp[i] = new CFastFixConversion(simu_data[i], FACTEUR_BETA, vSAT_NEG_LLR, vSAT_POS_LLR);
            }
//        } else {
//            for(int i=0; i<4; i++){
//                conv_fp[i] = new COptimFixConversion(simu_data[i], noise[i]->get_R(), vSAT_NEG_LLR, vSAT_POS_LLR);
//            }
//        }

        bool auto_fe_mode = false;
        CErrorAnalyzer  errCounters  (simu_data[0], p_simulation.fe_limit, auto_fe_mode, p_simulation.worst_case_fer);
        for(int i=0; i<4; i++){
            errCounter[i] = new CErrorAnalyzer(simu_data[i], p_simulation.fe_limit, auto_fe_mode, p_simulation.worst_case_fer);
        }

        // ON GENERE LA PREMIERE TRAME BRUITEE
        for(int i=0; i<4; i++){
            encoder[i]->encode();
        }

        for(int i=0; i<4; i++){
            noise[i]->generate();
        }

        for(int i=0; i<4; i++){
            conv_fp[i]->generate();
        }

        for(int i=0; i<4; i++){
            errCounter[i]->store_enc_bits();
        }

        //
        // ON CREE UN OBJET POUR LA MESURE DU TEMPS DE SIMULATION (REMISE A ZERO POUR CHAQUE Eb/N0)
        //
        CTimer temps_ecoule(true);

        //
        // ON CREE L'OBJET EN CHARGE DES INFORMATIONS DANS LE TERMINAL UTILISATEUR
        //
        CTerminal terminal(&errCounters, &temps_ecoule, Eb_N0);

        CTimer timer[MAX_THREADS];
        long int etime[MAX_THREADS] = {0, 0, 0, 0};

        while (1) {
            const int maxLoopF = 32768 / nb_frames;
            int loopf  = (8 * NUM_ACTIVE_THREADS) * (64800 / NOEUD);
            loopf      = loopf > maxLoopF ? maxLoopF: loopf;
            loopf      = 32;


            int d1[maxLoopF], d2[maxLoopF], d3[maxLoopF], d4[maxLoopF];
            int f1[maxLoopF], f2[maxLoopF], f3[maxLoopF], f4[maxLoopF];

#pragma omp parallel sections //num_threads(NUM_ACTIVE_THREADS)
            {

                #pragma omp section
                {
                    for (int q = 0; q < loopf; q++) {
                        float *f_llr = simu_data[0]->get_t_noise_data(); // [NOEUD];
                        signed char *i_llr = (signed char*)simu_data[0]->get_t_fpoint_data();  // [NOEUD];
                        signed char *o_llr = (signed char*)simu_data[0]->get_t_decode_data();  // [NOEUD];

                        timer[0].start();
                        decoder[0]->decode(f_llr, o_llr, NOMBRE_ITERATIONS);
                        decoder[0]->decode(i_llr, o_llr, NOMBRE_ITERATIONS);
                        timer[0].stop();
                        etime[0] += timer[0].get_time_us();

                        encoder[0]->encode();
                        noise[0]->generate();  // ON GENERE LE BRUIT DU CANAL
                        conv_fp[0]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE
                        int q1 = errCounter[0]->nb_be();
                        int fr = errCounter[0]->nb_fe();
                        errCounter[0]->generate();
                        d1[q] = errCounter[0]->nb_be() - q1;
                        f1[q] = errCounter[0]->nb_fe() - fr;
                        errCounter[0]->store_enc_bits();
                    }
                }

                #pragma omp section
                {
                    for (int q = 0; q < loopf; q++) {
                        float *f_llr = simu_data[1]->get_t_noise_data(); // [NOEUD];
                        signed char *i_llr  = (signed char*)simu_data[1]->get_t_fpoint_data();  // [NOEUD];
                        signed char *o_llr  = (signed char*)simu_data[1]->get_t_decode_data();  // [NOEUD];

                        timer[1].start();
                        decoder[1]->decode(f_llr, o_llr, NOMBRE_ITERATIONS);
                        decoder[1]->decode(i_llr, o_llr, NOMBRE_ITERATIONS);
                        timer[1].stop();
                        etime[1] += timer[1].get_time_us();

                        encoder[1]->encode();
                        noise[1]->generate();  // ON GENERE LE BRUIT DU CANAL
                        conv_fp[1]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE
                        int fr = errCounter[1]->nb_fe();
                        int q2 = errCounter[1]->nb_be();
                        errCounter[1]->generate();
                        d2[q] = errCounter[1]->nb_be() - q2;
                        f2[q] = errCounter[1]->nb_fe() - fr;
                        errCounter[1]->store_enc_bits();
                    }
                }

                #pragma omp section
                {
                    for (int q = 0; q < loopf; q++) {
                        float *f_llr = simu_data[2]->get_t_noise_data(); // [NOEUD];
                        signed char *i_llr  = (signed char*)simu_data[2]->get_t_fpoint_data();  // [NOEUD];
                        signed char *o_llr  = (signed char*)simu_data[2]->get_t_decode_data();  // [NOEUD];

                        timer[2].start();
                        decoder[2]->decode(f_llr, o_llr, NOMBRE_ITERATIONS);
                        decoder[2]->decode(i_llr, o_llr, NOMBRE_ITERATIONS);
                        timer[2].stop();
                        etime[2] += timer[2].get_time_us();

                        encoder[2]->encode();
                        noise[2]->generate();   // ON GENERE LE BRUIT DU CANAL
                        conv_fp[2]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE
                        int q3 = errCounter[2]->nb_be();
                        int fr = errCounter[2]->nb_fe();
                        errCounter[2]->generate();
                        d3[q] = errCounter[2]->nb_be() - q3;
                        f3[q] = errCounter[2]->nb_fe() - fr;
                        errCounter[2]->store_enc_bits();
                    }
                }

                #pragma omp section
                {
                    for (int q = 0; q < loopf; q++) {
                        float *f_llr = simu_data[3]->get_t_noise_data(); // [NOEUD];
                        signed char *i_llr = (signed char*)simu_data[3]->get_t_fpoint_data();  // [NOEUD];
                        signed char *o_llr = (signed char*)simu_data[3]->get_t_decode_data();  // [NOEUD];

                        timer[3].start();
                        decoder[3]->decode(f_llr, o_llr, NOMBRE_ITERATIONS);
                        decoder[3]->decode(i_llr, o_llr, NOMBRE_ITERATIONS);
                        timer[3].stop();
                        etime[3] += timer[3].get_time_us();

                        encoder[3]->encode();
                        noise[3]->generate();  // ON GENERE LE BRUIT DU CANAL
                        conv_fp[3]->generate(); // ON CONVERTIT LES DONNEES EN VIRGULE FIXE
                        int q4 = errCounter[3]->nb_be();
                        int fr = errCounter[3]->nb_fe();
                        errCounter[3]->generate();
                        d4[q] = errCounter[3]->nb_be() - q4;
                        f4[q] = errCounter[3]->nb_fe() - fr;
                        errCounter[3]->store_enc_bits();
                    }
                }
            }
            //
            // ON COMPTE LE NOMBRE D'ERREURS DANS LA TRAME DECODE
            //
            for (int q = 0; q < loopf; q++) {

                int diff = ((f1[q] - 1) > 0) ? (f1[q] - 1) : 0;
                errCounters.generate(d1[q] - diff);
                for (int z = 1; z < nb_frames; z++) errCounters.generate(f1[q] > z ? 1 : 0);

                diff = ((f2[q] - 1) > 0) ? (f2[q] - 1) : 0;
                errCounters.generate(d2[q] - diff);
                for (int z = 1; z < nb_frames; z++) errCounters.generate(f2[q] > z ? 1 : 0);

                diff = ((f3[q] - 1) > 0) ? (f3[q] - 1) : 0;
                errCounters.generate(d3[q] - diff);
                for (int z = 1; z < nb_frames; z++) errCounters.generate(f3[q] > z ? 1 : 0);

                diff = ((f4[q] - 1) > 0) ? (f4[q] - 1) : 0;
                errCounters.generate(d4[q] - diff);
                for (int z = 1; z < nb_frames; z++) errCounters.generate(f4[q] > z ? 1 : 0);
            }
            //			errCounter.store_enc_bits();
            //
            // ON compare le Frame Error avec la limite imposee par l'utilisateur. Si on depasse
            // alors on affiche les resultats sur Eb/N0 courant.
            //
            if (errCounters.fe_limit_achieved() == true) {
                break;
            }

            //
            // ON REGARDE SI L'UTILISATEUR A LIMITE LE TEMPS DE SIMULATION...
            //
            if ((simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1)) {
                break;
            }

            //
            // AFFICHAGE A L'ECRAN DE L'EVOLUTION DE LA SIMULATION SI NECESSAIRE
            //
            //if( (errCounter.nb_processed_frames() % 50) == 0 )
            //{
            terminal.temp_report();
            //}
            //printf("loop\n");
        }


        terminal.final_report();

        if (STOP_TIMER_SECOND != -1) {
            printf("(PERF) H. LAYERED %d fixed, %dx%d LDPC code, %d its, %d threads, %d early stop\n", nb_frames, NOEUD, PARITE, NOMBRE_ITERATIONS, NUM_ACTIVE_THREADS, p_decoder.early_term);
            float sum = 0.0;
            for (int z = 0; z < NUM_ACTIVE_THREADS; z++) {
                float nf = (errCounters.nb_processed_frames() / 4); // 4 car 4 threads...
                float nb = ((nf) * (1000000.0 / etime[z]) * NOEUD) / 1000.0 / 1000.0;
                printf("(PERF) Kernel Execution time = %ld us for %.0f frames => %1.3f Mbps\n", etime[z], nf, nb);
                sum += nb;
            }
//            float latency = 2.0 * (1.0 / sum) * nb_frames * 1000.0; // en us
            float latenc1 = etime[0] * nb_frames / (errCounters.nb_processed_frames()/4); // en us
            printf("(PERF) SNR = %.2f, ITERS = %d, LATENCY    = %1.3f us\n", Eb_N0, NOMBRE_ITERATIONS, latenc1);
//            printf("(PERF) SNR = %.2f, ITERS = %d, LATENCY    = %1.3f us\n", Eb_N0, NOMBRE_ITERATIONS, latency);
            printf("(PERF) SNR = %.2f, ITERS = %d, THROUGHPUT = %1.3f Mbps\n", Eb_N0, NOMBRE_ITERATIONS, sum);
            printf("(PERF) Total Kernel throughput = %1.3f Mbps\n", sum);
        }

        Eb_N0 = Eb_N0 + p_simulation.snr_pas;

        if ((simu_timer.get_time_sec() >= STOP_TIMER_SECOND) && (STOP_TIMER_SECOND != -1)) {
            printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) TIME CONTRAINT.\n");
            break;
        }

        if (p_simulation.ber_limit == true) {
            if (errCounters.ber_value() < p_simulation.ber_limit_value) {
                printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT (on BER).\n");
                break;
            }
        }

        if (p_simulation.fer_limit == true) {
            if (errCounters.fer_value() < p_simulation.fer_limit_value) {
                printf("(II) THE SIMULATION HAS STOP DUE TO THE (USER) QUASI-ERROR FREE CONTRAINT (on FER).\n");
                break;
            }
        }

    }
    
       ////////////////////////////////////////////////////////////////////////////////
    //
    //
    // SECOND EVALUATION OF THE THROUGHPUT WITHOUT ENCODED FRAME REGENERATION
    //
    //
    if( 0 )
    {
        int exec = 0;
        const int t_eval = STOP_TIMER_SECOND;


        //
        // ONE THREAD MODE
        //
        if (NUM_ACTIVE_THREADS == 1) 
		{
            CTimer t_Timer1(true);
            while (t_Timer1.get_time_sec() < t_eval) 
			{
                for (int qq = 0; qq < 20; qq++) 
				{
                    // Run several decodes between two timer reads to limit the timer's impact on the
                    // measured performance (for very small LDPC codes): depending on the OS and the
                    // CTimer implementation, reading the time can be slow.
                    decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[0]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    exec += 1;
                }
            }
            t_Timer1.stop();
            float debit = _N * ((exec * nb_frames ) / ((float) t_Timer1.get_time_sec()));
            debit /= 1000000.0f;
            printf("(PERF1) LDPC decoder air throughput = %1.6f Mbps\n", debit);
        }

        //
        // TWO THREAD MODE
        //
        if (NUM_ACTIVE_THREADS == 2) {
            exec = 0;
            omp_set_num_threads(2);
            CTimer t_Timer2(true);

            while (t_Timer2.get_time_sec() < t_eval) 
			{
                const int looper = 20;
                #pragma omp parallel sections
                {
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                }
                exec += 2 * looper;
            }
            t_Timer2.stop();

            // for each decoder run, we decoded nb_frames codewords (depending on the SIMD width)
            float debit = _N * ((exec * nb_frames) / ((float) t_Timer2.get_time_sec()));
            debit /= 1000000.0f;
            printf("(PERF2) LDPC decoder air throughput = %1.3f Mbps\n", debit);
        }

        //
        // THREE THREAD MODE
        //
        if (NUM_ACTIVE_THREADS == 3) 
		{
            exec = 0;
            omp_set_num_threads(3);
            CTimer t_Timer3(true);

            while (t_Timer3.get_time_sec() < t_eval) 
			{
                const int looper = 20;
                #pragma omp parallel sections
                {
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                        {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[2]->decode(simu_data[2]->get_t_noise_data(), simu_data[3]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                }
                exec += 4 * looper;
            }
            t_Timer3.stop();

            float debit = _N * ((exec * nb_frames) / ((float) t_Timer3.get_time_sec()));
            debit /= 1000000.0f;
            printf("(PERF4) LDPC decoder air throughput = %1.3f Mbps\n", debit);
        }

        //
        // FOUR THREAD MODE
        //
        if (NUM_ACTIVE_THREADS == 4) 
		{
            exec = 0;
            omp_set_num_threads(4);
            CTimer t_Timer3(true);

            while (t_Timer3.get_time_sec() < t_eval) 
			{
                const int looper = 20;
                #pragma omp parallel sections
                {
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[0]->decode(simu_data[0]->get_t_noise_data(), simu_data[1]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[1]->decode(simu_data[1]->get_t_noise_data(), simu_data[2]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                        {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[2]->decode(simu_data[2]->get_t_noise_data(), simu_data[3]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                    #pragma omp section
                    {
                        for (int qq = 0; qq < looper; qq++)
                            decoder[3]->decode(simu_data[3]->get_t_noise_data(), simu_data[4]->get_t_decode_data(), NOMBRE_ITERATIONS);
                    }
                }
                exec += 4 * looper;
            }
            t_Timer3.stop();

            float debit = _N * ((exec * nb_frames) / ((float) t_Timer3.get_time_sec()));
            debit /= 1000000.0f;
            printf("(PERF4) LDPC decoder air throughput = %1.3f Mbps\n", debit);
        }
        exit(0);
    }

//     CLEAN UP ALL THE DYNAMICALLY CREATED OBJECTS...
    for(int i=0; i<4; i++){
        delete simu_data[i];
        delete noise[i];
        delete decoder[i];
        delete encoder[i];
        delete errCounter[i];
        delete conv_fp[i];
    }

    return 1;
}