template <typename T>
int gather(T* send_local, T* recv_root) {
   return MPI_Gather(send_local, 1, detail::MpiDataType<T>::value,
                     recv_root,  1, detail::MpiDataType<T>::value,
                     0, MPI_COMM_WORLD);
}
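This wrapper relies on a detail::MpiDataType<T> trait (not shown in the snippet) that maps a C++ element type to the corresponding MPI datatype handle. A minimal sketch of what such a trait could look like is given below; the specializations are illustrative assumptions, not the project's actual definitions.

#include <mpi.h>

namespace detail {

// Primary template intentionally left undefined so unsupported types fail to compile.
template <typename T> struct MpiDataType;

// Illustrative specializations; the real project may define more of these.
template <> struct MpiDataType<int>    { static const MPI_Datatype value; };
template <> struct MpiDataType<double> { static const MPI_Datatype value; };

const MPI_Datatype MpiDataType<int>::value    = MPI_INT;
const MPI_Datatype MpiDataType<double>::value = MPI_DOUBLE;

} // namespace detail

With such specializations in place, gather<double>(&x, recv) collects one double per rank into recv on rank 0 and returns the MPI error code.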
Example #2
/** @param sbuf the (filled) array of ice grid values for this MPI node. */
void GCMCoupler::couple_to_ice(
double time_s,
int nfields,			// Number of fields in sbuf.  Not all will necessarily be filled, in the case of heterogeneous ice models.
giss::DynArray<SMBMsg> &sbuf,	// Values, already converted to ice model inputs (from gcm outputs)
std::vector<giss::VectorSparseVector<int,double>> &gcm_ivals)	// Root node only: Already-allocated space to put output values.  Members as defined by the CouplingContract GCMCoupler::gcm_inputs
{
	// TODO: Convert this to use giss::gather_msg_array() instead!!!

	// Gather buffers on root node
	int num_mpi_nodes;
	MPI_Comm_size(gcm_params.gcm_comm, &num_mpi_nodes); 

	int const rank = gcm_params.gcm_rank;

printf("[%d] BEGIN GCMCoupler::couple_to_ice() time_s=%f, sbuf.size=%d, sbuf.ele_size=%d\n", gcm_params.gcm_rank, time_s, sbuf.size, sbuf.ele_size);

	// MPI_Gather the count
	std::unique_ptr<int[]> rcounts;
	rcounts.reset(new int[num_mpi_nodes]);
	for (int i=0; i<num_mpi_nodes; ++i) rcounts[i] = 0;

printf("[%d] EE1\n", rank);

	int nele_l = sbuf.size;
	MPI_Gather(&nele_l, 1, MPI_INT, &rcounts[0], 1, MPI_INT, gcm_params.gcm_root, gcm_params.gcm_comm);

	// Compute displacements as prefix sum of rcounts
	std::unique_ptr<int[]> displs;
	std::unique_ptr<giss::DynArray<SMBMsg>> rbuf;

printf("[%d] EE2\n", rank);
	displs.reset(new int[num_mpi_nodes+1]);
	displs[0] = 0;
	for (int i=0; i<num_mpi_nodes; ++i) displs[i+1] = displs[i] + rcounts[i];
	int nele_g = displs[num_mpi_nodes];

	// Create receive buffer, and gather into it
	// (There's an extra item in the array for a sentinel)
	rbuf.reset(new giss::DynArray<SMBMsg>(SMBMsg::size(nfields), nele_g+1));

printf("[%d] EE3\n", rank);
	MPI_Datatype mpi_type(SMBMsg::new_MPI_struct(nfields));
	MPI_Gatherv(sbuf.begin().get(), sbuf.size, mpi_type,
		rbuf->begin().get(), &rcounts[0], &displs[0], mpi_type,
		gcm_params.gcm_root, gcm_params.gcm_comm);
	MPI_Type_free(&mpi_type);
printf("[%d] EE4\n", rank);

	if (am_i_root()) {
printf("[%d] EE5\n", rank);
		// (ONLY ON GCM ROOT)
		// Clear output arrays, which will be filled in additively
		// on each ice model
//		for (auto ov=gcm_ivals.begin(); ov != gcm_ivals.end(); ++ov) *ov = 0;

		// Add a sentinel
		(*rbuf)[rbuf->size-1].sheetno = 999999;

		// Sort the receive buffer so items in same ice sheet
		// are found together
		qsort(rbuf->begin().get(), rbuf->size, rbuf->ele_size, &SMBMsg::compar);

printf("[%d] EE6\n", rank);
		// (ONLY ON GCM ROOT)
		// Figure out which ice sheets we have data for
		auto lscan(rbuf->begin());
		auto rscan(lscan);
		std::map<int, CallIceModelParams> im_params;
		while (rscan < rbuf->end()) {
			if (rscan->sheetno != lscan->sheetno) {
				int sheetno = lscan->sheetno;
				auto cimp(CallIceModelParams(sheetno, lscan.get(), rscan.get()));
				im_params[sheetno] = cimp;
				lscan = rscan;
			}

			++rscan;
		}

		// (ONLY ON GCM ROOT)
		// NOTE: call_ice_model() is called (below) even on NON-ROOT
		// Call all our ice models
		for (auto model = models.begin(); model != models.end(); ++model) {
			int sheetno = model.key();
			// Assume we have data for all ice models
			// (So we can easily maintain MPI SIMD operation)
			auto params(im_params.find(sheetno));
			call_ice_model(&*model, sheetno, time_s, *rbuf,
				params->second.begin, params->second.next);

			// Convert to variables the GCM wants (but still on the ice grid)
			model->set_gcm_inputs(0);	// Fills in gcm_ivals_I

			// Free ice_ovals_I
			model->free_ice_ovals_I();
		}

		regrid_gcm_inputs_onroot(gcm_ivals, 0);
	} else {
		// (ONLY ON NOT GCM ROOT)
		// We're not root --- we have no data to send to ice
		// models, we just call through anyway because we will
		// receive data in an upcoming MPI_Scatter
		// Call all our ice models
		for (auto model = models.begin(); model != models.end(); ++model) {
			int sheetno = model.key();
			// Assume we have data for all ice models
			// (So we can easily maintain MPI SIMD operation)
			call_ice_model(&*model, sheetno, time_s, *rbuf,
				NULL, NULL);

		}

	}		// if (gcm_params.gcm_rank == gcm_params.gcm_root)

	printf("[%d] END GCMCoupler::couple_to_ice()\n", gcm_params.gcm_rank);
}
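The sequence above (gather per-rank counts, build displacements as a prefix sum, then MPI_Gatherv into one contiguous buffer on the root) is a standard pattern for collecting variable-sized data. A stripped-down sketch of the same pattern, independent of the GCMCoupler/DynArray types and using double as a stand-in element type, might look like this (the helper name is ours, not the project's):

#include <mpi.h>
#include <vector>

// Sketch only: gather each rank's variable-length vector onto `root`.
// The returned vector is meaningful on the root; other ranks get an empty one.
std::vector<double> gather_variable(const std::vector<double>& local,
                                    int root, MPI_Comm comm)
{
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);

    // 1. Gather the per-rank element counts on the root.
    int nlocal = static_cast<int>(local.size());
    std::vector<int> rcounts(nprocs, 0);
    MPI_Gather(&nlocal, 1, MPI_INT, &rcounts[0], 1, MPI_INT, root, comm);

    // 2. Displacements are the prefix sum of the counts (as in the code above).
    std::vector<int> displs(nprocs + 1, 0);
    for (int i = 0; i < nprocs; ++i) displs[i + 1] = displs[i] + rcounts[i];

    // 3. Gather the elements themselves into one contiguous buffer on the root.
    std::vector<double> global(rank == root ? displs[nprocs] : 0);
    double* sendbuf = const_cast<double*>(local.empty() ? NULL : &local[0]);
    MPI_Gatherv(sendbuf, nlocal, MPI_DOUBLE,
                global.empty() ? NULL : &global[0], &rcounts[0], &displs[0],
                MPI_DOUBLE, root, comm);
    return global;
}

On non-root ranks the receive arguments of MPI_Gatherv are ignored, which is why the empty buffer is acceptable there.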
Example #3
File: main.c Project: Moddus/mpi_search
int
main(int argc, char *argv[])
{
    ps_status_t rv = PS_SUCCESS;
    int log_level = LOG_LEVEL_NONE;
    char *search = NULL, *path = NULL;
    size_t search_len = 0;
    int i = 0, c = 0;
    int number_of_procs = 0, own_rank = 0;
    int *slave_nodes = NULL;
    unsigned long chunk_size = DEFAULT_CHUNK_SIZE;
    ps_searcher_t *searcher = NULL;
    ps_search_task_t *task = NULL;
    char *result = NULL;
    size_t result_len = 0, total_result_len = 0, *all_result_len = NULL;
    int search_col = PS_CSV_ALL_COL;

#ifdef TIME_MEASUREMENT
    float total_seconds = 0, total_search_time = 0, total_file_io_time = 0, total_setup_time = 0, total_reduce_time = 0;
    struct timeval time_start, current_time;
    gettimeofday(&time_start, NULL);
    memcpy(&current_time, &time_start, sizeof(struct timeval));
#endif

    out_fd = stdout; /*For Logging*/

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &number_of_procs);
    MPI_Comm_rank (MPI_COMM_WORLD, &own_rank);

    /*set log level until arguments are passed and processed*/
    set_log_level(log_level);

    if (own_rank == MASTER)
    {
        opterr = 0;
        while ((c = getopt (argc, argv, "hds:f:c:l:")) != -1)
        {
            switch (c)
            {
            case 'd':
                log_level = LOG_LEVEL_DEBUG;
                break;
            case 'f':
                path = optarg;
                break;
            case 's':
                search = optarg;
                search_len = strlen(search);
                break;
            case 'c':
                PS_CHECK_ZERO_GO_ERR( (chunk_size = atol(optarg)), PS_ERROR_WRONG_CHUNK_SIZE);
                break;
            case 'l':
                search_col = atoi(optarg);
                break;
            case 'h':
                break;
            default:
                if (optopt == 's')
                    fprintf (stderr, "Option -%c requires an argument.\n", optopt);
                else if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                             "Unknown option character `\\x%x'.\n", optopt);

                rv = PS_ERROR_WRONG_ARGUMENTS;
                goto error;
            }
        }
    }

    /*Communicate and set log_level*/
    PS_MPI_CHECK_ERR(MPI_Bcast(&log_level, 1, MPI_INT, MASTER, MPI_COMM_WORLD));
    set_log_level(log_level);

    /*Communicate the token to search for*/
    PS_MPI_CHECK_ERR(MPI_Bcast(&search_len, 1, MPI_UNSIGNED_LONG, MASTER, MPI_COMM_WORLD));
    if (own_rank != MASTER)
    {
        PS_MALLOC(search, sizeof(char) * (search_len + 1));
    }
    PS_MPI_CHECK_ERR(MPI_Bcast(search, search_len + 1, MPI_CHAR, MASTER, MPI_COMM_WORLD));
    log_debug("Process %d: search_len:%u search:%s", own_rank, search_len, search);

#ifdef TIME_MEASUREMENT
    if(own_rank == MASTER)
    {
        update_timestamp_and_total_seconds(&current_time, &total_setup_time);
    }
#endif

    if (own_rank == MASTER)
    {
        log_debug("search = %s, path = %s, chunk_size=%lu", search, path, chunk_size);
        log_debug("sizeof(ps_search_task_t:%lu", sizeof(ps_search_task_t));

        for (i = optind; i < argc; i++)
        {
            log_debug("Non-option argument %s", argv[i]);
        }

        /*-------------------Processing of arguments done!-----------*/
        log_debug("Number of procs:%d", number_of_procs);
        PS_MALLOC( slave_nodes, sizeof(int) * (number_of_procs));
        for (i = 0; i < number_of_procs - 1; i++)
        {
            slave_nodes[i] = i + 1;
        }

        PS_CHECK_GOTO_ERROR( distribute_path_and_search_range(path,
                                                              number_of_procs ,
                                                              slave_nodes,
                                                              chunk_size,
                                                              search_col,
                                                              MPI_COMM_WORLD,
                                                              &task));

        /*Slaves receive path_length and search_task*/

        PS_CHECK_GOTO_ERROR(ps_file_searcher_create(&searcher, search, task));
        PS_CHECK_GOTO_ERROR(ps_file_searcher_search(searcher, &result, &result_len));
        log_debug("Process %d: result_len: %lu", own_rank, result_len);
        PS_CHECK_GOTO_ERROR(ps_file_searcher_free(&searcher));
        PS_MALLOC(all_result_len, sizeof(size_t) * number_of_procs);
    }
    else
    {
        /*Slaves receive path_length and search_task*/
        PS_CHECK_GOTO_ERROR(recv_task(&task, own_rank, MASTER, MPI_COMM_WORLD));

        PS_CHECK_GOTO_ERROR(ps_file_searcher_create(&searcher, search, task));
        PS_CHECK_GOTO_ERROR(ps_file_searcher_search(searcher, &result, &result_len));
        log_debug("Process %d: result_len: %lu", own_rank, result_len);
        PS_CHECK_GOTO_ERROR(ps_file_searcher_free(&searcher));
    }

#ifdef TIME_MEASUREMENT
    if(own_rank == MASTER)
    {
        gettimeofday(&current_time, NULL);
    }
#endif

    PS_MPI_CHECK_ERR(MPI_Gather(&result_len, 1, MPI_UNSIGNED_LONG, all_result_len, 1, MPI_UNSIGNED_LONG, MASTER,
            MPI_COMM_WORLD));

    if(own_rank == MASTER)
    {
        for(i = 0; i < number_of_procs; i++){
            total_result_len += all_result_len[i];
        }
        log_debug("Process %d: total_result_len:%lu", own_rank, total_result_len);
        PS_REALLOC(result, total_result_len);
        for(i = 1; i < number_of_procs; i++){
            /* Append rank i's chunk at the current end of the buffer
             * (result_len already covers the master's own result). */
            PS_MPI_CHECK_ERR(MPI_Recv(result + result_len, all_result_len[i], MPI_CHAR,
                    i, PS_MPI_TAG_RESULT, MPI_COMM_WORLD, MPI_STATUS_IGNORE));
            result_len += all_result_len[i];
        }
        write(STDOUT_FILENO, result, result_len);
    }
    else
    {
        PS_MPI_CHECK_ERR(MPI_Send(result, result_len, MPI_CHAR, MASTER, PS_MPI_TAG_RESULT, MPI_COMM_WORLD));
        PS_FREE(search);
    }

    log_debug("Process %d finished", own_rank);

#ifdef TIME_MEASUREMENT
    if(own_rank == MASTER)
    {
        update_timestamp_and_total_seconds(&current_time, &total_reduce_time);
    }
    printf("Process %d: process_search_time: %f, process_file_io_time: %f\n", own_rank
        ,process_search_time, process_file_io_time);

    PS_MPI_CHECK_ERR(MPI_Reduce(&process_search_time, &total_search_time, 1, MPI_FLOAT, MPI_SUM, MASTER,
            MPI_COMM_WORLD));
    PS_MPI_CHECK_ERR(MPI_Reduce(&process_file_io_time , &total_file_io_time, 1, MPI_FLOAT, MPI_SUM, MASTER,
            MPI_COMM_WORLD));

    update_timestamp_and_total_seconds(&time_start, &total_seconds);
    if(own_rank == MASTER)
    {
        printf("Total-Time: %.3fs\n"
                        "\ttotal_setup_time: %.3fs\n"
                        "\ttotal_reduce_time: %.3fs\n"
                        "\taverage-io-time: %.3fs\n"
                        "\taverage-search-time: %.3fs\n"
                        "processes: %d\n"
                        "chunksize: %lu Bytes\n",
                total_seconds, total_setup_time, total_reduce_time,
                total_file_io_time / number_of_procs, total_search_time / number_of_procs,
                number_of_procs, chunk_size);
    }
#endif

    MPI_Finalize();

    PS_FREE(slave_nodes);
    PS_FREE(result);
    PS_FREE(all_result_len);

    return EXIT_SUCCESS;
    /*-----------------ERROR-Handling------------------------------*/
error:
    log_err("Process %d finished with error: %d", own_rank, rv);
    MPI_Finalize();

    if (own_rank != MASTER)
    {
        PS_FREE(search);
    }

    PS_FREE(slave_nodes);
    if(searcher)
    {
        PS_FREE(searcher->task);
    }
    PS_FREE(result);
    PS_FREE(all_result_len);

    return rv;
}
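For comparison, the length-gather plus point-to-point receive loop used in the program above can also be written as a single MPI_Gatherv call. The sketch below assumes each rank already holds its result buffer and result_len as in main(); the helper name and the use of plain malloc are illustrative only, not part of the project.

#include <mpi.h>
#include <stdlib.h>
#include <unistd.h>

/* Sketch: concatenate every rank's result bytes on rank 0 with MPI_Gatherv. */
void gather_results(const char *result, unsigned long result_len,
                    int number_of_procs, int own_rank)
{
    int *rcounts = NULL, *displs = NULL;
    char *all_results = NULL;
    unsigned long total = 0;
    int len = (int)result_len;
    int i;

    if (own_rank == 0) {
        rcounts = (int*)malloc(number_of_procs * sizeof(int));
        displs  = (int*)malloc(number_of_procs * sizeof(int));
    }

    /* The root learns how many bytes each rank will contribute. */
    MPI_Gather(&len, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (own_rank == 0) {
        for (i = 0; i < number_of_procs; i++) {
            displs[i] = (int)total;
            total    += rcounts[i];
        }
        all_results = (char*)malloc(total);
    }

    /* Concatenate all result buffers on rank 0 in rank order. */
    MPI_Gatherv((char*)result, len, MPI_CHAR,
                all_results, rcounts, displs, MPI_CHAR, 0, MPI_COMM_WORLD);

    if (own_rank == 0) {
        write(STDOUT_FILENO, all_results, total);
        free(all_results);
        free(displs);
        free(rcounts);
    }
}

Unlike the MPI_Recv loop, MPI_Gatherv places every chunk in one collective call, at the cost of converting the per-rank lengths to int.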
int main(int argc, char **argv)
{
	int num1, num2, proc_num, proc_rank, comp_result, i;
	int buf1[10], buf2[10], buf_result[10]; 
	MPI_Status status;

	MPI_Init( &argc, &argv );
	MPI_Comm_size( MPI_COMM_WORLD, &proc_num );
	MPI_Comm_rank( MPI_COMM_WORLD, &proc_rank );

	if( 10 != proc_num ) // check that exactly 10 processes were started
	{
		if( 0 == proc_rank ) printf("Wrong number of processes!\n");
                MPI_Finalize();
		return 0;
        }

	
	if( 0 == proc_rank ) // read the decimal numbers on process 0; only it knows them
	{
		scanf("%d%d", &num1, &num2 );

        	if( num1>1000 ) // clamp to the allowed range
                	num1 = MAX_NUM;

        	if( num2>1000 )
                	num2 = MAX_NUM;

		MPI_Send( &num1, 1, MPI_INT, 1, 0, MPI_COMM_WORLD ); // TODO: use a separate communicator and send only to these ranks?
		MPI_Send( &num2, 1, MPI_INT, 2, 0, MPI_COMM_WORLD ); // send the numbers to processes 1 and 2
	}
	
	if( 1 == proc_rank ) // these processes convert the numbers to binary form
	{
		MPI_Recv( &num1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
		for( i=0;i<10;i++)
			buf1[i] = *( dec_to_bin( num1 ) + i );

		printf("The first number is: ");
		
		for( i=0;i<10;i++)
			printf("%d ", buf1[i]);
		
		printf("\n");
	}
	
	if( 2 == proc_rank)
	{
		MPI_Recv( &num2, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
		for(i=0;i<10;i++)
			buf2[i] = *( dec_to_bin( num2 ) + i );

		printf("The second number is: ");

		for(i=0;i<10;i++)
			printf("%d ", buf2[i] );
	
		printf("\n");
	}

	MPI_Bcast( buf1, 10, MPI_INT, 1, MPI_COMM_WORLD ); // now every process has both numbers in binary
	MPI_Bcast( buf2, 10, MPI_INT, 2, MPI_COMM_WORLD );

	comp_result = ( buf1[proc_rank] == buf2[proc_rank] ) ? 0 : 1; 	// each process compares its own digit in parallel

	MPI_Barrier( MPI_COMM_WORLD );
	
	MPI_Gather( &comp_result, 1, MPI_INT, buf_result, 1, MPI_INT, 0, MPI_COMM_WORLD );	// comparison results end up on process 0
	
	if ( 0 == proc_rank )
	{
		for(i=0;i<10;i++)
			printf("%d ", buf_result[i] ); 
		i = 0;
		int flag = 1;

		while( 0 == buf_result[i] ) // scan the digits in order until a mismatch is found
		{
			i++;
			
			if( 10 == i)
			{
				printf("Numbers are equal!\n");
				flag = 0;
				break; // all digits matched; stop before reading past the array
			}
		} 
		
		if(flag)
		{
			if( buf1[i] > buf2[i] )
				printf("\nFirst number is bigger!\n");
			else printf("\nSecond number is bigger!\n");
		}
	}

	MPI_Finalize();
	return 0;
}
int main(int argc, char* argv[]) {

    int myrank;
    int nodes;
    double *a;

    int i, j, k, noTest;
    Arguments arguments;

    arguments.matrix_size = -1;
    arguments.matrice = arguments.solution = NULL;
    arguments.imprimer = 0;
    arguments.pivot = 0;
    arguments.write = NULL;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nodes);

    for(i = 1; i < argc; i++) {
        if(argv[i][0] == '-') {
            switch(argv[i][1]) {
                case 'm': if(i+1 < argc) {
                              arguments.matrix_size = atoi(argv[++i]);
                          }
                          break;
                case 'f': if(i+1 < argc) {
                              arguments.matrice = argv[++i];
                          }
                          break;
                case 's': if(i+1 < argc) {
                              arguments.solution = argv[++i];
                          }
                          break;
                case 'p': arguments.pivot = 1;
                          break;
                case 'i': arguments.imprimer = 1;
                          break; 
                case 'w': if(i+1 < argc) {
                              arguments.write = argv[++i];
                          }
                          break;
                default: fprintf(stderr, "Usage: %s -m <tailleMatrice> "\
                                 "[-f nomFichier] [-s solution]\n", argv[0]);
                         exit(EXIT_FAILURE);
            }
        }
    }

    if(arguments.matrix_size < 0) {
        fprintf(stderr,"Erreur: vous devez fournir une taille de matrice.\n");
        exit(EXIT_FAILURE);
    }

    if(arguments.matrix_size % nodes != 0) {
        fprintf(stderr, "Erreur: le nombre de noeuds doit diviser la"\
                "taille de la matrice.\n");
        exit(EXIT_FAILURE);
    }

    MPI_Barrier( MPI_COMM_WORLD );

    int nbLines = arguments.matrix_size / nodes;

    if(arguments.matrice) {
        a = readMatrixCyclic(arguments.matrice, arguments.matrix_size, 
                nbLines, myrank, nodes);
    } else {  
        a = (double *) malloc( nbLines*arguments.matrix_size*sizeof(double) );
        initializeMatrix( arguments.matrix_size, nbLines, a);
    }
    double tempsEcoule;
    MPI_Barrier( MPI_COMM_WORLD );
    tempsEcoule = -MPI_Wtime();

    factoriserLU(a, arguments.matrix_size, nbLines, MPI_COMM_WORLD, myrank,
            arguments.pivot, nodes);

    double tempsMax;
    tempsEcoule += MPI_Wtime();

    //Bring the maximum elapsed time back to processor 0
    MPI_Reduce(&tempsEcoule, &tempsMax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if(arguments.solution) {
        //check the result against the reference solution
        double *resultat = readMatrixCyclic(arguments.solution, 
                arguments.matrix_size, nbLines, myrank, nodes);
        checkResult(resultat, a, arguments.matrix_size, nbLines, MPI_COMM_WORLD);
        if(myrank == 0) {
            printf("Resultat de la factorisation lu correcte.\n");
        }

        if(resultat) free(resultat);
    }

    if (myrank == 0) {
        printf( "Factorisation LU %s pivot effectuee en %.3f msecs\n", 
                (arguments.pivot?"avec":"sans"), 1000*tempsMax);
    }
    }

    if(arguments.imprimer || arguments.write) {

        //Gather the complete matrix back on rank 0
        double *matriceComplete = NULL;
        if(myrank == 0) {
            matriceComplete = (double *) malloc(arguments.matrix_size*
                    arguments.matrix_size*sizeof(double));
        }
        for( i = 0; i < nbLines; i++) {
            /* The receive buffer is only used on rank 0. */
            MPI_Gather(&a[i*arguments.matrix_size], arguments.matrix_size, MPI_DOUBLE, 
                    myrank == 0 ? &matriceComplete[i*nodes*arguments.matrix_size] : NULL, 
                    arguments.matrix_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        }
        if( myrank == 0) {
            if(arguments.imprimer) {
                printMatrix(arguments.matrix_size, arguments.matrix_size, 
                        matriceComplete);
            }
            if(arguments.write) {
                writeMatrix(arguments.matrix_size, arguments.matrix_size, 
                        matriceComplete, arguments.write);
            }
            if(matriceComplete) free(matriceComplete);
        }


    }

#ifndef NDEBUG
    printf("rank G:%i\n",myrank);
#endif

    if(a)free(a);
    MPI_Finalize();
    return 0;
}
void AllgatherDomains(std::set<int> &setOfDomain){
	int i = 0;
	int numLDomains = (int)setOfDomain.size();
	int domainsarray[numLDomains];
	std::set<int>::iterator iter = setOfDomain.begin();
	for (;iter != setOfDomain.end(); iter++) domainsarray[i++] = *iter;

	int numGDomains[P_size()];
	MPI_Gather(&numLDomains,1,MPI_INT,numGDomains,1,MPI_INT,0,MPI_COMM_WORLD);

	//	if (!P_pid()){
	//		for(i=0; i<P_size(); i++) printf("rank %d receives %d domains from rank %d\n",P_pid(),numGDomains[i],i);
	//	}

	// allocate enough space to receive nodes from all processors
	int *recv_buffer2 = NULL, *displacements = NULL;	// allocated on the root only
	int totalDoms = 0;
	if ( !P_pid() ){
		for(i=0; i<P_size(); i++) totalDoms += numGDomains[i];
		recv_buffer2 = new int[totalDoms]; // only root processor allocates memory
		displacements = new int[P_size()];
		displacements[0] = 0;
		for (int i=1; i<P_size(); i++) displacements[i] = displacements[i-1] + numGDomains[i-1];
	}


	// now it's time to send nodes to root processor
	MPI_Gatherv(domainsarray,numLDomains,MPI_INT,
			recv_buffer2,numGDomains,displacements,MPI_INT,
			0,MPI_COMM_WORLD);
	//	if (!P_pid()){
	//		for(i=0; i<totalDoms; i++) printf("rank %d domains %d\n",P_pid(),recv_buffer2[i]);
	//	}

	// let's filter domains flags to avoid repeated values
	setOfDomain.clear();
	if (!P_pid()){
		for(i=0; i<totalDoms; i++) setOfDomain.insert( recv_buffer2[i] );
		delete [] recv_buffer2;		// free the root-only gather buffers
		delete [] displacements;
	}
	//	printf("rank %d setOfDomain.size() = %d\n",P_pid(),setOfDomain.size());

	//	if (!P_pid()){
	//		for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) printf("rank %d domains %d\n",P_pid(),*iter);
	//	}

	// Send these domains flags to all processes

	i = 0;
	int numGDomains2 = (int)setOfDomain.size();
	numGDomains2 = P_getSumInt(numGDomains2);
	int domainsGarray[numGDomains2];

	//	if (!P_pid()){
	for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) domainsGarray[i++] = *iter;
	//	}

	MPI_Bcast(domainsGarray,numGDomains2,MPI_INT,0,MPI_COMM_WORLD);

	for(i=0; i<numGDomains2; i++) setOfDomain.insert( domainsGarray[i] );
	//printf("rank %d numGDomains2 %d\n",P_pid(),numGDomains2);

	//	for(i=0; i<numGDomains2; i++) printf("rank %d domains %d\n",P_pid(),domainsGarray[i]);
}
Example #7
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service xfer_svc;

    int server_index=0;
    int rank_in_server=0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    const int num_nssi_transports = 4;
    const int nssi_transport_vals[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_MPI};
    const char * nssi_transport_names[] = {
            "ptl",
            "ib",
            "gni",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.block_distribution = true;


    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set an option, it must be given a name and default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a user's comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution,
                "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");


        // Set an enumeration command line option for the io_method
        parser.setOption("transport", &args.transport, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals : Cray or Schutt\n"
                "\t\t\tinfiniband : libibverbs\n"
                "\t\t\tgemini : Cray\n"
                "\t\t\tmpi : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw an
           std::exception.  If you want to prevent a command line processor from
           throwing an std::exception (which is important in this program since
           we don't have a try/catch around this) when it encounters an
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks it's
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }

        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }
    else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server) or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // This needs to be a vector of chars, not a string
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }


        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    args.io_method_name = std::string(io_method_names[args.io_method]);
    args.transport_name = std::string(nssi_transport_names[args.transport]);

    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, node 0, of the client
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref xfer_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {


            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if (rank_in_server == 0) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
      out << "\nEnd Result: TEST PASSED" << std::endl;
    else
        out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
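The communicator handling in the example above reduces to a single MPI_Comm_split keyed on a color (server vs. client), with the first num_servers ranks acting as servers. A condensed sketch of just that step, with an assumed num_servers parameter and a helper name of our own, is:

#include <mpi.h>

// Sketch: split MPI_COMM_WORLD so ranks [0, num_servers) form the server
// communicator (color 0) and the remaining ranks form the client
// communicator (color 1), mirroring the color logic in the example above.
MPI_Comm split_client_server(int num_servers, int *color_out)
{
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int color = (rank < num_servers) ? 0 : 1;   // 0 = server, 1 = client

    // Using the world rank as the key keeps ranks in their original
    // relative order inside each new communicator.
    MPI_Comm comm;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);

    *color_out = color;
    return comm;
}

Each process then queries its role-local rank and size on the returned communicator, just as the example does with splitrank and splitsize.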
Example #8
void trainOneEpochDenseCPU(int itask, float *data, float *numerator,
                           float *denominator, float *codebook,
                           unsigned int nSomX, unsigned int nSomY,
                           unsigned int nDimensions, unsigned int nVectors,
                           unsigned int nVectorsPerRank, float radius,
                           float scale, string mapType, int *globalBmus)
{
    unsigned int p1[2] = {0, 0};
    unsigned int *bmus = new unsigned int[nVectorsPerRank*2];

    #pragma omp parallel default(shared) private(p1)
    {
        #pragma omp for
        for (unsigned int n = 0; n < nVectorsPerRank; n++) {
            if (itask*nVectorsPerRank+n<nVectors) {
                /// get the best matching unit
                get_bmu_coord(codebook, data, nSomY, nSomX,
                              nDimensions, p1, n);
                bmus[2*n] = p1[0]; bmus[2*n+1] = p1[1];
              }
        }
    }

    float *localNumerator = new float[nSomY*nSomX*nDimensions];
    float *localDenominator = new float[nSomY*nSomX];

    #pragma omp parallel default(shared)
    {
        #pragma omp for
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                localDenominator[som_y*nSomX + som_x] = 0.0;
                for (unsigned int d = 0; d < nDimensions; d++)
                    localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] = 0.0;
            }
        }
        /// Accumulate denoms and numers
        #pragma omp for
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                for (unsigned int n = 0; n < nVectorsPerRank; n++) {
                    if (itask*nVectorsPerRank+n<nVectors) {
                        float dist = 0.0f;
                        if (mapType == "planar") {
                            dist = euclideanDistanceOnPlanarMap(som_x, som_y, bmus[2*n], bmus[2*n+1]);
                        } else if (mapType == "toroid") {
                            dist = euclideanDistanceOnToroidMap(som_x, som_y, bmus[2*n], bmus[2*n+1], nSomX, nSomY);
                        }
                        float neighbor_fuct = getWeight(dist, radius, scale);
                        
                        for (unsigned int d = 0; d < nDimensions; d++) {
                            localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] +=
                                1.0f * neighbor_fuct
                                * (*(data + n*nDimensions + d));
                        }
                        localDenominator[som_y*nSomX + som_x] += neighbor_fuct;
                    }
                }
            }
        }
    }
#ifdef HAVE_MPI
    MPI_Reduce(localNumerator, numerator,
               nSomY*nSomX*nDimensions, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(localDenominator, denominator,
               nSomY*nSomX, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Gather(bmus, nVectorsPerRank*2, MPI_INT, globalBmus, nVectorsPerRank*2, MPI_INT, 0, MPI_COMM_WORLD);

#else
    for (unsigned int i=0; i < nSomY*nSomX*nDimensions; ++i) {
        numerator[i] = localNumerator[i];
    }
    for (unsigned int i=0; i < nSomY*nSomX; ++i) {
        denominator[i] = localDenominator[i];
    }
    for (unsigned int i=0; i < 2*nVectorsPerRank; ++i) {
      globalBmus[i]=bmus[i];
    }
#endif
    delete [] bmus;
    delete [] localNumerator;
    delete [] localDenominator;
}
Example #9
File: timempi2.c Project: sKeLeTr0n/uni
int main(int argc, char** argv) {
    // Initialize the MPI environment
    MPI_Init(&argc, &argv);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    char string_buffer[LEN];
    char* rbuf = NULL;
    if(world_rank == 0)
        rbuf = malloc(world_size * LEN * sizeof(char));

    // Get the name of the processor
    char processor_name[50];
    int name_len = 50;
    if(gethostname(processor_name, name_len) != 0) {
        printf("Error with hostname");
        exit(1);
    }

    // Get current time on host
    struct timeval time;
    if(gettimeofday(&time, NULL) != 0) {
        printf("Error with time");
        exit(1);
    }

    // Generate output
    time_t curtime = time.tv_sec;
    char time_buffer[30];
    strftime(time_buffer, 30, "%Y-%m-%d %T.", localtime(&curtime));
    sprintf(string_buffer, "%s: %s%li", processor_name, time_buffer, time.tv_usec);

    // Gather output
    int rc = MPI_Gather(string_buffer, LEN, MPI_CHAR, rbuf, LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
    if(rc != MPI_SUCCESS) {
        printf("Error while gathering, rc is: %d", rc);
        exit(1);
    }

    // Print output
    if(world_rank == 0) {
        for(int i = 0; i < world_size; ++i)
            printf("%.*s\n", LEN, rbuf + LEN * i);
    }

    // Get microseconds
    int usec = time.tv_usec;
    int * rbuf_usec = NULL;
    if(world_rank == 0)
        rbuf_usec = malloc(sizeof(int));
    
    // Reduce microseconds to the minimum across all ranks
    if(MPI_Reduce(&usec, rbuf_usec, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD) != MPI_SUCCESS){
        printf("Error in MPI_Reduce\n");
        exit(1);
    }

    // Print the minimum microseconds (only valid on rank 0)
    if(world_rank == 0)
        printf("%d\n", rbuf_usec[0]);

    if(MPI_Barrier(MPI_COMM_WORLD) != MPI_SUCCESS){
        printf("Error with barrier");
        exit(1);
    }

    printf("Rang %d beendet jetzt!\n", world_rank);
	
    // Finalize the MPI environment.
    MPI_Finalize();

}
Example #10
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
void Image_Exchanger::sync_fragment_info(OverLap_FootPrint* ofp, 
                                         ImageFragment_Tile* ift,
                                         int nviewer)
{

#ifdef _DEBUG7
    fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#endif


    std::vector<int> infobuf;
    int count = ofp->save_overlap_info(infobuf);

#ifdef _DEBUG6
    fprintf(stderr, "%d: %s: olcount=%d, olbuffer size=%ld\n", 
            m_rank, __func__, count, infobuf.size());
#endif    

    int c = infobuf.size();

//     fprintf(stderr, "%d: nviewer=%d, gather MPI_INT %d\n", 
//             m_rank, nviewer, c);


    memset(m_rcounts, 0, m_runsize*sizeof(unsigned int));

    if(nviewer == 1) 
    {
        MPI_Gather(&c, 1, MPI_INT, 
               m_rcounts, 1, MPI_INT, 
               0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Allgather(&c, 1, MPI_INT, 
                      m_rcounts, 1, MPI_INT, 
                      MPI_COMM_WORLD);
    }


    // vector throws a length_error if resized above max_size
    //terminate called after throwing an instance of 'std::length_error'
    //what():  vector::_M_fill_insert

    std::vector<int> ainfobuf(1, 0);
    memset(m_rdispls, 0, m_runsize*sizeof(unsigned int));



    if( (nviewer == 1 && m_rank==0) || (nviewer > 1) )
    {
        int total = 0;
        for(int i=0; i<m_runsize; i++) total += m_rcounts[i];

//         fprintf(stderr, "std::vector max size=%ld, resize to %d\n", 
//                 ainfobuf.max_size(), total);
        assert(total > 0);

        ainfobuf.resize(total, 0);
    }



    for(int i=0; i<m_runsize-1; i++) 
        m_rdispls[i+1] = m_rdispls[i] + m_rcounts[i];

    //to make &infobuf[0] a legal call
    if(c == 0) infobuf.resize(1);

    if(nviewer == 1) 
    {
        MPI_Gatherv(&infobuf[0], c, MPI_INT,
                    &ainfobuf[0], m_rcounts, m_rdispls, 
                    MPI_INT,
                    0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Allgatherv(&infobuf[0], c, MPI_INT,
                       &ainfobuf[0], (int*)m_rcounts, (int*)m_rdispls, 
                       MPI_INT,
                       MPI_COMM_WORLD);
    }

    //fprintf(stderr, "MPI_SUCCESS on sync frag info\n");

    //only viewer need to have all fragments and count for recv
    //non-viewer only need count send for its own fragments
    if(m_rank < nviewer)
    {
        ift->retrieve_fragments(ainfobuf);
    }
    else if(c > 0)
    {
        ift->retrieve_fragments(infobuf);
    }

}
Example #11
int main(int argc, char **argv)
{
    int i, j, k;
    double start, end;
    /* Time array */
    double time[9];
	double comm_time = 0;
	double comp_time = 0;
    int chunkSize;
    MPI_Status status;
    /* Being used in FFT */
    float data[N][N];
    /* Being used in mm */
    float input_1[N][N], input_2[N][N];
    /* Local matrix for FFT */
    float local_data[N][N];

    /* World rank and processor, related to MPI_COMM_WORLD */
    int world_id;
    int world_processor;

    /* Divided rank and processors for communication, related to taskcomm */
    int task_id;
    int task_processor;

    /* A complex array  storing the temp row to operate FFT */
    complex temp_data[N];

    /* Initialize rank and the number of processor for the MPI */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_id);
    MPI_Comm_size(MPI_COMM_WORLD, &world_processor);

    /* Initialize a new vector for distributing columns */
    MPI_Datatype column, col;
    /* Column vector */
    MPI_Type_vector(N, 1, N, MPI_FLOAT, &col);
    MPI_Type_commit(&col);
    MPI_Type_create_resized(col, 0, 1*sizeof(float), &column);
    MPI_Type_commit(&column);

    int task = world_id%4;
    MPI_Comm taskcomm;
    /* Split the MPI_COMM_WORLD */
    MPI_Comm_split(MPI_COMM_WORLD, task, world_id, &taskcomm);
    MPI_Comm_rank(taskcomm, &task_id);
    MPI_Comm_size(taskcomm, &task_processor);

    /* Initialize inter communicators */
    MPI_Comm t1_t3_comm, t2_t3_comm, t3_t4_comm;

    /* Calculate chunkSize */
    chunkSize = N/task_processor;

    /* Get the start time of all program */
    if(world_id == 0){
        printf("2D convolution using MPI task and data parallelism\n");
        start = MPI_Wtime();
    }

    /* Each group completes work and send results by inter communicators */
    if(task == 0){
        // task 1
        /* Create an inter communicator for task 1 and task 3 */
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 2, 1, &t1_t3_comm);

        if(task_id == 0){
            time[0] = MPI_Wtime();

            /* Read file */
            readIm1File(data);
            time[1] = MPI_Wtime();

            printf("Group 1: Reading file 1_im1 takes %f s.\n", time[1] - time[0]);
        }

        /* Scatter data to local ranks */
        MPI_Scatter(data, chunkSize*N, MPI_FLOAT,
                    local_data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        /* Compute time for distributing data */
        if(task_id == 0){
            time[2] = MPI_Wtime();
            printf("Group 1: Scattering 1_im1(row) to each processor takes %f s.\n", time[2] - time[1]);
        }

        /* Do 1_im1 2d FFT */
        /* Row FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each row for im1 */
                temp_data[j].r = local_data[i][j];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[i][j] = temp_data[j].r;
        }

        /* Gather all the data and distribute in columns */
        if(task_id == 0){
            time[3] = MPI_Wtime();
            printf("Group 1: FFT each row for 1_im1 takes %f s.\n", time[3] - time[2]);
        }

        /* Gather all the data of 1_im1 */
        MPI_Gather(local_data, chunkSize*N, MPI_FLOAT,
                    data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        if(task_id == 0){
            time[4] = MPI_Wtime();
            printf("Group 1: Gathering all the data of 1_im1(row) takes %f s.\n", time[4] - time[3]);
        }

        /* Scatter all the data to column local data */
        MPI_Scatter(data, chunkSize, column,
                    local_data, chunkSize, column,
                    0, taskcomm);

        if(task_id == 0){
            time[5] = MPI_Wtime();
            printf("Group 1: Scattering 1_im1(column) to each processor takes %f s.\n", time[5] - time[4]);
        }

        /* Column FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each column for im1 */
                temp_data[j].r = local_data[j][i];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[j][i] = temp_data[j].r;
        }

        /* Gather all the columns from each rank */
        if(task_id == 0){
            time[6] = MPI_Wtime();
            printf("Group 1: FFT each column for 1_im1 takes %f s.\n", time[6] - time[5]);
        }

        MPI_Gather(local_data, chunkSize, column,
                    data, chunkSize, column,
                    0, taskcomm);

        /* Compute time and distribute data to do matrix multiplication */
        if(task_id == 0){
            time[7] = MPI_Wtime();
            printf("Group 1: Gathering all the data of 1_im1(column) takes %f s.\n", time[7] - time[6]);
            /* Total time */
            printf("Group 1: Total time for task 1 in group 1 takes %f s.\n", time[7] - time[0]);

			comm_time += time[7] - time[6] + time[5] - time[3] + time[2] - time[1];
			comp_time += time[6] - time[5] + time[3] - time[2];
            /* Send data to group 3 via the inter communicator */
            MPI_Send(data, N*N, MPI_FLOAT, task_id, 13, t1_t3_comm);
        }
    }
    else if(task == 1){
        // Task 2
        /* Create an inter communicator for task 2 and task 3 */
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 2, 2, &t2_t3_comm);

        if(task_id == 0){
            time[0] = MPI_Wtime();

            /* Read file */
            readIm2File(data);
            time[1] = MPI_Wtime();

            printf("Group 2: Reading file 1_im2 takes %f s.\n", time[1] - time[0]);
        }

        /* Scatter data to local ranks */
        MPI_Scatter(data, chunkSize*N, MPI_FLOAT,
                    local_data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        /* Compute time for distributing data */
        if(task_id == 0){
            time[2] = MPI_Wtime();
            printf("Group 2: Scatter 1_im2(row) to each processor takes %f s.\n", time[2] - time[1]);
        }

        /* Do 1_im1 2d FFT */
        /* Row FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each row for im1 */
                temp_data[j].r = local_data[i][j];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[i][j] = temp_data[j].r;
        }

        /* Gather all the data and distribute in columns */
        if(task_id == 0){
            time[3] = MPI_Wtime();
            printf("Group 2: FFT each row for 1_im2 takes %f s.\n", time[3] - time[2]);
        }

        /* Gather all the data of 1_im1 */
        MPI_Gather(local_data, chunkSize*N, MPI_FLOAT,
                    data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        if(task_id == 0){
            time[4] = MPI_Wtime();
            printf("Group 2: Gather all the data of 1_im2(row) takes %f s.\n", time[4] - time[3]);
        }

        /* Scatter all the data to column local data */
        MPI_Scatter(data, chunkSize, column,
                    local_data, chunkSize, column,
                    0, taskcomm);

        if(task_id == 0){
            time[5] = MPI_Wtime();
            printf("Group 2: Scatter 1_im2(column) to each processor takes %f s.\n", time[5] - time[4]);
        }

        /* Column FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each column for im2 */
                temp_data[j].r = local_data[j][i];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[j][i] = temp_data[j].r;
        }

        /* Gather all the columns from each rank */
        if(task_id == 0){
            time[6] = MPI_Wtime();
            printf("Group 2: FFT each column for 1_im2 takes %f s.\n", time[6] - time[5]);
        }

        MPI_Gather(local_data, chunkSize, column,
                    data, chunkSize, column,
                    0, taskcomm);

        /* Compute time and distribute data to do matrix multiplication */
        if(task_id == 0){
            time[7] = MPI_Wtime();
            printf("Group 2: Gather all the data of 1_im2(column) takes %f s.\n", time[7] - time[6]);
            /* Total time */
            printf("Group 2: Total time for task 2 in group 2 takes %f s.\n", time[7] - time[0]);
			
			comm_time += time[7] - time[6] + time[5] - time[3] + time[2] - time[1];
			comp_time += time[6] - time[5] + time[3] - time[2];
            /* Send data to group 3 via the inter communicator */
            MPI_Send(data, N*N, MPI_FLOAT, task_id, 23, t2_t3_comm);
        }
    }
    else if(task == 2){
        // Task 3
        /* Local matrix for matrix multiplication */
        float local_data2[chunkSize][N];
        /* Create inter communicators for task 1 and task3, task 2 and task 3, task 3 and task 4 */
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 0, 1, &t1_t3_comm);
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 1, 2, &t2_t3_comm);
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 3, 3, &t3_t4_comm);

        /* Receive data from group 1 and group 2 */
        if(task_id == 0){
            time[0] = MPI_Wtime();

            MPI_Recv(input_1, N*N, MPI_FLOAT, task_id, 13, t1_t3_comm, &status);
            MPI_Recv(input_2, N*N, MPI_FLOAT, task_id, 23, t2_t3_comm, &status);

            time[1] = MPI_Wtime();

            /* Time of receiving data from group 1 and group 2 */
            printf("Group 3: Receiving data from group 1 and group 2 takes %f s.\n", time[1] - time[0]);
        }

        /* Do matrix multiplication */
        MPI_Scatter(input_1, chunkSize*N, MPI_FLOAT,
                    local_data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);
        /* Broadcast data2 to all the ranks */
        MPI_Bcast(input_2, N*N, MPI_FLOAT, 0, taskcomm);

        if(task_id == 0){
            time[2] = MPI_Wtime();
            printf("Group 3: Scattering data for multiplication takes %f s.\n", time[2] - time[1]);
        }

        /* Matrix multiplication */
        for(i = 0; i < chunkSize; i++)
            for(j = 0; j < N; j++){
                local_data2[i][j] = 0;
                for(k = 0; k < N; k++)
                    local_data2[i][j] += local_data[i][k]*input_2[k][j];
            }

        /* Collect multiplication result from each rank */
        if(task_id == 0){
            time[3] = MPI_Wtime();
            printf("Group 3: Matrix multiplication takes %f s.\n", time[3] - time[2]);
        }

        /* Gather data */
        MPI_Gather(local_data2, chunkSize*N, MPI_FLOAT,
                   data, chunkSize*N, MPI_FLOAT,
                   0, taskcomm);

        if(task_id == 0){
            time[4] = MPI_Wtime();
            printf("Group 3: Gathering data after Matrix multiplication takes %f s.\n", time[4] - time[3]);
            /* total time */
            printf("Group 3: Total time for task 3 in group 3 takes %f s.\n", time[4] - time[0]);
            /* send result of matrix multiplication to group 4 */
            MPI_Send(data, N*N, MPI_FLOAT, task_id, 34, t3_t4_comm);
            /* Only the group-3 root has valid timings, so accumulate them here */
            comm_time += time[4] - time[3] + time[2] - time[0];
            comp_time += time[3] - time[2];
        }

        MPI_Comm_free(&t1_t3_comm);
        MPI_Comm_free(&t2_t3_comm);
    }
    else{
        // Task 4
        /* Create an inter communicator for task 3 and task 4 */
        MPI_Intercomm_create(taskcomm, 0, MPI_COMM_WORLD, 2, 3, &t3_t4_comm);

        /* Receive data from group 3 */
        if(task_id == 0){
            time[0] = MPI_Wtime();

            MPI_Recv(data, N*N, MPI_FLOAT, task_id, 34, t3_t4_comm, &status);

            time[1] = MPI_Wtime();
            printf("Group 4: Receiving data from group 3 takes %f s.\n", time[1] - time[0]);
        }

        /* Scatter data to each processor */
        MPI_Scatter(data, chunkSize*N, MPI_FLOAT,
                    local_data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        if(task_id == 0){
            time[2] = MPI_Wtime();
            printf("Group 4: Scattering data of rows to each processor takes %f s.\n", time[2] - time[1]);
        }

        /* Inverse-2DFFT(row) */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* Inverse FFT each row of the result */
                temp_data[j].r = local_data[i][j];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, 1);

            for(j = 0; j < N; j++)
                local_data[i][j] = temp_data[j].r;
        }

        if(task_id == 0){
            time[3] = MPI_Wtime();
            printf("Group 4: Inverse-2DFFT(row) takes %f s.\n", time[3] - time[2]);
        }
        /* Gather all the data */
        MPI_Gather(local_data, chunkSize*N, MPI_FLOAT,
                    data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        if(task_id == 0){
            time[4] = MPI_Wtime();
            printf("Group 4: Gathering data of Inverse-2DFFT(row) takes %f s.\n", time[4] - time[3]);
        }

        MPI_Scatter(data, chunkSize, column,
                    local_data, chunkSize, column,
                    0, taskcomm);

        if(task_id == 0){
            time[5] = MPI_Wtime();
            printf("Group 4: Scattering data of columns to each processor takes %f s.\n", time[5] - time[4]);
        }

        /* Inverse-2DFFT(column) for output file */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* Inverse FFT each column of the result */
                temp_data[j].r = local_data[j][i];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, 1);

            for(j = 0; j < N; j++)
                local_data[j][i] = temp_data[j].r;
        }

        if(task_id == 0){
            time[6] = MPI_Wtime();
            printf("Group 4: Inverse-2DFFT(column) takes %f s.\n", time[6] - time[5]);
        }

        /* Gather all the columns of output file from each rank */
        MPI_Gather(local_data, chunkSize, column,
                    data, chunkSize, column,
                    0, taskcomm);

        if(task_id == 0){
            time[7] = MPI_Wtime();
                printf("Group 4: Gathering data of Inverse-2DFFT(column) takes %f s.\n", time[7] - time[6]);

            writeFile(data);
            time[8] = MPI_Wtime();
            printf("Group 4: Writing file to out_1 takes %f s.\n", time[8] - time[7]);
			
			comm_time += time[7] - time[6] + time[5] - time[3] + time[2] - time[0];
			comp_time += time[6] - time[5] + time[3] - time[2];
        }
        MPI_Comm_free(&t3_t4_comm);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if(world_id == 0){
        end = MPI_Wtime();
		printf("Total communication time of 2D convolution using MPI task parallel takes %f s.\n", comm_time);
		printf("Total computing time of 2D convolution using MPI task parallel takes %f s.\n", comp_time);
		printf("Total running time without loading/writing of 2D convolution using MPI task parallel takes %f s.\n", comm_time + comp_time);
        printf("Total running time of 2D convolution using MPI task parallel takes %f s.\n", end - start);
    }

    /* Free vector and task comm */
    MPI_Type_free(&column);
    MPI_Type_free(&col);
    MPI_Comm_free(&taskcomm);
    MPI_Finalize();
    return 0;
}
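The scatter/gather calls above pass a derived datatype named column (plus a companion col, both freed at the end) whose construction happens earlier in the program and is not shown in this excerpt. As a rough sketch of how such a column type is commonly built for an N x N row-major float image; the value of N and the exact construction are assumptions for illustration, not taken from this program:

#include <mpi.h>

#define N 512                      /* image dimension, assumed for illustration */

int main(int argc, char *argv[])
{
    MPI_Datatype col, column;

    MPI_Init(&argc, &argv);

    /* one column of an N x N row-major float matrix: N elements with stride N */
    MPI_Type_vector(N, 1, N, MPI_FLOAT, &col);
    /* shrink the extent to one float so that consecutive columns handed to
       MPI_Scatter/MPI_Gather start one element apart, not one full column apart */
    MPI_Type_create_resized(col, 0, sizeof(float), &column);
    MPI_Type_commit(&column);

    /* ... MPI_Scatter(data, chunkSize, column, local_data, chunkSize, column, 0, comm) ... */

    MPI_Type_free(&column);
    MPI_Type_free(&col);
    MPI_Finalize();
    return 0;
}

With this layout, gathering chunkSize columns from each rank back into data reassembles the full image column by column, which is what the column-FFT phases above rely on.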
Example #12
void online_measurement(const int traj, const int id, const int ieo) {
    int i, j, t, tt, t0;
    double *Cpp = NULL, *Cpa = NULL, *Cp4 = NULL;
    double res = 0., respa = 0., resp4 = 0.;
    double atime, etime;
    float tmp;
    operator * optr;
#ifdef MPI
    double mpi_res = 0., mpi_respa = 0., mpi_resp4 = 0.;
    // send buffer for MPI_Gather
    double *sCpp = NULL, *sCpa = NULL, *sCp4 = NULL;
#endif
    FILE *ofs;
    char *filename;
    char buf[100];
    spinor phi;
    filename=buf;
    sprintf(filename,"%s%.6d", "onlinemeas." ,traj);

    init_operators();
    if(no_operators < 1) {
        if(g_proc_id == 0) {
            fprintf(stderr, "Warning! no operators defined in input file, cannot perform online correlator measurements!\n");
        }
        return;
    }
    if(no_operators > 1 && g_proc_id == 0) {
        fprintf(stderr, "Warning! number of operators defined larger than 1, using only the first!\n");
    }
    optr = &operator_list[0];
    // we don't want to do inversion twice for this purpose here
    optr->DownProp = 0;
    if(optr->type != TMWILSON && optr->type != WILSON && optr->type != CLOVER) {
        if(g_proc_id == 0) {
            fprintf(stderr, "Warning! correlator online measurement currently only implemented for TMWILSON, WILSON and CLOVER\n");
            fprintf(stderr, "Cannot perform online measurement!\n");
        }
        return;
    }

    /* generate random timeslice */
    if(ranlxs_init == 0) {
        rlxs_init(1, 123456);
    }
    ranlxs(&tmp, 1);
    t0 = (int)(measurement_list[id].max_source_slice*tmp);
#ifdef MPI
    MPI_Bcast(&t0, 1, MPI_INT, 0, MPI_COMM_WORLD);
#endif
    if(g_debug_level > 1 && g_proc_id == 0) {
        printf("# timeslice set to %d (T=%d) for online measurement\n", t0, g_nproc_t*T);
        printf("# online measurements parameters: kappa = %g, mu = %g\n", g_kappa, g_mu/2./g_kappa);
    }
    atime = gettime();

#ifdef MPI
    sCpp = (double*) calloc(T, sizeof(double));
    sCpa = (double*) calloc(T, sizeof(double));
    sCp4 = (double*) calloc(T, sizeof(double));
    if(g_mpi_time_rank == 0) {
        Cpp = (double*) calloc(g_nproc_t*T, sizeof(double));
        Cpa = (double*) calloc(g_nproc_t*T, sizeof(double));
        Cp4 = (double*) calloc(g_nproc_t*T, sizeof(double));
    }
#else
    Cpp = (double*) calloc(T, sizeof(double));
    Cpa = (double*) calloc(T, sizeof(double));
    Cp4 = (double*) calloc(T, sizeof(double));
#endif
    source_generation_pion_only(g_spinor_field[0], g_spinor_field[1],
                                t0, 0, traj);
    optr->sr0 = g_spinor_field[0];
    optr->sr1 = g_spinor_field[1];
    optr->prop0 = g_spinor_field[2];
    optr->prop1 = g_spinor_field[3];

    // op_id = 0, index_start = 0, write_prop = 0
    optr->inverter(0, 0, 0);

    /* now we bring it to normal format */
    /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
    convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);

    /* now we sum only over local space for every t */
    for(t = 0; t < T; t++) {
        j = g_ipt[t][0][0][0];
        res = 0.;
        respa = 0.;
        resp4 = 0.;
        for(i = j; i < j+LX*LY*LZ; i++) {
            res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
            _gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
            respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
            _gamma5(phi, phi);
            resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
        }

#if defined MPI
        MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        res = mpi_res;
        MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        respa = mpi_respa;
        MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        resp4 = mpi_resp4;
        sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#else
        Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#endif
    }

#ifdef MPI
    /* some gymnastics needed in case of parallelisation */
    if(g_mpi_time_rank == 0) {
        MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
        MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
        MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
    }
#endif

    /* and write everything into a file */
    if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
        ofs = fopen(filename, "w");
        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cpp[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt], 0.);

        fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cpa[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt], 0.);

        fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cp4[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt], 0.);
        fclose(ofs);
    }
#ifdef MPI
    if(g_mpi_time_rank == 0) {
        free(Cpp);
        free(Cpa);
        free(Cp4);
    }
    free(sCpp);
    free(sCpa);
    free(sCp4);
#else
    free(Cpp);
    free(Cpa);
    free(Cp4);
#endif
    etime = gettime();

    if(g_proc_id == 0 && g_debug_level > 0) {
        printf("ONLINE: measurement done int t/s = %1.4e\n", etime - atime);
    }
    return;
}
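The communication pattern above, an MPI_Reduce within each spatial time slice (g_mpi_time_slices) followed by an MPI_Gather across slices (g_mpi_SV_slices), is a common way to assemble a per-timeslice observable on a 2D process grid. Below is a minimal, self-contained sketch of that pattern; the communicator names, the hard-coded 4 ranks per slice, and the 64-slice limit are my own assumptions, not part of the code above.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int world_rank, t_coord, s_coord;
    MPI_Comm slice_comm, time_comm;       /* stand-ins for g_mpi_time_slices / g_mpi_SV_slices */
    double local = 1.0, slice_sum = 0.0;
    double all_sums[64];                  /* assumes at most 64 slices along the time direction */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    /* assume 4 ranks per spatial slice; t_coord indexes the time direction */
    s_coord = world_rank % 4;
    t_coord = world_rank / 4;

    /* ranks sharing a t_coord form one spatial slice */
    MPI_Comm_split(MPI_COMM_WORLD, t_coord, s_coord, &slice_comm);
    /* only rank 0 of each slice joins the communicator used for the final gather */
    MPI_Comm_split(MPI_COMM_WORLD, (s_coord == 0) ? 0 : MPI_UNDEFINED, t_coord, &time_comm);

    /* step 1: sum the local contributions within each spatial slice */
    MPI_Reduce(&local, &slice_sum, 1, MPI_DOUBLE, MPI_SUM, 0, slice_comm);

    /* step 2: the slice roots gather the per-slice sums onto the global root */
    if (time_comm != MPI_COMM_NULL)
        MPI_Gather(&slice_sum, 1, MPI_DOUBLE, all_sums, 1, MPI_DOUBLE, 0, time_comm);

    if (world_rank == 0)
        printf("slice 0 contributed %f\n", all_sums[0]);

    if (time_comm != MPI_COMM_NULL) MPI_Comm_free(&time_comm);
    MPI_Comm_free(&slice_comm);
    MPI_Finalize();
    return 0;
}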
Example #13
int main( int argc, char * argv[])
{
  //int argc;
  //char *argv;
    
  int th_id;
  int num_th;
    
  char *relative_path_to_the_input_file;
  char *relative_path_to_the_output_file;
    
  relative_path_to_the_input_file  = (argc > 1) ? argv[1] : NULL;
  relative_path_to_the_output_file = (argc > 2) ? argv[2] : NULL;

  MPI_Init( &argc, &argv );
  MPI_Comm_rank( MPI_COMM_WORLD, &th_id );
  MPI_Comm_size( MPI_COMM_WORLD, &num_th );


  // **** Your SPMD program goes here ****
  char lines[100000][15];
  unsigned int slice_size = 100000 / num_th;
  char line_buffer[15];
  char lines_slice[slice_size][15];
  char search_string[15];
	int gather_array[num_th][2];
  int i;
	double t1 = MPI_Wtime(); // start timer


  // populate large array
  if (0 == th_id)
  {
    FILE *input_file;

		if (NULL != relative_path_to_the_input_file)
		  input_file = fopen(relative_path_to_the_input_file, "r");
		else
			input_file = fopen("partA.txt", "r");
    
    fgets(line_buffer, 15, input_file);	// skip process amount
    fgets(line_buffer, 15, input_file);	// skip slice amount
    fgets(search_string, 15, input_file);

    for (i = 0; i < 100000; ++i)
      fgets(lines[i], 15, input_file);

		for (i = 1; i < num_th; ++i)
		  MPI_Send(&search_string, 15, MPI_CHAR, i, 0, MPI_COMM_WORLD); // send search string to other processes

		fclose(input_file);
  }
    

	// other processes receive search string
	else
	{
		MPI_Status status;
		MPI_Recv(&search_string, 15, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status);
	}


  // distribute lines array to processes
  MPI_Scatter(&lines, slice_size * 15, MPI_CHAR, &lines_slice, slice_size * 15, MPI_CHAR, 0, MPI_COMM_WORLD);


  // each process searches its own slice of the array
  int data[2];
  data[0] = th_id;
  data[1] = -1;        	
        
  for (i = 0; i < slice_size; ++i)
    if (!strcmp(search_string, lines_slice[i]))
    {
      data[1] = (th_id * slice_size) + i;
      break;
    }


	// gather result back to process 0 (master)
	MPI_Gather(&data, 2, MPI_INT, &gather_array, 2, MPI_INT, 0, MPI_COMM_WORLD);


	// End Timer
	double t2 = MPI_Wtime();


  // process 0 (master) writes results to file
  if (th_id == 0)
	{
    //FILE *output_file;
		
		//if (NULL != relative_path_to_the_output_file)
		//	output_file = fopen(relative_path_to_the_output_file, "a");
		//else
		//	output_file = fopen("outputA.txt", "a");
       
  	for (i = 0; i < num_th; ++i)
  	{
		  if (-1 != gather_array[i][1])
			{
        //fprintf(output_file, "Process %d, found yes, slice %d, position %d\n", gather_array[i], gather_array[i], gather_array[i + 1]);
        printf("Process %d, found yes, slice %d, position %d\n", gather_array[i][0], gather_array[i][0], gather_array[i][1]);
			}
			else
			{
        //fprintf(output_file, "Process %d, found no, slice -1, position -1\n", gather_array[i]);
        printf("Process %d, found no, slice -1, position -1\n", gather_array[i][0]);
      }
		}

		//fprintf(output_file, "Total execution time: %d ms\n", t2-t1);
    //printf("t1: %f, t2: %f\n", t1, t2);
		printf("Total execution time: %f ms\n", (t2-t1) * 1000);
       			
    //fclose(output_file);
  }

	
	// Finish Processes
	MPI_Finalize();


  return 0;
}
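One limitation of the example above: slice_size = 100000 / num_th silently drops the trailing lines whenever the line count is not divisible by the number of processes. A sketch of how that could be handled with MPI_Scatterv instead; LINES, WIDTH and all variable names here are illustrative assumptions rather than names from the original.

#include <mpi.h>
#include <stdlib.h>

#define LINES 100000
#define WIDTH 15

int main(int argc, char *argv[])
{
    int rank, nprocs, i;
    int *counts, *displs;
    char (*all)[WIDTH] = NULL;    /* only the root fills this from the input file */
    char (*mine)[WIDTH];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    counts = malloc(nprocs * sizeof(int));
    displs = malloc(nprocs * sizeof(int));
    for (i = 0; i < nprocs; i++) {
        /* the first LINES % nprocs ranks get one extra line */
        int lines_i = LINES / nprocs + (i < LINES % nprocs ? 1 : 0);
        counts[i] = lines_i * WIDTH;                    /* counts are in chars */
        displs[i] = (i == 0) ? 0 : displs[i-1] + counts[i-1];
    }

    if (rank == 0)
        all = malloc(sizeof(char[LINES][WIDTH]));
    mine = malloc(counts[rank]);

    MPI_Scatterv(all, counts, displs, MPI_CHAR,
                 mine, counts[rank], MPI_CHAR, 0, MPI_COMM_WORLD);

    /* ... search mine[0 .. counts[rank]/WIDTH - 1] as in the example above ... */

    free(mine); free(displs); free(counts);
    if (rank == 0) free(all);
    MPI_Finalize();
    return 0;
}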
Example #14
File: par_stats.c  Project: 8l/insieme
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   grid_cmplxty = 0.0;
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}  
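A small point worth noting about the statistics loops above: in the HYPRE_NO_GLOBAL_PARTITION branch the per-level minima and maxima are obtained from a single MPI_Reduce with MPI_MAX by negating the quantities whose minimum is wanted, while the other branch gathers every rank's values with MPI_Gather and scans them on the root. A minimal sketch of the negation trick follows; the variable names are mine, not hypre's.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank;
    double local_min, local_max, send[2], recv[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    local_min = 1.0 + rank;      /* stand-ins for min_rowsum / max_rowsum */
    local_max = 10.0 + rank;

    send[0] = -local_min;        /* max over (-x) equals -(min over x) */
    send[1] =  local_max;
    MPI_Reduce(send, recv, 2, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("global min = %g, global max = %g\n", -recv[0], recv[1]);

    MPI_Finalize();
    return 0;
}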
int main(int argc, char *argv[])
{
	int p, my_rank;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &p);
	int n, local_n;
	double *A, *A_root = NULL, *x, *C, *C_root = NULL;	/* root-only buffers start as NULL */

	//Allocating x on all the processors
	x=(double *)malloc(SIZE*sizeof(double));

	if (my_rank==0)
	{
		//Scanning the matrix A and allocating the memory only on the master processor
		A_root=(double *)malloc(SIZE*SIZE*sizeof(double));
		C_root=(double *)malloc(SIZE*sizeof(double));
		for (int i = 0; i < SIZE*SIZE; i++)
		{
			scanf("%lf",&A_root[i]);
		}
		
		for (int i = 0; i < SIZE; i++)
		{
			scanf("%lf",&x[i]);
		}
	}

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Bcast(x, SIZE, MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);

	local_n=SIZE/p;	/* assumes SIZE is divisible by p */
	A=(double *)malloc(SIZE*local_n*sizeof(double));
	C=(double *)malloc(SIZE*sizeof(double));

	//Scattering the matrix to different processors
	MPI_Scatter(A_root, SIZE*local_n, MPI_DOUBLE, A, SIZE*local_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	
	for (int i = 0; i < local_n; i++)
	{
		C[i]=0;
		/* each local row has SIZE columns */
		for (int j = 0; j < SIZE; j++)
		{
			C[i]+=A[i*SIZE+j]*x[j];
		}
	}
	
	//Finally gathering the local_n results from every process on the master
	MPI_Gather(C,local_n,MPI_DOUBLE,C_root,local_n,MPI_DOUBLE,0,MPI_COMM_WORLD);
	if (my_rank==0)
	{
		for (int i = 0; i < SIZE; i++)
		{
			printf("%lf\n", C_root[i]);
		}
	}
	
	MPI_Finalize();
	return 0;
}
Example #16
int main(int argc, char *argv[])
{
	MPI_Init(&argc, &argv);

	int POPSIZE = atoi(argv[1]);	
	int GENERATION = atoi(argv[2]);
	int NUM_GAMES = atoi(argv[3]);
	float CROSSOVER = atof(argv[4]);
	float MUTATION = atof(argv[5]);
	int i,j,k,q,s,b2d,count;
	int world_rank,world_size;
	unsigned int temp[2], temp2[2];
	srand48(time(NULL));
	float RANDOM2 = drand48();	/* seeded first; redrawing this each generation would be more typical */
	pop *player = NULL;
	pop p[2];

	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
	
/*------------------------MPI_STRUCT----------------------------------*/
	MPI_Datatype mpi_pop;
	MPI_Datatype types[3] = {MPI_UNSIGNED,MPI_UNSIGNED,MPI_UNSIGNED};
	int block[3] = {4,1,1};
	MPI_Aint offset[3] = {offsetof(pop,history),offsetof(pop,fitness),offsetof(pop,move)};
	MPI_Type_create_struct(3,block,offset,types,&mpi_pop);
	MPI_Type_commit(&mpi_pop);

	if (world_rank == 0)
	{
		if (world_size > POPSIZE/4)
		{
			printf("Too many processes for Population Size.\n Please input an even Population Size.\n");
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
	}

/*---------------Allocate the memory for the players------------------*/
	
	if (world_rank == 0)
	{	
		player = malloc(POPSIZE*sizeof(pop));
		for (i=0; i<POPSIZE; i++)
		{
			for(j=0; j<4; j++)
			{
				player[i].history[j] = lrand48() % 2;
				player[i].fitness = 0;
			}
		}	
		printf("Processor %d has data:\n", world_rank);
		for (i=0; i<POPSIZE; i++)
		{
			for (j=0; j<4; j++)
			{
				printf("%d ", player[i].history[j]);
			}
			printf("\n");
		}
	}
	
	int ARRAY_SIZE = POPSIZE/4;
	pop *sub_arrays = NULL;

	/* every rank, including the root, needs this buffer for MPI_Scatter/MPI_Gather */
	sub_arrays = malloc(ARRAY_SIZE*sizeof(pop));

/*-----------------------RUN THE ALGORITHM---------------------------*/	

	for (k=0; k<GENERATION; k++)
	{
		MPI_Scatter(player, ARRAY_SIZE, mpi_pop, sub_arrays, ARRAY_SIZE, mpi_pop, 0, MPI_COMM_WORLD);

		if (world_rank != 0)
		{
			for (i=0; i<ARRAY_SIZE; i++)
			{
				for (j=ARRAY_SIZE-1; j>=0; j--)
				{
					p[0] = sub_arrays[i];
					p[1] = sub_arrays[j];
				
					for(q=0; q<NUM_GAMES; q++)
					{
						b2d = ((p[0].history[0]*8) + (p[0].history[1]*4) + (p[0].history[2]*2) + p[0].history[3]);
						b2d = ((p[1].history[0]*8) + (p[1].history[1]*4) + (p[1].history[2]*2) + p[1].history[3]);
									
						Strategy(p[0], b2d);
						Strategy(p[1], b2d);
						Fitness(p);

						for (s=3; s>0; s--)	/* history has 4 entries: indices 0..3 */
						{
							p[0].history[s] = p[0].history[s-1];
							p[1].history[s] = p[1].history[s-1];
						} 	
						p[0].history[0] = p[0].move;
						p[1].history[0] = p[1].move;
					}

					sub_arrays[i] = p[0];
					sub_arrays[j] = p[1];
				}
			}
		}

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Gather(sub_arrays, ARRAY_SIZE, mpi_pop, player, ARRAY_SIZE, mpi_pop, 0, MPI_COMM_WORLD);	

/*-----------------------Perform Selection-----------------------------*/		
	
		if (world_rank == 0)
		{
			for(count=0; count<2; count++)
			{
				int sumFitness = 0;
				for (i=0; i<POPSIZE; i++)
				{
					sumFitness += player[i].fitness;
					int RANDOM = (sumFitness > 0) ? (lrand48() % sumFitness) : 0;
					if (sumFitness >= RANDOM)
					{
						p[count] =  player[i];
					}
				}
			}
/*------------------------Crossover-------------------------------------*/

			if (RANDOM2 < CROSSOVER)
			{
				temp [0] = p[0].history[2];
				temp [1] = p[0].history[3];
				temp2[0] = p[1].history[2];
				temp2[1] = p[1].history[3];
				p[0].history[2] = temp2[0];
				p[0].history[3] = temp2[1];
				p[1].history[2] = temp[ 0];
				p[1].history[3] = temp[ 1];
			}

/*---------------------Mutate Players------------------------------------*/


			if (RANDOM2 < MUTATION)
			{
				int mp = lrand48() % 4;
				for (count=0; count<2; count++)
				{
					if (p[count].history[mp] == 0) 
					{	
						p[count].history[mp] = 1;
					}
					else
					{ 
						p[count].history[mp] = 0;
					}
				}
			}
			
			player[lrand48() % POPSIZE] = p[0];
			player[lrand48() % POPSIZE] = p[1];
		}

		printf("Processor %d has data:\n", world_rank);
		for (i=0; i<POPSIZE; i++)
		{
			for (j=0; j<4; j++)
			{
				printf("%d ", player[i].history[j]);
			}
			printf("\n");
		}
	}

	if (world_rank == 0)
	{
		for(i=0; i<POPSIZE; i++)
		{
			for (j=0; j<4; j++)
			{
				printf("%d", player[i].history[j]);
			}
			printf("\n");
			printf("Fitness: %d\n", player[i].fitness);
		}
	}
	if (world_rank == 0)
		free(player);
	free(sub_arrays);

	MPI_Finalize();
	return 0;

}
void GaussianMean1DRegressionCompute(const QUESO::BaseEnvironment& env,
    double priorMean, double priorVar, const likelihoodData& dat)
{
  // parameter space: 1-D on (-infinity, infinity)
  QUESO::VectorSpace<P_V, P_M> paramSpace(
					 env,       // queso environment
					 "param_",  // name prefix
					 1,         // dimensions
					 NULL);     // names

  P_V paramMin(paramSpace.zeroVector());
  P_V paramMax(paramSpace.zeroVector());
  paramMin[0] = -INFINITY;
  paramMax[0] = INFINITY;
  QUESO::BoxSubset<P_V, P_M> paramDomain(
					"paramBox_",  // name prefix
					paramSpace,   // vector space
					paramMin,     // min values
					paramMax);    // max values

  // gaussian prior with user supplied mean and variance
  P_V priorMeanVec(paramSpace.zeroVector());
  P_V priorVarVec(paramSpace.zeroVector());
  priorMeanVec[0] = priorMean;
  priorVarVec[0] = priorVar;
  QUESO::GaussianVectorRV<P_V, P_M> priorRv("prior_", paramDomain, priorMeanVec,
      priorVarVec);

  // likelihood is important
  QUESO::GenericScalarFunction<P_V, P_M> likelihoodFunctionObj(
							      "like_",                   // name prefix
							      paramDomain,               // image set
							      LikelihoodFunc<P_V, P_M>,  // routine
							      (void *) &dat,             // routine data ptr
							      true);                     // routineIsForLn

  QUESO::GenericVectorRV<P_V, P_M> postRv(
      "post_",       // name prefix
       paramSpace);  // image set


  // Initialize and solve the Inverse Problem with Bayes multi-level sampling
  QUESO::StatisticalInverseProblem<P_V, P_M> invProb(
      "",                     // name prefix
      NULL,                   // alt options
      priorRv,                // prior RV
      likelihoodFunctionObj,  // likelihood fcn
      postRv);                // posterior RV

  invProb.solveWithBayesMLSampling();

  // compute mean and second moment of samples on each proc via Knuth online mean/variance algorithm
  int N = invProb.postRv().realizer().subPeriod();
  double subMean = 0.0;
  double subM2 = 0.0;
  double delta;
  P_V sample(paramSpace.zeroVector());
  for (int n = 1; n <= N; n++) {
    invProb.postRv().realizer().realization(sample);
    delta = sample[0] - subMean;
    subMean += delta / n;
    subM2 += delta * (sample[0] - subMean);
  }

  // gather all Ns, means, and M2s to proc 0
  std::vector<int> unifiedNs(env.inter0Comm().NumProc());
  std::vector<double> unifiedMeans(env.inter0Comm().NumProc());
  std::vector<double> unifiedM2s(env.inter0Comm().NumProc());
  MPI_Gather(&N, 1, MPI_INT, &(unifiedNs[0]), 1, MPI_INT, 0,
      env.inter0Comm().Comm());
  MPI_Gather(&subMean, 1, MPI_DOUBLE, &(unifiedMeans[0]), 1, MPI_DOUBLE, 0,
      env.inter0Comm().Comm());
  MPI_Gather(&subM2, 1, MPI_DOUBLE, &(unifiedM2s[0]), 1, MPI_DOUBLE, 0,
      env.inter0Comm().Comm());

  // get the total number of likelihood calls at proc 0
  unsigned long totalLikelihoodCalls = 0;
  MPI_Reduce(&likelihoodCalls, &totalLikelihoodCalls, 1, MPI_UNSIGNED_LONG,
      MPI_SUM, 0, env.inter0Comm().Comm());

  // compute global posterior mean and std via Chan algorithm, output results on proc 0
  if (env.inter0Rank() == 0) {
    int postN = unifiedNs[0];
    double postMean = unifiedMeans[0];
    double postVar = unifiedM2s[0];
    for (unsigned int i = 1; i < unifiedNs.size(); i++) {
      delta = unifiedMeans[i] - postMean;
      postMean = (postN * postMean + unifiedNs[i] * unifiedMeans[i]) /
        (postN + unifiedNs[i]);
      postVar += unifiedM2s[i] + delta * delta *
        (((double)postN * unifiedNs[i]) / (postN + unifiedNs[i]));
      postN += unifiedNs[i];
    }
    postVar /= postN;

    //compute exact answer - available in this case since the exact posterior is a gaussian
    N = dat.dataSet.size();
    double dataSum = 0.0;
    for (int i = 0; i < N; i++)
      dataSum += dat.dataSet[i];
    double datMean = dataSum / N;
    double postMeanExact = (N * priorVar / (N * priorVar + dat.samplingVar)) *
      datMean + (dat.samplingVar / (N * priorVar + dat.samplingVar)) * priorMean;
    double postVarExact = 1.0 / (N / dat.samplingVar + 1.0 / priorVar);

    std::cout << "Number of posterior samples: " << postN << std::endl;
    std::cout << "Estimated posterior mean: " << postMean << " +/- "
      << std::sqrt(postVar) << std::endl;
    std::cout << "Likelihood function calls: " << totalLikelihoodCalls
      << std::endl;
    std::cout << "\nExact posterior: Gaussian with mean " << postMeanExact
      << ", standard deviation " << std::sqrt(postVarExact) << std::endl;
  }
}
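For reference, the merge loop above is the standard Chan et al. pairwise combination of the per-process counts, means and sums of squared deviations gathered into unifiedNs, unifiedMeans and unifiedM2s. Writing $n_A$, $\bar{x}_A$, $M_{2,A}$ for one block, the same with subscript $B$ for the other, and $\delta = \bar{x}_B - \bar{x}_A$:

$$\bar{x}_{AB} = \frac{n_A\,\bar{x}_A + n_B\,\bar{x}_B}{n_A + n_B}, \qquad M_{2,AB} = M_{2,A} + M_{2,B} + \delta^2\,\frac{n_A\,n_B}{n_A + n_B}.$$

Dividing the final $M_2$ by the pooled sample count, as postVar /= postN does, then gives the variance of all gathered posterior samples.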
Example #18
File: bCast.c  Project: 8l/insieme
int main (int argc, char *argv[])
{
    int err;
    double time, time_limit, time_maxMsg;

    int iter, iter_limit;
    size_t size, messStart, messStop, mem_limit;
    int testFlags, ndims, partsize;
    int k;

    char  hostname[256];
    char* hostnames;

    int root = 0;

    struct argList args;
    /* process the command-line arguments, printing usage info on error */
    if (!processArgs(argc, argv, &args)) { usage(); }
    iter       = args.iters;
    messStart  = args.messStart;
    messStop   = args.messStop;
    mem_limit  = args.memLimit;
    time_limit = args.timeLimit;
    testFlags  = args.testFlags;
    check_buffers = args.checkBuffers;
    ndims      = args.ndims;
    partsize   = args.partSize; 

    /* initialize MPI */
    err = MPI_Init(&argc, &argv);
    if (err) { printf("Error in MPI_Init\n"); exit(1); }

    /* determine who we are in the MPI world */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_local);
    MPI_Comm_size(MPI_COMM_WORLD, &rank_count);

#ifdef PRINT_ENV
   /* Print environment as part of Sequoia SOW MPI requirements */
   extern void printEnv(void);
   if (rank_local == 0) { printEnv(); }
#endif 

    /* mark start of mpiBench output */
    if (rank_local == 0) { printf("START mpiBench_Bcast v%s\n", VERS); }

    /* collect hostnames of all the processes and print rank layout */
    gethostname(hostname, sizeof(hostname));
    hostnames = (char*) _ALLOC_MAIN_(sizeof(hostname)*rank_count, "Hostname array");
    MPI_Gather(hostname, sizeof(hostname), MPI_CHAR, hostnames, sizeof(hostname), MPI_CHAR, 0, MPI_COMM_WORLD);
    if (rank_local == 0) {
        for(k=0; k<rank_count; k++) {
            printf("%d : %s\n", k, &hostnames[k*sizeof(hostname)]);
        }
    }

    /* allocate message buffers and initialize timing functions */
    while(messStop*((size_t)rank_count)*2 > mem_limit && messStop > 0) messStop /= 2;
    buffer_size = messStop * rank_count;
    sbuffer   = (char*) _ALLOC_MAIN_(messStop    * rank_count, "Send Buffer");
    rbuffer   = (char*) _ALLOC_MAIN_(messStop    * rank_count, "Receive Buffer");
    sendcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Counts");
    sdispls    = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Displacements");
    recvcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Counts");
    rdispls    = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Displacements");

    /*time_maxMsg = 2*time_limit; */
    time_maxMsg = 0.0;

    /* if partsize was specified, calculate the number of partions we need */
    int partitions = 0;
    if (partsize > 0) {
        /* keep dividing comm in half until we get to partsize */
        int currentsize = rank_count;
        while (currentsize >= partsize) {
            partitions++;
            currentsize >>= 1;
        }
    }
Example #19
mpi_filebuf * mpi_filebuf::flush()
{
  const double start_time = mpi_wall_time();

  int result = -1 ; // Failure return value

  if ( nullptr != comm_buffer && comm_output ) { // Open for write

    int err = 0 ;

    result = 0 ;

    // Determine the local length:

    char * cur_buf = comm_buffer ;
    unsigned int cur_len = pptr() - cur_buf ;

    // Determine the global lengths

    char * recv_buf  = nullptr ;
    int  * recv_len  = nullptr ;
    int  * recv_disp = nullptr ;

    int nproc = 1 ;


//  if ( nullptr != comm_root_fp ) {

//  It should not be necessary to allocate recv_len on non-root
//  nodes, but the MPI_Gatherv on Janus always accesses recv_len
//  even on non-root processors, which causes a segmentation
//  violation if recv_len is set to nullptr.

    if ( MPI_SUCCESS != ( err = MPI_Comm_size(comm,&nproc) ) )
      MPI_Abort( comm , err );
    recv_len = static_cast<int*>(std::malloc( sizeof(int) * nproc ));

    if ( nullptr == recv_len ) MPI_Abort( comm , MPI_ERR_UNKNOWN );

    for (int j = 0 ; j < nproc ; ++j )
      recv_len[j] = 0;
//  }

    // Gather buffer lengths on the root processor

    if ( MPI_SUCCESS != ( err =
	 MPI_Gather(&cur_len,1,MPI_INT,recv_len,1,MPI_INT,comm_root,comm)))
      MPI_Abort( comm , err );

    // Root processor must allocate enough buffer space:

    if ( nullptr != comm_root_fp ) {

      recv_len[ comm_root ] = 0 ; // Don't send to self

      if ( nullptr == ( recv_disp = static_cast<int*>(std::malloc( sizeof(int) * (nproc + 1) )) ) )
	result = -1 ;

      if ( 0 == result ) { // Allocation succeeded

	recv_disp[0] = 0 ;

	for (int i = 0 ; i < nproc ; ++i )
	  recv_disp[i+1] = recv_disp[i] + recv_len[i] ;

	if ( 0 < recv_disp[nproc] ) {
	  if ( nullptr == ( recv_buf = static_cast<char*>(std::malloc( recv_disp[nproc] ) ) ))
	    result = -1 ;
	}
	else {
	  result = 1 ; // No need to gather!
	}

	if ( -1 != result ) {

	  // Write the root processor's buffer

	  if ( 0 < cur_len ) {
	    if ( std::fwrite(cur_buf,1,cur_len,comm_root_fp) != cur_len )
	      result = -1 ; // Write failed

	    cur_len = 0 ; // Wrote this buffer
	  }
	}
      }
      std::fflush( comm_root_fp );
    }

    // Root process broadcasts that all is well with the allocation

    if ( MPI_SUCCESS != ( err = MPI_Bcast(&result,1,MPI_INT,comm_root,comm)))
      MPI_Abort( comm , err );

    if ( 0 == result ) { // All-is-well, need to gather and write

      // Gather the buffers to the root processor

      if ( MPI_SUCCESS != ( err =
	   MPI_Gatherv(cur_buf,  cur_len,             MPI_BYTE,
		       recv_buf, recv_len, recv_disp, MPI_BYTE,
		       comm_root, comm ) ) )
	MPI_Abort( comm , err );

       // Output the buffers, beginning with 'comm_root'

      if ( nullptr != comm_root_fp ) {

	for (int i = 1 ; i < nproc && 0 == result ; ++i ) {
	  const int j   = ( i + comm_root ) % nproc ;
	  const unsigned int len = recv_len[j] ;

	  if ( 0 < len )
	    if ( std::fwrite(recv_buf+recv_disp[j],1,len,comm_root_fp) != len )
	      result = -1 ; // Write failed
	}

	std::fflush( comm_root_fp );
      }

      // Broadcast that the write succeeded

      if ( MPI_SUCCESS != ( err = MPI_Bcast(&result,1,MPI_INT,comm_root,comm)))
	MPI_Abort( comm , err );
    }
    else if ( 1 == result ) {
      // Did not need to gather

      result = 0 ;
    }

    // Reset the output buffer

    setp( comm_buffer , epptr() );

    // Clean up allocated memory

    if ( nullptr != recv_buf  ) std::free( recv_buf );
    if ( nullptr != recv_len  ) std::free( recv_len );
    if ( nullptr != recv_disp ) std::free( recv_disp );
  }

  comm_time += mpi_wall_time() - start_time ;

  return -1 == result ? nullptr : this ;
}
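
flush() above uses the standard two-phase recipe for variable-length data: gather the per-rank byte counts with MPI_Gather, prefix-sum them into displacements on the root, then move the payloads with MPI_Gatherv. A stripped-down sketch of just that recipe (a hypothetical helper, not part of the mpi_filebuf class):

#include <mpi.h>
#include <stdlib.h>

/* Gather `len` bytes from every rank; returns the concatenated buffer on the
 * root (caller frees) and NULL elsewhere. *total_out is the global byte count
 * on the root. */
static char *gather_bytes(const char *buf, int len, int root, MPI_Comm comm, int *total_out)
{
    int rank, nproc, i;
    int *counts = NULL, *displs = NULL;
    char *all = NULL;

    *total_out = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    if (rank == root) {
        counts = (int*) malloc(nproc * sizeof(int));
        displs = (int*) malloc((nproc + 1) * sizeof(int));
    }

    /* phase 1: how many bytes does each rank contribute? */
    MPI_Gather(&len, 1, MPI_INT, counts, 1, MPI_INT, root, comm);

    if (rank == root) {
        displs[0] = 0;
        for (i = 0; i < nproc; i++)
            displs[i + 1] = displs[i] + counts[i];          /* prefix sum -> displacements */
        *total_out = displs[nproc];
        all = (char*) malloc(*total_out > 0 ? *total_out : 1);
    }

    /* phase 2: gather the payloads at the computed offsets */
    MPI_Gatherv((void*)buf, len, MPI_BYTE,
                all, counts, displs, MPI_BYTE, root, comm);

    if (rank == root) { free(counts); free(displs); }
    return all;
}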
Example #20
0
File: Bitonic.c Project: Dani7B/CPD
int main(int argc, char* argv[]){
	int rank, size, n, i, j, elementiXproc, stage, length, next;
	orderedAfterSwap *m;
	char *binary;
	FILE *file;
	float *elementi, *mieiElementi, *result;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if(argc<2) {
		printf("Numero argomenti non sufficiente: %d richiesto %d", argc-1, 1);
		MPI_Abort(MPI_COMM_WORLD, 0);
		return 1;
	}

	if(rank==0) {
		writeFile();
		file = fopen(argv[1],"rb");

		if(file==NULL) {
			printf("Non è stato possibile aprire il file: %s", argv[1]);
			MPI_Abort(MPI_COMM_WORLD, 0);
			return 1;
		}

		fread(&n, sizeof(int), 1, file);
		elementiXproc = n/size;
		mieiElementi = malloc(sizeof(float)*elementiXproc);
		elementi = malloc(sizeof(float)*elementiXproc);
		fread(mieiElementi, sizeof(float), elementiXproc, file);

		for(i=1; i<size; i++){
			MPI_Send (&elementiXproc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
			fread(elementi, sizeof(float), elementiXproc, file);
			MPI_Send (elementi, elementiXproc, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
		}
		fclose(file);
		result = malloc(sizeof(float)*n);
	}

	else {
		MPI_Recv (&elementiXproc, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		mieiElementi = malloc(sizeof(float)*elementiXproc);
		MPI_Recv (mieiElementi, elementiXproc, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		elementi = malloc(sizeof(float)*elementiXproc);
	}

	qsort(mieiElementi, elementiXproc, sizeof(float), floatcomp);
	length = log(size)/log(2);
	binary = intToBinary(rank,length);
	for(stage=0; stage<length; stage++) {
		if(binary[stage]=='0'){
			binary[stage] = '1';
			next = binaryToInt(binary, length);
			binary[stage] = '0';
			MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
			MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			m = swapMin(mieiElementi,elementi,elementiXproc);
			mieiElementi = m->mieiElementi;
		}
		else {
			binary[stage] = '0';
			next = binaryToInt(binary, length);
			binary[stage] = '1';
			MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
			m = swapMax(mieiElementi,elementi,elementiXproc);
			mieiElementi = m->mieiElementi;
		}
	}

	MPI_Gather(mieiElementi, elementiXproc, MPI_FLOAT, result, elementiXproc, MPI_FLOAT, 0, MPI_COMM_WORLD);

	if(rank==0){
		printf("[ ");
		for(j=0; j<n; j++) {
			printf("%f ", result[j]);
		}
		printf("] \n");
		free(result);
	}

	free(m);
	free(binary);
	free(mieiElementi);
	free(elementi);

	MPI_Finalize();
	return 0;
}
Example #21
0
void saveParticle_HDF(Domain D,int iteration,int s,double minPx)
{
    int i,j,k,istart,iend,jstart,jend,kstart,kend;
    int nxSub,nySub,nzSub,cnt,totalCnt,start,index;
    int minXSub,minYSub,minZSub;
    double dx,dy,dz,lambda,tmpDouble;
    char name[100];
    double *saveDouble;
    int *saveInt,offset[2];
    Particle ***particle;
    particle=D.particle;
    ptclList *p;
    LoadList *LL;
    int myrank, nTasks;    
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nTasks);
    int recv[nTasks];
    void saveParticleComp_Double();
    void saveParticleComp_Int();

    hid_t file_id;
    herr_t status;

    istart=D.istart;
    iend=D.iend;
    jstart=D.jstart;
    jend=D.jend;
    kstart=D.kstart;
    kend=D.kend;
    nxSub=D.nxSub;
    nySub=D.nySub;
    nzSub=D.nzSub;
    dx=D.dx;
    dy=D.dy;
    dz=D.dz;
    lambda=D.lambda;
    minXSub=D.minXSub;
    minYSub=D.minYSub;
    minZSub=D.minZSub;


    sprintf(name,"%dParticle%d.h5",s,iteration);
//    plist_id=H5Pcreate(H5P_FILE_ACCESS);
//    H5Pset_fapl_mpio(plist_id,MPI_COMM_WORLD,MPI_INFO_NULL);
//    H5Pset_fclose_degree(plist_id,H5F_CLOSE_SEMI);
    if(myrank==0)
    {
      file_id=H5Fcreate(name,H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT);
      H5Fclose(file_id);
    }
    else	;
    MPI_Barrier(MPI_COMM_WORLD);

    switch(D.dimension) {
    //2D
    case 2:
      k=0;
      cnt=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
              cnt++;
            else	;
            p=p->next;
          }
        }
      saveDouble = (double *)malloc(cnt*sizeof(double ));      
      saveInt = (int *)malloc(cnt*sizeof(int ));      
      MPI_Gather(&cnt,1,MPI_INT,recv,1,MPI_INT,0,MPI_COMM_WORLD);
      MPI_Bcast(recv,nTasks,MPI_INT,0,MPI_COMM_WORLD);
      MPI_Barrier(MPI_COMM_WORLD);

      start=0;
      for(i=0; i<myrank; i++)
        start+=recv[i];
      totalCnt=0;
      for(i=0; i<nTasks; i++)
        totalCnt+=recv[i];

      if(myrank==0)
        saveIntMeta(name,"totalCnt",&totalCnt);
      else 	;
      MPI_Barrier(MPI_COMM_WORLD);
  
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              tmpDouble=((i-istart+minXSub)+p->x)*dx*lambda;
              saveDouble[index]=tmpDouble;
              index++;
            }
            else 	;
            p=p->next;
          }
        }
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Double(saveDouble,name,"x",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              tmpDouble=((j-jstart+minYSub)+p->y)*dy*lambda;
              saveDouble[index]=tmpDouble;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Double(saveDouble,name,"y",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              saveDouble[index]=p->p1;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Double(saveDouble,name,"px",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              saveDouble[index]=p->p2;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Double(saveDouble,name,"py",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              saveDouble[index]=p->p3;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Double(saveDouble,name,"pz",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              saveInt[index]=p->index;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Int(saveInt,name,"index",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
        {
          p=particle[i][j][k].head[s]->pt;
          while(p)
          {
            if(p->p1>=minPx)
            {
              saveInt[index]=p->core;
              index++;
            }
            else	;
            p=p->next;
          }
        } 
      MPI_Barrier(MPI_COMM_WORLD); 
      saveParticleComp_Int(saveInt,name,"core",totalCnt,cnt,start);

      free(saveDouble);
      free(saveInt);
      break;

    //3D
    case 3:
      cnt=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
                cnt++;
              else	;
              p=p->next;
            }
          }
      saveDouble = (double *)malloc(cnt*sizeof(double ));      
      saveInt = (int *)malloc(cnt*sizeof(int ));      
      MPI_Gather(&cnt,1,MPI_INT,recv,1,MPI_INT,0,MPI_COMM_WORLD);
      MPI_Bcast(recv,nTasks,MPI_INT,0,MPI_COMM_WORLD);
      MPI_Barrier(MPI_COMM_WORLD);

      start=0;
      for(i=0; i<myrank; i++)
        start+=recv[i];
      totalCnt=0;
      for(i=0; i<nTasks; i++)
        totalCnt+=recv[i];

      if(myrank==0)
        saveIntMeta(name,"totalCnt",&totalCnt);
      else 	;
      MPI_Barrier(MPI_COMM_WORLD);

      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                tmpDouble=((i-istart+minXSub)+p->x)*dx*lambda;
                saveDouble[index]=tmpDouble;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"x",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                tmpDouble=((j-jstart+minYSub)+p->y)*dy*lambda;
                saveDouble[index]=tmpDouble;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"y",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                tmpDouble=((k-kstart+minZSub)+p->z)*dz*lambda;
                saveDouble[index]=tmpDouble;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"z",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                saveDouble[index]=p->p1;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"px",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                saveDouble[index]=p->p2;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"py",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                saveDouble[index]=p->p3;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Double(saveDouble,name,"pz",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                saveInt[index]=p->index;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Int(saveInt,name,"index",totalCnt,cnt,start);
      index=0;
      for(i=istart; i<iend; i++)
        for(j=jstart; j<jend; j++)
          for(k=kstart; k<kend; k++)
          {
            p=particle[i][j][k].head[s]->pt;
            while(p)
            {
              if(p->p1>=minPx)
              {
                saveInt[index]=p->core;
                index++;
              }
              else	;
              p=p->next;
            }
          } 
      MPI_Barrier(MPI_COMM_WORLD);
      saveParticleComp_Int(saveInt,name,"core",totalCnt,cnt,start);

      free(saveDouble);
      free(saveInt);
      break;
    }		//End of switch(dimension....)
}
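
saveParticle_HDF() above derives each rank's write offset by gathering the per-rank counts to rank 0, broadcasting the whole array back, and summing prefixes locally. Assuming only that each rank knows its local count, the same two numbers (start offset and grand total) can be obtained with MPI_Exscan and MPI_Allreduce; a minimal sketch of that alternative:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, cnt, start = 0, total = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    cnt = rank + 1;   /* stand-in for the locally counted particles */

    /* exclusive prefix sum of cnt = this rank's start offset in the global dataset */
    MPI_Exscan(&cnt, &start, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (rank == 0) start = 0;   /* MPI_Exscan leaves rank 0's result undefined */

    /* grand total, known on every rank (plays the role of totalCnt) */
    MPI_Allreduce(&cnt, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    printf("rank %d: cnt=%d start=%d total=%d\n", rank, cnt, start, total);

    MPI_Finalize();
    return 0;
}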
Example #22
0
int main(int argc, char * argv[])
{
    int rank, np;
    int * D;
    int * a;
    int i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    int res=-1;
    int * results;

    srand(rank + time(0));

    for(i = 20; i<100; i+=2)
    {

        // the matrix that contains the compatibilities
        D = (int*) malloc( sizeof(int)*i*i );
        // the array that contains a solution
        a = (int*) malloc( sizeof(int)*i );

        initArray(a, -1, i);

        if(rank==0)
        {
            //initialize the matrix
            genMatrix(D, i);

            // allocate the array to receive the gathered results
            results = (int*) malloc( sizeof(int)*np );
        }

        // generate a solution
        genSolution(a, i);

        //send compatibility matrix and initial solution to other processes
        MPI_Bcast(D, sizeof(int)*i*i, MPI_BYTE, 0, MPI_COMM_WORLD);
        //MPI_Bcast(a, sizeof(int)*i, MPI_BYTE, 0, MPI_COMM_WORLD);

        res = alg2(i, D, a, rank);

        //MPI_Barrier(MPI_COMM_WORLD);

        MPI_Gather(&res, 1, MPI_INT, results, 1, MPI_INT, 0, MPI_COMM_WORLD);

        if(rank==0)
        {
            printf("%d\t%d\n", i, getMin(results, np) );

            // clean
            free(results);
        }

        free(D);
        free(a);

    }

    MPI_Finalize();

    return 0;
}
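
In the example above the root only uses the gathered array to take its minimum (getMin). When that is all the root needs, MPI_Reduce with MPI_MIN delivers the value directly and the results buffer disappears; a minimal sketch of that alternative:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, res, best;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    res = 100 - rank;   /* stand-in for the per-rank result of alg2() */

    /* the root receives the smallest per-rank value directly */
    MPI_Reduce(&res, &best, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
    if (rank == 0) printf("minimum = %d\n", best);

    MPI_Finalize();
    return 0;
}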
Example #23
0
//---------------------------------------------------------------------------//
int
gatherv(Node &send_node,
        Node &recv_node,
        int root,
        MPI_Comm mpi_comm)
{
    Node n_snd_compact;
    send_node.compact_to(n_snd_compact);

    int m_size = mpi::size(mpi_comm);
    int m_rank = mpi::rank(mpi_comm);

    std::string schema_str = n_snd_compact.schema().to_json();

    int schema_len = schema_str.length() + 1;
    int data_len   = n_snd_compact.total_bytes();

    // to do the conduit gatherv, first need a gather to get the
    // schema and data buffer sizes

    int snd_sizes[] = {schema_len, data_len};

    Node n_rcv_sizes;

    if( m_rank == root )
    {
        Schema s;
        s["schema_len"].set(DataType::c_int());
        s["data_len"].set(DataType::c_int());
        n_rcv_sizes.list_of(s,m_size);
    }

    int mpi_error = MPI_Gather( snd_sizes, // local data
                                2, // two ints per rank
                                MPI_INT, // send ints
                                n_rcv_sizes.data_ptr(),  // rcv buffer
                                2,  // two ints per rank
                                MPI_INT,  // rcv ints
                                root,  // id of root for gather op
                                mpi_comm); // mpi comm

    CONDUIT_CHECK_MPI_ERROR(mpi_error);

    Node n_rcv_tmp;

    int  *schema_rcv_counts = NULL;
    int  *schema_rcv_displs = NULL;
    char *schema_rcv_buff   = NULL;

    int  *data_rcv_counts = NULL;
    int  *data_rcv_displs = NULL;
    char *data_rcv_buff   = NULL;

    // we only need rcv params on the gather root
    if( m_rank == root )
    {
        // alloc data for the mpi gather counts and displ arrays
        n_rcv_tmp["schemas/counts"].set(DataType::c_int(m_size));
        n_rcv_tmp["schemas/displs"].set(DataType::c_int(m_size));

        n_rcv_tmp["data/counts"].set(DataType::c_int(m_size));
        n_rcv_tmp["data/displs"].set(DataType::c_int(m_size));

        // get pointers to counts and displs
        schema_rcv_counts = n_rcv_tmp["schemas/counts"].value();
        schema_rcv_displs = n_rcv_tmp["schemas/displs"].value();

        data_rcv_counts = n_rcv_tmp["data/counts"].value();
        data_rcv_displs = n_rcv_tmp["data/displs"].value();

        int schema_curr_displ = 0;
        int data_curr_displ   = 0;
        int i=0;

        NodeIterator itr = n_rcv_sizes.children();
        while(itr.has_next())
        {
            Node &curr = itr.next();

            int schema_curr_count = curr["schema_len"].value();
            int data_curr_count   = curr["data_len"].value();

            schema_rcv_counts[i] = schema_curr_count;
            schema_rcv_displs[i] = schema_curr_displ;
            schema_curr_displ   += schema_curr_count;

            data_rcv_counts[i] = data_curr_count;
            data_rcv_displs[i] = data_curr_displ;
            data_curr_displ   += data_curr_count;

            i++;
        }

        n_rcv_tmp["schemas/data"].set(DataType::c_char(schema_curr_displ));
        schema_rcv_buff = n_rcv_tmp["schemas/data"].value();
    }

    mpi_error = MPI_Gatherv( const_cast <char*>(schema_str.c_str()),
                             schema_len,
                             MPI_CHAR,
                             schema_rcv_buff,
                             schema_rcv_counts,
                             schema_rcv_displs,
                             MPI_CHAR,
                             root,
                             mpi_comm);

    CONDUIT_CHECK_MPI_ERROR(mpi_error);

    // build all schemas from JSON, compact them.
    Schema rcv_schema;
    if( m_rank == root )
    {
        //TODO: should we make it easier to create a compact schema?
        Schema s_tmp;
        for(int i=0; i < m_size; i++)
        {
            Schema &s = s_tmp.append();
            s.set(&schema_rcv_buff[schema_rcv_displs[i]]);
        }

        s_tmp.compact_to(rcv_schema);
    }


    if( m_rank == root )
    {
        // allocate data to hold the gather result
        recv_node.set(rcv_schema);
        data_rcv_buff = (char*)recv_node.data_ptr();
    }

    mpi_error = MPI_Gatherv( n_snd_compact.data_ptr(),
                             data_len,
                             MPI_CHAR,
                             data_rcv_buff,
                             data_rcv_counts,
                             data_rcv_displs,
                             MPI_CHAR,
                             root,
                             mpi_comm);

    CONDUIT_CHECK_MPI_ERROR(mpi_error);

    return mpi_error;
}
Example #24
0
int main(int argc, char **argv) {

 
  int rank, M, j,i, *d_graph;
  int *local_matrix, *row_matrix, *col_matrix, *res_matrix, *rowIds, *colIds;
  int P, N, q, p_row, p_col;
  double start, finish;
  MPI_Status status;
 
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &P);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  //INPUT HANDLED BY THE ROOT PROCESSOR
  if (rank == ROOT){
    scanf("%d", &N);  
    q = check_fox_conditions(P,N);

    // Check whether Fox's conditions are met
    if(q == 0){
      MPI_Abort(MPI_COMM_WORLD, 0);
      return 1; //error
    }  

    d_graph = (int*)malloc((N*N) * sizeof(int));

    for(i=0; i < N; i++){
      for(j=0; j < N; j++){
	scanf("%d", &d_graph[GET_MTRX_POS(i,j,N)]);
	if (d_graph[GET_MTRX_POS(i,j,N)] == 0 && i != j) {
	  d_graph[GET_MTRX_POS(i,j,N)] = INF;
	}
      }
    }



    MPI_Bcast(&q, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if(q > 1)
      divide_matrix( d_graph, N, q); 
      
  }
  else{
    MPI_Bcast(&q, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
  }
  //---------------COMMON------------------
   
  int lngth = N / q;


  local_matrix = (int*)malloc((lngth*lngth) * sizeof(int));
  row_matrix   = (int*)malloc((lngth*lngth) * sizeof(int));
  col_matrix   = (int*)malloc((lngth*lngth) * sizeof(int));
  res_matrix   = (int*)malloc((lngth*lngth) * sizeof(int));
  
  if(q>1)
    chnkd_MPI_Recv(local_matrix, lngth*lngth, MPI_INT, 0);
  else
    local_matrix = d_graph;
    
  p_row = ( rank / q );
  p_col = ( rank % q );
    
  //CREATE COMMUNICATORS 
  MPI_Group MPI_GROUP_WORLD;
  MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
  MPI_Group row_group, col_group;
  MPI_Comm row_comm, col_comm, grid_comm;
  int tmp_row, tmp_col, proc;
  int row_process_ranks[q], col_process_ranks[q];
    
  for(proc = 0; proc < q; proc++){   
    row_process_ranks[proc] = (p_row * q) + proc;
    col_process_ranks[proc] = ((p_col + proc*q) %(q*q));
  }    
  radixsort(col_process_ranks, q);
  radixsort(row_process_ranks, q);

  MPI_Group_incl(MPI_GROUP_WORLD, q, row_process_ranks, &row_group);  
  MPI_Group_incl(MPI_GROUP_WORLD, q, col_process_ranks, &col_group);  
     
  MPI_Comm_create(MPI_COMM_WORLD, row_group, &row_comm);  
  MPI_Comm_create(MPI_COMM_WORLD, col_group, &col_comm);  

  if ((rank / q) == (rank % q)) {
      memcpy(row_matrix, local_matrix, (lngth*lngth) * sizeof(int));
  }
  int ln,d,flag;
  int step, rotation_src, rotation_dest, src;
  int count = 0;
  memcpy(res_matrix, local_matrix, (lngth*lngth) * sizeof(int));
  rotation_src = (p_row + 1) % q;
  rotation_dest = ((p_row - 1) + q) % q;
  ln = (lngth*q) << 1;
  start = MPI_Wtime();  

  for (d = 2; d < ln; d = d << 1) {
    memcpy(col_matrix, local_matrix, (lngth*lngth) * sizeof(int));
    for ( step = 0;  step < q;  step++) {
      src = (p_row +  step) % q;
      count++;
      if (src == p_col) {
	MPI_Bcast(local_matrix, lngth*lngth, MPI_INT, src, row_comm);
	floyd_warshall( local_matrix, col_matrix, res_matrix, lngth);
      } else {
	MPI_Bcast(row_matrix, lngth*lngth, MPI_INT, src, row_comm);
	floyd_warshall( row_matrix, col_matrix, res_matrix, lngth);
      }  
      if( step < q-1) 
        MPI_Sendrecv_replace(col_matrix, lngth*lngth, MPI_INT, rotation_dest, STD_TAG,rotation_src, STD_TAG, col_comm, MPI_STATUS_IGNORE);
  	
    }
    memcpy(local_matrix, res_matrix, (lngth*lngth) * sizeof(int));
  }
  
  
  int *sol;
  sol = malloc(N*N*sizeof(int));  
  
  MPI_Gather(res_matrix, lngth*lngth, MPI_INT, sol,  lngth*lngth, MPI_INT, 0, MPI_COMM_WORLD);
  
  if (rank == 0) {
    finish = MPI_Wtime();
    printf("Tempo de execução %f\n",finish - start);
  }
 
  if (rank == 0) {
    int row, col, pos_x, pos_y, pos, tmp_y, tmp_x;

    for (i = 0; i < P; i++) {
      pos_x = i / q;
      pos_y = i % q;
      pos = i * lngth*lngth;

      for (row = 0; row < lngth; row++) {
	for (col = 0; col < lngth; col++) {
          tmp_x = GET_MTRX_POS(pos_x,row,lngth);
          tmp_y = GET_MTRX_POS(pos_y,col,lngth);
          
	  if (sol[GET_MTRX_POS(row,col,lngth) + pos] == INF)
	    d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = 0;
	  else
	    d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = sol[GET_MTRX_POS(row,col,lngth) + pos];
	}
      }
    }
    prints_matrix(d_graph,N);
  }
  
  MPI_Finalize();
  return 0;
}
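
The reassembly loop at the end of the Fox example relies on the layout MPI_Gather produces: rank r's lngth-by-lngth block arrives at offset r*lngth*lngth of the gathered buffer and belongs at block row r/q, block column r%q of the global matrix. A sketch of that index arithmetic in isolation (assuming row-major storage, as GET_MTRX_POS appears to use; names are illustrative):

/* Copy the q*q gathered blocks back into an N*N row-major matrix:
 * rank r = (pr, pc) owns the block starting at global row pr*lngth,
 * column pc*lngth, and its data sits at offset r*lngth*lngth of `gathered`. */
void unpack_blocks(const int *gathered, int *global, int N, int q)
{
    int lngth = N / q;
    for (int r = 0; r < q * q; r++) {
        int pr = r / q, pc = r % q;
        const int *block = gathered + r * lngth * lngth;
        for (int row = 0; row < lngth; row++)
            for (int col = 0; col < lngth; col++)
                global[(pr * lngth + row) * N + (pc * lngth + col)] =
                    block[row * lngth + col];
    }
}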
Example #25
0
int main(int argc, char *argv[]) 
{ 
    int rank, nprocs, i, *counter_mem, *get_array, *get_idx, *acc_idx,
        mask, nlevels, level, idx, tmp_rank, pof2;
    MPI_Datatype get_type, acc_type;
    MPI_Win win;
    int errs = 0, *results, *counter_vals;
 
    MTest_Init(&argc,&argv); 
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs); 
    MPI_Comm_rank(MPI_COMM_WORLD,&rank); 

    if (rank == 0) {
        /* allocate counter memory and initialize to 0 */

        /* find the next power-of-two >= nprocs */
        pof2 = 1;
        while (pof2 < nprocs) pof2 *= 2;

        /* counter_mem = (int *) calloc(pof2*2, sizeof(int)); */

        i = MPI_Alloc_mem(pof2*2*sizeof(int), MPI_INFO_NULL, &counter_mem);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        for (i=0; i<(pof2*2); i++) counter_mem[i] = 0;

        MPI_Win_create(counter_mem, pof2*2*sizeof(int), sizeof(int),
                       MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        MPI_Win_free(&win); 

        /* free(counter_mem) */
        MPI_Free_mem(counter_mem);

        /* gather the results from other processes, sort them, and check 
           whether they represent a counter being incremented by 1 */

        results = (int *) malloc(NTIMES*nprocs*sizeof(int));
        for (i=0; i<NTIMES*nprocs; i++)
            results[i] = -1;

        MPI_Gather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, results, NTIMES, MPI_INT, 
                   0, MPI_COMM_WORLD);

        qsort(results+NTIMES, NTIMES*(nprocs-1), sizeof(int), compar);

        for (i=NTIMES+1; i<(NTIMES*nprocs); i++)
            if (results[i] != results[i-1] + 1)
                errs++;
        
        free(results);
    }
    else {
        /* Get the largest power of two smaller than nprocs */ 
        mask = 1; 
        nlevels = 0;
        while (mask < nprocs) {
            mask <<= 1; 
            nlevels++;
        }
        mask >>= 1;

        get_array = (int *) malloc(nlevels * sizeof(int));
        get_idx = (int *) malloc(nlevels * sizeof(int));
        acc_idx = (int *) malloc(nlevels * sizeof(int));

        level = 0; 
        idx   = 0; 
        tmp_rank = rank;
        while (mask >= 1) { 
            if (tmp_rank < mask) { 
                /* go to left for acc_idx, go to right for
                   get_idx. set idx=acc_idx for next iteration */ 
                acc_idx[level] = idx + 1; 
                get_idx[level] = idx + mask*2; 
                idx            = idx + 1; 
            } 
            else { 
                /* go to right for acc_idx, go to left for
                   get_idx. set idx=acc_idx for next iteration */ 
                acc_idx[level] = idx + mask*2; 
                get_idx[level] = idx + 1; 
                idx            = idx + mask*2; 
            } 
            level++;
            tmp_rank = tmp_rank % mask;
            mask >>= 1; 
        } 

/*        for (i=0; i<nlevels; i++)
            printf("Rank %d, acc_idx[%d]=%d, get_idx[%d]=%d\n", rank,
                   i, acc_idx[i], i, get_idx[i]);
*/

        MPI_Type_create_indexed_block(nlevels, 1, get_idx, MPI_INT, &get_type);
        MPI_Type_create_indexed_block(nlevels, 1, acc_idx, MPI_INT, &acc_type);
        MPI_Type_commit(&get_type);
        MPI_Type_commit(&acc_type);

        /* allocate array to store the values obtained from the 
           fetch-and-add counter */
        counter_vals = (int *) malloc(NTIMES * sizeof(int));

        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); 

        for (i=0; i<NTIMES; i++) {
            Get_nextval_tree(win, get_array, get_type, acc_type,
                             nlevels, counter_vals+i); 
            /* printf("Rank %d, counter %d\n", rank, value); */
        }

        MPI_Win_free(&win);
        free(get_array);
        free(get_idx);
        free(acc_idx);
        MPI_Type_free(&get_type);
        MPI_Type_free(&acc_type);

         /* gather the results to the root */
        MPI_Gather(counter_vals, NTIMES, MPI_INT, NULL, 0, MPI_DATATYPE_NULL, 
                   0, MPI_COMM_WORLD);
        free(counter_vals);
   }

    MTest_Finalize(errs);
    MPI_Finalize(); 
    return MTestReturnValue( errs );
} 
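
The test above shows the in-place variant of MPI_Gather: the root passes MPI_IN_PLACE because its own contribution already sits in its slot of the receive buffer, while non-roots pass a normal send buffer and their receive arguments are ignored. A minimal standalone sketch of that calling convention:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define COUNT 4

int main(int argc, char **argv)
{
    int rank, size, i, *all = NULL;
    int mine[COUNT];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (i = 0; i < COUNT; i++) mine[i] = rank * COUNT + i;

    if (rank == 0) {
        all = (int*) malloc(size * COUNT * sizeof(int));
        for (i = 0; i < COUNT; i++) all[i] = mine[i];   /* root's block is already in place */
        MPI_Gather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                   all, COUNT, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else {
        /* receive arguments are ignored on non-root ranks */
        MPI_Gather(mine, COUNT, MPI_INT,
                   NULL, 0, MPI_DATATYPE_NULL, 0, MPI_COMM_WORLD);
    }

    if (rank == 0) {
        for (i = 0; i < size * COUNT; i++) printf("%d ", all[i]);
        printf("\n");
        free(all);
    }

    MPI_Finalize();
    return 0;
}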
Example #26
0
/** 
 * Create a parms_Map object based on the output of ParMetis.
 *
 * @param self     A parms_Map object created.
 * @param vtxdist  An integer array of size np+1, where np is the
 *                 number of PEs. This array indicates the range of
 *                 vertices that are local to each processor.  PE i
 *                 stores vertices in the range of [vtxdist[i],
 *                 vtxdist[i+1]).
 * @param part     An array of size equal to the number of
 *                 locally-stored vertices. part[j] indicates the ID
 *                 of the PE to which the vertex with local index j
 *                 and global index vtxdist[pid]+j belongs (pid is ID
 *                 of local PE).
 * @param comm     MPI communicator.
 * @param offset   The start index.
 *                 - 1 FORTRAN
 *                 - 0 C
 * @param dof      The number of variables associated with each
 *                  vertex.
 * @param vtype    Assuming the variables u_i, v_i are associated
 *                 with vertex i, two styles of numbering variables
 *                 are as follows:
 *                 - INTERLACED. Variables are numbered in the
 *                   order of \f$u_1, v_1, u_2, v_2, \cdots\f$;
 *                 - NONINTERLACED. Variables are numbered in the
 *                   order of \f$u_1, u_2, u_3,...,v_1, v_2,...\f$.
 *
 * @return 0 on success.
 */
int parms_MapCreateFromDist(parms_Map *self, int *vtxdist, int *part,
                            MPI_Comm comm, int offset, int dof,
                            VARSTYPE vtype)
{
    parms_Map newMap;
    int npro, pid, i, j, l, gsize;
    int gv_size, lsize, nl, ind, gindex;
    int *num, *nums, *num_rcv, *disp, *snd_buf, *rcv_buf;
    MPI_Comm newComm;

    MPI_Comm_dup(comm, &newComm);
    PARMS_NEW0((newMap));
    newMap->ref = 1;
    MPI_Comm_rank(newComm, &newMap->pid);
    MPI_Comm_size(newComm, &newMap->npro);
    newMap->comm = newComm;
    npro = newMap->npro;
    pid  = newMap->pid;
    /* get the number of local vertices */
    nl =  vtxdist[pid+1] - vtxdist[pid];
    /* calculate the total number of vertices */
    gsize = vtxdist[npro] - vtxdist[0];
    /* total number of variables  */
    gv_size = newMap->gsize = gsize * dof;
    newMap->start = offset;
    newMap->dof = dof;
    newMap->vtype = vtype;
    newMap->isserial =  false;
    if (newMap->npro == 1) {
        newMap->isserial = true;
    }
    newMap->isperm =  false;
    newMap->isvecperm    = false;
    newMap->ispermalloc = false;
    newMap->isdatalloc = false;

    if (!newMap->isserial) {
        PARMS_NEWARRAY(snd_buf, nl);
        /* create a hash table */
        parms_TableCreate(&newMap->table, NULL, nl);
        PARMS_NEWARRAY0(num, npro);
        PARMS_NEWARRAY(nums, npro);
        /* num[i] stores the number of locally-stored variables being
       distributed to PE i */
        for (i = 0; i < nl; i++) {
            num[part[i]-offset]++;
        }
        MPI_Allreduce(num, nums, npro, MPI_INT, MPI_SUM, newComm);
        /* nums[i] stores the number of variables on PE i */
        lsize = newMap->lsize = nums[pid]*dof;
        PARMS_NEWARRAY(newMap->lvars, lsize);
        PARMS_FREE(nums);

        PARMS_NEWARRAY(disp,     npro+1);
        PARMS_NEWARRAY(num_rcv,  npro);
        /* num_rcv[j] stores the number of items received from PE j */
        for (i = 0; i < npro; i++) {
            MPI_Gather(&num[i], 1, MPI_INT, num_rcv, 1, MPI_INT, i,
                       newComm);
            /* snd_buf stores the data sent to PE i */
            ind = 0;
            for (j = 0; j < nl; j++) {
                if (part[j]-offset == i) {
                    snd_buf[ind++] = vtxdist[pid] + j - offset;
                }
            }
            if (pid == i) {
                /* disp is an integer array: disp[i] specifies the displacement,
                   relative to the receive buffer, at which to place the
                   incoming data from PE i */
                disp[0] = 0;
                for (j = 0; j < npro; j++) {
                    disp[j+1] = disp[j] + num_rcv[j];
                }

                /* variables in rcv_buf are stored in C-style */
                PARMS_NEWARRAY(rcv_buf, disp[npro]);
                MPI_Gatherv(snd_buf, num[i], MPI_INT, newMap->lvars, num_rcv,
                            disp, MPI_INT, i, newComm);
                if (vtype == INTERLACED) {
                    ind = 0;
                    for (j = 0; j < disp[npro]; j++) {
                        for (l = 0; l < dof; l++) {
                            gindex = dof*rcv_buf[j]+l;
                            parms_TablePut(newMap->table, gindex, ind);
                            newMap->lvars[ind++] = gindex;
                        }
                    }
                }
                else if (vtype == NONINTERLACED) {
                    for (j = 0; j < disp[npro]; j++) {
                        parms_TablePut(newMap->table, newMap->lvars[j], j);
                    }
                    ind = disp[npro];
                    for (j = 0; j < disp[npro]; j++) {
                        for (l = 1; l < dof; l++) {
                            gindex = gsize*l+newMap->lvars[j];
                            parms_TablePut(newMap->table, gindex, ind);
                            newMap->lvars[ind++] = gindex;
                        }
                    }
                }
                PARMS_FREE(rcv_buf);
            }
            else {
                MPI_Gatherv(snd_buf, num[i], MPI_INT, rcv_buf, num_rcv,
                            disp, MPI_INT, i, newComm);
            }
        }

        PARMS_FREE(snd_buf);
        PARMS_FREE(num);
        PARMS_FREE(num_rcv);
        PARMS_FREE(disp);
        newMap->ispermalloc = true;
        PARMS_NEWARRAY0(newMap->perm,  lsize);
        PARMS_NEWARRAY0(newMap->iperm, lsize);
        for (i = 0; i < lsize; i++) {
            newMap->perm[i] = -1;
        }
    }
    else {
        lsize = gv_size;
        newMap->lsize = gv_size;
    }
    newMap->nint    =  lsize;
    newMap->ninf    =  0;
    newMap->n_ext = 0;

    *self = newMap;

    /* Define complex data type for MPI if complex code is compiled */
#if defined(DBL_CMPLX)
    parms_InitComplex();
#endif 

    return 0;
}
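
A note on the loop above: issuing MPI_Gather of one integer with every PE acting as root in turn moves the same count information as a single MPI_Alltoall with one int per rank pair (the payload exchange would correspondingly map to MPI_Alltoallv). A minimal sketch of the counts-only step:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, npro, i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &npro);

    int *num     = (int*) calloc(npro, sizeof(int));   /* num[i]: how many of my items go to PE i */
    int *num_rcv = (int*) malloc(npro * sizeof(int));  /* num_rcv[i]: how many items PE i sends me */

    num[(rank + 1) % npro] = 3;   /* illustrative counts */

    /* one call replaces npro gathers, one per root */
    MPI_Alltoall(num, 1, MPI_INT, num_rcv, 1, MPI_INT, MPI_COMM_WORLD);

    for (i = 0; i < npro; i++)
        printf("rank %d receives %d item(s) from PE %d\n", rank, num_rcv[i], i);

    free(num);
    free(num_rcv);
    MPI_Finalize();
    return 0;
}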
Example #27
0
int DisplayGoL(int N, int effective_cols_size, int matrix[N][effective_cols_size], int rank)
{

    int realColumnSize = effective_cols_size-2;
    int arraySize = N * realColumnSize;
    int tempArray[arraySize];
    int count = 0;
    int r, c;
    int displaymatrix[N][N];
    int tempTempArray[N*N];
    int currentGatherTime = 0;
    
    struct timeval send1s, send1e; 
    int tSend;
    //printf("\nEFFECTIVE COL SIXE :%d",effective_cols_size);
	for(c=1;c<effective_cols_size-1;c++){
		for(r=0;r<N;r++){
				tempArray[count] = matrix[r][c];
				count++;
				//printf("SETTING RANK:%d, INDEX: %d and %d, VALUE: %d\n", rank, r,c, tempArray[count-1]);
		}
	}

    gettimeofday(&send1s, NULL);
    if (rank == 0) {
        MPI_Gather(tempArray, N * realColumnSize, MPI_INT, tempTempArray, N * realColumnSize, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Gather(tempArray, N * realColumnSize, MPI_INT, NULL, 0, MPI_INT, 0, MPI_COMM_WORLD);
    }
    gettimeofday(&send1e, NULL);
    currentGatherTime += (send1e.tv_sec-send1s.tv_sec)*1000 + (send1e.tv_usec-send1s.tv_usec)/1000;
    //printf("%d", currentGatherTime);
            int q = 0;

           // for(q=0; q< N*realColumnSize; q++){
           //     printf("RANK: %d, INDEX: %d, VALUE: %d\n", rank, q, tempArray[q]);
           // }

            if(rank==0){
                // On rank 0, unpack the gathered array into the display matrix
                for(c=0;c<N*N;c++){

                    displaymatrix[c%N][c/N] = tempTempArray[c];
                    //printf("INDEX 22:  %d, VALUE:  %d\n", c, tempTempArray[c]);

                }
              //  printf("\n \n GATHER AT RANK %d\n",rank);
			  for (r = 0; r < N; r++) {
				  for (c = 0; c < N; c++)
					  printf("V_G-%d-%d = %d  ",r,c, displaymatrix[r][c]);
				  printf("\n");
			  }

            }




    return currentGatherTime;
   //return;
}
Example #28
0
/*************** REQ_GETPARTS ************/
void cuda_mpi_get_particles(CUDA_particle_data *particle_data_host)
{
    int n_part;
    int g, pnode;
    Cell *cell;
    int c;
    MPI_Status status;

    int i;  
    int *sizes;
    sizes = (int*) Utils::malloc(sizeof(int)*n_nodes);

    n_part = cells_get_n_particles();
    
    /* first collect number of particles on each node */
    MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

    /* just check if the number of particles is correct */
    if(this_node > 0){
      /* call slave functions to provide the slave data */
      cuda_mpi_get_particles_slave();
    }
    else {
      /* master: fetch particle information into 'result' */
      g = 0;
      for (pnode = 0; pnode < n_nodes; pnode++) {
        if (sizes[pnode] > 0) {
          if (pnode == 0) {
            for (c = 0; c < local_cells.n; c++) {
              Particle *part;
              int npart;  
              int dummy[3] = {0,0,0};
              double pos[3];

              cell = local_cells.cell[c];
              part = cell->part;
              npart = cell->n;
              for (i=0;i<npart;i++) {
                memmove(pos, part[i].r.p, 3*sizeof(double));
                fold_position(pos, dummy);

                particle_data_host[i+g].p[0] = (float)pos[0];
                particle_data_host[i+g].p[1] = (float)pos[1];
                particle_data_host[i+g].p[2] = (float)pos[2];

                particle_data_host[i+g].v[0] = (float)part[i].m.v[0];
                particle_data_host[i+g].v[1] = (float)part[i].m.v[1];
                particle_data_host[i+g].v[2] = (float)part[i].m.v[2];
#ifdef IMMERSED_BOUNDARY
                particle_data_host[i+g].isVirtual = part[i].p.isVirtual;
#endif

#ifdef DIPOLES
                particle_data_host[i+g].dip[0] = (float)part[i].r.dip[0];
                particle_data_host[i+g].dip[1] = (float)part[i].r.dip[1];
                particle_data_host[i+g].dip[2] = (float)part[i].r.dip[2];
#endif

#ifdef SHANCHEN
                // SAW TODO: does this really need to be copied every time?
                int ii;
                for(ii=0;ii<2*LB_COMPONENTS;ii++){
                  particle_data_host[i+g].solvation[ii] = (float)part[i].p.solvation[ii];
                }
#endif

#ifdef LB_ELECTROHYDRODYNAMICS
                particle_data_host[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
                particle_data_host[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
                particle_data_host[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif

#ifdef ELECTROSTATICS
		particle_data_host[i+g].q = (float)part[i].p.q;
#endif

#ifdef ROTATION
                particle_data_host[i+g].quatu[0] = (float)part[i].r.quatu[0];
                particle_data_host[i+g].quatu[1] = (float)part[i].r.quatu[1];
                particle_data_host[i+g].quatu[2] = (float)part[i].r.quatu[2];
#endif

#ifdef ENGINE
                particle_data_host[i+g].swim.v_swim        = (float)part[i].swim.v_swim;
                particle_data_host[i+g].swim.f_swim        = (float)part[i].swim.f_swim;
                particle_data_host[i+g].swim.quatu[0]      = (float)part[i].r.quatu[0];
                particle_data_host[i+g].swim.quatu[1]      = (float)part[i].r.quatu[1];
                particle_data_host[i+g].swim.quatu[2]      = (float)part[i].r.quatu[2];
#if defined(LB) || defined(LB_GPU)
                particle_data_host[i+g].swim.push_pull     =        part[i].swim.push_pull;
                particle_data_host[i+g].swim.dipole_length = (float)part[i].swim.dipole_length;
#endif
                particle_data_host[i+g].swim.swimming      =        part[i].swim.swimming;
#endif
              }  
              g += npart;
            }
          }
          else {
            MPI_Recv(&particle_data_host[g], sizes[pnode]*sizeof(CUDA_particle_data), MPI_BYTE, pnode, REQ_CUDAGETPARTS,
            comm_cart, &status);
            g += sizes[pnode];
          }
        }
      }
    }
    COMM_TRACE(fprintf(stderr, "%d: finished get\n", this_node));
    free(sizes);
}
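
cuda_mpi_get_particles() above gathers only the per-rank particle counts with MPI_Gather and then moves the variable-size payloads with plain point-to-point messages, one MPI_Recv per contributing rank. A minimal standalone sketch of that pattern (with doubles standing in for the CUDA_particle_data structs):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG 42

int main(int argc, char **argv)
{
    int rank, nprocs, i, n_local;
    int *sizes = NULL;
    double *local, *all = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    n_local = rank + 1;   /* illustrative per-rank payload size */
    local = (double*) malloc(n_local * sizeof(double));
    for (i = 0; i < n_local; i++) local[i] = rank;

    /* first collect the number of items on each rank */
    if (rank == 0) sizes = (int*) malloc(nprocs * sizeof(int));
    MPI_Gather(&n_local, 1, MPI_INT, sizes, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        int total = 0, g;
        for (i = 0; i < nprocs; i++) total += sizes[i];
        all = (double*) malloc(total * sizeof(double));

        for (i = 0; i < n_local; i++) all[i] = local[i];   /* root's own contribution */
        g = sizes[0];
        for (i = 1; i < nprocs; i++) {                     /* then one receive per rank */
            MPI_Recv(&all[g], sizes[i], MPI_DOUBLE, i, TAG,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            g += sizes[i];
        }
        printf("root assembled %d values\n", total);
        free(all);
        free(sizes);
    }
    else {
        MPI_Send(local, n_local, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD);
    }

    free(local);
    MPI_Finalize();
    return 0;
}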
Example #29
0
  void cuda_mpi_send_forces(float *host_forces,
                            float *host_torques,
                            CUDA_fluid_composition * host_composition){
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  int i;  
  int *sizes;
  sizes = (int *) Utils::malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

  /* call slave functions to provide the slave data */
  if(this_node > 0) {
    cuda_mpi_send_forces_slave();
  }
  else{
    /* fetch particle information into 'result' */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            int npart;  
            cell = local_cells.cell[c];
            npart = cell->n;
            for (i=0;i<npart;i++) { 
              cell->part[i].f.f[0] += (double)host_forces[(i+g)*3+0];
              cell->part[i].f.f[1] += (double)host_forces[(i+g)*3+1];
              cell->part[i].f.f[2] += (double)host_forces[(i+g)*3+2];
#ifdef ROTATION
              cell->part[i].f.torque[0] += (double)host_torques[(i+g)*3+0];
              cell->part[i].f.torque[1] += (double)host_torques[(i+g)*3+1];
              cell->part[i].f.torque[2] += (double)host_torques[(i+g)*3+2];
#endif

#ifdef SHANCHEN
              for (int ii=0;ii<LB_COMPONENTS;ii++) {
                cell->part[i].r.composition[ii] = (double)host_composition[i+g].weight[ii];
              }
#endif
            }
            g += npart;
          }
        }
        else {
          /* and send it back to the slave node */

          MPI_Send(&host_forces[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#ifdef ROTATION          
          MPI_Send(&host_torques[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#endif
#ifdef SHANCHEN
          MPI_Send(&host_composition[g], sizes[pnode]*sizeof(CUDA_fluid_composition), MPI_BYTE, pnode, REQ_CUDAGETPARTS, comm_cart);      
#endif
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));

  free(sizes);
}
Example #30
-1
int main ( int argc, char *argv[] ) {

  // Auxiliary variables
  int rank;
  int npcs;
  int step;
  dmn domain;
  double wtime;

  // Solution arrays
  double *g_u; /* will be allocated in ROOT only */ 
  double *t_u;
  double *t_un;

  // Initialize MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npcs);

  // Manage Domain sizes
  domain = Manage_Domain(rank,npcs); 

  // Allocate Memory
  Manage_Memory(0,domain,&g_u,&t_u,&t_un);

  // Root mode: Build Initial Condition and scatter it to the rest of processors
  if (domain.rank==ROOT) Call_IC(2,g_u);
  MPI_Scatter(g_u, domain.size, MPI_DOUBLE, t_u+NX*NY, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

  // Exchange halo regions
  Manage_Comms(domain,&t_u); MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the starting time.
  if (rank==ROOT) wtime=MPI_Wtime();

  // Asynchronous MPI Solver
  for (step = 0; step < NO_STEPS; step+=2) {
    // print iteration in ROOT mode
    if (rank==ROOT && step%10000==0) printf("  Step %d of %d\n",step,(int)NO_STEPS);
    
    // Exchange Boundaries and compute stencil
    Call_Laplace(domain,&t_u,&t_un); Manage_Comms(domain,&t_un); // 1st iter
    Call_Laplace(domain,&t_un,&t_u); Manage_Comms(domain,&t_u ); // 2nd iter
  }
  MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the final time.
  if (rank==ROOT) {
    wtime = MPI_Wtime()-wtime;
    printf ("\n Wall clock elapsed seconds = %f\n\n", wtime );
  }
  
  // Gather solutions to ROOT and write solution in ROOT mode
  MPI_Gather(t_u+NX*NY, domain.size, MPI_DOUBLE, g_u, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
  if (rank==ROOT) Save_Results(g_u);

  // Free Memory
  Manage_Memory(1,domain,&g_u,&t_u,&t_un); MPI_Barrier(MPI_COMM_WORLD);

  // Terminate MPI.
  MPI_Finalize();

  // ROOT mode: Terminate.
  if (rank==ROOT) {
    printf ("HEAT_MPI:\n" );
    printf ("  Normal end of execution.\n\n" );
  }

  return 0;
}
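
The heat example above keeps one halo slab of NX*NY cells at each end of the local array, so both the MPI_Scatter and the matching MPI_Gather use the interior pointer t_u + NX*NY with the same per-rank count. A minimal sketch of that scatter/gather symmetry (NX, NY and the slab counts here are illustrative stand-ins):

#include <mpi.h>
#include <stdlib.h>

#define NX 8
#define NY 8

int main(int argc, char **argv)
{
    int rank, npcs;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &npcs);

    int slab       = NX * NY;    /* cells in one z-slab (the halo unit) */
    int local_size = 4 * slab;   /* interior cells owned by this rank (illustrative) */

    double *g_u = NULL;
    double *t_u = (double*) calloc(local_size + 2 * slab, sizeof(double)); /* interior + 2 halo slabs */
    if (rank == 0) g_u = (double*) calloc((size_t)npcs * local_size, sizeof(double));

    /* distribute the interior; the halo slabs at both ends stay untouched */
    MPI_Scatter(g_u, local_size, MPI_DOUBLE, t_u + slab, local_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* ... halo exchange and stencil updates would go here ... */

    /* collect the interior back from the same offset */
    MPI_Gather(t_u + slab, local_size, MPI_DOUBLE, g_u, local_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    free(t_u);
    if (rank == 0) free(g_u);
    MPI_Finalize();
    return 0;
}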