Example #1
1
void Create_parms_datatype(MPI_Datatype *Parmstype){

	struct s_mc_parms *x;
	x = calloc(1,sizeof(struct s_mc_parms));
	if (!x) Error("Cannot allocate mc_parms");

	MPI_Aint adress[68];
	MPI_Get_address(x, &adress[0]);
	MPI_Get_address(&(*x).npol, &adress[1]);
	MPI_Get_address(&(*x).nstep, &adress[2]);
	MPI_Get_address(&(*x).seed, &adress[3]);
	MPI_Get_address(&(*x).dw_flip, &adress[4]);
	MPI_Get_address(&(*x).dw_pivot, &adress[5]);
	MPI_Get_address(&(*x).dw_mpivot, &adress[6]);
	MPI_Get_address(&(*x).dw_lpivot, &adress[7]);
	MPI_Get_address(&(*x).dw_mflip, &adress[8]);
	MPI_Get_address(&(*x).fntrj, &adress[9]);
	MPI_Get_address(&(*x).fne, &adress[10]);
	MPI_Get_address(&(*x).flastp, &adress[11]);
	MPI_Get_address(&(*x).fnproc, &adress[12]);
	MPI_Get_address(&(*x).nprinttrj, &adress[13]);
	MPI_Get_address(&(*x).nprintlog, &adress[14]);
	MPI_Get_address(&(*x).nprinte, &adress[15]);
	MPI_Get_address(&(*x).shell, &adress[16]);
	MPI_Get_address(&(*x).nshell, &adress[17]);
	MPI_Get_address(&(*x).r2shell, &adress[18]);
	MPI_Get_address(&(*x).ntemp, &adress[19]);
	MPI_Get_address(&(*x).T, &adress[20]);
	MPI_Get_address(&(*x).randdw, &adress[21]);
	MPI_Get_address(&(*x).debug, &adress[22]);
	MPI_Get_address(&(*x).movetype, &adress[23]);
	MPI_Get_address(&(*x).nmul_mpivot, &adress[24]);
	MPI_Get_address(&(*x).nmul_lpivot, &adress[25]);
	MPI_Get_address(&(*x).nmul_mflip, &adress[26]);
	MPI_Get_address(&(*x).nosidechains, &adress[27]);
	MPI_Get_address(&(*x).noangpot, &adress[28]);
	MPI_Get_address(&(*x).nodihpot, &adress[29]);
	MPI_Get_address(&(*x).nrun, &adress[30]);
	MPI_Get_address(&(*x).always_restart, &adress[31]);
	MPI_Get_address(&(*x).record_native, &adress[32]);
	MPI_Get_address(&(*x).acc, &adress[33]);
	MPI_Get_address(&(*x).mov, &adress[34]);
	MPI_Get_address(&(*x).disentangle, &adress[35]);
	MPI_Get_address(&(*x).stempering, &adress[36]);
	MPI_Get_address(&(*x).dx_com, &adress[37]);
	MPI_Get_address(&(*x).dx_clm, &adress[38]);
	MPI_Get_address(&(*x).r_cloose, &adress[39]);
	MPI_Get_address(&(*x).a_cloose, &adress[40]);
	MPI_Get_address(&(*x).d_cloose, &adress[41]);
	MPI_Get_address(&(*x).hb, &adress[42]);
	MPI_Get_address(&(*x).anneal, &adress[43]);
	MPI_Get_address(&(*x).anneal_often, &adress[44]);
	MPI_Get_address(&(*x).anneal_step, &adress[45]);
	MPI_Get_address(&(*x).anneal_t, &adress[46]);
	MPI_Get_address(&(*x).anneal_recov, &adress[47]);
	#ifdef OPTIMIZEPOT
	MPI_Get_address(&(*x).op_minim, &adress[48]);
	MPI_Get_address(&(*x).op_itermax, &adress[49]);
	MPI_Get_address(&(*x).op_step, &adress[50]);
	MPI_Get_address(&(*x).op_T, &adress[51]);
	MPI_Get_address(&(*x).op_deltat, &adress[52]);
	MPI_Get_address(&(*x).op_stop, &adress[53]);
	MPI_Get_address(&(*x).op_print, &adress[54]);
	MPI_Get_address(&(*x).op_emin, &adress[55]);
	MPI_Get_address(&(*x).op_emax, &adress[56]);
	MPI_Get_address(&(*x).op_wait, &adress[57]);
	MPI_Get_address(&(*x).op_r, &adress[58]);	
	MPI_Get_address(&(*x).op_r0, &adress[59]);
	MPI_Get_address(&(*x).nstep_exchange, &adress[60]);
	MPI_Get_address(&(*x).nmul_local,&adress[61]);
	MPI_Get_address(&(*x).chi2start,&adress[62]);
	MPI_Get_address(&(*x).ishell,&adress[63]);
	MPI_Get_address(&(*x).bgs_a,&adress[64]);
	MPI_Get_address(&(*x).bgs_b,&adress[65]);
	MPI_Get_address(&(*x).dtheta,&adress[66]);
	MPI_Get_address(&(*x).iT_bias,&adress[67]);
	#else
	MPI_Get_address(&(*x).nstep_exchange, &adress[48]);
	#endif
	

	
	#ifdef OPTIMIZEPOT
	MPI_Datatype type[67]={MPI_INT, MPI_INT, MPI_LONG, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT,MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT,  MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT,  MPI_DOUBLE,  MPI_DOUBLE,  MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT,  MPI_INT,  MPI_INT,  MPI_INT, MPI_DOUBLE, MPI_INT, MPI_CHAR, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT,MPI_INT,MPI_INT,MPI_DOUBLE,MPI_DOUBLE,MPI_DOUBLE,MPI_INT};
	int blocklen[67]={1,1,1,1,1,1,1,1,50,50,50,50,1,1,1,1,1,1,1,NREPMAX,1,1,NMOVES,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,50,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
	MPI_Aint disp[67];
	
	int i;
	for(i=0; i<67; i++) {disp[i]=adress[i+1]-adress[0];}
	
	MPI_Type_create_struct(67,blocklen,disp,type,Parmstype);
	MPI_Type_commit(Parmstype);
	free(x);
	#else
	MPI_Datatype type[48]={MPI_INT, MPI_INT, MPI_LONG, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT,MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT,  MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT,  MPI_DOUBLE,  MPI_DOUBLE,  MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT,  MPI_INT,  MPI_INT,  MPI_INT, MPI_DOUBLE, MPI_INT, MPI_INT};
	int blocklen[48]={1,1,1,1,1,1,1,1,50,50,50,50,1,1,1,1,1,1,1,NREPMAX,1,1,NMOVES,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
	MPI_Aint disp[48];
	
	int i;
	for(i=0; i<48; i++) {disp[i]=adress[i+1]-adress[0];}
	
	MPI_Type_create_struct(48,blocklen,disp,type,Parmstype);
	MPI_Type_commit(Parmstype);
	free(x);
	#endif
}
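
The 68-entry address bookkeeping above is easy to get out of sync with struct s_mc_parms. As a point of comparison, here is a minimal sketch of the same technique built from offsetof() and MPI_Type_create_resized(); the reduced struct s_demo_parms and its four fields are hypothetical stand-ins, not the real s_mc_parms:

#include <stddef.h>
#include <mpi.h>

struct s_demo_parms {
	int    npol;
	long   nstep;
	double dw_flip;
	char   fntrj[50];
};

void Create_demo_parms_datatype(MPI_Datatype *Parmstype){

	int          blocklen[4] = { 1, 1, 1, 50 };
	MPI_Aint     disp[4]     = { offsetof(struct s_demo_parms, npol),
	                             offsetof(struct s_demo_parms, nstep),
	                             offsetof(struct s_demo_parms, dw_flip),
	                             offsetof(struct s_demo_parms, fntrj) };
	MPI_Datatype type[4]     = { MPI_INT, MPI_LONG, MPI_DOUBLE, MPI_CHAR };
	MPI_Datatype tmp;

	MPI_Type_create_struct(4, blocklen, disp, type, &tmp);
	/* resize the extent to sizeof(struct s_demo_parms) so that arrays of the
	   struct keep the correct stride even with trailing padding */
	MPI_Type_create_resized(tmp, 0, sizeof(struct s_demo_parms), Parmstype);
	MPI_Type_commit(Parmstype);
	MPI_Type_free(&tmp);
}
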
Example #2
0
/*
 * Dump the timing information to a file.
 * Called both from C and Fortran API's (adios.c and adiosf.c)
*/
void adios_timing_write_xml_common (int64_t fd_p, const char* filename)
{
#if defined ADIOS_TIMER_EVENTS && !defined _NOMPI //No timing information on single process

    struct adios_file_struct * fd = (struct adios_file_struct *) fd_p;
    if (!fd)
    {
        adios_error (err_invalid_file_pointer,
                     "Invalid handle passed to adios_get_timing_name\n");
        return;
    }

    if (!fd->group || !fd->group->prev_timing_obj)
    {
        // No timing info, don't write anything.
        return;
    }

    int size=1, rank=0, i, global_event_count, count_to_send;
 
    int * counts;
    int * displs;
    struct adios_timing_event_struct* events;
    MPI_Datatype event_type;
    if (fd->comm != MPI_COMM_NULL)
    {
        MPI_Comm_size (fd->comm, &size);
        MPI_Comm_rank (fd->comm, &rank);
    }

    if (rank == 0)
    {
        counts = (int*) malloc (sizeof (int) * size);
    }

    // Collect all of the events on proc 0
    // First, per proc event counts

    count_to_send = (fd->group->prev_timing_obj->event_count > ADIOS_TIMING_MAX_EVENTS) ?
                      ADIOS_TIMING_MAX_EVENTS : fd->group->prev_timing_obj->event_count;


    MPI_Gather (
        &count_to_send, // sendbuf
        1,              // sendcount
        MPI_INT,        // sendtype
        counts,         // recvbuf
        1,           // recvcount
        MPI_INT,        // recvtype
        0,              // root
        fd->comm  // comm
    );

    if (rank == 0)
    {

        displs = (int*) malloc (sizeof (int) * size);
        displs[0] = 0;
        global_event_count = counts[0];

        for (i = 1; i < size; i++)
        {
            displs[i] = displs[i-1] + counts[i-1];
            global_event_count += counts[i];
        }

        events = (struct adios_timing_event_struct*) malloc (
            sizeof (struct adios_timing_event_struct) * global_event_count);
    }

    // structure of the adios_timing_event_struct (int, int, double)
    int blocklens[]  = {2,1};
    MPI_Aint disps[]      = {0,2*sizeof(int)};
    MPI_Datatype types[] = {MPI_INT,MPI_DOUBLE};
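    // Portability note: the hard-coded 2*sizeof(int) displacement assumes the
    // double in adios_timing_event_struct immediately follows the two ints;
    // offsetof (struct adios_timing_event_struct, time) would express the same
    // displacement without that assumption.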

    MPI_Type_create_struct (
        2, // count
        blocklens, // array_of_blocklengths
        disps, // array_of_displacements
        types, // array_of_types
        &event_type
    );
    MPI_Type_commit (&event_type);


    // Now the events
    MPI_Gatherv (
        &fd->group->prev_timing_obj->events, // sendbuf
        count_to_send, // sendcount
        event_type, // sendtype
        events, //recvbuf
        counts, // recvcounts
        displs, // displacements
        event_type, // recvtype
        0, // root
        fd->comm // comm
    );

    // Gather the write sizes
    int *write_sizes = NULL;
    if (rank == 0)
    {
        write_sizes = (int*) malloc (sizeof(int) * size);
    }

    MPI_Gather (
        &fd->write_size_bytes, //sendbuf
        1, //sendcount
        MPI_INT, //sendtype
        write_sizes, //recvbuf
        1, //recvcount
        MPI_INT, //recvtype
        0, //root
        fd->comm //comm
    );

    // Write the events to a file
    if (rank == 0)
    {
        FILE* f = fopen (filename, "a");
        int event_rank;

        for (i = 0; i < size; i++)
        {
            fprintf (f, "'%i'%i\n", i, write_sizes[i]);
        }

        // Write the labels
        for (i = 0; i < fd->group->prev_timing_obj->internal_count; i++)
        {
            fprintf (f, ":%i:%s\n", ADIOS_TIMING_MAX_USER_TIMERS + i,
                     fd->group->prev_timing_obj->names[ADIOS_TIMING_MAX_USER_TIMERS + i]); 
        }

        // Now the event data
        i = 0;
        for (event_rank = 0; event_rank < size; event_rank++)
        {
            for ( ; i < displs[event_rank] + counts[event_rank]; i++) 
            {
                fprintf (f, "%i,%i%s,%f\n", event_rank, events[i].type,
                         events[i].is_start?"S":"E", events[i].time);
            }
        }

        fclose(f);
    }


    MPI_Type_free (&event_type);

    if (rank == 0)
    {
        if (counts)
            free (counts);
        free (displs);
        free (events);
        free (write_sizes);
    }

#else
    log_warn ("Timing events are not currently available.\n"
              "To use the timing events, you must enable them when building ADIOS.\n"
              "Use --enable-timer-events during the configuration step.\n");
#endif

}
Example #3
0
void SocketServer::handle_conn(int sockfd)
{
    //MPI_CONNECTION_INIT
    // TODO: check this! 
    int argc = 0;
    
    #ifndef NDEBUG
        std::cout << "INFO" << ": trying MPI_Init " << std::endl;
    #endif
    MPI_Init( &argc, NULL );
    #ifndef NDEBUG
        std::cout << "INFO" << ": ... done " << std::endl;
    #endif
    
    // Create MPI Structure
    int sizeOfData;
    MPI_Type_size( MPI_INT,&sizeOfData );
    int array_of_block_lengths[2] = {1, 1};
    MPI_Aint array_of_displacements[2] = {0, sizeOfData};
    MPI_Datatype array_of_types[2] = { MPI_INT, MPI_INT };

    MPI_Type_create_struct(2, array_of_block_lengths, array_of_displacements, array_of_types, &ArgListType);
    MPI_Type_commit(&ArgListType);
    // End of MPI struct
    
    client = MPI_COMM_WORLD;

    #ifndef NDEBUG
        std::cout << "DEBUG: Waiting for IR\n" << std::endl;
    #endif
    
    MPI_Status status;
    int mpi_server_tag = MPI_SERVER_TAG;
    int myrank;
    MPI_Comm_rank(client, &myrank);
    int mpi_server_rank =0;
    
    // TODO: check this! 
    if(myrank==0)
          mpi_server_rank = 1;
    
    int incomingMessageSize=0;    
    MPI_Probe(MPI_ANY_SOURCE, mpi_server_tag, client, &status);
    MPI_Get_count(&status,MPI_CHAR,&incomingMessageSize);    
    char *module_ir_buffer = (char *) calloc(incomingMessageSize + 1 , sizeof(char));    
    MPI_Recv(module_ir_buffer, incomingMessageSize + 1, MPI_CHAR, MPI_ANY_SOURCE, mpi_server_tag, client, &status);
    
    #ifndef NDEBUG
        std::cout << "DEBUG: Recieved IR\n" << std::endl;
    #endif
  
    auto backend = parseIRtoBackend(module_ir_buffer);
    // notify client that calls can be accepted now by sending time taken for optimizing module and initialising backend
    const std::string readyStr(std::to_string(TimeDiffOpt.count()) + ":" + std::to_string(TimeDiffInit.count()));
    MPI_Send((void *)readyStr.c_str(), readyStr.size() , MPI_CHAR, mpi_server_rank, mpi_server_tag, client);
    free(module_ir_buffer);

    // initialise msg_buffer
    std::shared_ptr<char> msg_buffer((char*)calloc(MSG_BUFFER_SIZE, sizeof(char)), &free);
    while (1) {
        bzero(msg_buffer.get(), MSG_BUFFER_SIZE);
        // first acquire message length
        unsigned msg_length;
        auto UINT_MAX_str_len = std::to_string(UINT_MAX).length();
        int num_chars = recv(sockfd, msg_buffer.get(), UINT_MAX_str_len + 1, 0);
    
        if (num_chars == 0) {
            std::cout << "Client assigned to process " << getpid() << " has closed its socket 3 \n";
            exit(0);
        }

        if (num_chars < 0)
            error("ERROR, could not read from socket");

        #ifndef NDEBUG
            //std::cout << getpid() << ": got message \"" << msg_buffer << "\"\n"; // TODO command line argument to print messages
            std::cout << getpid() << ": got message \n";
        #endif

        llvm::Function* calledFunction = nullptr;
        std::vector<llvm::GenericValue> args;
        std::list<std::vector<llvm::GenericValue>::size_type> indexesOfPointersInArgs;
        llvm::GenericValue result = handleCall(backend.get(), msg_buffer.get(), calledFunction, args, indexesOfPointersInArgs);

        // reset buffer and write time taken to buffer
        bzero(msg_buffer.get(), MSG_BUFFER_SIZE);
        sprintf(msg_buffer.get(), ";%ld", (long)TimeDiffLastExecution.count());

        //MPI_DATA_MOVEMENT
        //Send data back to the client
        //Create the MPI data structure
        //allocate memory for struct    
        #ifndef TIMING 
            auto StartTime = std::chrono::high_resolution_clock::now();
        #endif  
    
        struct ArgumentList argList[MAX_NUMBER_OF_ARGUMENTS];
        MPI_Status status;

        //Create the structure
        int structSize=0;
    
        for (const auto& indexOfPtr : indexesOfPointersInArgs) {
            auto paramType = calledFunction->getFunctionType()->getParamType(indexOfPtr);
            while (paramType->getTypeID() == llvm::Type::ArrayTyID || paramType->getTypeID() == llvm::Type::PointerTyID)
            paramType = llvm::cast<llvm::SequentialType>(paramType)->getElementType();

            if (paramType->getTypeID() == llvm::Type::IntegerTyID) {
                argList[structSize].typeofArg = ENUM_MPI_INT;
            } else {
                argList[structSize].typeofArg = ENUM_MPI_DOUBLE;
            }
            argList[structSize].sizeOfArg =argumentList[indexOfPtr].sizeOfArg;
            structSize++;
        }

        #ifndef NDEBUG
            std::cout << "\nMPI SERVER: Sending message back from server to client";
            std::cout.flush();
        #endif


        #ifndef NDEBUG
            std::cout << "\nMPI SERVER: Sending MPI Header";
            std::cout.flush();

            for (int i=0; i<structSize; i++) {
                std::cout <<  "\n MPI Sent DS : Size : " << argList[i].sizeOfArg << "  Type" << argList[i].typeofArg ;
                std::cout.flush();
            }
        #endif
        MPI_Send(argList, structSize, ArgListType, mpi_server_rank, mpi_server_tag, client);

        #ifndef NDEBUG
            std::cout << "\nMPI SERVER: Sent MPI Header";
            std::cout.flush();

            std::cout << "\nMPI SERVER: Sending data";
            std::cout.flush();
        #endif

        //Start sending individual arrays
        for (const auto& indexOfPtr : indexesOfPointersInArgs) {
            auto paramType = calledFunction->getFunctionType()->getParamType(indexOfPtr);
            while (paramType->getTypeID() == llvm::Type::ArrayTyID || paramType->getTypeID() == llvm::Type::PointerTyID)
            paramType = llvm::cast<llvm::SequentialType>(paramType)->getElementType();

            if (paramType->getTypeID() == llvm::Type::IntegerTyID) {
            MPI_Send(args[indexOfPtr].PointerVal,argList[indexOfPtr].sizeOfArg, MPI_INT, mpi_server_rank, mpi_server_tag, client);
            } else {
            MPI_Send(args[indexOfPtr].PointerVal, argList[indexOfPtr].sizeOfArg, MPI_DOUBLE, mpi_server_rank, mpi_server_tag, client);
            }
            free(args[indexOfPtr].PointerVal);
        }

        #ifndef TIMING 
            auto EndTime = std::chrono::high_resolution_clock::now();
            std::cout << "\n SERVR: MPI_DATA_TRANSFER S->C = " <<    std::chrono::duration_cast<std::chrono::microseconds>(EndTime - StartTime).count() << "\n";
        #endif
    
        #ifndef NDEBUG
            std::cout << "\nMPI SERVER: Data sent";
            std::cout.flush();

            std::cout << "\nMPI SERVER: Return Messages sent";
            std::cout.flush();
        #endif
        
        char returnValStr[MAX_VAL_SIZE];
        switch (calledFunction->getReturnType()->getTypeID()) {
            case llvm::Type::VoidTyID:
                sprintf(returnValStr, ":");
                break;
            case llvm::Type::FloatTyID:
                sprintf(returnValStr, ":%a", result.FloatVal);
                break;
            case llvm::Type::DoubleTyID:
                sprintf(returnValStr, ":%la", result.DoubleVal);
                break;
            case llvm::Type::X86_FP80TyID:
                returnValStr[0]=':';
                llvm::APFloat(llvm::APFloat::x87DoubleExtended, result.IntVal).convertToHexString(returnValStr+1, 0U, false, llvm::APFloat::roundingMode::rmNearestTiesToEven);
                break;
            case llvm::Type::FP128TyID:
                returnValStr[0]=':';
                llvm::APFloat(llvm::APFloat::IEEEquad, result.IntVal).convertToHexString(returnValStr+1, 0U, false, llvm::APFloat::roundingMode::rmNearestTiesToEven);
                break;
            case llvm::Type::IntegerTyID: // Note: LLVM does not differentiate between signed/unsiged int types
                sprintf(returnValStr, ":%s", result.IntVal.toString(16,false).c_str());
                break;
            default:
                error(std::string("ERROR, LLVM TypeID " + std::to_string(calledFunction->getReturnType()->getTypeID()) + " of result of function \"" + calledFunction->getName().str() + "\" is not supported").c_str());
        }
        strcat(msg_buffer.get(), returnValStr);

        //Send the message
        MPI_Send(msg_buffer.get(), strlen(msg_buffer.get()), MPI_CHAR, mpi_server_rank, mpi_server_tag, client);
    
        MPI_Type_free(&ArgListType);
    
        // TODO: check this!
        MPI_Finalize();
    }
}
Example #4
0
static int test_indexed_with_zeros(char *filename, int testcase)
{
    int i, rank, np, buflen, num, err, nr_errors=0;
    int  nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN];
    MPI_File fh;
    MPI_Status status;
    MPI_Datatype filetype;
    MPI_Datatype types[MAXLEN];
    MPI_Aint addrs[MAXLEN];

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    /* set up the number of integers to write in each iteration */
    for (i=0; i<MAXLEN; i++) nelms[i] = 0;
    if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1;
    if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1;

    /* pre-fill the file with integers -999 */
    if (rank == 0) {
        for (i=0; i<MAXLEN; i++) buf[i] = -999;
	err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY,
		MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* define a filetype with spurious leading zeros */
    buflen = num = 0;
    for (i=0; i<MAXLEN; i++) {
        buflen       += nelms[i];
        indices[num]  = i;
        addrs[num] = i*sizeof(int);
        blocklen[num] = nelms[i];
        types[num] = MPI_INT;
        num++;
    }
    switch (testcase) {
	case INDEXED:
	    MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype);
	    break;
	case HINDEXED:
	    MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype);
	    break;
	case STRUCT:
	    MPI_Type_create_struct(num, blocklen, addrs, types, &filetype);
	    break;
	default:
	    fprintf(stderr, "unknown testcase!\n");
	    return(-100);

    }

    MPI_Type_commit(&filetype);

    /* initialize write buffer and write to file*/
    for (i=0; i<MAXLEN; i++) buf[i] = 1;
    err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
    err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view");
    err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all");
    MPI_Type_free(&filetype);
    err = MPI_File_close(&fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");

    /* read back and check */
    if (rank == 0) {
        err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
        for (i=0; i<MAXLEN; i++) {
            if (buf[i] < 0) {
		nr_errors++;
                printf("Error: unexpected value for case %d at buf[%d] == %d\n",
			testcase,i,buf[i]);
	    }
	}
    }
    return nr_errors;
}
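
Note that MPI_Type_hindexed, exercised by the HINDEXED branch above, was deprecated in MPI-2 and removed in MPI-3.0. A minimal sketch of the equivalent call for newer MPI versions, using the same blocklen/addrs arrays built in the loop above:

#include <mpi.h>

static void make_hindexed_filetype(int num, int blocklen[], MPI_Aint addrs[],
                                   MPI_Datatype *filetype)
{
    /* drop-in replacement for MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, filetype) */
    MPI_Type_create_hindexed(num, blocklen, addrs, MPI_INT, filetype);
    MPI_Type_commit(filetype);
}
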
Example #5
0
int
getparams(int argc, char *argv[], pparams *params, FILE **gridfile, 
      FILE **statusfile, MPI_Datatype *pparams_dt, int rank)
{
   MPI_Aint pparams_displ[NUM_PARAMS];
   int   arg;

   /* Compute the displacements necessary to create a new MPI datatype. */
   pparams_displ[0] = (size_t)&(params->dx) - (size_t)params;
   pparams_displ[1] = (size_t)&(params->dt) - (size_t)params;
   pparams_displ[2] = (size_t)&(params->D) - (size_t)params;
   pparams_displ[3] = (size_t)&(params->ntotal) - (size_t)params;
   pparams_displ[4] = (size_t)&(params->ttotal) - (size_t)params;
   pparams_displ[5] = (size_t)&(params->l) - (size_t)params;
   pparams_displ[6] = (size_t)&(params->h) - (size_t)params;
   pparams_displ[7] = (size_t)&(params->freq) - (size_t)params;

   /* Create new MPI datatype. */
   MPI_Type_create_struct(NUM_PARAMS, 
         pparams_blength,
         pparams_displ,
         pparams_type,
         pparams_dt);

   MPI_Type_commit(pparams_dt);

   /* Only rank 0 has to parse the parameters. */
   if (rank > 0)
      return EX_OK;

   params->dx = -1;
   params->dt = -1;
   params->D = -1;
   params->l = 0;
   params->h = 0;
   params->freq = -1;
   *gridfile = NULL;

   while ((arg = getopt(argc, argv, "x:D:t:f:s:h:l:g:")) != -1) {
      switch (arg) {
         case 'x':
            params->dx = (grid_type)strtof(optarg, NULL);
            break;
         case 'D':
            params->D = (grid_type)strtof(optarg, NULL);
            break;
         case 't':
            params->dt = (grid_type)strtof(optarg, NULL);
            break;
         case 'g':
            if ((*gridfile = fopen(optarg, "w")) == NULL) 
               return EX_CANTCREAT;
            break;
         case 's':
            if ((*statusfile = fopen(optarg, "a")) == NULL) 
               return EX_CANTCREAT;
            break;
         case 'l':
            params->l = (int)strtol(optarg, NULL, 10);
            break;
         case 'h':
            params->h = (int)strtol(optarg, NULL, 10);
            break;
         case 'f':
            params->freq = (int)strtol(optarg, NULL, 10);
            break;
         default:
            usage();
      }
   }
   argc -= optind;
   argv += optind;

   /* Although this could be computed every time, we prefer storing the values.
   */
   params->ntotal = (int)(1 / params->dx);
   params->ttotal = (int)(1 / params->dt);

   /* Do some sanity check. */
   if (params->ntotal < 1) {
      warnx("ntotal > 1");
      usage();
   }
   if (params->D < 0) {
      warnx("D >= 0");
      usage();
   }   
   if (*gridfile == NULL) {
      warnx("Could not open a file to store grid points.");
      usage();
   }
   if (params->l == 0 || params->h == 0) {
      warnx("please specify the processor dimensions of the Grid.");
      usage();
   }
   if (params->freq < 0) {
      warnx("frequency >= 0");
      usage();
   }


   return EX_OK;
}
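
getparams() relies on the globals pparams_blength and pparams_type, which are not part of this excerpt. A sketch of what they presumably look like, assuming grid_type is double and the pparams field order matches the eight displacements computed above (both are assumptions, since the real header is not shown):

#include <mpi.h>

#define NUM_PARAMS 8

typedef double grid_type;   /* assumed */

typedef struct {
   grid_type dx, dt, D;
   int ntotal, ttotal;
   int l, h, freq;
} pparams;

static int pparams_blength[NUM_PARAMS] = { 1, 1, 1, 1, 1, 1, 1, 1 };
static MPI_Datatype pparams_type[NUM_PARAMS] = {
   MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE,
   MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT
};
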
void compute_process(int agents_total, int nreps, int world_width, int world_height)
{
	int np, pid;
	MPI_Comm_rank(MPI_COMM_WORLD, &pid);
	MPI_Comm_size(MPI_COMM_WORLD, &np);
	int server_process = np - 1;
	MPI_Status status;

	/* create a type for struct agent */
	const int nitems=5;
   	int blocklengths[5] = {1,1,1,1,1};
   	MPI_Datatype types[5] = {MPI_INT, MPI_INT, MPI_INT, MPI_FLOAT, MPI_FLOAT};
	MPI_Datatype mpi_agent_type;
	MPI_Aint offsets[5];

	offsets[0] = offsetof(agent, id);
    	offsets[1] = offsetof(agent, x);
    	offsets[2] = offsetof(agent, y);
    	offsets[3] = offsetof(agent, z);
    	offsets[4] = offsetof(agent, w);

	MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_agent_type);
	MPI_Type_commit(&mpi_agent_type);

	unsigned int num_bytes = agents_total * sizeof(float4);
	unsigned int num_halo_points = RADIO * world_width;
	unsigned int num_halo_bytes = num_halo_points * sizeof(short int);

	//unsigned int world_node_height = (world_height / (np-1)) + (RADIO * 2);
	//if(pid == 0 or pid == np - 2)
	//	world_node_height -= RADIO;
 
	size_t size_world = world_width * world_height * sizeof(short int);
	short int *h_world = (short int *)malloc(size_world);
	*h_world = 0;
	short int *d_world;

	for(int j = 0; j < world_width * world_height; j++)
	{	
		h_world[j] = 0;
	}

	/* alloc host memory */
	agent *h_agents_in = (agent *)malloc(agents_total * sizeof(agent));
	//agent *d_agents_in;
	float4 *h_agents_pos = (float4 *)malloc(num_bytes);
	float4 *d_agents_pos;
	
	
	//MPI_Recv(rcv_address, num_points, MPI_FLOAT, server_process, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
	MPI_Recv(h_agents_in, agents_total, mpi_agent_type, server_process, 0, MPI_COMM_WORLD, &status);

	//Initialize world
	for( int i = 0; i < agents_total; i++)
	{
		h_world[(world_width * (h_agents_in[i].y - 1) ) + h_agents_in[i].x] = (h_agents_in[i].x!=0?1:0);
		//if(h_world[(world_width * (h_agents_in[i].y - 1) ) + h_agents_in[i].x] == 1)
			//printf("world x: %d, y: %d\n", h_agents_in[i].x, h_agents_in[i].y);	
		h_agents_pos[i].x = h_agents_in[i].x;
		h_agents_pos[i].y = h_agents_in[i].y;
		h_agents_pos[i].z = h_agents_in[i].z;
		h_agents_pos[i].w = h_agents_in[i].w;
	}

/***
	if(pid ==1)
{
	int k=0;
	for(int j = 0; j < world_width * world_height; j++)
	{	
		if ( j%96 == 0 and j>0)
		{
			k++;
			printf("%d row: %d\n", h_world[j], k);
		}
		else
			printf("%d ", h_world[j]);
	}
}
***/

	// Error code to check return values for CUDA calls
        cudaError_t err = cudaSuccess;

	// Allocate the device pointer
    	err = cudaMalloc((void **)&d_world, size_world);

	if (err != cudaSuccess)
	{
        	fprintf(stderr, "Failed to allocate device pointer (error code %s)!\n", cudaGetErrorString(err));
        	exit(EXIT_FAILURE);
        }

	err = cudaMemcpy(d_world, h_world, size_world, cudaMemcpyHostToDevice);

	if (err != cudaSuccess)
    	{
        	fprintf(stderr, "Failed to copy pointer from host to device (error code %s)!\n", cudaGetErrorString(err));
        	exit(EXIT_FAILURE);
    	}


	//http://cuda-programming.blogspot.com.es/2013/02/cuda-array-in-cuda-how-to-use-cuda.html
	//http://stackoverflow.com/questions/17924705/structure-of-arrays-vs-array-of-structures-in-cuda
	// Allocate the device pointer

    	err = cudaMalloc((void **)&d_agents_pos, num_bytes);

	if (err != cudaSuccess)
	{
        	fprintf(stderr, "Failed to allocate device pointer (error code %s)!\n", cudaGetErrorString(err));
        	exit(EXIT_FAILURE);
        }

	err = cudaMemcpy(d_agents_pos, h_agents_pos, num_bytes, cudaMemcpyHostToDevice);

	if (err != cudaSuccess)
    	{
        	fprintf(stderr, "Failed to copy pointer from host to device (error code %s)!\n", cudaGetErrorString(err));
        	exit(EXIT_FAILURE);
    	}


	launch_kernel(d_agents_pos, d_world, world_width, world_height );

	MPI_Barrier( MPI_COMM_WORLD);

#ifdef DEBUG
//	printf("pid: %d\n", pid);
//	display_data(h_agents_in, agents_total );
#endif

	MPI_Send(h_agents_in, agents_total, mpi_agent_type, server_process, DATA_COLLECT, MPI_COMM_WORLD);


	/* Release resources */
	free(h_agents_in); 
/*	
	free(h_output);
	cudaFreeHost(h_left_boundary); cudaFreeHost(h_right_boundary);
	cudaFreeHost(h_left_halo); cudaFreeHost(h_right_halo);
	cudaFree(d_input); cudaFree(d_output);
*/
}
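
For reference, the offsetof()/types arrays in compute_process imply the following agent layout; this is a sketch of the assumed definition (the real header, like the float4 type, is not part of this excerpt):

typedef struct {
	int   id;
	int   x;
	int   y;
	float z;
	float w;
} agent;
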
Example #7
0
/**
 * @brief Island-based genetic algorithm model running in different modes: Sequential, CPU or GPU only and Heterogeneous (full cooperation between all available OpenCL devices)
 * @param subpops The initial subpopulations
 * @param devicesObject Structure containing the OpenCL variables of a device
 * @param trDataBase The training database which will contain the instances and the features
 * @param selInstances The instances choosen as initial centroids
 * @param conf The structure with all configuration parameters
 */
void agIslands(Individual *const subpops, CLDevice *const devicesObject, const float *const trDataBase, const int *const selInstances, const Config *const conf) {


	/********** MPI variables ***********/

	MPI_Datatype Individual_MPI_type;
	MPI_Datatype array_of_types[3] = {MPI_UNSIGNED_CHAR, MPI_FLOAT, MPI_INT};
	int array_of_blocklengths[3] = {conf -> nFeatures, conf -> nObjectives + 1, 2};
	MPI_Aint array_of_displacement[3];
	MPI_Status status;


	/******* Measure and start the master-worker algorithm *******/

	MPI_Barrier(MPI_COMM_WORLD);


	/******* Each process will dynamically request subpopulations *******/

	// Master
	if (conf -> mpiRank == 0) {
		double timeStart = omp_get_wtime();
		int *nIndsFronts0 = new int[conf -> nSubpopulations];
		int finalFront0;

		// The master receives the number of subpopulations that each worker can process
		int workerCapacities[conf -> mpiSize - 1];
		MPI_Request requests[conf -> mpiSize - 1];
		for (int p = 1; p < conf -> mpiSize; ++p) {
			MPI_Irecv(&workerCapacities[p - 1], 1, MPI_INT, p, MPI_ANY_TAG, MPI_COMM_WORLD, &requests[p - 1]);
		}

		// The "Individual" datatype must be converted to a MPI datatype and commit it
		array_of_displacement[0] = (size_t) &(subpops[0].chromosome[0]) - (size_t) &(subpops[0]);
		array_of_displacement[1] = (size_t) &(subpops[0].fitness[0]) - (size_t) &(subpops[0]);
		array_of_displacement[2] = (size_t) &(subpops[0].rank) - (size_t) &(subpops[0]);

		MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacement, array_of_types, &Individual_MPI_type);
		MPI_Type_commit(&Individual_MPI_type);

		MPI_Waitall(conf -> mpiSize - 1, requests, MPI_STATUSES_IGNORE);
		int maxChunk = std::min(*std::max_element(workerCapacities, workerCapacities + conf -> mpiSize - 1), conf -> nSubpopulations);


		/********** In each migration the individuals are exchanged between subpopulations of different nodes  ***********/

		for (int gMig = 0; gMig < conf -> nGlobalMigrations; ++gMig) {

			// Send some work to the workers
			int nextWork = 0;
			int sent = 0;
			int mpiTag = (gMig == 0) ? INITIALIZE : IGNORE_VALUE;
			for (int p = 1; p < conf -> mpiSize && nextWork < conf -> nSubpopulations; ++p) {
				int finallyWork = std::min(workerCapacities[p - 1], conf -> nSubpopulations - nextWork);
				int popIndex = nextWork * conf -> familySize;
				MPI_Isend(subpops + popIndex, finallyWork * conf -> familySize, Individual_MPI_type, p, mpiTag, MPI_COMM_WORLD, &requests[p - 1]);
				nextWork += finallyWork;
				++sent;
			}
			MPI_Waitall(sent, requests, MPI_STATUSES_IGNORE);

			// Dynamically distribute the subpopulations
			int receivedWork = 0;
			int receivedPtr = 0;
			while (nextWork < conf -> nSubpopulations) {
				MPI_Recv(subpops + (receivedPtr * conf -> familySize), maxChunk * conf -> familySize, Individual_MPI_type, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
				MPI_Recv(nIndsFronts0 + receivedPtr, maxChunk, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
				MPI_Get_count(&status, MPI_INT, &receivedWork);
				receivedPtr += receivedWork;
				int finallyWork = std::min(workerCapacities[status.MPI_SOURCE - 1], conf -> nSubpopulations - nextWork);
				int popIndex = nextWork * conf -> familySize;
				MPI_Send(subpops + popIndex, finallyWork * conf -> familySize, Individual_MPI_type, status.MPI_SOURCE, mpiTag, MPI_COMM_WORLD);
				nextWork += finallyWork;
			}

			// Receive the remaining work
			while (receivedPtr < conf -> nSubpopulations) {
				MPI_Recv(subpops + (receivedPtr * conf -> familySize), maxChunk * conf -> familySize, Individual_MPI_type, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
				MPI_Recv(nIndsFronts0 + receivedPtr, maxChunk, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
				MPI_Get_count(&status, MPI_INT, &receivedWork);
				receivedPtr += receivedWork;
			}

			// Migration process between subpopulations of different nodes
			if (gMig != conf -> nGlobalMigrations - 1 && conf -> nSubpopulations > 1) {
				migration(subpops, conf -> nSubpopulations, nIndsFronts0, conf);

				#pragma omp parallel for
				for (int sp = 0; sp < conf -> nSubpopulations; ++sp) {
					int popIndex = sp * conf -> familySize;

					// The crowding distance of the subpopulation is initialized again for the next nonDominationSort
					for (int i = popIndex;  i < popIndex + conf -> subpopulationSize; ++i) {
						subpops[i].crowding = 0.0f;
					}
					nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf);
				}
			}
		}

		// Notify all workers that the work has finished
		for (int p = 1; p < conf -> mpiSize; ++p) {
			  MPI_Isend(0, 0, MPI_INT, p, FINISH, MPI_COMM_WORLD, &requests[p - 1]);
		}


		/********** Recombination process ***********/

		if (conf -> nSubpopulations > 1) {
			for (int sp = 0; sp < conf -> nSubpopulations; ++sp) {
				memcpy(subpops + (sp * conf -> subpopulationSize), subpops + (sp * conf -> familySize), conf -> subpopulationSize * sizeof(Individual));
			}

			// The crowding distance of the subpopulation is initialized again for the next nonDominationSort
			#pragma omp parallel for
			for (int i = 0;  i < conf -> worldSize; ++i) {
				subpops[i].crowding = 0.0f;
			}
			finalFront0 = std::min(conf -> subpopulationSize, nonDominationSort(subpops, conf -> worldSize, conf));
		}
		else {
			finalFront0 = nIndsFronts0[0];
		}

		// All processes must reach this point in order to provide a real time measure
		MPI_Waitall(conf -> mpiSize - 1, requests, MPI_STATUSES_IGNORE);
		MPI_Barrier(MPI_COMM_WORLD);
		fprintf(stdout, "%.10g\n", (omp_get_wtime() - timeStart) * 1000.0);

		// Get the hypervolume
		fprintf(stdout, "%.6g\n", getHypervolume(subpops, finalFront0, conf));

		// Generation of the data file for Gnuplot
		generateDataPlot(subpops, finalFront0, conf);

		// Exclusive variables used by the master are released
		delete[] nIndsFronts0;
		MPI_Type_free(&Individual_MPI_type);
	}

	// Workers
	else {
		// This is only for sequential benchmark
		const bool isSequential = (conf -> nDevices == 0 && conf -> ompThreads < 2);
		const int nDevices = (isSequential) ? conf -> nSubpopulations : std::max(1, conf -> nDevices + (conf -> ompThreads > 0));
		int nChildren[nDevices];
		int nIndsFronts0[nDevices];
		MPI_Request requests[2];

		// The worker tells the master how many subpopulations it can process
		MPI_Isend(&nDevices, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &(requests[0]));
		MPI_Request_free(&(requests[0]));

		// Each worker will compute at most as many subpopulations as it has OpenCL devices
		Individual *subpops = new Individual[nDevices * conf -> familySize];

		// Create MPI datatype for the individuals and commit it
		array_of_displacement[0] = (size_t) &(subpops[0].chromosome[0]) - (size_t) &(subpops[0]);
		array_of_displacement[1] = (size_t) &(subpops[0].fitness[0]) - (size_t) &(subpops[0]);
		array_of_displacement[2] = (size_t) &(subpops[0].rank) - (size_t) &(subpops[0]);

		MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacement, array_of_types, &Individual_MPI_type);
		MPI_Type_commit(&Individual_MPI_type);

		// The worker receives at most as many subpopulations as it has OpenCL devices
		MPI_Recv(subpops, nDevices * conf -> familySize, Individual_MPI_type, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);

		while (status.MPI_TAG != FINISH) {
			int receivedWork;
			MPI_Get_count(&status, Individual_MPI_type, &receivedWork);
			int nSubpopulations = receivedWork / conf -> familySize;
			int nThreads = (isSequential) ? 1 : std::min(nDevices, nSubpopulations);

			if (status.MPI_TAG == INITIALIZE) {


				/********** Multi-objective individuals evaluation over all subpopulations ***********/

				omp_set_nested(1);
				#pragma omp parallel for num_threads(nThreads) schedule(dynamic, 1)
				for (int sp = 0; sp < nSubpopulations; ++sp) {
					int popIndex = sp * conf -> familySize;
					if (isSequential) {
						evaluationCPU(subpops + popIndex, conf -> subpopulationSize, trDataBase, selInstances, 1, conf);
					}
					else if (nSubpopulations == 1) {
						evaluationHET(subpops + popIndex, conf -> subpopulationSize, devicesObject, nDevices, trDataBase, selInstances, conf);
					}
					else {
						evaluationHET(subpops + popIndex, conf -> subpopulationSize, &devicesObject[omp_get_thread_num()], 1, trDataBase, selInstances, conf);
					}

					// Fitness normalization
					normalizeFitness(subpops + popIndex, conf -> subpopulationSize, conf);
				}


				/********** Sort each subpopulation with the "Non-Domination-Sort" method ***********/

				#pragma omp parallel for
				for (int sp = 0; sp < nSubpopulations; ++sp) {
					int popIndex = sp * conf -> familySize;
					nIndsFronts0[sp] = nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf);
				}
			}


			/********** In each migration the individuals are exchanged between subpopulations of the same node  ***********/

			int nLocalMigrations = (nSubpopulations > 1) ? conf -> nLocalMigrations : 1;
			for (int lMig = 0; lMig < nLocalMigrations; ++lMig) {


				/********** Start the evolution process ***********/

				for (int g = 0; g < conf -> nGenerations; ++g) {


					/********** Fill the mating pool and perform crossover ***********/

					#pragma omp parallel for
					for (int sp = 0; sp < nSubpopulations; ++sp) {
						const int *const pool = getPool(conf);
						int popIndex = sp * conf -> familySize;	
						nChildren[sp] = crossoverUniform(subpops + popIndex, pool, conf);

						// Local resources used are released
						delete[] pool;
					}


					/********** Multi-objective individuals evaluation over all subpopulations ***********/

					#pragma omp parallel for num_threads(nThreads) schedule(dynamic, 1)
					for (int sp = 0; sp < nSubpopulations; ++sp) {
						int popIndex = sp * conf -> familySize;
						if (isSequential) {
							evaluationCPU(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], trDataBase, selInstances, 1, conf);
						}
						else if (nSubpopulations == 1) {
							evaluationHET(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], devicesObject, nDevices, trDataBase, selInstances, conf);
						}
						else {
							evaluationHET(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], &devicesObject[omp_get_thread_num()], 1, trDataBase, selInstances, conf);
						}

						// Fitness normalization
						normalizeFitness(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], conf);
					}


					/********** The crowding distance of the parents is initialized again for the next nonDominationSort ***********/

					#pragma omp parallel for
					for (int sp = 0; sp < nSubpopulations; ++sp) {
						int popIndex = sp * conf -> familySize;
						for (int i = popIndex;  i < popIndex + conf -> subpopulationSize; ++i) {
							subpops[i].crowding = 0.0f;
						}

						// Replace subpopulation
						// Parents and children are sorted by rank and crowding distance.
						// The first "subpopulationSize" individuals will advance the next generation
						nIndsFronts0[sp] = nonDominationSort(subpops + popIndex, conf -> subpopulationSize + nChildren[sp], conf);
					}
				}

				// Migration process between subpopulations of the same node
				if (lMig != nLocalMigrations - 1 && nSubpopulations > 1) {
					migration(subpops, nSubpopulations, nIndsFronts0, conf);

					#pragma omp parallel for
					for (int sp = 0; sp < nSubpopulations; ++sp) {
						int popIndex = sp * conf -> familySize;

						// The crowding distance of the subpopulation is initialized again for the next nonDominationSort
						for (int i = popIndex;  i < popIndex + conf -> subpopulationSize; ++i) {
							subpops[i].crowding = 0.0f;
						}
						nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf);
					}
				}
			}

			// The worker sends the already evaluated subpopulations to the master and requests new work
			MPI_Isend(subpops, receivedWork, Individual_MPI_type, 0, 0, MPI_COMM_WORLD, &(requests[0]));
			MPI_Isend(nIndsFronts0, nSubpopulations, MPI_INT, 0, 0, MPI_COMM_WORLD, &(requests[1]));
			MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
			MPI_Recv(subpops, nDevices * conf -> familySize, Individual_MPI_type, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
		}

		// All processes must reach this point in order to provide a real time measure
		MPI_Barrier(MPI_COMM_WORLD);

		// Exclusive variables used by the workers are released
		delete[] subpops;
		MPI_Type_free(&Individual_MPI_type);
	}
}
Example #8
0
int main(int argc, char ** argv){
  int my_id, root, ierr, num_procs;
  MPI_Status status;

  ierr = MPI_Init(&argc, &argv); // Create processes
  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
  ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

  /*Make MPI data type for Vars*/
  const int nitems=5;
  int blocklengths[5] = {1, 1, 1, 1, 1};
  MPI_Datatype types[5] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE};
  MPI_Datatype mpi_Vars;
  MPI_Aint offsets[5];

  offsets[0] = offsetof(Vars, mass);
  offsets[1] = offsetof(Vars, xvelocity);
  offsets[2] = offsetof(Vars, yvelocity);
  offsets[3] = offsetof(Vars, energy);
  offsets[4] = offsetof(Vars, press);

  MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_Vars);
  MPI_Type_commit(&mpi_Vars);
  /*start the program*/

  
  int N, type; N = num_procs*100;
  type = 1;
  int zones_to_do = N/num_procs;
  double dt; int count = 0;char str[80];

  FILE *fid, *finit;

  double dx = 1./(double)N;
  double t, T; t = 0.; T = .2;
  int num = 30;
  Vars * U = malloc((N+4)*(N+4)*sizeof(Vars)); init_sys(N+4, N+4, U, dx, dx, 1);
  if(my_id == 0){
    /*I am root*/
    
    finit = fopen("2Dinit.dat","w");
    Write_Cons(N+4, N+4, U, dx, dx, finit);
    fclose(finit);
    int count = 0;
    
  }
  while(t<T){
    //printf("before\n");
    dt = advance_system(N+4, N+4, U, dx, dx, my_id, zones_to_do, num_procs, mpi_Vars);
    t+=dt;    
    //break; 
    //printf("what time is it = %f\n", dt);
    /*Broadcast U*/
    ierr = MPI_Bcast(U, (N+4)*(N+4), mpi_Vars, 0, MPI_COMM_WORLD);
    /*
    if(my_id == 0){ 
      if( count % 1 == 0){
	sprintf(str, "T_%d.dat", count);
	fid = fopen(str, "w");
	Write_Cons(N+4, N+4, U, dx, dx, fid);
	fclose(fid);
	//printf("T=%f\n", t);
      }
      count += 1;
      }*/
  }
  if(my_id == 0){
    /*I am Root*/
    printf("%d\n", count);
    fid = fopen("22data.dat","w");
    Write_Cons(N+4, N+4, U, dx, dx, fid);
    fclose(fid);
  }
  free(U);
  MPI_Finalize();
}
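
The offsets/types arrays above imply that Vars is five consecutive doubles; this is a sketch of the assumed definition (the real header is not shown here):

typedef struct {
  double mass;
  double xvelocity;
  double yvelocity;
  double energy;
  double press;
} Vars;

Since all five members are doubles, MPI_Type_contiguous(5, MPI_DOUBLE, &mpi_Vars) would describe the same data, provided the struct really contains nothing else.
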
Example #9
0
void main(int argc, char **argv) {
	
	double start_t;
	double end_t;
	
	int my_rank, p, my_loc_rank, loc_p;
	complex *A;
	complex *B;
	complex *C;

	/* initialize MPI */
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &p);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	
	/* Create MPI Datatype for Complex */
    const int nitems=2;
    int          blocklengths[2] = {1,1};
    MPI_Datatype types[2] = {MPI_FLOAT, MPI_FLOAT};
    MPI_Aint     offsets[2];

    offsets[0] = offsetof(complex, r);
    offsets[1] = offsetof(complex, i);

    MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_complex);
    MPI_Type_commit(&mpi_complex);
	
	int workload = 512 / p;
	complex a[512*workload];
	complex b[512*workload];
	complex c[512*workload];
	
	
	/* Split the first two groups and a collector group*/
	if(my_rank == 0) {
		MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm1);
		MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &comm2);
		MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &commR2);
	} else if(my_rank < p/2) {
		MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm1);
		MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &comm2);
		MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &commR2);
	} else {
		MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank-(p/2), &comm1);
		MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank-(p/2), &comm2);	
		MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank-(p/2), &commR2);
	}
	
	/* Split the group for the latter two tasks */
	/* All processors may participate, we only need one group for the two tasks */
	MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm3);
	
	
	/* Initialize Data*/
	workload = 512 / (p/2);
	if(my_rank == 0) {
		A = malloc(512*512 * sizeof(complex));
		B = malloc(512*512 * sizeof(complex));
		C = malloc(512*512 * sizeof(complex));
		initialize_data(f1_name, A);
		start_t = MPI_Wtime();
	} else if(my_rank == p/2 || p == 1) {
		B = malloc(512*512 * sizeof(complex));
		initialize_data(f2_name, B);
	}
	
	if(my_rank < p/2) {
		MPI_Scatter(A, 512*workload, mpi_complex, 
		            a, 512*workload, mpi_complex,
		            0, comm1);
	} else {
		MPI_Scatter(B, 512*workload, mpi_complex, 
		            b, 512*workload, mpi_complex,
		            0, comm2);
	}
	
	/* 2D FFT on A */
	if(my_rank < p/2) {
	
		MPI_Comm_rank(comm1, &my_loc_rank);
		MPI_Comm_size(comm1, &loc_p);
		
		execute_fft(a, 1, loc_p, my_loc_rank);
		MPI_Gather(a, 512*workload, mpi_complex,
			       A, 512*workload, mpi_complex,
			       0, comm1);
		if(my_loc_rank == 0) {
			transpose(A);
		}
		MPI_Scatter(A, 512*workload, mpi_complex, 
				    a, 512*workload, mpi_complex,
				    0, comm1);
		execute_fft(a, 1, loc_p, my_loc_rank);
		
	} else if(my_rank >= p/2 || p == 1) {
	
		/* 2D FFT on B */
		MPI_Comm_rank(comm2, &my_loc_rank);
		MPI_Comm_size(comm2, &loc_p);
		execute_fft(b, 1, loc_p, my_loc_rank);
		MPI_Gather(b, 512*workload, mpi_complex,
		  	       B, 512*workload, mpi_complex,
			       0, comm2);
		if(my_loc_rank == 0) {
			transpose(B);
		}
		MPI_Scatter(B, 512*workload, mpi_complex, 
			        b, 512*workload, mpi_complex,
			        0, comm2);
		execute_fft(b, 1, loc_p, my_loc_rank);
	}
	
	/* Multiplication Step */
	workload = 512 / p;
	sync_tasks(a, b, A, B, p, my_rank);
	MPI_Scatter(A, 512*workload, mpi_complex, 
			        a, 512*workload, mpi_complex,
			        0, comm3);
	MPI_Scatter(B, 512*workload, mpi_complex, 
			        b, 512*workload, mpi_complex,
			        0, comm3);
	execute_mm(a, b, c, p, my_rank);
	
	/* 2D FFT on C */
	execute_fft(c, -1, p, my_rank);
	MPI_Gather(c, 512*workload, mpi_complex,
			   C, 512*workload, mpi_complex,
			   0, comm3);
	if(my_rank == 0) {
		transpose(C);
	}
	MPI_Scatter(C, 512*workload, mpi_complex, 
			   c, 512*workload, mpi_complex,
			   0, comm3);
	execute_fft(c, -1, p, my_rank);
	MPI_Gather(c, 512*workload, mpi_complex,
			   C, 512*workload, mpi_complex,
			   0, comm3);
	
	
	end_t = MPI_Wtime();
	
	if(my_rank == 0) {
		output_data(f_out, C);
		printf("\nElapsed time = %g s\n", end_t - start_t);
		printf("--------------------------------------------\n");
		free(A);
		free(B);
		free(C);
	}
	
	MPI_Finalize();
}
Example #10
0
int main(int argc, char* argv[]) {

	int* bodies_off;
	int* n_bodies_split;
	int n_local_bodies;
	const MPI_Comm comm = MPI_COMM_WORLD;
	FILE *inputf;
	FILE *outputf;
	double clockStart, clockEnd;
	int rc, n_proc, rank;

	rc = MPI_Init(&argc, &argv);
	if (rc != MPI_SUCCESS) {
		puts("MPI_Init failed");
		exit(-1);
	}

	MPI_Comm_size(comm, &n_proc);
	MPI_Comm_rank(comm, &rank);

	// create the MPI datatype
	MPI_Datatype bodytype;
	MPI_Datatype type[6] = { MPI_LB, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_UB };
	int block_len[6] = {1, 1, 3, 3, 3, 1};
	MPI_Aint disp[6];
	leaf_t example[2];

	MPI_Get_address(&example[0], &disp[0]);
	MPI_Get_address(&(example[0].mass), &disp[1]);
	MPI_Get_address(&(example[0].pos), &disp[2]);
	MPI_Get_address(&(example[0].vel), &disp[3]);
	MPI_Get_address(&(example[0].acc), &disp[4]);
	MPI_Get_address(&(example[1].acc), &disp[5]);
//	int i;
//	for(i = 6; i >= 0; --i)
//		disp[i] -= disp[0];

	disp[1] = disp[1] - disp[0];
	disp[2] = disp[2] - disp[0];
	disp[3] = disp[3] - disp[0];
	disp[4] = disp[4] - disp[0];
	disp[5] = disp[5] - disp[0];



	MPI_Type_create_struct(6, block_len, disp, type, &bodytype);

	MPI_Type_commit(&bodytype);
	bodies_off = malloc((n_proc + 1) * sizeof(int));
	n_bodies_split = malloc((n_proc) * sizeof(int));
	char* inputfile = argv[1];
	inputf = fopen(inputfile, "r");

	if (inputf == NULL) {
		printf("impossibile leggere da file");
		exit(1);
	}

	fscanf(inputf, "%d", &nbodies);
	fscanf(inputf, "%d", &steps);
	fscanf(inputf, "%lf", &dt);
	fscanf(inputf, "%lf", &eps);
	fscanf(inputf, "%lf", &tol);

	fclose(inputf);

	/* allocate these only after nbodies has been read from the input file */
	bodies = malloc(nbodies * sizeof(node_t*));
	leafs = malloc(nbodies * sizeof(leaf_t));

	if (rank == 0) {
		int i;

		create_bodies();

		quicksort(0, nbodies - 1);

		//	bublesort();
		//	int i = 0;
		//	for (i = 0; i < nbodies; i++) {
		//		printf("%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1],
		//				bodies[i]->pos[2]);
		//	}
		n_local_bodies = nbodies / n_proc;

		//split the particles following the shark & fish scheme
		//		split_bodies(n_proc, bodies_off, n_bodies_split);
		//		n_local_bodies = n_bodies_split[rank];
		//
		//		MPI_Bcast(n_bodies_split, n_proc, MPI_INT, 0, comm);

		MPI_Bcast(leafs, nbodies, bodytype, 0, comm);

		dthf = 0.5 * dt;
		epssq = eps * eps;
		itolsq = 1.0 / (tol * tol);

		clockStart = MPI_Wtime();
		int step = 0;
		root = NULL;
		for (step = 0; step < steps; step++) {
			compute_center_and_diameter();

			root = malloc(sizeof(struct node_t)); // "new" is like "malloc"
			double mass_root = 0.0;

			root->type = 1;
			root->mass = &mass_root;
			root->pos = center;
			root->cell.childs[0] = NULL;
			root->cell.childs[1] = NULL;
			root->cell.childs[2] = NULL;
			root->cell.childs[3] = NULL;
			root->cell.childs[4] = NULL;
			root->cell.childs[5] = NULL;
			root->cell.childs[6] = NULL;
			root->cell.childs[7] = NULL;

			double radius = diameter * 0.5;

			int i = 0;
			for (i = 0; i < nbodies; i++) {
				insert(root, bodies[i], radius); // this is how the data are passed by reference, i.e. the address of the structure the pointer points to is sent
			}
			curr = 0;
			compute_center_of_mass(&(*root));

			for (i = 0; i < n_local_bodies; i++) {
				compute_force(&(*root), &(*bodies[i]), diameter, step);
			}
			//		for (i = 0; i < nbodies; i++) {
			//		}

			deallocate_tree(root);

			//do the allgather here
			MPI_Allgather(leafs, n_local_bodies, bodytype, leafs,
					n_local_bodies, bodytype, comm);

			for (i = 0; i < nbodies; i++) {
				advance(&(*bodies[i]));
			}

			//		int p = 0;
			//		for (p = 0; p < nbodies; p++)
			//			printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1],
			//					bodies[p]->pos[2]);
			//		printf("*************************************** \n");
		}
		//	int i = 0;
		// after the execution!!
		//		int proc_rec = 1;
		//		while (proc_rec < n_proc) {
		//			MPI_Status status;
		//			int proc_rank;
		//			int cap = nbodies / n_proc;
		//			node_t temp[cap];
		//			MPI_Recv(temp, cap, bodytype, MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
		//					&status);
		//			proc_rank = status.MPI_SOURCE;
		//
		//			int idx = 0;
		//			for (idx = proc_rec * (cap); idx < cap; idx++)
		//				*bodies[idx] = temp[idx];
		//			proc_rec++;
		//		}
		clockEnd = MPI_Wtime();
		if (nbodies == 16384) {
			system("echo 'Host:' `hostname` >> output16384 ");
			outputf = fopen("output16384", "a");
			fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd
					- clockStart);
			for (i = 0; i < nbodies; i++) {
				fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0],
						bodies[i]->pos[1], bodies[i]->pos[2]);
			}
		} else if (nbodies == 32768) {
			system("echo 'Host:' `hostname` >> output32768 ");
			outputf = fopen("output32768", "a");
			fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd
					- clockStart);
			for (i = 0; i < nbodies; i++) {
				fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0],
						bodies[i]->pos[1], bodies[i]->pos[2]);
			}
		} else if (nbodies == 65536) {
			system("echo 'Host:' `hostname` >> output65536 ");
			outputf = fopen("output65536", "a");
			fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd
					- clockStart);
			for (i = 0; i < nbodies; i++) {
				fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0],
						bodies[i]->pos[1], bodies[i]->pos[2]);
			}
		} else {
			system("echo 'Host:' `hostname` >> output ");
			outputf = fopen("output", "a");
			fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd
					- clockStart);
			for (i = 0; i < nbodies; i++) {
				fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0],
						bodies[i]->pos[1], bodies[i]->pos[2]);
			}
		}

		fflush(outputf);
		fclose(outputf);
		printf("Esecuzione completata\n");

	} else {

		int low = 1, up = 0;
		int i;
		dthf = 0.5 * dt;
		epssq = eps * eps;
		itolsq = 1.0 / (tol * tol);

		//	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		//		printf("Inizializzazione della libreria di papi fallita \n");
		//		exit(1);
		//	}
		//
		//	if (PAPI_create_eventset(&event_set) != PAPI_OK) {
		//		printf("E' andata a male la creazione dell'eventSet \n");
		//		exit(1);
		//	}
		//
		//	if (PAPI_add_events(event_set, events, 2) != PAPI_OK) {
		//		printf("E' andata a male l'aggiunta degli eventi\n");
		//		exit(1);
		//	}

		n_local_bodies = nbodies / n_proc;
		MPI_Bcast(leafs, nbodies, bodytype, 0, comm);
		int step = 0;
		root = NULL;

		low += (rank * n_local_bodies);

		up = low + n_local_bodies;

		//	PAPI_start(event_set);
		//	clockStart = PAPI_get_real_usec();
		for (step = 0; step < steps; step++) {
			compute_center_and_diameter();

			root = malloc(sizeof(struct node_t)); // "new" is like "malloc"
			double mass_root = 0.0;

			root->type = 1;
			root->mass = &mass_root;
			root->pos = center;
			root->cell.childs[0] = NULL;
			root->cell.childs[1] = NULL;
			root->cell.childs[2] = NULL;
			root->cell.childs[3] = NULL;
			root->cell.childs[4] = NULL;
			root->cell.childs[5] = NULL;
			root->cell.childs[6] = NULL;
			root->cell.childs[7] = NULL;

			double radius = diameter * 0.5;

			for (i = 0; i < nbodies; i++) {
				bodies[i] = malloc(sizeof(node_t));
				bodies[i]->cell.leaf = &leafs[i];
				bodies[i]->mass = &leafs[i].mass;
				bodies[i]->pos = leafs[i].pos;
				insert(&(*root), &(*bodies[i]), radius); // this is how the data are passed by reference, i.e. the address of the structure the pointer points to is sent
			}
			curr = 0;
			compute_center_of_mass(&(*root));

			for (i = low; i < up; i++) {
				compute_force(&(*root), &(*bodies[i]), diameter, step);
			}
			//		for (i = 0; i < nbodies; i++) {
			//		}

			deallocate_tree(root);

			local_leafs = &leafs[low];
			//do the allgather here
			MPI_Allgather(local_leafs, up - low, bodytype, leafs, up - low,
					bodytype, comm);

			for (i = 0; i < nbodies; i++) {
				advance(&(*bodies[i]));
			}
			//		int p = 0;
			//		for (p = 0; p < nbodies; p++)
			//			printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1],
			//					bodies[p]->pos[2]);
			//		printf("*************************************** \n");
		}
		//	clockEnd = PAPI_get_real_usec();
		//	PAPI_stop(event_set, values);
		//	int i = 0;
		//		MPI_Send(bodies[low], up - low + 1, bodytype, 0, MPI_ANY_TAG, comm);

	}

	MPI_Finalize();
	return 0;
}
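
MPI_LB and MPI_UB, used above to mark the bounds of bodytype, were removed in MPI-3.0. A sketch of the modern equivalent, assuming the four data-field displacements computed above (mass, pos, vel, acc relative to the start of a leaf_t) and an extent of sizeof(leaf_t):

#include <mpi.h>

static void make_bodytype(const MPI_Aint field_disp[4], MPI_Aint extent,
		MPI_Datatype *bodytype)
{
	int block_len[4] = { 1, 3, 3, 3 };
	MPI_Datatype type[4] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE };
	MPI_Aint disp[4] = { field_disp[0], field_disp[1], field_disp[2], field_disp[3] };
	MPI_Datatype tmp;

	MPI_Type_create_struct(4, block_len, disp, type, &tmp);
	/* replaces the MPI_LB/MPI_UB markers: set the lower bound and extent explicitly */
	MPI_Type_create_resized(tmp, 0, extent, bodytype);
	MPI_Type_commit(bodytype);
	MPI_Type_free(&tmp);
}
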
Example #11
0
int main(int argc, char **argv) {
    size_t dimensions;
    size_t i, j;
    scanf("%zu", &dimensions);
    struct complex *matrix = calloc(sizeof(struct complex), dimensions * dimensions);

    struct complex temp;
    for (i = 0; i < dimensions; ++i) {
        for (j = 0; j < dimensions; ++j) {
            scanf("%lf", &temp.re);
            scanf("%lf", &temp.im);
            temp.x = (int) i;
            temp.y = (int) j;
            matrix[i * dimensions + j] = temp;
        }
    }

    int counter, size;
    double begin, end;
    begin = omp_get_wtime();
    MPI_Init(&argc, &argv);
    MPI_Datatype complex_t;
    MPI_Datatype type[4] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT};
    int blocklen[4] = {1, 1, 1, 1};
    /* member displacements; the original left disp[] uninitialized (offsetof needs <stddef.h>) */
    MPI_Aint disp[4];
    disp[0] = offsetof(struct complex, re);
    disp[1] = offsetof(struct complex, im);
    disp[2] = offsetof(struct complex, x);
    disp[3] = offsetof(struct complex, y);
    MPI_Type_create_struct(4, blocklen, disp, type, &complex_t);
    MPI_Type_commit(&complex_t);
    MPI_Comm_rank(MPI_COMM_WORLD, &counter);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    printf("%d %d", counter, size);
    struct complex thread_min = matrix[0];
    thread_min.x = counter;
    thread_min.y = 0;
    struct complex thread_max = matrix[0];
    thread_max.x = counter;
    thread_max.y = 0;
    for (i = (size_t) counter; i < dimensions; i += size) {
        for (j = 0; j < dimensions; ++j) {
            if (length(matrix[i * dimensions + j]) < length(thread_min)) {
                thread_min = matrix[i * dimensions + j];
            }
            if (length(matrix[i * dimensions + j]) > length(thread_max)) {
                thread_max = matrix[i * dimensions + j];
            }
        }
    }
    if (counter != 0) {
        MPI_Send(&thread_min, 1, complex_t, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&thread_max, 1, complex_t, 0, 0, MPI_COMM_WORLD);
    }
    if (counter == 0) {
        struct complex min = thread_min;
        struct complex max = thread_max;
        for (i = 1; i < (size_t) size; ++i) {
            MPI_Recv(&thread_min, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Recv(&thread_max, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            printf("%.2f+i*%.2f", thread_min.re, thread_min.im);
            printf("%.2f+i*%.2f", thread_max.re, thread_max.im);
            if (length(thread_min) < length(min)) {
                min = thread_min;
            }
            if (length(thread_max) > length(max)) {
                max = thread_max;
            }
        }
        printf("max complex number %.2f+i*%.2f position x:%d y:%d \n", max.re, max.im,
               max.x, max.y);

        printf("min complex number %.2f+i*%.2f position x:%d, y:%d \n", min.re, min.im,
               min.x,
               min.y);
    }
    MPI_Finalize();
    end = omp_get_wtime();
    printf("execution time: %f\n", end - begin);
    free(matrix);
    return 0;
}
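
When a struct type like complex_t above is used with counts greater than one, it is worth forcing its extent to match the C struct size so that trailing padding cannot shift later array elements. A minimal sketch, assuming the struct complex and the committed complex_t from the example above:

/* Minimal sketch (assumes struct complex and the committed complex_t above).
 * MPI_Type_create_resized pins the extent to sizeof(struct complex), so the
 * type also works for arrays of structs, padding included. */
static MPI_Datatype make_resized_complex_type(MPI_Datatype complex_t)
{
    MPI_Datatype resized;
    MPI_Type_create_resized(complex_t, 0, (MPI_Aint) sizeof(struct complex), &resized);
    MPI_Type_commit(&resized);
    return resized;
}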
FORT_DLL_SPEC void FORT_CALL mpi_type_create_struct_ ( MPI_Fint *v1, MPI_Fint v2[], MPI_Aint * v3, MPI_Fint v4[], MPI_Fint *v5, MPI_Fint *ierr ){
    *ierr = MPI_Type_create_struct( *v1, v2, v3, (MPI_Datatype *)(v4), (MPI_Datatype *)(v5) );
}
Beispiel #13
0
/**
 * main function
 * divided into two branches for master & slave processors respectively
 * @param argc commandline argument count
 * @param argv array of commandline arguments
 * @return 0 if success
 */
int main(int argc, char* argv[])
{
    	int rank;
	int size;
    	int num_clusters;
    	int num_points;
	int dex;
	int job_size;
	int job_done=0;
	
	Point* centroids;
	Point* points;
	Point* received_points;
	int  * slave_clusters;
	int  * former_clusters;
	int  * latter_clusters;
    	
	MPI_Init(&argc, &argv);
	
	MPI_Status status;

    	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    	MPI_Comm_size(MPI_COMM_WORLD, &size);
	
	//creation of derived MPI structure
	MPI_Datatype MPI_POINT;
	MPI_Datatype type=MPI_DOUBLE;
	int blocklen=2;
	MPI_Aint disp=0;
	MPI_Type_create_struct(1,&blocklen,&disp,&type,&MPI_POINT);
	MPI_Type_commit(&MPI_POINT);

/******** MASTER PROCESSOR WORKS HERE******************************************************/ 
      
   	if(rank==MASTER)
  	{
		//inputting from file
		FILE *input;
    		input=fopen(argv[1],"r");
		readHeaders(input,&num_clusters,&num_points);
    		points=(Point*)malloc(sizeof(Point)*num_points);
		readPoints(input,points,num_points);
		fclose(input);

		//other needed memory locations
		former_clusters=(int*)malloc(sizeof(int)*num_points);
		latter_clusters=(int*)malloc(sizeof(int)*num_points);
		job_size=num_points/(size-1);
		centroids=malloc(sizeof(Point)*num_clusters);
		
		//resetting and initializing to default behaviour		
		initialize(centroids,num_clusters);
		resetData(former_clusters,num_points);
		resetData(latter_clusters,num_points);
		
		//Sending the essential data to slave processors
		for(dex=1;dex<size;dex++)
		{
			printf("Sending to [%d]\n",dex);
			MPI_Send(&job_size              ,1           , MPI_INT        ,dex,0,MPI_COMM_WORLD);
			MPI_Send(&num_clusters          ,1           , MPI_INT        ,dex,0,MPI_COMM_WORLD);
			MPI_Send(centroids              ,num_clusters, MPI_POINT      ,dex,0,MPI_COMM_WORLD);
			MPI_Send(points+(dex-1)*job_size,job_size    , MPI_POINT      ,dex,0,MPI_COMM_WORLD);
		}
    		printf("Sent!\n");

		MPI_Barrier(MPI_COMM_WORLD);

		//Main job of master processor is done here		
		while(1)
		{	
			MPI_Barrier(MPI_COMM_WORLD);
			
			printf("Master Receiving\n");
			for(dex=1;dex<size;dex++)
				MPI_Recv(latter_clusters+(job_size*(dex-1)),job_size,MPI_INT,dex,0,MPI_COMM_WORLD,&status);
			
			printf("Master Received\n");
			
			calculateNewCentroids(points,latter_clusters,centroids,num_clusters,num_points);
			printf("New Centroids are done!\n");
			if(checkConvergence(latter_clusters,former_clusters,num_points)==0)
			{
				printf("Converged!\n");
				job_done=1;
			}
			else    
			{
				printf("Not converged!\n");
				for(dex=0;dex<num_points;dex++)
					former_clusters[dex]=latter_clusters[dex];
			}
			
			//Informing slaves that no more job to be done
			for(dex=1;dex<size;dex++)
				MPI_Send(&job_done,1, MPI_INT,dex,0,MPI_COMM_WORLD);

			MPI_Barrier(MPI_COMM_WORLD);
			if(job_done==1)
				break;
	
			//Sending the recently created centroids			
			for(dex=1;dex<size;dex++)
				MPI_Send(centroids,num_clusters, MPI_POINT,dex,0, MPI_COMM_WORLD);

			MPI_Barrier(MPI_COMM_WORLD);
		}
		
		//Outputting to the output file		
		FILE* output=fopen(argv[2],"w");
		fprintf(output,"%d\n",num_clusters);
		fprintf(output,"%d\n",num_points);
		for(dex=0;dex<num_clusters;dex++)
			fprintf(output,"%lf,%lf\n",centroids[dex]._x,centroids[dex]._y);
		for(dex=0;dex<num_points;dex++)
			fprintf(output,"%lf,%lf,%d\n",points[dex]._x,points[dex]._y,latter_clusters[dex]+1);
		fclose(output);
	}
/*************END OF MASTER PROCESSOR'S BRANCH -- SLAVE PROCESSORS' JOB IS TO FOLLOW ************************/
	else
	{
		//Receiving the essential data
		printf("Receiving\n");
		MPI_Recv(&job_size    ,1           ,MPI_INT  ,MASTER,0,MPI_COMM_WORLD,&status);
		MPI_Recv(&num_clusters,1           ,MPI_INT  ,MASTER,0,MPI_COMM_WORLD,&status);
		centroids=malloc(sizeof(Point)*num_clusters);
		MPI_Recv(centroids    ,num_clusters,MPI_POINT,MASTER,0,MPI_COMM_WORLD,&status);
		printf("part_size =%d\n",job_size);
		received_points=(Point*)malloc(sizeof(Point)*job_size);
		slave_clusters=(int*)malloc(sizeof(int)*job_size);
		MPI_Recv(received_points,job_size,MPI_POINT      ,MASTER,0,MPI_COMM_WORLD,&status);
		printf("Received [%d]\n",rank);

		MPI_Barrier(MPI_COMM_WORLD);
		
		while(1)
		{
			printf("Calculation of new clusters [%d]\n",rank);
			for(dex=0;dex<job_size;dex++)
			{
				slave_clusters[dex]=whoIsYourDaddy(received_points[dex],centroids,num_clusters);
			}
			
			printf("sending to master [%d]\n",rank);
			MPI_Send(slave_clusters,job_size, MPI_INT,MASTER, 0, MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);
			MPI_Recv(&job_done,1, MPI_INT,MASTER,0,MPI_COMM_WORLD,&status);
					
			if(job_done==1) //No more work to be done
				break;
			
			//Receiving recently created centroids from master
			MPI_Recv(centroids,num_clusters,MPI_POINT,MASTER,0, MPI_COMM_WORLD,&status);

			MPI_Barrier(MPI_COMM_WORLD);
		}
	}
	//End of all	
	MPI_Finalize();
    	return 0;
}
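
MPI_POINT above is a struct type with a single block of two doubles at displacement 0. Assuming Point really holds nothing but two contiguous doubles (_x and _y), an equivalent and slightly simpler construction is MPI_Type_contiguous; a sketch under that assumption:

/* Sketch only: assumes Point contains exactly two doubles (_x, _y) and
 * nothing else, which is what the one-block struct type above describes. */
static MPI_Datatype make_point_type(void)
{
	MPI_Datatype point_type;
	MPI_Type_contiguous(2, MPI_DOUBLE, &point_type);
	MPI_Type_commit(&point_type);
	return point_type;
}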
int main(int argc, char *argv[]) {
	
	clock_t startTime, endTime;
	startTime = clock();

	int p, my_rank;

	/* initialize MPI stuff */
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD,&p);
	MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);	

	srand(time(NULL));
	
	int row_num, nz, col_num, i;

	FILE *fp;
	fp = fopen("crs48x48.txt", "r");
	fscanf(fp, "%d", &nz);
	while (fgetc(fp) != '\n');

	fscanf(fp, "%d", &row_num);
	while (fgetc(fp) != '\n');

	fscanf(fp, "%d", &col_num);
	while (fgetc(fp) != '\n');

	printf("%d => NZ = %d\n",my_rank, nz);

	FILE *fpseed;
	int seed[p];
	

	//int *column_partition = (int *)malloc(sizeof(int)*col_num);
	int *column_ptr;
	int *hash_weights;

	int num_cols_per_process[p];
	int *YPartition = (int*)calloc(row_num, sizeof(int));
	int *YmaxPartition = (int*)calloc(row_num, sizeof(int));
	
	const int nitems 		=  2;
    int blocklengths[2] 	= {1, 1};
    MPI_Datatype types[2] 	= {MPI_INT, MPI_INT};
    MPI_Datatype mpi_pair;
    MPI_Aint offsets[2];

    offsets[0] = offsetof(pair, col);
    offsets[1] = offsetof(pair, nz);

    MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_pair);
    MPI_Type_commit(&mpi_pair);

    //printf("datatype created\n");
	pair A_partition_column[p];
	pair *A_partition[p];

	pair *my_columns;

	column_ptr = (int *)malloc(sizeof(int) * (col_num+1));
	// I need how many non-zeros in each column in the matrix data
	for (i=0; i <= col_num; i++) {
		fscanf(fp, "%d", &column_ptr[i]);
		while (fgetc(fp) != '\n');
	}
	//column_ptr[i] = nz;

	if (my_rank == 0) {	
		fpseed = fopen("seed48x48.txt", "r");
		for(i=0; i<p; i++)
		{
			fscanf(fpseed, "%d\n", &seed[i]);
			printf("seed[%d]: %d\n", i, seed[i]);
		}
		fclose(fpseed);

		int i;
		int prime_arr[prime_arr_len] = {  2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
											101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181,
											191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
											283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397,
											401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
											509, 521, 523, 541 
										 };

	
		hash_weights = (int *)malloc(sizeof(int)*row_num);
		genHashWeightsArr(hash_weights, row_num, prime_arr);

		/*for (i=0; i<row_num; i++) {
			printf("hashweights[%d]: %d\n",i,  hash_weights[i]);
		}*/

		int *current_column_rows = (int *)malloc(sizeof(int)*row_num);	
		//printf("check 1\n");
		HASHTABLE hash_columns;
		hash_columns = createHashtable(p, row_num);

		// read row_arr and insert in the hashtable for each column
		int j,c, flag;

		//insert seed cols
		for (c=0; c<p; c++) {
			j = seed[c];
			int nz_in_current_col = column_ptr[j+1] - column_ptr[j];
		
			fseek(fp, col_block_size*(col_num+1) + init_block_size*3 + rowVal_block_size*(column_ptr[j]), SEEK_SET);
			//printf("inserting\n");
			for (i=0; i<nz_in_current_col; i++) {
				fscanf(fp, "%d,", &current_column_rows[i]);
				while (fgetc(fp) != '\n');
			}
			
			hash_columns = insert_hash(hash_columns, current_column_rows, nz_in_current_col, j, p, hash_weights, row_num, c);
		}

		printf("\nSeeds:\n");
		print_hash(hash_columns, p);

		fseek(fp, col_block_size*(col_num+1) + init_block_size*3, SEEK_SET);

		//#pragma omp parallel for private(fp, j) num_threads(8)
		for (j=0; j<col_num; j++) {
			int nz_in_current_col = column_ptr[j+1] - column_ptr[j];
			flag =1;
			for (i=0; i<nz_in_current_col; i++) {
				fscanf(fp, "%d,", &current_column_rows[i]);
				while (fgetc(fp) != '\n');
			}
			//current_column_rows[i] = -1;
			/*if (j==0)
			{
				for (i=0; i<nz_in_current_col; i++) {
					printf("cur col[%d]: %d\n",i,  current_column_rows[i]);
				}
			*/	
			for(c =0 ; c<p; c++)
			{
				if(seed[c] == j)
				{
					flag = 0;
				}
			}
			
			if(flag == 1)
			{
				hash_columns = insert_hash(hash_columns, current_column_rows, nz_in_current_col, j, p, hash_weights, row_num, -1);
			}
				
			//}
		}
		// Load balancing
		//printf("inserted in hash\n");
		print_hash(hash_columns, p);

		// Generate a column-wise index storing the partition alloted to each column
		NODE temp;
		int max;

		// note: temp, max and j must be thread-private; concurrent updates to YmaxPartition/YPartition across i remain racy
		#pragma omp parallel for private(j, temp, max) num_threads(p)
		for (i=0; i<p; i++) {
			max = 0;
			A_partition_column[i].col = hash_columns->col_counts[i];
			A_partition[i] = (pair *)malloc(sizeof(pair)*A_partition_column[i].col);

			temp = hash_columns->buckets[i];
			for (j = 0; j < A_partition_column[i].col; j++) {
				A_partition[i][j].col = temp->col_index;
				A_partition[i][j].nz = temp->col_nz;
				if (temp->col_nz > max) {
					max = temp->col_nz;
				}
				temp = temp->next;
			}

			for (j=0; j<row_num; j++) {
				if(hash_columns->row_indices[i][j] > YmaxPartition[j]) {
					YmaxPartition[j] = hash_columns->row_indices[i][j];
					YPartition[j] = i;
				}
			}
			
			A_partition_column[i].nz = max;
		}		
	}

	// Broadcast the column-wise partition array
	MPI_Bcast(A_partition_column, p, mpi_pair, 0, MPI_COMM_WORLD);

	if (my_rank == 0) {
		my_columns = *A_partition;
	}
	else {
		my_columns = (pair *)malloc(sizeof(struct _pair)*A_partition_column[my_rank].col);
	}

	if (my_rank == 0) {
		for (i=1; i<p; i++) {
			MPI_Send(A_partition[i], A_partition_column[i].col, mpi_pair, i, 0, MPI_COMM_WORLD);
		}
	}
	else {
		MPI_Recv(my_columns, A_partition_column[my_rank].col, mpi_pair, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);			
	}

	MPI_Bcast(YPartition, row_num, MPI_INT, 0, MPI_COMM_WORLD);
	

	//check what recvd in mycolumns
	/*for(i=0; i<A_partition_column[my_rank].col; i++)
	{
		printf("Rank %d , Col no: %d, myNz : %d\n", my_rank, my_columns[i].col, my_columns[i].nz);
	}*/

	//partition_fp = (FILE **)malloc(sizeof(FILE*)*p);
	FILE *my_output;
	char f_name[20];


	int colIndex, myNz, rowIndex, j;
	float val;
	char *buffer;
	float *Y;
	
	Y = (float*)calloc(row_num, sizeof(float));

	
	//Read X
	FILE *fp2;
	fp2 = fopen("Xvector_algo2.txt", "r");
	int *X;
	X = (int*)malloc(sizeof(int)*A_partition_column[my_rank].col);
	
	printf("Rank %d recvd %d columns\n", my_rank, A_partition_column[my_rank].col);

	// caution: private(fp2) gives each thread an uninitialized FILE*; concurrent fseek/fscanf on one stream is not safe
	#pragma omp parallel for private(fp2, colIndex, i)
	for(i=0; i<A_partition_column[my_rank].col; i++)
	{
		colIndex = my_columns[i].col;
		fseek(fp2, colIndex*vector_block_size, SEEK_SET);
		fscanf(fp2, "%d\n", &X[i]);	
	}
  	fclose(fp2);

  	/*for(i=0; i<A_partition_column[my_rank].col; i++)
	{
		printf("Rank %d ::, X[%d] = %d\n",my_rank, i, X[i]);	
	}*/

	//for each column in A_partition_column[my_rank]...
	//Read non zeroes and multiply (computing local Y)...
	// caution: as above, private(fp) leaves the per-thread FILE* uninitialized; per-thread file handles would be needed here
	#pragma omp parallel for private(fp, colIndex, myNz, rowIndex, val)
	for(i=0; i<A_partition_column[my_rank].col; i++)
	{
		//printf("proc: %d, Operating on col %d \n", my_rank, colIndex);
		colIndex = my_columns[i].col;
		myNz = my_columns[i].nz;
		
		
		//seek to non-zeroes corresponding to this column in file
		fseek(fp, col_block_size*(col_num+1) + init_block_size*3 + rowVal_block_size*(column_ptr[colIndex]), SEEK_SET);
		
		//fread(buffer, myNz*rowVal_block_size,1,fp);
		//for each non zero...
		for(j=0; j<myNz; j++)
		{
			fscanf(fp, "%d, %f", &rowIndex, &val);
			while (fgetc(fp) != '\n');
			if(rowIndex>=row_num)
			{
				//printf("\n\n***********ERROR %d\n\n\n\n", rowIndex);
			}
			#pragma omp atomic
			Y[rowIndex]+= X[i]*val;
		}
	}
	//printf("end of loop: %d\n", my_rank);

	pairF *sendOthers[p];
	int numRowsInPartition[p], part;
	//numRowsInPartition = (int*) malloc(sizeof(int)*p);
	
	#pragma omp parallel for 
	for(i=0; i<p; i++)  // numRowsInPartition has p entries, not row_num
	{
		numRowsInPartition[i] = 0;
	}

/*	for(i=0; i<row_num; i++)
	{
		printf("YPartition[%d] = %d\n", i, YPartition[i]);
	}
*/
	#pragma omp parallel for private(part)
	for(i=0; i<row_num; i++)
	{
		part = YPartition[i];
		#pragma omp atomic
		numRowsInPartition[part]++;
	}
	
	if (my_rank == 0) {
		for(i=0;i < p; i++)
		{
			printf("Rank %d got %d rows of Y vector\n", i, numRowsInPartition[i]);
		}
	}

	//make the arrays that have to be sent to other processes. 
	//pair arrays that store rowIndex and val.
	//allocate!
	for(i=0; i<p;i++)
	{
		//if(i!=my_rank)
		//{
			sendOthers[i] = (pairF*)malloc(sizeof(pairF)*numRowsInPartition[i]);
		//}
	}
	
	int *current = (int*) calloc(p, sizeof(int));
	int other, other_pos;

	//populate!
	for(i=0; i<row_num; i++)
	{
		other = YPartition[i];
		//if(other!=my_rank)
		//{
			other_pos = current[other];
			sendOthers[other][other_pos].row = i;
			sendOthers[other][other_pos].val = Y[i];
			current[other]++;
		//}
	}

	//write to respective files
	FILE *partition_fp[p];
	//open output files
	for (i=0; i< p; i++)
	{
		sprintf(f_name, "%d", i);
		//printf("open file %d\n", i);
		partition_fp[i] = fopen(f_name, "a");
	}

	//FILE *fp21 = fopen("hehe.txt", "a");
	for(i=0; i<p; i++)
	{
		if(i!=my_rank)
		{	
			other = i;
			for(j=0; j< numRowsInPartition[other]; j++)
			{
				if(sendOthers[other][j].val!=0){
					fprintf(partition_fp[other], "%d, %f, process %d\n",sendOthers[other][j].row, sendOthers[other][j].val, my_rank);
				}
			}
		}
	}

	//read from respective files and add! 
	MPI_Barrier(MPI_COMM_WORLD);
	
	for (i = 0; i < p; ++i)
	{
		fclose(partition_fp[i]);
	}
	//printf("all files closed by rank %d\n", my_rank);
	sprintf(f_name, "%d", my_rank);
	partition_fp[my_rank] = fopen(f_name, "r");
	strcat(f_name, "_output.txt");
	my_output = fopen(f_name, "w");

	while (fscanf(partition_fp[my_rank], "%d, %f", &rowIndex, &val) == 2) // expect two successful conversions
	{
		while (fgetc(partition_fp[my_rank]) != '\n');
		//update local y
		//printf("\n****\nRank %d read value %f\n\n", my_rank, val);
		Y[rowIndex]+=val;
	}

	for(i=0; i<numRowsInPartition[my_rank]; i++)
	{
		rowIndex = sendOthers[my_rank][i].row;
		sendOthers[my_rank][i].val = Y[rowIndex];
		//these are the final values!
		fprintf(my_output, "%d, %f, process %d\n",sendOthers[my_rank][i].row, sendOthers[my_rank][i].val, my_rank);
	}

	fclose(partition_fp[my_rank]);
	fclose(my_output);
	
	endTime = clock();

	printf("\nrank = %d, Time taken: %lf\n", my_rank, (double)(endTime - startTime)/CLOCKS_PER_SEC);
	MPI_Finalize();
	return 0;

}
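
Since mpi_pair above is built from offsetof values, a cheap start-up check that its extent matches sizeof(pair) catches layout mismatches before any traffic is sent. A small sketch using the pair typedef from the example:

/* Sketch: compare the MPI view of mpi_pair with the C struct it describes
 * (assumes the pair typedef used in the example above). */
static void check_pair_type(MPI_Datatype mpi_pair)
{
	MPI_Aint lb, extent;
	MPI_Type_get_extent(mpi_pair, &lb, &extent);
	if (extent != (MPI_Aint) sizeof(pair))
		fprintf(stderr, "mpi_pair extent %ld != sizeof(pair) %zu\n",
		        (long) extent, sizeof(pair));
}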
/* test case from tt#1030 ported to C
 *
 * Thanks to Matthias Lieber for reporting the bug and providing a good test
 * program. */
int struct_struct_test(void)
{
    int err, errs = 0;
    int i, j, dt_size = 0;
    MPI_Request req[2];


#define COUNT (2)
    MPI_Aint displ[COUNT];
    int blens[COUNT];
    MPI_Datatype types[COUNT];
    MPI_Datatype datatype;

    /* A slight difference from the F90 test: F90 arrays are column-major, C
     * arrays are row-major.  So we invert the order of dimensions. */
#define N (2)
#define M (4)
    int array[N][M] =    { {-1, -1, -1, -1}, {-1, -1, -1, -1} };
    int expected[N][M] = { {-1,  1,  2,  5}, {-1,  3,  4,  6} };
    int seq_array[N*M];
    MPI_Aint astart, aend;
    MPI_Aint size_exp = 0;

    /* 1st section selects elements 1 and 2 out of 2nd dimension, complete 1st dim.
     * should receive the values 1, 2, 3, 4 */
    astart = 1;
    aend   = 2;
    err = build_array_section_type(M, astart, aend, &types[0]);
    if (err) {
        errs++;
        if (verbose) fprintf(stderr, "build_array_section_type failed\n");
        return errs;
    }
    blens[0] = N;
    displ[0] = 0;
    size_exp = size_exp + N * (aend-astart+1) * sizeof(int);

    /* 2nd section selects last element of 2nd dimension, complete 1st dim.
     * should receive the values 5, 6 */
    astart = 3;
    aend   = 3;
    err = build_array_section_type(M, astart, aend, &types[1]);
    if (err) {
        errs++;
        if (verbose) fprintf(stderr, "build_array_section_type failed\n");
        return errs;
    }
    blens[1] = N;
    displ[1] = 0;
    size_exp = size_exp + N * (aend-astart+1) * sizeof(int);

    /* create type */
    err = MPI_Type_create_struct(COUNT, blens, displ, types, &datatype);
    check_err(MPI_Type_create_struct);
    err = MPI_Type_commit(&datatype);
    check_err(MPI_Type_commit);

    err = MPI_Type_size(datatype, &dt_size);
    check_err(MPI_Type_size);
    if (dt_size != size_exp) {
        errs++;
        if (verbose) fprintf(stderr, "unexpected type size\n");
    }


    /* send the type to ourselves to make sure that the type describes data correctly */
    for (i = 0; i < (N*M) ; ++i)
        seq_array[i] = i + 1; /* source values 1..(N*M) */
    err = MPI_Isend(&seq_array[0], dt_size/sizeof(int), MPI_INT, 0, 42, MPI_COMM_SELF, &req[0]);
    check_err(MPI_Isend);
    err = MPI_Irecv(&array[0][0], 1, datatype, 0, 42, MPI_COMM_SELF, &req[1]);
    check_err(MPI_Irecv);
    err = MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    check_err(MPI_Waitall);

    /* check against expected */
    for (i = 0; i < N; ++i) {
        for (j = 0; j < M; ++j) {
            if (array[i][j] != expected[i][j]) {
                errs++;
                if (verbose)
                    fprintf(stderr, "array[%d][%d]=%d, should be %d\n", i, j, array[i][j], expected[i][j]);
            }
        }
    }

    err = MPI_Type_free(&datatype);
    check_err(MPI_Type_free);
    err = MPI_Type_free(&types[0]);
    check_err(MPI_Type_free);
    err = MPI_Type_free(&types[1]);
    check_err(MPI_Type_free);

    return errs;
#undef M
#undef N
#undef COUNT
}
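
build_array_section_type is defined elsewhere in the test suite; one plausible way to build such a type, selecting elements astart..aend of a row of M ints while keeping the extent of a full row, is a 1-D subarray. The sketch below is an illustration, not the suite's actual helper:

/* Illustration only (not the test suite's real build_array_section_type):
 * the returned type covers a full row of M ints but selects only the
 * elements astart..aend, so consecutive copies advance by whole rows. */
static int build_row_section_type(int M, MPI_Aint astart, MPI_Aint aend,
                                  MPI_Datatype *newtype)
{
    int sizes[1], subsizes[1], starts[1];
    sizes[0]    = M;
    subsizes[0] = (int) (aend - astart + 1);
    starts[0]   = (int) astart;
    return MPI_Type_create_subarray(1, sizes, subsizes, starts,
                                    MPI_ORDER_C, MPI_INT, newtype);
}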
Beispiel #16
0
int main(int argc, char **argv)
{	char * dirPrefix = "/mirror/local/vita/input/";
    FILE * fp;
    
    News *news;
	News Test;
	int count = atoi(argv[1]);
	//printf("Count %d",count);
	//news = (News *)malloc(sizeof(News)*(count));
	char *buffer = (char *)malloc(sizeof(char)*(count*630)); // 30 + 100 + 500
	int p = 0;
    for (int i = 1 ; i <= count ; i++ ) {
		char * line = NULL;
   	 	size_t len = 0;
    	ssize_t read;
		char *filePath = (char *)malloc(sizeof(char)*200);
		strcpy(filePath,dirPrefix);
		char *arg = (char *)malloc(sizeof(char)*32);
		snprintf(arg, 32, "%d", i);
		strcat(filePath,arg);
		//printf("So file path is : %s\n",filePath);
        fp = fopen(filePath, "r");
		if (fp == NULL) {
		    exit(EXIT_FAILURE); 
		}
		//news[i] = (News *)malloc(sizeof(News));   
		int j = 0 ;
		while ((read = getline(&line, &len, fp)) != -1) {
		    //printf("Retrieved line of length %zu :\n", read);
		    //printf("%s", line);

		    if (j == 0) {
		        //news[i]->timeStamp = (char *)malloc(sizeof(char)*read);
		        //strncpy(news[i].timeStamp,line,read);
		        //news[i].timeStamp[read] = '\0';
				strncpy(&(buffer[p]),line,read);
				if (read < 30) {
					int k = p + read;
					for (; k < p + 30 ; k++) {
						buffer[k] = '\0';					
					} 
				} else {
					buffer[p + 29] = '\0';				
				}
		        j++;
				p = p+ 30;
		    } else if ( j ==  1) {
		        //news[i]->title = (char *)malloc(sizeof(char)*read);
		        //strncpy(news[i].title,line,read);
		        //news[i].title[read] = '\0';
				strncpy(&(buffer[p]),line,read);
				if (read < 100) {
					int k = p + read;
					for (; k < p + 100 ; k++) {
						buffer[k] = '\0';					
					} 
				} else {
					buffer[p + 99] = '\0';				
				}
		        j++;
				p = p+ 100;
		    } else {
		       	//news[i]->details = (char *)malloc(sizeof(char)*read);
		        //strncpy(news[i].details,line,read);
		        //news[i].details[read] = '\0';
				strncpy(&(buffer[p]),line,read);
				if (read < 500) {
					int k = p + read;
					for (; k < p + 500 ; k++) {
						buffer[k] = '\0';					
					} 
				} else {
					buffer[p + 499] = '\0';				
				}
		        j++;
				p = p + 500;
		        j = 0;
		    }  
			    
		}
		fclose(fp);
		if (line)
		    free(line);
    }
/*
	int limit = count * 630 -1; 
	for (int i = 0 ; i < limit; i++) {
		printf("%c", buffer[i]);
	} 
	*/
    //printf("Time Stamp : %s\n",news1.timeStamp);
    //printf("Title : %s\n",news1.title);
    //printf("Details : %s\n",news1.details);
	/*
   for (int i = 1 ; i <= count ; i++) {
				printf("News item : %d \n", i);
				printf("News TimeStamp: %s \n", news[i].timeStamp);
				printf("News Title: %s \n", news[i].title);
				printf("News Details: %s \n", news[i].details);
	}
	*/


	/*
	News *latestNews = findLatest(news, count);
	printf("Latest News TimeStamp: %s \n", latestNews->timeStamp);
	printf("Latest News Title: %s \n", latestNews->title);
	printf("Latest News Details: %s \n", latestNews->details);
	*/
    //%a %b %e %T %Z %Y => Thu Mar  3 22:32:41 IST 2016
    //struct tm time;
    //strptime(news1.timeStamp,"%a %b %e %T %Z %Y",&time);
    //time_t loctime = mktime(&time);
    //printf ( "Current local time and date: %s", asctime (&time) );

	
    const int tag = 0;
	int world_size, world_rank;
	int rep_size, rep_rank;
	int *process_rank;

	MPI_Group world_group, new_group;
	MPI_Comm rep_comm,world_comm;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

	//number of items inside structure Test
	const int nitems = 3;

	//count of item of each type inside Test in order
	int blocklengths[3] = {1, 1, 1};

	MPI_Datatype mpi_timestamp;
	MPI_Datatype mpi_title;
	MPI_Datatype mpi_details;

	MPI_Type_contiguous(100,MPI_CHAR,&mpi_title);
	MPI_Type_commit(&mpi_title);

	MPI_Type_contiguous(30,MPI_CHAR,&mpi_timestamp);
	MPI_Type_commit(&mpi_timestamp);

	MPI_Type_contiguous(500,MPI_CHAR,&mpi_details);
	MPI_Type_commit(&mpi_details);


	//data types present inside Test in order
	MPI_Datatype types[3] = {mpi_timestamp, mpi_title, mpi_details};

	//name of derived data type
	MPI_Datatype mpi_test_type;

	//array to store starting address of each item inside Test
	MPI_Aint offsets[3];

	//offset of each item in Test with respect to base address of Test
	offsets[0] = offsetof(News, timeStamp);
	offsets[1] = offsetof(News, title);
	offsets[2] = offsetof(News, details);

	//create the new derived data type
	MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_test_type);

	//commit the new data type
	MPI_Type_commit(&mpi_test_type);

	//get rank of current process
	//MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

	//Code for the creation of REPORTER COMM goes here.
	
	process_rank = 	(int*)malloc(sizeof(int) * (world_size - 1));

	for(int i = 1 ; i < world_size ; i++){
		process_rank[i - 1] = i;
	}


	MPI_Comm_dup(MPI_COMM_WORLD, &world_comm);
	MPI_Comm_group(world_comm, &world_group);
	MPI_Group_incl(world_group, (world_size - 1), process_rank, &new_group);
	MPI_Comm_create(world_comm, new_group, &rep_comm);

	//printf("%d\n",error);

	//Get the size of the Comm REPORTER.

	if(world_rank == 0){
		//Do editor's task
	}else{
		MPI_Comm_size(rep_comm, &rep_size);
		MPI_Comm_rank(rep_comm, &rep_rank);


	/*
	for (int i = 1 ; i <= count ; i++) {
				printf("News item : %d \n", i);
				printf("News TimeStamp: %s \n", news[i].timeStamp);
				printf("News Title: %s \n", news[i].title);
				printf("News Details: %s \n", news[i].details);
	}
	MPI_Barrier(MPI_COMM_WORLD);
	/*
	if(rank == 1) {
		// News send;
		// News.one = 1;
		// News.two = 2.0;
		// strncpy(send.news,"This is simple news.",sizeof(send.news));
		const int dest = 2;
		MPI_Send(news[1], 1, mpi_test_type, dest, tag, MPI_COMM_WORLD);
		printf("\nRank %d sending \n %s  \n %s \n %s\n", rank, news[1]->timeStamp, news[1]->title, news[1]->details);
	}		
	if(rank == 2) {
		MPI_Status status;
		const int src = 1;
		News recv;
		MPI_Recv(&recv, 1, mpi_test_type, src, tag, MPI_COMM_WORLD, &status);
		printf("\nRank %d received \n %s \n %s \n %s \n", rank, recv.timeStamp,recv.title,recv.details);
	}
	
	*/
	//News *recvNews = (News *)malloc(sizeof(News )*(count));
	char *recvBuffer = (char *)malloc(sizeof(char) * (count*630));
	
	int status = MPI_Alltoall(buffer,630,MPI_CHAR,recvBuffer,630,MPI_CHAR,rep_comm);	
	
	if(status != 0) {
		printf("MPI_Alltoall failed with status %d\n", status);
		exit(EXIT_FAILURE);	
	}	
	MPI_Barrier(rep_comm);
	printf(" \n \n \n");
	/*
	if (rank == 1) {
	int limit = count * 630 -1; 
	for (int i = 0 ; i < limit; i++) {
		printf("%c", recvBuffer[i]);
	} 
		
	} 
*/
	//MPI_Barrier(MPI_COMM_WORLD);
	News* newsArray = getNewsArray(recvBuffer,count);
	

	for( int my_rank = 0; my_rank < count; my_rank++) {
		if( my_rank == rep_rank ) {
			/*for (int i = 0 ; i < size ; i++) {
						printf("News item : %d rank %d\n", i , rank);
						printf("News TimeStamp: %s rank %d\n", newsArray[i].timeStamp,rank);
						printf("News Title: %s rank %d\n", newsArray[i].title,rank);
						printf("News Details: %s rank %d \n", newsArray[i].details,rank);
			} */
			News latestNews = findLatest(newsArray,count);
			printf("Latest news : \n");
			printf("News TimeStamp: %s\n", latestNews.timeStamp);
			printf("News Title: %s\n", latestNews.title);
			printf("News Details: %s\n", latestNews.details);
		}
	}
	
	/*
	else if(rank == 1) {
	for (int i = 1 ; i <= size ; i++) {
				printf("News item : %d rank %d\n", i , rank);
				printf("News TimeStamp: %s rank %d\n", recvNews[i].timeStamp,rank);
				printf("News Title: %s rank %d\n", recvNews[i].title,rank);
				printf("News Details: %s rank %d \n", recvNews[i].details,rank);
	}
	}
	*/
	//dumpRecvNews(,recvNews,size);	

	
	//free the derived data type
	

	}


	

	
	MPI_Type_free(&mpi_test_type);
	MPI_Finalize();
	
	

    exit(EXIT_SUCCESS);
}
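
Only mpi_test_type is freed above; the three committed member types stay allocated until MPI_Finalize. Once the combined type has been created they are no longer needed, so a cleanup along these lines (same variable names as in the example) is enough:

	/* sketch: release the member types created above; freeing them does not
	   affect mpi_test_type, which keeps its own reference internally */
	MPI_Type_free(&mpi_timestamp);
	MPI_Type_free(&mpi_title);
	MPI_Type_free(&mpi_details);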
/* regression for tt#1030, checks for bad offset math in the
 * blockindexed and indexed dataloop flattening code */
int flatten_test(void)
{
    int err, errs = 0;
#define ARR_SIZE (9)
    /* real indices              0  1  2  3  4  5  6  7  8
     * indices w/ &array[3]     -3 -2 -1  0  1  2  3  4  5 */
    int array[ARR_SIZE]      = {-1,-1,-1,-1,-1,-1,-1,-1,-1};
    int expected[ARR_SIZE]   = {-1, 0, 1,-1, 2,-1, 3,-1, 4};
    MPI_Datatype idx_type = MPI_DATATYPE_NULL;
    MPI_Datatype blkidx_type = MPI_DATATYPE_NULL;
    MPI_Datatype combo = MPI_DATATYPE_NULL;
#define COUNT (2)
    int displ[COUNT];
    MPI_Aint adispl[COUNT];
    int blens[COUNT];
    MPI_Datatype types[COUNT];

    /* indexed type layout:
     * XX_X
     * 2101  <-- pos (left of 0 is neg)
     *
     * different blens to prevent optimization into a blockindexed
     */
    blens[0] = 2;
    displ[0] = -2; /* elements, puts byte after block end at 0 */
    blens[1] = 1;
    displ[1] = 1; /*elements*/

    err = MPI_Type_indexed(COUNT, blens, displ, MPI_INT, &idx_type);
    check_err(MPI_Type_indexed);
    err = MPI_Type_commit(&idx_type);
    check_err(MPI_Type_commit);

    /* indexed type layout:
     * _X_X
     * 2101  <-- pos (left of 0 is neg)
     */
    displ[0] = -1;
    displ[1] = 1;
    err = MPI_Type_create_indexed_block(COUNT, 1, displ, MPI_INT, &blkidx_type);
    check_err(MPI_Type_indexed_block);
    err = MPI_Type_commit(&blkidx_type);
    check_err(MPI_Type_commit);

    /* struct type layout:
     * II_I_B_B  (I=idx_type, B=blkidx_type)
     * 21012345  <-- pos (left of 0 is neg)
     */
    blens[0]  = 1;
    adispl[0] = 0; /*bytes*/
    types[0]  = idx_type;

    blens[1]  = 1;
    adispl[1] = 4 * sizeof(int); /* bytes */
    types[1]  = blkidx_type;

    /* must be a struct in order to trigger flattening code */
    err = MPI_Type_create_struct(COUNT, blens, adispl, types, &combo);
    check_err(MPI_Type_indexed);
    err = MPI_Type_commit(&combo);
    check_err(MPI_Type_commit);

    /* pack/unpack with &array[3] */
    errs += pack_and_check_expected(combo, "combo", 3, ARR_SIZE, array, expected);

    MPI_Type_free(&combo);
    MPI_Type_free(&idx_type);
    MPI_Type_free(&blkidx_type);

    return errs;
#undef COUNT
}
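
pack_and_check_expected is defined elsewhere in the suite. As a rough, hypothetical illustration of the idea (not the suite's actual helper): fill a scratch source with 0,1,2,..., pack it as plain MPI_INTs, unpack it through the datatype rooted at &array[start], and compare the result against the expected array:

/* Hypothetical sketch of a pack/unpack round trip -- not the real helper. */
static int pack_and_check_sketch(MPI_Datatype dtype, int start, int n,
                                 int *array, const int *expected)
{
    int i, errs = 0, tsize = 0, packsize = 0, pos = 0;
    MPI_Type_size(dtype, &tsize);
    int nvals = tsize / (int) sizeof(int);
    int *src = malloc((size_t) tsize);
    MPI_Pack_size(nvals, MPI_INT, MPI_COMM_SELF, &packsize);
    char *packbuf = malloc((size_t) packsize);
    if (!src || !packbuf) { free(src); free(packbuf); return 1; }
    for (i = 0; i < nvals; i++) src[i] = i;        /* source values 0..nvals-1 */
    MPI_Pack(src, nvals, MPI_INT, packbuf, packsize, &pos, MPI_COMM_SELF);
    pos = 0;
    MPI_Unpack(packbuf, packsize, &pos, &array[start], 1, dtype, MPI_COMM_SELF);
    for (i = 0; i < n; i++)                        /* compare the whole array */
        if (array[i] != expected[i]) errs++;
    free(src);
    free(packbuf);
    return errs;
}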
int main(int argc, char **argv){

	int task, len, chunks = CHUNK;
	MPI_Status status;
	char hostname[MPI_MAX_PROCESSOR_NAME];

	#ifdef GETTIME
	double start = MPI_Wtime();
	#endif

	const int nitems=3;
	int blocklengths[3] = {2,2,1};
	MPI_Datatype types[3] = {MPI_CHAR, MPI_UNSIGNED_SHORT, MPI_UNSIGNED};
	MPI_Aint offsets[3];

	offsets[0] = offsetof(world_cell, type);
	offsets[1] = offsetof(world_cell, breeding_period);
	offsets[2] = offsetof(world_cell, number);

	/* MPI Initialization */
	if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
	printf ("Error starting MPI program. Terminating.\n");
		/*MPI_Abort(MPI_COMM_WORLD, ret);*/
		return -1;
	}

	MPI_Get_processor_name(hostname, &len);

	MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_world_cell_type);
	MPI_Type_commit(&mpi_world_cell_type);
        
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);

	if(taskid == MASTER){
		MPI_Request size_reqs[numtasks-1];

		info[1] = wolf_breeding_period = atoi(argv[2]);
		info[2] = squirrel_breeding_period = atoi(argv[3]);
		info[3] = wolf_starvation_period = atoi(argv[4]);
		info[4] = number_of_generations = atoi(argv[5]);

		parse_input(argv[1]);

		info[0] = grid_size;

		bottom = 0;
		top = chunk_size = CHUNK;
		payload = top + 2;

		for(task = 1; task < numtasks; task++)
			MPI_Isend(info, 5, MPI_INT, task, INIT_TAG, MPI_COMM_WORLD, &size_reqs[task-1]);

		MPI_Waitall(numtasks - 1, size_reqs, MPI_STATUSES_IGNORE);

		for(task = 1; task < numtasks; task++){
			int bottom_task = FLIMIT_INF_CHUNK(task),
				top_task = FLIMIT_SUP_CHUNK(task),
				chunk_size = top_task-bottom_task;
			
			bottom_task -= 2;

			if (task == numtasks-1)
				top_task += CHUNK_REMAINDER;
			else
				top_task += 2;

			for( ; bottom_task < top_task; bottom_task++)
				MPI_Send(world[bottom_task], grid_size, mpi_world_cell_type, task, FILL_TAG, MPI_COMM_WORLD);
		}

	} else {
		int j = 0;

		MPI_Recv(info, 5, MPI_INT, MASTER, INIT_TAG, MPI_COMM_WORLD, &status);

		grid_size = info[0];
		wolf_breeding_period = info[1];
		squirrel_breeding_period = info[2];
		wolf_starvation_period = info[3];
		number_of_generations = info[4];
	
		bottom = 2;
		if(taskid == numtasks-1){
			chunk_size = CHUNK + CHUNK_REMAINDER;
			payload = top = chunk_size+bottom;
		} else {
			chunk_size = CHUNK;
			top = chunk_size+bottom;
			payload = top + 2;
		}

		initialize_world_array(payload );
		
		for( ; j < payload; j++)
			MPI_Recv(world[j], grid_size, mpi_world_cell_type, MASTER, FILL_TAG, MPI_COMM_WORLD, &status);
	}

	start_world_simulation();

	gather();

	#ifdef GETTIME
	if(taskid == MASTER){
	  printf("MPI time: %lf\n", MPI_Wtime() - start);
	  print_world(grid_size);
	}
	#endif

	//freemem();
	MPI_Finalize();

	return 0;
}
Beispiel #19
0
void data_server(int agents_total, int world_width, int world_height)
{
	int np; 
	MPI_Comm_size(MPI_COMM_WORLD, &np);

	/* create a type for struct agent */
	const int nitems=5;
   	int blocklengths[5] = {1,1,1,1,1};
   	MPI_Datatype types[5] = {MPI_INT, MPI_INT, MPI_INT, MPI_FLOAT, MPI_FLOAT};
	MPI_Datatype mpi_agent_type;
	MPI_Aint offsets[5];

	offsets[0] = offsetof(agent, id);
    	offsets[1] = offsetof(agent, x);
    	offsets[2] = offsetof(agent, y);
    	offsets[3] = offsetof(agent, z);
    	offsets[4] = offsetof(agent, w);

	MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_agent_type);
	MPI_Type_commit(&mpi_agent_type);


	int num_comp_nodes = np -1;
	unsigned int num_bytes = agents_total * sizeof(agent);
	agent *h_agents_in, *h_agents_out;

	/* allocate input data */
	h_agents_in = (agent *)malloc(num_bytes);
	h_agents_out = (agent *)malloc(num_bytes);
	if(h_agents_in == NULL || h_agents_out == NULL)
	{
		printf("server couldn't allocate memory\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}

	/* initialize input data */
	init_data(h_agents_in, agents_total);

#ifdef DEBUG 
	printf("Init data\n");
	display_data(h_agents_in, agents_total);
#endif

	int world_height_node = world_height / num_comp_nodes;
//	printf("world_height: %d\n", world_height_node);
	agent h_agents_node_in[num_comp_nodes][agents_total], h_agents_node_out[num_comp_nodes][agents_total];
	for(int process = 0; process < num_comp_nodes; process++)
	{	
		for(int i = 0; i < agents_total; i++)
		{
			if(  ( h_agents_in[i].y >= (process * world_height_node) ) && ( h_agents_in[i].y < ( (process + 1) * world_height_node ) )  )
				h_agents_node_in[process][i] = h_agents_in[i];
		}
	}

/***	
	printf("copy data 0\n");
	display_data(h_agents_node_in[0], agents_total);
	printf("copy data 1\n");
	display_data(h_agents_node_in[1], agents_total);
	printf("copy data 2\n");
	display_data(h_agents_node_in[2], agents_total);
***/

	/* send data to compute nodes */
	for(int process = 0; process < num_comp_nodes; process++)
		MPI_Send(h_agents_node_in[process], agents_total, mpi_agent_type, process, 0, MPI_COMM_WORLD);

	/* Wait for nodes to compute */
	MPI_Barrier(MPI_COMM_WORLD);
	
	/* Collect output data */
	MPI_Status status;

	for(int process = 0; process < num_comp_nodes; process++)
		MPI_Recv(h_agents_node_out[process], agents_total, mpi_agent_type, process, DATA_COLLECT, MPI_COMM_WORLD, &status); 

#ifdef DEBUG
        printf("Final Data\n");	
	/* display output data */
//	display_data(h_agents_out, agents_total);
#endif
	
	/* release resources */
	free(h_agents_in);
	free(h_agents_out); 
//	free(h_agents_node_in); 
//	free(h_agents_node_out); 
}
Beispiel #20
0
/* struct_of_basics_test(void)
 *
 * There's nothing simple about structs :).  Although this is an easy one.
 *
 * Returns number of errors encountered.
 *
 * NOT TESTED.
 */
int struct_of_basics_test(void)
{
    MPI_Datatype parent_type;
    int s_count = 3, s_blocklengths[3] = { 3, 2, 1 };
    MPI_Aint s_displacements[3] = { 10, 20, 30 };
    MPI_Datatype s_types[3] = { MPI_CHAR, MPI_INT, MPI_FLOAT };

    int nints, nadds, ntypes, combiner, *ints;
    MPI_Aint *adds = NULL;
    MPI_Datatype *types;

    int err, errs = 0;

    /* set up type */
    err = MPI_Type_create_struct(s_count,
				 s_blocklengths,
				 s_displacements,
				 s_types,
				 &parent_type);

    /* decode */
    err = MPI_Type_get_envelope(parent_type,
				&nints,
				&nadds,
				&ntypes,
				&combiner);

    if (nints != 4) errs++;
    if (nadds != 3) errs++;
    if (ntypes != 3) errs++;
    if (combiner != MPI_COMBINER_STRUCT) errs++;

    if (verbose) {
        if (nints != 4) fprintf(stderr, "nints = %d; should be 4\n", nints);
	if (nadds != 3) fprintf(stderr, "nadds = %d; should be 3\n", nadds);
	if (ntypes != 3) fprintf(stderr, "ntypes = %d; should be 3\n", ntypes);
	if (combiner != MPI_COMBINER_STRUCT)
	    fprintf(stderr, "combiner = %s; should be struct\n",
		    combiner_to_string(combiner));
    }

    ints = malloc(nints * sizeof(*ints));
    adds = malloc(nadds * sizeof(*adds));
    types = malloc(ntypes *sizeof(*types));

    err = MPI_Type_get_contents(parent_type,
				nints,
				nadds,
				ntypes,
				ints,
				adds,
				types);

    if (ints[0] != s_count) errs++;
    if (ints[1] != s_blocklengths[0]) errs++;
    if (ints[2] != s_blocklengths[1]) errs++;
    if (ints[3] != s_blocklengths[2]) errs++;
    if (adds[0] != s_displacements[0]) errs++;
    if (adds[1] != s_displacements[1]) errs++;
    if (adds[2] != s_displacements[2]) errs++;
    if (types[0] != s_types[0]) errs++;
    if (types[1] != s_types[1]) errs++;
    if (types[2] != s_types[2]) errs++;

    if (verbose) {
	if (ints[0] != s_count) 
	    fprintf(stderr, "count = %d; should be %d\n", ints[0], s_count);
	if (ints[1] != s_blocklengths[0])
	    fprintf(stderr, "blocklength[0] = %d; should be %d\n", ints[1], s_blocklengths[0]);
	if (ints[2] != s_blocklengths[1]) 
	    fprintf(stderr, "blocklength[1] = %d; should be %d\n", ints[2], s_blocklengths[1]);
	if (ints[3] != s_blocklengths[2]) 
	    fprintf(stderr, "blocklength[2] = %d; should be %d\n", ints[3], s_blocklengths[2]);
	if (adds[0] != s_displacements[0]) 
	    fprintf(stderr, "displacement[0] = %ld; should be %ld\n", (long) adds[0], (long) s_displacements[0]);
	if (adds[1] != s_displacements[1]) 
	    fprintf(stderr, "displacement[1] = %ld; should be %ld\n", (long) adds[1], (long) s_displacements[1]);
	if (adds[2] != s_displacements[2]) 
	    fprintf(stderr, "displacement[2] = %ld; should be %ld\n", (long) adds[2], (long) s_displacements[2]);
	if (types[0] != s_types[0]) 
	    fprintf(stderr, "type[0] does not match\n");
	if (types[1] != s_types[1]) 
	    fprintf(stderr, "type[1] does not match\n");
	if (types[2] != s_types[2]) 
	    fprintf(stderr, "type[2] does not match\n");
    }

    free(ints);
    free(adds);
    free(types);

    MPI_Type_free( &parent_type );

    return errs;
}
Beispiel #21
0
int main(int argc, char **argv)
{
	int i, size, ierr, instructionmsg, ctr;
	char hostname[MAX_LINE];
	int hostnamelen, filepoolnumel, claimedfilepoolnumel;
	int *nodepoolentriesk, *nodepoolentriesv;
	int *filepoolkeys, *claimedfilepoolkeys;
	mpiconfig_t mpicfg;
	MPI_Datatype instructmsg_mpi_t;

	MPI_Datatype array_of_types[3];
	int array_of_blocklengths[3];
	MPI_Aint array_of_displaysments[3];
	MPI_Aint intex, charex, lb;

	MPI_Init(&argc, &argv);
	ierr = MPI_Comm_size(MPI_COMM_WORLD, &mpicfg.num_procs);
	ierr = MPI_Comm_rank(MPI_COMM_WORLD, &mpicfg.rank);
	MPI_Get_processor_name(hostname, &hostnamelen);

	ierr = MPI_Type_get_extent(MPI_INT, &lb, &intex);
	ierr = MPI_Type_get_extent(MPI_CHAR, &lb, &charex);

	//Says the type of every block
	array_of_types[0] = MPI_CHAR;
	array_of_types[1] = MPI_INT;
	array_of_types[2] = MPI_INT;

	//Says how many elements for block
	array_of_blocklengths[0] = MAX_LINE;
	array_of_blocklengths[1] = 1;
	array_of_blocklengths[2] = 1;

	/*Says where every block starts in memory, counting from the beginning of the struct.*/
	array_of_displaysments[0] = 0;
	array_of_displaysments[1] = MAX_LINE * charex;
	array_of_displaysments[2] = MAX_LINE * charex + intex;

	/*Create MPI Datatype and commit*/
	MPI_Type_create_struct(3, array_of_blocklengths, array_of_displaysments, array_of_types, &instructmsg_mpi_t);
	MPI_Type_commit(&instructmsg_mpi_t);

	mpicfg.imsg_t = instructmsg_mpi_t;

	if(EBUG){printf("Hello world!  I am process number: %d on host %s\n", mpicfg.rank, hostname);}

	//hashtable_t *nodepool;
	/* Create node/file pool hash table */
	mpicfg.nodepool = ht_create( mpicfg.num_procs );
	mpicfg.filepool = ht_create( mpicfg.num_procs );

	//Initialize structs:
	mpicfg.procstatus = (int*)calloc(mpicfg.num_procs,sizeof(int));
	mpicfg.aliveprocs = (int*)calloc(mpicfg.num_procs,sizeof(int));
	for ( i = 0 ; i < mpicfg.num_procs ; i++) {
		mpicfg.aliveprocs[i]  = -1;
	}
	mpicfg.id = -1;
	mpicfg.stopexecution = 0;

	
	if( mpicfg.rank == 0 ) {
		/*This is the coordinator process.*/

		//Load instruction list:
		/* the coordinator node must be different from the others!! */
		/* Execute next instruction */
		join( &mpicfg, 1 );
		join( &mpicfg, 4 );
		join( &mpicfg, 8 );
		join( &mpicfg, 6 );
		insert( &mpicfg, 9 );
		insert( &mpicfg, 3 );
		insert( &mpicfg, 4 );
		insert( &mpicfg, 9 );

		find( &mpicfg, 4 );
		del( &mpicfg, 4 );
		find( &mpicfg, 4 );
		find( &mpicfg, 3 );
		leave( &mpicfg, 8 );
		find( &mpicfg, 3 );
		

		/*find( &mpicfg, 8 );
		insert( &mpicfg, 5 );
		insert( &mpicfg, 6 );
		insert( &mpicfg, 7 );
		insert( &mpicfg, 8 );
		*/
		end( &mpicfg);

	}else{

		/*All other ranks */
		while (!mpicfg.stopexecution){
			/* Wait instruction message: */
			MPI_Recv(&mpicfg.imsg, 1, mpicfg.imsg_t, 0, 0, MPI_COMM_WORLD,
				 MPI_STATUS_IGNORE);
			/* Execute whatever instruction in that message */
			executeinstruction(&mpicfg, mpicfg.imsg);
		}

	}

	free(mpicfg.procstatus);
	free(mpicfg.aliveprocs);

	ierr = MPI_Finalize();

	return 0;
}
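
The displacements above are derived from the extents of MPI_CHAR and MPI_INT, which only matches the C message struct if the compiler inserts no padding after the char array. A more robust variant uses offsetof on the struct itself; the sketch below assumes a hypothetical layout (the example's real instruction-message struct is not shown):

/* Sketch with a hypothetical message layout -- not the example's real struct.
 * offsetof (from <stddef.h>) picks up any padding the compiler inserts. */
typedef struct {
	char key[MAX_LINE];
	int  instruction;
	int  value;
} instructionmsg_sketch_t;

static MPI_Datatype make_instructionmsg_type(void)
{
	int          blocklengths[3] = { MAX_LINE, 1, 1 };
	MPI_Datatype types[3]        = { MPI_CHAR, MPI_INT, MPI_INT };
	MPI_Aint     displs[3]       = { offsetof(instructionmsg_sketch_t, key),
	                                 offsetof(instructionmsg_sketch_t, instruction),
	                                 offsetof(instructionmsg_sketch_t, value) };
	MPI_Datatype t;
	MPI_Type_create_struct(3, blocklengths, displs, types, &t);
	MPI_Type_commit(&t);
	return t;
}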
Beispiel #22
0
static PetscErrorCode TestCellShape(DM dm)
{
  PetscMPIInt    rank;
  PetscInt       dim, c, cStart, cEnd, count = 0;
  ex1_stats_t    stats, globalStats;
  PetscReal      *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0;
  MPI_Comm       comm = PetscObjectComm((PetscObject)dm);
  DM             dmCoarse;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  stats.min = PETSC_MAX_REAL;
  stats.max = PETSC_MIN_REAL;
  stats.sum = stats.squaresum = 0.;
  stats.count = 0;

  ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr);

  ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr);

  ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr);
  for (c = cStart; c < cEnd; c++) {
    PetscInt  i;
    PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ;

    ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr);

    for (i = 0; i < dim * dim; i++) {
      frobJ += J[i] * J[i];
      frobInvJ += invJ[i] * invJ[i];
    }
    cond2 = frobJ * frobInvJ;
    cond  = PetscSqrtReal(cond2);

    stats.min = PetscMin(stats.min,cond);
    stats.max = PetscMax(stats.max,cond);
    stats.sum += cond;
    stats.squaresum += cond2;
    stats.count++;
  }

  {
    PetscMPIInt    blockLengths[2] = {4,1};
    MPI_Aint       blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)};
    MPI_Datatype   blockTypes[2]   = {MPIU_REAL,MPIU_INT}, statType;
    MPI_Op         statReduce;

    ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&statType);CHKERRQ(ierr);
    ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr);
    ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr);
    ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr);
    ierr = MPI_Type_free(&statType);CHKERRQ(ierr);
  }

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    count = globalStats.count;
    min = globalStats.min;
    max = globalStats.max;
    mean = globalStats.sum / globalStats.count;
    stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean);
  }
  ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev);

  ierr = PetscFree2(J,invJ);CHKERRQ(ierr);

  ierr = DMPlexGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr);
  if (dmCoarse) {
    ierr = TestCellShape(dmCoarse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
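
ex1_stats_reduce is registered with MPI_Op_create above but not shown. A user-defined reduction for this layout follows the MPI_User_function signature; the sketch below is an illustration of what such a callback could look like, not PETSc's actual implementation:

/* Illustration only -- not PETSc's ex1_stats_reduce.  Assumes the ex1_stats_t
 * layout implied above: four reals (min, max, sum, squaresum) and a count. */
static void stats_reduce_sketch(void *in, void *inout, int *len, MPI_Datatype *dtype)
{
  ex1_stats_t *a = (ex1_stats_t *) in;
  ex1_stats_t *b = (ex1_stats_t *) inout;
  int          i;

  (void) dtype; /* one fixed layout assumed in this sketch */
  for (i = 0; i < *len; i++) {
    b[i].min        = PetscMin(a[i].min, b[i].min);
    b[i].max        = PetscMax(a[i].max, b[i].max);
    b[i].sum       += a[i].sum;
    b[i].squaresum += a[i].squaresum;
    b[i].count     += a[i].count;
  }
}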
int main(int argc, char *argv[])
{
    int numtasks, taskid; 
    int n = atoi(argv[1]);
    const int num_item =2;
    int blocklengths[2] = {1,1};
    int seed = clock();
    int i,j;
    clock_t endt,start;
    srand(seed);

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
    MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
    if(n % numtasks != 0){
	printf("points number not dividable by number of processors\n");
	MPI_Finalize();
	return -1;
    }
    MPI_Status status;

    MPI_Datatype types[2] = {MPI_INT, MPI_INT};
    MPI_Datatype mpi_point_type;
    MPI_Aint offsets[2];

    offsets[0] = offsetof(point, x);
    offsets[1] = offsetof(point, y);

    MPI_Type_create_struct(num_item, blocklengths, offsets, types, &mpi_point_type);
    MPI_Type_commit(&mpi_point_type);
    
    point s[n];
    struct point_struct *p_x = (struct point_struct*)malloc(n*sizeof(struct point_struct));
    
    double *dist_closest_pair = (double*)malloc(numtasks*sizeof(double));
    int offset[numtasks], share_len = (n/numtasks);
    offset[taskid] = taskid * share_len;
    
    if (taskid == MASTER){
	for(i = 0; i < n ; i++) {
		s[i].x =  rand()%1000;
		s[i].y =  rand()%1000;
	}

	for(i=0 ; i<n ; i++){
		p_x[i].x = s[i].x;
		p_x[i].y = s[i].y;
	}
	start  = clock();
  b_s_x(n,p_x);

    }//MASTER

    MPI_Scatter(&p_x[0], share_len, mpi_point_type, &p_x[offset[taskid]], share_len, mpi_point_type, MASTER , MPI_COMM_WORLD);

    for(i=0 ;i < numtasks; i++){
	if(taskid == i ){
	    dist_closest_pair[taskid] = Closest_Pair(taskid,offset[taskid], offset[taskid]+share_len-1, share_len, p_x);
	}
    }

    MPI_Gather(&dist_closest_pair[taskid] , 1, MPI_DOUBLE, &dist_closest_pair[taskid] , 1 , MPI_DOUBLE , MASTER , MPI_COMM_WORLD);

    if(taskid == MASTER){
	point p_y[2*share_len];
	int x[numtasks-1];
	for(i=0 ;i< numtasks-1 ; i++){
	    x[i]= (i*share_len)+share_len;
	} 
	double d_boundary[numtasks-1], d_min_proc=dist_closest_pair[0];
	for(i=1 ; i<numtasks ; i++){
	    if(d_min_proc > dist_closest_pair[i])
		d_min_proc=dist_closest_pair[i];
	}

	for(i=0 ; i<numtasks-1 ; i++){
	    for(j=x[i]-share_len ; j<x[i]+share_len ; j++){
		p_y[j] = p_x[j];
	    } 
	    b_s_y(2*share_len,p_y);
	    d_boundary[i] = boundary_check(x[i]-share_len, 2*share_len, p_y, x[i], d_min_proc );
	}

	double D_min = d_min_proc;	
	for(i=0 ; i<numtasks-1 ; i++){
	    if(d_boundary[i] < D_min )
		D_min = d_boundary[i];
	}
	
	printf("\n minimum distanse is : %f.\n",D_min);
   }

    MPI_Finalize();
    return 0;
}
int main (int argc, char *argv[])
{
  int my_rank, size;
  int right, left;

  struct buff{
     int   i;
     float f;
  } snd_buf, rcv_buf, sum;

  int i;

  int          array_of_blocklengths[COUNT];
  MPI_Aint     array_of_displacements[COUNT], first_var_address, second_var_address;
  MPI_Datatype array_of_types[COUNT], datatype;

  MPI_Status  status;

  /* Get process and neighbour info. */
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  right = (my_rank+1)      % size;
  left  = (my_rank-1+size) % size;
/* ... this SPMD-style neighbor computation with modulo has the same meaning as: */
/* right = my_rank + 1;          */
/* if (right == size) right = 0; */
/* left = my_rank - 1;           */
/* if (left == -1) left = size-1;*/

  /* Set MPI datatypes for sending and receiving partial sums. */
  array_of_blocklengths[0] = 1;
  array_of_blocklengths[1] = 1;

  MPI_Get_address(&snd_buf.i, &first_var_address);
  MPI_Get_address(&snd_buf.f, &second_var_address);

  array_of_displacements[0] = (MPI_Aint) 0;
  array_of_displacements[1] = second_var_address - first_var_address;

  array_of_types[0] = MPI_INT;
  array_of_types[1] = MPI_FLOAT;

  MPI_Type_create_struct(COUNT, array_of_blocklengths, array_of_displacements, array_of_types, &datatype);
  MPI_Type_commit(&datatype);

  /* Compute global sum. */
  sum.i = 0;            sum.f = 0;
  snd_buf.i = my_rank;  snd_buf.f = my_rank;  /* Step 1 = init */

  for( i = 0; i < size; i++) 
  {
    MPI_Sendrecv(&snd_buf, 1, datatype, right, to_right,  /* Step 2 */
                 &rcv_buf, 1, datatype, left, to_right,   /* Step 3 */
                 MPI_COMM_WORLD, &status);
    snd_buf = rcv_buf;                        /* Step 4 */
    sum.i += rcv_buf.i;  sum.f += rcv_buf.f;  /* Step 5 */
  }

  printf ("PE%i:\tSum = %i\t%f\n", my_rank, sum.i, sum.f);

  MPI_Finalize();
}
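
The float member's displacement is obtained here by subtracting two absolute addresses. MPI-3 added MPI_Aint_diff for exactly this purpose, which is the portable way to difference MPI_Aint values; the equivalent one-liner for the computation above:

  /* MPI-3 variant of the displacement computation above (same variables). */
  array_of_displacements[1] = MPI_Aint_diff(second_var_address, first_var_address);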
Beispiel #25
0
int main( int argc, char **argv )
{
    int          vcount, vstride;
    int32_t      counts[2];
    int          v2stride, typesize, packsize, i, position, errs = 0;
    double       *outbuf, *outbuf2;
    double       *vsource;
    MPI_Datatype vtype, stype;
    MPI_Aint     lb, extent;
    double       t0, t1;
    double       tspack, tvpack, tmanual;
    int          ntry;
    int          blocklengths[2];
    MPI_Aint     displacements[2];
    MPI_Datatype typesArray[2];

    MPI_Init( &argc, &argv );
    
    /* Create a struct consisting of two 32-bit ints, followed by a 
       vector of stride 3 but count 128k (less than a few MB of data area) */
    vcount  = 128000;
    vstride = 3;
    MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype );

    vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) );
    if (!vsource) {
	fprintf( stderr, "Unable to allocate vsource\n" );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for (i=0; i<vcount*vstride; i++) {
	vsource[i] = i;
    }
    blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] );
    blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] );
    if (verbose) {
	printf( "%p = %p?\n", vsource, (void *)displacements[1] );
    }
    typesArray[0] = MPI_INT32_T;
    typesArray[1] = vtype;
    MPI_Type_create_struct( 2, blocklengths, displacements, typesArray, 
			    &stype );
    MPI_Type_commit( &stype );
    MPI_Type_commit( &vtype );

#if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
    /* To use MPIDU_Datatype_debug to print the datatype internals,
       you must configure MPICH with --enable-g=log */
    if (verbose) {
	printf( "Original struct datatype:\n" );
	MPIDU_Datatype_debug( stype, 10 );
    }
#endif

    MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize );
    outbuf  = (double *)malloc( packsize );
    outbuf2 = (double *)malloc( packsize );
    if (!outbuf) {
	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!outbuf2) {
	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    position = 0;
    /* Warm up the code and data */
    MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, 
	      MPI_COMM_WORLD );

    tspack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	position = 0;
	t0 = MPI_Wtime();
	MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, 
		  MPI_COMM_WORLD );
	t1 = MPI_Wtime() - t0;
	if (t1 < tspack) tspack = t1;
    }
    MPI_Type_free( &stype );

    /* An equivalent packing, using the 2 ints and the vector separately */
    tvpack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	position = 0;
	t0 = MPI_Wtime();
	MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position, 
		  MPI_COMM_WORLD );
	MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position, 
		  MPI_COMM_WORLD );
	t1 = MPI_Wtime() - t0;
	if (t1 < tvpack) tvpack = t1;
    }
    MPI_Type_free( &vtype );

    /* Note that we exploit the fact that the vector type contains vblock 
       instances of a contiguous type of size 24, or a single block of 24*vblock
       bytes.
    */
    tmanual = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	const double * restrict ppe = (const double *)vsource;
	double * restrict ppo = outbuf2;
	int j;
	t0 = MPI_Wtime();
	position = 0;
	*(int32_t *)ppo          = counts[0];
	*( ((int32_t *)ppo) + 1) = counts[1];
	ppo++;
	/* Some hand optimization because this file is not normally 
	   compiled with optimization by the test suite */
	j = vcount;
	while (j) {
	    *ppo++ = *ppe;
	    ppe += vstride;
	    *ppo++ = *ppe;
	    ppe += vstride;
	    *ppo++ = *ppe;
	    ppe += vstride;
	    *ppo++ = *ppe;
	    ppe += vstride;
	    j -= 4;
	}
	position += (1 + vcount);
	position *= sizeof(double);
	t1 = MPI_Wtime() - t0;
	if (t1 < tmanual) tmanual = t1;

	/* Check on correctness */
#ifdef PACK_IS_NATIVE
	if (memcmp( outbuf, outbuf2, position ) != 0) {
	    printf( "Panic(manual) - pack buffers differ\n" );
	    for (j=0; j<8; j++) {
		printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j], 
			(long long unsigned)outbuf2[j] );
	    }
	}
#endif
    }

    if (verbose) {
	printf( "Bytes packed = %d\n", position );
	printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n", 
		tspack, tvpack, tmanual );
    }

    if (4 * tmanual < tspack) {
	errs++;
	printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual )
;
	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
	printf( "For most informative results, be sure to compile this test with optimization\n" );
    }
    if (4 * tmanual < tvpack) {
	errs++;
	printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, 
		tmanual );
	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
	printf( "For most informative results, be sure to compile this test with optimization\n" );
    }
    if (4 * tvpack < tspack) {
	errs++;
	printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack );
	printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" );
	printf( "For most informative results, be sure to compile this test with optimization\n" );
    }

    if (errs) {
        printf( " Found %d errors\n", errs );
    }
    else {
        printf( " No Errors\n" );
    } 
    
    free( vsource );
    free( outbuf );
    free( outbuf2 );

    MPI_Finalize();
    return 0;
}
// TODO: add player payments for chance/chest
int main(int argc, char ** argv)
{
    struct timeval t1, t2;
    MPI_Init(&argc, &argv);
    gettimeofday(&t1, NULL);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &globalrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    globalsize = size;
    srand(time(NULL) + globalrank);
    struct location board[BSIZE];
    struct player players[NUMPLAYERS];
    int itr = 10000;
    long long bills[4]; // how much you owe each player at end of round
    init_players(players);
    init_board(board);
    char plocation;
    int pvalue;
    int numcomms = 1;
    MPI_Group world_group;
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    playerdata d;
    d.money[0] = 0;
    d.money[1] = 0;
    d.money[2] = 0;
    d.money[3] = 0;
    output = (FILE **) malloc(size * sizeof(FILE *));

    // if 1 process created just run sequentially
    if (size == 1)
    {
        int done[4] = {1, 1, 1, 1};
        while (itr)
        {
            itr--;
            int i;
            for (i = 0; i < NUMPLAYERS; i++)
            {
                plocation = 0;
                pvalue = 0;
                if (players[i].money > 0)
                {
                    move(players, board, i, &pvalue, &plocation);
                    if (plocation)
                    {
                        board[plocation].owner = i;
                        players[i].money -= pvalue;
                    }
                }
                else
                {
                    players[i].order = -1;
                    if (done[i])
                    {
                        remove_properties(board, i);
                        done[i] = 0;
                    }
                }
            }
        }
        gettimeofday(&t2, NULL);
        results(players, board);
        double exectime = (t2.tv_sec - t1.tv_sec) * 1000000 + ((t2.tv_usec - t1.tv_usec));
        printf("Exec Time %lf\n", exectime);
        return 0;
    }

    // create a communicator for each monopoly game (for n > 4)
    MPI_Group * gamesel;
    MPI_Comm * games;
    int ranksel[4];
    if (size > 4)
    {
        numcomms = size / 4;
        games = (MPI_Comm *) malloc(numcomms * sizeof(MPI_Comm));
        gamesel = (MPI_Group *) malloc(numcomms * sizeof(MPI_Group));
        int i;
        for (i = 0; i < numcomms; i++)
        {
            ranksel[0] = 4 * i;
            ranksel[1] = 4 * i + 1;
            ranksel[2] = 4 * i + 2;
            ranksel[3] = 4 * i + 3;
            MPI_Group_incl(world_group, 4, ranksel, &gamesel[i]);
            MPI_Comm_create(MPI_COMM_WORLD, gamesel[i], &games[i]);
        }
    }
    else
    {
        // size <= 4 so use MPI_COMM_WORLD
        games = (MPI_Comm *) malloc(1 * sizeof(MPI_Comm));
        games[0] = MPI_COMM_WORLD;
        numcomms = 1;
    }

    // create an MPI type so that we can use our player data struct in MPI communication calls
    const int nitems = 5;
    int blocklengths[5] = {4, 1, 1, 1, 1};
    MPI_Datatype types[5] = {MPI_LONG_LONG, MPI_INT, MPI_CHAR, MPI_CHAR, MPI_CHAR};
    MPI_Datatype MPI_MONO_DATA;
    MPI_Aint offsets[5];
    offsets[0] = offsetof(playerdata, money);
    offsets[1] = offsetof(playerdata, pvalue);
    offsets[2] = offsetof(playerdata, plocation);
    offsets[3] = offsetof(playerdata, order);
    offsets[4] = offsetof(playerdata, trade);
    MPI_Type_create_struct(nitems, blocklengths, offsets, types, &MPI_MONO_DATA);
    MPI_Type_commit(&MPI_MONO_DATA);

    MPI_Comm_rank(games[globalrank / 4], &rank);

#ifdef DEBUG
    char fname[10];
    snprintf(fname, 10, "mon%d.dbg", globalrank);
    output[globalrank] = fopen(fname, "w");
    fprintf(output[globalrank], "MAIN begin loop\n");
    print_board_info(board);
#endif
    // run the game for 40000 turns (10000 per player)
    while (itr > 0)
    {
        itr--;
        pvalue = 0;
        plocation = 0;
        d.trade = 0;
        d.order = rank;
#ifdef DEBUG
        fprintf(output[globalrank], "MAIN tag 1 rank %d\n", rank);
#endif
        move(players, board, rank, &pvalue, &plocation);
        d.pvalue = pvalue;
        d.plocation = plocation;
#ifdef DEBUG
        fprintf(output[globalrank], "using comm %d\n", globalrank / 4);
        if (games[globalrank / 4] != MPI_COMM_WORLD)
        {
            fprintf(output[globalrank], "COMM ERROR\n");
        }
#endif
        send_info(&d, players, board, rank, games[globalrank / 4], MPI_MONO_DATA);
#ifdef DEBUG
        fprintf(output[globalrank], "MAIN tag 3 rank %d\n", rank);
        print_board_info(board);
#endif
    }
    
#ifdef DEBUG
    fprintf(output[globalrank], "MAIN last tag rank %d\n", rank);
#endif
    // get results from each process
    gather_results(players, board, games, numcomms, globalrank);
    gettimeofday(&t2, NULL);
    if (globalrank == 0)
    {
        results(players, board);
    }


#ifdef DEBUG
    fclose(output[globalrank]);
#endif
    double exectime = (t2.tv_sec - t1.tv_sec) * 1000000 + ((t2.tv_usec - t1.tv_usec));
    if (globalrank == 0)
    {
        printf("Exec Time %lf\n", exectime);
    }
    MPI_Finalize();

    return 0;
}