Пример #1
0
int 
adios_read_flexpath_advance_step(ADIOS_FILE *adiosfile, int last, float timeout_sec) 
{
    flexpath_reader_file *fp = (flexpath_reader_file*)adiosfile->fh;
    MPI_Barrier(fp->comm);
    int count = 0; // for perf measurements
    send_flush_msg(fp, fp->writer_coordinator, STEP, 1);
    //put this on a timer, so to speak, for timeout_sec
    while(fp->mystep == fp->last_writer_step){
	if(fp->writer_finalized){
	    adios_errno = err_end_of_stream;
	    return err_end_of_stream;
	}
	CMsleep(fp_read_data->fp_cm, 1);
	send_flush_msg(fp, fp->writer_coordinator, STEP, 1);
    }

    double advclose_start = dgettimeofday();
    int i=0;    
    for(i=0; i<fp->num_bridges; i++) {
        if(fp->bridges[i].created && fp->bridges[i].opened) {
	    count++;
	    send_close_msg(fp, i);
	}
    }
    MPI_Barrier(fp->comm);

    double advclose_end = dgettimeofday();
    count = 0;
    adiosfile->current_step++;
    fp->mystep = adiosfile->current_step;

    
    double advopen_start = dgettimeofday();
    for(i=0; i<fp->num_bridges; i++){
	if(fp->bridges[i].created && !fp->bridges[i].opened){	    
	    send_open_msg(fp, i);
	    count++;
        }
    }   
    double advopen_end = dgettimeofday();
    // need to remove selectors from each var now.
    send_flush_msg(fp, fp->writer_coordinator, DATA, 1);
      
    // should only happen if there are more steps available.
    // writer should have advanced.
    double offset_start = dgettimeofday();
    send_flush_msg(fp, fp->writer_coordinator, EVGROUP, 1);
    double offset_end = dgettimeofday();    
    return 0;
}
Пример #2
0
int adios_read_flexpath_perform_reads(const ADIOS_FILE *adiosfile, int blocking)
{
    fp_log("FUNC", "entering perform_reads.\n");
    flexpath_reader_file * fp = (flexpath_reader_file*)adiosfile->fh;
    fp->data_read = 0;
    int i,j;
    int num_sendees = fp->num_sendees;
    int total_sent = 0;
    fp->time_in = 0.00;
    double start_poll = MPI_Wtime();
    for(i = 0; i<num_sendees; i++){
	pthread_mutex_lock(&fp->data_mutex);
	int sendee = fp->sendees[i];	
	fp->pending_requests++;
	total_sent++;
	send_flush_msg(fp, sendee, DATA, 0);

	if((total_sent % FP_BATCH_SIZE == 0) || (total_sent = num_sendees)){
	    pthread_cond_wait(&fp->data_condition, &fp->data_mutex);
	    pthread_mutex_unlock(&fp->data_mutex);
	    fp->completed_requests = 0;
	    fp->pending_requests = 0;
	    total_sent = 0;
	}

    }
    double end_poll = MPI_Wtime();

    free(fp->sendees);
    fp->sendees = NULL;    
    fp->num_sendees = 0;
    fp_log("FUNC", "leaving perform_reads.\n");
    return 0;
}
Пример #3
0
static inline int check_and_switch_target_state(MPIR_Win * win_ptr, MPIDI_RMA_Target_t * target,
                                                int *is_able_to_issue, int *made_progress)
{
    int rank = win_ptr->comm_ptr->rank;
    int mpi_errno = MPI_SUCCESS;

    (*made_progress) = 0;
    (*is_able_to_issue) = 0;

    if (target == NULL)
        goto fn_exit;

    /* When user event happens, move op in user pending list to network pending list */
    if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH ||
        target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH_LOCAL ||
        target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK || target->win_complete_flag) {

        MPIDI_RMA_Op_t *user_op = target->pending_user_ops_list_head;

        if (user_op != NULL) {
            if (target->pending_net_ops_list_head == NULL)
                win_ptr->num_targets_with_pending_net_ops++;

            DL_DELETE(target->pending_user_ops_list_head, user_op);
            DL_APPEND(target->pending_net_ops_list_head, user_op);

            if (target->next_op_to_issue == NULL)
                target->next_op_to_issue = user_op;
        }
    }

    switch (target->access_state) {
    case MPIDI_RMA_LOCK_CALLED:
        if (target->sync.sync_flag == MPIDI_RMA_SYNC_NONE ||
            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH_LOCAL ||
            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
            if ((target->pending_net_ops_list_head == NULL ||
                 !target->pending_net_ops_list_head->piggyback_lock_candidate) &&
                (target->pending_user_ops_list_head == NULL ||
                 !target->pending_user_ops_list_head->piggyback_lock_candidate)) {
                /* issue lock request */
                target->access_state = MPIDI_RMA_LOCK_ISSUED;
                if (target->target_rank == rank) {
                    mpi_errno = acquire_local_lock(win_ptr, target->lock_type);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIR_ERR_POP(mpi_errno);
                }
                else {
                    mpi_errno = send_lock_msg(target->target_rank, target->lock_type, win_ptr);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIR_ERR_POP(mpi_errno);
                }

                (*made_progress) = 1;
            }
        }
        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
            if (target->pending_net_ops_list_head == NULL) {
                /* No RMA operation has ever been posted to this target,
                 * finish issuing, no need to acquire the lock. Cleanup
                 * function will clean it up. */
                target->access_state = MPIDI_RMA_LOCK_GRANTED;

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
            else {
                /* if we reach WIN_UNLOCK and there is still operation existing
                 * in pending list, this operation must be the only operation
                 * and it is prepared to piggyback LOCK and UNLOCK. */
                MPIR_Assert(MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING);
                MPIR_Assert(target->pending_net_ops_list_head->next == NULL);
                MPIR_Assert(target->pending_net_ops_list_head->piggyback_lock_candidate);
            }
        }
        break;

    case MPIDI_RMA_LOCK_GRANTED:
    case MPIDI_RMA_NONE:
        if (target->win_complete_flag) {
            if (target->pending_net_ops_list_head == NULL) {
                MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
                if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH &&
                    target->num_ops_flush_not_issued > 0) {
                    flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
                    win_ptr->outstanding_acks++;
                    target->sync.outstanding_acks++;
                    target->num_ops_flush_not_issued = 0;
                }

                mpi_errno = send_decr_at_cnt_msg(target->target_rank, win_ptr, flags);
                if (mpi_errno != MPI_SUCCESS)
                    MPIR_ERR_POP(mpi_errno);

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
        }
        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
            if (target->pending_net_ops_list_head == NULL) {
                if (target->target_rank != rank) {
                    if (target->num_ops_flush_not_issued > 0) {

                        win_ptr->outstanding_acks++;
                        target->sync.outstanding_acks++;
                        target->num_ops_flush_not_issued = 0;

                        mpi_errno = send_flush_msg(target->target_rank, win_ptr);
                        if (mpi_errno != MPI_SUCCESS)
                            MPIR_ERR_POP(mpi_errno);
                    }
                }

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
        }
        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
            if (target->pending_net_ops_list_head == NULL) {
                if (target->target_rank == rank) {
                    mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIR_ERR_POP(mpi_errno);
                }
                else {
                    MPIDI_CH3_Pkt_flags_t flag = MPIDI_CH3_PKT_FLAG_NONE;
                    if (target->num_ops_flush_not_issued == 0) {
                        flag = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK;
                    }
                    else {
                        win_ptr->outstanding_acks++;
                        target->sync.outstanding_acks++;
                        target->num_ops_flush_not_issued = 0;
                    }
                    mpi_errno = send_unlock_msg(target->target_rank, win_ptr, flag);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIR_ERR_POP(mpi_errno);
                }

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
        }
        break;

    default:
        break;
    }   /* end of switch */

    if (target->access_state != MPIDI_RMA_LOCK_ISSUED) {
        (*is_able_to_issue) = 1;
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Пример #4
0
static inline int check_target_state(MPID_Win * win_ptr, MPIDI_RMA_Target_t * target,
                                     int *made_progress)
{
    int rank = win_ptr->comm_ptr->rank;
    int mpi_errno = MPI_SUCCESS;

    (*made_progress) = 0;

    if (target == NULL)
        goto fn_exit;

    /* This check should only be performed when window-wide sync is finished, or
     * current sync is per-target sync. */
    if (win_ptr->states.access_state == MPIDI_RMA_NONE ||
        win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
        goto fn_exit;
    }

    switch (target->access_state) {
    case MPIDI_RMA_LOCK_CALLED:
        if (target->sync.sync_flag == MPIDI_RMA_SYNC_NONE ||
            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH_LOCAL ||
            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
            if (target->pending_op_list == NULL ||
                !target->pending_op_list->piggyback_lock_candidate) {
                /* issue lock request */
                target->access_state = MPIDI_RMA_LOCK_ISSUED;
                if (target->target_rank == rank) {
                    mpi_errno = acquire_local_lock(win_ptr, target->lock_type);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIU_ERR_POP(mpi_errno);
                }
                else {
                    mpi_errno = send_lock_msg(target->target_rank, target->lock_type, win_ptr);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIU_ERR_POP(mpi_errno);
                }

                (*made_progress) = 1;
            }
        }
        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
            if (target->pending_op_list == NULL) {
                /* No RMA operation has ever been posted to this target,
                 * finish issuing, no need to acquire the lock. Cleanup
                 * function will clean it up. */
                target->access_state = MPIDI_RMA_LOCK_GRANTED;

                target->sync.outstanding_acks--;
                MPIU_Assert(target->sync.outstanding_acks >= 0);

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
            else {
                /* if we reach WIN_UNLOCK and there is still operation existing
                 * in pending list, this operation must be the only operation
                 * and it is prepared to piggyback LOCK and UNLOCK. */
                MPIU_Assert(target->pending_op_list->next == NULL);
                MPIU_Assert(target->pending_op_list->piggyback_lock_candidate);
            }
        }
        break;

    case MPIDI_RMA_LOCK_GRANTED:
    case MPIDI_RMA_NONE:
        if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
            if (target->pending_op_list == NULL) {
                if (target->target_rank == rank) {
                    target->sync.outstanding_acks--;
                    MPIU_Assert(target->sync.outstanding_acks >= 0);
                }
                else {
                    if (target->put_acc_issued) {
                        mpi_errno = send_flush_msg(target->target_rank, win_ptr);
                        if (mpi_errno != MPI_SUCCESS)
                            MPIU_ERR_POP(mpi_errno);
                    }
                    else {
                        /* We did not issue PUT/ACC since the last
                         * synchronization call, therefore here we
                         * don't need ACK back */
                        target->sync.outstanding_acks--;
                        MPIU_Assert(target->sync.outstanding_acks >= 0);
                    }
                }

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
        }
        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
            if (target->pending_op_list == NULL) {
                if (target->target_rank == rank) {
                    target->sync.outstanding_acks--;
                    MPIU_Assert(target->sync.outstanding_acks >= 0);

                    mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIU_ERR_POP(mpi_errno);
                }
                else {
                    MPIDI_CH3_Pkt_flags_t flag = MPIDI_CH3_PKT_FLAG_NONE;
                    if (!target->put_acc_issued) {
                        /* We did not issue PUT/ACC since the last
                         * synchronization call, therefore here we
                         * don't need ACK back */
                        target->sync.outstanding_acks--;
                        MPIU_Assert(target->sync.outstanding_acks >= 0);

                        flag = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK;
                    }
                    mpi_errno = send_unlock_msg(target->target_rank, win_ptr, flag);
                    if (mpi_errno != MPI_SUCCESS)
                        MPIU_ERR_POP(mpi_errno);
                }

                /* We are done with ending synchronization, unset target's sync_flag. */
                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;

                (*made_progress) = 1;
            }
        }
        break;

    default:
        break;
    }   /* end of switch */

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Пример #5
0
/*
 * Sets up local data structure for series of reads on an adios file
 * - create evpath graph and structures
 * -- create evpath control stone (outgoing)
 * -- create evpath data stone (incoming)
 * -- rank 0 dumps contact info to file
 * -- create connections using contact info from file
 */
ADIOS_FILE*
adios_read_flexpath_open(const char * fname,
			 MPI_Comm comm,
			 enum ADIOS_LOCKMODE lock_mode,
			 float timeout_sec)
{
    fp_log("FUNC", "entering flexpath_open\n");
    ADIOS_FILE *adiosfile = malloc(sizeof(ADIOS_FILE));        
    if(!adiosfile){
	adios_error (err_no_memory, 
		     "Cannot allocate memory for file info.\n");
	return NULL;
    }    
    
    flexpath_reader_file *fp = new_flexpath_reader_file(fname);
	
    adios_errno = 0;
    fp->stone = EValloc_stone(fp_read_data->fp_cm);	
    fp->comm = comm;

    MPI_Comm_size(fp->comm, &(fp->size));
    MPI_Comm_rank(fp->comm, &(fp->rank));

    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    op_format_list,
			    op_msg_handler,
			    adiosfile);       

    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    update_step_msg_format_list,
			    update_step_msg_handler,
			    adiosfile);       


    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    evgroup_format_list,
			    group_msg_handler,
			    adiosfile);

    EVassoc_raw_terminal_action(fp_read_data->fp_cm,
				fp->stone,
				raw_handler,
				adiosfile);

    /* Gather the contact info from the other readers
       and write it to a file. Create a ready file so
       that the writer knows it can parse this file. */
    double setup_start = dgettimeofday();

    char writer_ready_filename[200];
    char writer_info_filename[200];
    char reader_ready_filename[200];
    char reader_info_filename[200];
	
    sprintf(reader_ready_filename, "%s_%s", fname, READER_READY_FILE);
    sprintf(reader_info_filename, "%s_%s", fname, READER_CONTACT_FILE);
    sprintf(writer_ready_filename, "%s_%s", fname, WRITER_READY_FILE);
    sprintf(writer_info_filename, "%s_%s", fname, WRITER_CONTACT_FILE);
	
    char *string_list;
    char data_contact_info[CONTACT_LENGTH];
    string_list = attr_list_to_string(CMget_contact_list(fp_read_data->fp_cm));
    sprintf(&data_contact_info[0], "%d:%s", fp->stone, string_list);
    free(string_list);

    char * recvbuf;
    if(fp->rank == 0){	
	recvbuf = (char*)malloc(sizeof(char)*CONTACT_LENGTH*(fp->size));
    }

    MPI_Gather(data_contact_info, CONTACT_LENGTH, MPI_CHAR, recvbuf,
	       CONTACT_LENGTH, MPI_CHAR, 0, fp->comm);

    if(fp->rank == 0){	
	// print our own contact information
	FILE * fp_out = fopen(reader_info_filename, "w");
	int i;
	if(!fp_out){	    
	    adios_error(err_file_open_error,
			"File for contact info could not be opened for writing.\n");
	    exit(1);
	}
	for(i=0; i<fp->size; i++) {
	    fprintf(fp_out,"%s\n", &recvbuf[i*CONTACT_LENGTH]);
	}
	fclose(fp_out);
	free(recvbuf);
	
	FILE * read_ready = fopen(reader_ready_filename, "w");
	fprintf(read_ready, "ready");
	fclose(read_ready);
    }
    MPI_Barrier(fp->comm);

    FILE * fp_in = fopen(writer_ready_filename,"r");
    while(!fp_in) {
	//CMsleep(fp_read_data->fp_cm, 1);
	fp_in = fopen(writer_ready_filename, "r");
    }
    fclose(fp_in);

    fp_in = fopen(writer_info_filename, "r");
    while(!fp_in){
	//CMsleep(fp_read_data->fp_cm, 1);
	fp_in = fopen(writer_info_filename, "r");
    }

    char in_contact[CONTACT_LENGTH] = "";
    //fp->bridges = malloc(sizeof(bridge_info));
    int num_bridges = 0;
    int their_stone;

    // change to read all numbers, dont create stones, turn bridge array into linked list
    while(fscanf(fp_in, "%d:%s", &their_stone, in_contact) != EOF){	
	//fprintf(stderr, "writer contact: %d:%s\n", their_stone, in_contact);
	fp->bridges = realloc(fp->bridges,
					  sizeof(bridge_info) * (num_bridges+1));
	fp->bridges[num_bridges].their_num = their_stone;
	fp->bridges[num_bridges].contact = strdup(in_contact);
	fp->bridges[num_bridges].created = 0;
	fp->bridges[num_bridges].step = 0;
	fp->bridges[num_bridges].opened = 0;
	fp->bridges[num_bridges].scheduled = 0;
	num_bridges++;
    }
    fclose(fp_in);
    fp->num_bridges = num_bridges;
    // clean up of writer's files
    MPI_Barrier(fp->comm);
    if(fp->rank == 0){
	unlink(writer_info_filename);
	unlink(writer_ready_filename);
    }	    

    adiosfile->fh = (uint64_t)fp;
    adiosfile->current_step = 0;
    
    /* Init with a writer to get initial scalar
       data so we can handle inq_var calls and
       also populate the ADIOS_FILE struct. */

    double bridge_start = MPI_Wtime();
    if(fp->size < num_bridges){
    	int mystart = (num_bridges/fp->size) * fp->rank;
    	int myend = (num_bridges/fp->size) * (fp->rank+1);
    	fp->writer_coordinator = mystart;
    	int z;
    	for(z=mystart; z<myend; z++){
    	    build_bridge(&fp->bridges[z]);
    	}
    }
    else{
	int writer_rank = fp->rank % num_bridges;
	build_bridge(&fp->bridges[writer_rank]);
	fp->writer_coordinator = writer_rank;
    }

    // requesting initial data.
    send_open_msg(fp, fp->writer_coordinator);
    

    fp->data_read = 0;
    send_flush_msg(fp, fp->writer_coordinator, DATA, 1);

    send_flush_msg(fp, fp->writer_coordinator, EVGROUP, 1);
    fp->data_read = 0;
    // this has to change. Writer needs to have some way of
    // taking the attributes out of the xml document
    // and sending them over ffs encoded. Not yet implemented.
    // the rest of this info for adiosfile gets filled in raw_handler.
    adiosfile->nattrs = 0;
    adiosfile->attr_namelist = NULL;
    // first step is at least one, otherwise raw_handler will not execute.
    // in reality, writer might be further along, so we might have to make
    // the writer explitly send across messages each time it calls close, to
    // indicate which timesteps are available. 
    adiosfile->last_step = 1;
    adiosfile->path = strdup(fname);
    // verifies these two fields. It's not BP, so no BP version.
    // It's a stream, so how can the file size be known?
    adiosfile->version = -1;
    adiosfile->file_size = 0;
    adios_errno = err_no_error;        
    fp_log("FUNC", "leaving flexpath_open\n");
    return adiosfile;
}