Пример #1
0
/* Check the environment for label control.
   If either MPIEXEC_PREFIX_STDOUT or MPIEXEC_PREFIX_STDERR is set (and its
   value is shorter than MAX_LABEL), labels are enabled and the value is
   copied into the matching label pattern.  A pattern that is not set, or
   that is rejected for being too long, is left untouched, so whatever value
   it already holds stays in effect.

   Always returns 0; an over-long pattern is reported through
   MPIU_Error_printf but is not treated as a failure. */
int IOLabelCheckEnv( void )
{
    char *envval;

    envval = getenv( "MPIEXEC_PREFIX_STDOUT" );
    if (envval) {
        if (strlen(envval) < MAX_LABEL) {
            MPIU_Strncpy( outLabelPattern, envval, MAX_LABEL );
            useLabels = 1;
        }
        else {
            /* The message must name the variable that was actually read */
            MPIU_Error_printf( "Pattern for stdout label specified by MPIEXEC_PREFIX_STDOUT is too long" );
        }
    }

    envval = getenv( "MPIEXEC_PREFIX_STDERR" );
    if (envval) {
        if (strlen(envval) < MAX_LABEL) {
            MPIU_Strncpy( errLabelPattern, envval, MAX_LABEL );
            useLabels = 1;
        }
        else {
            /* The message must name the variable that was actually read */
            MPIU_Error_printf( "Pattern for stderr label specified by MPIEXEC_PREFIX_STDERR is too long" );
        }
    }

    return 0;
}
Пример #2
0
/* Create an output structure backed by a newly opened file.
 *
 * filename - path of the file to open for binary writing ("wb")
 *
 * Returns the new structure with its buffer cursors reset to the start of
 * the internal buffer, or NULL (after printing an error) if either the
 * allocation or the fopen fails.
 */
IRLOG_IOStruct *IRLOG_CreateOutputStruct(const char *filename)
{
    IRLOG_IOStruct *out;

    out = (IRLOG_IOStruct*)MPIU_Malloc(sizeof(IRLOG_IOStruct));
    if (!out) {
        MPIU_Error_printf("malloc failed - %s\n", strerror(errno));
        return NULL;
    }

    out->f = fopen(filename, "wb");
    if (!out->f) {
        MPIU_Error_printf("Unable to open output file '%s' - %s\n", filename, strerror(errno));
        MPIU_Free(out);
        return NULL;
    }

    /* No record is pending yet, and the whole buffer is available */
    out->header.type = RLOG_INVALID_TYPE;
    out->pEnd        = &out->buffer[RLOG_BUFFSIZE];
    out->pNextHeader = out->buffer;
    out->pCurHeader  = out->buffer;

    return out;
}
Пример #3
0
/*
 * Abort the process: optionally print a backtrace, ask PMI to abort, and if
 * that call returns, print error_msg and terminate via exit(exit_code).
 *
 * exit_code - value passed to PMI_Abort and then to exit()
 * error_msg - message passed to PMI_Abort and printed with "%s"
 *             NOTE(review): printed unconditionally; presumably callers
 *             never pass NULL - confirm.
 *
 * The statements after exit() are not expected to execute; they keep the
 * function-exit macro and a return statement in the body, and the Sun
 * compiler's "statement not reached" diagnostic is disabled around them.
 */
int MPIDI_CH3_Abort(int exit_code, char *error_msg)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ABORT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ABORT);

    /* print backtrace */
    if (show_backtrace) print_backtrace();
    
    PMI_Abort(exit_code, error_msg);

    /* if abort returns for some reason, exit here */

    MPIU_Error_printf("%s", error_msg);
    fflush(stderr);

    exit(exit_code);
    /* Everything below follows exit() and exists only for form's sake */
#if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#pragma error_messages(off, E_STATEMENT_NOT_REACHED)
#endif /* defined(__SUNPRO_C) || defined(__SUNPRO_CC) */
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ABORT);
    return MPI_ERR_INTERN;
#if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#pragma error_messages(default, E_STATEMENT_NOT_REACHED)
#endif /* defined(__SUNPRO_C) || defined(__SUNPRO_CC) */
}
Пример #4
0
/* Create an input structure for reading the given file.
 *
 * filename - path of the file to open for binary reading ("rb")
 *
 * The file is opened, up to RLOG_BUFFSIZE bytes are read into the internal
 * buffer, and the first record is fetched with IRLOG_GetNextRecord.
 *
 * Returns the ready-to-use structure, or NULL (after printing an error and
 * releasing everything acquired so far) if allocation, fopen, the initial
 * read, or fetching the first record fails.
 */
IRLOG_IOStruct *IRLOG_CreateInputStruct(const char *filename)
{
    IRLOG_IOStruct *in;
    int bytes_read;

    in = (IRLOG_IOStruct*)MPIU_Malloc(sizeof(IRLOG_IOStruct));
    if (!in) {
        MPIU_Error_printf("malloc failed - %s\n", strerror(errno));
        return NULL;
    }

    in->f = fopen(filename, "rb");
    if (!in->f) {
        MPIU_Error_printf("fopen(%s) failed, error: %s\n", filename, strerror(errno));
        goto fail_free;
    }

    /* Prime the buffer with the first chunk of the file */
    bytes_read = (int)fread(in->buffer, 1, RLOG_BUFFSIZE, in->f);
    if (bytes_read == 0) {
        MPIU_Error_printf("Unable to read data from the input file.\n");
        goto fail_close;
    }

    /* Point the cursors at the start of the data and load the first record */
    in->pCurHeader = in->buffer;
    in->pNextHeader = in->buffer;
    in->pEnd = in->buffer + bytes_read;
    if (IRLOG_GetNextRecord(in)) {
        MPIU_Error_printf("Unable to get the first record from the file.\n");
        goto fail_close;
    }

    return in;

fail_close:
    fclose(in->f);
fail_free:
    MPIU_Free(in);
    return NULL;
}
Пример #5
0
/**
 * Compare the configured RDMA parameters against the port and device
 * attributes.
 *
 * Every check is evaluated (there is no early exit), and each violated
 * limit is reported with its own message.
 *
 * @param port_attr  port attributes; only active_mtu is examined
 * @param dev_attr   device attributes holding the limits to check against
 * @return 0 if every check passed, 1 if at least one failed
 */
static int check_attrs( struct ibv_port_attr *port_attr, struct ibv_device_attr *dev_attr)
{
    int failed = 0;

#ifdef _ENABLE_XRC_
    if (USE_XRC && !(dev_attr->device_cap_flags & IBV_DEVICE_XRC)) {
        fprintf (stderr, "HCA does not support XRC. Disable MV2_USE_XRC.\n");
        failed = 1;
    }
#endif /* _ENABLE_XRC_ */

    if (port_attr->active_mtu < rdma_default_mtu) {
        MPIU_Error_printf( "Active MTU is %d, MV2_DEFAULT_MTU set to %d. See User Guide\n",
                           port_attr->active_mtu, rdma_default_mtu);
        failed = 1;
    }

    if (dev_attr->max_qp_rd_atom < rdma_default_qp_ous_rd_atom) {
        MPIU_Error_printf( "Max MV2_DEFAULT_QP_OUS_RD_ATOM is %d, set to %d\n",
                           dev_attr->max_qp_rd_atom, rdma_default_qp_ous_rd_atom);
        failed = 1;
    }

    /* The scatter/gather and work-request limits come from different
     * device fields depending on whether a shared receive queue is in use */
    if (process_info.has_srq) {
        if (dev_attr->max_srq_sge < rdma_default_max_sg_list) {
            MPIU_Error_printf( "Max MV2_DEFAULT_MAX_SG_LIST is %d, set to %d\n",
                               dev_attr->max_srq_sge, rdma_default_max_sg_list);
            failed = 1;
        }

        if (dev_attr->max_srq_wr < viadev_srq_alloc_size) {
            MPIU_Error_printf( "Max MV2_SRQ_SIZE is %d, set to %d\n",
                               dev_attr->max_srq_wr, (int) viadev_srq_alloc_size);
            failed = 1;
        }
    } else {
        if (dev_attr->max_sge < rdma_default_max_sg_list) {
            MPIU_Error_printf( "Max MV2_DEFAULT_MAX_SG_LIST is %d, set to %d\n",
                               dev_attr->max_sge, rdma_default_max_sg_list);
            failed = 1;
        }

        if (dev_attr->max_qp_wr < rdma_default_max_send_wqe) {
            MPIU_Error_printf( "Max MV2_DEFAULT_MAX_SEND_WQE is %d, set to %d\n",
                               dev_attr->max_qp_wr, (int) rdma_default_max_send_wqe);
            failed = 1;
        }
    }

    if (dev_attr->max_cqe < rdma_default_max_cq_size) {
        MPIU_Error_printf( "Max MV2_DEFAULT_MAX_CQ_SIZE is %d, set to %d\n",
                           dev_attr->max_cqe, (int) rdma_default_max_cq_size);
        failed = 1;
    }

    return failed;
}
Пример #6
0
/* Append the contents of fin to fout.
 *
 * fout - destination stream; data is written at its current position
 * fin  - source stream; the number of bytes copied is its current offset
 *        (as reported by ftell), and the stream is rewound to the start
 *        before copying
 *
 * Errors are reported with MPIU_Error_printf and end the copy early; the
 * function has no return value, so callers cannot detect a partial copy.
 * The temporary buffer is released on every path.
 */
void AppendFile(FILE *fout, FILE *fin)
{
    long total;
    size_t chunk, num_read, num_written;
    char *buffer, *buf;

    /* ftell returns -1 on failure; a negative size must never reach fread */
    total = ftell(fin);
    if (total < 0)
    {
        MPIU_Error_printf("failed to determine the size of the input file\n");
        return;
    }
    if (fseek(fin, 0L, SEEK_SET) != 0)
    {
        MPIU_Error_printf("failed to rewind the input file\n");
        return;
    }

    buffer = (char*)MPIU_Malloc(sizeof(char) * BUFFER_SIZE);
    if (buffer == NULL)
    {
        MPIU_Error_printf("failed to allocate the copy buffer\n");
        return;
    }

    while (total > 0)
    {
        chunk = (total < (long)BUFFER_SIZE) ? (size_t)total : (size_t)BUFFER_SIZE;
        num_read = fread(buffer, 1, chunk, fin);
        if (num_read == 0)
        {
            MPIU_Error_printf("failed to read from input file\n");
            goto fn_exit;
        }
        total -= (long)num_read;

        /* fwrite may write fewer bytes than asked; keep going until the
         * whole chunk is out or no progress is made */
        buf = buffer;
        while (num_read > 0)
        {
            num_written = fwrite(buf, 1, num_read, fout);
            if (num_written == 0)
            {
                MPIU_Error_printf("failed to write to output file\n");
                goto fn_exit;
            }
            num_read -= num_written;
            buf += num_written;
        }
    }

fn_exit:
    MPIU_Free(buffer);
}
Пример #7
0
/* Write exactly `length` bytes from pBuffer to fout, retrying after short
 * writes.
 *
 * pBuffer - data to write
 * length  - number of bytes to write; values <= 0 write nothing
 * fout    - destination stream
 *
 * Returns 0 on success.  On failure an error is printed and a non-zero
 * value is returned: the errno left by fwrite, or -1 if errno was not set.
 *
 * fwrite returns a size_t item count and never -1; a failed write shows up
 * as a count of zero, which is therefore what is tested here (testing for
 * -1 would retry a failing stream forever).
 */
static int WriteFileData(const char *pBuffer, int length, FILE *fout)
{
    size_t num_written;
    int saved_errno;

    while (length > 0)
    {
        errno = 0;
        num_written = fwrite(pBuffer, 1, (size_t)length, fout);
        if (num_written == 0)
        {
            /* capture errno before any other call can overwrite it */
            saved_errno = errno;
            MPIU_Error_printf("Error: fwrite failed - %s\n", strerror(saved_errno));
            return saved_errno ? saved_errno : -1;
        }

        /*printf("fwrite(%d)", num_written);fflush(stdout);*/

        length -= (int)num_written;
        pBuffer += num_written;
    }
    return 0;
}
Пример #8
0
/* Read exactly `length` bytes from fin into pBuffer, retrying after short
 * reads.
 *
 * pBuffer - destination buffer, at least `length` bytes
 * length  - number of bytes to read; values <= 0 read nothing
 * fin     - source stream
 *
 * Returns 0 on success.  On failure an error is printed and a non-zero
 * value is returned: -1 if the stream ended before `length` bytes were
 * available, otherwise the errno left by fread (or -1 if errno was not set).
 *
 * fread returns a size_t item count and never -1; both end-of-file and a
 * read error show up as a count of zero, which is therefore what is tested
 * here (testing for -1 would spin forever at end-of-file).
 */
static int ReadFileData(char *pBuffer, int length, FILE *fin)
{
    size_t num_read;
    int saved_errno;

    while (length > 0)
    {
        errno = 0;
        num_read = fread(pBuffer, 1, (size_t)length, fin);
        if (num_read == 0)
        {
            /* capture errno before any other call can overwrite it */
            saved_errno = errno;
            if (feof(fin))
            {
                MPIU_Error_printf("Error: fread failed - unexpected end of file\n");
                return -1;
            }
            MPIU_Error_printf("Error: fread failed - %s\n", strerror(saved_errno));
            return saved_errno ? saved_errno : -1;
        }

        /*printf("fread(%d)", num_read);fflush(stdout);*/

        length -= (int)num_read;
        pBuffer += num_read;
    }
    return 0;
}
Пример #9
0
/*
 * Entry point of the irlog2rlog converter (name taken from the usage text).
 *
 * Usage:
 *   irlog2rlog out.rlog in0.irlog in1.irlog ...
 *   irlog2rlog out.rlog n     (a single numeric argument is expanded into
 *                              an input file list by GenerateNewArgv)
 *
 * Steps, in order:
 *   1. open the output file and an input structure for every input file;
 *   2. pass the successfully opened inputs to ReadAllArrows;
 *   3. re-open every input and walk its records, saving states and events
 *      and tracking the min/max rank seen in comm records;
 *   4. write the header, state, arrow and per-rank event sections to the
 *      output file;
 *   5. free the state list and remove the temporary arrow file.
 *
 * Returns 0 on success and also after printing the usage text; returns -1
 * if the output file cannot be opened.
 */
int main(int argc, char *argv[])
{
    RLOG_FILE_HEADER header;
    RLOG_State_list *pState;

    int nNumInputs;
    IRLOG_IOStruct *pInput;
    IRLOG_IOStruct **ppInput;

    int nMaxRank = 0;
    int nMinRank = MAX_RANK;
    int nNumStates = 0;
    int type, length;
    int nMaxLevel = 0;
    int nNumLevels = 0;
    int nTotalNumEvents = 0;
    int nNumEvents;
    RecursionStruct *pLevel;
    FILE *fout;
    int i, j;
    int nRank;

    /* NOTE(review): the usage error exits with status 0 - confirm intended */
    if (argc < 3)
    {
	MPIU_Error_printf("Usage:\nirlog2rlog out.rlog in0.irlog in1.irlog ...\nirlog2rlog out.rlog n\n");
	return 0;
    }

    /* "out.rlog n" form: let GenerateNewArgv rewrite argc/argv */
    if (argc == 3 && IsNumber(argv[2]))
    {
	GenerateNewArgv(&argc, &argv, atoi(argv[2]));
    }

    nNumInputs = argc - 2;

    /* open the output rlog file */
    fout = fopen(argv[1], "wb");
    if (fout == NULL)
    {
	MPIU_Error_printf("unable to open output file '%s'\n", argv[1]);
	return -1;
    }

    /* read the arrows from all the files in order */
    /* NOTE(review): the allocation result is not checked before use */
    ppInput = (IRLOG_IOStruct**)MPIU_Malloc(nNumInputs * sizeof(IRLOG_IOStruct*));
    for (i=0; i<nNumInputs; i++)
    {
	ppInput[i] = IRLOG_CreateInputStruct(argv[i+2]);
	if (ppInput[i] == NULL)
	{
	    MPIU_Error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]);
	}
    }
    /* compact the array: shift later entries down over each NULL slot and
       re-examine the same index, so consecutive NULLs are all removed */
    for (i=0; i<nNumInputs; i++)
    {
	if (ppInput[i] == NULL)
	{
	    for (j=i; j<nNumInputs-1; j++)
		ppInput[j] = ppInput[j+1];
	    nNumInputs--;
	    i--;
	}
    }
    MPIU_Msg_printf("reading the arrows from all the input files.\n");fflush(stdout);
    ReadAllArrows(ppInput, nNumInputs);
    /* NOTE(review): ppInput is not freed in this function; presumably
       ReadAllArrows takes care of the structures - confirm */

    /* restore the full count: the second pass re-opens every input file */
    nNumInputs = argc - 2;

    /* read, parse and save all the data from the irlog files */
    for (i=0; i<nNumInputs; i++)
    {
	pInput = IRLOG_CreateInputStruct(argv[i+2]);
	if (pInput == NULL)
	{
	    MPIU_Error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]);
	}
	else
	{
	    MPIU_Msg_printf("reading irlog file: %s\n", argv[i+2]);fflush(stdout);
	    /* the first record is already loaded; process it, then advance
	       until IRLOG_GetNextRecord reports non-zero */
	    for(;;)
	    {
		switch (pInput->header.type)
		{
		case RLOG_STATE_TYPE:
		    SaveState(&pInput->record.state);
		    break;
		case RLOG_COMM_TYPE:
		    nMaxRank = (pInput->record.comm.rank > nMaxRank) ? pInput->record.comm.rank : nMaxRank;
		    nMinRank = (pInput->record.comm.rank < nMinRank) ? pInput->record.comm.rank : nMinRank;
		    break;
		case RLOG_IARROW_TYPE:
		    /* SaveArrow(&pInput->record.iarrow); */
		    break;
		case RLOG_EVENT_TYPE:
		    SaveEvent(&pInput->record.event);
		    break;
		default:
		    MPIU_Error_printf("Unknown irlog record type: %d\n", pInput->header.type);
		    break;
		}
		
		if (IRLOG_GetNextRecord(pInput))
		{
		    IRLOG_CloseInputStruct(&pInput);
		    break;
		}
	    }
	}
    }

    /* set the fields in the header */
    /* the rank range comes from the saved event levels; a mismatch with the
       range seen in comm records is reported but does not stop the run */
    header.nMinRank = FindMinRank(g_pLevel);
    header.nMaxRank = FindMaxRank(g_pLevel);
    if (nMinRank != header.nMinRank)
	MPIU_Error_printf("minimum event rank %d does not equal the minimum comm record rank %d\n", header.nMinRank, nMinRank);
    if (nMaxRank != header.nMaxRank)
	MPIU_Error_printf("maximum event rank %d does not equal the maximum comm record rank %d\n", header.nMaxRank, nMaxRank);

    /* write header */
    /* each section is written as: int type, int length in bytes, payload */
    /* NOTE(review): WriteFileData's return value is ignored at every call
       in this function, and non-char pointers are passed without a cast */
    MPIU_Msg_printf("writing header.\n");fflush(stdout);
    type = RLOG_HEADER_SECTION;
    length = sizeof(RLOG_FILE_HEADER);
    /* fwrite(&type, sizeof(int), 1, fout); */
    WriteFileData(&type, sizeof(int), fout);
    /* fwrite(&length, sizeof(int), 1, fout);*/
    WriteFileData(&length, sizeof(int), fout);
    /* fwrite(&header, sizeof(RLOG_FILE_HEADER), 1, fout); */
    WriteFileData(&header, sizeof(RLOG_FILE_HEADER), fout);

    /* write states */
    if (g_pList)
    {
	MPIU_Msg_printf("writing states.\n");fflush(stdout);
    }
    /* count the states first so the section length can be written up front */
    pState = g_pList;
    while (pState)
    {
	nNumStates++;
	pState = pState->next;
    }
    type = RLOG_STATE_SECTION;
    length = nNumStates * sizeof(RLOG_STATE);
    /* fwrite(&type, sizeof(int), 1, fout); */
    WriteFileData(&type, sizeof(int), fout);
    /* fwrite(&length, sizeof(int), 1, fout); */
    WriteFileData(&length, sizeof(int), fout);
    pState = g_pList;
    while (pState)
    {
	/* fwrite(pState, sizeof(RLOG_STATE), 1, fout); */
	/* only the first sizeof(RLOG_STATE) bytes of each list node are
	   written - presumably the state is the node's first member; verify */
	WriteFileData(pState, sizeof(RLOG_STATE), fout);
	pState = pState->next;
    }

    /* write arrows */
    if (g_fArrow)
    {
	MPIU_Msg_printf("writing arrows.\n");fflush(stdout);
	type = RLOG_ARROW_SECTION;
	/* the current offset in the arrow file is used as the section length */
	length = ftell(g_fArrow);
	/* fwrite(&type, sizeof(int), 1, fout); */
	WriteFileData(&type, sizeof(int), fout);
	/* fwrite(&length, sizeof(int), 1, fout); */
	WriteFileData(&length, sizeof(int), fout);
	AppendFile(fout, g_fArrow);
    }

    /* write events */
    /* one event section per rank; each iteration handles the rank returned
       by FindMinLevel and then removes that rank from g_pLevel */
    while (g_pLevel)
    {
	pLevel = FindMinLevel(g_pLevel);
	nNumLevels = FindMaxRecursion(g_pLevel, pLevel->rank);
	nRank = pLevel->rank;

	/* count the events for this rank */
	nNumEvents = 0;
	for (i=0; i<nNumLevels; i++)
	{
	    pLevel = GetLevel(pLevel->rank, i);
	    nNumEvents += pLevel->num_events;
	}
	/* write an event section for this rank */
	/* payload: rank, level count, per-level event counts, then the events */
	type = RLOG_EVENT_SECTION;
	length = sizeof(int) + sizeof(int) + (nNumLevels * sizeof(int)) + (nNumEvents * sizeof(RLOG_EVENT));
	/* fwrite(&type, sizeof(int), 1, fout); */
	WriteFileData(&type, sizeof(int), fout);
	/* fwrite(&length, sizeof(int), 1, fout); */
	WriteFileData(&length, sizeof(int), fout);
        /* fwrite(&nRank, sizeof(int), 1, fout); */
	WriteFileData(&nRank, sizeof(int), fout);
	/* fwrite(&nNumLevels, sizeof(int), 1, fout); */
	WriteFileData(&nNumLevels, sizeof(int), fout);
	for (i=0; i<nNumLevels; i++)
	{
	    pLevel = GetLevel(nRank, i);
	    /* fwrite(&pLevel->num_events, sizeof(int), 1, fout); */
	    WriteFileData(&pLevel->num_events, sizeof(int), fout);
	}
	for (i=0; i<nNumLevels; i++)
	{
	    MPIU_Msg_printf("writing event level %d:%d\n", nRank, i);fflush(stdout);
	    pLevel = GetLevel(nRank, i);
	    AppendFile(fout, pLevel->fout);
	}
	/* remove this rank from the list of levels */
	RemoveLevel(nRank);
    }

    /* free resources */
    while (g_pList)
    {
	pState = g_pList;
	g_pList = g_pList->next;
	MPIU_Free(pState);
    }
    if (g_fArrow)
    {
	fclose(g_fArrow);
	unlink(g_pszArrowFilename);
    }

    if (s_bFreeArgv)
	MPIU_Free(argv);

    /* NOTE(review): fout is never fclose()d here, so a failure while
       flushing the final data would go unnoticed - confirm */
    return 0;
}
Пример #10
0
/* Advance pInput to the next record and decode it.
 *
 * The record header is copied into pInput->header and, for event, iarrow,
 * state and comm records, the payload that follows the header is copied
 * into the matching member of pInput->record.  When the buffered data does
 * not hold a complete header or record, the unread tail is moved to the
 * front of the buffer and more data is read from pInput->f.
 *
 * Returns 0 when a record was decoded.  Returns 1 when no further record is
 * available: end-of-log record, invalid or unknown record type, or a failed
 * read (all but the end-of-log case print an error).
 */
int IRLOG_GetNextRecord(IRLOG_IOStruct *pInput)
{
    int num_valid, num_read;

    pInput->pCurHeader = pInput->pNextHeader;

    /* not even a full header left in the buffer: fetch the rest of it */
    if (pInput->pEnd - pInput->pCurHeader < sizeof(RLOG_HEADER))
    {
        num_valid = (int)(pInput->pEnd - pInput->pCurHeader);
        /* source and destination lie in the same buffer and may overlap,
         * so memmove is required rather than memcpy */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        if (ReadFileData(pInput->buffer + num_valid, sizeof(RLOG_HEADER) - num_valid, pInput->f))
        {
            MPIU_Error_printf("RLOG Error: unable to read the next record header.\n");
            return 1;
        }
        pInput->pCurHeader = pInput->buffer;
        pInput->pNextHeader = pInput->buffer;
        pInput->pEnd = pInput->buffer + sizeof(RLOG_HEADER);
    }

    /* copy the current header into a temporary variable so the bytes can be manipulated */
    memcpy(&pInput->header, pInput->pCurHeader, sizeof(RLOG_HEADER));
    /*
    CLOGByteSwapDouble(&(header.timestamp), 1);
    CLOGByteSwapInt(&(header.rectype), 1);
    CLOGByteSwapInt(&(header.length), 1);
    */

    /* refill until the whole record (header.length bytes) is buffered; a
     * record that can never fit ends the loop because fread is then asked
     * for zero bytes and returns 0 */
    while (pInput->pCurHeader + pInput->header.length > pInput->pEnd)
    {
        num_valid = (int)(pInput->pEnd - pInput->pCurHeader);
        /* same-buffer compaction: ranges may overlap, so use memmove */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        num_read = (int)fread(pInput->buffer + num_valid, 1, RLOG_BUFFSIZE - num_valid, pInput->f);
        if (num_read == 0)
        {
            MPIU_Error_printf("RLOG Error: unable to get the next record.\n");
            return 1;
        }
        pInput->pEnd = pInput->buffer + num_valid + num_read;
        pInput->pCurHeader = pInput->buffer;
    }

    pInput->pNextHeader = pInput->pCurHeader + pInput->header.length;

    switch (pInput->header.type)
    {
    case RLOG_INVALID_TYPE:
        MPIU_Error_printf("RLOG Error: invalid record type.\n");
        return 1;
    case RLOG_ENDLOG_TYPE:
        return 1;
    case RLOG_EVENT_TYPE:
        memcpy(&pInput->record.event, pInput->pCurHeader + sizeof(RLOG_HEADER), sizeof(RLOG_EVENT));
        break;
    case RLOG_IARROW_TYPE:
        memcpy(&pInput->record.iarrow, pInput->pCurHeader + sizeof(RLOG_HEADER), sizeof(RLOG_IARROW));
        break;
    case RLOG_STATE_TYPE:
        memcpy(&pInput->record.state, pInput->pCurHeader + sizeof(RLOG_HEADER), sizeof(RLOG_STATE));
        break;
    case RLOG_COMM_TYPE:
        memcpy(&pInput->record.comm, pInput->pCurHeader + sizeof(RLOG_HEADER), sizeof(RLOG_COMM));
        break;
    default:
        MPIU_Error_printf("RLOG Error: unknown record type %d.\n", pInput->header.type);
        return 1;
    }

    return 0;
}
Пример #11
0
/**
 * Inspect the header of a received vbuf and report where its CH3 packet
 * starts and how large the packet header is.
 *
 * @param vc           virtual connection the vbuf arrived on
 * @param v            received vbuf; v->iheader supplies the packet type and
 *                     v->pheader the start of the packet
 * @param pkt          out: set to the packet for every recognized type
 * @param header_size  out: header size; sizeof(MPIDI_CH3_Pkt_t) unless the
 *                     packet type specifies another size
 * @return MPI_SUCCESS, or the error set by the default case when the packet
 *         type is not recognized
 *
 * After the type dispatch, the function updates the credit state from the
 * header via SET_CREDIT, sends backlogged packets if remote credits and a
 * backlog both exist, and may start setting up the RDMA fast path for
 * this connection.
 *
 *  FIXME: Ideally the header size should be determined by high level macros,
 * instead of hacking the message header at the device layer
 */
int MPIDI_CH3I_nem_ib_parse_header(MPIDI_VC_t * vc,
                                  vbuf * v, void **pkt, int *header_size)
{
    void *vstart;
    MPIDI_nem_ib_pkt_comm_header *header;
#ifdef CRC_CHECK
    unsigned long crc;
#endif
    int mpi_errno = MPI_SUCCESS;
    int ret;
    DEBUG_PRINT("[parse header] vbuf address %p\n", v);
    vstart = v->pheader;
    header = (MPIDI_nem_ib_pkt_comm_header *)v->iheader;
    DEBUG_PRINT("[parse header] header type %d\n", header->type);

    /* set it to the header size by default */
    *header_size = sizeof(MPIDI_CH3_Pkt_t);
#ifdef CRC_CHECK
    /* the checksum covers everything that follows the IB header */
    crc = update_crc(1, (void *)((uintptr_t)header+sizeof *header),
                     v->content_size - sizeof *header);
    if (crc != header->mrail.crc) {
        int rank; PMI_Get_rank(&rank);
        /* MPIU_Error_printf takes the format string as its first argument;
         * it is not fprintf and must not be handed a stream */
        MPIU_Error_printf("CRC mismatch, get %lx, should be %lx "
                "type %d, content size %d\n",
                crc, header->mrail.crc, header->type, v->content_size);
        exit( EXIT_FAILURE );
    }
#endif
    switch (header->type) {
/*header caching codes */
#ifndef MV2_DISABLE_HEADER_CACHING 
    case (MPIDI_CH3_PKT_FAST_EAGER_SEND):
    case (MPIDI_CH3_PKT_FAST_EAGER_SEND_WITH_REQ):
        {
            /* since header caching do not have regular iheader,
             * revert back pre-adjust
             */
            v->content_size += IB_PKT_HEADER_LENGTH;
            vstart -= IB_PKT_HEADER_LENGTH;
            v->pheader -= IB_PKT_HEADER_LENGTH;
            MPIDI_nem_ib_pkt_fast_eager *fast_header = vstart;
            MPIDI_CH3_Pkt_eager_send_t *eager_header =
                (MPIDI_CH3_Pkt_eager_send_t *) VC_FIELD(vc, connection)->rfp.
                cached_incoming;
            MPIDI_nem_ib_pkt_comm_header *eager_iheader =
                (MPIDI_nem_ib_pkt_comm_header *) VC_FIELD(vc, connection)->rfp.
                cached_incoming_iheader;

            if (MPIDI_CH3_PKT_FAST_EAGER_SEND == header->type) {
                *header_size = sizeof(MPIDI_nem_ib_pkt_fast_eager);
            } else {
                *header_size =
                    sizeof(MPIDI_nem_ib_pkt_fast_eager_with_req);
                eager_header->sender_req_id =
                    ((MPIDI_nem_ib_pkt_fast_eager_with_req *)
                     vstart)->sender_req_id;
            }
            /* from here on the cached IB header stands in for the missing one */
            header = eager_iheader;

            DEBUG_PRINT("[receiver side] cached credit %d\n",
                        eager_iheader->rdma_credit);

            eager_header->data_sz = fast_header->bytes_in_pkt;

            *pkt = (void *) eager_header;
            DEBUG_PRINT
                ("[recv: parse header] faster headersize returned %d\n",
                 *header_size);
        }
        break;
#endif
    case (MPIDI_CH3_PKT_EAGER_SEND):
        {
            DEBUG_PRINT("[recv: parse header] pkt eager send\n");
/* header caching codes */
#ifndef MV2_DISABLE_HEADER_CACHING 
            if (v->padding != NORMAL_VBUF_FLAG && 
                    (v->content_size - sizeof(MPIDI_CH3_Pkt_t) <= MAX_SIZE_WITH_HEADER_CACHING) ) {
                /* Only cache header if the packet is from RdMA path
                 * XXXX: what is R3_FLAG?
                 */
                MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming), vstart,
                       sizeof(MPIDI_CH3_Pkt_eager_send_t));
                MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming_iheader), header,
                    sizeof(MPIDI_nem_ib_pkt_comm_header));
            }
#endif
            *pkt = (MPIDI_CH3_Pkt_t *) vstart;
            *header_size = sizeof(MPIDI_CH3_Pkt_t);
            DEBUG_PRINT("[recv: parse header] headersize returned %d\n",
                        *header_size);
        }
        break;
    case MPIDI_CH3_PKT_CANCEL_SEND_REQ:
        {
            *pkt = vstart;
            /*Fix: Need to unregister and free the rndv buffer in get protocol.*/
        }
        break;
    case MPIDI_CH3_PKT_CANCEL_SEND_RESP:
        {
            MPID_Request *req;
            *pkt = vstart;
            MPID_Request_get_ptr(((MPIDI_CH3_Pkt_cancel_send_resp_t *)(*pkt))->sender_req_id, req);
            if (req != NULL) {
              /* unregister and free the rndv buffer */
              MPIDI_NEM_IB_RREQ_RNDV_FINISH(req);
            }
        }
        break;
    case (MPIDI_CH3_PKT_NOOP):
        {
            *pkt = v->iheader;
        }
        break;
    /* rfp codes */
    case MPIDI_CH3_PKT_ADDRESS:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_recv_addr(vc, vstart);
            break;
        }
    case MPIDI_CH3_PKT_ADDRESS_REPLY:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_recv_addr_reply(vc, vstart);
            break;
        }
    case MPIDI_CH3_PKT_PACKETIZED_SEND_START:
        {
            *pkt = vstart;
            *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);
            break;
        }
    case MPIDI_CH3_PKT_PACKETIZED_SEND_DATA:
        {
            *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
            *pkt = vstart;
            break;
        }
    case MPIDI_CH3_PKT_RNDV_R3_DATA:
        {
            *header_size = sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t);
            *pkt = vstart;
            break;
        }
    case MPIDI_CH3_PKT_RNDV_R3_ACK:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_lmt_r3_recv_ack(vc, vstart);
            break;
        }
    /* every type below needs nothing more than the packet pointer; the
     * default header size set above is kept */
    case (MPIDI_CH3_PKT_RNDV_REQ_TO_SEND):
    case (MPIDI_CH3_PKT_RNDV_CLR_TO_SEND):
    case MPIDI_CH3_PKT_EAGER_SYNC_ACK:
    case MPIDI_NEM_PKT_LMT_RTS:
    case MPIDI_NEM_PKT_LMT_CTS:
    case MPIDI_NEM_PKT_LMT_DONE:
    case MPIDI_NEM_PKT_LMT_COOKIE:
/* CKPT codes */
#ifdef CKPT
    case MPIDI_CH3_PKT_CM_SUSPEND:
    case MPIDI_CH3_PKT_CM_REACTIVATION_DONE:
    case MPIDI_CH3_PKT_CR_REMOTE_UPDATE:
#endif
#if defined(USE_EAGER_SHORT)
    case MPIDI_CH3_PKT_EAGERSHORT_SEND:
#endif
    case MPIDI_CH3_PKT_EAGER_SYNC_SEND:
    case MPIDI_CH3_PKT_READY_SEND:
    case MPIDI_CH3_PKT_PUT:
    case MPIDI_CH3_PKT_GET:
    case MPIDI_CH3_PKT_GET_RESP:
    case MPIDI_CH3_PKT_ACCUMULATE:
    case MPIDI_CH3_PKT_LOCK:
    case MPIDI_CH3_PKT_LOCK_GRANTED:
    case MPIDI_CH3_PKT_PT_RMA_DONE:
    case MPIDI_CH3_PKT_LOCK_PUT_UNLOCK:
    case MPIDI_CH3_PKT_LOCK_GET_UNLOCK:
    case MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK:
    case MPIDI_CH3_PKT_ACCUM_IMMED:
    case MPIDI_CH3_PKT_FLOW_CNTL_UPDATE:
    case MPIDI_CH3_PKT_CLOSE:
        {
            *pkt = vstart;
        }
        break;
    default:
        {
            /* Header is corrupted if control has reached here in prototype */
            /* */
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno,
                    MPI_ERR_OTHER,
                    "**fail",
                    "**fail %s %d",
                    "Control shouldn't reach here "
                    "in prototype, header %d\n",
                    header->type);
        }
    }

    DEBUG_PRINT("Before set credit, vc: %p, v->rail: %d, "
            "pkt: %p, pheader: %p\n", vc, v->rail, pkt, v->pheader);

    SET_CREDIT(header, VC_FIELD(vc, connection), (v->rail));


    if (VC_FIELD(vc, connection)->srp.credits[v->rail].remote_credit > 0 &&
        VC_FIELD(vc, connection)->srp.credits[v->rail].backlog.len > 0) {
        /* backlog send codes */
        MRAILI_Backlog_send(vc, v->rail);
    }

    /* if any credits remain, schedule rendezvous progress */
    if ((VC_FIELD(vc, connection)->srp.credits[v->rail].remote_credit > 0
/* rfp codes */
            || (VC_FIELD(vc, connection)->rfp.ptail_RDMA_send !=
                VC_FIELD(vc, connection)->rfp.phead_RDMA_send)
        )
        && (VC_FIELD(vc, connection)->sreq_head != NULL)) {
        /* rndv codes */
        /* the scheduling call itself is compiled out, so this branch
         * currently does nothing */
#if 0
        PUSH_FLOWLIST(vc);
#endif
    }

    /* rfp codes */
    /* no RDMA receive buffers yet and no earlier failure: count this packet
     * and, once the count passes rdma_polling_set_threshold, try to set up
     * the RDMA fast path (subject to rdma_polling_set_limit) */
    if ((VC_FIELD(vc, connection)->rfp.RDMA_recv_buf == NULL) &&       /*(c->initialized) && */
            num_rdma_buffer && !VC_FIELD(vc, connection)->rfp.rdma_failed) {
        if ((process_info.polling_group_size + rdma_pending_conn_request) <
                rdma_polling_set_limit) {
            VC_FIELD(vc, connection)->rfp.eager_start_cnt++;
            if (rdma_polling_set_threshold <
                    VC_FIELD(vc, connection)->rfp.eager_start_cnt) {
                {
                    ret = vbuf_fast_rdma_alloc(vc, 1);
                    if (ret == MPI_SUCCESS) {
                        vbuf_address_send(vc);
                        rdma_pending_conn_request++;
                    } else {
                        VC_FIELD(vc, connection)->rfp.rdma_failed = 1;
                    }
                    goto fn_exit;
                }
            }
        }
    }

fn_exit:
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
Пример #12
0
/*
 * Handle the peer's reply to an RDMA fast-path address packet.
 *
 * vc     - virtual connection the reply belongs to
 * vstart - start of the received packet, read as an
 *          MPIDI_nem_ib_pkt_address_reply
 *
 * On a failure reply (send buffer allocation failed, or the maximum number
 * of send connections was reached) the receive-side memory registrations
 * and buffers are released and the connection is marked so that the fast
 * path is not attempted again.  On a success reply the connection is added
 * to the RDMA polling set.  Any other reply value is reported through
 * ibv_va_error_abort.
 *
 * The count of pending connection requests is decremented after handling
 * the reply.  Always returns MPI_SUCCESS.
 */
int MPIDI_nem_ib_recv_addr_reply(MPIDI_VC_t * vc, void *vstart)
{
    MPIDI_nem_ib_pkt_address_reply *reply = vstart;
    int hca;
    int rc;

    DEBUG_PRINT("Received addr reply packet. reply data :%d\n", reply->reply_data);

    if (reply->reply_data == RDMA_FP_SENDBUFF_ALLOC_FAILED
        || reply->reply_data == RDMA_FP_MAX_SEND_CONN_REACHED) {

        DEBUG_PRINT("RDMA FP setup failed. clean up recv buffers\n ");

        /* drop the memory registration on every HCA that has one; a
         * registration that fails to deregister keeps its pointer */
        for (hca = 0; hca < ib_hca_num_hcas; hca++) {
            if (!VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca]) {
                continue;
            }
            rc = deregister_memory(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca]);
            if (rc == 0) {
                VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca] = NULL;
            } else {
                MPIU_Error_printf("Failed to deregister mr (%d)\n", rc);
            }
        }

        /* release the receive-side RDMA data buffer */
        if (VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA) {
            MPIU_Free(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA);
            VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA = NULL;
        }

        /* release the vbuf descriptors */
        if (VC_FIELD(vc, connection)->rfp.RDMA_recv_buf) {
            MPIU_Free(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf);
            VC_FIELD(vc, connection)->rfp.RDMA_recv_buf = NULL;
        }

        /* remember the failure/rejection so this vc is not tried again */
        VC_FIELD(vc, connection)->rfp.rdma_failed = 1;

    } else if (reply->reply_data == RDMA_FP_SUCCESS) {

        /* reset the receive cursors */
        VC_FIELD(vc, connection)->rfp.p_RDMA_recv = 0;
        VC_FIELD(vc, connection)->rfp.p_RDMA_recv_tail = num_rdma_buffer - 1;

        /* register the connection in the RDMA polling set */
        MPIU_Assert(process_info.polling_group_size < rdma_polling_set_limit);

        process_info.polling_set[process_info.polling_group_size] = vc;
        process_info.polling_group_size++;

        VC_FIELD(vc, cmanager)->num_channels += 1;
        VC_FIELD(vc, cmanager)->num_local_pollings = 1;
        VC_FIELD(vc, connection)->rfp.in_polling_set = 1;

    } else {
        ibv_va_error_abort(GEN_EXIT_ERR,
                "Invalid reply data received. reply_data: pkt->reply_data%d\n",
                reply->reply_data);
    }

    rdma_pending_conn_request--;

    return MPI_SUCCESS;
}
Пример #13
0
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pg_size, threshold, dpm = 0, p;
    char *dpm_str, *value, *conn_info = NULL;
    MPIDI_VC_t *vc;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);
 
    if (MPIDI_CH3_Pkt_size_index[MPIDI_CH3_PKT_CLOSE] != sizeof (MPIDI_CH3_Pkt_close_t))
    {
        MPIU_ERR_SETFATALANDJUMP1(
            mpi_errno,
            MPI_ERR_OTHER,
            "**fail",
            "**fail %s", 
            "Failed sanity check! Packet size table mismatch");
    }
    
    pg_size = MPIDI_PG_Get_size(pg);

    /*Determine to use which connection management*/
    threshold = MPIDI_CH3I_CM_DEFAULT_ON_DEMAND_THRESHOLD;

    /*check ON_DEMAND_THRESHOLD*/
    value = getenv("MV2_ON_DEMAND_THRESHOLD");
    if (value)
    {
        threshold = atoi(value);
    }

    dpm_str = getenv("MV2_SUPPORT_DPM");
    if (dpm_str) {
        dpm = !!atoi(dpm_str);
    }
    MPIDI_CH3I_Process.has_dpm = dpm;
    if(MPIDI_CH3I_Process.has_dpm) {
        setenv("MV2_ENABLE_AFFINITY", "0", 1);
    }

#ifdef _ENABLE_XRC_
    value = getenv ("MV2_USE_XRC");
    if (value) {
        USE_XRC = atoi(value);
        if (USE_XRC) {
            /* Enable on-demand */
            threshold = 1;
        }
    }
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
    if ((value = getenv("MV2_HYBRID_ENABLE_THRESHOLD")) != NULL) {
        rdma_hybrid_enable_threshold = atoi(value);
    }
    if ((value = getenv("MV2_USE_UD_HYBRID")) != NULL) {
        rdma_enable_hybrid = atoi(value);
    }
    if (pg_size < rdma_hybrid_enable_threshold) {
        rdma_enable_hybrid = 0;
    }
#endif

    if (pg_size > threshold || dpm 
#ifdef _ENABLE_XRC_
            || USE_XRC
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
            || rdma_enable_hybrid
#endif
            )
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
	    MPIDI_CH3I_Process.num_conn = 0;
    }
    else
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_BASIC_ALL2ALL;
    }

#if defined(RDMA_CM)
    if (((value = getenv("MV2_USE_RDMA_CM")) != NULL
        || (value = getenv("MV2_USE_IWARP_MODE")) != NULL)
        && atoi(value) && ! dpm) {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_RDMA_CM;
    } else {
        rdma_cm_get_hca_type(&MPIDI_CH3I_RDMA_Process);
    }
#endif /* defined(RDMA_CM) */

    MPIDI_PG_GetConnKVSname(&pg->ch.kvs_name);

#if defined(CKPT)
#if defined(RDMA_CM)
    if (MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_RDMA_CM)
    {
        MPIU_Error_printf("Error: Checkpointing does not work with RDMA CM.\n"
            "Please configure and compile MVAPICH2 with checkpointing disabled "
            "or without support for RDMA CM.\n");
	MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
    }
#endif /* defined(RDMA_CM) */

    // Always use CM_ON_DEMAND for Checkpoint/Restart and Migration
    MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;

#endif /* defined(CKPT) */
#ifdef _ENABLE_UD_
    if (rdma_enable_hybrid) {
        MPIU_Assert(MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_ON_DEMAND);
    }
#endif

    /* save my vc_ptr for easy access */
    MPIDI_PG_Get_vc(pg, pg_rank, &MPIDI_CH3I_Process.vc);

    /* Initialize Progress Engine */
    if ((mpi_errno = MPIDI_CH3I_Progress_init()))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    /* Check for SMP only */
    MPIDI_CH3I_set_smp_only();

    if (!SMP_ONLY) 
    {
        switch (MPIDI_CH3I_Process.cm_type)
        {
            /* allocate rmda memory and set up the queues */
            case MPIDI_CH3I_CM_ON_DEMAND:
#if defined(RDMA_CM)
            case MPIDI_CH3I_CM_RDMA_CM:
#endif /* defined(RDMA_CM) */
                mpi_errno = MPIDI_CH3I_CM_Init(pg, pg_rank, &conn_info);
                if (mpi_errno != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }
                break;
            default:
                /*call old init to setup all connections*/
                if ((mpi_errno = MPIDI_CH3I_RDMA_init(pg, pg_rank)) != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }

                /* All vc should be connected */
                for (p = 0; p < pg_size; ++p)
                {
                    MPIDI_PG_Get_vc(pg, p, &vc);
                    vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                }
                break;
        }
    }
#if defined(CKPT)
#if defined(DISABLE_PTMALLOC)
    MPIU_Error_printf("Error: Checkpointing does not work without registration "
        "caching enabled.\nPlease configure and compile MVAPICH2 without checkpointing "
        " or enable registration caching.\n");
    MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
#endif /* defined(DISABLE_PTMALLOC) */

    if ((mpi_errno = MPIDI_CH3I_CR_Init(pg, pg_rank, pg_size)))
    {
        MPIU_ERR_POP(mpi_errno);
    }
#endif /* defined(CKPT) */

    /* set connection info for dynamic process management */
    if (conn_info && dpm) {
        mpi_errno = MPIDI_PG_SetConnInfo(pg_rank, (const char *)conn_info);
        if (mpi_errno != MPI_SUCCESS)
        {
            MPIU_ERR_POP(mpi_errno);
        }
    }
    MPIU_Free(conn_info);

    /* Initialize the smp channel */
    if ((mpi_errno = MPIDI_CH3I_SMP_init(pg)))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    if (SMP_INIT)
    {
        for (p = 0; p < pg_size; ++p)
        {
            MPIDI_PG_Get_vc(pg, p, &vc);
	    /* Mark the SMP VC as Idle */
	    if (vc->smp.local_nodes >= 0)
            {
                vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                if (SMP_ONLY) {
                    MPIDI_CH3I_SMP_Init_VC(vc);
                }
#ifdef _ENABLE_XRC_
                VC_XST_SET (vc, XF_SMP_VC);
#endif
            }
        }
    } else {
        extern int enable_shmem_collectives;
        enable_shmem_collectives = SMP_INIT;
    }

    /* Set the eager max msg size now that we know SMP and RDMA are initialized.
     * The max message size is also set during VC initialization, but the state
     * of SMP is unknown at that time.
     */
    for (p = 0; p < pg_size; ++p)
    {
        MPIDI_PG_Get_vc(pg, p, &vc);
        vc->eager_max_msg_sz = MPIDI_CH3_EAGER_MAX_MSG_SIZE(vc);
    }
    
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
Пример #14
0
/**
 * Initialize the HCAs
 * Look at rdma_open_hca() & rdma_iba_hca_init_noqp() in
 * mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/rdma_iba_priv.c
 *
 * For each of the ib_hca_num_hcas requested adapters this routine selects an
 * ibv device, opens it, allocates a protection domain and records the HCA
 * type.  The results are stored in hca_list[nHca] (ib_dev, nic_context,
 * ptag, hca_type).
 *
 * Device selection:
 *   - no HCA name given (rdma_iba_hca is empty or equals RDMA_IBA_NULL_HCA),
 *     or more than one HCA requested: the nHca-th entry of the device list;
 *   - otherwise: the device whose name matches rdma_iba_hca.
 *
 * When no name was given, exactly one HCA is requested and the first device
 * has no active port, the remaining devices are scanned and the first one
 * with an active port replaces hca_list[0] (Trac #376).
 *
 * Side effects: ib_hca_num_hcas is lowered to the number of usable devices
 * when the user asked for more than are available.
 *
 * Output:
 *         hca_list: fill it with the HCAs information
 *
 * Return: MPI_SUCCESS, or an MPI error code if no device is found, a device
 *         cannot be opened, a protection domain cannot be allocated or the
 *         HCA type cannot be determined.
 *
 * \see hca_list
 */
int MPID_nem_ib_init_hca()
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_INIT_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_INIT_HCA);


    struct ibv_device *ib_dev    = NULL;
    struct ibv_device **dev_list = NULL;
    int nHca;
    int num_devices = 0;
    int hca_named_by_user;

#ifdef CRC_CHECK
    gen_crc_table();
#endif
    memset( hca_list, 0, sizeof(hca_list) );

    /* Get the list of devices */
    dev_list = ibv_get_device_list(&num_devices);
    if (dev_list==NULL) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
                    "**fail %s", "No IB device found");
    }

    if (umad_init() < 0)
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
                    "**fail %s", "Can't init UMAD library");

    /* Runtime checks */
    /* NOTE(review): if MPIU_Assert aborts in assertion-enabled builds, the
     * clamp below is only reached in builds without assertions - confirm
     * that this is the intended behaviour. */
    MPIU_Assert( num_devices<=MAX_NUM_HCAS );
    if ( num_devices> MAX_NUM_HCAS) {
        MPIU_Error_printf( "WARNING: found %d IB devices, the maximum is %d (MAX_NUM_HCAS). ",
                        num_devices, MAX_NUM_HCAS);
        num_devices = MAX_NUM_HCAS;
    }

    if ( ib_hca_num_hcas > num_devices) {
        MPIU_Error_printf( "WARNING: user requested %d IB devices, the available number is %d. ",
                        ib_hca_num_hcas, num_devices);
        ib_hca_num_hcas = num_devices;
    }

    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] Found %d HCAs\n", num_devices);
    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] User requested %d\n", ib_hca_num_hcas);

    /* A name counts as "given by the user" only when it is neither empty nor
     * the RDMA_IBA_NULL_HCA placeholder.  The same predicate is used for the
     * per-device selection and for the active-port fallback below so that
     * both agree on what "any device" means. */
    hca_named_by_user = (rdma_iba_hca[0] != 0) &&
                        (strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32) != 0);

    /* Retrieve information for each found device */
    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {

        /* Check for user choice */
        if (!hca_named_by_user || (ib_hca_num_hcas > 1)) {
            /* User hasn't specified any HCA name, or the number of HCAs is greater then 1 */
            ib_dev = dev_list[nHca];

        } else {
            /* User specified a HCA, try to look for it */
            int dev_count;

            ib_dev = NULL;
            for (dev_count = 0; dev_list[dev_count]; dev_count++) {
                if(!strncmp(ibv_get_device_name(dev_list[dev_count]), rdma_iba_hca, 32)) {
                    ib_dev = dev_list[dev_count];
                    break;
                }
            }
        }

        /* Check if device has been identified */
        hca_list[nHca].ib_dev = ib_dev;
        if (!ib_dev) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
                        "**fail %s", "No IB device found");
        }

        MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] HCA device %d found\n", nHca);



        hca_list[nHca].nic_context = ibv_open_device(ib_dev);
        if (hca_list[nHca].nic_context==NULL) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail",
                        "%s %d", "Failed to open HCA number", nHca);
        }

        hca_list[nHca].ptag = ibv_alloc_pd(hca_list[nHca].nic_context);
        if (!hca_list[nHca].ptag) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                    "**fail", "%s%d", "Failed to alloc pd number ", nHca);
        }


        /* Set the hca type */
    #if defined(RDMA_CM)
        if (process_info.use_iwarp_mode) {
            if ((mpi_errno = rdma_cm_get_hca_type(process_info.use_iwarp_mode, &process_info.hca_type)) != MPI_SUCCESS)
            {
                MPIU_ERR_POP(mpi_errno);
            }

            if (process_info.hca_type == CHELSIO_T3)
            {
                process_info.use_iwarp_mode = 1;
            }
        }
        else
    #endif /* defined(RDMA_CM) */

        /* With RDMA_CM this statement is the body of the 'else' above */
        mpi_errno = get_hca_type(hca_list[nHca].ib_dev, hca_list[nHca].nic_context, &hca_list[nHca].hca_type);
        if (mpi_errno != MPI_SUCCESS)
        {
            fprintf(stderr, "[%s, %d] Error in get_hca_type", __FILE__, __LINE__ );
            MPIU_ERR_POP(mpi_errno);
        }

    }



    /* At this point nHca == ib_hca_num_hcas, so "num_devices > nHca" means
     * that there are more devices than the single one that was set up.
     * The probe must look at slot 0: slot nHca was never filled in. */
    if (!hca_named_by_user &&
        (ib_hca_num_hcas==1) && (num_devices > nHca) &&
        (rdma_find_active_port(hca_list[0].nic_context, hca_list[0].ib_dev)==-1)) {
        /* Trac #376 - There are multiple rdma capable devices (num_devices) in
         * the system. The user has asked us to use ANY ONE device
         * (ib_hca_num_hcas), and the first device does not have an active
         * port. So try to find some other device with an active port.
         */
        int j;
        for (j = 0; dev_list[j]; j++) {
            struct ibv_context *candidate_ctx;

            candidate_ctx = ibv_open_device(dev_list[j]);
            if (!candidate_ctx) {
                /* Go to next device */
                continue;
            }
            if (rdma_find_active_port(candidate_ctx, dev_list[j])==-1) {
                /* No usable port; leave hca_list[0] untouched so that its
                 * context, device and protection domain stay consistent.
                 * NOTE(review): the probed context (and, on success below,
                 * the context/PD of the replaced device) is not released
                 * here - consider ibv_close_device()/ibv_dealloc_pd(). */
                continue;
            }

            /* Commit the whole triple for the same device, then stop: going
             * on would overwrite the context with that of a later device
             * which may have no active port at all. */
            ib_dev = dev_list[j];
            hca_list[0].nic_context = candidate_ctx;
            hca_list[0].ib_dev = ib_dev;
            hca_list[0].ptag = ibv_alloc_pd(candidate_ctx);
            if (!hca_list[0].ptag) {
                MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                     "**fail", "%s%d", "Failed to alloc pd number ", 0);
            }
            /* NOTE(review): hca_list[0].hca_type was determined for the
             * first device and is not refreshed - confirm whether it must
             * be re-read for the replacement device. */
            break;
        }
    }

fn_exit:
    /* Clean up before exit */
    if (dev_list!=NULL)
      ibv_free_device_list(dev_list);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_INIT_HCA);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Пример #15
0
/*
 * SaveArrow - pair one half of a message arrow with its counterpart.
 *
 * pArrow describes either the sending or the receiving side of a message
 * (pArrow->sendrecv).  The per-rank ArrowNode of the receiving rank keeps two
 * FIFO lists: unmatched send records (pStartList) and unmatched receive
 * records (pEndList).
 *
 *  - If the counterpart is already queued, it is removed, a complete
 *    RLOG_ARROW is written to the temporary arrow file and the queued record
 *    is freed.
 *  - Otherwise a new record is appended to the tail of the matching list and
 *    nothing is written.
 *
 * The arrow file (g_fArrow) is opened on first use.  If it cannot be opened,
 * or memory for a pending record cannot be allocated, an error is printed
 * and the arrow is dropped.
 */
void SaveArrow(RLOG_IARROW *pArrow)
{
    ArrowNode *pNode;
    StartArrowStruct *pStart, *pStartIter;
    EndArrowStruct *pEnd, *pEndIter;
    RLOG_ARROW arrow;

    /* Lazily create the temporary file that collects completed arrows */
    if (g_fArrow == NULL)
    {
        MPIU_Strncpy(g_pszArrowFilename, "ArrowFile.tmp", 1024);
        g_fArrow = fopen(g_pszArrowFilename, "w+b");
        if (g_fArrow == NULL)
        {
            MPIU_Error_printf("unable to open ArrowFile.tmp\n");
            return;
        }
    }

    if (pArrow->sendrecv == RLOG_SENDER)
    {
        /* Sender side: look for a receive that was logged earlier */
        pNode = GetArrowNode(pArrow->remote);
        pEnd = ExtractEndNode(pNode, pArrow->rank, pArrow->tag);
        if (pEnd == NULL)
        {
            /* No receive yet: remember this send until it shows up */
            pStart = (StartArrowStruct *)MPIU_Malloc(sizeof(StartArrowStruct));
            if (pStart == NULL)
            {
                MPIU_Error_printf("unable to allocate memory for a pending send arrow\n");
                return;
            }
            pStart->src = pArrow->rank;
            pStart->tag = pArrow->tag;
            pStart->length = pArrow->length;
            pStart->start_time = pArrow->timestamp;
            pStart->next = NULL;
            if (pNode->pStartList == NULL)
            {
                pNode->pStartList = pStart;
            }
            else
            {
                /* Append at the tail to keep the list in arrival order */
                pStartIter = pNode->pStartList;
                while (pStartIter->next != NULL)
                    pStartIter = pStartIter->next;
                pStartIter->next = pStart;
            }
            return;
        }
        /* The receive was logged first: the queued record's timestamp is
           stored as start_time and this record's as end_time */
        arrow.src = pArrow->rank;
        arrow.dest = pArrow->remote;
        arrow.length = pArrow->length;
        arrow.start_time = pEnd->timestamp;
        arrow.end_time = pArrow->timestamp;
        arrow.tag = pArrow->tag;
        arrow.leftright = RLOG_ARROW_LEFT;
        WriteFileData(&arrow, sizeof(RLOG_ARROW), g_fArrow);
        MPIU_Free(pEnd);
    }
    else
    {
        /* Receiver side: look for a send that was logged earlier */
        arrow.dest = pArrow->rank;
        arrow.end_time = pArrow->timestamp;
        arrow.tag = pArrow->tag;
        arrow.length = pArrow->length;

        pNode = GetArrowNode(pArrow->rank);
        pStart = ExtractStartNode(pNode, pArrow->remote, pArrow->tag);
        if (pStart != NULL)
        {
            arrow.src = pStart->src;
            arrow.start_time = pStart->start_time;
            arrow.length = pStart->length; /* the sender length is more accurate than the receiver length */
            arrow.leftright = RLOG_ARROW_RIGHT;
            MPIU_Free(pStart);
            WriteFileData(&arrow, sizeof(RLOG_ARROW), g_fArrow);
        }
        else
        {
            /* No send yet: remember this receive until it shows up */
            pEnd = (EndArrowStruct *)MPIU_Malloc(sizeof(EndArrowStruct));
            if (pEnd == NULL)
            {
                MPIU_Error_printf("unable to allocate memory for a pending receive arrow\n");
                return;
            }
            pEnd->src = pArrow->remote;
            pEnd->tag = pArrow->tag;
            pEnd->timestamp = pArrow->timestamp;
            pEnd->next = NULL;
            if (pNode->pEndList == NULL)
            {
                pNode->pEndList = pEnd;
            }
            else
            {
                /* Append at the tail to keep the list in arrival order */
                pEndIter = pNode->pEndList;
                while (pEndIter->next != NULL)
                    pEndIter = pEndIter->next;
                pEndIter->next = pEnd;
            }
        }
    }
}
Пример #16
0
/*@
   MPI_Init - Initialize the MPI execution environment

Input Parameters:
+  argc - Pointer to the number of arguments 
-  argv - Pointer to the argument vector

Thread and Signal Safety:
This routine must be called by one thread only.  That thread is called
the `main thread` and must be the thread that calls 'MPI_Finalize'.

Notes:
   The MPI standard does not say what a program can do before an 'MPI_INIT' or
   after an 'MPI_FINALIZE'.  In the MPICH implementation, you should do
   as little as possible.  In particular, avoid anything that changes the
   external state of the program, such as opening files, reading standard
   input or writing to standard output.

Notes for Fortran:
The Fortran binding for 'MPI_Init' has only the error return
.vb
    subroutine MPI_INIT( ierr )
    integer ierr
.ve

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_INIT

.seealso: MPI_Init_thread, MPI_Finalize
@*/
int MPI_Init( int *argc, char ***argv )
{
    static const char FCNAME[] = "MPI_Init";
    int mpi_errno = MPI_SUCCESS;
    int rc;
    int level_wanted, level_granted;
    MPID_MPI_INIT_STATE_DECL(MPID_STATE_MPI_INIT);

    /* The timer is set up first; its status is only handed to the debug
       logging pre-initialization when that is compiled in */
    rc = MPID_Wtime_init();
#ifdef USE_DBG_LOGGING
    MPIU_DBG_PreInit( argc, argv, rc );
#endif

    MPID_MPI_INIT_FUNC_ENTER(MPID_STATE_MPI_INIT);
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Anything other than the pre-init state is reported as
               "**inittwice" */
            if (MPIR_Process.initialized != MPICH_PRE_INIT) {
                mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
                                                  MPIR_ERR_RECOVERABLE,
                                                  FCNAME, __LINE__,
                                                  MPI_ERR_OTHER,
                                                  "**inittwice", NULL );
            }
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ... */

#if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
    /* All thread levels are available in this build, so the default level
       may be chosen through the environment.  Each level is accepted in an
       all-upper-case and an all-lower-case spelling. */
    {
        const struct {
            const char *upper;
            const char *lower;
            int         level;
        } known_levels[] = {
            { "MULTIPLE",   "multiple",   MPI_THREAD_MULTIPLE   },
            { "SERIALIZED", "serialized", MPI_THREAD_SERIALIZED },
            { "FUNNELED",   "funneled",   MPI_THREAD_FUNNELED   },
            { "SINGLE",     "single",     MPI_THREAD_SINGLE     }
        };
        const int   n_known   = (int)(sizeof(known_levels) / sizeof(known_levels[0]));
        const char *env_level = 0;

        level_wanted = MPI_THREAD_SINGLE;
        if (MPL_env2str( "MPICH_THREADLEVEL_DEFAULT", &env_level )) {
            int k;
            int matched = 0;

            for (k = 0; k < n_known; k++) {
                if (strcmp(env_level, known_levels[k].upper) == 0 ||
                    strcmp(env_level, known_levels[k].lower) == 0) {
                    level_wanted = known_levels[k].level;
                    matched      = 1;
                    break;
                }
            }
            if (!matched) {
                MPIU_Error_printf( "Unrecognized thread level %s\n", env_level );
                exit(1);
            }
        }
    }
#else
    level_wanted = MPI_THREAD_SINGLE;
#endif

    /* A request for asynchronous progress overrides the level chosen above:
       THREAD_MULTIPLE is asked for instead.  rc is reused as the flag. */
    rc = 0;
    MPL_env2bool("MPICH_ASYNC_PROGRESS", &rc);
    if (rc) {
        level_wanted = MPI_THREAD_MULTIPLE;
    }

    mpi_errno = MPIR_Init_thread( argc, argv, level_wanted, &level_granted );
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

    /* The progress thread is only started when THREAD_MULTIPLE was
       actually granted */
    if (rc && level_granted == MPI_THREAD_MULTIPLE) {
        mpi_errno = MPIR_Init_async_thread();
        if (mpi_errno) goto fn_fail;

        MPIR_async_thread_initialized = 1;
    }

    /* ... end of body of routine ... */
    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_REPORTING
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpi_init", "**mpi_init %p %p", argc, argv);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm( 0, FCNAME, mpi_errno );
    return mpi_errno;
    /* --END ERROR HANDLING-- */
}
Пример #17
0
/*
 * fPMI_Handle_spawn - process one "mcmd=spawn" request from a PMI client.
 *
 * Reads "name=value" lines from pentry until "endcmd" and builds a new
 * ProcessApp from them.  The app is appended to the spawn request that is
 * being collected on pentry (a spawn-multiple arrives as several mcmd=spawn
 * commands; the ProcessWorld and KVS are created when the first one is
 * seen).  Once the number of commands seen (spawnssofar) equals totspawns,
 * the registered userSpawner is invoked, "cmd=spawn_result rc=..." is sent
 * back to the client and the pending spawn state on pentry is cleared.
 *
 * Returns 0 on success (including "more spawn commands expected") and 1 on
 * an allocation failure or a malformed command.  Argument strings that were
 * duplicated but not handed over to the app are freed on every path.
 */
static int fPMI_Handle_spawn( PMIProcess *pentry )
{
    char          inbuf[PMIU_MAXLINE];
    char          *(args[PMI_MAX_ARGS]);
    char          key[MAXKEYLEN];
    char          outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp   *app = 0;
    int           rc;
    int           i;
    int           totspawns=0, spawnnum=0;
    PMIKVSpace    *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int  curInfoIdx = -1;

    DBG_PRINTFCOND(pmidebug,( "Entering fPMI_Handle_spawn\n" ));

    /* A value may arrive before its key; make sure the key buffers hold a
       valid (empty) string in that case instead of indeterminate bytes */
    key[0]        = 0;
    curInfoKey[0] = 0;

    if (!pentry->spawnWorld) {
	pWorld = (ProcessWorld *)MPIU_Malloc( sizeof(ProcessWorld) );
	if (!pWorld) return 1;
	
	pentry->spawnWorld = pWorld;
	pWorld->apps       = 0;
	pWorld->nProcess   = 0;
	pWorld->nextWorld  = 0;
	pWorld->nApps      = 0;
	pWorld->worldNum   = pUniv.nWorlds++;
	/* FIXME: What should be the defaults for the spawned env? 
	   Should the default be the env ov the spawner? */
	pWorld->genv       = 0;
	pentry->spawnKVS   = fPMIKVSAllocate();
    }
    else {
	pWorld = pentry->spawnWorld;
    }
    kvs    = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
       are present, then then can be linked to a world.  A 
       spawnmultiple command makes use of multiple mcmd=spawn PMI
       commands */ 

    /* Create a new app */
    app = (ProcessApp *)MPIU_Malloc( sizeof(ProcessApp) );
    if (!app) return 1;
    app->myAppNum  = 0;
    app->exename   = 0;
    app->arch      = 0;
    app->path      = 0;
    app->wdir      = 0;
    app->hostname  = 0;
    app->args      = 0;
    app->nArgs     = 0;
    app->soft.nelm = 0;
    app->nProcess  = 0;
    app->pState    = 0;
    app->nextApp   = 0;
    app->env       = 0;
    app->pWorld    = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
	pentry->spawnAppTail->nextApp = app;
    }
    else {
	pentry->spawnApp = app;
	pWorld->apps     = app;
    }
    pentry->spawnAppTail = app;

    for (i=0; i<PMI_MAX_ARGS; i++) args[i] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd 
       (expected end) */
    while ((rc = PMIUBufferedReadLine( pentry, inbuf, sizeof(inbuf) )) > 0) {
	char *cmdPtr, *valPtr, *p;

	/* Find the command = format */
	p = inbuf;
	/* Find first nonblank */
	while (*p && isascii(*p) && isspace(*p)) p++;
	if (!*p) {
	    /* Empty string.  Ignore */
	    continue;
	}
	cmdPtr = p++;
	/* Find '=' */
	while (*p && *p != '=') p++;
	if (!*p) {
	    /* No =.  Check for endcmd */
	    p--;
	    /* Trim spaces (stops at cmdPtr at the latest, which is nonblank) */
	    while (isascii(*p) && isspace(*p)) p--;
	    /* Add null to end */
	    *++p = 0;
	    if (strcmp( "endcmd", cmdPtr ) == 0) { break; }
	    /* FIXME: Otherwise, we have a problem */
	    MPIU_Error_printf( "Malformed PMI command (no endcmd seen\n" );
	    goto fn_fail;
	}
	else {
	    *p = 0;
	}
	
	/* Found an = .  value is the rest of the line */
	valPtr = ++p; 
	while (*p && *p != '\n') p++;
	if (*p) *p = 0;     /* Remove the newline */

	/* Now, process the cmd and value */
	if (strcmp( "nprocs", cmdPtr ) == 0) {
	    app->nProcess     = atoi(valPtr);
	    pWorld->nProcess += app->nProcess;
	}
	else if (strcmp( "execname", cmdPtr ) == 0) {
	    app->exename = MPIU_Strdup( valPtr );
	}
	else if (strcmp( "totspawns", cmdPtr ) == 0) {
	    /* This tells us how many separate spawn commands
	       we expect to see (e.g., for spawn multiple).
	       Each spawn command is a separate "app" */
	    totspawns = atoi(valPtr);
	}
	else if (strcmp( "spawnssofar", cmdPtr ) == 0) {
	    /* This tells us which app we are (starting from 1) */
	    spawnnum      = atoi(valPtr);
	    app->myAppNum = spawnnum - 1;
	}
	else if (strcmp( "argcnt", cmdPtr ) == 0) {
	    /* argcnt may not be set before the args */
	    int argcnt = atoi(valPtr);
	    /* The count comes from the client and later bounds the copy out
	       of args[]; it must not exceed the size of that array */
	    if (argcnt < 0 || argcnt > PMI_MAX_ARGS) {
		MPIU_Error_printf( "Malformed PMI Spawn command; the argument count is %d but must be between 0 and %d\n",
				   argcnt, PMI_MAX_ARGS );
		goto fn_fail;
	    }
	    app->nArgs = argcnt;
	}
	else if (strncmp( "arg", cmdPtr, 3 ) == 0) {
	    int argnum;
	    /* argcnt may not be set before the args */
	    /* Handle arg%d.  Values are 1 - origin */
	    argnum = atoi( cmdPtr + 3 ) - 1;
	    if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
		MPIU_Error_printf( "Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
				   argnum, PMI_MAX_ARGS );
		goto fn_fail;
	    }
	    /* A repeated index replaces the earlier value */
	    if (args[argnum]) MPIU_Free( args[argnum] );
	    args[argnum] = MPIU_Strdup( valPtr );
	}
	else if (strcmp( "preput_num", cmdPtr ) == 0) {
	    /* Number of preput pairs; not needed, each pair is stored as
	       its value arrives */
	    ;
	}
	else if (strncmp( "preput_key_", cmdPtr, 11 ) == 0) {
	    /* Save the key */
	    MPIU_Strncpy( key, valPtr, sizeof(key) );
	}
	else if (strncmp( "preput_val_", cmdPtr, 11 ) == 0) {
	    /* Place the key,val into the space associate with the current 
	       PMI group */
	    fPMIKVSAddPair( kvs, key, valPtr );
	}
	/* Info is on a per-app basis (it is an array of info items in
	   spawn multiple).  We can ignore most info values.
	   The ones that are handled are processed by a 
	   separate routine (not yet implemented).
	   simple_pmi.c sends (key,value), so we can keep just the
	   last key and pass the key/value to the registered info
	   handler, along with tha app structure.  Alternately,
	   we could save all info items and let the user's 
	   spawner handle it */
	else if (strcmp( "info_num", cmdPtr ) == 0) {
	    /* Number of info values */
	    ;
	}
	else if (strncmp( "info_key_", cmdPtr, 9 ) == 0) {
	    /* The actual name has a digit, which indicates *which* info 
	       key this is */
	    curInfoIdx = atoi( cmdPtr + 9 );
	    MPIU_Strncpy( curInfoKey, valPtr, sizeof(curInfoKey) );
	}
	else if (strncmp( "info_val_", cmdPtr, 9 ) == 0) {
	    /* The actual name has a digit, which indicates *which* info 
	       value this is */
	    int idx = atoi( cmdPtr + 9 );
	    if (idx != curInfoIdx) {
		MPIU_Error_printf( "Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n", curInfoIdx, idx );
		goto fn_fail;
	    }
	    else {
		MPIU_Strncpy( curInfoVal, valPtr, sizeof(curInfoVal) );
		/* Apply this info item */
		fPMIInfoKey( app, curInfoKey, curInfoVal );
	    }
	}
	else {
	    MPIU_Error_printf( "Unrecognized PMI subcommand on spawnmult: %s\n",
			       cmdPtr );
	    goto fn_fail;
	}
    }	

    /* Hand the collected argument strings over to the app; nArgs was
       checked against PMI_MAX_ARGS when it was read */
    if (app->nArgs > 0) {
	app->args  = (const char **)MPIU_Malloc( app->nArgs * sizeof(char *) );
	if (!app->args) {
	    MPIU_Error_printf( "Unable to allocate memory for the arguments of a spawned application\n" );
	    app->nArgs = 0;
	    goto fn_fail;
	}
	for (i=0; i<app->nArgs; i++) {
	    app->args[i] = args[i];
	    args[i]      = 0;
	}
    }
    /* Anything still in args[] has an index beyond argcnt and is not owned
       by the app; release it */
    for (i=0; i<PMI_MAX_ARGS; i++) {
	if (args[i]) {
	    MPIU_Free( args[i] );
	    args[i] = 0;
	}
    }

    pWorld->nApps ++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
	PMISetupNewGroup( pWorld->nProcess, kvs );
	
	if (userSpawner) {
	    rc = (*userSpawner)( pWorld, userSpawnerData );
	}
	else {
	    MPIU_Error_printf( "Unable to spawn %s\n", app->exename );
	    rc = 1;
	    MPIE_PrintProcessWorld( stdout, pWorld );
	}
	
	MPIU_Snprintf( outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc );
	PMIWriteLine( pentry->fd, outbuf );
	DBG_PRINTFCOND(pmidebug,( "%s", outbuf ));

	/* Clear for the next spawn */
	pentry->spawnApp     = 0;
	pentry->spawnAppTail = 0;
	pentry->spawnKVS     = 0;
	pentry->spawnWorld   = 0;
    }
    
    /* If totspawnnum != spawnnum, then we are expecting a 
       spawnmult with additional items */
    return 0;

  fn_fail:
    /* Release the argument strings that were duplicated so far.  The
       partially filled app stays linked on pentry, as before. */
    for (i=0; i<PMI_MAX_ARGS; i++) {
	if (args[i]) {
	    MPIU_Free( args[i] );
	    args[i] = 0;
	}
    }
    return 1;
}