Esempio n. 1
0
static int
ml_dispose_data(
	struct list_head *pStruNode
)
{
	MLDispose  *pStruD = NULL;
	MLCommNode *pStruCN = NULL;

	ML_ERROR("\n");
	pStruCN = list_entry(pStruNode, MLCommNode, struNode);
	pStruD  = (MLDispose *)pStruCN->ulData;

	if( pStruD->pDisposeFunc )
	{
	ML_ERROR("\n");
		pStruD->pDisposeFunc(pStruCN->iRecvLen, pStruCN->sRecvData, pStruCN->pUserData);
	}

	ML_ERROR("\n");
	if( pStruCN->sRecvData )
	{
		ML_FREE(pStruCN->sRecvData);
	}
	ML_FREE(pStruCN);
	ML_ERROR("\n");

	return ML_OK;
}
mca_coll_ml_collective_operation_description_t * 
        mca_coll_ml_schedule_alloc(mca_coll_ml_schedule_hier_info_t *h_info)
{
    mca_coll_ml_collective_operation_description_t  *schedule = NULL;

    schedule = (mca_coll_ml_collective_operation_description_t *)
        malloc(sizeof(mca_coll_ml_collective_operation_description_t));
    if (NULL == schedule) {
        ML_ERROR(("Can't allocate memory.\n"));
        return NULL;
    }

    /* Set dependencies equal to number of hierarchies */
    schedule->n_fns = h_info->nbcol_functions;
    schedule->progress_type = 0;
    /* Allocated the component function */
    schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
        calloc(h_info->nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t));
    if (NULL == schedule->component_functions) {
        ML_ERROR(("Can't allocate memory.\n"));
        free(schedule);
        return NULL;
    }
    return schedule;
}
Esempio n. 3
0
void rmultinom(int n, double* prob, int K, int* rN)
/* `Return' vector  rN[1:K] {K := length(prob)}
 *  where rN[j] ~ Bin(n, prob[j]) ,  sum_j rN[j] == n,  sum_j prob[j] == 1,
 */
{
    int k;
    double pp;
    LDOUBLE p_tot = 0.;
    /* This calculation is sensitive to exact values, so we try to
       ensure that the calculations are as accurate as possible
       so different platforms are more likely to give the same
       result. */

#ifdef MATHLIB_STANDALONE
    if (K < 1) {
        ML_ERROR(ME_DOMAIN, "rmultinom");
        return;
    }
    if (n < 0)  ML_ERR_ret_NAN(0);
#else
    if (K == NA_INTEGER || K < 1) {
        ML_ERROR(ME_DOMAIN, "rmultinom");
        return;
    }
    if (n == NA_INTEGER || n < 0)  ML_ERR_ret_NAN(0);
#endif

    /* Note: prob[K] is only used here for checking  sum_k prob[k] = 1 ;
     *       Could make loop one shorter and drop that check !
     */
    for(k = 0; k < K; k++) {
        pp = prob[k];
        if (!R_FINITE(pp) || pp < 0. || pp > 1.) ML_ERR_ret_NAN(k);
        p_tot += pp;
        rN[k] = 0;
    }
    if(fabs((double)(p_tot - 1.)) > 1e-7)
        MATHLIB_ERROR(_("rbinom: probability sum should be 1, but is %g"),
                      (double) p_tot);
    if (n == 0) return;
    if (K == 1 && p_tot == 0.) return;/* trivial border case: do as rbinom */

    /* Generate the first K-1 obs. via binomials */

    for(k = 0; k < K-1; k++) { /* (p_tot, n) are for "remaining binomial" */
        if(prob[k]) {
            pp = (double)(prob[k] / p_tot);
            /* printf("[%d] %.17f\n", k+1, pp); */
            rN[k] = ((pp < 1.) ? (int) rbinom((double) n,  pp) :
                     /*>= 1; > 1 happens because of rounding */
                     n);
            n -= rN[k];
        }
        else rN[k] = 0;
        if(n <= 0) /* we have all*/ return;
        p_tot -= prob[k]; /* i.e. = sum(prob[(k+1):K]) */
    }
    rN[K-1] = n;
    return;
}
Esempio n. 4
0
static int mca_coll_ml_lmngr_init(mca_coll_ml_lmngr_t *lmngr)
{
    int i, num_blocks;
    int rc;
    unsigned char *addr;
    bcol_base_network_context_t *nc;

    ML_VERBOSE(7, ("List initialization"));

#ifdef HAVE_POSIX_MEMALIGN
    if((errno = posix_memalign(&lmngr->base_addr,
                    lmngr->list_alignment,
                    lmngr->list_size * lmngr->list_block_size)) != 0) {
        ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno)));
        return OMPI_ERROR;
    }
    lmngr->alloc_base = lmngr->base_addr;
#else
    lmngr->alloc_base =
        malloc(lmngr->list_size * lmngr->list_block_size + lmngr->list_alignment);
    if(NULL == lmngr->alloc_base) {
        ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno)));
        return OMPI_ERROR;
    }

    lmngr->base_addr = (void*)OPAL_ALIGN((uintptr_t)lmngr->alloc_base,
            lmngr->list_alignment, uintptr_t);
#endif

    assert(lmngr->n_resources < MCA_COLL_ML_MAX_REG_INFO);

    for(i= 0 ;i < lmngr->n_resources ;i++) {
        nc = lmngr->net_context[i];
        ML_VERBOSE(7, ("Call registration for resource index %d", i));
        rc = lmngr_register(lmngr, nc);
        if (OMPI_SUCCESS != rc) {
            ML_ERROR(("Failed to lmngr register: %d [%s]", errno, strerror(errno)));
            return rc;
        }
    }

    /* slice the memory to blocks */
    addr = (unsigned char *) lmngr->base_addr;
    for(num_blocks = 0; num_blocks < (int)lmngr->list_size; num_blocks++) {
        mca_bcol_base_lmngr_block_t *item = OBJ_NEW(mca_bcol_base_lmngr_block_t);
        item->base_addr = (void *)addr;
        item->lmngr = lmngr;
        /* ML_VERBOSE(10, ("Appending block # %d %p", num_blocks, (void *)addr)); */
        opal_list_append(&lmngr->blocks_list, (opal_list_item_t *)item);
        /* advance the address */
        addr += lmngr->list_block_size;
    }

    ML_VERBOSE(7, ("List initialization done %d",
                opal_list_get_size(&lmngr->blocks_list)));
    return OMPI_SUCCESS;
}
Esempio n. 5
0
void gammalims(double *xmin, double *xmax)
{
/* FIXME: Even better: If IEEE, #define these in nmath.h
          and don't call gammalims() at all
*/
#ifdef IEEE_754
    *xmin = -170.5674972726612;
    *xmax =  171.61447887182298;/*(3 Intel/Sparc architectures)*/
#else
    double alnbig, alnsml, xln, xold;
    int i;

    alnsml = log(d1mach(1));
    *xmin = -alnsml;
    for (i=1; i<=10; ++i) {
        xold = *xmin;
        xln = log(*xmin);
        *xmin -= *xmin * ((*xmin + .5) * xln - *xmin - .2258 + alnsml) /
                (*xmin * xln + .5);
        if (fabs(*xmin - xold) < .005) {
            *xmin = -(*xmin) + .01;
            goto find_xmax;
        }
    }

    /* unable to find xmin */

    ML_ERROR(ME_NOCONV);
    *xmin = *xmax = numeric_limits<double>::quiet_NaN();

find_xmax:

    alnbig = log(d1mach(2));
    *xmax = alnbig;
    for (i=1; i<=10; ++i) {
        xold = *xmax;
        xln = log(*xmax);
        *xmax -= *xmax * ((*xmax - .5) * xln - *xmax + .9189 - alnbig) /
                (*xmax * xln - .5);
        if (fabs(*xmax - xold) < .005) {
            *xmax += -.01;
            goto done;
        }
    }

    /* unable to find xmax */

    ML_ERROR(ME_NOCONV);
    *xmin = *xmax = numeric_limits<double>::quiet_NaN();

done:
    *xmin = std::max(*xmin, -(*xmax) + 1);
#endif
}
int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module,
        int topo_index)
{
    int i, rc, hier, *ranks_in_comm,
        is_used = 0,
        comm_size = ompi_comm_size(ml_module->comm);
    int n_hier, tp , max_tp;
    const mca_coll_ml_topology_t *topo_info;

    ranks_in_comm = (int *) malloc(comm_size * sizeof(int));
    if (OPAL_UNLIKELY(NULL == ranks_in_comm)) {
        ML_ERROR(("Memory allocation failed."));
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM, true);
    }

    for (i = 0; i < comm_size; ++i) {
        ranks_in_comm[i] = i;
    }

    if (COLL_ML_TOPO_MAX == topo_index) {
        tp = 0;
        max_tp = COLL_ML_TOPO_MAX;
    } else {
        tp = topo_index;
        max_tp = topo_index + 1;
    }

    for (; tp < max_tp; tp++) {
        topo_info = &ml_module->topo_list[tp];
        n_hier = topo_info->n_levels;
        for (hier = 0; hier < n_hier; ++hier) {
            hierarchy_pairs *pair = &topo_info->component_pairs[hier];
            mca_bcol_base_component_t *b_cm = pair->bcol_component;
            if(0 == strcmp(bcol_name,
                        b_cm->bcol_version.mca_component_name)) {
                is_used = 1;
                break;
            }
        }
    }

    rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT,
                  ompi_comm_rank(ml_module->comm), MPI_MAX,
                  comm_size, ranks_in_comm, ml_module->comm);

    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        ML_ERROR(("comm_allreduce_pml failed."));
        ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP, true);
    }

    free(ranks_in_comm);

    return is_used;
}
Esempio n. 7
0
/* modified version of bessel_k that accepts a work array instead of
   allocating one. */
double bessel_k_ex(double x, double alpha, double expo, double *bk)
{
    long nb, ncalc, ize;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_k");
	return ML_NAN;
    }
    ize = (long)expo;
    if(alpha < 0)
	alpha = -alpha;
    nb = 1+ (long)floor(alpha);/* nb-1 <= |alpha| < nb */
    alpha -= (double)(nb-1);
    K_bessel(&x, &alpha, &nb, &ize, bk, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_k(%g): ncalc (=%ld) != nb (=%ld); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_k(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bk[nb-1];
    return x;
}
Esempio n. 8
0
double pnchisq(double x, double df, double ncp, int lower_tail, int log_p)
{
    double ans;
#ifdef IEEE_754
    if (ISNAN(x) || ISNAN(df) || ISNAN(ncp))
	return x + df + ncp;
    if (!R_FINITE(df) || !R_FINITE(ncp))
	ML_ERR_return_NAN;
#endif

    if (df < 0. || ncp < 0.) ML_ERR_return_NAN;

    ans = pnchisq_raw(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, lower_tail, log_p);
    if(ncp >= 80) {
	if(lower_tail) {
	    ans = fmin2(ans, R_D__1);  /* e.g., pchisq(555, 1.01, ncp = 80) */
	} else { /* !lower_tail */
	    /* since we computed the other tail cancellation is likely */
	    if(ans < (log_p ? (-10. * M_LN10) : 1e-10)) ML_ERROR(ME_PRECISION, "pnchisq");
	    if(!log_p) ans = fmax2(ans, 0.0);  /* Precaution PR#7099 */
	}
    }
    if (!log_p || ans < -1e-8)
	return ans;
    else { // log_p  &&  ans > -1e-8
	// prob. = exp(ans) is near one: we can do better using the other tail
#ifdef DEBUG_pnch
	REprintf("   pnchisq_raw(*, log_p): ans=%g => 2nd call, other tail\n", ans);
#endif
	// FIXME: (sum,sum2) will be the same (=> return them as well and reuse here ?)
	ans = pnchisq_raw(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, !lower_tail, FALSE);
	return log1p(-ans);
    }
}
Esempio n. 9
0
/**
 * Hierarchical blocking barrier
 */
int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm,
                              mca_coll_base_module_t *module)
{
    int rc;
    ompi_request_t *req;

    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;

#if OPAL_ENABLE_DEBUG
    static int barriers_count = 0;
#endif

    ML_VERBOSE(10, ("Barrier num %d start.", ++barriers_count));

    rc = mca_coll_ml_barrier_launch(ml_module, &req);
    if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
        ML_ERROR(("Failed to launch a barrier."));
        return rc;
    }

    /* Blocking barrier */
    ompi_request_wait_completion(req);
    ompi_request_free(&req);

    ML_VERBOSE(10, ("Barrier num %d was done.", barriers_count));

    return OMPI_SUCCESS;
}
Esempio n. 10
0
double pnchisq(double x, double df, double ncp, int lower_tail, int log_p)
{
    double ans;
#ifdef IEEE_754
    if (ISNAN(x) || ISNAN(df) || ISNAN(ncp))
	return x + df + ncp;
    if (!R_FINITE(df) || !R_FINITE(ncp))
	ML_ERR_return_NAN;
#endif

    if (df < 0. || ncp < 0.) ML_ERR_return_NAN;

    ans = pnchisq_raw(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, lower_tail);
    if(ncp >= 80) {
	if(lower_tail) {
	    ans = fmin2(ans, 1.0);  /* e.g., pchisq(555, 1.01, ncp = 80) */
	} else { /* !lower_tail */
	    /* since we computed the other tail cancellation is likely */
	    if(ans < 1e-10) ML_ERROR(ME_PRECISION, "pnchisq");
	    ans = fmax2(ans, 0.0);  /* Precaution PR#7099 */
	}
    }
    if (!log_p) return ans;
    /* if ans is near one, we can do better using the other tail */
    if (ncp >= 80 || ans < 1 - 1e-8) return log(ans);
    ans = pnchisq_raw(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, !lower_tail);
    return log1p(-ans);
}
Esempio n. 11
0
/* modified version of bessel_j that accepts a work array instead of
   allocating one. */
double bessel_j_ex(double x, double alpha, double *bj)
{
    long nb, ncalc;
    double na;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(bessel_j_ex(x, -alpha, bj) * cos(M_PI * alpha) +
	       ((alpha == na) ? 0 :
		bessel_y_ex(x, -alpha, bj) * sin(M_PI * alpha)));
    }
    nb = 1 + (long)na; /* nb-1 <= alpha < nb */
    alpha -= (nb-1);
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%ld) != nb (=%ld); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+nb-1);
    }
    x = bj[nb-1];
    return x;
}
int ml_coll_hier_reduce_setup(mca_coll_ml_module_t *ml_module)
{
    int alg, ret, topo_index=0;
    mca_coll_ml_topology_t *topo_info =
           &ml_module->topo_list[ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_MSG]];

    if ( ml_module->max_fn_calls < topo_info->n_levels ) {
        ml_module->max_fn_calls = topo_info->n_levels;
    }


    alg = mca_coll_ml_component.coll_config[ML_REDUCE][ML_SMALL_MSG].algorithm_id;
    topo_index = ml_module->collectives_topology_map[ML_REDUCE][alg];
    if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) {
        ML_ERROR(("No topology index or algorithm was defined"));
        topo_info->hierarchical_algorithms[ML_REDUCE] = NULL;
        return OMPI_ERROR;
    }

    ret = mca_coll_ml_build_static_reduce_schedule(&ml_module->topo_list[topo_index],
            &ml_module->coll_ml_reduce_functions[alg]);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        ML_VERBOSE(10, ("Failed to setup static reduce"));
        return ret;
    }


    return OMPI_SUCCESS;
}
/**
 * Non blocking memory syncronization
 */
int mca_coll_ml_memsync_intra(mca_coll_ml_module_t *ml_module, int bank_index)
{
    int rc;
    ompi_request_t *req;

    ML_VERBOSE(8, ("MEMSYNC start"));

    if (OPAL_UNLIKELY(0 == opal_list_get_size(&ml_module->active_bcols_list))) {
        /* Josh's change: In the case where only p2p is active, we have no way
         * to reset the bank release counters to zero, I am doing that here since it
         * would actually be "correct" to do it outside of this conditional, however
         * I suspect that reseting the value to zero elsewhere would result in corrupted 
         * flow for non-contiguous data types
         */
        
        /* nasty hack to ensure that resources are released in the single level 
         * ptp case. 
         */
        mca_coll_ml_collective_operation_progress_t dummy_coll;

        dummy_coll.coll_module = (mca_coll_base_module_t *) ml_module;
        dummy_coll.fragment_data.current_coll_op = ML_MEMSYNC;
        dummy_coll.full_message.bank_index_to_recycle = bank_index;

        /* Handling special case when memory syncronization is not required */
        rc = mca_coll_ml_memsync_recycle_memory(&dummy_coll);
        if(OPAL_UNLIKELY(rc != OMPI_SUCCESS)){
            ML_ERROR(("Failed to flush the list."));
            return rc;
        } 
    } else {
        /* retain the communicator until the operation is finished. the communicator
         * will be released by CHECK_AND_RECYCLE */
        OBJ_RETAIN(ml_module->comm);

        rc = mca_coll_ml_memsync_launch(ml_module, &req, bank_index);
        if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
            ML_ERROR(("Failed to launch a barrier."));
            return rc;
        }
    }

    return OMPI_SUCCESS;
}
Esempio n. 14
0
static gnm_float lgammacor(gnm_float x)
{
    static const gnm_float algmcs[15] = {
	GNM_const(+.1666389480451863247205729650822e+0),
	GNM_const(-.1384948176067563840732986059135e-4),
	GNM_const(+.9810825646924729426157171547487e-8),
	GNM_const(-.1809129475572494194263306266719e-10),
	GNM_const(+.6221098041892605227126015543416e-13),
	GNM_const(-.3399615005417721944303330599666e-15),
	GNM_const(+.2683181998482698748957538846666e-17),
	GNM_const(-.2868042435334643284144622399999e-19),
	GNM_const(+.3962837061046434803679306666666e-21),
	GNM_const(-.6831888753985766870111999999999e-23),
	GNM_const(+.1429227355942498147573333333333e-24),
	GNM_const(-.3547598158101070547199999999999e-26),
	GNM_const(+.1025680058010470912000000000000e-27),
	GNM_const(-.3401102254316748799999999999999e-29),
	GNM_const(+.1276642195630062933333333333333e-30)
    };

    gnm_float tmp;

#ifdef NOMORE_FOR_THREADS
    static int nalgm = 0;
    static gnm_float xbig = 0, xmax = 0;

    /* Initialize machine dependent constants, the first time gamma() is called.
	FIXME for threads ! */
    if (nalgm == 0) {
	/* For IEEE gnm_float precision : nalgm = 5 */
	nalgm = chebyshev_init(algmcs, 15, GNM_EPSILON/2);/*was d1mach(3)*/
	xbig = 1 / gnm_sqrt(GNM_EPSILON/2); /* ~ 94906265.6 for IEEE gnm_float */
	xmax = gnm_exp(fmin2(gnm_log(GNM_MAX / 12), -gnm_log(12 * GNM_MIN)));
	/*   = GNM_MAX / 48 ~= 3.745e306 for IEEE gnm_float */
    }
#else
/* For IEEE gnm_float precision GNM_EPSILON = 2^-52 = GNM_const(2.220446049250313e-16) :
 *   xbig = 2 ^ 26.5
 *   xmax = GNM_MAX / 48 =  2^1020 / 3 */
# define nalgm 5
# define xbig  GNM_const(94906265.62425156)
# define xmax  GNM_const(3.745194030963158e306)
#endif

    if (x < 10)
	ML_ERR_return_NAN
    else if (x >= xmax) {
	ML_ERROR(ME_UNDERFLOW);
	return ML_UNDERFLOW;
    }
    else if (x < xbig) {
	tmp = 10 / x;
	return chebyshev_eval(tmp * tmp * 2 - 1, algmcs, nalgm) / x;
    }
    else return 1 / (x * 12);
}
Esempio n. 15
0
// unused now from R
double bessel_j(double x, double alpha)
{
    int nb, ncalc;
    double na, *bj;
#ifndef MATHLIB_STANDALONE
    const void *vmax;
#endif

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(((alpha - na == 0.5) ? 0 : bessel_j(x, -alpha) * cospi(alpha)) +
	       ((alpha      == na ) ? 0 : bessel_y(x, -alpha) * sinpi(alpha)));
    }
    else if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }
    nb = 1 + (int)na; /* nb-1 <= alpha < nb */
    alpha -= (double)(nb-1);
#ifdef MATHLIB_STANDALONE
    bj = (double *) calloc(nb, sizeof(double));
#ifndef _RENJIN
    if (!bj) MATHLIB_ERROR("%s", _("bessel_j allocation error"));
#endif
#else
    vmax = vmaxget();
    bj = (double *) R_alloc((size_t) nb, sizeof(double));
#endif
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bj[nb-1];
#ifdef MATHLIB_STANDALONE
    free(bj);
#else
    vmaxset(vmax);
#endif
    return x;
}
static int mca_coll_ml_memsync_recycle_memory(mca_coll_ml_collective_operation_progress_t *coll_op)
{
    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)coll_op->coll_module;
    mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block;
    mca_coll_ml_collective_operation_progress_t *pending_op = NULL;
    int bank = coll_op->full_message.bank_index_to_recycle;
    int rc;
    bool have_resources = true;

    assert(bank >= 0 || 
           bank < (int)ml_memblock->num_banks ||
           ML_MEMSYNC == coll_op->fragment_data.current_coll_op);

    ML_VERBOSE(10,("MEMSYNC: bank %d was recycled coll_op %p", bank, coll_op));

    /* set the bank as free */

    ml_memblock->bank_is_busy[bank] = false;
    ml_memblock->bank_release_counters[bank] = 0;

    /* Check if we have any requests that are waiting for memory */
    while(opal_list_get_size(&ml_module->waiting_for_memory_list) && have_resources) {
        pending_op = (mca_coll_ml_collective_operation_progress_t *)
            opal_list_get_first(&ml_module->waiting_for_memory_list);

        ML_VERBOSE(10, ("Trying to start pending %p", pending_op));
        assert(pending_op->pending & REQ_OUT_OF_MEMORY);
        rc = pending_op->fragment_data.message_descriptor->fragment_launcher(pending_op);
        switch (rc) {
            case OMPI_SUCCESS: 
                ML_VERBOSE(10, ("Pending fragment was started %p", pending_op));
                pending_op->pending ^= REQ_OUT_OF_MEMORY;
                opal_list_remove_item(&ml_module->waiting_for_memory_list,
                        (opal_list_item_t *)pending_op);
                if (0 != pending_op->fragment_data.offset_into_user_buffer) {
                    /* non-zero offset ==> this is not fragment 0 */
                    CHECK_AND_RECYCLE(pending_op);
                }
                break;
            case OMPI_ERR_TEMP_OUT_OF_RESOURCE: 
                ML_VERBOSE(10, ("Already on the list %p", pending_op));
                have_resources = false;
                break;
            default:
                ML_ERROR(("Error happened %d", rc));
                return rc;
        }
    }

    ML_VERBOSE(10, ("Memsync done %p", coll_op));
    return OMPI_SUCCESS;
}
Esempio n. 17
0
double bessel_y(double x, double alpha)
{
    long nb, ncalc;
    double na, *by;
#ifndef MATHLIB_STANDALONE
    const void *vmax;
#endif

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_y");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(bessel_y(x, -alpha) * cos(M_PI * alpha) -
	       ((alpha == na) ? 0 :
		bessel_j(x, -alpha) * sin(M_PI * alpha)));
    }
    nb = 1+ (long)na;/* nb-1 <= alpha < nb */
    alpha -= (nb-1);
#ifdef MATHLIB_STANDALONE
    by = (double *) calloc(nb, sizeof(double));
    if (!by) MATHLIB_ERROR("%s", _("bessel_y allocation error"));
#else
    vmax = vmaxget();
    by = (double *) R_alloc((size_t) nb, sizeof(double));
#endif
    Y_bessel(&x, &alpha, &nb, by, &ncalc);
    if(ncalc != nb) {/* error input */
	if(ncalc == -1)
	    return ML_POSINF;
	else if(ncalc < -1)
	    MATHLIB_WARNING4(_("bessel_y(%g): ncalc (=%ld) != nb (=%ld); alpha=%g. Arg. out of range?\n"),
			     x, ncalc, nb, alpha);
	else /* ncalc >= 0 */
	    MATHLIB_WARNING2(_("bessel_y(%g,nu=%g): precision lost in result\n"),
			     x, alpha+nb-1);
    }
    x = by[nb-1];
#ifdef MATHLIB_STANDALONE
    free(by);
#else
    vmaxset(vmax);
#endif
    return x;
}
Esempio n. 18
0
static int parse_fragmentation_key(section_config_t *section, char *value)
{
    if(!strcasecmp(value, "enable")) {
        section->config.fragmentation_enabled = 1;
    } else if (!strcasecmp(value, "disable")) {
        section->config.fragmentation_enabled = 0;
    } else {
        ML_ERROR(("Line %d, unexpected fragmentation value %s. Ligal values are: enable/disable",
                    coll_ml_config_yynewlines, value));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Esempio n. 19
0
double beta(double a, double b)
{
#ifdef NOMORE_FOR_THREADS
    static double xmin, xmax = 0;/*-> typically = 171.61447887 for IEEE */
    static double lnsml = 0;/*-> typically = -708.3964185 */

    if (xmax == 0) {
	    gammalims(&xmin, &xmax);
	    lnsml = log(d1mach(1));
    }
#else
/* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 :
 *   xmin, xmax : see ./gammalims.c
 *   lnsml = log(DBL_MIN) = log(2 ^ -1022) = -1022 * log(2)
*/
# define xmin  -170.5674972726612
# define xmax   171.61447887182298
# define lnsml -708.39641853226412
#endif


#ifdef IEEE_754
    /* NaNs propagated correctly */
    if(ISNAN(a) || ISNAN(b)) return a + b;
#endif

    if (a < 0 || b < 0)
	ML_ERR_return_NAN
    else if (a == 0 || b == 0)
	return ML_POSINF;
    else if (!R_FINITE(a) || !R_FINITE(b))
	return 0;

    if (a + b < xmax) {/* ~= 171.61 for IEEE */
//	return gammafn(a) * gammafn(b) / gammafn(a+b);
	/* All the terms are positive, and all can be large for large
	   or small arguments.  They are never much less than one.
	   gammafn(x) can still overflow for x ~ 1e-308, 
	   but the result would too. 
	*/
	return (1 / gammafn(a+b)) * gammafn(a) * gammafn(b);
    } else {
	double val = lbeta(a, b);
	if (val < lnsml) {
	    /* a and/or b so big that beta underflows */
	    ML_ERROR(ME_UNDERFLOW, "beta");
	    /* return ML_UNDERFLOW; pointless giving incorrect value */
	}
	return exp(val);
    }
}
Esempio n. 20
0
double attribute_hidden
pnbeta2(double x, double o_x, double a, double b, double ncp,
        /* o_x  == 1 - x  but maybe more accurate */
        int lower_tail, int log_p)
{
    long double ans = pnbeta_raw(x, o_x, a,b, ncp);

    /* return R_DT_val(ans), but we want to warn about cancellation here */
    if (lower_tail) return (double) (log_p ? logl(ans) : ans);
    else {
        if(ans > 1 - 1e-10) ML_ERROR(ME_PRECISION, "pnbeta");
        ans = fmin2(ans, 1.0);  /* Precaution */
        return (double) (log_p ? log1p((double)-ans) : (1. - ans));
    }
}
Esempio n. 21
0
double bessel_k(double x, double alpha, double expo)
{
    long nb, ncalc, ize;
    double *bk;
#ifndef MATHLIB_STANDALONE
    const void *vmax;
#endif

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_k");
	return ML_NAN;
    }
    ize = (long)expo;
    if(alpha < 0)
	alpha = -alpha;
    nb = 1+ (long)floor(alpha);/* nb-1 <= |alpha| < nb */
    alpha -= (double)(nb-1);
#ifdef MATHLIB_STANDALONE
    bk = (double *) calloc(nb, sizeof(double));
    if (!bk) MATHLIB_ERROR("%s", _("bessel_k allocation error"));
#else
    vmax = vmaxget();
    bk = (double *) R_alloc((size_t) nb, sizeof(double));
#endif
    K_bessel(&x, &alpha, &nb, &ize, bk, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_k(%g): ncalc (=%ld) != nb (=%ld); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_k(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bk[nb-1];
#ifdef MATHLIB_STANDALONE
    free(bk);
#else
    vmaxset(vmax);
#endif
    return x;
}
Esempio n. 22
0
int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc)
{
    int i, rc;

    ML_VERBOSE(7, ("Append new network context %p to list manager %p",
                nc, lmngr));

    if (NULL == nc) {
        return OMPI_ERROR;
    }

    /* check if we already have the context on the list.
       if we do have - do not do anything, just return success
     */
    if (OPAL_UNLIKELY(MCA_COLL_ML_MAX_REG_INFO == lmngr->n_resources)) {
        ML_ERROR(("MPI overflows maximum supported network contexts is %d", MCA_COLL_ML_MAX_REG_INFO));
        return OMPI_ERROR;
    }

    for (i = 0; i < lmngr->n_resources; i++) {
        if (lmngr->net_context[i] == nc) {
            ML_VERBOSE(7, ("It is not new "));
            return OMPI_SUCCESS;
        }
    }

    ML_VERBOSE(7, ("Adding new context"));

    /* Setting context id */
    nc->context_id = lmngr->n_resources;
    lmngr->net_context[lmngr->n_resources] = nc;

    lmngr->n_resources++;

    /* Register the memory with new context */
    if (NULL != lmngr->base_addr) {
        rc = lmngr_register(lmngr, nc);
        if (OMPI_SUCCESS == rc) {
            return rc;
        }
    }

    return OMPI_SUCCESS;
}
Esempio n. 23
0
double attribute_hidden lgammacor(double x)
{
    const static double algmcs[15] = {
	+.1666389480451863247205729650822e+0,
	-.1384948176067563840732986059135e-4,
	+.9810825646924729426157171547487e-8,
	-.1809129475572494194263306266719e-10,
	+.6221098041892605227126015543416e-13,
	-.3399615005417721944303330599666e-15,
	+.2683181998482698748957538846666e-17,
	-.2868042435334643284144622399999e-19,
	+.3962837061046434803679306666666e-21,
	-.6831888753985766870111999999999e-23,
	+.1429227355942498147573333333333e-24,
	-.3547598158101070547199999999999e-26,
	+.1025680058010470912000000000000e-27,
	-.3401102254316748799999999999999e-29,
	+.1276642195630062933333333333333e-30
    };

    double tmp;

/* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 :
 *   xbig = 2 ^ 26.5
 *   xmax = DBL_MAX / 48 =  2^1020 / 3 */
#define nalgm 5
#define xbig  94906265.62425156
#define xmax  3.745194030963158e306

    if (x < 10)
	ML_ERR_return_NAN
    else if (x >= xmax) {
	ML_ERROR(ME_UNDERFLOW, "lgammacor");
	/* allow to underflow below */
    }
    else if (x < xbig) {
	tmp = 10 / x;
	return chebyshev_eval(tmp * tmp * 2 - 1, algmcs, nalgm) / x;
    }
    return 1 / (x * 12);
}
Esempio n. 24
0
double dnbeta(double x, double a, double b, double lambda, int give_log)
{
    const double eps = 1.e-14;
    const int maxiter = 200;

    double k, lambda2, psum, sum, term, weight;

#ifdef IEEE_754
    if (ISNAN(x) || ISNAN(a) || ISNAN(b) || ISNAN(lambda))
	return x + a + b + lambda;
#endif
    if (lambda < 0 || a <= 0 || b <= 0)
	ML_ERR_return_NAN;

    if (!R_FINITE(a) || !R_FINITE(b) || !R_FINITE(lambda))
	ML_ERR_return_NAN;

    if(x <= 0) return R_D__0;

    if(lambda == 0)
	return dbeta(x, a, b, give_log);

    term =  dbeta(x, a, b, /* log = */ false);
    lambda2 = 0.5 * lambda;
    weight = exp(- lambda2);
    sum	 = weight * term;
    psum = weight;
    for(k = 1; k <= maxiter; k++) {
	weight *= (lambda2 / k);
	term *= x * (a + b) / a;
	sum  += weight * term;
	psum += weight;
	a += 1;
	if(1 - psum < eps) break;
    }
    if(1 - psum >= eps) { /* not converged */
	ML_ERROR(ME_PRECISION);
    }
    return R_D_val(sum);
}
Esempio n. 25
0
static void destruct_lmngr(mca_coll_ml_lmngr_t *lmngr)
{
    int max_nc = lmngr->n_resources;
    int rc, i;
    bcol_base_network_context_t *nc;
    opal_list_item_t *item;

    ML_VERBOSE(6, ("Destructing list manager %p", (void *)lmngr));

    while (NULL != (item = opal_list_remove_first(&lmngr->blocks_list))) {
        OBJ_RELEASE(item);
    }

    OBJ_DESTRUCT(&lmngr->blocks_list);

    if (NULL != lmngr->alloc_base) {
        for( i = 0; i < max_nc; i++ ) {
            nc = lmngr->net_context[i];
            rc = nc->deregister_memory_fn(nc->context_data,
                    lmngr->reg_desc[nc->context_id]);
            if(rc != OMPI_SUCCESS) {
                ML_ERROR(("Failed to unregister , lmngr %p", (void *)lmngr));
            }
        }

        ML_VERBOSE(10, ("Release base addr %p", lmngr->alloc_base));

        free(lmngr->alloc_base);
        lmngr->alloc_base = NULL;
        lmngr->base_addr = NULL;
    }

    lmngr->list_block_size = 0;
    lmngr->list_alignment = 0;
    lmngr->list_size = 0;
    lmngr->n_resources = 0;

    OBJ_DESTRUCT(&lmngr->mem_lock);
}
Esempio n. 26
0
/* Called from R: modified version of bessel_j(), accepting a work array
 * instead of allocating one. */
double bessel_j_ex(double x, double alpha, double *bj)
{
    int nb, ncalc;
    double na;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(((alpha - na == 0.5) ? 0 : bessel_j_ex(x, -alpha, bj) * cospi(alpha)) +
	       ((alpha      == na ) ? 0 : bessel_y_ex(x, -alpha, bj) * sinpi(alpha)));
    }
    else if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }
    nb = 1 + (int)na; /* nb-1 <= alpha < nb */
    alpha -= (double)(nb-1); // ==> alpha' in [0, 1)
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bj[nb-1];
    return x;
}
Esempio n. 27
0
/**
 * Hierarchical non-blocking barrier
 */
int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm,
                               ompi_request_t **req,
                               mca_coll_base_module_t *module)
{
    int rc;
    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;

#if OPAL_ENABLE_DEBUG
    static int barriers_count = 0;
#endif

    ML_VERBOSE(10, ("IBarrier num %d start.", ++barriers_count));

    rc = mca_coll_ml_barrier_launch(ml_module, req);
    if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
        ML_ERROR(("Failed to launch a barrier."));
        return rc;
    }

    ML_VERBOSE(10, ("IBarrier num %d was done.", barriers_count));

    return OMPI_SUCCESS;
}
Esempio n. 28
0
mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
        mca_coll_ml_lmngr_t *lmngr)
{
    int rc;
    opal_list_t *list = &lmngr->blocks_list;

    /* Check if the list manager was initialized */
    if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) {
        ML_VERBOSE(7 ,("Starting memory initialization"));
        rc = mca_coll_ml_lmngr_init(lmngr);
        if (OMPI_SUCCESS != rc) {
            ML_ERROR(("Failed to init memory"));
            return NULL;
        }
    }

    if(OPAL_UNLIKELY(opal_list_is_empty(list))) {
        /* Upper layer need to handle the NULL */
        ML_VERBOSE(1, ("List manager is empty."));
        return NULL;
    }

    return (mca_bcol_base_lmngr_block_t *)opal_list_remove_first(list);
}
Esempio n. 29
0
File: gamma.c Progetto: csilles/cxxr
double gammafn(double x)
{
    const static double gamcs[42] = {
	+.8571195590989331421920062399942e-2,
	+.4415381324841006757191315771652e-2,
	+.5685043681599363378632664588789e-1,
	-.4219835396418560501012500186624e-2,
	+.1326808181212460220584006796352e-2,
	-.1893024529798880432523947023886e-3,
	+.3606925327441245256578082217225e-4,
	-.6056761904460864218485548290365e-5,
	+.1055829546302283344731823509093e-5,
	-.1811967365542384048291855891166e-6,
	+.3117724964715322277790254593169e-7,
	-.5354219639019687140874081024347e-8,
	+.9193275519859588946887786825940e-9,
	-.1577941280288339761767423273953e-9,
	+.2707980622934954543266540433089e-10,
	-.4646818653825730144081661058933e-11,
	+.7973350192007419656460767175359e-12,
	-.1368078209830916025799499172309e-12,
	+.2347319486563800657233471771688e-13,
	-.4027432614949066932766570534699e-14,
	+.6910051747372100912138336975257e-15,
	-.1185584500221992907052387126192e-15,
	+.2034148542496373955201026051932e-16,
	-.3490054341717405849274012949108e-17,
	+.5987993856485305567135051066026e-18,
	-.1027378057872228074490069778431e-18,
	+.1762702816060529824942759660748e-19,
	-.3024320653735306260958772112042e-20,
	+.5188914660218397839717833550506e-21,
	-.8902770842456576692449251601066e-22,
	+.1527474068493342602274596891306e-22,
	-.2620731256187362900257328332799e-23,
	+.4496464047830538670331046570666e-24,
	-.7714712731336877911703901525333e-25,
	+.1323635453126044036486572714666e-25,
	-.2270999412942928816702313813333e-26,
	+.3896418998003991449320816639999e-27,
	-.6685198115125953327792127999999e-28,
	+.1146998663140024384347613866666e-28,
	-.1967938586345134677295103999999e-29,
	+.3376448816585338090334890666666e-30,
	-.5793070335782135784625493333333e-31
    };

    int i, n;
    double y;
    double sinpiy, value;

#ifdef NOMORE_FOR_THREADS
    static int ngam = 0;
    static double xmin = 0, xmax = 0., xsml = 0., dxrel = 0.;

    /* Initialize machine dependent constants, the first time gamma() is called.
	FIXME for threads ! */
    if (ngam == 0) {
	ngam = chebyshev_init(gamcs, 42, DBL_EPSILON/20);/*was .1*d1mach(3)*/
	gammalims(&xmin, &xmax);/*-> ./gammalims.c */
	xsml = exp(fmax2(log(DBL_MIN), -log(DBL_MAX)) + 0.01);
	/*   = exp(.01)*DBL_MIN = 2.247e-308 for IEEE */
	dxrel = sqrt(DBL_EPSILON);/*was sqrt(d1mach(4)) */
    }
#else
/* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 :
 * (xmin, xmax) are non-trivial, see ./gammalims.c
 * xsml = exp(.01)*DBL_MIN
 * dxrel = sqrt(DBL_EPSILON) = 2 ^ -26
*/
# define ngam 22
# define xmin -170.5674972726612
# define xmax  171.61447887182298
# define xsml 2.2474362225598545e-308
# define dxrel 1.490116119384765696e-8
#endif

    if(ISNAN(x)) return x;

    /* If the argument is exactly zero or a negative integer
     * then return NaN. */
    if (x == 0 || (x < 0 && x == (long)x)) {
	ML_ERROR(ME_DOMAIN, "gammafn");
	return ML_NAN;
    }

    y = fabs(x);

    if (y <= 10) {

	/* Compute gamma(x) for -10 <= x <= 10
	 * Reduce the interval and find gamma(1 + y) for 0 <= y < 1
	 * first of all. */

	n = (int) x;
	if(x < 0) --n;
	y = x - n;/* n = floor(x)  ==>	y in [ 0, 1 ) */
	--n;
	value = chebyshev_eval(y * 2 - 1, gamcs, ngam) + .9375;
	if (n == 0)
	    return value;/* x = 1.dddd = 1+y */

	if (n < 0) {
	    /* compute gamma(x) for -10 <= x < 1 */

	    /* exact 0 or "-n" checked already above */

	    /* The answer is less than half precision */
	    /* because x too near a negative integer. */
	    if (x < -0.5 && fabs(x - (int)(x - 0.5) / x) < dxrel) {
		ML_ERROR(ME_PRECISION, "gammafn");
	    }

	    /* The argument is so close to 0 that the result would overflow. */
	    if (y < xsml) {
		ML_ERROR(ME_RANGE, "gammafn");
		if(x > 0) return ML_POSINF;
		else return ML_NEGINF;
	    }

	    n = -n;

	    for (i = 0; i < n; i++) {
		value /= (x + i);
	    }
	    return value;
	}
	else {
	    /* gamma(x) for 2 <= x <= 10 */

	    for (i = 1; i <= n; i++) {
		value *= (y + i);
	    }
	    return value;
	}
    }
    else {
	/* gamma(x) for	 y = |x| > 10. */

	if (x > xmax) {			/* Overflow */
	    ML_ERROR(ME_RANGE, "gammafn");
	    return ML_POSINF;
	}

	if (x < xmin) {			/* Underflow */
	    ML_ERROR(ME_UNDERFLOW, "gammafn");
	    return 0.;
	}

	if(y <= 50 && y == (int)y) { /* compute (n - 1)! */
	    value = 1.;
	    for (i = 2; i < y; i++) value *= i;
	}
	else { /* normal case */
	    value = exp((y - 0.5) * log(y) - y + M_LN_SQRT_2PI +
			((2*y == (int)2*y)? stirlerr(y) : lgammacor(y)));
	}
	if (x > 0)
	    return value;

	if (fabs((x - (int)(x - 0.5))/x) < dxrel){

	    /* The answer is less than half precision because */
	    /* the argument is too near a negative integer. */

	    ML_ERROR(ME_PRECISION, "gammafn");
	}

	sinpiy = sin(M_PI * y);
	if (sinpiy == 0) {		/* Negative integer arg - overflow */
	    ML_ERROR(ME_RANGE, "gammafn");
	    return ML_POSINF;
	}

	return -M_PI / (y * sinpiy * value);
    }
}
/*
 * Parse a single file
 */
static int parse_file(char *filename)
{
    int val;
    int ret = OMPI_SUCCESS;
    bool first_section = true, first_coll = true;
    coll_config_t coll_config;

    memset (&coll_config, 0, sizeof (coll_config));
    reset_collective(&coll_config);

    /* Open the file */
    coll_ml_config_yyin = fopen(filename, "r");
    if (NULL == coll_ml_config_yyin) {
        ML_ERROR(("Failed to open config file %s", filename));
        ret = OMPI_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* Do the parsing */
    coll_ml_config_parse_done = false;
    coll_ml_config_yynewlines = 1;
    coll_ml_config_init_buffer(coll_ml_config_yyin);
    while (!coll_ml_config_parse_done) {
        val = coll_ml_config_yylex();
        switch (val) {
        case COLL_ML_CONFIG_PARSE_DONE:
        case COLL_ML_CONFIG_PARSE_NEWLINE:
            break;
        case COLL_ML_CONFIG_PARSE_COLLECTIVE:
            /* dump all the information to last section that was defined */
            if (!first_coll) {
                ret = save_settings(&coll_config);

                if (OMPI_SUCCESS != ret) {
                    ML_ERROR(("Error in syntax for collective %s", coll_config.coll_name));
                    goto cleanup;
                }
            }
            
            /* reset collective config */
            reset_collective(&coll_config);

            first_coll    = false;
            first_section = true;

            ret = set_collective_name(&coll_config);
            if (OMPI_SUCCESS != ret) {
                goto cleanup;
            }
            break;
        case COLL_ML_CONFIG_PARSE_SECTION:
            if (ML_UNDEFINED == coll_config.coll_id) {
                ML_ERROR(("Collective section wasn't defined !"));
                ret = OMPI_ERROR;
                goto cleanup;
            }

            if (!first_section) {
                /* dump all the information to last section that was defined */
                ret = save_settings(&coll_config);
                if (OMPI_SUCCESS != ret) {
                    ML_ERROR(("Error in syntax for collective %s section %s", coll_config.coll_name,
                              coll_config.section.section_name));
                    goto cleanup;
                }
            }

            first_section = false;

            /* reset all section values */
            reset_section(&coll_config.section);

            /* set new section name */
            ret = set_section_name(&coll_config.section);
            if (OMPI_SUCCESS != ret) {
                goto cleanup;
            }
            break;
        case COLL_ML_CONFIG_PARSE_SINGLE_WORD:
            if (ML_UNDEFINED == coll_config.coll_id ||
                ML_UNDEFINED == coll_config.section.section_id) {
                ML_ERROR(("Collective section or sub-section was not defined !"));
                ret = OMPI_ERROR;
                goto cleanup;
            } else {
                parse_line(&coll_config.section);
            }
            break;

        default:
            /* anything else is an error */
            ML_ERROR(("Unexpected token!"));
            ret = OMPI_ERROR;
            goto cleanup;
            break;
        }
    }

    save_settings(&coll_config);
    fclose(coll_ml_config_yyin);
    coll_ml_config_yylex_destroy ();
    ret = OMPI_SUCCESS;

cleanup:
    reset_collective(&coll_config);
    if (NULL != key_buffer) {
        free(key_buffer);
        key_buffer = NULL;
        key_buffer_len = 0;
    }
    return ret;
}