コード例 #1
0
ファイル: rvine.c プロジェクト: yasserglez/dml
/*
 * Fill the matrix representation of a vine from its sequence of trees.
 *
 * vine:  destination; vine->dim (n) is read, and vine->trees, vine->order,
 *        vine->matrix and vine->copulas are written.
 * trees: array with n - 1 entries. Trailing NULL entries mark trees that
 *        were truncated. The "Cea", "Ceb", "Ue" and "copula" edge
 *        attributes of the non-NULL trees are read.
 *
 * For every truncated level a temporary tree is built (an unweighted spanning
 * tree over the edges that satisfy the proximity condition). These temporary
 * trees are only used to pick the diagonal entries of the matrix: the entry
 * below the diagonal is set to 0 and the copula to NULL for those levels.
 * The temporary trees are released before returning.
 *
 * If no tree is available (all entries NULL, or n < 2) only vine->trees is
 * set (to 0) and the function returns.
 */
static void
rvine_trees_to_vine(dml_vine_t *vine, igraph_t **trees)
{
    size_t n = vine->dim;
    size_t *order_inv;
    gsl_vector_short *B;
    igraph_integer_t Cea, Ceb;
    size_t x = 0, x_hat = 0, x_hat_hat = 0; // Initialized to avoid GCC warnings.
    dml_copula_t *copula = NULL; // Initialized to avoid GCC warnings.
    igraph_integer_t e; // Edge id.
    igraph_integer_t a, b, aa, ab, ba, bb; // Vertex id.
    igraph_t **last_trees = NULL;
    igraph_t *graph = NULL;
    gsl_vector_short *Ue, *Ua, *Ub;

    // Set the number of trees of the vine: drop the trailing NULL entries.
    // The scan is bounded so that it never reads before the start of the
    // array when every tree is NULL (or when n < 2).
    vine->trees = (n > 0) ? n - 1 : 0;
    while (vine->trees > 0 && trees[vine->trees - 1] == NULL) {
        vine->trees--;
    }

    // Nothing to do for vines without trees.
    if (vine->trees == 0) return;

    // Selecting a structure for the trees that were truncated.
    // Is this really necessary? Think a better solution.
    if (vine->trees != n - 1) {
        igraph_i_set_attribute_table(&igraph_cattribute_table);
        last_trees = g_malloc_n(n - 1 - vine->trees, sizeof(igraph_t *));
        graph = g_malloc(sizeof(igraph_t));

        for (size_t k = vine->trees; k < n - 1; k++) { // Tree index.
            // Tree of the previous level: the last fitted tree for the first
            // truncated level, the temporary tree built in the previous
            // iteration afterwards.
            igraph_t *prev = (k <= vine->trees)
                    ? trees[k - 1]
                    : last_trees[k - 1 - vine->trees];

            igraph_empty(graph, n - k, IGRAPH_UNDIRECTED);

            // Adding all "possible" edges. Vertices a and b of this graph
            // correspond to edges a and b of the previous tree.
            for (a = 0; a < igraph_vcount(graph) - 1; a++) {
                for (b = a + 1; b < igraph_vcount(graph); b++) {
                    // Checking the proximity condition: the two edges of the
                    // previous tree must share a vertex.
                    igraph_edge(prev, a, &aa, &ab);
                    igraph_edge(prev, b, &ba, &bb);
                    if (aa == ba || aa == bb || ab == ba || ab == bb) {
                        igraph_add_edge(graph, a, b);
                        igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1);

                        // Variables "connected" by this edge and conditioned set.
                        Ua = EAP(prev, "Ue", a);
                        Ub = EAP(prev, "Ue", b);
                        Ue = gsl_vector_short_calloc(n);
                        for (size_t i = 0; i < n; i++) {
                            gsl_vector_short_set(Ue, i,
                                    gsl_vector_short_get(Ua, i)
                                            | gsl_vector_short_get(Ub, i));
                            // Variables are stored 1-based in "Cea"/"Ceb".
                            if (gsl_vector_short_get(Ua, i)
                                    && !gsl_vector_short_get(Ub, i)) {
                                SETEAN(graph, "Cea", e, i + 1);
                            }
                            if (gsl_vector_short_get(Ub, i)
                                    && !gsl_vector_short_get(Ua, i)) {
                                SETEAN(graph, "Ceb", e, i + 1);
                            }
                        }
                        SETEAP(graph, "Ue", e, Ue);
                    }
                }
            }

            // Compute a spanning tree (all edges have the same weight).
            last_trees[k - vine->trees] = g_malloc(sizeof(igraph_t));
            igraph_minimum_spanning_tree_unweighted(graph, last_trees[k - vine->trees]);

            igraph_destroy(graph);
        }
    }

    order_inv = g_malloc0_n(n, sizeof(size_t));
    B = gsl_vector_short_calloc(n); // B[v] != 0 once variable v + 1 is on the diagonal.

    // The "line N" comments below refer to the pseudocode of the algorithm
    // this function implements.

    // for loop in line 2. Column i is filled from the tree n - i - 2.
    for (size_t i = 0; i < n - 1; i++) {
        if (trees[n - i - 2] == NULL) {
            // Truncated level: use the temporary tree to choose x only.
            for (e = 0; e < igraph_ecount(last_trees[n - i - 2 - vine->trees]); e++) {
                x = EAN(last_trees[n - i - 2 - vine->trees], "Cea", e);
                x_hat = EAN(last_trees[n - i - 2 - vine->trees], "Ceb", e);
                if (!gsl_vector_short_get(B, x - 1)
                        && !gsl_vector_short_get(B, x_hat - 1)) {
                    x_hat = 0;
                    copula = NULL;
                    break;
                }
            }
        } else {
            for (e = 0; e < igraph_ecount(trees[n - i - 2]); e++) {
                x = EAN(trees[n - i - 2], "Cea", e);
                x_hat = EAN(trees[n - i - 2], "Ceb", e);
                if (!gsl_vector_short_get(B, x - 1)
                        && !gsl_vector_short_get(B, x_hat - 1)) {
                    copula = EAP(trees[n - i - 2], "copula", e);
                    break;
                }
            }
        }

        // Line 4.
        gsl_vector_short_set(B, x - 1, 1);
        vine->order[n - i - 1] = x - 1;
        order_inv[x - 1] = n - i;
        vine->matrix[i][i] = x;
        vine->matrix[i + 1][i] = x_hat;
        vine->copulas[i + 1][i] = copula;

        // for loop in line 5. Rows below i + 1 come from the lower trees;
        // rows that correspond to truncated trees are left untouched.
        for (size_t k = i + 2; k < n; k++) {
            if (trees[n - k - 1] != NULL) {
                for (e = 0; e < igraph_ecount(trees[n - k - 1]); e++) {
                    Cea = EAN(trees[n - k - 1], "Cea", e);
                    Ceb = EAN(trees[n - k - 1], "Ceb", e);
                    if (x == Cea) {
                        x_hat_hat = Ceb;
                        if (!gsl_vector_short_get(B, x_hat_hat - 1)) {
                            // The pseudocode of the algorithm does not include
                            // this check. Invalid matrices were generated when
                            // x_hat_hat is set to an index already assigned
                            // to a diagonal entry.
                            copula = EAP(trees[n - k - 1], "copula", e);
                            break;
                        }
                    } else if (x == Ceb) {
                        x_hat_hat = Cea;
                        if (!gsl_vector_short_get(B, x_hat_hat - 1)) {
                            // Same check as in the previous branch.
                            copula = EAP(trees[n - k - 1], "copula", e);
                            break;
                        }
                    }
                }
                vine->matrix[k][i] = x_hat_hat;
                vine->copulas[k][i] = copula;
            }
        }
    }

    // The only variable not yet on the diagonal takes the last diagonal entry.
    for (size_t i = 0; i < n; i++) {
        if (!gsl_vector_short_get(B, i)) {
            vine->matrix[n - 1][n - 1] = i + 1;
            vine->order[0] = i;
            order_inv[i] = 1;
            break;
        }
    }
    // Reorder the variables. The simulation algorithm assumes that the
    // diagonal entries of the R-vine matrix are ordered from n to 1.
    for (size_t i = 0; i < n; i++) {
        for (size_t j = 0; j <= i; j++) {
            if (vine->matrix[i][j] > 0) {
                vine->matrix[i][j] = order_inv[vine->matrix[i][j] - 1];
            }
        }
    }

    // Release the temporary trees built for the truncated levels.
    if (vine->trees != n - 1) {
        for (size_t i = 0; i < n - 1 - vine->trees; i++) {
            for (e = 0; e < igraph_ecount(last_trees[i]); e++) {
                Ue = EAP(last_trees[i], "Ue", e);
                gsl_vector_short_free(Ue);
            }
            DELEA(last_trees[i], "Ue");
            igraph_destroy(last_trees[i]);
            g_free(last_trees[i]);
        }
        g_free(last_trees);
        g_free(graph);
    }
    g_free(order_inv);
    gsl_vector_short_free(B);
}
コード例 #2
0
// MCMCMC algorithm: runs nChains chains at different temperatures and
// periodically tries to swap the temperatures of chains that are adjacent
// on the temperature ladder.
//
// N            - length of every chain vector
// Adj          - matrix forwarded to TrySwap and Gibbs
// Steps        - number of iterations of the main loop
// nChains      - number of chains; values < 1 return immediately
// BestSolution - output: receives a copy of the chain with the highest
//                value returned by Gibbs so far (untouched if none beats
//                the initial value)
// r            - random number generator forwarded to the helpers
// lgammaLookup, logLookup - lookup tables forwarded to TrySwap and Gibbs
//
// Returns the highest value returned by Gibbs over all chains and steps,
// or -1000000000.0 if no value exceeded it.
// Progress is reported on stderr.
double MC3 (int N,
			gsl_matrix_short * Adj,
			int Steps,
			int nChains,
			gsl_vector_short * BestSolution,
			gsl_rng * r,
			gsl_vector * lgammaLookup,
			gsl_vector * logLookup){

	double BestMarginal;
	BestMarginal = -1000000000.0;

	// without at least one chain there is nothing to run; this also
	// avoids a zero/negative length array and Temps[nChains-1] below
	if(nChains < 1){
		return BestMarginal;
	}

	// create the chains
	gsl_vector_short * Chains[nChains];
	//create copies for use by Gibbs and marginal functions
	gsl_vector_short * ChainCopy = gsl_vector_short_calloc(N);
	gsl_vector_short * ChainCopy2 = gsl_vector_short_calloc(N);
	// create the fitness vector
	gsl_vector * Marginals = gsl_vector_calloc(nChains);
	//initialize swapping vector for RGF
	gsl_vector_short * RGFswap = gsl_vector_short_calloc(N+1);
	
	int i,j,k;
	
	//initialize chains
	for(i=0; i<nChains; i++){
		// allocate memory
		Chains[i] = gsl_vector_short_calloc(N);
		// initialize the population
		Partition_Initialize(Chains[i], N, r);
	}

	//generate temperatures assuming uniform spacing
	gsl_vector * Temps = gsl_vector_calloc(nChains);
	//step size for incrementing temperatures (unused, and therefore
	//set to 0 instead of dividing by zero, when there is one chain)
	double StepSize;
	StepSize = (nChains > 1) ? (COLDTEMP - HOTTEMP)/((double)nChains - 1) : 0.0;
	gsl_vector_set(Temps, 0, HOTTEMP);
	for(i=1; i<(nChains-1); i++){
		gsl_vector_set(Temps, i, gsl_vector_get(Temps, i-1)+StepSize);
	}
	//with a single chain this overwrites entry 0 with COLDTEMP
	gsl_vector_set(Temps, nChains-1, COLDTEMP);

	//for convenience, we want a copy of the Temps vector that doesn't
	//get swapped around
	gsl_vector * TempsCopy = gsl_vector_calloc(nChains);
	gsl_vector_memcpy(TempsCopy, Temps);
	
	//RGF all of the chains to start with
	for(i=0; i<nChains; i++){
		RGF(N, Chains[i], RGFswap);
	}
	
	int chInd1, chInd2;
	double dtmp;
	int itmp;
	int swapFlag;

	for(i=0; i<Steps; i++){
		//print the best likelihood we've found so far every so often
		if(i % 1000 == 0){
			fprintf(stderr, "Step %d Best solution %1.4f\n", i, BestMarginal);
		}

		//if enough steps have passed, swap temperatures
		if(i % SWAPSTEPS == 0){
			//try to swap using "bucket brigade"
			for(j=0; j<(nChains-1); j++){
				//find which chains currently hold the j-th and (j+1)-th
				//temperatures of the original ladder; the values in Temps
				//are exact copies, so comparing with == is intended
				chInd1 = -1;
				chInd2 = -1;
				for(k=0; k<nChains; k++){
					if(gsl_vector_get(TempsCopy, j) == gsl_vector_get(Temps, k)){
						chInd1 = k;
					}
					if(gsl_vector_get(TempsCopy, j+1) == gsl_vector_get(Temps, k)){
						chInd2 = k;
					}
					if(chInd1 >= 0 && chInd2 >=0){
						break;
					}
				}
				//never index Chains with -1 if a temperature was not found
				if(chInd1 < 0 || chInd2 < 0){
					continue;
				}
				//try to swap them
				swapFlag = TrySwap(N, Adj,
								   Chains[chInd1], Chains[chInd2],
								   ChainCopy, RGFswap,
								   gsl_vector_get(Temps, chInd1),
								   gsl_vector_get(Temps, chInd2),
								   r, lgammaLookup, logLookup);
				//on success the two chains exchange their temperatures
				if(swapFlag == 1){
					dtmp = gsl_vector_get(Temps, chInd1);
					gsl_vector_set(Temps, chInd1, gsl_vector_get(Temps, chInd2));
					gsl_vector_set(Temps, chInd2, dtmp);
				}
			}
		}
		
		//take a step
		for(j=0; j<nChains; j++){
			dtmp = Gibbs(N, Chains[j], ChainCopy,
						 ChainCopy2, Adj,
						 gsl_vector_get(Temps, j), RGFswap,
						 r, lgammaLookup, logLookup);
			gsl_vector_set(Marginals, j, dtmp);
		}
		
		//update the best solution, if appropriate (one scan of Marginals)
		itmp = gsl_vector_max_index(Marginals);
		dtmp = gsl_vector_get(Marginals, itmp);
		if(dtmp > BestMarginal){
			BestMarginal = dtmp;
			gsl_vector_short_memcpy(BestSolution, Chains[itmp]);
			fprintf(stderr, "Steps %d Best solution %.4f\n", i, BestMarginal);
		}
	}

	//free memory
	gsl_vector_short_free(RGFswap);
	gsl_vector_free(Temps);
	gsl_vector_free(TempsCopy);
	gsl_vector_free(Marginals);
	gsl_vector_short_free(ChainCopy);
	gsl_vector_short_free(ChainCopy2);
	for(i=0; i<nChains; i++){
		gsl_vector_short_free(Chains[i]);
	}
	
	return BestMarginal;		
}
コード例 #3
0
ファイル: rvine.c プロジェクト: yasserglez/dml
/*
 * Fit the trees of a vine one level at a time.
 *
 * For each tree index k (0 .. n - 2, n = data->size2) the function builds a
 * graph with the candidate edges, computes the edge weights with
 * rvine_set_weight, stores the minimum weight spanning tree (Prim) in
 * trees[k] and assigns a copula to each of its edges with dml_copula_select.
 *
 * trees:           output array with room for n - 1 entries. Entries after a
 *                  truncated tree are not written here (presumably the caller
 *                  initializes the array to NULL; confirm in the caller).
 * data:            observations; each column is one variable.
 * weight:          forwarded to rvine_set_weight.
 * trunc:           with DML_VINE_TRUNC_AIC the AIC of the copulas of each
 *                  tree is added up; when the sum is >= 0 the tree is
 *                  discarded, trees[k] is set to NULL and fitting stops.
 * indeptest, indeptest_level, types, types_size, select, rng:
 *                  forwarded to dml_copula_select.
 *
 * When fewer than two variables are given there are no trees to fit and the
 * function returns without touching trees.
 */
static void
fit_rvine_trees(igraph_t **trees,
                const gsl_matrix *data,
                const dml_vine_weight_t weight,
                const dml_vine_trunc_t trunc,
                const dml_copula_indeptest_t indeptest,
                const double indeptest_level,
                const dml_copula_type_t *types,
                const size_t types_size,
                const dml_copula_select_t select,
                const gsl_rng *rng)
{
    size_t m, n;
    igraph_t *graph;
    igraph_vector_t *graph_weight;
    dml_copula_t *copula;
    gsl_vector *x;
    igraph_integer_t e; // Edge id.
    igraph_integer_t a, aa, ab, b, ba, bb; // Vertex id.
    gsl_vector *u = NULL, *v = NULL;
    igraph_integer_t Cea, Ceb;
    gsl_vector_short *Ue, *Ua, *Ub;
    size_t k;
    dml_measure_t *measure;
    double tree_aic, copula_aic;
    gsl_permutation *perm, *rank, *u_rank = NULL, *v_rank = NULL;

    igraph_i_set_attribute_table(&igraph_cattribute_table);

    m = data->size1;
    n = data->size2;

    // With n < 2 the bound n - 1 of the loop below and the index n - 2 used
    // after it would be wrong (unsigned wrap-around), so stop before
    // allocating anything.
    if (n < 2) return;

    graph = g_malloc(sizeof(igraph_t));
    graph_weight = g_malloc(sizeof(igraph_vector_t));
    perm = gsl_permutation_alloc(m);

    for (k = 0; k < n - 1; k++) { // Tree index.
        if (k == 0) {
            igraph_full(graph, n, IGRAPH_UNDIRECTED, IGRAPH_NO_LOOPS);

            // Assign the observations to the nodes.
            for (size_t i = 0; i < n; i++) { // Variable and node index.
                x = gsl_vector_alloc(m);
                gsl_matrix_get_col(x, data, i);

                // Results of the h-function of the copula assigned to the
                // edge that corresponds to this vertex in the previous tree.
                // h for the h-function with its arguments in order and
                // hrev for the h-function with its arguments reversed. In the
                // first tree both are equal to the observations of the
                // corresponding variable, in the rest of the trees they differ.
                // Note that here both attributes share the same vector.
                SETVAP(graph, "h", i, x);
                SETVAP(graph, "hrev", i, x);
                gsl_sort_vector_index(perm, x);
                rank = gsl_permutation_alloc(m);
                gsl_permutation_inverse(rank, perm);
                // Ranks of the h and hrev vectors (shared as well).
                SETVAP(graph, "hrank", i, rank);
                SETVAP(graph, "hrevrank", i, rank);
            }

            for (e = 0; e < igraph_ecount(graph); e++) {
                igraph_edge(graph, e, &a, &b);

                // Variables "connected" by this edge.
                Ue = gsl_vector_short_calloc(n);
                gsl_vector_short_set(Ue, a, 1);
                gsl_vector_short_set(Ue, b, 1);
                SETEAP(graph, "Ue", e, Ue);

                // Conditioned set (variables are stored 1-based).
                SETEAN(graph, "Cea", e, a + 1);
                SETEAN(graph, "Ceb", e, b + 1);
                Cea = EAN(graph, "Cea", e);
                Ceb = EAN(graph, "Ceb", e);

                // Calculate the weight of the edge.
                u = VAP(graph, "h", a);
                v = VAP(graph, "h", b);
                u_rank = VAP(graph, "hrank", a);
                v_rank = VAP(graph, "hrank", b);
                // The conditioned set is ordered to make the order of the
                // arguments in the bivariate copulas unique as suggested in
                // Czado, C. (2010) Pair-Copula Constructions of Multivariate
                // Copulas. In Jaworski, P. and Durante, F. and Hardle, W. K.
                // and Rychlik, T. (eds.) Copula Theory and Its Applications,
                // Springer-Verlag, 93-109.
                if (Cea < Ceb) {
                    rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank);
                } else {
                    rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank);
                }
            }
        } else {
            igraph_empty(graph, n - k, IGRAPH_UNDIRECTED);

            // Adding all "possible" edges. Vertices a and b of this graph
            // correspond to edges a and b of the previous tree.
            for (a = 0; a < igraph_vcount(graph) - 1; a++) {
                for (b = a + 1; b < igraph_vcount(graph); b++) {
                    igraph_edge(trees[k - 1], a, &aa, &ab);
                    igraph_edge(trees[k - 1], b, &ba, &bb);

                    // Checking the proximity condition: the two edges of the
                    // previous tree must share a vertex.
                    if (aa == ba || aa == bb || ab == ba || ab == bb) {
                        igraph_add_edge(graph, a, b);
                        igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1);

                        // Variables "connected" by this edge and conditioned set.
                        Ua = EAP(trees[k - 1], "Ue", a);
                        Ub = EAP(trees[k - 1], "Ue", b);
                        Ue = gsl_vector_short_calloc(n);
                        for (size_t i = 0; i < n; i++) {
                            gsl_vector_short_set(Ue, i,
                                    gsl_vector_short_get(Ua, i)
                                            | gsl_vector_short_get(Ub, i));
                            if (gsl_vector_short_get(Ua, i)
                                    && !gsl_vector_short_get(Ub, i)) {
                                SETEAN(graph, "Cea", e, i + 1);
                            }
                            if (gsl_vector_short_get(Ub, i)
                                    && !gsl_vector_short_get(Ua, i)) {
                                SETEAN(graph, "Ceb", e, i + 1);
                            }
                        }
                        SETEAP(graph, "Ue", e, Ue);
                    }
                }
            }

            // Compute pseudo-observations and edge weights. The vertex
            // attributes start as NULL and are computed on first use.
            for (a = 0; a < igraph_vcount(graph); a++) {
                // See the comment in the code for the first tree.
                SETVAP(graph, "h", a, NULL);
                SETVAP(graph, "hrev", a, NULL);
                SETVAP(graph, "hrank", a, NULL);
                SETVAP(graph, "hrevrank", a, NULL);
            }
            for (e = 0; e < igraph_ecount(graph); e++) {
                igraph_edge(graph, e, &a, &b);
                Cea = EAN(graph, "Cea", e);
                Ceb = EAN(graph, "Ceb", e);

                // Assign u and u_rank: h is used in this case ...
                if ((Cea == EAN(trees[k - 1], "Cea", a)
                        && (EAN(trees[k - 1], "Cea", a)
                                < EAN(trees[k - 1], "Ceb", a)))
                        || (Cea != EAN(trees[k - 1], "Cea", a)
                                && (EAN(trees[k - 1], "Cea", a)
                                        > EAN(trees[k - 1], "Ceb", a)))) {
                    u = VAP(graph, "h", a);
                    if (u == NULL) {
                        copula = EAP(trees[k - 1], "copula", a);
                        measure = EAP(trees[k - 1], "measure", a);
                        u = gsl_vector_alloc(m);
                        dml_copula_h(copula, measure->x, measure->y, u);
                        SETVAP(graph, "h", a, u);
                        gsl_sort_vector_index(perm, u);
                        rank = gsl_permutation_alloc(m);
                        gsl_permutation_inverse(rank, perm);
                        SETVAP(graph, "hrank", a, rank);
                    }
                    u_rank = VAP(graph, "hrank", a);
                }
                // ... and hrev (arguments reversed) in the opposite case.
                if ((Cea == EAN(trees[k - 1], "Cea", a)
                        && (EAN(trees[k - 1], "Cea", a)
                                > EAN(trees[k - 1], "Ceb", a)))
                        || (Cea != EAN(trees[k - 1], "Cea", a)
                                && (EAN(trees[k - 1], "Cea", a)
                                        < EAN(trees[k - 1], "Ceb", a)))) {
                    u = VAP(graph, "hrev", a);
                    if (u == NULL) {
                        copula = EAP(trees[k - 1], "copula", a);
                        measure = EAP(trees[k - 1], "measure", a);
                        u = gsl_vector_alloc(m);
                        dml_copula_h(copula, measure->y, measure->x, u);
                        SETVAP(graph, "hrev", a, u);
                        gsl_sort_vector_index(perm, u);
                        rank = gsl_permutation_alloc(m);
                        gsl_permutation_inverse(rank, perm);
                        SETVAP(graph, "hrevrank", a, rank);
                    }
                    u_rank = VAP(graph, "hrevrank", a);
                }

                // Assign v and v_rank (same scheme, for vertex b and Ceb).
                if ((Ceb == EAN(trees[k - 1], "Cea", b)
                        && (EAN(trees[k - 1], "Cea", b)
                                < EAN(trees[k - 1], "Ceb", b)))
                        || (Ceb != EAN(trees[k - 1], "Cea", b)
                                && (EAN(trees[k - 1], "Cea", b)
                                        > EAN(trees[k - 1], "Ceb", b)))) {
                    v = VAP(graph, "h", b);
                    if (v == NULL) {
                        copula = EAP(trees[k - 1], "copula", b);
                        measure = EAP(trees[k - 1], "measure", b);
                        v = gsl_vector_alloc(m);
                        dml_copula_h(copula, measure->x, measure->y, v);
                        SETVAP(graph, "h", b, v);
                        gsl_sort_vector_index(perm, v);
                        rank = gsl_permutation_alloc(m);
                        gsl_permutation_inverse(rank, perm);
                        SETVAP(graph, "hrank", b, rank);
                    }
                    v_rank = VAP(graph, "hrank", b);

                }
                if ((Ceb == EAN(trees[k - 1], "Cea", b)
                        && (EAN(trees[k - 1], "Cea", b)
                                > EAN(trees[k - 1], "Ceb", b)))
                        || (Ceb != EAN(trees[k - 1], "Cea", b)
                                && (EAN(trees[k - 1], "Cea", b)
                                        < EAN(trees[k - 1], "Ceb", b)))) {
                    v = VAP(graph, "hrev", b);
                    if (v == NULL) {
                        copula = EAP(trees[k - 1], "copula", b);
                        measure = EAP(trees[k - 1], "measure", b);
                        v = gsl_vector_alloc(m);
                        dml_copula_h(copula, measure->y, measure->x, v);
                        SETVAP(graph, "hrev", b, v);
                        gsl_sort_vector_index(perm, v);
                        rank = gsl_permutation_alloc(m);
                        gsl_permutation_inverse(rank, perm);
                        SETVAP(graph, "hrevrank", b, rank);
                    }
                    v_rank = VAP(graph, "hrevrank", b);
                }

                // Set the weight of the edge. The arguments are ordered here.
                // The order determines the x and y fields of measure.
                if (Cea < Ceb) {
                    rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank);
                } else {
                    rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank);
                }
            }
        }

        // Compute the minimum weight spanning tree.
        trees[k] = g_malloc(sizeof(igraph_t));
        igraph_vector_init(graph_weight, igraph_ecount(graph));
        EANV(graph, "weight", graph_weight);
        igraph_minimum_spanning_tree_prim(graph, trees[k], graph_weight);
        igraph_vector_destroy(graph_weight);

        tree_aic = 0;
        for (e = 0; e < igraph_ecount(trees[k]); e++) {
            igraph_edge(trees[k], e, &a, &b);
            Cea = EAN(trees[k], "Cea", e);
            Ceb = EAN(trees[k], "Ceb", e);
            measure = EAP(trees[k], "measure", e);

            // Assign a bivariate copula to the edge.
            if (Cea < Ceb) {
                copula = dml_copula_select(measure->x, measure->y, measure,
                                           indeptest, indeptest_level, types,
                                           types_size, select, rng);
                // Get information for the truncation of the vine.
                if (trunc == DML_VINE_TRUNC_AIC) {
                    dml_copula_aic(copula, measure->x, measure->y, &copula_aic);
                    tree_aic += copula_aic;
                }
            } else {
                copula = dml_copula_select(measure->y, measure->x, measure,
                                           indeptest, indeptest_level, types,
                                           types_size, select, rng);
                // Get information for the truncation of the vine.
                if (trunc == DML_VINE_TRUNC_AIC) {
                    dml_copula_aic(copula, measure->y, measure->x, &copula_aic);
                    tree_aic += copula_aic;
                }
            }
            SETEAP(trees[k], "copula", e, copula);
        }

        igraph_destroy(graph);

        // Check if the vine should be truncated.
        if (trunc == DML_VINE_TRUNC_AIC && tree_aic >= 0) {
            // Free the memory used for the last tree.
            rvine_tree_cleanup(trees[k]);
            for (e = 0; e < igraph_ecount(trees[k]); e++) {
                copula = EAP(trees[k], "copula", e);
                dml_copula_free(copula);
            }
            igraph_destroy(trees[k]);
            g_free(trees[k]);
            trees[k] = NULL;
            // Leaving here keeps k < n - 1 and skips the cleanup of
            // trees[k - 1] (see the comment after the loop).
            break;
        }

        if (k > 0) rvine_tree_cleanup(trees[k - 1]);
    }

    // Cleanup the last tree if the vine was completely estimated.
    // If the vine was truncated, the last tree will be freed in
    // the function vine_fit_rvine, because the rvine_trees_to_vine
    // function needs some attributes of its edges.
    if (k == n - 1) {
        rvine_tree_cleanup(trees[n - 2]);
    }

    g_free(graph_weight);
    g_free(graph);
    gsl_permutation_free(perm);
}