/*
 * Fill the matrix representation of an R-vine from its sequence of trees.
 *
 * vine:  the vine to fill. Reads vine->dim; writes vine->trees,
 *        vine->order, vine->matrix and vine->copulas.
 * trees: array indexed by tree number. Trailing NULL entries are taken to
 *        mark trees that were truncated; the array is assumed to hold
 *        vine->dim - 1 entries (TODO confirm against the caller).
 *
 * When the vine is truncated, a structure is still selected for the missing
 * trees (an unweighted spanning tree over the edges allowed by the proximity
 * condition). The matrix entries derived from those trees get x_hat = 0 and
 * a NULL copula. These temporary trees are released before returning.
 *
 * If no tree is present at all, only vine->trees is set (to 0).
 */
static void rvine_trees_to_vine(dml_vine_t *vine, igraph_t **trees) {
    size_t n = vine->dim;
    size_t *order_inv;
    gsl_vector_short *B;
    igraph_integer_t Cea, Ceb;
    size_t x = 0, x_hat = 0, x_hat_hat = 0; // Initialized to avoid GCC warnings.
    dml_copula_t *copula = NULL; // Initialized to avoid GCC warnings.
    igraph_integer_t e; // Edge id.
    igraph_integer_t a, b, aa, ab, ba, bb; // Vertex id.
    igraph_t **last_trees = NULL;
    igraph_t *graph = NULL;
    gsl_vector_short *Ue, *Ua, *Ub;

    // Set the number of trees of the vine: drop the trailing NULL entries.
    // The loop is bounded so that an array holding only NULL entries (or a
    // vine with fewer than two variables) never reads before trees[0].
    vine->trees = (n > 0) ? n - 1 : 0;
    while (vine->trees > 0 && trees[vine->trees - 1] == NULL) {
        vine->trees--;
    }

    // Nothing to do for vines without trees.
    if (vine->trees == 0) {
        return;
    }

    // Selecting a structure for the trees that were truncated.
    // Is this really necessary? Think of a better solution.
    if (vine->trees != n - 1) {
        igraph_i_set_attribute_table(&igraph_cattribute_table);
        last_trees = g_malloc_n(n - 1 - vine->trees, sizeof(igraph_t *));
        graph = g_malloc(sizeof(igraph_t));

        for (size_t k = vine->trees; k < n - 1; k++) { // Tree index.
            // Previous tree: the last fitted tree for the first truncated
            // level, otherwise the structure selected one level below.
            igraph_t *prev = (k <= vine->trees)
                                 ? trees[k - 1]
                                 : last_trees[k - 1 - vine->trees];

            igraph_empty(graph, n - k, IGRAPH_UNDIRECTED);

            // Adding all "possible" edges.
            for (a = 0; a < igraph_vcount(graph) - 1; a++) {
                for (b = a + 1; b < igraph_vcount(graph); b++) {
                    // Checking the proximity condition: the two edges of
                    // the previous tree must share a vertex.
                    igraph_edge(prev, a, &aa, &ab);
                    igraph_edge(prev, b, &ba, &bb);
                    if (aa == ba || aa == bb || ab == ba || ab == bb) {
                        igraph_add_edge(graph, a, b);
                        igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1);

                        // Variables "connected" by this edge and
                        // conditioned set.
                        Ua = EAP(prev, "Ue", a);
                        Ub = EAP(prev, "Ue", b);
                        Ue = gsl_vector_short_calloc(n);
                        for (size_t i = 0; i < n; i++) {
                            gsl_vector_short_set(Ue, i,
                                    gsl_vector_short_get(Ua, i)
                                    | gsl_vector_short_get(Ub, i));
                            if (gsl_vector_short_get(Ua, i)
                                    && !gsl_vector_short_get(Ub, i)) {
                                SETEAN(graph, "Cea", e, i + 1);
                            }
                            if (gsl_vector_short_get(Ub, i)
                                    && !gsl_vector_short_get(Ua, i)) {
                                SETEAN(graph, "Ceb", e, i + 1);
                            }
                        }
                        SETEAP(graph, "Ue", e, Ue);
                    }
                }
            }

            // Compute the (unweighted) spanning tree.
            // NOTE(review): the "Ue" vectors of the candidate edges that are
            // not kept in the spanning tree do not appear to be freed
            // anywhere in this function -- confirm.
            last_trees[k - vine->trees] = g_malloc(sizeof(igraph_t));
            igraph_minimum_spanning_tree_unweighted(graph,
                    last_trees[k - vine->trees]);

            igraph_destroy(graph);
        }
    }

    order_inv = g_malloc0_n(n, sizeof(size_t));
    B = gsl_vector_short_calloc(n); // B[v] != 0: variable v + 1 already on the diagonal.

    // for loop in line 2.
    for (size_t i = 0; i < n - 1; i++) {
        if (trees[n - i - 2] == NULL) {
            // Truncated tree: use the selected structure, without copula.
            igraph_t *tree = last_trees[n - i - 2 - vine->trees];

            for (e = 0; e < igraph_ecount(tree); e++) {
                x = EAN(tree, "Cea", e);
                x_hat = EAN(tree, "Ceb", e);
                if (!gsl_vector_short_get(B, x - 1)
                        && !gsl_vector_short_get(B, x_hat - 1)) {
                    x_hat = 0;
                    copula = NULL;
                    break;
                }
            }
        } else {
            igraph_t *tree = trees[n - i - 2];

            for (e = 0; e < igraph_ecount(tree); e++) {
                x = EAN(tree, "Cea", e);
                x_hat = EAN(tree, "Ceb", e);
                if (!gsl_vector_short_get(B, x - 1)
                        && !gsl_vector_short_get(B, x_hat - 1)) {
                    copula = EAP(tree, "copula", e);
                    break;
                }
            }
        }

        // Line 4.
        gsl_vector_short_set(B, x - 1, 1);
        vine->order[n - i - 1] = x - 1;
        order_inv[x - 1] = n - i;
        vine->matrix[i][i] = x;
        vine->matrix[i + 1][i] = x_hat;
        vine->copulas[i + 1][i] = copula;

        // for loop in line 5.
        for (size_t k = i + 2; k < n; k++) {
            if (trees[n - k - 1] != NULL) {
                igraph_t *tree = trees[n - k - 1];

                for (e = 0; e < igraph_ecount(tree); e++) {
                    Cea = EAN(tree, "Cea", e);
                    Ceb = EAN(tree, "Ceb", e);
                    if (x == Cea) {
                        x_hat_hat = Ceb;
                        if (!gsl_vector_short_get(B, x_hat_hat - 1)) {
                            // The pseudocode of the algorithm does not
                            // include this check. Invalid matrices were
                            // generated when x_hat_hat is set to an index
                            // already assigned to a diagonal entry.
                            copula = EAP(tree, "copula", e);
                            break;
                        }
                    } else if (x == Ceb) {
                        x_hat_hat = Cea;
                        if (!gsl_vector_short_get(B, x_hat_hat - 1)) {
                            // Ibidem to the previous comment.
                            copula = EAP(tree, "copula", e);
                            break;
                        }
                    }
                }
                vine->matrix[k][i] = x_hat_hat;
                vine->copulas[k][i] = copula;
            }
        }
    }

    // The only variable not yet placed goes to the last diagonal entry.
    for (size_t i = 0; i < n; i++) {
        if (!gsl_vector_short_get(B, i)) {
            vine->matrix[n - 1][n - 1] = i + 1;
            vine->order[0] = i;
            order_inv[i] = 1;
            break;
        }
    }

    // Reorder the variables. The simulation algorithm assumes that the
    // diagonal entries of the R-vine matrix are ordered from n to 1.
    for (size_t i = 0; i < n; i++) {
        for (size_t j = 0; j <= i; j++) {
            if (vine->matrix[i][j] > 0) {
                vine->matrix[i][j] = order_inv[vine->matrix[i][j] - 1];
            }
        }
    }

    // Release the structures selected for the truncated trees.
    if (vine->trees != n - 1) {
        for (size_t i = 0; i < n - 1 - vine->trees; i++) {
            for (e = 0; e < igraph_ecount(last_trees[i]); e++) {
                Ue = EAP(last_trees[i], "Ue", e);
                gsl_vector_short_free(Ue);
            }
            DELEA(last_trees[i], "Ue");
            igraph_destroy(last_trees[i]);
            g_free(last_trees[i]);
        }
        g_free(last_trees);
        g_free(graph);
    }

    g_free(order_inv);
    gsl_vector_short_free(B);
}
//MCMCMC algorithm double MC3 (int N, gsl_matrix_short * Adj, int Steps, int nChains, gsl_vector_short * BestSolution, gsl_rng * r, gsl_vector * lgammaLookup, gsl_vector * logLookup){ // create the chains gsl_vector_short * Chains[nChains]; //create copies for use by Gibbs and marginal functions gsl_vector_short * ChainCopy = gsl_vector_short_calloc(N); gsl_vector_short * ChainCopy2 = gsl_vector_short_calloc(N); // create the fitness vector gsl_vector * Marginals = gsl_vector_calloc(nChains); //initialize swapping vector for RGF gsl_vector_short * RGFswap = gsl_vector_short_calloc(N+1); int i,j,k; double BestMarginal; BestMarginal = -1000000000.0; //initialize chains for(i=0; i<nChains; i++){ // allocate memory Chains[i] = gsl_vector_short_calloc(N); // initialize the population Partition_Initialize(Chains[i], N, r); } //generate temperatures assuming uniform spacing gsl_vector * Temps = gsl_vector_calloc(nChains); //step size for incrementing temperatures double StepSize; StepSize = (COLDTEMP - HOTTEMP)/((double)nChains - 1); gsl_vector_set(Temps, 0, HOTTEMP); for(i=1; i<(nChains-1); i++){ gsl_vector_set(Temps, i, gsl_vector_get(Temps, i-1)+StepSize); } gsl_vector_set(Temps, nChains-1, COLDTEMP); //for convenience, we want a copy of the Temps vector that doesn't //get swapped around gsl_vector * TempsCopy = gsl_vector_calloc(nChains); gsl_vector_memcpy(TempsCopy, Temps); //RGF all of the chains to start with for(i=0; i<nChains; i++){ RGF(N, Chains[i], RGFswap); } int chInd1, chInd2; double dtmp; int itmp; int swapFlag; for(i=0; i<Steps; i++){ //print the best likelihood we've found so far every so often if(i % 1000 == 0){ fprintf(stderr, "Step %d Best solution %1.4f\n", i, BestMarginal); } //if enough steps have passed, swap temperatures if(i % SWAPSTEPS == 0){ //try to swap using "bucket brigade" for(j=0; j<(nChains-1); j++){ //find which chains have adjacent temperatures chInd1 = -1; chInd2 = -1; for(k=0; k<nChains; k++){ if(gsl_vector_get(TempsCopy, j) == 
gsl_vector_get(Temps, k)){ chInd1 = k; } if(gsl_vector_get(TempsCopy, j+1) == gsl_vector_get(Temps, k)){ chInd2 = k; } if(chInd1 >= 0 && chInd2 >=0){ break; } } //try to swap them swapFlag = TrySwap(N, Adj, Chains[chInd1], Chains[chInd2], ChainCopy, RGFswap, gsl_vector_get(Temps, chInd1), gsl_vector_get(Temps, chInd2), r, lgammaLookup, logLookup); if(swapFlag == 1){ dtmp = gsl_vector_get(Temps, chInd1); gsl_vector_set(Temps, chInd1, gsl_vector_get(Temps, chInd2)); gsl_vector_set(Temps, chInd2, dtmp); } } } //take a step for(j=0; j<nChains; j++){ dtmp = Gibbs(N, Chains[j], ChainCopy, ChainCopy2, Adj, gsl_vector_get(Temps, j), RGFswap, r, lgammaLookup, logLookup); gsl_vector_set(Marginals, j, dtmp); } //update the best solution, if appropriate if(gsl_vector_max(Marginals) > BestMarginal){ itmp = gsl_vector_max_index(Marginals); BestMarginal = gsl_vector_get(Marginals, itmp); gsl_vector_short_memcpy(BestSolution, Chains[itmp]); fprintf(stderr, "Steps %d Best solution %.4f\n", i, BestMarginal); } } //free memory gsl_vector_short_free(RGFswap); gsl_vector_free(Temps); gsl_vector_free(TempsCopy); gsl_vector_free(Marginals); gsl_vector_short_free(ChainCopy); gsl_vector_short_free(ChainCopy2); for(i=0; i<nChains; i++){ gsl_vector_short_free(Chains[i]); } return BestMarginal; }
static void fit_rvine_trees(igraph_t **trees, const gsl_matrix *data, const dml_vine_weight_t weight, const dml_vine_trunc_t trunc, const dml_copula_indeptest_t indeptest, const double indeptest_level, const dml_copula_type_t *types, const size_t types_size, const dml_copula_select_t select, const gsl_rng *rng) { size_t m, n; igraph_t *graph; igraph_vector_t *graph_weight; dml_copula_t *copula; gsl_vector *x; igraph_integer_t e; // Edge id. igraph_integer_t a, aa, ab, b, ba, bb; // Vertex id. gsl_vector *u = NULL, *v = NULL; igraph_integer_t Cea, Ceb; gsl_vector_short *Ue, *Ua, *Ub; size_t k; dml_measure_t *measure; double tree_aic, copula_aic; gsl_permutation *perm, *rank, *u_rank = NULL, *v_rank = NULL; igraph_i_set_attribute_table(&igraph_cattribute_table); m = data->size1; n = data->size2; graph = g_malloc(sizeof(igraph_t)); graph_weight = g_malloc(sizeof(igraph_vector_t)); perm = gsl_permutation_alloc(m); for (k = 0; k < n - 1; k++) { // Tree index. if (k == 0) { igraph_full(graph, n, IGRAPH_UNDIRECTED, IGRAPH_NO_LOOPS); // Assign the observations to the nodes. for (size_t i = 0; i < n; i++) { // Variable and node index. x = gsl_vector_alloc(m); gsl_matrix_get_col(x, data, i); // Results of the h-function of the copula assigned to the // edge that corresponds to this vertex in the previous tree. // h for the h-function with its arguments in order and // hrev for the h-function with its arguments reversed. In the // first tree both are equal to the observations of the // corresponding variable, in the rest of the trees they differ. SETVAP(graph, "h", i, x); SETVAP(graph, "hrev", i, x); gsl_sort_vector_index(perm, x); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); // Ranks of the h and hrev vectors. SETVAP(graph, "hrank", i, rank); SETVAP(graph, "hrevrank", i, rank); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); // Variables "connected" by this edge. 
Ue = gsl_vector_short_calloc(n); gsl_vector_short_set(Ue, a, 1); gsl_vector_short_set(Ue, b, 1); SETEAP(graph, "Ue", e, Ue); // Conditioned set. SETEAN(graph, "Cea", e, a + 1); SETEAN(graph, "Ceb", e, b + 1); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Calculate the weight of the edge. u = VAP(graph, "h", a); v = VAP(graph, "h", b); u_rank = VAP(graph, "hrank", a); v_rank = VAP(graph, "hrank", b); // The conditioned set is ordered to make the order of the // arguments in the bivariate copulas unique as suggested in // Czado, C. (2010) Pair-Copula Constructions of Multivariate // Copulas. In Jaworski, P. and Durante, F. and Hardle, W. K. // and Rychlik, T. (eds.) Copula Theory and Its Applications, // Springer-Verlag, 93-109. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } else { igraph_empty(graph, n - k, IGRAPH_UNDIRECTED); // Adding all "possible" edges. for (a = 0; a < igraph_vcount(graph) - 1; a++) { for (b = a + 1; b < igraph_vcount(graph); b++) { igraph_edge(trees[k - 1], a, &aa, &ab); igraph_edge(trees[k - 1], b, &ba, &bb); // Checking the proximity condition. if (aa == ba || aa == bb || ab == ba || ab == bb) { igraph_add_edge(graph, a, b); igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1); // Variables "connected" by this edge and conditioned set. Ua = EAP(trees[k - 1], "Ue", a); Ub = EAP(trees[k - 1], "Ue", b); Ue = gsl_vector_short_calloc(n); for (size_t i = 0; i < n; i++) { gsl_vector_short_set(Ue, i, gsl_vector_short_get(Ua, i) | gsl_vector_short_get(Ub, i)); if (gsl_vector_short_get(Ua, i) && !gsl_vector_short_get(Ub, i)) { SETEAN(graph, "Cea", e, i + 1); } if (gsl_vector_short_get(Ub, i) && !gsl_vector_short_get(Ua, i)) { SETEAN(graph, "Ceb", e, i + 1); } } SETEAP(graph, "Ue", e, Ue); } } } // Compute pseudo-observations and edge weights. for (a = 0; a < igraph_vcount(graph); a++) { // See the comment in the code for the first tree. 
SETVAP(graph, "h", a, NULL); SETVAP(graph, "hrev", a, NULL); SETVAP(graph, "hrank", a, NULL); SETVAP(graph, "hrevrank", a, NULL); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Assign u and u_rank. if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "h", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, u); SETVAP(graph, "h", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", a, rank); } u_rank = VAP(graph, "hrank", a); } if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "hrev", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, u); SETVAP(graph, "hrev", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", a, rank); } u_rank = VAP(graph, "hrevrank", a); } // Assign v and v_rank. 
if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "h", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, v); SETVAP(graph, "h", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", b, rank); } v_rank = VAP(graph, "hrank", b); } if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "hrev", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, v); SETVAP(graph, "hrev", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", b, rank); } v_rank = VAP(graph, "hrevrank", b); } // Set the weight of the edge. The arguments are ordered here. // The order determines the x and y fields of measure. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } // Compute the minimum weight spanning tree. 
trees[k] = g_malloc(sizeof(igraph_t)); igraph_vector_init(graph_weight, igraph_ecount(graph)); EANV(graph, "weight", graph_weight); igraph_minimum_spanning_tree_prim(graph, trees[k], graph_weight); igraph_vector_destroy(graph_weight); tree_aic = 0; for (e = 0; e < igraph_ecount(trees[k]); e++) { igraph_edge(trees[k], e, &a, &b); Cea = EAN(trees[k], "Cea", e); Ceb = EAN(trees[k], "Ceb", e); measure = EAP(trees[k], "measure", e); // Assign a bivariate copula to the edge. if (Cea < Ceb) { copula = dml_copula_select(measure->x, measure->y, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->x, measure->y, &copula_aic); tree_aic += copula_aic; } } else { copula = dml_copula_select(measure->y, measure->x, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->y, measure->x, &copula_aic); tree_aic += copula_aic; } } SETEAP(trees[k], "copula", e, copula); } igraph_destroy(graph); // Check if the vine should be truncated. if (trunc == DML_VINE_TRUNC_AIC && tree_aic >= 0) { // Free the memory used for the last tree. rvine_tree_cleanup(trees[k]); for (e = 0; e < igraph_ecount(trees[k]); e++) { copula = EAP(trees[k], "copula", e); dml_copula_free(copula); } igraph_destroy(trees[k]); g_free(trees[k]); trees[k] = NULL; break; } if (k > 0) rvine_tree_cleanup(trees[k - 1]); } // Cleanup the last tree if the vine was completely estimated. // If the vine was truncated, the last tree will be freed in // the function vine_fit_rvine, because the rvine_trees_to_vine // function needs some attributes of its edges. if (k == n - 1) { rvine_tree_cleanup(trees[n - 2]); } g_free(graph_weight); g_free(graph); gsl_permutation_free(perm); }