//Gibbs sampler step double Gibbs (int N, gsl_vector_short * Chain, gsl_vector_short * ChainCopy, gsl_vector_short * ChainCopy2, gsl_matrix_short * Adj, double Temp, gsl_vector_short * RGFswap, gsl_rng * r, gsl_vector * lgammaLookup, gsl_vector * logLookup){ //add one to nGroups to allow for a new group to be formed int nGroups = gsl_vector_short_max(Chain); //add one unless it already has all possible groups //max group number is N-1 if(nGroups < (N-1)){ nGroups++; } //initialize vector of swap likelihoods //it will need to be length N at most double swapLikelihoods[N]; //choose a random vector element to mutate int mutInd = gsl_rng_uniform_int(r, N); int i; //get likelihoods for all possible swaps for that element //this includes no change for(i=0; i<=nGroups; i++){ gsl_vector_short_memcpy(ChainCopy, Chain); gsl_vector_short_set(ChainCopy, mutInd, i); RGF(N, ChainCopy, RGFswap); swapLikelihoods[i] = Temp * Partition_Marginal(ChainCopy, ChainCopy2, RGFswap, Adj, N, lgammaLookup, logLookup); } //find the minimum in the array double minLik = 1000000; for(i=0; i<nGroups; i++){ if(swapLikelihoods[i] < minLik){ minLik = swapLikelihoods[i]; } } double CutOff = .0000000000000001; //precision cutoff = 10e-16 CutOff = log(CutOff) - log(nGroups); //subtract largest value from all, to avoid precision issues for(i=0; i<nGroups; i++){ swapLikelihoods[i] = swapLikelihoods[i] - minLik; if(swapLikelihoods[i] < CutOff){ swapLikelihoods[i] = sqrt(-1); //should result in NaN } swapLikelihoods[i] = exp(swapLikelihoods[i]); } //turn these likelihoods into cumulative probabilities for //sampling double runningTotal = 0.0; for(i=0; i<nGroups; i++){ if(!isnan(swapLikelihoods[i])){ runningTotal += swapLikelihoods[i]; swapLikelihoods[i] = runningTotal; } } //choose a mutation based on these weighted probabilities double mutVal = gsl_ran_flat(r, 0.0, runningTotal); //figure out which mutation this corresponds to int chosenIndex = 0; for(i=0; i<nGroups; i++){ if(mutVal <= swapLikelihoods[i]){ chosenIndex = i; break; } } //now update the Chain gsl_vector_short_set(Chain, mutInd, chosenIndex); RGF(N, Chain, RGFswap); return Partition_Marginal(Chain, ChainCopy, RGFswap, Adj, N, lgammaLookup, logLookup); }
static void fit_rvine_trees(igraph_t **trees, const gsl_matrix *data, const dml_vine_weight_t weight, const dml_vine_trunc_t trunc, const dml_copula_indeptest_t indeptest, const double indeptest_level, const dml_copula_type_t *types, const size_t types_size, const dml_copula_select_t select, const gsl_rng *rng) { size_t m, n; igraph_t *graph; igraph_vector_t *graph_weight; dml_copula_t *copula; gsl_vector *x; igraph_integer_t e; // Edge id. igraph_integer_t a, aa, ab, b, ba, bb; // Vertex id. gsl_vector *u = NULL, *v = NULL; igraph_integer_t Cea, Ceb; gsl_vector_short *Ue, *Ua, *Ub; size_t k; dml_measure_t *measure; double tree_aic, copula_aic; gsl_permutation *perm, *rank, *u_rank = NULL, *v_rank = NULL; igraph_i_set_attribute_table(&igraph_cattribute_table); m = data->size1; n = data->size2; graph = g_malloc(sizeof(igraph_t)); graph_weight = g_malloc(sizeof(igraph_vector_t)); perm = gsl_permutation_alloc(m); for (k = 0; k < n - 1; k++) { // Tree index. if (k == 0) { igraph_full(graph, n, IGRAPH_UNDIRECTED, IGRAPH_NO_LOOPS); // Assign the observations to the nodes. for (size_t i = 0; i < n; i++) { // Variable and node index. x = gsl_vector_alloc(m); gsl_matrix_get_col(x, data, i); // Results of the h-function of the copula assigned to the // edge that corresponds to this vertex in the previous tree. // h for the h-function with its arguments in order and // hrev for the h-function with its arguments reversed. In the // first tree both are equal to the observations of the // corresponding variable, in the rest of the trees they differ. SETVAP(graph, "h", i, x); SETVAP(graph, "hrev", i, x); gsl_sort_vector_index(perm, x); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); // Ranks of the h and hrev vectors. SETVAP(graph, "hrank", i, rank); SETVAP(graph, "hrevrank", i, rank); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); // Variables "connected" by this edge. Ue = gsl_vector_short_calloc(n); gsl_vector_short_set(Ue, a, 1); gsl_vector_short_set(Ue, b, 1); SETEAP(graph, "Ue", e, Ue); // Conditioned set. SETEAN(graph, "Cea", e, a + 1); SETEAN(graph, "Ceb", e, b + 1); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Calculate the weight of the edge. u = VAP(graph, "h", a); v = VAP(graph, "h", b); u_rank = VAP(graph, "hrank", a); v_rank = VAP(graph, "hrank", b); // The conditioned set is ordered to make the order of the // arguments in the bivariate copulas unique as suggested in // Czado, C. (2010) Pair-Copula Constructions of Multivariate // Copulas. In Jaworski, P. and Durante, F. and Hardle, W. K. // and Rychlik, T. (eds.) Copula Theory and Its Applications, // Springer-Verlag, 93-109. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } else { igraph_empty(graph, n - k, IGRAPH_UNDIRECTED); // Adding all "possible" edges. for (a = 0; a < igraph_vcount(graph) - 1; a++) { for (b = a + 1; b < igraph_vcount(graph); b++) { igraph_edge(trees[k - 1], a, &aa, &ab); igraph_edge(trees[k - 1], b, &ba, &bb); // Checking the proximity condition. if (aa == ba || aa == bb || ab == ba || ab == bb) { igraph_add_edge(graph, a, b); igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1); // Variables "connected" by this edge and conditioned set. Ua = EAP(trees[k - 1], "Ue", a); Ub = EAP(trees[k - 1], "Ue", b); Ue = gsl_vector_short_calloc(n); for (size_t i = 0; i < n; i++) { gsl_vector_short_set(Ue, i, gsl_vector_short_get(Ua, i) | gsl_vector_short_get(Ub, i)); if (gsl_vector_short_get(Ua, i) && !gsl_vector_short_get(Ub, i)) { SETEAN(graph, "Cea", e, i + 1); } if (gsl_vector_short_get(Ub, i) && !gsl_vector_short_get(Ua, i)) { SETEAN(graph, "Ceb", e, i + 1); } } SETEAP(graph, "Ue", e, Ue); } } } // Compute pseudo-observations and edge weights. for (a = 0; a < igraph_vcount(graph); a++) { // See the comment in the code for the first tree. SETVAP(graph, "h", a, NULL); SETVAP(graph, "hrev", a, NULL); SETVAP(graph, "hrank", a, NULL); SETVAP(graph, "hrevrank", a, NULL); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Assign u and u_rank. if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "h", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, u); SETVAP(graph, "h", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", a, rank); } u_rank = VAP(graph, "hrank", a); } if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "hrev", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, u); SETVAP(graph, "hrev", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", a, rank); } u_rank = VAP(graph, "hrevrank", a); } // Assign v and v_rank. if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "h", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, v); SETVAP(graph, "h", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", b, rank); } v_rank = VAP(graph, "hrank", b); } if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "hrev", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, v); SETVAP(graph, "hrev", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", b, rank); } v_rank = VAP(graph, "hrevrank", b); } // Set the weight of the edge. The arguments are ordered here. // The order determines the x and y fields of measure. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } // Compute the minimum weight spanning tree. trees[k] = g_malloc(sizeof(igraph_t)); igraph_vector_init(graph_weight, igraph_ecount(graph)); EANV(graph, "weight", graph_weight); igraph_minimum_spanning_tree_prim(graph, trees[k], graph_weight); igraph_vector_destroy(graph_weight); tree_aic = 0; for (e = 0; e < igraph_ecount(trees[k]); e++) { igraph_edge(trees[k], e, &a, &b); Cea = EAN(trees[k], "Cea", e); Ceb = EAN(trees[k], "Ceb", e); measure = EAP(trees[k], "measure", e); // Assign a bivariate copula to the edge. if (Cea < Ceb) { copula = dml_copula_select(measure->x, measure->y, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->x, measure->y, &copula_aic); tree_aic += copula_aic; } } else { copula = dml_copula_select(measure->y, measure->x, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->y, measure->x, &copula_aic); tree_aic += copula_aic; } } SETEAP(trees[k], "copula", e, copula); } igraph_destroy(graph); // Check if the vine should be truncated. if (trunc == DML_VINE_TRUNC_AIC && tree_aic >= 0) { // Free the memory used for the last tree. rvine_tree_cleanup(trees[k]); for (e = 0; e < igraph_ecount(trees[k]); e++) { copula = EAP(trees[k], "copula", e); dml_copula_free(copula); } igraph_destroy(trees[k]); g_free(trees[k]); trees[k] = NULL; break; } if (k > 0) rvine_tree_cleanup(trees[k - 1]); } // Cleanup the last tree if the vine was completely estimated. // If the vine was truncated, the last tree will be freed in // the function vine_fit_rvine, because the rvine_trees_to_vine // function needs some attributes of its edges. if (k == n - 1) { rvine_tree_cleanup(trees[n - 2]); } g_free(graph_weight); g_free(graph); gsl_permutation_free(perm); }
static void rvine_trees_to_vine(dml_vine_t *vine, igraph_t **trees) { size_t n = vine->dim; size_t *order_inv; gsl_vector_short *B; igraph_integer_t Cea, Ceb; size_t x = 0, x_hat = 0, x_hat_hat = 0; // Initialized to avoid GCC warnings. dml_copula_t *copula = NULL; // Initialized to avoid GCC warnings. igraph_integer_t e; // Edge id. igraph_integer_t a, b, aa, ab, ba, bb; // Vertex id. igraph_t **last_trees = NULL; igraph_t *graph = NULL; gsl_vector_short *Ue, *Ua, *Ub; // Set the number of trees of the vines. vine->trees = n - 1; while (trees[vine->trees - 1] == NULL) vine->trees--; // Nothing to do for vines without trees. if (vine->trees == 0) return; // Selecting a structure for the trees that were truncated. // Is this really necessary? Think a better solution. if (vine->trees != n - 1) { igraph_i_set_attribute_table(&igraph_cattribute_table); last_trees = g_malloc_n(n - 1 - vine->trees, sizeof(igraph_t *)); graph = g_malloc(sizeof(igraph_t)); for (size_t k = vine->trees; k < n - 1; k++) { // Tree index. igraph_empty(graph, n - k, IGRAPH_UNDIRECTED); // Adding all "possible" edges. for (a = 0; a < igraph_vcount(graph) - 1; a++) { for (b = a + 1; b < igraph_vcount(graph); b++) { // Checking the proximity condition. igraph_edge(k <= vine->trees ? trees[k - 1] : last_trees[k - 1 - vine->trees], a, &aa, &ab); igraph_edge(k <= vine->trees ? trees[k - 1] : last_trees[k - 1 - vine->trees], b, &ba, &bb); if (aa == ba || aa == bb || ab == ba || ab == bb) { igraph_add_edge(graph, a, b); igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1); // Variables "connected" by this edge and conditioned set. Ua = EAP(k <= vine->trees ? trees[k - 1] : last_trees[k - 1 - vine->trees], "Ue", a); Ub = EAP(k <= vine->trees ? trees[k - 1] : last_trees[k - 1 - vine->trees], "Ue", b); Ue = gsl_vector_short_calloc(n); for (size_t i = 0; i < n; i++) { gsl_vector_short_set(Ue, i, gsl_vector_short_get(Ua, i) | gsl_vector_short_get(Ub, i)); if (gsl_vector_short_get(Ua, i) && !gsl_vector_short_get(Ub, i)) { SETEAN(graph, "Cea", e, i + 1); } if (gsl_vector_short_get(Ub, i) && !gsl_vector_short_get(Ua, i)) { SETEAN(graph, "Ceb", e, i + 1); } } SETEAP(graph, "Ue", e, Ue); } } } // Compute the minimum weight spanning tree. last_trees[k - vine->trees] = g_malloc(sizeof(igraph_t)); igraph_minimum_spanning_tree_unweighted(graph, last_trees[k - vine->trees]); igraph_destroy(graph); } } order_inv = g_malloc0_n(n, sizeof(size_t)); B = gsl_vector_short_calloc(n); // for loop in line 2. for (size_t i = 0; i < n - 1; i++) { if (trees[n - i - 2] == NULL) { for (e = 0; e < igraph_ecount(last_trees[n - i - 2 - vine->trees]); e++) { x = EAN(last_trees[n - i - 2 - vine->trees], "Cea", e); x_hat = EAN(last_trees[n - i - 2 - vine->trees], "Ceb", e); if (!gsl_vector_short_get(B, x - 1) && !gsl_vector_short_get(B, x_hat - 1)) { x_hat = 0; copula = NULL; break; } } } else { for (e = 0; e < igraph_ecount(trees[n - i - 2]); e++) { x = EAN(trees[n - i - 2], "Cea", e); x_hat = EAN(trees[n - i - 2], "Ceb", e); if (!gsl_vector_short_get(B, x - 1) && !gsl_vector_short_get(B, x_hat - 1)) { copula = EAP(trees[n - i - 2], "copula", e); break; } } } // Line 4. gsl_vector_short_set(B, x - 1, 1); vine->order[n - i - 1] = x - 1; order_inv[x - 1] = n - i; vine->matrix[i][i] = x; vine->matrix[i + 1][i] = x_hat; vine->copulas[i + 1][i] = copula; // for loop in line 5. for (size_t k = i + 2; k < n; k++) { if (trees[n - k - 1] != NULL) { for (e = 0; e < igraph_ecount(trees[n - k - 1]); e++) { Cea = EAN(trees[n - k - 1], "Cea", e); Ceb = EAN(trees[n - k - 1], "Ceb", e); if (x == Cea) { x_hat_hat = Ceb; if (!gsl_vector_short_get(B, x_hat_hat - 1)) { // The pseudocode of the algorithm does not included // this check. Invalid matrices were generated when // x_hat_hat is set to an index already assigned // to a diagonal entry. copula = EAP(trees[n - k - 1], "copula", e); break; } } else if (x == Ceb) { x_hat_hat = Cea; if (!gsl_vector_short_get(B, x_hat_hat - 1)) { // Ibdem to the previous comment. copula = EAP(trees[n - k - 1], "copula", e); break; } } } vine->matrix[k][i] = x_hat_hat; vine->copulas[k][i] = copula; } } } for (size_t i = 0; i < n; i++) { if (!gsl_vector_short_get(B, i)) { vine->matrix[n - 1][n - 1] = i + 1; vine->order[0] = i; order_inv[i] = 1; break; } } // Reorder the variables. The simulation algorithm assumes that the // diagonal entries of the R-vine matrix are ordered from n to 1. for (size_t i = 0; i < n; i++) { for (size_t j = 0; j <= i; j++) { if (vine->matrix[i][j] > 0) { vine->matrix[i][j] = order_inv[vine->matrix[i][j] - 1]; } } } if (vine->trees != n - 1) { for (size_t i = 0; i < n - 1 - vine->trees; i++) { for (e = 0; e < igraph_ecount(last_trees[i]); e++) { Ue = EAP(last_trees[i], "Ue", e); gsl_vector_short_free(Ue); } DELEA(last_trees[i], "Ue"); igraph_destroy(last_trees[i]); g_free(last_trees[i]); } g_free(last_trees); g_free(graph); } g_free(order_inv); gsl_vector_short_free(B); }
static void dvine_select_order(dml_vine_t *vine, const gsl_matrix *data, dml_vine_weight_t weight, dml_measure_t ***measure_matrix, const gsl_rng *rng) { int n = (int) vine->dim; double **weight_matrix; size_t selected_node; gsl_vector_ulong *tour; gsl_vector_short *in_tour; double selected_cost, current_cost; size_t current_pos = 0, selected_pos = 0; // Initialized to avoid GCC warnings. double dik, dkj, dij; size_t i, j; int cut_index; // Compute the weights. The weights are minimized. weight_matrix = g_malloc_n(n, sizeof(double *)); for (size_t i = 0; i < n; i++) { weight_matrix[i] = g_malloc_n(n, sizeof(double)); for (size_t j = 0; j < i; j++) { switch (weight) { case DML_VINE_WEIGHT_TAU: weight_matrix[i][j] = 1 - fabs(dml_measure_tau_coef(measure_matrix[i][j])); break; case DML_VINE_WEIGHT_CVM: weight_matrix[i][j] = measure_matrix[i][j]->x->size - dml_measure_cvm_stat(measure_matrix[i][j]); break; default: weight_matrix[i][j] = 0; break; } weight_matrix[j][i] = weight_matrix[i][j]; } } // Compute an approximate solution for the TSP instance using the // Cheapest insertion heuristic. The dummy node has index n. tour = gsl_vector_ulong_alloc(n + 1); in_tour = gsl_vector_short_alloc(n + 1); gsl_vector_short_set_all(in_tour, FALSE); for (size_t node_count = 0; node_count < n + 1; node_count++) { // Select the node to be inserted. if (node_count == 0) { // Initial node (randomly selected). selected_node = floor((n + 1) * gsl_rng_uniform(rng)); gsl_vector_ulong_set(tour, node_count, selected_node); gsl_vector_short_set(in_tour, selected_node, TRUE); } else { selected_cost = GSL_DBL_MAX; // Rest of the nodes. Choose the nodes with the minimal insertion cost. for (size_t k = 0; k < n + 1; k++) { if (!gsl_vector_short_get(in_tour, k)) { if (node_count == 1) { i = gsl_vector_ulong_get(tour, 0); current_cost = (i == n || k == n) ? 0: weight_matrix[i][k]; current_pos = 0; } else { current_cost = GSL_DBL_MAX; for (size_t pos = 0; pos < node_count - 1; pos++) { i = gsl_vector_ulong_get(tour, pos); j = gsl_vector_ulong_get(tour, pos + 1); dik = (i == n || k == n) ? 0: weight_matrix[i][k]; dkj = (k == n || j == n) ? 0: weight_matrix[k][j]; dij = (i == n || j == n) ? 0: weight_matrix[i][j]; if (dik + dkj + dij < current_cost) { current_cost = dik + dkj + dij; current_pos = pos; } } } // Check the last position. i = gsl_vector_ulong_get(tour, node_count - 1); j = gsl_vector_ulong_get(tour, 0); dik = (i == n || k == n) ? 0: weight_matrix[i][k]; dkj = (k == n || j == n) ? 0: weight_matrix[k][j]; dij = (i == n || j == n) ? 0: weight_matrix[i][j]; if (dik + dkj + dij < current_cost) { current_cost = dik + dkj + dij; current_pos = node_count - 1; } if (current_cost < selected_cost) { selected_node = k; selected_cost = current_cost; selected_pos = current_pos; } } } // Add the selected node to the tour. for (size_t pos = selected_pos; pos < node_count; pos++) { i = gsl_vector_ulong_get(tour, pos + 1); if (pos == selected_pos) { gsl_vector_ulong_set(tour, pos + 1, selected_node); } else { gsl_vector_ulong_set(tour, pos + 1, j); } j = i; } gsl_vector_short_set(in_tour, selected_node, TRUE); } } // Cut the tour at the dummy node. cut_index = -1; for (int i = 0; i < n + 1; i++) { if (cut_index >= 0) { vine->order[i - cut_index - 1] = gsl_vector_ulong_get(tour, i); } else if (gsl_vector_ulong_get(tour, i) == n) { cut_index = i; } } for (int i = 0; i < cut_index; i++) { vine->order[n - cut_index + i] = gsl_vector_ulong_get(tour, i); } gsl_vector_ulong_free(tour); gsl_vector_short_free(in_tour); for (size_t i = 0; i < n; i++) { g_free(weight_matrix[i]); } g_free(weight_matrix); }