void MlSldaState::InitializeAssignments(bool random_init) { InitializeResponse(); InitializeLength(); LdawnState::InitializeAssignments(random_init); if (FLAGS_num_seed_docs > 0) { const gsl_vector* y = static_cast<lib_corpora::ReviewCorpus*> (corpus_.get())->train_ratings(); boost::shared_ptr<gsl_permutation> sorted(gsl_permutation_alloc(y->size), gsl_permutation_free); boost::shared_ptr<gsl_permutation> rank(gsl_permutation_alloc(y->size), gsl_permutation_free); std::vector< std::vector<int> > num_seeds_used; num_seeds_used.resize(corpus_->num_languages()); for (int ii = 0; ii < corpus_->num_languages(); ++ii) { num_seeds_used[ii].resize(num_topics_); } gsl_sort_vector_index(sorted.get(), y); gsl_permutation_inverse(rank.get(), sorted.get()); // We add one for padding so we don't try to set a document to be equal to // the number of topics. double num_train = corpus_->num_train() + 1.0; int train_seen = 0; int num_docs = corpus_->num_docs(); for (int dd = 0; dd < num_docs; ++dd) { MlSeqDoc* doc = corpus_->seq_doc(dd); int lang = doc->language(); if (!corpus_->doc(dd)->is_test()) { // We don't assign to topic zero, so it can be stopwordy int val = (int) floor((num_topics_ - 1) * rank->data[train_seen] / num_train) + 1; // Stop once we've used our limit of seed docs (too many leads to an // overfit initial state) if (num_seeds_used[lang][val] < FLAGS_num_seed_docs) { cout << "Initializing doc " << lang << " " << dd << " to " << val << " score=" << truth_[dd] << endl; for (int jj = 0; jj < (int)topic_assignments_[dd].size(); ++jj) { int term = (*doc)[jj]; const topicmod_projects_ldawn::WordPaths word = wordnet_->word(lang, term); int num_paths = word.size(); if (num_paths > 0) { ChangePath(dd, jj, val, rand() % num_paths); } else { if (use_aux_topics()) ChangeTopic(dd, jj, val); } } ++num_seeds_used[lang][val]; } ++train_seen; } } } }
/*
 * Returns a new Ruby object, wrapped with the cgsl_permutation class, that
 * holds the inverse of the permutation stored in obj. The receiver is left
 * untouched; the new permutation is released with gsl_permutation_free when
 * the wrapper is collected.
 */
static VALUE rb_gsl_permutation_inverse(VALUE obj)
{
  gsl_permutation *source = NULL;
  gsl_permutation *inverse = NULL;

  Data_Get_Struct(obj, gsl_permutation, source);

  inverse = gsl_permutation_alloc(source->size);
  gsl_permutation_inverse(inverse, source);

  return Data_Wrap_Struct(cgsl_permutation, 0, gsl_permutation_free, inverse);
}
double kendall(double *arr1,double *arr2,int n) { static gsl_vector *vec = NULL; static gsl_permutation *perm=NULL,*rank1=NULL,*rank2=NULL; static double *r=NULL; int i; double S,W,R; double nx=0; if (vec == NULL) { vec = gsl_vector_calloc(n); perm = gsl_permutation_alloc(n); rank1 = gsl_permutation_alloc(n); rank2 = gsl_permutation_alloc(n); r = (double *) VCalloc(n,sizeof(double)); } for (i=0; i<n; i++) gsl_vector_set(vec,i,arr1[i]); gsl_sort_vector_index (perm, vec); gsl_permutation_inverse (rank1, perm); for (i=0; i<n; i++) gsl_vector_set(vec,i,arr2[i]); gsl_sort_vector_index (perm, vec); gsl_permutation_inverse (rank2, perm); for (i=0; i<n; i++) r[i] = (double)(rank1->data[i] + rank2->data[i]); nx = (double)n; R = 0; for (i=0; i<n; i++) R += r[i]; R /= nx; S = 0; for (i=0; i<n; i++) S += SQR(r[i] - R); W = 12.0*S/(4.0*(nx*nx-1.0)*nx); return W; }
/*
 * PairedWilcoxTest
 *
 * Voxel-wise paired test on two groups of n images. For every voxel the
 * absolute paired differences are ranked, the ranks of the positive and of
 * the negative differences are summed, and the smaller sum is used as an
 * index into a table of z values. The z value is written to the output image
 * with a negative sign when the negative rank sum is the larger one.
 *
 * Parameters:
 *   src1, src2  arrays of n images; pixels are read as VFloat and the
 *               geometry is taken from src1[0]
 *   dest        ignored on input; it is overwritten by a fresh copy of
 *               src1[0]
 *   n           number of image pairs
 *
 * Returns the newly created z-map image.
 *
 * Voxels where fewer than n/2 pairs have both values above `tiny` in
 * magnitude are skipped and stay 0.
 *
 * NOTE(review): gsl_permutation_inverse yields ranks 0..n-1, while
 * m = n*(n+1)/2 is the sum of the ranks 1..n. The rank sums below are
 * therefore computed from 0-based ranks -- confirm this is intended.
 */
VImage PairedWilcoxTest(VImage *src1, VImage *src2, VImage dest, int n)
{
  int i, m, k, b, r, c, nslices, nrows, ncols;
  int sumpos, sumneg, w;
  int table_is_local = 0;
  double wx, u, v, z, p, tiny = 1.0e-10;
  double *ptr1, *ptr2;
  float *table = NULL;
  gsl_vector *vec1 = NULL, *vec2 = NULL;
  gsl_permutation *perm = NULL, *rank = NULL;
  extern void gsl_sort_vector_index(gsl_permutation *, gsl_vector *);

  nslices = VImageNBands(src1[0]);
  nrows = VImageNRows(src1[0]);
  ncols = VImageNColumns(src1[0]);

  dest = VCopyImage(src1[0], NULL, VAllBands);
  VFillImage(dest, VAllBands, 0);
  VSetAttr(VImageAttrList(dest), "num_images", NULL, VShortRepn, (VShort)n);
  VSetAttr(VImageAttrList(dest), "patient", NULL, VStringRepn, "paired_wilcoxtest");
  VSetAttr(VImageAttrList(dest), "modality", NULL, VStringRepn, "zmap");

  /* m = 1 + 2 + ... + n: number of table entries. */
  m = 0;
  for(i = 1; i <= n; i++)
    m += i;

  if(n > 18) {
    /* Table supplied by getTable(); convert its p values to z in place.
     * Ownership of this table is not visible here, so it is not freed. */
    table = getTable(n);
    for(i = 0; i < m; i++) {
      p = table[i];
      p *= 0.5;
      if(p < tiny)
        p = tiny;
      z = p2z(p);
      if(z < 0)
        z = 0;
      table[i] = z;
    }
  } else {
    /* Small samples: compute the significance level for every possible
     * rank sum. The original wrapped this loop in a second, redundant loop
     * over the same index. */
    table = (float *) VMalloc(sizeof(float) * m);
    table_is_local = 1;
    for(i = 0; i < m; i++) {
      wx = i;
      p = LevelOfSignificanceWXMPSR(wx, (long int)n);
      p *= 0.5;
      z = p2z(p);
      table[i] = z;
    }
  }

  vec1 = gsl_vector_calloc(n);
  vec2 = gsl_vector_calloc(n);
  perm = gsl_permutation_alloc(n);
  rank = gsl_permutation_alloc(n);

  for(b = 0; b < nslices; b++) {
    for(r = 0; r < nrows; r++) {
      for(c = 0; c < ncols; c++) {
        /* vec1 collects |u - v| (ranked below), vec2 the signed u - v. */
        k = 0;
        ptr1 = vec1->data;
        ptr2 = vec2->data;
        for(i = 0; i < n; i++) {
          u = VPixel(src1[i], b, r, c, VFloat);
          v = VPixel(src2[i], b, r, c, VFloat);
          if(ABS(u) > tiny && ABS(v) > tiny)
            k++;
          *ptr1++ = ABS(u - v);
          *ptr2++ = u - v;
        }
        /* Too few usable pairs: leave the voxel at 0. */
        if(k < n / 2)
          continue;

        gsl_sort_vector_index(perm, vec1);
        gsl_permutation_inverse(rank, perm);

        sumpos = sumneg = 0;
        ptr2 = vec2->data;
        for(i = 0; i < n; i++) {
          u = *ptr2++;
          if(u > 0)
            sumpos += rank->data[i];
          else if(u < 0)
            sumneg += rank->data[i];
        }

        /* The smaller rank sum selects the table entry. */
        w = sumpos;
        if(sumpos > sumneg)
          w = sumneg;
        if(w >= m)
          z = 0;
        else
          z = table[w];
        if(sumneg > sumpos)
          z = -z;
        VPixel(dest, b, r, c, VFloat) = z;
      }
    }
  }

  /* Release the per-call workspaces (previously leaked). */
  gsl_permutation_free(rank);
  gsl_permutation_free(perm);
  gsl_vector_free(vec2);
  gsl_vector_free(vec1);
  if(table_is_local)
    VFree(table);

  return dest;
}
void gsl_matrix_hungarian(gsl_matrix* gm_C,gsl_matrix* gm_P,gsl_vector* gv_col_inc, gsl_permutation* gp_sol, int _bprev_init, gsl_matrix *gm_C_denied, bool bgreedy) { // mexPrintf("VV\n"); long dim, startdim, enddim, n1,n2; double *C; int i,j; int **m; double *z; hungarian_problem_t p, *q; int matrix_size; double C_min=gsl_matrix_min(gm_C)-1; n1 = gm_C->size1; /* first dimension of the cost matrix */ n2 = gm_C->size2; /* second dimension of the cost matrix */ C = gm_C->data; //greedy solution if (bgreedy) { int ind,ind1,ind2; size_t *C_ind=new size_t[n1*n2]; gsl_heapsort_index(C_ind,C,n1*n2,sizeof(double),compare_doubles); bool* bperm_fix_1=new bool[n1]; bool* bperm_fix_2=new bool[n2]; int inummatch=0; for (i=0;i<n1;i++) {bperm_fix_1[i]=false;bperm_fix_2[i]=false;}; gsl_matrix_set_zero(gm_P); for (long l=0;l<n1*n2;l++) { ind=C_ind[l]; ind1=floor(ind/n1); ind2=ind%n2; if (!bperm_fix_1[ind1] and !bperm_fix_2[ind2]) { bperm_fix_1[ind1]=true; bperm_fix_2[ind2]=true; gm_P->data[ind]=1;inummatch++; }; if (inummatch==n1) break; }; delete[] bperm_fix_1;delete[] bperm_fix_2; //because C is a transpose matrix gsl_matrix_transpose(gm_P); return; }; double C_max=((gsl_matrix_max(gm_C)-C_min>1)?(gsl_matrix_max(gm_C)-C_min):1)*(n1>n2?n1:n2); m = (int**)calloc(n1,sizeof(int*)); // mexPrintf("C[2] = %f \n",C[2]); for (i=0;i<n1;i++) { m[i] = (int*)calloc(n2,sizeof(int)); for (j=0;j<n2;j++) m[i][j] = (int) (C[i+n1*j] - C_min); // mexPrintf("m[%d][%d] = %f %f\n",i,j,m[i][j],C[i+n1*j] - C_min); if (gm_C_denied!=NULL) for (j=0;j<n2;j++){ if (j==30) int dbg=1; bool bden=(gm_C_denied->data[n2*i+j]<1e-10); if (bden) m[i][j] =C_max; else int dbg=1; }; }; //normalization: rows and columns // mexPrintf("C[2] = %f \n",C[2]); double dmin; for (i=0;i<n1;i++) { dmin=m[i][0]; for (j=1;j<n2;j++) dmin= (m[i][j]<dmin)? m[i][j]:dmin; for (j=0;j<n2;j++) m[i][j]-=dmin; }; for (j=0;j<n2;j++) { dmin=m[0][j]; for (i=1;i<n1;i++) dmin= (m[i][j]<dmin)? 
m[i][j]:dmin; for (i=0;i<n1;i++) m[i][j]-=dmin; }; if ((_bprev_init) &&(gv_col_inc !=NULL)) { //dual solution v substraction for (j=0;j<n2;j++) for (i=0;i<n1;i++) m[i][j]-=gv_col_inc->data[j]; //permutation of m columns int *mt = new int[n2]; for (i=0;i<n1;i++) { for (j=0;j<n2;j++) mt[j]=m[i][j]; for (j=0;j<n2;j++) m[i][j]=mt[gsl_permutation_get(gp_sol,j)]; }; delete[] mt; }; /* initialize the hungarian_problem using the cost matrix*/ matrix_size = hungarian_init(&p, m , n1,n2, HUNGARIAN_MODE_MINIMIZE_COST) ; /* solve the assignement problem */ hungarian_solve(&p); q = &p; //gsl_matrix* gm_P=gsl_matrix_alloc(n1,n2); gsl_permutation* gp_sol_inv=gsl_permutation_alloc(n2); if (gp_sol!=NULL) gsl_permutation_inverse(gp_sol_inv,gp_sol); else gsl_permutation_init(gp_sol_inv); for (i=0;i<n1;i++) for (j=0;j<n2;j++) gsl_matrix_set(gm_P,i,j,q->assignment[i][gp_sol_inv->data[j]]); //initialization by the previous solution if ((_bprev_init) &&(gv_col_inc !=NULL)) for (j=0;j<n2;j++) gv_col_inc->data[j]=q->col_inc[gp_sol_inv->data[j]]; if ((_bprev_init) && (gp_sol!=NULL)) { for (i=0;i<n1;i++) for (j=0;j<n2;j++) if (gsl_matrix_get(gm_P,i,j)==HUNGARIAN_ASSIGNED) gp_sol->data[i]=j; }; /* free used memory */ gsl_permutation_free(gp_sol_inv); hungarian_free(&p); for (i=0;i<n1;i++) free(m[i]); free(m); /* for (int i=0;i<gm_C->size1;i++) { for (int j=0;j<gm_C->size1;j++) { mexPrintf("G[%d][%d] = %f %f \n",i,j,gsl_matrix_get(gm_P,i,j),gsl_matrix_get(gm_C,i,j)); } }*/ // mexPrintf("AAA"); //return gm_P; }
static void fit_rvine_trees(igraph_t **trees, const gsl_matrix *data, const dml_vine_weight_t weight, const dml_vine_trunc_t trunc, const dml_copula_indeptest_t indeptest, const double indeptest_level, const dml_copula_type_t *types, const size_t types_size, const dml_copula_select_t select, const gsl_rng *rng) { size_t m, n; igraph_t *graph; igraph_vector_t *graph_weight; dml_copula_t *copula; gsl_vector *x; igraph_integer_t e; // Edge id. igraph_integer_t a, aa, ab, b, ba, bb; // Vertex id. gsl_vector *u = NULL, *v = NULL; igraph_integer_t Cea, Ceb; gsl_vector_short *Ue, *Ua, *Ub; size_t k; dml_measure_t *measure; double tree_aic, copula_aic; gsl_permutation *perm, *rank, *u_rank = NULL, *v_rank = NULL; igraph_i_set_attribute_table(&igraph_cattribute_table); m = data->size1; n = data->size2; graph = g_malloc(sizeof(igraph_t)); graph_weight = g_malloc(sizeof(igraph_vector_t)); perm = gsl_permutation_alloc(m); for (k = 0; k < n - 1; k++) { // Tree index. if (k == 0) { igraph_full(graph, n, IGRAPH_UNDIRECTED, IGRAPH_NO_LOOPS); // Assign the observations to the nodes. for (size_t i = 0; i < n; i++) { // Variable and node index. x = gsl_vector_alloc(m); gsl_matrix_get_col(x, data, i); // Results of the h-function of the copula assigned to the // edge that corresponds to this vertex in the previous tree. // h for the h-function with its arguments in order and // hrev for the h-function with its arguments reversed. In the // first tree both are equal to the observations of the // corresponding variable, in the rest of the trees they differ. SETVAP(graph, "h", i, x); SETVAP(graph, "hrev", i, x); gsl_sort_vector_index(perm, x); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); // Ranks of the h and hrev vectors. SETVAP(graph, "hrank", i, rank); SETVAP(graph, "hrevrank", i, rank); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); // Variables "connected" by this edge. 
Ue = gsl_vector_short_calloc(n); gsl_vector_short_set(Ue, a, 1); gsl_vector_short_set(Ue, b, 1); SETEAP(graph, "Ue", e, Ue); // Conditioned set. SETEAN(graph, "Cea", e, a + 1); SETEAN(graph, "Ceb", e, b + 1); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Calculate the weight of the edge. u = VAP(graph, "h", a); v = VAP(graph, "h", b); u_rank = VAP(graph, "hrank", a); v_rank = VAP(graph, "hrank", b); // The conditioned set is ordered to make the order of the // arguments in the bivariate copulas unique as suggested in // Czado, C. (2010) Pair-Copula Constructions of Multivariate // Copulas. In Jaworski, P. and Durante, F. and Hardle, W. K. // and Rychlik, T. (eds.) Copula Theory and Its Applications, // Springer-Verlag, 93-109. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } else { igraph_empty(graph, n - k, IGRAPH_UNDIRECTED); // Adding all "possible" edges. for (a = 0; a < igraph_vcount(graph) - 1; a++) { for (b = a + 1; b < igraph_vcount(graph); b++) { igraph_edge(trees[k - 1], a, &aa, &ab); igraph_edge(trees[k - 1], b, &ba, &bb); // Checking the proximity condition. if (aa == ba || aa == bb || ab == ba || ab == bb) { igraph_add_edge(graph, a, b); igraph_get_eid(graph, &e, a, b, IGRAPH_UNDIRECTED, 1); // Variables "connected" by this edge and conditioned set. Ua = EAP(trees[k - 1], "Ue", a); Ub = EAP(trees[k - 1], "Ue", b); Ue = gsl_vector_short_calloc(n); for (size_t i = 0; i < n; i++) { gsl_vector_short_set(Ue, i, gsl_vector_short_get(Ua, i) | gsl_vector_short_get(Ub, i)); if (gsl_vector_short_get(Ua, i) && !gsl_vector_short_get(Ub, i)) { SETEAN(graph, "Cea", e, i + 1); } if (gsl_vector_short_get(Ub, i) && !gsl_vector_short_get(Ua, i)) { SETEAN(graph, "Ceb", e, i + 1); } } SETEAP(graph, "Ue", e, Ue); } } } // Compute pseudo-observations and edge weights. for (a = 0; a < igraph_vcount(graph); a++) { // See the comment in the code for the first tree. 
SETVAP(graph, "h", a, NULL); SETVAP(graph, "hrev", a, NULL); SETVAP(graph, "hrank", a, NULL); SETVAP(graph, "hrevrank", a, NULL); } for (e = 0; e < igraph_ecount(graph); e++) { igraph_edge(graph, e, &a, &b); Cea = EAN(graph, "Cea", e); Ceb = EAN(graph, "Ceb", e); // Assign u and u_rank. if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "h", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, u); SETVAP(graph, "h", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", a, rank); } u_rank = VAP(graph, "hrank", a); } if ((Cea == EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) > EAN(trees[k - 1], "Ceb", a))) || (Cea != EAN(trees[k - 1], "Cea", a) && (EAN(trees[k - 1], "Cea", a) < EAN(trees[k - 1], "Ceb", a)))) { u = VAP(graph, "hrev", a); if (u == NULL) { copula = EAP(trees[k - 1], "copula", a); measure = EAP(trees[k - 1], "measure", a); u = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, u); SETVAP(graph, "hrev", a, u); gsl_sort_vector_index(perm, u); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", a, rank); } u_rank = VAP(graph, "hrevrank", a); } // Assign v and v_rank. 
if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "h", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->x, measure->y, v); SETVAP(graph, "h", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrank", b, rank); } v_rank = VAP(graph, "hrank", b); } if ((Ceb == EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) > EAN(trees[k - 1], "Ceb", b))) || (Ceb != EAN(trees[k - 1], "Cea", b) && (EAN(trees[k - 1], "Cea", b) < EAN(trees[k - 1], "Ceb", b)))) { v = VAP(graph, "hrev", b); if (v == NULL) { copula = EAP(trees[k - 1], "copula", b); measure = EAP(trees[k - 1], "measure", b); v = gsl_vector_alloc(m); dml_copula_h(copula, measure->y, measure->x, v); SETVAP(graph, "hrev", b, v); gsl_sort_vector_index(perm, v); rank = gsl_permutation_alloc(m); gsl_permutation_inverse(rank, perm); SETVAP(graph, "hrevrank", b, rank); } v_rank = VAP(graph, "hrevrank", b); } // Set the weight of the edge. The arguments are ordered here. // The order determines the x and y fields of measure. if (Cea < Ceb) { rvine_set_weight(graph, weight, e, u, v, u_rank, v_rank); } else { rvine_set_weight(graph, weight, e, v, u, v_rank, u_rank); } } } // Compute the minimum weight spanning tree. 
trees[k] = g_malloc(sizeof(igraph_t)); igraph_vector_init(graph_weight, igraph_ecount(graph)); EANV(graph, "weight", graph_weight); igraph_minimum_spanning_tree_prim(graph, trees[k], graph_weight); igraph_vector_destroy(graph_weight); tree_aic = 0; for (e = 0; e < igraph_ecount(trees[k]); e++) { igraph_edge(trees[k], e, &a, &b); Cea = EAN(trees[k], "Cea", e); Ceb = EAN(trees[k], "Ceb", e); measure = EAP(trees[k], "measure", e); // Assign a bivariate copula to the edge. if (Cea < Ceb) { copula = dml_copula_select(measure->x, measure->y, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->x, measure->y, &copula_aic); tree_aic += copula_aic; } } else { copula = dml_copula_select(measure->y, measure->x, measure, indeptest, indeptest_level, types, types_size, select, rng); // Get information for the truncation of the vine. if (trunc == DML_VINE_TRUNC_AIC) { dml_copula_aic(copula, measure->y, measure->x, &copula_aic); tree_aic += copula_aic; } } SETEAP(trees[k], "copula", e, copula); } igraph_destroy(graph); // Check if the vine should be truncated. if (trunc == DML_VINE_TRUNC_AIC && tree_aic >= 0) { // Free the memory used for the last tree. rvine_tree_cleanup(trees[k]); for (e = 0; e < igraph_ecount(trees[k]); e++) { copula = EAP(trees[k], "copula", e); dml_copula_free(copula); } igraph_destroy(trees[k]); g_free(trees[k]); trees[k] = NULL; break; } if (k > 0) rvine_tree_cleanup(trees[k - 1]); } // Cleanup the last tree if the vine was completely estimated. // If the vine was truncated, the last tree will be freed in // the function vine_fit_rvine, because the rvine_trees_to_vine // function needs some attributes of its edges. if (k == n - 1) { rvine_tree_cleanup(trees[n - 2]); } g_free(graph_weight); g_free(graph); gsl_permutation_free(perm); }