/* *****************************************************************
   Sort the strings a[0] ... a[n-1] with the help of an anchor.
   The actual sorting is delegated to general_anchor_sort(); the job
   of this routine is to choose which anchor to use.

   depth is the number of chars that a[0] ... a[n-1] are known to have
   in common, so a direct comparison between a[i] and a[j] should
   start from position depth.

   a[] is a subsection of the suffix array, hence a[0] ... a[n-1] are
   starting positions of suffixes.

   For every a[i] we look at anchor a[i]/Anchor_dist and, possibly, at
   the one after it.  This is what justifies the size of
   Anchor_offset[] and Anchor_rank[] (set up in ds_sort()):
       Anchor_num = 2 + (n-1)/Anchor_dist

   Strategies are tried in this order:
     1. forward anchor whose sorted suffix lies in a different bucket;
     2. backward anchor (requires all a[i] to share the same preceding
        substring);
     3. forward anchor whose sorted suffix lies in the same bucket;
     4. plain pseudo_or_deep_sort().
   ***************************************************************** */
void helped_sort(Int32 *a, int n, int depth)
{
  Int32 i;
  Int32 bucket;                     /* small bucket of a[0] */
  Int32 anchor_pos, anchor_rank;

  /* Best candidates seen so far, initialised with illegal values.
     "fwd"      : sorted suffix follows a[i], in another bucket
     "fwd_same" : sorted suffix follows a[i], in the bucket of a[]
     "back"     : sorted suffix precedes a[i]                      */
  Int32 fwd_off = INT_MAX,      fwd_anchor = -1,      fwd_idx = -1;
  Int32 fwd_off_same = INT_MAX, fwd_anchor_same = -1, fwd_idx_same = -1;
  Int32 back_off = INT_MIN,     back_anchor = -1,     back_idx = -1;

  Calls_helped_sort++;              /* update count */

  /* simplest case: a single string needs no sorting */
  if (n == 1)
    goto done_sorting;

  /* without anchors fall back to pseudo-anchors or deep_sort */
  if (Anchor_dist == 0) {
    pseudo_or_deep_sort(a, n, depth);
    return;
  }

  bucket = Get_small_bucket(a[0]);

  /* ---- scan the anchor preceding each a[i] ---- */
  for (i = 0; i < n; i++) {
    Int32 pos        = a[i];
    Int32 anc        = pos / Anchor_dist;    /* anchor preceding pos     */
    Int32 pos_off    = pos % Anchor_dist;    /* distance of a[i] from it */
    Int32 sorted_off = Anchor_offset[anc];   /* distance of sorted suf   */
    Int32 diff, cand_anchor, cand_diff;

    /* an offset >= Anchor_dist marks an anchor that is not "sorted" */
    if (sorted_off >= Anchor_dist)
      continue;

    diff = sorted_off - pos_off;
    assert(diff != 0);

    if (diff > 0) {
      /* anchor <= a[i] < (sorted suffix): forward candidate */
      cand_anchor = anc;
      cand_diff   = diff;
    }
    else {
      /* anchor <= (sorted suffix) < a[i]: backward candidate */
      if (diff > back_off) {
        back_off    = diff;
        back_anchor = anc;
        back_idx    = i;
      }
      /* look for a sorted suffix > a[i] in the next anchor */
      sorted_off = Anchor_offset[anc + 1];
      if (sorted_off >= Anchor_dist)
        continue;
      cand_anchor = anc + 1;
      cand_diff   = Anchor_dist + sorted_off - pos_off;
      assert(cand_diff > 0);
    }

    /* record the forward candidate in the slot matching its bucket */
    if (bucket != Get_small_bucket(pos + cand_diff)) {
      if (cand_diff < fwd_off) {
        fwd_off    = cand_diff;
        fwd_anchor = cand_anchor;
        fwd_idx    = i;
      }
    }
    else if (cand_diff < fwd_off_same) {
      /* the sorted suffix is in the same bucket as a[0]..a[n-1] */
      fwd_off_same    = cand_diff;
      fwd_anchor_same = cand_anchor;
      fwd_idx_same    = i;
    }
  }

  /* ------ 1. forward anchor_sort, anchor in another bucket ------ */
  if (fwd_anchor >= 0 && fwd_off < depth - 1) {
    Calls_anchor_sort_forw++;
    assert(fwd_off < 2 * Anchor_dist);
    anchor_pos  = a[fwd_idx] + fwd_off;
    anchor_rank = Anchor_rank[fwd_anchor];
    assert(Sa[anchor_rank] == anchor_pos);
    general_anchor_sort(a, n, anchor_pos, anchor_rank, fwd_off);
    goto done_sorting;
  }

  /* ------ 2. backward anchor_sort ------ */
  if (back_anchor >= 0) {
    int usable = 1;                 /* cleared as soon as a check fails */
    int j;

    assert(back_off > -Anchor_dist && back_off < 0);

    /* the (negative) offset must be legal for every a[i] */
    for (i = 0; usable && i < n; i++) {
      if (a[i] + back_off < 0)
        usable = 0;                 /* illegal offset, give up */
    }

    /* a[0] .. a[n-1] must all be preceded by the same substring */
    if (usable) {
      UChar *T0 = Text + a[0];
      for (i = 1; usable && i < n; i++) {
        UChar *Ti = Text + a[i];
        for (j = back_off; j <= -1; j++) {
          if (T0[j] != Ti[j]) {
            usable = 0;             /* mismatch, give up */
            break;
          }
        }
      }
    }

    if (usable) {
      Calls_anchor_sort_backw++;
      anchor_pos  = a[back_idx] + back_off;
      anchor_rank = Anchor_rank[back_anchor];
      assert(Sa[anchor_rank] == anchor_pos);
      general_anchor_sort(a, n, anchor_pos, anchor_rank, back_off);
      goto done_sorting;
    }
  }

  /* ------ 3. forward anchor_sort, anchor in the same bucket ------ */
  if (fwd_anchor_same >= 0 && fwd_off_same < depth - 1) {
    int equal, lower, upper;

    assert(fwd_off_same < 2 * Anchor_dist);
    anchor_pos  = a[fwd_idx_same] + fwd_off_same;
    anchor_rank = Anchor_rank[fwd_anchor_same];
    assert(Sa[anchor_rank] == anchor_pos);

    /* establish how many suffixes can be sorted using anchor_sort() */
    equal = split_group(a, n, depth, fwd_off_same, fwd_idx_same, &lower);

    Calls_anchor_sort_forw++;
    if (equal == n) {
      general_anchor_sort(a, n, anchor_pos, anchor_rank, fwd_off_same);
    }
    else {
      /* a[0] ... a[n-1] are split into 3 groups: lower, equal, upper */
      upper = n - equal - lower;
      assert(upper >= 0);

      /* the equal group is handled through the anchor ... */
      if (equal > 1)
        general_anchor_sort(a + lower, equal, anchor_pos, anchor_rank,
                            fwd_off_same);
      /* ... while lower and upper groups go through deep_sort */
      if (lower > 1)
        pseudo_or_deep_sort(a, lower, depth);
      if (upper > 1)
        pseudo_or_deep_sort(a + lower + equal, upper, depth);
    }
    goto done_sorting;
  }

  /* ------ 4. every anchor strategy failed: deep_sort a[0] ... a[n-1] ------ */
  pseudo_or_deep_sort(a, n, depth);

 done_sorting:
  /* -------- update Anchor_rank[], Anchor_offset[] ------- */
  if (Anchor_dist > 0)
    update_anchors(a, n);
}
/*
 * Compute a layout for graph g and return it as an n x opts.dim matrix,
 * where n = agnnodes(g).
 *
 * Outline, as far as the calls below show:
 *   1. Pick k = MIN(n, MAX(opts.k, 2)) anchor nodes via select_anchors(),
 *      which also receives the k x n matrix dij (presumably filled with
 *      anchor-to-node distances -- confirm against select_anchors()).
 *   2. Optionally colour the anchors red (opts.color) and, when
 *      opts.power != 1, build a clustering with graph_cluster().
 *   3. Build the pieces reused by every solve: singular vectors u and
 *      values s (negated), the vector d, and a QR factorisation of
 *      u_trans * tmp with 1/s[i] added on the diagonal.
 *   4. Start from the positions already stored in the graph (opts.given)
 *      or from a random matrix, centred by translate_by_centroid().
 *   5. Iterate until the relative change in stress drops to EPSILON or
 *      below, or opts.max_iter iterations have run.
 *
 * All temporaries are released before returning; the returned matrix is
 * not freed here, so it is left for the caller to release.
 */
mat mars(Agraph_t* g, struct marsopts opts)
{
    int row, col_idx, iter;
    int n = agnnodes(g);
    int k = MIN(n, MAX(opts.k, 2));
    double rel_change = 1;
    double prev_stress = -1;
    int *clusters = NULL;

    double *s = malloc(sizeof *s * k);
    double *ones = malloc(sizeof *ones * n);
    int *anchors = malloc(sizeof *anchors * k);

    mat dij = mat_new(k, n);
    mat u = mat_new(n, k);
    mat tmp = mat_new(n, k);
    mat u_trans, tmp2, q, r, q_t, z;
    double *d;

    darrset(ones, n, -1);

    /* ---- anchors, optional colouring and clustering ---- */
    select_anchors(g, dij, anchors, k);
    if (opts.color) {
        for (row = 0; row < k; row++) {
            Agnode_t *node = get_node(anchors[row]);
            agset(node, "color", "red");
        }
    }
    if (opts.power != 1)
        clusters = graph_cluster(g, dij, anchors);

    /* ---- quantities shared by all solves in the main loop ---- */
    singular_vectors(g, dij, opts.power, u, s);
    vec_scalar_mult(s, k, -1);
    u_trans = mat_trans(u);
    d = mat_mult_for_d(u, s, u_trans, ones);

    /* column i of tmp is inv_mul_ax(d, column i of u) */
    for (col_idx = 0; col_idx < u->c; col_idx++) {
        double *u_col = mat_col(u, col_idx);
        double *solved = inv_mul_ax(d, u_col, u->r);
        for (row = 0; row < u->r; row++)
            tmp->m[mindex(row, col_idx, tmp)] = solved[row];
        free(solved);
        free(u_col);
    }

    tmp2 = mat_mult(u_trans, tmp);
    for (row = 0; row < k; row++)
        tmp2->m[mindex(row, row, tmp2)] += (1.0 / s[row]);

    q = mat_new(tmp2->r, tmp2->c);
    r = mat_new(tmp2->c, tmp2->c);
    qr_factorize(tmp2, q, r);
    q_t = mat_trans(q);

    /* ---- initial layout ---- */
    z = opts.given ? get_positions(g, opts.dim) : mat_rand(n, opts.dim);
    translate_by_centroid(z);

    if (opts.viewer) {
        init_viewer(g, opts.max_iter);
        append_layout(z);
    }

    /* ---- main iteration ---- */
    prev_stress = stress(z, dij, anchors, opts.power);
    for (iter = 0; rel_change > EPSILON && iter < opts.max_iter; iter++) {
        double cur_stress;
        mat right_side = (opts.power == 1)
            ? barnes_hut(z)
            : barnes_hut_cluster(z, dij, clusters, opts.power);

        /* solve one coordinate (column of z) at a time */
        for (col_idx = 0; col_idx < opts.dim; col_idx++) {
            double *rhs = mat_col(right_side, col_idx);
            double *x;
            double sum = 0;

            for (row = 0; row < right_side->r; row++)
                sum += rhs[row];

            x = inv_mul_full(d, rhs, right_side->r, u, u_trans, q_t, r);

            /* subtract the mean of the right-hand side column */
            for (row = 0; row < z->r; row++)
                z->m[mindex(row, col_idx, z)] = x[row] - sum/right_side->r;

            free(x);
            free(rhs);
        }

        adjust_anchors(g, anchors, k, z);
        update_anchors(z, dij, anchors, opts.power);
        translate_by_centroid(z);

        if (opts.viewer)
            append_layout(z);

        cur_stress = stress(z, dij, anchors, opts.power);
        rel_change = fabs(cur_stress - prev_stress)/prev_stress;
        prev_stress = cur_stress;
        mat_free(right_side);
    }

    /* ---- release everything except the result ---- */
    mat_free(dij);
    mat_free(u);
    mat_free(u_trans);
    mat_free(q);
    mat_free(r);
    mat_free(q_t);
    mat_free(tmp);
    mat_free(tmp2);
    free(s);
    free(ones);
    free(d);
    free(anchors);
    free(clusters);

    return z;
}