示例#1
0
/* *****************************************************************
   Sort the suffixes a[0] ... a[n-1] with the help of an anchor.
   This procedure only chooses the anchor; the actual sorting is
   delegated to general_anchor_sort() or, when no usable anchor is
   found, to pseudo_or_deep_sort().

   Parameters:
     a      subsection of the suffix array: a[0] ... a[n-1] are starting
            positions of suffixes in the text
     n      number of suffixes in a[]
     depth  number of chars that a[0] ... a[n-1] are known to have in
            common (thus a direct comparison between a[i] and a[j]
            should start from position depth)

   For every a[i] we look at the anchor a[i]/Anchor_dist and the one
   after that. This justifies the definition of Anchor_num (the size of
   Anchor_offset[] and Anchor_rank[] defined in ds_sort()) as
     Anchor_num = 2 + (n-1)/Anchor_dist
   (NOTE(review): the n in that formula is presumably the text length,
   not this function's parameter n -- confirm in ds_sort().)

   Strategies are tried in this order:
     1. forward anchor whose sorted suffix lies in a different small bucket
     2. backward anchor
     3. forward anchor whose sorted suffix lies in the same small bucket
     4. plain pseudo_or_deep_sort()
   On every path except the Anchor_dist==0 early return, the function
   finishes by calling update_anchors(a, n).

   NOTE(review): a[0] is read unconditionally once n!=1, so the code
   appears to assume n>=1 -- confirm against callers.
   ***************************************************************** */
void helped_sort(Int32 *a, int n, int depth)
{ 
  Int32 i, curr_sb, diff, toffset, aoffset;
  Int32 text_pos, anchor_pos, anchor, anchor_rank;
  // best candidates found so far: offset from a[i] to the sorted suffix,
  // the anchor it belongs to, and the index i of the a[i] that produced it
  Int32 min_forw_offset, min_forw_offset_buc, max_back_offset;
  Int32 best_forw_anchor, best_forw_anchor_buc, best_back_anchor; 
  Int32 forw_anchor_index, forw_anchor_index_buc, back_anchor_index;

  Calls_helped_sort++;          // update count
  // simplest case: only one string, nothing to sort
  // (the anchors are still updated at done_sorting when Anchor_dist>0)
  if(n==1) goto done_sorting;

  // if there are no anchors use pseudo-anchors or deep_sort
  // (this path returns directly and never reaches update_anchors())
  if(Anchor_dist==0) {
    pseudo_or_deep_sort(a, n, depth);
    return;
  }

  // compute the current bucket 
  curr_sb = Get_small_bucket(a[0]);

  // init best anchor variables with illegal values
  min_forw_offset = min_forw_offset_buc = INT_MAX;
  max_back_offset = INT_MIN;
  best_forw_anchor = best_forw_anchor_buc = best_back_anchor = -1; 
  forw_anchor_index = forw_anchor_index_buc = back_anchor_index = -1;
  // look at the anchor preceding each a[i]
  for(i=0;i<n;i++) {
    text_pos = a[i];
    // get anchor preceding text_pos=a[i]
    anchor = text_pos/Anchor_dist;
    toffset = text_pos % Anchor_dist;  // distance of a[i] from anchor
    aoffset = Anchor_offset[anchor];   // distance of sorted suf from anchor 
    if(aoffset<Anchor_dist) {          // check if it is a "sorted" anchor
      // signed distance from a[i] to the anchor's sorted suffix;
      // diff==0 would mean a[i] itself is that sorted suffix
      diff = aoffset - toffset;
      assert(diff!=0);
      if(diff>0) {     // anchor <=  a[i] < (sorted suffix)
	// forward candidate: keep the smallest offset, tracked separately
	// depending on whether the sorted suffix is outside curr_sb
	if(curr_sb!=Get_small_bucket(text_pos+diff)) {
	  if(diff<min_forw_offset) {
	    min_forw_offset = diff;
	    best_forw_anchor = anchor;
	    forw_anchor_index = i;
	  }
	}
	else {  // the sorted suffix belongs to the same bucket of a[0]..a[n-1]
	  if(diff<min_forw_offset_buc) {
	    min_forw_offset_buc = diff;
	    best_forw_anchor_buc = anchor;
	    forw_anchor_index_buc = i;
	  }
	}
      }
      else {          // diff<0 =>  anchor <= (sorted suffix) < a[i]
	// backward candidate: keep the offset closest to zero
	if(diff>max_back_offset) {
	  max_back_offset = diff;
	  best_back_anchor = anchor;
	  back_anchor_index = i;
	}
	// try to find a sorted suffix > a[i] by looking at next anchor
	// (note: anchor is incremented here and used below as the next anchor)
	aoffset = Anchor_offset[++anchor];
	if(aoffset<Anchor_dist) {
	  // distance from a[i] to the next anchor's sorted suffix
	  diff = Anchor_dist + aoffset - toffset;
	  assert(diff>0);
	  if(curr_sb!=Get_small_bucket(text_pos+diff)) {
	    if(diff<min_forw_offset) {
	      min_forw_offset = diff;
	      best_forw_anchor = anchor;
	      forw_anchor_index = i;
	    }
	  } else {
	    if(diff<min_forw_offset_buc) {
	      min_forw_offset_buc = diff;
	      best_forw_anchor_buc = anchor;
	      forw_anchor_index_buc = i;
	    }
	  }
	}
      }
    }
  }
  // ------ if forward anchor_sort is possible, do it! --------
  // requires a candidate outside the current bucket whose offset is
  // smaller than depth-1 (presumably so that the sorted suffix still
  // falls inside the common prefix of a[0]..a[n-1] -- confirm)
  if(best_forw_anchor>=0 && min_forw_offset<depth-1) {
    Calls_anchor_sort_forw++;
    assert(min_forw_offset<2*Anchor_dist);
    // text position and rank of the already sorted suffix used as anchor
    anchor_pos = a[forw_anchor_index] + min_forw_offset;
    anchor_rank = Anchor_rank[best_forw_anchor];
    assert(Sa[anchor_rank]==anchor_pos);
    general_anchor_sort(a,n,anchor_pos,anchor_rank,min_forw_offset);
    goto done_sorting;
  }
  // ------ if backward anchor_sort is possible do it! ---------
  if(best_back_anchor>=0) {
    UChar *T0, *Ti; int j;

    assert(max_back_offset>-Anchor_dist && max_back_offset<0);
    // make sure that the offset is legal for all a[i]
    // (i.e. a[i]+max_back_offset does not fall before the start of the text)
    for(i=0;i<n;i++) {
      if(a[i]+max_back_offset<0) 
	goto fail;                    // illegal offset, give up
    }
    // make sure that a[0] .. a[n-1] are preceded by the same substring:
    // compare the -max_back_offset chars before each a[i] with those
    // before a[0] (j runs over negative indexes max_back_offset..-1)
    T0 = Text + a[0];
    for(i=1;i<n;i++) {
      Ti = Text + a[i];
      for(j=max_back_offset; j<= -1; j++)
	if(T0[j]!=Ti[j]) goto fail;   // mismatch, give up
    }
    // backward anchor sorting is possible
    Calls_anchor_sort_backw++;
    anchor_pos = a[back_anchor_index] + max_back_offset;
    anchor_rank = Anchor_rank[best_back_anchor];
    assert(Sa[anchor_rank]==anchor_pos);
    // note: the offset passed here is negative
    general_anchor_sort(a,n,anchor_pos,anchor_rank,max_back_offset);
    goto done_sorting;
  }
  // reached by fall-through when there is no backward anchor, or by
  // goto when the backward anchor turned out to be unusable
 fail:
  // ----- try forward anchor_sort with anchor in the same bucket
  if(best_forw_anchor_buc>=0 && min_forw_offset_buc<depth-1) {
    int equal,lower,upper;

    assert(min_forw_offset_buc<2*Anchor_dist);
    anchor_pos = a[forw_anchor_index_buc] + min_forw_offset_buc;
    anchor_rank = Anchor_rank[best_forw_anchor_buc];
    assert(Sa[anchor_rank]==anchor_pos);

    // establish how many suffixes can be sorted using anchor_sort()
    // (split_group() returns the size of the "equal" group and stores
    // the size of the "lower" group in lower)
    equal=split_group(a,n,depth,min_forw_offset_buc,
                                forw_anchor_index_buc,&lower);
    if(equal==n) {
      // the whole group can be anchor-sorted
      Calls_anchor_sort_forw++;
      general_anchor_sort(a,n,anchor_pos,anchor_rank,min_forw_offset_buc);
    }
    else {
      //  -- a[0] ... a[n-1] are split into 3 groups: lower, equal, upper
      //  laid out in a[] in that order (see the a+lower / a+lower+equal
      //  arguments below)
      upper = n-equal-lower;
      assert(upper>=0);
      // printf("Warning! lo=%d eq=%d up=%d a=%x\n",lower,equal,upper,(int)a);
      // sort the equal group 
      // NOTE(review): the counter is incremented even when equal<=1 and
      // general_anchor_sort() is therefore not called -- confirm intended
      Calls_anchor_sort_forw++;
      if(equal>1)
	general_anchor_sort(a+lower,equal,anchor_pos,anchor_rank,
			    min_forw_offset_buc);

      // sort upper and lower groups using deep_sort
      if(lower>1) pseudo_or_deep_sort(a,lower,depth);
      if(upper>1) pseudo_or_deep_sort(a+lower+equal,upper,depth);
    }       // end if(equal==n) ... else
    goto done_sorting;
  }         // end hard case

  // ---------------------------------------------------------------
  // If we get here it means that everything failed
  // In this case we simply deep_sort a[0] ... a[n-1]
  // ---------------------------------------------------------------
  pseudo_or_deep_sort(a, n, depth);
 done_sorting:
  // -------- update Anchor_rank[], Anchor_offset[] ------- 
  // (the Anchor_dist test only matters for the n==1 shortcut, which
  // jumps here before Anchor_dist has been checked)
  if(Anchor_dist>0) update_anchors(a, n);
}
示例#2
0
文件: layout.c 项目: ekg/mars
/*
 * Compute a layout for graph g and return it as a matrix of positions.
 *
 * Phases, in order:
 *   1. Choose k = MIN(n, MAX(opts.k, 2)) anchors, n = agnnodes(g), and let
 *      select_anchors() fill the k x n matrix of anchor data. When
 *      opts.color is set the anchor nodes get color=red; when
 *      opts.power != 1 a clustering is obtained from graph_cluster().
 *   2. Build the operands reused on every solve: the vector returned by
 *      mat_mult_for_d(), the matrix from singular_vectors() with its
 *      transpose, and the factors produced by qr_factorize().
 *   3. Start from get_positions() (opts.given) or mat_rand(), then iterate:
 *      build a right-hand side with barnes_hut()/barnes_hut_cluster(),
 *      solve one column per dimension with inv_mul_full(), adjust/update
 *      the anchors and recentre the layout. The loop stops once the
 *      relative change in stress() is no longer above EPSILON or after
 *      opts.max_iter passes.
 *
 * Every temporary allocated here is released before returning. The
 * returned matrix is not freed here, so it presumably belongs to the
 * caller.
 */
mat mars(Agraph_t* g, struct marsopts opts)
{
    int row, col, idx, pass;
    int node_count = agnnodes(g);
    int anchor_count = MIN(node_count, MAX(opts.k, 2));
    mat anchor_dist, u_mat, u_t, q_mat, r_mat, q_mat_t, scratch, core, layout;
    double *svals = malloc(anchor_count * sizeof *svals);
    double *neg_ones = malloc(node_count * sizeof *neg_ones);
    double *d_vec;
    int *anchor_ids = malloc(anchor_count * sizeof *anchor_ids);
    int *cluster_ids = NULL;
    double rel_change = 1;
    double prev_stress;

    anchor_dist = mat_new(anchor_count, node_count);
    u_mat = mat_new(node_count, anchor_count);
    scratch = mat_new(node_count, anchor_count);
    darrset(neg_ones, node_count, -1);

    /* --- phase 1: anchors (and clusters when power != 1) --- */
    select_anchors(g, anchor_dist, anchor_ids, anchor_count);
    if (opts.color) {
        for (idx = 0; idx < anchor_count; idx++) {
            Agnode_t *node = get_node(anchor_ids[idx]);
            agset(node, "color", "red");
        }
    }
    if (opts.power != 1) {
        cluster_ids = graph_cluster(g, anchor_dist, anchor_ids);
    }

    /* --- phase 2: operands shared by all later solves --- */
    singular_vectors(g, anchor_dist, opts.power, u_mat, svals);
    vec_scalar_mult(svals, anchor_count, -1);
    u_t = mat_trans(u_mat);
    d_vec = mat_mult_for_d(u_mat, svals, u_t, neg_ones);

    /* scratch(:,col) = inv_mul_ax(d_vec, u_mat(:,col)) for every column */
    for (col = 0; col < u_mat->c; col++) {
        double *u_col = mat_col(u_mat, col);
        double *solved = inv_mul_ax(d_vec, u_col, u_mat->r);
        for (row = 0; row < u_mat->r; row++) {
            scratch->m[mindex(row, col, scratch)] = solved[row];
        }
        free(solved);
        free(u_col);
    }

    /* core = u_t * scratch, with 1/svals[i] added on the diagonal */
    core = mat_mult(u_t, scratch);
    for (idx = 0; idx < anchor_count; idx++) {
        core->m[mindex(idx, idx, core)] += (1.0 / svals[idx]);
    }
    q_mat = mat_new(core->r, core->c);
    r_mat = mat_new(core->c, core->c);
    qr_factorize(core, q_mat, r_mat);
    q_mat_t = mat_trans(q_mat);

    /* --- phase 3: initial positions, then iterate --- */
    layout = opts.given ? get_positions(g, opts.dim)
                        : mat_rand(node_count, opts.dim);
    translate_by_centroid(layout);

    if (opts.viewer) {
        init_viewer(g, opts.max_iter);
        append_layout(layout);
    }

    prev_stress = stress(layout, anchor_dist, anchor_ids, opts.power);
    for (pass = 0; rel_change > EPSILON && pass < opts.max_iter; pass++) {
        double cur_stress;
        mat rhs = (opts.power == 1)
            ? barnes_hut(layout)
            : barnes_hut_cluster(layout, anchor_dist, cluster_ids, opts.power);

        /* solve one coordinate (column of the layout) at a time */
        for (col = 0; col < opts.dim; col++) {
            double total = 0;
            double *sol;
            double *rhs_col = mat_col(rhs, col);

            for (row = 0; row < rhs->r; row++) {
                total += rhs_col[row];
            }
            sol = inv_mul_full(d_vec, rhs_col, rhs->r,
                               u_mat, u_t, q_mat_t, r_mat);
            /* subtract the mean of the right-hand-side column */
            for (row = 0; row < layout->r; row++) {
                layout->m[mindex(row, col, layout)] = sol[row] - total / rhs->r;
            }
            free(sol);
            free(rhs_col);
        }

        adjust_anchors(g, anchor_ids, anchor_count, layout);
        update_anchors(layout, anchor_dist, anchor_ids, opts.power);
        translate_by_centroid(layout);

        if (opts.viewer) {
            append_layout(layout);
        }

        /* relative stress change decides whether another pass is run */
        cur_stress = stress(layout, anchor_dist, anchor_ids, opts.power);
        rel_change = fabs(cur_stress - prev_stress) / prev_stress;
        prev_stress = cur_stress;

        mat_free(rhs);
    }

    /* release everything except the layout handed back to the caller */
    mat_free(anchor_dist);
    mat_free(u_mat);
    mat_free(u_t);
    mat_free(q_mat);
    mat_free(r_mat);
    mat_free(q_mat_t);
    mat_free(scratch);
    mat_free(core);
    free(svals);
    free(neg_ones);
    free(d_vec);
    free(anchor_ids);
    free(cluster_ids);

    return layout;
}