Пример #1
0
/*
 * Allocate and initialise a Stringdist object for distance type `d`.
 *
 * d          distance type; selects which buffers are allocated and which
 *            extra arguments are read from the variable argument list.
 * str_len_a,
 * str_len_b  lengths used to size the work buffer (see the cases below).
 * ...        osa, dl                : double * to 4 weights (copied)
 *            lv                     : double * to 3 weights (copied)
 *            qgram, cosine, jaccard : unsigned int, stored in S->q
 *            jw                     : double * to 3 weights (copied),
 *                                     followed by a double stored in S->p
 *            hamming, lcs, soundex  : no extra arguments
 *
 * Returns the new object, or NULL when one of the malloc() calls made here
 * fails; in that case everything this function allocated has been released.
 *
 * NOTE(review): the results of new_dictionary() and new_qtree() are stored
 * unchecked; confirm how those helpers report failure.
 * TODO: set errno and return NULL for an unknown distance type.
 */

/*
 * Store room for rows * cols doubles in *out. Returns 1 on success and 0 when
 * the size overflows size_t or malloc() fails. A zero-sized request never
 * counts as a failure, whatever malloc(0) returns.
 */
static int sd_alloc_doubles(double **out, size_t rows, size_t cols){
  const size_t max_elems = ((size_t) -1) / sizeof(double);
  if ( cols != 0 && rows > max_elems / cols ) return 0;

  const size_t bytes = rows * cols * sizeof(double);
  *out = (double *) malloc(bytes);
  return (*out != NULL) || (bytes == 0);
}

Stringdist *open_stringdist(Distance d, int str_len_a, int str_len_b, ...){
  /* Widen first: the products below must not be evaluated in int. */
  const size_t len_a = (size_t) str_len_a;
  const size_t len_b = (size_t) str_len_b;
  int ok = 1;

  va_list args;
  va_start(args, str_len_b);

  Stringdist *S = (Stringdist *) malloc(sizeof(Stringdist));
  if ( S == NULL ){
    va_end(args);
    return NULL;
  }
  (*S) = (Stringdist) {d, NULL, NULL, NULL, NULL, 0L, 0.0, 0L};

  switch (d){
    case osa :
      ok = sd_alloc_doubles(&S->work, len_a + 1, len_b + 1)
        && sd_alloc_doubles(&S->weight, 4, 1);
      if ( ok ) memcpy(S->weight, va_arg(args, double *), 4*sizeof(double));
      break;
    case lv :
      ok = sd_alloc_doubles(&S->work, len_a + 1, len_b + 1)
        && sd_alloc_doubles(&S->weight, 3, 1);
      if ( ok ) memcpy(S->weight, va_arg(args, double *), 3*sizeof(double));
      break;
    case dl :
      ok = sd_alloc_doubles(&S->work, len_a + 3, len_b + 3)
        && sd_alloc_doubles(&S->weight, 4, 1);
      if ( ok ){
        memcpy(S->weight, va_arg(args, double *), 4*sizeof(double));
        /* Created last, so a failed allocation above leaves no dictionary behind. */
        S->dict = new_dictionary( str_len_a + str_len_b + 1);
      }
      break;
    case hamming :
      break;
    case lcs :
      ok = sd_alloc_doubles(&S->work, len_a + 1, len_b + 1);
      break;
    case qgram :
      S->q = va_arg(args, unsigned int);
      S->tree = new_qtree(S->q, 2L);
      break;
    case cosine :
      S->q = va_arg(args, unsigned int);
      S->tree = new_qtree(S->q, 2L);
      break;
    case jaccard :
      S->q = va_arg(args, unsigned int);
      S->tree = new_qtree(S->q, 2L);
      break;
    case jw :
      /* work holds str_len_a + str_len_b doubles */
      ok = sd_alloc_doubles(&S->work, len_a + len_b, 1)
        && sd_alloc_doubles(&S->weight, 3, 1);
      if ( ok ){
        memcpy(S->weight, va_arg(args, double *), 3*sizeof(double));
        S->p = va_arg(args, double);
      }
      break;
    case soundex :
      break;
    default :
      break;
      //TODO: set errno, return NULL
  }

  va_end(args);

  if ( !ok ){
    /* Only plain malloc'ed members can exist at this point. */
    free(S->work);
    free(S->weight);
    free(S);
    return NULL;
  }
  return S;

}
Пример #2
0
/*
 * Demo driver: adds a fixed sequence of values to a new dictionary, prints
 * it, sorts it with sort_by_value() and prints it again. Calls getch()
 * before returning 0.
 */
int main()
{
	/* values are inserted in exactly this order */
	static const int values[] = { 3, 3, 3, 2, 2, 3, 3, 2, 4, 2 };
	const int count = (int)(sizeof values / sizeof values[0]);
	dict *items = new_dictionary();
	int k;

	for (k = 0; k < count; ++k)
		add_item(items, values[k]);

	/* before sorting */
	print_list(items);
	putchar('\n');

	/* after sorting */
	sort_by_value(items);
	print_list(items);

	getch();
	return 0;
}
Пример #3
0
/*
 * Driver: collects edges from ./data/ct/ with catch_function_call_dir(),
 * builds a directed graph from them, prints reciprocity, betweenness and
 * degree statistics to stdout, runs graph_community_fastgreedy() and writes
 * the results to "funccom.fc.xlsx", "comID.fc.xlsx", "community.fc.xlsx"
 * and "comedge.fc.xlsx" in the current directory.
 *
 * Returns 0 on success and 1 when one of the output files cannot be opened.
 */
int main(int argc, char *argv[])
{
	/* the command line is not used */
	(void)argc;
	(void)argv;

	int status = 0;

	/* setup */
	char *path = "./data/ct/";
	char *path2 = "/home/gyc/Sources/linux.doc/kernel/";
	vector_char str;
	vector_char_init(&str,100);
	VECTOR(str)[0] = '\0';

	SetupLexer();
	dic_setup();
	struct dictionary *dict = new_dictionary(10000);

	graph_t lkn;
	vector_int edges;
	catch_function_call_dir(path, dict, &edges);
	printf("capacity=%d,size=%d\n",dict_capacity(dict), dict_size(dict));
	new_graph(&lkn, &edges, 0, GRAPH_DIRECTED);

	struct dictionary *filedict = new_dictionary(4);
	vector_funcP flist;
	vector_funcP_init_(&flist, dict_size(dict));
	get_function_filename(path2, dict, filedict, &flist);
	printf("filedict: capacity=%d,size=%d\n",dict_capacity(filedict), dict_size(filedict));

	/* reciprocal */
	printf("reciprocal = %f \n", graph_reciprocal(&lkn));
	vector_double res;
	vector_double_init(&res, 0);
	graph_betweenness(&lkn, &res, graph_vss_all(), GRAPH_DIRECTED);
	printf("betweenness directed:"); print_vector_double(&(res),stdout);
	vector_double_destroy(&res);

	/* degree; the edges vector is reused as the output argument here */
	graph_degree(&lkn, &edges, graph_vss_all(), GRAPH_OUT, GRAPH_NO_LOOPS);
	printf(">>>out, no loops");
	int min, max, sum;
	double ave;
	graph_degree_minmax_avesum(&lkn, &min, &max, &ave, &sum, GRAPH_OUT, GRAPH_NO_LOOPS);
	printf("minout=%d\nmaxout=%d\nsumout=%d\naveout=%f\n",min,max,sum,ave);
	graph_degree_minmax_avesum(&lkn, &min, &max, &ave, &sum, GRAPH_IN, GRAPH_NO_LOOPS);
	printf("minin=%d\nmaxin=%d\nsumin=%d\navein=%f\n",min,max,sum,ave);

	/* fast community */
	graph_reverse(&lkn);
	vector_int v1;
	vector_int_init(&v1,0);
	int ncom = 0;
	double modularity = graph_community_fastgreedy(&lkn, &v1, &ncom);

	printf("modularity = %f,ncom = %d\n",modularity,ncom);

	/* one line per dictionary entry: the v1 entry at its index, then its name */
	FILE *f = fopen("funccom.fc.xlsx","w");
	if (f == NULL) {
		/* fprintf()/fclose() on a NULL stream is undefined; stop cleanly instead */
		perror("funccom.fc.xlsx");
		status = 1;
		goto cleanup;
	}
	fprintf(f, "comID\tname\n");
	for (int i = 0; i < dict_size(dict);i++) {
		fprintf(f, "%d\t", VECTOR(v1)[i]);
		struct rb_node* e = dict_ele(dict, i);
		dic_traceback_string(e, &str);
		fprintf(f, "%s\n",VECTOR(str));
	}
	fclose(f);

	f = fopen("comID.fc.xlsx","w");
	if (f == NULL) {
		perror("comID.fc.xlsx");
		status = 1;
		goto cleanup;
	}
	output_com_filename(&flist, &v1, graph_vertices_count(&lkn), ncom, filedict, f);
	fclose(f);

	//print_vector_int(&v1, stdout);

	print_communities(&lkn, &v1, "community.fc.xlsx", "comedge.fc.xlsx");

cleanup:
	/* everything destroyed below was initialised before the first goto */
	vector_funcP_destroy(&flist);
	vector_int_destroy(&v1);
	vector_char_destroy(&str);
	vector_int_destroy(&edges);
	graph_destroy(&lkn);
	return status;
}
Пример #4
0
/*
 * For every element of `x`, find the element of `table` with the smallest
 * distance() that does not exceed maxDistance.
 *
 * x, table     inputs read element by element with get_elem();
 *              IS_CHARACTER(x) selects the mode passed to get_elem()
 * nomatch      integer; stored when no element of table qualifies
 * matchNA      integer flag; when non-zero an NA in x matches an NA in table
 * weight       real vector handed to distance()
 * maxDistance  real; largest distance that still counts as a match
 *
 * Returns an integer vector of length(x) holding 1-based positions in
 * `table`, or nomatch. Raises an R error when the work space cannot be
 * allocated.
 */
SEXP R_match_dl(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA, SEXP weight, SEXP maxDistance){
  PROTECT(x);
  PROTECT(table);
  PROTECT(nomatch);
  PROTECT(matchNA);
  PROTECT(weight);
  PROTECT(maxDistance);

  int nx = length(x)
    , ntable = length(table)
    , no_match = INTEGER(nomatch)[0]
    , match_na = INTEGER(matchNA)[0]
    , bytes = IS_CHARACTER(x)
    , ml_x = max_length(x)
    , ml_t = max_length(table);

  double *w = REAL(weight);
  double maxDist = REAL(maxDistance)[0];

  // output vector; allocated before the C work space so that a failing
  // R allocation cannot leak malloc'ed memory
  SEXP yy;
  PROTECT(yy = allocVector(INTSXP, nx));
  int *y = INTEGER(yy);

  /* claim space for workhorse; sizes are computed in size_t, not int */
  const size_t cap_x = (size_t) ml_x + 1;
  const size_t cap_t = (size_t) ml_t + 1;

  dictionary *dict = new_dictionary( ml_x + ml_t + 1 );
  double *scores = (double *) malloc( ((size_t) ml_x + 3) * ((size_t) ml_t + 2) * sizeof(double) );
  // X and T share one zero-initialised allocation: X first, T right behind it
  unsigned int *X = (unsigned int *) calloc( cap_x + cap_t, sizeof(unsigned int) );

  if ( (dict == NULL) || (scores == NULL) || (X == NULL) ){
    // NOTE(review): assumes new_dictionary() signals failure with NULL
    if ( dict != NULL ) free_dictionary(dict);
    free(X); free(scores);
    UNPROTECT(7);
    error("Unable to allocate enough memory");
  }

  unsigned int *T = X + cap_x;

  double d = R_PosInf, d1 = R_PosInf;
  int index, len_X, len_T, isna_X, isna_T;
  unsigned int *X1, *T1;
  for ( int i=0; i<nx; i++){
    index = no_match;
    // start from a zeroed buffer so nothing of the previous element is left
    // behind the current one
    memset(X, 0, cap_x*sizeof(int));
    if ( bytes ){
      X = get_elem(x, i , bytes, &len_X, &isna_X, X);
    } else {
      X1 = get_elem(x, i , bytes, &len_X, &isna_X, X);
      memcpy(X, X1, len_X*sizeof(int));
    }
    d1 = R_PosInf;

    for ( int j=0; j<ntable; j++){
      // cleared on every pass, including the ones that follow an NA element
      memset(T, 0, cap_t*sizeof(int));
      if ( bytes ){
        T = get_elem(table, j, bytes, &len_T, &isna_T, T);
      } else {
        T1 = get_elem(table, j, bytes, &len_T, &isna_T, T);
        memcpy(T, T1, len_T * sizeof(int));
      }
      if ( !isna_X && !isna_T ){        // both are char (usual case)
        d = distance(
          X, T, len_X, len_T, w, dict, scores
        );
        if ( d <= maxDist && d < d1){
          index = j + 1;
          // Stop at an exact match. Written as a range test because abs()
          // takes an int: it truncated d and accepted every distance below 1.
          if ( d > -1e-14 && d < 1e-14 ) break;
          d1 = d;
        }
      } else if ( isna_X && isna_T ) {  // both are NA
        index = match_na ? j + 1 : no_match;
        break;
      }
    }

    y[i] = index;
  }

  free_dictionary(dict);
  free(scores);
  free(X);
  UNPROTECT(7);
  return(yy);
}
Пример #5
0
/*
 * Compute distance() for the elements of `a` and `b` pairwise.
 *
 * a, b    inputs read element by element with get_elem(); IS_CHARACTER(a)
 *         selects the mode passed to get_elem()
 * weight  real vector handed to distance()
 *
 * Returns a real vector whose length is the larger of length(a) and
 * length(b). The indices into `a` and `b` are advanced with RECYCLE()
 * (presumably wrapping the shorter input; confirm against its definition).
 * An entry is NA_REAL when either element is NA and R_PosInf when
 * distance() returns a negative value. Raises an R error when the work
 * space cannot be allocated.
 */
SEXP R_dl(SEXP a, SEXP b, SEXP weight){
  PROTECT(a);
  PROTECT(b);
  PROTECT(weight);

  int na = length(a)
    , nb = length(b)
    , nt = (na > nb) ? na : nb
    , bytes = IS_CHARACTER(a)
    , ml_a = max_length(a)
    , ml_b = max_length(b);

  double *w = REAL(weight);

  // output; allocated before the C work space so that a failing R
  // allocation cannot leak malloc'ed memory
  SEXP yy;
  PROTECT(yy = allocVector(REALSXP, nt));
  double *y = REAL(yy);

  /* claim space for workhorse; sizes are computed in size_t, not int */
  const size_t cap_s = (size_t) ml_a + 1;
  const size_t cap_t = (size_t) ml_b + 1;
  const size_t slen = (cap_s + cap_t) * sizeof(int);

  dictionary *dict = new_dictionary( ml_a + ml_b + 1 );
  double *scores = (double *) malloc( ((size_t) ml_a + 3) * ((size_t) ml_b + 2) * sizeof(double) );
  // s and t share one zero-initialised allocation: s first, t right behind it
  unsigned int *s = (unsigned int *) calloc( cap_s + cap_t, sizeof(unsigned int) );

  if ( (dict == NULL) || (scores == NULL) || (s == NULL) ){
    // NOTE(review): assumes new_dictionary() signals failure with NULL
    if ( dict != NULL ) free_dictionary(dict);
    free(scores); free(s);
    UNPROTECT(4);
    error("Unable to allocate enough memory");
  }

  unsigned int *t = s + cap_s;

  int i=0, j=0, len_s, len_t, isna_s, isna_t;
  unsigned int *s1, *t1;
  for ( int k=0; k < nt; ++k ){
    if (bytes){
      s = get_elem(a, i, bytes, &len_s, &isna_s, s);
      t = get_elem(b, j, bytes, &len_t, &isna_t, t);
    } else { // make sure there's an extra 0 at the end of the string.
      s1 = get_elem(a, i, bytes, &len_s, &isna_s, s);
      t1 = get_elem(b, j, bytes, &len_t, &isna_t, t);
      memcpy(s,s1,len_s*sizeof(int));
      memcpy(t,t1,len_t*sizeof(int));
    }

    if ( isna_s || isna_t ){
      y[k] = NA_REAL;
    } else {
      y[k] = distance(
       s, t, len_s, len_t,
       w, dict, scores
      );
      if (y[k] < 0 ) y[k] = R_PosInf;
    }

    // Advance and clear on every pass. Skipping this for NA pairs left i and
    // j stuck on the NA element, so all later results became NA as well.
    i = RECYCLE(i+1,na);
    j = RECYCLE(j+1,nb);
    memset(s, 0, slen);
  }

  free_dictionary(dict);
  free(scores);
  free(s);
  UNPROTECT(4);
  return yy;
}