/*
 * Allocate room for `count` doubles.
 *
 * A request for zero elements is rounded up to one so that a NULL return
 * always means failure (malloc(0) is allowed to return NULL on success).
 * Returns NULL if the byte count would overflow size_t or malloc fails.
 */
static double *open_stringdist_alloc_doubles(size_t count)
{
    if (count == 0) {
        count = 1;
    }
    if (count > (size_t) -1 / sizeof(double)) {
        return NULL;
    }
    return (double *) malloc(count * sizeof(double));
}

/*
 * Allocate a (len_a + pad) x (len_b + pad) matrix of doubles.
 *
 * The element count is computed in size_t so long strings cannot overflow
 * int arithmetic. Returns NULL for negative lengths, overflow or malloc
 * failure.
 */
static double *open_stringdist_alloc_matrix(int len_a, int len_b, size_t pad)
{
    size_t rows, cols;

    if (len_a < 0 || len_b < 0) {
        return NULL;
    }
    rows = (size_t) len_a + pad;
    cols = (size_t) len_b + pad;
    if (cols != 0 && rows > (size_t) -1 / cols) {
        return NULL;
    }
    return open_stringdist_alloc_doubles(rows * cols);
}

/*
 * Return a freshly allocated copy of the first `count` doubles of `src`,
 * or NULL if `src` is NULL or the allocation fails.
 */
static double *open_stringdist_copy_weights(const double *src, size_t count)
{
    double *dst;

    if (src == NULL) {
        return NULL;
    }
    dst = open_stringdist_alloc_doubles(count);
    if (dst != NULL) {
        memcpy(dst, src, count * sizeof(double));
    }
    return dst;
}

/*
 * Create a Stringdist handle for distance `d`.
 *
 * str_len_a, str_len_b: string lengths used to size the work buffer for
 * the distances that need one (osa, lv, dl, lcs, jw).
 *
 * Variadic arguments, by distance:
 *   osa, dl               : double *weight  (4 values are copied)
 *   lv                    : double *weight  (3 values are copied)
 *   qgram, cosine, jaccard: unsigned int q
 *   jw                    : double *weight  (3 values are copied), double p
 *   hamming, lcs, soundex : none
 *
 * Returns the new handle, or NULL when one of the buffers allocated
 * directly in this function could not be obtained; in that case nothing
 * is leaked.
 *
 * NOTE(review): the results of new_dictionary() and new_qtree() are stored
 * without validation because their failure convention is not visible here.
 * TODO: an unknown distance still yields a handle that owns no resources;
 * consider setting errno and returning NULL instead.
 */
Stringdist *open_stringdist(Distance d, int str_len_a, int str_len_b, ...)
{
    va_list args;
    int ok = 1;

    Stringdist *S = (Stringdist *) malloc(sizeof(Stringdist));
    if (S == NULL) {
        return NULL;
    }
    (*S) = (Stringdist) {d, NULL, NULL, NULL, NULL, 0L, 0.0, 0L};

    va_start(args, str_len_b);

    switch (d) {
    case osa:
        S->work = open_stringdist_alloc_matrix(str_len_a, str_len_b, 1);
        S->weight = open_stringdist_copy_weights(va_arg(args, double *), 4);
        ok = (S->work != NULL) && (S->weight != NULL);
        break;
    case lv:
        S->work = open_stringdist_alloc_matrix(str_len_a, str_len_b, 1);
        S->weight = open_stringdist_copy_weights(va_arg(args, double *), 3);
        ok = (S->work != NULL) && (S->weight != NULL);
        break;
    case dl:
        S->work = open_stringdist_alloc_matrix(str_len_a, str_len_b, 3);
        S->weight = open_stringdist_copy_weights(va_arg(args, double *), 4);
        ok = (S->work != NULL) && (S->weight != NULL);
        /* The dictionary is created last so that the failure path below
         * never has to release one. */
        if (ok) {
            S->dict = new_dictionary(str_len_a + str_len_b + 1);
        }
        break;
    case hamming:
        break;
    case lcs:
        S->work = open_stringdist_alloc_matrix(str_len_a, str_len_b, 1);
        ok = (S->work != NULL);
        break;
    case qgram:
    case cosine:
    case jaccard:
        S->q = va_arg(args, unsigned int);
        S->tree = new_qtree(S->q, 2L);
        break;
    case jw:
        if (str_len_a >= 0 && str_len_b >= 0) {
            S->work = open_stringdist_alloc_doubles(
                (size_t) str_len_a + (size_t) str_len_b);
        }
        S->weight = open_stringdist_copy_weights(va_arg(args, double *), 3);
        S->p = va_arg(args, double);
        ok = (S->work != NULL) && (S->weight != NULL);
        break;
    case soundex:
        break;
    default:
        break;
    }

    va_end(args);

    if (!ok) {
        /* Only plain malloc'd members can be set on this path. */
        free(S->work);
        free(S->weight);
        free(S);
        return NULL;
    }
    return S;
}
/*
 * Demo driver: obtains a dict from new_dictionary(), feeds it a fixed
 * sequence of values through add_item(), prints it, calls sort_by_value()
 * and prints it again, then waits for a key press via getch().
 */
int main()
{
    static const int values[] = {3, 3, 3, 2, 2, 3, 3, 2, 4, 2};
    const int value_count = (int) (sizeof values / sizeof values[0]);
    dict *list;
    int k;

    list = new_dictionary();

    /* Insert the values in exactly the listed order. */
    k = 0;
    while (k < value_count) {
        add_item(list, values[k]);
        ++k;
    }

    print_list(list);
    putchar('\n');

    sort_by_value(list);
    print_list(list);

    getch();
    return 0;
}
int main(int argc, char *argv[]) { /* setup */ char *path = "./data/ct/"; char *path2 = "/home/gyc/Sources/linux.doc/kernel/"; vector_char str; vector_char_init(&str,100); VECTOR(str)[0] = '\0'; SetupLexer(); dic_setup(); struct dictionary *dict = new_dictionary(10000); graph_t lkn; vector_int edges; catch_function_call_dir(path, dict, &edges); printf("capacity=%d,size=%d\n",dict_capacity(dict), dict_size(dict)); new_graph(&lkn, &edges, 0, GRAPH_DIRECTED); struct dictionary *filedict = new_dictionary(4); vector_funcP flist; vector_funcP_init_(&flist, dict_size(dict)); get_function_filename(path2, dict, filedict, &flist); printf("filedict: capacity=%d,size=%d\n",dict_capacity(filedict), dict_size(filedict)); /* reciprocal */ printf("reciprocal = %f \n", graph_reciprocal(&lkn)); vector_double res; vector_double_init(&res, 0); graph_betweenness(&lkn, &res, graph_vss_all(), GRAPH_DIRECTED); printf("betweenness directed:"); print_vector_double(&(res),stdout); vector_double_destroy(&res); /* degree */ graph_degree(&lkn, &edges, graph_vss_all(), GRAPH_OUT, GRAPH_NO_LOOPS); printf(">>>out, no loops"); int min, max, sum; double ave; graph_degree_minmax_avesum(&lkn, &min, &max, &ave, &sum, GRAPH_OUT, GRAPH_NO_LOOPS); printf("minout=%d\nmaxout=%d\nsumout=%d\naveout=%f\n",min,max,sum,ave); graph_degree_minmax_avesum(&lkn, &min, &max, &ave, &sum, GRAPH_IN, GRAPH_NO_LOOPS); printf("minin=%d\nmaxin=%d\nsumin=%d\navein=%f\n",min,max,sum,ave); /* fast community */ graph_reverse(&lkn); vector_int v1; vector_int_init(&v1,0); int ncom = 0; double modularity = graph_community_fastgreedy(&lkn, &v1, &ncom); printf("modularity = %f,ncom = %d\n",modularity,ncom); FILE *f = fopen("funccom.fc.xlsx","w"); fprintf(f, "comID\tname\n"); for (int i = 0; i < dict_size(dict);i++) { fprintf(f, "%d\t", VECTOR(v1)[i]); struct rb_node* e = dict_ele(dict, i); dic_traceback_string(e, &str); fprintf(f, "%s\n",VECTOR(str)); } fclose(f); f = fopen("comID.fc.xlsx","w"); output_com_filename(&flist, &v1, 
graph_vertices_count(&lkn), ncom, filedict, f); fclose(f); //print_vector_int(&v1, stdout); print_communities(&lkn, &v1, "community.fc.xlsx", "comedge.fc.xlsx"); vector_funcP_destroy(&flist); vector_int_destroy(&v1); vector_char_destroy(&str); vector_int_destroy(&edges); graph_destroy(&lkn); return 0; }
/*
 * For every element of `x`, find the closest element of `table` according
 * to distance() and return its 1-based position.
 *
 *   x, table    : inputs read through get_elem(); IS_CHARACTER(x) selects
 *                 the access mode passed to get_elem() for both.
 *   nomatch     : integer returned when no table element qualifies.
 *   matchNA     : if non-zero, an NA in `x` matches an NA in `table`.
 *   weight      : weights handed unchanged to distance().
 *   maxDistance : candidates with a larger distance are ignored.
 *
 * Returns an INTSXP of length(x). The search for one element stops early
 * as soon as a candidate at distance (numerically) zero is found.
 * Raises an R error if the work buffers cannot be allocated.
 */
SEXP R_match_dl(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA, SEXP weight, SEXP maxDistance){
  PROTECT(x);
  PROTECT(table);
  PROTECT(nomatch);
  PROTECT(matchNA);
  PROTECT(weight);
  PROTECT(maxDistance);

  int nx = length(x)
    , ntable = length(table)
    , no_match = INTEGER(nomatch)[0]
    , match_na = INTEGER(matchNA)[0]
    , bytes = IS_CHARACTER(x)
    , ml_x = max_length(x)
    , ml_t = max_length(table);
  double *w = REAL(weight);
  double maxDist = REAL(maxDistance)[0];

  /* claim space for workhorse */
  dictionary *dict = new_dictionary( ml_x + ml_t + 1 );
  /* Multiply in size_t: the product of two int lengths can overflow int. */
  size_t n_scores = (size_t) (ml_x + 3) * (size_t) (ml_t + 2);
  size_t n_chars = (size_t) (ml_x + ml_t + 2);
  double *scores = (double *) malloc( n_scores * sizeof(double) );
  unsigned int *X = NULL, *T = NULL;

  X = (unsigned int *) malloc( n_chars * sizeof(int) );
  if ( (scores == NULL) || (X == NULL) ){
    UNPROTECT(6);
    free(X);
    free(scores);
    /* error() does not return, so release the dictionary here as well. */
    if ( dict != NULL ) free_dictionary(dict);
    error("Unable to allocate enough memory");
  }
  /* X and T share one allocation: T starts ml_x + 1 elements into it. */
  T = X + ml_x + 1;
  memset(X, 0, n_chars * sizeof(int));

  // output vector
  SEXP yy;
  PROTECT(yy = allocVector(INTSXP, nx));
  int *y = INTEGER(yy);

  double d = R_PosInf, d1 = R_PosInf;
  int index, len_X, len_T, isna_X, isna_T;
  unsigned int *X1, *T1;

  for ( int i=0; i<nx; i++){
    index = no_match;
    if ( bytes ){
      X = get_elem(x, i , bytes, &len_X, &isna_X, X);
    } else {
      X1 = get_elem(x, i , bytes, &len_X, &isna_X, X);
      memcpy(X, X1, len_X*sizeof(int));
    }
    d1 = R_PosInf;  /* best distance found so far for x[i] */
    for ( int j=0; j<ntable; j++){
      if ( bytes ){
        T = get_elem(table, j, bytes, &len_T, &isna_T, T);
      } else {
        T1 = get_elem(table, j, bytes, &len_T, &isna_T, T);
        memcpy(T, T1, len_T * sizeof(int));
      }

      if ( !isna_X && !isna_T ){        // both are char (usual case)
        d = distance( X, T, len_X, len_T, w, dict, scores );
        memset(T, 0, (ml_t+1)*sizeof(int));
        if ( d <= maxDist && d < d1 ){
          index = j + 1;
          /* Exact match: nothing can beat it. The comparison is done in
           * floating point; integer abs() would truncate every distance
           * below 1 to 0 and stop the search too early. */
          if ( d > -1e-14 && d < 1e-14 ) break;
          d1 = d;
        }
      } else if ( isna_X && isna_T ) {  // both are NA
        index = match_na ? j + 1 : no_match;
        break;
      }
    }

    y[i] = index;
    memset(X, 0, (ml_x + 1)*sizeof(int));
  }

  UNPROTECT(7);
  free(X);
  free_dictionary(dict);
  free(scores);
  return(yy);
}
/*
 * Element-wise distance between `a` and `b`, computed with distance().
 *
 *   a, b   : inputs read through get_elem(); IS_CHARACTER(a) selects the
 *            access mode passed to get_elem() for both.
 *   weight : weights handed unchanged to distance().
 *
 * Returns a REALSXP whose length is the larger of length(a) and length(b);
 * indices into the inputs are advanced with RECYCLE(). A pair in which
 * either element is NA yields NA_REAL, and a negative result from
 * distance() is reported as R_PosInf.
 * Raises an R error if the work buffers cannot be allocated.
 */
SEXP R_dl(SEXP a, SEXP b, SEXP weight){
  PROTECT(a);
  PROTECT(b);
  PROTECT(weight);

  int na = length(a)
    , nb = length(b)
    , nt = (na > nb) ? na : nb
    , bytes = IS_CHARACTER(a)
    , ml_a = max_length(a)
    , ml_b = max_length(b);
  double *w = REAL(weight);

  /* claim space for workhorse */
  unsigned int *s=NULL, *t=NULL;
  dictionary *dict = new_dictionary( ml_a + ml_b + 1 );
  /* Multiply in size_t: the product of two int lengths can overflow int. */
  double *scores = (double *) malloc(
      (size_t) (ml_a + 3) * (size_t) (ml_b + 2) * sizeof(double) );
  size_t slen = (size_t) (ml_a + ml_b + 2) * sizeof(int);

  s = (unsigned int *) malloc(slen);
  if ( (scores == NULL) || ( s == NULL ) ){
    UNPROTECT(3);
    free(scores);
    free(s);
    /* error() does not return, so release the dictionary here as well. */
    if ( dict != NULL ) free_dictionary(dict);
    error("Unable to allocate enough memory");
  }
  /* s and t share one allocation: t starts ml_a + 1 elements into it. */
  t = s + ml_a + 1;
  memset(s, 0, slen);

  // output
  SEXP yy;
  PROTECT(yy = allocVector(REALSXP, nt));
  double *y = REAL(yy);

  int i=0, j=0, len_s, len_t, isna_s, isna_t;
  unsigned int *s1, *t1;

  for ( int k=0; k < nt; ++k ){
    if (bytes){
      s = get_elem(a, i, bytes, &len_s, &isna_s, s);
      t = get_elem(b, j, bytes, &len_t, &isna_t, t);
    } else { // make sure there's an extra 0 at the end of the string.
      s1 = get_elem(a, i, bytes, &len_s, &isna_s, s);
      t1 = get_elem(b, j, bytes, &len_t, &isna_t, t);
      memcpy(s, s1, len_s*sizeof(int));
      memcpy(t, t1, len_t*sizeof(int));
    }

    if ( isna_s || isna_t ){
      y[k] = NA_REAL;
    } else {
      y[k] = distance( s, t, len_s, len_t, w, dict, scores );
      if ( y[k] < 0 ) y[k] = R_PosInf;
    }

    /* Advance and reset on every iteration, including NA pairs; skipping
     * this after an NA would pin i/j on the NA element and turn every
     * later result into NA as well. */
    i = RECYCLE(i+1,na);
    j = RECYCLE(j+1,nb);
    memset(s, 0, slen);
  }

  free_dictionary(dict);
  free(scores);
  free(s);
  UNPROTECT(4);
  return yy;
}