/**
 * Entry point of convpoly.
 *
 * format: ./convpoly [*.csv]
 *
 * Without an argument, readpolygons() is called followed by
 * printpolygons(). With a file argument, the file is opened, passed to
 * readcsv() and the result is handed to writepolygons().
 *
 * Returns 0 on success. Exits with status 1 when the given file does not
 * exist or cannot be opened for reading.
 */
int main(int argc, char** argv)
{
    polygons p;

    if (argc < 2) {
        readpolygons(&p);
        printpolygons(&p);
        return 0;
    }

    if (access(argv[1], F_OK) == -1) {
        fprintf(stderr, "Error: file %s does not exist.\n", argv[1]);
        exit(1);
    }

    /* Read and convert the csv file to a polygons type */
    FILE* f = fopen(argv[1], "r");
    if (f == NULL) {
        /*
         * access(F_OK) only proves existence: the file may still be
         * unreadable, or may have been removed since the check. Without
         * this test a NULL stream would reach readcsv() and fclose().
         */
        fprintf(stderr, "Error: could not open file %s.\n", argv[1]);
        exit(1);
    }
    readcsv(f, &p);
    fclose(f);

    writepolygons(&p);
    printf("Wrote to polygons/all.ply.\n");

    return 0;
}
int main(int argc, char** argv) { char* content; Uint contentlen,i,j; stringset_t *set, *set2, **csv; int space; content = readfile(&space, "casessmall.rtxt", &contentlen); printf("read file of length: %d\n", contentlen); set = tokensToStringset(&space, "\n", content, contentlen); FREEMEMORY(&space, content); printf("file contained %d lines\n", set->noofstrings); for(i=0; i < set->noofstrings; i++) { set2 = tokensToStringset(&space, ",", set->strings[i].str, set->strings[i].len); for(j=0; j < set2->noofstrings; j++) { printf("%s,", set2->strings[j].str); } printf("\n"); destructStringset(&space, set2); } destructStringset(&space, set); printf("reading csv ...\n"); csv = readcsv(&space, "cases.rtxt", ",", &contentlen); for (i=0; i < contentlen; i++) { for(j=0; j < csv[i]->noofstrings; j++) { printf("%s",csv[i]->strings[j].str); } destructStringset(&space, csv[i]); printf("\n"); } FREEMEMORY(&space, csv); return EXIT_SUCCESS; }
/*
 * Converts every probability vector named in a list file into an encoded
 * sequence and saves it.
 *
 * Command line (parsed with getopt_long, option string "f:a:o:"):
 *   -f <file>  list of probability vector files (required)
 *   -a <file>  alphabet file, loaded with loadCSValphabet() (required)
 *   -o <path>  optional output path; when given, the saved file name is
 *              built from this path plus the last "/"-separated token of
 *              the input name
 *
 * Calls usage() and exits with EXIT_FAILURE on an unknown option or when
 * -f or -a is missing; returns EXIT_SUCCESS otherwise.
 */
int main(int argc, char** argv) {
    Uint noofvecs, i;
    Sint optindex, c;
    vector_t info;
#ifdef MEMMAN_H
    Spacetable spacetab;
#endif
    void *space = NULL;
    char *url = NULL;
    char *outpath = NULL;
    char *pveclistfile = NULL;
    char *alphabetfile = NULL;
    char *vecext="vec";
    char *seqext="seq";
    struct prob_vec *p_vec;
    IntSequence *sequence;
    FAlphabet *alphabet;
    stringset_t *tok;
    stringset_t **fn;

    /* with the memory manager compiled in, all helpers get its table as 'space' */
#ifdef MEMMAN_H
    initmemoryblocks(&spacetab, 1000);
    space = &spacetab;
#endif

    /* option parsing */
    while(1) {
        c=getopt_long(argc, argv, "f:a:o:", long_options, &optindex);
        if (c==-1) break;
        switch(c) {
            case 'f':
                pveclistfile = optarg;
                break;
            case 'a':
                alphabetfile = optarg;
                break;
            case 'o':
                outpath = optarg;
                break;
            default:
                usage(argv[0]);
                exit (EXIT_FAILURE);
        }
    }

    /* both the list file and the alphabet file are mandatory */
    if (pveclistfile==NULL || alphabetfile == NULL) {
        usage(argv[0]);
        exit (EXIT_FAILURE);
    }

    /*
     * NOTE(review): the list is read with "." as delimiter, so field 0 of
     * each row is presumably the file name up to its first dot -- confirm
     * against readcsv().
     */
    fn=readcsv(space, pveclistfile, ".", &noofvecs);
    alphabet = loadCSValphabet(space, alphabetfile);
    sortMapdomain(space, alphabet);

    for(i=0; i<noofvecs; i++) {
        INITVECTOR(&info);

        /* presumably appends '.' + "vec" to field 0; the stored length is not updated here */
        SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), vecext, SETSTRLEN(fn[i],0), 3, '.');

        /* NOTE(review): p_vec is dereferenced without a NULL check -- confirm prob_vec_read() cannot return NULL */
        p_vec = prob_vec_read (SETSTR(fn[i],0));
        if (p_vec->mship == NULL) prob_vec_expand(p_vec);

        sequence = encode_prob_vec(space, alphabet, p_vec, 0, 0, cantorchar, &info);
        sequence->info = (Uint*) info.elements;

        /* record the alphabet file name in the sequence */
        sequence->namelen= strlen(alphabetfile);
        COPYSTR(space, sequence->alphabetname, alphabetfile, strlen(alphabetfile));

        /*this is a potential security risk*/
        /* description: compnd_len-1 characters of the vector's compnd field, or a fixed placeholder */
        if (p_vec->compnd_len > 0) {
            sequence->descrlen = p_vec->compnd_len-1;
            COPYSTR(space, sequence->description, p_vec->compnd, p_vec->compnd_len-1);
        } else {
            sequence->descrlen = 14;
            COPYSTR(space, sequence->description, "descriptor n/a", 14);
        }

        /*
         * The url is copied using the length stored before the "vec"
         * extension was attached, so only that many leading characters
         * of the name are taken.
         */
        sequence->urllen = SETSTRLEN(fn[i],0);
        COPYSTR(space, sequence->url, SETSTR(fn[i],0), SETSTRLEN(fn[i],0));

        /*
         * Same call as above with "seq" and the still-unchanged length;
         * presumably this replaces the "vec" suffix. Only now is the
         * stored length grown by 4 (presumably delimiter + 3 characters).
         */
        SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), seqext, SETSTRLEN(fn[i],0), 3, '.');
        SETSTRLEN(fn[i],0) += 4;

        if (outpath) {
            /* split the name on "/" and join outpath with its last token */
            tok = tokensToStringset(space, "/", SETSTR(fn[i],0), 
            SETSTRLEN(fn[i],0));
            COPYSTR(space, url, outpath, strlen(outpath));
            /* NOTE(review): no separator is added here; outpath presumably has to end in '/' -- confirm against concat() */
            url = concat(space, url, SETSTR(tok, tok->noofstrings-1), strlen(url), SETSTRLEN(tok, tok->noofstrings-1));
            saveSequence(sequence, url);
            destructStringset(space, tok);
            FREEMEMORY(space, url);
            url = NULL;
        } else {
            /* no output path: save under the derived name itself */
            saveSequence(sequence, SETSTR(fn[i],0));
        }

        /* per-entry cleanup */
        destructSequence (space, sequence);
        prob_vec_destroy (p_vec);
        destructStringset (space, fn[i]);
        progressBarVT("probability vectors converted", noofvecs-1, i, 25);
    }

    printf("\nexit.\n");
    FREEMEMORY(space, fn);
    destructAlphabet(space, alphabet);
    return EXIT_SUCCESS;
}
int main (int argc, char** argv) { Sint optindex, c; unsigned char depictsw=0; unsigned char wurst=1; unsigned char gnuplot=0; Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0; Uint noofhits=100; Uint substrlen = 10; Uint minseeds = 5; Uint maxmatches = 10000; char *seq, *vec, *bin; imbissinfo *imbiss; void *space = NULL; double *scores = NULL; int swscores[2]={3,-2}; char *pveclistfile=NULL; char *alphabetfile=NULL; char *inputfile=NULL; char *batchfile = NULL; char *subfile=NULL; char *reportfile = NULL; int (*handler) (void *, Matchtype *, IntSequence **, Uint, Uint, void *) = allscores; double (*filter) (void *, Matchtype *, IntSequence *, IntSequence *, Uint *, Uint, Uint, void *) = swconstfilter; Matchtype* (*select) (void *, Matchtype *, Uint k, IntSequence *, IntSequence **, void *) = selectSW; stringset_t **fn, **freq, *queryurl, **queries=NULL; Suffixarray *arr = NULL; IntSequence **sequences = NULL; IntSequence *input = NULL; FAlphabet *alphabet = NULL; PairSint *matches=NULL; Uint percent=0; time_t startsuf, endsuf; double difsuf, difmatch, difrank; #ifdef MEMMAN_H Spacetable spacetab; initmemoryblocks(&spacetab, 100000); space = &spacetab; #endif while(1) { c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", long_options, &optindex); if (c==-1) break; switch(c) { case 'r': reportfile=optarg; break; case 'v': verbose_flag=1; break; case 'd': depictsw = 1; break; case 's': pveclistfile = optarg; break; case 'a': alphabetfile = optarg; break; case 'q': inputfile = optarg; noofqueries = 1; break; case 'l': substrlen = atoi(optarg); break; case 'c': minseeds = atoi(optarg); break; case 'b': batchfile = optarg; break; case 'p': percent = atoi(optarg); break; case 'x': subfile = optarg; break; case 'n': noofhits = atoi(optarg); break; case 'w': wurst = 0; break; case 'B': filter = scorefilter; select = selectBlastScore; break; case 'S': filter = scorefilter; select = selectScore; break; case 'A': filter = swconstfilter; select = selectSW; 
break; case 'F': filter = scorefilter; select = selectScoreSWconst; break; case 'G': filter = scorefilter; select = selectBlastScoreSWconst; break; case 'M': swscores[0]=atoi(optarg); break; case 'L': handler = latexscores; break; case 'D': swscores[1]=atoi(optarg); break; case 'g': gnuplot = 1; break; case 'm': maxmatches=atoi(optarg); break; case 'h': default: usage(argv[0]); exit (EXIT_FAILURE); } } if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL) || alphabetfile == NULL) { usage(argv[0]); exit (EXIT_FAILURE); } imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1); imbiss->reportfile = reportfile; imbiss->swscores = swscores; imbiss->noofhits = noofhits; imbiss->minseeds = minseeds; imbiss->wurst = wurst; /*read batch file*/ if (batchfile) { queries = readcsv(space, batchfile, "", &noofqueries); } /*read substitution matrix*/ if (subfile) { freq=readcsv(space, subfile,",", &nooffreqs); scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) ); for(i=1; i < nooffreqs; i++) { for(j=1; j < nooffreqs; j++) { if(strcmp(SETSTR(freq[i],j),"inf")==0){ MATRIX2D(scores, nooffreqs-1, i, j)=0; }else{ MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j)); } } } } /*read alphabet*/ if (alphabetfile != NULL) { alphabet = loadCSValphabet(space, alphabetfile); sortMapdomain(space, alphabet); } /*load sequence database*/ fn=readcsv(space, pveclistfile, "", &noofseqs); sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs); for(i=0; i < noofseqs; i++) { sequences[i] = loadSequence(space, SETSTR(fn[i],0)); } for (i=0; i < noofseqs; i++) { destructStringset(space, fn[i]); } FREEMEMORY(space, fn); /*construct the suffix array*/ time (&startsuf); arr = constructSufArr(space, sequences, noofseqs, NULL); constructLcp(space, arr); time (&endsuf); difsuf = difftime (endsuf, startsuf); /*do search*/ for (i=0; i < noofqueries; i++) { /*get query form batchfile*/ if (queries) { inputfile = SETSTR(queries[i],0); } /*typically only used with batchfile*/ 
if (percent != 0) { substrlen = ((double)((double)input->length/100)*(double) percent); } input = loadSequence(space, inputfile); //seq = printSequence (space, input, 60); printf(">IMBISS order delivered\n"); //printf("%s\n",seq); printf("%s\n", input->url); //FREEMEMORY(space, seq); time (&startsuf); matches=sufSubstring(space, arr, input->sequence, input->length, substrlen); time (&endsuf); difmatch = difftime (endsuf, startsuf); /*get prob vector url for salami/wurst*/ //printf("%.*s\n", 5, input->url + 58); vec = malloc(sizeof(char)*66); sprintf(vec, "/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec\0", input->url+56); bin = malloc(sizeof(char)*54); sprintf(bin, "/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin\0", input->url+56); queryurl = initStringset(space); addString(space, queryurl, bin, strlen(bin)); addString(space, queryurl, vec, strlen(vec)); getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss); imbiss->query = queryurl; imbiss->substrlen = substrlen; imbiss->alphabet = alphabet; /*if a substition file was given ...*/ if (subfile) { imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1); } /*match 'n' report*/ time (&startsuf); imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen)); memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen))); rankSufmatch(space, arr, matches, input->length-substrlen, maxmatches, substrlen, sequences, noofseqs, filter, select, handler, input, imbiss, scores, depictsw); if (gnuplot) { consensus (space, imbiss->consensus, input->length-substrlen, input, substrlen, imbiss); } time (&endsuf); difrank = difftime (endsuf, startsuf); printf ("Building the suffixtree has taken %f seconds.\n", difsuf); printf ("Match the suffixtree has taken %f seconds.\n", difmatch); printf ("Rank the suffixtree has taken %f seconds.\n", difrank); /*partial cleanup*/ //destructStringset(space, queryurl); destructSequence(space, input); if(subfile) { 
FREEMEMORY(space, imbiss->sub); } FREEMEMORY(space, imbiss->consensus); FREEMEMORY(space, imbiss->score); FREEMEMORY(space, matches); free(bin); free(vec); } /*final cleanup*/ for (i=0; i < noofseqs; i++) { destructSequence(space, sequences[i]); } FREEMEMORY(space, sequences); destructSufArr(space, arr); #ifdef MEMMAN_H activeblocks(space); #endif printf("Goodbye.\n"); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { if (argc < 3) { fprintf (stderr, "usage: kmeans <data> <centroids>\n"); return 1; } // read data from file int d_n=0, d_m=0; double data [MAX_N][MAX_M] = { 0 }; readcsv (argv[1], data, &d_n, &d_m); // read centroids from file int c_n=0, c_m=0; double centroids [MAX_N][MAX_M] = { 0 }; readcsv (argv[2], centroids, &c_n, &c_m); if (d_n < c_n) { fprintf (stderr, "error: More clusters than data\n"); return -1; } if (d_m != c_m) { fprintf (stderr, "error: Data and centroid dimensionalities differ\n"); return -1; } int clusters [d_n]; // assigned cluster of datum int cluster_pop [c_n]; // population of cluster double data_sum [c_n][c_m]; // data column sum of cluster double cent_last [c_n][c_m]; // centroids (last iteration) int i, j, k; double dist, dist_min, c; // // Iterate, converge absolutely do // { // For each observation, calculate the distance from // each centroid and assign it to the nearest one memset (cluster_pop, 0, c_n*sizeof(int)); for (i = 0; i < d_n; ++i) { dist_min = INFINITY; // assume distance minimum is +inf for (j = 0; j < c_n; ++j) { dist = 0.0; // calculate distance to centroid j for (k = 0; k < d_m; ++k) { c = data[i][k] - centroids[j][k]; dist += c * c; } // minimize distance if (dist < dist_min) { dist_min = dist; clusters[i] = j; } } // add element its nearest cluster ++cluster_pop[clusters[i]]; } // // Calculate new centroids memset (data_sum, 0, c_n*c_m*sizeof(double)); // sum data columns per cluster for (i = 0; i < d_n; ++i) for (k = 0; k < d_m; ++k) data_sum[clusters[i]][k] += data[i][k]; // centroid coordinate is mean of corresponding column for (j = 0; j < c_n; ++j) { for (k = 0; k < d_m; ++k) { cent_last[j][k] = centroids[j][k]; centroids[j][k] = data_sum[j][k] / cluster_pop[j]; } } // displacement-->0 (n-->inf) } while (norm(*centroids, c_n, c_m) - norm(*cent_last, c_n, c_m) > TOL); // // Printout final centroid coordinates printf ("data (%ix%i)\n", d_n, d_m); for (i = 0; i < d_n; ++i) { printf 
("%5i { ", i); for (j = 0; j < d_m; ++j) printf ("%7.3f ", data[i][j]); printf ("} --> %2i\n", clusters[i]); } printf ("centroids (%ix%i)\n", c_n, c_m); for (j = 0; j < c_n; ++j) { printf ("%5i { ", j); for (k = 0; k < c_m; ++k) printf ("%7.3f ", centroids[j][k]); printf ("}\n"); } return 0; }