示例#1
0
文件: convpoly.c 项目: oeo4b/krigmap
/**
 * Entry point of convpoly.
 *
 * usage: ./convpoly [*.csv]
 *
 * Without an argument the program calls readpolygons() followed by
 * printpolygons(). With an argument, argv[1] is opened for reading, handed
 * to readcsv() to fill a polygons value, and the result is passed to
 * writepolygons().
 *
 * Returns 0 on success; exits with status 1 when the input file is missing
 * or cannot be opened.
 */
int main(int argc, char** argv) {
  polygons p;

  if(argc<2) {
    readpolygons(&p);
    printpolygons(&p);
    return 0;
  }

  if(access(argv[1], F_OK) == -1) {
    fprintf(stderr, "Error: file %s does not exist.\n", argv[1]);
    exit(1);
  }

  /* Read and convert the csv file to a polygons type */
  FILE* f = fopen(argv[1], "r");
  if(f == NULL) {
    /* The file exists but is unreadable (permissions, directory, or it
     * vanished between access() and fopen()); never hand NULL to readcsv. */
    fprintf(stderr, "Error: file %s could not be opened for reading.\n", argv[1]);
    exit(1);
  }
  readcsv(f, &p);
  fclose(f);
  writepolygons(&p);
  printf("Wrote to polygons/all.ply.\n");

  return 0;
}
 /*
  * Small driver for the string-set tokenizer and the CSV reader.
  *
  * First pass: reads "casessmall.rtxt" with readfile(), tokenizes it on
  * newlines, tokenizes every line on ',' and echoes the fields followed by
  * a comma. Second pass: loads "cases.rtxt" through readcsv() and prints the
  * fields of every row back to back. All string sets are destructed after
  * use. Returns EXIT_SUCCESS.
  */
 int main(int argc, char** argv) {
	int space;
	Uint contentlen;
	Uint row, col;
	char *content;
	stringset_t *lines;
	stringset_t *fields;
	stringset_t **csv;

	/* pass 1: manual tokenization of the small file */
	content = readfile(&space, "casessmall.rtxt", &contentlen);
	printf("read file of length: %d\n", contentlen);
	lines = tokensToStringset(&space, "\n", content, contentlen);
	FREEMEMORY(&space, content);
	printf("file contained %d lines\n", lines->noofstrings);

	row = 0;
	while (row < lines->noofstrings) {
		fields = tokensToStringset(&space, ",",
				lines->strings[row].str, lines->strings[row].len);
		col = 0;
		while (col < fields->noofstrings) {
			printf("%s,", fields->strings[col].str);
			col++;
		}
		printf("\n");
		destructStringset(&space, fields);
		row++;
	}
	destructStringset(&space, lines);

	/* pass 2: the same kind of data through readcsv(); contentlen now
	 * receives the number of rows */
	printf("reading csv ...\n");
	csv = readcsv(&space, "cases.rtxt", ",", &contentlen);

	row = 0;
	while (row < contentlen) {
		col = 0;
		while (col < csv[row]->noofstrings) {
			printf("%s", csv[row]->strings[col].str);
			col++;
		}
		destructStringset(&space, csv[row]);
		printf("\n");
		row++;
	}

	FREEMEMORY(&space, csv);

	return EXIT_SUCCESS;
 }
 /*
  * Converts a list of probability vectors into encoded sequences.
  *
  * Options (short forms handled here): -f <list file>, -a <alphabet file>,
  * -o <output path>. -f and -a are mandatory; otherwise usage() is printed
  * and the program exits with EXIT_FAILURE.
  *
  * For every entry of the list file the loop reads the probability vector,
  * encodes it with the loaded alphabet, attaches alphabet name, description
  * and url to the resulting sequence and stores it with saveSequence().
  * Returns EXIT_SUCCESS.
  */
 int 
 main(int argc, char** argv) 
 {
	Uint noofvecs, i;
	Sint optindex, c;
	vector_t info;
#ifdef MEMMAN_H	
	Spacetable spacetab;
#endif
	/* memory-manager handle passed to every allocation helper; stays NULL
	 * unless MEMMAN_H is defined */
	void *space = NULL;
	char *url = NULL;
	char *outpath = NULL;
	char *pveclistfile = NULL;
	char *alphabetfile = NULL;
	char *vecext="vec";
	char *seqext="seq";	
	struct prob_vec *p_vec;
	IntSequence *sequence;
	FAlphabet *alphabet;	
	stringset_t *tok;
	stringset_t **fn;
	
#ifdef MEMMAN_H 
	initmemoryblocks(&spacetab, 1000);
	space = &spacetab;
#endif

	/* command line parsing; long_options is defined outside this function */
    while(1) {
		c=getopt_long(argc, argv, "f:a:o:", long_options, &optindex);
		if (c==-1) break;
		
		switch(c) {
			case 'f':
				pveclistfile = optarg;	
				break;
			case 'a':
				alphabetfile = optarg;	
				break;
			case 'o':
				outpath = optarg;
				break;
			default:
				usage(argv[0]);
				exit (EXIT_FAILURE);
		}

	}

	if (pveclistfile==NULL || alphabetfile == NULL) {
		usage(argv[0]);
		exit (EXIT_FAILURE);
	}
	
	/* the list file is read with "." as delimiter, so field 0 of each row is
	 * presumably the file name without its extension -- TODO confirm against
	 * readcsv() */
	fn=readcsv(space, pveclistfile, ".", &noofvecs);
	alphabet = loadCSValphabet(space, alphabetfile);
	sortMapdomain(space, alphabet);
   
	for(i=0; i<noofvecs; i++) 
	{	
	  
		INITVECTOR(&info);	
		/* presumably appends '.' + "vec" to the name; note that the stored
		 * length SETSTRLEN(fn[i],0) is NOT updated here, so it keeps
		 * describing the name without extension until it is bumped below */
	  	SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), vecext,
							  SETSTRLEN(fn[i],0), 3, '.');
	   
		/* NOTE(review): p_vec is dereferenced right away without a NULL
		 * check -- confirm prob_vec_read() cannot fail for a missing file */
		p_vec = prob_vec_read (SETSTR(fn[i],0));	
			
		if (p_vec->mship == NULL) 
			prob_vec_expand(p_vec);
			
		sequence = encode_prob_vec(space, alphabet, p_vec, 0, 0, 
									cantorchar, &info);
		/* the sequence takes over the element buffer collected in info */
		sequence->info = (Uint*) info.elements;
		sequence->namelen= strlen(alphabetfile);
		
		COPYSTR(space, sequence->alphabetname, alphabetfile, 
							 strlen(alphabetfile));

		/* this is a potential security risk: compnd_len comes from the
		 * vector file and directly controls how many bytes are copied */
		if (p_vec->compnd_len > 0) {
		  sequence->descrlen = p_vec->compnd_len-1; 
		  COPYSTR(space, sequence->description, p_vec->compnd, 
							 p_vec->compnd_len-1);
		} else {
		  /* 14 == strlen("descriptor n/a") */
		  sequence->descrlen = 14;			
		  COPYSTR(space, sequence->description, "descriptor n/a", 14);
		}

		/* uses the stale (extension-less) length, so only the leading part
		 * of the ".vec" name is copied into the url */
		sequence->urllen = SETSTRLEN(fn[i],0);
		COPYSTR(space, sequence->url, SETSTR(fn[i],0), 
						   SETSTRLEN(fn[i],0));
				
		/* same stale length again: presumably this overwrites the "vec"
		 * suffix with "seq" -- TODO confirm against concatdelim() */
		SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), seqext,
							SETSTRLEN(fn[i],0), 3, '.');	
		/* only now account for the delimiter plus the 3-char extension */
		SETSTRLEN(fn[i],0) += 4;
		
		if (outpath) {
			/* keep only the last '/'-separated component of the name and
			 * append it to outpath; no separator is inserted here, so
			 * outpath presumably has to end with '/' -- verify */
			tok = tokensToStringset(space, "/", SETSTR(fn[i],0), 
					SETSTRLEN(fn[i],0));
			
			COPYSTR(space, url, outpath, strlen(outpath));
	
			url = concat(space, url, SETSTR(tok, tok->noofstrings-1), 
				strlen(url), SETSTRLEN(tok, tok->noofstrings-1));
		
			saveSequence(sequence, url);
			
			destructStringset(space, tok);
			FREEMEMORY(space, url);
			url = NULL;
		
		} else {
	
			/* no output path: write next to the input under the ".seq" name */
			saveSequence(sequence, SETSTR(fn[i],0));
		}
		
		/* per-entry cleanup */
		destructSequence (space, sequence);		
		prob_vec_destroy (p_vec);			 
		destructStringset (space, fn[i]);
		progressBarVT("probability vectors converted", noofvecs-1, i, 25);	
	}
	
	printf("\nexit.\n");
	FREEMEMORY(space, fn);
	destructAlphabet(space, alphabet);
		
	return EXIT_SUCCESS;
 }
/**
 * Entry point of the IMBISS search tool.
 *
 * Parses the command line, loads the alphabet (-a), the sequence database
 * listed in the file given with -s and, optionally, a substitution matrix
 * (-x). A suffix array with LCP information is built over the database.
 * Every query (single file via -q, or each row of the batch file given with
 * -b) is then matched against the suffix array and ranked with
 * rankSufmatch() using the selected filter/select/handler callbacks.
 *
 * -s, -a and one of -q / -b are mandatory; otherwise usage() is printed and
 * the program exits with EXIT_FAILURE. Returns EXIT_SUCCESS.
 */
int
main (int argc, char** argv)
{	
	/* layout of the hard-coded query urls built for every query */
	enum {
		QUERY_ID_OFFSET = 56,	/* offset of the 5-char id inside input->url */
		VEC_URL_SIZE = 66,	/* 56 prefix + 5 id + 4 ".vec" + NUL */
		BIN_URL_SIZE = 54	/* 44 prefix + 5 id + 4 ".bin" + NUL */
	};

 	Sint optindex, c;
	unsigned char depictsw=0;
	unsigned char wurst=1;
	unsigned char gnuplot=0;

	Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0;
	Uint noofhits=100;
	Uint substrlen = 10;
	Uint minseeds = 5;
	Uint maxmatches = 10000;
	char *vec, *bin;
	const char *queryid;
	imbissinfo *imbiss;
	void *space = NULL;	
	double *scores = NULL;

	/* Smith-Waterman scores: [0] set by -M, [1] set by -D */
	int swscores[2]={3,-2};
	char *pveclistfile=NULL;
	char *alphabetfile=NULL;	
	char *inputfile=NULL;
	char *batchfile = NULL;
	char *subfile=NULL;
	char *reportfile = NULL;

	/* callbacks handed to rankSufmatch(); defaults may be replaced by
	 * the -B/-S/-A/-F/-G/-L options */
	 int (*handler)
	   (void *, Matchtype *, IntSequence **, Uint, 
		Uint, void *) = allscores;
	 
	 double (*filter) 
	   (void *, Matchtype *, IntSequence *, IntSequence *,
		 Uint *, Uint, Uint, void *) = swconstfilter;

	 Matchtype* (*select)
	   (void *, Matchtype *, Uint k, 
		IntSequence *, IntSequence **, void *) = selectSW;
  

	stringset_t **fn, **freq, *queryurl, **queries=NULL;
	Suffixarray *arr = NULL;	
	IntSequence **sequences = NULL;
	IntSequence *input = NULL;
	FAlphabet *alphabet = NULL;		
	PairSint *matches=NULL;
	Uint percent=0;
	
	time_t startsuf, endsuf; 
	double difsuf, difmatch, difrank;	

#ifdef MEMMAN_H 	
	Spacetable spacetab;
	initmemoryblocks(&spacetab, 100000);
	space = &spacetab;
#endif
	
	while(1) 
	{
		c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", 
						long_options, &optindex);
		if (c==-1) break;
		
		switch(c) {
			case 'r':
				reportfile=optarg;
				break;
			case 'v':
				verbose_flag=1;	
				break;
			case 'd':
				depictsw = 1;
				break;
			case 's':
				pveclistfile = optarg;	
				break;
			case 'a':
				alphabetfile = optarg;	
				break;
			case 'q':
				inputfile = optarg;
				noofqueries = 1;
				break;
			case 'l':
				substrlen = atoi(optarg);
				break;
			case 'c':
				minseeds = atoi(optarg);
				break;
			case 'b':
				batchfile = optarg;
				break;
			case 'p':
				percent = atoi(optarg);
				break;
			case 'x':
				subfile = optarg;
				break;
			case 'n':
				noofhits = atoi(optarg);
				break;
			case 'w':
				wurst = 0;
				break;
			case 'B':
				filter = scorefilter;
				select = selectBlastScore;
				break;
			case 'S':
				filter = scorefilter;
				select = selectScore;
				break;
			case 'A':
				filter = swconstfilter;
				select = selectSW;
				break;
			case 'F':
				filter = scorefilter;
				select = selectScoreSWconst;
				break;
			case 'G':
				filter = scorefilter;
				select = selectBlastScoreSWconst;
				break;
			case 'M':
				swscores[0]=atoi(optarg);
				break;
			case 'L':
				handler = latexscores;
				break;
			case 'D':
				swscores[1]=atoi(optarg);
				break;
			case 'g':
				gnuplot = 1;
				break;
			case 'm':
				maxmatches=atoi(optarg);
				break;
			case 'h':
			default:
				usage(argv[0]);
				exit (EXIT_FAILURE);
		}
	}
	if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL)
		|| alphabetfile == NULL) {
		usage(argv[0]);
		exit (EXIT_FAILURE);
	}
	
	imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1);
	imbiss->reportfile = reportfile;
	imbiss->swscores = swscores;
	imbiss->noofhits = noofhits;
	imbiss->minseeds = minseeds; 	
	imbiss->wurst = wurst; 
	
	/*read batch file; overrides the query count set by -q*/
	if (batchfile) {
		queries = readcsv(space, batchfile, "", &noofqueries);	
	}

	/*read substitution matrix; row 0 and column 0 are skipped*/
	if (subfile) {
		freq=readcsv(space, subfile,",", &nooffreqs);
		scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) );
		/* NOTE(review): the buffer holds (nooffreqs-1)^2 cells but is indexed
		 * with i,j in 1..nooffreqs-1; depending on how MATRIX2D linearizes
		 * the index this may run past the end -- confirm against the macro */
		for(i=1; i < nooffreqs; i++) {
			for(j=1; j < nooffreqs; j++) {
				if(strcmp(SETSTR(freq[i],j),"inf")==0){
					MATRIX2D(scores, nooffreqs-1, i, j)=0;
				}else{
					MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j));
				}
			}
		}
	}
	
	/*read alphabet*/	
	if (alphabetfile != NULL) {
		alphabet = loadCSValphabet(space, alphabetfile);
		sortMapdomain(space, alphabet);
	}

	
	/*load sequence database*/
	fn=readcsv(space, pveclistfile, "", &noofseqs);
	sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs);
	for(i=0; i < noofseqs; i++) 
	{		  
		sequences[i] = loadSequence(space, SETSTR(fn[i],0));		
	}

	for (i=0; i < noofseqs; i++) {	
		destructStringset(space, fn[i]);
	}
	FREEMEMORY(space, fn);
	
	
	/*construct the suffix array*/
	time (&startsuf);
	arr = constructSufArr(space, sequences, noofseqs, NULL);
	constructLcp(space, arr); 	
	time (&endsuf);
	difsuf = difftime (endsuf, startsuf);


	/*do search*/
	for (i=0; i < noofqueries; i++) {
	  
		/*get query from batchfile*/
		if (queries) {
			inputfile = SETSTR(queries[i],0);
		}	

		/* The query has to be loaded BEFORE its length is used below;
		 * previously input was still NULL (first query) or already
		 * destructed (later queries) when -p was given. */
		input = loadSequence(space, inputfile);
		
		/*typically only used with batchfile: substring length as a
		 * percentage of the query length*/
		if (percent != 0) {
			substrlen = 
			  ((double)((double)input->length/100)*(double) percent);
		}
			
		printf(">IMBISS order delivered\n");	
		printf("%s\n", input->url); 
		
		time (&startsuf);
		matches=sufSubstring(space, arr, input->sequence, input->length, substrlen);	 
		time (&endsuf);
		difmatch = difftime (endsuf, startsuf);

		/*get prob vector url for salami/wurst: the 5-character id is
		 * taken from a fixed offset of the query url. Urls that are too
		 * short yield a blank id instead of an out-of-bounds read.*/
		queryid = (strlen(input->url) > (size_t) QUERY_ID_OFFSET)
			? input->url + QUERY_ID_OFFSET : "";

		vec = malloc(VEC_URL_SIZE);
		bin = malloc(BIN_URL_SIZE);
		if (vec == NULL || bin == NULL) {
			fprintf(stderr, "out of memory while building query urls\n");
			exit (EXIT_FAILURE);
		}
		/* %5.5s: pad to and truncate at 5 characters so the result always
		 * fits the fixed-size buffers */
		snprintf(vec, VEC_URL_SIZE,
			"/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5.5s.vec",
			queryid);
		snprintf(bin, BIN_URL_SIZE,
			"/smallfiles/public/no_backup/bm/pdb_all_bin/%5.5s.bin",
			queryid);

		queryurl = initStringset(space);
		addString(space, queryurl, bin, strlen(bin));
		addString(space, queryurl, vec, strlen(vec));

		
		getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss);
	

		imbiss->query = queryurl;
		imbiss->substrlen = substrlen;
		imbiss->alphabet = alphabet;
		
		/*if a substitution file was given ...*/
		if (subfile) {
			imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1);
		}
		
		/*match 'n' report*/
		time (&startsuf);
		
		/* NOTE(review): input->length-substrlen wraps around for unsigned
		 * operands when the query is shorter than substrlen -- confirm that
		 * callers never pass such queries */
		imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen));
		memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen)));
	
		rankSufmatch(space, arr, matches, input->length-substrlen,
			maxmatches, substrlen, 
			sequences, noofseqs, filter, select, handler,
			input, imbiss, scores, depictsw);
		
		if (gnuplot) {	
			consensus (space, imbiss->consensus, input->length-substrlen, 
				input, substrlen, imbiss);
		}

		time (&endsuf);
		difrank = difftime (endsuf, startsuf);
	
		printf ("Building  the suffixtree has taken %f seconds.\n", difsuf);
		printf ("Match the suffixtree has taken %f seconds.\n", difmatch);
		printf ("Rank  the suffixtree has taken %f seconds.\n", difrank);
	
		/*partial cleanup. queryurl itself is intentionally not destructed
		 * here; bin and vec are released with free() below. NOTE(review):
		 * presumably addString() stores the pointers without copying, so
		 * destructing the set as well would free them twice -- confirm.*/
		destructSequence(space, input);
		if(subfile) {
			FREEMEMORY(space, imbiss->sub);
		}

		FREEMEMORY(space, imbiss->consensus);
		FREEMEMORY(space, imbiss->score);
		FREEMEMORY(space, matches);
		free(bin);
		free(vec);
	}
	
	/*final cleanup*/
	for (i=0; i < noofseqs; i++) {
		destructSequence(space, sequences[i]);
	}
	FREEMEMORY(space, sequences);	
	destructSufArr(space, arr);
	
#ifdef MEMMAN_H
	activeblocks(space);
#endif
	
	printf("Goodbye.\n");	
	return EXIT_SUCCESS;
}
示例#5
0
文件: kmeans.c 项目: blkqi/kmeans
/**
 * k-means clustering driver.
 *
 * usage: kmeans <data> <centroids>
 *
 * Reads the observations from argv[1] and the initial centroids from
 * argv[2] with readcsv(), iterates assignment and centroid update until the
 * change of the centroid norm drops to TOL or below, and prints every
 * observation with its cluster followed by the final centroids.
 *
 * Returns 0 on success, 1 on wrong usage and -1 when the inputs are
 * inconsistent (no centroids, more clusters than data, or differing
 * dimensionality).
 */
int main (int argc, char *argv[])
{
    if (argc < 3) {
        fprintf (stderr, "usage: kmeans <data> <centroids>\n");
        return 1;
    }

    //  read data from file
    int d_n=0, d_m=0;
    double data [MAX_N][MAX_M] = { 0 };
    readcsv (argv[1], data, &d_n, &d_m);
 
    //  read centroids from file
    int c_n=0, c_m=0;
    double centroids [MAX_N][MAX_M] = { 0 };
    readcsv (argv[2], centroids, &c_n, &c_m);
 
    //  the arrays below are sized by c_n/c_m/d_n; zero or negative sizes
    //  would make them invalid, so reject empty input up front
    if (c_n < 1 || c_m < 1) {
        fprintf (stderr, "error: No centroids given\n");
        return -1;
    }
    if (d_n < c_n) {
        fprintf (stderr, "error: More clusters than data\n");
        return -1;
    }
    if (d_m != c_m) {
        fprintf (stderr, "error: Data and centroid dimensionalities differ\n");
        return -1;
    }

    int clusters [d_n]; // assigned cluster of datum
    int cluster_pop [c_n]; // population of cluster
    double data_sum [c_n][c_m]; // data column sum of cluster
    double cent_last [c_n][c_m]; // centroids (last iteration)

    int i, j, k;
    double dist, dist_min, c;

    //
    //  Iterate, converge absolutely

    do  //
    {   //  For each observation, calculate the distance from
        //  each centroid and assign it to the nearest one

        memset (cluster_pop, 0, c_n*sizeof(int));

        for (i = 0; i < d_n; ++i) {
            dist_min = INFINITY;
            // assume distance minimum is +inf; fall back to cluster 0 so
            // the assignment is defined even if no distance compares below
            // +inf (e.g. NaN or infinite input)
            clusters[i] = 0;
            for (j = 0; j < c_n; ++j) {
                dist = 0.0;
                //  calculate (squared) distance to centroid j
                for (k = 0; k < d_m; ++k) {
                    c = data[i][k] - centroids[j][k];
                    dist += c * c;
                }
                //  minimize distance
                if (dist < dist_min) {
                    dist_min = dist;
                    clusters[i] = j;
                }
            }
            //  add element to its nearest cluster
            ++cluster_pop[clusters[i]];
        }

        //
        //  Calculate new centroids

        memset (data_sum, 0, c_n*c_m*sizeof(double));

        //  sum data columns per cluster
        for (i = 0; i < d_n; ++i)
            for (k = 0; k < d_m; ++k)
                data_sum[clusters[i]][k] += data[i][k];

        //  centroid coordinate is mean of corresponding column
        for (j = 0; j < c_n; ++j) {
            for (k = 0; k < d_m; ++k) {
                cent_last[j][k] = centroids[j][k];
                //  an empty cluster keeps its previous centroid instead of
                //  becoming NaN through a division by zero
                if (cluster_pop[j] > 0)
                    centroids[j][k] = data_sum[j][k] / cluster_pop[j];
            }
        }

    //  displacement-->0 (n-->inf); compare the magnitude of the change so
    //  a shrinking norm does not end the iteration prematurely.
    //  NOTE(review): centroids has row stride MAX_M while cent_last has row
    //  stride c_m; confirm that norm() handles both layouts as intended.
    } while (fabs (norm(*centroids, c_n, c_m) - norm(*cent_last, c_n, c_m)) > TOL);

    //
    //  Printout data with assignments and final centroid coordinates

    printf ("data (%ix%i)\n", d_n, d_m);
    for (i = 0; i < d_n; ++i) {
        printf ("%5i  { ", i);
        for (j = 0; j < d_m; ++j)
            printf ("%7.3f ", data[i][j]);
        printf ("}  --> %2i\n", clusters[i]);
    }
    printf ("centroids (%ix%i)\n", c_n, c_m);
    for (j = 0; j < c_n; ++j) {
        printf ("%5i  { ", j);
        for (k = 0; k < c_m; ++k)
            printf ("%7.3f ", centroids[j][k]);
        printf ("}\n");
    }

    return 0;
}