Пример #1
0
/*
 * Print repeat / sub-repeat statistics for a list of segments.
 *
 * Chromosome information is read from `lenfile`, the segments themselves
 * from stdin.  A repeat ends at segment i when i is the last segment, when
 * the next segment starts more than `min_length` past the end of segment i,
 * or when the next segment lies on another chromosome.  Segments at least
 * `min_length` long are tallied per sub-repeat index (eq_pos[0]).
 *
 * Returns 0 on completion.
 */
int main(int argc, char **argv)
{
	int	i, j, k, m, n, centro[200], len_chroseq[100];
	int	maxi = 0;	/* index of the most frequent sub-repeat; stays 0 if nothing is counted */
	char	**chrname;
	int	num_chro;
	int	*counts, maxc;
	SEGMENT	*segment;
	int	num_segment;
	int	*repeats, num_repeats;
	FILE	*fp;

	readpar();
	initenv(argc, argv);

/*	Input chromosomal information	*/
	chrname = alloc_name(100, 100);
	fp = ckopen(lenfile, "r");
	num_chro = read_chro_centro(chrname, len_chroseq, centro, fp);
	fclose(fp);

	nchro = findgenname(chriname, chrname, num_chro);

/*	Input segments	*/

	segment = (SEGMENT *) ckalloc(60000 * sizeof(SEGMENT));
	fp = stdin;
	num_segment = input_segment(segment, chrname, num_chro, fp);

/*	Sorting of the segments is currently disabled	*/
/*
	qsort((void *) segment, num_segment, sizeof(SEGMENT), (void *) segcompar);
*/

/*	Define repeats from sub-repeats	*/

	repeats = (int *) ckalloc(num_segment * sizeof(int));
	num_repeats = 0;
	/* NOTE(review): the tallies below are incremented without being cleared
	   here, so this presumably relies on ckalloc() returning zeroed memory,
	   and on every eq_pos[0] being below 10000 * num_segment -- confirm. */
	counts = (int *) ckalloc(10000 * num_segment * sizeof(int));
	n = m = 0;
	j = 0;	/* number of repeats located on chromosome nchro */

	for(i = 0; i < num_segment; i ++)	{
		/* Segment i closes a repeat (see the function header for the rule). */
		if(i == num_segment - 1 || segment[i + 1].pos[0] > segment[i].pos[1] + min_length ||
		   segment[i + 1].chro != segment[i].chro)	{
			repeats[num_repeats ++] = i;
			if(segment[i].chro == nchro)	j ++;
		}
		/* Segments shorter than min_length do not contribute to the tallies. */
		if(segment[i].pos[1] - segment[i].pos[0] < min_length)	continue;
		counts[segment[i].eq_pos[0]] ++;
		if(segment[i].eq_pos[0] > n)	n = segment[i].eq_pos[0];
	}
	m += n;

	/* k: sub-repeats seen more than once; maxc/maxi: highest tally and its index.
	   NOTE(review): the scan stops at m - 1, so the largest index n itself is
	   never examined -- looks like an off-by-one, confirm before changing. */
	k = maxc = 0;
	for(i = 0; i < m; i ++)	{
		if(counts[i] > 1)	k ++;
		if(counts[i] > maxc)	{
			maxi = i;
			maxc = counts[i];
		}
	}
	printf("m %d k %d\n", m, k);
	printf("%d repeats (%s %d) %d subrepeats %d max_multip subrepeats index %d.\n", num_repeats,
		chriname, j, k, maxc, maxi);

	free((void *) counts);
	chrname = free_name(chrname, 100);
	free((void *) repeats);
	free((void *) segment);

	return 0;
}
Пример #2
0
Файл: xygrid.c Проект: krzul/dia
/*--------------------------------------------------------*/
/*
 * xygrid: fit two polynomials zx(x,y) and zy(x,y) of degree par.ndeg to a
 * list of coordinates, iteratively rejecting outliers, and store the
 * resulting coefficients in a binary file.
 *
 * Usage: xygrid  parameter_file  input_list  coeff_file
 *
 * Output file layout: one int (number of terms) followed by the coeffx and
 * coeffy arrays of doubles.  Returns 0 on success; every failure goes
 * through errmess() or exit(1).
 */
int main(int argc, char *argv[])
{
  char    *parfname, *inpfname, *outfname;
  int     i, nobj, nterm, ik, ik0, delta, niter;
  float   *x, *nx, *nzx, *zx,
          *y, *ny, *nzy, *zy,
          dx, dy, rr, thresh, sx, sy;
  double  *coeffx, *coeffy;
  FILE    *outf;
  PARAMS  par;

/* IO stuff */

  if (argc != 4)
  {
    printf("\n\tUSAGE: xygrid  parameter_file  input_list  coeff_file\n\n");
    exit(1);
  }

  parfname = argv[1];
  inpfname = argv[2];
  outfname = argv[3];

  readpar(parfname, &par);

  /* number of coefficients of a bivariate polynomial of degree ndeg */
  nterm = (par.ndeg+1)*(par.ndeg+2)/2;

  nobj=readcoor(inpfname, &x, &y, &zx, &zy);

  if (!(coeffx=(double *)calloc(nterm, sizeof(double))))
    errmess("calloc(coeffx)");
  if (!(coeffy=(double *)calloc(nterm, sizeof(double))))
    errmess("calloc(coeffy)");

/* make initial fit */
  fit(x, y, zx, par.ndeg, nobj, coeffx);
  fit(x, y, zy, par.ndeg, nobj, coeffy);

  sx = sigma(x, y, zx, par.ndeg, nobj, coeffx);
  sy = sigma(x, y, zy, par.ndeg, nobj, coeffy);

  /* squared rejection radius.
     NOTE(review): computed once from the initial fit and never refreshed
     although sx/sy are recomputed every iteration -- confirm intended. */
  thresh = par.sigmaf*par.sigmaf*(sx*sx + sy*sy);

  ik = nobj;
  delta = 1;
  niter = 0;

  /* scratch arrays that receive the points surviving each clipping pass */
  if (!(nx=(float *)calloc(nobj, sizeof(float))))
    errmess("readcoor: calloc(nx)");
  if (!(ny=(float *)calloc(nobj, sizeof(float))))
    errmess("readcoor: calloc(ny)");
  if (!(nzx=(float *)calloc(nobj, sizeof(float))))
    errmess("readcoor: calloc(nzx)");
  if (!(nzy=(float *)calloc(nobj, sizeof(float))))
    errmess("readcoor: calloc(nzy)");

/* sigma clipping of the fit until MAX_NITER reached or nothing changes */

  while ((delta > 0) && (niter < par.maxniter))
  {
    ik0 = ik;
    ik  = 0;

    for (i=0; i<nobj; i++)
    {
      dx  = poly(x[i], y[i], par.ndeg, coeffx) - zx[i];
      dy  = poly(x[i], y[i], par.ndeg, coeffy) - zy[i];

      rr  = dx*dx + dy*dy;

      /* always store the point at slot ik; the slot is only kept (ik
         advanced) when the residual is inside the threshold */
      nx[ik]  = x[i];
      ny[ik]  = y[i];
      nzx[ik] = zx[i];
      nzy[ik] = zy[i];

      if (rr < thresh) ++ik;
    }

    /* number of points kept compared with the previous pass */
    delta = ik0 - ik;

    fit(nx, ny, nzx, par.ndeg, ik, coeffx);
    fit(nx, ny, nzy, par.ndeg, ik, coeffy);

    sx = sigma(nx, ny, nzx, par.ndeg, ik, coeffx);
    sy = sigma(nx, ny, nzy, par.ndeg, ik, coeffy);

    niter++;
  }

  free(x);
  free(y);
  free(zx);
  free(zy);
  free(nx);
  free(ny);
  free(nzx);
  free(nzy);

/* print results and store coefficients in binary file */

  if (par.verbose)
  {
    printf("\n");
    for (i=0; i<nterm; i++)
    {
      printf("coeffx[%d] = %9.6f   ", i, coeffx[i]);
      printf("coeffy[%d] = %9.6f \n", i, coeffy[i]);
    }
  }

  printf("%s:  sigmax= %.4f   sigmay= %.4f   ndata= %d   nleft= %d\n",
          outfname, sx, sy, nobj, ik);

  /* "wb": the payload is raw binary, so text-mode newline translation
     must not be applied on platforms that distinguish the two modes.
     errmess() is assumed not to return, as the original code already
     relied on after a failed fopen(). */
  if (!(outf=fopen(outfname, "wb"))) errmess(outfname);

  /* a short write would silently leave a truncated coefficient file */
  if (fwrite(&nterm, sizeof(int), 1, outf) != 1)
    errmess(outfname);
  if (fwrite(coeffx, sizeof(double), nterm, outf) != (size_t)nterm)
    errmess(outfname);
  if (fwrite(coeffy, sizeof(double), nterm, outf) != (size_t)nterm)
    errmess(outfname);

  /* fclose() flushes buffered data, so its result matters for a write stream */
  if (fclose(outf) != 0) errmess(outfname);

  free(coeffx);
  free(coeffy);

  return(0);
}
Пример #3
0
/*
 * Convert pairwise alignments read from `inpfile` into a binary file.
 *
 * Reads one sequence from `seqfile`, builds its reverse complement in slot 1,
 * reads the alignments with readph() and writes them to `outfile`: for each
 * of the two alignment lists, the list size followed by every alignment
 * (reads[1], mis_match, length, pos[0][], pos[1][]).  Alignments are freed
 * as they are written.
 *
 * Returns 0 on completion.
 */
int main(int argc, char **argv)
{
	int	i, j, m, n;
	char	**src_seq, **src_name;
	int	*len_seq, num_seq;
	ALIGN	**align, *aln, *aln0;
	FILE	*fp;

	readpar();
	random1(&idum);
	initenv(argc, argv);

/*	Input the sequence; room is reserved for it and its reverse complement	*/

	len_seq = (int *) ckalloc(2 * sizeof(int));
	src_seq = (char **) ckalloc(2 * sizeof(char *));
	src_name = (char **) ckalloc(1 * sizeof(char *));
	src_name[0] = (char *) ckalloc(100 * sizeof(char));

	fp = ckopen(seqfile, "r");
	num_seq = readseq1by1(src_seq, src_name, len_seq, fp);
	fclose(fp);
	printf("Genome length: %d\n", len_seq[0]);

/*	Make the reverse complement of the input sequence: slot 0 --> slot 1	*/

	len_seq[1] = len_seq[0];
	src_seq[1] = (char *) ckalloc(len_seq[0] * sizeof(char));
	for(j = 0; j < len_seq[0]; j ++)	{
		src_seq[1][j] = rev(src_seq[0][len_seq[0] - j - 1]);
	}

/*      Read in pairwise alignments by Reputer	*/

	align = (ALIGN **) ckalloc(2 * sizeof(ALIGN *));
	fp = ckopen(inpfile, "r");
	n = readph(align, src_seq, len_seq, fp, min_leg, min_id);
	fclose(fp);
	printf("# alignments input: %d.\n", n);

/*	Write alignments	*/

	fp = ckopen(outfile, "w");
	for(m = 0; m < 2; m ++)	{
		n = size_align(align[m]);
		fwrite(&n, sizeof(int), 1, fp);
		aln = align[m];
		while(aln)	{
			fwrite(&(aln -> reads[1]), sizeof(int), 1, fp);
			fwrite(&(aln -> mis_match), sizeof(int), 1, fp);
			fwrite(&(aln -> length), sizeof(int), 1, fp);
			fwrite(aln -> pos[0], sizeof(int), aln -> length, fp);
			fwrite(aln -> pos[1], sizeof(int), aln -> length, fp);
			/* remember the successor before the node is released */
			aln0 = aln -> next;
			free((void *) aln -> pos[0]);
			free((void *) aln -> pos[1]);
			free((void *) aln);
			aln = aln0;
		}
	}
	fclose(fp);
	printf("Done...\n");

	free((void **) align);
	/* src_seq has exactly 2 slots and src_name exactly 1 (see the
	   allocations above); the bounds are clamped so that an unexpected
	   num_seq cannot make the loops read past the arrays. */
	for(i = 0; i < 2 * num_seq && i < 2; i ++)	{
		free((void *) src_seq[i]);
	}
	for(i = 0; i < num_seq && i < 1; i ++)	{
		free((void *) src_name[i]);
	}
	free((void **) src_seq);
	free((void **) src_name);
	free((void *) len_seq);

	return 0;
}
Пример #4
0
main(int argc, char **argv)
{
	int	i, j, k, l, m, n;
	int	dist[20];
	int	reads;
	int	num_vertex, num_class, num_edge;
	int	*len_seq, num_seq, num_remain;
	int	**num_pa;
	char	**src_seq, **src_name;
	char	temp[100];
	ALIGN	**eq_class, *align;
	EDGE	**edge, *edge1, *edge2, *bal_edge1, *bal_edge2;
	PATH	*path;
	int	num_path;
	NODES	**vertex, *begin, *node, *node_next, **start_node;
	LIST	**list;
	READINTERVAL	*readinterval;
	POSITION	*position;
	FILE	*fp, *fp1;

	readpar();
	random1(&idum);
	initenv(argc, argv);
	printf("%d %d %d\n", sizeof(POSITION), sizeof(NODES), sizeof(LIST));

/*	Input the length of the genome (required) */

	len_seq = (int *) ckalloc(2 * MAX_NUM * sizeof(int));
	src_name = alloc_name(MAX_NUM, 100);
	fp = ckopen(lenfile, "r");
	num_seq = readlen(fp, len_seq, src_name);
	fclose(fp);

	src_seq = (char **) ckalloc(2 * num_seq * sizeof(char *));
	l = 0;
	printf("Genome length: ");
	for(i = 0; i < num_seq; i ++)	{
		l += len_seq[i];
		printf("%d ", len_seq[i]);
	}
	printf("\n");
	printf("Total length: %d\n", l);

/*	Make reverse complements of input sequences rev(i) --> i + num_seq	*/

	for(i = 0; i < num_seq; i ++)	{
		len_seq[i + num_seq] = len_seq[i];
		src_seq[i] = (char *) ckalloc(len_seq[i] * sizeof(char));
		src_seq[i + num_seq] = (char *) ckalloc(len_seq[i] * sizeof(char));
		for(j = 0; j < len_seq[i]; j ++)	{
			src_seq[num_seq + i][j] = rev(src_seq[i][len_seq[i] - j - 1]);
		}
	}

/*	Input equivalent readintervales between reads --
	see the format of the equivalent readinterval files	*/

	printf("Read equivalent readintervales...\n");
	eq_class = (ALIGN **) ckalloc(2 * num_seq * sizeof(ALIGN *));
	fp = ckopen(inpfile, "r");
	num_class = readclass(eq_class, num_seq, fp);
	fclose(fp);
	printf("# equivalent readintervales input: %d\n", num_class);

/*
	for(i = 0; i < 2 * num_seq; i ++)	{
		align = eq_class[i];
		while(align)	{
			printf("See: \n");
			output_align(align, src_name, src_seq, len_seq, num_seq);
			getchar();
			align = align -> next;
		}
	}
*/

/*	Initialize the nodes: each position in each read is assigned
	as a new node. An array of "list" is set up for each read	*/

	list = (LIST **) ckalloc(2 * num_seq * sizeof(LIST *));
	for(i = 0; i < 2 * num_seq; i ++)	{
		list[i] = (LIST *) ckalloc(len_seq[i] * sizeof(LIST));
	}
	printf("intitialize nodes...\n");
	initialize(list, len_seq, num_seq);
	printf("done.\n");
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes before merge: %d\n", n);

/*	Glue together two nodes if their corresponding positions are defined
	as equivalent in a pairwise alignment		*/

	printf("Merge...\n");
	merge(num_seq, len_seq, eq_class, num_class, list);
	printf("done.\n");
	for(i = 0; i < num_seq; i ++)	{
		while(eq_class[i])	{
			eq_class[i] = free_align(eq_class[i]);
		}
	}
	free((void **) eq_class);

/*      Compute the width of each node  */

        for(i = 0; i < 2 * num_seq; i ++)       {
                for(j = 0; j < len_seq[i]; j ++)        {
                        if(!list[i][j].node -> visit)   {
                                list[i][j].node -> num_path = countthickness(list[i][j].node);
                                list[i][j].node -> visit = 1;
                        }
                }
        }
	cleannode(list, len_seq, 2 * num_seq);
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes after merge: %d\n", n);

/*	Add edges to the graph		*/
	edge = (EDGE **) ckalloc(n * sizeof(EDGE *));
	num_edge = graph(num_seq, len_seq, list, edge);
	printf("# edges: %d\n", num_edge);
	start_node = (NODES **) ckalloc(num_seq * sizeof(NODES *));
	for(i = 0; i < num_seq; i ++)	{
		if(len_seq[i] > 0)	{
			start_node[i] = list[i][0].node;
		} else	{
			start_node[i] = (NODES *) NULL;
		}
	}
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) list[i]);
	}
	free((void **) list);

	vertex = (NODES **) ckalloc(2 * num_edge * sizeof(NODES *));
	num_vertex = count_vertex(edge, num_edge, vertex);
	free((void **) edge);

	num_pa = (int **) ckalloc(MAX_BRA * sizeof(int *));
	for(i = 0; i < MAX_BRA; i ++)	{
		num_pa[i] = (int *) ckalloc(MAX_BRA * sizeof(int));
	}
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);

/*	Assign the complementary edges of each edge	*/
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			edge1 = vertex[i] -> nextedge[j];
			edge1 -> bal_edge = find_bal_edge(edge1, len_seq, num_seq, i);
		}
	}

/*	Remove bulges in the graph	*/
	printf("Shave...\n");
	num_vertex = shave_graph(vertex, num_vertex);
	printf("done.\n");

/*      Remove cycles shorter than some threshold in the graph  */
/*
        printf("Shaving graph...\n");
        num_vertex = rem_cycle(vertex, num_vertex);
        printf("done.\n%d vertices remained.\n", num_vertex);
*/

/*	remove short edges	*/
/*
	printf("Remove shortedges...\n");
	num_vertex = rem_short_edge(vertex, num_vertex, len_seq);
	printf("done.\n%d vertices remained.\n", num_vertex);
	fflush(stdout);
*/

	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);
	fflush(stdout);

/*	Allocate the spaces for paths	*/
	printf("Allocating paths...\n");
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> num_path = 0;
	}

/*	Build sequence paths	*/
	printf("Define paths...\n");
	m = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			m += vertex[i] -> nextedge[j] -> multip;
		}
	}
	path = (PATH *) ckalloc(2 * num_seq * sizeof(PATH));
	for(i = 0; i < 2 * num_seq; i ++)	{
		path[i].edge = (EDGE **) ckalloc(m * sizeof(EDGE *));
	}
	num_path = readpath(start_node, path, num_seq);
	free((void **) start_node);
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	m = l = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			l += vertex[i] -> nextedge[j] -> length;
			if(vertex[i] -> nextedge[j] -> length > m)	{
				m = vertex[i] -> nextedge[j] -> length;
			}
		}
	}
	printf("%d vertics %d edges (%d source %d sinks) remained: total length %d (maximal %d).\n", num_vertex, num_edge,
	 	num_pa[0][1], num_pa[1][0], l, m);
	fflush(stdout);

/*	Make consensus of edges	*/
	initial_edge(vertex, num_vertex, src_seq, num_seq);
	printf("edge initialed\n");

/*	Output sequence path	*/

	n = 0;
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> visit = i;
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			vertex[i] -> nextedge[j] -> start_cover = n;
			n ++;
		}
	}
	for(m = 0; m < num_seq; m ++)	{
		printf("len_path %d\n", path[m].len_path);
		printf("Sequence%d: ", m + 1);
		for(i = 0; i < path[m].len_path; i ++)	{
			printf("%d -- %d(%d,%d) --> ", path[m].edge[i] -> begin -> visit,
				path[m].edge[i] -> start_cover, path[m].edge[i] -> multip,
				path[m].edge[i] -> length);
			if(i % 5 == 4)	{
				printf("\n");
			}
		}
		if(path[m].len_path > 0)	{
			printf("%d\n", path[m].edge[i - 1] -> end -> visit);
		} else	{
			printf("\n");
		}
		fflush(stdout);
	}

/*	Output graph & contigs	*/
	sprintf(temp, "%s.edge", seqfile);
	fp = ckopen(temp, "w");
	sprintf(temp, "%s.graph", seqfile);
	fp1 = ckopen(temp, "w");
	write_graph(vertex, num_vertex, fp, fp1);
	fclose(fp);
	fclose(fp1);

/*	Output read intervals in each edge	*/
	sprintf(temp, "%s.intv", seqfile);
	fp = ckopen(temp, "w");
	write_interval(vertex, num_vertex, fp);
	fclose(fp);

/*	Output graphviz format graph	*/

	sprintf(temp, "%s", outfile);
	fp = ckopen(temp, "w");
	output_graph(vertex, num_vertex, fp);
	fclose(fp);

	for(i = 0; i < MAX_BRA; i ++)	{
		free((void *) num_pa[i]);
	}
	free((void **) num_pa);
	for(i = 0; i < 2 * num_seq; i ++)	{
		if(path[i].len_path > 0)	{
			free((void **) path[i].edge);
		}
	}
	free((void *) path);
	free_graph(vertex, num_vertex);
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) src_seq[i]);
	}
	free((void **) src_seq);
	free_name(src_name, MAX_NUM);
	free((void *) len_seq);
}
Пример #5
0
/*==========================================
 * main
 *========================================== */
/*
 *  Command-line driver: parses options, loads the data set named by the
 *  first positional argument (stem), optionally restarts from results
 *  stored under the second (resstem), runs ITER sampling cycles with
 *  periodic reports/checkpoints, then saves and frees everything.
 *
 *  Returns 0 on normal completion; invalid input terminates through
 *  yap_quit() or exit(-1) after usage().
 */
int main(int argc, char* argv[])
{
  int c, iter, ITER=0, seed=0;
  enum dataType data = LdaC;
  enum dataType testdata = LdaC;
  int dots = 0;

  enum GibbsType fix_hold = GibbsNone;
  char *stem;
  char *resstem;
  int topwords = 20;
  int noerrorlog = 0;
  int displayed = 0;
  int load_vocab = 0;
  int checkpoint = 0;
  int restart = 0;
  int dopmi = 0;
  int restart_hca = 0;
  int load_phi = 0;
  int load_mu = 0;
  int procs = 1;
  int maxW = 0;
  enum ScoreType score=ST_idf;

  /*  values given with -S for these parameters are applied after pctl_fix()  */
  double BM0val=0, BM1val =0, BP0val=0, BP1val=0;

  clock_t t1=0, t2=0, t3=0;
  double tot_time = 0;
  double psample_time = 0;
  enum ParType par;
  /*
   *  default values
   */
  ddN.T = 10;
  ITER = 100;
  ddN.TEST = 0;

  pctl_init();

  while ( (c=getopt(argc, argv,"b:c:C:d:ef:F:g:G:h:K:l:L:N:o:pq:vr:s:S:t:T:vVW:"))>=0 ) {
    switch ( c ) {
    case 'b':
      if ( !optarg || sscanf(optarg,"%d",&ddP.back)!=1 )
        yap_quit("Need a valid 'b' argument\n");
      break;
    case 'c':
      if ( !optarg || sscanf(optarg,"%d",&checkpoint)!=1 )
        yap_quit("Need a valid 'c' argument\n");
      break;
    case 'C':
      if ( !optarg || sscanf(optarg,"%d",&ITER)!=1 )
        yap_quit("Need a valid 'C' argument\n");
      break;
    case 'd':
      if ( !optarg || sscanf(optarg,"%d",&dots)!=1 )
        yap_quit("Need a valid 'd' argument\n");
      break;
    case 'e':
      noerrorlog++;
      break;
    case 'f':
      if ( strcmp(optarg,"witdit")==0 )
        data = WitDit;
      else if ( strcmp(optarg,"docword")==0 )
        data = Docword;
      else if ( strcmp(optarg,"ldac")==0 )
        data = LdaC;
      else if ( strcmp(optarg,"bag")==0 )
        data = TxtBag;
      else if ( strcmp(optarg,"lst")==0 )
        data = SeqTxtBag;
      else
        yap_quit("Illegal data type for -f\n");
      break;
    case 'F':
      if ( strcmp(optarg,"all")==0 ) {
        for (par=ParAM; par<=ParBB; par++)
          ddT[par].fix = 1;
      } else {
        par = findpar(optarg);
        if ( par==ParNone )
          yap_quit("Illegal arg for -F\n");
        ddT[par].fix = 1;
      }
      break;
    case 'g':
      {
        char var[100];
        int st=0;
        /*  field width keeps the scan inside var[]  */
        if ( !optarg || sscanf(optarg,"%99[^, ],%d", &var[0], &st)<1  )
          yap_quit("Need a valid 'g' argument\n");
        par = findpar(var);
        if ( par==ParBP1 )
          ddP.kbatch = st;
        else
          yap_quit("Illegal var for -g\n");
      }
      break;
    case 'G':
      {
        char var[100];
        int st=0, cy=0;
        /*  field width keeps the scan inside var[]  */
        if ( !optarg || sscanf(optarg,"%99[^, ],%d,%d",
                               &var[0], &cy, &st)<2 || st<0 || cy<0 )
          yap_quit("Need a valid 'G' argument\n");
        par = findpar(var);
        if ( par==ParNone || par==ParB0P || par==ParB0M )
          yap_quit("Illegal var for -G\n");
        ddT[par].fix = 0;
        ddT[par].start = st;
        ddT[par].cycles = cy;
      }
      break;
    case 'h':
      {
        fix_hold = GibbsHold;
        if ( !optarg  )
          yap_quit("Need a valid 'h' argument\n");
        if ( strncmp(optarg,"dict,",5)==0 ) {
          if ( sscanf(&optarg[5],"%d",&ddP.hold_dict)<1 || ddP.hold_dict<2 )
            yap_quit("Need a valid 'hdict' argument\n");
        } else if ( strncmp(optarg,"fract,",6)==0 ) {
          if ( sscanf(&optarg[6],"%lf",&ddP.hold_fraction)<1
               || ddP.hold_fraction<=0 || ddP.hold_fraction>=1 )
            yap_quit("Need a valid 'hfract' argument\n");
        } else if ( strncmp(optarg,"doc,",4)==0 ) {
          if ( sscanf(&optarg[4],"%d",&ddP.hold_every)<1 || ddP.hold_every<2 )
            yap_quit("Need a valid 'hdoc' argument\n");
        } else
          yap_quit("Need a valid 'h' argument\n");
      }
      break;
    case 'K':
      if ( !optarg || sscanf(optarg,"%d",&ddN.T)!=1 )
        yap_quit("Need a valid 'K' argument\n");
      break;
    case 'l':
      if ( !optarg )
        yap_quit("Need a valid 'l ' argument\n");
      if ( strncmp(optarg,"phi,",4)==0 ) {
        if ( sscanf(&optarg[4],"%d,%d",&ddP.phiiter, &ddP.phiburn)<2 )
          yap_quit("Need a valid 'l word,' argument\n");
      } else if ( strncmp(optarg,"theta,",6)==0 ) {
        if ( sscanf(&optarg[6],"%d,%d",&ddP.thetaiter, &ddP.thetaburn)<2 )
          yap_quit("Need a valid 'l word,' argument\n");
      } else if ( strncmp(optarg,"mu,",3)==0 ) {
        if ( sscanf(&optarg[3],"%d,%d",&ddP.muiter, &ddP.muburn)<2 )
          yap_quit("Need a valid 'l word,' argument\n");
      } else if ( strncmp(optarg,"prog,",5)==0 ) {
        if ( sscanf(&optarg[5],"%d,%d",&ddP.progiter, &ddP.progburn)<2 )
          yap_quit("Need a valid 'l prog,' argument\n");
      } else
        yap_quit("Need a valid DIAG code in 'l' argument\n");
      break;
    case 'L':
      if ( !optarg )
        yap_quit("Need a valid 'L ' argument\n");
      if ( strncmp(optarg,"like,",5)==0 ) {
        if ( sscanf(&optarg[5],"%d,%d",&ddP.mltiter, &ddP.mltburn)<1 )
          yap_quit("Need a valid 'L like' argument\n");
      } else
        yap_quit("Need a valid DIAG code in 'L' argument\n");
      break;
    case 'N':
      if ( !optarg || sscanf(optarg,"%d,%d", &ddP.maxN, &ddP.maxM)<1 )
        yap_quit("Need a valid 'N' argument\n");
      break;
    case 'o':
      {
        /*  "<score>[,<topwords>]"; only the part before the comma is compared  */
        char *ptr = strchr(optarg, ',');
        int len = strlen(optarg);
        if ( ptr )
          len = ptr - optarg;
        if ( strncmp(optarg,"idf",len)==0 )
          score = ST_idf;
        else if ( strncmp(optarg,"count",len)==0 )
          score = ST_count;
        else if ( strncmp(optarg,"Q",len)==0 )
          score = ST_Q;
        else if ( strncmp(optarg,"cost",len)==0 )
          score = ST_cost;
        else
          yap_quit("Need a valid parameter for 'o' argument\n");
        if ( ptr ) {
          /*  there was a second arg */
          if ( sscanf(ptr+1, "%d", &topwords) != 1)
            yap_quit("Need a valid second 'o' argument\n");
        }
      }
      break;
    case 'p':
      dopmi++;
      break;
    case 'q':
      /*  procs sizes the per-cycle arrays below, so it must be at least 1  */
      if ( !optarg || sscanf(optarg, "%d", &procs) != 1 || procs < 1 )
        yap_quit("Need a valid 'q' argument\n");
      break;
    case 'r':
      if(!optarg )
        yap_quit("Need a valid 'r' argument\n");
      if ( strcmp(optarg,"tca")==0 )
        restart++;
      else if ( strcmp(optarg,"hca")==0 )
        restart_hca++;
      else if ( strcmp(optarg,"phi")==0 )
        load_phi++;
      else if ( strcmp(optarg,"mu")==0 )
        load_mu++;
      else
        yap_quit("Need a valid 'r' argument\n");
      break;
    case 's':
      if ( !optarg || sscanf(optarg,"%d",&seed)!=1 )
        yap_quit("Need a valid 's' argument\n");
      break;
    case 'S':
      {
        char var[100];
        double vin=0;
        /*  field width keeps the scan inside var[]  */
        if ( !optarg || sscanf(optarg,"%99[^=, ]=%lf",
                               &var[0], &vin)<2  )
          yap_quit("Need a valid 'S' argument\n");
        par = findpar(var);
        if ( par==ParNone )
          yap_quit("Illegal var for -S\n");
        else if ( par==ParBM0 )
          BM0val = vin;
        else if ( par==ParBM1 )
          BM1val = vin;
        else if ( par==ParBP0 )
          BP0val = vin;
        else if ( par==ParBP1 )
          BP1val = vin;
        else
          *(ddT[par].ptr) = vin;
      }
      break;
    case 't':
      if ( !optarg || sscanf(optarg,"%d",&ddP.training)!=1 )
        yap_quit("Need a valid 't' argument\n");
      break;
    case 'T':
      if ( !optarg )
        yap_quit("Need a valid 'T' argument\n");
      {
        /*
         *  the argument is either the stem of a test file (tried first
         *  in the training data format, then in the test format) or a
         *  number of test documents
         */
        char *tname = data_name(optarg,data);
        FILE *fp = fopen(tname,"r");
        if ( fp==NULL ) {
          free(tname);
          tname = data_name(optarg,testdata);
          fp = fopen(tname,"r");
        } else {
          testdata = data;
        }
        free(tname);
        if ( fp!=NULL ) {
          /*  its a valid test filename */
          ddP.teststem = optarg;
          fclose(fp);
        } else if ( sscanf(optarg,"%d",&ddN.TEST)!=1 )
          yap_quit("Need a valid 'T' argument\n");
      }
      break;
    case 'v':
      verbose++;
      break;
    case 'V':
      load_vocab = 1;
      break;
    case 'W':
      if ( !optarg || sscanf(optarg,"%d",&maxW)<1 )
        yap_quit("Need a valid 'W' argument\n");
      break;
    default:
      yap_quit("Unknown option '%c'\n", c);
    }
  }

  if (argc-optind != 2) {
    usage();
    exit(-1);
  }
  if ( optind>=argc ) {
    yap_quit("No arguments given\n");
  }
  stem = strdup(argv[optind++]);
  resstem = strdup(argv[optind++]);

  if ( dopmi )
    load_vocab = 1;
  if ( dopmi && verbose !=2 ) {
    /*
     *   due to the use of the ".top" file
     *   its really multi-purpose
     */
    yap_quit("When computing PMI verbose must be exactly 2\n");
  }

  if ( noerrorlog==0 ) {
    char *wname = yap_makename(resstem, ".log");
    yap_file(wname);
    free(wname);
  }

  yap_commandline(argc, argv);
#ifdef H_THREADS
  yap_message(" Threads,");
#endif

  if ( restart || restart_hca ) {
    char *fname = yap_makename(resstem,".par");
    FILE *fp = fopen(fname,"r");
    char *buf;
    if ( !fp )
      yap_quit("Parameter file '%s' doesn't exist\n", fname);
    fclose(fp);
    buf = readpar(resstem,"T",50);
    if ( !buf )
      yap_quit("Parameter file '%s' has no T\n", fname);
    ddN.T = atoi(buf);
    free(buf);
    if ( restart ) {
      buf = readpar(resstem,"E",50);
      if ( !buf )
        yap_quit("Parameter file '%s' has no E\n", fname);
      ddN.E = atoi(buf);
      free(buf);
      pctl_read(resstem);
    }
    /*  fname is only released once the last message that prints it is past  */
    free(fname);
    /*  the remaining values are only taken from the file if not set by options  */
    if ( maxW==0 ) {
      buf = readpar(resstem,"W",50);
      if ( buf ) {
        maxW = atoi(buf);
        free(buf);
      }
    }
    if ( ddP.training==0 ) {
      buf = readpar(resstem,"TRAIN",50);
      if ( buf ) {
        ddP.training = atoi(buf);
        free(buf);
      }
    }
    if ( ddN.TEST==0 ) {
      buf = readpar(resstem,"TEST",50);
      if ( buf ) {
        ddN.TEST = atoi(buf);
        free(buf);
      }
    }
  }

  assert(ddN.T>0);
  assert(ddN.TEST>=0);
  assert(restart || restart_hca || ITER>0);

  if ( load_phi && ddP.phiiter>0 )
    yap_quit("Options '-l phi,...' and '-r phi' incompatible\n");
  if ( load_mu && ddP.muiter>0 )
    yap_quit("Options '-l mu,...' and '-r mu' incompatible\n");

  /*
   *   set random number generator
   */
  if ( seed ) {
    rng_seed(rngp,seed);
  } else {
    rng_time(rngp,&seed);
  }
  /*  seed is an int; convert it to match the %lu conversion  */
  yap_message("Setting seed = %lu\n", (unsigned long)seed);

  /*
   *  read data and get dimensions
   */
  {
    D_bag_t *dbp = data_read(stem, data);
    int training = pctl_training(dbp->D);
    if ( ddP.teststem ) {
      D_bag_t *dbpt = data_read(ddP.teststem, testdata);
      /* need to load a separate test set, strip to bare training */
      data_shrink(dbp, training);
      ddN.TEST = dbpt->D;
      data_append(dbp, dbpt);
      free(dbpt->w);  free(dbpt->d); free(dbpt);
    }
    if ( maxW>0 ) {
      if ( dbp->W <= maxW )
        dbp->W = maxW;
      if ( dbp->W > maxW )
        data_vocabshrink(dbp, maxW);
    }
    /*
     *  transfer into system
     */
    ddN.D = dbp->D;
    ddN.W = dbp->W;
    ddN.N = dbp->N;
    ddN.NT = dbp->N;
    ddN.DT = training;
    ddD.w = dbp->w;
    ddD.d = dbp->d;
    free(dbp);
    if ( ddN.DT<ddN.D ) {
      /*  recompute NT: index of the first word of a non-training document */
      int i;
      for (i=0; i<ddN.N; i++)
        if ( ddD.d[i]>=ddN.DT )
          break;
      ddN.NT = i;
    }
  }

  data_read_epoch(stem);

  /*
   *  at this point, dimensions are fixed, so load phi and mu if needed
   */
  if ( load_phi )
    pctl_loadphi(resstem);
  if ( load_mu )
    pctl_loadmu(resstem);

  /*
   *   correct parameters after command line
   */
  pctl_fix(ITER);
  if ( BM0val>0 ) {
    ddP.b_mu[0] = BM0val;
  }
  if ( BM1val>0 ) {
    int i;
    for (i=1; i<ddN.E; i++)
      ddP.b_mu[i] = BM1val;
  }
  if ( BP0val>0 ) {
    int i;
    for (i=0; i<ddN.T; i++)
      ddP.b_phi[0][i] = BP0val;
  }
  if ( BP1val>0 ) {
    int i;
    if ( ddN.E==1 )
      yap_quit("b_phi[1] invalid when epochs==1\n");
    for (i=0; i<ddN.T; i++)
      ddP.b_phi[1][i] = BP1val;
  }
  pctl_samplereport();

  /*
   *   all data structures
   */
  data_alloc();
  if ( ddP.phiiter>0 )
    phi_init(resstem);
  else
    ddS.phi = NULL;
  if ( ddP.muiter>0 )
    mu_init(resstem);
  else
    ddS.mu = NULL;
  if ( ddP.thetaiter>0 )
    theta_init(resstem);
  else
    ddS.theta = NULL;
  tca_alloc();
  if ( PCTL_BURSTY() )
    dmi_init(&ddM, ddS.z, ddD.w, ddD.N_dTcum,
             ddN.T, ddN.N, ddN.W, ddN.D, ddN.DT,
             (fix_hold==GibbsHold)?pctl_hold:NULL);
  if ( load_vocab ) {
    data_vocab(stem);
  }

  cache_init();

  /*
   *  yap some details
   */
  data_report(ITER, seed);
  pctl_report();

  /*
   *  load/init topic assignments and prepare statistics
   */
  if ( restart || restart_hca) {
    tca_read_z(resstem, 0, ddN.DT);
    tca_rand_z(ddN.T, ddN.DT, ddN.D);
  } else {
    tca_rand_z(ddN.T, 0, ddN.D);
  }
  tca_reset_stats(resstem, restart, 0);

  if ( (restart || restart_hca ) && ITER )
    yap_message("Initial log_2(perp)=%lf\n", -M_LOG2E * likelihood()/ddN.NT);

  if ( ITER )
    yap_report("cycles: ");

  for (iter=0; iter<ITER; iter++) {
    int  pro;
    double thislp = 0;
    int   thisNd = 0;
    int doc;
#ifdef H_THREADS
    pthread_t thread[procs];
#endif
    D_pargs_p parg[procs];

#ifdef MU_CACHE
    mu_side_fact_reinit();
#endif
#ifdef PHI_CACHE
    phi_cache_reinit();
#endif

    t1 = clock();

    /*
     *  sampling
     */
#ifdef IND_STATS
    ddP.doc_ind_stats = u32tri(ddN.T,ddN.E,ddN.E);
    ddP.word_ind_stats = u32tri(ddN.T,ddN.E,ddN.E);
#endif

    /*  a bit complex if no threads!  */
    doc = 0;
    for (pro = 0 ; pro < procs ; pro++){
      parg[pro].dots=dots;
      parg[pro].procs=procs;
      parg[pro].doc = &doc;
#ifndef H_THREADS
      sampling_p(&parg[pro]);
#else
      if ( procs==1 )
        sampling_p(&parg[pro]);
      else if( pthread_create(&thread[pro],NULL,sampling_p,(void*) &parg[pro]) != 0){
        /*  NOTE(review): a thread that failed to start is still joined
         *  and its parg results still summed below -- confirm handling  */
        yap_message("thread failed %d\n",pro+1 );
      }
#endif
    }
#ifdef H_THREADS
    if ( procs>1 ) {
      //waiting for threads to finish
      for (pro = 0; pro < procs; pro++){
        pthread_join(thread[pro], NULL);
      }
    }
#endif

    // getting lp, Nd and clock
    for(pro = 0; pro < procs; pro++){
      thislp +=  parg[pro].thislp;
      thisNd +=  parg[pro].thisNd;
      tot_time += parg[pro].tot_time;
    }
#ifdef H_THREADS
    if ( procs>1 )
      tca_reset_stats(NULL,1,1);
#endif
    /*
     *  full check
     */
#ifndef NDEBUG
    {
      int e, d;
      check_cp_et();
      for (e=0; e<ddN.E; e++)
        check_m_vte(e);
      for (d=0; d<ddN.DT; d++)
        check_n_dt(d);
    }
#endif

#ifdef IND_STATS
    {
      char *fname = yap_makename(resstem,".istats");
      FILE *ifp = fopen(fname,"a");
      int e1, e2, kk;
      if ( !ifp )
        yap_quit("Cannot open file '%s' for append\n", fname);
      fprintf(ifp,"Iteration %d\n", iter);
      for (kk=0; kk<ddN.T; kk++) {
        fprintf(ifp," Topic %d\n", kk);
        for (e1=0; e1<ddN.E; e1++) {
          fprintf(ifp,"  Epoch %d\n     ", e1);
          for (e2=0; e2<ddN.E; e2++)
            fprintf(ifp," %u", (unsigned)ddP.doc_ind_stats[kk][e1][e2]);
          fprintf(ifp,"\n     ");
          for (e2=0; e2<ddN.E; e2++)
            fprintf(ifp," %u", (unsigned)ddP.word_ind_stats[kk][e1][e2]);
          fprintf(ifp,"\n");
        }
      }
      fclose(ifp);
      free(ddP.doc_ind_stats[0][0]); free(ddP.doc_ind_stats[0]);
      free(ddP.doc_ind_stats);
      free(ddP.word_ind_stats[0][0]); free(ddP.word_ind_stats[0]);
      free(ddP.word_ind_stats);
      free(fname);
    }
#endif

    /*
     *   sample hyperparameters
     */
    t3 = clock();
    pctl_sample(iter, procs);

    /*
     *   do time calcs here to remove diagnostics+reporting
     */
    t2 = clock();
    tot_time += (double)(t2 - t1) / CLOCKS_PER_SEC;
    psample_time += (double)(t2 - t3) / CLOCKS_PER_SEC;
    /*
     *   progress reports
     */
    if ( ( iter>ddP.progburn && (iter%ddP.progiter)==0 ) || iter+1>=ITER ) {
      yap_message(" %d\nlog_2(perp)=%lf,%lf", iter,
                  -M_LOG2E * likelihood()/ddN.NT, -M_LOG2E * thislp/thisNd);
      pctl_update(iter);
      if ( verbose && iter%10==0 )
        yap_probs();
      if ( iter>0 && verbose>1 ) {
        if ( ddN.tokens ) {
          tca_displaytopics(resstem,topwords,score);
          displayed++;
        }
      }
      if ( iter+1<ITER ) {
        // yap_message("\n");
        yap_report("cycles: ");
      }
    } else {
      yap_message(" %d", iter);
      if ( verbose>1)  yap_message("\n");
    }

    if ( checkpoint>0 && iter>0 && iter%checkpoint==0 ) {
      data_checkpoint(resstem, stem, iter+1);
      yap_message(" checkpointed\n");
      tca_report(resstem, stem, ITER, procs, fix_hold,
                 (dopmi&&displayed>0)?1:0);
    }
    if ( ddP.phiiter>0 && iter>ddP.phiburn && (iter%ddP.phiiter)==0 )
      phi_update();
    if ( ddP.thetaiter>0 && iter>ddP.thetaburn && (iter%ddP.thetaiter)==0 )
      theta_update();
    if ( ddP.muiter>0 && iter>ddP.muburn && (iter%ddP.muiter)==0 )
      mu_update();
  } // over iter

  if ( ITER )
    yap_report("Finished after %d cycles on average of %lf+%lf(s) per cycle\n",
               iter,  (tot_time-psample_time)/iter, psample_time/iter);

  if ( ( verbose==1 || ((iter+1)%5!=0 && verbose>1) ) ) {
    if ( ddN.tokens ) {
      tca_displaytopics(resstem,topwords,score);
      displayed++;
    }
  }

  yap_probs();

  if ( ITER>0 )
    data_checkpoint(resstem, stem, ITER);

  tca_report(resstem, stem, ITER, procs, fix_hold, (dopmi&&displayed>0)?1:0);

  if ( ddP.phiiter>0 )
    phi_save(resstem);
  if ( ddP.thetaiter>0 )
    theta_save(resstem);
  if ( ddP.muiter>0 )
    mu_save(resstem);

  /*
   *  free
   */
  phi_free();
  theta_free();
  mu_free();
  cache_free();
  pctl_free();
  data_free();
  dmi_free(&ddM);
  tca_free();
  free(stem);
  free(resstem);
  rng_free(rngp);

  return 0;
}