Esempio n. 1
0
void cband_solve(dcomplex **a, int n, int m1, int m2, dcomplex *b)
{
  // Solve a complex banded system: decompose `a` with cbandec(), then
  // back-substitute with cbanbks(), which receives the right-hand side `b`.
  //
  // The two work arrays handed to both routines are kept in static storage
  // and only re-allocated when a call asks for more rows (n) or more
  // columns (m1) than are currently held.
  static dcomplex **lower;        // work matrix passed to cbandec/cbanbks
  static unsigned long *pivots;   // index vector passed to cbandec/cbanbks
  static int rows_alloc = 0;      // row count the work arrays were sized for
  static int cols_alloc = 0;      // column count `lower` was sized for

  // Row count grew: rebuild both work arrays for the new size.
  if(n > rows_alloc) {
    if(rows_alloc != 0) {
      free_cmatrix(lower);
      delete[] pivots;
    }
    lower      = cmatrix(n, m1);
    pivots     = new unsigned long[n];
    rows_alloc = n;
    cols_alloc = m1;
  }

  // Column count grew: only the work matrix has to be rebuilt.
  if(m1 > cols_alloc) {
    if(cols_alloc != 0) {
      free_cmatrix(lower);
    }
    lower      = cmatrix(rows_alloc, m1);
    cols_alloc = m1;
  }

  // LU decomposition; the value returned through the last argument is not used.
  dcomplex dec_out;
  cbandec(a, n, m1, m2, lower, pivots, &dec_out);

  // Back-substitution using the decomposition computed above.
  cbanbks(a, n, m1, m2, lower, pivots, b);
}
Esempio n. 2
0
void get_pair_type(long num_hbonds, char **hb_atom1, char **hb_atom2,
               long i, long j, char *bseq, char *type)

/* Identify the type of pair interaction according to the Leontis and
   Westhof nomenclature.

   For each side of the pair the hydrogen-bond atom names are passed through
   get_unequility() and then edge_type(); the two resulting labels are
   written to `type` as "<label for i>/<label for j>".  When there are no
   hydrogen bonds, `type` is set to "?/?".

   NOTE(review): the matrix obtained from cmatrix() is not released in this
   function - confirm whether a matching free call is missing. */

{
    char edge1[5], edge2[5];
    char **atoms;
    long count1, count2;

    atoms = cmatrix(0, num_hbonds+40, 0, 4);

    /* Nothing to classify without at least one hydrogen bond. */
    if (num_hbonds < 1) {
        sprintf(type,"?/?");
        return;
    }

    /* Side of residue i. */
    get_unequility(num_hbonds, hb_atom1, &count1, atoms);
    edge_type(count1, atoms, i, bseq, edge1);

    /* Side of residue j; the same scratch matrix is reused. */
    get_unequility(num_hbonds, hb_atom2, &count2, atoms);
    edge_type(count2, atoms, j, bseq, edge2);

    sprintf(type,"%s/%s",edge1, edge2);
}
/*--------------------------------------------------------------------------------*/
/*
 * Read a raw data file into a newly allocated Data structure.
 *
 * The file is treated as a sequence of samples, each nchan*npol bytes long
 * (nchan = 4096, npol = 4).  Only the first nchan bytes of every sample are
 * kept; they are stored transposed in dat->raw_data[channel][sample].
 * dat->raw_chans is filled with 900-(0.5+i)*dnu, dnu = (900-700)/nchan, and
 * dat->dt is set to 1e-3.
 *
 * Returns the new structure, or NULL when the file cannot be sized or
 * opened, when memory runs out, or when a sample cannot be read completely.
 * Nothing is leaked on the NULL paths.
 */
Data *read_gbt(const char *fname)
{
  int nchan=4096;
  int npol=4;

  /* Size the file before allocating anything, so this early return is leak-free. */
  long ndat=get_file_size(fname);
  if (ndat<=0) {
    printf("FILE %s unavailable for reading.\n",fname);
    return NULL;
  }
  int nsamp=ndat/npol/nchan;
  printf("have %d samples.\n",nsamp);

  /* The payload is raw bytes, so open in binary mode. */
  FILE *infile=fopen(fname,"rb");
  if (infile==NULL) {
    printf("FILE %s unavailable for reading.\n",fname);
    return NULL;
  }

  Data *dat=(Data *)malloc(sizeof(Data));
  char *tmp=(char *)malloc(sizeof(char)*nchan*npol);
  float *chans=(float *)malloc(sizeof(float)*nchan);
  if (dat==NULL || tmp==NULL || chans==NULL) {
    printf("Out of memory while reading %s.\n",fname);
    free(chans);
    free(tmp);
    free(dat);
    fclose(infile);
    return NULL;
  }
  memset(dat,0,sizeof(Data));

  /* Read sample by sample; keep the first nchan bytes of each record. */
  char **mat=cmatrix(nsamp,nchan);
  int complete=1;
  for (int i=0;i<nsamp;i++) {
    size_t nread=fread(tmp,sizeof(char),nchan*npol,infile);
    if (nread!=(size_t)(nchan*npol)) {
      printf("FILE %s: short read at sample %d.\n",fname,i);
      complete=0;
      break;
    }
    memcpy(mat[i],tmp,sizeof(char)*nchan);
  }
  fclose(infile);
  free(tmp);

  if (!complete) {
    /* Same release sequence as on the success path below. */
    free(mat[0]);
    free(mat);
    free(chans);
    free(dat);
    return NULL;
  }

  dat->raw_nchan=nchan;
  dat->ndata=nsamp;
  /* Transpose: mat is [sample][channel], raw_data is [channel][sample]. */
  dat->raw_data=matrix(nchan,nsamp);
  for (int i=0;i<nchan;i++)
    for (int j=0;j<nsamp;j++)
      dat->raw_data[i][j]=mat[j][i];

  dat->raw_chans=chans;
  dat->dt=1e-3;
  float dnu=(900-700.0)/dat->raw_nchan;
  for (int i=0;i<dat->raw_nchan;i++) {
    dat->raw_chans[i]=900-(0.5+i)*dnu;
  }

  /* NOTE(review): assumes cmatrix() returns a row-pointer array over a single
     contiguous buffer starting at mat[0] - confirm against its definition. */
  free(mat[0]);
  free(mat);
  return dat;
}
Esempio n. 4
0
const Field3D Delp2(const Field3D &f, real zsmooth)
{
  // Applies the operator built from laplace_tridag_coefs() to `f`:
  // for every y index the field is Fourier transformed along z, a three-point
  // stencil in x is applied to each Fourier mode, and the result is
  // transformed back.  Modes with index above zsmooth*ncz are zeroed when
  // zsmooth > 0.  The result carries the same location as `f`.
  Field3D result;

#ifdef CHECK
  int msg_pos = msg_stack.push("Delp2( Field3D )");
#endif

  // Fourier-space work arrays: allocated on the first call, reused afterwards.
  static dcomplex **ft = (dcomplex**) NULL, **delft;
  dcomplex a, b, c;   // stencil coefficients filled in by laplace_tridag_coefs()

  result.Allocate();

  real ***fd = f.getData();
  real ***rd = result.getData();

  if(ft == (dcomplex**) NULL) {
    ft = cmatrix(ngx, ncz/2 + 1);
    delft = cmatrix(ngx, ncz/2 + 1);
  }

  for(int iy = 0; iy < ngy; iy++) {

    // Forward FFT in z for every x point
    for(int ix = 0; ix < ngx; ix++)
      ZFFT(fd[ix][iy], zShift[ix][iy], ft[ix]);

    // Loop over the Fourier modes
    for(int kz = 0; kz <= ncz/2; kz++) {

      // Mode filter: 0 above the smoothing cut-off, 1 otherwise
      real filter = 1.0;
      if((zsmooth > 0.0) && (kz > (int) (zsmooth*((real) ncz))))
        filter = 0.0;

      // x derivative through the three-point stencil (no smoothing in x)
      for(int ix = 2; ix < (ngx-2); ix++) {
        laplace_tridag_coefs(ix, iy, kz, a, b, c);

        delft[ix][kz] = a*ft[ix-1][kz] + b*ft[ix][kz] + c*ft[ix+1][kz];
        delft[ix][kz] *= filter;
      }
    }

    // Reverse FFT; the extra z point ncz is a copy of point 0.
    // NOTE(review): rows 1 and ngx-2 of delft are transformed here although
    // the stencil loop above only writes rows 2 .. ngx-3 - confirm intent.
    for(int ix = 1; ix < (ngx-1); ix++) {
      ZFFT_rev(delft[ix], zShift[ix][iy], rd[ix][iy]);
      rd[ix][iy][ncz] = rd[ix][iy][0];
    }

    // Outermost x points are set to zero (z indices 0 .. ncz-1)
    for(int iz = 0; iz < ncz; iz++) {
      rd[0][iy][iz] = 0.0;
      rd[ngx-1][iy][iz] = 0.0;
    }
  }

#ifdef CHECK
  msg_stack.pop(msg_pos);
#endif

  // Set the output location
  result.setLocation(f.getLocation());

  return result;
}
Esempio n. 5
0
/*
** agfit5a: set-up part of the fit (only the beginning of the routine is
**   visible here).
**
** The visible portion copies the scalar arguments into locals, allocates
**   the work arrays and copies the per-observation input into them.
**
**   nusedx, nvarx    number of observations / number of covariates
**   yy               three consecutive blocks of length nused, read below as
**                      start, stop and event
**   covar2           covariate data, handed to cmatrix()
**   offset2,weights2 per-observation offset and weight
**   sort             two consecutive blocks of length nused (sort1, sort2)
**   methodx, ptype2, pdiag2, nfrail   scalar options
**   strata, means, beta, u, loglik, frail2, fexpr1, fexpr2, rho
**                    not referenced in the visible portion
**
** NOTE(review): ptype, pdiag, covar, cmat, cmat2, a, oldbeta, a2, weights,
**   offset, score, tmean, start, stop, event, sort1, sort2 and upen are not
**   declared in this function; presumably file-scope variables shared with
**   later calls - confirm.
*/
void agfit5a(Sint *nusedx, Sint *nvarx, double *yy, 
	       double *covar2, double *offset2,
	       double *weights2, 
	       Sint   *strata,  Sint   *sort,
	       double *means, double *beta, double *u, 
	       double *loglik, 
	       Sint *methodx, Sint *ptype2, Sint *pdiag2,
	       Sint *nfrail,  Sint *frail2,
               void *fexpr1, void *fexpr2, void *rho) {

    int i,j,k, person;
    int     nused, nvar;
    int    nf, nvar2;
    int  deaths, itemp;
    int  istrat, indx2, p, ksave;  

    double  denom, zbeta, risk;
    double  temp;
    double  d2, efron_wt;
    double  method;
    double  meanwt, time;

    /* unpack the scalar arguments */
    nused = *nusedx;
    nvar  = *nvarx;
    nf= *nfrail;
    method= *methodx;
    nvar2 = nvar + nf;    /* covariates plus frailty terms */
    ptype = *ptype2;
    pdiag = *pdiag2;

    /*
    **  Allocate storage for the arrays and vectors
    **  Since they will be used later, sizes are based on what will be
    **    needed with the frailty terms.
    */
    if (nvar >0) {
	covar= cmatrix(covar2, nused, nvar);
	cmat = cmatrix(0, nvar2, nvar+1);
	cmat2= cmatrix(0, nvar2, nvar+1);
        }

    /*
    ** One block of doubles, carved up as
    **   a, oldbeta, a2, tmean           : nvar2 each  (4*nvar2)
    **   weights, offset, score, start, stop : nused each (5*nused)
    */
    a = Calloc(4*nvar2 + 5*nused , double);
    oldbeta = a + nvar2;
    a2 =  oldbeta + nvar2;
    weights = a2+ nvar2;
    offset  = weights + nused;
    score   = offset + nused;
    tmean   = score + nused;
    start   = tmean + nvar2;
    stop    = start + nused;
    
    /* One block of ints: event, sort1, sort2, nused each */
    event  = Calloc(3*nused, int);
    sort1   = event + nused;
    sort2   = sort1 + nused;

    /* copy the per-observation input into the work arrays */
    for (i=0; i<nused; i++) {
	weights[i] = weights2[i];
	offset[i]  = offset2[i];
	event[i]  =  yy[nused + nused +i];   /* third block of yy */
	sort1[i]  = sort[i];
	sort2[i]  = sort[nused+i];
	start[i]  = yy[i];                   /* first block of yy */
	stop[i]   = yy[nused+i];             /* second block of yy */
        }

    /* scratch space for penalty 
    **    upen needs to be max(nvar, nfrail), 
    **    ipen max(nfrail, nvar(if pdiag=0) or nvar^2 )
    */
    if (nf > nvar) i=nf; else i=nvar;             /* i = max(nf, nvar)   */
    if (nf > nvar*nvar) j=nf; else j=nvar*nvar;   /* j = max(nf, nvar^2) */
    if (pdiag==0)  upen = Calloc(2*i, double);
    else           upen = Calloc(i+j, double);
Esempio n. 6
0
/*
 * itegeppXXR - estimate haplotype frequencies from genotype strings and write
 * the results as text into caller-supplied buffers.
 *
 * Outline of what the code below does:
 *   1. collapse the np genotype strings into ng distinct genotypes,
 *   2. enumerate candidate haplotype-pair states per genotype
 *      (rechap / compatible / addon),
 *   3. repeat likea() until no haplotype frequency changes by more than
 *      LoopPrecision (presumably an EM-style update - confirm in likea()),
 *   4. report haplotypes with frequency >= *lim, the best state(s) per
 *      person, the final likelihood value and one design-matrix row per
 *      person.
 *
 * tog      *tog==1: each desres row holds pgen for every reported haplotype
 *          pair followed by the remainder class; *tog==0: each row holds one
 *          value per reported haplotype followed by a remainder term.
 * lim      reporting threshold on the estimated haplotype frequency.
 * gent     np genotype strings; every character must be '0'..'3'
 *          (codes 48..51) and the length of gent[0] must not exceed 16.
 * qtrait   not referenced by the live code.
 * xnp      number of persons (np).
 * likeres  *likeres receives the last value returned by likea().
 * freqres  one string per reported haplotype (haplotype text + frequency).
 * hapres   one string per person (genotype and best haplotype pair(s)).
 * desres   one string per person (design-matrix row, see tog).
 *
 * The function works on variables that are not declared here (np, len, mg,
 * geno, hap, hc, state, ...); NOTE(review): presumably file-scope globals
 * shared with rechap/addon/selprob/likea - confirm.
 */
void itegeppXXR(int *tog, double *lim, char **gent, double *qtrait,
int *xnp, double *likeres, char **freqres, char **hapres, char **desres)
{
  char      lino[10000], lin[10000];
  
  char* CharNull = "\0"; /* 06.11.2014/SKn */

  double    likold,
            pe, pex,                 /* 10.3. 2000 ROHDE */
	    *p2max,
            gsum;                    /* 10.3. 2000 ROHDE */
  int       i, inp, it, j, k, ki, kj, h, s, glev, non,
            ac[2],
	    drei,
	    null,
	    df = 0 /*SKn*/,
	    combinations,
	    nz,
	    iqual,
            nhap,
           *hlist,
	    **pimax,
            h1x, h2x;
  uint      iterations, h1, h2;
  bool      loop;

  // new for create design matrix (tog=0)
  double  pehh,
          *peh;
  

/* Max. 16 SNPs */
 if ( strlen(gent[0]) > 16 ) error ("Number of SNPs should smaller than 17.") ;

  np       = *xnp;
  len      =  (int) (strlen(gent[0]) + 1);

  mg       = ivector(np);
  merke    = ivector(np);
  nulmer   = ivector(np);
  ge       = ivector(np);
  hlist    = ivector(np);
  po       = uivector(len);

  geno     = cmatrix(np, len);
  max_prob = init_dvector(NULL, 0.0, np);
  prob	   = init_dvector(NULL, 0.0, np);


  hap	   = init_dvector (NULL,  0.0, Hapco);
  hc	   = init_ivector (NULL, -1,Hapco);

  /* po[i] = 2^i */
  po[0]=1;
  for(i=1;i<len;i++)po[i] = 2*po[i-1];
  combinations = po[len-1];


    init_dvector(hap,  0.0, Hapco);
    init_ivector (hc, -1,Hapco);

    ng = 0;

    /* read input data */
    for(inp=0;inp<np;inp++){
    /* drei / null count the '3' and '0' codes of this person's genotype */
    drei = 0;
    null = 0;
    for (i=0; i<len-1; i++) {
    if(i < len-1 && (gent[inp][i] < 48 || gent[inp][i] > 51) ){
    Rprintf("%d %d %d\n",inp, i, gent[inp][i]);
    //Rprintf("\n Error in data person %d\n",inp+1); /* ROHDE 15.03.2000 */
    error("\n Error in data person %d\n",inp+1);;
    }
    if ( gent[inp][i] == '3' )  drei  ++;
    if ( gent[inp][i] == '0' )  null  ++;
    }
    gent[inp][len-1] = '\0';

    /* it stays 1 when this genotype string has not been seen before */
    it = 1;
    for (i=0; i<ng; i++) {
    if ( strncmp (geno[i], gent[inp], len) == 0 ) {

    /*** a certain genotype was found more than just once ***/
    ge[inp] = i;
    mg[i] ++;
    it       = 0;
    merke[i] = drei;
    break;
    }
    }
    if (it) {
    /*** a certain genotype was encountered the first time ***/
    strcpy (geno[ng], gent[inp]);
    ge[inp] = ng;
    mg[ng]    = 1;
    merke[ng] = drei;
    nulmer[ng] = null;
    ng ++;
    }
    }   /* end of loop over persons */
    People 	= np;
    Loci 	= len-1;
    /* end of reading sample data */

    nall = 2 * np;
    nstate  = init_ivector (NULL, 0, ng);
    mstate  = init_ivector (NULL, 0, ng);

    /* per genotype: room for 2^merke * 4^nulmer states of two entries each */
    state = (uint***) calloc(ng , sizeof(uint**));
    for (i=0; i<ng; i++) {
      nz       = po[merke[i]] * po[nulmer[i]] * po[nulmer[i]];
      state[i] = uimatrix(nz, 2);
    }
    /*** sort genotypes by weights *******************************************/
    genoProb = dvector(ng);
    genoId   = ivector(ng);
    for (i=0; i<ng; i++) {
      genoId[i]   = i;
      genoProb[i] = ((double)mg[i])/((double) po[merke[i]])/pow(4.0,nulmer[i]);
    }
    sortByProb(genoProb, genoId, ng);

    /* glev = number of genotypes whose weight reaches SignificanceLevel */
    glev=0;
    for(i=0;i<ng;i++)if(genoProb[i] >= SignificanceLevel)glev++;

    /*** process sorted genotypes ********************************************/

    nh = 0;

    for (i=0; i<glev; i++) {
/*    printf("\n ng: %d glev: %d  i: %d",ng,glev+1,i+1);    */
      rechap(genoId[i], 0, len-1);
/*    printf("\n %s >> %d\n",geno[genoId[i]],mg[genoId[i]]);
      for(k=0;k<16;k++)printf("%2d:%g ",hc[k],hap[k]);
      printf("\n");                                         */
    }

    /* remaining genotypes: collect every ordered pair (j,k) of known
       haplotypes that compatible() accepts */
    for (i=glev; i<ng; i++) {
      s = 0;
/*    printf("\n ng: %d glev: %d  i: %d",ng,glev+1,i+1);    */
      for (j=0; j<nh; j++) {
        ac[0] = hc[j];
        for (k=j; k<nh; k++) {
          ac[1] = hc[k];
	  if ( compatible(geno[genoId[i]], ac) ) {
            state[genoId[i]][s][0] = j;
            state[genoId[i]][s][1] = k;
            s ++;
            if ( j != k ) {
              state[genoId[i]][s][0] = k;
              state[genoId[i]][s][1] = j;
              s ++;
	    }
	  }
        }
      }
      nstate[genoId[i]] = s;
    }
    for (i=glev; i<ng; i++) {
      addon(genoId[i]);
    }

/*  printf("\n");
    printf("\ngloop: %d ng: %d glev: %d nh: %d\n",gloop,ng,glev,nh);  */

    /*** now comes the output that does not need simulated annealing *********/

    first  = 1;           /*** start likelihood outside of annealing loops ***/

    df = nh;

    hapnew = init_dvector(NULL, 0.0, nh );
    haptmp = init_dvector(NULL, 0.0, nh );

    for (i=0; i<ng; i++)selprob(i);

    likold = likea();

    /* Continue computation of mean probabilities */

    /* NOTE(review): pp is accumulated but never read afterwards */
    for(i=0;i<ng;i++)
      for(j=0;j<mstate[i];j++) {
        double pp = 0.0;

        if ( nstate[i] > 1 ) {
          h          = state[i][j][0];
          hapnew[h] += (double)mg[i] / (double)mstate[i];
          h          = state[i][j][1];
          pp        += hapnew[h];
          hapnew[h] += ((double) mg[i]) / ((double) mstate[i]);
          pp        += hapnew[h];
        }
        else {
          h          = state[i][j][0];
          hapnew[h] += 2.0 * ((double) mg[i]) / ((double) mstate[i]);
          pp        += hapnew[h];
        }
      }


    /* normalise by the number of alleles; count haplotypes left at zero */
    non = 0;
    for (i=0; i<nh; i++) {
    if(hapnew[i]==0.0)non++;
    else hapnew[i] /= (double) nall;
    }

    /* give the zero entries a total mass of 0.0001, scale the rest by 0.9999 */
    for (i=0; i<nh; i++) {
    if(hapnew[i]==0.0)hapnew[i] = 0.0001/(double)non;
    else hapnew[i] *= 0.9999;
    }


	iterations = 0;
    first = 0;

    /* iterate until no frequency moves by more than LoopPrecision */
    do {
      loop = 0;
      iterations ++;
/*    printf("gloop:%3d  count: %d\n",gloop,iterations);  */
      /* Recompute mean probabilities */
      for (i=0; i<nh; i++) {
        if ( fabs(hap[i] - hapnew[i]) > LoopPrecision )
          loop = 1;
        hap[i] = hapnew[i];
      }

      init_dvector(prob,   0.0, np);
      init_dvector(haptmp, 0.0, nh);
      init_dvector(hapnew, 0.0, nh);
      likold = likea();
      for (i=0; i<nh; i++)
        hapnew[i] /= (double) nall;

    } while (loop);

/*  Rprintf("\n");
    Rprintf("  Results Ensemble means: \n\n"); */
    /* report every haplotype whose frequency reaches *lim */
    nhap = 0;
    j    = 0;
    for (i=0; i<nh; i++)
      {
      if ( hapnew[i] >= *lim ) {
    /* 07.06.2007  S.Knueppel > Limit the number of estimated haplotypes. */
    if ( (*tog==0) &&  ((nhap+1) > 1500) ) {
     error ("Error in itegeppXXR: Too much estimated haplotypes. Increase option lim.") ;
    }
    if ( (*tog==1) &&  ((nhap+1) > 1500) ) {
     error ("Error in itegeppXXR: Too much estimated haplotypes. Increase option lim.") ;
    }


       
/*    	sprintf(lino,"\0"); 02.06.2015/SKn */  
    /*    sprintf("%s", "%s", *lino, *CharNull);*/
       sprintf(lino, "%s", CharNull);


   printHaplotype(hc[i], len, lino);
	/*
        printf("    hapnew[%8d] = %7.4f  (%7.4f)\n",
	    hc[i], hapnew[i], hap[i]);
        */
  /* sprintf(lin,"%9.6f\0", hapnew[i]); 06.11.2014/SKn */
	sprintf(lin,"%9.6f%s", hapnew[i], CharNull); /* 06.11.2014/SKn */
	strcat(lino,lin);
	strcpy(freqres[j],lino);
	j++;
        hlist[nhap++] = i;



      }
      }
      /* htpp[i][j] (i<=j) numbers the pairs of reported haplotypes; index
         nhap stands for the class of all haplotypes that are not reported */
      k = 0;
      htpp = init_uimatrix(NULL,0,nhap+1,nhap+1);
      for(i=0;i<nhap+1;i++)
	      for(j=i;j<nhap+1;j++)htpp[i][j]=k++;

      pgen = init_dmatrix(NULL,0.0,ng,(nhap+1)*(nhap+2)/2);

      /* start find best states after MLE   10.3.2000 ROHDE  */

   	  pimax  = imatrix(ng,10);               /* ROHDE  10.3.2000 */
          p2max = init_dvector(NULL,0.0,ng);     /* ROHDE  10.3.2000 */
	  for(i=0;i<ng;i++)max_prob[i] = 0.0;    /* ROHDE  10.3.2000 */
      /* per genotype: max_prob = largest state probability, p2max = runner-up,
         pimax = indices of the state(s) attaining the maximum (-1 = unused) */
      for (i=0;i<ng;i++){
      for(j=0;j<10;j++)pimax[genoId[i]][j] = -1;
          iqual=1;
      for (j=0;j<nstate[genoId[i]];j++){
      pe = hapnew[state[genoId[i]][j][0]] * hapnew[state[genoId[i]][j][1]];
	  if( state[genoId[i]][j][0] != state[genoId[i]][j][1] ) pe += pe;

	  if(pe > p2max[genoId[i]]){

      if (pe > max_prob[genoId[i]]){
		p2max[genoId[i]] = max_prob[genoId[i]];
      	max_prob[genoId[i]] = pe;
      	pimax[genoId[i]][0]=j;
		for(k=1;k<10;k++)pimax[genoId[i]][k]=-1;
		iqual = 1;                 /***   ROHDE  04.09.2001 ***/
      	}

	  else{
	  if (pe == max_prob[genoId[i]] && iqual < 9){
	  	for(k=0;k<iqual;k++) if(state[genoId[i]][j][0] ==
						state[genoId[i]][pimax[genoId[i]][k]][1]) pe=0.0;
	  	if(pe > 0.0)pimax[genoId[i]][iqual++]=j;
	  	}
	    else p2max[genoId[i]] = pe;
		}
	  }

      }
      }    /* end of maximum state search */

/*    Rprintf("\n Haplotypes after MLE\n");           */
      /* one hapres line per person: genotype and its best haplotype pair(s) */
      jjx = 0;
      for(i=0;i<np;i++){
        /* sprintf(lino,"%i %s >> \0",i, geno[ge[i]]); 06.11.2014/SKn */
           sprintf(lino,"%i %s >> %s",i, geno[ge[i]], CharNull);
        for(k=0;k<10;k++){
          j = pimax[ge[i]][k];
          if(j > -1){
            if(k>0)pspace(len+3,lino);   /*** ROHDE  11.09.2001 ***/
            printHaplotype(hc[state[ge[i]][j][0]],len,lino);
            strcat(lino," <> \0");
            printHaplotype(hc[state[ge[i]][j][1]],len,lino);
            sprintf(lin,"  P>> %9.7f D>> %9.7f",
      	                 max_prob[ge[i]],max_prob[ge[i]]-p2max[ge[i]]);
            strcat(lino,lin);
          } else break;
        }
        strcpy(hapres[jjx++],lino);
      }

      /* endfind best states after MLE   10.3.2000 ROHDE  */


/*    Rprintf("\n\n       Likelihood = %f\n", likold);
      Rprintf("\n");                                      */
      /* sprintf(lino,"Likelihood = %f\0", likold); 06.11.2014/SKn */
         sprintf(lino,"Likelihood = %f%s", likold, CharNull);
     // strcpy(likeres[0],lino);
      (*likeres) = likold ;
      
/*  Sample over states for each genotype ***********************************/

      /* pgen[g][pair] = share of genotype g's total state probability that
         falls on each reported haplotype pair; states involving a haplotype
         that is not reported go to the last class htpp[nhap][nhap] */
      for(i=0;i<ng;i++){
      gsum = 0.0;
      for(j=0;j<nstate[genoId[i]];j++){
      h1 = state[genoId[i]][j][0];
      h2 = state[genoId[i]][j][1];
      h1x = h2x = 0;
      for(ki=1;ki<=nhap;ki++) if( h1 == hlist[ki-1] ) h1x=ki;
      for(kj=1;kj<=nhap;kj++) if( h2 == hlist[kj-1] ) h2x=kj;
      if(h1x>0 && h2x>0){
	      if(h2x < h1x){ k=h1x; h1x=h2x; h2x=k;}
	      pgen[genoId[i]][htpp[h1x-1][h2x-1]] += hapnew[h1]*hapnew[h2];
	                                     gsum += hapnew[h1]*hapnew[h2];
	}
	else{ pgen[genoId[i]][htpp[nhap][nhap]] += hapnew[h1]*hapnew[h2];
		                           gsum += hapnew[h1]*hapnew[h2];
	}
      }
      for(k=0;k<(nhap+1)*(nhap+2)/2;k++)pgen[genoId[i]][k] /= gsum;
      }

/*    for(i=0;i<ng;i++){
      Rprintf("i:%2d  %s\t",i,geno[genoId[i]]);
      for(ki=0;ki<nhap+1;ki++){
	for(kj=ki;kj<nhap+1;kj++)
               Rprintf("%4.2f ",pgen[genoId[i]][htpp[ki][kj]]);
	if(kj<ki)printf("0.000\t ");
	else	printf("%4.2f\t",pgen[genoId[i]][htpp[ki][kj]]);
      Rprintf("\t");
      }
      Rprintf("\n");
      }
*/

      jjx = 0;

      /* tog==1: one desres row per person with pgen of every reported pair,
         followed by the remainder class */
      if (*tog == 1){
      for(i=0;i<np;i++){
      /*
      printf("\n%4s %s %4.2f >> ",pid[i],geno[ge[i]],qtrait[i]);
      */
      strcpy(lino,"\0");
      for(ki=0;ki<nhap;ki++){  /*  each haplotype alone   */
        for(kj=ki;kj<nhap;kj++){
          /* sprintf(lin,"%8.6f \0",pgen[ge[i]][htpp[ki][kj]]); 06.11.2014/SKn */
	           sprintf(lin,"%8.6f %s",pgen[ge[i]][htpp[ki][kj]], CharNull);
          strcat(lino,lin);
	      }

      }
      
      /* sprintf(lin,"%8.6f\0",pgen[ge[i]][htpp[nhap][nhap]]); 06.11.2014/SKn */
         sprintf(lin,"%8.6f%s",pgen[ge[i]][htpp[nhap][nhap]], CharNull);
      strcat(lino,lin);
      strcpy(desres[jjx],lino);
      jjx++;
      }
      }
      
 /* changed following Klaus; construction of the design matrix
    16.09.2008 */
 
 /*     if(*tog == 0){
      for(i=0;i<np;i++){
      //
      //printf("\n%4s %s %4.2f >> ",id[i],geno[ge[i]],qtrait[i]);
      //
      strcpy(lino,"\0");
          pex = 0.0;
      for(j=0;j<nhap;j++){
          pe = 0.0;
      for(ki=0;ki<nhap;ki++){    // over all haplotype pairs  
      for(kj=ki;kj<nhap;kj++){
      if(ki==j && kj==j && pgen[ge[i]][htpp[ki][kj]] > 0.0)
	      pe +=2.0*pgen[ge[i]][htpp[ki][kj]];
      else if ((ki==j || kj==j) && pgen[ge[i]][htpp[ki][kj]] > 0.0)
                                    pe += pgen[ge[i]][htpp[ki][kj]];
      }
      }
      pex += pe;
      sprintf(lin,"%8.6f \0",pe);
      strcat(lino,lin);
      }
      sprintf(lin,"%8.6f\0",2.0-pex);
      strcat(lino,lin);
      strcpy(desres[jjx],lino);
      jjx++;
      }
      }
*/
/* new: following Klaus; 17.09.2008 */
        /* tog==0: one desres row per person with 2*peh[j]/gsum for every
           reported haplotype j, followed by 2.0-pehh/gsum */
        if(*tog == 0){
        
      peh = init_dvector(NULL, 0.0, nhap+1);
      for(i=0;i<np;i++){
      /*
      printf("\n%4s %s %4.2f >> ",id[i],geno[ge[i]],qtrait[i]);
      */
      strcpy(lino,"\0");
      for(j=0;j<nhap;j++){
      gsum  = 0.0;
      /*
      for(ki=0;ki<nhap;ki++){    * over all haplotype pairs  *
      for(kj=ki;kj<nhap;kj++){
      if(ki==j && kj==j && pgen[ge[i]][htpp[ki][kj]] > 0.0)
	      pe +=2.0*pgen[ge[i]][htpp[ki][kj]];
      else if ((ki==j || kj==j) && pgen[ge[i]][htpp[ki][kj]] > 0.0)
                                    pe += pgen[ge[i]][htpp[ki][kj]];
      }
      }
      */
      for(ki=0;ki<nstate[ge[i]];ki++){
      h1 = state[ge[i]][ki][0];
      h2 = state[ge[i]][ki][1];
      h  = hlist[j];
      pex = hapnew[h1]*hapnew[h2];
      gsum += 2*pex;
      if((h == h1) && (h == h2))peh[j] += 2*pex;
      else if((h == h1) || (h == h2))peh[j] += pex;
      }  /* end nstate */
      }  /* end nhap   */
      pehh = 0.0;
      for(j=0;j<nhap;j++){
      pehh += 2*peh[j];
      /* sprintf(lin,"%8.6f \0",2*peh[j]/gsum); 06.11.2014/SKn */
      sprintf(lin,"%8.6f %s",2*peh[j]/gsum, CharNull);
      strcat(lino,lin);
      }  /* end print */
      /* sprintf(lin,"%8.6f\0",2.0-pehh/gsum); 06.11.2014/SKn */
         sprintf(lin,"%8.6f%s",2.0-pehh/gsum, CharNull);
      
      strcat(lino,lin);
      strcpy(desres[jjx],lino);
      jjx++;
      init_dvector(peh, 0.0, nhap+1);   /* reset for the next person */
      }  /*  end np  */
      destroy_d_array(peh);
      }

    
    /* release all work arrays */
    destroy_c_array2(geno);
    destroy_u_array(po);
    for ( i=0;i<ng;i++) { destroy_u_array2(state[i]) ; }
    free((uint***)state);
    destroy_u_array2(htpp);
    destroy_i_array(nstate);
    destroy_i_array(mstate);
    destroy_i_array(genoId);
    destroy_i_array(mg);
    destroy_i_array(merke);
    destroy_i_array(nulmer);
    destroy_i_array(ge);
    destroy_i_array(hlist);
    destroy_d_array(prob);
    destroy_d_array(max_prob);
    destroy_d_array(hapnew);
    destroy_d_array(haptmp);
    destroy_d_array(hap);
    destroy_d_array(genoProb);
    destroy_d_array2(pgen);
    destroy_i_array(hc);
    destroy_d_array(p2max);
    destroy_i_array2(pimax);
}
Esempio n. 7
0
/* =============================== */
void Read_Index(char **argv,int args,int nRead,int nSeq)
/* ===============================
 * Read the read names listed in the file argv[args] into rdnames and match
 * them by name against the nSeq database sequences (expp/rd_head/rd_index).
 * For every read name that also occurs among the database sequences,
 * ctg2wgs_index[read] is set to the index of the first matching sequence.
 *
 * The layout of a line in the name file depends on release_flag:
 *   0: eight fields, the name is the 1st
 *   1: nine fields, the name is the 2nd
 *   2: one name per token
 *
 * Exits the program when memory or the name file is unavailable.
 * =============================== */
{
     int i,j,k,n_reads=nSeq+nRead;
     /* NOTE(review): cmatrix(0,nRead+1,...) presumably provides rows
        0..nRead+1 inclusive - confirm; reading stops at that bound. */
     int max_rows=nRead+2;
     FILE *namef;
     fasta *seqp;
     void ArraySort_String(int n,char Pair_Name[][Max_N_NameBase],int *brr);
     char **cmatrix(long nrl,long nrh,long ncl,long nch);
     char (*DBname)[Max_N_NameBase];
     char tempct[60],tempc1[60],tempc2[60],tempc3[60],tempc4[60],tempc5[60];
     int temp1,temp2,temp3;
     int i_reads,num_rd_find,stopflag;
     int *readIndex;
     int mapindex;

     if((readIndex= (int *)calloc(n_reads,sizeof(int))) == NULL)
     {
       printf("Error Contig_Merge: calloc - readIndex\n");
       exit(1);
     } 
     /* The name table lives on the heap: with many reads it is far too
        large for a stack-allocated variable-length array. */
     if((DBname= (char (*)[Max_N_NameBase])calloc(n_reads>0?n_reads:1,sizeof *DBname)) == NULL)
     {
       printf("Error Contig_Merge: calloc - DBname\n");
       exit(1);
     }
     rdnames=cmatrix(0,nRead+1,0,Max_N_NameBase);
     if((namef = fopen(argv[args],"r")) == NULL)
     {
       printf("ERROR Memory_Allocate:: reads group file \n");
       exit(1);
     }

     /* Field widths (%59s) keep every token inside its 60-byte buffer. */
     i_reads=0;
     printf("before read: %d %d\n",release_flag,nRead);
     if(release_flag==0)
     {
       while((i_reads<max_rows)&&(fscanf(namef,"%59s %59s %59s %d %59s %59s %d %d",tempc1,tempc2,tempc3,&temp1,tempc4,tempc5,&temp2,&temp3)!=EOF))
       {
         strcpy(rdnames[i_reads],tempc1);
         i_reads++;
       }
     }
     else if(release_flag==1)
     {
       while((i_reads<max_rows)&&(fscanf(namef,"%59s %59s %59s %59s %d %59s %59s %d %d",tempct,tempc1,tempc2,tempc3,&temp1,tempc4,tempc5,&temp2,&temp3)!=EOF))
       {
         strcpy(rdnames[i_reads],tempc1);
         i_reads++;
       }
     }
     else if(release_flag==2)
     {
       /* NOTE(review): the token length is still unbounded here because the
          row width is a macro expression - confirm names fit a row. */
       while((i_reads<max_rows)&&(fscanf(namef,"%s",rdnames[i_reads])!=EOF))
       {
         i_reads++;
       }
     }
     fclose(namef);

     /* Build one name table: database sequences first, then the reads. */
     for(j=0;j<nSeq;j++)
     {
        seqp=expp[rd_head[j]]+rd_index[j];
        strcpy(DBname[j],seqp->name);
        readIndex[j]=j;
     }
     for(j=0;j<nRead;j++)
     {
        strcpy(DBname[j+nSeq],rdnames[j]);
        readIndex[j+nSeq]=j+nSeq;
     }
     n_reads=nSeq+nRead;
     printf("before sort: %d %d\n",nSeq,nRead);
     ArraySort_String(n_reads,DBname,readIndex);

     /* After sorting, equal names are adjacent: walk the table group by group. */
     num_rd_find=0;
     for(i=0;i<n_reads-1;i++)
     {
        /* mapindex is reset per group so that a group without any read
           cannot reuse the read index of an earlier group. */
        mapindex=-1;
        if(readIndex[i]>=nSeq)
          mapindex = readIndex[i];
        stopflag=0;
        j=i+1;
        while((j<n_reads)&&(stopflag==0))
        {
          if(strcmp(DBname[j],DBname[i])==0)
          {
            if(readIndex[j]>=nSeq)
              mapindex = readIndex[j];
            j++;
          }
          else
            stopflag=1;
        }
        /* Group [i,j) holds a read and at least one more entry: map the
           read to the first database sequence in the group. */
        if(((j-i)>1)&&(mapindex>=nSeq))
        {
          for(k=i;k<j;k++)
          {
             if(readIndex[k]<nSeq)
             {
               ctg2wgs_index[mapindex-nSeq]=readIndex[k];
               num_rd_find++;
               break;
             }
          }
        }
        i=j-1;
     }
       printf("reads found: %d %d\n",nRead,num_rd_find);

     free(DBname);
     free(readIndex);
}
Esempio n. 8
0
/*************************************************************************
Weighted  fitting  by  Chebyshev  polynomial  in  barycentric  form,  with
constraints on function values or first derivatives.

Small regularizing term is used when solving constrained tasks (to improve
stability).

Task is linear, so linear least squares solver is used. Complexity of this
computational scheme is O(N*M^2), mostly dominated by least squares solver

SEE ALSO:
    PolynomialFit()

INPUT PARAMETERS:
    X   -   points, array[0..N-1].
    Y   -   function values, array[0..N-1].
    W   -   weights, array[0..N-1]
            Each summand in square  sum  of  approximation deviations from
            given  values  is  multiplied  by  the square of corresponding
            weight. Fill it by 1's if you don't  want  to  solve  weighted
            task.
    N   -   number of points, N>0.
    XC  -   points where polynomial values/derivatives are constrained,
            array[0..K-1].
    YC  -   values of constraints, array[0..K-1]
    DC  -   array[0..K-1], types of constraints:
            * DC[i]=0   means that P(XC[i])=YC[i]
            * DC[i]=1   means that P'(XC[i])=YC[i]
            SEE BELOW FOR IMPORTANT INFORMATION ON CONSTRAINTS
    K   -   number of constraints, 0<=K<M.
            K=0 means no constraints (XC/YC/DC are not used in such cases)
    M   -   number of basis functions (= polynomial_degree + 1), M>=1

OUTPUT PARAMETERS:
    Info-   same format as in LSFitLinearW() subroutine:
            * Info>0    task is solved
            * Info<=0   an error occurred:
                        -4 means inconvergence of internal SVD
                        -3 means inconsistent constraints
                        -1 means other errors in the parameters passed
                           (N<=0, for example)
    P   -   interpolant in barycentric form.
    Rep -   report, same format as in LSFitLinearW() subroutine.
            Following fields are set:
            * RMSError      rms error on the (X,Y).
            * AvgError      average error on the (X,Y).
            * AvgRelError   average relative error on the non-zero Y
            * MaxError      maximum error
                            NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0.

SETTING CONSTRAINTS - DANGERS AND OPPORTUNITIES:

Setting constraints can lead  to undesired  results,  like ill-conditioned
behavior, or inconsistency being detected. From the other side,  it allows
us to improve quality of the fit. Here we summarize  our  experience  with
constrained regression splines:
* even simple constraints can be inconsistent, see  Wikipedia  article  on
  this subject: http://en.wikipedia.org/wiki/Birkhoff_interpolation
* the  greater  is  M (given  fixed  constraints),  the  more chances that
  constraints will be consistent
* in the general case, consistency of constraints is NOT GUARANTEED.
* in one special case, however, we can  guarantee  consistency.  This
  case  is:  M>1  and constraints on the function values (NOT DERIVATIVES)

Our final recommendation is to use constraints  WHEN  AND  ONLY  when  you
can't solve your task without them. Anything beyond  special  cases  given
above is not guaranteed and may result in inconsistency.

  -- ALGLIB PROJECT --
     Copyright 10.12.2009 by Bochkanov Sergey
*************************************************************************/
void polynomialfitwc(ap::real_1d_array x,
     ap::real_1d_array y,
     const ap::real_1d_array& w,
     int n,
     ap::real_1d_array xc,
     ap::real_1d_array yc,
     const ap::integer_1d_array& dc,
     int k,
     int m,
     int& info,
     barycentricinterpolant& p,
     polynomialfitreport& rep)
{
    int i;
    int j;

    //
    // Validate sizes, then the constraint types (each must be 0 or 1).
    // Info is set to -1 and nothing else is touched on failure.
    //
    if( m<1||n<1||k<0||k>=m )
    {
        info = -1;
        return;
    }
    for(i = 0; i < k; i++)
    {
        info = 0;
        if( dc(i)<0||dc(i)>1 )
        {
            info = -1;
            return;
        }
    }

    //
    // Weight decay: keeps the task well-posed when it becomes degenerate
    // after the constraints are applied
    //
    const double decay = 10000*ap::machineepsilon;

    //
    // Scale X, Y, XC, YC; the unscaled points are returned in
    // XOriginal/YOriginal
    //
    double xa;
    double xb;
    double sa;
    double sb;
    ap::real_1d_array xoriginal;
    ap::real_1d_array yoriginal;
    lsfitscalexy(x, y, n, xc, yc, dc, k, xa, xb, sa, sb, xoriginal, yoriginal);

    //
    // Work storage:
    // * Design   - N rows of basis function values at X[] followed by
    //              M regularizing rows
    // * Constr   - K rows of basis function values (or derivatives) at XC[],
    //              with the constraint value in the last column
    // * Y2, W2   - Y and W extended by M entries
    //
    ap::real_1d_array y2;
    ap::real_1d_array w2;
    ap::real_1d_array tmp;
    ap::real_1d_array tmpdiff;
    ap::real_2d_array design;
    ap::real_2d_array constr;
    y2.setlength(n+m);
    w2.setlength(n+m);
    tmp.setlength(m);
    tmpdiff.setlength(m);
    design.setlength(n+m, m);
    if( k>0 )
    {
        constr.setlength(k, m+1);
    }

    //
    // First N rows: Chebyshev basis at every point, built by the three-term
    // recurrence in Tmp and then copied into the row
    //
    for(i = 0; i < n; i++)
    {
        for(j = 0; j < m; j++)
        {
            if( j==0 )
            {
                tmp(j) = 1;
            }
            else if( j==1 )
            {
                tmp(j) = x(i);
            }
            else
            {
                tmp(j) = 2*x(i)*tmp(j-1)-tmp(j-2);
            }
        }
        ap::vmove(&design(n-n+i, 0), &tmp(0), ap::vlen(0,m-1));
    }

    //
    // Next M rows: Decay on the diagonal, zero elsewhere
    //
    for(i = 0; i < m; i++)
    {
        for(j = 0; j < m; j++)
        {
            if( i!=j )
            {
                design(n+i,j) = 0;
            }
            else
            {
                design(n+i,j) = decay;
            }
        }
    }

    //
    // Y2 = Y followed by M zeros; W2 = W followed by M copies of mean(|W|)
    //
    ap::vmove(&y2(0), &y(0), ap::vlen(0,n-1));
    ap::vmove(&w2(0), &w(0), ap::vlen(0,n-1));
    double mx = 0;
    for(i = 0; i < n; i++)
    {
        mx += fabs(w(i));
    }
    mx /= n;
    for(i = 0; i < m; i++)
    {
        y2(n+i) = 0;
        w2(n+i) = mx;
    }

    //
    // Constraint rows: Tmp holds basis function values at XC[i],
    // TmpDiff their first derivatives; DC[i] selects which one is stored
    //
    for(i = 0; i < k; i++)
    {
        for(j = 0; j < m; j++)
        {
            if( j==0 )
            {
                tmp(j) = 1;
                tmpdiff(j) = 0;
            }
            else if( j==1 )
            {
                tmp(j) = xc(i);
                tmpdiff(j) = 1;
            }
            else
            {
                tmp(j) = 2*xc(i)*tmp(j-1)-tmp(j-2);
                tmpdiff(j) = 2*(tmp(j-1)+xc(i)*tmpdiff(j-1))-tmpdiff(j-2);
            }
        }
        switch( dc(i) )
        {
            case 0:
                ap::vmove(&constr(i, 0), &tmp(0), ap::vlen(0,m-1));
                break;
            case 1:
                ap::vmove(&constr(i, 0), &tmpdiff(0), ap::vlen(0,m-1));
                break;
            default:
                break;
        }
        constr(i,m) = yc(i);
    }

    //
    // Solve: the regularized system when constraints are present,
    // the plain N-row system otherwise. The solution is returned in Tmp.
    //
    lsfitreport lrep;
    if( k>0 )
    {
        lsfitlinearwc(y2, w2, design, constr, n+m, m, k, info, tmp, lrep);
    }
    else
    {
        lsfitlinearwc(y, w, design, constr, n, m, 0, info, tmp, lrep);
    }
    if( info<0 )
    {
        return;
    }

    //
    // Build the barycentric model:
    // * BX  - nodes cos(pi*i/(M-1)) (a single node 0 when M=1)
    // * BY  - fitted polynomial evaluated at the nodes
    // * BW  - weights of alternating sign, halved at the first and last node
    //
    ap::real_1d_array bx;
    ap::real_1d_array by;
    ap::real_1d_array bw;
    ap::real_1d_array tmp2;
    bx.setlength(m);
    by.setlength(m);
    bw.setlength(m);
    tmp2.setlength(m);
    double s = 1;
    for(i = 0; i < m; i++)
    {
        double u;
        if( m==1 )
        {
            u = 0;
        }
        else
        {
            u = cos(ap::pi()*i/(m-1));
        }

        // Evaluate sum(Tmp[j]*T_j(u)) with the same recurrence as above
        double v = 0;
        for(j = 0; j < m; j++)
        {
            if( j==0 )
            {
                tmp2(j) = 1;
            }
            else if( j==1 )
            {
                tmp2(j) = u;
            }
            else
            {
                tmp2(j) = 2*u*tmp2(j-1)-tmp2(j-2);
            }
            v = v+tmp(j)*tmp2(j);
        }
        bx(i) = u;
        by(i) = v;
        bw(i) = s;
        if( i==0||i==m-1 )
        {
            bw(i) = 0.5*bw(i);
        }
        s = -s;
    }
    barycentricbuildxyw(bx, by, bw, m, p);

    //
    // Undo the scaling applied by LSFitScaleXY
    //
    barycentriclintransx(p, 2/(xb-xa), -(xa+xb)/(xb-xa));
    barycentriclintransy(p, sb-sa, sa);

    //
    // Report: absolute errors from the solver are rescaled by (SB-SA);
    // the relative error is recomputed on the original points because the
    // task was shifted/scaled
    //
    rep.taskrcond = lrep.taskrcond;
    rep.rmserror = lrep.rmserror*(sb-sa);
    rep.avgerror = lrep.avgerror*(sb-sa);
    rep.maxerror = lrep.maxerror*(sb-sa);
    rep.avgrelerror = 0;
    int relcnt = 0;
    for(i = 0; i < n; i++)
    {
        if( !ap::fp_neq(yoriginal(i),0) )
        {
            continue;
        }
        rep.avgrelerror = rep.avgrelerror+fabs(barycentriccalc(p, xoriginal(i))-yoriginal(i))/fabs(yoriginal(i));
        relcnt = relcnt+1;
    }
    if( relcnt!=0 )
    {
        rep.avgrelerror = rep.avgrelerror/relcnt;
    }
}
Esempio n. 9
0
/*
 * Read the output of msDQH from pfin, compute the summary statistics of
 * every simulated replicate with CalcSumStats(), and print them with
 * PrintSumStatsArray() one complete set of taxon:locus pairs at a time.
 *
 * Two input layouts are recognised:
 *   - msbayes format: the stream starts with "# BEGIN MSBAYES", followed by
 *     a "# numTaxonLocusPairs ..." line; each msDQH run is preceded by a
 *     "# taxonID %d locusID %d" line.
 *   - plain format: anything else; a single taxon:locus pair is assumed.
 *
 * Any allocation failure or malformed/truncated input that would leave
 * required values undefined terminates the program with an error message.
 * An entirely empty input produces no output.
 */
static void
ReadInMSoutputAndCalculateSummaryStatistics (FILE * pfin){
  msOutputArray *msOutArr;
  msOutput *outPtr;
  struct SumStat **sumStatArr;
  struct SumStat **sumStatArrTemp;

  int initialArrSize;
  int msbayesFormat, taxonID, locusID;

  int maxsites = 1000; /* max number of seg sites, used for size of data mat */
  int nsam, i, howmany, npops, *config;
  char **list, line[MAX_LNSZ], longline[262145], *mutscanline;
  char dum[100];
  tPositionOfSegSites *posit;
  double theta;
  int segsites = 0, count, numTaxonLocusPairs, BasePairs, taxonLocusID;
  int Fst_bool = 0, Qbool = 0;
  int isNumSegSitesConst = 0;   /* 1 with -s, the number of segregating sites
                                 *    will be constant in each sample
                                 * 0 with -t, varies between samples
                                 */

  /* read in first line of output (command line options of msDQH) */
  if (fgets (line, MAX_LNSZ, pfin) == NULL)
    return;                     /* empty input: nothing to summarise */

  if (! (msOutArr = malloc(sizeof(msOutputArray)))) {
    perror ("ERROR: not enough memory in ReadInMSoutput\n");
    exit(EXIT_FAILURE);
  }

  /* processing the header information */
  if (strcmp(line, "# BEGIN MSBAYES\n") == 0) { /* process info from msbayes.pl */
    /*
     * The three counts drive allocation sizes and the modulo used when
     * printing, so refuse to continue if they are missing or non-positive.
     */
    if (fgets(line, MAX_LNSZ,pfin) == NULL
        || sscanf(line, "# numTaxonLocusPairs %d numTaxonPairs %d numLoci %d",
                  &(msOutArr->numTaxonLocusPairs), &(msOutArr->numTaxonPairs),
                  &(msOutArr->numLoci)) != 3
        || msOutArr->numTaxonLocusPairs < 1) {
      fprintf (stderr, "ERROR: malformed msbayes header in ReadInMSoutput\n");
      exit(EXIT_FAILURE);
    }
    /* if taxon:locus matrix is required, we can process here  */

    fgets(line, MAX_LNSZ,pfin); /* get next line */
    msbayesFormat = 1;
    initialArrSize = 500;
  } else {
    msOutArr->numTaxonPairs=msOutArr->numTaxonLocusPairs = 1;
    msOutArr->numLoci = 1;
    msbayesFormat = 0;
    initialArrSize = 1;
  }

  /* allocate memory to msOutArr */
  if (! (msOutArr->dat = (msOutput *)calloc(initialArrSize, sizeof(msOutput)))) {
    perror ("ERROR: not enough memory 2 in ReadInMSoutput\n");
    exit(EXIT_FAILURE);
  }
  msOutArr->numElements = 0;
  msOutArr->allocatedSize = initialArrSize;

  /* one SumStat pointer per replicate, grown together with msOutArr->dat */
  if ((sumStatArr = calloc(initialArrSize,
                           sizeof(struct SumStat *))) == NULL) {
    perror("ERROR: No mem in main ");
    exit(EXIT_FAILURE);
  }

  /* scratch array holding one complete set of taxon:locus pairs for printing */
  if ((sumStatArrTemp = calloc(msOutArr->numTaxonLocusPairs,
                               sizeof(struct SumStat *))) == NULL) {
    perror("ERROR: No mem in main ");
    exit(EXIT_FAILURE);
  }

  int sumStatCounter = 0;
  do {

    int endOfFile = 0;
    if (msbayesFormat) {
      while (strncmp(line, "# taxonID ", 10) != 0) {
        if (! fgets(line, MAX_LNSZ,pfin)) { /* basically skipping empty line */
          endOfFile = 1;
          break;
        }
      }
      if(endOfFile)
        break;
      /* Before msDQH output, numerial IDs for taxon and locus are inserted */
      sscanf(line, "# taxonID %d locusID %d\n", &taxonID, &locusID);
      fgets(line, MAX_LNSZ,pfin);
    } else {
      /* CHECK THIS WELL, NAOKI, SWRS */
      while (BlankCharStringQ(line)) {
        if (! fgets(line,MAX_LNSZ,pfin)) {
          endOfFile=1;
          break;
        }
      }
      if (endOfFile)
        break;
      taxonID = locusID = 1;
    }

    /*
     * Get the following variables from the command line options
     * NOTE: this line has to match with system() line of msbayes.pl
     *
     * nsam:      number of total samples
     * howmany:   how many simulations were run
     * THETA:     4 Ne mu used for the simulation
     *            This is removed, and getting this in more prper way
     * BasePairs: sequence length
     * taxonLocusID: sequential ID for each taxon:locus pair
                     (1 to # of taxon:locus pairs)
     * numTaxonLocusPairs: total number of taxon:locus pairs per 1 set of sims.
     */
    numTaxonLocusPairs = taxonLocusID = BasePairs = -1;
    /*
     * nsam and howmany are the 3rd and 4th conversions; everything below
     * depends on them, so fewer than four successful conversions is fatal.
     */
    if (sscanf (line,
            " %s %s %d %d %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %u %s %s %s %d %s %s %s %u ",
            dum, dum, &nsam, &howmany, dum, dum, dum, dum, dum, dum, dum,
            dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum,
            dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum,
            dum, dum, dum, dum, dum, dum, dum, dum, &BasePairs, dum, dum,
            dum, &taxonLocusID, dum, dum, dum, &numTaxonLocusPairs) < 4)
      {
        fprintf (stderr,
                 "ERROR: could not read nsam and howmany from msDQH command line\n");
        exit (EXIT_FAILURE);
      }

    if(!msbayesFormat) {
      msOutArr->numTaxonPairs=msOutArr->numTaxonLocusPairs = 1;
      taxonLocusID=numTaxonLocusPairs=1;
    }
    /*
     * of course, I have to put a prior generator in the actual sample
     * generator for theta and tau down below for each count
     */

    /* Find the theta or number of segregating sites from -t or -s */
    mutscanline = strstr (line, "-s");
    if (mutscanline != NULL)
      {
        /*
         * number of segregating sites is constant.  mutscanline points at
         * the "-s" itself, so the option text must be part of the format;
         * a bare " %d" would stop on "-s" and leave segsites unset.
         */
        if (sscanf (mutscanline, "-s %d", &segsites) != 1)
          {
            fprintf (stderr, "\nmutscanline problem: no number after -s \n");
            exit (1);
          }
        isNumSegSitesConst = 1;
        theta = thetaW (nsam, segsites);
      }
    else
      {
        mutscanline = strstr (line, "-t");
        if (mutscanline != NULL)
          sscanf (mutscanline, "-t %s", dum);
        else
          {
            fprintf (stderr, "\nmutscanline problem -s or -t not found \n");
            exit (1);
          }
        theta = atof (dum);

        /* -Q will tell transition transversion rate ratio and base freqs */
        if ((mutscanline = strstr (line, "-Q")) != NULL) {
          Qbool = 1;
        }
      }

    mutscanline = strstr(line, "-r");
    if(mutscanline != NULL) {
      sscanf(mutscanline, "-r %s %d", dum, &BasePairs);
      // dum contains recomb rate
    }

    /*
     * config become an array with npops elements,
     * it contains subpop sample sizes
     * NOTE(review): config is never released here; whether the callee
     * heap-allocates it is not visible from this function -- confirm.
     */
    npops = FindNumPopsAndSubpopSampleSizes (line, &config);

    if (npops == 1) {
      config[0] = nsam;
    }

    /* Checking if 0 < config[i] < nsam for all i */
    if ((npops > 1) && (multiplepopssampledfrom (nsam, npops, config)))
      Fst_bool = 1;

    /* prepare the storage for segregating sites data */
    if (isNumSegSitesConst)
      maxsites = segsites;

    list = cmatrix (nsam, maxsites + 1);

    posit =
      (tPositionOfSegSites *) calloc (maxsites, sizeof (tPositionOfSegSites));

    if (list == NULL)
      {
        fprintf (stderr, "No mem for segregating sites data, couldn't allocated memory for list\n");
        exit (EXIT_FAILURE);
      }

    if (posit == NULL)
      {
        fprintf (stderr, "No mem for segregating sites data, couldn't allocated memory for posit\n");
        exit (EXIT_FAILURE);
      }

    /* Start to process the data: count runs 1..howmany inside the body */
    count = 0;
    while (howmany - count++)
      {

        /* The line after "//" is the beginning of simulation data */
        while (strcmp (line, "//\n") != 0)
          {
            /* without this check a truncated file would loop forever */
            if (fgets (line, MAX_LNSZ, pfin) == NULL)
              {
                fprintf (stderr,
                         "ERROR: unexpected end of input while looking for \"//\"\n");
                exit (EXIT_FAILURE);
              }
          }

        /* Number of segregating sites line */
        fgets (line, MAX_LNSZ, pfin);
        if (!isNumSegSitesConst)
          {
            sscanf (line, "segsites: %d\n", &segsites);

            /* double the storage until this replicate fits */
            while (segsites >= maxsites)
              {
                maxsites *= 2;
                posit = (tPositionOfSegSites *)
                  realloc (posit, maxsites * sizeof (tPositionOfSegSites));
                /*printf("PRE %d %d %d\n", segsites, maxsites, nsam); */
                if (posit == NULL || biggerlist (nsam, maxsites, list) != 0)
                  {
                    fprintf (stderr,
                             "Not enough memory for reallocating char matrix\n");
                    exit (EXIT_FAILURE);
                  }
              }
          }

        /* get rid of base frequency line */
        if (Qbool)
          {
            fgets (line, MAX_LNSZ, pfin);
            sscanf (line, "freqACGT: %s %s %s %s", dum, dum, dum, dum);
          }

        if (segsites > 0)
          {
            /* read in position of segregating sites */
            fgets (longline, 262144, pfin);

            /* posit array initialized */
            ReadInPositionOfSegSites (longline, posit, segsites);

            /* list[][] get initialized with character states */
            for (i = 0; i < nsam; i++)
              fscanf (pfin, " %s", list[i]);

          }
        /* what do we do if segsites = 0?, Naoki */

        /* insert the data into the array */
        msOutArr->numElements ++;
        if (msOutArr->numElements > msOutArr->allocatedSize) {
          /* reallocate the memory; failure is fatal so no pointer is lost */
          msOutArr->allocatedSize += 1000;
          msOutArr->dat = realloc(msOutArr->dat,
                                  sizeof(msOutput) * (msOutArr->allocatedSize));
          if(msOutArr->dat ==NULL) {
            perror("Realloc of msOutArr->dat failed\n");
            exit(EXIT_FAILURE);
          }

          sumStatArr = realloc(sumStatArr,
                               msOutArr->allocatedSize * sizeof(struct SumStat *));
          if(sumStatArr ==NULL) {
            perror("Realloc of sumStatArr failed\n");
            exit(EXIT_FAILURE);
          }
        }
        outPtr = & msOutArr->dat[msOutArr->numElements-1];

        outPtr->nsam = nsam;
        outPtr->segsites = segsites;
        outPtr->seqDat = list;
        outPtr->nsub = gNadv;
        outPtr->npops = npops;
        outPtr->n = config;
        outPtr->theta = theta;
        outPtr->isNumSegSitesFixed = isNumSegSitesConst;
        outPtr->Qbool = Qbool;
        outPtr->Fst_bool = Fst_bool;
        outPtr->replicateID = count;
        outPtr->numReplicates = howmany;
        outPtr->taxonID = taxonID;
        outPtr->locusID = locusID;
        outPtr->taxonLocusID = taxonLocusID;
        outPtr->NumTaxonLocusPairs = numTaxonLocusPairs;
        outPtr->BasePairs = BasePairs;

        /* the statistics are computed now, while list/posit are still valid */
        sumStatArr[sumStatCounter++] = CalcSumStats (outPtr);

      }

    freeCMatrix (nsam, list);
    free(posit);

  } while (fgets(line, MAX_LNSZ,pfin));


  /* print every complete group of numTaxonLocusPairs consecutive results */
  int k, j;
  for (k = 0; k < msOutArr->numElements; k++) {
    j = k % msOutArr->numTaxonLocusPairs;
    sumStatArrTemp[j] = sumStatArr[k];

    if (j == msOutArr->numTaxonLocusPairs - 1) {
      // we got sumStats for 1 set
      PrintSumStatsArray(sumStatArrTemp, msOutArr->numTaxonLocusPairs, msOutArr->numLoci, msOutArr->numTaxonPairs);
    }
  }

  free(msOutArr->dat);
  free(msOutArr);
  free(sumStatArr);
  free(sumStatArrTemp);

}
Esempio n. 10
0
/*
 * Pre-scan the input file: read the number of populations and loci, then
 * for every population its sample size and name, and skip over the genotype
 * rows.  Fills the file-level variables number_of_pop, number_of_loci,
 * pop_size, num_of_pairs, pop_name, p, mut, mut95, total_sample_size,
 * total_num_of_pairs and max_num_of_pairs (the latter capped at 100000),
 * and reports what it found on stdout and in flog.
 *
 * If the file cannot be opened, or a header/population line cannot be
 * parsed, a message is printed and the function returns early instead of
 * reading through a NULL or exhausted stream.
 */
void read_input(char input_file[50]){

    int pop, ind, locus;
    int ret;
    FILE* fp;
    int trash;

    fp = fopen(input_file,"r");
    if( fp ) printf( "\nOpening input file %s \n", input_file );
    else{
        printf( "Cannot open file %s \n", input_file );
        return; /* nothing can be read from a NULL stream */
    }

    fprintf(flog,"\nInput file contains:\n");

    if( fscanf( fp, "%d %d\n", &number_of_pop, &number_of_loci ) != 2 ){
        printf( "Cannot read number of populations and loci from file %s \n", input_file );
        fclose(fp);
        return;
    }

    if (number_of_pop==1) printf("\nFile contains 1 population\n");
    else printf("\nFile contains %d populations\n",number_of_pop);

    if (number_of_loci==1) printf("\nIndividuals are typed for 1 locus\n\n");
    else printf("\nIndividuals are typed for %d loci\n\n",number_of_loci);

    fprintf(flog,"%d populations\nindividuals typed for %d loci\n",number_of_pop,number_of_loci);
    fflush(flog);

    pop_size=ivector(0,number_of_pop);
    num_of_pairs=ivector(0,number_of_pop);
    pop_name=cmatrix(0,number_of_pop,0,50);
    p=dvector(0,number_of_loci);
    mut=dvector(0,number_of_loci);
    mut95=dmatrix(0,number_of_loci,0,2);

    total_sample_size=0;
    total_num_of_pairs=0;
    max_num_of_pairs=0;
    for (pop=0 ; pop<number_of_pop ; pop++){
        //printf("pop %d\n",pop);
        /* %50s: assumes cmatrix(…,0,50) gives rows of 51 chars (index 0..50) -- TODO confirm */
        if( fscanf( fp, "%d %50s \n", &pop_size[pop], pop_name[pop]) != 2 ){
            printf( "Cannot read size and name of population %d from file %s \n", pop+1, input_file );
            fclose(fp);
            return;
        }

        num_of_pairs[pop] = pop_size[pop] * ( pop_size[pop]-1 ) / 2;

        printf("Population %d is called \"%s\" and has sample size of %d individuals\n", pop+1, pop_name[pop], pop_size[pop]);
        printf("(this makes %d pairwise haplotypes comparisons)\n\n", num_of_pairs[pop]);

        fprintf(flog, "Population \"%s\" has %d individuals (%d pairwise comparisons)\n", pop_name[pop],pop_size[pop],num_of_pairs[pop]);
        fflush(flog);

        total_sample_size += pop_size[pop];
        total_num_of_pairs += num_of_pairs[pop];

        if(num_of_pairs[pop]>max_num_of_pairs){
            max_num_of_pairs=num_of_pairs[pop];
        }

        /* genotypes are only skipped here; the values themselves are discarded */
        for (ind=0 ; ind<pop_size[pop] ; ind++){
            for (locus=0 ; locus<number_of_loci ; locus++) fscanf(fp,"%d",&trash);
            fscanf(fp,"\n");
        }
    }
    fprintf(flog, "\n");

    printf("max_num_of_pairs=%d\n",max_num_of_pairs);
    if(max_num_of_pairs>100000){
        max_num_of_pairs=100000;
        printf("max_num_of_pairs=%d\n",max_num_of_pairs);
    }




    printf("\nTotal sample size is %d individuals\n\n", total_sample_size);

    ret = fclose(fp);

    if( ret==0 )
        printf( "File %s closed\n", input_file );
    else{
        printf( "\nCannot close file %s \n", input_file );
    }
}
Esempio n. 11
0
File: io.c Progetto: Rewarp/phylocom
// ----------- ReadTraits -------------
traits ReadTraits(char traitfile[50])
{
  struct traits C;
  FILE *Cread;
  char line[MAXTRAITLINE];
  int i, j, waitingforspace;
  int nline, words;
  int extra = 0;
  char word[(int) (MAXTRAITLINE / 2)][(int) (MAXTRAITLINE / 2)];
  char tmp[MAXTAXONLENGTH + 6];
  int lineending;

  C.ntaxa = 0;
  C.ntraits = 0;

  // pre-pre-read
  lineending = whatnewline(traitfile);

  // preread:
  if ((Cread = fopen(traitfile, "r")) == NULL)
    {
      printf("Cannot open traits file\n");
      exit(0);
    }
  nline = 0;
  words= 0;

  while (myfgets(line, 1000, Cread, lineending) != NULL)
    {
      // parse the first line
      // should be able to do this with iterative scanf, but I couldn't
      // work it out - this is clunky!
      if (nline < 2)
        {
          waitingforspace = 1;
          strcpy(word[0], "");
          words= 0;

          for (i = 0; i < strlen(line); i++)
            {
              if (isspace(line[i]) && (waitingforspace))
                {
                  words++;
                  strcpy(word[words], "");
                  waitingforspace = 0;
                }
              else if (isspace(line[i])) // 2nd+ space in a series of white
                {
                }
              else
                {
                  strncat(word[words] , &line[i], 1);
                  waitingforspace = 1;
                }
            }

          if(nline==1)
            {
              // to see if there is a names line
              if (strcmp(word[0], "name") ==0) extra = 1;
            }
        }

      nline++;
    }
  fclose(Cread);

  C.ntraits = words-1;
  C.type = ivector(0, C.ntraits-1);
  C.ntaxa = nline -1 -extra;
  C.taxon = cmatrix(0 , C.ntaxa-1, 0 , MAXTAXONLENGTH);
  C.tr = matrix(0, C.ntaxa-1, 0, C.ntraits-1);
  C.trname = cmatrix(0, C.ntraits-1, 0, MAXTAXONLENGTH);
  strcpy(C.trname[0], "nonames");

  // printf("traits: %d\ttaxa: %d\n", C.ntraits, C.ntaxa);

  // real read
  Cread = fopen(traitfile, "r");
  nline = 0;

  while (myfgets(line, 1000, Cread, lineending) != NULL)
    {
      words= 0;
      strcpy(word[0], "");
      waitingforspace = 1;

      // parse into words, every line
      for (i = 0; i < strlen(line); i++)
        {
          if (isspace(line[i]) && (waitingforspace))
            {
              words++;
              strcpy(word[words], "");
              waitingforspace = 0;
            }
          else if (isspace(line[i]))
            {
            }
          else
            {
              strncat(word[words] , &line[i], 1);
              waitingforspace = 1;
            }
        }

      if(nline==0)
        {
          // now we know how many traits

          for (j = 1; j< words; j++)
            {
              C.type[j-1] = atoi(word[j]);
            }
        }

      else // lines 2+
        {
          // check (again) for trait names
          if ((nline == 1) && (extra == 1))
            {
              for (j = 1; j < words; j++)
                {
                  strcpy(C.trname[j-1], word[j]);
                }
            }
          else // either line 3+ or line 2 with no trait names
            {
              // assign taxon name
              strcpy(C.taxon[(nline-1)-extra], word[0]);
              for (j = 1; j < words; j++)
                {
                  C.tr[nline-1-extra][j-1] = (float) atof(word[j]);
                  // printf("%s\t%d\t%f\n", C.taxon[nline-1], j-1, (float) atof(word[j]));
                }
            }
        }

      nline++;
    }

  fclose(Cread);

  // assign taxon names if there are none
  if (extra != 1) // if no trait names given
    {
      for (j = 0; j < C.ntraits; j++)
        {
          sprintf(tmp, "trait_%d", j+1);
          strcpy(C.trname[j], tmp);
        }
    }

  for (i = 0; i < C.ntraits; i++)
    {
      if (C.type[i] == 1)
        {
          printf("Char type = 1; multistate characters not implemented yet\n");
          exit(EXIT_FAILURE);
        }
    }

  return C;

}
Esempio n. 12
0
File: io.c Progetto: Rewarp/phylocom
// ---------------- ReadSample ---------------------------
// Read a sample file whose lines are "plot abundance taxon".
//   pass 1: validate that abundances are non-negative integers and count
//           records, plots (runs of equal plot names) and the largest
//           number of records in one plot, to size the structure.
//   pass 2: fill plot names, per-record abundances and taxon ids, building
//           the list of distinct taxa in order of first appearance.
// Finally per-plot totals, proportional abundances, and per-taxon total
// abundance and frequency (number of records) are computed.
// Lines that do not contain three fields are skipped in both passes.
// Exits with an error if the file cannot be opened or an abundance is not
// an integer.
sample ReadSample(char samplefile[50])
{
  struct sample InSample;

  char line[500];
  int abundI;
  char preAbund[50];
  int i, j, z, found;
  char plotname[MAXPLOTLENGTH+1];
  char lastplot[MAXPLOTLENGTH+1];
  char taxaI[MAXTAXONLENGTH+1];
  int lineending;
  char fmtstr[64];  // "plot abundance-as-text taxon", widths bounded
  char fmtint[64];  // "plot abundance-as-int taxon", widths bounded

  // build field-width-limited formats so %s cannot overrun the buffers
  snprintf(fmtstr, sizeof fmtstr, "%%%ds %%49s %%%ds",
           (int) MAXPLOTLENGTH, (int) MAXTAXONLENGTH);
  snprintf(fmtint, sizeof fmtint, "%%%ds %%d %%%ds",
           (int) MAXPLOTLENGTH, (int) MAXTAXONLENGTH);

  strcpy(lastplot, "");
  i = 0;
  InSample.nrec = 0;
  InSample.nsamples = 0;
  InSample.maxrec = 0;
  InSample.ntaxa = 0;
  InSample.totabund = 0;

  // pre-pre-read
  lineending = whatnewline(samplefile);

  // preread:
  // READ PROPERLY
  if ((Ft = fopen(samplefile, "r")) == NULL)
    {
      printf("Cannot open sample file\n");
      exit(EXIT_FAILURE);
    }

  while (myfgets(line, 500, Ft, lineending) != NULL)
    // OK to lv length undynamic
    {
      // blank or short lines would otherwise reuse the previous fields
      if (sscanf(line, fmtstr, plotname, preAbund, taxaI) != 3) continue;

      // Test for non-integer abundances:
      z = 0;
      while (preAbund[z] != '\0')
        {
          // printf("%d %d\n", z, preAbund[z]);
          if (!isdigit((unsigned char) preAbund[z]))
            {
              fprintf(stderr,"Error: sample abundances must be integers\n");
              exit(1);
            }
          z++;
        }

      InSample.nrec++;

      // if a new plot:
      if (strcmp(lastplot, plotname) != 0)
        {
          InSample.nsamples ++;
          i = 0;
          strcpy(lastplot, plotname);
        }
      i++;
      if (InSample.maxrec < i) InSample.maxrec = i;

    }
  fclose(Ft);

  //printf("nrec = %d; nsamples = %d; maxrec = %d\n", InSample.nrec, InSample.nsamples, InSample.maxrec);

  // Dimension structure

  InSample.srec = ivector(0, InSample.nsamples - 1);
  InSample.irec = ivector(0, InSample.nsamples - 1);
  InSample.id =   imatrix(0, InSample.nsamples - 1, 0, InSample.maxrec);
  InSample.abund =   imatrix(0, InSample.nsamples - 1, 0, InSample.maxrec);
  InSample.pabund =   matrix(0, InSample.nsamples - 1, 0, InSample.maxrec);
  InSample.taxa  =   cmatrix(0, InSample.nrec-1, 0, MAXTAXONLENGTH);
  InSample.pname  = cmatrix(0, InSample.nsamples - 1, 0, MAXPLOTLENGTH);

  // Read file for dynamic structure
  strcpy(lastplot, "");
  i = 0;
  InSample.nsamples = 0; // recycling as counter

  // reopen the file that was passed in (not the global file name), so both
  // passes are guaranteed to see the same data
  if ((Ft = fopen(samplefile, "r")) == NULL)
    {
      printf("Cannot open sample file\n");
      exit(EXIT_FAILURE);
    }
  while (myfgets(line, 500, Ft, lineending) != NULL)
    {
      // must skip exactly the lines the first pass skipped
      if (sscanf(line, fmtint, plotname, &abundI, taxaI) != 3) continue;

      // do this on the first line of each new plot
      if (strcmp(lastplot, plotname) != 0)
        {
          strcpy(InSample.pname[InSample.nsamples], plotname);
          InSample.nsamples ++;
          i = 0;
          strcpy(lastplot, plotname);
          InSample.srec[InSample.nsamples-1]=0;
        }
      // recs per sample counter
      InSample.srec[InSample.nsamples-1]++;

      // set abundance
      InSample.abund[InSample.nsamples-1][i] = abundI;

      // set taxon: look it up among the taxa seen so far
      found = 0;
      for (j = 0; j < InSample.ntaxa; j++)
        {
          if (strcmp(InSample.taxa[j], taxaI) == 0)
            {
              InSample.id[InSample.nsamples-1][i] = j;
              found = 1;
              break;
            }
        }
      // new taxon found - executed on first line
      if(found == 0)
        {
          strcpy(InSample.taxa[InSample.ntaxa], taxaI);
          InSample.id[InSample.nsamples-1][i] = InSample.ntaxa;
          InSample.ntaxa++;
        }

      i++;
    }
  fclose(Ft);

  //Calculate sample and species total abundances and frequency
  InSample.sppabund = lvector(0, InSample.ntaxa - 1);
  InSample.psppabund = vector(0, InSample.ntaxa -1 );
  InSample.sppfreq = lvector(0, InSample.ntaxa - 1);
  InSample.psppfreq = vector(0, InSample.ntaxa -1 );

  // total abundance per sample
  for (i = 0; i < InSample.nsamples; i++)
    {
      InSample.irec[i] = 0;
      for (j = 0; j < InSample.srec[i];j++)
        {
          InSample.irec[i] += InSample.abund[i][j];
        }
    }

  // clear per-taxon accumulators
  for (i = 0; i < InSample.ntaxa; i++)
    {
      InSample.sppabund[i] = 0;
      InSample.sppfreq[i] = 0;
    }

  // calculate
  for (i = 0; i < InSample.nsamples; i++)
    {
      for (j = 0; j < InSample.srec[i];j++)
        {
          InSample.pabund[i][j] = (float) InSample.abund[i][j] / (float) InSample.irec[i];
          InSample.sppabund[InSample.id[i][j]] += InSample.abund[i][j];
          InSample.totabund += InSample.abund[i][j];
          InSample.sppfreq[InSample.id[i][j]]++;
          // printf("%d %d %d %s %d\n", i, j, InSample.id[i][j], InSample.taxa[InSample.id[i][j]], (int) InSample.sppabund[InSample.id[i][j]]);
        }
    }

  for (i = 0; i < InSample.ntaxa; i++)
    {
      InSample.psppabund[i] = (float) InSample.sppabund[i] / (float) InSample.totabund;
      InSample.psppfreq[i] = (float) InSample.sppfreq[i] / (float) InSample.nsamples;
    }
  return InSample;
}
Esempio n. 13
0
File: io.c Progetto: Rewarp/phylocom
// -------------- WriteNexus ------------------
// Print a NEXUS file (Mesquite style) to stdout containing:
//   - a TAXA block with the terminal taxa of P[0],
//   - if nsamp > 0: presence/absence and abundance CHARACTERS blocks built
//     from sample S,
//   - if ntrf > 0: discrete (types 0,1,2) and continuous (type 3) trait
//     CHARACTERS blocks built from T,
//   - a TREES block with a TRANSLATE table and one TREE per phylogeny,
//   - a PHYLOCOM block echoing the sample records.
// All trees are assumed to contain the same taxa as P[0].
void WriteNexus(phylo P[], int ntree, sample S, int nsamp, traits T, int ntrf)
{
  // Mesquite style!
  time_t rawtime;
  int i, j, q, k, x, present;
  int makedisc, makecont;
  float abnd;
  int nterm = 0;
  phylo WN[ntree];
  const char *now;
  char stamp[25]; // 24 characters of ctime() output plus terminator

  for (i = 0; i < ntree; i++)
    {
      WN[i] = P[i]; // inefficient to make copy so much, but need to
      // to create third dimension of taxon array

      // reassign the pointer to a new space - freed after the tree is written
      WN[i].taxon = cmatrix(0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10);
    }

  // determine number of terminal taxa - assume all trees contain same taxa
  for (i = 0; i < P[0].nnodes; i++)
    {
      if (P[0].noat[i] == 0) nterm++;
    }

  // ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"; keep the first 24 chars.
  // strncpy does not terminate when the source is longer, so do it here.
  time ( &rawtime );
  now = ctime(&rawtime);
  stamp[0] = '\0';
  if (now != NULL)
    {
      strncpy(stamp, now, 24);
      stamp[24] = '\0';
    }

  printf("#NEXUS\n[output from phylocom, written %s]\n\n", stamp );
  printf("BEGIN TAXA;\n");
  if (TreeView == 0) printf("TITLE Phylocom_Phylogeny_Taxa;\n"); // Needed for correct Mesquite grammar, but V1.1 busted! Will not read interior names correctly.
  printf("\tDIMENSIONS NTAX=%d;\n\tTAXLABELS\n\t", nterm);

  for (i = 0; i < P[0].nnodes; i++)
    {
      if (P[0].noat[i] == 0) printf("   %s", P[0].taxon[i]);
    }
  printf(";\nEND;\n\n");

  if (nsamp > 0)
    {
      // Characters
      printf("BEGIN CHARACTERS;\n\tTITLE  Phylocom_Presence_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \"  0 1\";\n", S.nsamples);

      printf("\tCHARSTATELABELS\n\t\t");
      printf("%d %s", 1, S.pname[0]);
      for (i = 1; i < S.nsamples; i++)
        {
          printf(", %d %s", i+1, S.pname[i]);
        }
      printf(";\n\tMATRIX\n");
      for (i = 0; i < P[0].nnodes; i++)
        {
          if (P[0].noat[i] == 0)
            {
              printf("\t%s\t" , P[0].taxon[i]);
              for (j = 0; j < S.nsamples; j++)
                {
                  present = 0;
                  for (k = 0; k < S.srec[j]; k++)
                    {
                      if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) present = 1;

                    }
                  printf("%d", present);
                }
              printf("\n");
            }
        }
      printf(";\nEND;\n\n");

      // Abundances as continuous
      printf("BEGIN CHARACTERS;\n\tTITLE  Phylocom_Abundance_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", S.nsamples);

      printf("\tCHARSTATELABELS\n\t\t");
      printf("%d %s", 1, S.pname[0]);
      for (i = 1; i < S.nsamples; i++)
        {
          printf(", %d %s", i+1, S.pname[i]);
        }
      printf(";\n\tMATRIX\n");
      for (i = 0; i < P[0].nnodes; i++)
        {
          if (P[0].noat[i] == 0)
            {
              printf("\t%s\t" , P[0].taxon[i]);
              for (j = 0; j < S.nsamples; j++)
                {
                  // if a taxon occurs more than once in a plot the last
                  // record wins
                  abnd = 0.0;
                  for (k = 0; k < S.srec[j]; k++)
                    {
                      if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) abnd = (float) S.abund[j][k];

                    }
                  printf("  %f", abnd);
                }
              printf("\n");
            }
        }
      printf(";\nEND;\n\n");
    }

  if (ntrf > 0)
    {
      makedisc = 0;
      makecont = 0;

      for (i = 0; i < T.ntraits; i++)
        {
          if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) makedisc++;

          if (T.type[i] == 3) makecont++;
        }
      if (makedisc > 0)
        {
          // Discrete Traits
          printf("BEGIN CHARACTERS;\n\tTITLE  Phylocom_Discrete_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \"  0 1 2 3 4 5 6 7 8 9\";\n", makedisc);
          x = 1;
          printf("\tCHARSTATELABELS\n\t\t");
          // comma-separated labels: separator goes before every label but the first
          for (i = 0; i < T.ntraits; i++)
            {
              if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2))
                {
                  if (x > 1) printf(", ");
                  printf("%d %s", x, T.trname[i]);
                  x++;
                }
            }

          printf(";\n\tMATRIX\n");
          for (i = 0; i < T.ntaxa; i++)
            {
              printf("\t%s\t" , T.taxon[i]);
              for (j = 0; j < T.ntraits; j++)
                {
                  if ((T.type[j] == 0) || (T.type[j] == 1) || (T.type[j] == 2))
                    {
                      printf("%d", (int) T.tr[i][j]);
                    }
                }
              printf("\n");
            }
          printf(";\nEND;\n\n");
        }


      if (makecont > 0)
        {
          // Continous Traits
          printf("BEGIN CHARACTERS;\n\tTITLE  Phylocom_Continuous_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", makecont);
          x=1;
          printf("\tCHARSTATELABELS\n\t\t");
          for (i = 0; i < T.ntraits; i++)
            {
              if (T.type[i] == 3)
                {
                  if (x > 1) printf(", ");
                  printf("%d %s", x, T.trname[i]);
                  x++;
                }
            }

          printf(";\n\tMATRIX\n");
          for (i = 0; i < T.ntaxa; i++)
            {
              printf("\t%s\t" , T.taxon[i]);
              for (j = 0; j < T.ntraits; j++)
                {
                  if (T.type[j] == 3)
                    {
                      printf("  %f", T.tr[i][j]);
                    }
                }
              printf("\n");
            }
          printf(";\nEND;\n\n");
        }
    }

  printf("BEGIN TREES;\n");
  if (TreeView == 0) printf("\tTITLE Phylocom_Phylogenies;\n\tLINK Taxa = Phylocom_Phylogeny_Taxa;\n"); // Ditto!
  printf("\tTRANSLATE\n\t");


  for (q = 0; q < ntree; q++)
    {
      j = 0; // running number of the terminal taxon
      for (i = 0; i < P[0].nnodes; i++)
        {
          if (P[0].noat[i] == 0)
            {
              j++;
              if (q == 0)
                {
                  // close the table on the last terminal, wherever it sits
                  // in the node list, so the list never ends in a comma
                  if (j == nterm) printf(" %d %s;\n", j, P[0].taxon[i]);
                  else printf(" %d %s,", j, P[0].taxon[i]);
                }
              // terminals are written by their TRANSLATE number
              sprintf(WN[q].taxon[i], "%d", j);
            }
          else if ((strcmp(P[q].taxon[i], "") != 0) && \
                   (strcmp(P[q].taxon[i], ".") != 0))
            {
              // named interior nodes are quoted
              strcpy(WN[q].taxon[i], "'");
              strcat(WN[q].taxon[i], P[q].taxon[i]);
              strcat(WN[q].taxon[i], "'");
            }
          else strcpy(WN[q].taxon[i], "");
          // test if (strcmp(WN[q].notes[i], "") != 0) printf("%s\n", WN[q].notes[i]);
        }
    }
  for (q = 0; q < ntree; q++)
    {

      printf("\tTREE %s = ", WN[q].phyname);
      Fy2newRec(WN[q]);
      free_cmatrix(WN[q].taxon, 0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10);
    }
  printf("END;\n");

  printf("\nBEGIN PHYLOCOM;\n\tTITLE Phylocom_Main;\n\tDATA\n");
  for (i = 0; i < S.nsamples; i++)
    {
      for (j = 0; j < S.srec[i]; j++)
        {
          printf("%s\t%d\t%s\n", S.pname[i], S.abund[i][j], S.taxa[S.id[i][j]]);
        }
    }
  printf(";\nEND;\n");

}
Esempio n. 14
0
main (int argc, char *argv[]) {

	int i, j, **seqs, **nall, ord=1, ns, **pij, lkf=0, npt=0, pnew=0, anc=0;
	int tcat=1, rcat=0, verb=1, miss=0, *flocs;

	int sw_flag=0, moment_flag=0, rmin_flag=0, sim_flag=0, test_flag=0;
	char fname[MAXNAME+1], **seqnames;
	long seed=-setseed();
	extern int sizeofpset;
	double *locs;

	double **lkmat, *lkres;
	FILE *ifp=NULL, *ifp2=NULL, *ifp3=NULL, *tfp;
	struct site_type **pset;
	struct data_sum *data;
	int ask_questions = 1;
	char *in_str;

	print_help(argc, argv);
	idum = &seed;
	data = malloc((size_t) sizeof(struct data_sum));
	data->exact = 0;
	strcpy(data->prefix, "");

	for(i = 0; i < argc; i++)
	{
		if(*argv[i] == '-')
		{ 
			in_str = argv[i];
			ask_questions = 0;
			if(strcmp(in_str, "-seq") == 0) ifp = fopen(argv[i+1], "r");		
			if(strcmp(in_str, "-loc") == 0) ifp2 = fopen(argv[i+1], "r");
			if(strcmp(in_str, "-lk") == 0) 
			{
				lkf = 1;
				ifp3 = fopen(argv[i+1], "r");
			}
			if(strcmp(in_str, "-exact") == 0) data->exact = 1;
			if(strcmp(in_str, "-concise") == 0) verb=0;
			if(strcmp(in_str, "-window") == 0) sw_flag=1;
			if(strcmp(in_str, "-moment") == 0) moment_flag=1;
			if(strcmp(in_str, "-simulate") == 0) sim_flag=1;
			if(strcmp(in_str, "-rmin_flag") == 0) rmin_flag=2;
			if(strcmp(in_str, "-test") == 0) test_flag=1;
			if(strcmp(in_str, "-prefix") == 0) strcpy(data->prefix, argv[i+1]);
		}
	}
	if (ifp == NULL) 
	{
		printf("\nCould not find seqs file in command line.\n");
		printf("\nInput filename for seqs:\n");
		scanf("%s", &fname);
		ifp = fopen(fname, "r");
	}
	if (ifp == NULL) nrerror("Error in opening sequence file");

	
	fscanf(ifp,"%i%i%i", &data->nseq, &data->lseq, &data->hd);
	if ((data->nseq < 2) || (data->lseq < 2)) {printf("\n\nInsufficient data for analysis (n > 1, L > 1) \n\n"); exit(1);}
	if (data->nseq > SEQ_MAX) {printf("\n\nMore than max no. sequences: Using first %i for analysis\n\n", SEQ_MAX); data->nseq=SEQ_MAX;}
	printf("\nAnalysing %i (n=%i) sequences of length %i seg sites\n", data->nseq, data->hd, data->lseq);
	seqs = imatrix(1, data->nseq, 1, data->lseq);
    seqnames = cmatrix(1, data->nseq+11, 1, MAXNAME+11);
	if (read_fasta(seqs, ifp, data->nseq, data->lseq, seqnames)) printf("\nSequences read succesfully\n");
    fclose(ifp);

	nall = imatrix(1, data->lseq, 1, 6);
	allele_count(seqs, data->nseq, data->lseq, nall,1, data->hd, data->prefix);

	/*Store lnfac values in array for speed of computation*/

	lnfac_array = (double *) malloc((size_t) ((int) (data->nseq+2)*(data->hd))*sizeof(double));

	lnfac_array[0]=lnfac_array[1]=0;

	for (j=2;j<=((int) data->nseq*(data->hd));j++) lnfac_array[j]=(double) lnfac_array[j-1]+log(j);


	/*Open file with location of seg sites and read in data*/	
	if (ifp2 == NULL) 
	{
		printf("\nCould not find locs file in command line.\n");
		printf("\nInput name of file containing location of seg sites\n\n");
		scanf("%s", &fname);
		ifp2 = fopen(fname, "r");
	}

	if (ifp2 == NULL) nrerror("Cannot open loc file");
	fscanf(ifp2, "%i %lf %c", &ns, &data->tlseq, &data->lc);
	if (ns != data->lseq) nrerror("Lseq and Locs disagree");
	if ((data->lc != 'C')&&(data->lc != 'L')) nrerror("Must input linear(L)/conversion(C)");
	if (data->lc == 'C') {
	  data->avc=0;
	  while (data->avc <= 0) {
	    printf("\n\nInput average tract length for conversion model: ");scanf("%lf", &(data->avc));
	  }
	}

	locs = dvector(1, data->lseq);
	flocs = ivector(1, data->lseq); /*Array to use when simulating data*/


	for (i=1; i<=data->lseq; i++) {
		fscanf(ifp2, "%lf", &locs[i]); 
		if ((locs[i]==0)||(locs[i]>data->tlseq)) {printf("\n\nError in Loc file\n\n%lf\n", data->tlseq); exit(1);}
		if (i>1 && locs[i]<=locs[i-1]) nrerror("Error in locs file: SNPs must be montonically increasing");
	}
	printf("\nLocation of seg sites\n\n");
	for (i=1; i<=data->lseq; i++) printf("%3i   %4.2lf\n", i, locs[i]);
	fclose(ifp2);

	/*Read in likelihood file where needed*/
    if (ask_questions) 
	{
			printf("\n\nUse existing likelihood file? (yes=1, no=0):");
			scanf("%i", &lkf);  /*lkf is a flag: 1 means use existing likelihood file as starting point*/
			if (lkf) 
			{
				printf("\n\nInput name of likelihood file: ");
				scanf("%s", &fname);
				ifp3 = fopen(fname, "r");
			}
			else 
				data->exact=0;

			if (lkf == 1)
			{
				printf("\n\nIs likelihood file an exact match to data?(no=0/yes=1): ");
				scanf("%i", &data->exact);
			}
	}

	if (lkf && !ifp3) nrerror("Cannot open likelihood file");
	if (!lkf && data->hd==2) nrerror("For diploid data need complete lookup table for sequences");

	/*Store pair-types in pij matrix - classify in pair_spectrum routine*/

	data->w	= data->lseq;  /*Note for this program use all data - pair_int restricts to a smaller window*/
	pij = imatrix((int) 1,(int) data->lseq,(int) 1,(int) data->w);

	for (i=1;i<=data->lseq;i++) for (j=1;j<=data->w;j++) pij[i][j]=0;

	pset = init_pset(pset, lkf, ifp3, &npt, data);  /*Reads in type configurations from likelihood file*/

	printf("\n\n*** Calculating distribution of pair types ***\n\n");
	pset = pair_spectrum(seqs, data, nall, pset, &npt, &pnew, &miss, anc, pij);
	printf("\n\n *** Completed classification of pair types ***\n\n");

	if (data->exact && (pnew || miss)) nrerror("Lookup table is not exact for sequences\n(possibly generated by interval)");
	printf("\n\nOld = %i: New = %i: Missing = %i\n\n", npt,pnew,miss);
	data->ptt = (int) npt+pnew+miss;  /*npt is number from likelihood file, pnew is number new with no missing data, miss is # new with missing data*/
	if (verb) {
		strcpy(fname, data->prefix);
		tfp = fopen(strcat(fname, "type_table.txt"), "w");
		if (!tfp) nrerror("Cannot open type file");
		type_print(pij, data->lseq, data->w,tfp);
		fclose(tfp);
	}
	if (verb) print_pairs(stdout, pset, npt+pnew, data->hd, data->nseq);

	/*Need a complete set for missing data or diploid data - check this*/
	if (!data->exact && (data->hd ==2 || miss)) {
		printf("\n\nMissing data or diploid: checking that likelihood table is exhaustive\n\n");
		check_exhaustive(pset,npt,(data->nseq)*((int) data->hd));
	}
	/*Read parameters and likelihoods from likelihood file - where appropriate*/
	if (lkf) {
		read_pars(ifp3, &tcat, &data->th, &data->rcat, &data->rmax);
		lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat);
		if (lkf) read_lk(ifp3, lkmat, npt, tcat, data->rcat);
	}

	/*If haploid, but novel types, need to calculate new likelihoods and input parameter values*/
	if (data->hd ==1 && pnew) { /*Note can have pnew for diploid data, but this has been checked for already*/
		if (!lkf) {
			data->th=data->rmax=-1.0; data->rcat=0;
			printf("\n\nInput theta per site (suggest Watterson estimate of %.5lf):",(double) data->lseq/(watterson(data->nseq*data->hd)*data->tlseq));
			while (data->th<0.0) scanf("%lf", &data->th);
			printf("\n\nMax 4Ner for grid (suggest 100):");
			while(data->rmax<0.0) scanf("%lf", &data->rmax);
			printf("\n\nNumber of points on grid (suggest 101, min=2):");
			while(data->rcat<2) scanf("%i", &data->rcat);
			lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat);
		}
		lk_est(pset,npt,pnew,lkmat,data->th,data->rcat,data->rmax);
		data->exact=1;
	}

	/*Sum over missing data or resolve genotypes and sum over missing data+configurations*/
	else if (miss && data->hd==1) {  
		printf("\n\n*** Calculating likelihoods for missing data ***\n\n");
		for (i=1;i<=miss;i++) {
			lk_miss(pset[npt+i],lkmat[npt+i],lkmat,data);
			printf("\rType %i", i);
		}

		printf("  ...Done!\n\n");
	}


	/*Sum over resolutions for diploid data*/
	else if (data->hd==2 && !data->exact) {
	  printf("\n\n*** Resolving diploid data: %i ***\n\n",pnew+miss);
	  lkres = dvector(1,data->rcat);
	  for (i=1;i<=pnew+miss;i++) {
	    lk_resolve(lkres,pset[npt+i],lkmat[npt+i],lkmat,data);
	    printf("\rType %i", i); 
	  }
	  free_dvector(lkres,1,data->rcat); 

	  printf("  ...Done!\n\n");
	}

	/*If new likelihood generated can output likelihood file for future analyses*/
	if (verb) print_lks(pset, data, npt+pnew+miss, lkmat);


	/*Basic analysis - estimation of 4Ner asuming constant rate*/

	data->rme=data->rmax; data->rce=data->rcat;
	if (1) {
		printf("\n\nDo you wish to change grid over which to estimate likelihoods for (default = %i points, 4Ner 0 - %.1lf) (1/0) :",data->rcat,data->rmax);
		scanf("%i", &lkf);
		if (lkf) {
			data->rme=-10; data->rce=0;
			printf("\n\nMax 4Ner for estimation           : ");
			while (data->rme < 0.0) scanf("%lf", &data->rme);  
       		printf("\n\nNumber of classes to estimate for: ");
       		while (data->rce < 1) scanf("%i", &data->rce);
		}
	}
	data->lksurf = dmatrix(1,data->rce,1,2);
	lk_surf(pset, pij, data, lkmat, data->th, locs, 1);


	/*Print marginal likelihood ratio test statistics for each pair of sites*/
	printf("\n\nCalculating fits\n\n");
	fit_pwlk(data,pij,locs,lkmat,verb);

	/*Sliding windows version*/
	if (1) {
		printf("\n\nDo you wish to carry out a sliding windows analysis? (yes=1/no=0):");
		scanf("%i", &sw_flag);
	}
	if (sw_flag) lk_win(pset,pij,data,lkmat,locs,nall);

	/*Nonparametric estimation of recombination rate*/
	if (1) {
		printf("\n\nPrint out table of Rmin values?\n(0=No, 1=Total only, 2=Full table):");
		scanf("%i", &rmin_flag);
	}

	if (rmin_flag) {
		rmin(data, pset, pij, locs, lkf-1);
		printf("\n\nLower bound on Rmin = %i\n\n",data->rmin);
	}

	/*Estimate 4Ner by Wakeley 1997 method*/
	if (1) {
		printf("\n\nEstimate 4Ner by moment method? (yes=1, no=0)");
		scanf("%i", &moment_flag);
	}

	if (moment_flag) wakeley_est(data, seqs, locs);

	/*Recombination tests - only available for haploid data!*/
	if (data->hd==1) {
		if (1) {
			printf("\n\nDo you wish to test for recombination? (yes=1, no=0): ");
			scanf("%i", &test_flag);
		}
		if (test_flag) {
			rec_test(data, pij, locs, lkmat, pset, npt+pnew+miss);
		}
	}

	/*Conditional simulation - only available for haploid data with a complete lk file*/
	if (data->hd==1 && !(data->exact)) {

		if (1) {
	  printf("\n\nDo you wish to test constant-rate model and estimate sampling distribution by simulation? (yes=1/no=0): ");
	  scanf("%i", &test_flag);
		}
	  if (test_flag) {
	    freq_min(locs, flocs, nall, data);
	    printf("\n\nHow many simulations? ");
	    scanf("%i", &lkf);
	    snp_sim(locs, flocs, pset, lkmat, lkf, data);
	  }
	}

	free_imatrix(pij,1,data->lseq,1,data->w);
	free_imatrix(seqs,1,data->nseq,1,data->lseq);
	free_imatrix(nall,1,data->lseq,1,5);
	for (i=1;i<sizeofpset;i++) free(pset[i]);
	free(pset);
	free(data);
	free_dvector(locs, 1, data->lseq);
	free_ivector(flocs, 1, data->lseq);

	/* system("PAUSE"); */
}
Esempio n. 15
0
/* Sort v[0..n-1] into ascending order in place (Shell sort, so that no
   extra library dependency is needed by NodeSig). */
static void NodeSigSortRuns(float *v, int n) {
  int gap, i, j;
  float held;

  for (gap = n / 2; gap > 0; gap /= 2) {
    for (i = gap; i < n; i++) {
      held = v[i];
      for (j = i; j >= gap && v[j - gap] > held; j -= gap)
        v[j] = v[j - gap];
      v[j] = held;
    }
  }
}

/* NodeSig: for every sample plot with more than two taxa, count how many
   sample taxa (or their summed abundances when abundWeighted is non-zero)
   sit below each node of phylogeny P, and compare that count with the
   counts obtained from RUNS calls to PhylogenyAttachShuffle.

   An interior node is marked "-" (SIGLESS / LESS_) when at least 97.5% of
   the runs gave a larger count than observed, and "+" (SIGMORE / MORE_)
   when at least 97.5% of the runs gave a smaller count.

   outmethod == 1 prints one tab-separated row per interior node to stdout;
   outmethod == 0 passes the annotated copies of P to WriteNexus. */
void NodeSig(phylo P, sample S, int outmethod, int abundWeighted) {
  // Currently need to use taxon name of interior as a marker,
  // because Mesquite will not show all labels, but if it will soon,
  // best to use notes, and not muck around with taxon name

  //TODO modifying to use abundances, need longs instead of ints for counters

  int plot, node, taxon, i, ordHI, ordLO, run, median;
  int tipsReal_n[P.nnodes];
  float test_r[RUNS];
  char mark[2];
  char tmp[15];
  int **tips_rn;
  // was: int tips_rn[RUNS][MaxNode+1];
  phylo Out[S.nsamples];
  int *attach;

  // NOTE(review): attach and tips_rn are never released in this function.
  attach = ivector(0, S.ntaxa-1);

  tips_rn = imatrix(0, RUNS-1, 0, P.nnodes-1);

  for (i = 0; i < S.nsamples; i++) {
    Out[i] = P; // all the pointers in Out are the same as those in Intree
                // careful not to change any preexisting arrays in Out, or they will
                // also be changed in Intree!

    if (TreeView == 0)
      Out[i].arenotes = 1;
    if (TreeView == 1)
      Out[i].arenotes = 0;

    // dimension a new array for Out names:
    // NOTE(review): for plots skipped below (srec <= 2) these strings are
    // never written here; whether cmatrix zero-fills them is not visible.
    if (TreeView == 0)
      Out[i].notes = cmatrix(0, P.nnodes-1, 0, MAXNOTELENGTH+10);
    if (TreeView == 1)
      Out[i].taxon = cmatrix(0, P.nnodes-1, 0, MAXTAXONLENGTH+10);

  }

  // for each plot

  if (outmethod == 1)
    printf("plot\tnode\tnode_name           \tntaxa\tmedian\trank\tsig\n");
  for (plot = 0; plot < S.nsamples; plot++) {
    if (S.srec[plot] > 2) {

      for (node = 0; node < P.nnodes; node++) {
        tipsReal_n[node] = 0;
        for (run = 0; run < RUNS; run++)
          tips_rn[run][node] = 0;
      }

      // need to reset it
      AttachSampleToPhylo(S, P, attach);

      // follow up from tips, adding 1 to each node passed through
      for (taxon = 0; taxon < S.srec[plot]; taxon++) {
        i = P.t2n[ attach[ S.id[plot][taxon] ] ];
        while (i != -1) {
          if (abundWeighted)
            tipsReal_n[i] += S.abund[plot][taxon];
          else
            tipsReal_n[i]++;
          i = P.up[i];
        }
      }
      // ooo this is slow, putting rnd inside the plot loop!
      for (run = 0; run < RUNS; run++) {
        // now randomize the plot
        // RandomizeB(plot);
        PhylogenyAttachShuffle(P, S, attach);

        for (taxon = 0; taxon < S.srec[plot]; taxon++) {
          i = P.t2n[ attach[ S.id[plot][taxon] ] ];
          while (i != -1) {
            if (abundWeighted)
              tips_rn[run][i] += S.abund[plot][taxon];
            else
              tips_rn[run][i]++;
            i = P.up[i];
          }
        }
      }

      // now unpeel the nodes
      for (node = 0; node < P.nnodes; node++) {
        if (TreeView == 0)
          strcpy(Out[plot].notes[node], "");
        if (TreeView == 1)
          strcpy(Out[plot].taxon[node], "");

        // interior nodes only
        if (P.noat[node] != 0) {
          ordHI = 0;
          ordLO = 0;
          for (run = 0; run < RUNS; run++) {
            if (tips_rn[run][node] < tipsReal_n[node])
              ordHI++;
            if (tips_rn[run][node] > tipsReal_n[node])
              ordLO++;
            // collect the randomised counts for the median column
            test_r[run] = (float) tips_rn[run][node];
          }

          // The "median" column used to read test_r without it ever being
          // filled in; sort the per-run counts and take the middle one.
          // Only needed when the table is actually printed.
          median = 0;
          if (outmethod == 1) {
            NodeSigSortRuns(test_r, RUNS);
            median = (int) test_r[(int) ((float) RUNS * 0.5)];
          }

          strcpy(mark, " ");

          if (ordLO >= (int) ((float) RUNS * 0.975)) {
            strcpy(mark, "-");
            if (TreeView == 0)
              strcat(Out[plot].notes[node], "SIGLESS");
            if (TreeView == 1) {
              // bounded: tmp holds only 15 bytes
              snprintf(tmp, sizeof tmp, "LESS_%d_", node);
              strcpy(Out[plot].taxon[node], tmp);
            }
          } else if (ordHI >= (int) ((float) RUNS * 0.975)) {
            strcpy(mark, "+");
            if (TreeView == 0)
              strcat(Out[plot].notes[node], "SIGMORE");

            if (TreeView == 1) {
              snprintf(tmp, sizeof tmp, "MORE_%d_", node);
              strcpy(Out[plot].taxon[node], tmp);
            }
          }
          // printf("%s\t%s\n", Out.taxon[node], Intree.taxon[node]);

          // One row per interior node; the "-" rows keep their historical
          // narrower name column.
          if (outmethod == 1) {
            if (mark[0] == '-')
              printf("%d\t%d\t%-10s\t%d\t%d\t%d\t%d\t%s\n", plot
                     +1, node, P.taxon[node], tipsReal_n[node],
                     median, ordHI, ordLO, mark);
            else
              printf("%d\t%d\t%-20s\t%d\t%d\t%d\t%d\t%s\n", plot
                     +1, node, P.taxon[node], tipsReal_n[node],
                     median, ordHI, ordLO, mark);
          }
        }
        if ((TreeView == 1) && (strcmp(P.taxon[node], ".") != 0)) {
          strcat(Out[plot].taxon[node], P.taxon[node]);
        }
      }

    }
    // Name tree
    strcpy(Out[plot].phyname, "NodeSig_");
    strcat(Out[plot].phyname, S.pname[plot]);
  }
  if (outmethod ==0)
    WriteNexus(Out, S.nsamples, ReadSample(SampleFile), 1,
               ReadTraits(TraitFile), 1);

}
/* Init function for uniform table.
 phase = phase name to tag this table with
 pf = input parameter file object to be parsed.


The following keys are required to be found in pf:
	int:
	nx, nz
	scalar double:
	dx, dz
	&Tbl:
	uniform_grid_time_slowness_table
	velocities

uniform_grid_time_slowness_table contains the actual tables.  They are
ascii tables made up of nx*nz lines (x varies most rapidly) of the
following format:
	time, slowness, slowness derivative wrt distance, branch

The "branch" variable is a character key defined in location.h

velocities must contain exactly nz lines, one velocity per depth entry.

Optional parameters with defaults:
	scalar double:
	x0, z0 coordinates of first point in table  (default = (0,0))

Notice that this routine requires mixed units.  dx, dz, x0, and z0
must all be specified in degrees.  Everything else has units derived
from km and s.  That is, time is in seconds, slowness (p) is
assumed to be in s/km, and dpdx (slowness derivative) is (s/km)/km.
This was done because the input tables are ascii, and these numbers
are scaled to units that make sense to most of us.  This format is
connected to a related program called taup_convert that writes
ttables in this format using the tau-p library.
NOTE(review): the unit conventions above are the original author's
statement; nothing in this function converts or checks units.

Returns 0 if no problems are encountered.  Returns 1 if a serious
error occurred that rendered setup impossible for this phase.
In the latter case, elog_log is always called and the error should be
handled by the calling program.

There are some fatal errors that lead to elog_die being called here
from things like malloc failures.
*/
int uniform_table_interpolate_init(char *phase, Pf *pf)
{
    XZ_table_uniform *ttable, *utable;

    Tbl *t;  /* pfget_tbl return to hold strings of prototables stored
		in the pf structure. */
    int i,j,k;

    GenlocVerbose = verbose_exists() ;

    if(time_tables_uniform==NULL) time_tables_uniform = newarr(0);
    if(slow_tables_uniform==NULL) slow_tables_uniform = newarr(0);

    ttable = (XZ_table_uniform *)malloc(sizeof(XZ_table_uniform));
    utable = (XZ_table_uniform *)malloc(sizeof(XZ_table_uniform));


    if( (ttable == NULL) || (utable == NULL) )
        elog_die(1,"Can't alloc memory in uniform_table_interpolate_init\n");

    /* This version requires t and u tables to be parallel.  This
    restriction would not be necessary, but it simplifies things
    greatly and we only have to store times in the values matrix
    and the slowness values in the slopes matrix. */

    ttable->nx = pfget_int(pf, "nx");
    ttable->nz = pfget_int(pf, "nz");
    utable->nx = ttable->nx;
    utable->nz = ttable->nz;
    ttable->dx = pfget_double(pf, "dx");
    ttable->dz = pfget_double(pf, "dz");
    utable->dx = ttable->dx;
    utable->dz = ttable->dz;
    /* These parameters default to 0 */
    if(pfget_string(pf,"x0")==NULL)
    {
        ttable->x0 = 0.0;
        utable->x0 = 0.0;
    }
    else
    {
        ttable->x0 = pfget_double(pf,"x0");
        utable->x0 = ttable->x0;
    }
    if(pfget_string(pf,"z0")==NULL)
    {
        ttable->z0 = 0.0;
        utable->z0 = 0.0;
    }
    else
    {
        ttable->z0 = pfget_double(pf,"z0");
        utable->z0 = ttable->z0;
    }

    /* IMPORTANT WARNING:  notice I only alloc one space for the
    slowness values array, although it gets placed in two different
    places -> values section of utable and slopes section of ttable
    This leaves a nasty dependency if this space is to be freed, but
    saves a lot of memory.  p.s  I did the same thing with velocity,
    but not with the branch array (see below) */

    ttable->values = dmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
    if(ttable->values == NULL)
        elog_die(1,"Cannot alloc memory for travel time table of size %d by %d for phase %s\n",
                 ttable->nx, ttable->nz, phase);
    ttable->slopes = dmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
    if(ttable->slopes == NULL)
        elog_die(1,"Cannot alloc memory for slowness table of size %d by %d for phase %s\n",
                 ttable->nx, ttable->nz, phase);
    ttable->branch = cmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
    if(ttable->branch == NULL)
        elog_die(1,"Cannot alloc memory for time branch table for phase %s\n",
                 phase);
    utable->branch = cmatrix(0,(utable->nx)-1,0,(utable->nz)-1);
    if(utable->branch == NULL)
        elog_die(1,"Cannot alloc memory for slowness branch table for phase %s\n",
                 phase);

    ttable->velocity = (double *) calloc(ttable->nz,sizeof(double));
    if(ttable->velocity == NULL)
        elog_die(1,"Cannot alloc memory for velocity model for phase %s\n",
                 phase);

    utable->slopes = dmatrix(0,(utable->nx)-1,0,(utable->nz)-1);
    if(utable->slopes == NULL)
        elog_die(1,"Cannot alloc memory for dudr table of size %d by %d for phase %s\n",
                 utable->nx, utable->nz, phase);

    /* here is where we set the redundant pointers */
    utable->values = ttable->slopes;
    utable->velocity = ttable->velocity;


    /* Now it is time to actually parse the tables.  We assume the
    table is entered as a pf &Tbl, and table is scanned with x
    varying most rapidly.  (i.e. you get the tables for x=x0 first,
    then x=x0+dx, etc. Note we read four entries for each grid
    point:  time, slowness, slowness derivative, branch_code */
    t = pfget_tbl(pf,"uniform_grid_time_slowness_table");
    if(t == NULL)
    {
        elog_log(1,"Can't find travel time-slowness table for phase %s\n",
                 phase);
        free_uniform_table(ttable, utable);
        return(1);
    }

    if( maxtbl(t) != ( (ttable->nx)*(ttable->nz) ) )
    {
        elog_log(1,"Table size mismatch for phase %s\nTable should have %d rows\nFound %ld\n",
                 phase, (ttable->nx)*(ttable->nz), maxtbl(t));
        free_uniform_table(ttable, utable);
        return(1);
    }

    for(j=0,k=0; j<ttable->nz; ++j)
    {
        for(i=0; i<ttable->nx; ++i)
        {
            char *s;
            int nitems;
            double tt,u,dudx;
            /* %1s stores one character PLUS a terminating null, so the
            destination must hold two bytes; a plain char would be
            overrun by one byte. */
            char b[2];
            s = gettbl(t,k);
            nitems = sscanf(s,"%lf%lf%lf%1s",
                            &tt, &u, &dudx, b);
            if(nitems !=4)
            {
                elog_log(1,"Syntax error reading table for phase %s, Problem read value for i=%d, j=%d\n",
                         phase,i,j);
                free_uniform_table(ttable, utable);
                return(1);
            }
            ttable->values[i][j] = tt;
            ttable->slopes[i][j] = u;
            utable->slopes[i][j] = dudx;
            ttable->branch[i][j] = b[0];
            ++k;
        }
    }

    /* In order to utilize a common set of interpolation routines,
    scan the time->branch matrix.  Mark the crossover points for
    time as jump discontinuities for slowness (which they are) */
    for(j=0; j<ttable->nz; ++j)
        for(i=0; i<ttable->nx; ++i)
            if(ttable->branch[i][j] == CROSSOVER)
                utable->branch[i][j] = JUMP;
            else
                utable->branch[i][j] = ttable->branch[i][j];
    /* An error check is needed here so we don't have to worry about it
    later.  Other than a blunder, this can happen if x0 is anything
    other than 0, so we need to watch for this.  We could try to
    repair this automatically, but because it most likely indicates
    a serious blunder we abort */

    for(j=0; j<ttable->nz; ++j)
        if( (utable->branch[0][j] == CROSSOVER)
                || (ttable->branch[0][j] == CROSSOVER)
                || (utable->branch[0][j] == JUMP)
                || (ttable->branch[0][j] == JUMP) )
        {

            elog_log(1,
                     "Error in travel time table for phase %s\nFirst point cannot be marked as a crossover or jump discontinuity\n",phase);
            free_uniform_table(ttable, utable);
            return(1);
        }
    /* Now we read the velocity model parameters */
    t = pfget_tbl(pf,"velocities");
    if(t == NULL)
    {
        /* Same treatment as a missing time-slowness table: report and
        give up on this phase rather than hand NULL to maxtbl. */
        elog_log(1,"Can't find velocities table for phase %s\n",
                 phase);
        free_uniform_table(ttable, utable);
        return(1);
    }
    if((ttable->nz) != maxtbl(t))
    {
        elog_log(1,"Error in phase parameter file.  \
Mismatch between velocity entries and table entries\n\
Tables have %d depth entries, but velocity vector is of length %ld\n",
                 ttable->nz, maxtbl(t));
        free_uniform_table(ttable,utable);
        return(1);
    }
    for(i=0; i<maxtbl(t); ++i)
    {
        char *s;
        s = gettbl(t,i);
        /* An unparsable line would otherwise silently leave the
        calloc'ed 0.0 in the velocity model. */
        if(sscanf(s,"%lf", &(ttable->velocity[i])) != 1)
        {
            elog_log(1,"Syntax error reading velocity entry %d for phase %s\n",
                     i, phase);
            free_uniform_table(ttable,utable);
            return(1);
        }
    }
    setarr(time_tables_uniform,phase,ttable);
    setarr(slow_tables_uniform,phase,utable);
    return(0);
}
Esempio n. 17
0
/* ====================================================  */
void Align_Process(char **argv,int args,int nRead)
/* ====================================================  */
{
     int i,j,k,n_reads = nRead;
     void ArraySort_Mix(int n,B64_long *arr,int *brr);
     int  **imatrix(B64_long nrl,B64_long nrh,B64_long ncl,B64_long nch);
     char **cmatrix(B64_long nrl,B64_long nrh,B64_long ncl,B64_long nch);
     void ArraySort_String(int n,char **Pair_Name,int *brr);
//     char **rdname;
     char **DBname,**ctgname,*ptr,*st,*ed,line[2000],RC;
     FILE *namef,*namef2;
     int read_index[2000];
     B64_long read_offsets[2000];
     int n_find,idd,stopflag,num_align,refhit1,refhit2;
     int readhit1 = 0,readhit2 = 0;
     int rd_forward[2000],rd_reverse[2000]; 
     int insertSize1 = insert_size*(1.0+insert_std);
     int insertSize2 = insert_size*(1.0-insert_std);

     n_reads = nRead;
     RC = '+';
     cell_name = cmatrix(0,2,0,Max_N_NameBase);
     rdname=cmatrix(0,nRead,0,Max_N_NameBase);
     ctgname=cmatrix(0,nRead,0,Max_N_NameBase);
     DBname=cmatrix(0,nRead,0,Max_N_NameBase);

     if((readIndex= (int *)calloc(n_reads,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - readIndex\n");
       exit(1);
     }
     if((read2contig= (int *)calloc(n_reads,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - read2contig\n");
       exit(1);
     }
     if((map_score= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - map_score\n");
       exit(1);
     }
     if((hit_frdex= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_frdex\n");
       exit(1);
     }
     if((hit_rcdex= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_rcdex\n");
       exit(1);
     }
     if((hit_quest= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_quest\n");
       exit(1);
     }
     if((hit_queed= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_queed\n");
       exit(1);
     }
     if((hit_refst= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_refst\n");
       exit(1);
     }
     if((hit_refed= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_refed\n");
       exit(1);
     }
     if((hit_score= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - hit_score\n");
       exit(1);
     }
     if((map_unique= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - map_unique\n");
       exit(1);
     }
     if((map_rdpair= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: Align_Process - map_rdpair\n");
       exit(1);
     }
     if((ctg_index= (int *)calloc(nRead,sizeof(int))) == NULL)
     {
       printf("ERROR Memory_Allocate: calloc - ctg_index\n");
       exit(1);
     }

     if((namef = fopen(argv[args],"r")) == NULL)
     {
       printf("ERROR main:: reads group file \n");
       exit(1);
     }

/*   read the SNP output file         */
     num_align=0;
     while(!feof(namef))
     {
       int nPair=0,len;
       char line2[2000],line3[2000],base[500],score[3];
      
       fgets(line,2000,namef);
       if(feof(namef)) break;
       strcpy(line2,line);
       strcpy(line3,line);
       if((strncmp(line,"cigar",5))==0)
       { 
         refhit1 = 0;
         refhit2 = 0;     
         readhit1 = 0;
         readhit2 = 0;     
         for(ptr=strtok(line," ");ptr!=NULL;ptr=strtok((char *)NULL," "),nPair++)
         {
         }
         i=0;
         for(ptr=strtok(line2," ");ptr!=NULL;ptr=strtok((char *)NULL," "),i++)
         {
            if(i==0)
            {
              memset(score,'\0',3);
              memset(base,'\0',500);
              strcat(base,ptr);
	      len = strlen(base);
	      if(len < 8)
	      {
	        map_score[num_align] = 50;
//	        printf("score: %d %s",len,line3);
	      }
	      else
	      {
	        score[0] = line3[7];
	        score[1] = line3[8];
	        map_score[num_align] = atoi(score);
//		if(map_score[num_align]>=30)
//	          map_unique[num_align] = 1;
//	        printf("score: %d %s",len,line3);
	      }
            }
            else if(i==1)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
              strcpy(rdname[num_align],ptr);
            }
            else if(i==2)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
              readhit1 = atoi(ptr);
            }
            else if(i==3)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
              readhit2 = atoi(ptr);
            }
            else if(i==4)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
	      RC = *ptr;
	    }
            else if(i==5)
            {
              memset(base,'\0',500);
              strcpy(ctgname[num_align],ptr);
	      st = rdname[num_align];
	      ed = strrchr(rdname[num_align],'.');
	      if(ed==NULL)
	        strcpy(DBname[num_align],rdname[num_align]);
              else
	      {
	        strncpy(DBname[num_align],rdname[num_align],ed-st);
	        if(*(ed+1) == 'p')
		  hit_frdex[num_align] = 1;
	        if(*(ed+1) == 'b')
		  hit_frdex[num_align] = 1;
	        if(*(ed+1) == 'x')
		  hit_frdex[num_align] = 1;
	        if(*(ed+1) == 'F')
		  hit_frdex[num_align] = 1;
	        if(*(ed+1) == 'q')
		  hit_frdex[num_align] = 2;
	        if(*(ed+1) == 'g')
		  hit_frdex[num_align] = 2;
	        if(*(ed+1) == 'y')
		  hit_frdex[num_align] = 2;
	        if(*(ed+1) == 'R')
		  hit_frdex[num_align] = 2;
	      }
            }
            else if(i==6)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
              refhit1 = atoi(ptr);
            }
            else if(i==7)
            {
              memset(base,'\0',500);
              strcat(base,ptr);
              refhit2 = atoi(ptr);
            }
            else if(i==8)
            {
              if(RC=='+')
              {
	        hit_rcdex[num_align]=0;
                hit_refst[num_align]=refhit1;
                hit_refed[num_align]=refhit2;
                hit_quest[num_align]=readhit1;
                hit_queed[num_align]=readhit2;
              } 
              else
              {
                hit_refst[num_align]=refhit2;
                hit_refed[num_align]=refhit1;
                hit_quest[num_align]=readhit2;
                hit_queed[num_align]=readhit1;
	        hit_rcdex[num_align]=1;
              }
            }
	    else if(i==9)
	    {
              memset(base,'\0',500);
              strcat(base,ptr);
              hit_score[num_align] = atoi(ptr);
              readIndex[num_align] = num_align;
              num_align++;
	    }
         }
       }
     }
     fclose(namef);

/*   sort out contig/chromosome idnex */
     n_reads = nRead; 
     ArraySort_String(n_reads,ctgname,readIndex);

     n_find = 0;
     idd = 0;
     for(i=0;i<n_reads;i++)
     {
/*      search reads with an index < i     */
/*      search reads with an index > i     */
        stopflag=0;
        j=i+1;
        while((j<n_reads)&&(stopflag==0))
        {
          if(strcmp(ctgname[j],ctgname[i])==0)
          {
            j++;
          }
          else
            stopflag=1;
        }
        if((j-i)>=1)
        {
          for(k=i;k<j;k++)
	  {
             ctg_index[readIndex[k]] = idd;
	     read2contig[readIndex[k]] = k;
	  }
        }
	idd++;
        i=j-1;
     }

/*   sort out read pairs */
     n_reads = nRead;
     for(i=0;i<n_reads;i++)
        readIndex[i] = i;

    printf("started the process: \n");
     ArraySort_String(n_reads,DBname,readIndex);

     n_find = 0;
     idd = -1;
     for(i=0;i<n_reads;i++)
     {
/*      search reads with an index < i     */
/*      search reads with an index > i     */
        stopflag=0;
        j=i+1;
        while((j<n_reads)&&(stopflag==0))
        {
          if(strcmp(DBname[j],DBname[i])==0)
          {
            j++;
          }
          else
            stopflag=1;
        }
        idd = -1;
        if((j-i)==2)
	{
	  int num_hits = j-i;
	  int stopflag2,m,n_pair;
	  int ctg1,ctg2,idt,idd;

	  idt = readIndex[i];
	  idd = readIndex[i+1];
	  ctg1 = ctg_index[idt];
	  ctg2 = ctg_index[idd];
          if((ctg1!=ctg2)||(abs(hit_refst[idt]-hit_refst[idd])>max_space))
	  {
	    if(map_score[idt]==map_score[idd])
	    {
	      if(hit_score[idt]>hit_score[idd])
	      {
	        map_unique[idt] = 1;
	        map_unique[idd] = 0;
	      }
	      else
	      {
	        map_unique[idt] = 0;
	        map_unique[idd] = 1;
	      }
	    }
	    else
	    {
	      if(map_score[idt]>map_score[idd])
	      {
	        map_unique[idt] = 1;
	        map_unique[idd] = 0;
	      }
	      else
	      {
	        map_unique[idt] = 0;
	        map_unique[idd] = 1;
	      }
	    }

	  }
	  else
	  {
	    map_unique[idt] = 1;
	    map_unique[idd] = 1;
	  }
	}
	else if((j-i)==1)
	  map_unique[readIndex[i]] = 1;
        i=j-1;
     }
/*   read the cigar line file   */
     if((namef = fopen(argv[args],"r")) == NULL)
     {
       printf("ERROR main:: alignment file 2 \n");
       exit(1);
     }
/*   read the cigar line file   */
     if((namef2 = fopen(argv[args+1],"w")) == NULL)
     {
       printf("ERROR main:: alignment file 2 \n");
       exit(1);
     }
/*   read the SNP output file         */
     i=0;
     n_find = 0;
     while(!feof(namef))
     {
       fgets(line,2000,namef);
//         printf("%s",line);
       if(feof(namef)) break;
       if(map_unique[i]==1)
       {
         char score[3] = {0};
	 score[0] = map_score[i]/10 + '0';
	 score[1] = map_score[i]%10 + '0';
	 line[7] = score[0];
	 line[8] = score[1];
         fprintf(namef2,"%s",line);
	 n_find++;
       }
       i++; 
     }
     printf("number of reads uniquely placed on to genome: %d\n",n_find);
     fclose(namef);
     fclose(namef2);
}
Esempio n. 18
0
/* "read_springfile_sysenv" READS THE SPRINGFILE FOR 
   BOTH THE 'SYSTEM' AND THE 'ENVIRONMENT'.

   file : path of the spring file; every record is "<name1> <name2> <value>".
   SYS  : system centroids, read at indices 1..nss.
   ENV  : environment centroids, read at indices 1..nen.
   ntp  : output; receives the number of distinct centroid names found.

   Returns a newly allocated Sprngmtx.  Its `name` rows 1..*ntp hold the
   distinct names (SYS names first, then ENV names not already listed, in
   order of first appearance).  Its matrix `M` (1..*ntp square) is preset to
   DEFGAM and then overwritten symmetrically by every record of the file
   whose two names are both known.  Reading stops at the first record that
   cannot be parsed.  The program exits if the file cannot be opened or the
   structure cannot be allocated. */
Sprngmtx *read_springfile_sysenv(char *file,Centroid *SYS,Centroid *ENV,
			   int nss,int nen,int *ntp)
{
  FILE *data;
  Sprngmtx *foo;
  char **LIST,nup1[NAME_LNG],nup2[NAME_LNG];
  char fmt[64];
  double x;
  int nn=nss+nen,ok,i,j;

  if((data=fopen(file,"r"))==NULL){
    fprintf(stderr,"\nread_springfile_sysenv: unable to open %s\n\n",file);
    exit(1);}

  /* GET THE LIST OF UNIQUE CENTROID NAMES */
  LIST=cmatrix(1,nn,0,NAME_LNG);
  (*ntp)=0;
  for(i=1;i<=nss;i++){
    ok=1;
    for(j=1;j<=(*ntp);j++){
      if(!strcmp(SYS[i].name,LIST[j])){
	ok=0;
	break;
      }
    }
    if(ok==1)
      strcpy(LIST[++(*ntp)],SYS[i].name);
  }
  for(i=1;i<=nen;i++){
    ok=1;
    for(j=1;j<=(*ntp);j++){
      if(!strcmp(ENV[i].name,LIST[j])){
	ok=0;
	break;
      }
    }
    if(ok==1)
      strcpy(LIST[++(*ntp)],ENV[i].name);
  }

  /* Allocate the structure itself, not just a pointer to it */
  foo=malloc(sizeof *foo);
  if(foo==NULL){
    fprintf(stderr,"\nread_springfile_sysenv: out of memory\n\n");
    exit(1);}
  foo->name=cmatrix(1,(*ntp),0,NAME_LNG);
  foo->M=dmatrix(1,(*ntp),1,(*ntp));

  for(i=1;i<=(*ntp);i++){
    strcpy(foo->name[i],LIST[i]);
    for(j=i;j<=(*ntp);j++)
      foo->M[i][j]=foo->M[j][i]=DEFGAM;
  }
  free_cmatrix(LIST,1,nn,0,NAME_LNG);

  /* READ THE FILE */
  /* Field widths keep the two names inside nup1/nup2 (NAME_LNG bytes each) */
  snprintf(fmt,sizeof fmt,"%%%ds%%%ds%%lf",(int)(NAME_LNG)-1,(int)(NAME_LNG)-1);
  /* Looping on the conversion count (instead of feof) ends cleanly at EOF
     and cannot spin forever on a malformed record */
  while(fscanf(data,fmt,nup1,nup2,&x)==3){
    for(i=1;i<=(*ntp);i++)
      if(!strcmp(nup1,foo->name[i])){
	for(j=1;j<=(*ntp);j++)
	  if(!strcmp(nup2,foo->name[j])){
	    foo->M[i][j]=foo->M[j][i]=x;
	    break;
	  }
	break;
      }
  }
  fclose(data);
  return foo;
}
Esempio n. 19
0
/*
 * Reads a delimited text file twice: a first pass counts its data rows and
 * checks the count against rowCnt, a second pass parses every row into a
 * temporary character matrix that is released again before returning.
 *
 *   traceFlag  : integer, forwarded to setTraceFlag().
 *   fName      : character, path of the file to read.
 *   rowType    : character vector of length rowCnt; each entry must be one
 *                of "X", "C", "c", "I" or "R".
 *   rowCnt     : integer, expected number of data rows in the file.
 *   tokenDelim : character; its first byte is the token delimiter.
 *   colHeader  : integer flag, non-zero when the first line is a header.
 *   rowHeader  : integer flag, non-zero when each line starts with a label.
 *
 * Always returns R_NilValue.  Raises an R error on an invalid row type, an
 * unreadable file, or a row-count mismatch.
 *
 * NOTE(review): _rowType comes from new_vvector() and is never released
 * here; confirm the matching deallocator and free it.
 * NOTE(review): the second pass stores strtol(sb->token) for every column of
 * a row, i.e. it never advances through the tokens, and the result is
 * discarded - this looks like unfinished code; confirm the intent.
 */
SEXP rfsrcReadMatrix(SEXP traceFlag,
                     SEXP fName,
                     SEXP rowType,
                     SEXP rowCnt,
                     SEXP tokenDelim,
                     SEXP colHeader,
                     SEXP rowHeader) {
  FILE  *fPtr;
  char  *_fName;
  SEXP   _sexp_rowType ;
  char **_rowType;
  uint   _rowCnt;
  char  *_tokenDelim;
  char   _colHeadF;
  char   _rowHeadF;
  SmartBuffer *sb;
  uint p, i;
  uint rowCntActual;
  uint colCntActual;
  uint sbSizeActual;
  char flag;
  setTraceFlag(INTEGER(traceFlag)[0], 0);
  _fName = (char*) CHAR(STRING_ELT(AS_CHARACTER(fName), 0));
  _sexp_rowType = rowType;
  _rowCnt = INTEGER(rowCnt)[0];
  _tokenDelim = (char*) CHAR(STRING_ELT(AS_CHARACTER(tokenDelim), 0));
  _colHeadF = (INTEGER(colHeader)[0] != 0) ? TRUE : FALSE;
  _rowHeadF = (INTEGER(rowHeader)[0] != 0) ? TRUE : FALSE;
  _rowType = (char**) new_vvector(1, _rowCnt, NRUTIL_CPTR);
  for (p = 1; p <= _rowCnt; p++) {
    _rowType[p] = (char*) CHAR(STRING_ELT(AS_CHARACTER(_sexp_rowType), p-1));
    if ((strcmp(_rowType[p], "X") != 0) &&
        (strcmp(_rowType[p], "C") != 0) &&
        (strcmp(_rowType[p], "c") != 0) &&
        (strcmp(_rowType[p], "I") != 0) &&
        (strcmp(_rowType[p], "R") != 0)) {
      Rprintf("\nRF-SRC:  *** ERROR *** ");
      Rprintf("\nRF-SRC:  Invalid predictor type:  [%10d] = %2s", p, _rowType[p]);
      Rprintf("\nRF-SRC:  Type must be 'C', 'c', 'I', or 'R'.");
      Rprintf("\nRF-SRC:  Please Contact Technical Support.");
      error("\nRF-SRC:  The application will now exit.\n");
    }
  }
  /* ---- First pass: determine column count, buffer size and row count ---- */
  fPtr = fopen(_fName, "r");
  if (fPtr == NULL) {
    Rprintf("\nRF-SRC:  *** ERROR *** ");
    Rprintf("\nRF-SRC:  Unable to open file:  %s", _fName);
    error("\nRF-SRC:  The application will now exit.\n");
  }
  sb = parseLineSB(fPtr, *_tokenDelim, 0);
  colCntActual = _rowHeadF ? (sb -> tokenCnt - 1) : (sb -> tokenCnt);
  rowCntActual = 0;
  if (_colHeadF) {
    /* The first line was the header; size the buffer from the first data line */
    freeSB(sb);
    sb = parseLineSB(fPtr, *_tokenDelim, 0);
    sbSizeActual = sb -> size;
    freeSB(sb);
  }
  else {
    sbSizeActual = sb -> size;
    freeSB(sb);
  }
  /* Exactly one data line has been consumed at this point */
  ++rowCntActual;
  flag = TRUE;
  while (flag) {
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    if (sb -> tokenCnt == 0) {
      flag = FALSE;
      freeSB(sb);
    }
    else {
      ++rowCntActual;
      freeSB(sb);
    }
  }
  /* Close before a possible error() so the stream is not leaked */
  fclose(fPtr);
  if (rowCntActual != _rowCnt) {
    Rprintf("\nRF-SRC:  *** ERROR *** ");
    Rprintf("\nRF-SRC:  Inconsistent Predictor Count.");
    Rprintf("\nRF-SRC:  (encountered, expected) =  (%10d, %10d)", rowCntActual, _rowCnt);
    Rprintf("\nRF-SRC:  Please Contact Technical Support.");
    error("\nRF-SRC:  The application will now exit.\n");
  }
  /* ---- Second pass: always reopen; the header line is skipped only when present ---- */
  fPtr = fopen(_fName, "r");
  if (fPtr == NULL) {
    Rprintf("\nRF-SRC:  *** ERROR *** ");
    Rprintf("\nRF-SRC:  Unable to open file:  %s", _fName);
    error("\nRF-SRC:  The application will now exit.\n");
  }
  if (_colHeadF) {
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    freeSB(sb);
  }
  char **dataMatrix = cmatrix(1, colCntActual, 1, rowCntActual);
  for (p=1; p <= rowCntActual; p++) {
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    if (_rowHeadF) {
    }
    for (i = 1; i <= colCntActual; i++) {
      dataMatrix[i][p] = (char) strtol(sb -> token, NULL, 10);
    }
    freeSB(sb);
  }
  free_cmatrix(dataMatrix, 1, colCntActual, 1, rowCntActual);
  fclose(fPtr);
  return R_NilValue;
}
Esempio n. 20
0
File: ms.c Progetto: EdRice4/P2C2M
/*
 * Entry point of the sample generator.
 *
 * Every command-line argument equal to "tbs" is replaced by a value read
 * from standard input (one set of values per sample).  Parameters are parsed
 * by getpars() into the global `pars`; then `howmany` samples are generated
 * with gensam() and written to standard output.
 *
 * Reads/writes the file-level variables ntbs, count, pars, maxsites, posit
 * and segfac.  Returns 0 on success; exits early (status 0) when standard
 * input runs out of "tbs" values.
 */
int
main(int argc, char *argv[])
{
    int i, k, howmany, segsites ;
    char **list, **cmatrix(), **tbsparamstrs ;
    FILE *pf ;
    double probss, tmrca, ttot ;
    void seedit( const char * ) ;
    void getpars( int argc, char *argv[], int *howmany )  ;
    int gensam( char **list, double *probss, double *ptmrca, double *pttot ) ;


    ntbs = 0 ;   /* these next few lines are for reading in parameters from a file (for each sample) */
    tbsparamstrs = (char **)malloc( argc*sizeof(char *) ) ;
    if( tbsparamstrs == NULL ) {
        fprintf(stderr, "malloc error: tbsparamstrs\n");
        exit(1);
    }

    for( i=0; i<argc; i++) printf("%s ",argv[i]);
    for( i =0; i<argc; i++) {
        tbsparamstrs[i] = (char *)malloc(30*sizeof(char) ) ;
        if( tbsparamstrs[i] == NULL ) {
            fprintf(stderr, "malloc error: tbsparamstrs\n");
            exit(1);
        }
    }
    for( i = 1; i<argc ; i++)
        if( strcmp( argv[i],"tbs") == 0 )  argv[i] = tbsparamstrs[ ntbs++] ;

    count=0;

    /* %29s: each tbs buffer holds 30 bytes including the terminator */
    if( ntbs > 0 )  for( k=0; k<ntbs; k++)  scanf(" %29s", tbsparamstrs[k] );
    getpars( argc, argv, &howmany) ;   /* results are stored in global variable, pars */

    if( !pars.commandlineseedflag ) seedit( "s");
    pf = stdout ;

    if( pars.mp.segsitesin ==  0 ) {
        list = cmatrix(pars.cp.nsam,maxsites+1);
        posit = (double *)malloc( maxsites*sizeof( double) ) ;
    }
    else {
        list = cmatrix(pars.cp.nsam, pars.mp.segsitesin+1 ) ;
        posit = (double *)malloc( pars.mp.segsitesin*sizeof( double) ) ;
        if( pars.mp.theta > 0.0 ) {
            /* segfac = segsitesin! */
            segfac = 1.0 ;
            for(  i= pars.mp.segsitesin; i > 1; i--) segfac *= i ;
        }
    }
    if( posit == NULL ) {
        fprintf(stderr, "malloc error: posit\n");
        exit(1);
    }

    while( howmany-count++ ) {
        /* From the second sample on, fetch a fresh set of tbs values */
        if( (ntbs > 0) && (count >1 ) ) {
            for( k=0; k<ntbs; k++) {
                if( scanf(" %29s", tbsparamstrs[k]) == EOF ) {
                    if( !pars.commandlineseedflag ) seedit( "end" );
                    exit(0);
                }
            }
            getpars( argc, argv, &howmany) ;
        }

        fprintf(pf,"\n//");
        if( ntbs >0 ) {
            for(k=0; k< ntbs; k++) printf("\t%s", tbsparamstrs[k] ) ;
        }
        printf("\n");
        segsites = gensam( list, &probss, &tmrca, &ttot ) ;
        if( pars.mp.timeflag ) fprintf(pf,"time:\t%lf\t%lf\n",tmrca, ttot ) ;
        if( (segsites > 0 ) || ( pars.mp.theta > 0.0 ) ) {
            if( (pars.mp.segsitesin > 0 ) && ( pars.mp.theta > 0.0 ))
                fprintf(pf,"prob: %g\n", probss ) ;
            fprintf(pf,"segsites: %d\n",segsites);
            if( segsites > 0 )	fprintf(pf,"positions: ");
            for( i=0; i<segsites; i++)
                fprintf(pf,"%6.*lf ", pars.output_precision,posit[i] );
            fprintf(pf,"\n");
            if( segsites > 0 )
                for(i=0; i<pars.cp.nsam; i++) {
                    fprintf(pf,"%s\n", list[i] );
                }
        }
    }
    if( !pars.commandlineseedflag ) seedit( "end" );

    return 0 ;
}
Esempio n. 21
0
/*
 * Reads an alignment file (13 whitespace-separated fields per record) into
 * the file-level hit_* / readlength / superlength arrays and the R_Name,
 * S_Name and T_Name matrices, then hands the records to Indel_Process().
 *
 * argv[1] is the alignment file.  The file is read twice: once to count its
 * lines (which sizes all arrays) and once to parse the records.  Parsing
 * stops at the first record that does not convert completely, or once as
 * many records as counted lines have been stored.
 *
 * Returns EXIT_SUCCESS; exits with status 1 on a usage error, an unreadable
 * file or an allocation failure.
 */
int main(int argc, char **argv)
{
    FILE *namef;
    int i;
    int n_reads, nseq = 0;
    float identy;
    /* RC receives a short orientation token; it needs room for the NUL */
    char line[2000] = {0}, tempc1[60], RC[8];
    char *filename;

    /* Validate the argument count before touching argv[1] */
    if (argc < 2) {
      	printf("Usage: %s <cross_genome_ouput file>\n", argv[0]);
    	exit(1);
    }

    filename = argv[1];

    if ((namef = fopen(filename, "r")) == NULL) {
      	fprintf(stderr, "ERROR: unable to open file %s\n", filename);
      	exit(1);
    }

    /* Count lines; looping on fgets also counts a last line that has no
       trailing newline */
    while (fgets(line, sizeof line, namef) != NULL)
      	nseq++;

    fclose(namef);

    if((hit_sindex = (int *)calloc(nseq,sizeof(int))) == NULL)
    {
      fprintf(stderr, "Error: out of memory: calloc - hit_sindex\n");
      exit(1);
    }

    if ((hit_rcdex = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: out of memory: calloc - hit_rcdex\n");
      	exit(1);
    }

    if ((hit_read1 = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: out of memory: calloc - hit_read1\n");
      	exit(1);
    }

    if ((hit_read2 = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: out of memory: calloc - hit_read2\n");
      	exit(1);
    }

    if ((hit_locus1 = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: out of memory: calloc - hit_locus1\n");
      	exit(1);
    }

    if ((hit_locus2 = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: out of memory: calloc - hit_locus2\n");
      	exit(1);
    }

    if ((hit_length = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: calloc - hit_length\n");
      	exit(1);
    }

    if ((readlength = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: calloc - readlength\n");
      	exit(1);
    }

    if ((superlength = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: calloc - superlength\n");
      	exit(1);
    }

    if ((hit_qindex = (int *)calloc(nseq, sizeof(int))) == NULL) {
      	fprintf(stderr, "Error: calloc - hit_qindex\n");
      	exit(1);
    }

    R_Name = cmatrix(0, nseq + 1, 0, Max_N_NameBase);
    S_Name = cmatrix(0, nseq + 1, 0, Max_N_NameBase);
    T_Name = cmatrix(0, nseq + 1, 0, 6);

    if ((namef = fopen(filename, "r")) == NULL) {
    	fprintf(stderr, "ERROR: unable to open file %s\n", filename);
      	exit(1);
    }

    /*  read the alignment files         */
    i = 0;

    /* i < nseq keeps the int arrays in bounds; == 13 stops on malformed
       records instead of looping on them.
       NOTE(review): the two name fields are still read with an unbounded %s;
       they rely on names being shorter than Max_N_NameBase. */
    while (i < nseq &&
           fscanf(namef, "%59s %d %s %s %d %d %d %d %7s %d %f %d %d", tempc1, &hit_sindex[i],
    	    R_Name[i], S_Name[i], &hit_read1[i], &hit_read2[i], &hit_locus1[i],
	    &hit_locus2[i], RC, &hit_qindex[i], &identy, &readlength[i],
	    &superlength[i]) == 13) {

        /* 'F' marks a forward hit; anything else is stored as reverse */
        if (RC[0] == 'F')
            hit_rcdex[i] = 0;
        else
            hit_rcdex[i] = 1;

        /* Keep the first four characters of the subject name; strncpy does
           not terminate when the source is longer, so do it explicitly */
	strncpy(T_Name[i], S_Name[i], 4);
	T_Name[i][4] = '\0';
        i++;
    }

    fclose(namef);

    n_reads = i;
    printf("reads: %d %s\n", n_reads, filename);
    Indel_Process(n_reads);

    printf("Job finished for %d reads!\n", nseq);
    return EXIT_SUCCESS;

}
Esempio n. 22
0
/*
 * Function to be called from Python
 *
 * Python arguments: (seq1, seq2, gap_opening, gap_extension) - two strings
 * and two integer gap penalties.
 *
 * Fills a dynamic-programming score matrix F (scores are floored at 0) and a
 * direction matrix, traces back from the best-scoring cell to the origin,
 * and returns the tuple (aligned_seq_1, aligned_seq_2) in which unaligned
 * positions are paired with the gap character '#'.
 *
 * The characters 'B' and 'Z' in a sequence switch a per-sequence "between
 * exons" state off and on; while that state is on for one sequence, gaps
 * that skip over the other sequence cost nothing.
 *
 * On an internal failure a single Python string describing the problem is
 * returned instead of the tuple.  If the arguments cannot be parsed, NULL is
 * returned with the Python exception set by PyArg_ParseTuple.
 *
 * NOTE(review): j_between_exons is not reset at the start of each row, so
 * its value carries over from the end of the previous row - confirm this is
 * intended.
 */
static PyObject* py_smith_waterman_context(PyObject* self, PyObject* args)
{
    char *seq1 = NULL;
    char *seq2 = NULL;
    int  len1 = 0, len2 = 0;
    int i, j;
    int gap_opening = 0, gap_extension = 0;
    static int ** similarity = NULL;   /* loaded once, kept across calls */

    /* Gap penalties seen in callers (from the original notes):
       -5 / -3 in 15_make_maps, -3 / 0 in 25_db_migration/06_make_alignments */
    char gap_character = '#';
    int endgap         =   0;
    int use_endgap     =   0;

    int far_away = -1;     /* marks "not aligned" in the maps below */

    int max_i, max_j;

    /* Everything below is released in the common cleanup section */
    int  **F             = NULL;
    char **direction     = NULL;
    int *map_i2j         = NULL;
    int *map_j2i         = NULL;
    char * aligned_seq_1 = NULL;
    char * aligned_seq_2 = NULL;
    const char *errmsg   = NULL;   /* non-NULL: return this message instead */
    PyObject *result     = NULL;

    int F_max, F_max_i = 0, F_max_j = 0;
    int penalty = 0;
    int i_sim = 0, j_sim = 0, diag_sim = 0, max_sim = 0;
    int i_between_exons = 1;
    int j_between_exons = 1;
    int pos = 0;

    /* On failure the exception is already set; it must be propagated */
    if (!PyArg_ParseTuple(args, "s#s#ii", &seq1, &len1, &seq2, &len2, &gap_opening, &gap_extension)) {
        return NULL;
    }

    if (!seq1 || !seq2) {
        return Py_BuildValue("s", "no seq in py_smith_waterman_context");
    }

    /* The similarity matrix is ASCII_SIZE x ASCII_SIZE and is indexed by
       character value; reject anything that would index outside of it */
    for (i=0; i<len1; i++) {
        if ((unsigned char)seq1[i] >= ASCII_SIZE) {
            return Py_BuildValue("s", "non-ASCII character in py_smith_waterman_context");
        }
    }
    for (j=0; j<len2; j++) {
        if ((unsigned char)seq2[j] >= ASCII_SIZE) {
            return Py_BuildValue("s", "non-ASCII character in py_smith_waterman_context");
        }
    }

    /* passing a matrix this way is all too painful, so it is hardcoded: */
    if ( !similarity) {
        similarity = imatrix(ASCII_SIZE, ASCII_SIZE);
        if (!similarity) {
            return Py_BuildValue("s", "error alloc matrix space");
        }
        load_sim_matrix (similarity);
    }

    max_i = len1;
    max_j = len2;

    /* allocation, initialization */
    if ( ! (F= imatrix (max_i+1, max_j+1)) ) {
        errmsg = "error alloc matrix space";
        goto cleanup;
    }
    if ( ! (direction = cmatrix (max_i+1, max_j+1)) ) {
        errmsg = "error alloc matrix space";
        goto cleanup;
    }
    if (! (map_i2j = emalloc( (max_i+1)*sizeof(int))) ) {
        errmsg = "error alloc matrix space";
        goto cleanup;
    }
    if (! (map_j2i = emalloc( (max_j+1)*sizeof(int))) ) {
        errmsg = "error alloc matrix space";
        goto cleanup;
    }
    for (i=0; i<=max_i; i++) map_i2j[i]=far_away;
    for (j=0; j<=max_j; j++) map_j2i[j]=far_away;

    /* ---- fill the score and direction matrices ---- */
    F_max = far_away;
    for (i=0; i<=max_i; i++) {

        if (i > 0) {
            if (seq1[i-1] == 'B') {
                i_between_exons = 0;
            } else if ( seq1[i-1] == 'Z'){
                i_between_exons = 1;
            }
        }
        for (j=0; j<=max_j; j++) {

            if (j > 0) {
                if (seq2[j-1] == 'B') {
                    j_between_exons = 0;
                } else if (seq2[j-1] == 'Z') {
                    j_between_exons = 1;
                }
            }

            if ( !i && !j ){
                F[0][0] = 0;
                direction[i][j] = 'd';
                continue;
            }

            if ( i && j ){

                /* step in i only: extension if the cell above was reached
                   the same way, otherwise opening */
                if (j_between_exons) {
                    penalty = 0;
                } else if (use_endgap && j==max_j) {
                    penalty = endgap;
                } else if ( direction[i-1][j] == 'i' ) {
                    penalty = gap_extension;
                } else {
                    penalty = gap_opening;
                }
                i_sim =  F[i-1][j] + penalty;

                /* step in j only */
                if (i_between_exons) {
                    penalty = 0;
                } else if (use_endgap && i==max_i) {
                    penalty = endgap;
                } else if ( direction[i][j-1] == 'j' ) {
                    penalty = gap_extension;
                } else {
                    penalty = gap_opening;
                }
                j_sim = F[i][j-1] + penalty;

                /* diagonal step: the two characters are aligned */
                diag_sim =  F[i-1][j-1]
                    + similarity [(unsigned char)seq1[i-1]][(unsigned char)seq2[j-1]];

                /* ties are resolved in favour of the diagonal, then 'i' */
                max_sim         = diag_sim;
                direction[i][j] = 'd';
                if ( i_sim > max_sim ){
                    max_sim = i_sim;
                    direction[i][j] = 'i';
                }
                if ( j_sim > max_sim ) {
                    max_sim = j_sim;
                    direction[i][j] = 'j';
                }

            } else if (j) {

                /* first row: only steps in j are possible */
                if (j_between_exons) {
                    penalty = 0;
                } else if (use_endgap) {
                    penalty = endgap;
                } else if ( direction[i][j-1] =='j' ) {
                    penalty = gap_extension;
                } else {
                    penalty = gap_opening;
                }
                j_sim   = F[i][j-1] + penalty;
                max_sim = j_sim;
                direction[i][j] = 'j';

            } else if (i) {

                /* first column: only steps in i are possible */
                if (i_between_exons) {
                    penalty = 0;
                } else if ( use_endgap) {
                    penalty = endgap;
                } else if ( direction[i-1][j] == 'i' ) {
                    penalty =  gap_extension;
                } else {
                    penalty =  gap_opening;
                }
                i_sim   = F[i-1][j] + penalty;
                max_sim = i_sim;
                direction[i][j] = 'i';
            }

            /* scores never drop below zero */
            if (max_sim < 0 ) max_sim = 0;

            F[i][j] = max_sim;
            if ( F_max < max_sim ) {
                /* TODO: tie break here */
                F_max = max_sim;
                F_max_i = i;
                F_max_j = j;
            }
        }
    }

    /* ---- trace back from the best cell to the origin ---- */
    i = F_max_i;
    j = F_max_j;

    while ( i>0 || j >0 ){

        if ( i<0 || j<0 ){
            errmsg = "Retracing error";
            goto cleanup;
        }

        if (direction[i][j] == 'd'){
            map_i2j [i-1] = j-1;
            map_j2i [j-1] = i-1;
            i-= 1;
            j-= 1;
        } else if (direction[i][j] == 'i') {
            map_i2j [i-1] = far_away;
            i-= 1 ;
        } else if (direction[i][j] == 'j') {
            map_j2i [j-1] = far_away;
            j-= 1 ;
        } else{
            errmsg = "Retracing error";
            goto cleanup;
        }
    }

    /* ---- build the two gapped strings ---- */
    /* Every step below consumes at least one input character, so at most
       len1+len2 columns are produced; +1 leaves room for the terminator */
    if (! (aligned_seq_1 = emalloc( (len1+len2+1)*sizeof(char))) ) {
        errmsg = "error alloc array space";
        goto cleanup;
    }
    if (! (aligned_seq_2 = emalloc( (len1+len2+1)*sizeof(char))) ) {
        errmsg = "error alloc array space";
        goto cleanup;
    }

    i = 0;
    j = 0;
    while (i<max_i || j<max_j) {

        if (j<max_j && i<max_i){

            if (map_i2j[i] == j){
                aligned_seq_1[pos] = seq1[i];
                aligned_seq_2[pos] = seq2[j];
                i += 1;
                j += 1;
            } else if (map_i2j[i] < 0){
                aligned_seq_1[pos] = seq1[i];
                aligned_seq_2[pos] = gap_character;
                i += 1;
            } else if (map_j2i[j] < 0){
                aligned_seq_1[pos] = gap_character;
                aligned_seq_2[pos] = seq2[j];
                j += 1;
            } else {
                /* inconsistent maps: nothing can be consumed, so stop
                   instead of looping forever */
                errmsg = "Retracing error";
                goto cleanup;
            }

        } else if (j<max_j){
            aligned_seq_1[pos] = gap_character;
            aligned_seq_2[pos] = seq2[j];
            j += 1;
        } else {
            aligned_seq_1[pos] = seq1[i];
            aligned_seq_2[pos] = gap_character;
            i += 1;
        }
        pos ++;
    }
    /* Py_BuildValue("s") needs NUL-terminated input */
    aligned_seq_1[pos] = '\0';
    aligned_seq_2[pos] = '\0';

    /* "s" copies the C strings, so the buffers can be released afterwards */
    result = Py_BuildValue("ss", aligned_seq_1, aligned_seq_2 );

cleanup:
    if (F)         free_imatrix(F);
    if (direction) free_cmatrix(direction);
    free(map_i2j);
    free(map_j2i);
    free(aligned_seq_1);
    free(aligned_seq_2);

    if (errmsg) {
        return Py_BuildValue("s", errmsg);
    }
    return result;

}
Esempio n. 23
0
/*********************************************************
 *                                                       *
 * Main Function                                         *
 * -------------                                         *
 *                                                       *
 *********************************************************
 | Takes arguments and launches the gof simulations.      |
 |                                                        |
 | For each of param.cp.nsim simulations, `howmany` loci  |
 | are generated with gensam(); per-locus statistics are  |
 | accumulated in param.lp[] and param.cp.lSiFst, summed  |
 | per simulation in param.cp.sSiFst and over all         |
 | simulations in param.cp.SiFst.  One tab-separated line |
 | of nine values is written to stdout: means over the    |
 | simulations, or "NA" where no value is available.      |
 | A locus with more segregating sites than               |
 | param.cp.nsites sets oksim=1, which turns the whole    |
 | output line into "NA".                                 |
 *-------------------------------------------------------*/
int main(int argc, char *argv[])                    // Array of char=arguments line
{
  //--- Declarations & Call Function ---//
  int i=0, j=0, k=0, count=0, howmany=0, segsites=0, okim=0, oksim=0, numsim=0, totsim=0, nokl=0, nokl0=0;
  int toomany=0;                                   // set when the current locus has more seg sites than sites
  int **statseg=NULL, **nbvariant=NULL, oks[3], okstot[3];
  FILE *pf=NULL, *fopen(const char*, const char*); // Pointer on File for outputs (pf) and IM input file
  double tajd();
  char **list=NULL;                                // Haplotype list
  void updatemainparams(struct params*);
  int gensam(struct params*, char**, int**, int*);
  int **imatrix(int, int);
  //// From rand1.c ////
  void seedit(char*, FILE*, struct params *);
  char **cmatrix(int, int);
  //// From params.c ////
  void changeparams(struct params*);
  void changeparamslocus(struct params*, int);
  struct params getpars(int, char*[], int*);
  //--- Structure declaration---//
  struct params param;

  //--- Get arguments ---//
  param=getpars(argc, argv, &howmany);  // Get input by user for parameters

  pf=stdout;                            // Output
  if( !param.commandlineseedflag ) seedit("s", pf, &param);// WRITE seeds in summary output file 

  //---------- Initialisation & Memory allocation ------------------//
  nbvariant=imatrix(param.cp.npop+1, maxsites);            // array of nb of frequency spectrum
  typeseg=(int*)malloc((unsigned)(maxsites*sizeof(int)));  // type of sites
  statseg=imatrix(param.cp.npop+3, howmany);               // Records locus specific S1, S2, Ss, Sf, 
  changeparams(&param);                                    // Change estimates parameters from priors
  updatemainparams(&param);                                // Update parameters for the coalescent

  oksim=totsim=okstot[0]=okstot[1]=okstot[2]=0;           // simulation check, total #of sim, check on statistics for all simulations
  for(numsim=0; numsim<param.cp.nsim;numsim++)            // Loop along number of simulations for this set of parameters
    {
      //--- Initialization and reset of quality checks ---//
      count=nokl=nokl0=okim=oks[0]=oks[1]=oks[2]=0;       // number of loci, # loci with ok genealogies, no set sites, #statistics ok
      for(i=0;i<11;i++)                 // Sim specific Stats
        param.cp.sSiFst[i]=0;

      //--- Loop along the loci in the simulation ---//
      while((howmany-count++))
        {
          if(okim==0)                   // Case All loci ok in the sample
            {
              for(i=0;i<11;i++)
                {
                  param.cp.lSiFst[i]=0.0;
                  if(i<9)
                    param.lp[count-1].tpH[i]=0;
                }
              changeparamslocus(&param, count-1);      // Get locus specific parameters
               
              list=cmatrix(param.cp.nsam, maxsites+1); // Allocate list of haplotypes
              segsites=gensam(&param, list, nbvariant, param.lp[count-1].S);// Generate a new gene ARG
              statseg[0][count-1]=segsites;            // Total number of seg sites in sample
              for(i=1;i<3+param.cp.npop;i++)
                statseg[i][count-1]=0;

              toomany=0;
              if((segsites>0))                         // Case segsite>0: get stats
                {
                  if(segsites<=param.cp.nsites)        // Case segsite < lenght of locus
                    {
                      // Pairwise differences: within pop1, within pop2, between pops
                      for(k=0;k<param.cp.nsam;k++)
                        {
                          for(i=k+1;i<param.cp.nsam;i++)
                            {
                              if((k<param.lp[count-1].ni[1])&&(i<param.lp[count-1].ni[1])) // pop1
                                {
                                  param.lp[count-1].tpH[0]++;                              // # chromosomes
                                  for(j=0;j<segsites;j++)
                                    {
                                      if(list[k][j]!=list[i][j])
                                        param.lp[count-1].tpH[1]++;                         // # seg sites
                                    }
                                }
                              else if((k>=param.lp[count-1].ni[1])&&(i>=param.lp[count-1].ni[1])) // pop2
                                {
                                  param.lp[count-1].tpH[2]++;                                     // # chromosomes
                                  for(j=0;j<segsites;j++)
                                    {
                                      if(list[k][j]!=list[i][j])                                          
                                        param.lp[count-1].tpH[3]++;                              // # seg sites
                                    }
                                }
                              else                                                               // total sample
                                {
                                  param.lp[count-1].tpH[4]++;                                    // Totsal sample size
                                  for(j=0;j<segsites;j++)
                                    {
                                      if(list[k][j]!=list[i][j])
                                        param.lp[count-1].tpH[5]++;                              // total S
                                    }
                                }
                            }// Loop on chromosome
                        }// Loop along all sampled sequence for the locus

                      for(i=0;i<segsites;i++)                     // Calulate S statistics for the locus
                        {
                          if(typeseg[i]<0) statseg[3][count-1]++; // shared
                          else if(typeseg[i]<param.cp.npop+1) statseg[typeseg[i]][count-1]++; // population specific
                          else statseg[4][count-1]++;             // fixed
                        }
                      for(i=1;i<5;i++)                            //--- Record S1 S2 Ss Sf forthe locus ---//
                        param.cp.lSiFst[i-1]+=statseg[i][count-1];                             
                    }// End case segsite<lenght of locus
                  else                                 // Case segsites>lenght of locus
                    {
                      okim=1;                          // Sample have a wrong locus
                      oksim=1;                         // stop this simulation
                      toomany=1;
                    }
                }// End locus polymorphic
              else                                     // Locus without seg sites
                {
                  okim=1;                              // Sample have a wrong locus (S=0)
                  oksim=2;                             // 0 for all stats
                }

              // Free memory for this locus on every path, not only the
              // successful one, so rejected loci do not leak their haplotypes
              for(i=0;i<param.cp.nsam;i++)
                free(list[i]);
              free(list);
              list=NULL;

              if(toomany)
                break;
            }// End Sample good until now

          if(okim==0)                                  // All loci good until now
            {
              nokl++;                                  // +1 good locus
              nokl0++;                                 // +1 polymorphic locus
              for(i=0;i<7;i++)
                {
                  if(i<4)
                    param.cp.sSiFst[i]+=param.cp.lSiFst[i];                      // sum of Sk
                  param.lp[count-1].H[i]=param.lp[count-1].tpH[i];               // locus specific stats  
                }
              param.cp.lSiFst[5]=param.lp[count-1].H[1]/=param.lp[count-1].H[0]; // Hw1
              param.cp.lSiFst[6]=param.lp[count-1].H[3]/=param.lp[count-1].H[2]; // Hw2
              param.lp[count-1].H[5]/=param.lp[count-1].H[4];                    // Hb
              param.cp.lSiFst[4]=param.lp[count-1].H[6]=1-((param.lp[count-1].H[1]+param.lp[count-1].H[3])/2)/ param.lp[count-1].H[5];// locis specific Fst
              if((param.lp[count-1].S[0]>0)&&(param.lp[count-1].ni[1]>2)) // Locus popymorphic in pop1
                {
                  param.cp.lSiFst[7]=tajd(param.lp[count-1].ni[1], param.lp[count-1].S[0], param.lp[count-1].H[1]);
                  param.cp.sSiFst[7]+=param.cp.lSiFst[7];
                  oks[0]++;             // +1 good stat for pop1
                }
              if((param.lp[count-1].S[1]>0)&&(param.lp[count-1].ni[2]>2)) // Locus popymorphic in pop2
                {
                  param.cp.lSiFst[8]=tajd(param.lp[count-1].ni[2], param.lp[count-1].S[1], param.lp[count-1].H[3]);
                  param.cp.sSiFst[8]+=param.cp.lSiFst[8];
                  oks[1]++;             // +1 good stat for pop2
                }
              if(statseg[1][count-1]>0)                                  // Locus popymorphic private in pop1
                param.lp[count-1].H[7]=param.cp.lSiFst[9]+=(double) param.lp[count-1].S[2]/(statseg[1][count-1]*param.lp[count-1].ni[1]*2); // p(1)

              if(statseg[2][count-1]>0)                                  // Locus popymorphic private in pop2
                param.lp[count-1].H[7]=param.cp.lSiFst[9]+=(double) param.lp[count-1].S[3]/(statseg[2][count-1]*param.lp[count-1].ni[2]*2); // p(1)

              param.cp.sSiFst[9]+=param.cp.lSiFst[9];                    // sum p1
              if(statseg[3][count-1]>0)                                  // Locus with shared sites
                {
                  param.lp[count-1].H[8]=param.cp.lSiFst[10]=(double) param.lp[count-1].S[4]/(statseg[3][count-1]*(param.lp[count-1].ni[2]+param.lp[count-1].ni[1])); // p(2)
                  param.cp.sSiFst[10]+=param.cp.lSiFst[10];              // sum p2
                  oks[2]++;
                }

              param.cp.sSiFst[4]+=param.lp[count-1].H[6]; // sum Fst
              param.cp.sSiFst[5]+=param.lp[count-1].H[1]; // sum Hw1
              param.cp.sSiFst[6]+=param.lp[count-1].H[3]; // sum Hw2
            }
          else if(oksim==2)             // Case no seg site for that locus
            {
              oksim=0;                  // reset checks
              okim=0;
              nokl++;                   // 1+ locus to count in mean (all 0 values)
              for(i=0;i<9;i++)
                param.lp[count-1].H[i]=param.lp[count-1].tpH[i]; // locus specific stats
            }
        }// End loop on loci
    
      if(nokl==howmany)                 // All sample good
        {
          totsim++;                     // 1+ good simulation
          for(i=0;i<4;i++)
            param.cp.SiFst[i]+=param.cp.sSiFst[i];                    // sum of S stats along simulations
          for(i=4;i<11;i++)
            {
              if(((i<7)||(i>=9))&&(nokl0>0))
                param.cp.SiFst[i]+=(double)param.cp.sSiFst[i]/nokl0; // mean of other stats along simulations
              if((i>=7)&&(i<9)&&(oks[i-7]>0))
                {
                  param.cp.SiFst[i]+=(double)param.cp.sSiFst[i]/oks[i-7];
                  okstot[i-7]++;
                }
            }
        }
    }// End loop on simulations
  if(oksim==0)                          // All simulations worked
    {
      for(i=0;i<9;i++)
        {
          if(i<7)
            fprintf(pf, "%lg\t", (double) param.cp.SiFst[i]/(totsim));           // write mean of sum of S stats, Fst and Hws over simulations
          else
            {
              // Test the divisor itself (okstot), not the last simulation's
              // oks[], so the mean is only written when it is defined
              if(okstot[i-7]>0)
                fprintf(pf, "%lg\t", (double) param.cp.SiFst[i]/(okstot[i-7])); // write mean Tds if S>0 in pops 
              else fprintf(pf, "NA\t" ); 
            }
        }
      fprintf(pf, "\n");
    } 
  else                                  // Case one locus with too much seg sites
    {
      for(i=0;i<9;i++)
        fprintf(pf, "NA\t" );
      fprintf(pf, "\n");
    }
  seedit("end", pf, &param);                 // in randx.c, flag[0]!="s" so create/rewrite seed in seedmimar
  fclose(pf);
  
  free(typeseg);
  for(i=0;i<param.cp.npop+3;i++)
    {
      if(i<param.cp.npop+1)
        free(nbvariant[i]);
      free(statseg[i]);
    }
  free(nbvariant);
  free(statseg);
    
  ///////// FREE PARAM ///////
  for(i=0;i<param.cp.npop;i++)
    free(param.cp.mig_mat[i]);
  free(param.cp.mig_mat);
  free(param.cp.config);
  if(param.cp.listevent!=NULL)          // check the array before looking at its entries
    {
      for(i=9;i>=0;i--)
        if(param.cp.listevent[i]!=NULL && param.cp.listevent[i]->nextde!=NULL)
          free(param.cp.listevent[i]->nextde);
      free(param.cp.listevent);
    }
  free(param.cp.deventlist);
  free(param.cp.size);
  free(param.cp.alphag);
  for(i=0;i<3;i++)
    free(param.cp.uniform[i]);
  free(param.cp.uniform);
  free(param.cp.oldest);
  free(param.cp.newest);
  free(param.cp.newparam);
  for(i=0;i<howmany;i++)
    free(param.lp[i].name);
  free(param.lp);
  free( param.tableseeds);
  exit(0);
}// End main function
Esempio n. 24
0
/*
 * image_interp: reads an oN1 x oN2 image of single bytes from stdin and
 * writes an nN1 x nN2 resampled image to stdout, preceded by a small text
 * header ("RAW", a comment line, the new dimensions and 255).
 *
 * Each output pixel (i,j) is mapped by new_coord() to (r,th) and by
 * old_coord() to simulation coordinates (x1,x2); the nearest lower source
 * pixel is copied.  Pixels that map outside 0 <= x1 < log(rout/rin) or
 * 0 <= x2 < 1 are written as 0.
 *
 * Nine arguments are required (see the usage text).  They are stored in the
 * file-level variables oN1, oN2, nN1, nN2, rin, rout, hslope; dx1, dx and dy
 * are derived from them.
 */
int main(int argc, char *argv[])
{
	char **oldimage,**newimage,**cmatrix(int a, int b, int c, int d)  ;
	int i,j,iold,jold,ioldp,joldp ;
	double xmax,ymax,dx2,x1max ;
	double r,th,diold,djold,x1,x2 ;
	void new_coord(int i,int j, double *r, double *th) ;
	void old_coord(double r,double th,double *x1,double *x2) ;


	/* get size of image from arguments: argv[1]..argv[9] are all read */
	if(argc < 10) {
		fprintf(stderr,"Usage: \n" ) ;
	        fprintf(stderr,"\t image_interp oN1 oN2 nN1 nN2 Rin Rout xmax ymax hslope\n") ; 
	        fprintf(stderr,"  where \n");
		fprintf(stderr,"\t oN1     = N1 from simulation \n");
		fprintf(stderr,"\t oN2     = N2 from simulation \n");
		fprintf(stderr,"\t nN1     = # of pixels in x-dir \n");
		fprintf(stderr,"\t nN2     = # of pixels in y-dir \n"); 
		fprintf(stderr,"\t Rin     = Rin used in simulation \n");
		fprintf(stderr,"\t Rout    = Rout used in simulation\n");
		fprintf(stderr,"\t xmax    = interpolated image will span x=[0,xmax] \n");
		fprintf(stderr,"\t ymax    = interpolated image will span y=[-ymax,ymax] \n");
		fprintf(stderr,"\t hslope  = hslope value from run\n");
		fprintf(stderr,"\n\n Example: \n");
		fprintf(stderr," cat im_lrho_0000.r8|awk '(FNR>4) {print}'|image_interp 128 64 128 256 1.321025567223338859 40. 40. 40. 0.3 > im_int.r8 \n");
		fprintf(stderr,"\n\n ");
		exit(0) ;
	}
	sscanf(argv[1],"%d",&oN1) ;
	sscanf(argv[2],"%d",&oN2) ;
	sscanf(argv[3],"%d",&nN1) ;
	sscanf(argv[4],"%d",&nN2) ;
	sscanf(argv[5],"%lf",&rin) ;
	sscanf(argv[6],"%lf",&rout) ;
	sscanf(argv[7],"%lf",&xmax) ;
	sscanf(argv[8],"%lf",&ymax) ;
	sscanf(argv[9],"%lf",&hslope) ;

	/* cell sizes of the old (simulation) and new (pixel) grids */
	x1max = log(rout/rin) ;
	dx1 = x1max/oN1 ;
	dx2 = 1./(double)oN2 ;

	dx = xmax/(double)nN1 ;
	dy = 2.*ymax/(double)nN2 ;

	/* make arrays for images */
	oldimage = cmatrix(0,oN1-1,0,oN2-1) ;
	newimage = cmatrix(0,nN1-1,0,nN2-1) ;

	fprintf(stdout,"RAW\n#   \n%d %d\n%d\n",nN1,nN2,255);

	/* read in old image: rows arrive top-down, i.e. highest j first */
	for(j=oN2-1;j>=0;j--)
	for(i=0;i<oN1;i++) {
		if(fread(&oldimage[i][j], sizeof(unsigned char), 1, stdin) != 1) {
			fprintf(stderr,"image_interp: unexpected end of input image\n") ;
			exit(1) ;
		}
	}

	/* interpolate to new image */
	for(j=nN2-1;j>=0;j--)
	for(i=0;i<nN1;i++) {
		new_coord(i,j,&r,&th) ;
		old_coord(r,th,&x1,&x2) ;
		if(x1 < 0. || x1 >= x1max || 
		   x2 < 0. || x2 >= 1.) 
			newimage[i][j] = 0 ;

		else {

#if 0
			/* disabled alternative: bilinear interpolation */
			iold = (int)(x1/dx1 - 0.5) ;
			diold = x1/dx1 - 0.5 - (int)(x1/dx1 - 0.5) ;
			jold = (int)(x2/dx2 - 0.5) ;
			djold = x2/dx2 - 0.5 - (int)(x2/dx2 - 0.5) ;
			ioldp = iold+1 ;
			joldp = jold+1 ;


			/* take care of boundary effects */
			if( diold < 0. ) ioldp = 0 ;
			else if (iold == oN1-1) ioldp = oN1-1 ;
			if( djold < 0. ) joldp = 0 ;
			else if( jold == oN2-1 ) joldp = oN2-1 ;

			newimage[i][j] = (char)(0.5 + 
				(1. - diold)*(1.-djold)*oldimage[iold][jold] +
				(1. - diold)*djold*oldimage[iold][joldp] +
				diold*(1.-djold)*oldimage[ioldp][jold] +
				diold*djold*oldimage[ioldp][joldp]) ;
#endif

			/* copy the source pixel whose cell contains (x1,x2) */
			iold = (int)(x1/dx1 - 1.e-20) ;
			jold = (int)(x2/dx2 - 1.e-20) ;

			newimage[i][j] = oldimage[iold][jold] ;
		}
		fwrite(&newimage[i][j], sizeof(unsigned char), 1, stdout) ;
	}

	return 0 ;
}
Esempio n. 25
0
/*
 * read_init builds the INIT structure that holds the starting selfing rates (S)
 * used for MCMC updating.
 *
 * Input arguments:
 *	initialfilename  path of the file containing the initial values; if it is
 *	                 NULL every chain is initialised with the random number
 *	                 generator ran1()
 *	chainnum         number of MCMC chains, i.e. the number of sets of initial values
 *	popnum           number of subpopulations assumed
 *	update, burnin, thinning
 *	                 copied unchanged into the returned structure
 *	chainnum and popnum determine the dimensions of the "initd" element
 *	(chainnum x popnum) of the INIT structure.
 *
 * Return value: an INIT structure with "chainnum" sets of initial values, one
 * value per subpopulation, plus a name for every chain.  name_len[i] is the
 * length of chn_name[i] including its terminating '\0'.
 *
 * The data in the file should be arranged as:
 *	>chain_name1			each set starts with a header line beginning with '>'
 *	num1 num2 ..			followed by one line holding popnum values
 *
 *	>chain_name2
 *	....
 *
 * Lines in front of a header that do not start with '>' are skipped.  If the
 * file holds fewer than chainnum sets, the remaining chains are named
 * "Chain#<k>" and filled with random values; sets beyond chainnum are ignored.
 * Chain names longer than MAXLEN-1 characters are truncated.
 *
 * Errors are reported through nrerror() (assumed not to return, as in
 * Numerical Recipes -- TODO confirm).
 */
INIT read_init(char *initialfilename,int chainnum,int popnum,long update,long burnin,int thinning)
{
	int i,j,len,found,cnt_chn=0;
	FILE *initfp;
	char *line;
	INIT initial;
	
	initial.chainnum=chainnum;
	initial.update=update;
	initial.burnin=burnin;
	initial.thinning=thinning;
	initial.popnum=popnum;
	
	initial.initd=matrix(0,chainnum-1,0,popnum-1);
	initial.name_len=ivector(0,chainnum-1);
	initial.chn_name=cmatrix(0,chainnum-1,0,MAXLEN-1);
	
	if(initialfilename!=NULL)
	{
		if((initfp=fopen(initialfilename,"r"))==NULL)
		{	nrerror("Cannot open inital file!");}
		line=cvector(0,MAXLINE-1);
		for(i=0;i<chainnum;i++)
		{
			/* Skip ahead to the next header line.  The result of fgets is
			 * tested directly: feof() only turns true after a read has
			 * already failed, so looping on it would parse a stale buffer. */
			found=0;
			while(fgets(line,MAXLINE,initfp)!=NULL)
			{
				if(line[0]=='>')
				{
					found=1;
					break;
				}
			}
			if(!found) break;	/* no more chains in the file */

			/* Copy the name that follows '>' up to the end of the line,
			 * never writing past the MAXLEN characters of the row. */
			len=(int)strlen(line);
			for(j=1;j<len&&j<MAXLEN&&line[j]!='\n';j++)
			{
				initial.chn_name[i][j-1]=line[j];
			}
			/* terminate the name of chain i (j-1 characters were copied) */
			initial.chn_name[i][j-1]='\0';
			initial.name_len[i]=j;

			/* The line after the header must hold exactly popnum values;
			 * a missing line is treated like a wrong count. */
			if(fgets(line,MAXLINE,initfp)==NULL||word_cnt(line)!=popnum)
			{
				nrerror("The number of initial values for selfing rates is not equal the number of subpopulation assumed!\n");
			}
			word_split(line,initial.initd[i],popnum);
			cnt_chn++;
		}
		fclose(initfp);
		free_cvector(line,0,MAXLINE-1);
	}

	/* Chains not supplied by the file (all of them when no file was given)
	 * get random starting values and a generated name. */
	for(i=cnt_chn;i<chainnum;i++)
	{
		for(j=0;j<popnum;j++)
			initial.initd[i][j]=ran1();
		strcpy(initial.chn_name[i],"Chain#");
		strcat(initial.chn_name[i],int_to_string(i+1));

		initial.name_len[i]=strlen(initial.chn_name[i])+1;
	}
	return(initial);
}