// Solve a complex banded linear system.
//
// The band matrix `a` (n rows, m1 sub-diagonals, m2 super-diagonals) is
// LU-decomposed by cbandec() and the factorisation is then passed, together
// with the right-hand side `b`, to cbanbks().
//
// The decomposition workspace (`al`, `indx`) is kept in function-local
// statics and only re-allocated when a call needs more rows or more
// sub-diagonals than are currently allocated.
// NOTE(review): because of the static workspace this routine is presumably
// not safe to call concurrently -- confirm before using it from threads.
void cband_solve(dcomplex **a, int n, int m1, int m2, dcomplex *b)
{
  static dcomplex **al;        // workspace handed to cbandec / cbanbks
  static unsigned long *indx;  // index workspace, n entries
  static int an = 0, am1 = 0;  // Allocated sizes (rows, sub-diagonals)

  dcomplex d;

  if (an < n) {
    // Too few rows: drop any previous workspace and allocate a new one
    // sized for this call.
    const bool had_workspace = (an != 0);
    if (had_workspace) {
      free_cmatrix(al);
      delete[] indx;
    }

    al   = cmatrix(n, m1);
    indx = new unsigned long[n];
    an   = n;
    am1  = m1;
  } else if (am1 < m1) {
    // Enough rows but too few columns: only the matrix is re-allocated.
    if (am1 != 0) {
      free_cmatrix(al);
    }

    al  = cmatrix(an, m1);
    am1 = m1;
  }

  // LU decompose matrix
  cbandec(a, n, m1, m2, al, indx, &d);

  // Solve
  cbanbks(a, n, m1, m2, al, indx, b);
}
/*
 * Identify the type of pair interaction according to the Leontis and
 * Westhof nomenclature.
 *
 * For each partner of the pair the hydrogen-bond atom names are reduced by
 * get_unequility() and classified by edge_type(); the two resulting edge
 * labels are written to `type` as "<edge of i>/<edge of j>".  When there is
 * no hydrogen bond (num_hbonds < 1) the result is "?/?".
 *
 * NOTE(review): the scratch matrix obtained from cmatrix() is not released
 * anywhere in this function -- confirm whether a matching free is missing.
 */
void get_pair_type(long num_hbonds, char **hb_atom1, char **hb_atom2,
                   long i, long j, char *bseq, char *type)
{
    char edge_i[5], edge_j[5];
    long n_atoms_i, n_atoms_j;
    char **atom;

    /* scratch space, allocated even when there is nothing to classify */
    atom = cmatrix(0, num_hbonds + 40, 0, 4);

    if (num_hbonds < 1) {
        sprintf(type, "?/?");
        return;
    }

    /* first partner (residue i) */
    get_unequility(num_hbonds, hb_atom1, &n_atoms_i, atom);
    edge_type(n_atoms_i, atom, i, bseq, edge_i);

    /* second partner (residue j); the scratch matrix is reused */
    get_unequility(num_hbonds, hb_atom2, &n_atoms_j, atom);
    edge_type(n_atoms_j, atom, j, bseq, edge_j);

    sprintf(type, "%s/%s", edge_i, edge_j);
}
/*--------------------------------------------------------------------------------*/ Data *read_gbt(const char *fname) { Data *dat=(Data *)malloc(sizeof(Data)); memset(dat,0,sizeof(Data)); int nchan=4096; int npol=4; long ndat=get_file_size(fname); if (ndat<=0) { printf("FILE %s unavailable for reading.\n",fname); return NULL; } int nsamp=ndat/npol/nchan; printf("have %d samples.\n",nsamp); char **mat=cmatrix(nsamp,nchan); char *tmp=(char *)malloc(sizeof(char)*nchan*npol); FILE *infile=fopen(fname,"r"); for (int i=0;i<nsamp;i++) { size_t nread=fread(tmp,sizeof(char),nchan*npol,infile); memcpy(mat[i],tmp,sizeof(char)*nchan); } fclose(infile); free(tmp); dat->raw_nchan=nchan; dat->ndata=nsamp; //dat->raw_data=mat; dat->raw_data=matrix(nchan,nsamp); for (int i=0;i<nchan;i++) for (int j=0;j<nsamp;j++) dat->raw_data[i][j]=mat[j][i]; dat->raw_chans=(float *)malloc(sizeof(float)*dat->raw_nchan); dat->dt=1e-3; float dnu=(900-700.0)/dat->raw_nchan; for (int i=0;i<dat->raw_nchan;i++) { dat->raw_chans[i]=900-(0.5+i)*dnu; } free(mat[0]); free(mat); return dat; }
// Perpendicular Laplacian of a 3D field, evaluated with an FFT in z.
//
// For every y index the field is Fourier transformed in z (ZFFT), the
// x-derivative operator is applied mode by mode using the tridiagonal
// coefficients from laplace_tridag_coefs(), and the result is transformed
// back (ZFFT_rev).  Modes with index above (int)(zsmooth*ncz) are removed
// when zsmooth > 0.  The first and last x points of the result are set to
// zero, and the output inherits the location of `f`.
//
// This replaces the real-space form
//   G1*DDX(f) + G3*DDZ(f) + g11*D2DX2(f) + g33*D2DZ2(f)  (+ 2.0*g13*D2DXDZ(f))
//
// NOTE(review): the x-derivative loop writes delft only for 2 <= jx < ngx-2,
// while the inverse transform reads delft for 1 <= jx < ngx-1 -- confirm that
// rows 1 and ngx-2 are meant to be transformed.
const Field3D Delp2(const Field3D &f, real zsmooth)
{
  Field3D result;

#ifdef CHECK
  int msg_pos = msg_stack.push("Delp2( Field3D )");
#endif

  // Fourier-space work arrays: allocated on the first call, reused afterwards
  static dcomplex **ft = (dcomplex**) NULL, **delft;

  result.Allocate();

  real ***fd = f.getData();
  real ***rd = result.getData();

  if (ft == (dcomplex**) NULL) {
    // Allocate memory
    ft    = cmatrix(ngx, ncz/2 + 1);
    delft = cmatrix(ngx, ncz/2 + 1);
  }

  // Loop over all y indices
  for (int jy = 0; jy < ngy; jy++) {

    // Take forward FFT
    for (int jx = 0; jx < ngx; jx++) {
      ZFFT(fd[jx][jy], zShift[jx][jy], ft[jx]);
    }

    // Loop over kz
    for (int jz = 0; jz <= ncz/2; jz++) {
      // 0 removes the mode, 1 keeps it
      const real filter =
        ((zsmooth > 0.0) && (jz > (int) (zsmooth*((real) ncz)))) ? 0.0 : 1.0;

      // No smoothing in the x direction
      for (int jx = 2; jx < (ngx-2); jx++) {
        // Perform x derivative
        dcomplex a, b, c;
        laplace_tridag_coefs(jx, jy, jz, a, b, c);

        delft[jx][jz] = a*ft[jx-1][jz] + b*ft[jx][jz] + c*ft[jx+1][jz];
        delft[jx][jz] *= filter;
      }
    }

    // Reverse FFT
    for (int jx = 1; jx < (ngx-1); jx++) {
      ZFFT_rev(delft[jx], zShift[jx][jy], rd[jx][jy]);
      // the extra point at index ncz repeats the first z point
      rd[jx][jy][ncz] = rd[jx][jy][0];
    }

    // Boundaries
    for (int jz = 0; jz < ncz; jz++) {
      rd[0][jy][jz]     = 0.0;
      rd[ngx-1][jy][jz] = 0.0;
    }
  }

#ifdef CHECK
  msg_stack.pop(msg_pos);
#endif

  // Set the output location
  result.setLocation(f.getLocation());

  return result;
}
void agfit5a(Sint *nusedx, Sint *nvarx, double *yy, double *covar2, double *offset2, double *weights2, Sint *strata, Sint *sort, double *means, double *beta, double *u, double *loglik, Sint *methodx, Sint *ptype2, Sint *pdiag2, Sint *nfrail, Sint *frail2, void *fexpr1, void *fexpr2, void *rho) { int i,j,k, person; int nused, nvar; int nf, nvar2; int deaths, itemp; int istrat, indx2, p, ksave; double denom, zbeta, risk; double temp; double d2, efron_wt; double method; double meanwt, time; nused = *nusedx; nvar = *nvarx; nf= *nfrail; method= *methodx; nvar2 = nvar + nf; ptype = *ptype2; pdiag = *pdiag2; /* ** Allocate storage for the arrays and vectors ** Since they will be used later, sizes are based on what will be ** needed with the frailty terms. */ if (nvar >0) { covar= cmatrix(covar2, nused, nvar); cmat = cmatrix(0, nvar2, nvar+1); cmat2= cmatrix(0, nvar2, nvar+1); } a = Calloc(4*nvar2 + 5*nused , double); oldbeta = a + nvar2; a2 = oldbeta + nvar2; weights = a2+ nvar2; offset = weights + nused; score = offset + nused; tmean = score + nused; start = tmean + nvar2; stop = start + nused; event = Calloc(3*nused, int); sort1 = event + nused; sort2 = sort1 + nused; for (i=0; i<nused; i++) { weights[i] = weights2[i]; offset[i] = offset2[i]; event[i] = yy[nused + nused +i]; sort1[i] = sort[i]; sort2[i] = sort[nused+i]; start[i] = yy[i]; stop[i] = yy[nused+i]; } /* scratch space for penalty ** upen needs to be max(nvar, nfrail), ** ipen max(nfrail, nvar(if pdiag=0) or nvar^2 ) */ if (nf > nvar) i=nf; else i=nvar; if (nf > nvar*nvar) j=nf; else j=nvar*nvar; if (pdiag==0) upen = Calloc(2*i, double); else upen = Calloc(i+j, double);
/*
 * Iterative haplotype-frequency estimation called from R.
 *
 * Inputs
 *   tog     - selects the layout of the design rows written to desres
 *             (1: one column per retained haplotype pair, 0: one column per
 *             retained haplotype); also used in the 1500-haplotype limit check
 *   lim     - haplotypes with estimated frequency >= *lim are reported
 *   gent    - np genotype strings made of the characters '0'..'3'; each
 *             string is re-terminated in place at index len-1
 *   qtrait  - not referenced by the live code of this function
 *   xnp     - number of persons
 * Outputs
 *   likeres - *likeres receives the final value returned by likea()
 *   freqres - one string per reported haplotype (haplotype + frequency)
 *   hapres  - one string per person listing up to 10 most probable states
 *   desres  - one design-matrix row per person
 *
 * The function works almost entirely on file-level globals (np, len, mg,
 * merke, nulmer, ge, po, geno, prob, max_prob, hap, hc, ng, nh, nstate,
 * mstate, state, genoProb, genoId, hapnew, haptmp, htpp, pgen, jjx, ...),
 * which it allocates at the start and destroys at the end.
 *
 * NOTE(review): lino/lin are fixed 10000-byte buffers filled with
 * sprintf/strcat; for *tog == 1 a row holds nhap*(nhap+1)/2 + 1 numbers, so
 * large nhap looks able to overflow lino -- confirm.
 * NOTE(review): `combinations`, `df` and the local `pp` are assigned but
 * never read in this function.
 */
void itegeppXXR(int *tog, double *lim, char **gent, double *qtrait, int *xnp, double *likeres, char **freqres, char **hapres, char **desres) {
  char lino[10000], lin[10000];
  char* CharNull = "\0"; /* 06.11.2014/SKn: empty string appended via %s in the sprintf calls below */
  double likold, pe, pex, /* 10.3. 2000 ROHDE */ *p2max, gsum; /* 10.3. 2000 ROHDE */
  int i, inp, it, j, k, ki, kj, h, s, glev, non, ac[2], drei, null, df = 0 /*SKn*/, combinations, nz, iqual, nhap, *hlist, **pimax, h1x, h2x;
  uint iterations, h1, h2;
  bool loop;
  // new: used when creating the design matrix (tog=0)
  double pehh, *peh;

  /* Max. 16 SNPs */
  if ( strlen(gent[0]) > 16 ) error ("Number of SNPs should smaller than 17.") ;

  np = *xnp;
  /* len counts the terminating '\0' as well: number of loci + 1 */
  len = (int) (strlen(gent[0]) + 1);

  mg = ivector(np);
  merke = ivector(np);
  nulmer = ivector(np);
  ge = ivector(np);
  hlist = ivector(np);
  po = uivector(len);
  geno = cmatrix(np, len);
  max_prob = init_dvector(NULL, 0.0, np);
  prob = init_dvector(NULL, 0.0, np);
  hap = init_dvector (NULL, 0.0, Hapco);
  hc = init_ivector (NULL, -1,Hapco);

  /* po[i] = 2^i */
  po[0]=1;
  for(i=1;i<len;i++)po[i] = 2*po[i-1];
  combinations = po[len-1];

  init_dvector(hap, 0.0, Hapco);
  init_ivector (hc, -1,Hapco);
  ng = 0;

  /* read input data: collapse the np persons into ng distinct genotypes */
  for(inp=0;inp<np;inp++){
    drei = 0;   /* number of '3' codes in this genotype */
    null = 0;   /* number of '0' codes in this genotype */
    for (i=0; i<len-1; i++) {
      /* only the characters '0'..'3' (ASCII 48..51) are accepted */
      if(i < len-1 && (gent[inp][i] < 48 || gent[inp][i] > 51) ){
        Rprintf("%d %d %d\n",inp, i, gent[inp][i]);
        /* ROHDE 15.03.2000: the message is raised through error() instead of being printed */
        error("\n Error in data person %d\n",inp+1);;
      }
      if ( gent[inp][i] == '3' ) drei ++;
      if ( gent[inp][i] == '0' ) null ++;
    }
    gent[inp][len-1] = '\0';

    it = 1;   /* stays 1 when the genotype has not been seen before */
    for (i=0; i<ng; i++) {
      if ( strncmp (geno[i], gent[inp], len) == 0 ) {
        /*** a certain genotype was found more than just once ***/
        ge[inp] = i;
        mg[i] ++;
        it = 0;
        merke[i] = drei;
        break;
      }
    }
    if (it) {
      /*** a certain genotype was encountered the first time ***/
      strcpy (geno[ng], gent[inp]);
      ge[inp] = ng;
      mg[ng] = 1;
      merke[ng] = drei;
      nulmer[ng] = null;
      ng ++;
    }
  } /* end while */

  People = np;
  Loci = len-1;
  /* end of reading sample data */

  nall = 2 * np;
  nstate = init_ivector (NULL, 0, ng);
  mstate = init_ivector (NULL, 0, ng);
  state = (uint***) calloc(ng , sizeof(uint**));
  for (i=0; i<ng; i++) {
    /* room for 2^drei * 2^null * 2^null haplotype pairs per genotype */
    nz = po[merke[i]] * po[nulmer[i]] * po[nulmer[i]];
    state[i] = uimatrix(nz, 2);
  }

  /*** sort genotypes by weights *******************************************/
  genoProb = dvector(ng);
  genoId = ivector(ng);
  for (i=0; i<ng; i++) {
    genoId[i] = i;
    genoProb[i] = ((double)mg[i])/((double) po[merke[i]])/pow(4.0,nulmer[i]);
  }
  sortByProb(genoProb, genoId, ng);

  /* glev = number of genotypes whose weight reaches SignificanceLevel */
  glev=0;
  for(i=0;i<ng;i++)if(genoProb[i] >= SignificanceLevel)glev++;

  /*** process sorted genotypes ********************************************/
  nh = 0;
  /* NOTE(review): rechap() presumably fills hc/nh/state for these genotypes -- confirm */
  for (i=0; i<glev; i++) {
    rechap(genoId[i], 0, len-1);
  }

  /* remaining genotypes: enumerate ordered pairs of known haplotypes that are compatible */
  for (i=glev; i<ng; i++) {
    s = 0;
    for (j=0; j<nh; j++) {
      ac[0] = hc[j];
      for (k=j; k<nh; k++) {
        ac[1] = hc[k];
        if ( compatible(geno[genoId[i]], ac) ) {
          state[genoId[i]][s][0] = j;
          state[genoId[i]][s][1] = k;
          s ++;
          /* a heterozygous pair is stored in both orders */
          if ( j != k ) {
            state[genoId[i]][s][0] = k;
            state[genoId[i]][s][1] = j;
            s ++;
          }
        }
      }
    }
    nstate[genoId[i]] = s;
  }
  for (i=glev; i<ng; i++) {
    addon(genoId[i]);
  }

  /*** now comes the output that does not need simulated annealing *********/
  first = 1; /*** start likelihood outside of annealing loops ***/
  df = nh;
  hapnew = init_dvector(NULL, 0.0, nh );
  haptmp = init_dvector(NULL, 0.0, nh );
  for (i=0; i<ng; i++)selprob(i);
  likold = likea();

  /* Continue computation of mean probabilities */
  for(i=0;i<ng;i++)
    for(j=0;j<mstate[i];j++) {
      double pp = 0.0;
      if ( nstate[i] > 1 ) {
        h = state[i][j][0];
        hapnew[h] += (double)mg[i] / (double)mstate[i];
        h = state[i][j][1];
        pp += hapnew[h];
        hapnew[h] += ((double) mg[i]) / ((double) mstate[i]);
        pp += hapnew[h];
      }
      else {
        h = state[i][j][0];
        hapnew[h] += 2.0 * ((double) mg[i]) / ((double) mstate[i]);
        pp += hapnew[h];
      }
    }

  /* normalise by the number of alleles; haplotypes with zero count share a total mass of 0.0001 */
  non = 0;
  for (i=0; i<nh; i++) {
    if(hapnew[i]==0.0)non++;
    else hapnew[i] /= (double) nall;
  }
  for (i=0; i<nh; i++) {
    if(hapnew[i]==0.0)hapnew[i] = 0.0001/(double)non;
    else hapnew[i] *= 0.9999;
  }

  /* iterate until no frequency changes by more than LoopPrecision */
  iterations = 0;
  first = 0;
  do {
    loop = 0;
    iterations ++;
    /* Recompute mean probabilities */
    for (i=0; i<nh; i++) {
      if ( fabs(hap[i] - hapnew[i]) > LoopPrecision ) loop = 1;
      hap[i] = hapnew[i];
    }
    init_dvector(prob, 0.0, np);
    init_dvector(haptmp, 0.0, nh);
    init_dvector(hapnew, 0.0, nh);
    /* NOTE(review): likea() presumably refills hapnew from hap -- confirm */
    likold = likea();
    for (i=0; i<nh; i++) hapnew[i] /= (double) nall;
  } while (loop);

  /* report every haplotype whose frequency reaches *lim; hlist keeps their indices */
  nhap = 0;
  j = 0;
  for (i=0; i<nh; i++) {
    if ( hapnew[i] >= *lim ) {
      /* 07.06.2007 S.Knueppel: limit the number of estimated haplotypes. */
      if ( (*tog==0) && ((nhap+1) > 1500) ) {
        error ("Error in itegeppXXR: Too much estimated haplotypes. Increase option lim.") ;
      }
      if ( (*tog==1) && ((nhap+1) > 1500) ) {
        error ("Error in itegeppXXR: Too much estimated haplotypes. Increase option lim.") ;
      }
      /* 02.06.2015/SKn: start from an empty string */
      sprintf(lino, "%s", CharNull);
      printHaplotype(hc[i], len, lino);
      sprintf(lin,"%9.6f%s", hapnew[i], CharNull); /* 06.11.2014/SKn */
      strcat(lino,lin);
      strcpy(freqres[j],lino);
      j++;
      hlist[nhap++] = i;
    }
  }

  /* htpp[i][j] (i <= j) numbers the upper triangle of an (nhap+1) x (nhap+1) table;
     index nhap collects everything that is not a reported haplotype */
  k = 0;
  htpp = init_uimatrix(NULL,0,nhap+1,nhap+1);
  for(i=0;i<nhap+1;i++)
    for(j=i;j<nhap+1;j++)htpp[i][j]=k++;
  pgen = init_dmatrix(NULL,0.0,ng,(nhap+1)*(nhap+2)/2);

  /* start find best states after MLE 10.3.2000 ROHDE */
  pimax = imatrix(ng,10); /* ROHDE 10.3.2000: up to 10 equally probable best states per genotype */
  p2max = init_dvector(NULL,0.0,ng); /* ROHDE 10.3.2000: second-largest state probability */
  for(i=0;i<ng;i++)max_prob[i] = 0.0; /* ROHDE 10.3.2000 */
  for (i=0;i<ng;i++){
    for(j=0;j<10;j++)pimax[genoId[i]][j] = -1;
    iqual=1;   /* number of states currently tied for the maximum */
    for (j=0;j<nstate[genoId[i]];j++){
      pe = hapnew[state[genoId[i]][j][0]] * hapnew[state[genoId[i]][j][1]];
      /* heterozygous pair: doubled */
      if( state[genoId[i]][j][0] != state[genoId[i]][j][1] ) pe += pe;
      if(pe > p2max[genoId[i]]){
        if (pe > max_prob[genoId[i]]){
          /* new maximum: the old maximum becomes the runner-up */
          p2max[genoId[i]] = max_prob[genoId[i]];
          max_prob[genoId[i]] = pe;
          pimax[genoId[i]][0]=j;
          for(k=1;k<10;k++)pimax[genoId[i]][k]=-1;
          iqual = 1; /*** ROHDE 04.09.2001 ***/
        }
        else{
          if (pe == max_prob[genoId[i]] && iqual < 9){
            /* skip a tie that is the mirrored order of a state already recorded */
            for(k=0;k<iqual;k++)
              if(state[genoId[i]][j][0] == state[genoId[i]][pimax[genoId[i]][k]][1]) pe=0.0;
            if(pe > 0.0)pimax[genoId[i]][iqual++]=j;
          }
          else p2max[genoId[i]] = pe;
        }
      }
    }
  } /* end of maximum state search */

  /* Haplotypes after MLE: one line per person listing the best state(s) */
  jjx = 0;
  for(i=0;i<np;i++){
    sprintf(lino,"%i %s >> %s",i, geno[ge[i]], CharNull); /* 06.11.2014/SKn */
    for(k=0;k<10;k++){
      j = pimax[ge[i]][k];
      if(j > -1){
        if(k>0)pspace(len+3,lino); /*** ROHDE 11.09.2001 ***/
        printHaplotype(hc[state[ge[i]][j][0]],len,lino);
        strcat(lino," <> \0");
        printHaplotype(hc[state[ge[i]][j][1]],len,lino);
        sprintf(lin," P>> %9.7f D>> %9.7f", max_prob[ge[i]],max_prob[ge[i]]-p2max[ge[i]]);
        strcat(lino,lin);
      }
      else break;
    }
    strcpy(hapres[jjx++],lino);
  }
  /* endfind best states after MLE 10.3.2000 ROHDE */

  /* 06.11.2014/SKn: the formatted text is built but only the number is returned */
  sprintf(lino,"Likelihood = %f%s", likold, CharNull);
  (*likeres) = likold ;

  /* Sample over states for each genotype ***********************************/
  for(i=0;i<ng;i++){
    gsum = 0.0;
    for(j=0;j<nstate[genoId[i]];j++){
      h1 = state[genoId[i]][j][0];
      h2 = state[genoId[i]][j][1];
      /* h1x/h2x: 1-based position of the haplotype in hlist, 0 when it was not reported */
      h1x = h2x = 0;
      for(ki=1;ki<=nhap;ki++) if( h1 == hlist[ki-1] ) h1x=ki;
      for(kj=1;kj<=nhap;kj++) if( h2 == hlist[kj-1] ) h2x=kj;
      if(h1x>0 && h2x>0){
        /* order the pair so that it addresses the upper triangle of htpp */
        if(h2x < h1x){ k=h1x; h1x=h2x; h2x=k;}
        pgen[genoId[i]][htpp[h1x-1][h2x-1]] += hapnew[h1]*hapnew[h2];
        gsum += hapnew[h1]*hapnew[h2];
      }
      else{
        /* at least one haplotype was not reported: goes to the last cell */
        pgen[genoId[i]][htpp[nhap][nhap]] += hapnew[h1]*hapnew[h2];
        gsum += hapnew[h1]*hapnew[h2];
      }
    }
    /* NOTE(review): gsum is 0 when the genotype has no state -- division by zero? confirm */
    for(k=0;k<(nhap+1)*(nhap+2)/2;k++)pgen[genoId[i]][k] /= gsum;
  }

  jjx = 0;
  if (*tog == 1){
    /* one column per pair of reported haplotypes, plus one for the last cell */
    for(i=0;i<np;i++){
      strcpy(lino,"\0");
      for(ki=0;ki<nhap;ki++){ /* each haplotype alone */
        for(kj=ki;kj<nhap;kj++){
          sprintf(lin,"%8.6f %s",pgen[ge[i]][htpp[ki][kj]], CharNull); /* 06.11.2014/SKn */
          strcat(lino,lin);
        }
      }
      sprintf(lin,"%8.6f%s",pgen[ge[i]][htpp[nhap][nhap]], CharNull); /* 06.11.2014/SKn */
      strcat(lino,lin);
      strcpy(desres[jjx],lino);
      jjx++;
    }
  }

  /* changed per Klaus; construction of the design matrix 16.09.2008
     (the earlier pgen-based implementation was replaced by the block below) */
  /* new: per Klaus; 17.09.2008 */
  if(*tog == 0){
    peh = init_dvector(NULL, 0.0, nhap+1);
    for(i=0;i<np;i++){
      strcpy(lino,"\0");
      for(j=0;j<nhap;j++){
        gsum = 0.0;
        for(ki=0;ki<nstate[ge[i]];ki++){
          h1 = state[ge[i]][ki][0];
          h2 = state[ge[i]][ki][1];
          h = hlist[j];
          pex = hapnew[h1]*hapnew[h2];
          gsum += 2*pex;
          /* homozygous for h counts twice, heterozygous once */
          if((h == h1) && (h == h2))peh[j] += 2*pex;
          else if((h == h1) || (h == h2))peh[j] += pex;
        } /* end nstate */
      } /* end nhap */
      pehh = 0.0;
      for(j=0;j<nhap;j++){
        pehh += 2*peh[j];
        sprintf(lin,"%8.6f %s",2*peh[j]/gsum, CharNull); /* 06.11.2014/SKn */
        strcat(lino,lin);
      } /* end print */
      /* last column: remainder so that the row adds up to 2 */
      sprintf(lin,"%8.6f%s",2.0-pehh/gsum, CharNull); /* 06.11.2014/SKn */
      strcat(lino,lin);
      strcpy(desres[jjx],lino);
      jjx++;
      init_dvector(peh, 0.0, nhap+1);
    } /* end np */
    destroy_d_array(peh);
  }

  /* release everything allocated above */
  destroy_c_array2(geno);
  destroy_u_array(po);
  for ( i=0;i<ng;i++) {
    destroy_u_array2(state[i]) ;
  }
  free((uint***)state);
  destroy_u_array2(htpp);
  destroy_i_array(nstate);
  destroy_i_array(mstate);
  destroy_i_array(genoId);
  destroy_i_array(mg);
  destroy_i_array(merke);
  destroy_i_array(nulmer);
  destroy_i_array(ge);
  destroy_i_array(hlist);
  destroy_d_array(prob);
  destroy_d_array(max_prob);
  destroy_d_array(hapnew);
  destroy_d_array(haptmp);
  destroy_d_array(hap);
  destroy_d_array(genoProb);
  destroy_d_array2(pgen);
  destroy_i_array(hc);
  destroy_d_array(p2max);
  destroy_i_array2(pimax);
}
/* =============================== */ void Read_Index(char **argv,int args,int nRead,int nSeq) /* =============================== */ { int i,j,k,n_reads=nSeq+nRead; FILE *namef; fasta *seqp; void ArraySort_String(int n,char Pair_Name[][Max_N_NameBase],int *brr); char **cmatrix(long nrl,long nrh,long ncl,long nch); char DBname[n_reads][Max_N_NameBase]; char tempct[60],tempc1[60],tempc2[60],tempc3[60],tempc4[60],tempc5[60]; int temp1,temp2,temp3; int i_contig,i_reads,num_rd_find,stopflag; int *readIndex; int mapindex=0; if((readIndex= (int *)calloc(n_reads,sizeof(int))) == NULL) { printf("Error Contig_Merge: calloc - readIndex\n"); exit(1); } rdnames=cmatrix(0,nRead+1,0,Max_N_NameBase); i_contig=0; if((namef = fopen(argv[args],"r")) == NULL) { printf("ERROR Memory_Allocate:: reads group file \n"); exit(1); } i_reads=0; printf("before read: %d %d\n",release_flag,nRead); if(release_flag==0) { while(fscanf(namef,"%s %s %s %d %s %s %d %d",tempc1,tempc2,tempc3,&temp1,tempc4,tempc5,&temp2,&temp3)!=EOF) { strcpy(rdnames[i_reads],tempc1); i_reads++; } } else if(release_flag==1) { while(fscanf(namef,"%s %s %s %s %d %s %s %d %d",tempct,tempc1,tempc2,tempc3,&temp1,tempc4,tempc5,&temp2,&temp3)!=EOF) { strcpy(rdnames[i_reads],tempc1); i_reads++; } } else if(release_flag==2) { while(fscanf(namef,"%s",rdnames[i_reads])!=EOF) { // printf("after read: %d %s\n",i_reads,rdnames[i_reads]); i_reads++; } } fclose(namef); for(j=0;j<nSeq;j++) { seqp=expp[rd_head[j]]+rd_index[j]; strcpy(DBname[j],seqp->name); readIndex[j]=j; } for(j=0;j<nRead;j++) { strcpy(DBname[j+nSeq],rdnames[j]); readIndex[j+nSeq]=j+nSeq; } n_reads=nSeq+nRead; printf("before sort: %d %d\n",nSeq,nRead); ArraySort_String(n_reads,DBname,readIndex); num_rd_find=0; mapindex=0; for(i=0;i<n_reads-1;i++) { if(readIndex[i]>=nSeq) mapindex = readIndex[i]; /* search reads with an index < i */ /* search reads with an index > i */ stopflag=0; j=i+1; while((j<n_reads)&&(stopflag==0)) { if(strcmp(DBname[j],DBname[i])==0) { 
if(readIndex[j]>=nSeq) mapindex = readIndex[j]; j++; } else stopflag=1; } if((j-i)>1) { for(k=i;k<j;k++) { if(readIndex[k]<nSeq) { ctg2wgs_index[mapindex-nSeq]=readIndex[k]; num_rd_find++; k=j; } } } i=j-1; } printf("reads found: %d %d\n",nRead,num_rd_find); }
/************************************************************************* Weighted fitting by Chebyshev polynomial in barycentric form, with constraints on function values or first derivatives. Small regularizing term is used when solving constrained tasks (to improve stability). Task is linear, so linear least squares solver is used. Complexity of this computational scheme is O(N*M^2), mostly dominated by least squares solver SEE ALSO: PolynomialFit() INPUT PARAMETERS: X - points, array[0..N-1]. Y - function values, array[0..N-1]. W - weights, array[0..N-1] Each summand in square sum of approximation deviations from given values is multiplied by the square of corresponding weight. Fill it by 1's if you don't want to solve weighted task. N - number of points, N>0. XC - points where polynomial values/derivatives are constrained, array[0..K-1]. YC - values of constraints, array[0..K-1] DC - array[0..K-1], types of constraints: * DC[i]=0 means that P(XC[i])=YC[i] * DC[i]=1 means that P'(XC[i])=YC[i] SEE BELOW FOR IMPORTANT INFORMATION ON CONSTRAINTS K - number of constraints, 0<=K<M. K=0 means no constraints (XC/YC/DC are not used in such cases) M - number of basis functions (= polynomial_degree + 1), M>=1 OUTPUT PARAMETERS: Info- same format as in LSFitLinearW() subroutine: * Info>0 task is solved * Info<=0 an error occured: -4 means inconvergence of internal SVD -3 means inconsistent constraints -1 means another errors in parameters passed (N<=0, for example) P - interpolant in barycentric form. Rep - report, same format as in LSFitLinearW() subroutine. Following fields are set: * RMSError rms error on the (X,Y). * AvgError average error on the (X,Y). * AvgRelError average relative error on the non-zero Y * MaxError maximum error NON-WEIGHTED ERRORS ARE CALCULATED IMPORTANT: this subroitine doesn't calculate task's condition number for K<>0. 
SETTING CONSTRAINTS - DANGERS AND OPPORTUNITIES: Setting constraints can lead to undesired results, like ill-conditioned behavior, or inconsistency being detected. From the other side, it allows us to improve quality of the fit. Here we summarize our experience with constrained regression splines: * even simple constraints can be inconsistent, see Wikipedia article on this subject: http://en.wikipedia.org/wiki/Birkhoff_interpolation * the greater is M (given fixed constraints), the more chances that constraints will be consistent * in the general case, consistency of constraints is NOT GUARANTEED. * in the one special cases, however, we can guarantee consistency. This case is: M>1 and constraints on the function values (NOT DERIVATIVES) Our final recommendation is to use constraints WHEN AND ONLY when you can't solve your task without them. Anything beyond special cases given above is not guaranteed and may result in inconsistency. -- ALGLIB PROJECT -- Copyright 10.12.2009 by Bochkanov Sergey *************************************************************************/ void polynomialfitwc(ap::real_1d_array x, ap::real_1d_array y, const ap::real_1d_array& w, int n, ap::real_1d_array xc, ap::real_1d_array yc, const ap::integer_1d_array& dc, int k, int m, int& info, barycentricinterpolant& p, polynomialfitreport& rep) { double xa; double xb; double sa; double sb; ap::real_1d_array xoriginal; ap::real_1d_array yoriginal; ap::real_1d_array y2; ap::real_1d_array w2; ap::real_1d_array tmp; ap::real_1d_array tmp2; ap::real_1d_array tmpdiff; ap::real_1d_array bx; ap::real_1d_array by; ap::real_1d_array bw; ap::real_2d_array fmatrix; ap::real_2d_array cmatrix; int i; int j; double mx; double decay; double u; double v; double s; int relcnt; lsfitreport lrep; if( m<1||n<1||k<0||k>=m ) { info = -1; return; } for(i = 0; i <= k-1; i++) { info = 0; if( dc(i)<0 ) { info = -1; } if( dc(i)>1 ) { info = -1; } if( info<0 ) { return; } } // // weight decay for correct handling of task 
which becomes // degenerate after constraints are applied // decay = 10000*ap::machineepsilon; // // Scale X, Y, XC, YC // lsfitscalexy(x, y, n, xc, yc, dc, k, xa, xb, sa, sb, xoriginal, yoriginal); // // allocate space, initialize/fill: // * FMatrix- values of basis functions at X[] // * CMatrix- values (derivatives) of basis functions at XC[] // * fill constraints matrix // * fill first N rows of design matrix with values // * fill next M rows of design matrix with regularizing term // * append M zeros to Y // * append M elements, mean(abs(W)) each, to W // y2.setlength(n+m); w2.setlength(n+m); tmp.setlength(m); tmpdiff.setlength(m); fmatrix.setlength(n+m, m); if( k>0 ) { cmatrix.setlength(k, m+1); } // // Fill design matrix, Y2, W2: // * first N rows with basis functions for original points // * next M rows with decay terms // for(i = 0; i <= n-1; i++) { // // prepare Ith row // use Tmp for calculations to avoid multidimensional arrays overhead // for(j = 0; j <= m-1; j++) { if( j==0 ) { tmp(j) = 1; } else { if( j==1 ) { tmp(j) = x(i); } else { tmp(j) = 2*x(i)*tmp(j-1)-tmp(j-2); } } } ap::vmove(&fmatrix(i, 0), &tmp(0), ap::vlen(0,m-1)); } for(i = 0; i <= m-1; i++) { for(j = 0; j <= m-1; j++) { if( i==j ) { fmatrix(n+i,j) = decay; } else { fmatrix(n+i,j) = 0; } } } ap::vmove(&y2(0), &y(0), ap::vlen(0,n-1)); ap::vmove(&w2(0), &w(0), ap::vlen(0,n-1)); mx = 0; for(i = 0; i <= n-1; i++) { mx = mx+fabs(w(i)); } mx = mx/n; for(i = 0; i <= m-1; i++) { y2(n+i) = 0; w2(n+i) = mx; } // // fill constraints matrix // for(i = 0; i <= k-1; i++) { // // prepare Ith row // use Tmp for basis function values, // TmpDiff for basos function derivatives // for(j = 0; j <= m-1; j++) { if( j==0 ) { tmp(j) = 1; tmpdiff(j) = 0; } else { if( j==1 ) { tmp(j) = xc(i); tmpdiff(j) = 1; } else { tmp(j) = 2*xc(i)*tmp(j-1)-tmp(j-2); tmpdiff(j) = 2*(tmp(j-1)+xc(i)*tmpdiff(j-1))-tmpdiff(j-2); } } } if( dc(i)==0 ) { ap::vmove(&cmatrix(i, 0), &tmp(0), ap::vlen(0,m-1)); } if( dc(i)==1 ) { 
ap::vmove(&cmatrix(i, 0), &tmpdiff(0), ap::vlen(0,m-1)); } cmatrix(i,m) = yc(i); } // // Solve constrained task // if( k>0 ) { // // solve using regularization // lsfitlinearwc(y2, w2, fmatrix, cmatrix, n+m, m, k, info, tmp, lrep); } else { // // no constraints, no regularization needed // lsfitlinearwc(y, w, fmatrix, cmatrix, n, m, 0, info, tmp, lrep); } if( info<0 ) { return; } // // Generate barycentric model and scale it // * BX, BY store barycentric model nodes // * FMatrix is reused (remember - it is at least MxM, what we need) // // Model intialization is done in O(M^2). In principle, it can be // done in O(M*log(M)), but before it we solved task with O(N*M^2) // complexity, so it is only a small amount of total time spent. // bx.setlength(m); by.setlength(m); bw.setlength(m); tmp2.setlength(m); s = 1; for(i = 0; i <= m-1; i++) { if( m!=1 ) { u = cos(ap::pi()*i/(m-1)); } else { u = 0; } v = 0; for(j = 0; j <= m-1; j++) { if( j==0 ) { tmp2(j) = 1; } else { if( j==1 ) { tmp2(j) = u; } else { tmp2(j) = 2*u*tmp2(j-1)-tmp2(j-2); } } v = v+tmp(j)*tmp2(j); } bx(i) = u; by(i) = v; bw(i) = s; if( i==0||i==m-1 ) { bw(i) = 0.5*bw(i); } s = -s; } barycentricbuildxyw(bx, by, bw, m, p); barycentriclintransx(p, 2/(xb-xa), -(xa+xb)/(xb-xa)); barycentriclintransy(p, sb-sa, sa); // // Scale absolute errors obtained from LSFitLinearW. // Relative error should be calculated separately // (because of shifting/scaling of the task) // rep.taskrcond = lrep.taskrcond; rep.rmserror = lrep.rmserror*(sb-sa); rep.avgerror = lrep.avgerror*(sb-sa); rep.maxerror = lrep.maxerror*(sb-sa); rep.avgrelerror = 0; relcnt = 0; for(i = 0; i <= n-1; i++) { if( ap::fp_neq(yoriginal(i),0) ) { rep.avgrelerror = rep.avgrelerror+fabs(barycentriccalc(p, xoriginal(i))-yoriginal(i))/fabs(yoriginal(i)); relcnt = relcnt+1; } } if( relcnt!=0 ) { rep.avgrelerror = rep.avgrelerror/relcnt; } }
static void ReadInMSoutputAndCalculateSummaryStatistics (FILE * pfin){ msOutputArray *msOutArr; msOutput *outPtr; struct SumStat **sumStatArr; struct SumStat **sumStatArrTemp; int initialArrSize; int msbayesFormat, taxonID, locusID; int maxsites = 1000; /* max number of seg sites, used for size of data mat */ int nsam, i, howmany, npops, *config; char **list, line[MAX_LNSZ], longline[262145], *mutscanline; char dum[100]; tPositionOfSegSites *posit; double theta; int segsites, count, numTaxonLocusPairs, BasePairs, taxonLocusID; int Fst_bool = 0, Qbool = 0; int isNumSegSitesConst = 0; /* 1 with -s, the number of segregating sites * will be constant in each sample * 0 with -t, varies between samples */ /* read in first line of output (command line options of msDQH) */ fgets (line, MAX_LNSZ, pfin); if (! (msOutArr = malloc(sizeof(msOutputArray)))) { perror ("ERROR: not enough memory in ReadInMSoutput\n"); exit(EXIT_FAILURE); } /* processing the header information */ if (strcmp(line, "# BEGIN MSBAYES\n") == 0) { /* process info from msbayes.pl */ fgets(line, MAX_LNSZ,pfin); /* get next line */ sscanf(line, "# numTaxonLocusPairs %d numTaxonPairs %d numLoci %d", &(msOutArr->numTaxonLocusPairs), &(msOutArr->numTaxonPairs), &(msOutArr->numLoci)); /* if taxon:locus matrix is required, we can process here */ fgets(line, MAX_LNSZ,pfin); /* get next line */ msbayesFormat = 1; initialArrSize = 500; } else { msOutArr->numTaxonPairs=msOutArr->numTaxonLocusPairs = 1; msOutArr->numLoci = 1; msbayesFormat = 0; initialArrSize = 1; } /* allocate memory to msOutArr */ if (! 
(msOutArr->dat = (msOutput *)calloc(initialArrSize, sizeof(msOutput)))) { perror ("ERROR: not enough memory 2 in ReadInMSoutput\n"); exit(EXIT_FAILURE); } msOutArr->numElements = 0; msOutArr->allocatedSize = initialArrSize; /* go through the array of each msDQH run and get sum stats */ if ((sumStatArr = calloc(initialArrSize, sizeof(struct SumStat *))) == NULL) { perror("ERROR: No mem in main "); exit(EXIT_FAILURE); } /* go through the array of each msDQH run and get sum stats */ if ((sumStatArrTemp = calloc(msOutArr->numTaxonLocusPairs, sizeof(struct SumStat *))) == NULL) { perror("ERROR: No mem in main "); exit(EXIT_FAILURE); } int sumStatCounter = 0; do { int endOfFile = 0; if (msbayesFormat) { while (strncmp(line, "# taxonID ", 10) != 0) { if (! fgets(line, MAX_LNSZ,pfin)) { /* basically skipping empty line */ endOfFile = 1; break; } } if(endOfFile) break; /* Before msDQH output, numerial IDs for taxon and locus are inserted */ sscanf(line, "# taxonID %d locusID %d\n", &taxonID, &locusID); fgets(line, MAX_LNSZ,pfin); } else { /* CHECK THIS WELL, NAOKI, SWRS */ while (BlankCharStringQ(line)) { if (! fgets(line,MAX_LNSZ,pfin)) { endOfFile=1; break; } } if (endOfFile) break; taxonID = locusID = 1; } /* * Get the following variables from the command line options * NOTE: this line has to match with system() line of msbayes.pl * * nsam: number of total samples * howmany: how many simulations were run * THETA: 4 Ne mu used for the simulation * This is removed, and getting this in more prper way * BasePairs: sequence length * taxonLocusID: sequential ID for each taxon:locus pair (1 to # of taxon:locus pairs) * numTaxonLocusPairs: total number of taxon:locus pairs per 1 set of sims. 
*/ numTaxonLocusPairs = taxonLocusID = BasePairs = -1; sscanf (line, " %s %s %d %d %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %u %s %s %s %d %s %s %s %u ", dum, dum, &nsam, &howmany, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, dum, &BasePairs, dum, dum, dum, &taxonLocusID, dum, dum, dum, &numTaxonLocusPairs); if(!msbayesFormat) { msOutArr->numTaxonPairs=msOutArr->numTaxonLocusPairs = 1; taxonLocusID=numTaxonLocusPairs=1; } /* * of course, I have to put a prior generator in the actual sample * generator for theta and tau down below for each count */ /* Find the theta or number of segregating sites from -t or -s */ mutscanline = strstr (line, "-s"); if (mutscanline != NULL) { /* number of segregating sites is constant */ sscanf (mutscanline, " %d", &segsites); isNumSegSitesConst = 1; theta = thetaW (nsam, segsites); } else { mutscanline = strstr (line, "-t"); if (mutscanline != NULL) sscanf (mutscanline, "-t %s", dum); else { fprintf (stderr, "\nmutscanline problem -s or -t not found \n"); exit (1); } theta = atof (dum); /* -Q will tell transition transversion rate ratio and base freqs */ if ((mutscanline = strstr (line, "-Q")) != NULL) { Qbool = 1; } } mutscanline = strstr(line, "-r"); if(mutscanline != NULL) { sscanf(mutscanline, "-r %s %d", dum, &BasePairs); // dum contains recomb rate } /* * config become an array with npops elements, * it contains subpop sample sizes */ npops = FindNumPopsAndSubpopSampleSizes (line, &config); if (npops == 1) { config[0] = nsam; } /* Checking if 0 < config[i] < nsam for all i */ if ((npops > 1) && (multiplepopssampledfrom (nsam, npops, config))) Fst_bool = 1; /* prepare the storage for segregating sites data */ if (isNumSegSitesConst) maxsites = segsites; list = cmatrix (nsam, maxsites + 1); posit 
= (tPositionOfSegSites *) calloc (maxsites, sizeof (tPositionOfSegSites)); if (list == NULL) { fprintf (stderr, "No mem for segregating sites data, couldn't allocated memory for list\n"); exit (EXIT_FAILURE); } if (posit == NULL) { fprintf (stderr, "No mem for segregating sites data, couldn't allocated memory for posit\n"); exit (EXIT_FAILURE); } /* Start to process the data */ count = 0; while (howmany - count++) { /* The line after "//" is the beginning of simulation data */ while (strcmp (line, "//\n") != 0) fgets (line, MAX_LNSZ, pfin); /* Number of segregating sites line */ fgets (line, MAX_LNSZ, pfin); if (!isNumSegSitesConst) { sscanf (line, "segsites: %d\n", &segsites); /* JRO: this should be a while loop and double the size of the data * matrix until large enough; updating accordingly. */ /* if (segsites >= maxsites) /1* readjust the size of data matrix *1/ */ while (segsites >= maxsites) /* readjust the size of data matrix */ { /* maxsites = segsites + 10; /1* extra 10 elements *1/ */ maxsites *= 2; /* extra 10 elements */ posit = (tPositionOfSegSites *) realloc (posit, maxsites * sizeof (tPositionOfSegSites)); /*printf("PRE %d %d %d\n", segsites, maxsites, nsam); */ if (posit == NULL || biggerlist (nsam, maxsites, list) != 0) { fprintf (stderr, "Not enough memory for reallocating char matrix\n"); exit (EXIT_FAILURE); } } } /* get rid of base frequency line */ if (Qbool) { fgets (line, MAX_LNSZ, pfin); sscanf (line, "freqACGT: %s %s %s %s", dum, dum, dum, dum); } if (segsites > 0) { /* read in position of segregating sites */ fgets (longline, 262144, pfin); /* posit array initialized */ ReadInPositionOfSegSites (longline, posit, segsites); /* list[][] get initialized with character states */ for (i = 0; i < nsam; i++) fscanf (pfin, " %s", list[i]); } /* what do we do if segsites = 0?, Naoki */ /* insert the data into the array */ msOutArr->numElements ++; if (msOutArr->numElements > msOutArr->allocatedSize) { /* reallocate the memory */ 
msOutArr->allocatedSize += 1000; msOutArr->dat = realloc(msOutArr->dat, sizeof(msOutput) * (msOutArr->allocatedSize)); if(msOutArr->dat ==NULL) { perror("Realloc of msOutArr->dat failed\n"); exit(EXIT_FAILURE); } sumStatArr = realloc(sumStatArr, msOutArr->allocatedSize * sizeof(struct SumStat *)); if(sumStatArr ==NULL) { perror("Realloc of sumStatArr failed\n"); exit(EXIT_FAILURE); } } outPtr = & msOutArr->dat[msOutArr->numElements-1]; outPtr->nsam = nsam; outPtr->segsites = segsites; outPtr->seqDat = list; outPtr->nsub = gNadv; outPtr->npops = npops; outPtr->n = config; outPtr->theta = theta; outPtr->isNumSegSitesFixed = isNumSegSitesConst; outPtr->Qbool = Qbool; outPtr->Fst_bool = Fst_bool; outPtr->replicateID = count; outPtr->numReplicates = howmany; outPtr->taxonID = taxonID; outPtr->locusID = locusID; outPtr->taxonLocusID = taxonLocusID; outPtr->NumTaxonLocusPairs = numTaxonLocusPairs; outPtr->BasePairs = BasePairs; sumStatArr[sumStatCounter++] = CalcSumStats (outPtr); } freeCMatrix (nsam, list); free(posit); } while (fgets(line, MAX_LNSZ,pfin)); int k, j; for (k = 0; k < msOutArr->numElements; k++) { j = k % msOutArr->numTaxonLocusPairs; sumStatArrTemp[j] = sumStatArr[k]; if (j == msOutArr->numTaxonLocusPairs - 1) { // we got sumStats for 1 set PrintSumStatsArray(sumStatArrTemp, msOutArr->numTaxonLocusPairs, msOutArr->numLoci, msOutArr->numTaxonPairs); } } free(msOutArr->dat); free(msOutArr); free(sumStatArr); }
void read_input(char input_file[50]){ int pop, ind, locus; int ret; FILE* fp; int trash; fp = fopen(input_file,"r"); if( fp ) printf( "\nOpening input file %s \n", input_file ); else{ printf( "Cannot open file %s \n", input_file ); } fprintf(flog,"\nInput file contains:\n"); fscanf( fp, "%d %d\n", &number_of_pop, &number_of_loci ); if (number_of_pop==1) printf("\nFile contains 1 population\n"); else printf("\nFile contains %d populations\n",number_of_pop); if (number_of_loci==1) printf("\nIndividuals are typed for 1 locus\n\n"); else printf("\nIndividuals are typed for %d loci\n\n",number_of_loci); fprintf(flog,"%d populations\nindividuals typed for %d loci\n",number_of_pop,number_of_loci); fflush(flog); pop_size=ivector(0,number_of_pop); num_of_pairs=ivector(0,number_of_pop); pop_name=cmatrix(0,number_of_pop,0,50); p=dvector(0,number_of_loci); mut=dvector(0,number_of_loci); mut95=dmatrix(0,number_of_loci,0,2); total_sample_size=0; total_num_of_pairs=0; max_num_of_pairs=0; for (pop=0 ; pop<number_of_pop ; pop++){ //printf("pop %d\n",pop); fscanf( fp, "%d %s \n", &pop_size[pop], pop_name[pop]); num_of_pairs[pop] = pop_size[pop] * ( pop_size[pop]-1 ) / 2; printf("Population %d is called \"%s\" and has sample size of %d individuals\n", pop+1, pop_name[pop], pop_size[pop]); printf("(this makes %d pairwise haplotypes comparisons)\n\n", num_of_pairs[pop]); fprintf(flog, "Population \"%s\" has %d individuals (%d pairwise comparisons)\n", pop_name[pop],pop_size[pop],num_of_pairs[pop]); fflush(flog); total_sample_size += pop_size[pop]; total_num_of_pairs += num_of_pairs[pop]; if(num_of_pairs[pop]>max_num_of_pairs){ max_num_of_pairs=num_of_pairs[pop]; } for (ind=0 ; ind<pop_size[pop] ; ind++){ for (locus=0 ; locus<number_of_loci ; locus++) fscanf(fp,"%d",&trash); fscanf(fp,"\n"); } } fprintf(flog, "\n"); printf("max_num_of_pairs=%d\n",max_num_of_pairs); if(max_num_of_pairs>100000){ max_num_of_pairs=100000; printf("max_num_of_pairs=%d\n",max_num_of_pairs); } printf("\nTotal 
sample size is %d individuals\n\n", total_sample_size); ret = fclose(fp); if( ret==0 ) printf( "File %s closed\n", input_file ); else{ printf( "\nCannot close file %s \n", input_file ); } }
// ----------- ReadTraits ------------- traits ReadTraits(char traitfile[50]) { struct traits C; FILE *Cread; char line[MAXTRAITLINE]; int i, j, waitingforspace; int nline, words; int extra = 0; char word[(int) (MAXTRAITLINE / 2)][(int) (MAXTRAITLINE / 2)]; char tmp[MAXTAXONLENGTH + 6]; int lineending; C.ntaxa = 0; C.ntraits = 0; // pre-pre-read lineending = whatnewline(traitfile); // preread: if ((Cread = fopen(traitfile, "r")) == NULL) { printf("Cannot open traits file\n"); exit(0); } nline = 0; words= 0; while (myfgets(line, 1000, Cread, lineending) != NULL) { // parse the first line // should be able to do this with iterative scanf, but I couldn't // work it out - this is clunky! if (nline < 2) { waitingforspace = 1; strcpy(word[0], ""); words= 0; for (i = 0; i < strlen(line); i++) { if (isspace(line[i]) && (waitingforspace)) { words++; strcpy(word[words], ""); waitingforspace = 0; } else if (isspace(line[i])) // 2nd+ space in a series of white { } else { strncat(word[words] , &line[i], 1); waitingforspace = 1; } } if(nline==1) { // to see if there is a names line if (strcmp(word[0], "name") ==0) extra = 1; } } nline++; } fclose(Cread); C.ntraits = words-1; C.type = ivector(0, C.ntraits-1); C.ntaxa = nline -1 -extra; C.taxon = cmatrix(0 , C.ntaxa-1, 0 , MAXTAXONLENGTH); C.tr = matrix(0, C.ntaxa-1, 0, C.ntraits-1); C.trname = cmatrix(0, C.ntraits-1, 0, MAXTAXONLENGTH); strcpy(C.trname[0], "nonames"); // printf("traits: %d\ttaxa: %d\n", C.ntraits, C.ntaxa); // real read Cread = fopen(traitfile, "r"); nline = 0; while (myfgets(line, 1000, Cread, lineending) != NULL) { words= 0; strcpy(word[0], ""); waitingforspace = 1; // parse into words, every line for (i = 0; i < strlen(line); i++) { if (isspace(line[i]) && (waitingforspace)) { words++; strcpy(word[words], ""); waitingforspace = 0; } else if (isspace(line[i])) { } else { strncat(word[words] , &line[i], 1); waitingforspace = 1; } } if(nline==0) { // now we know how many traits for (j = 1; j< words; j++) { 
C.type[j-1] = atoi(word[j]); } } else // lines 2+ { // check (again) for trait names if ((nline == 1) && (extra == 1)) { for (j = 1; j < words; j++) { strcpy(C.trname[j-1], word[j]); } } else // either line 3+ or line 2 with no trait names { // assign taxon name strcpy(C.taxon[(nline-1)-extra], word[0]); for (j = 1; j < words; j++) { C.tr[nline-1-extra][j-1] = (float) atof(word[j]); // printf("%s\t%d\t%f\n", C.taxon[nline-1], j-1, (float) atof(word[j])); } } } nline++; } fclose(Cread); // assign taxon names if there are none if (extra != 1) // if no trait names given { for (j = 0; j < C.ntraits; j++) { sprintf(tmp, "trait_%d", j+1); strcpy(C.trname[j], tmp); } } for (i = 0; i < C.ntraits; i++) { if (C.type[i] == 1) { printf("Char type = 1; multistate characters not implemented yet\n"); exit(EXIT_FAILURE); } } return C; }
// ---------------- ReadSample --------------------------- sample ReadSample(char samplefile[50]) { struct sample InSample; char line[500]; int abundI; char preAbund[50]; int i, j, z, found; char plotname[MAXPLOTLENGTH+1]; char lastplot[MAXPLOTLENGTH+1]; char taxaI[MAXTAXONLENGTH+1]; int lineending; strcpy(lastplot, ""); i = 0; InSample.nrec = 0; InSample.nsamples = 0; InSample.maxrec = 0; InSample.ntaxa = 0; InSample.totabund = 0; // pre-pre-read lineending = whatnewline(samplefile); // preread: // READ PROPERLY if ((Ft = fopen(samplefile, "r")) == NULL) { printf("Cannot open sample file\n"); exit(0); } while (myfgets(line, 500, Ft, lineending) != NULL) // OK to lv length undynamic { sscanf(line, "%s %s %s", plotname, preAbund, taxaI); // string // Test for non-integer abundances: z = 0; while (preAbund[z] != '\0') { // printf("%d %d\n", z, preAbund[z]); if (!isdigit(preAbund[z])) { fprintf(stderr,"Error: sample abundances must be integers\n"); exit(1); } z++; } InSample.nrec++; // if a new plot: if (strcmp(lastplot, plotname) != 0) { InSample.nsamples ++; i = 0; strcpy(lastplot, plotname); } i++; if (InSample.maxrec < i) InSample.maxrec = i; } fclose(Ft); //printf("nrec = %d; nsamples = %d; maxrec = %d\n", InSample.nrec, InSample.nsamples, InSample.maxrec); // Dimension structure InSample.srec = ivector(0, InSample.nsamples - 1); InSample.irec = ivector(0, InSample.nsamples - 1); InSample.id = imatrix(0, InSample.nsamples - 1, 0, InSample.maxrec); InSample.abund = imatrix(0, InSample.nsamples - 1, 0, InSample.maxrec); InSample.pabund = matrix(0, InSample.nsamples - 1, 0, InSample.maxrec); InSample.taxa = cmatrix(0, InSample.nrec-1, 0, MAXTAXONLENGTH); InSample.pname = cmatrix(0, InSample.nsamples - 1, 0, MAXPLOTLENGTH); // Read file for dynamic structure strcpy(lastplot, ""); i = 0; InSample.nsamples = 0; // recycling as counter Ft = fopen(SampleFile, "r"); while (myfgets(line, 500, Ft, lineending) != NULL) { sscanf(line, "%s %d %s", plotname, &abundI, taxaI); // 
do this on the first line of each new plot if (strcmp(lastplot, plotname) != 0) { strcpy(InSample.pname[InSample.nsamples], plotname); InSample.nsamples ++; i = 0; strcpy(lastplot, plotname); InSample.srec[InSample.nsamples-1]=0; } // recs per sample counter InSample.srec[InSample.nsamples-1]++; // set abundance InSample.abund[InSample.nsamples-1][i] = abundI; // set taxon // all other cases than first found = 0; for (j = 0; j < InSample.ntaxa; j++) { if (strcmp(InSample.taxa[j], taxaI) == 0) { InSample.id[InSample.nsamples-1][i] = j; found = 1; break; } } // new taxon found - executed on first line if(found == 0) { strcpy(InSample.taxa[InSample.ntaxa], taxaI); InSample.id[InSample.nsamples-1][i] = InSample.ntaxa; InSample.ntaxa++; } i++; } fclose(Ft); //Calculate sample and species total abundances and frequency InSample.sppabund = lvector(0, InSample.ntaxa - 1); InSample.psppabund = vector(0, InSample.ntaxa -1 ); InSample.sppfreq = lvector(0, InSample.ntaxa - 1); InSample.psppfreq = vector(0, InSample.ntaxa -1 ); // clear by sample, insample for (i = 0; i < InSample.nsamples; i++) { InSample.irec[i] = 0; for (j = 0; j < InSample.srec[i];j++) { InSample.irec[i] += InSample.abund[i][j]; InSample.sppabund[InSample.id[i][j]] = 0; } } // clear by taxonNo in all samples for (i = 0; i < InSample.ntaxa; i++) { InSample.sppfreq[i] = 0; } // calculate for (i = 0; i < InSample.nsamples; i++) { for (j = 0; j < InSample.srec[i];j++) { InSample.pabund[i][j] = (float) InSample.abund[i][j] / (float) InSample.irec[i]; InSample.sppabund[InSample.id[i][j]] += InSample.abund[i][j]; InSample.totabund += InSample.abund[i][j]; InSample.sppfreq[InSample.id[i][j]]++; // printf("%d %d %d %s %d\n", i, j, InSample.id[i][j], InSample.taxa[InSample.id[i][j]], (int) InSample.sppabund[InSample.id[i][j]]); } } for (i = 0; i < InSample.ntaxa; i++) { InSample.psppabund[i] = (float) InSample.sppabund[i] / (float) InSample.totabund; InSample.psppfreq[i] = (float) InSample.sppfreq[i] / (float) 
InSample.nsamples; } return InSample; }
// -------------- WriteNexus ------------------ void WriteNexus(phylo P[], int ntree, sample S, int nsamp, traits T, int ntrf) { // Mesquite style! time_t rawtime; int i, j, q, k, x, pass, present; int makedisc, makecont; float abnd; int nterm = 0; phylo WN[ntree]; char tmp[MAXTAXONLENGTH+10]; for (i = 0; i < ntree; i++) { WN[i] = P[i]; // inefficient to make copy so much, but need to // to create third dimension of taxon array // reassign the pointer to a new space - free this! WN[i].taxon = cmatrix(0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10); } // determine number of terminal taxa - assume all trees contain same taxa for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) nterm++; } time ( &rawtime ); strncpy(tmp , ctime(&rawtime), 24); printf("#NEXUS\n[output from phylocom, written %s]\n\n", tmp ); printf("BEGIN TAXA;\n"); if (TreeView == 0) printf("TITLE Phylocom_Phylogeny_Taxa;\n"); // Needed for correct Mesquite grammar, but V1.1 busted! Will not read interior names correctly. printf("\tDIMENSIONS NTAX=%d;\n\tTAXLABELS\n\t", nterm); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) printf(" %s", P[0].taxon[i]); } printf(";\nEND;\n\n"); if (nsamp > 0) { // Characters printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Presence_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? 
SYMBOLS = \" 0 1\";\n", S.nsamples); printf("\tCHARSTATELABELS\n\t\t"); printf("%d %s", 1, S.pname[0]); for (i = 1; i < S.nsamples; i++) { printf(", %d %s", i+1, S.pname[i]); } printf(";\n\tMATRIX\n"); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { printf("\t%s\t" , P[0].taxon[i]); for (j = 0; j < S.nsamples; j++) { present = 0; for (k = 0; k < S.srec[j]; k++) { if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) present = 1; } printf("%d", present); } printf("\n"); } } printf(";\nEND;\n\n"); // Abundances as continuous printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Abundance_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", S.nsamples); printf("\tCHARSTATELABELS\n\t\t"); printf("%d %s", 1, S.pname[0]); for (i = 1; i < S.nsamples; i++) { printf(", %d %s", i+1, S.pname[i]); } printf(";\n\tMATRIX\n"); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { printf("\t%s\t" , P[0].taxon[i]); for (j = 0; j < S.nsamples; j++) { abnd = 0.0; for (k = 0; k < S.srec[j]; k++) { if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) abnd = (float) S.abund[j][k]; } printf(" %f", abnd); } printf("\n"); } } printf(";\nEND;\n\n"); } if (ntrf > 0) { makedisc = 0; makecont = 0; pass = 0; for (i = 0; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) makedisc++; if (T.type[i] == 3) makecont++; } if (makedisc > 0) { // Discrete Traits printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Discrete_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? 
SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9\";\n", makedisc); x = 1; printf("\tCHARSTATELABELS\n\t\t"); // first one for (i = 0; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) { printf("%d %s", x, T.trname[i]); x++; pass = i; break; } } for (i = pass+1; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) { printf(", %d %s", x, T.trname[i]); x++; } } printf(";\n\tMATRIX\n"); for (i = 0; i < T.ntaxa; i++) { printf("\t%s\t" , T.taxon[i]); for (j = 0; j < T.ntraits; j++) { if ((T.type[j] == 0) || (T.type[j] == 1) || (T.type[j] == 2)) { printf("%d", (int) T.tr[i][j]); } } printf("\n"); } printf(";\nEND;\n\n"); } if (makecont > 0) { // Continous Traits printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Continuous_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", makecont); x=1; printf("\tCHARSTATELABELS\n\t\t"); // first one for (i = 0; i < T.ntraits; i++) { if (T.type[i] == 3) { printf("%d %s", x, T.trname[i]); x++; pass = i; break; } } for (i = pass+1; i < T.ntraits; i++) { if (T.type[i] == 3) { printf(", %d %s", x, T.trname[i]); x++; } } printf(";\n\tMATRIX\n"); for (i = 0; i < T.ntaxa; i++) { printf("\t%s\t" , T.taxon[i]); for (j = 0; j < T.ntraits; j++) { if (T.type[j] == 3) { printf(" %f", T.tr[i][j]); } } printf("\n"); } printf(";\nEND;\n\n"); } } printf("BEGIN TREES;\n"); if (TreeView == 0) printf("\tTITLE Phylocom_Phylogenies;\n\tLINK Taxa = Phylocom_Phylogeny_Taxa;\n"); // Ditto! 
printf("\tTRANSLATE\n\t"); for (q = 0; q < ntree; q++) { j = 0; for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { j++; if (q == 0) { if (i == P[0].nnodes-1) printf(" %d %s;\n", j, P[0].taxon[i]); else printf(" %d %s,", j, P[0].taxon[i]); } sprintf(tmp, "%d", j); strcpy(WN[q].taxon[i], tmp); } else if ((strcmp(P[q].taxon[i], "") != 0) && \ (strcmp(P[q].taxon[i], ".") != 0)) { strcpy(WN[q].taxon[i], "'"); strcat(WN[q].taxon[i], P[q].taxon[i]); strcat(WN[q].taxon[i], "'"); } else strcpy(WN[q].taxon[i], ""); // test if (strcmp(WN[q].notes[i], "") != 0) printf("%s\n", WN[q].notes[i]); } } for (q = 0; q < ntree; q++) { printf("\tTREE %s = ", WN[q].phyname); Fy2newRec(WN[q]); free_cmatrix(WN[q].taxon, 0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10); } printf("END;\n"); printf("\nBEGIN PHYLOCOM;\n\tTITLE Phylocom_Main;\n\tDATA\n"); for (i = 0; i < S.nsamples; i++) { for (j = 0; j < S.srec[i]; j++) { printf("%s\t%d\t%s\n", S.pname[i], S.abund[i][j], S.taxa[S.id[i][j]]); } } printf(";\nEND;\n"); //free_cmatrix(WN.taxon, 0, P.nnodes-1, 0, MAXTAXONLENGTH+10); }
main (int argc, char *argv[]) { int i, j, **seqs, **nall, ord=1, ns, **pij, lkf=0, npt=0, pnew=0, anc=0; int tcat=1, rcat=0, verb=1, miss=0, *flocs; int sw_flag=0, moment_flag=0, rmin_flag=0, sim_flag=0, test_flag=0; char fname[MAXNAME+1], **seqnames; long seed=-setseed(); extern int sizeofpset; double *locs; double **lkmat, *lkres; FILE *ifp=NULL, *ifp2=NULL, *ifp3=NULL, *tfp; struct site_type **pset; struct data_sum *data; int ask_questions = 1; char *in_str; print_help(argc, argv); idum = &seed; data = malloc((size_t) sizeof(struct data_sum)); data->exact = 0; strcpy(data->prefix, ""); for(i = 0; i < argc; i++) { if(*argv[i] == '-') { in_str = argv[i]; ask_questions = 0; if(strcmp(in_str, "-seq") == 0) ifp = fopen(argv[i+1], "r"); if(strcmp(in_str, "-loc") == 0) ifp2 = fopen(argv[i+1], "r"); if(strcmp(in_str, "-lk") == 0) { lkf = 1; ifp3 = fopen(argv[i+1], "r"); } if(strcmp(in_str, "-exact") == 0) data->exact = 1; if(strcmp(in_str, "-concise") == 0) verb=0; if(strcmp(in_str, "-window") == 0) sw_flag=1; if(strcmp(in_str, "-moment") == 0) moment_flag=1; if(strcmp(in_str, "-simulate") == 0) sim_flag=1; if(strcmp(in_str, "-rmin_flag") == 0) rmin_flag=2; if(strcmp(in_str, "-test") == 0) test_flag=1; if(strcmp(in_str, "-prefix") == 0) strcpy(data->prefix, argv[i+1]); } } if (ifp == NULL) { printf("\nCould not find seqs file in command line.\n"); printf("\nInput filename for seqs:\n"); scanf("%s", &fname); ifp = fopen(fname, "r"); } if (ifp == NULL) nrerror("Error in opening sequence file"); fscanf(ifp,"%i%i%i", &data->nseq, &data->lseq, &data->hd); if ((data->nseq < 2) || (data->lseq < 2)) {printf("\n\nInsufficient data for analysis (n > 1, L > 1) \n\n"); exit(1);} if (data->nseq > SEQ_MAX) {printf("\n\nMore than max no. 
sequences: Using first %i for analysis\n\n", SEQ_MAX); data->nseq=SEQ_MAX;} printf("\nAnalysing %i (n=%i) sequences of length %i seg sites\n", data->nseq, data->hd, data->lseq); seqs = imatrix(1, data->nseq, 1, data->lseq); seqnames = cmatrix(1, data->nseq+11, 1, MAXNAME+11); if (read_fasta(seqs, ifp, data->nseq, data->lseq, seqnames)) printf("\nSequences read succesfully\n"); fclose(ifp); nall = imatrix(1, data->lseq, 1, 6); allele_count(seqs, data->nseq, data->lseq, nall,1, data->hd, data->prefix); /*Store lnfac values in array for speed of computation*/ lnfac_array = (double *) malloc((size_t) ((int) (data->nseq+2)*(data->hd))*sizeof(double)); lnfac_array[0]=lnfac_array[1]=0; for (j=2;j<=((int) data->nseq*(data->hd));j++) lnfac_array[j]=(double) lnfac_array[j-1]+log(j); /*Open file with location of seg sites and read in data*/ if (ifp2 == NULL) { printf("\nCould not find locs file in command line.\n"); printf("\nInput name of file containing location of seg sites\n\n"); scanf("%s", &fname); ifp2 = fopen(fname, "r"); } if (ifp2 == NULL) nrerror("Cannot open loc file"); fscanf(ifp2, "%i %lf %c", &ns, &data->tlseq, &data->lc); if (ns != data->lseq) nrerror("Lseq and Locs disagree"); if ((data->lc != 'C')&&(data->lc != 'L')) nrerror("Must input linear(L)/conversion(C)"); if (data->lc == 'C') { data->avc=0; while (data->avc <= 0) { printf("\n\nInput average tract length for conversion model: ");scanf("%lf", &(data->avc)); } } locs = dvector(1, data->lseq); flocs = ivector(1, data->lseq); /*Array to use when simulating data*/ for (i=1; i<=data->lseq; i++) { fscanf(ifp2, "%lf", &locs[i]); if ((locs[i]==0)||(locs[i]>data->tlseq)) {printf("\n\nError in Loc file\n\n%lf\n", data->tlseq); exit(1);} if (i>1 && locs[i]<=locs[i-1]) nrerror("Error in locs file: SNPs must be montonically increasing"); } printf("\nLocation of seg sites\n\n"); for (i=1; i<=data->lseq; i++) printf("%3i %4.2lf\n", i, locs[i]); fclose(ifp2); /*Read in likelihood file where needed*/ if (ask_questions) 
{ printf("\n\nUse existing likelihood file? (yes=1, no=0):"); scanf("%i", &lkf); /*lkf is a flag: 1 means use existing likelihood file as starting point*/ if (lkf) { printf("\n\nInput name of likelihood file: "); scanf("%s", &fname); ifp3 = fopen(fname, "r"); } else data->exact=0; if (lkf == 1) { printf("\n\nIs likelihood file an exact match to data?(no=0/yes=1): "); scanf("%i", &data->exact); } } if (lkf && !ifp3) nrerror("Cannot open likelihood file"); if (!lkf && data->hd==2) nrerror("For diploid data need complete lookup table for sequences"); /*Store pair-types in pij matrix - classify in pair_spectrum routine*/ data->w = data->lseq; /*Note for this program use all data - pair_int restricts to a smaller window*/ pij = imatrix((int) 1,(int) data->lseq,(int) 1,(int) data->w); for (i=1;i<=data->lseq;i++) for (j=1;j<=data->w;j++) pij[i][j]=0; pset = init_pset(pset, lkf, ifp3, &npt, data); /*Reads in type configurations from likelihood file*/ printf("\n\n*** Calculating distribution of pair types ***\n\n"); pset = pair_spectrum(seqs, data, nall, pset, &npt, &pnew, &miss, anc, pij); printf("\n\n *** Completed classification of pair types ***\n\n"); if (data->exact && (pnew || miss)) nrerror("Lookup table is not exact for sequences\n(possibly generated by interval)"); printf("\n\nOld = %i: New = %i: Missing = %i\n\n", npt,pnew,miss); data->ptt = (int) npt+pnew+miss; /*npt is number from likelihood file, pnew is number new with no missing data, miss is # new with missing data*/ if (verb) { strcpy(fname, data->prefix); tfp = fopen(strcat(fname, "type_table.txt"), "w"); if (!tfp) nrerror("Cannot open type file"); type_print(pij, data->lseq, data->w,tfp); fclose(tfp); } if (verb) print_pairs(stdout, pset, npt+pnew, data->hd, data->nseq); /*Need a complete set for missing data or diploid data - check this*/ if (!data->exact && (data->hd ==2 || miss)) { printf("\n\nMissing data or diploid: checking that likelihood table is exhaustive\n\n"); 
check_exhaustive(pset,npt,(data->nseq)*((int) data->hd)); } /*Read parameters and likelihoods from likelihood file - where appropriate*/ if (lkf) { read_pars(ifp3, &tcat, &data->th, &data->rcat, &data->rmax); lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat); if (lkf) read_lk(ifp3, lkmat, npt, tcat, data->rcat); } /*If haploid, but novel types, need to calculate new likelihoods and input parameter values*/ if (data->hd ==1 && pnew) { /*Note can have pnew for diploid data, but this has been checked for already*/ if (!lkf) { data->th=data->rmax=-1.0; data->rcat=0; printf("\n\nInput theta per site (suggest Watterson estimate of %.5lf):",(double) data->lseq/(watterson(data->nseq*data->hd)*data->tlseq)); while (data->th<0.0) scanf("%lf", &data->th); printf("\n\nMax 4Ner for grid (suggest 100):"); while(data->rmax<0.0) scanf("%lf", &data->rmax); printf("\n\nNumber of points on grid (suggest 101, min=2):"); while(data->rcat<2) scanf("%i", &data->rcat); lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat); } lk_est(pset,npt,pnew,lkmat,data->th,data->rcat,data->rmax); data->exact=1; } /*Sum over missing data or resolve genotypes and sum over missing data+configurations*/ else if (miss && data->hd==1) { printf("\n\n*** Calculating likelihoods for missing data ***\n\n"); for (i=1;i<=miss;i++) { lk_miss(pset[npt+i],lkmat[npt+i],lkmat,data); printf("\rType %i", i); } printf(" ...Done!\n\n"); } /*Sum over resolutions for diploid data*/ else if (data->hd==2 && !data->exact) { printf("\n\n*** Resolving diploid data: %i ***\n\n",pnew+miss); lkres = dvector(1,data->rcat); for (i=1;i<=pnew+miss;i++) { lk_resolve(lkres,pset[npt+i],lkmat[npt+i],lkmat,data); printf("\rType %i", i); } free_dvector(lkres,1,data->rcat); printf(" ...Done!\n\n"); } /*If new likelihood generated can output likelihood file for future analyses*/ if (verb) print_lks(pset, data, npt+pnew+miss, lkmat); /*Basic analysis - estimation of 4Ner asuming constant rate*/ data->rme=data->rmax; data->rce=data->rcat; if (1) { 
printf("\n\nDo you wish to change grid over which to estimate likelihoods for (default = %i points, 4Ner 0 - %.1lf) (1/0) :",data->rcat,data->rmax); scanf("%i", &lkf); if (lkf) { data->rme=-10; data->rce=0; printf("\n\nMax 4Ner for estimation : "); while (data->rme < 0.0) scanf("%lf", &data->rme); printf("\n\nNumber of classes to estimate for: "); while (data->rce < 1) scanf("%i", &data->rce); } } data->lksurf = dmatrix(1,data->rce,1,2); lk_surf(pset, pij, data, lkmat, data->th, locs, 1); /*Print marginal likelihood ratio test statistics for each pair of sites*/ printf("\n\nCalculating fits\n\n"); fit_pwlk(data,pij,locs,lkmat,verb); /*Sliding windows version*/ if (1) { printf("\n\nDo you wish to carry out a sliding windows analysis? (yes=1/no=0):"); scanf("%i", &sw_flag); } if (sw_flag) lk_win(pset,pij,data,lkmat,locs,nall); /*Nonparametric estimation of recombination rate*/ if (1) { printf("\n\nPrint out table of Rmin values?\n(0=No, 1=Total only, 2=Full table):"); scanf("%i", &rmin_flag); } if (rmin_flag) { rmin(data, pset, pij, locs, lkf-1); printf("\n\nLower bound on Rmin = %i\n\n",data->rmin); } /*Estimate 4Ner by Wakeley 1997 method*/ if (1) { printf("\n\nEstimate 4Ner by moment method? (yes=1, no=0)"); scanf("%i", &moment_flag); } if (moment_flag) wakeley_est(data, seqs, locs); /*Recombination tests - only available for haploid data!*/ if (data->hd==1) { if (1) { printf("\n\nDo you wish to test for recombination? (yes=1, no=0): "); scanf("%i", &test_flag); } if (test_flag) { rec_test(data, pij, locs, lkmat, pset, npt+pnew+miss); } } /*Conditional simulation - only available for haploid data with a complete lk file*/ if (data->hd==1 && !(data->exact)) { if (1) { printf("\n\nDo you wish to test constant-rate model and estimate sampling distribution by simulation? (yes=1/no=0): "); scanf("%i", &test_flag); } if (test_flag) { freq_min(locs, flocs, nall, data); printf("\n\nHow many simulations? 
"); scanf("%i", &lkf); snp_sim(locs, flocs, pset, lkmat, lkf, data); } } free_imatrix(pij,1,data->lseq,1,data->w); free_imatrix(seqs,1,data->nseq,1,data->lseq); free_imatrix(nall,1,data->lseq,1,5); for (i=1;i<sizeofpset;i++) free(pset[i]); free(pset); free(data); free_dvector(locs, 1, data->lseq); free_ivector(flocs, 1, data->lseq); /* system("PAUSE"); */ }
// qsort comparator: ascending order of float values (used by NodeSig)
static int NodeSigCompareFloat(const void *a, const void *b)
{
  float fa = *(const float *) a;
  float fb = *(const float *) b;
  return (fa > fb) - (fa < fb);
}

// For every plot with more than two records, counts for each node of P the
// taxa (or, if abundWeighted, the summed abundances) of the plot that lie
// below it, and compares that count with RUNS randomizations produced by
// PhylogenyAttachShuffle. An interior node whose observed count is above the
// randomized count in at least 97.5% of runs is marked "+" (SIGMORE /
// MORE_<node>_); below in at least 97.5% of runs, "-" (SIGLESS /
// LESS_<node>_).
//
// outmethod == 1: one table row per interior node is printed to stdout.
// outmethod == 0: one labelled copy of the tree per plot is passed to
// WriteNexus. Labels go into notes (TreeView == 0) or taxon (TreeView == 1).
//
// Fixes relative to the previous version: the "median" column is now the
// middle element of the sorted randomized counts (test_r was printed without
// ever being written); label buffers are initialised for every plot,
// including plots with <= 2 records that are skipped below but still handed
// to WriteNexus; LESS_/MORE_ labels are formatted with snprintf.
//
// NOTE(review): attach, tips_rn and the per-plot label arrays are never
// freed here.
void NodeSig(phylo P, sample S, int outmethod, int abundWeighted)
{
  // Currently need to use taxon name of interior as a marker,
  // because Mesquite will not show all labels, but if it will soon,
  // best to use notes, and not muck around with taxon name
  //TODO modifying to use abundances, need longs instead of ints for counters

  int plot, node, taxon, i, ordHI, ordLO, run;
  int tipsReal_n[P.nnodes];
  float test_r[RUNS];
  char mark[2];
  char tmp[15];
  int **tips_rn; // was: int tips_rn[RUNS][MaxNode+1];
  phylo Out[S.nsamples];
  int *attach;

  attach = ivector(0, S.ntaxa-1);
  tips_rn = imatrix(0, RUNS-1, 0, P.nnodes-1);

  for (i = 0; i < S.nsamples; i++)
    {
      Out[i] = P; // all the pointers in Out are the same as those in Intree
      // careful not to change any preexisting arrays in Out, or they will
      // also be changed in Intree!

      if (TreeView == 0) Out[i].arenotes = 1;
      if (TreeView == 1) Out[i].arenotes = 0;

      // dimension a new array for Out names:
      if (TreeView == 0) Out[i].notes = cmatrix(0, P.nnodes-1, 0, MAXNOTELENGTH+10);
      if (TreeView == 1) Out[i].taxon = cmatrix(0, P.nnodes-1, 0, MAXTAXONLENGTH+10);

      // Give every label a defined value: empty notes / the unmarked taxon
      // name. Plots that are analysed below overwrite these.
      for (node = 0; node < P.nnodes; node++)
        {
          if (TreeView == 0) strcpy(Out[i].notes[node], "");
          if (TreeView == 1)
            {
              strcpy(Out[i].taxon[node], "");
              if (strcmp(P.taxon[node], ".") != 0)
                strcat(Out[i].taxon[node], P.taxon[node]);
            }
        }
    }

  // for each plot
  if (outmethod == 1) printf("plot\tnode\tnode_name           \tntaxa\tmedian\trank\tsig\n");

  for (plot = 0; plot < S.nsamples; plot++)
    {
      if (S.srec[plot] > 2)
        {
          for (node = 0; node < P.nnodes; node++)
            {
              tipsReal_n[node] = 0;
              for (run = 0; run < RUNS; run++) tips_rn[run][node] = 0;
            }

          // need to reset it
          AttachSampleToPhylo(S, P, attach);

          // follow up from tips, adding 1 to each node passed through
          for (taxon = 0; taxon < S.srec[plot]; taxon++)
            {
              i = P.t2n[ attach[ S.id[plot][taxon] ] ];
              while (i != -1)
                {
                  if (abundWeighted) tipsReal_n[i] += S.abund[plot][taxon];
                  else tipsReal_n[i]++;
                  i = P.up[i];
                }
            }

          // ooo this is slow, putting rnd inside the plot loop!
          for (run = 0; run < RUNS; run++)
            {
              // now randomize the plot
              PhylogenyAttachShuffle(P, S, attach);

              for (taxon = 0; taxon < S.srec[plot]; taxon++)
                {
                  i = P.t2n[ attach[ S.id[plot][taxon] ] ];
                  while (i != -1)
                    {
                      if (abundWeighted) tips_rn[run][i] += S.abund[plot][taxon];
                      else tips_rn[run][i]++;
                      i = P.up[i];
                    }
                }
            }

          // now unpeel the nodes
          for (node = 0; node < P.nnodes; node++)
            {
              if (TreeView == 0) strcpy(Out[plot].notes[node], "");
              if (TreeView == 1) strcpy(Out[plot].taxon[node], "");

              // interior nodes only
              if (P.noat[node] != 0)
                {
                  ordHI = 0;
                  ordLO = 0;
                  for (run = 0; run < RUNS; run++)
                    {
                      if (tips_rn[run][node] < tipsReal_n[node]) ordHI++;
                      if (tips_rn[run][node] > tipsReal_n[node]) ordLO++;
                      test_r[run] = (float) tips_rn[run][node];
                    }
                  // the median is only printed in table mode
                  if (outmethod == 1)
                    qsort(test_r, RUNS, sizeof test_r[0], NodeSigCompareFloat);

                  strcpy(mark, " ");

                  if (ordLO >= (int) ((float) RUNS * 0.975))
                    {
                      strcpy(mark, "-");
                      if (TreeView == 0) strcat(Out[plot].notes[node], "SIGLESS");
                      if (TreeView == 1)
                        {
                          snprintf(tmp, sizeof tmp, "LESS_%d_", node);
                          strcpy(Out[plot].taxon[node], tmp);
                        }
                      if (outmethod == 1)
                        printf("%d\t%d\t%-10s\t%d\t%d\t%d\t%d\t%s\n", plot +1, node, P.taxon[node], tipsReal_n[node], (int) test_r[(int) ((float) RUNS * 0.5)], ordHI, ordLO, mark);
                    }
                  else if (ordHI >= (int) ((float) RUNS * 0.975))
                    {
                      strcpy(mark, "+");
                      if (TreeView == 0) strcat(Out[plot].notes[node], "SIGMORE");
                      if (TreeView == 1)
                        {
                          snprintf(tmp, sizeof tmp, "MORE_%d_", node);
                          strcpy(Out[plot].taxon[node], tmp);
                        }
                      if (outmethod == 1)
                        printf("%d\t%d\t%-20s\t%d\t%d\t%d\t%d\t%s\n", plot +1, node, P.taxon[node], tipsReal_n[node], (int) test_r[(int) ((float) RUNS * 0.5)], ordHI, ordLO, mark);
                    }
                  else if (outmethod == 1)
                    printf("%d\t%d\t%-20s\t%d\t%d\t%d\t%d\t%s\n", plot+1, node, P.taxon[node], tipsReal_n[node], (int) test_r[(int) ((float) RUNS * 0.5)], ordHI, ordLO, mark);
                }

              // keep the original name after any LESS_/MORE_ prefix
              if ((TreeView == 1) && (strcmp(P.taxon[node], ".") != 0))
                {
                  strcat(Out[plot].taxon[node], P.taxon[node]);
                }
            }
        }

      // Name tree
      strcpy(Out[plot].phyname, "NodeSig_");
      strcat(Out[plot].phyname, S.pname[plot]);
    }

  if (outmethod == 0)
    WriteNexus(Out, S.nsamples, ReadSample(SampleFile), 1, ReadTraits(TraitFile), 1);
}
/* Init function for uniform table.

phase = phase name to tag this table with
pf = input parameter file object to be parsed.

The following keys are required to be found in pf:
	int:            nx, nz
	scalar double:  dx, dz
	&Tbl:           uniform_grid_time_slowness_table
	&Tbl:           velocities  (one entry per depth, i.e. nz entries)

uniform_grid_time_slowness_table contains the actual tables. They are ascii
tables made up of nx*nz lines (x varies most rapidly) of the following
format:
	time, slowness, slowness derivative wrt distance, branch
The "branch" variable is a character key defined in location.h

Optional parameters with defaults:
	scalar double:  x0, z0  coordinates of first point in table
	                        (default = (0,0))
(An earlier version of this comment called the second key y0; the code
reads "z0".)

Original author's note on units: this routine requires mixed units. dx, dz
and the origin must all be specified in degrees. Everything else has units
derived from km and s. That is, time is in seconds, slowness (p) is assumed
to be in s/km, and dpdx (slowness derivative) is (s/km)/km. This was done
because the input tables are ascii, and these numbers are scaled to units
that make sense to most of us. This format is connected to a related
program called taup_convert that writes ttables in this format using the
tau-p library.

Returns 0 if no problems are encountered. Returns 1 if a serious error
occurred that rendered setup impossible for this phase. In the latter
case, elog_log is always called and the error should be handled by the
calling program. Allocation failures are fatal: elog_die is called.
*/
int uniform_table_interpolate_init(char *phase, Pf *pf)
{
	XZ_table_uniform *ttable, *utable;
	Tbl *t;  /* pfget_tbl return to hold strings of prototables stored
		    in the pf structure. */
	int i,j,k;

	GenlocVerbose = verbose_exists() ;

	if(time_tables_uniform==NULL) time_tables_uniform = newarr(0);
	if(slow_tables_uniform==NULL) slow_tables_uniform = newarr(0);

	ttable = (XZ_table_uniform *)malloc(sizeof(XZ_table_uniform));
	utable = (XZ_table_uniform *)malloc(sizeof(XZ_table_uniform));
	if( (ttable == NULL) || (utable == NULL) )
		elog_die(1,"Can't alloc memory in uniform_table_interpolate_init\n");

	/* This version requires t and u tables to be parallel. This
	restriction would not be necessary, but it simplifies things
	greatly and we only have to store times in the values matrix
	and the slowness values in the slopes matrix. */
	ttable->nx = pfget_int(pf, "nx");
	ttable->nz = pfget_int(pf, "nz");
	utable->nx = ttable->nx;
	utable->nz = ttable->nz;
	ttable->dx = pfget_double(pf, "dx");
	ttable->dz = pfget_double(pf, "dz");
	utable->dx = ttable->dx;
	utable->dz = ttable->dz;

	/* These parameters default to 0 */
	if(pfget_string(pf,"x0")==NULL)
	{
		ttable->x0 = 0.0;
		utable->x0 = 0.0;
	}
	else
	{
		ttable->x0 = pfget_double(pf,"x0");
		utable->x0 = ttable->x0;
	}
	if(pfget_string(pf,"z0")==NULL)
	{
		ttable->z0 = 0.0;
		utable->z0 = 0.0;
	}
	else
	{
		ttable->z0 = pfget_double(pf,"z0");
		utable->z0 = ttable->z0;
	}

	/* IMPORTANT WARNING: notice I only alloc one space for the
	slowness values array, although it gets placed in two different
	places -> values section of utable and slopes section of ttable.
	This leaves a nasty dependency if this space is to be freed, but
	saves a lot of memory.
	p.s I did the same thing with velocity, but not with the
	branch array (see below) */
	ttable->values = dmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
	if(ttable->values == NULL)
		elog_die(1,"Cannot alloc memory for travel time table of size %d by %d for phase %s\n",
			ttable->nx, ttable->nz, phase);
	ttable->slopes = dmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
	if(ttable->slopes == NULL)
		elog_die(1,"Cannot alloc memory for slowness table of size %d by %d for phase %s\n",
			ttable->nx, ttable->nz, phase);
	ttable->branch = cmatrix(0,(ttable->nx)-1,0,(ttable->nz)-1);
	if(ttable->branch == NULL)
		elog_die(1,"Cannot alloc memory for time branch table for phase %s\n",
			phase);
	utable->branch = cmatrix(0,(utable->nx)-1,0,(utable->nz)-1);
	if(utable->branch == NULL)
		elog_die(1,"Cannot alloc memory for slowness branch table for phase %s\n",
			phase);
	ttable->velocity = (double *) calloc(ttable->nz,sizeof(double));
	if(ttable->velocity == NULL)
		elog_die(1,"Cannot alloc memory for velocity model for phase %s\n",
			phase);
	utable->slopes = dmatrix(0,(utable->nx)-1,0,(utable->nz)-1);
	if(utable->slopes == NULL)
		elog_die(1,"Cannot alloc memory for dudr table of size %d by %d for phase %s\n",
			utable->nx, utable->nz, phase);

	/* here is where we set the redundant pointers */
	utable->values = ttable->slopes;
	utable->velocity = ttable->velocity;

	/* Now it is time to actually parse the tables. We assume
	the table is entered as a pf &Tbl, and table is scanned with
	x varying most rapidly (the inner loop below runs over x).
	Four entries are read for each grid point:
	time, slowness, slowness derivative, branch_code */
	t = pfget_tbl(pf,"uniform_grid_time_slowness_table");
	if(t == NULL)
	{
		elog_log(1,"Can't find travel time-slowness table for phase %s\n",
			phase);
		free_uniform_table(ttable, utable);
		return(1);
	}
	if( maxtbl(t) != ( (ttable->nx)*(ttable->nz) ) )
	{
		elog_log(1,"Table size mismatch for phase %s\nTable should have %d rows\nFound %ld\n",
			phase, (ttable->nx)*(ttable->nz), maxtbl(t));
		free_uniform_table(ttable, utable);
		return(1);
	}
	for(j=0,k=0; j<ttable->nz; ++j)
	{
		for(i=0; i<ttable->nx; ++i)
		{
			char *s;
			int nitems;
			double tt,u,dudx;
			/* %1s stores one character plus a terminating
			   NUL, so the target needs two bytes (it used to
			   be a single char, which overflowed). */
			char b[2];

			s = gettbl(t,k);
			nitems = sscanf(s,"%lf%lf%lf%1s", &tt, &u, &dudx, b);
			if(nitems !=4)
			{
				elog_log(1,"Syntax error reading table for phase %s, Problem read value for i=%d, j=%d\n",
					phase,i,j);
				free_uniform_table(ttable, utable);
				return(1);
			}
			ttable->values[i][j] = tt;
			ttable->slopes[i][j] = u;
			utable->slopes[i][j] = dudx;
			ttable->branch[i][j] = b[0];
			++k;
		}
	}

	/* In order to utilize a common set of interpolation routines,
	scan the time->branch matrix. Mark the crossover points for time
	as jump discontinuities for slowness (which they are) */
	for(j=0; j<ttable->nz; ++j)
		for(i=0; i<ttable->nx; ++i)
			if(ttable->branch[i][j] == CROSSOVER)
				utable->branch[i][j] = JUMP;
			else
				utable->branch[i][j] = ttable->branch[i][j];

	/* An error check is needed here so we don't have to worry about
	it later. Other than a blunder, this can happen if x0 is anything
	other than 0, so we need to watch for this. We could try to repair
	this automatically, but because it most likely indicates a serious
	blunder we abort */
	for(j=0; j<ttable->nz; ++j)
		if( (utable->branch[0][j] == CROSSOVER)
		 || (ttable->branch[0][j] == CROSSOVER)
		 || (utable->branch[0][j] == JUMP)
		 || (ttable->branch[0][j] == JUMP) )
		{
			elog_log(1, "Error in travel time table for phase %s\nFirst point cannot be marked as a crossover or jump discontinuity\n",phase);
			free_uniform_table(ttable, utable);
			return(1);
		}

	/* Now we read the velocity model parameters */
	t = pfget_tbl(pf,"velocities");
	if(t == NULL)
	{
		/* maxtbl must not be called on a missing table */
		elog_log(1,"Can't find velocities table for phase %s\n",
			phase);
		free_uniform_table(ttable, utable);
		return(1);
	}
	if((ttable->nz) != maxtbl(t))
	{
		elog_log(1,"Error in phase parameter file.  "
			"Mismatch between velocity entries and table entries\n"
			"Tables have %d depth entries, but velocity vector is of length %ld\n",
			ttable->nz, maxtbl(t));
		free_uniform_table(ttable,utable);
		return(1);
	}
	for(i=0; i<maxtbl(t); ++i)
	{
		char *s;
		s = gettbl(t,i);
		if(sscanf(s,"%lf", &(ttable->velocity[i])) != 1)
		{
			elog_log(1,"Syntax error reading velocity entry %d for phase %s\n",
				i, phase);
			free_uniform_table(ttable,utable);
			return(1);
		}
	}

	setarr(time_tables_uniform,phase,ttable);
	setarr(slow_tables_uniform,phase,utable);
	return(0);
}
/* ==================================================== */ void Align_Process(char **argv,int args,int nRead) /* ==================================================== */ { int i,j,k,n_reads = nRead; void ArraySort_Mix(int n,B64_long *arr,int *brr); int **imatrix(B64_long nrl,B64_long nrh,B64_long ncl,B64_long nch); char **cmatrix(B64_long nrl,B64_long nrh,B64_long ncl,B64_long nch); void ArraySort_String(int n,char **Pair_Name,int *brr); // char **rdname; char **DBname,**ctgname,*ptr,*st,*ed,line[2000],RC; FILE *namef,*namef2; int read_index[2000]; B64_long read_offsets[2000]; int n_find,idd,stopflag,num_align,refhit1,refhit2; int readhit1 = 0,readhit2 = 0; int rd_forward[2000],rd_reverse[2000]; int insertSize1 = insert_size*(1.0+insert_std); int insertSize2 = insert_size*(1.0-insert_std); n_reads = nRead; RC = '+'; cell_name = cmatrix(0,2,0,Max_N_NameBase); rdname=cmatrix(0,nRead,0,Max_N_NameBase); ctgname=cmatrix(0,nRead,0,Max_N_NameBase); DBname=cmatrix(0,nRead,0,Max_N_NameBase); if((readIndex= (int *)calloc(n_reads,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - readIndex\n"); exit(1); } if((read2contig= (int *)calloc(n_reads,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - read2contig\n"); exit(1); } if((map_score= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - map_score\n"); exit(1); } if((hit_frdex= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_frdex\n"); exit(1); } if((hit_rcdex= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_rcdex\n"); exit(1); } if((hit_quest= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_quest\n"); exit(1); } if((hit_queed= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_queed\n"); exit(1); } if((hit_refst= (int *)calloc(nRead,sizeof(int))) == NULL) { 
printf("ERROR Memory_Allocate: Align_Process - hit_refst\n"); exit(1); } if((hit_refed= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_refed\n"); exit(1); } if((hit_score= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - hit_score\n"); exit(1); } if((map_unique= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - map_unique\n"); exit(1); } if((map_rdpair= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: Align_Process - map_rdpair\n"); exit(1); } if((ctg_index= (int *)calloc(nRead,sizeof(int))) == NULL) { printf("ERROR Memory_Allocate: calloc - ctg_index\n"); exit(1); } if((namef = fopen(argv[args],"r")) == NULL) { printf("ERROR main:: reads group file \n"); exit(1); } /* read the SNP output file */ num_align=0; while(!feof(namef)) { int nPair=0,len; char line2[2000],line3[2000],base[500],score[3]; fgets(line,2000,namef); if(feof(namef)) break; strcpy(line2,line); strcpy(line3,line); if((strncmp(line,"cigar",5))==0) { refhit1 = 0; refhit2 = 0; readhit1 = 0; readhit2 = 0; for(ptr=strtok(line," ");ptr!=NULL;ptr=strtok((char *)NULL," "),nPair++) { } i=0; for(ptr=strtok(line2," ");ptr!=NULL;ptr=strtok((char *)NULL," "),i++) { if(i==0) { memset(score,'\0',3); memset(base,'\0',500); strcat(base,ptr); len = strlen(base); if(len < 8) { map_score[num_align] = 50; // printf("score: %d %s",len,line3); } else { score[0] = line3[7]; score[1] = line3[8]; map_score[num_align] = atoi(score); // if(map_score[num_align]>=30) // map_unique[num_align] = 1; // printf("score: %d %s",len,line3); } } else if(i==1) { memset(base,'\0',500); strcat(base,ptr); strcpy(rdname[num_align],ptr); } else if(i==2) { memset(base,'\0',500); strcat(base,ptr); readhit1 = atoi(ptr); } else if(i==3) { memset(base,'\0',500); strcat(base,ptr); readhit2 = atoi(ptr); } else if(i==4) { memset(base,'\0',500); strcat(base,ptr); RC = *ptr; } else if(i==5) { 
memset(base,'\0',500); strcpy(ctgname[num_align],ptr); st = rdname[num_align]; ed = strrchr(rdname[num_align],'.'); if(ed==NULL) strcpy(DBname[num_align],rdname[num_align]); else { strncpy(DBname[num_align],rdname[num_align],ed-st); if(*(ed+1) == 'p') hit_frdex[num_align] = 1; if(*(ed+1) == 'b') hit_frdex[num_align] = 1; if(*(ed+1) == 'x') hit_frdex[num_align] = 1; if(*(ed+1) == 'F') hit_frdex[num_align] = 1; if(*(ed+1) == 'q') hit_frdex[num_align] = 2; if(*(ed+1) == 'g') hit_frdex[num_align] = 2; if(*(ed+1) == 'y') hit_frdex[num_align] = 2; if(*(ed+1) == 'R') hit_frdex[num_align] = 2; } } else if(i==6) { memset(base,'\0',500); strcat(base,ptr); refhit1 = atoi(ptr); } else if(i==7) { memset(base,'\0',500); strcat(base,ptr); refhit2 = atoi(ptr); } else if(i==8) { if(RC=='+') { hit_rcdex[num_align]=0; hit_refst[num_align]=refhit1; hit_refed[num_align]=refhit2; hit_quest[num_align]=readhit1; hit_queed[num_align]=readhit2; } else { hit_refst[num_align]=refhit2; hit_refed[num_align]=refhit1; hit_quest[num_align]=readhit2; hit_queed[num_align]=readhit1; hit_rcdex[num_align]=1; } } else if(i==9) { memset(base,'\0',500); strcat(base,ptr); hit_score[num_align] = atoi(ptr); readIndex[num_align] = num_align; num_align++; } } } } fclose(namef); /* sort out contig/chromosome idnex */ n_reads = nRead; ArraySort_String(n_reads,ctgname,readIndex); n_find = 0; idd = 0; for(i=0;i<n_reads;i++) { /* search reads with an index < i */ /* search reads with an index > i */ stopflag=0; j=i+1; while((j<n_reads)&&(stopflag==0)) { if(strcmp(ctgname[j],ctgname[i])==0) { j++; } else stopflag=1; } if((j-i)>=1) { for(k=i;k<j;k++) { ctg_index[readIndex[k]] = idd; read2contig[readIndex[k]] = k; } } idd++; i=j-1; } /* sort out read pairs */ n_reads = nRead; for(i=0;i<n_reads;i++) readIndex[i] = i; printf("started the process: \n"); ArraySort_String(n_reads,DBname,readIndex); n_find = 0; idd = -1; for(i=0;i<n_reads;i++) { /* search reads with an index < i */ /* search reads with an index > i */ 
stopflag=0; j=i+1; while((j<n_reads)&&(stopflag==0)) { if(strcmp(DBname[j],DBname[i])==0) { j++; } else stopflag=1; } idd = -1; if((j-i)==2) { int num_hits = j-i; int stopflag2,m,n_pair; int ctg1,ctg2,idt,idd; idt = readIndex[i]; idd = readIndex[i+1]; ctg1 = ctg_index[idt]; ctg2 = ctg_index[idd]; if((ctg1!=ctg2)||(abs(hit_refst[idt]-hit_refst[idd])>max_space)) { if(map_score[idt]==map_score[idd]) { if(hit_score[idt]>hit_score[idd]) { map_unique[idt] = 1; map_unique[idd] = 0; } else { map_unique[idt] = 0; map_unique[idd] = 1; } } else { if(map_score[idt]>map_score[idd]) { map_unique[idt] = 1; map_unique[idd] = 0; } else { map_unique[idt] = 0; map_unique[idd] = 1; } } } else { map_unique[idt] = 1; map_unique[idd] = 1; } } else if((j-i)==1) map_unique[readIndex[i]] = 1; i=j-1; } /* read the cigar line file */ if((namef = fopen(argv[args],"r")) == NULL) { printf("ERROR main:: alignment file 2 \n"); exit(1); } /* read the cigar line file */ if((namef2 = fopen(argv[args+1],"w")) == NULL) { printf("ERROR main:: alignment file 2 \n"); exit(1); } /* read the SNP output file */ i=0; n_find = 0; while(!feof(namef)) { fgets(line,2000,namef); // printf("%s",line); if(feof(namef)) break; if(map_unique[i]==1) { char score[3] = {0}; score[0] = map_score[i]/10 + '0'; score[1] = map_score[i]%10 + '0'; line[7] = score[0]; line[8] = score[1]; fprintf(namef2,"%s",line); n_find++; } i++; } printf("number of reads uniquely placed on to genome: %d\n",n_find); fclose(namef); fclose(namef2); }
/* Append `name` to LIST[1..*ntp] unless an identical string is already
   stored there; *ntp is incremented when a new entry is added. */
static void springfile_add_unique(char **LIST,int *ntp,const char *name)
{
  int j;

  for(j=1;j<=(*ntp);j++)
    if(!strcmp(name,LIST[j]))
      return;
  strcpy(LIST[++(*ntp)],name);
}


/* "read_springfile_sysenv" READS THE SPRINGFILE FOR BOTH THE 'SYSTEM' AND
   THE 'ENVIRONMENT'

   file      path of the spring file; each record is "name1 name2 value"
   SYS, ENV  centroid arrays, entries 1..nss and 1..nen are read
   ntp       receives the number of distinct centroid names found

   Returns a newly allocated Sprngmtx whose `name` rows 1..*ntp hold the
   distinct names and whose symmetric matrix `M` (1..*ntp square) holds
   DEFGAM everywhere except for the pairs listed in the file.  Records
   naming an unknown centroid are ignored.  An unreadable file or an
   allocation failure terminates the program.

   NOTE(review): the "%s" conversions are unbounded; a token of NAME_LNG
   or more characters in the file overruns nup1/nup2. */
Sprngmtx *read_springfile_sysenv(char *file,Centroid *SYS,Centroid *ENV,
				 int nss,int nen,int *ntp)
{
  FILE *data;
  Sprngmtx *foo;
  char **LIST,nup1[NAME_LNG],nup2[NAME_LNG];
  double x;
  int nn=nss+nen,i,j;

  if((data=fopen(file,"r"))==NULL){
    fprintf(stderr,"\nread_springfile_sysenv: unable to open %s\n\n",file);
    exit(1);}


  /* GET THE LIST OF UNIQUE CENTROID NAMES */
  LIST=cmatrix(1,nn,0,NAME_LNG);
  (*ntp)=0;
  for(i=1;i<=nss;i++)
    springfile_add_unique(LIST,ntp,SYS[i].name);
  for(i=1;i<=nen;i++)
    springfile_add_unique(LIST,ntp,ENV[i].name);


  /* The object itself must be allocated, not just room for a pointer to it */
  foo=(Sprngmtx *)malloc(sizeof *foo);
  if(foo==NULL){
    fprintf(stderr,"\nread_springfile_sysenv: out of memory\n\n");
    exit(1);}
  foo->name=cmatrix(1,(*ntp),0,NAME_LNG);
  foo->M=dmatrix(1,(*ntp),1,(*ntp));
  for(i=1;i<=(*ntp);i++){
    strcpy(foo->name[i],LIST[i]);
    for(j=i;j<=(*ntp);j++)
      foo->M[i][j]=foo->M[j][i]=DEFGAM;
  }
  free_cmatrix(LIST,1,nn,0,NAME_LNG);


  /* READ THE FILE: only complete three-field records are applied */
  while(fscanf(data,"%s%s%lf",nup1,nup2,&x)==3){
    for(i=1;i<=(*ntp);i++)
      if(!strcmp(nup1,foo->name[i])){
	for(j=1;j<=(*ntp);j++)
	  if(!strcmp(nup2,foo->name[j])){
	    foo->M[i][j]=foo->M[j][i]=x;
	    break;
	  }
	break;
      }
  }
  if(!feof(data))
    fprintf(stderr,"\nread_springfile_sysenv: malformed record in %s, "
	    "remaining entries ignored\n\n",file);
  fclose(data);
  return foo;
}
/*
 * R entry point: validate a delimited text matrix against the caller's
 * expectations and parse it once.
 *
 *   traceFlag   integer, forwarded to setTraceFlag()
 *   fName       character, path of the file to read
 *   rowType     character vector with one type code per expected row;
 *               each must be "X", "C", "c", "I" or "R"
 *   rowCnt      integer, expected number of data rows
 *   tokenDelim  character, first byte is the field delimiter
 *   colHeader   integer, non-zero when the first line is a header
 *   rowHeader   integer, non-zero when the first field is a row label
 *
 * The file is scanned once to count columns and rows, then re-read from
 * the start into a temporary matrix which is freed again.  Always returns
 * R_NilValue; every failure is raised through error().
 *
 * NOTE(review): every column of a row is converted from the same
 * `sb -> token`; whether that is intended cannot be told from here.
 * NOTE(review): _rowType is not released; its deallocator is not visible
 * from this block.
 */
SEXP rfsrcReadMatrix(SEXP traceFlag, SEXP fName, SEXP rowType, SEXP rowCnt, SEXP tokenDelim, SEXP colHeader, SEXP rowHeader) {
  FILE  *fPtr;
  char  *_fName;
  SEXP   _sexp_rowType;
  char **_rowType;
  uint   _rowCnt;
  char  *_tokenDelim;
  char   _colHeadF;
  char   _rowHeadF;
  SmartBuffer *sb;
  char **dataMatrix;
  uint p, i;
  uint rowCntActual;
  uint colCntActual;
  uint sbSizeActual;
  char flag;

  setTraceFlag(INTEGER(traceFlag)[0], 0);
  _fName        = (char*) CHAR(STRING_ELT(AS_CHARACTER(fName), 0));
  _sexp_rowType = rowType;
  _rowCnt       = INTEGER(rowCnt)[0];
  _tokenDelim   = (char*) CHAR(STRING_ELT(AS_CHARACTER(tokenDelim), 0));
  _colHeadF     = (INTEGER(colHeader)[0] != 0) ? TRUE : FALSE;
  _rowHeadF     = (INTEGER(rowHeader)[0] != 0) ? TRUE : FALSE;

  _rowType = (char**) new_vvector(1, _rowCnt, NRUTIL_CPTR);
  for (p = 1; p <= _rowCnt; p++) {
    _rowType[p] = (char*) CHAR(STRING_ELT(AS_CHARACTER(_sexp_rowType), p-1));
    if ((strcmp(_rowType[p], "X") != 0) &&
        (strcmp(_rowType[p], "C") != 0) &&
        (strcmp(_rowType[p], "c") != 0) &&
        (strcmp(_rowType[p], "I") != 0) &&
        (strcmp(_rowType[p], "R") != 0)) {
      Rprintf("\nRF-SRC: *** ERROR *** ");
      Rprintf("\nRF-SRC: Invalid predictor type: [%10d] = %2s", p, _rowType[p]);
      Rprintf("\nRF-SRC: Type must be 'C', 'c', 'I', or 'R'.");
      Rprintf("\nRF-SRC: Please Contact Technical Support.");
      error("\nRF-SRC: The application will now exit.\n");
    }
  }

  /* Pass 1: learn the column count, the buffer size, and the row count. */
  fPtr = fopen(_fName, "r");
  if (fPtr == NULL) {
    Rprintf("\nRF-SRC: *** ERROR *** ");
    Rprintf("\nRF-SRC: Unable to open file: %s", _fName);
    error("\nRF-SRC: The application will now exit.\n");
  }
  sb = parseLineSB(fPtr, *_tokenDelim, 0);
  colCntActual = _rowHeadF ? (sb -> tokenCnt - 1) : (sb -> tokenCnt);
  if (_colHeadF) {
    /* The first line was the header; size the buffer from the first data line. */
    freeSB(sb);
    sb = parseLineSB(fPtr, *_tokenDelim, 0);
  }
  sbSizeActual = sb -> size;
  freeSB(sb);
  rowCntActual = 1;
  flag = TRUE;
  while (flag) {
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    if (sb -> tokenCnt == 0) {
      flag = FALSE;
    }
    else {
      ++rowCntActual;
    }
    freeSB(sb);
  }
  /* Close before the check: error() does not return. */
  fclose(fPtr);
  if (rowCntActual != _rowCnt) {
    Rprintf("\nRF-SRC: *** ERROR *** ");
    Rprintf("\nRF-SRC: Inconsistent Predictor Count.");
    Rprintf("\nRF-SRC: (encountered, expected) = (%10d, %10d)", rowCntActual, _rowCnt);
    Rprintf("\nRF-SRC: Please Contact Technical Support.");
    error("\nRF-SRC: The application will now exit.\n");
  }

  /* Pass 2: always reopen, so that reading never touches the closed stream. */
  fPtr = fopen(_fName, "r");
  if (fPtr == NULL) {
    Rprintf("\nRF-SRC: *** ERROR *** ");
    Rprintf("\nRF-SRC: Unable to open file: %s", _fName);
    error("\nRF-SRC: The application will now exit.\n");
  }
  if (_colHeadF) {
    /* Skip the header line. */
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    freeSB(sb);
  }
  dataMatrix = cmatrix(1, colCntActual, 1, rowCntActual);
  for (p = 1; p <= rowCntActual; p++) {
    sb = parseLineSB(fPtr, *_tokenDelim, sbSizeActual);
    /* Row labels (_rowHeadF) receive no special treatment here. */
    for (i = 1; i <= colCntActual; i++) {
      dataMatrix[i][p] = (char) strtol(sb -> token, NULL, 10);
    }
    freeSB(sb);
  }
  fclose(fPtr);
  free_cmatrix(dataMatrix, 1, colCntActual, 1, rowCntActual);
  return R_NilValue;
}
int main(int argc, char *argv[]) { int i, k, howmany, segsites ; char **list, **cmatrix(), **tbsparamstrs ; FILE *pf, *fopen() ; double probss, tmrca, ttot ; void seedit( const char * ) ; void getpars( int argc, char *argv[], int *howmany ) ; int gensam( char **list, double *probss, double *ptmrca, double *pttot ) ; ntbs = 0 ; /* these next few lines are for reading in parameters from a file (for each sample) */ tbsparamstrs = (char **)malloc( argc*sizeof(char *) ) ; for( i=0; i<argc; i++) printf("%s ",argv[i]); for( i =0; i<argc; i++) tbsparamstrs[i] = (char *)malloc(30*sizeof(char) ) ; for( i = 1; i<argc ; i++) if( strcmp( argv[i],"tbs") == 0 ) argv[i] = tbsparamstrs[ ntbs++] ; count=0; if( ntbs > 0 ) for( k=0; k<ntbs; k++) scanf(" %s", tbsparamstrs[k] ); getpars( argc, argv, &howmany) ; /* results are stored in global variable, pars */ if( !pars.commandlineseedflag ) seedit( "s"); pf = stdout ; if( pars.mp.segsitesin == 0 ) { list = cmatrix(pars.cp.nsam,maxsites+1); posit = (double *)malloc( (unsigned)( maxsites*sizeof( double)) ) ; } else { list = cmatrix(pars.cp.nsam, pars.mp.segsitesin+1 ) ; posit = (double *)malloc( (unsigned)( pars.mp.segsitesin*sizeof( double)) ) ; if( pars.mp.theta > 0.0 ) { segfac = 1.0 ; for( i= pars.mp.segsitesin; i > 1; i--) segfac *= i ; } } while( howmany-count++ ) { if( (ntbs > 0) && (count >1 ) ) { for( k=0; k<ntbs; k++) { if( scanf(" %s", tbsparamstrs[k]) == EOF ) { if( !pars.commandlineseedflag ) seedit( "end" ); exit(0); } } getpars( argc, argv, &howmany) ; } fprintf(pf,"\n//"); if( ntbs >0 ) { for(k=0; k< ntbs; k++) printf("\t%s", tbsparamstrs[k] ) ; } printf("\n"); segsites = gensam( list, &probss, &tmrca, &ttot ) ; if( pars.mp.timeflag ) fprintf(pf,"time:\t%lf\t%lf\n",tmrca, ttot ) ; if( (segsites > 0 ) || ( pars.mp.theta > 0.0 ) ) { if( (pars.mp.segsitesin > 0 ) && ( pars.mp.theta > 0.0 )) fprintf(pf,"prob: %g\n", probss ) ; fprintf(pf,"segsites: %d\n",segsites); if( segsites > 0 ) fprintf(pf,"positions: "); for( i=0; 
i<segsites; i++) fprintf(pf,"%6.*lf ", pars.output_precision,posit[i] ); fprintf(pf,"\n"); if( segsites > 0 ) for(i=0; i<pars.cp.nsam; i++) { fprintf(pf,"%s\n", list[i] ); } } } if( !pars.commandlineseedflag ) seedit( "end" ); }
int main(int argc, char **argv) { FILE *namef; int i; int n_reads, nseq = 0; float identy; char line[2000] = {0}, tempc1[60], RC[1]; char *filename; filename = argv[1]; fflush(stdout); if (argc < 2) { printf("Usage: %s <cross_genome_ouput file>\n", argv[0]); exit(1); } if ((namef = fopen(filename, "r")) == NULL) { fprintf(stderr, "ERROR: unable to open file %s\n", filename); exit(1); } while (!feof(namef)) { fgets(line, 2000, namef); if (feof(namef)) break; nseq++; } fclose(namef); if((hit_sindex = (int *)calloc(nseq,sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memory: calloc - hit_sindex\n"); exit(1); } if ((hit_rcdex = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memory: calloc - hit_rcdex\n"); exit(1); } if ((hit_read1 = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memory: calloc - hit_read1\n"); exit(1); } if ((hit_read2 = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memory: calloc - hit_read2\n"); exit(1); } if ((hit_locus1 = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memoryfmate: calloc - hit_locus1\n"); exit(1); } if ((hit_locus2 = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: out of memory: calloc - hit_locus2\n"); exit(1); } if ((hit_length = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: calloc - hit_length\n"); exit(1); } if ((readlength = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: calloc - readlength\n"); exit(1); } if ((superlength = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: calloc - superlength\n"); exit(1); } if ((hit_qindex = (int *)calloc(nseq, sizeof(int))) == NULL) { fprintf(stderr, "Error: calloc - hit_qindex\n"); exit(1); } R_Name = cmatrix(0, nseq + 1, 0, Max_N_NameBase); S_Name = cmatrix(0, nseq + 1, 0, Max_N_NameBase); T_Name = cmatrix(0, nseq + 1, 0, 6); if ((namef = fopen(filename, "r")) == NULL) { 
fprintf(stderr, "ERROR: unable to open file %s\n", filename); exit(1); } /* read the alignment files */ i = 0; while (fscanf(namef, "%s %d %s %s %d %d %d %d %s %d %f %d %d", tempc1, &hit_sindex[i], R_Name[i], S_Name[i], &hit_read1[i], &hit_read2[i], &hit_locus1[i], &hit_locus2[i], RC, &hit_qindex[i], &identy, &readlength[i], &superlength[i]) != EOF) { if (RC[0] == 'F') hit_rcdex[i] = 0; else hit_rcdex[i] = 1; strncpy(T_Name[i], S_Name[i], 4); i++; } fclose(namef); n_reads = i; printf("reads: %d %s\n", n_reads, filename); Indel_Process(n_reads); printf("Job finished for %d reads!\n", nseq); return EXIT_SUCCESS; }
/*
 * Penalty added when a cell is reached through a gap.
 *   between_exons  non-zero: the gap is free
 *   at_end_gap     non-zero: the end-gap penalty applies
 *   extending      non-zero: the previous cell was reached by the same
 *                  kind of gap, so this is an extension, not an opening
 */
static int sw_gap_penalty(int between_exons, int at_end_gap, int endgap,
			  int extending, int gap_opening, int gap_extension)
{
    if (between_exons) return 0;
    if (at_end_gap)    return endgap;
    return extending ? gap_extension : gap_opening;
}

/*
 * Function to be called from Python
 *
 * Arguments (format "s#s#ii"): seq1, seq2, gap_opening, gap_extension.
 * Aligns the two sequences with a Smith-Waterman style recursion (scores
 * are floored at 0, the trace-back starts at the best-scoring cell and
 * runs back to the origin).  In either sequence the character 'B' turns
 * on gap penalties for gaps placed against that sequence and 'Z' turns
 * them off again; they start turned off.
 *
 * Returns a tuple of the two aligned strings, with '#' as gap character.
 * Internal failures are reported by returning a single message string
 * instead; a failed argument parse returns NULL with the Python
 * exception set.
 *
 * NOTE(review): the lengths are received as int; confirm this matches
 * the PY_SSIZE_T_CLEAN setting of this module.
 * NOTE(review): similarity[][] is indexed with plain char values; input
 * bytes outside the table range are not checked.
 */
static PyObject* py_smith_waterman_context(PyObject* self, PyObject* args)
{
    char *seq1 = NULL;
    char *seq2 = NULL;
    int  len1 = 0, len2 = 0;
    int  i, j;
    int  gap_opening = 0, gap_extension = 0;
    static int ** similarity = NULL;

    char gap_character = '#';
    int  endgap        = 0;
    int  use_endgap    = 0;
    int  far_away      = -1;
    int  max_i, max_j;

    int  **F          = NULL;
    char **direction  = NULL;
    int  *map_i2j     = NULL;
    int  *map_j2i     = NULL;
    char *aligned_seq_1 = NULL;
    char *aligned_seq_2 = NULL;

    int F_max, F_max_i = 0, F_max_j = 0;
    int i_sim, j_sim, diag_sim, max_sim;
    int i_between_exons = 1;
    int j_between_exons = 1;
    int pos = 0;

    const char *errmsg = NULL;   /* non-NULL: return this message instead */
    PyObject   *result = NULL;

    if (!PyArg_ParseTuple(args, "s#s#ii", &seq1, &len1, &seq2, &len2,
			  &gap_opening, &gap_extension)) {
	return NULL;
    }
    if (!seq1 || !seq2) {
	return Py_BuildValue("s", "no seq in py_smith_waterman_context");
    }

    /* passing a matrix this way is all too painful, so the similarity
       matrix is built once and kept for later calls */
    if ( !similarity) {
	similarity = imatrix(ASCII_SIZE, ASCII_SIZE);
	if (!similarity) {
	    return Py_BuildValue("s", "error alloc matrix space");
	}
	load_sim_matrix (similarity);
    }

    max_i = len1;
    max_j = len2;

    /* allocation, initialization */
    if ( ! (F = imatrix (max_i+1, max_j+1)) ) {
	errmsg = "error alloc matrix space";
	goto cleanup;
    }
    if ( ! (direction = cmatrix (max_i+1, max_j+1)) ) {
	errmsg = "error alloc matrix space";
	goto cleanup;
    }
    if (! (map_i2j = emalloc( (max_i+1)*sizeof(int))) ) {
	errmsg = "error alloc matrix space";
	goto cleanup;
    }
    if (! (map_j2i = emalloc( (max_j+1)*sizeof(int))) ) {
	errmsg = "error alloc matrix space";
	goto cleanup;
    }
    for (i=0; i<=max_i; i++) map_i2j[i] = far_away;
    for (j=0; j<=max_j; j++) map_j2i[j] = far_away;

    /* fill the score and direction tables */
    F_max = far_away;
    for (i=0; i<=max_i; i++) {

	if (i > 0) {
	    if (seq1[i-1] == 'B') {
		i_between_exons = 0;
	    } else if (seq1[i-1] == 'Z') {
		i_between_exons = 1;
	    }
	}

	for (j=0; j<=max_j; j++) {

	    if (j > 0) {
		if (seq2[j-1] == 'B') {
		    j_between_exons = 0;
		} else if (seq2[j-1] == 'Z') {
		    j_between_exons = 1;
		}
	    }

	    if ( !i && !j ) {
		F[0][0] = 0;
		direction[i][j] = 'd';
		continue;
	    }

	    if ( i && j ) {
		/* arriving from (i-1, j) */
		i_sim = F[i-1][j] +
		    sw_gap_penalty(j_between_exons, use_endgap && j==max_j,
				   endgap, direction[i-1][j] == 'i',
				   gap_opening, gap_extension);
		/* arriving from (i, j-1) */
		j_sim = F[i][j-1] +
		    sw_gap_penalty(i_between_exons, use_endgap && i==max_i,
				   endgap, direction[i][j-1] == 'j',
				   gap_opening, gap_extension);
		/* arriving from (i-1, j-1) */
		diag_sim = F[i-1][j-1] + similarity [seq1[i-1]][seq2[j-1]];

		max_sim = diag_sim;
		direction[i][j] = 'd';
		if ( i_sim > max_sim ) {
		    max_sim = i_sim;
		    direction[i][j] = 'i';
		}
		if ( j_sim > max_sim ) {
		    max_sim = j_sim;
		    direction[i][j] = 'j';
		}

	    } else if (j) {
		/* first row: only reachable from (i, j-1) */
		max_sim = F[i][j-1] +
		    sw_gap_penalty(j_between_exons, use_endgap, endgap,
				   direction[i][j-1] == 'j',
				   gap_opening, gap_extension);
		direction[i][j] = 'j';

	    } else {
		/* first column: only reachable from (i-1, j) */
		max_sim = F[i-1][j] +
		    sw_gap_penalty(i_between_exons, use_endgap, endgap,
				   direction[i-1][j] == 'i',
				   gap_opening, gap_extension);
		direction[i][j] = 'i';
	    }

	    if (max_sim < 0) max_sim = 0;

	    F[i][j] = max_sim;
	    if ( F_max < max_sim ) {
		/* TODO: tie break here */
		F_max   = max_sim;
		F_max_i = i;
		F_max_j = j;
	    }
	}
    }

    /* trace back from the best cell to the origin */
    i = F_max_i;
    j = F_max_j;
    while ( i>0 || j>0 ) {

	if ( i<0 || j<0 ) {
	    errmsg = "Retracing error";
	    goto cleanup;
	}

	if (direction[i][j] == 'd') {
	    map_i2j [i-1] = j-1;
	    map_j2i [j-1] = i-1;
	    i -= 1;
	    j -= 1;
	} else if (direction[i][j] == 'i') {
	    map_i2j [i-1] = far_away;
	    i -= 1;
	} else if (direction[i][j] == 'j') {
	    map_j2i [j-1] = far_away;
	    j -= 1;
	} else {
	    errmsg = "Retracing error";
	    goto cleanup;
	}
    }

    /* at most len1+len2 columns, plus the terminator that
       Py_BuildValue("s") relies on */
    if (! (aligned_seq_1 = emalloc( (len1+len2+1)*sizeof(char))) ) {
	errmsg = "error alloc array space";
	goto cleanup;
    }
    if (! (aligned_seq_2 = emalloc( (len1+len2+1)*sizeof(char))) ) {
	errmsg = "error alloc array space";
	goto cleanup;
    }

    /* write out the two aligned rows */
    i = 0;
    j = 0;
    while (i < max_i || j < max_j) {

	if (j<max_j && i<max_i) {
	    if (map_i2j[i] == j) {
		aligned_seq_1[pos] = seq1[i];
		aligned_seq_2[pos] = seq2[j];
		i += 1;
		j += 1;
	    } else if (map_i2j[i] < 0) {
		aligned_seq_1[pos] = seq1[i];
		aligned_seq_2[pos] = gap_character;
		i += 1;
	    } else if (map_j2i[j] < 0) {
		aligned_seq_1[pos] = gap_character;
		aligned_seq_2[pos] = seq2[j];
		j += 1;
	    } else {
		/* inconsistent maps: stop rather than loop without progress */
		errmsg = "Retracing error";
		goto cleanup;
	    }
	} else if (j<max_j) {
	    aligned_seq_1[pos] = gap_character;
	    aligned_seq_2[pos] = seq2[j];
	    j += 1;
	} else {
	    aligned_seq_1[pos] = seq1[i];
	    aligned_seq_2[pos] = gap_character;
	    i += 1;
	}
	pos ++;
    }
    aligned_seq_1[pos] = '\0';
    aligned_seq_2[pos] = '\0';

 cleanup:
    /* "s" copies its argument, so every buffer can be released here */
    if (errmsg) {
	result = Py_BuildValue("s", errmsg);
    } else {
	result = Py_BuildValue("ss", aligned_seq_1, aligned_seq_2);
    }
    if (F)         free_imatrix(F);
    if (direction) free_cmatrix(direction);
    free(map_i2j);
    free(map_j2i);
    free(aligned_seq_1);
    free(aligned_seq_2);

    return result;
}
/********************************************************* * * * Main Function * * ------------- * * * ********************************************************* | Takes arguments and launches the gof simulations. | *-------------------------------------------------------*/ int main(int argc, char *argv[]) // Array of char=arguments line { //--- Declarations & Call Function ---// int i=0, j=0, k=0, count=0, howmany=0, segsites=0, okim=0, oksim=0, numsim=0, totsim=0, nokl=0, nokl0=0; int **statseg=NULL, **nbvariant=NULL, oks[3], okstot[3]; FILE *pf=NULL, *fopen(const char*, const char*); // Pointer on File for outputs (pf) and IM input file double tajd(); char **list=NULL; // Haplotype list void updatemainparams(struct params*); int gensam(struct params*, char**, int**, int*); int **imatrix(int, int); //// From rand1.c //// /* Celine changed 03/18/2010 */ void seedit(char*, FILE*, struct params *);/*/////*/ char **cmatrix(int, int); //// From params.c //// void changeparams(struct params*); void changeparamslocus(struct params*, int); struct params getpars(int, char*[], int*); //--- Structure declaration---// struct params param; //--- Get arguments ---// param=getpars(argc, argv, &howmany); // Get input by user for parameters pf=stdout; // Output /* Celine changed 03/18/2010 */ if( !param.commandlineseedflag ) seedit("s", pf, ¶m);// WRITE seeds in summary output file /*/////*/ /* Uncommented for Celine's use */ /* for(i=0;i<argc;i++) // Information on simulation fprintf(pf, "%s ", argv[i]); */////\ //---------- Initialisation & Memory allocation ------------------// nbvariant=imatrix(param.cp.npop+1, maxsites); // array of nb of frequency spectrum typeseg=(int*)malloc((unsigned)(maxsites*sizeof(int))); // type of sites statseg=imatrix(param.cp.npop+3, howmany); // Records locus specific S1, S2, Ss, Sf, changeparams(¶m); // Change estimates parameters from priors updatemainparams(¶m); // Update parameters for the coalescent 
oksim=totsim=okstot[0]=okstot[1]=okstot[1]=okstot[2]=0; // simulation check, total #of sim, check on statistics for all simulations for(numsim=0; numsim<param.cp.nsim;numsim++) // Loop along number of simulations for this set of parameters { //--- Initialization and reset of quality checks ---// count=nokl=nokl0=okim=oks[0]=oks[1]=oks[1]=oks[2]=0; // number of loci, # loci with ok genealogies, no set sites, #statistics ok for(i=0;i<11;i++) // Sim specific Stats param.cp.sSiFst[i]=0; //--- Loop along the loci in the simulation ---// while((howmany-count++)) { if(okim==0) // Case All loci ok in the sample { for(i=0;i<11;i++) { param.cp.lSiFst[i]=0.0; if(i<9) param.lp[count-1].tpH[i]=0; } changeparamslocus(¶m, count-1); // Get locus specific parameters list=cmatrix(param.cp.nsam, maxsites+1); // Allocate list of haplotypes segsites=gensam(¶m, list, nbvariant, param.lp[count-1].S);// Generate a new gene ARG statseg[0][count-1]=segsites; // Total number of seg sites in sample for(i=1;i<3+param.cp.npop;i++) statseg[i][count-1]=0; if((segsites>0)) // Case segsite>0: get stats { /* fprintf(pf, "segsites:%d\npositions:\n",segsites); */ /* for(i=0;i<param.cp.nsam;i++) fprintf(pf, "%s\n", list[i]); */ /* fprintf(pf, "\n"); */ if(segsites<=param.cp.nsites) // Case segsite < lenght of locus { for(k=0;k<param.cp.nsam;k++) { for(i=k+1;i<param.cp.nsam;i++) { if((k<param.lp[count-1].ni[1])&&(i<param.lp[count-1].ni[1])) // pop1 { param.lp[count-1].tpH[0]++; // # chromosomes for(j=0;j<segsites;j++) { if(list[k][j]!=list[i][j]) param.lp[count-1].tpH[1]++; // # seg sites } } else if((k>=param.lp[count-1].ni[1])&&(i>=param.lp[count-1].ni[1])) // pop2 { param.lp[count-1].tpH[2]++; // # chromosomes for(j=0;j<segsites;j++) { if(list[k][j]!=list[i][j]) param.lp[count-1].tpH[3]++; // # seg sites } } else // total sample { param.lp[count-1].tpH[4]++;; // Totsal sample size for(j=0;j<segsites;j++) { if(list[k][j]!=list[i][j]) param.lp[count-1].tpH[5]++; // total S } } }// Loop on chromosome 
}// Loop along all sampled sequence for the locus for(i=0;i<segsites;i++) // Calulate S statistics for the locus { if(typeseg[i]<0) statseg[3][count-1]++; // shared else if(typeseg[i]<param.cp.npop+1) statseg[typeseg[i]][count-1]++; // population specific else statseg[4][count-1]++; // fixed } for(i=1;i<5;i++) //--- Record S1 S2 Ss Sf forthe locus ---// param.cp.lSiFst[i-1]+=statseg[i][count-1]; for(i=0;i<param.cp.nsam;i++) // Free memory for this locus free(list[i]); free(list); }// End case segsite<lenght of locus else // Case segsites>lenght of locus { okim=1; // Sample have a wrong locus oksim=1; // stop this simulation break; } }// End locus polymorphic else // Locus without seg sites { okim=1; // Sample have a wrong locus (S=0) oksim=2; // 0 for all stats } }// End Sample good until now if(okim==0) // All loci good until now { nokl++; // +1 good locus nokl0++; // +1 polymorphic locus for(i=0;i<7;i++) { if(i<4) param.cp.sSiFst[i]+=param.cp.lSiFst[i]; // sum of Sk param.lp[count-1].H[i]=param.lp[count-1].tpH[i]; // locus specific stats } param.cp.lSiFst[5]=param.lp[count-1].H[1]/=param.lp[count-1].H[0]; // Hw1 param.cp.lSiFst[6]=param.lp[count-1].H[3]/=param.lp[count-1].H[2]; // Hw2 param.lp[count-1].H[5]/=param.lp[count-1].H[4]; // Hb param.cp.lSiFst[4]=param.lp[count-1].H[6]=1-((param.lp[count-1].H[1]+param.lp[count-1].H[3])/2)/ param.lp[count-1].H[5];// locis specific Fst if((param.lp[count-1].S[0]>0)&&(param.lp[count-1].ni[1]>2)) // Locus popymorphic in pop1 { param.cp.lSiFst[7]=tajd(param.lp[count-1].ni[1], param.lp[count-1].S[0], param.lp[count-1].H[1]); param.cp.sSiFst[7]+=param.cp.lSiFst[7]; oks[0]++; // +1 good stat for pop1 } if((param.lp[count-1].S[1]>0)&&(param.lp[count-1].ni[2]>2)) // Locus popymorphic in pop2 { param.cp.lSiFst[8]=tajd(param.lp[count-1].ni[2], param.lp[count-1].S[1], param.lp[count-1].H[3]); param.cp.sSiFst[8]+=param.cp.lSiFst[8]; oks[1]++; // +1 good stat for pop2 } if(statseg[1][count-1]>0) // Locus popymorphic private in pop1 
param.lp[count-1].H[7]=param.cp.lSiFst[9]+=(double) param.lp[count-1].S[2]/(statseg[1][count-1]*param.lp[count-1].ni[1]*2); // p(1) if(statseg[2][count-1]>0) // Locus popymorphic private in pop2 param.lp[count-1].H[7]=param.cp.lSiFst[9]+=(double) param.lp[count-1].S[3]/(statseg[2][count-1]*param.lp[count-1].ni[2]*2); // p(1) param.cp.sSiFst[9]+=param.cp.lSiFst[9]; // sum p1 if(statseg[3][count-1]>0) // Locus popymorphic private in pop2 { param.lp[count-1].H[8]=param.cp.lSiFst[10]=(double) param.lp[count-1].S[4]/(statseg[3][count-1]*(param.lp[count-1].ni[2]+param.lp[count-1].ni[1])); // p(2) param.cp.sSiFst[10]+=param.cp.lSiFst[10]; // sum p2 oks[2]++; } param.cp.sSiFst[4]+=param.lp[count-1].H[6]; // sum Fst param.cp.sSiFst[5]+=param.lp[count-1].H[1]; // sum Hw1 param.cp.sSiFst[6]+=param.lp[count-1].H[3]; // sum Hw2 } else if(oksim==2) // Case no seg site for that locus { oksim=0; // reset checks okim=0; nokl++; // 1+ locus to count in mean (all 0 values) for(i=0;i<9;i++) param.lp[count-1].H[i]=param.lp[count-1].tpH[i]; // locus specific stats } }// End loop on loci if(nokl==howmany) // All sample good { totsim++; // 1+ good simulation for(i=0;i<4;i++) param.cp.SiFst[i]+=param.cp.sSiFst[i]; // sum of S stats along simulations for(i=4;i<11;i++) { if(((i<7)||(i>=9))&&(nokl0>0)) param.cp.SiFst[i]+=(double)param.cp.sSiFst[i]/nokl0; // mean of other stats along simulations if((i>=7)&&(i<9)&&(oks[i-7]>0)) { param.cp.SiFst[i]+=(double)param.cp.sSiFst[i]/oks[i-7]; okstot[i-7]++; } } } }// End loop on simulations if(oksim==0) // All simulations worked { /* Uncommented for Celine's use */ /* for(i=0;i<11;i++) *///// for(i=0;i<9;i++) { if((i<7)||(i>=9)) fprintf(pf, "%lg\t", (double) param.cp.SiFst[i]/(totsim)); // write mean of sum of S stats, Fst and Hws over simulations if((i>=7)&&(i<9)) { if(oks[i-7]>0) fprintf(pf, "%lg\t", (double) param.cp.SiFst[i]/(okstot[i-7])); // write mean Tds if S>0 in pops else fprintf(pf, "NA\t" ); } } fprintf(pf, "\n"); } else // Case one locus 
with too much seg sites { for(i=0;i<9;i++) fprintf(pf, "NA\t" ); fprintf(pf, "\n"); } /* Celine changed 03/18/2010 */ seedit("end", pf, ¶m); // in randx.c, flag[0]!="s" so create/rewrite seed in seedmimar /*/////*/ fclose(pf); free(typeseg); for(i=0;i<param.cp.npop+3;i++) { if(i<param.cp.npop+1) free(nbvariant[i]); free(statseg[i]); } free(nbvariant); free(statseg); ///////// FREE PARAM /////// for(i=0;i<param.cp.npop;i++) free(param.cp.mig_mat[i]); free(param.cp.mig_mat); free(param.cp.config); /* Celine changed 11/27/2009 */ for(i=9;i>=0;i--) if(param.cp.listevent[i]!=NULL && param.cp.listevent[i]->nextde!=NULL) free(param.cp.listevent[i]->nextde); if(param.cp.listevent!=NULL) free(param.cp.listevent); /*/////*/ free(param.cp.deventlist); free(param.cp.size); free(param.cp.alphag); for(i=0;i<3;i++) free(param.cp.uniform[i]); free(param.cp.uniform); free(param.cp.oldest); free(param.cp.newest); free(param.cp.newparam); /* Celine changed 11/27/2009 */ for(i=0;i<howmany;i++) free(param.lp[i].name); /*/////*/ free(param.lp); /* Celine changed 03/18/2010 */ free( param.tableseeds); /*/////*/ exit(0); }// End main function
main(int argc, char *argv[]) { char **oldimage,**newimage,**cmatrix(int a, int b, int c, int d) ; int i,j,iold,jold,ioldp,joldp ; double xmax,ymax,dx2,x1max ; double r,th,diold,djold,x1,x2 ; void new_coord(int i,int j, double *r, double *th) ; void old_coord(double r,double th,double *x1,double *x2) ; /* get size of image from arguments */ if(argc < 8) { fprintf(stderr,"Usage: \n" ) ; fprintf(stderr,"\t image_interp oN1 oN2 nN1 nN2 Rin Rout xmax ymax hslope\n") ; fprintf(stderr," where \n"); fprintf(stderr,"\t oN1 = N1 from simulation \n"); fprintf(stderr,"\t oN2 = N2 from simulation \n"); fprintf(stderr,"\t nN1 = # of pixels in x-dir \n"); fprintf(stderr,"\t nN2 = # of pixels in x-dir \n"); fprintf(stderr,"\t Rin = Rin used in simulation \n"); fprintf(stderr,"\t Rout = Rout used in simulation\n"); fprintf(stderr,"\t xmax = interpolated image will span x=[0,xmax] \n"); fprintf(stderr,"\t ymax = interpolated image will span y=[-ymax,ymax] \n"); fprintf(stderr,"\t hslope = hslope value from run\n"); fprintf(stderr,"\n\n Example: \n"); fprintf(stderr," cat im_lrho_0000.r8|awk '(FNR>4) {print}'|image_interp 128 64 128 256 1.321025567223338859 40. 40. 40. 
0.3 > im_int.r8 \n"); fprintf(stderr,"\n\n "); exit(0) ; } sscanf(argv[1],"%d",&oN1) ; sscanf(argv[2],"%d",&oN2) ; sscanf(argv[3],"%d",&nN1) ; sscanf(argv[4],"%d",&nN2) ; sscanf(argv[5],"%lf",&rin) ; sscanf(argv[6],"%lf",&rout) ; sscanf(argv[7],"%lf",&xmax) ; sscanf(argv[8],"%lf",&ymax) ; sscanf(argv[9],"%lf",&hslope) ; x1max = log(rout/rin) ; dx1 = x1max/oN1 ; dx2 = 1./(double)oN2 ; dx = xmax/(double)nN1 ; dy = 2.*ymax/(double)nN2 ; /* make arrays for images */ oldimage = cmatrix(0,oN1-1,0,oN2-1) ; newimage = cmatrix(0,nN1-1,0,nN2-1) ; fprintf(stdout,"RAW\n# \n%d %d\n%d\n",nN1,nN2,255); /* read in old image */ for(j=oN2-1;j>=0;j--) for(i=0;i<oN1;i++) { fread(&oldimage[i][j], sizeof(unsigned char), 1, stdin) ; /* fprintf(stderr,"%d %d %u\n",i,j,oldimage[i][j]) ; */ } /* interpolate to new image */ for(j=nN2-1;j>=0;j--) for(i=0;i<nN1;i++) { new_coord(i,j,&r,&th) ; old_coord(r,th,&x1,&x2) ; /* fprintf(stderr,"%d %d %g %g %g %g\n",i,j,r,th,x1,x2) ; */ if(x1 < 0. || x1 >= x1max || x2 < 0. || x2 >= 1.) newimage[i][j] = 0 ; else { #if 0 iold = (int)(x1/dx1 - 0.5) ; diold = x1/dx1 - 0.5 - (int)(x1/dx1 - 0.5) ; jold = (int)(x2/dx2 - 0.5) ; djold = x2/dx2 - 0.5 - (int)(x2/dx2 - 0.5) ; ioldp = iold+1 ; joldp = jold+1 ; /* take care of boundary effects */ if( diold < 0. ) ioldp = 0 ; else if (iold == oN1-1) ioldp = oN1-1 ; if( djold < 0. ) joldp = 0 ; else if( jold == oN2-1 ) joldp = oN2-1 ; /* fprintf(stderr,"iold, jold: %d %d %d %d %g %g\n", iold,jold,ioldp,joldp,diold,djold) ; fprintf(stderr,"old: %u\n",oldimage[iold][jold]) ; */ newimage[i][j] = (char)(0.5 + (1. - diold)*(1.-djold)*oldimage[iold][jold] + (1. 
- diold)*djold*oldimage[iold][joldp] + diold*(1.-djold)*oldimage[ioldp][jold] + diold*djold*oldimage[ioldp][joldp]) ; #endif iold = (int)(x1/dx1 - 1.e-20) ; jold = (int)(x2/dx2 - 1.e-20) ; newimage[i][j] = oldimage[iold][jold] ; /* fprintf(stderr,"newim:%d %d %d %d %u\n",i,j,iold,jold,newimage[i][j]) ; */ } fwrite(&newimage[i][j], sizeof(unsigned char), 1, stdout) ; } }
/* * Function read_init is used to read in the initial values of selfing rates (S) for MCMC updating * Input argument: initialfilename is the directory of the file containing the initial values, * if it is NULL,then using random number generator to generate the INIT structure * chainnum is the number of MCMC chains, determining the number of sets of initial values * popnum is the number of subpopulation assumed * chainnum and popnum determines the dimensions of the "initd" element of the INIT struture * Output argument: this application returns an INIT structure, which contains the "chainnum" sets of initial values for F per subpopulation * The data in the file should be ranged as: * >chain_name1 each set of the initial values should begin with ">S" * num1 num2.. * * >chain_name2 * .... */ INIT read_init(char *initialfilename,int chainnum,int popnum,long update,long burnin,int thinning) { //char sign='#'; int i,j,temp,cnt_chn=0; FILE *initfp; char *line; INIT initial; initial.chainnum=chainnum; initial.update=update; initial.burnin=burnin; initial.thinning=thinning; initial.popnum=popnum; initial.initd=matrix(0,chainnum-1,0,popnum-1); initial.name_len=ivector(0,chainnum-1); initial.chn_name=cmatrix(0,chainnum-1,0,MAXLEN-1); if(initialfilename==NULL) { for(i=0;i<chainnum;i++) { for(j=0;j<popnum;j++) initial.initd[i][j]=ran1(); strcpy(initial.chn_name[i],"Chain#"); strcat(initial.chn_name[i],int_to_string(i+1)); initial.name_len[i]=strlen(initial.chn_name[i])+1; initial.chn_name[i][initial.name_len[i]-1]='\0'; } } else{ if((initfp=fopen(initialfilename,"r"))==NULL) { nrerror("Cannot open inital file!");} line=cvector(0,MAXLINE-1); cnt_chn=0; for(i=0;i<chainnum&&(!feof(initfp));i++) { while(!feof(initfp)) { fgets(line,MAXLINE,initfp); if(line[0]=='>') break; } for(j=1;j<strlen(line)&&line[j]!='\n';j++) { initial.chn_name[i][j-1]=line[j]; } initial.chn_name[j]='\0'; //printf("%s\n",initial.chn_name[i]); initial.name_len[i]=j; fgets(line,MAXLINE,initfp); 
temp=word_cnt(line); if(temp!=popnum) { nrerror("The number of initial values for selfing rates is not equal the number of subpopulation assumed!\n"); } word_split(line,initial.initd[cnt_chn],popnum); cnt_chn++; } if(cnt_chn<=chainnum) { for(i=cnt_chn;i<chainnum;i++) { for(j=0;j<popnum;j++) initial.initd[i][j]=ran1(); strcpy(initial.chn_name[i],"Chain#"); strcat(initial.chn_name[i],int_to_string(i+1)); initial.name_len[i]=strlen(initial.chn_name[i])+1; initial.chn_name[i][initial.name_len[i]-1]='\0'; } } if(cnt_chn>chainnum) { nrerror("The number of chain starting points is greater than the number of chains!\n"); } fclose(initfp); free_cvector(line,0,MAXLINE-1); } /*for(i=0;i<chainnum;i++) { for(j=0;j<popnum;j++) fprintf(stdout,"%f\t",initial.initd[i][j]); fprintf(stdout,"\n"); }*/ return(initial); }