Ejemplo n.º 1
0
// find all k-tuples. overlapping ones within w discarded
void count_k_tuples(char **seq,char **rseq,int numSeq,int *seqLen,
   char **kmer,int numKer,int kmerLen,int *kmerCn) {

   register int i,j,k,m,l;
   int used,slide,numUniq;
   int *uniq,*id;
   char *s1,*s2;

   s1=alloc_char(kmerLen+1);
   s2=alloc_char(kmerLen+1);
   uniq=alloc_int(2*kmerLen);
   id=alloc_int(2*kmerLen);

   for (m=0; m<numKer; m++) kmerCn[m]=0;
   for (i=0; i<numSeq; i++) {

      for (m=0; m<2*kmerLen; m++) id[m]=-1;
      slide=0;
      for (j=0; j<seqLen[i]-kmerLen+1; j++) {
         for (k=0; k<kmerLen; k++)  s1[k]=seq[i][j+k];  s1[k]='\0';
         for (k=0; k<kmerLen; k++)  s2[k]=rseq[i][seqLen[i]-kmerLen-j+k];  s2[k]='\0';

         for (m=0; m<numKer; m++) {
            if (strncmp(s1,kmer[m],kmerLen)==0) { id[slide]=m; break; } 
         }
         slide++;

         for (m=0; m<numKer; m++) {
            if (strncmp(s2,kmer[m],kmerLen)==0) { id[slide]=m; break; } 
         }
         slide++;

         if ((slide==2*kmerLen) || (j==seqLen[i]-kmerLen)) {
            for (l=0; l<2*kmerLen; l++) uniq[l]=-2;

            numUniq=0;
            for (k=0; k<2*kmerLen; k++) {

               used=0;
               for (l=0; l<numUniq; l++) {
                  if (id[k]==uniq[l]) { used=1; break; } 
               }

               if (!used && id[k]!=-1) {
                  uniq[numUniq]=id[k]; numUniq++; 
               }
            }
            for (l=0; l<numUniq; l++)  (kmerCn[uniq[l]])++;
            slide=0;
            for (m=0; m<2*kmerLen; m++) id[m]=-1;
         }
      }
   }
   if (s1)   { free(s1);   s1=NULL;   }
   if (s2)   { free(s2);   s2=NULL;   }
   if (id)   { free(id);   id=NULL;   }
   if (uniq) { free(uniq); uniq=NULL; }
}
Ejemplo n.º 2
0
static void insert_node(struct node **pn, char c)
{
    struct node *n = malloc(sizeof *n);
    if (!n)
        abort();

    n->data = (c) ? alloc_char(c) : NULL;
    n->next = *pn;
    *pn = n;
}
Ejemplo n.º 3
0
Ktuples *alloc_ktuples(int numKmer,int kmerLen) {

   int i;
   Ktuples *tmp=NULL;

   tmp=(Ktuples *)calloc(numKmer,sizeof(Ktuples));

   for (i=0; i<numKmer; i++) tmp[i].seq=alloc_char(kmerLen+1);
   return (tmp);   
}
Ejemplo n.º 4
0
char* convertRString2Char(SEXP rstring)
{
  char* charArray;
  int rstringlength=length(rstring);
  //int i=0;
  int lengthcharArray=0;

  charArray=alloc_char(rstringlength+1);

  strcpy(charArray, CHAR(STRING_ELT(rstring, 0)));

  lengthcharArray = (int)(strlen(charArray)); 
  charArray[lengthcharArray]='\0';

  return charArray;
}
Ejemplo n.º 5
0
void print_bed(Sites *site,int nsites,char **geneID,int *seqLen,int pwmLen,int id) {

   FILE *f1;
   char *fileName,*s1,*chr;
   int s,e,e2,len,start;
   register int i,j,k;

   s1=alloc_char(20);
   chr=alloc_char(20);
   fileName=alloc_char(500);
   /*Rprintf("%d.bed",id);*/
   /*f1=fopen(fileName,"w");*/
   for (i=0; i<nsites; i++) {
      len=strlen(geneID[site[i].seq]);

      s=-1; e=-1;
      for (j=0; j<len-3; j++) {
         if (geneID[site[i].seq][j]=='c' && geneID[site[i].seq][j+1]=='h' && geneID[site[i].seq][j+2]=='r') {
            s=j; break;
         }
      }
      for (j=s; j<len; j++) {
         if (geneID[site[i].seq][j]==':') {
            e=j; break;
         }
      }
      if (s!=-1 && e!=-1) {
         for (k=0,j=s; j<e; j++,k++)  chr[k]=geneID[site[i].seq][j];
         chr[k]='\0';
      }
      else {
         error("%s chr not found! %d %d\n",geneID[site[i].seq],s,e);
         /*Rprintf("%s chr not found! %d %d\n",geneID[site[i].seq],s,e); exit(0);*/
      }
      e2=-1;
      for (j=e+1; j<len; j++) {
         if (geneID[site[i].seq][j]=='-') {
            e2=j; break;
         }
      }
      if (e2!=-1) {
         for (k=0,j=e+1; j<e2; j++,k++)  s1[k]=geneID[site[i].seq][j];
         s1[k]='\0';
         start=atoi(s1);
      }
      else {
         error("start not found!\n");
         /*Rprintf("start not found!\n"); exit(0);*/
      }

      if (site[i].rev=='0') {
         if (site[i].pos>=0) Rprintf("%s\t%d\t%d\n",chr,site[i].pos+start,site[i].pos+pwmLen+start-1);
      }
      else {
         if (site[i].pos>=0) Rprintf("%s\t%d\t%d\n",chr,seqLen[site[i].seq]-site[i].pos-pwmLen+start,seqLen[site[i].seq]-site[i].pos+start-1);
      }
   }
   /*fclose(f1);*/
   if (fileName) { free(fileName); fileName=NULL; }
   if (s1)       { free(s1);       s1=NULL;       }
}
Ejemplo n.º 6
0
void print_motif(Sites *site,int nsites,char **seq,char **rseq,int *seqLen,int pwmLen,int id,double **opwm) {

   //FILE *f1;
   char *fileName;
   register int i,j;

   fileName=alloc_char(500);
   /*Rprintf("%d.seq",id);*/
  //f1=fopen(fileName,"w");
   for (i=0; i<nsites; i++) {
      if (site[i].rev=='0') {
         if (site[i].pos<0) {
            //for (j=site[i].pos; j<0; j++) Rprintf("x"); 
            for (j=0; j<pwmLen+site[i].pos; j++) {
               switch(seq[site[i].seq][j]) {
                  //case 'a': Rprintf("a"); break;
                  //case 'c': Rprintf("c"); break;
                  //case 'g': Rprintf("g"); break;
                  //case 't': Rprintf("t"); break;
                  //case 'n': Rprintf("n"); break;
                  default: break;
               }
            }
         }
         else {
            for (j=site[i].pos; j<min(seqLen[site[i].seq],site[i].pos+pwmLen); j++) {
               switch(seq[site[i].seq][j]) {
                  //case 'a': Rprintf("a"); break;
                  //case 'c': Rprintf("c"); break;
                  //case 'g': Rprintf("g"); break;
                  //case 't': Rprintf("t"); break;
                  //case 'n': Rprintf("n"); break;
                  default: break;
               }
            }
         }
         if (site[i].pos+pwmLen-seqLen[site[i].seq]>0) {
            //for (j=seqLen[site[i].seq]; j<site[i].pos+pwmLen; j++) Rprintf("x"); 
         }
         //Rprintf("\n");
      }
      else {
         if (site[i].pos<0) {
            //for (j=site[i].pos; j<0; j++) Rprintf("x"); 
            for (j=0; j<pwmLen+site[i].pos; j++) {
               switch(rseq[site[i].seq][j]) {
               //   case 'a': Rprintf("a"); break;
                //  case 'c': Rprintf("c"); break;
                //  case 'g': Rprintf("g"); break;
                //  case 't': Rprintf("t"); break;
                //  case 'n': Rprintf("n"); break;
                  default: break;
               }
            }
         }
         else {
            for (j=site[i].pos; j<min(seqLen[site[i].seq],site[i].pos+pwmLen); j++) {
               switch(rseq[site[i].seq][j]) {
                 // case 'a': Rprintf("a"); break;
                //  case 'c': Rprintf("c"); break;
                //  case 'g': Rprintf("g"); break;
                //  case 't': Rprintf("t"); break;
                //  case 'n': Rprintf("n"); break;
                  default: break;
               }
            }
         }
         if (site[i].pos+pwmLen-seqLen[site[i].seq]>0) {
            //for (j=seqLen[site[i].seq]; j<site[i].pos+pwmLen; j++) Rprintf("x"); 
         }
         //Rprintf("\n");
      }
   }
   //fclose(f1);
   if (fileName) { free(fileName); fileName=NULL; }

   // print out individual observed PWM in gadem format
   /*-----------------------------------------------------------------------
      fileName=alloc_char(500);
      Rprintf("%d.mx",id);
      f1=fopen(fileName,"w");

      Rprintf("4\t%d\n",pwmLen);
      for (i=0; i<4; i++) {
         for (j=0; j<pwmLen; j++) {
            if (j<pwmLen-1) Rprintf("%5.4f\t",opwm[j][i]);
            else Rprintf("%5.4f\n",opwm[j][i]);
         }
      }
      fclose(f1);
      if (fileName) { free(fileName); fileName=NULL; }
   }
   -----------------------------------------------------------------------*/
}
Ejemplo n.º 7
0
SEXP GADEM_Analysis(SEXP sequence,SEXP sizeSeq, SEXP accession, SEXP Rverbose,SEXP RnumWordGroup,SEXP RnumTop3mer,SEXP RnumTop4mer,SEXP RnumTop5mer,SEXP RnumGeneration,SEXP RpopulationSize, SEXP RpValue,SEXP ReValue,SEXP RextTrim,SEXP RminSpaceWidth,SEXP RmaxSpaceWidth,SEXP RuseChIPscore,SEXP RnumEM,SEXP RfEM, SEXP RwidthWt,SEXP RfullScan, SEXP RslideWinPWM,SEXP RstopCriterion,SEXP RnumBackgSets,SEXP RweightType,SEXP RbFileName,SEXP RListPWM,SEXP RminSites,SEXP RmaskR,SEXP Rnmotifs) 
{
  char *bFileName;
  
  SEXP ResultsGadem;
  SEXP RSpwm;
  PROTECT(ResultsGadem=NEW_LIST(100));  
  
  int increment=0;
  
  double testrand;
  
  //Number of sequences
  int numSeq = INTEGER_VALUE(sizeSeq);
  // const
//  char *Fastaheader[size];
  int incr=0;
  
  int longueur=length(sequence);
  int IncrementTemp=0;
  
  // basic settings/info
  int maxSeqLen,*seqLen;       		 // sequence info	
  double aveSeqLen;                      // sequence info
  char **seq,**rseq;
  int *geneID;         			 // sequence info
  char **oseq,**orseq;                   // copy of the original sequences
  char **sseq,**rsseq;                   // simulated seqs.
  double *bfreq1, *bfreq0=NULL;                // base frequencies
  double *ChIPScore;                     // chip score
  int maskR;				 // mask simple repeats before running the algorithm
    
  // pwms
  double ***pwm;                         // initial population of PWMs from spaced dyads
  int *pwmLen;                           // initial pwm lengths
  double **opwm2;                        // EM-derived PWM
  double ***opwm;                        // observed PWMs from identified sites
  double ***epwm;                        // em-optimized PWMs
  double **logepwm;                      // log(em-optimized PWM)
  int *pwmnewLen;                        // final motif length after extending to both ends
  
  // llr score distr.
  Pgfs *llrDist;                         // llr distribution from pgf method
  int llrDim;                            // llr distribution dimension
  int **ipwm;                            // integer pwm for computing llr score distribution
  
  // EM, motif, sites
  double pvalueCutoff;                   // user input, used to determine score cutoff based on ipwm
  int *scoreCutoff;                      // pwm score cutoff for the corresponding p-value cutoff
  double logev;                          // log of E-value of a motif;
  int useChIPscore;                      // indicator for using ChIP-seq score for seq. selection for EM
  int numEM;                             // number of EM steps
  double E_valueCutoff;                  // log E-value cutoff
  //int nsitesEM;                          // number of binding sites in sequences subjected to EM
  int minsitesEM;                        // minimal number of sites in a motif in EM sequences
  int *nsites;                           // number of binding sites in full data
  int minsites;                          // minimal number of sites in a motif in full data
  Sites **site;                          // binding sites in all sequences
  int motifCn;                           // number of motifs sought and found
  int extTrim;
  int noMotifFound;                      // none of the dyads in the population resulted in a motif
  char **pwmConsensus;                   // consensus sequences of motifs
  double pwmDistCutoff;                  // test statistic for motif pwm similarity
  char *uniqMotif;                       // motifs in a population unique or not
  int numUniq;                           // number of unique motifs in a population
  int slideWinPWM;                       // sliding window for comparing pwm similarity
  int widthWt;                           // window width in which nucleotides are given large weights for PWM optimization
  int fullScan;                          // scan scan on the original sequences or masked sequences
  
  // background
  int numBackgSets;
  
  // weights
  double **posWeight;                    // spatial weights
  int weightType;                        // four weight types 0, 1, 2, 3, or 4
  
  // words for spaced dyad
  Words *word;                           // top-ranked k-mers as the words for spaced dyads
  int numTop3mer,numTop4mer,numTop5mer;  // No. of top-ranked k-mers as words for dyads
  int maxWordSize;                       // max of the above three
  int numWordGroup;                      // number of non-zero k-mer groups
  int minSpaceWidth,maxSpaceWidth;       // min and max width of spacer of the spaced dyads
  Chrs **dyad;                           // initial population of "chromosomes"
  char **sdyad;                          // char of spaced dyads
  
  // GA
  int populationSize,numGeneration;      // GA parameters
  double maxpMutationRate;
  Fitness *fitness;                      // "chromosome" fitness
  Wheel *wheel;                          // roulette-wheel selection
  
  // to speed up only select a subset of sequences for EM algorithm
  double fEM;                            // percentage of sequences used in EM algorithm
  int numSeqEM;                          // number of sequences subject to EM
  char *Iseq;                            // Indicator if a sequence is used in EM or not
  int *emSeqLen;                         // length of sequences used in EM
  double *maxpFactor;
  
  int numCycle;                          // number of GADEM cycles
  int generationNoMotif;                 // maximal number of GA generations in a GADEM cycle resulted in no motifs
  
  // mis.
  //seed_t  seed;                          // random seed
  int motifCn2,id,numCycleNoMotif,verbose,minminSites,nmotifs;
  int startPWMfound,stopCriterion;
  char *mFileName,*oFileName,*pwmFileName,*tempRbFileName;
  time_t start;
  int cn[4],bcn[4],*seqCn,*bseqCn,avebnsites,avebnsiteSeq,totalSitesInput;
  int i; 
  int ii=0;
  int jjj=0;
  
  /*************/
  FILE * output = fopen("output.txt", "w"); 
  /*************/
  
  GetRNGstate();
  

  mFileName=alloc_char(500);         mFileName[0]='\0';
  oFileName=alloc_char(500);         oFileName[0]='\0';
  pwmFileName=alloc_char(500);       pwmFileName[0]='\0';
  bFileName=alloc_char(500);         bFileName[0]='\0';
  //tempRbFileName=alloc_char(500);    tempRbFileName[0]='\0';
  seq=NULL; aveSeqLen=0; maxSeqLen=0; 
  //minsites=-1; 
  
  startPWMfound=0;    

  maxSeqLen=0;
  for(incr=1;incr<longueur;incr=incr+2)
  { 
    if (length(STRING_ELT(sequence,(incr)))>maxSeqLen) maxSeqLen=length(STRING_ELT(sequence,(incr))); 
  }
//  fprintf(output,"maxLength=%d",maxSeqLen);
//  exit(0);
  seq=alloc_char_char(numSeq,maxSeqLen+1);
  for(incr=1;incr<longueur;incr=incr+2)
  { 
    for (int j=0; j<length(STRING_ELT(sequence,(incr))); j++)
    {
      seq[IncrementTemp][j]=CHAR(STRING_ELT(sequence,(incr)))[j];
    }
    IncrementTemp++;
  }
  
  
  verbose=LOGICAL_VALUE(Rverbose);
  numWordGroup=INTEGER_VALUE(RnumWordGroup);
  minsites=INTEGER_VALUE(RminSites);
  numTop3mer=INTEGER_VALUE(RnumTop3mer);
  numTop4mer=INTEGER_VALUE(RnumTop4mer);
  numTop5mer=INTEGER_VALUE(RnumTop5mer);
  numGeneration=INTEGER_VALUE(RnumGeneration);
  populationSize=INTEGER_VALUE(RpopulationSize);
  pvalueCutoff=NUMERIC_VALUE(RpValue);
  E_valueCutoff=NUMERIC_VALUE(ReValue);
  extTrim=INTEGER_VALUE(RextTrim);
  minSpaceWidth=INTEGER_VALUE(RminSpaceWidth);
  maxSpaceWidth=INTEGER_VALUE(RmaxSpaceWidth);
  useChIPscore=NUMERIC_VALUE(RuseChIPscore);
  numEM=INTEGER_VALUE(RnumEM);
  fEM=NUMERIC_VALUE(RfEM);
  widthWt=INTEGER_VALUE(RwidthWt);
  fullScan=INTEGER_VALUE(RfullScan);
  slideWinPWM=INTEGER_VALUE(RslideWinPWM);
  numUniq=populationSize;
  stopCriterion=INTEGER_VALUE(RstopCriterion);  
  numBackgSets=INTEGER_VALUE(RnumBackgSets);
  weightType=NUMERIC_VALUE(RweightType);
  //const char *tempRbFileName[1];

 	tempRbFileName = convertRString2Char(RbFileName);	

  //tempRbFileName[0]=CHAR(STRING_ELT(RbFileName,0));
  nmotifs = INTEGER_VALUE(Rnmotifs);
  maskR = INTEGER_VALUE(RmaskR);

  

  if(numSeq>MAX_NUM_SEQ)
  {
    error("Error: maximal number of seqences reached!\nPlease reset MAX_NUM_SEQ in gadem.h and rebuild (see installation)\n");
  }
  
  strcpy(bFileName,tempRbFileName);

  ChIPScore=alloc_double(MAX_NUM_SEQ);
  seqLen=alloc_int(MAX_NUM_SEQ); 
  geneID=alloc_int(MAX_NUM_SEQ);

//  seq=sequences;
  
//  numSeq=size;
  int len; 
  
  for (i=0; i<numSeq; i++)
  {
    len=strlen(seq[i]); 
    seqLen[i]=len;
    geneID[i]=INTEGER(accession)[i];
  }

  aveSeqLen=0; 
  for (i=0; i<numSeq; i++) aveSeqLen +=seqLen[i]; aveSeqLen /=(double)numSeq;
  
  for (i=0; i<numSeq; i++) {
    if (seqLen[i]>maxSeqLen) maxSeqLen=seqLen[i]; 
  }
  
  rseq=alloc_char_char(numSeq,maxSeqLen+1);
  oseq=alloc_char_char(numSeq,maxSeqLen+1);
  orseq=alloc_char_char(numSeq,maxSeqLen+1);
  
  for (i=0; i<numSeq; i++)
  {
    if(seqLen[i]>maxSeqLen) maxSeqLen=seqLen[i]; 
  }
  
  reverse_seq(seq,rseq,numSeq,seqLen);
  
  // make a copy of the original sequences both strands
  for (i=0; i<numSeq; i++)
  {
    for (int j=0; j<seqLen[i]; j++)
    {
      oseq[i][j]=seq[i][j];
      orseq[i][j]=rseq[i][j];
    }
    oseq[i][seqLen[i]]='\0'; orseq[i][seqLen[i]]='\0'; 
  }
    
  if (strcmp(bFileName,"NULL")!= 0)
  {
    bfreq0=alloc_double(5);
    read_background(bFileName,bfreq0);
  }

  if (GET_LENGTH(RListPWM)!= 0)
  {
    startPWMfound=1; 
  }
  else { }
  
    // check for input parameters
  if(numGeneration<1)
  { 
    error("number of generaton < 1.\n");
  }
  if(populationSize<1)
  {
    error("population size < 1.\n");
  }
  if (minSpaceWidth<0)
  { 
    error("minimal number of unspecified bases in spaced dyads <0.\n"); 
  }
  if (maxSpaceWidth<0)
  { 
    error("maximal number of unspecified bases in spaced dyads <0.\n"); 
  }
  if (minSpaceWidth>maxSpaceWidth)
  {
    error("mingap setting must <= to maxgap setting.\n\n"); 
  }
  if (maxSpaceWidth+12>MAX_PWM_LENGTH)
  {
    error("maxgap setting plus word lengths exceed <MAX_PWM_LENGTH>.\n");
  }
  if (numEM<0)
  {
    error("number of EM steps is zero.\n");
  }
  if (numEM==0)
  {
    error("number of EM steps = 0, no EM optimization is carried out.\n");
  }
  
  if (fullScan!=0 && fullScan!=1)
    fullScan=0;
  
  
  maxWordSize=0;
  if (numTop3mer>maxWordSize) maxWordSize=numTop3mer;
  if (numTop4mer>maxWordSize) maxWordSize=numTop4mer;
  if (numTop5mer>maxWordSize) maxWordSize=numTop5mer;
  
    // any one, two or three: tetramer, pentamer, hexamer
  if (numTop3mer==0 && numTop4mer==0 && numTop5mer==0)
  {
    error("maxw3, maxw4, and maxw5 all zero - no words for spaced dyads.\n");
  }
  
  // if (startPWMfound && fEM!=0.5 && fEM!=1.0 & verbose)
  // {
  //   warning("fEM argument is ignored in a seeded analysis\n");
  // }
  
  if (startPWMfound)
  {
    // if(verbose)
    // {
    //   if (populationSize!=10 && populationSize!=100) warning("pop argument is ignored in a seeded analysis, -pop is set to 10.\n");
    //   if (numGeneration!=1 && numGeneration!=5)      warning("gen argument is ignored in a seeded analysis, -gen is set to 1.\n");
    // }
    fEM=1.0;
    populationSize=FIXED_POPULATION; numGeneration=1; 
  }
  
    // number of sequences for EM
  if (fEM>1.0 || fEM<=0.0)
  { 
    error("The fraction of sequences subject to EM is %3.2f.\n",fEM);
  } 
  numSeqEM=(int)(fEM*numSeq);
  


  // memory callocations
  Iseq  =alloc_char(numSeq+1); 
  opwm2 =alloc_double_double(MAX_PWM_LENGTH,4);
  ipwm  =alloc_int_int(MAX_PWM_LENGTH,4);
  logepwm=alloc_double_double(MAX_PWM_LENGTH,4);
  emSeqLen=alloc_int(numSeqEM);
  scoreCutoff=alloc_int(1000);
  // scoreCutoff=alloc_int(populationSize);
  llrDist=alloc_distr(MAX_DIMENSION);
  posWeight=alloc_double_double(numSeq,maxSeqLen);
  sseq=alloc_char_char(MAX_NUM_SEQ,maxSeqLen+1);
  rsseq=alloc_char_char(MAX_NUM_SEQ,maxSeqLen+1);
  bfreq1=base_frequency(numSeq,seq,seqLen);

  if (strcmp(bFileName,"NULL") == 0)
  {
    bfreq0=alloc_double(5);
    for (i=0; i<4; i++)
      {
	bfreq0[i]=bfreq1[i];
      }
  }
  

  // if minN not specified, set the defaults accordingly
  if (minsites==-1) 
  {
    minsites =max(2,(int)(numSeq/20)); 
  }
  minsitesEM=(int)(fEM*minsites);
  
  maxpMutationRate=MAXP_MUTATION_RATE;
  
  // determine the distribution and critical cut point
  pwmDistCutoff=vector_similarity();
  
  /*---------- select a subset of sequences for EM only --------------*/
  if (useChIPscore==1)
  {
    select_high_scoring_seq_for_EM (ChIPScore,numSeq,numSeqEM,Iseq,fEM);
  }
  else
  {
    sample_without_replacement(Iseq,numSeqEM,numSeq);
  }
  /*-------------------- end of selection --------------------------*/
  
  if (maskR==1) mask_repetitive(geneID,seq,numSeq,seqLen,mFileName);

  if (widthWt<20)
  {
    warning("The window width of sequence centered on the nucleotides having large weights in EM for PWM optimization is small\n Motif longer than %d will not be discovered\n",widthWt);
  }
  
  time(&start);
  
    // if (weightType==1 || weightType==3) 
    //ffprintf(output,fp,"window width of sequence centered on the nucleotides having large weights for PWM optimization: %d\n",widthWt);
    //ffprintf(output,fp,"pwm score p-value cutoff for declaring binding site:\t%e\n",pvalueCutoff);
  
  if(verbose)
  {
    ffprintf(output,output,"==============================================================================================\n");
    ffprintf(output,output,"input sequence file:  %s\n",mFileName);
    fprintf(output,"number of sequences and average length:\t\t\t\t%d %5.1f\n",numSeq,aveSeqLen);
    
    fprintf(output,"Use pgf method to approximate llr null distribution\n");
    fprintf(output,"parameters estimated from sequences in:  %s\n\n",mFileName);

    if (weightType!=0) 
      fprintf(output,"non-uniform weight applies to each sequence - type:\t\t%d\n",weightType);
    fprintf(output,"number of GA generations & population size:\t\t\t%d %d\n\n",numGeneration,populationSize);
    fprintf(output,"PWM score p-value cutoff for binding site declaration:\t\t%e\n",pvalueCutoff);
    fprintf(output,"ln(E-value) cutoff for motif declaration:\t\t\t%f\n\n",E_valueCutoff);
//    fprintf(output,"number (percentage) of sequences selected for EM:\t\t%d(%4.1f\%)\n",numSeqEM,100.0*(double)numSeqEM/(double)numSeq);
    fprintf(output,"number of EM steps:\t\t\t\t\t\t%d\n",numEM);
    fprintf(output,"minimal no. sites considered for a motif:\t\t\t%d\n\n",minsites);
    fprintf(output,"[a,c,g,t] frequencies in input data:\t\t\t\t%f %f %f %f\n",bfreq1[0],bfreq1[1],bfreq1[2],bfreq1[3]);
    fprintf(output,"==============================================================================================\n");
  }
  
  // if (pgf) 
  // {
  //   if (userMarkovOrder!=0 & verbose) 
  //   {
  //     warning("The user-specified background Markov order (%d) is ignored when -pgf is set to 1\n",userMarkovOrder);
  //   }
  //   if (bFileName[0]!='\0' & verbose)
  //   {
  //     warning("The user-specified background models: %s are not used when -pgf is set to 1\n",bFileName);
  //   }
  // }
  // if (startPWMfound && fEM!=1.0  & verbose)
  // {
  //   warning("fEM argument is ignored in a seeded analysis\n");
  // }
  
    // determine seq length by counting only [a,c,g,t], seqLen is used in E-value calculation
    // determine the distribution and critical cut point
  pwmDistCutoff=vector_similarity();
  
  if      (weightType==0) assign_weight_uniform(seqLen,numSeq,posWeight);
  else if (weightType==1) assign_weight_triangular(seqLen,numSeq,posWeight);
  else if (weightType==2) assign_weight_normal(seqLen,numSeq,posWeight);
  else
  {
    error("Motif prior probability type not found - please choose: 0, 1, or 2\n");
    // fprintf(output,"Consider: -posWt 1 for strong central enrichment as in ChIP-seq\n");
    // fprintf(output,"          -posWt 0 for others\n\n");
    // exit(0);
  }
  /*    if (startPWMfound) minminSites=minsites;
   else               minminSites=(int)(0.40*minsitesEM);*/
  
  motifCn=0; noMotifFound=0; numCycle=0; numCycleNoMotif=0; 
  int compt=0;
  int lengthList=GET_LENGTH(RListPWM);
 
    /****************************************/ 
    broadcastOnce(maxSeqLen, numEM, startPWMfound, minminSites, maxpFactor, numSeq, numSeqEM, Iseq, bfreq0, posWeight, weightType, pvalueCutoff, emSeqLen, populationSize);
    /****************************************/ 

  do
  {
    if(!startPWMfound)
    {
      
      if(verbose)
      {
        fprintf(output,"*** Running an unseeded analysis ***\n");
        // fprintf(output,"\n|------------------------------------------------------------------|\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|              *** Running an unseeded analysis ***                |\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|------------------------------------------------------------------|\n\n");
      }
      populationSize=INTEGER_VALUE(RpopulationSize);
      numGeneration=INTEGER_VALUE(RnumGeneration);
      dyad  =alloc_chrs(populationSize,4);
      wheel =alloc_wheel(populationSize);
      fitness=alloc_fitness(populationSize);
      maxpFactor=alloc_double(populationSize);
      uniqMotif=alloc_char(populationSize+1);
      opwm  =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      epwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmConsensus=alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      pwm   =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmLen=alloc_int(populationSize);
      sdyad =alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      word  =alloc_word(numWordGroup,maxWordSize);
      minminSites=(int)(0.40*minsitesEM);

        // identify top-ranked k-mers (k=3,4,5) for spaced dyads
      if(verbose)
        fprintf(output,"GADEM cycle %2d: enumerate and count k-mers... ",numCycle+1);
        
      numWordGroup=word_for_dyad(word,seq,rseq,numSeq,seqLen,bfreq1,&numTop3mer,&numTop4mer,&numTop5mer);
      
      if(verbose)
        fprintf(output,"Done.\n");
      
        // generating a "population" of spaced dyads
      if(verbose)
        fprintf(output,"Initializing GA... ");

      initialisation(dyad,populationSize,numWordGroup,word,minSpaceWidth,maxSpaceWidth,maxpFactor);
      if(verbose)
        fprintf(output,"Done.\n");
      
    }
    else
    {
      if(verbose)
      {
        fprintf(output,"*** Running an seeded analysis ***\n");
        // fprintf(output,"\n|------------------------------------------------------------------|\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|               *** Running a seeded analysis ***                  |\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|------------------------------------------------------------------|\n\n");
      }
      populationSize=FIXED_POPULATION; 
      dyad  =alloc_chrs(populationSize,4);
      pwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmLen=alloc_int(populationSize);
      maxpFactor=alloc_double(populationSize);
      uniqMotif=alloc_char(populationSize+1);
      opwm  =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      epwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmConsensus=alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      sdyad =alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      word  =alloc_word(numWordGroup,maxWordSize);
      wheel =alloc_wheel(populationSize);
      fitness=alloc_fitness(populationSize);
      minminSites=minsites;
      int lengthMatrix;
      
      lengthMatrix=GET_LENGTH(VECTOR_ELT(RListPWM,compt));
      RSpwm=allocMatrix(REALSXP,4,(lengthMatrix/4));
      RSpwm=VECTOR_ELT(RListPWM,compt);
      
      
      pwmLen[0]=read_pwm0(RSpwm,pwm[0],lengthMatrix);
      
      for(i=1; i<populationSize; i++)
      {
        for (int j=0; j<pwmLen[0]; j++)
        {
          for (int k=0; k<4; k++)
          {
            pwm[i][j][k]=pwm[0][j][k];
          }
        }
        pwmLen[i]=pwmLen[0];
      }
      for (i=0; i<populationSize; i++)
      {
        maxpFactor[i]=FIXED_MAXPF*(i+1);
        standardize_pwm(pwm[i],pwmLen[i]);
        consensus_pwm(pwm[i],pwmLen[i],pwmConsensus[i]);
        strcpy(sdyad[i],pwmConsensus[i]);
      }
    }
    generationNoMotif=0;
    
    for (jjj=0; jjj<numGeneration; jjj++)
    {
        // convert spaced dyads to letter probability matrix
      if (!startPWMfound)
      {
        dyad_to_pwm(word,populationSize,dyad,pwm,pwmLen);
      }

    /*
      DO_APPLY(populationCalculation(maxSeqLen, numEM, fitness+ii, 
                                     startPWMfound, minminSites, maxpFactor[ii], 
                                     numSeq, numSeqEM, seq, rseq, seqLen, Iseq, 
                                     bfreq0, posWeight, weightType, 
                                     pvalueCutoff, emSeqLen, 
                                     pwm[ii], pwmLen[ii], epwm[ii], opwm[ii], 
                                     pwmConsensus[ii], scoreCutoff+ii, sdyad[ii], ii),
               populationSize, ii);
    */
      
     /* Create the structure to send to all the other slaves  */ 
      
      broadcastEveryCycle(Iseq, pwm, pwmLen, pwmConsensus, scoreCutoff, sdyad, populationSize);

      populationCalculation(maxSeqLen, numEM, fitness+ii, 
                                     startPWMfound, minminSites, maxpFactor[ii], 
                                     numSeq, numSeqEM, seq, rseq, seqLen, Iseq, 
                                     bfreq0, posWeight, weightType, 
                                     pvalueCutoff, emSeqLen, 
                                     pwm[ii], pwmLen[ii], epwm[ii], opwm[ii], 
                                     pwmConsensus[ii], scoreCutoff+ii, sdyad[ii], ii);

    /* Receive the analyzed data from all the other slaves and compile them */
    //getPopCalcResults(...);


      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      //   fprintf(output,"maxpfactor=%lf\n",maxpFactor[i]);
      //   fprintf(output,"scoreCutoff=%d\n",scoreCutoff[i]);
      //   fprintf(output,"   spacedDyad: %s\n",sdyad[i]);
      //   
      //   for (l=0; l<pwmLen[i]; l++)
      //   {
      //     for (m=0; m<4; m++) 
      //     { 
      //       fprintf(output,"opwm[%d][%d][%d]=%lf ",i,l,m,opwm[i][l][m]);
      //       fprintf(output,"epwm[%d][%d][%d]=%lf ",i,l,m,epwm[i][l][m]);
      //       fprintf(output,"pwm[%d][%d][%d]=%lf ",i,l,m,pwm[i][l][m]);
      //     }
      //     fprintf(output,"\n");
      //   }
      //   fprintf(output,"\n");
      // }
      // 
      // testrand=runif(0,1);
      // fprintf(output,"testrand1=%lf\n",testrand);
      
      if (populationSize>1)
      {
        sort_fitness(fitness,populationSize);
      }



      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      // }
      numUniq=check_pwm_uniqueness_dist(opwm, pwmLen,
                                        populationSize, fitness,
                                        pwmDistCutoff, E_valueCutoff,
                                        uniqMotif, slideWinPWM);


      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      //   fprintf(output,"maxpfactor=%lf\n",maxpFactor[i]);
      //   fprintf(output,"scoreCutoff=%d\n",scoreCutoff[i]);
      //   fprintf(output,"   spacedDyad: %s\n",sdyad[i]);
      //   
      //   for (l=0; l<pwmLen[i]; l++)
      //   {
      //     for (m=0; m<4; m++) 
      //     { 
      //       fprintf(output,"opwm[%d][%d][%d]=%lf",i,l,m,opwm[i][l][m]); 
      //     }
      //     fprintf(output,"\n");
      //   }
      //   fprintf(output,"\n");
      // }
      
      if(verbose)
      {
        fprintf(output,"GADEM cycle[%3d] generation[%3d] number of unique motif: %d\n",numCycle+1,jjj+1,numUniq);
        for (i=0; i<populationSize; i++)
        {
          if (uniqMotif[i]=='1')
          {
            fprintf(output,"   spacedDyad: %s ",sdyad[fitness[i].index]);
            for (int j=strlen(sdyad[fitness[i].index]); j<maxSpaceWidth+10; j++) fprintf(output," ");
            fprintf(output,"motifConsensus: %s ",pwmConsensus[fitness[i].index]);
            for (int j=strlen(sdyad[fitness[i].index]); j<maxSpaceWidth+10; j++) fprintf(output," ");
            fprintf(output," %3.2f fitness: %7.2f\n",maxpFactor[fitness[i].index],fitness[i].value);
          }
        }
        fprintf(output,"\n");
      }


      if (jjj<numGeneration-1)
      {
        // fitness based selection with replacement
        roulett_wheel_fitness(fitness,populationSize,wheel);
        // mutation and crossover operations
        if (populationSize>1)
        {
          testrand=runif(0,1);
          if (testrand>=0.5)
          {
            mutation(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif,
                      maxpFactor,maxpMutationRate); 
          }
          else
          {
            crossover(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif, maxpFactor,maxpMutationRate); 
          }
        }
        else
        {
          mutation(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif, maxpFactor,maxpMutationRate);
        }
      }
    }

    if((numCycle+1)< lengthList)
    {
      compt++;
    }
    else
    {
      startPWMfound=0;
    }
    numCycle++;


    site=alloc_site_site(numUniq+1,MAX_SITES);
    nsites=alloc_int(numUniq+1);
    pwmnewLen=alloc_int(numUniq+1); // after base extension and trimming
    seqCn=alloc_int(MAX_NUM_SEQ);
    bseqCn=alloc_int(MAX_NUM_SEQ);

    // final step user-specified background model is used
    motifCn2=0; // motifCn per GADEM cycle
    for (ii=0; ii<populationSize; ii++) 
    {

      id=fitness[ii].index;
      if(uniqMotif[ii]=='0')
      {
        continue;
      }


      // approximate the exact llr distribution using Staden's method
      // if(verbose)
      // {
      //   fprintf(output,"Approximate the exact pwm llr score distribution using the pgf method.\n");
      // }
      log_ratio_to_int(epwm[id],ipwm,pwmLen[id],bfreq0);

        // compute score distribution of the (int)PWM using Staden's method
      llrDim=pwm_score_dist(ipwm,pwmLen[id],llrDist,bfreq0);

        //fprintf(output,"Avant ScoreCutoff %d \n",scoreCutoff[id]);
      scoreCutoff[id]=determine_cutoff(llrDist,llrDim,pvalueCutoff);
        //fprintf(output,"Apres ScoreCutoff %d \n",scoreCutoff[id]);
        
      if(fullScan)
      {
        nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,oseq,orseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
      }
      else
      {
        nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,seq,rseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
      }
      if (nsites[motifCn2]>=max(2,minsites))
      {
      for (int j=0; j<numSeq; j++) seqCn[j]=0;
        for (int j=0; j<nsites[motifCn2]; j++) seqCn[site[motifCn2][j].seq]++;
        
        for (int j=0; j<4; j++) cn[j]=0;
        for (int j=0; j<numSeq; j++)
        {
          if (seqCn[j]==0) cn[0]++;
          if (seqCn[j]==1) cn[1]++;
          if (seqCn[j]==2) cn[2]++;
          if (seqCn[j]>2)  cn[3]++;
        }
        totalSitesInput=nsites[motifCn2];
        if (extTrim)
        {
          if (fullScan)
          {
            extend_alignment(site[motifCn2],numSeq,oseq,orseq,seqLen,nsites[motifCn2],pwmLen[id],&(pwmnewLen[motifCn2]));
          }
          else
          {
            extend_alignment(site[motifCn2],numSeq,seq,rseq,seqLen,nsites[motifCn2],pwmLen[id],&(pwmnewLen[motifCn2]));
          }
        }
        else
        { 
          pwmnewLen[motifCn2]=pwmLen[id];
        } 

        if (fullScan)
        {
          align_sites_count(site[motifCn2],oseq,orseq,nsites[motifCn2],pwmnewLen[motifCn2],opwm2);
        }
        else
        {
          align_sites_count(site[motifCn2],seq,rseq,nsites[motifCn2],pwmnewLen[motifCn2],opwm2);
        }
        standardize_pwm(opwm2,pwmnewLen[motifCn2]);
        logev=E_value(opwm2,nsites[motifCn2],bfreq0,pwmnewLen[motifCn2],numSeq,seqLen);

        if (logev<=E_valueCutoff)
        {
          consensus_pwm(opwm2,pwmnewLen[motifCn2],pwmConsensus[id]);
          if (fullScan)
          {
            SET_VECTOR_ELT(ResultsGadem,increment,print_result_R(site[motifCn2],nsites[motifCn2],numSeq,oseq,orseq,seqLen,logev,opwm2,pwmnewLen[motifCn2],motifCn+1,sdyad[id],pwmConsensus[id],numCycle,pvalueCutoff,maxpFactor[id],geneID));
            increment++;           
            print_motif(site[motifCn2],nsites[motifCn2],oseq,orseq,seqLen,pwmnewLen[motifCn2],motifCn+1,opwm2);
          }
          else
          {
            SET_VECTOR_ELT(ResultsGadem,increment,print_result_R(site[motifCn2],nsites[motifCn2],numSeq,seq,rseq,seqLen,logev,opwm2,pwmnewLen[motifCn2],
                                                                 motifCn+1,sdyad[id],pwmConsensus[id],numCycle,pvalueCutoff,maxpFactor[id],geneID));
            increment++;
            print_motif(site[motifCn2],nsites[motifCn2],seq,rseq,seqLen,pwmnewLen[motifCn2],motifCn+1,opwm2);
          }

          mask_sites(nsites[motifCn2],seq,rseq,seqLen,site[motifCn2],pwmnewLen[motifCn2]);

          /* ----------------------compute the average number of sites in background sequences ----------------------*/
          avebnsites=0; avebnsiteSeq=0;
          for (i=0; i<numBackgSets; i++)
          {
            simulate_background_seq(bfreq0,numSeq,seqLen,sseq);
            reverse_seq(sseq,rsseq,numSeq,seqLen);

            nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,sseq,rsseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
            
            for (int j=0; j<numSeq; j++) bseqCn[j]=0;
            for (int j=0; j<nsites[motifCn2]; j++) bseqCn[site[motifCn2][j].seq]++;
            
            for (int j=0; j<4; j++) bcn[j]=0;
            for (int j=0; j<numSeq; j++)
            {
              if (bseqCn[j]==0) bcn[0]++;
              if (bseqCn[j]==1) bcn[1]++;
              if (bseqCn[j]==2) bcn[2]++;
              if (bseqCn[j]>2)  bcn[3]++;
            }
              //ffprintf(output,fq,"background set[%2d] Seqs with 0,1,2,>2 sites: %d %d %d %d\n",i+1,bcn[0],bcn[1],bcn[2],bcn[3]);
            avebnsites+=nsites[motifCn2]; avebnsiteSeq+=(numSeq-bcn[0]);
          } 
          avebnsites/=numBackgSets; avebnsiteSeq/=numBackgSets;
          /* -----------------end compute the average number of sites in background sequences ----------------------*/
          motifCn++; motifCn2++; 

			//if((numCycle+1) > lengthList & fixSeeded)
			//	{	
			//	  numCycleNoMotif=1;
			//		startPWMfound=1;
			//		} else {
					numCycleNoMotif=0;
			//	}

        }
      }
    }
    
    /* for (int i=0; i<motifCn2; i++)
    {
      mask_sites(nsites[i],seq,rseq,seqLen,site[i],pwmnewLen[i]); 
    } */
    
    if (site[0])
    { 
      free(site[0]);
      site[0]=NULL;
    }
    if (site)
    {
      free(site);
      site=NULL;
    }
    if (nsites)
    {
      free(nsites);
      nsites=NULL;
    }
    if (pwmnewLen) 
    {
      free(pwmnewLen);
      pwmnewLen=NULL;
    }
    
    if (motifCn2==0)
      numCycleNoMotif++;   
    if (motifCn==nmotifs)
      {
	fprintf(output,"Maximal number of motifs (%d) reached\n",nmotifs);
	break;
      }
    if (numCycleNoMotif==stopCriterion)
      noMotifFound=1;
  }while (!noMotifFound);
  
  
    // fclose(fp);
  /*if (!startPWMfound) {  
   if (dyad[0])      { free(dyad[0]);         dyad[0]=NULL;    }
   if (dyad)         { free(dyad);            dyad=NULL;       }
   }*/
  if (seqLen)
  { 
    free(seqLen);
    seqLen=NULL;
  }
  if (pwm[0][0])       
  {
    free(pwm[0][0]);
    pwm[0][0]=NULL; 
  }
  if (pwm[0])
  { 
    free(pwm[0]);
    pwm[0]=NULL;     
  }
  if (pwm)             
  {
    free(pwm); 
    pwm=NULL;        
  }
  if (opwm2[0])  
  { 
    free(opwm2[0]); 
    opwm2[0]=NULL;
  }
  if (opwm2)     
  {
    free(opwm2); 
    opwm2=NULL;
  }
  if (opwm[0][0])      
  { 
    free(opwm[0][0]);
    opwm[0][0]=NULL;
  }
  if (opwm[0])    
  {
    free(opwm[0]);
    opwm[0]=NULL;
  }
  if (opwm)       
  {
    free(opwm);
    opwm=NULL;
  }
  if(ipwm[0])
  { 
    free(ipwm[0]);     
    ipwm[0]=NULL;  
  }
  if (ipwm)
  {
    free(ipwm);   
    ipwm=NULL;
  }
  if (pwmLen)   
  { 
    free(pwmLen);    
    pwmLen=NULL; 
  }
  if (seq[0])          { free(seq[0]);          seq[0]=NULL;     }
  if (seq)             { free(seq);             seq=NULL;        }
    //  if (rseq[0])         { free(rseq[0]);         rseq[0]=NULL;    }
    // if (rseq)            { free(rseq);            rseq=NULL;       }
    // if (oseq[0])         { free(oseq[0]);         oseq[0]=NULL;    }
    // if (oseq)            { free(oseq);            oseq=NULL;       }
    // if (orseq[0])        { free(orseq[0]);        orseq[0]=NULL;   }
    // if (orseq)           { free(orseq);           orseq=NULL;      }
  if (bfreq1)    
  { 
    free(bfreq1);    
    bfreq1=NULL;  
  }
  if (bfreq0)
  {
    free(bfreq0);
    bfreq0=NULL;
  }

  if (wheel)    
  { 
    free(wheel);    
    wheel=NULL;    
  }
  if (fitness)    
  { 
    free(fitness); 
    fitness=NULL;
  }
  if (mFileName)  
  { 
    free(mFileName);    
    mFileName=NULL; 
  }
  if (oFileName)    
  { 
    free(oFileName);  
    oFileName=NULL;
  }
  if (pwmFileName)    
  {
    free(pwmFileName);
    pwmFileName=NULL;
  }
  if (sdyad[0]) 
  { 
    free(sdyad[0]); 
    sdyad[0]=NULL;
  }
  if (sdyad)    
  {
    free(sdyad);
    sdyad=NULL;
  }
  if (pwmConsensus[0])
  { 
    free(pwmConsensus[0]);
    pwmConsensus[0]=NULL;
  }
  if (pwmConsensus)   
  {
    free(pwmConsensus);
    pwmConsensus=NULL;
  }
  //if (!startPWMfound && word) destroy_word(word,numWordGroup);

  PutRNGstate();
  UNPROTECT(1);
  return(ResultsGadem);
}
Ejemplo n.º 8
0
void read_background(char *filename,double *bfreq) {

   FILE *fp;
   char *buffer,*tok,letter[2];
   int i,len,numTab;
   double sum;

   fp=fopen(filename,"r");
   if (!fp)
      { 
         error("Incorrect filename for background model\n"); 
      } 

   buffer=alloc_char(250);

   for (i=0; i<4; i++) bfreq[i]=-1;

   while (!feof(fp)) {
      if (fgets(buffer,250,fp)>0) {
         if (buffer[0]=='#') continue;
         len=strlen(buffer);
         buffer[len-1]='\0';
         numTab=0;
         for (i=0; i<len; i++) {
            if (buffer[i]=='\0') numTab++;
         }
         if (numTab>0) {
            tok=strtok(buffer,"\t");
            if (strlen(tok)>1) continue;
            letter[0]=tok[0];
            tok=strtok(0,"\t");
            if      (letter[0]=='A' || letter[0]=='a') { 
               if (bfreq[0]==-1)  bfreq[0]=atof(tok); 
            }
            else if (letter[0]=='C' || letter[0]=='c') { 
               if (bfreq[1]==-1)  bfreq[1]=atof(tok); 
            }
            else if (letter[0]=='G' || letter[0]=='g') {
               if (bfreq[2]==-1)  bfreq[2]=atof(tok); 
            }
            else if (letter[0]=='T' || letter[0]=='t') {
               if (bfreq[3]==-1)  bfreq[3]=atof(tok); 
            }
            else  { fprintf(output,"Error reading %s: non-[A,C,G,T]\n",filename); exit(0); } 
         }
         else {
            tok=strtok(buffer," ");
            letter[0]=tok[0];
            if (strlen(tok)>1) continue;
            tok=strtok(0," ");
            if      (letter[0]=='A' || letter[0]=='a') { 
               if (bfreq[0]==-1)  bfreq[0]=atof(tok); 
            }
            else if (letter[0]=='C' || letter[0]=='c') { 
               if (bfreq[1]==-1)  bfreq[1]=atof(tok); 
            }
            else if (letter[0]=='G' || letter[0]=='g') {
               if (bfreq[2]==-1)  bfreq[2]=atof(tok); 
            }
            else if (letter[0]=='T' || letter[0]=='t') {
               if (bfreq[3]==-1)  bfreq[3]=atof(tok); 
            }
            else  { fprintf(output,"Error reading %s: non-[A,C,G,T]\n",filename); exit(0); } 
         }
      }
   }
   fclose(fp);

   for (i=0; i<4; i++) {
      if (bfreq[i]==-1) {
         switch (i) {
            case 0: fprintf(output,"freq. for 'a' not found in %s\n",filename); break;
            case 1: fprintf(output,"freq. for 'c' not found in %s\n",filename); break;
            case 2: fprintf(output,"freq. for 'g' not found in %s\n",filename); break;
            case 3: fprintf(output,"freq. for 't' not found in %s\n",filename); break;
            default: break;
         }
         exit(0);
      }
   }
   sum=0; for (i=0; i<4; i++) sum +=bfreq[i];
   if (fabs(sum-1.0)>0.001) {
      fprintf(output,"Warning: frequenices do not add to 1.0\n");
      fprintf(output,"Please check %s\n",filename);
      exit(0);
   }
   if (buffer) { free(buffer); buffer=NULL; }
}
Ejemplo n.º 9
0
struct response *evresp_itp(char *stalst, char *chalst, char *net_code,
                            char *locidlst, char *date_time, char *units,
                            char *file, double *freqs, int nfreqs,
                            char *rtype, char *verbose, int start_stage,
                            int stop_stage, int stdio_flag,
                            int listinterp_out_flag, int listinterp_in_flag,
                            double listinterp_tension, int useTotalSensitivityFlag)
{
  struct channel this_channel;
  struct scn *scn;
  struct string_array *sta_list, *chan_list;
  struct string_array *locid_list;
  int i, j, k, count = 0, which_matched, test = 1, mode, new_file;
  int  locid_pos;
  int  err_type;
  char  out_name[MAXLINELEN], locid[LOCIDLEN+1];
  char *locid_ptr, *end_locid_ptr;
  struct matched_files *flst_head = (struct matched_files *)NULL;
  struct matched_files *flst_ptr = NULL, *output_files = NULL;
  struct file_list *lst_ptr = NULL, *tmp_ptr = NULL, *out_file = NULL, *tmp_file = NULL;
  struct response *resp = NULL, *next_ptr = NULL;
  struct response *prev_ptr = (struct response *)NULL;
  struct response *first_resp = (struct response *)NULL;
  struct complex *output = NULL;
  struct scn_list *scns = NULL;
  FILE *fptr = NULL;
  double *freqs_orig = NULL;  /* for saving the original frequencies */
  int nfreqs_orig;

 /* Let's save the original frequencies requested by a user since they can be overwritten */
 /* if we process blockette 55 IGD for version 3.2.17 of evalresp*/

   nfreqs_orig = nfreqs;
   freqs_orig = (double *) malloc(sizeof(double) * nfreqs_orig);
   memcpy (freqs_orig, freqs, sizeof(double) * nfreqs_orig);

  /* set 'GblChanPtr' to point to 'this_channel' */

  GblChanPtr = &this_channel;

  /* clear out the FirstLine buffer */

  memset(FirstLine, 0, sizeof(FirstLine));


  /* if the verbose flag is set, then print some diagnostic output (other than
     errors) */

  if(verbose && !strcmp(verbose,"-v")) {
    fprintf(stderr, "<< EVALRESP RESPONSE OUTPUT V%s >>\n", REVNUM);
    fflush(stderr);
  }

  /* first, determine the values of Pi and twoPi for use in evaluating
     the instrument responses later */

  Pi = acos(-1.0);
  twoPi = 2.0 * Pi;

  /* set the values of first_units and last_units to null strings */

  strncpy(this_channel.staname,"",STALEN);
  strncpy(this_channel.network,"",NETLEN);
  strncpy(this_channel.locid,"",LOCIDLEN);
  strncpy(this_channel.chaname,"",CHALEN);
  strncpy(this_channel.beg_t,"",DATIMLEN);
  strncpy(this_channel.end_t,"",DATIMLEN);
  strncpy(this_channel.first_units,"",MAXLINELEN);
  strncpy(this_channel.last_units,"",MAXLINELEN);

  /* and initialize the linked list of pointers to filters */

  this_channel.first_stage = (struct stage *)NULL;

  /* parse the "stalst" string to form a list of stations */

  for(i = 0; i < (int)strlen(stalst); i++) {
    if(stalst[i] == ',')
      stalst[i] = ' ';
  }
  sta_list = ev_parse_line(stalst);

  /* remove any blank spaces from the beginning and end of the string */

  locid_ptr = locidlst; locid_pos = 0;
  strncpy(locid, "", LOCIDLEN);
  while(*locid_ptr && *locid_ptr == ' ') locid_ptr++;
  end_locid_ptr = locid_ptr + strlen(locid_ptr) - 1;
  while(end_locid_ptr > locid_ptr && *end_locid_ptr == ' ') end_locid_ptr--;
  strncpy(locid, locid_ptr, (end_locid_ptr - locid_ptr + 1));

  /* parse the "locidlst" string to form a list of channels  */

  locid_list = parse_delim_line(locid,",");

  /* parse the "chalst" string to form a list of channels */

  for(i = 0; i < (int)strlen(chalst); i++) {
    if(chalst[i] == ',')
      chalst[i] = ' ';
  }
  chan_list = ev_parse_line(chalst);

  /* then form a set of network-station-locid-channel tuples to search for */

  scns = alloc_scn_list(chan_list->nstrings*sta_list->nstrings*locid_list->nstrings);
  for(i = 0; i < sta_list->nstrings; i++) {
    for(j = 0; j < locid_list->nstrings; j++) {
      for(k = 0; k < chan_list->nstrings; k++, count++) {
	scn = scns->scn_vec[count];
	strncpy(scn->station, sta_list->strings[i], STALEN);
	if(strlen(locid_list->strings[j]) == strspn(locid_list->strings[j], " "))
	  memset(scn->locid, 0, LOCIDLEN);
	else
	  strncpy(scn->locid, locid_list->strings[j], LOCIDLEN);
	strncpy(scn->channel, chan_list->strings[k], CHALEN);
	strncpy(scn->network, net_code, NETLEN);
      }
    }
  }
#ifdef LOG_LABEL
  sprintf(myLabel, "[%s.%s.%s.%s]", scn->network, scn->station, scn->locid, scn->channel);
#else
  myLabel[0] = '\0';
#endif
  /* if input is from stdin, set fptr to stdin, else find whatever matching
     files there are */

  if(stdio_flag) {
    fptr = stdin;
    mode = 0;
  }
  else {
    flst_head = find_files(file, scns, &mode);
    flst_ptr = flst_head;
  }

  /* find the responses for each of the station channel pairs as they
     occur in the file */

  if(!mode && !stdio_flag) {
    curr_file = file;
    if((fptr = fopen(file,"r")) == (FILE *)NULL)  {
#ifdef LIB_MODE
      fprintf(stderr, "%s failed to open file %s\n", myLabel, file);
      return NULL;
#else
      error_exit(OPEN_FILE_ERROR,"failed to open file %s", file);
#endif
    }
  }

  /* allocate space for the first response */

  resp = alloc_response(nfreqs);

  for(i = 0; i < scns->nscn && (mode || test); i++) {

    /* allocate space for 'matched_files' pointer used to determine if a
       file has already been read */

    if(!stdio_flag)
      output_files = alloc_matched_files();

    /* then check the mode to determine if are parsing one file or a list
       of files (note: if input is from stdin, is one file) */

    if(!mode) {
      which_matched = 0;
      while(test && which_matched >= 0) {
        if(!(err_type = setjmp(jump_buffer))) {
          new_file = 0;
          which_matched = find_resp(fptr, scns, date_time, &this_channel);
#ifdef LIB_MODE /* IGD 25-Sep-2007 Looks like we do not need this: function returns anyway */
//        if(which_matched < 0) {
//        if(!stdio_flag)            /* if not input from console then */
//          fclose(fptr);            /* close input file
//        return NULL;
//       }
#endif

          /* found a station-channel-network that matched.  First construct
             an output filename and compare to other output files. If this
             filename doesn't match any of them, (or if it is the first
             file found) parse the channel's response information.
             Otherwise skip it (since a match has already been found) */

          sprintf(out_name,"%s.%s.%s.%s",this_channel.network,
                  this_channel.staname,this_channel.locid,
                  this_channel.chaname);
#ifdef LOG_LABEL
          sprintf(myLabel, "[%s]", out_name);
#else
          myLabel[0] = '\0';
#endif
	  if(!stdio_flag) {
	    tmp_file = output_files->first_list;
	    for(k = 0; k < output_files->nfiles; k++) {
	      out_file = tmp_file;
	      if(!strcmp(out_file->name,out_name))
		break;
	      tmp_file = out_file->next_file;
	    }
	  }
          if((stdio_flag && !new_file) || !output_files->nfiles) {
	    if(!stdio_flag) {
	      output_files->nfiles++;
	      out_file = alloc_file_list();
	      output_files->first_list = out_file;
	      out_file->name = alloc_char(strlen(out_name)+1);
	      strcpy(out_file->name,out_name);
	    }
            new_file = 1;
          }
          else if((stdio_flag && !new_file) || k == output_files->nfiles) {
	    if(!stdio_flag) {
	      output_files->nfiles++;
	      out_file->next_file = alloc_file_list();
	      tmp_file = out_file->next_file;
	      out_file = tmp_file;
	      out_file->name = alloc_char(strlen(out_name)+1);
	      strcpy(out_file->name,out_name);
	    }
            new_file = 1;
          }
          else
            new_file = 0;

          if(new_file && which_matched >= 0) {

            /* fill in station-channel-net information for the response */

            strncpy(resp->station,this_channel.staname,STALEN);
            strncpy(resp->locid,this_channel.locid,LOCIDLEN);
            strncpy(resp->channel,this_channel.chaname,CHALEN);
            strncpy(resp->network,this_channel.network,NETLEN);
            output = resp->rvec;

            /* found a station channel pair that matched a response, so parse
               the response into a channel/filter list */

            test = parse_channel(fptr, &this_channel);

            if(listinterp_in_flag &&
                         this_channel.first_stage->first_blkt->type == LIST)
            {      /* flag set for interpolation and stage type is "List" */
              interpolate_list_blockette(
                &(this_channel.first_stage->first_blkt->blkt_info.list.freq),
                &(this_channel.first_stage->first_blkt->blkt_info.list.amp),
                &(this_channel.first_stage->first_blkt->blkt_info.list.phase),
                &(this_channel.first_stage->first_blkt->blkt_info.list.nresp),
                freqs,nfreqs,listinterp_tension);
            }

           /* check the filter sequence that was just read */
            check_channel(&this_channel);

		/* If we process blockette 55, we should recompute resp->rvec */
		/* because the number of output responses is generally different from */
		/* what is the user requested */
		/*if we don't use blockette 55, we should set the frequencies to the original */
		/* user defined position if we did mess up with frequencies in -possible - blockette 55*/
		/* containing previous file. Modifications by I.Dricker IGD*/


	    free(resp->rvec);
/* 'freqs' array is passed in and should not be freed -- 10/18/2005 -- [ET] */
/*	    free(freqs); */

	    if (this_channel.first_stage->first_blkt != NULL && this_channel.first_stage->first_blkt->type == LIST)
	    {
	      /*to prevent segmentation in case of bogus input files */
	      nfreqs = this_channel.first_stage->first_blkt->blkt_info.list.nresp;
	      freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    	      memcpy (freqs, this_channel.first_stage->first_blkt->blkt_info.list.freq, sizeof(double) * nfreqs); /*cp*/
			resp->rvec = alloc_complex(nfreqs);
	      output=resp->rvec;
	      resp->nfreqs = nfreqs;
	      resp->freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
	      memcpy (resp->freqs, this_channel.first_stage->first_blkt->blkt_info.list.freq, sizeof(double) * nfreqs); /*cp*/
	   }
	  else
	  {
	    nfreqs = nfreqs_orig;
	    freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    	    memcpy (freqs, freqs_orig, sizeof(double) * nfreqs); /*cp*/
	    resp->rvec = alloc_complex(nfreqs);
	    output=resp->rvec;
	    resp->nfreqs = nfreqs;
	    resp->freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    	    memcpy (resp->freqs, freqs_orig, sizeof(double) * nfreqs); /*cp*/
	  }


            /* normalize the response of the filter sequence */

            norm_resp(&this_channel, start_stage, stop_stage);

            /* calculate the response at the requested frequencies */

            calc_resp(&this_channel, freqs, nfreqs, output, units,
                      start_stage, stop_stage, useTotalSensitivityFlag);

            /* diagnostic output, if the user requested it */

            if(verbose && !strcmp(verbose,"-v")) {
              print_chan(&this_channel, start_stage, stop_stage,
                       stdio_flag, listinterp_out_flag, listinterp_in_flag, useTotalSensitivityFlag);
            }

            free(freqs);          /* free array that was allocated above */

            /* and, finally, free the memory associated with this channel/filter
               list and continue searching for the next match */

            free_channel(&this_channel);
            if(first_resp == (struct response *)NULL) {
              first_resp = resp;
            }
            next_ptr = alloc_response(nfreqs);
            resp->next = next_ptr;
            prev_ptr = resp;
            resp = next_ptr;
          }
          else {
            strncpy(FirstLine,"",MAXLINELEN);
            test = next_resp(fptr);
          }
        }
        else {
          if(new_file)
            output_files->nfiles--;
          free_channel(&this_channel);
          /* catch errors that cause parsing to fail midstream */
          if(err_type == PARSE_ERROR || err_type == UNRECOG_FILTYPE ||
             err_type == UNDEF_SEPSTR || err_type == IMPROP_DATA_TYPE ||
             err_type == RE_COMP_FAILED || err_type == UNRECOG_UNITS) {
            strncpy(FirstLine,"",MAXLINELEN);
            test = next_resp(fptr);
          }
          else if(err_type == UNDEF_PREFIX) {
            test = 0;
          }
        }
      }
      if(!stdio_flag)
	free_matched_files(output_files);     /* added 3/28/2006 -- [ET] */

      /* allocated one too many responses */

      free_response(resp);
      if(prev_ptr != (struct response *)NULL)
        prev_ptr->next = (struct response *)NULL;
      break;

    }
    else if(mode) {
      lst_ptr = flst_ptr->first_list;
      scn = scns->scn_vec[i];
    next_scn:
      for(j = 0; j < flst_ptr->nfiles; j++) {
        if(!stdio_flag) {
          fptr = fopen(lst_ptr->name,"r");
        }
        if(fptr != (FILE *)NULL) {
          curr_file = lst_ptr->name;
    look_again:
          if(!(err_type = setjmp(jump_buffer))) {
            new_file = 0;
            which_matched = get_resp(fptr, scn, date_time, &this_channel);
#ifdef LIB_MODE /* IGD 25-Sep-2007 Looks like we do not need this: function returns anyway */
//           if(which_matched < 1) {
//             if(!stdio_flag)           /* if not input from console then */
//               fclose(fptr);           /* close input file */
//             return NULL;
//          }
#endif
            if(which_matched >= 0) {

              /* found a station-channel-network that matched.  First construct
                 an output filename and compare to other output files. If this
                 filename doesn't match any of them, (or if it is the first
                 file found) parse the channel's response information.
                 Otherwise skip it (since a match has already been found) */

              sprintf(out_name,"%s.%s.%s.%s",this_channel.network,
                      this_channel.staname,this_channel.locid,
                      this_channel.chaname);
#ifdef LOG_LABEL
              sprintf (myLabel, "[%s]", out_name);
#else
              myLabel[0] = '\0';
#endif
              tmp_file = output_files->first_list;
              for(k = 0; k < output_files->nfiles; k++) {
                out_file = tmp_file;
                if(!strcmp(out_file->name,out_name))
                  break;
                tmp_file = out_file->next_file;
              }
              if(!output_files->nfiles) {
                output_files->nfiles++;
                out_file = alloc_file_list();
                output_files->first_list = out_file;
                out_file->name = alloc_char(strlen(out_name)+1);
                strcpy(out_file->name,out_name);
                new_file = 1;
              }
              else if(k == output_files->nfiles) {
                output_files->nfiles++;
                out_file->next_file = alloc_file_list();
                tmp_file = out_file->next_file;
                out_file = tmp_file;
                out_file->name = alloc_char(strlen(out_name)+1);
                strcpy(out_file->name,out_name);
                new_file = 1;
              }
              else
                new_file = 0;

              if(new_file) {

                /* fill in station-channel-net information for the response */

                strncpy(resp->station,this_channel.staname,STALEN);
                strncpy(resp->locid,this_channel.locid,LOCIDLEN);
                strncpy(resp->channel,this_channel.chaname,CHALEN);
                strncpy(resp->network,this_channel.network,NETLEN);
                output = resp->rvec;

                /* parse the response into a channel/filter list */

                test = parse_channel(fptr, &this_channel);

                /* IGD 01/04/01 Add code preventing a user from defining output units as DIS and ACC if
                the input units are PRESSURE after */
                if (strncmp (this_channel.first_units, "PA -",4) == 0)  {
                  if (strcmp(units, "VEL") != 0)	{
                    if(strcmp(units, "DEF") != 0)  {
                      fprintf(stderr, "%s WARNING: OUTPUT %s does not make sense if INPUT is PRESSURE\n",
                                      myLabel, units);
                      strcpy (units, "VEL");
                      fprintf(stderr, "%s      OUTPUT units are reset and interpreted as PRESSURE\n", myLabel);
                    }
                  }
                }
                /* IGD 08/21/06 Add code preventing a user from defining output units as DIS and ACC if
                the input units are TESLA */
                if (strncmp (this_channel.first_units, "T -", 3) == 0)  {
                  if (strcmp(units, "VEL") != 0)  {
                    if(strcmp(units, "DEF") != 0)  {
                      fprintf(stderr, "%s WARNING: OUTPUT %s does not make sense if INPUT is MAGNETIC FLUX\n",
                                                        myLabel, units);
                      strcpy (units, "VEL");
                      fprintf(stderr, "%s      OUTPUT units are reset and interpreted as TESLA\n", myLabel);
                   }
                  }
                }

                if(listinterp_in_flag &&
                         this_channel.first_stage->first_blkt->type == LIST)  {
                        /* flag set for interpolation and stage type is "List" */
                  interpolate_list_blockette(
                   &(this_channel.first_stage->first_blkt->blkt_info.list.freq),
                   &(this_channel.first_stage->first_blkt->blkt_info.list.amp),
                   &(this_channel.first_stage->first_blkt->blkt_info.list.phase),
                   &(this_channel.first_stage->first_blkt->blkt_info.list.nresp),
                   freqs,nfreqs,listinterp_tension);
                }

                /* check the filter sequence that was just read */
                check_channel(&this_channel);

		/* If we process blockette 55, we should recompute resp->rvec */
		/* because the number of output responses is generally different from */
		/* what is the user requested */
		/*if we don't use blockette 55, we should set the frequencies to the original */
		/* user defined position if we did mess up with frequencies in -possible - blockette 55*/
		/* containing previous file. Modifications by I.Dricker / IGD */

		free(resp->rvec);
/* 'freqs' array is passed in and should not be freed -- 10/18/2005 -- [ET] */
/*		free(freqs); */
		if (this_channel.first_stage->first_blkt != NULL && this_channel.first_stage->first_blkt->type == LIST) {
			/* This is to prevent segmentation if the response input is bogus responses */
			nfreqs = this_channel.first_stage->first_blkt->blkt_info.list.nresp;
			freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    			memcpy (freqs, this_channel.first_stage->first_blkt->blkt_info.list.freq, sizeof(double) * nfreqs); /*cp*/
			resp->rvec = alloc_complex(nfreqs);
			output=resp->rvec;
			resp->nfreqs = nfreqs;
			resp->freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    			memcpy (resp->freqs, this_channel.first_stage->first_blkt->blkt_info.list.freq, sizeof(double) * nfreqs); /*cp*/
		}
		else	{
			nfreqs = nfreqs_orig;
			freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    			memcpy (freqs, freqs_orig, sizeof(double) * nfreqs); /*cp*/
			resp->rvec = alloc_complex(nfreqs);
			output=resp->rvec;
			resp->nfreqs = nfreqs;
			resp->freqs = (double *) malloc(sizeof(double) * nfreqs); /* malloc a new vector */
    			memcpy (resp->freqs, freqs_orig, sizeof(double) * nfreqs); /*cp*/
		}

                /* normalize the response of the filter sequence */

                norm_resp(&this_channel, start_stage, stop_stage);

                /* calculate the response at the requested frequencies */

                calc_resp(&this_channel, freqs, nfreqs, output, units,
                          start_stage, stop_stage, useTotalSensitivityFlag);

                /* diagnostic output, if the user requested it */

                if(verbose && !strcmp(verbose,"-v")) {
                  print_chan(&this_channel, start_stage, stop_stage,
                       stdio_flag, listinterp_out_flag, listinterp_in_flag, useTotalSensitivityFlag);
                }

                free(freqs);      /* free array that was allocated above */

                /* and, finally, free the memory associated with this
                   channel/filter list and continue searching for the
                   next match */

                free_channel(&this_channel);
                if(first_resp == (struct response *)NULL) {
                  first_resp = resp;
                }
                next_ptr = alloc_response(nfreqs);
                resp->next = next_ptr;
                prev_ptr = resp;
                resp = next_ptr;
              }
              FirstField = 0;
              strncpy(FirstLine,"",MAXLINELEN);
              if(!stdio_flag) {
                fclose(fptr);
              }
            }
            else {
              strncpy(FirstLine,"",MAXLINELEN);
              test = next_resp(fptr);
	      if (!test) {
		if(!stdio_flag) {
		  fclose(fptr);
		}
	      }
            }

            /* if not the last file in the list, move on to the next one */

            if(lst_ptr->next_file != (struct file_list *)NULL) {
              tmp_ptr = lst_ptr->next_file;
              lst_ptr = tmp_ptr;
            }
          }
          else {
            if (new_file)
              output_files->nfiles--;
            /* catch errors that cause parsing to fail midstream */
            if(err_type == PARSE_ERROR || err_type == UNRECOG_FILTYPE ||
               err_type == UNDEF_SEPSTR || err_type == IMPROP_DATA_TYPE ||
               err_type == RE_COMP_FAILED || err_type == UNRECOG_UNITS) {
              strncpy(FirstLine,"",MAXLINELEN);
              test = next_resp(fptr);
            }
            else if(err_type == UNDEF_PREFIX) {
              test = 0;
            }
            free_channel(&this_channel);
            if (!test) {
              FirstField = 0;
              strncpy(FirstLine,"",MAXLINELEN);
              if(!stdio_flag) {
                fclose(fptr);
              }
            }
            else
              goto look_again;

            /* if not the last file in the list, move on to the next one */

            if(lst_ptr->next_file != (struct file_list *)NULL) {
               tmp_ptr = lst_ptr->next_file;
               lst_ptr = tmp_ptr;
            }

          }
        }
      }
      /* if not the last station-channel-network in the list,
         move on to the next one */
      if(i < (scns->nscn-1)) {
        flst_ptr = flst_ptr->ptr_next;
        lst_ptr = flst_ptr->first_list;
        i++;
        scn = scns->scn_vec[i];
        goto next_scn;
      }
      if(!stdio_flag)
	free_matched_files(output_files);

      /* allocated one too many responses */

      free_response(resp);
      if(prev_ptr != (struct response *)NULL)
        prev_ptr->next = (struct response *)NULL;

    } /* end else if mode */

  }  /* end for loop */

         /* added file close if single input file -- 2/13/2006 -- [ET]: */
  if(!mode && !stdio_flag)        /* if single file was opened then */
    fclose(fptr);                 /* close input file */

  /* and print a list of WARNINGS about the station-channel pairs that were not
     found in the input RESP files */

  for(i = 0; i < scns->nscn; i++) {
    scn = scns->scn_vec[i];
    if(!scn->found) {
      fprintf(stderr,"%s WARNING: no response found for NET=%s,STA=%s,LOCID=%s,CHAN=%s,DATE=%s\n",
              myLabel, scn->network, scn->station, scn->locid, scn->channel, date_time);
      fflush(stderr);
    }
  }
  free_scn_list(scns);
  if(flst_head != (struct matched_files *)NULL)
    free_matched_files(flst_head);
  free_string_array(chan_list);
  free_string_array(locid_list);
  free_string_array(sta_list);

  free(freqs_orig);               /* added 3/28/2006 -- [ET] */

  return(first_resp);

}