예제 #1
0
// Rescale the four entries of v so that they add up to 1.
// If the entries sum to exactly zero, v is replaced by the uniform vector (0.25 each).
static void rescale_to_unit_sum(double *v) {

   int k;
   double total;

   total=0;
   for (k=0; k<4; k++) total+=v[k];

   if (total==0) {
      for (k=0; k<4; k++) v[k]=0.25;
      return;
   }
   for (k=0; k<4; k++) v[k]/=total;
}

// Simulate the distance between pairs of random 4-component vectors and return a cut point.
//
// 100000 pairs of vectors are filled from genrand(), each vector is rescaled to sum to 1,
// and the sum of absolute component differences is recorded for every pair. The recorded
// distances are handed to sort_double2() (ascending order, per the original comment) and
// the entry at position (int)(100000*SIMILARITY_ALPHA) is returned.
double vector_similarity(void) {

   const int nDraws=100000;
   double p[4],q[4];
   double *l1;
   double cutoff;
   int n,k;

   l1=alloc_double(nDraws);

   for (n=0; n<nDraws; n++) {
      // the draws are interleaved: p[k] is drawn before q[k] for each component
      for (k=0; k<4; k++) {
         p[k]=genrand();
         q[k]=genrand();
      }
      rescale_to_unit_sum(p);
      rescale_to_unit_sum(q);

      l1[n]=0;
      for (k=0; k<4; k++) l1[n]+=fabs(p[k]-q[k]);
   }

   // sort in increasing order, then read off the ALPHA-th quantile
   sort_double2(l1,nDraws);
   cutoff=l1[(int)(nDraws*SIMILARITY_ALPHA)];

   free(l1);
   return (cutoff);
}
예제 #2
0
파일: top_kmers.c 프로젝트: qenvio/rGADEM
// Estimate strand-symmetric base frequencies from a set of sequences.
//
// numSeq  number of sequences in seq
// seq     sequences; only the lowercase characters 'a','c','g','t' are counted,
//         every other character is ignored
// seqLen  seqLen[i] is the number of characters examined in seq[i]
//
// Returns a 4-element array obtained from alloc_double(4), ordered [a,c,g,t].
// The raw proportions are smoothed with PSEUDO_COUNT and then averaged over
// complementary bases, so freq[a]==freq[t] and freq[c]==freq[g].
// NOTE(review): the caller presumably owns and frees the returned array - confirm.
//
// If no countable base is present the raw proportions fall back to 0.25 each;
// dividing by a zero total would otherwise put NaN in every returned entry.
double *base_frequency(int numSeq,char **seq,int *seqLen) {

   register int i,j;
   int bcount[4];
   int sum;
   double *freq;

   freq=alloc_double(4);

   for (j=0; j<4; j++) bcount[j]=0;

   for (i=0; i<numSeq; i++) {
      for (j=0; j<seqLen[i]; j++) {
         switch (seq[i][j]) {
            case 'a': (bcount[0])++; break;
            case 'c': (bcount[1])++; break;
            case 'g': (bcount[2])++; break;
            case 't': (bcount[3])++; break;
            default: break;
         }
      }
   }

   sum=0; for (j=0; j<4; j++) sum +=bcount[j];

   if (sum>0) {
      for (j=0; j<4; j++) freq[j]=(double)bcount[j]/(double)sum;
   }
   else {
      // nothing was counted (no sequences, or no a/c/g/t at all): use uniform proportions
      for (j=0; j<4; j++) freq[j]=0.25;
   }

   // pseudo-count smoothing; the denominator keeps the four entries summing to 1
   for (j=0; j<4; j++) freq[j]=(freq[j]+PSEUDO_COUNT)/(1.0+PSEUDO_COUNT*4.0);

   // make the frequencies symmetric between complementary bases (a<->t, c<->g)
   freq[0]=(freq[0]+freq[3])/2.0; freq[3]=freq[0];
   freq[1]=(freq[1]+freq[2])/2.0; freq[2]=freq[1];

   return (freq);
}
예제 #3
0
// Build a roulette wheel whose slot widths depend only on rank.
//
// fitness         entries in rank order; only fitness[k].index is read here
// populationSize  number of entries in fitness and of slots written to wheel
// wheel           output; slot k gets index=fitness[k].index and a [start,end] interval
//
// Slot k has width populationSize*(populationSize-k)/(1+2+...+populationSize), so the
// first entry gets the widest slot and the slots are laid end to end starting at 0.
void roulett_wheel_rank(Fitness *fitness,int populationSize,Wheel *wheel) {

    int k;
    int rankTotal;
    double *share;

    share=alloc_double(populationSize);

    // 1+2+...+populationSize, the normalising constant for the rank weights
    rankTotal=0;
    for (k=populationSize; k>=1; k--) rankTotal += k;

    for (k=0; k<populationSize; k++) {
        share[k]=(double)(populationSize-k)/(double)rankTotal;
    }

    // first slot starts the wheel at zero
    wheel[0].index=fitness[0].index;
    wheel[0].start=0.0;
    wheel[0].end  =(double)populationSize*share[0];

    // every later slot begins where the previous one ended
    k=1;
    while (k<populationSize) {
        wheel[k].index=fitness[k].index;
        wheel[k].start=wheel[k-1].end;
        wheel[k].end=(double)populationSize*share[k]+wheel[k].start;
        k++;
    }

    free(share);
}
예제 #4
0
파일: alloc.c 프로젝트: qenvio/rGADEM
// Allocate an array of numCategory zero-initialised Words records and give each
// record its own buffers:
//   s1        from alloc_char_char(maxSize,10)
//   prob_sta  from alloc_double(maxSize)
//   prob_end  from alloc_double(maxSize)
// error() is called when the calloc of the record array fails.
// Returns the record array.
Words *alloc_word(int numCategory,int maxSize) {

   int k;
   Words *table;

   table=(Words *)calloc(numCategory,sizeof(Words));
   if (table==NULL) {
      error("calloc failed for Words.\n");
   }

   k=0;
   while (k<numCategory) {
      table[k].s1=alloc_char_char(maxSize,10);
      table[k].prob_sta=alloc_double(maxSize);
      table[k].prob_end=alloc_double(maxSize);
      k++;
   }

   return (table);
}
예제 #5
0
// Build a roulette wheel whose slot widths are proportional to range-scaled fitness.
// Smaller fitness values are better here: entry 0 (the minimum) receives the widest
// slot and the last entry (the worst score) receives a slot of width zero.
//
// fitness         entries ordered so that fitness[0] is the minimum and
//                 fitness[populationSize-1] the worst score
// populationSize  number of entries in fitness and of slots written to wheel
// wheel           output; slot k gets index=fitness[k].index and a [start,end] interval
//
// When the scores are effectively all the same (see the test below) the wheel
// degenerates to equal unit-width slots [k,k+1] and "GA converged ..." is printed.
void roulett_wheel_fitness(Fitness *fitness,int populationSize,Wheel *wheel) {

    int k;
    int converged;
    double worstScore,range,totalScore;
    double *scaled;

    worstScore=fitness[populationSize-1].value;
    range=worstScore-fitness[0].value;

    // evaluated left to right; fitness[1] is only inspected when the range is not tiny
    converged=(range<0.0001) ||
              fitness[1].value==DUMMY_FITNESS ||
              fabs(fitness[1].value-fitness[populationSize-1].value)<0.0001;

    if (converged) {
        printf("GA converged ...\n");
        for (k=0; k<populationSize; k++) {
            wheel[k].index=fitness[k].index;
            wheel[k].start=k;
            wheel[k].end=k+1;
        }
        return;
    }

    scaled=alloc_double(populationSize);

    // range scaling: the minimum maps to 1, the worst score maps to 0
    totalScore=0;
    for (k=0; k<populationSize; k++) {
        scaled[k]=1.0-(fitness[k].value-fitness[0].value)/range;
        totalScore += scaled[k];
    }
    // normalise so that the scaled scores add up to 1
    for (k=0; k<populationSize; k++) {
        scaled[k] /= totalScore;
    }

    // first slot starts the wheel at zero
    wheel[0].index=fitness[0].index;
    wheel[0].start=0;
    wheel[0].end  =(double)populationSize*scaled[0];

    // every later slot begins where the previous one ended
    k=1;
    while (k<populationSize) {
        wheel[k].index=fitness[k].index;
        wheel[k].start=wheel[k-1].end;
        wheel[k].end=(double)populationSize*scaled[k]+wheel[k].start;
        k++;
    }

    free(scaled);
}
예제 #6
0
extern "C" void
invoke_copy_to_stack(uint64_t* stk, uint64_t *end,
                     uint32_t paramCount, nsXPTCVariant* s)
{
    uint64_t *ireg_args = stk;
    uint64_t *ireg_end  = ireg_args + 8;
    double *freg_args = (double *)ireg_end;
    double *freg_end  = freg_args + 8;
    uint64_t *stack_args = (uint64_t *)freg_end;

    // leave room for 'that' argument in x0
    ++ireg_args;

    for (uint32_t i = 0; i < paramCount; i++, s++) {
        if (s->IsPtrData()) {
            alloc_word(ireg_args, stack_args, ireg_end, (uint64_t)s->ptr);
            continue;
        }
        // According to the ABI, integral types that are smaller than 8 bytes
        // are to be passed in 8-byte registers or 8-byte stack slots.
        switch (s->type) {
            case nsXPTType::T_FLOAT:
                alloc_float(freg_args, stack_args, freg_end, s->val.f);
                break;
            case nsXPTType::T_DOUBLE:
                alloc_double(freg_args, stack_args, freg_end, s->val.d);
                break;
            case nsXPTType::T_I8:  alloc_word(ireg_args, stk, end, s->val.i8);   break;
            case nsXPTType::T_I16: alloc_word(ireg_args, stk, end, s->val.i16);  break;
            case nsXPTType::T_I32: alloc_word(ireg_args, stk, end, s->val.i32);  break;
            case nsXPTType::T_I64: alloc_word(ireg_args, stk, end, s->val.i64);  break;
            case nsXPTType::T_U8:  alloc_word(ireg_args, stk, end, s->val.u8);   break;
            case nsXPTType::T_U16: alloc_word(ireg_args, stk, end, s->val.u16);  break;
            case nsXPTType::T_U32: alloc_word(ireg_args, stk, end, s->val.u32);  break;
            case nsXPTType::T_U64: alloc_word(ireg_args, stk, end, s->val.u64);  break;
            case nsXPTType::T_BOOL: alloc_word(ireg_args, stk, end, s->val.b);   break;
            case nsXPTType::T_CHAR: alloc_word(ireg_args, stk, end, s->val.c);   break;
            case nsXPTType::T_WCHAR: alloc_word(ireg_args, stk, end, s->val.wc); break;
            default:
                // all the others are plain pointer types
                alloc_word(ireg_args, stack_args, ireg_end,
                           reinterpret_cast<uint64_t>(s->val.p));
                break;
        }
    }
}
예제 #7
0
// Produce the next population by mutation.
//
// dyad              population of spaced dyads; dyad[k][0] is w1, dyad[k][1] the spacer
//                   (its wordID holds the spacer width) and dyad[k][2] is w2. Overwritten
//                   with the new population on return.
// numWordGroup      number of k-mer groups in word
// word              k-mer groups; word[g].count entries, each with a [prob_sta,prob_end]
//                   interval used to pick a k-mer from a uniform draw
// minSpaceWidth,
// maxSpaceWidth     inclusive range of allowed spacer widths; maxSpaceWidth==0 means the
//                   spacer is never chosen for mutation
// wheel             roulette wheel over the current population (start,end,index per slot)
// populationSize    number of dyads in the population
// fitness           ranked fitness entries; fitness[k].index is the dyad at rank k
// uniqMotif         uniqMotif[k]=='1' marks rank k as a unique motif to carry over unchanged
// maxpFactor        per-dyad maxp factor; overwritten with the new population's factors
// maxpMutationRate  probability that a sampled dyad gets a new maxp factor instead of a
//                   changed component
//
// Unique motifs are copied first. The remaining places are filled by sampling a dyad from
// the wheel (falling back to index 0 when the draw matches no slot) and then either drawing
// a different maxp factor or replacing exactly one of w1, spacer, w2.
//
// The spacer redraw used to loop until it produced a width different from the current one,
// which never terminates when minSpaceWidth==maxSpaceWidth and the dyad already has that
// width. The loop now stops after one draw when only a single width is allowed.
void mutation(Chrs **dyad,int numWordGroup,Words *word,int minSpaceWidth,int maxSpaceWidth,
   Wheel *wheel,int populationSize,Fitness *fitness,char *uniqMotif,double *maxpFactor,double maxpMutationRate) {

   register int i,j;
   int popuCn;             // index of the number of dyads in population
   int whichToMutate;      // the spaced dyad for mutation
   int kmerGroup;          // pick a k-mer group (tetramer,pentamer,hexamer) in which a new word will be selected
   int whichPartDyad;      // pick a component of a spaced dyad for mutation
   int spaceWidth;
   double *tmpmaxpFactor;
   double draw,newFactor;
   Chrs **tmpDyad;

   tmpDyad=alloc_chrs(populationSize,4);
   tmpmaxpFactor=alloc_double(populationSize);

   // carry over every unique motif unchanged
   popuCn=0;
   for (i=0; i<populationSize; i++) {
      if (uniqMotif[i]=='1') {
         for (j=0; j<3; j++) {
            tmpDyad[popuCn][j].wordID   =dyad[fitness[i].index][j].wordID;
            tmpDyad[popuCn][j].wordGroup=dyad[fitness[i].index][j].wordGroup;
         }
         tmpmaxpFactor[popuCn]=maxpFactor[fitness[i].index];
         popuCn++;
      }
   }

   while (popuCn<populationSize) {

      // sample with replacement with probability proportional to fitness score
      draw=(double)populationSize*runif(0,1);
      whichToMutate=0;
      for (j=0; j<populationSize; j++) {
         if (draw>=wheel[j].start && draw<=wheel[j].end) {
            whichToMutate=wheel[j].index; break;
         }
      }
      tmpmaxpFactor[popuCn]=maxpFactor[whichToMutate];
      for (j=0; j<3; j++) {
         tmpDyad[popuCn][j].wordID   =dyad[whichToMutate][j].wordID;
         tmpDyad[popuCn][j].wordGroup=dyad[whichToMutate][j].wordGroup;
      }

      if (runif(0,1)<maxpMutationRate) {
         // find a different maxp factor, keep dyad unchanged
         do {
            newFactor=MAXP_BASE+MAXP_FACTOR*((int)(MAXP_SCALE*runif(0,1)));
         } while (fabs(newFactor-tmpmaxpFactor[popuCn])<0.001);
         tmpmaxpFactor[popuCn]=newFactor;
      }
      else {
         // change one component of the dyad, first find out how many components there are
         if (maxSpaceWidth==0) {
            whichPartDyad=(int)(2*runif(0,1));      // change w1
            if (whichPartDyad==1) whichPartDyad=2; // skip spacer and change w2
         }
         else {
            whichPartDyad=(int)(3*runif(0,1));
            if (whichPartDyad==3) whichPartDyad--;
         }

         // if w1 of the dyad (w1-s-w2) is chosen
         if (whichPartDyad==0) {
            // decide which kmer kmerGroup (4-mer, 5-mer or 6-mer) from which w1 will be replaced
            kmerGroup=(int)(numWordGroup*runif(0,1));
            if (kmerGroup==numWordGroup) kmerGroup--;

            // select a k-mer from the k-group with prob proportional to z-score
            // (word 0 is used when the draw falls in no interval)
            draw=runif(0,1);
            tmpDyad[popuCn][0].wordID=0;
            for (j=0; j<word[kmerGroup].count; j++) {
               if (draw>=word[kmerGroup].prob_sta[j] && draw<=word[kmerGroup].prob_end[j]) {
                  tmpDyad[popuCn][0].wordID=j; break;
               }
            }
            tmpDyad[popuCn][0].wordGroup=kmerGroup;        // mark the kmer kmerGroup from which w1 is selected
         }

         // if the spacer is chosen
         else if (whichPartDyad==1) {
            // choose a gap that is different from the current one; when only one width is
            // allowed no different gap exists, so a single draw is made and accepted
            do {
               spaceWidth=minSpaceWidth+(int)((maxSpaceWidth-minSpaceWidth+1)*runif(0,1));
            } while (spaceWidth==tmpDyad[popuCn][1].wordID && maxSpaceWidth>minSpaceWidth);
            tmpDyad[popuCn][1].wordID=spaceWidth;
            tmpDyad[popuCn][1].wordGroup=-1;                 // dummy - not used
         }

         // if w2 of the dyad (w1-space-w2) is chosen
         else {
            kmerGroup=(int)(numWordGroup*runif(0,1));
            if (kmerGroup==numWordGroup) kmerGroup--;

            draw=runif(0,1);
            tmpDyad[popuCn][2].wordID=0;
            for (j=0; j<word[kmerGroup].count; j++) {
               if (draw>=word[kmerGroup].prob_sta[j] && draw<word[kmerGroup].prob_end[j]) {
                  tmpDyad[popuCn][2].wordID=j; break;
               }
            }
            tmpDyad[popuCn][2].wordGroup=kmerGroup;     // mark the kmer kmerGroup from which w2 is selected
         }
      }
      popuCn++;
   }

   // update the population
   for (i=0; i<populationSize; i++) {
      for (j=0; j<3; j++) {
         dyad[i][j].wordID   =tmpDyad[i][j].wordID;
         dyad[i][j].wordGroup=tmpDyad[i][j].wordGroup;
      }
      maxpFactor[i]=tmpmaxpFactor[i];
   }

   // NOTE(review): freeing tmpDyad[0] and then tmpDyad presumably matches how alloc_chrs
   // lays out its storage - confirm against alloc_chrs
   if (tmpDyad[0])    { free(tmpDyad[0]);    tmpDyad[0]=NULL;    }
   if (tmpDyad)       { free(tmpDyad);       tmpDyad=NULL;       }
   if (tmpmaxpFactor) { free(tmpmaxpFactor); tmpmaxpFactor=NULL; }
}
예제 #8
0
void crossover(Chrs **dyad,int numWordGroup,Words *word,int minSpaceWidth,int maxSpaceWidth,
   Wheel *wheel,int populationSize,Fitness *fitness,char *motifUniq,double *maxpFactor,double maxpMutationRate) {

   register int i,j;
   int found,id1,id2,popuCn;
   double *tmpmaxpFactor;
   Chrs **tmpDyad;
   double rand,du,newFactor;

   tmpDyad=alloc_chrs(populationSize,4);
   tmpmaxpFactor=alloc_double(populationSize);

   // save a copy of all unique top-ranked spaced dyads
   popuCn=0;
   for (i=0; i<populationSize; i++) {
      if (motifUniq[i]=='1') {
         for (j=0; j<3; j++) {
            tmpDyad[popuCn][j].wordID   =dyad[fitness[i].index][j].wordID;
            tmpDyad[popuCn][j].wordGroup=dyad[fitness[i].index][j].wordGroup;
         }
         tmpmaxpFactor[popuCn]=maxpFactor[fitness[i].index];
         popuCn++;
      }
   }

   // crossover
   while (popuCn<populationSize) {
      do {
         // select the first dyad 
         du=(double)populationSize*genrand();
         found=0; id1=0; id2=0;
         for (j=0; j<populationSize; j++) {
            if (du>=wheel[j].start && du<=wheel[j].end) { id1=wheel[j].index; found=1; break; }
         }
         if (!found) {
            id1=(int)(populationSize*genrand());
            if (id1==populationSize) id1--;
         }
         // select the second dyad
         du=(double)populationSize*genrand();
         found=0;
         for (j=0; j<populationSize; j++) {
            if (du>=wheel[j].start && du<=wheel[j].end) { id2=wheel[j].index; found=1; break; }
         }
         if (!found) {
            id2=(int)(populationSize*genrand());
            if (id2==populationSize) id2--;
         }
         // printf("id1 id2: %d %d\n",id1,id2);
      } while (id1==id2); // do not pick the same "chromosome"

      if (genrand()<maxpMutationRate) {
         // if change maxp factor other than crossing over the dyads
         do {
            newFactor=MAXP_BASE+MAXP_FACTOR*((int)(MAXP_SCALE*genrand()));
         } while (fabs(newFactor-maxpFactor[id1])<0.001);
         tmpmaxpFactor[popuCn]=newFactor;

         tmpDyad[popuCn][0].wordID   =dyad[id1][0].wordID;
         tmpDyad[popuCn][1].wordID   =dyad[id1][1].wordID; 
         tmpDyad[popuCn][2].wordID   =dyad[id1][2].wordID;
         tmpDyad[popuCn][0].wordGroup=dyad[id1][0].wordGroup;
         tmpDyad[popuCn][1].wordGroup=dyad[id1][1].wordGroup; 
         tmpDyad[popuCn][2].wordGroup=dyad[id1][2].wordGroup;
         popuCn++;
         if (popuCn==populationSize) break;

         do {
            newFactor=MAXP_BASE+MAXP_FACTOR*((int)(MAXP_SCALE*genrand()));
         } while (fabs(newFactor-maxpFactor[id2])<0.001);
         tmpmaxpFactor[popuCn]=newFactor;

         tmpDyad[popuCn][0].wordID   =dyad[id2][0].wordID;
         tmpDyad[popuCn][1].wordID   =dyad[id2][1].wordID; 
         tmpDyad[popuCn][2].wordID   =dyad[id2][2].wordID;
         tmpDyad[popuCn][0].wordGroup=dyad[id2][0].wordGroup;
         tmpDyad[popuCn][1].wordGroup=dyad[id2][1].wordGroup; 
         tmpDyad[popuCn][2].wordGroup=dyad[id2][2].wordGroup;
         popuCn++;
         if (popuCn==populationSize) break;
      }

      else {
         rand=genrand();
         if (rand>=0 && rand<1/3) {
            // replace w1 of dyad1 by w1 of dyad2
            tmpDyad[popuCn][0].wordID   =dyad[id2][0].wordID;       // replacement
            tmpDyad[popuCn][1].wordID   =dyad[id1][1].wordID; 
            tmpDyad[popuCn][2].wordID   =dyad[id1][2].wordID;
            tmpDyad[popuCn][0].wordGroup=dyad[id2][0].wordGroup;    // replacement
            tmpDyad[popuCn][1].wordGroup=dyad[id1][1].wordGroup; 
            tmpDyad[popuCn][2].wordGroup=dyad[id1][2].wordGroup;
            tmpmaxpFactor[popuCn]=maxpFactor[id1];
            popuCn++;
            if (popuCn==populationSize) break;
            
            // replace w1 of dyad2 by w1 of dyad1
            tmpDyad[popuCn][0].wordID   =dyad[id1][0].wordID;       // replacement
            tmpDyad[popuCn][1].wordID   =dyad[id2][1].wordID; 
            tmpDyad[popuCn][2].wordID   =dyad[id2][2].wordID;
            tmpDyad[popuCn][0].wordGroup=dyad[id1][0].wordGroup;    // replacement
            tmpDyad[popuCn][1].wordGroup=dyad[id2][1].wordGroup; 
            tmpDyad[popuCn][2].wordGroup=dyad[id2][2].wordGroup;
            tmpmaxpFactor[popuCn]=maxpFactor[id2];
            popuCn++;
            if (popuCn==populationSize) break;
         }
         else if (rand>=1/3 && rand<2/3) {
            // replace spacer of dyad1 by spacer of dyad2
            tmpDyad[popuCn][0].wordID   =dyad[id1][0].wordID;
            tmpDyad[popuCn][1].wordID   =dyad[id2][1].wordID;       // replacement
            tmpDyad[popuCn][2].wordID   =dyad[id1][2].wordID;
            tmpDyad[popuCn][0].wordGroup=dyad[id1][0].wordGroup;
            tmpDyad[popuCn][1].wordGroup=dyad[id2][1].wordGroup;    // replacement 
            tmpDyad[popuCn][2].wordGroup=dyad[id1][2].wordGroup;
            tmpmaxpFactor[popuCn]=maxpFactor[id1];
            popuCn++;
            if (popuCn==populationSize) break;
   
            // replace spacer dyad2 by spacer of dyad1
            tmpDyad[popuCn][0].wordID   =dyad[id2][0].wordID;
            tmpDyad[popuCn][1].wordID   =dyad[id1][1].wordID;       // replacement
            tmpDyad[popuCn][2].wordID   =dyad[id2][2].wordID;      
            tmpDyad[popuCn][0].wordGroup=dyad[id2][0].wordGroup;
            tmpDyad[popuCn][1].wordGroup=dyad[id1][1].wordGroup;    // replacement 
            tmpDyad[popuCn][2].wordGroup=dyad[id2][2].wordGroup;
            tmpmaxpFactor[popuCn]=maxpFactor[id2];
            popuCn++;
            if (popuCn==populationSize) break;
         }
         else {
            // replace w2 of dyad1 by w2 of dyad2
            tmpDyad[popuCn][0].wordID   =dyad[id1][0].wordID;       
            tmpDyad[popuCn][1].wordID   =dyad[id1][1].wordID; 
            tmpDyad[popuCn][2].wordID   =dyad[id2][2].wordID;       // replacement
            tmpDyad[popuCn][0].wordGroup=dyad[id1][0].wordGroup;   
            tmpDyad[popuCn][1].wordGroup=dyad[id1][1].wordGroup; 
            tmpDyad[popuCn][2].wordGroup=dyad[id2][2].wordGroup;    // replacement
            tmpmaxpFactor[popuCn]=maxpFactor[id1];
            popuCn++;
            if (popuCn==populationSize) break;
   
            // replace w2 of dyad2 by w2 of dyad1
            tmpDyad[popuCn][0].wordID   =dyad[id2][0].wordID;       
            tmpDyad[popuCn][1].wordID   =dyad[id2][1].wordID; 
            tmpDyad[popuCn][2].wordID   =dyad[id1][2].wordID;       // replacement
            tmpDyad[popuCn][0].wordGroup=dyad[id2][0].wordGroup;
            tmpDyad[popuCn][1].wordGroup=dyad[id2][1].wordGroup; 
            tmpDyad[popuCn][2].wordGroup=dyad[id1][2].wordGroup;    // replacement
            tmpmaxpFactor[popuCn]=maxpFactor[id2];
            popuCn++;
            if (popuCn==populationSize) break;
         }
      }
      // printf("popuCn: %d\n",popuCn);
   }

   // update the population - both pwm lengths and members
   for (i=0; i<populationSize; i++) {
      for (j=0; j<3; j++) {
         dyad[i][j].wordID   =tmpDyad[i][j].wordID;
         dyad[i][j].wordGroup=tmpDyad[i][j].wordGroup;
      }
      maxpFactor[i]=tmpmaxpFactor[i];
   }

   if (tmpDyad[0])    { free(tmpDyad[0]);     tmpDyad[0]=NULL;    }
   if (tmpDyad)       { free(tmpDyad);        tmpDyad=NULL;       }
   if (tmpmaxpFactor) { free(tmpmaxpFactor);  tmpmaxpFactor=NULL; }
}
예제 #9
0
SEXP GADEM_Analysis(SEXP sequence,SEXP sizeSeq, SEXP accession, SEXP Rverbose,SEXP RnumWordGroup,SEXP RnumTop3mer,SEXP RnumTop4mer,SEXP RnumTop5mer,SEXP RnumGeneration,SEXP RpopulationSize, SEXP RpValue,SEXP ReValue,SEXP RextTrim,SEXP RminSpaceWidth,SEXP RmaxSpaceWidth,SEXP RuseChIPscore,SEXP RnumEM,SEXP RfEM, SEXP RwidthWt,SEXP RfullScan, SEXP RslideWinPWM,SEXP RstopCriterion,SEXP RnumBackgSets,SEXP RweightType,SEXP RbFileName,SEXP RListPWM,SEXP RminSites,SEXP RmaskR,SEXP Rnmotifs) 
{
  char *bFileName;
  
  SEXP ResultsGadem;
  SEXP RSpwm;
  PROTECT(ResultsGadem=NEW_LIST(100));  
  
  int increment=0;
  
  double testrand;
  
  //Number of sequences
  int numSeq = INTEGER_VALUE(sizeSeq);
  // const
//  char *Fastaheader[size];
  int incr=0;
  
  int longueur=length(sequence);
  int IncrementTemp=0;
  
  // basic settings/info
  int maxSeqLen,*seqLen;       		 // sequence info	
  double aveSeqLen;                      // sequence info
  char **seq,**rseq;
  int *geneID;         			 // sequence info
  char **oseq,**orseq;                   // copy of the original sequences
  char **sseq,**rsseq;                   // simulated seqs.
  double *bfreq1, *bfreq0=NULL;                // base frequencies
  double *ChIPScore;                     // chip score
  int maskR;				 // mask simple repeats before running the algorithm
    
  // pwms
  double ***pwm;                         // initial population of PWMs from spaced dyads
  int *pwmLen;                           // initial pwm lengths
  double **opwm2;                        // EM-derived PWM
  double ***opwm;                        // observed PWMs from identified sites
  double ***epwm;                        // em-optimized PWMs
  double **logepwm;                      // log(em-optimized PWM)
  int *pwmnewLen;                        // final motif length after extending to both ends
  
  // llr score distr.
  Pgfs *llrDist;                         // llr distribution from pgf method
  int llrDim;                            // llr distribution dimension
  int **ipwm;                            // integer pwm for computing llr score distribution
  
  // EM, motif, sites
  double pvalueCutoff;                   // user input, used to determine score cutoff based on ipwm
  int *scoreCutoff;                      // pwm score cutoff for the corresponding p-value cutoff
  double logev;                          // log of E-value of a motif;
  int useChIPscore;                      // indicator for using ChIP-seq score for seq. selection for EM
  int numEM;                             // number of EM steps
  double E_valueCutoff;                  // log E-value cutoff
  //int nsitesEM;                          // number of binding sites in sequences subjected to EM
  int minsitesEM;                        // minimal number of sites in a motif in EM sequences
  int *nsites;                           // number of binding sites in full data
  int minsites;                          // minimal number of sites in a motif in full data
  Sites **site;                          // binding sites in all sequences
  int motifCn;                           // number of motifs sought and found
  int extTrim;
  int noMotifFound;                      // none of the dyads in the population resulted in a motif
  char **pwmConsensus;                   // consensus sequences of motifs
  double pwmDistCutoff;                  // test statistic for motif pwm similarity
  char *uniqMotif;                       // motifs in a population unique or not
  int numUniq;                           // number of unique motifs in a population
  int slideWinPWM;                       // sliding window for comparing pwm similarity
  int widthWt;                           // window width in which nucleotides are given large weights for PWM optimization
  int fullScan;                          // scan scan on the original sequences or masked sequences
  
  // background
  int numBackgSets;
  
  // weights
  double **posWeight;                    // spatial weights
  int weightType;                        // four weight types 0, 1, 2, 3, or 4
  
  // words for spaced dyad
  Words *word;                           // top-ranked k-mers as the words for spaced dyads
  int numTop3mer,numTop4mer,numTop5mer;  // No. of top-ranked k-mers as words for dyads
  int maxWordSize;                       // max of the above three
  int numWordGroup;                      // number of non-zero k-mer groups
  int minSpaceWidth,maxSpaceWidth;       // min and max width of spacer of the spaced dyads
  Chrs **dyad;                           // initial population of "chromosomes"
  char **sdyad;                          // char of spaced dyads
  
  // GA
  int populationSize,numGeneration;      // GA parameters
  double maxpMutationRate;
  Fitness *fitness;                      // "chromosome" fitness
  Wheel *wheel;                          // roulette-wheel selection
  
  // to speed up only select a subset of sequences for EM algorithm
  double fEM;                            // percentage of sequences used in EM algorithm
  int numSeqEM;                          // number of sequences subject to EM
  char *Iseq;                            // Indicator if a sequence is used in EM or not
  int *emSeqLen;                         // length of sequences used in EM
  double *maxpFactor;
  
  int numCycle;                          // number of GADEM cycles
  int generationNoMotif;                 // maximal number of GA generations in a GADEM cycle resulted in no motifs
  
  // mis.
  //seed_t  seed;                          // random seed
  int motifCn2,id,numCycleNoMotif,verbose,minminSites,nmotifs;
  int startPWMfound,stopCriterion;
  char *mFileName,*oFileName,*pwmFileName,*tempRbFileName;
  time_t start;
  int cn[4],bcn[4],*seqCn,*bseqCn,avebnsites,avebnsiteSeq,totalSitesInput;
  int i; 
  int ii=0;
  int jjj=0;
  
  /*************/
  FILE * output = fopen("output.txt", "w"); 
  /*************/
  
  GetRNGstate();
  

  mFileName=alloc_char(500);         mFileName[0]='\0';
  oFileName=alloc_char(500);         oFileName[0]='\0';
  pwmFileName=alloc_char(500);       pwmFileName[0]='\0';
  bFileName=alloc_char(500);         bFileName[0]='\0';
  //tempRbFileName=alloc_char(500);    tempRbFileName[0]='\0';
  seq=NULL; aveSeqLen=0; maxSeqLen=0; 
  //minsites=-1; 
  
  startPWMfound=0;    

  maxSeqLen=0;
  for(incr=1;incr<longueur;incr=incr+2)
  { 
    if (length(STRING_ELT(sequence,(incr)))>maxSeqLen) maxSeqLen=length(STRING_ELT(sequence,(incr))); 
  }
//  fprintf(output,"maxLength=%d",maxSeqLen);
//  exit(0);
  seq=alloc_char_char(numSeq,maxSeqLen+1);
  for(incr=1;incr<longueur;incr=incr+2)
  { 
    for (int j=0; j<length(STRING_ELT(sequence,(incr))); j++)
    {
      seq[IncrementTemp][j]=CHAR(STRING_ELT(sequence,(incr)))[j];
    }
    IncrementTemp++;
  }
  
  
  verbose=LOGICAL_VALUE(Rverbose);
  numWordGroup=INTEGER_VALUE(RnumWordGroup);
  minsites=INTEGER_VALUE(RminSites);
  numTop3mer=INTEGER_VALUE(RnumTop3mer);
  numTop4mer=INTEGER_VALUE(RnumTop4mer);
  numTop5mer=INTEGER_VALUE(RnumTop5mer);
  numGeneration=INTEGER_VALUE(RnumGeneration);
  populationSize=INTEGER_VALUE(RpopulationSize);
  pvalueCutoff=NUMERIC_VALUE(RpValue);
  E_valueCutoff=NUMERIC_VALUE(ReValue);
  extTrim=INTEGER_VALUE(RextTrim);
  minSpaceWidth=INTEGER_VALUE(RminSpaceWidth);
  maxSpaceWidth=INTEGER_VALUE(RmaxSpaceWidth);
  useChIPscore=NUMERIC_VALUE(RuseChIPscore);
  numEM=INTEGER_VALUE(RnumEM);
  fEM=NUMERIC_VALUE(RfEM);
  widthWt=INTEGER_VALUE(RwidthWt);
  fullScan=INTEGER_VALUE(RfullScan);
  slideWinPWM=INTEGER_VALUE(RslideWinPWM);
  numUniq=populationSize;
  stopCriterion=INTEGER_VALUE(RstopCriterion);  
  numBackgSets=INTEGER_VALUE(RnumBackgSets);
  weightType=NUMERIC_VALUE(RweightType);
  //const char *tempRbFileName[1];

 	tempRbFileName = convertRString2Char(RbFileName);	

  //tempRbFileName[0]=CHAR(STRING_ELT(RbFileName,0));
  nmotifs = INTEGER_VALUE(Rnmotifs);
  maskR = INTEGER_VALUE(RmaskR);

  

  if(numSeq>MAX_NUM_SEQ)
  {
    error("Error: maximal number of seqences reached!\nPlease reset MAX_NUM_SEQ in gadem.h and rebuild (see installation)\n");
  }
  
  strcpy(bFileName,tempRbFileName);

  ChIPScore=alloc_double(MAX_NUM_SEQ);
  seqLen=alloc_int(MAX_NUM_SEQ); 
  geneID=alloc_int(MAX_NUM_SEQ);

//  seq=sequences;
  
//  numSeq=size;
  int len; 
  
  for (i=0; i<numSeq; i++)
  {
    len=strlen(seq[i]); 
    seqLen[i]=len;
    geneID[i]=INTEGER(accession)[i];
  }

  aveSeqLen=0; 
  for (i=0; i<numSeq; i++) aveSeqLen +=seqLen[i]; aveSeqLen /=(double)numSeq;
  
  for (i=0; i<numSeq; i++) {
    if (seqLen[i]>maxSeqLen) maxSeqLen=seqLen[i]; 
  }
  
  rseq=alloc_char_char(numSeq,maxSeqLen+1);
  oseq=alloc_char_char(numSeq,maxSeqLen+1);
  orseq=alloc_char_char(numSeq,maxSeqLen+1);
  
  for (i=0; i<numSeq; i++)
  {
    if(seqLen[i]>maxSeqLen) maxSeqLen=seqLen[i]; 
  }
  
  reverse_seq(seq,rseq,numSeq,seqLen);
  
  // make a copy of the original sequences both strands
  for (i=0; i<numSeq; i++)
  {
    for (int j=0; j<seqLen[i]; j++)
    {
      oseq[i][j]=seq[i][j];
      orseq[i][j]=rseq[i][j];
    }
    oseq[i][seqLen[i]]='\0'; orseq[i][seqLen[i]]='\0'; 
  }
    
  if (strcmp(bFileName,"NULL")!= 0)
  {
    bfreq0=alloc_double(5);
    read_background(bFileName,bfreq0);
  }

  if (GET_LENGTH(RListPWM)!= 0)
  {
    startPWMfound=1; 
  }
  else { }
  
    // check for input parameters
  if(numGeneration<1)
  { 
    error("number of generaton < 1.\n");
  }
  if(populationSize<1)
  {
    error("population size < 1.\n");
  }
  if (minSpaceWidth<0)
  { 
    error("minimal number of unspecified bases in spaced dyads <0.\n"); 
  }
  if (maxSpaceWidth<0)
  { 
    error("maximal number of unspecified bases in spaced dyads <0.\n"); 
  }
  if (minSpaceWidth>maxSpaceWidth)
  {
    error("mingap setting must <= to maxgap setting.\n\n"); 
  }
  if (maxSpaceWidth+12>MAX_PWM_LENGTH)
  {
    error("maxgap setting plus word lengths exceed <MAX_PWM_LENGTH>.\n");
  }
  if (numEM<0)
  {
    error("number of EM steps is zero.\n");
  }
  if (numEM==0)
  {
    error("number of EM steps = 0, no EM optimization is carried out.\n");
  }
  
  if (fullScan!=0 && fullScan!=1)
    fullScan=0;
  
  
  maxWordSize=0;
  if (numTop3mer>maxWordSize) maxWordSize=numTop3mer;
  if (numTop4mer>maxWordSize) maxWordSize=numTop4mer;
  if (numTop5mer>maxWordSize) maxWordSize=numTop5mer;
  
    // any one, two or three: tetramer, pentamer, hexamer
  if (numTop3mer==0 && numTop4mer==0 && numTop5mer==0)
  {
    error("maxw3, maxw4, and maxw5 all zero - no words for spaced dyads.\n");
  }
  
  // if (startPWMfound && fEM!=0.5 && fEM!=1.0 & verbose)
  // {
  //   warning("fEM argument is ignored in a seeded analysis\n");
  // }
  
  if (startPWMfound)
  {
    // if(verbose)
    // {
    //   if (populationSize!=10 && populationSize!=100) warning("pop argument is ignored in a seeded analysis, -pop is set to 10.\n");
    //   if (numGeneration!=1 && numGeneration!=5)      warning("gen argument is ignored in a seeded analysis, -gen is set to 1.\n");
    // }
    fEM=1.0;
    populationSize=FIXED_POPULATION; numGeneration=1; 
  }
  
    // number of sequences for EM
  if (fEM>1.0 || fEM<=0.0)
  { 
    error("The fraction of sequences subject to EM is %3.2f.\n",fEM);
  } 
  numSeqEM=(int)(fEM*numSeq);
  


  // memory callocations
  Iseq  =alloc_char(numSeq+1); 
  opwm2 =alloc_double_double(MAX_PWM_LENGTH,4);
  ipwm  =alloc_int_int(MAX_PWM_LENGTH,4);
  logepwm=alloc_double_double(MAX_PWM_LENGTH,4);
  emSeqLen=alloc_int(numSeqEM);
  scoreCutoff=alloc_int(1000);
  // scoreCutoff=alloc_int(populationSize);
  llrDist=alloc_distr(MAX_DIMENSION);
  posWeight=alloc_double_double(numSeq,maxSeqLen);
  sseq=alloc_char_char(MAX_NUM_SEQ,maxSeqLen+1);
  rsseq=alloc_char_char(MAX_NUM_SEQ,maxSeqLen+1);
  bfreq1=base_frequency(numSeq,seq,seqLen);

  if (strcmp(bFileName,"NULL") == 0)
  {
    bfreq0=alloc_double(5);
    for (i=0; i<4; i++)
      {
	bfreq0[i]=bfreq1[i];
      }
  }
  

  // if minN not specified, set the defaults accordingly
  if (minsites==-1) 
  {
    minsites =max(2,(int)(numSeq/20)); 
  }
  minsitesEM=(int)(fEM*minsites);
  
  maxpMutationRate=MAXP_MUTATION_RATE;
  
  // determine the distribution and critical cut point
  pwmDistCutoff=vector_similarity();
  
  /*---------- select a subset of sequences for EM only --------------*/
  if (useChIPscore==1)
  {
    select_high_scoring_seq_for_EM (ChIPScore,numSeq,numSeqEM,Iseq,fEM);
  }
  else
  {
    sample_without_replacement(Iseq,numSeqEM,numSeq);
  }
  /*-------------------- end of selection --------------------------*/
  
  if (maskR==1) mask_repetitive(geneID,seq,numSeq,seqLen,mFileName);

  if (widthWt<20)
  {
    warning("The window width of sequence centered on the nucleotides having large weights in EM for PWM optimization is small\n Motif longer than %d will not be discovered\n",widthWt);
  }
  
  time(&start);
  
    // if (weightType==1 || weightType==3) 
    //ffprintf(output,fp,"window width of sequence centered on the nucleotides having large weights for PWM optimization: %d\n",widthWt);
    //ffprintf(output,fp,"pwm score p-value cutoff for declaring binding site:\t%e\n",pvalueCutoff);
  
  if(verbose)
  {
    ffprintf(output,output,"==============================================================================================\n");
    ffprintf(output,output,"input sequence file:  %s\n",mFileName);
    fprintf(output,"number of sequences and average length:\t\t\t\t%d %5.1f\n",numSeq,aveSeqLen);
    
    fprintf(output,"Use pgf method to approximate llr null distribution\n");
    fprintf(output,"parameters estimated from sequences in:  %s\n\n",mFileName);

    if (weightType!=0) 
      fprintf(output,"non-uniform weight applies to each sequence - type:\t\t%d\n",weightType);
    fprintf(output,"number of GA generations & population size:\t\t\t%d %d\n\n",numGeneration,populationSize);
    fprintf(output,"PWM score p-value cutoff for binding site declaration:\t\t%e\n",pvalueCutoff);
    fprintf(output,"ln(E-value) cutoff for motif declaration:\t\t\t%f\n\n",E_valueCutoff);
//    fprintf(output,"number (percentage) of sequences selected for EM:\t\t%d(%4.1f\%)\n",numSeqEM,100.0*(double)numSeqEM/(double)numSeq);
    fprintf(output,"number of EM steps:\t\t\t\t\t\t%d\n",numEM);
    fprintf(output,"minimal no. sites considered for a motif:\t\t\t%d\n\n",minsites);
    fprintf(output,"[a,c,g,t] frequencies in input data:\t\t\t\t%f %f %f %f\n",bfreq1[0],bfreq1[1],bfreq1[2],bfreq1[3]);
    fprintf(output,"==============================================================================================\n");
  }
  
  // if (pgf) 
  // {
  //   if (userMarkovOrder!=0 & verbose) 
  //   {
  //     warning("The user-specified background Markov order (%d) is ignored when -pgf is set to 1\n",userMarkovOrder);
  //   }
  //   if (bFileName[0]!='\0' & verbose)
  //   {
  //     warning("The user-specified background models: %s are not used when -pgf is set to 1\n",bFileName);
  //   }
  // }
  // if (startPWMfound && fEM!=1.0  & verbose)
  // {
  //   warning("fEM argument is ignored in a seeded analysis\n");
  // }
  
    // determine seq length by counting only [a,c,g,t], seqLen is used in E-value calculation
    // determine the distribution and critical cut point
  pwmDistCutoff=vector_similarity();
  
  if      (weightType==0) assign_weight_uniform(seqLen,numSeq,posWeight);
  else if (weightType==1) assign_weight_triangular(seqLen,numSeq,posWeight);
  else if (weightType==2) assign_weight_normal(seqLen,numSeq,posWeight);
  else
  {
    error("Motif prior probability type not found - please choose: 0, 1, or 2\n");
    // fprintf(output,"Consider: -posWt 1 for strong central enrichment as in ChIP-seq\n");
    // fprintf(output,"          -posWt 0 for others\n\n");
    // exit(0);
  }
  /*    if (startPWMfound) minminSites=minsites;
   else               minminSites=(int)(0.40*minsitesEM);*/
  
  motifCn=0; noMotifFound=0; numCycle=0; numCycleNoMotif=0; 
  int compt=0;
  int lengthList=GET_LENGTH(RListPWM);
 
    /****************************************/ 
    broadcastOnce(maxSeqLen, numEM, startPWMfound, minminSites, maxpFactor, numSeq, numSeqEM, Iseq, bfreq0, posWeight, weightType, pvalueCutoff, emSeqLen, populationSize);
    /****************************************/ 

  do
  {
    if(!startPWMfound)
    {
      
      if(verbose)
      {
        fprintf(output,"*** Running an unseeded analysis ***\n");
        // fprintf(output,"\n|------------------------------------------------------------------|\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|              *** Running an unseeded analysis ***                |\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|------------------------------------------------------------------|\n\n");
      }
      populationSize=INTEGER_VALUE(RpopulationSize);
      numGeneration=INTEGER_VALUE(RnumGeneration);
      dyad  =alloc_chrs(populationSize,4);
      wheel =alloc_wheel(populationSize);
      fitness=alloc_fitness(populationSize);
      maxpFactor=alloc_double(populationSize);
      uniqMotif=alloc_char(populationSize+1);
      opwm  =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      epwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmConsensus=alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      pwm   =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmLen=alloc_int(populationSize);
      sdyad =alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      word  =alloc_word(numWordGroup,maxWordSize);
      minminSites=(int)(0.40*minsitesEM);

        // identify top-ranked k-mers (k=3,4,5) for spaced dyads
      if(verbose)
        fprintf(output,"GADEM cycle %2d: enumerate and count k-mers... ",numCycle+1);
        
      numWordGroup=word_for_dyad(word,seq,rseq,numSeq,seqLen,bfreq1,&numTop3mer,&numTop4mer,&numTop5mer);
      
      if(verbose)
        fprintf(output,"Done.\n");
      
        // generating a "population" of spaced dyads
      if(verbose)
        fprintf(output,"Initializing GA... ");

      initialisation(dyad,populationSize,numWordGroup,word,minSpaceWidth,maxSpaceWidth,maxpFactor);
      if(verbose)
        fprintf(output,"Done.\n");
      
    }
    else
    {
      if(verbose)
      {
        fprintf(output,"*** Running an seeded analysis ***\n");
        // fprintf(output,"\n|------------------------------------------------------------------|\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|               *** Running a seeded analysis ***                  |\n");
        // fprintf(output,"|                                                                  |\n");
        // fprintf(output,"|------------------------------------------------------------------|\n\n");
      }
      populationSize=FIXED_POPULATION; 
      dyad  =alloc_chrs(populationSize,4);
      pwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmLen=alloc_int(populationSize);
      maxpFactor=alloc_double(populationSize);
      uniqMotif=alloc_char(populationSize+1);
      opwm  =alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      epwm=alloc_double_double_double(populationSize,MAX_PWM_LENGTH,4);
      pwmConsensus=alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      sdyad =alloc_char_char(populationSize,MAX_PWM_LENGTH+1);
      word  =alloc_word(numWordGroup,maxWordSize);
      wheel =alloc_wheel(populationSize);
      fitness=alloc_fitness(populationSize);
      minminSites=minsites;
      int lengthMatrix;
      
      lengthMatrix=GET_LENGTH(VECTOR_ELT(RListPWM,compt));
      RSpwm=allocMatrix(REALSXP,4,(lengthMatrix/4));
      RSpwm=VECTOR_ELT(RListPWM,compt);
      
      
      pwmLen[0]=read_pwm0(RSpwm,pwm[0],lengthMatrix);
      
      for(i=1; i<populationSize; i++)
      {
        for (int j=0; j<pwmLen[0]; j++)
        {
          for (int k=0; k<4; k++)
          {
            pwm[i][j][k]=pwm[0][j][k];
          }
        }
        pwmLen[i]=pwmLen[0];
      }
      for (i=0; i<populationSize; i++)
      {
        maxpFactor[i]=FIXED_MAXPF*(i+1);
        standardize_pwm(pwm[i],pwmLen[i]);
        consensus_pwm(pwm[i],pwmLen[i],pwmConsensus[i]);
        strcpy(sdyad[i],pwmConsensus[i]);
      }
    }
    generationNoMotif=0;
    
    for (jjj=0; jjj<numGeneration; jjj++)
    {
        // convert spaced dyads to letter probability matrix
      if (!startPWMfound)
      {
        dyad_to_pwm(word,populationSize,dyad,pwm,pwmLen);
      }

    /*
      DO_APPLY(populationCalculation(maxSeqLen, numEM, fitness+ii, 
                                     startPWMfound, minminSites, maxpFactor[ii], 
                                     numSeq, numSeqEM, seq, rseq, seqLen, Iseq, 
                                     bfreq0, posWeight, weightType, 
                                     pvalueCutoff, emSeqLen, 
                                     pwm[ii], pwmLen[ii], epwm[ii], opwm[ii], 
                                     pwmConsensus[ii], scoreCutoff+ii, sdyad[ii], ii),
               populationSize, ii);
    */
      
     /* Create the structure to send to all the other slaves  */ 
      
      broadcastEveryCycle(Iseq, pwm, pwmLen, pwmConsensus, scoreCutoff, sdyad, populationSize);

      populationCalculation(maxSeqLen, numEM, fitness+ii, 
                                     startPWMfound, minminSites, maxpFactor[ii], 
                                     numSeq, numSeqEM, seq, rseq, seqLen, Iseq, 
                                     bfreq0, posWeight, weightType, 
                                     pvalueCutoff, emSeqLen, 
                                     pwm[ii], pwmLen[ii], epwm[ii], opwm[ii], 
                                     pwmConsensus[ii], scoreCutoff+ii, sdyad[ii], ii);

    /* Receive the analyzed data from all the other slaves and compile them */
    //getPopCalcResults(...);


      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      //   fprintf(output,"maxpfactor=%lf\n",maxpFactor[i]);
      //   fprintf(output,"scoreCutoff=%d\n",scoreCutoff[i]);
      //   fprintf(output,"   spacedDyad: %s\n",sdyad[i]);
      //   
      //   for (l=0; l<pwmLen[i]; l++)
      //   {
      //     for (m=0; m<4; m++) 
      //     { 
      //       fprintf(output,"opwm[%d][%d][%d]=%lf ",i,l,m,opwm[i][l][m]);
      //       fprintf(output,"epwm[%d][%d][%d]=%lf ",i,l,m,epwm[i][l][m]);
      //       fprintf(output,"pwm[%d][%d][%d]=%lf ",i,l,m,pwm[i][l][m]);
      //     }
      //     fprintf(output,"\n");
      //   }
      //   fprintf(output,"\n");
      // }
      // 
      // testrand=runif(0,1);
      // fprintf(output,"testrand1=%lf\n",testrand);
      
      if (populationSize>1)
      {
        sort_fitness(fitness,populationSize);
      }



      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      // }
      numUniq=check_pwm_uniqueness_dist(opwm, pwmLen,
                                        populationSize, fitness,
                                        pwmDistCutoff, E_valueCutoff,
                                        uniqMotif, slideWinPWM);


      // for (i=0; i<5; i++)
      // {
      //   fprintf(output,"fitness.value=%lf\n",fitness[i].value);
      //   fprintf(output,"fitness.index=%d\n",fitness[i].index);
      //   fprintf(output,"maxpfactor=%lf\n",maxpFactor[i]);
      //   fprintf(output,"scoreCutoff=%d\n",scoreCutoff[i]);
      //   fprintf(output,"   spacedDyad: %s\n",sdyad[i]);
      //   
      //   for (l=0; l<pwmLen[i]; l++)
      //   {
      //     for (m=0; m<4; m++) 
      //     { 
      //       fprintf(output,"opwm[%d][%d][%d]=%lf",i,l,m,opwm[i][l][m]); 
      //     }
      //     fprintf(output,"\n");
      //   }
      //   fprintf(output,"\n");
      // }
      
      if(verbose)
      {
        fprintf(output,"GADEM cycle[%3d] generation[%3d] number of unique motif: %d\n",numCycle+1,jjj+1,numUniq);
        for (i=0; i<populationSize; i++)
        {
          if (uniqMotif[i]=='1')
          {
            fprintf(output,"   spacedDyad: %s ",sdyad[fitness[i].index]);
            for (int j=strlen(sdyad[fitness[i].index]); j<maxSpaceWidth+10; j++) fprintf(output," ");
            fprintf(output,"motifConsensus: %s ",pwmConsensus[fitness[i].index]);
            for (int j=strlen(sdyad[fitness[i].index]); j<maxSpaceWidth+10; j++) fprintf(output," ");
            fprintf(output," %3.2f fitness: %7.2f\n",maxpFactor[fitness[i].index],fitness[i].value);
          }
        }
        fprintf(output,"\n");
      }


      if (jjj<numGeneration-1)
      {
        // fitness based selection with replacement
        roulett_wheel_fitness(fitness,populationSize,wheel);
        // mutation and crossover operations
        if (populationSize>1)
        {
          testrand=runif(0,1);
          if (testrand>=0.5)
          {
            mutation(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif,
                      maxpFactor,maxpMutationRate); 
          }
          else
          {
            crossover(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif, maxpFactor,maxpMutationRate); 
          }
        }
        else
        {
          mutation(dyad,numWordGroup,word,minSpaceWidth,maxSpaceWidth,wheel,populationSize,fitness,uniqMotif, maxpFactor,maxpMutationRate);
        }
      }
    }

    if((numCycle+1)< lengthList)
    {
      compt++;
    }
    else
    {
      startPWMfound=0;
    }
    numCycle++;


    site=alloc_site_site(numUniq+1,MAX_SITES);
    nsites=alloc_int(numUniq+1);
    pwmnewLen=alloc_int(numUniq+1); // after base extension and trimming
    seqCn=alloc_int(MAX_NUM_SEQ);
    bseqCn=alloc_int(MAX_NUM_SEQ);

    // final step user-specified background model is used
    motifCn2=0; // motifCn per GADEM cycle
    for (ii=0; ii<populationSize; ii++) 
    {

      id=fitness[ii].index;
      if(uniqMotif[ii]=='0')
      {
        continue;
      }


      // approximate the exact llr distribution using Staden's method
      // if(verbose)
      // {
      //   fprintf(output,"Approximate the exact pwm llr score distribution using the pgf method.\n");
      // }
      log_ratio_to_int(epwm[id],ipwm,pwmLen[id],bfreq0);

        // compute score distribution of the (int)PWM using Staden's method
      llrDim=pwm_score_dist(ipwm,pwmLen[id],llrDist,bfreq0);

        //fprintf(output,"Avant ScoreCutoff %d \n",scoreCutoff[id]);
      scoreCutoff[id]=determine_cutoff(llrDist,llrDim,pvalueCutoff);
        //fprintf(output,"Apres ScoreCutoff %d \n",scoreCutoff[id]);
        
      if(fullScan)
      {
        nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,oseq,orseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
      }
      else
      {
        nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,seq,rseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
      }
      if (nsites[motifCn2]>=max(2,minsites))
      {
      for (int j=0; j<numSeq; j++) seqCn[j]=0;
        for (int j=0; j<nsites[motifCn2]; j++) seqCn[site[motifCn2][j].seq]++;
        
        for (int j=0; j<4; j++) cn[j]=0;
        for (int j=0; j<numSeq; j++)
        {
          if (seqCn[j]==0) cn[0]++;
          if (seqCn[j]==1) cn[1]++;
          if (seqCn[j]==2) cn[2]++;
          if (seqCn[j]>2)  cn[3]++;
        }
        totalSitesInput=nsites[motifCn2];
        if (extTrim)
        {
          if (fullScan)
          {
            extend_alignment(site[motifCn2],numSeq,oseq,orseq,seqLen,nsites[motifCn2],pwmLen[id],&(pwmnewLen[motifCn2]));
          }
          else
          {
            extend_alignment(site[motifCn2],numSeq,seq,rseq,seqLen,nsites[motifCn2],pwmLen[id],&(pwmnewLen[motifCn2]));
          }
        }
        else
        { 
          pwmnewLen[motifCn2]=pwmLen[id];
        } 

        if (fullScan)
        {
          align_sites_count(site[motifCn2],oseq,orseq,nsites[motifCn2],pwmnewLen[motifCn2],opwm2);
        }
        else
        {
          align_sites_count(site[motifCn2],seq,rseq,nsites[motifCn2],pwmnewLen[motifCn2],opwm2);
        }
        standardize_pwm(opwm2,pwmnewLen[motifCn2]);
        logev=E_value(opwm2,nsites[motifCn2],bfreq0,pwmnewLen[motifCn2],numSeq,seqLen);

        if (logev<=E_valueCutoff)
        {
          consensus_pwm(opwm2,pwmnewLen[motifCn2],pwmConsensus[id]);
          if (fullScan)
          {
            SET_VECTOR_ELT(ResultsGadem,increment,print_result_R(site[motifCn2],nsites[motifCn2],numSeq,oseq,orseq,seqLen,logev,opwm2,pwmnewLen[motifCn2],motifCn+1,sdyad[id],pwmConsensus[id],numCycle,pvalueCutoff,maxpFactor[id],geneID));
            increment++;           
            print_motif(site[motifCn2],nsites[motifCn2],oseq,orseq,seqLen,pwmnewLen[motifCn2],motifCn+1,opwm2);
          }
          else
          {
            SET_VECTOR_ELT(ResultsGadem,increment,print_result_R(site[motifCn2],nsites[motifCn2],numSeq,seq,rseq,seqLen,logev,opwm2,pwmnewLen[motifCn2],
                                                                 motifCn+1,sdyad[id],pwmConsensus[id],numCycle,pvalueCutoff,maxpFactor[id],geneID));
            increment++;
            print_motif(site[motifCn2],nsites[motifCn2],seq,rseq,seqLen,pwmnewLen[motifCn2],motifCn+1,opwm2);
          }

          mask_sites(nsites[motifCn2],seq,rseq,seqLen,site[motifCn2],pwmnewLen[motifCn2]);

          /* ----------------------compute the average number of sites in background sequences ----------------------*/
          avebnsites=0; avebnsiteSeq=0;
          for (i=0; i<numBackgSets; i++)
          {
            simulate_background_seq(bfreq0,numSeq,seqLen,sseq);
            reverse_seq(sseq,rsseq,numSeq,seqLen);

            nsites[motifCn2]=scan_llr_pgf(llrDist,llrDim,site[motifCn2],numSeq,sseq,rsseq,seqLen,ipwm,pwmLen[id],scoreCutoff[id],bfreq0);
            
            for (int j=0; j<numSeq; j++) bseqCn[j]=0;
            for (int j=0; j<nsites[motifCn2]; j++) bseqCn[site[motifCn2][j].seq]++;
            
            for (int j=0; j<4; j++) bcn[j]=0;
            for (int j=0; j<numSeq; j++)
            {
              if (bseqCn[j]==0) bcn[0]++;
              if (bseqCn[j]==1) bcn[1]++;
              if (bseqCn[j]==2) bcn[2]++;
              if (bseqCn[j]>2)  bcn[3]++;
            }
              //ffprintf(output,fq,"background set[%2d] Seqs with 0,1,2,>2 sites: %d %d %d %d\n",i+1,bcn[0],bcn[1],bcn[2],bcn[3]);
            avebnsites+=nsites[motifCn2]; avebnsiteSeq+=(numSeq-bcn[0]);
          } 
          avebnsites/=numBackgSets; avebnsiteSeq/=numBackgSets;
          /* -----------------end compute the average number of sites in background sequences ----------------------*/
          motifCn++; motifCn2++; 

			//if((numCycle+1) > lengthList & fixSeeded)
			//	{	
			//	  numCycleNoMotif=1;
			//		startPWMfound=1;
			//		} else {
					numCycleNoMotif=0;
			//	}

        }
      }
    }
    
    /* for (int i=0; i<motifCn2; i++)
    {
      mask_sites(nsites[i],seq,rseq,seqLen,site[i],pwmnewLen[i]); 
    } */
    
    if (site[0])
    { 
      free(site[0]);
      site[0]=NULL;
    }
    if (site)
    {
      free(site);
      site=NULL;
    }
    if (nsites)
    {
      free(nsites);
      nsites=NULL;
    }
    if (pwmnewLen) 
    {
      free(pwmnewLen);
      pwmnewLen=NULL;
    }
    
    if (motifCn2==0)
      numCycleNoMotif++;   
    if (motifCn==nmotifs)
      {
	fprintf(output,"Maximal number of motifs (%d) reached\n",nmotifs);
	break;
      }
    if (numCycleNoMotif==stopCriterion)
      noMotifFound=1;
  }while (!noMotifFound);
  
  
    // fclose(fp);
  /*if (!startPWMfound) {  
   if (dyad[0])      { free(dyad[0]);         dyad[0]=NULL;    }
   if (dyad)         { free(dyad);            dyad=NULL;       }
   }*/
  if (seqLen)
  { 
    free(seqLen);
    seqLen=NULL;
  }
  if (pwm[0][0])       
  {
    free(pwm[0][0]);
    pwm[0][0]=NULL; 
  }
  if (pwm[0])
  { 
    free(pwm[0]);
    pwm[0]=NULL;     
  }
  if (pwm)             
  {
    free(pwm); 
    pwm=NULL;        
  }
  if (opwm2[0])  
  { 
    free(opwm2[0]); 
    opwm2[0]=NULL;
  }
  if (opwm2)     
  {
    free(opwm2); 
    opwm2=NULL;
  }
  if (opwm[0][0])      
  { 
    free(opwm[0][0]);
    opwm[0][0]=NULL;
  }
  if (opwm[0])    
  {
    free(opwm[0]);
    opwm[0]=NULL;
  }
  if (opwm)       
  {
    free(opwm);
    opwm=NULL;
  }
  if(ipwm[0])
  { 
    free(ipwm[0]);     
    ipwm[0]=NULL;  
  }
  if (ipwm)
  {
    free(ipwm);   
    ipwm=NULL;
  }
  if (pwmLen)   
  { 
    free(pwmLen);    
    pwmLen=NULL; 
  }
  if (seq[0])          { free(seq[0]);          seq[0]=NULL;     }
  if (seq)             { free(seq);             seq=NULL;        }
    //  if (rseq[0])         { free(rseq[0]);         rseq[0]=NULL;    }
    // if (rseq)            { free(rseq);            rseq=NULL;       }
    // if (oseq[0])         { free(oseq[0]);         oseq[0]=NULL;    }
    // if (oseq)            { free(oseq);            oseq=NULL;       }
    // if (orseq[0])        { free(orseq[0]);        orseq[0]=NULL;   }
    // if (orseq)           { free(orseq);           orseq=NULL;      }
  if (bfreq1)    
  { 
    free(bfreq1);    
    bfreq1=NULL;  
  }
  if (bfreq0)
  {
    free(bfreq0);
    bfreq0=NULL;
  }

  if (wheel)    
  { 
    free(wheel);    
    wheel=NULL;    
  }
  if (fitness)    
  { 
    free(fitness); 
    fitness=NULL;
  }
  if (mFileName)  
  { 
    free(mFileName);    
    mFileName=NULL; 
  }
  if (oFileName)    
  { 
    free(oFileName);  
    oFileName=NULL;
  }
  if (pwmFileName)    
  {
    free(pwmFileName);
    pwmFileName=NULL;
  }
  if (sdyad[0]) 
  { 
    free(sdyad[0]); 
    sdyad[0]=NULL;
  }
  if (sdyad)    
  {
    free(sdyad);
    sdyad=NULL;
  }
  if (pwmConsensus[0])
  { 
    free(pwmConsensus[0]);
    pwmConsensus[0]=NULL;
  }
  if (pwmConsensus)   
  {
    free(pwmConsensus);
    pwmConsensus=NULL;
  }
  //if (!startPWMfound && word) destroy_word(word,numWordGroup);

  PutRNGstate();
  UNPROTECT(1);
  return(ResultsGadem);
}
예제 #10
0
/*
 * Flag the sequences that take part in the EM step, based on ChIP scores.
 *
 *   ChIPScore  per-sequence scores (numSeq entries). This function takes
 *              ownership of the buffer and frees it before returning; the
 *              caller must not use or free it afterwards.
 *   numSeq     number of sequences
 *   numSeqEM   maximum number of padded/cutoff-selected sequences
 *   Iseq       output flags (numSeq entries): '1' = selected, '0' = not
 *   fEM        fraction of the sequences intended for EM
 *
 * If at most (int)(fEM*numSeq) sequences have a positive score, all of them
 * are selected and the selection is padded with non-positive-score
 * sequences (in input order) until numSeqEM sequences are flagged.
 * Otherwise the first numSeqEM sequences whose score reaches the cutoff
 * taken from the sorted score list are selected.
 */
void select_high_scoring_seq_for_EM (double *ChIPScore,int numSeq,int numSeqEM,char *Iseq,double fEM) {
  
  register int i;
  int numSeqWithQualityScore,numSelected,cutoffIdx;
  double *tmpScore;
  double ChIPscoreCutoff;
  
  /* nothing to select; still honour the ownership contract on ChIPScore */
  if (numSeq<=0)
  {
    free(ChIPScore);
    return;
  }
  
  numSeqWithQualityScore=0;
  for (i=0; i<numSeq; i++)
  {
    if (ChIPScore[i]>0) numSeqWithQualityScore++;
  }
  
  /* work on a sorted copy so the input order of ChIPScore is preserved.
     The scratch buffer is allocated exactly once (a second allocation
     here would leak the first). */
  tmpScore=alloc_double(numSeq);
  for (i=0; i<numSeq; i++) tmpScore[i]=ChIPScore[i];
  /* NOTE(review): sort_double presumably sorts in decreasing order - confirm */
  sort_double(tmpScore,numSeq);
  
  /* clamp the cutoff position: fEM>=1.0 would otherwise index one past
     the end of tmpScore, and a negative fEM would index before it */
  cutoffIdx=(int)(fEM*numSeq);
  if (cutoffIdx<0)        cutoffIdx=0;
  if (cutoffIdx>numSeq-1) cutoffIdx=numSeq-1;
  ChIPscoreCutoff=tmpScore[cutoffIdx];
  
  for (i=0; i<numSeq; i++) Iseq[i]='0';
  numSelected=0;
  
  if (numSeqWithQualityScore<=(int)(fEM*numSeq))
  {
    /* every sequence with a positive score is used */
    for (i=0; i<numSeq; i++)
    {
      if (ChIPScore[i]>0)
      {
        Iseq[i]='1'; numSelected++;
      }
    }
    /* pad with non-positive-score sequences. The quota is tested before
       each addition, so no padding happens when the positive-score
       sequences already fill it (testing for equality after the increment
       would miss that case and select everything). */
    for (i=0; i<numSeq && numSelected<numSeqEM; i++)
    {
      if (ChIPScore[i]<=0)
      {
        Iseq[i]='1'; numSelected++;
      }
    }
  }
  else
  {
    /* take sequences at or above the cutoff, in input order, up to the quota */
    for (i=0; i<numSeq && numSelected<numSeqEM; i++)
    {
      if (ChIPScore[i]>=ChIPscoreCutoff)
      {
        Iseq[i]='1'; numSelected++;
      }
    }
  }
  
  free(tmpScore);
  /* ownership of ChIPScore was transferred to this function (see header) */
  free(ChIPScore);
  
}
예제 #11
0
파일: evresp.c 프로젝트: gthompson/obspy
/*
 * FORTRAN-callable wrapper around evresp().
 *
 * The string arguments are NUL-terminated in place via add_null() using the
 * accompanying length arguments (lsta, lcha, ... - presumably the hidden
 * FORTRAN string lengths; confirm against the caller). The single-precision
 * frequencies are widened to double, evresp() is invoked, and the complex
 * response is written to resp as interleaved (real, imag) floats, so resp
 * must hold at least 2 * (*nfreqs_in) elements.
 *
 * Returns 0 on success, 1 if no response was found, and -1 if more than one
 * response was found (resp is left untouched in both failure cases).
 */
int evresp_(char *sta, char *cha, char *net, char *locid, char *datime,
	    char *units, char *file, float *freqs, int *nfreqs_in, float *resp,
	    char *rtype, char *verbose, int *start_stage, int *stop_stage,
	    int *stdio_flag, int lsta, int lcha, int lnet, int llocid,
	    int ldatime, int lunits, int lfile, int lrtype, int lverbose, int useTotalSensitivityFlag)
{
  struct response *result = NULL;
  double *freqs_d;
  int k, n_freq, first_stage, last_stage, use_stdio;

  /* terminate every incoming string argument (add_null strips the
     trailing blanks first) */
  add_null(sta, lsta-1, 'a');
  add_null(cha, lcha-1, 'a');
  add_null(net, lnet-1, 'a');
  add_null(locid, llocid-1, 'a');
  add_null(datime, ldatime-1, 'a');
  add_null(units, lunits-1, 'a');
  add_null(file, lfile-1, 'a');
  add_null(rtype, lrtype-1, 'a');
  add_null(verbose, lverbose-1, 'a');

  /* scalars arrive by reference; take local copies */
  n_freq = *nfreqs_in;
  first_stage = *start_stage;
  last_stage = *stop_stage;
  use_stdio = *stdio_flag;

  /* evresp() works on doubles: widen the float frequency vector */
  freqs_d = alloc_double(n_freq);
  for (k = 0; k < n_freq; k++) {
    freqs_d[k] = freqs[k];
  }

  result = evresp(sta, cha, net, locid, datime, units, file, freqs_d, n_freq,
                  rtype, verbose, first_stage, last_stage, use_stdio,
                  useTotalSensitivityFlag);

  /* the double-precision copy is no longer needed */
  free(freqs_d);

  /* no match at all */
  if (result == NULL) {
    return(1);
  }

  /* ambiguous: more than one response matched */
  if (result->next != NULL) {
    free_response(result);
    return(-1);
  }

  /* exactly one response: multiplex the complex vector for FORTRAN as
     (real1, imag1, real2, imag2, ..., realN, imagN) */
  for (k = 0; k < n_freq; k++) {
    resp[2*k]     = (float) result->rvec[k].real;
    resp[2*k + 1] = (float) result->rvec[k].imag;
  }

  free_response(result);

  return(0);

}
예제 #12
0
파일: supervised.c 프로젝트: LibOPF/LibOPF
// Find prototypes by the MST approach.
//
// Grows a minimum spanning tree over the nodes of sg with Prim's algorithm,
// starting at node 0. Whenever a node leaves the heap and its predecessor
// carries a different true label, both end points of that edge are marked
// STATUS_PROTOTYPE. Each node's path_val and pred fields are written along
// the way. If no such edge exists, node 0 becomes the sole prototype.
static void
mst_prototypes (struct opf_graph * sg)
{
  struct real_heap *heap = NULL;
  double *cost = NULL;
  int n_proto = 0;
  int cur, other;

  // set up the cost array and the heap keyed on it
  cost = alloc_double (sg->node_n);
  heap = real_heap_create (sg->node_n, cost);

  for (cur = 0; cur < sg->node_n; cur++) {
    cost[cur] = DBL_MAX;
    sg->node[cur].status = STATUS_NOTHING;
  }

  // node 0 is the root of the tree
  cost[0] = 0;
  sg->node[0].pred = NIL;
  real_heap_insert (heap, 0);

  // Prim's algorithm for Minimum Spanning Tree
  while (!real_heap_is_empty (heap)) {
    int parent;

    real_heap_remove (heap, &cur);
    assert (cur >= 0 && cur < sg->node_n);

    sg->node[cur].path_val = cost[cur];

    // a tree edge joining two different classes: both ends are prototypes
    parent = sg->node[cur].pred;
    if (parent != NIL
        && sg->node[cur].label_true != sg->node[parent].label_true) {
      int ends[2];
      int k;

      ends[0] = cur;
      ends[1] = parent;
      for (k = 0; k < 2; k++) {
        // count each node once, even if it is reached again later
        if (sg->node[ends[k]].status != STATUS_PROTOTYPE) {
          sg->node[ends[k]].status = STATUS_PROTOTYPE;
          n_proto++;
        }
      }
    }

    // offer cur as a cheaper predecessor to every node not yet finished
    for (other = 0; other < sg->node_n; other++) {
      double w;

      if (heap->color[other] == COLOR_BLACK || other == cur)
        continue;

      w = opf_graph_get_distance (sg, &sg->node[cur], &sg->node[other]);
      if (w < cost[other]) {
        sg->node[other].pred = cur;
        // presumably also stores w into cost[other] - confirm in real_heap
        real_heap_update (heap, other, w);
      }
    }
  }

  real_heap_destroy (&heap);
  free (cost);

  /* the algorithm will work even if there
     is just one class in the training set */
  if (n_proto == 0)
    sg->node[0].status = STATUS_PROTOTYPE;
}
예제 #13
0
파일: supervised.c 프로젝트: LibOPF/LibOPF
// Supervised training of the graph sg.
//
// First selects the prototypes (mst_prototypes), then propagates labels
// from them: prototypes start with cost 0 and their true label, all other
// nodes start at DBL_MAX. Nodes are taken from the heap one by one, recorded
// in sg->ordered_list_of_nodes in removal order, and each removed node
// offers its neighbours the path cost MAX(own cost, edge weight); a
// neighbour that improves adopts the node as predecessor and takes its label.
void
opf_supervised_train (struct opf_graph * sg)
{
  struct real_heap *heap = NULL;
  double *cost = NULL;
  int n_ordered = 0;
  int cur, other;

  // compute optimum prototypes
  mst_prototypes (sg);

  // initialization
  cost = alloc_double (sg->node_n);
  heap = real_heap_create (sg->node_n, cost);

  for (cur = 0; cur < sg->node_n; cur++) {
    if (sg->node[cur].status != STATUS_PROTOTYPE) {
      // non-prototypes start out unreachable
      cost[cur] = DBL_MAX;
      continue;
    }

    // prototypes are the roots of the forest
    sg->node[cur].pred = NIL;
    cost[cur] = 0;
    sg->node[cur].label = sg->node[cur].label_true;
    real_heap_insert (heap, cur);
  }

  // IFT with fmax
  while (!real_heap_is_empty (heap)) {
    real_heap_remove (heap, &cur);
    assert (cur >= 0 && cur < sg->node_n);

    // remember the order in which nodes were finalized
    sg->ordered_list_of_nodes[n_ordered++] = cur;
    sg->node[cur].path_val = cost[cur];

    for (other = 0; other < sg->node_n; other++) {
      double w, offer;

      // only nodes currently costlier than cur can be improved through it
      if (other == cur || !(cost[cur] < cost[other]))
        continue;

      w = opf_graph_get_distance (sg, &sg->node[cur], &sg->node[other]);

      // path cost is the largest edge weight along the path
      offer = MAX (cost[cur], w);
      if (offer < cost[other]) {
        sg->node[other].pred = cur;
        sg->node[other].label = sg->node[cur].label;
        // presumably also stores offer into cost[other] - confirm in real_heap
        real_heap_update (heap, other, offer);
      }
    }
  }

  real_heap_destroy (&heap);
  free (cost);
}