Пример #1
0
/*
 * findspans - scan seq[start..end) for runs rich in the dinucleotide "CG"
 * and print one tab-separated line per run to stdout.
 *
 * A running score is kept while walking the sequence: it grows by CPGSCORE
 * at every position i where seq[i],seq[i+1] is "CG" and shrinks by one
 * everywhere else, never dropping below zero.  Whenever the score falls
 * back to zero after having been positive, the stretch from just after the
 * last zero-score position (imn) up to the position of the maximum score
 * (imx) is reported, and the region between that maximum and the current
 * position is searched again recursively.  A run still open when the end
 * of the range is reached is handled the same way.
 *
 * start, end : half-open range of indices into seq to scan.
 * seq        : the sequence characters; only seq[start..end-1] are read
 *              by this function itself.
 * seqname    : label printed in the first output column and passed on to
 *              getstats.
 *
 * Returns 0.  The value carries no information; it exists only because the
 * function is declared int (the original fell off the end without
 * returning anything).
 *
 * getstats and CPGSCORE are defined elsewhere.  getstats is handed the
 * span bounds and fills ncpg, ngpc and ngc - presumably the CpG count,
 * GpC count and G+C count of the span; confirm against its definition.
 */
int findspans ( int start, int end, char *seq, char *seqname )
{
  int i ;
  int sc = 0 ;    /* running score at the current position */
  int lsc = 0 ;   /* score at the previous position */
  int imn = -1 ;  /* last index with score zero; -1, else sequences which
                     start with the pattern are reported badly */
  int imx = 0 ;   /* index at which the maximum score mx was reached */
  int mx = 0 ;    /* maximum score seen since the last reset */

  int ncpg, ngpc, ngc ;

  for ( i = start ; i < end ; ++i )
    {
      lsc = sc ;

      /* The i != end-1 test keeps seq[i+1] inside the scanned range.
         The score update is written as a plain if/else: the original
         "sc = cond ? sc += CPGSCORE : --sc" modified sc twice without an
         intervening sequence point, which is undefined behaviour in C. */
      if ( i != end-1 && seq[i] == 'C' && seq[i+1] == 'G' )
        sc += CPGSCORE ;
      else
        --sc ;

      if ( sc < 0 )
        sc = 0 ;

      if ( sc == 0 && lsc )  /* should threshold here */
        {
          /* imn+1 is the start of the match.
             imx+1 is the end of the match, given pattern-length=2.
             fetchdb using reported co-ordinates will return a sequence
             which both starts and ends with a complete pattern.
             Further +1 adjusts so that start of sequence = 1
          */
          getstats ( imn+1, imx+2, seq, seqname, &ncpg, &ngpc, &ngc ) ;

          /* NOTE(review): the last column is printed with %.1f here but
             with %.2f for the trailing span below - kept as found. */
          printf("%s\t %d\t %d\t %d\t CpG: %d\t %.1f\t %.1f\n", seqname, imn+2, imx+2, mx, ncpg, ngc*100.0/(imx+1-imn), 1.0*ncpg/ngpc) ;

          /* Recursive call searches from one position after the end of the
             last match to the current position */
          findspans( imx+2, i, seq, seqname ) ;
          sc = lsc = imn = imx = mx = 0 ;
        }

      /* Track the position and value of the best score so far. */
      if ( sc > mx )
        {
          imx = i ;
          mx = sc ;
        }

      /* Remember the most recent position at which the score was zero. */
      if ( sc == 0 )
        imn = i ;
    }

  /* A run was still open when the range ended: report it, then search
     what lies between its maximum and the end of the range. */
  if ( sc != 0 )  /* should threshold here */
    {
      getstats ( imn+1, imx+2, seq, seqname, &ncpg, &ngpc, &ngc ) ;
      printf("%s\t %d\t %d\t %d\t CpG: %d\t %.1f\t %.2f\n", seqname, imn+2, imx+2, mx, ncpg, ngc*100.0/(imx+1-imn), 1.0*ncpg/ngpc) ;

      findspans( imx+2, end, seq, seqname ) ;
    }

  return 0 ;
}
Пример #2
0
/*------------------------------------------------------*/
/*
 * Program entry point.
 *
 * Expects exactly one command-line argument, the name of the input file.
 * Allocates initial buffers for the sequence and its name, opens the file,
 * and then, for as long as readSequence returns non-zero, cuts the name at
 * its first blank and runs findspans over the whole sequence.
 *
 * usage, readSequence, dna2textConv, findspans, MAXNAMELEN and MALLOCBLOCK
 * are defined elsewhere.  usage() is presumably expected not to return;
 * if it does, execution simply continues as in the original - confirm.
 *
 * Exits with status 0 on success and -1 if an initial allocation fails.
 */
int main (int argc, char **argv)
{
  char *seq, *seqname, *desc ;
  int length ;
  int i ;
  FILE *fil ;

  /* NOTE(review): legacy declaration kept because no include block is
     visible here; replace with #include <stdlib.h> at file level. */
  extern char* malloc() ;

  if (argc != 2)
    usage () ;

  if (!(seqname = malloc (MAXNAMELEN+1)))
    {
      fprintf (stderr, "Couldn't malloc %d bytes", MAXNAMELEN) ;
      exit (-1) ;
    }

  if (!(seq = malloc (MALLOCBLOCK+1)))
    {
      fprintf (stderr, "Couldn't malloc %d bytes", MALLOCBLOCK) ;
      exit (-1) ;
    }

  if (!(fil = fopen ( argv[1], "r" )))
    usage () ;

  /* once through per sequence */
  while ( readSequence(fil, dna2textConv, &seq, &seqname, &desc, &length) )
    {
      /* Keep only the first blank-delimited word of the name.  The bound
         is tested before seqname[i] is read, so the scan itself never
         looks past index 255.
         NOTE(review): 256 is hard-coded; confirm it matches the size of
         the name buffer that readSequence hands back. */
      i = 0 ;
      while ( i < 256 && seqname[i] != ' ' && seqname[i] != '\0' )
        ++i ;
      seqname[i] = '\0' ;

      findspans ( 0, length, seq, seqname ) ;
    }

  exit (0) ;
}
Пример #3
0
/*
 * Report helper for findspans.
 *
 * Calls getstats for the span described by imn/imx and prints one
 * tab-separated line to stdout if the span passes all three filters:
 *   - imx - imn is greater than 199,
 *   - G+C percentage over the window (imx+1-imn) is above 50,
 *   - observed/expected CpG ratio is above 0.60.
 *
 * imn     : last zero-score index before the span (span starts at imn+1).
 * imx     : index at which the maximum score was reached.
 * mx      : the maximum score, printed as the fourth column.
 * seq     : sequence characters, passed through to getstats.
 * seqname : label printed in the first column and passed to getstats.
 *
 * getstats is defined elsewhere; it fills ncpg, ngpc, ng and nc -
 * presumably the CpG count, GpC count, G count and C count of the span;
 * confirm against its definition.
 */
static void findspans_report ( int imn, int imx, int mx, char *seq, char *seqname )
{
  int ncpg, ngpc, ng, nc ;
  int ngc ;
  int winlen = imx + 1 - imn ;
  float expect, obsToExp ;

  getstats ( imn+1, imx+2, seq, seqname, &ncpg, &ngpc, &ng, &nc ) ;
  ngc = ng + nc ;

  if ( imx - imn > 199 && ngc*100.0/winlen > 50.0 )
    {
      /* ASH 3/23/04: expected val from Gardiner-Garden & Frommer '87.
         The product is formed in double so that long spans cannot
         overflow int; converting it to float gives the same value as the
         original int product whenever that product did not overflow. */
      expect = (float)((double)nc * (double)ng) / (float)winlen ;

      /* With no C or no G in the window the ratio is undefined; the
         original produced a NaN there, which also printed nothing. */
      if ( expect > 0.0f )
        {
          obsToExp = (float)ncpg / expect ;
          /* NOTE(review): the CpG/GpC column divides by ngpc, which is
             not checked for zero - kept as found. */
          if ( obsToExp > 0.60 )
            printf("%s\t %d\t %d\t %d\t CpG: %d\t %.1f\t %.2f\t %.2f\n",
                   seqname, imn+2, imx+2, mx, ncpg, ngc*100.0/winlen,
                   1.0*ncpg/ngpc, obsToExp) ;
        }
    }
}

/*
 * findspans - scan seq[start..end) for runs rich in the dinucleotide "CG"
 * and report those that pass the filters applied by findspans_report.
 *
 * A running score grows by CPGSCORE at every position i where
 * seq[i],seq[i+1] is "CG" and shrinks by one everywhere else, never
 * dropping below zero.  Whenever the score falls back to zero after having
 * been positive, the stretch from just after the last zero-score position
 * up to the position of the maximum score is handed to the report helper,
 * and the region between that maximum and the current position is searched
 * again recursively.  A run still open at the end of the range is treated
 * the same way.
 *
 * start, end : half-open range of indices into seq to scan.
 * seq        : the sequence characters.
 * seqname    : label used in the output.
 *
 * CPGSCORE is defined elsewhere.
 */
void findspans ( int start, int end, char *seq, char *seqname )
{
  int i ;
  int sc = 0 ;    /* running score at the current position */
  int lsc = 0 ;   /* score at the previous position */
  int imn = -1 ;  /* last index with score zero; -1, else sequences which
                     start with pattern are reported badly */
  int imx = 0 ;   /* index at which the maximum score mx was reached */
  int mx = 0 ;    /* maximum score seen since the last reset */

  for ( i = start ; i < end ; ++i )
    {
      lsc = sc ;

      /* The i != end-1 test keeps seq[i+1] inside the scanned range. */
      if ( i != end-1 && seq[i] == 'C' && seq[i+1] == 'G' )
        sc += CPGSCORE ;
      else
        sc -= 1 ;

      if ( sc < 0 )
        sc = 0 ;

      if ( sc == 0 && lsc )  /* should threshold here */
        {
          /* imn+1 is the start of the match.
             imx+1 is the end of the match, given pattern-length=2.
             fetchdb using reported co-ordinates will return a sequence
             which both starts and ends with a complete pattern.
             Further +1 adjusts so that start of sequence = 1
          */
          findspans_report ( imn, imx, mx, seq, seqname ) ;

          /* Recursive call searches from one position after the end of the
             last match to the current position */
          findspans( imx+2, i, seq, seqname ) ;
          sc = lsc = imn = imx = mx = 0 ;
        }

      /* Track the position and value of the best score so far. */
      if ( sc > mx )
        {
          imx = i ;
          mx = sc ;
        }

      /* Remember the most recent position at which the score was zero. */
      if ( sc == 0 )
        imn = i ;
    }

  /* A run was still open when the range ended: apply the same test and
     output as above, then search what lies beyond its maximum. */
  if ( sc != 0 )  /* should threshold here */
    {
      findspans_report ( imn, imx, mx, seq, seqname ) ;
      findspans( imx+2, end, seq, seqname ) ;
    }
}