Beispiel #1
0
/* Sort the HSPs stored for every frame of p.
 *
 * Frames [0, FRAMES) are sorted with quickSort and reported as FWD;
 * frames [FRAMES, 2*FRAMES) are sorted with RquickSort and reported as RVS.
 * Each frame's range is [0, nSegments[frame]-1]; a progress message is
 * emitted through printMess before and after every sort.
 */
void SortHSPs(packHSP* p)
{
  char mess[MAXSTRING];
  int frame;

  for (frame = 0; frame < 2*FRAMES; frame++)
    {
      if (frame < FRAMES)
        {
          /* First half of the frames: plain quicksort */
          sprintf(mess,"Quicksorting FWD HSPs in frame %d",frame);
          printMess(mess);

          quickSort(p->sPairs[frame], 0, p->nSegments[frame]-1);

          sprintf(mess,"\t%ld HSPs successfully quicksorted",p->nSegments[frame]);
        }
      else
        {
          /* Second half of the frames: the "R" variant of the sort */
          sprintf(mess,"R-Quicksorting RVS HSPs in frame %d",frame);
          printMess(mess);

          RquickSort(p->sPairs[frame], 0, p->nSegments[frame]-1);

          sprintf(mess,"\t%ld HSPs successfully r-quicksorted",p->nSegments[frame]);
        }

      /* Report the outcome prepared by whichever branch ran */
      printMess(mess);
    }
}
Beispiel #2
0
/* Sort exons by donor site (for every GM rule): genamic requirement */
/* Visible portion only: the end of this function is not shown here.
 *
 * For each of the nexons exons in E, the key "Type+Strand" is looked up in
 * dictionary D. When found, the exon pointer is inserted into d[class] for
 * every class listed in UC[type][0 .. nc[type]-1], keeping d[class] ordered
 * by increasing (Donor->Position + offset2). km[class] is the number of
 * exons currently stored in d[class] and is incremented on every insertion.
 * Exons whose key is not in the dictionary are reported through printMess.
 *
 * ne, DE and nclass are not used in the visible part of the function.
 */
void BuildSort(dict *D,
               int nc[],
               int ne[],
               int UC[][MAXENTRY],
               int DE[][MAXENTRY],
               int nclass,
               long km[],
               exonGFF* **d,
               exonGFF *E,
               long nexons)
{
  long i,k;
  int j;
  int type;
  int class;
  char aux[MAXTYPE];
  char mess[MAXSTRING];
  /* Every exon will be classified into some sorting function (d) */
  /* Input exons are sorted by acceptor (left position) */
  for(i=0; i < nexons; i++)
    {
      /* Build the dictionary key: exon type followed by the strand */
      aux[0]='\0';
      strcpy (aux, (E+i)->Type);
      /* NOTE(review): strcat expects a NUL-terminated string. If Strand is a
         single char (taking its address suggests so), this reads whatever
         bytes follow it in the struct -- confirm, and append the character
         explicitly if that is the case. Also assumes Type plus the strand
         fits in MAXTYPE bytes -- TODO confirm. */
      strcat (aux, &((E+i)->Strand));
      
      /* What's the type of exon? "Type+Strand" */
      type = getkeyDict(D,aux);
      
      /* Checking and getting exon type (dictionary) */
      if (type != NOTFOUND)
	{
	  /* Exon may belong to some upstream compatible classes (UC) */
	  for(j=0; j < nc[type]; j++)
	    {
	      class = UC[type][j];
	      /* Start scanning from the last exon stored for this class */
	      k = km[class]-1;
			  
	      /* Screening the exons sorted before: sorting by insertion */
	      while (k>=0 && (((E+i)->Donor->Position + (E+i)->offset2) 
			      < 
			      (d[class][k]->Donor->Position 
			       + d[class][k]->offset2)))  
		{
		  /* Shifting down previous exons */
		  d[class][k+1] = d[class][k];
		  k--;
		}
	      /* Insert new exon before the previously shifted exons */
	      /* Writes up to index km[class], so d[class] needs room for one
	         more entry than it currently holds */
	      d[class][k+1] = (E+i);
	      km[class]++;
	    }
	}else{ /* end if type found */
	/* Unknown "Type+Strand" key: report it and leave the exon unsorted */
	sprintf(mess,"type %s(%d) not found",aux,type);
	printMess(mess);
      }
    } /* end forall exons */
Beispiel #3
0
/* Output: header for results displayed immediately
 *
 * locus: name of the sequence, printed verbatim in messages and headers
 * l:     sequence length; 0 is reported through printError
 *
 * Prints to stdout, in this order:
 *   - the gff-version pragma when GFF3 (version 3) or GFF (version 2) is set;
 *   - when XML is set, the XML prolog and the opening of the <prediction>
 *     element (left unterminated: the caller completes it later);
 *   - otherwise the date / source-version / sequence comment lines, using the
 *     GFF3 flavour of those lines when GFF3 is set.
 * The date comes from ctime(&m->tStart).
 */
void OutputHeader(char* locus, long l)
{
  const char* stamp;
  char date[64];
  char mess[MAXSTRING];

  /* 0. Size checkpoint and information */
  /* snprintf: locus is caller-supplied, so keep the message inside mess */
  if (!l)
    {
      snprintf(mess, sizeof mess, "%s: sequence is empty", locus);
      printError(mess);
    }
  else
    {
      snprintf(mess, sizeof mess, "%s: %ld nucleotides\n", locus, l);
      printMess(mess);
    }

  /* 1. Extract the starting time to display */
  /* ctime() may return NULL; copy into a private buffer and drop the
     trailing newline so the text can be embedded safely in every format */
  stamp = ctime(&m->tStart);
  snprintf(date, sizeof date, "%s", stamp ? stamp : "unknown");
  date[strcspn(date, "\n")] = '\0';

  /* 2. Output headers: gff, geneid or xml format */
  if (GFF3)
    {
      printf("##gff-version 3\n");
    }
  else if (GFF)
    {
      printf("## gff-version 2\n");
    }

  if (XML)
    {
      /* XML format header */
      printf("<?xml version=\"1.0\" ?>\n");
      printf("<!DOCTYPE prediction SYSTEM \"geneid.dtd\">\n");
      printf("<prediction locus=\"%s\" length=\"%ld\" source=\"%s\" date=\"%s\"",
             locus, l, VERSION, date);
    }
  else if (GFF3)
    {
      /* gff3 format */
      printf("# date %s\n", date);
      printf("# source-version: %s -- [email protected]\n", VERSION);
      printf("##sequence-region %s 1 %ld\n", locus, l);
    }
  else
    {
      /* gff (version 2) and geneid formats */
      printf("## date %s\n", date);
      printf("## source-version: %s -- [email protected]\n", VERSION);
      printf("# Sequence %s - Length = %ld bps\n", locus, l);
    }
}
Beispiel #4
0
/* Printing messages (parameters read for PWA - signal prediction)
 *
 * p:      profile whose dimension, offset, order, dimensionTrans and cutoff
 *         fields are reported
 * signal: label printed in front of the values
 *
 * The line is formatted into a local buffer and handed to printMess.
 */
void PrintProfile (profile* p, char* signal)
{
  char mess[MAXSTRING];

  /* snprintf: signal is caller-supplied, so bound the write to mess */
  snprintf(mess, sizeof mess,
           "Reading... %s:\t%d\t%d\t%d\t(%ld)\t%5.2f",
           signal,
           p->dimension,
           p->offset,
           p->order,
           p->dimensionTrans,
           p->cutoff);

  printMess(mess);
}
Beispiel #5
0
/* Pre-process the external HSP/read evidence in two steps.
 *
 * Nothing is done unless SRP is set. Step 1 calls ReadScan when UTR is set
 * and HSPScan otherwise; step 2 always calls HSPScan2. All three receive
 * (external, hsp, Strand, l1, l2) unchanged.
 */
void ProcessHSPs(long l1,
                 long l2,
                 int Strand,
                 packExternalInformation* external,
                 packHSP* hsp
                )
{
    /* GENIS hack: the whole routine is a no-op without SRP */
    if (!SRP)
        return;

    /* Step 1: fill in the temporary HSP arrays (pre-processing) */
    if (!UTR)
    {
        printMess("Preprocessing homology information: step 1");
        HSPScan(external,hsp,Strand,l1,l2);
    }
    else
    {
        printMess("Preprocessing read information: step 1");
        ReadScan(external,hsp,Strand,l1,l2);
    }

    /* Step 2: common to both kinds of evidence */
    printMess("Preprocessing homology information: step 2");
    HSPScan2(external,hsp,Strand,l1,l2);
}
Beispiel #6
0
/* Print the best genes using the selected format.
 *
 * With at least one exon, CookingGenes is called on pg->GOptim and, in XML
 * mode, the </prediction> element is closed afterwards. With no exons only
 * the XML mode prints anything: an empty prediction (0 genes, score 0.00).
 */
void OutputGene(packGenes* pg,
                long nExons,
                char* Locus,
                char* Sequence,
                gparam* gp,
                dict* dAA,
		char* GenePrefix)
{
  /* No exons: nothing to recover, just finish the XML element if needed */
  if (nExons <= 0)
    {
      if (XML)
        {
          printf(" genes=\"0\" score =\"0.00\">\n</prediction>\n");
        }
      return;
    }

  /* Retrieving the best predicted genes recursively */
  printMess("Recovering gene-solution...");
  CookingGenes(pg->GOptim, Locus, Sequence, gp, dAA, GenePrefix);

  if (XML)
    {
      printf("</prediction>\n");
    }
}
Beispiel #7
0
/* Dispatch one event to the printer that matches sp->EventType.
 *
 *   alarm-type and info events      -> printMess
 *   acknowledge                     -> printAck
 *   cancel / return                 -> printRet
 *   block / unblock / reblock /
 *   cancel-block                    -> printBlock
 *
 * Any other event type is reported on stdout. The function always returns 1.
 */
int
Write (
  sEvent *sp,
  FILE *outFile
)
{
  switch (sp->EventType) {
  default:
    /* Unknown type: nothing is written to outFile */
    printf("rt_elog_dump: Error in Write unknown EventType");
    break;

  case mh_eEvent_Block:
  case mh_eEvent_Unblock:
  case mh_eEvent_Reblock:
  case mh_eEvent_CancelBlock:
    printBlock(sp, outFile);
    break;

  case mh_eEvent_Return:
  case mh_eEvent_Cancel:
    printRet(sp, outFile);
    break;

  case mh_eEvent_Ack:
    printAck(sp, outFile);
    break;

  case mh_eEvent_Info:
  case mh_eEvent_UserAlarm4:
  case mh_eEvent_UserAlarm3:
  case mh_eEvent_UserAlarm2:
  case mh_eEvent_UserAlarm1:
  case mh_eEvent_SystemAlarm:
  case mh_eEvent_MaintenanceAlarm:
  case mh_eEvent_Alarm:
    printMess(sp, outFile);
    break;
  }

  return 1;
}
Beispiel #8
0
/* Display the prediction elements selected by the output options.
 *
 * When FWD is set the forward-strand sites (allSites) and exons (allExons)
 * are printed; when RVS is set the same is done for the reverse strand
 * (allSites_r / allExons_r). Each kind of element is gated by its own flag:
 *   sites: SFP start codons, SAP acceptors, SDP donors, STP stop codons,
 *          and with UTR also TSS (needs SFP) and TES (needs STP);
 *   exons: EFP initial, EIP internal + zero-length, ETP terminal,
 *          ESP singles, EOP ORFs.
 * Finally, when EXP is set, the exons array (nExons entries) is printed.
 *
 * NOTE(review): TSS/TES sites are printed with gp->DonorProfile and
 * gp->AcceptorProfile on the forward strand but with gp->StartProfile and
 * gp->StopProfile on the reverse strand -- confirm which pairing is intended.
 */
void Output(packSites* allSites,
            packSites* allSites_r,
            packExons* allExons,
            packExons* allExons_r,
            exonGFF* exons,
            long nExons,
            char* Locus,
            long l1,
            long l2,
            long lowerlimit,
            char* Sequence,
            gparam* gp,
            dict* dAA, 
	    char* GenePrefix)
{
  /* 1. Forward strand */
  if (FWD)
    {
      printMess("Printing forward selected elements");

      /* sites */
      if (SFP)
        {
          PrintSites(allSites->StartCodons, allSites->nStartCodons, STA,
                     Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                     gp->StartProfile);
        }
      if (SAP)
        {
          PrintSites(allSites->AcceptorSites, allSites->nAcceptorSites, ACC,
                     Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                     gp->AcceptorProfile);
        }
      if (SDP)
        {
          PrintSites(allSites->DonorSites, allSites->nDonorSites, DON,
                     Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                     gp->DonorProfile);
        }
      if (STP)
        {
          PrintSites(allSites->StopCodons, allSites->nStopCodons, STO,
                     Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                     gp->StopProfile);
        }
      if (UTR)
        {
          /* transcript start / end sites follow the start / stop flags */
          if (SFP)
            {
              PrintSites(allSites->TS, allSites->nTS, TSS,
                         Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                         gp->DonorProfile);
            }
          if (STP)
            {
              PrintSites(allSites->TE, allSites->nTE, TES,
                         Locus, FORWARD, l1, l2, lowerlimit, Sequence,
                         gp->AcceptorProfile);
            }
        }

      /* exons */
      if (EFP)
        {
          PrintExons(allExons->InitialExons, allExons->nInitialExons, FIRST,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (EIP)
        {
          PrintExons(allExons->InternalExons, allExons->nInternalExons, INTERNAL,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
          PrintExons(allExons->ZeroLengthExons, allExons->nZeroLengthExons, ZEROLENGTH,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (ETP)
        {
          PrintExons(allExons->TerminalExons, allExons->nTerminalExons, TERMINAL,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (ESP)
        {
          PrintExons(allExons->Singles, allExons->nSingles, SINGLE,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (EOP)
        {
          PrintExons(allExons->ORFs, allExons->nORFs, ORF,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
    }

  /* 2. Reverse strand */
  if (RVS)
    {
      printMess("Printing reverse selected elements\n");

      /* sites */
      if (SFP)
        {
          PrintSites(allSites_r->StartCodons, allSites_r->nStartCodons, STA,
                     Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                     gp->StartProfile);
        }
      if (SAP)
        {
          PrintSites(allSites_r->AcceptorSites, allSites_r->nAcceptorSites, ACC,
                     Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                     gp->AcceptorProfile);
        }
      if (SDP)
        {
          PrintSites(allSites_r->DonorSites, allSites_r->nDonorSites, DON,
                     Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                     gp->DonorProfile);
        }
      if (STP)
        {
          PrintSites(allSites_r->StopCodons, allSites_r->nStopCodons, STO,
                     Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                     gp->StopProfile);
        }
      if (UTR)
        {
          if (SFP)
            {
              PrintSites(allSites_r->TS, allSites_r->nTS, TSS,
                         Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                         gp->StartProfile);
            }
          if (STP)
            {
              PrintSites(allSites_r->TE, allSites_r->nTE, TES,
                         Locus, REVERSE, l1, l2, lowerlimit, Sequence,
                         gp->StopProfile);
            }
        }

      /* exons */
      if (EFP)
        {
          PrintExons(allExons_r->InitialExons, allExons_r->nInitialExons, FIRST,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (EIP)
        {
          PrintExons(allExons_r->InternalExons, allExons_r->nInternalExons, INTERNAL,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
          PrintExons(allExons_r->ZeroLengthExons, allExons_r->nZeroLengthExons, ZEROLENGTH,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (ETP)
        {
          PrintExons(allExons_r->TerminalExons, allExons_r->nTerminalExons, TERMINAL,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (ESP)
        {
          PrintExons(allExons_r->Singles, allExons_r->nSingles, SINGLE,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
      if (EOP)
        {
          PrintExons(allExons_r->ORFs, allExons_r->nORFs, ORF,
                     Locus, l1, l2, Sequence, dAA, GenePrefix);
        }
    }

  /* 3. Every exon of the current split, whatever its type */
  if (EXP)
    {
      printMess("Printing all predicted Exons of current split\n");
      PrintExons(exons, nExons,
                 FIRST + INTERNAL + TERMINAL + SINGLE + ORF,
                 Locus, l1, l2, Sequence, dAA, GenePrefix);
    }
}
Beispiel #9
0
/* Management of splice sites prediction and exon construction/scoring
 *
 * For the fragment [l1,l2] of Sequence on the given Strand this routine:
 *   0. derives the search limits for start/acceptor sites (l1a,l2a), donor
 *      sites (l1b,l2b) and stop codons (l1c,l2c), plus the cutPoint handed
 *      to the terminal/single/UTR/ORF exon builders; OVERLAP is removed from
 *      the right limit (forward) or added to the left limit (reverse) unless
 *      the fragment already touches upperlimit / lowerlimit;
 *   0b. pre-processes HSPs (SRP) and the library (LIB);
 *   1. predicts the sites into allSites, reporting every count via printRes;
 *   2. builds the exons into allExons when GENAMIC or any exon-printing
 *      option is set;
 *   3. scores the exons with ScoreExons and stores the total in nExons.
 *
 * acceptorsites, donorsites, tssites and tesites are handed to SortSites as
 * its third argument (presumably scratch buffers -- confirm against
 * SortSites). isochores, nIsochores and GCInfo are only forwarded to
 * ScoreExons; lib is only forwarded to ProcessLibrary2.
 *
 * Counters of exon classes whose builder is skipped (zero-length exons
 * without RSS, UTR exons without UTR, ORFs without scanORF) are set to 0 so
 * that the nExons total never includes a stale or uninitialised value.
 */
void  manager(char *Sequence, 
	      long LengthSequence,
	      packSites* allSites,
	      packExons* allExons,
	      packLib** lib,
	      long l1, long l2, long lowerlimit, long upperlimit,
	      int Strand,
	      packExternalInformation* external,
	      packHSP* hsp,
	      gparam* gp,
	      gparam** isochores,
	      int nIsochores,
	      packGC* GCInfo,
	      site* acceptorsites,
	      site* donorsites,
	      site* tssites,
	      site* tesites
	      )
{
  char mess[MAXSTRING];

  /* Search limits: a = start codons/acceptors, b = donors, c = stop codons */
  long l1a, l1b,
    l2a, l2b,
    l1c, l2c;

  long cutPoint;

  /* 0. Define boundaries of splice site prediction
     according to current split positions and strand selected */
  if (Strand == FORWARD)
    {
      /* Forward sense */
      /* Start codons and Acceptor sites limits */
      l1a = l1;
      l2a = (l2 == upperlimit)? l2 : l2 - OVERLAP;

      /* Donor sites limits */
      l1b = l1;
      l2b = l2;

      /* Stop codon limits */
      l1c = l1;
      l2c = l2;

      /* Terminal/Single exons: */
      /* are allowed if their Stop codon is placed behind cutPoint */
      /* FWD: every stop codon might be used without problems */
      cutPoint = l1;
    }
  else
    {
      /* Reverse sense */
      /* Start codons and Acceptor sites limits */
      l1a = l1;
      l2a = l2;

      /* Donor sites limits */
      l1b = (l1 == lowerlimit)? l1: l1 + OVERLAP;
      l2b = l2;

      /* Stop codon limits */
      l1c = l1;
      l2c = l2;

      /* Terminal/Single exons: */
      /* are allowed if their Stop codon is placed behind cutPoint (RVS) */
      /* RVS: reading from right to left the forward sense sequence */
      cutPoint = (l1 == lowerlimit)? l1 : l1 + OVERLAP;
    }

  /* 0. Preprocess HSPs */
  if (SRP)
    {
      ProcessHSPs(l1, l2, Strand, external, hsp);
    }

  /* 0.1 Preprocess Library */
  if (LIB)
    {
      ProcessLibrary2(external, lib, Strand, l1, l2);
    }

  /* 1. Predicting splice sites of current split of DNA sequence */ 
  printMess ("Computing sites ...");

  allSites->nStartCodons =
    GetSitesWithProfile(Sequence,gp->StartProfile,allSites->StartCodons,l1a,l2a);
  sprintf(mess, "Start Codons \t\t%8ld", allSites->nStartCodons);
  printRes(mess);

  /* Acceptors: every builder appends to AcceptorSites starting at
     numAccsites and returns the new total, so each report prints the
     difference with the previous total */
  long numAccsites = 0;

  allSites->nAcceptorSites =
    BuildAcceptors(Sequence,
		   U2,
		   sU2type,
		   sU2,
		   gp->AcceptorProfile,
		   gp->PolyPTractProfile,
		   gp->BranchPointProfile,
		   allSites->AcceptorSites,
		   l1a,l2a,numAccsites,NUMSITES,Strand,external);

  sprintf(mess, "Acceptor Sites \t\t%8ld", allSites->nAcceptorSites - numAccsites);
  numAccsites = allSites->nAcceptorSites;
  printRes(mess);

  if (U12GTAG)
    {
      allSites->nAcceptorSites =
	BuildU12Acceptors(Sequence,U12gtag,sU12type,
			  sU12gtag,
			  gp->U12gtagAcceptorProfile,
			  gp->U12BranchPointProfile,
			  gp->PolyPTractProfile,
			  allSites->AcceptorSites,
			  l1a,l2a,numAccsites,NUMSITES,Strand,external);

      sprintf(mess, "U12gtag Acceptor Sites \t%8ld", allSites->nAcceptorSites - numAccsites);
      numAccsites = allSites->nAcceptorSites;
      printRes(mess);
    }
  if (U12ATAC)
    {
      allSites->nAcceptorSites =
	BuildU12Acceptors(Sequence,U12atac,sU12type,
			  sU12atac,
			  gp->U12atacAcceptorProfile,
			  gp->U12BranchPointProfile,
			  gp->PolyPTractProfile,
			  allSites->AcceptorSites,
			  l1a,l2a,numAccsites,NUMSITES,Strand,external);

      sprintf(mess, "U12atac Acceptor Sites \t%8ld", allSites->nAcceptorSites - numAccsites);
      numAccsites = allSites->nAcceptorSites;
      printRes(mess);
    }

  /* Donors: same append-and-report scheme as the acceptors */
  long numDonsites = 0;

  allSites->nDonorSites =
    BuildDonors(Sequence,U2,sU2type,sU2, gp->DonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
  sprintf (mess,"Donor Sites \t\t%8ld", allSites->nDonorSites);
  numDonsites = allSites->nDonorSites;
  printRes(mess);

  if (U12GTAG)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence,U12gtag,sU12type,sU12gtag, gp->U12gtagDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U12gtag Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }
  if (U12ATAC)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence, U12atac,sU12type,sU12atac, gp->U12atacDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U12atac Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }
  if (U2GCAG)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence,U2, sU2type,sU2gcag, gp->U2gcagDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U2gcag Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }
  if (U2GTA)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence,U2,sU2type,sU2gta, gp->U2gtaDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U2gta Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }
  if (U2GTG)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence,U2,sU2type,sU2gtg, gp->U2gtgDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U2gtg Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }
  if (U2GTY)
    {
      allSites->nDonorSites =
	BuildDonors(Sequence,U2,sU2type,sU2gty, gp->U2gtyDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external);
      sprintf (mess,"U2gty Donor Sites \t%8ld", allSites->nDonorSites - numDonsites);
      numDonsites = allSites->nDonorSites;
      printRes(mess);
    }

  allSites->nStopCodons =
    GetStopCodons(Sequence,gp->StopProfile, allSites->StopCodons,l1c,l2c);
  sprintf (mess,"Stop Codons \t\t%8ld", allSites->nStopCodons);
  printRes(mess);

  /* Sorting is only requested when one of the extra (non-default U2)
     site classes was appended to the donor/acceptor arrays */
  if ( U12GTAG || U12ATAC || U2GCAG || U2GTA || U2GTG || U2GTY )
    {
      /* Predicted sites must be sorted by position */
      printMess ("Sorting Donor and Acceptor sites ...");
      SortSites(allSites->DonorSites,allSites->nDonorSites,donorsites,l1b,l2b);
      SortSites(allSites->AcceptorSites,allSites->nAcceptorSites,acceptorsites,l1a,l2a);
    }

  /* Transcript start / end sites: only predicted with UTR */
  allSites->nTS = 0;
  allSites->nTE = 0;
  if (UTR)
    {
      allSites->nTS =
	GetTSS(allSites->TS,allSites->AcceptorSites, allSites->nAcceptorSites, external,hsp,Strand,LengthSequence,l1,l2);
      sprintf(mess, "TS \t\t\t%8ld", allSites->nTS);
      printRes(mess);

      /* PolyA signals, when enabled, are stored first in TE; their count is
         passed to GetTES as numTE */
      long numTE = 0;
      if (PAS)
	{
	  allSites->nTE =
	    GetSitesWithProfile(Sequence,gp->PolyASignalProfile,allSites->TE,l1,l2);
	  sprintf(mess, "PolyA Signals \t\t%8ld", allSites->nTE);
	  numTE = allSites->nTE;
	  printRes(mess);
	}
      allSites->nTE =
	GetTES(allSites->TE,allSites->DonorSites, allSites->nDonorSites,external,hsp,Strand,LengthSequence,l1,l2,numTE);
      sprintf(mess, "TE \t\t\t%8ld", allSites->nTE);
      printRes(mess);
    }

  /* Total number of predicted splice sites in this strand */
  allSites->nSites =
    allSites->nStartCodons +
    allSites->nAcceptorSites +
    allSites->nDonorSites +
    allSites->nStopCodons +
    allSites->nTS +
    allSites->nTE;

  sprintf(mess,"---------\t\t%8ld", allSites->nSites);
  printRes(mess);

  if (UTR)
    {
      /* Predicted sites must be sorted by position */
      printMess ("Sorting TSS/TES sites ...");
      SortSites(allSites->TS,allSites->nTS,tssites,l1,l2);
      SortSites(allSites->TE,allSites->nTE,tesites,l1,l2);
    }

  /* Exons are needed for gene assembly (GENAMIC) or when any exon output
     option is selected */
  if (GENAMIC || EFP || EIP || ETP || ESP || EOP || EXP)
    {
      /* 2. Building exons with splice sites predicted before */ 
      printMess ("Computing exons ...");

      allExons->nInitialExons =
	BuildInitialExons(allSites->StartCodons,allSites->nStartCodons,
			  allSites->DonorSites,allSites->nDonorSites,
			  allSites->StopCodons,allSites->nStopCodons,
			  gp->MaxDonors,sFIRST,Sequence,
			  allExons->InitialExons,NUMEXONS);
      sprintf(mess,"Initial Exons \t\t%8ld", allExons->nInitialExons);
      printRes(mess);

      allExons->nInternalExons =
	BuildInternalExons(allSites->AcceptorSites,allSites->nAcceptorSites,
			   allSites->DonorSites,allSites->nDonorSites,
			   allSites->StopCodons,allSites->nStopCodons,
			   gp->MaxDonors,sINTERNAL,Sequence,
			   allExons->InternalExons,NUMEXONS);
      sprintf(mess,"Internal Exons \t\t%8ld", allExons->nInternalExons);
      printRes(mess);

      if (RSS)
	{
	  allExons->nZeroLengthExons =
	    BuildZeroLengthExons(allSites->AcceptorSites,allSites->nAcceptorSites,
				 allSites->DonorSites,allSites->nDonorSites,
				 allSites->StopCodons,allSites->nStopCodons,
				 gp->MaxDonors,sZEROLENGTH,Sequence,
				 allExons->ZeroLengthExons,NUMEXONS);
	  sprintf(mess,"Zero-Length Exons \t%8ld", allExons->nZeroLengthExons);
	  printRes(mess);
	}
      else
	{
	  /* Not built: keep the counter well defined for the total below */
	  allExons->nZeroLengthExons = 0;
	}

      allExons->nTerminalExons =
	BuildTerminalExons(allSites->AcceptorSites,allSites->nAcceptorSites,
			   allSites->StopCodons,allSites->nStopCodons,
			   LengthSequence,cutPoint,sTERMINAL,Sequence,
			   allExons->TerminalExons,NUMEXONS);
      sprintf(mess,"Terminal Exons \t\t%8ld", allExons->nTerminalExons);
      printRes(mess);

      allExons->nSingles =
	BuildSingles(allSites->StartCodons,allSites->nStartCodons,
		     allSites->StopCodons,allSites->nStopCodons,
		     cutPoint, Sequence,
		     allExons->Singles);
      sprintf(mess,"Single genes \t\t%8ld", allExons->nSingles);
      printRes(mess);

      if (UTR)
	{
	  /* Every UTR exon class pairs a "left" site list with a "right" one */
	  allExons->nUtrInitialExons =
	    BuildUTRExons(allSites->TS,allSites->nTS,
			  allSites->DonorSites,allSites->nDonorSites,
			  MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRFIRST,
			  allExons->UtrInitialExons,NUMEXONS);
	  sprintf(mess,"UTR Initial Exons \t%8ld", allExons->nUtrInitialExons);
	  printRes(mess);

	  allExons->nUtrInitialHalfExons =
	    BuildUTRExons(allSites->TS,allSites->nTS,
			  allSites->StartCodons,allSites->nStartCodons,
			  MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRFIRSTHALF,
			  allExons->UtrInitialHalfExons,NUMEXONS);
	  sprintf(mess,"UTR Initial Half Exons \t%8ld", allExons->nUtrInitialHalfExons);
	  printRes(mess);

	  allExons->nUtrInternalExons =
	    BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites,
			  allSites->DonorSites,allSites->nDonorSites,
			  MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRINTERNAL,
			  allExons->UtrInternalExons,NUMEXONS);
	  sprintf(mess,"UTR Internal Exons \t%8ld", allExons->nUtrInternalExons);
	  printRes(mess);

	  allExons->nUtr5InternalHalfExons =
	    BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites,
			  allSites->StartCodons,allSites->nStartCodons,
			  MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTR5INTERNALHALF,
			  allExons->Utr5InternalHalfExons,NUMEXONS);
	  sprintf(mess,"UTR 5' Int. Half Exons \t%8ld", allExons->nUtr5InternalHalfExons);
	  printRes(mess);

	  allExons->nUtr3InternalHalfExons =
	    BuildUTRExons(allSites->StopCodons,allSites->nStopCodons,
			  allSites->DonorSites,allSites->nDonorSites,
			  MAXUTRDONORS,MAXNMDLENGTH,cutPoint,sUTR3INTERNALHALF,
			  allExons->Utr3InternalHalfExons,NUMEXONS);
	  sprintf(mess,"UTR 3' Int. Half Exons \t%8ld", allExons->nUtr3InternalHalfExons);
	  printRes(mess);

	  allExons->nUtrTerminalHalfExons =
	    BuildUTRExons(allSites->StopCodons,allSites->nStopCodons,
			  allSites->TE,allSites->nTE,
			  MAXUTRDONORS,MAX3UTREXONLENGTH,cutPoint,sUTRTERMINALHALF,
			  allExons->UtrTerminalHalfExons,NUMEXONS);
	  sprintf(mess,"UTR Term. Half Exons \t%8ld", allExons->nUtrTerminalHalfExons);
	  printRes(mess);

	  allExons->nUtrTerminalExons =
	    BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites,
			  allSites->TE,allSites->nTE,
			  MAXUTRDONORS,MAX3UTREXONLENGTH,cutPoint,sUTRTERMINAL,
			  allExons->UtrTerminalExons,NUMEXONS);
	  sprintf(mess,"UTR Terminal Exons \t%8ld", allExons->nUtrTerminalExons);
	  printRes(mess);
	}
      else
	{
	  /* Not built: keep the counters well defined for the total below */
	  allExons->nUtrInitialExons = 0;
	  allExons->nUtrInitialHalfExons = 0;
	  allExons->nUtrInternalExons = 0;
	  allExons->nUtr5InternalHalfExons = 0;
	  allExons->nUtr3InternalHalfExons = 0;
	  allExons->nUtrTerminalHalfExons = 0;
	  allExons->nUtrTerminalExons = 0;
	}

      if (scanORF)
	{
	  /* ORFs are delimited by stop codons on both sides */
	  allExons->nORFs =
	    BuildORFs(allSites->StopCodons,allSites->nStopCodons,
		      allSites->StopCodons,allSites->nStopCodons,
		      cutPoint, Sequence,
		      allExons->ORFs);
	  sprintf(mess,"ORFs \t\t\t%8ld", allExons->nORFs);
	  printRes(mess);
	}
      else
	{
	  allExons->nORFs = 0;
	}

      /* 3. Scoring and Filtering Exons */
      ScoreExons(Sequence, allExons,
		 l1, l2, Strand,
		 external, hsp,
		 isochores,nIsochores,
		 GCInfo);

      /* Total number of built exons in this strand */
      allExons->nExons =
	allExons->nInitialExons +
	allExons->nInternalExons +
	allExons->nZeroLengthExons +
	allExons->nTerminalExons +
	allExons->nSingles +
	allExons->nORFs +
	allExons->nUtrInitialExons +
	allExons->nUtrInitialHalfExons +
	allExons->nUtrInternalExons +
	allExons->nUtr5InternalHalfExons +
	allExons->nUtr3InternalHalfExons +
	allExons->nUtrTerminalExons +
	allExons->nUtrTerminalHalfExons;

      sprintf(mess,"---------\t\t%8ld", allExons->nExons);
      printRes(mess);
    }
}
Beispiel #10
0
/* Read the input of statistics data model
 *
 * name:      parameter file name; when it differs from PARAMETERFILE it is
 *            treated as given with the -P option
 * isochores: array of gparam pointers filled in by ReadIsochore, one per
 *            isochore; the gene model is read into isochores[0] and then
 *            replicated by shareGeneModel
 *
 * File selection: (a) name when it differs from PARAMETERFILE, else (b) the
 * path in the GENEID environment variable when set, else (c) name.
 *
 * Returns the number of isochores read from the file.
 *
 * Errors are reported through printError, which is expected not to return
 * (the code after each call relies on the checked value) -- TODO confirm.
 */
int readparam (char* name, gparam** isochores)
{
  FILE* RootFile;
  char* Geneid;

  int i;
  char line[MAXLINE];
  char mess[MAXSTRING];
  char header[MAXSTRING];
  int nIsochores;

  /* 0. Select parameters filename for reading it */
  /* Filename must be: option P, env.var GENEID or default (none) */
  Geneid = getenv("GENEID");

  /* File names come from the command line / environment, so every message
     that embeds one is bounded with snprintf */
  if (strcmp(PARAMETERFILE,name))
    {
      /* (a) Using -P option */
      snprintf(mess, sizeof mess,
               "Loading parameter file by using -P option: %s", name);
      if ((RootFile = fopen(name,"rb"))==NULL)
        printError("Parameter file (-P) can not be open to read");
    }
  else if (Geneid)
    {
      /* (b) Using GENEID environment var */
      snprintf(mess, sizeof mess,
               "Loading parameter file by using GENEID (env. var): %s", Geneid);
      /* Open the path directly: copying it into a fixed-size buffer could
         overflow on a long environment value */
      if ((RootFile = fopen(Geneid,"rb"))==NULL)
        printError("Parameter file (GENEID env.var) can not be open to read");
    }
  else
    {
      /* (c) Using default parameter file */
      snprintf(mess, sizeof mess, "Loading parameter file default");
      if ((RootFile = fopen(name,"rb"))==NULL)
        printError("Parameter file (default) can not be open to read");
    }

  /* rootfile will be the parameter file handle descriptor */
  printMess(mess);

  /* 1. Read NO_SCORE penalty for nucleotides not supported by homology */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f\n",&(NO_SCORE)))!=1)
    printError("Wrong format: NO_SCORE value scores (number/type)");

  snprintf(mess, sizeof mess, "NO_SCORE: \t%9.2f", NO_SCORE);
  printMess(mess);

  /* 2. Read the number of isochores */
  /* Headers are consumed until the number-of-isochores header is found;
     the optional BKGD_SUBTRACT_FLANK_LENGTH section is read on the way and
     any other header is skipped */
  for (;;)
    {
      readHeader(RootFile,line);
      /* NOTE(review): "%s" is unbounded; safe only if a line can never hold
         a token longer than header -- confirm MAXLINE vs MAXSTRING */
      if ((sscanf(line,"%s",header))!=1)
        printError("Wrong format: header for number of isochores");

      if (!strcasecmp(header,sNUMISO))
        break;

      if (!strcasecmp(header,sBKGD_SUBTRACT_FLANK_LENGTH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%d\n", &(BKGD_SUBTRACT_FLANK_LENGTH)))!=1)
            printError("Wrong format: BKGD_SUBTRACT_FLANK_LENGTH value (integer)");

          snprintf(mess, sizeof mess, "BKGD_SUBTRACT_FLANK_LENGTH: \t%d",
                   BKGD_SUBTRACT_FLANK_LENGTH);
          printMess(mess);
        }
    }

  readLine(RootFile,line);
  if ((sscanf(line,"%d\n", &(nIsochores)))!=1)
    printError("Wrong format: Number of isochores");

  snprintf(mess, sizeof mess, "Number of isochores: %d", nIsochores);
  printMess(mess);

  if (nIsochores > MAXISOCHORES || !(nIsochores>0))
    printError("Wrong value: number of isochores(MAXISOCHORES)");

  /* 3. Reading every one of the isochores */
  for (i=0; i<nIsochores; i++)
    {
      snprintf(mess, sizeof mess, "Reading isochore %d", i+1);
      printMess(mess);
      ReadIsochore(RootFile,isochores[i]);
    }

  /* 4. Reading the GeneModel */
  readHeader(RootFile,line);

  /* Ready to update dictionary of exon types */
  resetDict(isochores[0]->D);
  printMess("Dictionary ready to acquire information");

  if (SGE)
    {
      /* Built-in rules: the file is not consulted */
      printMess("Using an internal Gene Model");
      isochores[0]->nclass = ForceGeneModel(isochores[0]->D,
                                            isochores[0]->nc,
                                            isochores[0]->ne,
                                            isochores[0]->UC,
                                            isochores[0]->DE,
                                            isochores[0]->md,
                                            isochores[0]->Md,
                                            isochores[0]->block);
    }
  else
    {
      printMess("Reading Gene Model rules");
      isochores[0]->nclass = ReadGeneModel(RootFile,
                                           isochores[0]->D,
                                           isochores[0]->nc,
                                           isochores[0]->ne,
                                           isochores[0]->UC,
                                           isochores[0]->DE,
                                           isochores[0]->md,
                                           isochores[0]->Md,
                                           isochores[0]->block);
    }

  snprintf(mess, sizeof mess, "%d Gene Model rules have been read and saved\n",
           isochores[0]->nclass);
  printMess(mess);

  /* Replication of gene model information for each isochore */
  shareGeneModel(isochores, nIsochores);

  /* The handle is local to this function: release it before returning */
  fclose(RootFile);

  return(nIsochores);
}
Beispiel #11
0
/* Parse one Markov matrix line of the form "<word> <index> <frame> <score>".
   Returns 1 when the three values were read AND the indices are usable,
   i.e. 0 <= *idx < dim and 0 <= *frame < 3; returns 0 otherwise.
   NOTE(review): the frame bound of 3 is taken from the "dim * 3" entry count
   the caller expects for each matrix -- confirm against the allocation of
   OligoLogsIni / OligoLogsTran. */
static int parseMarkovEntry(const char* line, long dim,
                            int* idx, int* frame, float* score)
{
  if (sscanf(line, "%*s %d %d %f", idx, frame, score) != 3)
    return 0;

  if (*frame < 0 || *frame >= 3)
    return 0;

  if (*idx < 0 || (long)(*idx) >= dim)
    return 0;

  return 1;
}

/* - isochores are specific DNA regions according to the G+C content - */
/* Read the parameters of one isochore from RootFile into gp.
   The sections are consumed strictly in this order:
     1. G+C boundaries            6. homology (HSP) factors
     2. exon score cutoffs        7. optional named thresholds + exon weights
     3. oligo (coding) cutoffs    8. start / splice-site / stop profiles
     4. site factors              9. optional PolyA profile + Markov matrices
     5. exon factors             10. maximum donors per acceptor
   Besides gp, the optional sections write the file-level variables
   RSSMARKOVSCORE, EvidenceEW, EvidenceFactor, RSSDON, RSSACC,
   U12_SPLICE_SCORE_THRESH, U12_EXON_SCORE_THRESH, U12EW and increment PAS.
   Format problems are reported through printError.
   NOTE(review): printError presumably terminates the program; this function
   does not rely on that for the Markov matrices (bad entries are skipped). */
void ReadIsochore(FILE* RootFile, gparam* gp)
{
  float lscore;
  int OligoLength_1;
  int i,j,f;
  char line[MAXLINE];
  char mess[MAXSTRING];
  char header[MAXSTRING];

  /* 1. read boundaries of isochores */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%d %d\n",
              &(gp->leftValue),
              &(gp->rightValue)))!=2)
    printError("Wrong format: isochore boundaries (G+C percent)");

  sprintf(mess,"Isochores boundaries(min/max percentage): %d,%d",
          gp->leftValue,
          gp->rightValue);
  printMess(mess);

  /* 2. read cutoff (final score) to accept one predicted exon */
  /* Five values are requested but four are accepted: the utr value is
     optional and keeps whatever it held before when it is absent */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->ExonCutoff),
              &(gp->Internal->ExonCutoff),
              &(gp->Terminal->ExonCutoff),
              &(gp->Single->ExonCutoff),
              &(gp->utr->ExonCutoff)))< 4)
    printError("Wrong format: exon score cutoffs (number/type)");

  sprintf(mess,"Exon cutoffs: \t%9.3f\t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->ExonCutoff,
          gp->Internal->ExonCutoff,
          gp->Terminal->ExonCutoff,
          gp->Single->ExonCutoff,
          gp->utr->ExonCutoff);
  printMess(mess);

  /* 3. read cutoff (potential coding score) to accept one predicted exon */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f\n",
              &(gp->Initial->OligoCutoff),
              &(gp->Internal->OligoCutoff),
              &(gp->Terminal->OligoCutoff),
              &(gp->Single->OligoCutoff)))!=4)
    printError("Wrong format: potential coding score cutoffs (number/type)");

  sprintf(mess,"Oligo cutoffs: \t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->OligoCutoff,
          gp->Internal->OligoCutoff,
          gp->Terminal->OligoCutoff,
          gp->Single->OligoCutoff);
  printMess(mess);

  /* 4. Weight of signals in final exon score (utr value optional) */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->siteFactor),
              &(gp->Internal->siteFactor),
              &(gp->Terminal->siteFactor),
              &(gp->Single->siteFactor),
              &(gp->utr->siteFactor)))<4)
    printError("Wrong format: weight of signal scores (number/type)");

  sprintf(mess,"Site factors: \t%9.2f\t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->siteFactor,
          gp->Internal->siteFactor,
          gp->Terminal->siteFactor,
          gp->Single->siteFactor,
          gp->utr->siteFactor);
  printMess(mess);

  /* 5. Weight of coding potential in final exon score */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f\n",
              &(gp->Initial->exonFactor),
              &(gp->Internal->exonFactor),
              &(gp->Terminal->exonFactor),
              &(gp->Single->exonFactor)))<4)
    printError("Wrong format: weight of coding potential scores (number/type)");

  sprintf(mess,"Exon factors: \t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->exonFactor,
          gp->Internal->exonFactor,
          gp->Terminal->exonFactor,
          gp->Single->exonFactor);
  printMess(mess);

  /* 6. Weight of homology information in final exon score (utr optional) */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->HSPFactor),
              &(gp->Internal->HSPFactor),
              &(gp->Terminal->HSPFactor),
              &(gp->Single->HSPFactor),
              &(gp->utr->HSPFactor)))<4)
    printError("Wrong format: weight of homology scores (number/type)");

  sprintf(mess,"HSP factors: \t\t%9.2f\t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->HSPFactor,
          gp->Internal->HSPFactor,
          gp->Terminal->HSPFactor,
          gp->Single->HSPFactor,
          gp->utr->HSPFactor);
  printMess(mess);

  /* 7. read weights to correct the score of exons after general cutoff */
  /* Any number of optional single-value sections may precede the exon
     weights; each one is recognised by its header and skipped otherwise */
  readHeader(RootFile,line);
  if ((sscanf(line,"%s",header))!=1)
    {
      sprintf(mess,"Wrong format: header for exon weights and optional U12 score threshold");
      printError(mess);
    }

  /* "Exon_weigths" (sic) is accepted as an alternative spelling of the header */
  while(strcasecmp(header,sExon_weights)&& strcmp(header,"Exon_weigths"))
    {
      /* Markov score to assign non-exonic recursively spliced elements */
      if(!strcasecmp(header,sRSSMARKOVSCORE))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSMARKOVSCORE)))!=1)
            printError("Wrong format: RSSMARKOVSCORE value scores (number/type)");

          sprintf(mess,"RSSMARKOVSCORE: \t%9.2f",RSSMARKOVSCORE);
          printMess(mess);
        }

      /* Evidence exon weight */
      if(!strcasecmp(header,sEVIDENCEW))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(EvidenceEW)))!=1)
            printError("Wrong format: EvidenceExonWeight value (number/type)");

          sprintf(mess,"EvidenceExonWeight: \t%9.2f",EvidenceEW);
          printMess(mess);
        }

      /* Evidence exon factor */
      if(!strcasecmp(header,sEVIDENCEF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(EvidenceFactor)))!=1)
            printError("Wrong format: EvidenceFactor value (number/type)");

          sprintf(mess,"EvidenceFactor: \t%9.2f",EvidenceFactor);
          printMess(mess);
        }

      /* RSS donor score cutoff */
      if(!strcasecmp(header,sRSS_DONOR_SCORE_CUTOFF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSDON)))!=1)
            printError("Wrong format: RSSDON value scores (number/type)");

          sprintf(mess,"RSSDON: \t%9.2f",RSSDON);
          printMess(mess);
        }

      /* RSS acceptor score cutoff */
      if(!strcasecmp(header,sRSS_ACCEPTOR_SCORE_CUTOFF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSACC)))!=1)
            printError("Wrong format: RSSACC value scores (number/type)");

          sprintf(mess,"RSSACC: \t%9.2f",RSSACC);
          printMess(mess);
        }

      /* Threshold for the sum of U12 donor and acceptor splice scores */
      if(!strcasecmp(header,sU12_SPLICE_SCORE_THRESH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12_SPLICE_SCORE_THRESH)))!=1)
            printError("Wrong format: U12_SPLICE_SCORE_THRESH value scores (number/type)");

          sprintf(mess,"U12_SPLICE_SCORE_THRESH: \t%9.2f",
                  U12_SPLICE_SCORE_THRESH);
          printMess(mess);
        }

      /* Threshold for the sum of U12 donor and acceptor exon scores */
      if(!strcasecmp(header,sU12_EXON_SCORE_THRESH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12_EXON_SCORE_THRESH)))!=1)
            printError("Wrong format: U12_EXON_SCORE_THRESH value scores (number/type)");

          sprintf(mess,"U12_EXON_SCORE_THRESH: \t%9.2f",
                  U12_EXON_SCORE_THRESH);
          printMess(mess);
        }

      /* Additional exon weight that applies to exons flanking U12 introns */
      if(!strcasecmp(header,sU12_EXON_WEIGHT))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12EW)))!=1)
            printError("Wrong format: U12_EXON_WEIGHT value score (number/type)");

          sprintf(mess,"U12_EXON_WEIGHT: \t%9.2f",
                  U12EW);
          printMess(mess);
        }

      /* Move on to the next section header */
      readHeader(RootFile,line);
      if ((sscanf(line,"%s",header))!=1)
        {
          sprintf(mess,"Wrong format: header for exon weights");
          printError(mess);
        }
    }

  /* Exon weights themselves (utr value optional) */
  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->ExonWeight),
              &(gp->Internal->ExonWeight),
              &(gp->Terminal->ExonWeight),
              &(gp->Single->ExonWeight),
              &(gp->utr->ExonWeight)))<4)
    printError("Wrong format: exon weight values (number/type)");

  sprintf(mess,"Exon weights: \t%9.3f\t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->ExonWeight,
          gp->Internal->ExonWeight,
          gp->Terminal->ExonWeight,
          gp->Single->ExonWeight,
          gp->utr->ExonWeight);
  printMess(mess);

  /* 8. Read splice site profiles */
  /* (a).start codon profile */
  ReadProfile(RootFile, gp->StartProfile , sSTA,1);

  /* (b).acceptor and donor site profiles */
  ReadProfileSpliceSites(RootFile, gp);

  /* (c).stop codon profile */
  ReadProfile(RootFile, gp->StopProfile , sSTO,1);

  /* 9. read coding potential log-likelihood values (Markov chains) */
  readHeader(RootFile,line);
  if ((sscanf(line,"%s",header))!=1)
    {
      sprintf(mess,"Wrong format: header ");
      printError(mess);
      printMess(header);
    }

  /* Optional sections may appear before the Markov header; only the
     PolyA signal profile is recognised here, anything else is skipped */
  while(strcasecmp(header,sMarkov)&&strcasecmp(header,"Markov_oligo_logs_file"))
    {
      if (!strcasecmp(header,sprofilePolyA))
        {
          PAS++;
          printMess("Reading PolyA Signal Profile");
          /* Reading the PolyA signal profile */
          ReadProfile(RootFile, gp->PolyASignalProfile, sPOL, 0);
        }

      readHeader(RootFile,line);
      if ((sscanf(line,"%s",header))!=1)
        {
          sprintf(mess,"Wrong format: header ");
          printError(mess);
          printMess(header);
        }
    }

  /* Markov order (oligonucleotide length) */
  readLine(RootFile,line);

  if ((sscanf(line,"%d", &(gp->OligoLength)))!=1)
    printError("Wrong format: oligonucleotide length");

  sprintf(mess,"Oligonucleotide (word) length: %d",gp->OligoLength);
  printMess(mess);

  /* (a). Initial probability matrix */
  printMess("Reading Markov Initial likelihood matrix");

  /* Computing the right number of initial values to read: 4^OligoLength */
  gp->OligoDim=(int)pow((float)4,(float)gp->OligoLength);

  /* Cast keeps the argument in step with %ld whatever the field's type is */
  sprintf(mess,"Used oligo array size: %ld",(long)gp->OligoDim * 3);
  printMess(mess);

  readHeader(RootFile,line);
  for(j = 0; j < gp->OligoDim * 3; j++)
    {
      readLine(RootFile,line);
      if (!parseMarkovEntry(line, (long)gp->OligoDim, &i, &f, &lscore))
        {
          /* Bounded copy: line may be longer than mess */
          snprintf(mess, sizeof(mess),
                   "Wrong format/number (%s): Initial Markov value", line);
          printError(mess);
          /* Never store through unparsed or out-of-range indices */
          continue;
        }
      gp->OligoLogsIni[f][i]=lscore;
    }

  /* (b). Transition probability matrix */
  printMess("Reading Markov Transition likelihood matrix");

  /* Transition words are one nucleotide longer: 4^(OligoLength+1) values */
  OligoLength_1= gp->OligoLength + 1;
  gp->OligoDim_1=(int)pow((float)4,(float)OligoLength_1);

  sprintf(mess,"Used oligo array size: %ld",(long)gp->OligoDim_1 * 3);
  printMess(mess);

  readHeader(RootFile,line);
  for(j = 0; j < gp->OligoDim_1 * 3; j++)
    {
      readLine(RootFile,line);
      if (!parseMarkovEntry(line, (long)gp->OligoDim_1, &i, &f, &lscore))
        {
          snprintf(mess, sizeof(mess),
                   "Wrong format/number (%s): Transition Markov value", line);
          printError(mess);
          continue;
        }
      gp->OligoLogsTran[f][i]=lscore;
    }

  /* 10. read maximum number of donors per acceptor site (BuildExons) */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%d", &(gp->MaxDonors)))!=1)
    printError("Bad format: MaxDonors value");

  sprintf(mess,"Maximum donors by acceptor = %d\n", gp->MaxDonors);
  printMess(mess);
}