/* Sort the HSPs stored for every frame: frames [0, FRAMES) are handled
   with quickSort (forward), frames [FRAMES, 2*FRAMES) with RquickSort
   (reverse). A progress message is emitted before and after each sort. */
void SortHSPs(packHSP* p)
{
  char text[MAXSTRING];
  int f;

  for (f = 0; f < 2*FRAMES; f++)
    {
      if (f < FRAMES)
        {
          /* Forward frames */
          sprintf(text,"Quicksorting FWD HSPs in frame %d",f);
          printMess(text);

          quickSort(p->sPairs[f], 0, p->nSegments[f]-1);

          sprintf(text,"\t%ld HSPs successfully quicksorted",p->nSegments[f]);
          printMess(text);
        }
      else
        {
          /* Reverse frames */
          sprintf(text,"R-Quicksorting RVS HSPs in frame %d",f);
          printMess(text);

          RquickSort(p->sPairs[f], 0, p->nSegments[f]-1);

          sprintf(text,"\t%ld HSPs successfully r-quicksorted",p->nSegments[f]);
          printMess(text);
        }
    }
}
/* Sort exons by donor site (for every GM rule): genamic requirement.
 *
 * For each of the nexons exons in E:
 *   - a dictionary key is built in aux from the exon Type followed by its
 *     Strand, and looked up in D with getkeyDict;
 *   - if the key is found, the exon pointer is inserted into d[class] for
 *     every class listed in UC[type][0 .. nc[type]-1], keeping d[class]
 *     ordered by increasing (Donor->Position + offset2); km[class] is the
 *     number of exons currently stored in d[class] and is incremented;
 *   - otherwise a "type ... not found" message is emitted with printMess.
 *
 * ne, DE and nclass are not used in the part of the function visible here.
 *
 * NOTE(review): strcat receives the address of the single char Strand; this
 * is only a valid string if a '\0' byte happens to follow it in the
 * structure -- confirm against the exonGFF declaration.
 * NOTE(review): the closing brace of the function is not part of this
 * excerpt; the code below is left exactly as found.
 */
void BuildSort(dict *D, int nc[], int ne[], int UC[][MAXENTRY], int DE[][MAXENTRY], int nclass, long km[], exonGFF* **d, exonGFF *E, long nexons) { long i,k; int j; int type; int class; char aux[MAXTYPE]; char mess[MAXSTRING]; /* Every exon will be classified into some sorting function (d) */ /* Input exons are sorted by acceptor (left position) */ for(i=0; i < nexons; i++) { aux[0]='\0'; strcpy (aux, (E+i)->Type); strcat (aux, &((E+i)->Strand)); /* What's the type of exon? "Type+Strand" */ type = getkeyDict(D,aux); /* Checking and getting exon type (dictionary) */ if (type != NOTFOUND) { /* Exon may belong to some upstream compatible classes (UC) */ for(j=0; j < nc[type]; j++) { class = UC[type][j]; k = km[class]-1; /* Screening the exons sorted before: sorting by insertion */ while (k>=0 && (((E+i)->Donor->Position + (E+i)->offset2) < (d[class][k]->Donor->Position + d[class][k]->offset2))) { /* Shifting down previous exons */ d[class][k+1] = d[class][k]; k--; } /* Insert new exon before the previously shifted exons */ d[class][k+1] = (E+i); km[class]++; } }else{ /* end if type found */ sprintf(mess,"type %s(%d) not found",aux,type); printMess(mess); } } /* end forall exons */
/* Output: header for results displayed immediately */ void OutputHeader(char* locus, long l) { char* s; char mess[MAXSTRING]; /* 0. Size checkpoint and information */ if (!l) { sprintf(mess,"%s: sequence is empty",locus); printError(mess); } else { sprintf(mess,"%s: %ld nucleotides\n",locus,l); printMess(mess); } /* 1. Extract the starting time to display */ s = ctime(&m->tStart); /* 2. Output headers: gff, geneid or xml format */ if (GFF3){ printf("##gff-version 3\n"); } else { if (GFF) printf("## gff-version 2\n"); } if (XML) { /* XML format header */ printf("<?xml version=\"1.0\" ?>\n"); printf("<!DOCTYPE prediction SYSTEM \"geneid.dtd\">\n"); s[strlen(s)-1] = '\0'; printf("<prediction locus=\"%s\" length=\"%ld\" source=\"%s\" date=\"%s\"", locus,l,VERSION,s); } else { /* gff and geneid formats */ s[strlen(s)-1] = '\n'; if (GFF3){ printf("# date %s",s); printf("# source-version: %s -- [email protected]\n",VERSION); printf("##sequence-region %s 1 %ld\n",locus,l); } else { printf("## date %s",s); printf("## source-version: %s -- [email protected]\n",VERSION); printf("# Sequence %s - Length = %ld bps\n",locus,l); } } }
/* Report, through printMess, the parameters of the profile just read for
   the given signal: dimension, offset, order, dimensionTrans and cutoff */
void PrintProfile (profile* p, char* signal)
{
  char text[MAXSTRING];

  sprintf(text,
          "Reading... %s:\t%d\t%d\t%d\t(%ld)\t%5.2f",
          signal,
          p->dimension,
          p->offset,
          p->order,
          p->dimensionTrans,
          p->cutoff);

  printMess(text);
}
/* Pre-process the external HSP/read information for the fragment l1..l2
   of the given strand. Nothing is done unless the SRP option is set.
   Step 1 uses ReadScan when UTR is set and HSPScan otherwise; step 2
   always runs HSPScan2. */
void ProcessHSPs(long l1,
                 long l2,
                 int Strand,
                 packExternalInformation* external,
                 packHSP* hsp
                 )
{
  if (!SRP)
    return;

  /* Step 1: fill in the temporary HSP arrays */
  if (!UTR)
    {
      printMess("Preprocessing homology information: step 1");
      HSPScan(external,hsp,Strand,l1,l2);
    }
  else
    {
      printMess("Preprocessing read information: step 1");
      ReadScan(external,hsp,Strand,l1,l2);
    }

  /* Step 2 */
  printMess("Preprocessing homology information: step 2");
  HSPScan2(external,hsp,Strand,l1,l2);
}
/* Print the best genes in the selected format. With at least one exon the
   genes are produced by CookingGenes from pg->GOptim; in XML mode the
   <prediction> element is closed in both cases. */
void OutputGene(packGenes* pg,
                long nExons,
                char* Locus,
                char* Sequence,
                gparam* gp,
                dict* dAA,
                char* GenePrefix)
{
  if (nExons > 0)
    {
      /* Retrieving the best predicted genes recursively */
      printMess("Recovering gene-solution...");
      CookingGenes(pg->GOptim, Locus, Sequence, gp, dAA, GenePrefix);

      if (XML)
        printf("</prediction>\n");

      return;
    }

  /* No exons: in XML mode emit an empty prediction */
  if (XML)
    printf(" genes=\"0\" score =\"0.00\">\n</prediction>\n");
}
/* Dispatch one event record to the printer matching its EventType.
   Always returns 1; an unknown type only produces a message on stdout. */
int Write (
  sEvent *sp,
  FILE *outFile
)
{
  switch (sp->EventType)
    {
    /* Alarm-like and informational events */
    case mh_eEvent_Alarm:
    case mh_eEvent_MaintenanceAlarm:
    case mh_eEvent_SystemAlarm:
    case mh_eEvent_UserAlarm1:
    case mh_eEvent_UserAlarm2:
    case mh_eEvent_UserAlarm3:
    case mh_eEvent_UserAlarm4:
    case mh_eEvent_Info:
      printMess(sp, outFile);
      return 1;

    /* Acknowledgements */
    case mh_eEvent_Ack:
      printAck(sp, outFile);
      return 1;

    /* Cancel / return events */
    case mh_eEvent_Cancel:
    case mh_eEvent_Return:
      printRet(sp, outFile);
      return 1;

    /* Blocking events */
    case mh_eEvent_Block:
    case mh_eEvent_Unblock:
    case mh_eEvent_Reblock:
    case mh_eEvent_CancelBlock:
      printBlock(sp, outFile);
      return 1;

    default:
      printf("rt_elog_dump: Error in Write unknown EventType");
      return 1;
    }
}
/* Display some predictions results according to the options selected */ void Output(packSites* allSites, packSites* allSites_r, packExons* allExons, packExons* allExons_r, exonGFF* exons, long nExons, char* Locus, long l1, long l2, long lowerlimit, char* Sequence, gparam* gp, dict* dAA, char* GenePrefix) { /* 1. Printing Forward */ if (FWD) { printMess("Printing forward selected elements"); /* sites */ if (SFP) PrintSites(allSites->StartCodons, allSites->nStartCodons, STA, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->StartProfile); if (SAP){ PrintSites(allSites->AcceptorSites, allSites->nAcceptorSites, ACC, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->AcceptorProfile); } if (SDP){ PrintSites(allSites->DonorSites, allSites->nDonorSites, DON, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->DonorProfile); } if (STP){ PrintSites(allSites->StopCodons, allSites->nStopCodons, STO, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->StopProfile); } if (UTR && SFP){ PrintSites(allSites->TS, allSites->nTS, TSS, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->DonorProfile); } if (UTR && STP){ PrintSites(allSites->TE, allSites->nTE, TES, Locus, FORWARD, l1, l2, lowerlimit, Sequence, gp->AcceptorProfile); } /* exons */ if (EFP){ PrintExons(allExons->InitialExons,allExons->nInitialExons, FIRST, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (EIP){ PrintExons(allExons->InternalExons,allExons->nInternalExons, INTERNAL, Locus, l1, l2, Sequence, dAA, GenePrefix); PrintExons(allExons->ZeroLengthExons,allExons->nZeroLengthExons, ZEROLENGTH, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (ETP){ PrintExons(allExons->TerminalExons,allExons->nTerminalExons, TERMINAL, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (ESP) PrintExons(allExons->Singles,allExons->nSingles, SINGLE, Locus, l1, l2, Sequence, dAA, GenePrefix); if (EOP) PrintExons(allExons->ORFs,allExons->nORFs, ORF, Locus, l1, l2, Sequence, dAA, GenePrefix); } /* 2. 
Printing Reverse */ if (RVS) { printMess("Printing reverse selected elements\n"); /* sites */ if (SFP) PrintSites(allSites_r->StartCodons,allSites_r->nStartCodons,STA, Locus,REVERSE, l1, l2, lowerlimit, Sequence, gp->StartProfile); if (SAP){ PrintSites(allSites_r->AcceptorSites, allSites_r->nAcceptorSites, ACC, Locus, REVERSE, l1, l2, lowerlimit, Sequence, gp->AcceptorProfile); } if (SDP){ PrintSites(allSites_r->DonorSites, allSites_r->nDonorSites, DON, Locus, REVERSE, l1, l2, lowerlimit, Sequence, gp->DonorProfile); } if (STP){ PrintSites(allSites_r->StopCodons,allSites_r->nStopCodons,STO, Locus,REVERSE, l1, l2, lowerlimit, Sequence, gp->StopProfile); } if (UTR && SFP){ PrintSites(allSites_r->TS, allSites_r->nTS, TSS, Locus, REVERSE, l1, l2, lowerlimit, Sequence, gp->StartProfile); } if (UTR && STP){ PrintSites(allSites_r->TE, allSites_r->nTE, TES, Locus, REVERSE, l1, l2, lowerlimit, Sequence, gp->StopProfile); } /* exons */ if (EFP){ PrintExons(allExons_r->InitialExons,allExons_r->nInitialExons, FIRST, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (EIP){ PrintExons(allExons_r->InternalExons,allExons_r->nInternalExons, INTERNAL, Locus, l1, l2, Sequence, dAA, GenePrefix); PrintExons(allExons_r->ZeroLengthExons,allExons_r->nZeroLengthExons, ZEROLENGTH, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (ETP){ PrintExons(allExons_r->TerminalExons,allExons_r->nTerminalExons, TERMINAL, Locus, l1, l2, Sequence, dAA, GenePrefix); } if (ESP) PrintExons(allExons_r->Singles,allExons_r->nSingles, SINGLE, Locus, l1, l2, Sequence, dAA, GenePrefix); if (EOP) PrintExons(allExons_r->ORFs,allExons_r->nORFs, ORF, Locus, l1, l2, Sequence, dAA, GenePrefix); } /* 3. Print all exons */ if (EXP) { printMess("Printing all predicted Exons of current split\n"); PrintExons(exons, nExons, FIRST + INTERNAL + TERMINAL + SINGLE + ORF, Locus, l1, l2, Sequence, dAA, GenePrefix); } }
/* Management of splice sites prediction and exon construction/scoring */ void manager(char *Sequence, long LengthSequence, packSites* allSites, packExons* allExons, packLib** lib, long l1, long l2, long lowerlimit, long upperlimit, int Strand, packExternalInformation* external, packHSP* hsp, gparam* gp, gparam** isochores, int nIsochores, packGC* GCInfo, site* acceptorsites, site* donorsites, site* tssites, site* tesites ) { char mess[MAXSTRING]; //struct timespec requestStart; //struct timespec requestEnd; /* For sorting sites */ /* site* acceptorsites; */ /* site* donorsites; */ long l1a, l1b, l2a, l2b, l1c, l2c; long cutPoint; /* 0. Define boundaries of splice site prediction according to current split positions and strand selected */ if (Strand == FORWARD) { /* Forward sense */ /* Start codons and Acceptor sites limits */ l1a = l1; l2a = (l2 == upperlimit)? l2 : l2 - OVERLAP; /* Donor sites limits */ l1b = l1; l2b = l2; /* Stop codon limits */ l1c = l1; l2c = l2; /* Terminal/Single exons: */ /* are allowed if their Stop codon is placed behind cutPoint */ /* FWD: every stop codon might be used without problems */ cutPoint = l1; } else { /* Reverse sense */ /* Start codons and Acceptor sites limits */ l1a = l1; l2a = l2; /* Donor sites limits */ l1b = (l1 == lowerlimit)? l1: l1 + OVERLAP; l2b = l2; /* Stop codon limits */ l1c = l1; l2c = l2; /* Terminal/Single exons: */ /* are allowed if their Stop codon is placed behind cutPoint (RVS) */ /* RVS: reading from right to left the forward sense sequence */ cutPoint = (l1 == lowerlimit)? l1 : l1 + OVERLAP; } /* sprintf(mess,"Strand:%i\nl1a:%ld\nl1b:%ld\nl2a:%ld\nl2b:%ld\nl1c:%ld\nl2c:%ld\ncutPoint:%ld\n",Strand,l1a, l1b,l2a, l2b,l1c, l2c,cutPoint); */ /* printMess(mess); */ /* 0. 
Preprocss HSPs */ if (SRP){ ProcessHSPs(l1, l2, Strand, external, hsp); } /* 0.1 Preprocess Library */ if(LIB) { //clock_gettime(CLOCK_MONOTONIC, &requestStart); ProcessLibrary2(external, lib, Strand, l1, l2); //clock_gettime(CLOCK_MONOTONIC, &requestEnd); //accum = ( requestEnd.tv_sec - requestStart.tv_sec ) + ( requestEnd.tv_nsec - requestStart.tv_nsec )/ BILLION; //sprintf(mess,"Library preprocessing time: %lf\n", accum ); //printMess(mess); //cleanLibraryMemory(external); } /* 1. Predicting splice sites of current split of DNA sequence */ printMess ("Computing sites ..."); allSites->nStartCodons = GetSitesWithProfile(Sequence,gp->StartProfile,allSites->StartCodons,l1a,l2a); sprintf(mess, "Start Codons \t\t%8ld", allSites->nStartCodons); printRes(mess); long numAccsites = 0; allSites->nAcceptorSites = BuildAcceptors(Sequence, U2, sU2type, sU2, gp->AcceptorProfile, gp->PolyPTractProfile, gp->BranchPointProfile, allSites->AcceptorSites, l1a,l2a,numAccsites,NUMSITES,Strand,external); sprintf(mess, "Acceptor Sites \t\t%8ld", allSites->nAcceptorSites - numAccsites); numAccsites = allSites->nAcceptorSites; printRes(mess); if (U12GTAG){ allSites->nAcceptorSites = BuildU12Acceptors(Sequence,U12gtag,sU12type, sU12gtag, gp->U12gtagAcceptorProfile, gp->U12BranchPointProfile, gp->PolyPTractProfile, allSites->AcceptorSites, l1a,l2a,numAccsites,NUMSITES,Strand,external); sprintf(mess, "U12gtag Acceptor Sites \t%8ld", allSites->nAcceptorSites - numAccsites); numAccsites = allSites->nAcceptorSites; printRes(mess); } if (U12ATAC){ allSites->nAcceptorSites = BuildU12Acceptors(Sequence,U12atac,sU12type, sU12atac, gp->U12atacAcceptorProfile, gp->U12BranchPointProfile, gp->PolyPTractProfile, allSites->AcceptorSites, l1a,l2a,numAccsites,NUMSITES,Strand,external); sprintf(mess, "U12atac Acceptor Sites \t%8ld", allSites->nAcceptorSites - numAccsites); numAccsites = allSites->nAcceptorSites; printRes(mess); } long numDonsites = 0; allSites->nDonorSites = /* 
BuildDonors(Sequence,U2,sU2type,sU2, gp->DonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES); */ BuildDonors(Sequence,U2,sU2type,sU2, gp->DonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"Donor Sites \t\t%8ld", allSites->nDonorSites); numDonsites = allSites->nDonorSites; printRes(mess); if (U12GTAG){ allSites->nDonorSites = BuildDonors(Sequence,U12gtag,sU12type,sU12gtag, gp->U12gtagDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U12gtag Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } if (U12ATAC){ allSites->nDonorSites = BuildDonors(Sequence, U12atac,sU12type,sU12atac, gp->U12atacDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U12atac Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } if (U2GCAG){ allSites->nDonorSites = BuildDonors(Sequence,U2, sU2type,sU2gcag, gp->U2gcagDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U2gcag Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } if (U2GTA){ allSites->nDonorSites = BuildDonors(Sequence,U2,sU2type,sU2gta, gp->U2gtaDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U2gta Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } if (U2GTG){ allSites->nDonorSites = BuildDonors(Sequence,U2,sU2type,sU2gtg, gp->U2gtgDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U2gtg Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } if (U2GTY){ allSites->nDonorSites = BuildDonors(Sequence,U2,sU2type,sU2gty, 
gp->U2gtyDonorProfile,allSites->DonorSites,l1b,l2b,numDonsites,NUMSITES,Strand,external); sprintf (mess,"U2gty Donor Sites \t%8ld", allSites->nDonorSites - numDonsites); numDonsites = allSites->nDonorSites; printRes(mess); } allSites->nStopCodons = GetStopCodons(Sequence,gp->StopProfile, allSites->StopCodons,l1c,l2c); sprintf (mess,"Stop Codons \t\t%8ld", allSites->nStopCodons); printRes(mess); if ( U12GTAG || U12ATAC || U2GCAG || U2GTA || U2GTG || U2GTY ){ /* Predicted sites must be sorted by position */ printMess ("Sorting Donor and Acceptor sites ..."); SortSites(allSites->DonorSites,allSites->nDonorSites,donorsites,l1b,l2b); SortSites(allSites->AcceptorSites,allSites->nAcceptorSites,acceptorsites,l1a,l2a); } allSites->nTS=0; allSites->nTE=0; if (UTR){ allSites->nTS = GetTSS(allSites->TS,allSites->AcceptorSites, allSites->nAcceptorSites, external,hsp,Strand,LengthSequence,l1,l2); sprintf(mess, "TS \t\t\t%8ld", allSites->nTS); printRes(mess); long numTE = 0; if(PAS){allSites->nTE = GetSitesWithProfile(Sequence,gp->PolyASignalProfile,allSites->TE,l1,l2); sprintf(mess, "PolyA Signals \t\t%8ld", allSites->nTE); numTE = allSites->nTE; printRes(mess); } allSites->nTE = GetTES(allSites->TE,allSites->DonorSites, allSites->nDonorSites,external,hsp,Strand,LengthSequence,l1,l2,numTE); sprintf(mess, "TE \t\t\t%8ld", allSites->nTE); printRes(mess); } /* Total number of predicted splice sites in this strand */ allSites->nSites = allSites->nStartCodons + allSites->nAcceptorSites + allSites->nDonorSites + allSites->nStopCodons + allSites->nTS + allSites->nTE; sprintf(mess,"---------\t\t%8ld", allSites->nSites); printRes(mess); if ( UTR ){ /* Predicted sites must be sorted by position */ printMess ("Sorting TSS/TES sites ..."); SortSites(allSites->TS,allSites->nTS,tssites,l1,l2); SortSites(allSites->TE,allSites->nTE,tesites,l1,l2); } if (GENAMIC || (!GENAMIC && (EFP || EIP || ETP || ESP || EOP || EXP))){ /* 2. 
Building exons with splice sites predicted before */ printMess ("Computing exons ..."); allExons->nInitialExons = BuildInitialExons(allSites->StartCodons,allSites->nStartCodons, allSites->DonorSites,allSites->nDonorSites, allSites->StopCodons,allSites->nStopCodons, gp->MaxDonors,sFIRST,Sequence, allExons->InitialExons,NUMEXONS); sprintf(mess,"Initial Exons \t\t%8ld", allExons->nInitialExons); printRes(mess); allExons->nInternalExons = BuildInternalExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->DonorSites,allSites->nDonorSites, allSites->StopCodons,allSites->nStopCodons, gp->MaxDonors,sINTERNAL,Sequence, allExons->InternalExons,NUMEXONS); sprintf(mess,"Internal Exons \t\t%8ld", allExons->nInternalExons); printRes(mess); if (RSS){ allExons->nZeroLengthExons = BuildZeroLengthExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->DonorSites,allSites->nDonorSites, allSites->StopCodons,allSites->nStopCodons, gp->MaxDonors,sZEROLENGTH,Sequence, allExons->ZeroLengthExons,NUMEXONS); sprintf(mess,"Zero-Length Exons \t%8ld", allExons->nZeroLengthExons); printRes(mess); } allExons->nTerminalExons = BuildTerminalExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->StopCodons,allSites->nStopCodons, LengthSequence,cutPoint,sTERMINAL,Sequence, allExons->TerminalExons,NUMEXONS); sprintf(mess,"Terminal Exons \t\t%8ld", allExons->nTerminalExons); printRes(mess); allExons->nSingles = BuildSingles(allSites->StartCodons,allSites->nStartCodons, allSites->StopCodons,allSites->nStopCodons, cutPoint, Sequence, allExons->Singles); sprintf(mess,"Single genes \t\t%8ld", allExons->nSingles); printRes(mess); if (UTR){ allExons->nUtrInitialExons = BuildUTRExons(allSites->TS,allSites->nTS, allSites->DonorSites,allSites->nDonorSites, MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRFIRST, allExons->UtrInitialExons,NUMEXONS); sprintf(mess,"UTR Initial Exons \t%8ld", allExons->nUtrInitialExons); printRes(mess); allExons->nUtrInitialHalfExons = 
BuildUTRExons(allSites->TS,allSites->nTS, allSites->StartCodons,allSites->nStartCodons, MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRFIRSTHALF, allExons->UtrInitialHalfExons,NUMEXONS); sprintf(mess,"UTR Initial Half Exons \t%8ld", allExons->nUtrInitialHalfExons); printRes(mess); allExons->nUtrInternalExons = BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->DonorSites,allSites->nDonorSites, MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTRINTERNAL, allExons->UtrInternalExons,NUMEXONS); sprintf(mess,"UTR Internal Exons \t%8ld", allExons->nUtrInternalExons); printRes(mess); allExons->nUtr5InternalHalfExons = BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->StartCodons,allSites->nStartCodons, MAXUTRDONORS,MAXUTREXONLENGTH,cutPoint,sUTR5INTERNALHALF, allExons->Utr5InternalHalfExons,NUMEXONS); sprintf(mess,"UTR 5' Int. Half Exons \t%8ld", allExons->nUtr5InternalHalfExons); printRes(mess); allExons->nUtr3InternalHalfExons = BuildUTRExons(allSites->StopCodons,allSites->nStopCodons, allSites->DonorSites,allSites->nDonorSites, MAXUTRDONORS,MAXNMDLENGTH,cutPoint,sUTR3INTERNALHALF, allExons->Utr3InternalHalfExons,NUMEXONS); sprintf(mess,"UTR 3' Int. Half Exons \t%8ld", allExons->nUtr3InternalHalfExons); printRes(mess); allExons->nUtrTerminalHalfExons = BuildUTRExons(allSites->StopCodons,allSites->nStopCodons, allSites->TE,allSites->nTE, MAXUTRDONORS,MAX3UTREXONLENGTH,cutPoint,sUTRTERMINALHALF, allExons->UtrTerminalHalfExons,NUMEXONS); sprintf(mess,"UTR Term. 
Half Exons \t%8ld", allExons->nUtrTerminalHalfExons); printRes(mess); allExons->nUtrTerminalExons = BuildUTRExons(allSites->AcceptorSites,allSites->nAcceptorSites, allSites->TE,allSites->nTE, MAXUTRDONORS,MAX3UTREXONLENGTH,cutPoint,sUTRTERMINAL, allExons->UtrTerminalExons,NUMEXONS); sprintf(mess,"UTR Terminal Exons \t%8ld", allExons->nUtrTerminalExons); printRes(mess); } if (scanORF) { allExons->nORFs = BuildORFs(allSites->StopCodons,allSites->nStopCodons, allSites->StopCodons,allSites->nStopCodons, cutPoint, Sequence, allExons->ORFs); sprintf(mess,"ORFs \t\t\t%8ld", allExons->nORFs); printRes(mess); } else allExons->nORFs = 0; /* 3. Scoring and Filtering Exons */ ScoreExons(Sequence, allExons, l1, l2, Strand, external, hsp, isochores,nIsochores, GCInfo); /* Total number of built exons in this strand */ allExons->nExons = allExons->nInitialExons + allExons->nInternalExons + allExons->nZeroLengthExons + allExons->nTerminalExons + allExons->nSingles + allExons->nORFs + allExons->nUtrInitialExons + allExons->nUtrInitialHalfExons + allExons->nUtrInternalExons + allExons->nUtr5InternalHalfExons + allExons->nUtr3InternalHalfExons + allExons->nUtrTerminalExons + allExons->nUtrTerminalHalfExons; sprintf(mess,"---------\t\t%8ld", allExons->nExons); printRes(mess); } }
/* Read the input of statistics data model.
 *
 *   name       parameter file name; when it differs from PARAMETERFILE it
 *              is taken as given with option -P
 *   isochores  array of parameter sets, one filled per isochore read
 *
 * Returns the number of isochores found in the file.
 *
 * The file is chosen as: (a) name if given with -P, (b) the path held in
 * the GENEID environment variable, (c) name (the default) otherwise. It is
 * closed once the gene model has been read.
 *
 * NOTE(review): the code relies on printError not returning (the file
 * handle is used right after a failed fopen is reported) -- confirm.
 */
int readparam (char* name, gparam** isochores)
{
  FILE* RootFile;
  char* Geneid;
  int i;
  char line[MAXLINE];
  char mess[MAXSTRING];
  char header[MAXSTRING];
  int nIsochores = 0;

  /* 0. Select parameters filename for reading it */
  /* Filename must be: option P, env.var GENEID or default (none) */
  Geneid = getenv("GENEID");

  /* File names are of arbitrary length: every message is bounded */
  if (strcmp(PARAMETERFILE,name))
    {
      /* (a) Using -P option */
      snprintf(mess, sizeof(mess),
               "Loading parameter file by using -P option: %s",name);
      if ((RootFile = fopen(name,"rb"))==NULL)
        printError("Parameter file (-P) can not be open to read");
    }
  else if (Geneid)
    {
      /* (b) Using GENEID environment var: the value is opened directly
         instead of being copied into a fixed-size buffer */
      snprintf(mess, sizeof(mess),
               "Loading parameter file by using GENEID (env. var): %s",Geneid);
      if ((RootFile = fopen(Geneid,"rb"))==NULL)
        printError("Parameter file (GENEID env.var) can not be open to read");
    }
  else
    {
      /* (c) Using default parameter file */
      snprintf(mess, sizeof(mess), "Loading parameter file default");
      if ((RootFile = fopen(name,"rb"))==NULL)
        printError("Parameter file (default) can not be open to read");
    }

  /* rootfile will be the parameter file handle descriptor */
  printMess(mess);

  /* 1. Read NO_SCORE penalty for nucleotides not supported by homology */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%f\n", &(NO_SCORE)))!=1)
    printError("Wrong format: NO_SCORE value scores (number/type)");

  sprintf(mess,"NO_SCORE: \t%9.2f", NO_SCORE);
  printMess(mess);

  /* 2. Read the number of isochores */
  /* Optional sections found before it are consumed on the way:
     BKGD_SUBTRACT_FLANK_LENGTH is stored, any other header is skipped */
  readHeader(RootFile,line);
  if ((sscanf(line,"%s",header))!=1)
    printError("Wrong format: header for number of isochores");

  while (strcasecmp(header,sNUMISO))
    {
      if (!strcasecmp(header,sBKGD_SUBTRACT_FLANK_LENGTH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%d\n", &(BKGD_SUBTRACT_FLANK_LENGTH)))!=1)
            printError("Wrong format: BKGD_SUBTRACT_FLANK_LENGTH value (integer)");
          sprintf(mess,"BKGD_SUBTRACT_FLANK_LENGTH: \t%d",
                  BKGD_SUBTRACT_FLANK_LENGTH);
          printMess(mess);
        }

      readHeader(RootFile,line);
      if ((sscanf(line,"%s",header))!=1)
        printError("Wrong format: header for number of isochores");
    }

  readLine(RootFile,line);
  if ((sscanf(line,"%d\n", &(nIsochores)))!=1)
    printError("Wrong format: Number of isochores");

  sprintf(mess,"Number of isochores: %d", nIsochores);
  printMess(mess);

  if (nIsochores > MAXISOCHORES || !(nIsochores>0))
    printError("Wrong value: number of isochores(MAXISOCHORES)");

  /* 3. Reading every one of the isochores */
  for (i=0; i<nIsochores; i++)
    {
      sprintf(mess,"Reading isochore %d", i+1);
      printMess(mess);
      ReadIsochore(RootFile,isochores[i]);
    }

  /* 4. Reading the GeneModel */
  readHeader(RootFile,line);

  /* Ready to update dictionary of exon types */
  resetDict(isochores[0]->D);
  printMess("Dictionary ready to acquire information");

  if (SGE)
    {
      printMess("Using an internal Gene Model");
      isochores[0]->nclass =
        ForceGeneModel(isochores[0]->D,
                       isochores[0]->nc,
                       isochores[0]->ne,
                       isochores[0]->UC,
                       isochores[0]->DE,
                       isochores[0]->md,
                       isochores[0]->Md,
                       isochores[0]->block);
    }
  else
    {
      printMess("Reading Gene Model rules");
      isochores[0]->nclass =
        ReadGeneModel(RootFile,
                      isochores[0]->D,
                      isochores[0]->nc,
                      isochores[0]->ne,
                      isochores[0]->UC,
                      isochores[0]->DE,
                      isochores[0]->md,
                      isochores[0]->Md,
                      isochores[0]->block);
    }

  sprintf(mess,"%d Gene Model rules have been read and saved\n",
          isochores[0]->nclass);
  printMess(mess);

  /* Nothing else is read from the parameter file: release the handle */
  fclose(RootFile);

  /* Replication of gene model information for each isochore */
  shareGeneModel(isochores, nIsochores);

  return(nIsochores);
}
/* - isochores are specific DNA regions according to the G+C content -
 *
 * Read one isochore section from RootFile into gp, in file order:
 * boundaries, exon and oligo cutoffs, site/exon/HSP factors, optional
 * global thresholds, exon weights, signal profiles, optional PolyA
 * profile, Markov initial and transition matrices and MaxDonors.
 * Every value read is echoed with printMess; format errors are reported
 * with printError.
 *
 * Lines with five values (Initial, Internal, Terminal, Single, utr) are
 * also accepted with only the first four: the utr field is then left
 * untouched.
 */
void ReadIsochore(FILE* RootFile, gparam* gp)
{
  float lscore;
  int OligoLength_1;
  int i,j,f;
  char line[MAXLINE];
  char mess[MAXSTRING];
  char header[MAXSTRING];

  /* 1. read boundaries of isochores */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%d %d\n",
              &(gp->leftValue),
              &(gp->rightValue)))!=2)
    printError("Wrong format: isochore boundaries (G+C percent)");

  sprintf(mess,"Isochores boundaries(min/max percentage): %d,%d",
          gp->leftValue,
          gp->rightValue);
  printMess(mess);

  /* 2. read cutoff (final score) to accept one predicted exon */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->ExonCutoff),
              &(gp->Internal->ExonCutoff),
              &(gp->Terminal->ExonCutoff),
              &(gp->Single->ExonCutoff),
              &(gp->utr->ExonCutoff)))< 4)
    printError("Wrong format: exon score cutoffs (number/type)");

  sprintf(mess,"Exon cutoffs: \t%9.3f\t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->ExonCutoff,
          gp->Internal->ExonCutoff,
          gp->Terminal->ExonCutoff,
          gp->Single->ExonCutoff,
          gp->utr->ExonCutoff);
  printMess(mess);

  /* 3. read cutoff (potential coding score) to accept one predicted exon */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f\n",
              &(gp->Initial->OligoCutoff),
              &(gp->Internal->OligoCutoff),
              &(gp->Terminal->OligoCutoff),
              &(gp->Single->OligoCutoff)))!=4)
    printError("Wrong format: potential coding score cutoffs (number/type)");

  sprintf(mess,"Oligo cutoffs: \t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->OligoCutoff,
          gp->Internal->OligoCutoff,
          gp->Terminal->OligoCutoff,
          gp->Single->OligoCutoff);
  printMess(mess);

  /* 4. Weight of signals in final exon score */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->siteFactor),
              &(gp->Internal->siteFactor),
              &(gp->Terminal->siteFactor),
              &(gp->Single->siteFactor),
              &(gp->utr->siteFactor)))<4)
    printError("Wrong format: weight of signal scores (number/type)");

  sprintf(mess,"Site factors: \t%9.2f\t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->siteFactor,
          gp->Internal->siteFactor,
          gp->Terminal->siteFactor,
          gp->Single->siteFactor,
          gp->utr->siteFactor);
  printMess(mess);

  /* 5. Weight of coding potential in final exon score */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f\n",
              &(gp->Initial->exonFactor),
              &(gp->Internal->exonFactor),
              &(gp->Terminal->exonFactor),
              &(gp->Single->exonFactor)))<4)
    printError("Wrong format: weight of coding potential scores (number/type)");

  sprintf(mess,"Exon factors: \t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->exonFactor,
          gp->Internal->exonFactor,
          gp->Terminal->exonFactor,
          gp->Single->exonFactor);
  printMess(mess);

  /* 6. Weight of homology information in final exon score */
  readHeader(RootFile,line);
  readLine(RootFile,line);

  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->HSPFactor),
              &(gp->Internal->HSPFactor),
              &(gp->Terminal->HSPFactor),
              &(gp->Single->HSPFactor),
              &(gp->utr->HSPFactor)))<4)
    printError("Wrong format: weight of homology scores (number/type)");

  sprintf(mess,"HSP factors: \t\t%9.2f\t%9.2f\t%9.2f\t%9.2f\t%9.2f",
          gp->Initial->HSPFactor,
          gp->Internal->HSPFactor,
          gp->Terminal->HSPFactor,
          gp->Single->HSPFactor,
          gp->utr->HSPFactor);
  printMess(mess);

  /* 7. read weights to correct the score of exons after general cutoff */
  /* Optional sections may come first: each known header stores one global
     value, unknown headers are skipped. The misspelled header
     "Exon_weigths" is accepted as well. */
  readHeader(RootFile,line);
  if ((sscanf(line,"%s",header))!=1)
    printError("Wrong format: header for exon weights and optional U12 score threshold");

  while (strcasecmp(header,sExon_weights) && strcmp(header,"Exon_weigths"))
    {
      /* Markov score assigned to non-exonic recursively spliced elements */
      if (!strcasecmp(header,sRSSMARKOVSCORE))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSMARKOVSCORE)))!=1)
            printError("Wrong format: RSSMARKOVSCORE value scores (number/type)");
          sprintf(mess,"RSSMARKOVSCORE: \t%9.2f",RSSMARKOVSCORE);
          printMess(mess);
        }

      /* Evidence exon weight */
      if (!strcasecmp(header,sEVIDENCEW))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(EvidenceEW)))!=1)
            printError("Wrong format: EvidenceExonWeight value (number/type)");
          sprintf(mess,"EvidenceExonWeight: \t%9.2f",EvidenceEW);
          printMess(mess);
        }

      /* Evidence exon factor */
      if (!strcasecmp(header,sEVIDENCEF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(EvidenceFactor)))!=1)
            printError("Wrong format: EvidenceFactor value (number/type)");
          sprintf(mess,"EvidenceFactor: \t%9.2f",EvidenceFactor);
          printMess(mess);
        }

      /* RSS donor score cutoff */
      if (!strcasecmp(header,sRSS_DONOR_SCORE_CUTOFF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSDON)))!=1)
            printError("Wrong format: RSSDON value scores (number/type)");
          sprintf(mess,"RSSDON: \t%9.2f",RSSDON);
          printMess(mess);
        }

      /* RSS acceptor score cutoff */
      if (!strcasecmp(header,sRSS_ACCEPTOR_SCORE_CUTOFF))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(RSSACC)))!=1)
            printError("Wrong format: RSSACC value scores (number/type)");
          sprintf(mess,"RSSACC: \t%9.2f",RSSACC);
          printMess(mess);
        }

      /* Threshold for the sum of U12 donor and acceptor splice scores */
      if (!strcasecmp(header,sU12_SPLICE_SCORE_THRESH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12_SPLICE_SCORE_THRESH)))!=1)
            printError("Wrong format: U12_SPLICE_SCORE_THRESH value scores (number/type)");
          sprintf(mess,"U12_SPLICE_SCORE_THRESH: \t%9.2f",
                  U12_SPLICE_SCORE_THRESH);
          printMess(mess);
        }

      /* Threshold for the sum of U12 donor and acceptor exon scores */
      if (!strcasecmp(header,sU12_EXON_SCORE_THRESH))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12_EXON_SCORE_THRESH)))!=1)
            printError("Wrong format: U12_EXON_SCORE_THRESH value scores (number/type)");
          sprintf(mess,"U12_EXON_SCORE_THRESH: \t%9.2f",
                  U12_EXON_SCORE_THRESH);
          printMess(mess);
        }

      /* Additional exon weight that applies to exons flanking U12 introns */
      if (!strcasecmp(header,sU12_EXON_WEIGHT))
        {
          readLine(RootFile,line);
          if ((sscanf(line,"%f\n", &(U12EW)))!=1)
            printError("Wrong format: U12_EXON_WEIGHT value score (number/type)");
          sprintf(mess,"U12_EXON_WEIGHT: \t%9.2f", U12EW);
          printMess(mess);
        }

      readHeader(RootFile,line);
      if ((sscanf(line,"%s",header))!=1)
        printError("Wrong format: header for exon weights");
    }

  readLine(RootFile,line);
  if ((sscanf(line,"%f %f %f %f %f\n",
              &(gp->Initial->ExonWeight),
              &(gp->Internal->ExonWeight),
              &(gp->Terminal->ExonWeight),
              &(gp->Single->ExonWeight),
              &(gp->utr->ExonWeight)))<4)
    printError("Wrong format: exon weight values (number/type)");

  sprintf(mess,"Exon weights: \t%9.3f\t%9.3f\t%9.3f\t%9.3f\t%9.3f",
          gp->Initial->ExonWeight,
          gp->Internal->ExonWeight,
          gp->Terminal->ExonWeight,
          gp->Single->ExonWeight,
          gp->utr->ExonWeight);
  printMess(mess);

  /* 8. Read splice site profiles */
  /* (a).start codon profile */
  ReadProfile(RootFile, gp->StartProfile , sSTA,1);

  /* (b).acceptor and donor site profiles */
  ReadProfileSpliceSites(RootFile, gp);

  /* (c).stop codon profile */
  ReadProfile(RootFile, gp->StopProfile , sSTO,1);

  /* 9. read coding potential log-likelihood values (Markov chains) */
  /* An optional PolyA signal profile may precede the Markov section;
     "Markov_oligo_logs_file" is accepted as an alternative header */
  readHeader(RootFile,line);
  if ((sscanf(line,"%s",header))!=1)
    {
      sprintf(mess,"Wrong format: header ");
      printError(mess);
      printMess(header);
    }

  while (strcasecmp(header,sMarkov) && strcasecmp(header,"Markov_oligo_logs_file"))
    {
      if (!strcasecmp(header,sprofilePolyA))
        {
          /* Flag that a PolyA signal profile is available */
          PAS++;
          printMess("Reading PolyA Signal Profile");
          ReadProfile(RootFile, gp->PolyASignalProfile, sPOL, 0);
        }

      readHeader(RootFile,line);
      if ((sscanf(line,"%s",header))!=1)
        {
          sprintf(mess,"Wrong format: header ");
          printError(mess);
          printMess(header);
        }
    }

  /* Next profile for Markov order */
  readLine(RootFile,line);
  if ((sscanf(line,"%d", &(gp->OligoLength)))!=1)
    printError("Wrong format: oligonucleotide length");

  sprintf(mess,"Oligonucleotide (word) length: %d",gp->OligoLength);
  printMess(mess);

  /* (a). Initial probability matrix */
  printMess("Reading Markov Initial likelihood matrix");

  /* Computing the right number of initial values to read */
  gp->OligoDim=(int)pow((float)4,(float)gp->OligoLength);

  sprintf(mess,"Used oligo array size: %ld",gp->OligoDim * 3);
  printMess(mess);

  readHeader(RootFile,line);
  for (j = 0; j < gp->OligoDim * 3; j++)
    {
      readLine(RootFile,line);

      /* i (oligo index) and f (frame) come from the file and are used as
         array subscripts: reject anything outside [0,OligoDim) x [0,3).
         NOTE(review): the limit of 3 frames matches the OligoDim * 3
         entries read here -- confirm against the OligoLogsIni declaration.
         line may be longer than mess: the message is bounded. */
      if ((sscanf(line, "%*s %d %d %f", &i, &f, &lscore))!=3
          || f < 0 || f >= 3
          || i < 0 || i >= gp->OligoDim)
        {
          snprintf(mess, sizeof(mess),
                   "Wrong format/number (%s): Initial Markov value", line);
          printError(mess);
        }
      else
        gp->OligoLogsIni[f][i]=lscore;
    }

  /* (b). Transition probability matrix */
  printMess("Reading Markov Transition likelihood matrix");

  OligoLength_1= gp->OligoLength + 1;
  gp->OligoDim_1=(int)pow((float)4,(float)OligoLength_1);

  sprintf(mess,"Used oligo array size: %ld",gp->OligoDim_1 * 3);
  printMess(mess);

  readHeader(RootFile,line);
  for (j = 0; j < gp->OligoDim_1 * 3; j++)
    {
      readLine(RootFile,line);

      /* Same validation as for the initial matrix, with OligoDim_1 */
      if ((sscanf(line, "%*s %d %d %f", &i, &f, &lscore))!=3
          || f < 0 || f >= 3
          || i < 0 || i >= gp->OligoDim_1)
        {
          snprintf(mess, sizeof(mess),
                   "Wrong format/number (%s): Transition Markov value", line);
          printError(mess);
        }
      else
        gp->OligoLogsTran[f][i]=lscore;
    }

  /* 10. read maximum number of donors per acceptor site (BuildExons) */
  readHeader(RootFile,line);
  readLine(RootFile,line);
  if ((sscanf(line,"%d", &(gp->MaxDonors)))!=1)
    printError("Bad format: MaxDonors value");

  sprintf(mess,"Maximum donors by acceptor = %d\n", gp->MaxDonors);
  printMess(mess);
}