boolean read_codon_GeneStats(double * codon_array,char* line,FILE * ifp) { boolean ret = TRUE; char * codon; char * number; if( strwhitestartcmp(line,"rndcodon",spacestr) != 0 ) { warn("In reading codon line, got no 'rndcoodon' tag [%s]",line); return FALSE; } while( fgets(line,MAXLINE,ifp) != NULL ) { if( line[0] == '#' ) continue; if( strwhitestartcmp(line,"//",spacestr) == 0 ) break; codon = strtok(line,spacestr); number = strtok(NULL,spacestr); if( codon == NULL ) { warn("Found an uncommented line in codon consensus with no leading codon word"); continue; } if( number == NULL ) { warn("For codon %s, no number found",codon); ret = FALSE; continue; } if( strchr(codon,'N') != NULL ) continue; if( is_non_ambiguous_codon_seq(codon) == FALSE ) { warn("Codon %s is not really a codon... problem!"); ret = FALSE; continue; } codon_array[base4_codon_from_seq(codon)]= atof(number); } return ret; }
boolean read_base_GeneConsensus(double * base_array,char* line,FILE * ifp) { boolean ret = TRUE; int b; char * base; char * number; if( strwhitestartcmp(line,"begin",spacestr) != 0 || strstr(line,"consensus") == NULL ) { warn("In reading base GeneConsensus line, got no 'begin consensus' tag [%s]",line); return FALSE; } while( fgets(line,MAXLINE,ifp) != NULL ) { if( line[0] == '#' ) continue; if( strwhitestartcmp(line,"end",spacestr) == 0 ) break; base = strtok(line,spacestr); number = strtok(NULL,spacestr); if( base == NULL ) { warn("Found an uncommented line in base consensus with no leading base word"); continue; } if( number == NULL ) { warn("For base %s, no number found",base); ret = FALSE; continue; } if( strlen(base) > 1 || (b=base_from_char(*base)) == BASE_N ) { warn("Could not interpret %s as an actual DNA base in read_base_GeneConsensus"); ret = FALSE; continue; } base_array[b]= atof(number); } return ret; }
/*
 * read_line_GeneConsensus
 *
 * Builds a GeneConsensus from a "begin consensus" section.
 *
 *   line  the header line; it is tokenised in place with strtok, so its
 *         contents are modified.
 *   ifp   stream the section body is read from, one line at a time.
 *
 * Each body line that does not start with '#' is handed to
 * read_line_GeneSingleCons() and the result is added to the consensus.
 * Reading stops at a line matching "end", at end of file, or at the
 * first body line that cannot be converted (a warning is issued and
 * what has been collected so far is returned).
 *
 * Returns NULL when the header is not a "begin consensus" line,
 * otherwise the structure obtained from GeneConsensus_alloc_std().
 */
GeneConsensus * read_line_GeneConsensus(char * line,FILE * ifp)
{
  GeneConsensus * consensus;
  GeneSingleCons * entry;
  char row[MAXLINE];
  char * word;

  if( strwhitestartcmp(line,"begin",spacestr) != 0 ) {
    warn("Attempting to read a GeneConsensus structure with a line not starting with 'begin' [%s]",line);
    return NULL;
  }

  /* discard the leading token; the one after it must be 'consensus' */
  (void) strtok(line,spacestr);
  word = strtok(NULL,spacestr);

  if( !(word != NULL && strcmp(word,"consensus") == 0) ) {
    warn("Attempting to read a GeneConsensus structure without a 'begin consensus' tag [%s]",line);
    return NULL;
  }

  consensus = GeneConsensus_alloc_std();

  for(;;) {
    if( fgets(row,MAXLINE,ifp) == NULL )
      break;

    if( row[0] == '#' )
      continue;

    if( strwhitestartcmp(row,"end",spacestr) == 0 )
      break;

    entry = read_line_GeneSingleCons(row);
    if( entry != NULL ) {
      add_GeneConsensus(consensus,entry);
      continue;
    }

    warn("Unable to process GeneSingleCons line... dropping out...");
    break;
  }

  return consensus;
}
/*
 * skip_consensus
 *
 * Consumes lines from ifp up to and including the first line that
 * strwhitestartcmp matches against "end".
 *
 * Returns TRUE when such a line was found, FALSE when the stream ran
 * out (end of file or read error) before one was seen.
 *
 * Success is decided by having matched the end line, not by testing
 * feof() afterwards: when the end line is the very last line of the
 * file and has no trailing newline, fgets() returns it successfully
 * but has already set the end-of-file indicator, which would wrongly
 * be reported as a failure.
 */
boolean skip_consensus(FILE * ifp)
{
  char buffer[MAXLINE];

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    if( strwhitestartcmp(buffer,"end",spacestr) == 0 )
      return TRUE;
  }

  /* fgets gave up (EOF or error) without an end line being seen */
  return FALSE;
}
/*
 * read_into_MethodTypeSet
 *
 * Reads ifp to end of file, adding every "method", "type" and "input"
 * definition it can parse to mts.
 *
 *   mts  set that receives the parsed Method / Type / Input objects.
 *   ifp  stream to read; also passed to the per-definition readers,
 *        which may consume further lines from it.
 *
 * Comment lines ('#' first, or matching "#" per strwhitestartcmp) and
 * lines that only_whitespace() reports as blank are ignored. A
 * definition that fails to parse, or a line that is none of the three
 * kinds, produces a warning and reading carries on.
 *
 * Always returns TRUE; parse problems are reported through warn() only.
 */
boolean read_into_MethodTypeSet(MethodTypeSet * mts,FILE * ifp)
{
  char buffer[MAXLINE];
  Method * me;
  Type * ty;
  Input * in;

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    chop_newline(buffer);

    if( buffer[0] == '#' || strwhitestartcmp(buffer,"#",spacestr) == 0 )
      continue;
    if( only_whitespace(buffer,spacestr) == TRUE )
      continue;

    if( strstartcmp(buffer,"method") == 0 ) {
      if( (me=read_Method_line(buffer,ifp)) == NULL ) {
        warn("Unable to read method in line [%s] ",buffer);
      } else {
        add_me_MethodTypeSet(mts,me);
      }
    } else if( strstartcmp(buffer,"type") == 0 ) {
      if( (ty=read_Type_line(buffer,ifp)) == NULL ) {
        warn("Unable to read type in line [%s] ",buffer);
      } else {
        add_ty_MethodTypeSet(mts,ty);
      }
    } else if( strstartcmp(buffer,"input") == 0 ) {
      if( (in=read_Input_line(buffer,ifp)) == NULL ) {
        /* this branch parses an input definition, so say so in the warning */
        warn("Unable to read input in line [%s]",buffer);
      } else {
        add_in_MethodTypeSet(mts,in);
      }
    } else {
      warn("In reading only method/types got an impossible line [%s]",buffer);
    }
  }

  return TRUE;
}
int check_type_GeneFrequency(char *line,FILE * ifp,int * center,int * phase) { int ret = GeneConsensusType_Error; char * runner; if( strwhitestartcmp(line,"type",spacestr) != 0 ) { warn("Attempting to check phase of consensus with no type line..."); return GeneConsensusType_Error; } runner = strtok(line,spacestr); runner = strtok(NULL,spacestr); if( runner == NULL ) { warn("GeneFrequency type with no type. Can't read type, must set to error, but problem in later parsing"); ret = GeneConsensusType_Error; } else { ret = string_to_GeneConsensusType(runner); } while( fgets(line,MAXLINE,ifp) != NULL ) { if( line[0] == '#' ) continue; else if( strwhitestartcmp(line,"phase",spacestr) == 0 ) { runner = strtok(line,spacestr); runner = strtok(NULL,spacestr); if( runner == NULL ) { warn("Got phase line with no phase. Sad...."); continue; } if( phase != NULL ) { if( strcmp(runner,"all") ==0 || strcmp(runner,"All") == 0) *phase = 3; else *phase = atoi(runner); } } else if( strwhitestartcmp(line,"center",spacestr) == 0 || strwhitestartcmp(line,"centre",spacestr) == 0) { runner = strtok(line,spacestr); runner = strtok(NULL,spacestr); if( runner == NULL ) { warn("Got center line with no phase. Sad...."); continue; } if( center != NULL ) { *center = atoi(runner); } } else { break; } } return ret; }
GeneFrequency21 * read_GeneFrequency21(FILE * ifp) { GeneFrequency21 * out; GeneConsensus * temp; char buffer[MAXLINE]; int phase; int center; int type; boolean err = FALSE; out = GeneFrequency21_alloc(); while( fgets(buffer,MAXLINE,ifp) != NULL ) { if( buffer[0] == '#' ) continue; if( strwhitestartcmp(buffer,"type",spacestr) == 0 ) { phase = 3; /** if no phase, assumme it is for all phases **/ type = check_type_GeneFrequency(buffer,ifp,¢er,&phase); switch(type) { case GeneConsensusType_5SS : if( phase == 3) { temp = read_line_GeneConsensus(buffer,ifp); temp->center = center; out->ss5 = temp; } else { if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd 5'SS information ... problem!"); break; } } break; case GeneConsensusType_3SS : if( phase == 3) { temp = read_line_GeneConsensus(buffer,ifp); temp->center = center; out->ss3 = temp; } else { if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd 5'SS information ... problem!"); err = TRUE; } } break; case GeneConsensusType_CDS : if( phase == 0) { if( read_codon_GeneConsensus(out->codon,buffer,ifp) == FALSE ) { warn("Unable to read codon information in GeneFrequency21... problem!"); break; } } else if( phase == 3 ) { /*** we need this! ***/ if( read_codon_GeneConsensus(out->cds_triplet,buffer,ifp) == FALSE ) { warn("Unable to read codon information in GeneFrequency21... problem!"); break; } } else { /** in a different phase **/ if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd CDS information ... problem!"); err = TRUE; } } break; case GeneConsensusType_Intron_emission : if( phase == 3 ) { if( read_base_GeneConsensus(out->central,buffer,ifp) == FALSE ) { warn("Unable to read Intron emissions in genefrequency21 ... problem!"); err = TRUE; } } else { if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd CDS information ... 
problem!"); err = TRUE; } } break; case GeneConsensusType_Pyrimidine_emission : if( phase == 3 ) { if( read_base_GeneConsensus(out->py,buffer,ifp) == FALSE ) { warn("Unable to read pyrimidine emissions in genefrequency21 ... problem!"); err = TRUE; } } else { if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd pyrimidine information ... problem!"); err = TRUE; } } break; case GeneConsensusType_Spacer_emission : if( phase == 3 ) { if( read_base_GeneConsensus(out->spacer,buffer,ifp) == FALSE ) { warn("Unable to read spacer emissions in genefrequency21 ... problem!"); err = TRUE; } } else { if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd spacer information ... problem!"); err = TRUE; } } break; case GeneConsensusType_Central_stay : out->transition[GF21_CENTRAL_STAY] = double_from_line(buffer); break; case GeneConsensusType_Pyrimidine_stay : out->transition[GF21_PY_STAY] = double_from_line(buffer); break; case GeneConsensusType_Spacer_stay : out->transition[GF21_SPACER_STAY] = double_from_line(buffer); break; case GeneConsensusType_No_spacer : out->transition[GF21_NO_SPACER] = double_from_line(buffer); break; case GeneConsensusType_Intron_Corr_Term : switch(phase) { case 0 : /* out->transition[GF21_INTRON_CORR_TERM_0] = double_from_line(buffer); */ break; case 1 : /* out->transition[GF21_INTRON_CORR_TERM_1] = double_from_line(buffer); */ break; case 2 : /* out->transition[GF21_INTRON_CORR_TERM_2] = double_from_line(buffer); */ break; case 3 : out->transition[GF21_INTRON_CORR_TERM] = double_from_line(buffer); break; default : warn("Well... I have got some bad news for you. We found a phase of %d in Intron correction term. ",phase); break; } break; default : warn("Got an unidenitifable type in GeneFrequency21 parse. Skippping"); if( skip_consensus(ifp) == FALSE ) { warn("Unable to skip phase'd 5'SS information ... 
problem!"); err = TRUE; } } if( err == TRUE ) { warn("You have had an unrecoverable error in GeneFrequency21 parsing"); break; } } else { striptoprint(buffer); warn("Could not understand line [%s] in GeneFrequency21 parse",buffer); } } return out; }