/*
 * Fills in a DataEntry for a model coming out of a ThreeStateDB,
 * dispatching on mdb->dbtype:
 *
 *   TSMDB_SINGLE     - de->name is set from stringalloc(mdb->single->name)
 *   TSMDB_HMMER1PFAM - de->name is set from stringalloc(tss->name) when tss
 *                      is non-NULL; a NULL tss leaves de untouched
 *   TSMDB_PROTEIN    - hands over to add_SequenceDB_info_DataEntry
 *   TSMDB_GENERIC    - hands over to the mdb->dataentry_add callback
 *
 * Returns TRUE on success. Returns FALSE, after a warn(), when the generic
 * callback fails or the dbtype is not recognised.
 */
boolean dataentry_add_ThreeStateDB(DataEntry * de,ThreeStateScore * tss,ThreeStateDB * mdb)
{
  switch(mdb->dbtype) {
  case TSMDB_SINGLE :
    de->name = stringalloc(mdb->single->name);
    return TRUE;

  case TSMDB_HMMER1PFAM :
    if( tss != NULL ) {
      de->name = stringalloc(tss->name);
    }
    return TRUE;

  case TSMDB_PROTEIN :
    add_SequenceDB_info_DataEntry(mdb->sdb,de);
    return TRUE;

  case TSMDB_GENERIC :
    if( (*mdb->dataentry_add)(mdb,de) == FALSE ) {
      /* tss is allowed to be NULL (see the HMMER1PFAM case), so it must
         not be dereferenced blindly just to build the message */
      warn("Could not add dataentry info to the entry %s",
           tss == NULL ? "(no score object)" : tss->name);
      return FALSE;
    }
    return TRUE;

  default :
    warn("Unknown threestatedb type");
    return FALSE;
  }
}
/*
 * Walks the AlnUnit list of an AlnSequence and builds a SequenceErrorSet
 * with one SequenceError per unit labelled "SEQUENCE_INSERTION" or
 * "SEQUENCE_DELETION". Units with any other label are skipped.
 *
 * For both kinds start is alu->start+1, end is alu->end and replaced_bases
 * is "NNN"; insertions additionally get inserted_bases "?".
 *
 * als must be non-NULL (asserted). Returns the new set.
 */
SequenceErrorSet * genewise_SequenceErrorSet(AlnSequence * als )
{
  SequenceErrorSet * errors;
  SequenceError * err;
  AlnUnit * unit;

  assert(als);

  errors = SequenceErrorSet_alloc_std();

  unit = als->start;
  while( unit != NULL ) {
    if( strcmp(unit->text_label,"SEQUENCE_INSERTION") == 0 ) {
      err = SequenceError_alloc();
      add_SequenceErrorSet(errors,err);

      err->type           = SeqErrorInsertion;
      err->start          = unit->start+1;
      err->end            = unit->end;
      err->inserted_bases = stringalloc("?");
      err->replaced_bases = stringalloc("NNN");
    } else if( strcmp(unit->text_label,"SEQUENCE_DELETION") == 0 ) {
      err = SequenceError_alloc();

      err->type           = SeqErrorDeletion;
      err->start          = unit->start+1;
      err->end            = unit->end;
      err->replaced_bases = stringalloc("NNN");

      add_SequenceErrorSet(errors,err);
    }
    unit = unit->next;
  }

  return errors;
}
/*
 * Allocates a StructElement, sets its name and element_type from
 * stringalloc() of the two arguments and marks it as linked
 * (islinked = TRUE).
 */
StructElement * StructElement_from_nameandtype(char * name,char * type)
{
  StructElement * element = StructElement_alloc();

  element->islinked     = TRUE;
  element->name         = stringalloc(name);
  element->element_type = stringalloc(type);

  return element;
}
/*
 * Parses a "<word> <number>" line into a GeneSingleCons.
 *
 * The line is tokenised in place with strtok() on spacestr, so it is
 * modified. Returns NULL (after a warn) when fewer than two words are
 * present. A number that strtod() cannot fully consume only produces a
 * warning; the object is still returned with whatever strtod() yielded.
 */
GeneSingleCons * read_line_GeneSingleCons(char * line)
{
  GeneSingleCons * cons;
  char * word;
  char * numtext;
  char * stop;

  word    = strtok(line,spacestr);
  numtext = strtok(NULL,spacestr);

  if( !(word != NULL && numtext != NULL) ) {
    warn("In read_line_GeneSingleCons was not give two different words in line [%s]",line);
    return NULL;
  }

  cons = GeneSingleCons_alloc();
  cons->string = stringalloc(word);
  cons->number = strtod(numtext,&stop);

  /* nothing converted, or trailing junk after the number */
  if( stop == numtext || *stop != '\0' ) {
    warn("In read_line_GeneSingleCons, for string [%s], unable to convert the number [%s]",cons->string,numtext);
  }

  return cons;
}
/*
 * Reads one buffer from the stream via WISE2_READ_BUFFER, cuts it at the
 * first '\n', '\r' or '\0', and returns stringalloc() of that text. A
 * second buffer is then read and thrown away. Progress is traced on stderr.
 *
 * The scan stops one short of the buffer size, so the terminating '\0' is
 * always written inside the array. Scanning all 1024 positions wrote to
 * buffer[1024] whenever no terminator was found.
 *
 * NOTE(review): whether WISE2_READ_BUFFER NUL-terminates the buffer is not
 * visible here; the "%s" trace of the raw buffer relies on it - confirm.
 */
void * stream_to_object_string(Wise2ReadStreamInterface * read)
{
  char buffer[1024];
  char * out;
  int i;

  fprintf(stderr,"reading string...\n");
  WISE2_READ_BUFFER(buffer,1024,read);
  fprintf(stderr,"read buffer %s...\n",buffer);

  for(i=0;i<(int)sizeof(buffer)-1;i++) {
    if( buffer[i] == '\n' || buffer[i] == '\r' || buffer[i] == '\0' ) {
      break;
    }
  }
  buffer[i] = '\0';

  out = stringalloc(buffer);

  WISE2_READ_BUFFER(buffer,1024,read);
  fprintf(stderr,"discarding buffer... %s, have string %s\n",buffer,out);

  return out;
}
/*
 * Translates a DNA Sequence into a protein Sequence using the codon table.
 *
 * Returns NULL (after a warn) if is_dna_Sequence() rejects the input. The
 * result is named "<dna name>.tr", or "NoNameDNASeq.tr" when the input has
 * no name, and has type SEQUENCE_PROTEIN.
 *
 * The name is built with snprintf so that an over-long DNA name is
 * truncated to fit the 512-byte scratch buffer instead of overrunning it.
 *
 * NOTE(review): the loop condition i < dna->len-3 stops before a codon
 * that ends exactly at the end of the sequence (a length-6 input yields
 * one residue). It is left unchanged because callers may depend on it -
 * confirm whether dropping the final codon is intended.
 */
Sequence * translate_Sequence(Sequence * dna,CodonTable * ct)
{
  Sequence * out;
  int i;
  int j;
  int len;
  char * seq;
  char * name;
  char buffer[512];

  if( is_dna_Sequence(dna) == FALSE) {
    warn("Trying to make a translation from a non DNA sequence... type is [%s]",Sequence_type_to_string(dna->type));
    return NULL;
  }

  /* one residue per codon plus the terminating NUL */
  len = dna->len/3 + 1;
  seq = ckcalloc(len,sizeof(char));

  snprintf(buffer,sizeof(buffer),"%s.tr",dna->name == NULL ? "NoNameDNASeq" : dna->name);
  name = stringalloc(buffer);

  out = Sequence_from_dynamic_memory(name,seq);

  for(i=0,j=0;i<dna->len-3;i+=3,j++) {
    out->seq[j] = aminoacid_from_seq(ct,dna->seq+i);
  }
  out->seq[j] = '\0';

  out->type = SEQUENCE_PROTEIN;
  out->len  = strlen(out->seq);

  return out;
}
/*
 * Translates a sequence codon by codon using the table read from the file
 * "codon.table". A residue is written in upper case only when all three
 * bases of its codon are upper case; otherwise it is written in lower case.
 *
 * Only complete codons are translated. Looping while i < len read up to
 * two bytes past the input and wrote one residue past the
 * calloc(1+len/3) output whenever len was not a multiple of three.
 *
 * Only name and seq of the returned Sequence are filled in here.
 *
 * NOTE(review): the CodonTable is neither checked for NULL nor released
 * in this function - confirm ownership and failure behaviour of
 * read_CodonTable_file.
 */
Sequence * translate_swapped(Sequence * swapped)
{
  CodonTable * ct;
  int i,j;
  Sequence * out;
  char aa;

  out = Sequence_alloc();
  out->name = stringalloc(swapped->name);
  out->seq  = calloc(1+swapped->len/3,sizeof(char));

  ct = read_CodonTable_file("codon.table");

  for(i=0,j=0;i+2<swapped->len;i+=3,j++) {
    aa = aminoacid_from_seq(ct,swapped->seq+i);

    /* <ctype.h> functions need values representable as unsigned char */
    if( isupper((unsigned char)swapped->seq[i]) &&
        isupper((unsigned char)swapped->seq[i+1]) &&
        isupper((unsigned char)swapped->seq[i+2]) ) {
      out->seq[j] = toupper((unsigned char)aa);
    } else {
      out->seq[j] = tolower((unsigned char)aa);
    }
  }
  out->seq[j] = '\0';

  return out;
}
/*
 * Loads the ThreeStateModel for a Pfam entry from "<dir>/<entryname>.hmm"
 * using HMMer2_read_ThreeStateModel.
 *
 * On success the model is renamed to the entry name,
 * display_char_in_ThreeStateModel is applied, and the model is forced to
 * a weighted local model: weights (1.0,1.0,1.0) when en->is_hmmls is
 * FALSE, (1.0,0.5,0.5) otherwise.
 *
 * Returns NULL (after a warn) if the path does not fit the 512-byte
 * buffer or the file cannot be read. The path is built with snprintf so
 * a long dir or entry name can no longer overrun the buffer.
 */
ThreeStateModel * read_TSM_from_PfamHmmer1Entry(PfamHmmer1Entry * en,char * dir)
{
  char buffer[512];
  ThreeStateModel * tsm;
  int written;

  written = snprintf(buffer,sizeof(buffer),"%s/%s.hmm",dir,en->entryname);
  if( written < 0 || (size_t)written >= sizeof(buffer) ) {
    warn("Path to hmm file for Pfam entry [%s] in directory [%s] is too long",en->entryname,dir);
    return NULL;
  }

  /* tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(buffer); */
  tsm = HMMer2_read_ThreeStateModel(buffer);

  if( tsm == NULL ) {
    warn("Could not open Hmmer1 style hmm from Pfam db on file [%s]",buffer);
    return NULL;
  }

  /* replace whatever name the reader assigned with the Pfam entry name */
  if( tsm->name != NULL ) {
    ckfree(tsm->name);
  }
  tsm->name = stringalloc(en->entryname);

  display_char_in_ThreeStateModel(tsm);

  /* ignore random stuff for the moment */
  if( en->is_hmmls == FALSE ) {
    force_weighted_local_model(tsm,1.0,1.0,1.0);
  } else {
    force_weighted_local_model(tsm,1.0,0.5,0.5);
  }

  return tsm;
}
FileSource * FileSource_from_line(char * line) { FileSource * out; char * runner; char * run2; char * run3; runner = strtok(line,spacestr); run2 = strtok(line,spacestr); run3 = strtok(line,spacestr); if( runner == NULL || run2 == NULL || run3 == NULL ) { warn("You have not provided a database source line"); return NULL; } out = FileSource_alloc(); out->filename = stringalloc(runner); if( (out->format = word_to_format(run2)) == SEQ_DB_UNKNOWN) { warn("For filename %s, the format [%s] is unknown to me",runner,run2); } return out; }
/*
 * Replaces the file-level log_display_string with stringalloc(str),
 * releasing any previous value with ckfree() first.
 */
void set_log_display_string(char * str)
{
  if( log_display_string ) {
    ckfree(log_display_string);
  }

  log_display_string = stringalloc(str);
}
/*
 * Builds a new GenoVarSet holding only the loci whose var->locus_type is
 * SIMPLE_SNP_LOCUS.
 *
 * Every individual of the input is added via hard_link_Individual. Each
 * chromosome is recreated with the same chr string, even if no locus on
 * it qualifies, and the qualifying loci are added via
 * hard_link_GenoVarLocus.
 */
GenoVarSet * only_simple_snp_loci_GenoVarSet(GenoVarSet * gvs)
{
  int c;
  int l;
  GenoVarSet * filtered;
  GenoVarChr * src;
  GenoVarChr * dest;

  filtered = GenoVarSet_alloc_len(gvs->len);

  for(c=0;c<gvs->ind_len;c++) {
    add_ind_GenoVarSet(filtered,hard_link_Individual(gvs->ind[c]));
  }

  for(c=0;c<gvs->len;c++) {
    src  = gvs->chr[c];
    dest = GenoVarChr_alloc_len(src->len);
    dest->chr = stringalloc(src->chr);

    for(l=0;l<src->len;l++) {
      if( src->loci[l]->var->locus_type != SIMPLE_SNP_LOCUS ) {
        continue;
      }
      add_GenoVarChr(dest,hard_link_GenoVarLocus(src->loci[l]));
    }

    add_GenoVarSet(filtered,dest);
  }

  return filtered;
}
/*
 * Records one argument of a function declaration in a FuncInfo.
 *
 * str is a single argument such as "char * name" and is modified in
 * place. If it contains '(' it is treated as a pointer-to-function and
 * passed to reconcile_FuncInfo_with_pfunc. Otherwise surrounding
 * whitespace is trimmed, the last whitespace-delimited word becomes the
 * argument name and the text before it (trailing whitespace removed)
 * becomes the type.
 *
 * An argument whose name word is "void" adds nothing. If no ArgInfo with
 * that name exists yet, one is created with the description
 * "Undocumented argument". The type and position are then stored.
 *
 * A single-word argument is now kept whole as the name, with an empty
 * type. Previously the backwards scan stopped on the first character and
 * treated it as the type, so a lone "void" was seen as "oid" and never
 * matched the void check. Empty input is also handled without forming a
 * pointer before the start of str.
 *
 * Always returns TRUE on the non-pfunc path.
 */
boolean reconcile_FuncInfo_with_argstr(FuncInfo * fi,char * str,int pos)
{
  char * name;
  char * type;
  char * end;
  ArgInfo * temp;
  size_t len;

  if( strchr(str,'(') != NULL )
    return reconcile_FuncInfo_with_pfunc(fi,str,pos);

  /* trim leading whitespace */
  while( isspace((unsigned char)*str) )
    str++;

  /* trim trailing whitespace */
  len = strlen(str);
  while( len > 0 && isspace((unsigned char)str[len-1]) )
    len--;
  str[len] = '\0';

  /* back up to the start of the last word */
  name = str + len;
  while( name > str && !isspace((unsigned char)name[-1]) )
    name--;

  if( strcmp(name,"void") == 0 )
    return TRUE;

  if( name == str ) {
    /* only one word (or nothing at all): there is no type text */
    type = "";
  } else {
    /* cut the type off where the whitespace before the name begins */
    end = name;
    while( end > str && isspace((unsigned char)end[-1]) )
      end--;
    *end = '\0';
    type = str;
  }

  if( (temp=get_ArgInfo_by_name(fi,name)) == NULL ) {
    temp = ArgInfo_alloc();
    add_FuncInfo(fi,temp);
    temp->name = stringalloc(name);
    temp->desc = stringalloc("Undocumented argument");
  }

  temp->type   = stringalloc(type);
  temp->argpos = pos;

  return TRUE;
}
/*
 * Records a pointer-to-function argument in a FuncInfo.
 *
 * This is a kludge: str is assumed to look like
 * "type (*name)(type,type,type)". The name is the text after the first
 * '*' that follows the first '(', up to the next whitespace or ')'. str
 * is cut at the end of the name; an untouched copy of the whole
 * declaration is kept in func_decl.
 *
 * If no ArgInfo with that name exists, one is created with the
 * description "Undocumented argument". Otherwise the existing one gets
 * its type, argtype (ARGTYPE_P2FUNC) and argpos set.
 *
 * Returns FALSE, changing nothing, when str has no '(' followed by '*';
 * the unchecked strchr results used to be dereferenced. The name scan
 * also stops at the end of the string, where it used to run past the NUL
 * if neither whitespace nor ')' followed.
 *
 * NOTE(review): a newly created ArgInfo does not get type, argtype or
 * argpos, unlike the existing-entry branch - confirm this is intended.
 */
boolean reconcile_FuncInfo_with_pfunc(FuncInfo * fi,char * str,int pos)
{
  char * runner;
  char * name;
  ArgInfo * temp;
  char * held;

  runner = strchr(str,'(');
  if( runner != NULL )
    runner = strchr(runner,'*');
  if( runner == NULL )
    return FALSE;

  /* copy the full declaration before str is cut below */
  held = stringalloc(str);

  name = runner + 1;
  for(runner++;*runner != '\0' && !isspace((unsigned char)*runner) && *runner != ')';runner++)
    ;
  *runner = '\0';

  if( (temp=get_ArgInfo_by_name(fi,name)) == NULL ) {
    temp = ArgInfo_alloc();
    add_FuncInfo(fi,temp);
    temp->name = stringalloc(name);
    temp->desc = stringalloc("Undocumented argument");
    temp->func_decl = held;
  } else {
    temp->type = stringalloc(str);
    temp->argtype = ARGTYPE_P2FUNC;
    temp->argpos = pos;
    temp->func_decl = held;
  }

  return TRUE;
}
ArgInfo * read_ArgInfo_line(char * line) { ArgInfo * out; char * runner; char * fix; out = ArgInfo_alloc(); for(runner=line;*runner && !isalpha(*runner) ;runner++) ; fix = runner; for(runner=line;*runner && iscword(*runner);runner++) ; /*** got first word ***/ *runner = '\0'; out->name = stringalloc(fix); /*** next word ***/ for(runner++;*runner && !isalpha(*runner);runner++) ; /*** if it is a valid arg type, get it and move on ***/ if( (out->argtype=get_arg_type(runner,&out->should_NULL)) != ARGTYPE_UNKNOWN) { for(;*runner && isalnum(*runner);runner++) ; for(;*runner && isspace(*runner);runner++) ; } fix = runner; for(;*runner && *runner != '\n';runner++) ; *runner = '\0'; out->desc = stringalloc(fix); return out; }
DPImplementation * new_DPImplementation_from_argstr(int * argc,char ** argv) { DPImplementation * out; char * temp; out = DPImplementation_alloc(); if( (strip_out_boolean_argument(argc,argv,"pthreads")) == TRUE ) { out->do_threads = TRUE; } if( (temp=strip_out_assigned_argument(argc,argv,"dbtrace")) != NULL ) { if( is_integer_string(temp,&out->db_trace_level) == FALSE ) { warn("%s is not an integer argument for dbtrace",temp); } } if( strip_out_boolean_argument(argc,argv,"O") == TRUE ) { out->largemem= TRUE; /* other optimisations */ } strip_out_boolean_def_argument(argc,argv,"largemem",&out->largemem); strip_out_boolean_def_argument(argc,argv,"onemodel",&out->doone); if( strip_out_boolean_argument(argc,argv,"prob") == TRUE ) { out->doprob = TRUE; } if( strip_out_boolean_argument(argc,argv,"g") == TRUE ) { out->dydebug = TRUE; } if( (temp=strip_out_assigned_argument(argc,argv,"logsum")) != NULL ) { out->calcfunc = stringalloc(temp); } else { out->calcfunc = stringalloc("Probability_logsum"); } out->dycw = new_DycWarning_from_argstr(argc,argv); /* fprintf(stderr,"And %d is extern warning",out->dycw->warn_extern);*/ return out; }
/*
 * Translates the codon at seq with the codon table and returns the
 * residue as a one-character string produced by stringalloc().
 */
char * alloc_aminoacid_from_seq(CodonTable * ct,char * seq)
{
  char residue[2] = { '\0', '\0' };

  residue[0] = aminoacid_from_codon(ct,codon_from_seq(seq));

  return stringalloc(residue);
}
/*
 * Copies the contents of one DataEntry into another.
 *
 * The name is duplicated with stringalloc(). The DATAENTRYSTDPOINTS data
 * slots, is_reversed and byte_position are copied by value. The filename
 * pointer is shared between the two entries, not duplicated.
 */
void copy_DataEntry(DataEntry * from,DataEntry * to)
{
  int slot = 0;

  to->name = stringalloc(from->name);

  while( slot < DATAENTRYSTDPOINTS ) {
    to->data[slot] = from->data[slot];
    slot++;
  }

  to->byte_position = from->byte_position;
  to->is_reversed   = from->is_reversed;
  to->filename      = from->filename; /* linked! */
}
/*
 * Fills in a DataEntry for a ComplexSequence taken from a GenomicDB.
 *
 * The name comes from stringalloc(cs->seq->name) and is_reversed from
 * is_reversed_Sequence. Unless the database is a single sequence, the
 * SequenceDB information is added as well. Always returns TRUE.
 */
boolean dataentry_add_GenomicDB(DataEntry * de,ComplexSequence * cs,GenomicDB * gendb)
{
  de->name        = stringalloc(cs->seq->name);
  de->is_reversed = is_reversed_Sequence(cs->seq);

  if( !gendb->is_single_seq ) {
    add_SequenceDB_info_DataEntry(gendb->sdb,de);
  }

  return TRUE;
}
GeneModelParam * new_GeneModelParam_from_argv(int * argc,char ** argv) { GeneModelParam * out; char * temp; out = std_GeneModelParam(); if( (temp=strip_out_assigned_argument(argc,argv,"splice_min_collar")) != NULL ) { if( is_double_string(temp,&out->min_collar) == FALSE ) { warn("%s is not a floating point number. Can't be a splice_min_collar",temp); free_GeneModelParam(out); return NULL; } } strip_out_boolean_def_argument(argc,argv,"splice_gtag",&out->use_gtag_splice); if( (temp=strip_out_assigned_argument(argc,argv,"splice_max_collar")) != NULL ) { if( is_double_string(temp,&out->max_collar) == FALSE ) { warn("%s is not a floating point number. Can't be a splice_max_collar",temp); free_GeneModelParam(out); return NULL; } } if( (temp=strip_out_assigned_argument(argc,argv,"splice_score_offset")) != NULL ) { if( is_double_string(temp,&out->score_offset) == FALSE ) { warn("%s is not a floating point number. Can't be a splice_score_offset",temp); free_GeneModelParam(out); return NULL; } } if( (temp=strip_out_assigned_argument(argc,argv,"genestats")) != NULL ) { if( out->gene_stats_file != NULL ) { ckfree(out->gene_stats_file); } out->gene_stats_file = stringalloc(temp); } if( (temp=strip_out_assigned_argument(argc,argv,"splice_gtag_prob")) != NULL ) { if( is_double_string(temp,&out->prob_for_gtag) == FALSE ) { warn("%s is not a floating pointer number. Can't be a probability for gtag",temp); free_GeneModelParam(out); return NULL; } } return out; }
/*
 * Allocates a ComplexConsensusWord whose pattern is stringalloc(string),
 * whose probability is p and whose score is Probability2Score(p).
 */
ComplexConsensusWord * ComplexConsensusWord_from_string_and_prob(char * string,Probability p)
{
  ComplexConsensusWord * word = ComplexConsensusWord_alloc();

  word->pattern = stringalloc(string);
  word->score   = Probability2Score(p);
  word->p       = p;

  return word;
}
/*
 * Returns a fresh MethodTypeSet pre-loaded with three Types, "int",
 * "double" and "Score", added in that order. For each one the logical
 * and the real type names are the same string.
 */
MethodTypeSet * standard_dynamite_MethodTypeSet(void)
{
  static char * const builtin[] = { "int", "double", "Score" };
  MethodTypeSet * set;
  Type * ty;
  int k;

  set = empty_MethodTypeSet();

  for(k=0;k<(int)(sizeof(builtin)/sizeof(builtin[0]));k++) {
    ty = Type_alloc();
    ty->logical = stringalloc(builtin[k]);
    ty->real    = stringalloc(builtin[k]);
    add_ty_MethodTypeSet(set,ty);
  }

  return set;
}
/*
 * Fills in a DataEntry for a ComplexSequence taken from a cDNADB.
 *
 * Returns FALSE (after a warn) when cs or cs->seq is NULL. Otherwise the
 * SequenceDB information is added unless the database is a single
 * sequence, the name is set from stringalloc(cs->seq->name), is_reversed
 * from is_reversed_Sequence, and TRUE is returned.
 */
boolean dataentry_add_cDNADB(DataEntry * de,ComplexSequence * cs,cDNADB * cdnadb)
{
  if( !(cs != NULL && cs->seq != NULL) ) {
    warn("Adding a dataentry with a NULL complex sequence or null internal sequence. Nope!");
    return FALSE;
  }

  if( cdnadb->is_single_seq == FALSE ) {
    add_SequenceDB_info_DataEntry(cdnadb->sdb,de);
  }

  de->name        = stringalloc(cs->seq->name);
  de->is_reversed = is_reversed_Sequence(cs->seq);

  return TRUE;
}
/*
 * Builds the TransferedFunctionCall named "hspscan_protein". Its returned
 * type comes from LinearHSPmanager_TransferedObjectMarshaller(). Two
 * marshallers are added in order: the Sequence one, then the
 * HSPScanInterfacePara one.
 */
TransferedFunctionCall * new_hspscan_protein_TransferedFunctionCall(void)
{
  TransferedFunctionCall * call = TransferedFunctionCall_alloc_std();

  call->name          = stringalloc("hspscan_protein");
  call->returned_type = LinearHSPmanager_TransferedObjectMarshaller();

  /* marshallers are added in this order: Sequence, then search parameters */
  add_TransferedFunctionCall(call,Sequence_TransferedObjectMarshaller());
  add_TransferedFunctionCall(call,HSPScanInterfacePara_TransferedObjectMarshaller());

  return call;
}
/*
 * Makes the score-space counterpart of a RandomCodon: calls
 * Probability2Score_move on rc->codon and the new object's codon array
 * with a count of 126, and copies the name with stringalloc() when the
 * source has one.
 */
RandomCodonScore * RandomCodonScore_from_RandomCodon(RandomCodon * rc)
{
  RandomCodonScore * scores = RandomCodonScore_alloc();

  Probability2Score_move(rc->codon,scores->codon,126);

  if( rc->name == NULL ) {
    return scores;
  }

  scores->name = stringalloc(rc->name);
  return scores;
}
PhasedProteinPara * new_PhasedProteinPara_from_argv(int * argc,char ** argv) { PhasedProteinPara * out; char * temp; out = PhasedProteinPara_alloc(); out->marked_intron = 0.95; out->unmarked_intron = 0.00001; out->use_phase = 0; strip_out_float_argument(argc,argv,"phase_marked",&out->marked_intron); strip_out_float_argument(argc,argv,"phase_unmarked",&out->unmarked_intron); /* strip_out_boolean_def_argument(argc,argv,"phase_model",&out->use_phase); */ if( (temp = strip_out_assigned_argument(argc,argv,"phase_file")) != NULL ) { out->intron_file = stringalloc(temp); } if( strip_out_boolean_argument(argc,argv,"phase_help") == TRUE ) { fprintf(stdout,"Phased marks provide the ability to restrict the position of introns\n"); fprintf(stdout,"relative to the protein sequence; ie, assuming conserved introns. This\n"); fprintf(stdout,"is most useful for fast evolving genes inside of relatively consistent\n"); fprintf(stdout,"clades, eg for fast evolving genes, such as cytokines, in vertebrates\n"); fprintf(stdout,"As moving between clades - say between Human and Drosophila - the intron\n"); fprintf(stdout,"positions change, using these options would actively hinder good gene prediction\n"); fprintf(stdout,"\n"); fprintf(stdout,"This option can be used for either HMMs or proteins, although it is harder\n"); fprintf(stdout,"to coordinate the HMM intron position than the protein positions.\n"); fprintf(stdout,"Two things need to occur to use the phase information\n"); fprintf(stdout," provide a phase mark file as -phase_file <xxxxxx>\n"); fprintf(stdout," use the algorithm type 623P (6 states, 23 transitions, phased introns)\n"); fprintf(stdout,"\n"); fprintf(stdout,"The phase model attempts to make a ATG to STOP gene, even if the protein match\n"); fprintf(stdout,"is not present across the entire gene. 
One major headache in this are introns in first\n"); fprintf(stdout,"ATG, which is not handled at the moment\n\n"); fprintf(stdout,"Genewise uses the protein position, in 1 coordinates, (first amino acid is 1)\n"); fprintf(stdout,"for the definition of the intron. For phase 0 introns, it should be labeled as\n"); fprintf(stdout,"the amino acid before the intron. For phase 1 and 2 introns, this is on the intron\n\n"); fprintf(stdout,"We suggest using a small spread of positions to cope with intron positioning errors\n"); fprintf(stdout," eg, defining an intron at position 4, phase 0, make postions 3,4 and 5 with position 0\n\n"); fprintf(stdout,"The phase file format is\n"); fprintf(stdout,"# lines starting with hash are comments\n"); fprintf(stdout,"# three tab delimited columns\n"); fprintf(stdout,"# <protein-position> <phase>\n"); fprintf(stdout,"# eg\n"); fprintf(stdout,"4 0\n"); exit(0); } return out; }
/*
 * Extracts the module (type) name from an allocator or destructor
 * prototype line such as "Type * Type_alloc(" or "Type * free_Type(".
 *
 * The line is tokenised in place with strtok() on space, tab and '('.
 * The first token is the type name, the next must be exactly "*", and
 * the one after is the function name. With isalloc TRUE the function
 * must be "<name>_alloc", otherwise "free_<name>".
 *
 * Returns stringalloc(name) on success, or NULL after a warn().
 *
 * The missing-function check now tests func. It used to re-test name, so
 * a line without a function name passed NULL to strcmp(). The expected
 * function name is built with snprintf so that a long type name cannot
 * overrun the 128-byte buffer; a truncated name is reported as a
 * mismatch.
 *
 * NOTE(review): when the first token ends in '*' ("Type*"), name still
 * includes the '*', so the comparison cannot match - confirm whether
 * that spelling is meant to be accepted.
 */
char * parse_and_get_module_name_from_func(char * line,boolean isalloc)
{
  char * name;
  char * next;
  char * func;
  char buffer[128]; /** max function name! **/
  int written;

  name = strtok(line," \t(");

  if( name == NULL ) {
    warn("Cannot even get first name from line [%s] in parse module_name_alloc",line);
    return NULL;
  }

  if( *(name + strlen(name) - 1) == '*' ) {
    next = name + strlen(name) - 1;
  } else {
    next = strtok(NULL," \t(");
    if( next == NULL ) {
      warn("Cannot get pointer ref from line [%s] in parse module_name_alloc [name %s]",line,name);
      return NULL;
    }
  }

  func = strtok(NULL," \t(");

  if( func == NULL ) {
    warn("Cannot get function from line [%s] in parse module_name_alloc [name %s]",line,name);
    return NULL;
  }

  if( strlen(next) > 1 || *next != '*' ) {
    warn("In parse_module_name, the pointer string [%s] was invalid for name [%s]",next,name);
    return NULL;
  }

  if( isalloc == TRUE ) {
    written = snprintf(buffer,sizeof(buffer),"%s_alloc",name);
  } else {
    written = snprintf(buffer,sizeof(buffer),"free_%s",name);
  }

  if( written < 0 || (size_t)written >= sizeof(buffer) || strcmp(buffer,func) != 0 ) {
    warn("In parse_module_name, the function [%s] did not match the type-proto [%s]",func,buffer);
    return NULL;
  }

  return stringalloc(name);
}
/*
 * Builds the TransferedFunctionCall named "stringcat". The returned type
 * and both added marshallers each come from a separate call to
 * new_string_Marshaller().
 */
TransferedFunctionCall * test_stringcat_TransferedFunctionCall(void)
{
  TransferedFunctionCall * call = TransferedFunctionCall_alloc_std();
  int arg;

  call->name          = stringalloc("stringcat");
  call->returned_type = new_string_Marshaller();

  /* two string marshallers, each its own object */
  for(arg=0;arg<2;arg++) {
    add_TransferedFunctionCall(call,new_string_Marshaller());
  }

  return call;
}
/*
 * Makes an ExprTree node of type ETR_OPERATOR for a single-character
 * token. The character is stored in token and, as a one-character string
 * from stringalloc(), in word.
 */
ExprTree * new_ExprTree_token(char t)
{
  char text[2] = { '\0', '\0' };
  ExprTree * node;

  text[0] = t;

  node = new_ExprTree();
  node->type  = ETR_OPERATOR;
  node->token = t;
  node->word  = stringalloc(text);

  return node;
}
/*
 * Reads a sequence database definition.
 *
 * line is the header and must pass strstartcmp(line,"seqdb") == 0. Its
 * second whitespace-delimited word is the database name, or
 * "UnNamedDatabase" if there is none. line is tokenised in place.
 *
 * Lines are then read from ifp until EOF or a line for which
 * strstartcmp(buffer,"end") == 0. Lines matching "#" in the same way are
 * skipped. Every other line is parsed with FileSource_from_line and
 * added to the database when that succeeds.
 *
 * Returns NULL (after a warn) for a bad header line, or when the
 * database cannot be allocated.
 *
 * The database is now allocated before its name is assigned; the name
 * used to be written through a NULL pointer. The name is fetched with
 * strtok(NULL, ...); passing line again returned the first word twice.
 */
SequenceDB * read_SequenceDB_line(char * line,FILE * ifp)
{
  SequenceDB * out;
  FileSource * fs;
  char buffer[MAXLINE];
  char * runner;

  if( strstartcmp(line,"seqdb") != 0 ) {
    warn("Attempting to read a sequence line without a seqdb start");
    return NULL;
  }

  out = SequenceDB_alloc_std();
  if( out == NULL ) {
    return NULL;
  }

  /* first word is the seqdb tag itself, the name follows it */
  runner = strtok(line,spacestr);
  if( runner != NULL ) {
    runner = strtok(NULL,spacestr);
  }

  if( runner == NULL ) {
    out->name = stringalloc("UnNamedDatabase");
  } else {
    out->name = stringalloc(runner);
  }

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    if( strstartcmp(buffer,"#") == 0 )
      continue;
    if( strstartcmp(buffer,"end") == 0 )
      break;

    fs = FileSource_from_line(buffer);
    if( fs != NULL )
      add_SequenceDB(out,fs);
  }

  return out;
}
/*
 * Allocates a ModuleFunction named stringalloc(name) and adds it to the
 * list mfl. Returns the new object, or NULL (adding nothing) when the
 * allocation fails.
 */
ModuleFunction * new_ModuleFunction(ModuleFunctionList * mfl,char * name)
{
  ModuleFunction * fn = ModuleFunction_alloc();

  if( fn != NULL ) {
    fn->name = stringalloc(name);
    add_ModuleFunctionList(mfl,fn);
  }

  return fn;
}