Ejemplo n.º 1
0
//process input into three profiles Hs,Hu,Hn
//s = selected profiles, u = unselected in sample,n=in native only
//connects them, puts Hs in cluster with freq = 0 if not already there
int process_one_input(FILE *fp) {
  HASHTBL *halfbrac;
  FILE *file;
  char temp[100],tmp[ARRAYSIZE],*profile,*fullprofile,*diff,*native,*diffn,*dup;
  int i,j,k,*id,last=0,insample;
  int numhelix = 0,fullnum = 0,natnum = 0;
  int size = INIT_SIZE,size2 = INIT_SIZE,size3 = INIT_SIZE,size4 = INIT_SIZE,size5 = INIT_SIZE;
  
  if (!(halfbrac = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for halfbrac failed");
    exit(EXIT_FAILURE);
  }
  if (!(input = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for input failed");
    exit(EXIT_FAILURE);
  }
  //longest = hashtbl_get(max,"longest");
  profile = malloc(sizeof(char)*ARRAYSIZE*size);
  fullprofile = malloc(sizeof(char)*ARRAYSIZE*size2);
  diff = malloc(sizeof(char)*ARRAYSIZE*size3);
  native = malloc(sizeof(char)*ARRAYSIZE*size4);
  diffn = malloc(sizeof(char)*ARRAYSIZE*size5);
  profile[0] = '\0';
  fullprofile[0] = '\0';
  diff[0] = '\0';
  native[0] = '\0';
  diffn[0] = '\0';
  if (!(file = fopen(INPUT,"r")))
    fprintf(stderr,"Cannot open %s\n",INPUT);
  while (fgets(temp,100,file)) {
    //    if (sscanf(temp,"Structure %d (%d)",&i,&prob) == 2) 
    if (sscanf(temp,"%d %d %d",&i,&j,&k) == 3) {
      insample = 1;
      sprintf(tmp,"%d %d",i,j);
      id = hashtbl_get(bp,tmp);
      if (!id) {
	id = process_native(i,j,k);
	//printf("number in marginals %d\n",hashtbl_numkeys(marginals));
	if (*id > hashtbl_numkeys(marginals))
	  insample = 0;
      }
      if (*id != last) {
	sprintf(tmp,"%d",*id);
	if (strlen(native)+strlen(tmp) > (ARRAYSIZE*size4-2))
	  native = realloc(native,ARRAYSIZE*(++size4));
	sprintf(native,"%s%s ",native,tmp);
	natnum++;
	if (insample) {
	  fullnum++;
	  if (strlen(fullprofile)+strlen(tmp) > (ARRAYSIZE*size2-2)) 
	    fullprofile = realloc(fullprofile,ARRAYSIZE*(++size2));
	  sprintf(fullprofile,"%s%s ",fullprofile,tmp);
	  if (hashtbl_get(freq,tmp)) {   //is a freq helix, save to profile
	    numhelix++;
	    if (strlen(profile)+strlen(tmp) > (ARRAYSIZE*size-2)) 
	      profile = realloc(profile,ARRAYSIZE*(++size));
	    //printf("adding %d to profile\n",*id);
	    sprintf(profile,"%s%s ",profile,tmp);
	  } 
	  else { //if not freq record diff
	    if (strlen(diff)+strlen(tmp)+2 > ARRAYSIZE*size3)
	      diff = realloc(diff,ARRAYSIZE*(++size3));
	    sprintf(diff,"%s%s ",diff,tmp);	  
	    //printf("printing diff %s\n",diff);
	  }
	} 
	else {
	  if (strlen(diffn)+strlen(tmp)+2 > ARRAYSIZE*size5)
	    diffn = realloc(diffn,ARRAYSIZE*(++size5));
	  sprintf(diffn,"%s%s ",diffn,tmp);
	}	
	last = *id;
	make_brackets(halfbrac,i,j,*id);
      }
      else if (VERBOSE)
	printf("helix %d %d %d is duplicate with id %d: process_input()\n",i,j,k,*id);
    }
  }
  
  native = sort_input(native,natnum);
  //printf("native is now %s\n",native);
  make_bracket_rep(halfbrac,native);
  hashtbl_destroy(halfbrac);

  fullprofile = sort_input(fullprofile,fullnum);
  profile = sort_input(profile,numhelix);
  //printf("native %s, fullprofile %s, profile %s, diff %s, diffn %s\n",native,fullprofile,profile,diff,diffn);
  if (fullnum != natnum)
    make_edge_and_node(fp,fullprofile,native,diffn,natnum);
  if (numhelix != fullnum)
    make_edge_and_node(fp,profile,fullprofile,diff,fullnum);

  fprintf(fp,"\"%s\" [style = filled, fillcolor = gray60];\n",profile);  
  sprintf(tmp,"%d ",numhelix);
  if (strlen(tmp)+strlen(profile) > ARRAYSIZE*size+1)
    profile = realloc(profile,ARRAYSIZE*(++size));
  dup = mystrdup(profile);
  sprintf(profile,"%s%s",tmp,dup);
  id = hashtbl_get(cluster,profile);
  free(dup);
  
  if (!id) {			    
    id = malloc(sizeof(int));
    *id = 0;
    hashtbl_insert(cluster,profile,id);
    //printf("inserting input %s into cluster\n",profile);
  }
  
  free(profile);
  return numhelix;
}
Ejemplo n.º 2
0
//takes input profile and checks if in graph
//if so, changes node shape to hexagon
//if not, insert new vertex
HASHTBL* process_input_profile(FILE *fp,HASHTBL *brac,char *fullprofile, int fullnum,char *profile,int numhelix, char *diff, int prob) {
  HASHTBL *hash, *temp=NULL;
  char *diff1,*bracket,*difftrip;
  int *val,k1=0,k2=0;
  KEY *parent,*next;

  if (!(temp = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() failed");
    exit(EXIT_FAILURE);
  }

  make_bracket_rep(brac,fullprofile);
  hashtbl_destroy(brac);
  
  profile = sort_input(profile,numhelix);
  //printf("(sorted) profile is %s with fullprofile %s and diff %s\n",profile,fullprofile,diff);

  if ((hash = graph[numhelix-1]) && (hashtbl_get(hash,profile))) {
    if (numhelix == fullnum) {
      //printf("case 1: full profile found in graph\n");
      fullprofile = sort_input(fullprofile,numhelix);
    } else {
    //cannot use find_diff because fullprofile has helices not in table[]
      //puts("case 2: profile found in graph");
      diff = insert_diff(temp,diff);
      bracket = edge_label(temp,profile,fullprofile,fullnum);      
      fprintf(fp,"\"%s\"-> \"%s\" [label =\" %s\\n%s \",fontsize=8,style = dotted];\n",profile,fullprofile,diff,bracket); 
    }
  }
  else {
    /*          
    if (numhelix == fullnum)
      puts("case 3: full profile not found");
    else
      puts("case 4: profile not found");
    */
    for (parent = find_parents(profile); parent; parent = next) {
      diff1 = find_diff(temp,parent->data,profile,&k1,&k2);      
      if (numhelix != fullnum) {
	difftrip = insert_diff(temp,diff);
	diff1 = realloc(diff1,strlen(diff1)+strlen(difftrip)+4);
	//printf("for parent %s, diff1 is now %s, diff is %s and difftrip is %s\n",parent->data,diff1,diff,difftrip);
	sprintf(diff1,"%s\\n%s",diff1,difftrip);
	//printf("Diff is %s for parent %s of profile %s; diff %s for %s\n",diff1,parent->data,profile,difftrip,fullprofile);	
      }
      bracket = edge_label(temp,parent->data,fullprofile,fullnum);
      fprintf(fp,"\"%s\"-> \"%s\" [label =\" %s\\n%s \",fontsize=8,style = dotted];\n",parent->data,fullprofile,bracket,diff1); 
      next = parent->next;
      free(parent);
    }
  }
  //printf("%s has size %d and prob %d\n",fullprofile,fullnum,prob);
  fprintf(fp,"\"%s\" [shape = hexagon];\n",fullprofile);
  val = malloc(sizeof(int)*2);
  val[0] = fullnum;
  val[1] = prob;
  hashtbl_insert(input,fullprofile,val);
  hashtbl_destroy(temp);  

  if (!(brac = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() failed");
    exit(EXIT_FAILURE);
  }
  return brac;
}
extern input_list* unflatten_area(input_list *entry, int entry_number)
{
   input_list *pred = NULL, *input, *new_input;
   chain_list* abl;
   cell_list* cell;
   note_list* note, *all_note=NULL, *best_note;
   int cell_source_positiv, neg_cell_source_positiv;   /*flag*/

   if (!entry) {
      fprintf(stderr,"unflatten_area: no input\n");
      exit(1);
   }
   
   /*test all the simple logic library*/
   for (cell=getcell_oper_lib(); cell; cell=cell->NEXT) {
      all_note=eval_note(all_note, cell, entry, entry_number);
   }
      
   /*no result pattern, impossible to map, signal it to caller function*/
   if (!all_note) return entry;

   /*take the best for a positiv and negativ results*/
   best_note=all_note;
   best_note->AVERAGE_COST=(best_note->COST+best_note->NEG_COST)/2;
   for (note=all_note->NEXT; note; note=note->NEXT) {
      note->AVERAGE_COST=(note->COST+note->NEG_COST)/2;
      if (note->AVERAGE_COST<best_note->AVERAGE_COST) best_note=note;
      else if (note->AVERAGE_COST==best_note->AVERAGE_COST 
         && note->ARITY>best_note->ARITY) best_note=note;
      else if (note->AVERAGE_COST==best_note->AVERAGE_COST 
         && note->ARITY==best_note->ARITY
         && note->DELAY+note->NEG_DELAY<best_note->DELAY+best_note->NEG_DELAY)
            best_note=note;
   }

   /*one of those cells are missing, impossible to unflat*/
   if (!best_note->CELL || !best_note->NEG_CELL) {
      fprintf(stderr,
      "unflatten_area: opposite cell of %s is missing (%d inputs)\n",
      best_note->CELL?best_note->CELL->NAME:best_note->NEG_CELL->NAME,
      entry_number);
      exit(1);
   }   

   /*build new input*/
   new_input=newinput();
   new_input->DELAY=best_note->DELAY;
   new_input->NEG_DELAY=best_note->NEG_DELAY;
   new_input->R=best_note->R;
   new_input->NEG_R=best_note->NEG_R;
   new_input->ABL=createabloper(ABL_OPER(best_note->CELL->ABL));
   new_input->NEG_ABL=createabloper(ABL_OPER(best_note->NEG_CELL->ABL));
   cell_source_positiv=is_source_positiv(ABL_OPER(best_note->CELL->ABL));
   neg_cell_source_positiv=is_source_positiv(
                                         ABL_OPER(best_note->NEG_CELL->ABL));   /*put operands*/
   for (input=entry; input; input=input->NEXT) {
      if (best_note->ARITY==0) break;
      best_note->ARITY--;
      /*positiv*/
      if (cell_source_positiv) {
         abl=input->ABL;
         input->ABL=NULL; /* protect from freeinput() */
      }
      else {   
         abl=input->NEG_ABL;
         input->NEG_ABL=NULL; /* protect from freeinput() */
      }
      ABL_CDR(new_input->ABL)=addchain(ABL_CDR(new_input->ABL),abl);
      /*opposite*/
      if (neg_cell_source_positiv) {
         if (input->ABL) abl=input->ABL;
         else abl=dupablexpr(abl);     /*already used above*/
         input->ABL=NULL; /* protect from freeinput() */
      }
      else {   
         if (input->NEG_ABL) abl=input->NEG_ABL;
         else abl=dupablexpr(abl);     /*already used above*/
         input->NEG_ABL=NULL; /* protect from freeinput() */
      }
      ABL_CDR(new_input->NEG_ABL)=addchain(ABL_CDR(new_input->NEG_ABL),abl);

      pred=input;
   }
   
   pred->NEXT=NULL;
   freeinput(entry);
   freenote(all_note);
   
   return sort_input(input,new_input);      /*return a sort list*/
}