Пример #1
0
/*
 * Allocate and initialise a Sequence container.
 *
 *   min, max : stored as min_len / max_len; every sequence buffer is
 *              allocated with max+1 characters.
 *   nseq     : number of entries allocated for each per-sequence array;
 *              also stored as both nseq and max_nseq.
 *
 * Returns the new structure.
 */
Sequence * declare_sequence ( int min, int max, int nseq)
{
    Sequence *seqs = vcalloc (1, sizeof (Sequence));

    /* Per-sequence text fields. */
    seqs->seq_comment = declare_char (nseq, COMMENT_SIZE);
    seqs->aln_comment = declare_char (nseq, COMMENT_SIZE);
    seqs->file        = declare_char (nseq, STRING+1);
    seqs->seq         = declare_char (nseq, max+1);
    seqs->name        = declare_char (nseq, MAXNAMES+1);

    /* Lengths and size bookkeeping. */
    seqs->len      = vcalloc (nseq, sizeof (int));
    seqs->min_len  = min;
    seqs->max_len  = max;
    seqs->max_nseq = nseq;
    seqs->nseq     = nseq;

    seqs->type = vcalloc (30, sizeof (char));
    seqs->T    = declare_arrayN (2, sizeof (Template), nseq, 1);
    seqs->dc   = declare_int (nseq, 2);

    return seqs;
}
Пример #2
0
/*
 * Read an oligo list from the file fname.
 *
 * The header is parsed with the literal keywords ALPHABET, AMBIG_ALPHABET,
 * WORD_SIZE, NSEQ, LEN and SCORE, in that order. An AMBIG_ALPHABET value
 * starting with '@' is treated as empty. The SCORE value is read and
 * discarded. NSEQ records follow; in each record the first token is skipped
 * and the second is stored as the sequence (at most LEN characters).
 *
 * Returns a newly allocated Oligo whose EALPHABET is ALPHABET followed by
 * AMBIGUITIES.
 */
Oligo * read_oligo_list ( char *fname)
    {
    enum { ALPHABET_SIZE = 100 }; /* must stay in sync with the %99s widths below */
    Oligo *O;
    FILE *fp;
    int a, score;
    char seq_fmt[64];

    O=vcalloc (1, sizeof (Oligo));
    O->ALPHABET=vcalloc ( ALPHABET_SIZE, sizeof (char));
    /* EALPHABET receives both alphabets back to back, so it needs twice the room. */
    O->EALPHABET=vcalloc ( 2*ALPHABET_SIZE, sizeof (char));
    O->AMBIGUITIES=vcalloc ( ALPHABET_SIZE, sizeof (char));

    fp=vfopen ( fname, "r");
    /* Field widths keep the reads inside the ALPHABET_SIZE-byte buffers. */
    fscanf ( fp, "ALPHABET %99s\n", O->ALPHABET);
    fscanf ( fp, "AMBIG_ALPHABET %99s\n", O->AMBIGUITIES);
    if ( O->AMBIGUITIES[0]=='@')O->AMBIGUITIES[0]='\0';
    fscanf ( fp, "WORD_SIZE %d\n", &O->WSIZE);
    fscanf ( fp, "NSEQ %d\n", &O->NSEQ);
    fscanf ( fp, "LEN %d\n", &O->LEN);
    fscanf ( fp, "SCORE %d", &score);
    snprintf ( O->EALPHABET, 2*ALPHABET_SIZE, "%s%s", O->ALPHABET, O->AMBIGUITIES);

    O->seq=declare_char ( O->NSEQ, O->LEN+1);

    /* Bound every sequence read by the declared LEN. A non-positive LEN
       leaves no room for data, so the token is consumed and dropped. */
    if ( O->LEN>0)snprintf ( seq_fmt, sizeof (seq_fmt), "%%*s\n%%%ds\n", O->LEN);
    else snprintf ( seq_fmt, sizeof (seq_fmt), "%%*s\n%%*s\n");

    for ( a=0; a< O->NSEQ; a++)
	{
	if ( O->LEN>0)fscanf ( fp, seq_fmt, O->seq[a]);
	else fscanf ( fp, seq_fmt);
	}
    vfclose (fp);
    return O;
    }
Пример #3
0
/*
 * Configure a constraint list for moca.
 *
 * Installs the moca cost/evaluation callbacks, then concatenates every
 * sequence of CL->S into one "catseq" string (each sequence followed by an
 * 'X') and adds it to CL->S. CL->packed_seq_lu maps each 1-based position of
 * the packed string to {sequence index, 1-based residue index}; the slot of
 * each 'X' separator gets UNDEFINED in column 0.
 *
 * Returns CL.
 */
Constraint_list *prepare_cl_for_moca ( Constraint_list *CL)
{
    int s, r, n_res;
    int packed_len, packed_pos;
    char **packed, **label;
    Sequence *cat=NULL;

    /* Constraint list callbacks */
    CL->do_self=1;
    CL->get_dp_cost=moca_slow_get_dp_cost;
    CL->evaluate_residue_pair=moca_residue_pair_extended_list;

    /* moca callbacks */
    (CL->moca)->evaluate_domain=evaluate_moca_domain;
    (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain;
    (CL->moca)->make_nol_aln=make_moca_nol_aln;

    /* Size of the packed string: one extra slot per sequence for the
       separator, plus one because positions are 1-based. */
    packed_len=1;
    for ( s=0; s< (CL->S)->nseq; s++)
    {
        packed_len+=strlen ( (CL->S)->seq[s])+1;
    }

    packed=declare_char ( 1,packed_len+1);
    label =declare_char ( 1,30);
    CL->packed_seq_lu=declare_int ( packed_len, 2);

    packed_pos=1;
    for ( s=0; s< (CL->S)->nseq; s++)
    {
        strcat (packed[0], (CL->S)->seq[s]);
        strcat (packed[0], "X");

        n_res=strlen((CL->S)->seq[s]);
        for ( r=1; r<= n_res; r++)
        {
            CL->packed_seq_lu[packed_pos][0]=s;
            CL->packed_seq_lu[packed_pos][1]=r;
            packed_pos++;
        }
        /* separator slot */
        CL->packed_seq_lu[packed_pos][0]=UNDEFINED;
        packed_pos++;
    }

    sprintf ( label[0], "catseq");
    cat=fill_sequence_struc(1, packed, label, NULL);
    CL->S=add_sequence (cat, CL->S, 0);

    free_char( packed, -1);
    free_char( label, -1);
    free_sequence (cat, cat->nseq);

    return CL;
}
Пример #4
0
/*
 * Translate a sequence into an array of residue-group indices.
 *
 *   seq  : NUL-terminated sequence.
 *   type : "DNA" or "RNA" selects the four nucleotide groups (Aa, Gg, TtUu,
 *          Cc); anything else uses make_group_aa (&ng, "mafft").
 *
 * The group table is built on the first call only, from the type passed to
 * that call; later calls reuse it regardless of their own type argument.
 *
 * Returns a pointer into a static buffer that is reused (and possibly
 * re-allocated) by the next call, so the caller must neither free it nor
 * keep it across calls. result[-1] holds the number of groups and the codes
 * are terminated by END_ARRAY.
 */
int *code_seq (char *seq, char *type)
{
  static int *code;
  static int *aa, ng;
  int a, b, l;


  if (!aa)
    {
      char **gl;
      if ( strm (type, "DNA") || strm (type, "RNA"))
	{
	  gl=declare_char (4,5);
	  sprintf ( gl[ng++], "Aa");
	  sprintf ( gl[ng++], "Gg");
	  sprintf ( gl[ng++], "TtUu");
	  sprintf ( gl[ng++], "Cc");
	}
      else
	{

	  gl=make_group_aa ( &ng, "mafft");
	}
      aa=(int*)vcalloc ( 256, sizeof (int));
      for ( a=0; a<ng; a++)
	{
	  int gl_len=strlen (gl[a]);
	  for ( b=0; b<gl_len; b++)
	    {
	      /* index as unsigned char: a plain char may be negative */
	      aa[(unsigned char)gl[a][b]]=a;
	    }
	}
      free_char (gl, -1);
    }


  l=strlen (seq);

  /* The pointer handed out last time is one past the start of the buffer. */
  if ( code) code--;

  if ( !code || read_array_size (code, sizeof (int))<(l+2))
    {
      vfree (code);
      code=(int*)vcalloc (l+2, sizeof (int));
    }
  code[0]=ng;
  code++;
  for (a=0; a<l; a++)
    {
      /* bytes above 127 must not produce a negative table index */
      code[a]=aa[(unsigned char)seq[a]];
    }

  code[a]=END_ARRAY;
  return code;
}
Пример #5
0
/*
 * Allocate an Alignment, sized from S when a sequence set is provided.
 *
 * With S == NULL the defaults MAX_LEN_ALN / MAX_N_SEQ are used; otherwise
 * the alignment gets 2*S->max_len+1 columns and S->nseq+1 rows, and the
 * sequence names are copied from S.
 *
 * Layout of order[x]:
 *   [0] index of the x-th aligned sequence in S (initialised to x)
 *   [1] offset of the sequence (deleted residues before the first one)
 *   [2] used by sw_gotoh_pair_wise
 *   [3] used by sw_gotoh_pair_wise
 *   [4] weight, -1
 *
 * Returns the new alignment.
 */
Alignment *declare_Alignment ( Sequence *S)
{
	Alignment *aln;
	int row;

	aln=vcalloc (1, sizeof (Alignment));
	aln_stack (aln, DECLARE_ALN);

	/* Capacity */
	if (S!=NULL)
	  {
	    aln->declared_len=2*S->max_len+1;
	    aln->max_n_seq=S->nseq+1;
	  }
	else
	  {
	    aln->declared_len=MAX_LEN_ALN;
	    aln->max_n_seq=MAX_N_SEQ;
	  }
	aln->S=S;

	/* Per-row text buffers */
	aln->seq_comment=declare_char (aln->max_n_seq, COMMENT_SIZE);
	aln->aln_comment=declare_char (aln->max_n_seq, COMMENT_SIZE);
	aln->seq_al=declare_char (aln->max_n_seq, aln->declared_len);
	aln->name=declare_char (aln->max_n_seq, MAXNAMES+1);
	aln->file=declare_char (aln->max_n_seq, STRING);
	aln->tree_order=declare_char (aln->max_n_seq, STRING);

	/* Per-row numeric data */
	aln->order=declare_int (aln->max_n_seq, 5);
	aln->score_seq=vcalloc (aln->max_n_seq, sizeof (int));

	row=0;
	while (row<aln->max_n_seq)
	  {
	    aln->order[row][0]=row;
	    row++;
	  }

	aln->len_aln=0;
	aln->score_aln=0;
	aln->len=vcalloc (aln->max_n_seq, sizeof (int));

	if (S && S->name)
	  {
	    for (row=0; row<S->nseq; row++)
	      sprintf (aln->name[row], "%s", S->name[row]);
	  }

	return aln;
}
Пример #6
0
Weights* declare_weights ( int nseq)
	{
	Weights *W;

	W=vcalloc ( 1, sizeof ( Weights));
	W->comments=vcalloc ( 1000, sizeof (char));
	W->nseq=nseq;
	W->mode=vcalloc (FILENAMELEN, sizeof (char));
	W->seq_name= declare_char ( W->nseq*2, 200);
	W->PW_SD=declare_float ( W->nseq, W->nseq);
	W->PW_ID=declare_float ( W->nseq, W->nseq);
	W->SEQ_W=vcalloc ( W->nseq, sizeof ( float));
	return W;
	}
Пример #7
0
/*
 * Allocate a Constraint_list bound to the sequence set S.
 *
 *   S    : sequence set, stored in the list; may be NULL, in which case
 *          seq_for_quadruplet is not allocated.
 *   name : optional list name, copied into list_name.
 *   L    : deprecated; a non-NULL value prints a message and exits.
 *   ne   : stored as the number of entries.
 *   fp   : stored as the list's file pointer.
 *   M    : stored as the list's matrix.
 *
 * Returns the new list.
 */
Constraint_list * declare_constraint_list ( Sequence *S, char *name, int *L, int ne,FILE *fp, int **M)
{
    Constraint_list *list=vcalloc (1, sizeof (Constraint_list));

    list->S=S;
    list->M=M;
    if (name) sprintf (list->list_name, "%s", name);
    list->cpu=1;
    list->fp=fp;

    /* Passing a raw list is no longer supported. */
    if (L!=NULL)
      {
	HERE ("The USE of L is now Deprecated with Constraint Lists");
	exit (0);
      }

    list->ne=ne;
    list->entry_len=LIST_N_FIELDS;
    list->el_size=sizeof (CLIST_TYPE);
    list->matrices_list=declare_char (20,20);
    list->weight_field=WE;

    if (S!=NULL)
      {
	list->seq_for_quadruplet=vcalloc (S->nseq, sizeof (int));
      }

    /* Blast and TC parameter blocks */
    list->Prot_Blast=vcalloc (1, sizeof (Blast_param));
    list->DNA_Blast=vcalloc (1, sizeof (Blast_param));
    list->Pdb_Blast=vcalloc (1, sizeof (Blast_param));
    list->TC=vcalloc (1, sizeof (TC_param));

    /* Residue index (new data structure) */
    list->residue_index=declare_residue_index (S);

    return list;
}
/* Record the geometry of one model state in M->model_properties: the state's
   own id as TYPE, plus its LEN_I, LEN_J, DELTA_I and DELTA_J entries. */
static void set_sseq_state_geometry (Dp_Model *M, int state, int len_i, int len_j, int delta_i, int delta_j)
  {
    M->model_properties[state][M->TYPE]=state;
    M->model_properties[state][M->LEN_I]=len_i;
    M->model_properties[state][M->LEN_J]=len_j;
    M->model_properties[state][M->DELTA_I]=delta_i;
    M->model_properties[state][M->DELTA_J]=delta_j;
  }

/*
 * Build the DP model with a Substitution/Deletion/Insertion triplet for each
 * of three classes (alpha, beta, turn), plus the START and END states.
 *
 *   left_tg_mode, right_tg_mode : not used by this function.
 *   CL : supplies gop, gep, TG_MODE and matrices_list[0..2] (the matrix
 *        names used to label the alpha, beta and turn states).
 *
 * Transition costs: opening a gap inside a class costs gop*SCORE_K; changing
 * class costs an extra tgop*SCORE_K with tgop = 3*gep. bounded_model[b][0]
 * holds the number of states allowed to precede b and bounded_model[b][1..]
 * lists them (START and END excluded).
 *
 * Returns the newly allocated model, with M->CL set to CL.
 */
Dp_Model * initialize_sseq_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL)
  {
    Dp_Model *M;
    int a, b, k, n;
    int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It;
    int sub[3], del[3], ins[3];
    int tgop=CL->gep*3;

    M=vcalloc ( 1, sizeof (Dp_Model));

    M->nstate=9;
    M->START=M->nstate++;
    M->END  =M->nstate++;

    M->model_comments=declare_char (M->nstate+1, 100);
    M->bounded_model=declare_int (M->nstate+1, M->nstate+1);
    M->model=declare_int (M->nstate+1, M->nstate+1);
    for ( a=0; a<=M->nstate; a++)
      for ( b=0; b<= M->nstate; b++)
	M->model[a][b]=UNDEFINED;

    /* Column indices of model_properties. The table is allocated once
       (it used to be allocated twice, leaking the first copy). */
    a=0;
    M->TYPE=a++;M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++;M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++;
    M->CODING0=a++;M->DELETION=a++;
    M->model_properties=declare_int ( M->nstate, 10);

    /* The three emission indices are renumbered 0..2: from here on they
       index the 3-slot rows of model_emission_function. */
    a=0;
    M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++;
    M->model_emission_function=vcalloc(M->nstate, sizeof (int (**)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *)));
    for ( a=0; a< M->nstate; a++)
       M->model_emission_function[a]=vcalloc(3, sizeof (int (*)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *)));

    /* State numbering: class k (0=alpha, 1=beta, 2=turn) owns sub[k], del[k], ins[k]. */
    a=0;
    Sa=a++;Da=a++;Ia=a++;
    Sb=a++;Db=a++;Ib=a++;
    St=a++;Dt=a++;It=a++;
    sub[0]=Sa; del[0]=Da; ins[0]=Ia;
    sub[1]=Sb; del[1]=Db; ins[1]=Ib;
    sub[2]=St; del[2]=Dt; ins[2]=It;

    sprintf ( M->model_comments[M->START], "START");
    sprintf ( M->model_comments[M->END], "END");

    /* Labels and geometry of the nine states */
    for ( k=0; k<3; k++)
      {
	if (CL->matrices_list[k][0])
	  {
	    sprintf ( M->model_comments[sub[k]], "Substitution %s", CL->matrices_list[k]);
	    sprintf ( M->model_comments[del[k]], "Deletion %s", CL->matrices_list[k]);
	    sprintf ( M->model_comments[ins[k]], "Insertion %s", CL->matrices_list[k]);
	  }
	set_sseq_state_geometry (M, sub[k], 1, 1, -1,  0);
	set_sseq_state_geometry (M, del[k], 1, 0, -1, +1);
	set_sseq_state_geometry (M, ins[k], 0, 1,  0, -1);
      }

    /*ALPHA*/
    M->model_emission_function[Sa][M->EMISSION]      =get_alpha_sub_cost;
    M->model_emission_function[Sa][M->START_EMISSION]=get_ssec_no_cost;
    M->model_emission_function[Sa][M->TERM_EMISSION] =get_ssec_no_cost;

    M->model_emission_function[Da][M->EMISSION]      =get_alpha_gep_cost;
    M->model_emission_function[Da][M->START_EMISSION]=get_alpha_start_gep_cost;
    M->model_emission_function[Da][M->TERM_EMISSION] =get_alpha_term_gep_cost;

    M->model_emission_function[Ia][M->EMISSION]      =get_alpha_gep_cost;
    M->model_emission_function[Ia][M->START_EMISSION]=get_alpha_start_gep_cost;
    M->model_emission_function[Ia][M->TERM_EMISSION] =get_alpha_term_gep_cost;

    /*BETA*/
    M->model_emission_function[Sb][M->EMISSION]      =get_beta_sub_cost;
    M->model_emission_function[Sb][M->START_EMISSION]=get_ssec_no_cost;
    M->model_emission_function[Sb][M->TERM_EMISSION] =get_ssec_no_cost;

    M->model_emission_function[Db][M->EMISSION]      =get_beta_gep_cost;
    M->model_emission_function[Db][M->START_EMISSION]=get_beta_start_gep_cost;
    M->model_emission_function[Db][M->TERM_EMISSION] =get_beta_term_gep_cost;

    M->model_emission_function[Ib][M->EMISSION]      =get_beta_gep_cost;
    M->model_emission_function[Ib][M->START_EMISSION]=get_beta_start_gep_cost;
    M->model_emission_function[Ib][M->TERM_EMISSION] =get_beta_term_gep_cost;

    /*TURNS*/
    M->model_emission_function[St][M->EMISSION]      =get_turn_sub_cost;
    M->model_emission_function[St][M->START_EMISSION]=get_ssec_no_cost;
    M->model_emission_function[St][M->TERM_EMISSION] =get_ssec_no_cost;

    M->model_emission_function[Dt][M->EMISSION]      =get_turn_gep_cost;
    M->model_emission_function[Dt][M->START_EMISSION]=get_turn_start_gep_cost;
    M->model_emission_function[Dt][M->TERM_EMISSION] =get_turn_term_gep_cost;

    M->model_emission_function[It][M->EMISSION]      =get_turn_gep_cost;
    M->model_emission_function[It][M->START_EMISSION]=get_turn_start_gep_cost;
    M->model_emission_function[It][M->TERM_EMISSION] =get_turn_term_gep_cost;

    /*Transitions*/

    /* From START and into END */
    for ( k=0; k<3; k++)
      {
	M->model[M->START][sub[k]]=ALLOWED;
	M->model[M->START][del[k]]=M->model[M->START][ins[k]]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0;

	M->model[sub[k]][M->END]=ALLOWED;
	M->model[ins[k]][M->END]=M->model[del[k]][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1);
      }

    /* Every state may follow itself */
    for ( a=0; a< M->nstate; a++)M->model[a][a]=ALLOWED;

    /* Between classes k (source) and n (destination) */
    for ( k=0; k<3; k++)
      for ( n=0; n<3; n++)
	{
	  if (k==n)
	    {
	      M->model[sub[k]][ins[n]]=M->model[sub[k]][del[n]]=CL->gop*SCORE_K;
	      M->model[ins[k]][sub[n]]=M->model[del[k]][sub[n]]=ALLOWED;
	    }
	  else
	    {
	      M->model[sub[k]][ins[n]]=M->model[sub[k]][del[n]]=CL->gop*SCORE_K+tgop*SCORE_K;
	      M->model[sub[k]][sub[n]]=tgop*SCORE_K;
	      M->model[ins[k]][sub[n]]=M->model[del[k]][sub[n]]=tgop*SCORE_K;
	    }
	}

    /*Prune the model*/
    for ( a=0; a< M->START; a++)
      for ( b=0; b<M->START; b++)
	{
	  if (M->model[a][b]!=UNDEFINED)
	    {
	      /* append a to the predecessor list of b */
	      n=M->bounded_model[b][0]++;
	      M->bounded_model[b][1+n]=a;
	    }
	}
    M->CL=CL;

    return M;
  }
/*
 * Align the two sequences B->seq_al[l_s[0][0]] and B->seq_al[l_s[1][0]],
 * driven by a DP run on rows 0 and 3 of A.
 *
 *   B     : alignment holding the two sequences; its rows are rewritten
 *           with the gapped result, len_aln is updated and nseq is set to 2.
 *           B->cdna_cache[0][col] ends up 1 for columns emitted as part of a
 *           coding triplet and 0 otherwise.
 *   A     : alignment handed to make_fast_dp_pair_wise.
 *   in_ns : not used by this function.
 *   l_s   : l_s[0][0] and l_s[1][0] are the rows of B to align.
 *   CL    : constraint list; used to build the DP model on the first call
 *           (the model is cached in a static variable afterwards).
 *   diag  : diagonal list stored in the model before the DP run.
 *
 * Returns DPR->score as produced by make_fast_dp_pair_wise.
 */
int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
    {
      int a,c,p,k;
      Dp_Result *DPR;
      static Dp_Model  *M;
      int l0, l1;
      int len_i, len_j;
      int f0=0, f1=0;
      int deltaf0, deltaf1, delta;
      int nr1, nr2;
      int ala, alb, aa0, aa1;
      int type;

      char **al;
      int **tl_s;
      int *tns;
      /*DEBUG*/
      int debug_cdna_fasta=0;
      Alignment *DA;
      int score;
      int state,prev_state;
      int t, e=0; /* e initialised so the debug scoring never reads garbage */
      int a1, a2;


      l0=strlen ( B->seq_al[l_s[0][0]]);
      l1=strlen ( B->seq_al[l_s[1][0]]);

      al=declare_char (2, l0+l1+1);
      B=realloc_aln2 (B,B->nseq,l0+l1+1);


      free_int (B->cdna_cache, -1);
      B->cdna_cache=declare_int(1, l0+l1+1);

      if ( !M)M=initialize_dna_dp_model (CL);


      M->diag=diag;

      /* Run the DP on rows 0 and 3 of A, one sequence per side. */
      tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1;
      DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M);
      vfree(tns);free_int(tl_s, -1);



      /*new_trace_back*/
      a=p=0;
      aa0=aa1=ala=alb=0;
      /* skip everything up to and including START */
      while ( (k=DPR->traceback[a++])!=M->START);
      while ( (k=DPR->traceback[a++])!=M->END)
	{

	  f0=M->model_properties[k][M->F0];
	  f1=M->model_properties[k][M->F1];

	  len_i=M->model_properties[k][M->LEN_I];
	  len_j=M->model_properties[k][M->LEN_J];

	  type=M->model_properties[k][M->TYPE];



	  if (type==M->CODING0)
	    {
	      /* residues of each sequence not yet written before this triplet */
	      deltaf0=(aa0*3+f0)-ala;
	      deltaf1=(aa1*3+f1)-alb;

	      delta=MAX(deltaf0, deltaf1);

	      /* write the pending residues as NON_CODING columns */
	      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)
		      {
			if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
			else al[0][p]='-';

			if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
			else al[1][p]='-';

			B->cdna_cache[0][p]=M->NON_CODING;
			/* an all-gap column is dropped by stepping p back */
			if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
			else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
		      }
	      /* write the three columns of the triplet */
	      for ( c=0; c< 3; c++, p++)
		{
		  if ( c==0)B->cdna_cache[0][p]=M->CODING0;
		  else if ( c==1)B->cdna_cache[0][p]=M->CODING1;
		  else if ( c==2)B->cdna_cache[0][p]=M->CODING2;
		  if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
		  else al[0][p]='-';

		  if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
		  else al[1][p]='-';

		  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
		  else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k,  al[0][p], al[1][p]);
		}
	    }

	  aa0+=len_i;
	  aa1+=len_j;
	}

      /* write whatever remains after the last state */
      deltaf0=(aa0*3+f0)-ala;
      deltaf1=(aa1*3+f1)-alb;
      delta=MAX(deltaf0, deltaf1);
      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)
	{
	  if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
	  else al[0][p]='-';

	  if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
	  else al[1][p]='-';

	  B->cdna_cache[0][p]=M->NON_CODING;
	  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
	  else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
	}


      /*End New traceback*/


      al[0][p]='\0';
      al[1][p]='\0';


      sprintf( B->seq_al[l_s[0][0]], "%s", al[0]);
      sprintf( B->seq_al[l_s[1][0]], "%s", al[1]);
      B->len_aln=strlen (al[0]);
      B->nseq=2;


      if ( debug_cdna_fasta)
	  {
	    fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
	    for ( a=1; a<diag[0]; a++)
	      {
		fprintf ( stderr, "\nchosen diag: %d", diag[a]);
	      }

	    fprintf ( stderr, "\n  GOP=%d   GEP=%d   TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
	    fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);

	    DA=copy_aln (B, NULL);
	    DA=realloc_aln2 (DA,6,(DA->len_aln+1));


	    for ( a=0; a<B->len_aln; a++)
	      {

		fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]);
		if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
		else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';

		if (DA->cdna_cache[0][a]==M->CODING0)
		  {
		    DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
		    DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
		  }
		else
		  {
		    DA->seq_al[DA->nseq+1][a]='-';
		    DA->seq_al[DA->nseq+2][a]='-';
		  }

	      }
	    DA->nseq+=3;
	    print_aln (DA);

	    score=0;


	    for (prev_state=M->START,a=0; a< DA->len_aln;)
	      {
		state=DA->cdna_cache[0][a];
		t=M->model[prev_state][state];
		if ( DA->cdna_cache[0][a]==M->CODING0)
		  {
		    a1=translate_dna_codon (A->seq_al[0]+a,'x');
		    a2=translate_dna_codon (A->seq_al[1]+a,'x');

		    if ( a1!='x' && a2!='x')
		      {
			e=CL->M[a1-'A'][a2-'A']*SCORE_K;
		      }
		  }
		else if ( DA->cdna_cache[0][a]>M->CODING0);
		else
		  {
		    e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
		  }
		if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a);

		fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
		score+=e+t;
		prev_state=state;

		if (B->cdna_cache[0][a]==M->NON_CODING)a++;
		else a+=3;

	      }

	    /* DA is released only after the scoring loop, which still reads it. */
	    free_aln(DA);
	  }

      /* collapse the state ids into a coding (1) / non-coding (0) flag */
      for ( a=0; a<B->len_aln; a++)
	{

	  if ( B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0;
	  else B->cdna_cache[0][a]=1;
	}

      free_char ( al, -1);
      return DPR->score;

    }
Пример #10
0
/*
 * Pairwise Gotoh-style alignment of two sequence groups, restricted to the
 * diagonals listed in diag.
 *
 *   A    : alignment holding both groups; its rows are rewritten with the
 *          gapped result, len_aln is set to the alignment length and nseq
 *          to ns[0]+ns[1].
 *   ns   : ns[0], ns[1] = number of sequences in each group.
 *   l_s  : l_s[g][k] = row of A holding the k-th sequence of group g.
 *   CL   : supplies gop, gep (both scaled by SCORE_K), TG_MODE, maximise
 *          and the get_dp_cost callback.
 *   diag : diag[0] = number of diagonals, diag[1..diag[0]] = the diagonals.
 *
 * Returns the score of the last cell filled by the DP loop (0 if no cell
 * was filled).
 */
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
    {
/*TREATMENT OF THE TERMINAL GAP PENALTIES*/
/*TG_MODE=0---> gop and gep*/
/*TG_MODE=1---> ---     gep*/
      /*TG_MODE=2---> ---     ---*/


	int TG_MODE, gop, l_gop, gep,l_gep, maximise;

/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
	int a, b,c,k, t;
	int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
	int su, in, de, tr;

	int **C, **D, **I, **trace, **pos0, **LD;
	int lenal[2], len;
	char *buffer, *char_buf;
	char **aln, **al;

        /********Prepare Penalties******/
	gop=CL->gop*SCORE_K;
	gep=CL->gep*SCORE_K;
	TG_MODE=CL->TG_MODE;
	maximise=CL->maximise;


	/********************************/


        n_diag=diag[0];



       l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
       l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);

       /* NOTE(review): divides by l1+l2, which is 0 when both sequences are empty — confirm callers never do that with DEBUG_TCOFFEE set */
       if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%%  ", (diag[0]*100)/(l1+l2));

	/*diag:
	  diag[1..n_diag]--> flagged diagonals, in order;
	  diag[0]=0--> first diagonal;
	  diag[n_diag+1]=l1+l2-1;
	*/

	/*numbering of the diagonals starts from the bottom right [1...l1+l2-1]*/
	/*sequence s1 is vertical and seq s2 is horizontal*/
	/*D contains the best Deletion  in S2==>comes from diagonal N+1*/
	/*I contains the best insertion in S2=> comes from diagonal N-1*/





       /* DP tables: one row per position (l1+l2+1 rows), one column per
          diagonal plus the two border columns 0 and n_diag+1 */
       C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);


       /* al[g][col] is 1 when group g has a residue in the column, 0 for a gap */
       al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);

       len= MAX(lenal[0],lenal[1])+1;
       /* NOTE(review): buffer is only allocated here and freed at the end; nothing in this function reads or writes it */
       buffer=(char*)vcalloc ( 2*len, sizeof (char));
       char_buf=(char*) vcalloc (2*len, sizeof (char));

       pos0=aln2pos_simple ( A,-1, ns, l_s);
       C[0][0]=0;

       /* Row 0: diagonals that do not reach row 0 are marked UNDEFINED */
       t=(TG_MODE==0)?gop:0;
       for ( j=1; j<= n_diag; j++)
	    {
		l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;



		if ( (diag[j]-lenal[0])<0 )
		    {
		    trace[0][j]=UNDEFINED;
		    continue;
		    }
		C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
		D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
	    }
       /* here j==n_diag+1: fill the right border column of row 0 */
       D[0][j]=D[0][j-1]+gep;


       t=(TG_MODE==0)?gop:0;
       for ( i=1; i<=lenal[0]; i++)
           {
	        l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;

		/* border columns of row i; t accumulates the terminal gap cost */
		C[i][0]=C[i][n_diag+1]=t=t+l_gep;
		I[i][0]=D[i][n_diag+1]=t+    gop;

		for ( j=1; j<=n_diag; j++)
		    {
			C[i][j]=C[i][0];
			D[i][j]=I[i][j]=I[i][0];
		    }

		for (eg=0, j=1; j<=n_diag; j++)
		    {

			/* column of s2 reached by diagonal j on row i (1-based) */
			pos_j=diag[j]-lenal[0]+i;
			if (pos_j<=0 || pos_j>l2 )
			    {
			    trace[i][j]=UNDEFINED;
			    continue;
			    }
			sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );

		    /*1 identify the best insertion in S2:*/
			l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
			l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
			len=(j==1)?0:(diag[j]-diag[j-1]);
			/* eg counts how many diagonals the current insertion spans */
			if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
			else eg=1;
			I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;

		    /*2 Identify the best deletion in S2*/
			l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
			l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;

			len=(j==n_diag)?0:(diag[j+1]-diag[j]);
			delta_i=((i-len)>0)?(i-len):0;

			/* LD counts how many diagonals the current deletion spans */
			if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
			else {LD[i][j]=1;}
			D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;


			/*Identify the best way*/
			/*
			score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]);
			fop-=1;
			if ( fop<0)trace[i][j]=fop*eg;
			else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];}
			else if ( fop==0) trace[i][j]=0;
			*/

			su=C[i-1][j]+sub;
			in=I[i][j];
			de=D[i][j];

			/*HERE ("%d %d %d", su, in, de);*/
			/* NOTE(review): this selection always takes the largest value and does not consult `maximise` — confirm that is intended */
			/* trace: 0 = match, negative = insertion spanning |tr| diagonals, positive = deletion spanning tr diagonals */
			if (su>=in && su>=de)
			  {
			    score=su;
			    tr=0;
			  }
			else if (in>=de)
			  {
			    score=in;
			    tr=-eg;
			  }
			else
			  {
			    score=de;
			    tr=LD[i][j];
			  }
			trace[i][j]=tr;
			C[i][j]=score;


			last_i=i;
			last_j=j;
		    }
	    }


       /*
	            [0][Positive]
	             ^     ^
	             |    /
                     |   /
                     |  /
                     | /
                     |/
       [Neg]<-------[*]
	*/


	/* Traceback starts from the last cell filled above */
	i=last_i;
	j=last_j;



	ala=alb=0;
	match1=match2=0;
	/* match1 / match2 count the residues of each sequence already placed */
	while (!(match1==l1 && match2==l2))
	      {


		  if ( match1==l1)
		     {
			 /* sequence 1 exhausted: the rest of sequence 2 faces gaps */
			 len=l2-match2;
			 for ( a=0; a< len; a++)
			     {
			     al[0][ala++]=0;
			     al[1][alb++]=1;
			     match2++;
			     }
			 k=0;
			 break;

			 /*k=-(j-1);*/

		     }
		  else if ( match2==l2)
		     {
			 /* sequence 2 exhausted: the rest of sequence 1 faces gaps */
			 len=l1-match1;
			 for ( a=0; a< len; a++)
			     {
			     al[0][ala++]=1;
			     al[1][alb++]=0;
			     match1++;
			     }
			 k=0;
			 break;
			 /*k= n_diag-j;*/
		     }
		  else
		      {
			  k=trace[i][j];
		      }


		  if ( k==0)
			     {
				 if ( match2==l2 || match1==l1);
				 else
				    {

				    al[0][ala++]=1;
				    al[1][alb++]=1;
				    i--;
				    match1++;
				    match2++;
				    }
			     }
		  else if ( k>0)
			     {

			     /* jump k diagonals up: residues of sequence 1 against gaps */
			     len=diag[j+k]-diag[j];
			     for ( a=0; a<len; a++)
			         {
				     if ( match1==l1)break;
				     al[0][ala++]=1;
				     al[1][alb++]=0;
				     match1++;
				 }
			     i-=len;
			     j+=k;
			     }
		  else if ( k<0)
			     {
			     /* jump |k| diagonals down: residues of sequence 2 against gaps */
			     k*=-1;
			     len=diag[j]-diag[j-k];
			     for ( a=0; a<len; a++)
			         {
				     if ( match2==l2)break;
				     al[0][ala++]=0;
				     al[1][alb++]=1;
				     match2++;
				 }


			     j-=k;
			     }
	      }

	LEN=ala;
	c=LEN-1;
	/* the columns were produced from the end of the alignment backwards; invert_list_char presumably reverses them in place — confirm */
	invert_list_char ( al[0], LEN);
	invert_list_char ( al[1], LEN);
	if ( A->declared_len<=LEN)A=realloc_aln2  ( A,A->max_n_seq, 2*LEN);
	aln=A->seq_al;

	/* Rewrite every row of both groups, inserting '-' where al[] holds 0 */
	/* NOTE(review): char_buf holds 2*(MAX(l1,l2)+1) chars while LEN can reach l1+l2; these are equal only when l1==l2 — verify the terminating write char_buf[LEN] stays in bounds */
	for ( c=0; c< 2; c++)
	    {
	    for ( a=0; a< ns[c]; a++)
		{
		ch=0;
		for ( b=0; b< LEN; b++)
		    {
		    if (al[c][b]==1)
			char_buf[b]=aln[l_s[c][a]][ch++];
		    else
			char_buf[b]='-';
		   }
		char_buf[b]='\0';
		aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf);
	        }
	     }


	A->len_aln=LEN;
	A->nseq=ns[0]+ns[1];

	free_int (pos0, -1);
	free_int (C, -1);
	free_int (D, -1);
	free_int (I, -1);
	free_int (trace, -1);
	free_int (LD, -1);
	free_char ( al, -1);
	vfree(buffer);
	vfree(char_buf);


	return score;
    }
Пример #11
0
/*
 * Copy alignment A into B.
 *
 *   A : source. When NULL, B is released with free_aln and NULL is returned.
 *   B : destination. When NULL a new alignment is declared; otherwise B is
 *       resized with realloc_alignment2.
 *
 * The copy has room for nnseq = MAX(A->nseq, A->max_n_seq) rows of
 * A->len_aln+1 columns. String tables, order and the caches are duplicated
 * with copy_char / copy_int, and P with copy_profile. S, expanded_order,
 * Dp_result, score_res and CL are assigned directly from A. A nested
 * alignment A->A is copied recursively.
 *
 * Returns B.
 */
Alignment* copy_aln ( Alignment *A, Alignment *B)
        {
	  int a, b;
	  int nnseq;
	  int nlen;


	  if ( A==NULL){free_aln(B); return NULL;}

	  nnseq=MAX(A->nseq, A->max_n_seq);
	  nlen=A->len_aln+1;
	  if (B)
	    B=realloc_alignment2 (B, nnseq, nlen);
	  else
	    B=declare_aln2 (nnseq, nlen);
	  B->S=A->S;


	  /*SIZES*/
	  B->max_len=A->max_len;
	  B->min_len=A->min_len;
	  B->declared_len=nlen;
	  B->max_n_seq=nnseq;

	  B->nseq=A->nseq;
	  B->len_aln=A->len_aln;


/*sequence Information*/
	    if ( A->generic_comment)
	      {
		vfree(B->generic_comment);
		B->generic_comment=vcalloc (strlen(A->generic_comment)+1, sizeof (char));
		sprintf ( B->generic_comment, "%s", A->generic_comment);
	      }
	    /* Sized with nnseq, not A->max_n_seq: B->nseq entries are copied
	       below and nnseq is the only bound known to cover them. */
	    if ( (A->S)==NULL){vfree (B->len); B->len=vcalloc ( nnseq, sizeof (int));}
	    ga_memcpy_int ( A->len, B->len, B->nseq);

	    B->seq_comment=copy_char ( A->seq_comment,  B->seq_comment,  -1,-1);
	    B->aln_comment=copy_char ( A->aln_comment,  B->aln_comment,  -1,-1);

	    B->name=copy_char ( A->name,     B->name,     -1,-1);

	    B->file=copy_char ( A->file,     B->file,     -1,-1);
	    B->tree_order=copy_char ( A->tree_order,     B->tree_order,     -1,-1);
	    B->expanded_order=A->expanded_order;

	    /* Rebuild seq_al at the new size and copy the first len_aln
	       columns of every row present in A. */
	    free_char ( B->seq_al, -1);
	    B->seq_al=declare_char(B->max_n_seq, B->declared_len);
	    for ( a=0; a< nnseq; a++)
	      {
		if (A->seq_al[a])
		  {
		    for ( b=0; b< A->len_aln; b++)
		      B->seq_al[a][b]=A->seq_al[a][b];
		  }
	      }



	    B->order=copy_int  ( A->order,    B->order,    -1, -1);
	    if (A->seq_cache)
	        {
		B->seq_cache=copy_int  ( A->seq_cache,    B->seq_cache,-1,-1);
		}

	    if (A->cdna_cache)
	        {
		B->cdna_cache=copy_int  ( A->cdna_cache,    B->cdna_cache,-1,-1);
		}

	    B->P=copy_profile (A->P);

	    B->Dp_result=A->Dp_result;

/*Score*/

	    /* Same sizing rule as B->len above. */
	    if ( (A->S)==NULL){vfree (B->score_seq); B->score_seq=vcalloc ( nnseq, sizeof (int));}
	    ga_memcpy_int(  A->score_seq,B->score_seq,B->nseq);
	    B->score_res=A->score_res;

	    B->score_aln=A->score_aln;
	    B->score=A->score;
	    B->ibit=A->ibit;
	    B->cpu=A->cpu;
	    B->finished=A->finished;

/*Output Options*/
	    B->output_res_num=A->output_res_num;
	    B->residue_case=A->residue_case;
	    B->expand=A->expand;

	    B->CL=A->CL;
	    B->random_tag=A->random_tag;

/*Make the function Recursive */
	    if ( A->A)
	      {
		B->A=copy_aln (A->A, NULL);
	      }
	    else B->A=NULL;

	    return B;
	}
Пример #12
0
/*
 * Align two groups of sequences of A with diff() and rewrite the rows of A
 * with the gapped result.
 *
 *   A  : alignment holding both groups; updated in place (rows, len_aln, nseq).
 *   ns : ns[0], ns[1] = number of sequences in each group.
 *   ls : ls[g][k] = row of A holding the k-th sequence of group g.
 *   CL : constraint list forwarded to diff().
 *
 * diff() fills an edit script through the file-level cursor sapp:
 * 0 = aligned column, negative = that many columns of group 0 against gaps,
 * positive = that many columns of group 1 against gaps.
 *
 * Returns the score reported by the first diff() call. Exits with status 1
 * if the two reference rows end up with different lengths.
 */
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL )
{
    int **pos;
    int *script;
    char **rows;
    int grp, k, r, row, op, run;
    int i, j, col, l1, l2;
    int score;

    /* file-level state shared with diff() */
    sns=ns;
    sls=ls;

    pos=aln2pos_simple ( A,-1, ns, ls);

    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    script=(int*)vcalloc (l1+l2+1, sizeof (int));
    last=0;
    sapp=script;

    score=diff (A,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);
    diff (NULL,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);

    /* First pass over the script: measure the alignment length. */
    i=j=col=0;
    for (sapp=script; i!=l1 || j!=l2; sapp++)
    {
        op=*sapp;
        if (op==0)
        {
            i++;
            j++;
            col++;
        }
        else if (op<0)
        {
            i-=op;
            col-=op;
        }
        else
        {
            j+=op;
            col+=op;
        }
    }

    A=realloc_aln2  ( A,A->max_n_seq,col+1);
    rows=declare_char (A->max_n_seq,col+1);

    /* Second pass: write the gapped rows. */
    i=j=col=0;
    for (sapp=script; i!=l1 || j!=l2; sapp++)
    {
        op=*sapp;
        if (op==0)
        {
            for (r=0; r< ns[0]; r++)
                rows[ls[0][r]][col]=A->seq_al[ls[0][r]][i];
            for (r=0; r< ns[1]; r++)
                rows[ls[1][r]][col]=A->seq_al[ls[1][r]][j];
            i++;
            j++;
            col++;
        }
        else if (op>0)
        {
            /* group 1 residues against gaps in group 0 */
            for (k=0; k<op; k++, j++, col++)
            {
                for (r=0; r< ns[0]; r++)
                    rows[ls[0][r]][col]='-';
                for (r=0; r< ns[1]; r++)
                    rows[ls[1][r]][col]=A->seq_al[ls[1][r]][j];
            }
        }
        else
        {
            /* group 0 residues against gaps in group 1 */
            run=-op;
            for (k=0; k<run; k++, i++, col++)
            {
                for (r=0; r< ns[0]; r++)
                    rows[ls[0][r]][col]=A->seq_al[ls[0][r]][i];
                for (r=0; r< ns[1]; r++)
                    rows[ls[1][r]][col]='-';
            }
        }
    }

    A->len_aln=col;
    A->nseq=ns[0]+ns[1];

    /* Terminate each rebuilt row and copy it back into A. */
    for (grp=0; grp<2; grp++)
    {
        for (r=0; r< ns[grp]; r++)
        {
            row=ls[grp][r];
            rows[row][col]='\0';
            sprintf ( A->seq_al[row], "%s", rows[row]);
        }
    }

    vfree (script);
    free_char ( rows, -1);

    /* Both reference rows must now have the same length. */
    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    if ( l1!=l2) exit(1);

    free_int (pos, -1);
    return score;
}