Exemple #1
0
/* Function: MSAFromAINFO()
 * Date:     SRE, Mon Jun 14 11:22:24 1999 [St. Louis]
 *
 * Purpose:  Build a new-style MSA from an old-style aseq/ainfo
 *           alignment pair, so that code still producing the old
 *           representation can feed code that expects an MSA.
 *
 *           Copied per sequence: aligned text, weight, name (also
 *           stored as a key in msa->index), and -- when the matching
 *           SQINFO_* flag is set -- accession, description, secondary
 *           structure and surface accessibility. Copied per alignment:
 *           name, desc, acc, au, cs (into ss_cons), rf, and whichever
 *           of the TC/NC/GA cutoff pairs are flagged in ainfo->flags.
 *
 * Args:     aseq  - [0..nseq-1][0..alen-1] alignment
 *           ainfo - old-style optional info; also supplies nseq, alen
 *
 * Returns:  MSA * obtained from MSAAlloc() and filled in here.
 */
MSA *
MSAFromAINFO(char **aseq, AINFO *ainfo)
{
  MSA *msa;
  int  idx;                     /* sequence being converted           */
  int  slot;                    /* index while clearing fresh arrays  */

  msa = MSAAlloc(ainfo->nseq, ainfo->alen);

  for (idx = 0; idx < ainfo->nseq; idx++) {
    /* Mandatory per-sequence fields.
     * NOTE(review): msa->alen is read here although this function only
     * assigns it at the very end; presumably MSAAlloc() has already
     * set it -- confirm.
     */
    msa->wgt[idx]   = ainfo->wgt[idx];
    msa->sqlen[idx] = msa->alen;
    strcpy(msa->aseq[idx], aseq[idx]);
    msa->sqname[idx] = sre_strdup(ainfo->sqinfo[idx].name, -1);
    GKIStoreKey(msa->index, msa->sqname[idx]);

    /* Optional accession and description. */
    if (ainfo->sqinfo[idx].flags & SQINFO_ACC) {
      MSASetSeqAccession(msa, idx, ainfo->sqinfo[idx].acc);
    }
    if (ainfo->sqinfo[idx].flags & SQINFO_DESC) {
      MSASetSeqDescription(msa, idx, ainfo->sqinfo[idx].desc);
    }

    /* Optional secondary structure. The per-sequence arrays are
     * created, and cleared, the first time any sequence needs them.
     */
    if (ainfo->sqinfo[idx].flags & SQINFO_SS) {
      if (msa->ss == NULL) {
        msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
        msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
        for (slot = 0; slot < msa->nseqalloc; slot++) {
          msa->ss[slot]    = NULL;
          msa->sslen[slot] = 0;
        }
      }
      MakeAlignedString(msa->aseq[idx], msa->alen,
                        ainfo->sqinfo[idx].ss, &(msa->ss[idx]));
      msa->sslen[idx] = msa->alen;
    }

    /* Optional surface accessibility, handled the same way. */
    if (ainfo->sqinfo[idx].flags & SQINFO_SA) {
      if (msa->sa == NULL) {
        msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
        msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
        for (slot = 0; slot < msa->nseqalloc; slot++) {
          msa->sa[slot]    = NULL;
          msa->salen[slot] = 0;
        }
      }
      MakeAlignedString(msa->aseq[idx], msa->alen,
                        ainfo->sqinfo[idx].sa, &(msa->sa[idx]));
      msa->salen[idx] = msa->alen;
    }
  }

  /* Alignment-wide annotation. Per the original author's note,
   * sre_strdup() returns NULL when passed NULL, so absent fields
   * need no special casing.
   */
  msa->name    = sre_strdup(ainfo->name, -1);
  msa->desc    = sre_strdup(ainfo->desc, -1);
  msa->acc     = sre_strdup(ainfo->acc,  -1);
  msa->au      = sre_strdup(ainfo->au,   -1);
  msa->ss_cons = sre_strdup(ainfo->cs,   -1);
  msa->rf      = sre_strdup(ainfo->rf,   -1);

  /* Score cutoffs: each pair is copied only if flagged as present. */
  if (ainfo->flags & AINFO_TC) {
    msa->cutoff[MSA_CUTOFF_TC1]        = ainfo->tc1;
    msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
    msa->cutoff[MSA_CUTOFF_TC2]        = ainfo->tc2;
    msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
  }
  if (ainfo->flags & AINFO_NC) {
    msa->cutoff[MSA_CUTOFF_NC1]        = ainfo->nc1;
    msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
    msa->cutoff[MSA_CUTOFF_NC2]        = ainfo->nc2;
    msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
  }
  if (ainfo->flags & AINFO_GA) {
    msa->cutoff[MSA_CUTOFF_GA1]        = ainfo->ga1;
    msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
    msa->cutoff[MSA_CUTOFF_GA2]        = ainfo->ga2;
    msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
  }

  /* Record the final dimensions last, as the original did. */
  msa->nseq = ainfo->nseq;
  msa->alen = ainfo->alen;
  return msa;
}
Exemple #2
0
/* Function: write_selex_row()
 *
 * Purpose:  Write one line of a SELEX block: a label left-justified
 *           and padded/truncated to namelen columns, two spaces, then
 *           at most cpl characters taken from src.
 *
 * Args:     fp      - open output stream
 *           namelen - width of the label column
 *           label   - sequence name or markup tag (e.g. "#=RF")
 *           src     - first character of this row's data
 *           cpl     - maximum number of characters to take from src
 *
 * Return:   (void)
 */
static void
write_selex_row(FILE *fp, int namelen, const char *label, const char *src, int cpl)
{
  /* A precision on %s stops after cpl characters or at the
   * terminating NUL, whichever comes first. That yields the same
   * output as copying cpl characters into a scratch buffer,
   * terminating it and printing with %s, without the copy.
   */
  fprintf(fp, "%-*.*s  %.*s\n", namelen, namelen, label, cpl, src);
}

/* Function: WriteSELEX()
 * 
 * Write aligned sequences to an open file pointer,
 * breaking into multiple blocks if the sequences are
 * long. Number of symbols written per line is set by cpl.
 * The alignment must be flushed (all aseqs the same length,
 * ainfo->alen).
 *
 * cpl must be in the range 1..32767; anything else is reported
 * through Die(). (A cpl of zero would never advance through the
 * alignment, and a negative one is meaningless as a line width.)
 *
 * Also writes the optional information carried by ainfo: the
 * #=ID/#=AC/#=DE/#=AU header lines, the GA/TC/NC cutoffs, one
 * #=SQ line per sequence, and per-block #=RF, #=CS, #=SS and
 * #=SA markup. ainfo must NOT be NULL: it supplies nseq, alen
 * and the sequence names, and is dereferenced unconditionally.
 * 
 * Return: (void)
 */
void
WriteSELEX(FILE *fp, char **aseqs, AINFO *ainfo, int cpl)
{
  enum { SELEX_MAX_CPL = 32767 };  /* documented upper bound on cpl */

  int    idx;                   /* counter for sequences               */
  int    namelen;               /* maximum name length used            */
  int    len;                   /* tmp variable for name lengths       */
  int    currpos;               /* first alignment column of the block */
  char **ss;                    /* aligned secondary structure strings */
  char **sa;                    /* aligned accessibility strings       */

  /* Validate cpl before anything is allocated or written. The
   * returns are only reached if Die() comes back to its caller.
   */
  if (cpl > SELEX_MAX_CPL) {
    Die("You can't WriteSELEX() with lines as long as %d, pal.\n", cpl);
    return;
  }
  if (cpl < 1) {
    Die("You can't WriteSELEX() with %d symbols per line.\n", cpl);
    return;
  }

  /* Width of the name column: longest name, but at least 6. */
  namelen = 0;
  for (idx = 0; idx < ainfo->nseq; idx++) {
    len = (int) strlen(ainfo->sqinfo[idx].name);
    if (len > namelen)
      namelen = len;
  }
  if (namelen < 6)
    namelen = 6;

  /* Make aligned secondary structure / accessibility strings.
   * Every slot starts out NULL: what MakeAlignedString() does to its
   * output argument on failure is not visible from here, and a slot
   * it leaves unset must not be read or freed as garbage later.
   */
  ss = MallocOrDie(sizeof *ss * ainfo->nseq);
  sa = MallocOrDie(sizeof *sa * ainfo->nseq);
  for (idx = 0; idx < ainfo->nseq; idx++) {
    ss[idx] = NULL;
    sa[idx] = NULL;
    if (ainfo->sqinfo[idx].flags & SQINFO_SS)
      MakeAlignedString(aseqs[idx], ainfo->alen, ainfo->sqinfo[idx].ss, &(ss[idx]));
    if (ainfo->sqinfo[idx].flags & SQINFO_SA)
      MakeAlignedString(aseqs[idx], ainfo->alen, ainfo->sqinfo[idx].sa, &(sa[idx]));
  }

  /* Write header info
   */
  if (ainfo->name != NULL)
    fprintf(fp, "#=ID %s\n", ainfo->name);
  if (ainfo->acc != NULL)
    fprintf(fp, "#=AC %s\n", ainfo->acc);
  if (ainfo->desc != NULL)
    fprintf(fp, "#=DE %s\n", ainfo->desc);
  if (ainfo->au != NULL)
    fprintf(fp, "#=AU %s\n", ainfo->au);
  if (ainfo->flags & AINFO_GA)
    fprintf(fp, "#=GA %.1f %.1f\n", ainfo->ga1, ainfo->ga2);
  if (ainfo->flags & AINFO_TC)
    fprintf(fp, "#=TC %.1f %.1f\n", ainfo->tc1, ainfo->tc2);
  if (ainfo->flags & AINFO_NC)
    fprintf(fp, "#=NC %.1f %.1f\n", ainfo->nc1, ainfo->nc2);

  /* One #=SQ line per sequence; fields whose flag is not set are
   * written as "-" (strings) or 0 (coordinates).
   */
  for (idx = 0; idx < ainfo->nseq; idx++)
    fprintf(fp, "#=SQ %-*.*s %6.4f %s %s %d..%d::%d %s\n", 
            namelen, namelen, ainfo->sqinfo[idx].name,
            ainfo->wgt[idx],
            (ainfo->sqinfo[idx].flags & SQINFO_ID)    ? ainfo->sqinfo[idx].id     : "-",
            (ainfo->sqinfo[idx].flags & SQINFO_ACC)   ? ainfo->sqinfo[idx].acc    : "-",
            (ainfo->sqinfo[idx].flags & SQINFO_START) ? ainfo->sqinfo[idx].start  : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_STOP)  ? ainfo->sqinfo[idx].stop   : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_OLEN)  ? ainfo->sqinfo[idx].olen   : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_DESC)  ? ainfo->sqinfo[idx].desc   : "-");
  fprintf(fp, "\n");

  /* Main loop: write seqs in blocks of cpl columns. */
  for (currpos = 0; currpos < ainfo->alen; currpos += cpl) {
    /* Reference coord system */
    if (ainfo->rf != NULL)
      write_selex_row(fp, namelen, "#=RF", ainfo->rf + currpos, cpl);

    /* Consensus secondary structure */
    if (ainfo->cs != NULL)
      write_selex_row(fp, namelen, "#=CS", ainfo->cs + currpos, cpl);

    for (idx = 0; idx < ainfo->nseq; idx++) {
      /* Aligned sequence */
      write_selex_row(fp, namelen, ainfo->sqinfo[idx].name,
                      aseqs[idx] + currpos, cpl);

      /* Individual secondary structure; the slot is non-NULL only
       * if the SQINFO_SS flag was set and a string was produced.
       */
      if (ss[idx] != NULL)
        write_selex_row(fp, namelen, "#=SS", ss[idx] + currpos, cpl);

      /* Surface accessibility, same rule with SQINFO_SA. */
      if (sa[idx] != NULL)
        write_selex_row(fp, namelen, "#=SA", sa[idx] + currpos, cpl);
    }

    /* put blank line between blocks */
    fprintf(fp, "\n");
  }

  /* Garbage collection. free(NULL) is a no-op, so slots that were
   * never filled need no guard.
   */
  for (idx = 0; idx < ainfo->nseq; idx++) {
    free(ss[idx]);
    free(sa[idx]);
  }
  free(ss);
  free(sa);
}