/* Function: MSAFromAINFO()
 * Date:     SRE, Mon Jun 14 11:22:24 1999 [St. Louis]
 *
 * Purpose:  Build a new-style MSA from an old-style aseq/ainfo
 *           pair, so that legacy callers can hand their alignments
 *           to code that only understands the MSA structure.
 *
 *           For every sequence the aligned text, weight and name
 *           are copied, and the name is registered in msa->index.
 *           Accession, description, secondary structure (SS) and
 *           surface accessibility (SA) are carried over only when
 *           the matching SQINFO_* flag is set on that sequence.
 *           Alignment-wide annotation (name, desc, acc, au, cs, rf)
 *           and the TC/NC/GA cutoffs are copied afterwards.
 *
 * Args:     aseq  - [0..nseq-1][0..alen-1] alignment
 *           ainfo - old-style optional info; supplies nseq and alen
 *
 * Returns:  the MSA obtained from MSAAlloc(), filled in as above.
 */
MSA *
MSAFromAINFO(char **aseq, AINFO *ainfo)
{
  MSA *msa = MSAAlloc(ainfo->nseq, ainfo->alen);
  int  s;                       /* index over sequences          */
  int  slot;                    /* index over allocated seq slots */

  for (s = 0; s < ainfo->nseq; s++)
    {
      /* Mandatory per-sequence data. */
      msa->sqlen[s]  = msa->alen;
      msa->wgt[s]    = ainfo->wgt[s];
      strcpy(msa->aseq[s], aseq[s]);
      msa->sqname[s] = sre_strdup(ainfo->sqinfo[s].name, -1);
      GKIStoreKey(msa->index, msa->sqname[s]);

      /* Optional text annotation. */
      if ((ainfo->sqinfo[s].flags & SQINFO_ACC) != 0)
        MSASetSeqAccession(msa, s, ainfo->sqinfo[s].acc);

      if ((ainfo->sqinfo[s].flags & SQINFO_DESC) != 0)
        MSASetSeqDescription(msa, s, ainfo->sqinfo[s].desc);

      /* Optional secondary structure. The per-sequence arrays are
       * created on first use, one slot per allocated sequence, and
       * start out empty so untouched slots stay NULL / 0.
       */
      if ((ainfo->sqinfo[s].flags & SQINFO_SS) != 0)
        {
          if (!msa->ss)
            {
              msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
              msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
              slot = 0;
              while (slot < msa->nseqalloc)
                {
                  msa->ss[slot]    = NULL;
                  msa->sslen[slot] = 0;
                  slot++;
                }
            }
          MakeAlignedString(msa->aseq[s], msa->alen,
                            ainfo->sqinfo[s].ss, &(msa->ss[s]));
          msa->sslen[s] = msa->alen;
        }

      /* Optional surface accessibility; same lazy setup as for SS. */
      if ((ainfo->sqinfo[s].flags & SQINFO_SA) != 0)
        {
          if (!msa->sa)
            {
              msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
              msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
              slot = 0;
              while (slot < msa->nseqalloc)
                {
                  msa->sa[slot]    = NULL;
                  msa->salen[slot] = 0;
                  slot++;
                }
            }
          MakeAlignedString(msa->aseq[s], msa->alen,
                            ainfo->sqinfo[s].sa, &(msa->sa[s]));
          msa->salen[s] = msa->alen;
        }
    }

  /* Alignment-wide annotation. Absent (NULL) fields stay NULL,
   * since sre_strdup() returns NULL when passed NULL.
   */
  msa->name    = sre_strdup(ainfo->name, -1);
  msa->desc    = sre_strdup(ainfo->desc, -1);
  msa->acc     = sre_strdup(ainfo->acc,  -1);
  msa->au      = sre_strdup(ainfo->au,   -1);
  msa->ss_cons = sre_strdup(ainfo->cs,   -1);
  msa->rf      = sre_strdup(ainfo->rf,   -1);

  /* Score cutoffs: each pair is copied only if its flag is set. */
  if ((ainfo->flags & AINFO_TC) != 0)
    {
      msa->cutoff[MSA_CUTOFF_TC1]        = ainfo->tc1;
      msa->cutoff[MSA_CUTOFF_TC2]        = ainfo->tc2;
      msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
      msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
    }
  if ((ainfo->flags & AINFO_NC) != 0)
    {
      msa->cutoff[MSA_CUTOFF_NC1]        = ainfo->nc1;
      msa->cutoff[MSA_CUTOFF_NC2]        = ainfo->nc2;
      msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
      msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
    }
  if ((ainfo->flags & AINFO_GA) != 0)
    {
      msa->cutoff[MSA_CUTOFF_GA1]        = ainfo->ga1;
      msa->cutoff[MSA_CUTOFF_GA2]        = ainfo->ga2;
      msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
      msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
    }

  msa->alen = ainfo->alen;
  msa->nseq = ainfo->nseq;
  return msa;
}
/* Write one row of an alignment block: `label` left-justified and
 * padded/truncated to exactly `namelen` columns, one space, then at
 * most `cpl` characters of `text` (fewer if `text` ends first).
 */
static void
selex_write_row(FILE *fp, int namelen, const char *label,
                const char *text, int cpl)
{
  fprintf(fp, "%-*.*s %.*s\n", namelen, namelen, label, cpl, text);
}

/* Function: WriteSELEX()
 *
 * Purpose:  Write aligned sequences to an open file pointer,
 *           breaking into multiple blocks if the sequences are
 *           long. Number of symbols written per line is set by cpl.
 *           The alignment must be flushed (all aseqs the same length).
 *
 *           Output order: optional #=ID/#=AC/#=DE/#=AU/#=GA/#=TC/#=NC
 *           header lines, one #=SQ line per sequence, a blank line,
 *           then the alignment in blocks of cpl columns. Each block
 *           holds optional #=RF and #=CS rows, then every sequence
 *           followed by its optional #=SS and #=SA rows, and ends
 *           with a blank line.
 *
 * Args:     fp    - open output stream
 *           aseqs - [0..ainfo->nseq-1] aligned sequences
 *           ainfo - alignment info. Required: the sequence count,
 *                   alignment length, names and weights are all read
 *                   from it, so it must not be NULL. Individual
 *                   optional fields inside it may be NULL/unflagged.
 *           cpl   - symbols per line; must be in 1..32767.
 *
 * Return:   (void). Calls Die() on a NULL ainfo or out-of-range cpl.
 */
void
WriteSELEX(FILE *fp, char **aseqs, AINFO *ainfo, int cpl)
{
  int    idx;                   /* counter for sequences               */
  int    namelen;               /* width of the name column            */
  int    len;                   /* tmp variable for name lengths       */
  int    currpos;               /* first alignment column of the block */
  char **ss;                    /* aligned secondary structure strings */
  char **sa;                    /* aligned accessibility strings       */

  if (ainfo == NULL)
    Die("WriteSELEX() requires alignment info; ainfo was NULL.\n");
  if (cpl > 32767)
    Die("You can't WriteSELEX() with lines as long as %d, pal.\n", cpl);
  /* cpl == 0 would never advance currpos (infinite loop); a negative
   * cpl makes no sense as a line width. Reject both up front.
   */
  if (cpl < 1)
    Die("WriteSELEX() needs at least 1 symbol per line; got %d.\n", cpl);

  /* Name column is as wide as the longest name, but at least 6. */
  namelen = 0;
  for (idx = 0; idx < ainfo->nseq; idx++)
    {
      len = (int) strlen(ainfo->sqinfo[idx].name);
      if (len > namelen) namelen = len;
    }
  if (namelen < 6) namelen = 6;

  /* Make aligned secondary structure / accessibility strings.
   * Slots start out NULL so that sequences without the annotation
   * (or for which MakeAlignedString() leaves the output untouched --
   * NOTE(review): confirm its failure behavior) are skipped below
   * and can be freed unconditionally.
   */
  ss = (char **) MallocOrDie(sizeof(char *) * ainfo->nseq);
  sa = (char **) MallocOrDie(sizeof(char *) * ainfo->nseq);
  for (idx = 0; idx < ainfo->nseq; idx++)
    {
      ss[idx] = NULL;
      sa[idx] = NULL;
      if (ainfo->sqinfo[idx].flags & SQINFO_SS)
        MakeAlignedString(aseqs[idx], ainfo->alen,
                          ainfo->sqinfo[idx].ss, &(ss[idx]));
      if (ainfo->sqinfo[idx].flags & SQINFO_SA)
        MakeAlignedString(aseqs[idx], ainfo->alen,
                          ainfo->sqinfo[idx].sa, &(sa[idx]));
    }

  /* Write header info */
  if (ainfo->name != NULL) fprintf(fp, "#=ID %s\n", ainfo->name);
  if (ainfo->acc  != NULL) fprintf(fp, "#=AC %s\n", ainfo->acc);
  if (ainfo->desc != NULL) fprintf(fp, "#=DE %s\n", ainfo->desc);
  if (ainfo->au   != NULL) fprintf(fp, "#=AU %s\n", ainfo->au);

  if (ainfo->flags & AINFO_GA)
    fprintf(fp, "#=GA %.1f %.1f\n", ainfo->ga1, ainfo->ga2);
  if (ainfo->flags & AINFO_TC)
    fprintf(fp, "#=TC %.1f %.1f\n", ainfo->tc1, ainfo->tc2);
  if (ainfo->flags & AINFO_NC)
    fprintf(fp, "#=NC %.1f %.1f\n", ainfo->nc1, ainfo->nc2);

  /* Per-sequence #=SQ lines; unset fields print as "-" or 0. */
  for (idx = 0; idx < ainfo->nseq; idx++)
    fprintf(fp, "#=SQ %-*.*s %6.4f %s %s %d..%d::%d %s\n",
            namelen, namelen, ainfo->sqinfo[idx].name,
            ainfo->wgt[idx],
            (ainfo->sqinfo[idx].flags & SQINFO_ID)    ? ainfo->sqinfo[idx].id    : "-",
            (ainfo->sqinfo[idx].flags & SQINFO_ACC)   ? ainfo->sqinfo[idx].acc   : "-",
            (ainfo->sqinfo[idx].flags & SQINFO_START) ? ainfo->sqinfo[idx].start : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_STOP)  ? ainfo->sqinfo[idx].stop  : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_OLEN)  ? ainfo->sqinfo[idx].olen  : 0,
            (ainfo->sqinfo[idx].flags & SQINFO_DESC)  ? ainfo->sqinfo[idx].desc  : "-");
  fprintf(fp, "\n");

  /* main loop: write seqs in blocks. */
  for (currpos = 0; currpos < ainfo->alen; currpos += cpl)
    {
      /* Reference coord system */
      if (ainfo->rf != NULL)
        selex_write_row(fp, namelen, "#=RF", ainfo->rf + currpos, cpl);

      /* Consensus secondary structure */
      if (ainfo->cs != NULL)
        selex_write_row(fp, namelen, "#=CS", ainfo->cs + currpos, cpl);

      for (idx = 0; idx < ainfo->nseq; idx++)
        {
          /* Aligned sequence */
          selex_write_row(fp, namelen, ainfo->sqinfo[idx].name,
                          aseqs[idx] + currpos, cpl);

          /* Individual secondary structure */
          if ((ainfo->sqinfo[idx].flags & SQINFO_SS) && ss[idx] != NULL)
            selex_write_row(fp, namelen, "#=SS", ss[idx] + currpos, cpl);

          /* Surface accessibility */
          if ((ainfo->sqinfo[idx].flags & SQINFO_SA) && sa[idx] != NULL)
            selex_write_row(fp, namelen, "#=SA", sa[idx] + currpos, cpl);
        }

      /* put blank line between blocks */
      fprintf(fp, "\n");
    }

  /* Garbage collection: unused slots are NULL, and free(NULL) is a no-op. */
  for (idx = 0; idx < ainfo->nseq; idx++)
    {
      free(ss[idx]);
      free(sa[idx]);
    }
  free(ss);
  free(sa);
}