Exemplo n.º 1
0
/* MSAFileWrite()
 *
 * Writes the alignment <msa> to the open stream <fp> using the writer
 * that corresponds to <outfmt> (one of the MSAFILE_* codes).
 *
 * <do_oneline> is only consulted for MSAFILE_STOCKHOLM: when non-zero
 * WriteStockholmOneBlock() is used, otherwise WriteStockholm().
 *
 * Any <outfmt> without a matching writer is reported through Die().
 */
void
MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline)
{
  /* Stockholm is the only format with two writer variants. */
  if (outfmt == MSAFILE_STOCKHOLM) {
    if (do_oneline) {
      WriteStockholmOneBlock(fp, msa);
    } else {
      WriteStockholm(fp, msa);
    }
    return;
  }

  if (outfmt == MSAFILE_A2M) {
    WriteA2M(fp, msa);
  } else if (outfmt == MSAFILE_CLUSTAL) {
    WriteClustal(fp, msa);
  } else if (outfmt == MSAFILE_MSF) {
    WriteMSF(fp, msa);
  } else if (outfmt == MSAFILE_PHYLIP) {
    WritePhylip(fp, msa);
  } else if (outfmt == MSAFILE_SELEX) {
    WriteSELEX(fp, msa);
  } else {
    Die("can't write. no such alignment format %d\n", outfmt);
  }
}
/*****************************************************************
 * msf.c test driver: 
 * cc -DTESTDRIVE_MSF -g -O2 -Wall -o test msf.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c sqio.c alignio.c selex.c interleaved.c types.c -lm
 * 
 * Usage: test <alignment file>
 *
 * Opens the file named by the first command line argument, reads
 * alignments from it with ReadMSF() until that returns NULL, and
 * echoes each one to stdout with WriteMSF().
 */
int
main(int argc, char **argv)
{
  MSAFILE *afp;
  MSA     *msa;
  char    *file;

  /* Without an argument argv[1] is a null pointer; stop before it is
   * handed to MSAFileOpen() or printed with %s.
   */
  if (argc < 2)
    Die("Usage: %s <alignment file>\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "test");

  file = argv[1];

  /* NOTE(review): the file is opened with MSAFILE_STOCKHOLM although it
   * is parsed with ReadMSF() below - presumably the format code is not
   * consulted when the reader is called directly; confirm.
   */
  if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)
    Die("Couldn't open %s\n", file);

  while ((msa = ReadMSF(afp)) != NULL)
    {
      WriteMSF(stdout, msa);
      MSAFree(msa); 
    }
  
  MSAFileClose(afp);
  exit(0);
}
Exemplo n.º 3
0
/**
 * @brief Write alignment to file.
 *
 * @param[in] mseq
 * The mseq_t struct containing the aligned sequences. Must not be NULL
 * and must hold at least one sequence.
 * @param[in] pcAlnOutfile
 * The name of the output file. If NULL the alignment is written to
 * stdout instead.
 * @param[in] outfmt
 * The alignment output format (defined in squid.h)
 * @param[in] iWrap
 * length of line for Clustal/Fasta format
 * @param[in] bResno
 * Handed to WriteClustal() as 1 when TRUE and 0 otherwise; not used
 * for any other output format.
 *
 * @return Non-zero on error (unknown format, no sequences, output file
 * could not be opened, or writing/closing the output file failed)
 *
 * @note We create a temporary squid MSA struct in here because we never
 * use it within clustal. We might be better off using the old clustal
 * output routines instead.
 *
 */
int
WriteAlignment(mseq_t *mseq, const char *pcAlnOutfile, int outfmt, int iWrap, bool bResno)
{
    int i; /* aux */
    MSA *msa; /* squid's alignment structure */
    FILE *pfOut = NULL;
    int key; /* MSA struct internal index for sequence */
    int alen; /* alignment length */
    bool use_stdout;
    int iRet = 0; /* value handed back to the caller */

    assert(mseq!=NULL);

    if (MSAFILE_UNKNOWN == outfmt) {
        Log(&rLog, LOG_ERROR, "Unknown output format chosen");
        return -1;
    }

    /* The alignment length is taken from the first sequence below, so
     * there has to be one. Checked before the output file is opened so
     * that nothing gets truncated for an alignment we cannot write.
     */
    if (mseq->nseqs < 1) {
        Log(&rLog, LOG_ERROR, "No sequences to write");
        return -1;
    }

    if (NULL == pcAlnOutfile) {
        pfOut = stdout;
        use_stdout = TRUE;
    } else {
        use_stdout = FALSE;
        if (NULL == (pfOut = fopen(pcAlnOutfile, "w"))) {
            Log(&rLog, LOG_ERROR, "Could not open file %s for writing", pcAlnOutfile);
            return -1;
        }
    }


    /* derive alignment length from first seq */
    alen = (int) strlen(mseq->seq[0]);

    msa  = MSAAlloc(mseq->nseqs, alen);

    /* basic structure borrowed code from squid-1.9g/a2m.c:ReadA2M()
     * we actually create a copy of mseq. keeping the pointers becomes
     * messy when calling MSAFree()
     */
    for (i=0; i<mseq->nseqs; i++) {
        char *this_name = NULL; /* mseq sequence name */
        char *this_seq = NULL; /* mseq sequence */
        SQINFO *this_sqinfo = NULL; /* mseq sequence info record */
        int iI;

        /* mseq->tree_order encodes to order in which sequences are listed in the guide-tree,
           if the user wants the sequence output in the input-order then mseq->tree_order==NULL,
           otherwise mseq->tree_order!=NULL, containing the indices of the sequences, FS, r274 ->  */
        iI = (NULL == mseq->tree_order) ? i : mseq->tree_order[i];

        this_name = mseq->sqinfo[iI].name;
        this_seq = mseq->seq[iI];
        this_sqinfo = &mseq->sqinfo[iI];

        key = GKIStoreKey(msa->index, this_name);
        msa->sqname[key] = sre_strdup(this_name, strlen(this_name));

        /* setting msa->sqlen[idx] and msa->aseq[idx] */
        msa->sqlen[key] = sre_strcat(&(msa->aseq[key]), msa->sqlen[key],
                                     this_seq, strlen(this_seq));

        if (this_sqinfo->flags & SQINFO_DESC) {
            /* FIXME never get here ... */
            MSASetSeqDescription(msa, key, this_sqinfo->desc);
        }
        /* FIXME extend this by copying more stuff according to flags.
         * See MSAFileRead() in msa.c and used functions there
         *
         * Problem is that we never parse MSA information as we use squid'sSeqFile
         */

        msa->nseq++;

    } /* 0 <= i < mseq->nseqs */


    /* FIXME Would like to, but can't use MSAVerifyParse(msa) here, as it
     * will die on error. Need to implement our own version
     */
#if 0
    MSAVerifyParse(msa);
#endif

    /* The below is copy of MSAFileWrite() which originally only writes to stdout.
     */

    /* Be sloppy and make a2m and fasta the same. same for vienna (which is
       the same). same same. can can. boleh boleh */
    if (outfmt==SQFILE_FASTA)
        outfmt = MSAFILE_A2M;
    if (outfmt==SQFILE_VIENNA)
        outfmt = MSAFILE_VIENNA;

    switch (outfmt) {
    case MSAFILE_A2M:
        WriteA2M(pfOut, msa, iWrap);
        break;
    case MSAFILE_VIENNA:
        /* same writer as A2M, but with INT_MAX as the line length */
        WriteA2M(pfOut, msa, INT_MAX);
        break;
    case MSAFILE_CLUSTAL:
        WriteClustal(pfOut, msa, iWrap, TRUE==bResno ? 1 : 0, mseq->seqtype);
        break;
    case MSAFILE_MSF:
        WriteMSF(pfOut, msa);
        break;
    case MSAFILE_PHYLIP:
        WritePhylip(pfOut, msa);
        break;
    case MSAFILE_SELEX:
        WriteSELEX(pfOut, msa);
        break;
    case MSAFILE_STOCKHOLM:
        WriteStockholm(pfOut, msa);
        break;
    default:
        Log(&rLog, LOG_FATAL, "internal error: %s",
            "invalid output format should have been detected before");
        /* only matters if Log() returns for LOG_FATAL: nothing was
         * written, so do not report success */
        iRet = -1;
    }

    if (use_stdout == FALSE) {
        /* Buffered data may only hit the disk during fclose(), so a full
         * disk or I/O error can surface either as a stream error flag or
         * as a failing fclose(). Check both before claiming success.
         */
        int write_failed = ferror(pfOut);

        if (0 != fclose(pfOut)) {
            write_failed = 1;
        }

        if (write_failed) {
            Log(&rLog, LOG_ERROR,
                "Could not write alignment to %s", pcAlnOutfile);
            iRet = -1;
        } else if (0 == iRet) {
            Log(&rLog, LOG_INFO,
                "Alignment written to %s", pcAlnOutfile);
        }
    }
    MSAFree(msa);

    return iRet;
}