Ejemplo n.º 1
0
/**
 * @brief copies an mseq structure
 *
 * @param[out] prMSeqDest_p
 * Copy of mseq structure
 * @param[in]  prMSeqSrc
 * Source mseq structure to copy
 *
 * @note caller has to free copy by calling FreeMSeq()
 *
 */
void
CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc)
{
    int i;
    assert(prMSeqSrc != NULL && prMSeqDest_p != NULL);

    NewMSeq(prMSeqDest_p);

    (*prMSeqDest_p)->nseqs = prMSeqSrc->nseqs;
    (*prMSeqDest_p)->seqtype = prMSeqSrc->seqtype;
    if (prMSeqSrc->filename!=NULL) {
        (*prMSeqDest_p)->filename = CkStrdup(prMSeqSrc->filename);
    }

    (*prMSeqDest_p)->seq =  (char **)
                            CKMALLOC((*prMSeqDest_p)->nseqs * sizeof(char *));
    (*prMSeqDest_p)->orig_seq =  (char **)
                                 CKMALLOC((*prMSeqDest_p)->nseqs * sizeof(char *));
    (*prMSeqDest_p)->sqinfo =  (SQINFO *)
                               CKMALLOC((*prMSeqDest_p)->nseqs * sizeof(SQINFO));



    for (i=0; i<(*prMSeqDest_p)->nseqs; i++) {
        (*prMSeqDest_p)->seq[i] = CkStrdup(prMSeqSrc->seq[i]);
        (*prMSeqDest_p)->orig_seq[i] = CkStrdup(prMSeqSrc->orig_seq[i]);
        SeqinfoCopy(&(*prMSeqDest_p)->sqinfo[i], &prMSeqSrc->sqinfo[i]);
    }
}
Ejemplo n.º 2
0
Archivo: seq.c Proyecto: rforge/phyexe
/**
 * @brief Appends an mseq structure to an already existing one.
 * filename will be left untouched.
 *
 * @param[in] prMSeqDest_p
 * MSeq structure to which to append to
 * @param[out] prMSeqToAdd
 * MSeq structure which is to append
 *
 * 
 */    
void
JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd)
{
    int iSrcSeqIndex;
    int iNewNSeq;
    
    assert(NULL != prMSeqDest_p && NULL != (*prMSeqDest_p));
    assert(NULL != prMSeqToAdd);
    
    if (0 == prMSeqToAdd->nseqs) {
        Log(&rLog, LOG_WARN, "Was asked to add 0 sequences");
        return;
    }
    
    /* warn on seqtype mismatch and keep original seqtype */
    if ((*prMSeqDest_p)->seqtype != prMSeqToAdd->seqtype) {
        Log(&rLog, LOG_WARN, "Joining sequences of different type");
    }
    
    /* leave filename as it is */

    /*
     * copy new seq/s, orig_seq/s, sqinfo/s
     */
    iNewNSeq = (*prMSeqDest_p)->nseqs + prMSeqToAdd->nseqs;
    
    (*prMSeqDest_p)->seq =  (char **)
        CKREALLOC((*prMSeqDest_p)->seq, iNewNSeq * sizeof(char *));
    
    (*prMSeqDest_p)->orig_seq =  (char **)
        CKREALLOC((*prMSeqDest_p)->orig_seq, iNewNSeq * sizeof(char *));
    
    (*prMSeqDest_p)->sqinfo =  (SQINFO *)
        CKREALLOC((*prMSeqDest_p)->sqinfo, iNewNSeq * sizeof(SQINFO));
    
    
    for (iSrcSeqIndex=0; iSrcSeqIndex < prMSeqToAdd->nseqs; iSrcSeqIndex++) {
        int iDstSeqIndex = (*prMSeqDest_p)->nseqs++;
        
        (*prMSeqDest_p)->seq[iDstSeqIndex] =
            CkStrdup(prMSeqToAdd->seq[iSrcSeqIndex]);
        
        (*prMSeqDest_p)->orig_seq[iDstSeqIndex] =
            CkStrdup(prMSeqToAdd->orig_seq[iSrcSeqIndex]);
        
        SeqinfoCopy(&(*prMSeqDest_p)->sqinfo[iDstSeqIndex],
                    & prMSeqToAdd->sqinfo[iSrcSeqIndex]);
    }

    (*prMSeqDest_p)->nseqs = iNewNSeq;
    
    (*prMSeqDest_p)->aligned = SeqsAreAligned(*prMSeqDest_p);
    
    return; 
}
Ejemplo n.º 3
0
/**
 * @brief Creates a new sequence entry and appends it to an existing mseq
 * structure.
 *
 * @param[out] prMSeqDest_p
 * Already existing and initialised mseq structure
 * @param[in] pcSeqName
 * sequence name of the sequence to add
 * @param[in] pcSeqRes
 * the actual sequence (residues) to add
 *
 * @note Don't forget to update the align and type flag if necessary!
 *
 * FIXME allow adding of more features
 *
 */
void
AddSeq(mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes)
{
    int iSeqIdx = 0;
    SQINFO sqinfo;

    assert(NULL != prMSeqDest_p);
    assert(NULL != pcSeqName);
    assert(NULL != pcSeqRes);

    iSeqIdx = (*prMSeqDest_p)->nseqs;

    (*prMSeqDest_p)->seq =  (char **)
                            CKREALLOC((*prMSeqDest_p)->seq, (iSeqIdx+1) * sizeof(char *));
    (*prMSeqDest_p)->orig_seq =  (char **)
                                 CKREALLOC((*prMSeqDest_p)->orig_seq, (iSeqIdx+1) * sizeof(char *));
    (*prMSeqDest_p)->sqinfo =  (SQINFO *)
                               CKREALLOC((*prMSeqDest_p)->sqinfo, (iSeqIdx+1) * sizeof(SQINFO));


    (*prMSeqDest_p)->seq[iSeqIdx] = CkStrdup(pcSeqRes);
    (*prMSeqDest_p)->orig_seq[iSeqIdx] = CkStrdup(pcSeqRes);

    /* should probably get ri of SqInfo altogether in the long run and just
       transfer the intersting members into our own struct
     */
    sqinfo.flags = 0; /* init */

    sqinfo.len = strlen(pcSeqRes);
    sqinfo.flags |= SQINFO_LEN;

    /* name is an array of SQINFO_NAMELEN length */
    strncpy(sqinfo.name, pcSeqName, SQINFO_NAMELEN-1);
    sqinfo.name[SQINFO_NAMELEN-1] = '\0';
    sqinfo.flags |= SQINFO_NAME;

    SeqinfoCopy(&(*prMSeqDest_p)->sqinfo[iSeqIdx],
                & sqinfo);

    (*prMSeqDest_p)->nseqs++;

    return;
}
Ejemplo n.º 4
0
/**
 * @brief Sort sequences by length
 *
 * @param[out] prMSeq
 * mseq to sort by length
 * @param[out] cOrder
 * Sorting order. 'd' for descending, 'a' for ascending.
 *
 *
 */
void
SortMSeqByLength(mseq_t *prMSeq, const char cOrder)
{
    int *piSeqLen;
    int *piOrder;
    int iSeqIndex;
    mseq_t *prMSeqCopy = NULL;

    assert('a'==cOrder || 'd'==cOrder);

    Log(&rLog, LOG_WARN,
        "FIXME: This modifies sequence ordering. Might not be what user wants. Will change output order as well");

    piSeqLen = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));
    piOrder = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));
    for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
        piSeqLen[iSeqIndex] = prMSeq->sqinfo[iSeqIndex].len;
    }
    QSortAndTrackIndex(piOrder, piSeqLen, prMSeq->nseqs, cOrder, FALSE);

    CopyMSeq(&prMSeqCopy, prMSeq);
    for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
        /* copy mseq entry
         */
        CKFREE(prMSeq->seq[iSeqIndex]);
        prMSeq->seq[iSeqIndex] = CkStrdup(prMSeqCopy->seq[piOrder[iSeqIndex]]);

        CKFREE(prMSeq->orig_seq[iSeqIndex]);
        prMSeq->orig_seq[iSeqIndex] = CkStrdup(prMSeqCopy->orig_seq[piOrder[iSeqIndex]]);

        SeqinfoCopy(&prMSeq->sqinfo[iSeqIndex], &prMSeqCopy->sqinfo[piOrder[iSeqIndex]]);
    }

    CKFREE(piSeqLen);
    CKFREE(piOrder);
    FreeMSeq(&prMSeqCopy);

    return;
}
Ejemplo n.º 5
0
/**
 * @brief reads sequences from file
 *
 * @param[out] prMSeq
 * Multiple sequence struct. Must be preallocated.
 * FIXME: would make more sense to allocate it here.
 * @param[in] seqfile
 * Sequence file name. If '-' sequence will be read from stdin.
 * @param[in] iSeqType
 * int-encoded sequence type. Set to
 * SEQTYPE_UNKNOWN for autodetect (guessed from first sequence)
 * @param[in] iMaxNumSeq
 * Return an error, if more than iMaxNumSeq have been read
 * @param[in] iMaxSeqLen
 * Return an error, if a seq longer than iMaxSeqLen has been read
 *
 * @return 0 on success, -1 on error
 *
 * @note
 *  - Depends heavily on squid
 *  - Sequence file format will be guessed
 *  - If supported by squid, gzipped files can be read as well.
 */
int
ReadSequences(mseq_t *prMSeq, char *seqfile,
              int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
              int iMaxNumSeq, int iMaxSeqLen)
{
    SQFILE *dbfp; /* sequence file descriptor */
    char *cur_seq;
    SQINFO cur_sqinfo;
    int iSeqIdx; /* sequence counter */
    int iSeqPos; /* sequence string position counter */

    assert(NULL!=seqfile);


    /* Try to work around inability to autodetect from a pipe or .gz:
     * assume FASTA format
     */
    if (SQFILE_UNKNOWN == iSeqFmt  &&
            (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0)) {
        iSeqFmt = SQFILE_FASTA;
    }

    /* Using squid routines to read input. taken from seqstat_main.c. we don't
     * know if input is aligned, so we use SeqfileOpen instead of MSAFileOpen
     * etc. NOTE this also means we discard some information, e.g. when
     * reading from and writing to a stockholm file, all extra MSA
     * info/annotation will be lost.
     *
     */

    if (NULL == (dbfp = SeqfileOpen(seqfile, iSeqFmt, NULL))) {
        Log(&rLog, LOG_ERROR, "Failed to open sequence file %s for reading", seqfile);
        return -1;
    }


    /* FIXME squid's ReadSeq() will exit with fatal error if format is
     * unknown. This will be a problem for a GUI. Same is true for many squid
     * other functions.
     *
     * The original squid:ReadSeq() dealigns sequences on input. We
     * use a patched version.
     *
     */
    while (ReadSeq(dbfp, dbfp->format,
                   &cur_seq,
                   &cur_sqinfo)) {

        if (prMSeq->nseqs+1>iMaxNumSeq) {
            Log(&rLog, LOG_ERROR, "Maximum number of sequences (=%d) exceeded after reading sequence '%s' from '%s'",
                iMaxNumSeq, cur_sqinfo.name, seqfile);
            return -1;
        }
        if ((int)strlen(cur_seq)>iMaxSeqLen) {
            Log(&rLog, LOG_ERROR, "Sequence '%s' has %d residues and is therefore longer than allowed (max. sequence length is %d)",
                cur_sqinfo.name, strlen(cur_seq), iMaxSeqLen);
            return -1;
        }
        if ((int)strlen(cur_seq)==0) {
            Log(&rLog, LOG_ERROR, "Sequence '%s' has 0 residues",
                cur_sqinfo.name);
            return -1;
        }

        /* FIXME: use modified version of AddSeq() that allows handing down SqInfo
         */

        prMSeq->seq =  (char **)
                       CKREALLOC(prMSeq->seq, (prMSeq->nseqs+1) * sizeof(char *));
        prMSeq->seq[prMSeq->nseqs] = CkStrdup(cur_seq);


        prMSeq->sqinfo =  (SQINFO *)
                          CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO));
        SeqinfoCopy(&prMSeq->sqinfo[prMSeq->nseqs], &cur_sqinfo);

#ifdef TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", prMSeq->nseqs, prMSeq->seq[prMSeq->nseqs]);
        LogSqInfo(&prMSeq->sqinfo[prMSeq->nseqs]);
#endif
        /* always guess type from first seq. use squid function and
         * convert value
         */
        if (0 == prMSeq->nseqs) {
            int type = Seqtype(prMSeq->seq[prMSeq->nseqs]);
            switch (type)  {
            case kDNA:
                prMSeq->seqtype = SEQTYPE_DNA;
                break;
            case kRNA:
                prMSeq->seqtype = SEQTYPE_RNA;
                break;
            case kAmino:
                prMSeq->seqtype = SEQTYPE_PROTEIN;
                break;
            case kOtherSeq:
                prMSeq->seqtype = SEQTYPE_UNKNOWN;
                break;
            default:
                Log(&rLog, LOG_FATAL, "Internal error in %s", __FUNCTION__);
            }

            /* override with given sequence type but check with
             * automatically detected type and warn if necessary
             */
            if (SEQTYPE_UNKNOWN != iSeqType) {
                if (prMSeq->seqtype != iSeqType) {
                    Log(&rLog, LOG_WARN, "Overriding automatically determined seq-type %s to %s as requested",
                        SeqTypeToStr(prMSeq->seqtype), SeqTypeToStr(iSeqType));
                    prMSeq->seqtype = iSeqType;
                }
            }
            /* if type could not be determined and was not set return error */
            if (SEQTYPE_UNKNOWN == iSeqType && SEQTYPE_UNKNOWN == prMSeq->seqtype) {
                Log(&rLog, LOG_ERROR, "Couldn't guess sequence type from first sequence");
                FreeSequence(cur_seq, &cur_sqinfo);
                SeqfileClose(dbfp);
                return -1;
            }
        }

        Log(&rLog, LOG_DEBUG, "seq-no %d: type=%s name=%s len=%d seq=%s",
            prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype),
            prMSeq->sqinfo[prMSeq->nseqs].name, prMSeq->sqinfo[prMSeq->nseqs].len,
            prMSeq->seq[prMSeq->nseqs]);

        /* FIXME IPUAC and/or case conversion? If yes see
         * corresponding squid functions. Special treatment of
         * Stockholm tilde-gaps for ktuple code?
         */

        prMSeq->nseqs++;

        FreeSequence(cur_seq, &cur_sqinfo);
    }
    SeqfileClose(dbfp);

    /*#if ALLOW_ONLY_PROTEIN
        if (SEQTYPE_PROTEIN != prMSeq->seqtype) {
            Log(&rLog, LOG_FATAL, "Sequence type is %s. %s only works on protein.",
                  SeqTypeToStr(prMSeq->seqtype), PACKAGE_NAME);
        }
    #endif*/

    /* Check if sequences are aligned */
    prMSeq->aligned = SeqsAreAligned(prMSeq, bIsProfile, bDealignInputSeqs);


    /* keep original sequence as copy and convert "working" sequence
     *
     */
    prMSeq->orig_seq = (char**) CKMALLOC(prMSeq->nseqs * sizeof(char *));
    for (iSeqIdx=0; iSeqIdx<prMSeq->nseqs; iSeqIdx++) {

        prMSeq->orig_seq[iSeqIdx] = CkStrdup(prMSeq->seq[iSeqIdx]);


        /* convert unknown characters according to set seqtype
         * be conservative, i.e. don't allow any fancy ambiguity
         * characters to make sure that ktuple code etc. works.
         */

        /* first on the fly conversion between DNA and RNA
         */
        if (prMSeq->seqtype==SEQTYPE_DNA)
            ToDNA(prMSeq->seq[iSeqIdx]);
        if (prMSeq->seqtype==SEQTYPE_RNA)
            ToRNA(prMSeq->seq[iSeqIdx]);

        /* then check of each character
         */
        for (iSeqPos=0; iSeqPos<(int)strlen(prMSeq->seq[iSeqIdx]); iSeqPos++) {
            char *res = &(prMSeq->seq[iSeqIdx][iSeqPos]);
            if (isgap(*res))
                continue;

            if (prMSeq->seqtype==SEQTYPE_PROTEIN) {
                if (NULL == strchr(AMINO_ALPHABET, toupper(*res))) {
                    *res = AMINOACID_ANY;
                }
            } else if (prMSeq->seqtype==SEQTYPE_DNA) {
                if (NULL == strchr(DNA_ALPHABET, toupper(*res))) {
                    *res = NUCLEOTIDE_ANY;
                }
            } else if (prMSeq->seqtype==SEQTYPE_RNA) {
                if (NULL == strchr(RNA_ALPHABET, toupper(*res))) {
                    *res = NUCLEOTIDE_ANY;
                }
            }
        }
    }

    /* order in which sequences appear in guide-tree
     * only allocate if different output-order desired */
    prMSeq->tree_order = NULL;

    prMSeq->filename = CkStrdup(seqfile);
    Log(&rLog, LOG_INFO, "Read %d sequences (type: %s) from %s",
        prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype), prMSeq->filename);

    return 0;
}
Ejemplo n.º 6
0
/**
 * @brief Appends an mseq structure to an already existing one.
 * filename will be left untouched.
 *
 * @param[in] prMSeqDest_p
 * MSeq structure to which to append to
 * @param[out] prMSeqToAdd
 * MSeq structure which is to append
 *
 */
void
JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd)
{
    int iSrcSeqIndex;
    int iNewNSeq;

    assert(NULL != prMSeqDest_p && NULL != (*prMSeqDest_p));
    assert(NULL != prMSeqToAdd);

    if (0 == prMSeqToAdd->nseqs) {
        Log(&rLog, LOG_WARN, "Was asked to add 0 sequences");
        return;
    }

    /* warn on seqtype mismatch and keep original seqtype */
    if ((*prMSeqDest_p)->seqtype != prMSeqToAdd->seqtype) {
        Log(&rLog, LOG_WARN, "Joining sequences of different type");
    }

    /* leave filename as it is */

    /*
     * copy new seq/s, orig_seq/s, sqinfo/s
     */
    iNewNSeq = (*prMSeqDest_p)->nseqs + prMSeqToAdd->nseqs;

    (*prMSeqDest_p)->seq =  (char **)
                            CKREALLOC((*prMSeqDest_p)->seq, iNewNSeq * sizeof(char *));

    (*prMSeqDest_p)->orig_seq =  (char **)
                                 CKREALLOC((*prMSeqDest_p)->orig_seq, iNewNSeq * sizeof(char *));

    (*prMSeqDest_p)->sqinfo =  (SQINFO *)
                               CKREALLOC((*prMSeqDest_p)->sqinfo, iNewNSeq * sizeof(SQINFO));


    for (iSrcSeqIndex=0; iSrcSeqIndex < prMSeqToAdd->nseqs; iSrcSeqIndex++) {
        int iDstSeqIndex = (*prMSeqDest_p)->nseqs++;

        (*prMSeqDest_p)->seq[iDstSeqIndex] =
            CkStrdup(prMSeqToAdd->seq[iSrcSeqIndex]);

        (*prMSeqDest_p)->orig_seq[iDstSeqIndex] =
            CkStrdup(prMSeqToAdd->orig_seq[iSrcSeqIndex]);

        SeqinfoCopy(&(*prMSeqDest_p)->sqinfo[iDstSeqIndex],
                    & prMSeqToAdd->sqinfo[iSrcSeqIndex]);
    }

    (*prMSeqDest_p)->nseqs = iNewNSeq;

#if 0
    /* 2nd arg is bIsProfile, which when set TRUE skips
     * the check for gaps. here always check for gaps,
     * so set FALSE (main reason is that it is easier), FS, r282 -> */
    /* had a problem at this stage, therefore dispense with gap check, FS, r290 -> */
    /* 3rd argument is dealignment flag, do not dealign profiles */
    (*prMSeqDest_p)->aligned = SeqsAreAligned(*prMSeqDest_p, TRUE/*FALSE*/, FALSE);
#else
    (*prMSeqDest_p)->aligned = TRUE;
#endif

    return;
}
Ejemplo n.º 7
0
/**
 * @brief Parse command line parameters. Will exit if help/usage etc
 * are called or or call Log(&rLog, LOG_FATAL, ) if an error was detected.
 *
 * @param[out] user_opts
 * User parameter struct, with defaults already set.
 * @param[in] argc
 * mains argc
 * @param[in] argv
 * mains argv
 * 
 */    
void
ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv)
{

     /* argtable command line parsing:
     * see
     * http://argtable.sourceforge.net/doc/argtable2-intro.html
     *
     * basic structure is: arg_xxxN:
     * xxx can be int, lit, db1, str, rex, file or date
     * If N = 0, arguments may appear zero-or-once; N = 1 means
     * exactly once, N = n means up to maxcount times
     *
     *
     * @note: changes here, might also affect main.cpp:ConvertOldCmdLine()
     *
     */  
   
    struct arg_rem  *rem_seq_input  = arg_rem(NULL, "\nSequence Input:");
    struct arg_file *opt_seqin = arg_file0("i", "in,infile",
                                            "{<file>,-}",
                                            "Multiple sequence input file (- for stdin)");
    struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
                                            /*min*/ 0, /*max*/ 128,
                                            "HMM input files");
    struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
                                           "Dealign input sequences");
    struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
                                           "{Protein, RNA, DNA}",
                                           "Force a sequence type (default: auto)");
/*    struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein",
                                         "Set sequence type to protein even if Clustal guessed nucleic acid"); */
    struct arg_str *opt_infmt = arg_str0(NULL, "infmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "Forced sequence input file format (default: auto)");

    
    struct arg_rem  *rem_guidetree  = arg_rem(NULL, "\nClustering:");
    struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
                                             "{ktuple}",
                                             "Pairwise alignment distance measure");
    struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in",
                                                "<file>",
                                                "Pairwise distance matrix input file (skips distance computation)");
    struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out",
                                                 "<file>",
                                                 "Pairwise distance matrix output file");
    struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in",
                                                  "<file>",
                                                  "Guide tree input file (skips distance computation and guide-tree clustering step)");
    struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out",
                                                   "<file>",
                                                   "Guide tree output file");
    /* AW: mbed is default since at least R253
       struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed",
       "Fast, Mbed-like clustering for guide-tree calculation");
       struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
       "Use Mbed-like clustering also during iteration");
    */
    /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
    struct arg_lit *opt_full = arg_lit0(NULL, "full",
                                        "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
    struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
                                        "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");

    struct arg_str *opt_clustering = arg_str0("c", "clustering",
                                              "{UPGMA}",
                                              "Clustering method for guide tree");

    
    struct arg_rem *rem_aln_output  = arg_rem(NULL, "\nAlignment Output:");
    struct arg_file *opt_outfile = arg_file0("o", "out,outfile",
                                             "{file,-}",
                                             "Multiple sequence alignment output file (default: stdout)");
    struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "MSA output file format (default: fasta)");

    
    struct arg_rem *rem_iteration  = arg_rem(NULL, "\nIteration:");
    struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter",
                                                  /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
                                                  "<n>", "Number of (combined guide-tree/HMM) iterations");
    struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
                                                            "<n>", "Maximum number guidetree iterations");
    struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
                                                      "<n>", "Maximum number of HMM iterations");

   
    struct arg_rem *rem_limits  = arg_rem(NULL, "\nLimits (will exit early, if exceeded):");
    struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>",
                                           "Maximum allowed number of sequences");
    struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>", 
                                              "Maximum allowed sequence length");


    struct arg_rem *rem_misc  = arg_rem(NULL, "\nMiscellaneous:");

    struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto",
                                         "Set options automatically (might overwrite some of your options)");
    struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>", 
                                              "Number of processors to use");
    struct arg_file *opt_logfile = arg_file0("l", "log",
                                             "<file>",
                                             "Log all non-essential output to this file");
    struct arg_lit *opt_help = arg_lit0("h", "help",
                                         "Print this help and exit");
    struct arg_lit *opt_version = arg_lit0(NULL, "version",
                                           "Print version information and exit");
    struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version",
                                           "Print long version information and exit");
    struct arg_lit *opt_verbose = arg_litn("v", "verbose",
                                           0, 3,
                                           "Verbose output (increases if given multiple times)");
    struct arg_lit *opt_force = arg_lit0(NULL, "force",
                                         "Force file overwriting");
    struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
                                          NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);


    struct arg_end *opt_end = arg_end(10); /* maximum number of errors
                                            * to store */

    void *argtable[] = {rem_seq_input,
                        opt_seqin,
                        opt_hmm_in,
                        opt_dealign,
                        opt_profile1,
                        opt_profile2,
                        opt_seqtype,
                        /* opt_force_protein, */
                        opt_infmt,
                        rem_guidetree,
#if 0
                        /* no other options then default available or not implemented */
                        opt_pairdist,
#endif
                        opt_distmat_in,
                        opt_distmat_out,
                        opt_guidetree_in,
                        opt_guidetree_out,
                        opt_full, /* FS, r250 -> */
                        opt_full_iter, /* FS, r250 -> */
#if 0
                        /* no other options then default available */
                        opt_clustering,
#endif
                        rem_aln_output,
                        opt_outfile,
                        opt_outfmt,

                        rem_iteration,
                        opt_num_iterations,
                        opt_max_guidetree_iterations,
                        opt_max_hmm_iterations,

                        rem_limits,
                        opt_max_seq,
                        opt_max_seqlen,

                        rem_misc,
                        opt_autooptions,
                        opt_threads,
                        opt_logfile,
                        opt_help,
                        opt_verbose,
                        opt_version,
                        opt_long_version,
                        opt_force,
                        opt_macram, /* FS, r240 -> r241 */

                        opt_end};
    int nerrors;


    /* Verify the argtable[] entries were allocated sucessfully
     */
    if (arg_nullcheck(argtable)) {
        Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)");
    }

    /* Parse the command line as defined by argtable[]
     */
    nerrors = arg_parse(argc, argv, argtable);

    /* Special case: '--help' takes precedence over error reporting
     */
    if (opt_help->count > 0) {
        printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);

        printf("\n");
        printf("If you like Clustal-Omega please cite:\n%s\n", CITATION);
        printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n");
        /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */
        printf("\n");
        printf("Check http://www.clustal.org for more information and updates.\n");
            
        printf("\n");
        printf("Usage: %s", basename(argv[0]));
        arg_print_syntax(stdout,argtable, "\n");

        printf("\n");
        printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n",
               basename(argv[0]));
        printf("See below for a list of all options.\n");

        arg_print_glossary(stdout, argtable, "  %-25s %s\n");
        arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--version' takes precedence over error reporting
     */
    if (opt_version->count > 0) {
        printf("%s\n", PACKAGE_VERSION);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--long-version' takes precedence over error reporting
     */
    if (opt_long_version->count > 0) {
        char zcLongVersion[1024];
        PrintLongVersion(zcLongVersion, sizeof(zcLongVersion));
        printf("%s\n", zcLongVersion);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* If the parser returned any errors then display them and exit
     */
    if (nerrors > 0) {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout, opt_end, PACKAGE);
        fprintf(stderr, "For more information try: %s --help\n", argv[0]);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_FAILURE);
    }

    
    /* ------------------------------------------------------------
     *
     * Command line successfully parsed. Now transfer values to
     * user_opts. While doing so, make sure that given input files
     * exist and given output files are writable do not exist, or if
     * they do, should be overwritten.
     *
     * No logic checks here! They are done in a different function
     *
     * ------------------------------------------------------------*/
        
    
    /* not part of user_opts because it declared in src/util.h */
    if (0 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_WARN;
    } else if (1 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_INFO;
    } else if (2 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_VERBOSE;
    } else if (3 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_DEBUG;
    }

    user_opts->aln_opts.bAutoOptions = opt_autooptions->count;

    user_opts->bDealignInputSeqs = opt_dealign->count;

    /* NOTE: full distance matrix used to be default - there was
       --mbed flag but no --full flag after r250 decided that mBed
       should be default - now need --full flag to turn off mBed.
       wanted to retain mBed Boolean, so simply added --full flag. if
       both flags set (erroneously) want --mbed to overwrite --full,
       therefore do --full 1st, the --mbed, FS, r250 */
    if (opt_full->count){
        user_opts->aln_opts.bUseMbed = FALSE;
    }

    if (opt_full_iter->count){
        user_opts->aln_opts.bUseMbedForIteration = FALSE;
    }

    user_opts->bForceFileOverwrite = opt_force->count;



    /* log-file
     */
    if (opt_logfile->count > 0) {
        user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcLogFile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcLogFile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcLogFile);
        }
    }


    /* normal sequence input (no profile)
     */
    if (opt_seqin->count > 0) {
        user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]);
    }

    /* Input limitations
     */
    /* maximum number of sequences */
    if (opt_max_seq->count > 0) {
        user_opts->iMaxNumSeq = opt_max_seq->ival[0];
    }
    
    /* maximum sequence length */
    if (opt_max_seqlen->count > 0) {
        user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
    }
    
    /* Output format
     */
    if (opt_infmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_infmt->sval[0];
        user_opts->iSeqInFormat = String2SeqfileFormat(tmp);
    } else {
        user_opts->iSeqInFormat = SQFILE_UNKNOWN;
    }


    /* Sequence type
     */
    if (opt_seqtype->count > 0) {
        if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) {
            user_opts->iSeqType = SEQTYPE_PROTEIN;
        } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) {
            user_opts->iSeqType = SEQTYPE_RNA;
        } else if  (STR_NC_EQ(opt_seqtype->sval[0], "dna")) {
            user_opts->iSeqType = SEQTYPE_DNA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
        }
    }
/*    if (opt_force_protein->count > 0) {
        user_opts->iSeqType = SEQTYPE_PROTEIN;
    } */

    /* Profile input
     */
    if (opt_profile1->count > 0) {
        user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]);
        if (! FileExists(user_opts->pcProfile1Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile);
        }
    }
    
    if (opt_profile2->count > 0) {
        user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]);
        if (! FileExists(user_opts->pcProfile2Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile);
        }
    }
    
    
    /* HMM input
     */
    user_opts->aln_opts.iHMMInputFiles = 0;
    user_opts->aln_opts.ppcHMMInput = NULL;
    if (opt_hmm_in->count>0) {
        int iAux;
        user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count;
        user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC(
            user_opts->aln_opts.iHMMInputFiles * sizeof(char*));
        for (iAux=0; iAux<opt_hmm_in->count; iAux++) {
            user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]);
            if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) {
                Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]);
            }
        }
    }


    /* Pair distance method
     */
    if (opt_pairdist->count > 0) {
        if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) {
            user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]);
        }
    }


    /* Distance matrix input
     */
    if (opt_distmat_in->count > 0) {
        user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile);
        }
    }


    /* Distance matrix output
     */
    if (opt_distmat_out->count > 0) {
        user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcDistmatOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                user_opts->aln_opts.pcDistmatOutfile);
        }
    }

    /* Clustering
     *
     */
    if (opt_clustering->count > 0) {
        if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) {
            user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]);
        }
    }

    
    /* Guidetree input
     */
    if (opt_guidetree_in->count > 0) {
        user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile);
        }
    }
    
    
    /* Guidetree output
     */
    if (opt_guidetree_out->count > 0) {
        user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcGuidetreeOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->aln_opts.pcGuidetreeOutfile);
        }
    }
    

    /* max guidetree iterations
     */
    if (opt_max_guidetree_iterations->count > 0) {
        user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0];
    }


    /* max guidetree iterations
     */
    if (opt_max_hmm_iterations->count > 0) {
        user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0];
    }

     /* number of iterations
      */
     if (opt_num_iterations->count > 0) {
        if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) {
            Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment.");
            user_opts->aln_opts.bIterationsAuto = TRUE;

        } else {
            int iAux;
            user_opts->aln_opts.bIterationsAuto = FALSE;
            for (iAux=0; iAux<(int)strlen(opt_num_iterations->sval[0]); iAux++) {
                if (! isdigit(opt_num_iterations->sval[0][iAux])) {
                    Log(&rLog, LOG_FATAL, "Couldn't iteration parameter: %s",
                          opt_num_iterations->sval[0]);
                }
            }
            user_opts->aln_opts.iNumIterations = atoi(opt_num_iterations->sval[0]);
        }
    }

    
    /* Alignment output
     */
    if (opt_outfile->count > 0) {
        user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcAlnOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcAlnOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcAlnOutfile);
        }
    }
    

    /* Output format
     */
    if (opt_outfmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_outfmt->sval[0];
        user_opts->iAlnOutFormat = String2SeqfileFormat(tmp);
        if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) {
            Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]);
        }
    }

    /* Number of threads
     */
#ifdef HAVE_OPENMP
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] <= 0) {
            Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.", 
                  opt_threads->ival[0]);    
        }
        user_opts->iThreads = opt_threads->ival[0];
    }

#else
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] > 1) {
            Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.", 
                  opt_threads->ival[0], PACKAGE_NAME);    
        }
    }
#endif


    /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
     */
    if (opt_macram->count > 0) { /* FS, r240 -> r241 */
        user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0];
    }

    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));

    UserOptsLogicCheck(user_opts);

    return; 
}