Ejemplo n.º 1
0
/**
 * @brief Reorder the entries of an mseq by sequence length
 *
 * Gathers the length of every sequence (sqinfo[].len), lets
 * QSortAndTrackIndex() derive an index ordering from those lengths and then
 * rebuilds seq, orig_seq and sqinfo of every slot from a temporary copy of
 * the mseq, following that ordering.
 *
 * @param[in,out] prMSeq
 * mseq whose entries are reordered in place
 * @param[in] cOrder
 * Sorting order. 'd' for descending, 'a' for ascending.
 *
 * @note Any other value of cOrder trips the assertion below.
 * @warning The ordering of the sequences is changed for all later users of
 * prMSeq; a warning saying so is logged on every call.
 */
void
SortMSeqByLength(mseq_t *prMSeq, const char cOrder)
{
    mseq_t *prSnapshot = NULL;
    int *piLengths;
    int *piPermutation;
    int iSlot;

    assert('a'==cOrder || 'd'==cOrder);

    Log(&rLog, LOG_WARN,
        "FIXME: This modifies sequence ordering. Might not be what user wants. Will change output order as well");

    piLengths = (int *) CKMALLOC(prMSeq->nseqs * sizeof(*piLengths));
    piPermutation = (int *) CKMALLOC(prMSeq->nseqs * sizeof(*piPermutation));

    /* sort key: one length per sequence */
    iSlot = 0;
    while (iSlot < prMSeq->nseqs) {
        piLengths[iSlot] = prMSeq->sqinfo[iSlot].len;
        iSlot++;
    }

    /* piPermutation is used below as "slot i takes the entry found at
     * piPermutation[i]"; presumably QSortAndTrackIndex() fills it that way
     * (the meaning of the trailing FALSE is not visible here - confirm
     * against its definition) */
    QSortAndTrackIndex(piPermutation, piLengths, prMSeq->nseqs, cOrder, FALSE);

    /* work from a full copy so that overwriting a slot cannot destroy an
     * entry which still has to be moved */
    CopyMSeq(&prSnapshot, prMSeq);

    for (iSlot = 0; iSlot < prMSeq->nseqs; iSlot++) {
        const int iSource = piPermutation[iSlot];

        /* replace the (un)aligned sequence string */
        CKFREE(prMSeq->seq[iSlot]);
        prMSeq->seq[iSlot] = CkStrdup(prSnapshot->seq[iSource]);

        /* replace the original sequence string */
        CKFREE(prMSeq->orig_seq[iSlot]);
        prMSeq->orig_seq[iSlot] = CkStrdup(prSnapshot->orig_seq[iSource]);

        /* and the accompanying sequence info */
        SeqinfoCopy(&prMSeq->sqinfo[iSlot], &prSnapshot->sqinfo[iSource]);
    }

    CKFREE(piLengths);
    CKFREE(piPermutation);
    FreeMSeq(&prSnapshot);
}
Ejemplo n.º 2
0
/**
 *
 * @brief the 'real' main function
 *
 * Sets up logging, parses the command line, reads the sequence file and/or
 * up to two profile files, runs either a normal alignment (sequences plus
 * optional profile1) or a profile/profile alignment, writes the result and
 * releases all resources.
 *
 * @param[in] argc
 * argument count, handed to ParseCommandLine()
 * @param[in] argv
 * argument vector, handed to ParseCommandLine()
 *
 * @return EXIT_SUCCESS when the end of the function is reached.
 *
 * @note Error conditions are reported with Log(..., LOG_FATAL, ...). The
 * code relies on that call not returning (presumably it terminates the
 * program - confirm against the logger implementation).
 *
 */
int
MyMain(int argc, char **argv)
{
    mseq_t *prMSeq = NULL;
    mseq_t *prMSeqProfile1 = NULL;
    mseq_t *prMSeqProfile2 = NULL;
    cmdline_opts_t cmdline_opts;
    /* remembers whether this function opened prLogFile, so that the decision
     * to close it does not depend on cmdline_opts after FreeUserOpts() */
    int iLogFileOpened = 0;

    /* Must happen first: setup logger */
    LogDefaultSetup(&rLog);

    /*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/
    /*Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME);*/ /* FS, r237 -> 238 */

    SetDefaultUserOpts(&(cmdline_opts));

    ParseCommandLine(&cmdline_opts, argc, argv);

    /* Redirect info, verbose and debug output to the log file if requested.
     * Never hand a NULL stream to the logger.
     */
    if (NULL != cmdline_opts.pcLogFile) {
        prLogFile = fopen(cmdline_opts.pcLogFile, "w");
        if (NULL == prLogFile) {
            Log(&rLog, LOG_FATAL, "Could not open log file '%s' for writing",
                  cmdline_opts.pcLogFile);
        } else {
            iLogFileOpened = 1;
            LogSetFP(&rLog, LOG_INFO, prLogFile);
            LogSetFP(&rLog, LOG_VERBOSE, prLogFile);
            LogSetFP(&rLog, LOG_DEBUG, prLogFile);
        }
    }

    InitClustalOmega(cmdline_opts.iThreads);

    if (rLog.iLogLevelEnabled < LOG_INFO) {
        PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts);
        PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts));
    }

    /* Read sequence file
     *
     */
    if (NULL != cmdline_opts.pcSeqInfile) {
        NewMSeq(&prMSeq);
        if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile,
                          cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
        }
#if TRACE
        {
            int iAux;
            for (iAux=0; iAux<prMSeq->nseqs; iAux++) {
                Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", iAux, prMSeq->seq[iAux]);
                LogSqInfo(&prMSeq->sqinfo[iAux]);
            }
        }
#endif
    }
    /* k-tuple pairwise distance calculation seg-faults if
     * only one sequence, simply exit early.
     * note that for profile/profile alignment prMSeq is NULL
     * FS, r222->r223 */
    if (prMSeq && (prMSeq->nseqs <= 1)){
        Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align",
              cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s");
    }

    /* Dealign if requested and necessary
     */
    if (NULL != prMSeq) {
        if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) {
            Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested.");
            DealignMSeq(prMSeq);
        }
    }


    /* Read profile1
     *
     */
    if (NULL != cmdline_opts.pcProfile1Infile) {
        NewMSeq(&prMSeqProfile1);
        if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile,
                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
                  cmdline_opts.pcProfile1Infile);
        }
        /* FIXME: commented out. FS, r240 -> r241
         * for explanation see below */
        /*if (1==prMSeqProfile1->nseqs) {
          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
          cmdline_opts.pcProfile1Infile);
          }*/
        if (FALSE == prMSeqProfile1->aligned) {
            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
                  cmdline_opts.pcProfile1Infile);
        }
    }



    /* Read profile2
     *
     */
    if (NULL != cmdline_opts.pcProfile2Infile) {
        NewMSeq(&prMSeqProfile2);
        if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile,
                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
                  cmdline_opts.pcProfile2Infile);
        }
        /* FIXME: there is no (clean) way to align a single sequence to a profile.
         * if we go down the -i route, it causes a seg-fault in the pair-wise
         * k-tuple distance calculation. However, single sequences can be
         * understood as 1-profiles. Therefore we have to allow for 1-profiles.
         * FS, r240 -> r241
         */
        /*if (1==prMSeqProfile2->nseqs) {
          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
          cmdline_opts.pcProfile2Infile);
          }*/
        /* The check has to look at profile2 itself: profile1 may not have
         * been given at all (NULL) and says nothing about this file. */
        if (FALSE == prMSeqProfile2->aligned) {
            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
                  cmdline_opts.pcProfile2Infile);
        }
    }


    /* Depending on the input we got perform
     *
     * (i) normal alignment: seq + optional profile
     * or
     * (ii) profile profile alignment
     *
     */
    if (NULL != prMSeq) {
        if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts)) {
            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
        }

        if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile,
                           cmdline_opts.iAlnOutFormat)) {
            Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
        }
#if 0
        {
            bool bSampling = FALSE; /* better set to TRUE for many sequences */
            bool bReportAll = TRUE;
            AliStat(prMSeq, bSampling, bReportAll);
        }
#endif


    } else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) {
        if (AlignProfiles(prMSeqProfile1, prMSeqProfile2,
                          cmdline_opts.aln_opts.rHhalignPara)) {
            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
        }
        /* the result is written from profile1 */
        if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile,
                           cmdline_opts.iAlnOutFormat)) {
            Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
        }
    }


    /* cleanup
     */
    if (NULL != prMSeq) {
        FreeMSeq(&prMSeq);
    }
    if (NULL != prMSeqProfile1) {
        FreeMSeq(&prMSeqProfile1);
    }
    if (NULL != prMSeqProfile2) {
        FreeMSeq(&prMSeqProfile2);
    }

    FreeUserOpts(&cmdline_opts);

    Log(&rLog, LOG_DEBUG, "Successful program exit");

    /* cmdline_opts must not be consulted any more after FreeUserOpts();
     * use the local flag to decide whether the log file has to be closed */
    if (iLogFileOpened) {
        fclose(prLogFile);
    }
    return EXIT_SUCCESS;
}