Beispiel #1
0
/**
 * @brief Creates a copy of an mseq structure
 *
 * A fresh structure is set up with NewMSeq(); afterwards the sequence
 * count, the sequence type, the filename (if one is set) and, for each
 * sequence, its seq / orig_seq strings and its sqinfo entry are
 * transferred from the source.
 *
 * @param[out] prMSeqDest_p
 * Receives the newly created copy
 * @param[in]  prMSeqSrc
 * Source mseq structure to copy from
 *
 * @note The caller has to free the copy by calling FreeMSeq()
 *
 * @note Members that are not listed above are left exactly as NewMSeq()
 * set them. NOTE(review): confirm this is intended for members such as
 * 'aligned'.
 *
 */
void
CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc)
{
    mseq_t *prCopy;
    int iSeq;

    assert(prMSeqSrc != NULL && prMSeqDest_p != NULL);

    /* Set up the destination, then work through a short alias */
    NewMSeq(prMSeqDest_p);
    prCopy = *prMSeqDest_p;

    /* Scalar members and the optional filename */
    prCopy->nseqs = prMSeqSrc->nseqs;
    prCopy->seqtype = prMSeqSrc->seqtype;
    if (NULL != prMSeqSrc->filename) {
        prCopy->filename = CkStrdup(prMSeqSrc->filename);
    }

    /* One slot per sequence in each of the three per-sequence arrays */
    prCopy->seq = (char **) CKMALLOC(prCopy->nseqs * sizeof(char *));
    prCopy->orig_seq = (char **) CKMALLOC(prCopy->nseqs * sizeof(char *));
    prCopy->sqinfo = (SQINFO *) CKMALLOC(prCopy->nseqs * sizeof(SQINFO));

    /* Fill the slots entry by entry */
    for (iSeq = 0; iSeq < prCopy->nseqs; iSeq++) {
        prCopy->seq[iSeq] = CkStrdup(prMSeqSrc->seq[iSeq]);
        prCopy->orig_seq[iSeq] = CkStrdup(prMSeqSrc->orig_seq[iSeq]);
        SeqinfoCopy(&prCopy->sqinfo[iSeq], &prMSeqSrc->sqinfo[iSeq]);
    }
}
Beispiel #2
0
/**
 *
 * @brief the 'real' main function
 *
 */
int
MyMain(int argc, char **argv)
{
    mseq_t *prMSeq = NULL;
    mseq_t *prMSeqProfile1 = NULL;
    mseq_t *prMSeqProfile2 = NULL;
    cmdline_opts_t cmdline_opts;

    /* Must happen first: setup logger */
    LogDefaultSetup(&rLog);

    /*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/
    /*Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME);*/ /* FS, r237 -> 238 */

    SetDefaultUserOpts(&(cmdline_opts));

    ParseCommandLine(&cmdline_opts, argc, argv);
    
    if (NULL != cmdline_opts.pcLogFile) {
        prLogFile = fopen(cmdline_opts.pcLogFile, "w");
        LogSetFP(&rLog, LOG_INFO, prLogFile);
        LogSetFP(&rLog, LOG_VERBOSE, prLogFile);
        LogSetFP(&rLog, LOG_DEBUG, prLogFile);
    }

    InitClustalOmega(cmdline_opts.iThreads);

    if (rLog.iLogLevelEnabled < LOG_INFO) {
        PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts);
        PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts));
    }

    /* Read sequence file
     *
     */
    if (NULL != cmdline_opts.pcSeqInfile) {
        NewMSeq(&prMSeq);
        if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile,
                          cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
        }
#if TRACE
        {
            int iAux;
            for (iAux=0; iAux<prMSeq->nseqs; iAux++) {
                Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", iAux, prMSeq->seq[iAux]);
                LogSqInfo(&prMSeq->sqinfo[iAux]);
            }
        }
#endif
    }
    /* k-tuple pairwise distance calculation seg-faults if 
     * only one sequence, simply exit early.
     * note that for profile/profile alignment prMSeq is NULL 
     * FS, r222->r223 */
    if (prMSeq && (prMSeq->nseqs <= 1)){
        Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align",
              cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s");
    }

    /* Dealign if requested and neccessary
     */
    if (NULL != prMSeq) {
        if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) {
            Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested.");
            DealignMSeq(prMSeq);
        }
    }


    /* Read profile1
     *
     */
    if (NULL != cmdline_opts.pcProfile1Infile) {
        NewMSeq(&prMSeqProfile1);
        if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile,
                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
                  cmdline_opts.pcProfile1Infile);
        }
        /* FIXME: commented out. FS, r240 -> r241  
         * for explanation see below */
        /*if (1==prMSeqProfile1->nseqs) {
          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
          cmdline_opts.pcProfile1Infile);
          }*/
        if (FALSE == prMSeqProfile1->aligned) {
            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
                  cmdline_opts.pcProfile1Infile);
        }
    }

    

    /* Read profile2
     *
     */
    if (NULL != cmdline_opts.pcProfile2Infile) {
        NewMSeq(&prMSeqProfile2);
        if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile,
                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
                  cmdline_opts.pcProfile2Infile);
        }
        /* FIXME: there is no (clean) way to align a single sequence to a profile. 
         * if we go down the -i route, it causes a seg-fault in the pair-wise 
         * k-tuple distance calculation. However, single sequences can be 
         * understood as 1-profiles. Therefore we have to allow for 1-profiles.
         * FS, r240 -> r241 
         */
        /*if (1==prMSeqProfile2->nseqs) {
          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
          cmdline_opts.pcProfile2Infile);
          }*/
        if (FALSE == prMSeqProfile1->aligned) {
            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
                  cmdline_opts.pcProfile2Infile);
        }
    }


    /* Depending on the input we got perform
     *
     * (i) normal alignment: seq + optional profile
     * or
     * (ii) profile profile alignment
     *
     */
    if (NULL != prMSeq) {
        if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts)) {
            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
        }

        if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile, 
                           cmdline_opts.iAlnOutFormat)) {
            Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
        }
#if 0
        {
            bool bSampling = FALSE; /* better set to TRUE for many sequences */
            bool bReportAll = TRUE;
            AliStat(prMSeq, bSampling, bReportAll);
        }
#endif
        

    } else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) {
        if (AlignProfiles(prMSeqProfile1, prMSeqProfile2, 
                          cmdline_opts.aln_opts.rHhalignPara)) {
            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
        }
        if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile, 
                           cmdline_opts.iAlnOutFormat)) {
            Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
        }
    }


    /* cleanup
     */
    if (NULL != prMSeq) {
        FreeMSeq(&prMSeq);
    }
    if (NULL != prMSeqProfile1) {
        FreeMSeq(&prMSeqProfile1);
    }
    if (NULL != prMSeqProfile2) {
        FreeMSeq(&prMSeqProfile2);
    }

    FreeUserOpts(&cmdline_opts);

    Log(&rLog, LOG_DEBUG, "Successful program exit");

    if (NULL != cmdline_opts.pcLogFile) {
        fclose(prLogFile);
    }
    return EXIT_SUCCESS;
}