/**
 * @brief Reorder the sequences of an mseq according to their length
 *
 * @param[in,out] prMSeq
 * mseq whose entries (seq, orig_seq and sqinfo) are rearranged in place
 * @param[in] cOrder
 * Sorting order. 'd' for descending, 'a' for ascending.
 *
 * @note The order of the entries inside prMSeq is changed; a warning
 * saying so is logged on every call.
 */
void
SortMSeqByLength(mseq_t *prMSeq, const char cOrder)
{
    mseq_t *prSnapshot = NULL;
    int *piLengths;
    int *piSortedIdx;
    int i;

    assert(cOrder == 'a' || cOrder == 'd');

    Log(&rLog, LOG_WARN,
        "FIXME: This modifies sequence ordering. Might not be what user wants. Will change output order as well");

    piLengths = (int *) CKMALLOC(prMSeq->nseqs * sizeof(*piLengths));
    piSortedIdx = (int *) CKMALLOC(prMSeq->nseqs * sizeof(*piSortedIdx));

    /* collect the sort keys: one length per sequence */
    i = 0;
    while (i < prMSeq->nseqs) {
        piLengths[i] = prMSeq->sqinfo[i].len;
        i++;
    }

    QSortAndTrackIndex(piSortedIdx, piLengths, prMSeq->nseqs, cOrder, FALSE);

    /* work from an untouched copy, because prMSeq itself is overwritten
     * slot by slot below
     */
    CopyMSeq(&prSnapshot, prMSeq);

    for (i = 0; i < prMSeq->nseqs; i++) {
        /* slot i receives the entry that used to live at iFrom */
        const int iFrom = piSortedIdx[i];

        CKFREE(prMSeq->seq[i]);
        prMSeq->seq[i] = CkStrdup(prSnapshot->seq[iFrom]);

        CKFREE(prMSeq->orig_seq[i]);
        prMSeq->orig_seq[i] = CkStrdup(prSnapshot->orig_seq[iFrom]);

        SeqinfoCopy(&prMSeq->sqinfo[i], &prSnapshot->sqinfo[iFrom]);
    }

    CKFREE(piLengths);
    CKFREE(piSortedIdx);
    FreeMSeq(&prSnapshot);
}
/** * * @brief the 'real' main function * */ int MyMain(int argc, char **argv) { mseq_t *prMSeq = NULL; mseq_t *prMSeqProfile1 = NULL; mseq_t *prMSeqProfile2 = NULL; cmdline_opts_t cmdline_opts; /* Must happen first: setup logger */ LogDefaultSetup(&rLog); /*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/ /*Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME);*/ /* FS, r237 -> 238 */ SetDefaultUserOpts(&(cmdline_opts)); ParseCommandLine(&cmdline_opts, argc, argv); if (NULL != cmdline_opts.pcLogFile) { prLogFile = fopen(cmdline_opts.pcLogFile, "w"); LogSetFP(&rLog, LOG_INFO, prLogFile); LogSetFP(&rLog, LOG_VERBOSE, prLogFile); LogSetFP(&rLog, LOG_DEBUG, prLogFile); } InitClustalOmega(cmdline_opts.iThreads); if (rLog.iLogLevelEnabled < LOG_INFO) { PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts); PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts)); } /* Read sequence file * */ if (NULL != cmdline_opts.pcSeqInfile) { NewMSeq(&prMSeq); if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile); } #if TRACE { int iAux; for (iAux=0; iAux<prMSeq->nseqs; iAux++) { Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", iAux, prMSeq->seq[iAux]); LogSqInfo(&prMSeq->sqinfo[iAux]); } } #endif } /* k-tuple pairwise distance calculation seg-faults if * only one sequence, simply exit early. 
* note that for profile/profile alignment prMSeq is NULL * FS, r222->r223 */ if (prMSeq && (prMSeq->nseqs <= 1)){ Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align", cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s"); } /* Dealign if requested and neccessary */ if (NULL != prMSeq) { if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) { Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested."); DealignMSeq(prMSeq); } } /* Read profile1 * */ if (NULL != cmdline_opts.pcProfile1Infile) { NewMSeq(&prMSeqProfile1); if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed", cmdline_opts.pcProfile1Infile); } /* FIXME: commented out. FS, r240 -> r241 * for explanation see below */ /*if (1==prMSeqProfile1->nseqs) { Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile", cmdline_opts.pcProfile1Infile); }*/ if (FALSE == prMSeqProfile1->aligned) { Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile", cmdline_opts.pcProfile1Infile); } } /* Read profile2 * */ if (NULL != cmdline_opts.pcProfile2Infile) { NewMSeq(&prMSeqProfile2); if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile, cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat, cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) { Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed", cmdline_opts.pcProfile2Infile); } /* FIXME: there is no (clean) way to align a single sequence to a profile. * if we go down the -i route, it causes a seg-fault in the pair-wise * k-tuple distance calculation. However, single sequences can be * understood as 1-profiles. Therefore we have to allow for 1-profiles. 
* FS, r240 -> r241 */ /*if (1==prMSeqProfile2->nseqs) { Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile", cmdline_opts.pcProfile2Infile); }*/ if (FALSE == prMSeqProfile1->aligned) { Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile", cmdline_opts.pcProfile2Infile); } } /* Depending on the input we got perform * * (i) normal alignment: seq + optional profile * or * (ii) profile profile alignment * */ if (NULL != prMSeq) { if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts)) { Log(&rLog, LOG_FATAL, "An error occured during the alignment"); } if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile, cmdline_opts.iAlnOutFormat)) { Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile); } #if 0 { bool bSampling = FALSE; /* better set to TRUE for many sequences */ bool bReportAll = TRUE; AliStat(prMSeq, bSampling, bReportAll); } #endif } else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) { if (AlignProfiles(prMSeqProfile1, prMSeqProfile2, cmdline_opts.aln_opts.rHhalignPara)) { Log(&rLog, LOG_FATAL, "An error occured during the alignment"); } if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile, cmdline_opts.iAlnOutFormat)) { Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile); } } /* cleanup */ if (NULL != prMSeq) { FreeMSeq(&prMSeq); } if (NULL != prMSeqProfile1) { FreeMSeq(&prMSeqProfile1); } if (NULL != prMSeqProfile2) { FreeMSeq(&prMSeqProfile2); } FreeUserOpts(&cmdline_opts); Log(&rLog, LOG_DEBUG, "Successful program exit"); if (NULL != cmdline_opts.pcLogFile) { fclose(prLogFile); } return EXIT_SUCCESS; }