/**
 * @brief Creates a deep copy of an mseq structure
 *
 * @param[out] prMSeqDest_p
 * Receives the newly allocated copy of the source structure
 * @param[in] prMSeqSrc
 * Source mseq structure to copy
 *
 * @note Sequence count, sequence type, alignment flag, filename,
 * working sequences, original sequences and the per-sequence info are
 * duplicated. The guide-tree order is not duplicated; the copy's
 * tree_order is set to NULL.
 *
 * @note caller has to free copy by calling FreeMSeq()
 *
 */
void
CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc)
{
    mseq_t *prCopy;
    int i;

    assert(prMSeqSrc != NULL && prMSeqDest_p != NULL);

    NewMSeq(prMSeqDest_p);
    prCopy = *prMSeqDest_p;

    prCopy->nseqs = prMSeqSrc->nseqs;
    prCopy->seqtype = prMSeqSrc->seqtype;
    /* set explicitly so the copy never carries indeterminate values */
    prCopy->aligned = prMSeqSrc->aligned;
    prCopy->tree_order = NULL;
    if (prMSeqSrc->filename != NULL) {
        prCopy->filename = CkStrdup(prMSeqSrc->filename);
    }

    prCopy->seq = (char **) CKMALLOC(prCopy->nseqs * sizeof(char *));
    prCopy->orig_seq = (char **) CKMALLOC(prCopy->nseqs * sizeof(char *));
    prCopy->sqinfo = (SQINFO *) CKMALLOC(prCopy->nseqs * sizeof(SQINFO));

    for (i = 0; i < prCopy->nseqs; i++) {
        prCopy->seq[i] = CkStrdup(prMSeqSrc->seq[i]);
        prCopy->orig_seq[i] = CkStrdup(prMSeqSrc->orig_seq[i]);
        SeqinfoCopy(&prCopy->sqinfo[i], &prMSeqSrc->sqinfo[i]);
    }
}
/*! arc_udc_init */ static struct arcotg_udc *arc_udc_init (struct otg_dev *otg_dev) { struct device *device = otg_dev_get_drvdata(otg_dev); struct platform_device *pdev = to_platform_device(device); struct fsl_usb2_platform_data *pdata = (struct fsl_usb2_platform_data*)pdev->dev.platform_data; struct otg_instance *otg = otg_dev->otg_instance; struct pcd_instance *pcd = otg_dev->pcd_instance; struct arcotg_udc *udc = NULL; int timeout; /* Setting up the udc structure */ THROW_UNLESS((udc = (struct arcotg_udc *) CKMALLOC(sizeof(struct arcotg_udc))), error); /* Allocate queue */ THROW_UNLESS((udc->ep_qh = (struct ep_queue_head *) KMALLOC_ALIGN( USB_MAX_PIPES * sizeof(struct ep_queue_head), GFP_KERNEL | GFP_DMA, 2 * 1024, (void **)&ep_qh_base)), error); THROW_UNLESS(ep_qh_base, error); THROW_UNLESS((udc->ep_dtd = (struct ep_td_struct *) CKMALLOC(USB_MAX_PIPES * sizeof(struct ep_td_struct))), error); /* Stop, reset and wait for the UDC to reset */ UOG_USBCMD &= ~USB_CMD_RUN_STOP; UOG_USBCMD |= USB_CMD_CTRL_RESET; timeout = 10000000; // XXX This needs to be fixed, should not need to resort to timeout while ((UOG_USBCMD & USB_CMD_CTRL_RESET) && --timeout) { continue; } if (timeout == 0) { printk(KERN_DEBUG "%s: TIMEOUT\n", __FUNCTION__); return NULL; } /* Setup UDC mode and disable lock out mode*/ UOG_USBMODE |= USB_MODE_CTRL_MODE_DEVICE | USB_MODE_SETUP_LOCK_OFF; UOG_EPLISTADDR = virt_to_phys(udc->ep_qh); UOG_EPLISTADDR &= USB_EP_LIST_ADDRESS_MASK; /* Setup transceiver type, N.B. this must be done in one assignment */ UOG_PORTSC1 = (UOG_PORTSC1 & ~PORTSCX_PHY_TYPE_SEL) | pdata->xcvr_type; #if !defined(CONFIG_OTG_HIGH_SPEED) UOG_PORTSC1 |= (0x01000000); #endif fsl_platform_set_vbus_power(pdata, 0); CATCH(error) { if (ep_qh_base) kfree(ep_qh_base); if (udc) { if (udc->ep_dtd) LKFREE(udc->ep_dtd); LKFREE(udc); } udc = NULL; } return udc; }
/**
 *
 * @brief Builds a UPGMA guide tree. Thin frontend to the ported
 * Muscle UPGMA code.
 *
 * @param[out] tree
 * Receives the newly allocated tree. Use FreeMuscleTree() to free it.
 * @param[in] labels
 * pointer to nseq sequence names
 * @param[in] distmat
 * distance matrix
 * @param[in] ftree
 * optional: if non-NULL, the tree is also written to this file. If
 * the file cannot be opened an error is logged and writing is skipped.
 *
 * @see FreeMuscleTree()
 * @see MuscleUpgma2()
 *
 */
void
GuideTreeUpgma(tree_t **tree, char **labels, symmatrix_t *distmat, char *ftree)
{
    const linkage_t eLinkage = LINKAGE_AVG;
    FILE *prTreeFile = NULL;

    /* open the output file up front; a failure only disables writing */
    if (ftree != NULL) {
        prTreeFile = fopen(ftree, "w");
        if (prTreeFile == NULL) {
            Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for writing. Skipping", ftree);
        }
    }

    *tree = (tree_t *) CKMALLOC(1 * sizeof(tree_t));
    MuscleUpgma2(*tree, distmat, eLinkage, labels);

    if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
        Log(&rLog, LOG_DEBUG, "tree logging...");
        LogTree(*tree, LogGetFP(&rLog, LOG_DEBUG));
    }

    if (prTreeFile == NULL) {
        return;
    }

    MuscleTreeToFile(prTreeFile, *tree);
    Log(&rLog, LOG_INFO, "Guide tree written to %s", ftree);
    fclose(prTreeFile);
}
/*! otg_tasklet_init
 * Allocate an otg_tasklet, fill in its bookkeeping fields and hook it
 * up to either a work item (OTG_TASKLET_WORK) or a kernel tasklet.
 * The new tasklet starts out marked as terminated.
 *
 * @param name - otg_tasklet name
 * @param proc - otg_tasklet process
 * @param data - otg_tasklet data, argument for proc
 * @param tag - otg_tasklet tag
 * @return initialized otg_tasklet instance pointer, NULL if the
 *         allocation fails
 */
static inline struct otg_tasklet *otg_tasklet_init(char *name, otg_tasklet_proc_t proc, otg_tasklet_arg_t data, otg_tag_t tag)
{
        struct otg_tasklet *self;

        self = CKMALLOC(sizeof (struct otg_tasklet));
        RETURN_NULL_UNLESS(self);

        self->name = name;
        self->tag = tag;
        self->proc = proc;
        self->data = data;
        self->terminated = TRUE;

        #ifdef OTG_TASKLET_WORK
        INIT_WORK(&self->work, otg_tasklet_run, self);
        #else /* OTG_TASKLET_WORK */
        tasklet_init(&self->tasklet, otg_tasklet_run, (otg_tasklet_arg_t) self);
        #endif /* OTG_TASKLET_WORK */

        return self;

        CATCH(error) {
                if (self) LKFREE(self);
                return NULL;
        }
}
/*! otg_workitem_init
 * Allocate an otg_workitem, fill in its bookkeeping fields and
 * initialize its embedded work structure. The new work item starts
 * out with both terminate and terminated set.
 *
 * @param name - workitem name
 * @param proc - workitem handler
 * @param data - workitem data
 * @param tag - otg tag
 * @return otg_workitem instance pointer, NULL if the allocation fails
 */
static inline struct otg_workitem *otg_workitem_init(char *name, otg_workitem_proc_t proc, otg_workitem_arg_t data, otg_tag_t tag)
{
        struct otg_workitem *self;

        self = CKMALLOC(sizeof (struct otg_workitem));
        RETURN_NULL_UNLESS(self);

        self->name = name;
        self->tag = tag;
        self->proc = proc;
        self->data = data;

        self->terminate = TRUE;
        self->terminated = TRUE;

        INIT_WORK(&self->work, otg_workitem_run, self);

        return self;

        CATCH(error) {
                printk(KERN_INFO"%s: ERROR\n", __FUNCTION__);
                if (self) LKFREE(self);
                return NULL;
        }
}
/**
 * @brief allocate and initialise new mseq_t
 *
 * @param[out] prMSeq
 * newly allocated and initialised mseq_t: no sequences, unknown
 * sequence type, not aligned, all pointer members NULL
 *
 * @note caller has to free by calling FreeMSeq()
 *
 * @see FreeMSeq
 *
 */
void
NewMSeq(mseq_t **prMSeq)
{
    mseq_t *prNew = (mseq_t *) CKMALLOC(1 * sizeof(mseq_t));

    prNew->nseqs = 0;
    prNew->seqtype = SEQTYPE_UNKNOWN;
    prNew->aligned = FALSE;
    prNew->filename = NULL;
    prNew->seq = NULL;
    prNew->orig_seq = NULL;
    prNew->sqinfo = NULL;
    /* give every member a defined value so nothing is left indeterminate */
    prNew->tree_order = NULL;

    *prMSeq = prNew;
}
/**
 * @brief Reorders the sequences of an mseq by sequence length
 *
 * @param[in,out] prMSeq
 * mseq whose seq, orig_seq and sqinfo entries get reordered in place
 * @param[in] cOrder
 * Sorting order. 'd' for descending, 'a' for ascending.
 *
 * @note Lengths are taken from sqinfo[].len. The reordering works on
 * a temporary full copy of the mseq.
 *
 */
void
SortMSeqByLength(mseq_t *prMSeq, const char cOrder)
{
    int *piLengths;
    int *piSortedIdx;
    int i;
    mseq_t *prSnapshot = NULL;

    assert(cOrder == 'a' || cOrder == 'd');

    Log(&rLog, LOG_WARN, "FIXME: This modifies sequence ordering. Might not be what user wants. Will change output order as well");

    piLengths = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));
    piSortedIdx = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));

    i = 0;
    while (i < prMSeq->nseqs) {
        piLengths[i] = prMSeq->sqinfo[i].len;
        i++;
    }
    QSortAndTrackIndex(piSortedIdx, piLengths, prMSeq->nseqs, cOrder, FALSE);

    /* snapshot of the unsorted state to pull entries from */
    CopyMSeq(&prSnapshot, prMSeq);

    for (i = 0; i < prMSeq->nseqs; i++) {
        const int iFrom = piSortedIdx[i];

        CKFREE(prMSeq->seq[i]);
        prMSeq->seq[i] = CkStrdup(prSnapshot->seq[iFrom]);

        CKFREE(prMSeq->orig_seq[i]);
        prMSeq->orig_seq[i] = CkStrdup(prSnapshot->orig_seq[iFrom]);

        SeqinfoCopy(&prMSeq->sqinfo[i], &prSnapshot->sqinfo[iFrom]);
    }

    CKFREE(piSeqLenDummy_unused_guard(piLengths));
    CKFREE(piSortedIdx);
    FreeMSeq(&prSnapshot);
}
/**
 *
 * @brief Reads a guide tree from file and maps its leaves to the
 * sequences of an mseq by name.
 *
 * @param[out] tree
 * Receives the newly allocated tree. Use FreeMuscleTree() to free it.
 * @param[in] mseq
 * Sequences the tree must match: the leaf count has to equal nseqs
 * and every leaf label has to be found among the sequence names.
 * @param[in] ftree
 * Name of the tree file
 *
 * @return non-zero on error
 *
 * @note NOTE(review): on every error return the tree allocated here
 * stays assigned to *tree and is not released by this function --
 * confirm that callers clean it up.
 *
 */
int
GuideTreeFromFile(tree_t **tree, mseq_t *mseq, char *ftree)
{
    tree_t *prTree;
    int iNumNodes;
    int iNode;

    prTree = (tree_t *) CKMALLOC(1 * sizeof(tree_t));
    *tree = prTree;

    if (0 != MuscleTreeFromFile(prTree, ftree)) {
        Log(&rLog, LOG_ERROR, "%s", "MuscleTreeFromFile failed");
        return -1;
    }

    /* Make sure tree is rooted */
    if (!IsRooted(prTree)) {
        Log(&rLog, LOG_ERROR, "User tree must be rooted");
        return -1;
    }

    if (mseq->nseqs != (int)GetLeafCount(prTree)) {
        Log(&rLog, LOG_ERROR, "User tree does not match input sequences");
        return -1;
    }

    /* compare tree labels and sequence names and set leaf-ids */
    iNumNodes = GetNodeCount(prTree);
    for (iNode = 0; iNode < iNumNodes; ++iNode) {
        if (IsLeaf(iNode, prTree)) {
            char *pcLabel = GetLeafName(iNode, prTree);
            int iSeq = FindSeqName(pcLabel, mseq);

            if (-1 == iSeq) {
                Log(&rLog, LOG_ERROR, "Label '%s' in tree could not be found in sequence names", pcLabel);
                return -1;
            }
            SetLeafId(prTree, iNode, iSeq);
        }
    }

    if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
        Log(&rLog, LOG_DEBUG, "tree logging...");
        LogTree(prTree, LogGetFP(&rLog, LOG_DEBUG));
    }

    return 0;
}
/*! otg_task_init2
 *@brief Allocate an otg_task and initialize it. Unless OTG_TASK_WORK
 *       is defined this also creates a work queue named after the
 *       task and initializes the admin and work semaphores as locked.
 *@param name - name of task or workqueue
 *@param proc - handler
 *@param data - parameter pointer for handler
 *@param tag - otg tag stored in the task
 *@return initialized otg_task instance pointer, NULL if the allocation
 *        or the work queue creation fails
 */
struct otg_task *otg_task_init2(char *name, otg_task_proc_t proc, otg_task_arg_t data, otg_tag_t tag)
{
        struct otg_task *self;

        self = CKMALLOC(sizeof (struct otg_task));
        RETURN_NULL_UNLESS(self);

        self->name = name;
        self->proc = proc;
        self->data = data;
        self->tag = tag;

        #if defined(OTG_TASK_WORK)
        self->terminate = TRUE;
        self->terminated = TRUE;
        #else /* defined(OTG_TASK_WORK) */
        self->terminate = FALSE;
        self->terminated = FALSE;
        #if defined(LINUX26)
        self->work_queue = create_singlethread_workqueue(name);
        #else /* LINUX26 */
        self->work_queue = create_workqueue(name);
        #endif /* LINUX26 */
        THROW_UNLESS(self->work_queue, error);
        init_MUTEX_LOCKED(&self->admin_sem);
        init_MUTEX_LOCKED(&self->work_sem);
        #endif /* defined(OTG_TASK_WORK) */

        INIT_WORK(&self->work, otg_task_proc, self);

        return self;

        CATCH(error) {
                printk(KERN_INFO"%s: ERROR\n", __FUNCTION__);
                if (self) LKFREE(self);
                return NULL;
        }
}
/**
 * @brief reads sequences from file
 *
 * @param[out] prMSeq
 * Multiple sequence struct. Must be preallocated.
 * FIXME: would make more sense to allocate it here.
 * @param[in] seqfile
 * Sequence file name. If '-' sequence will be read from stdin.
 * @param[in] iSeqType
 * int-encoded sequence type. Set to
 * SEQTYPE_UNKNOWN for autodetect (guessed from first sequence)
 * @param[in] iSeqFmt
 * int-encoded sequence file format, handed to SeqfileOpen(). If it is
 * SQFILE_UNKNOWN and the input is stdin or a .gz file, FASTA is assumed.
 * @param[in] bIsProfile
 * handed through to SeqsAreAligned()
 * @param[in] bDealignInputSeqs
 * handed through to SeqsAreAligned()
 * @param[in] iMaxNumSeq
 * Return an error, if more than iMaxNumSeq have been read
 * @param[in] iMaxSeqLen
 * Return an error, if a seq longer than iMaxSeqLen has been read
 *
 * @return 0 on success, -1 on error
 *
 * @note
 *  - Depends heavily on squid
 *  - Sequence file format will be guessed
 *  - If supported by squid, gzipped files can be read as well.
 *  - On an error inside the read loop the current sequence is freed
 *    and the sequence file is closed; sequences already stored in
 *    prMSeq stay there.
 */
int
ReadSequences(mseq_t *prMSeq, char *seqfile,
              int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
              int iMaxNumSeq, int iMaxSeqLen)
{
    SQFILE *dbfp; /* sequence file descriptor */
    char *cur_seq;
    SQINFO cur_sqinfo;
    int iSeqIdx; /* sequence counter */

    assert(NULL!=seqfile);

    /* Try to work around inability to autodetect from a pipe or .gz:
     * assume FASTA format
     */
    if (SQFILE_UNKNOWN == iSeqFmt
        && (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0)) {
        iSeqFmt = SQFILE_FASTA;
    }

    /* Using squid routines to read input. taken from seqstat_main.c. we don't
     * know if input is aligned, so we use SeqfileOpen instead of MSAFileOpen
     * etc. NOTE this also means we discard some information, e.g. when
     * reading from and writing to a stockholm file, all extra MSA
     * info/annotation will be lost.
     *
     */
    if (NULL == (dbfp = SeqfileOpen(seqfile, iSeqFmt, NULL))) {
        Log(&rLog, LOG_ERROR, "Failed to open sequence file %s for reading", seqfile);
        return -1;
    }

    /* FIXME squid's ReadSeq() will exit with fatal error if format is
     * unknown. This will be a problem for a GUI. Same is true for many squid
     * other functions.
     *
     * The original squid:ReadSeq() dealigns sequences on input. We
     * use a patched version.
     *
     */
    while (ReadSeq(dbfp, dbfp->format, &cur_seq, &cur_sqinfo)) {
        const int iCurSeqLen = (int)strlen(cur_seq);

        if (prMSeq->nseqs+1>iMaxNumSeq) {
            Log(&rLog, LOG_ERROR, "Maximum number of sequences (=%d) exceeded after reading sequence '%s' from '%s'",
                iMaxNumSeq, cur_sqinfo.name, seqfile);
            goto read_error;
        }
        if (iCurSeqLen>iMaxSeqLen) {
            Log(&rLog, LOG_ERROR, "Sequence '%s' has %d residues and is therefore longer than allowed (max. sequence length is %d)",
                cur_sqinfo.name, iCurSeqLen, iMaxSeqLen);
            goto read_error;
        }
        if (iCurSeqLen==0) {
            Log(&rLog, LOG_ERROR, "Sequence '%s' has 0 residues",
                cur_sqinfo.name);
            goto read_error;
        }

        /* FIXME: use modified version of AddSeq() that allows handing down SqInfo
         */
        prMSeq->seq = (char **)
            CKREALLOC(prMSeq->seq, (prMSeq->nseqs+1) * sizeof(char *));
        prMSeq->seq[prMSeq->nseqs] = CkStrdup(cur_seq);

        prMSeq->sqinfo = (SQINFO *)
            CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO));
        SeqinfoCopy(&prMSeq->sqinfo[prMSeq->nseqs], &cur_sqinfo);

#ifdef TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", prMSeq->nseqs, prMSeq->seq[prMSeq->nseqs]);
        LogSqInfo(&prMSeq->sqinfo[prMSeq->nseqs]);
#endif

        /* always guess type from first seq. use squid function and
         * convert value
         */
        if (0 == prMSeq->nseqs) {
            int type = Seqtype(prMSeq->seq[prMSeq->nseqs]);
            switch (type) {
            case kDNA:
                prMSeq->seqtype = SEQTYPE_DNA;
                break;
            case kRNA:
                prMSeq->seqtype = SEQTYPE_RNA;
                break;
            case kAmino:
                prMSeq->seqtype = SEQTYPE_PROTEIN;
                break;
            case kOtherSeq:
                prMSeq->seqtype = SEQTYPE_UNKNOWN;
                break;
            default:
                Log(&rLog, LOG_FATAL, "Internal error in %s", __FUNCTION__);
            }

            /* override with given sequence type but check with
             * automatically detected type and warn if necessary
             */
            if (SEQTYPE_UNKNOWN != iSeqType) {
                if (prMSeq->seqtype != iSeqType) {
                    Log(&rLog, LOG_WARN, "Overriding automatically determined seq-type %s to %s as requested",
                        SeqTypeToStr(prMSeq->seqtype), SeqTypeToStr(iSeqType));
                    prMSeq->seqtype = iSeqType;
                }
            }
            /* if type could not be determined and was not set return error */
            if (SEQTYPE_UNKNOWN == iSeqType && SEQTYPE_UNKNOWN == prMSeq->seqtype) {
                Log(&rLog, LOG_ERROR, "Couldn't guess sequence type from first sequence");
                goto read_error;
            }
        }

        Log(&rLog, LOG_DEBUG, "seq-no %d: type=%s name=%s len=%d seq=%s",
            prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype),
            prMSeq->sqinfo[prMSeq->nseqs].name, prMSeq->sqinfo[prMSeq->nseqs].len,
            prMSeq->seq[prMSeq->nseqs]);

        /* FIXME IPUAC and/or case conversion? If yes see
         * corresponding squid functions. Special treatment of
         * Stockholm tilde-gaps for ktuple code?
         */

        prMSeq->nseqs++;

        FreeSequence(cur_seq, &cur_sqinfo);
    }
    SeqfileClose(dbfp);

    /* Check if sequences are aligned */
    prMSeq->aligned = SeqsAreAligned(prMSeq, bIsProfile, bDealignInputSeqs);

    /* keep original sequence as copy and convert "working" sequence
     *
     */
    prMSeq->orig_seq = (char**) CKMALLOC(prMSeq->nseqs * sizeof(char *));
    for (iSeqIdx=0; iSeqIdx<prMSeq->nseqs; iSeqIdx++) {
        char *res;

        prMSeq->orig_seq[iSeqIdx] = CkStrdup(prMSeq->seq[iSeqIdx]);

        /* convert unknown characters according to set seqtype
         * be conservative, i.e. don't allow any fancy ambiguity
         * characters to make sure that ktuple code etc. works.
         */

        /* first on the fly conversion between DNA and RNA */
        if (prMSeq->seqtype==SEQTYPE_DNA)
            ToDNA(prMSeq->seq[iSeqIdx]);
        if (prMSeq->seqtype==SEQTYPE_RNA)
            ToRNA(prMSeq->seq[iSeqIdx]);

        /* then check of each character; walk to the terminator instead
         * of re-measuring the string on every iteration */
        for (res = prMSeq->seq[iSeqIdx]; *res != '\0'; res++) {
            /* <ctype.h> functions need an unsigned char value */
            const int iUpper = toupper((unsigned char)*res);

            if (isgap(*res))
                continue;

            if (prMSeq->seqtype==SEQTYPE_PROTEIN) {
                if (NULL == strchr(AMINO_ALPHABET, iUpper)) {
                    *res = AMINOACID_ANY;
                }
            } else if (prMSeq->seqtype==SEQTYPE_DNA) {
                if (NULL == strchr(DNA_ALPHABET, iUpper)) {
                    *res = NUCLEOTIDE_ANY;
                }
            } else if (prMSeq->seqtype==SEQTYPE_RNA) {
                if (NULL == strchr(RNA_ALPHABET, iUpper)) {
                    *res = NUCLEOTIDE_ANY;
                }
            }
        }
    }

    /* order in which sequences appear in guide-tree
     * only allocate if different output-order desired */
    prMSeq->tree_order = NULL;

    prMSeq->filename = CkStrdup(seqfile);

    Log(&rLog, LOG_INFO, "Read %d sequences (type: %s) from %s",
        prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype), prMSeq->filename);

    return 0;

 read_error:
    /* common exit for failures inside the read loop: release the
     * sequence just read and the open sequence file */
    FreeSequence(cur_seq, &cur_sqinfo);
    SeqfileClose(dbfp);
    return -1;
}
/*! * generic_cf_register() * */ void generic_cf_register(struct generic_config *config, char *match) { char *cp, *sp, *lp; int i; char **interface_list = NULL; int interfaces = 0; //printk(KERN_INFO"%s: Driver: \"%s\" idVendor: %04x idProduct: %04x interface_names: \"%s\" match: \"%s\"\n", // __FUNCTION__, // config->composite_driver.driver.name, MODPARM(idVendor), MODPARM(idProduct), config->interface_names, // match ? match : ""); TRACE_MSG5(GENERIC, "Driver: \"%s\" idVendor: %04x idProduct: %04x interface_names: \"%s\" match: \"%s\"", config->composite_driver.driver.name, MODPARM(idVendor), MODPARM(idProduct), config->interface_names, match ? match : ""); RETURN_IF (match && strlen(match) && strcmp(match, config->composite_driver.driver.name)); //printk(KERN_INFO"%s: MATCHED\n", __FUNCTION__); /* decompose interface names to construct interface_list */ RETURN_UNLESS (config->interface_names && strlen(config->interface_names)); /* count interface names and allocate _interface_names array */ for (cp = sp = config->interface_names, interfaces = 0; cp && *cp; ) { for (; *cp && *cp == ':'; cp++); // skip white space sp = cp; // save start of token for (; *cp && *cp != ':'; cp++); // find end of token BREAK_IF (sp == cp); if (*cp) cp++; interfaces++; } THROW_UNLESS(interfaces, error); TRACE_MSG1(GENERIC, "interfaces: %d", interfaces); THROW_UNLESS((interface_list = (char **) CKMALLOC (sizeof (char *) * (interfaces + 1), GFP_KERNEL)), error); for (cp = sp = config->interface_names, interfaces = 0; cp && *cp; interfaces++) { for (; *cp && *cp == ':'; cp++); // skip white space sp = cp; // save start of token for (; *cp && *cp != ':'; cp++); // find end of token BREAK_IF (sp == cp); lp = cp; if (*cp) cp++; *lp = '\0'; lp = CKMALLOC(strlen(sp), GFP_KERNEL); strcpy(lp, sp); interface_list[interfaces] = lp; TRACE_MSG3(GENERIC, "INTERFACE[%2d] %x \"%s\"", interfaces, interface_list[interfaces], interface_list[interfaces]); } config->composite_driver.device_description = 
&config->device_description; config->composite_driver.configuration_description = &config->configuration_description; config->composite_driver.driver.fops = &generic_function_ops; config->interface_list = interface_list; THROW_IF (usbd_register_composite_function ( &config->composite_driver, config->composite_driver.driver.name, config->class_name, config->interface_list, NULL), error); config->registered++; TRACE_MSG0(GENERIC, "REGISTER FINISHED"); CATCH(error) { otg_trace_invalidate_tag(GENERIC); } }
/**
 *
 * Will compute ktuple scores and store in tmat
 * Following values will be set: tmat[i][j], where
 * istart <= i <iend
 * and
 * jstart <= j < jend
 * i.e. zero-offset
 * tmat data members have to be preallocated
 *
 * The diagonal entries tmat[i][i] of the processed rows are set to 0.
 *
 * @param[out] tmat            distance matrix receiving the values
 * @param[in]  mseq            sequences; seqtype selects protein or
 *                             nucleotide defaults and encoding
 * @param[in]  istart, iend    row range (zero-offset, iend exclusive)
 * @param[in]  jstart, jend    column range (zero-offset, jend exclusive)
 * @param[in]  param_override  if NULL the defaults for the sequence
 *                             type are used, otherwise its ktup,
 *                             wind_gap, signif and window replace them
 * @param[in]  prProgress      progress logger; if NULL a new one is
 *                             created here
 * @param[in,out] ulStepNo     step counter, incremented once per pair
 * @param[in]  ulTotalStepNo   total number of steps, for progress output
 *
 * NOTE(review): a progress logger created here (prProgress == NULL)
 * is not released in this function -- confirm whether that is intended.
 */
void
KTuplePairDist(symmatrix_t *tmat, mseq_t *mseq,
               int istart, int iend,
               int jstart, int jend,
               ktuple_param_t *param_override,
               progress_t *prProgress,
               unsigned long int *ulStepNo, unsigned long int ulTotalStepNo)
{
    /* this first group of variables were previously static
       and hence un-parallelisable */
    char **seq_array;
    int maxsf;
    int **accum;
    int max_aln_length;
    /* divide score with length of smallest sequence */
    int *zza, *zzb, *zzc, *zzd;

    int i, j, dsr;
    double calc_score;
    double min_aln_length;
    int max_res_code = -1;

    int max_seq_len;
    int *seqlen_array;

    ktuple_param_t aln_param = default_protein_param;
    bool bPrintCR = (rLog.iLogLevelEnabled<=LOG_VERBOSE) ? FALSE : TRUE;

    if(prProgress == NULL) {
        NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO),
                    "Ktuple-distance calculation progress", bPrintCR);
    }

    /* conversion to old style data types follows
     *
     */

    seqlen_array = (int*) CKMALLOC((mseq->nseqs+1) * sizeof(int));
    for (i=0; i<mseq->nseqs; i++) {
        seqlen_array[i+1] = mseq->sqinfo[i].len;
    }

    /* setup alignment parameters
     */
    if (SEQTYPE_PROTEIN == mseq->seqtype) {
        DNAFLAG = FALSE;
        max_res_code = strlen(AMINO_ACID_CODES)-2;
        aln_param = default_protein_param;

    } else if (SEQTYPE_RNA == mseq->seqtype || SEQTYPE_DNA == mseq->seqtype) {
        DNAFLAG = TRUE;
        max_res_code = strlen(NUCLEIC_ACID_CODES)-2;
        aln_param = default_dna_param;

    } else {
        Log(&rLog, LOG_FATAL, "Internal error in %s: Unknown sequence type.", __FUNCTION__);
    }

    if (NULL!=param_override) {
        aln_param.ktup = param_override->ktup;
        aln_param.wind_gap = param_override->wind_gap;
        aln_param.signif = param_override->signif;
        aln_param.window = param_override->window;
    }

    /*LOG_DEBUG("DNAFLAG = %d max_res_code = %d", DNAFLAG, max_res_code);*/

    /* convert mseq to clustal's old-style int encoded sequences (unit-offset)
     */
    max_aln_length = 0;
    max_seq_len = 0;
    seq_array = (char **) CKMALLOC((mseq->nseqs+1) * sizeof(char *));
    seq_array[0] = NULL;
    /* FIXME check that non of the seqs is smaller than ktup (?).
     * Otherwise segfault occurs
     */
    for (i=0; i<mseq->nseqs; i++) {
        seq_array[i+1] = (char *) CKMALLOC((seqlen_array[i+1]+2) * sizeof (char));
    }
    for (i=0; i<mseq->nseqs; i++) {
        /*LOG_DEBUG("calling encode with seq_array[%d+1] len=%d and seq=%s",
          i, seqlen_array[i+1], mseq->seq[i]);*/
        if (TRUE == DNAFLAG) {
            encode(&(mseq->seq[i][-1]), seq_array[i+1],
                   seqlen_array[i+1], NUCLEIC_ACID_CODES);
        } else {
            encode(&(mseq->seq[i][-1]), seq_array[i+1],
                   seqlen_array[i+1], AMINO_ACID_CODES);
        }

        if (seqlen_array[i+1]>max_seq_len) {
            max_seq_len = seqlen_array[i+1];
        }
    }
    max_aln_length = max_seq_len * 2;
    /* see sequence.c in old source */

    /* FIXME: short sequences can cause seg-fault
     * because max_aln_length can get shorter
     * than (max_res_code+1)^k
     * FS, r222->r223 */
    min_aln_length = pow((max_res_code+1), aln_param.ktup)+1;
    if (!(max_aln_length > min_aln_length)) {
        max_aln_length = (int)min_aln_length;
    }

    /*
     *
     * conversion to old style clustal done (in no time) */

    accum = (int **) CKCALLOC(5, sizeof (int *));
    for (i=0;i<5;i++) {
        accum[i] = (int *) CKCALLOC((2*max_aln_length+1), sizeof(int));
    }
    zza = (int *) CKCALLOC( (max_aln_length+1), sizeof(int));
    zzb = (int *) CKCALLOC( (max_aln_length+1), sizeof(int));
    zzc = (int *) CKCALLOC( (max_aln_length+1), sizeof(int));
    zzd = (int *) CKCALLOC( (max_aln_length+1), sizeof(int));

    /* estimation of total number of steps (if istart and jstart are
     * both 0) (now handled in the calling routine)
     */

    /*LOG_DEBUG("istart=%d iend=%d jstart=%d jend=%d", istart, iend, jstart, jend);*/

    for (i=istart+1; i<=iend; ++i) {
        /* by definition a sequence compared to itself should give
           a score of 0. AW */
        SymMatrixSetValue(tmat, i-1, i-1, 0.0);
        make_ptrs(zza, zzc, i, seqlen_array[i], aln_param.ktup, max_res_code, seq_array);

#ifdef HAVE_OPENMP
        #pragma omp critical(ktuple)
#endif
        {
            ProgressLog(prProgress, *ulStepNo, ulTotalStepNo, FALSE);
        }

        for (j=MAX(i+1, jstart+1); j<=jend; ++j) {
            (*ulStepNo)++;
            /*LOG_DEBUG("comparing pair %d:%d", i, j);*/

            make_ptrs(zzb, zzd, j, seqlen_array[j], aln_param.ktup, max_res_code, seq_array);
            pair_align(i, seqlen_array[i], seqlen_array[j], max_res_code, &aln_param,
                       seq_array, &maxsf, accum, max_aln_length, zza, zzb, zzc, zzd);

            if (!maxsf) {
                calc_score=0.0;
            } else {
                calc_score=(double)accum[0][maxsf];
                if (percent) {
                    /* divide by the length of the shorter sequence */
                    dsr=(seqlen_array[i]<seqlen_array[j]) ?
                        seqlen_array[i] : seqlen_array[j];
                    calc_score = (calc_score/(double)dsr) * 100.0;
                }
            }

            SymMatrixSetValue(tmat, i-1, j-1, (100.0 - calc_score)/100.0);

            /* the function allows you not to compute the full matrix.
             * here we explicitely make the resulting matrix a
             * rectangle, i.e. we always set full rows. in other
             * words, if we don't complete the full matrix then we
             * don't have a full symmetry. so only use the defined
             * symmetric part. AW
             */
            /* mirroring tmat[j][i] is not needed anymore since we use
             * symmatrix_t */

#ifdef HAVE_OPENMP
            #pragma omp critical(ktuple)
#endif
            {
                Log(&rLog, LOG_DEBUG, "K-tuple distance for sequence pair %d:%d = %lg",
                    i, j, SymMatrixGetValue(tmat, i-1, j-1));
            }
        }
    }

    for (i=0;i<5;i++) {
        CKFREE(accum[i]);
    }
    CKFREE(accum);

    CKFREE(zza);
    CKFREE(zzb);
    CKFREE(zzc);
    CKFREE(zzd);

    /* released the same way as every other buffer allocated here */
    CKFREE(seqlen_array);

    for (i=1; i<=mseq->nseqs; i++) {
        CKFREE(seq_array[i]);
    }
    CKFREE(seq_array);
}
/**
 *
 * Scores one sequence pair for the k-tuple distance calculation.
 *
 * The steps, in order: count k-tuple matches per diagonal by following
 * the index chains in zza/zzc (first sequence) and zzb/zzd (second
 * sequence), sort the diagonals by count, flag all diagonals within
 * the window of one of the top `signif` diagonals, then chain k-tuple
 * fragments on flagged diagonals into accum via put_frag().
 *
 * @param[in]  seq_no          unit-offset index into seq_array of the
 *                             first sequence
 * @param[in]  l1, l2          lengths of first and second sequence
 * @param[in]  max_res_code    highest valid residue code
 * @param[in]  aln_param       ktup, window, signif and wind_gap are
 *                             read; ktup must be smaller than 8
 * @param[in]  seq_array       encoded sequences
 * @param[out] maxsf           reset to 0 here, then updated by put_frag()
 * @param[in,out] accum        fragment table read here and written by
 *                             put_frag()
 * @param[in]  max_aln_length  processing stops once 2*max_aln_length
 *                             fragments have been created
 * @param[in]  zza, zzb, zzc, zzd  k-tuple index chains
 *
 * FIXME together with des_quick_sort most time consuming routine
 * according to gprof on r110
 *
 */
static void
pair_align(int seq_no, int l1, int l2, int max_res_code, ktuple_param_t *aln_param,
           char **seq_array, int *maxsf, int **accum, int max_aln_length,
           int *zza, int *zzb, int *zzc, int *zzd)
{
    int next; /* forrmerly static */
    int pot[8],i, j, l, m, flag, limit, pos, vn1, vn2, flen, osptr, fs;
    int tv1, tv2, encrypt, subt1, subt2, rmndr;
    char residue;
    int *diag_index;
    int *displ;
    char *slopes;
    int curr_frag;
    const int tl1 = (l1+l2)-1;

    assert(NULL!=aln_param);
    /* pot[] is indexed 1..ktup below; a larger ktup would write past
     * the end of the array */
    assert(aln_param->ktup < (int)(sizeof(pot)/sizeof(pot[0])));

    slopes = (char *) CKCALLOC(tl1+1, sizeof(char));
    displ = (int *) CKCALLOC(tl1+1, sizeof(int));
    diag_index = (int *) CKMALLOC((tl1+1) * sizeof(int));

    for (i=1; i<=tl1; ++i) {
        /* slopes[] and displ[] need no reset here because they were
         * calloced */
        diag_index[i] = i;
    }

    /* pot[i] = (max_res_code+1)^(i-1), limit = (max_res_code+1)^ktup */
    for (i=1;i<=aln_param->ktup;i++)
        pot[i] = (int) pow((double)(max_res_code+1),(double)(i-1));
    limit = (int) pow((double)(max_res_code+1),(double)aln_param->ktup);

    /* increment diagonal score for each k_tuple match */

    for (i=1; i<=limit; ++i) {
        vn1=zzc[i];
        while (TRUE) {
            if (!vn1) break;
            vn2 = zzd[i];
            while (0 != vn2) {
                osptr = vn1-vn2+l2;
                ++displ[osptr];
                vn2 = zzb[vn2];
            }
            vn1=zza[vn1];
        }
    }

    /* choose the top SIGNIF diagonals
     */

#ifdef QSORT_REPLACEMENT
    /* This was an attempt to replace des_quick_sort with qsort(),
     * which turns out to be much slower, so don't use this
     */

    /* FIXME: if we use this branch, we don't need to init diag_index
     * before, because that is done in QSortAndTrackIndex()
     * automatically.
     */
    QSortAndTrackIndex(&(diag_index[1]), &(displ[1]), tl1, 'a', TRUE);
#else
    des_quick_sort(displ, diag_index, tl1);
#endif

    j = tl1 - aln_param->signif + 1;

    if (j < 1) {
        j = 1;
    }

    /* flag all diagonals within WINDOW of a top diagonal */

    for (i=tl1; i>=j; i--) {
        if (displ[i] > 0) {
            pos = diag_index[i];
            l = (1 > pos - aln_param->window) ?
                1 : pos - aln_param->window;
            m = (tl1 < pos + aln_param->window) ?
                tl1 : pos + aln_param->window;
            for (; l <= m; l++)
                slopes[l] = 1;
        }
    }

    /* from here on displ[] holds, per diagonal, the last fragment
     * number created on it */
    for (i=1; i<=tl1; i++) {
        displ[i] = 0;
    }

    curr_frag=*maxsf=0;

    for (i=1; i<=(l1-aln_param->ktup+1); ++i) {
        /* encode the k-tuple starting at position i; skip it if it
         * contains a residue outside 0..max_res_code */
        encrypt=flag=0;
        for (j=1; j<=aln_param->ktup; ++j) {
            residue = seq_array[seq_no][i+j-1];
            if ((residue<0) || (residue>max_res_code)) {
                flag=TRUE;
                break;
            }
            encrypt += ((residue)*pot[j]);
        }
        if (flag) {
            continue;
        }
        ++encrypt;

        vn2=zzd[encrypt];

        flag=FALSE;
        while (TRUE) {
            if (!vn2) {
                flag=TRUE;
                break;
            }
            osptr=i-vn2+l2;
            /* only diagonals flagged above are considered */
            if (1 != slopes[osptr]) {
                vn2=zzb[vn2];
                continue;
            }
            flen=0;
            fs=aln_param->ktup;
            next=*maxsf;

            /*
             * A-loop
             */
            while (TRUE) {
                if (!next) {
                    ++curr_frag;
                    if (curr_frag >= 2*max_aln_length) {
                        Log(&rLog, LOG_VERBOSE, "(Partial alignment)");
                        goto free_and_exit; /* Yesss! Always wanted to
                                             * use a goto (AW) */
                    }
                    displ[osptr]=curr_frag;
                    put_frag(fs, i, vn2, flen, curr_frag, &next, maxsf, accum);

                } else {
                    tv1=accum[1][next];
                    tv2=accum[2][next];

                    if (frag_rel_pos(i, vn2, tv1, tv2, aln_param->ktup)) {
                        if (i-vn2 == accum[1][next]-accum[2][next]) {
                            if (i > accum[1][next]+(aln_param->ktup-1)) {
                                fs = accum[0][next]+aln_param->ktup;
                            } else {
                                rmndr = i-accum[1][next];
                                fs = accum[0][next]+rmndr;
                            }
                            flen=next;
                            next=0;
                            continue;

                        } else {
                            if (0 == displ[osptr]) {
                                subt1=aln_param->ktup;
                            } else {
                                if (i > accum[1][displ[osptr]]+(aln_param->ktup-1)) {
                                    subt1=accum[0][displ[osptr]]+aln_param->ktup;
                                } else {
                                    rmndr=i-accum[1][displ[osptr]];
                                    subt1=accum[0][displ[osptr]]+rmndr;
                                }
                            }
                            subt2=accum[0][next] - aln_param->wind_gap + aln_param->ktup;
                            if (subt2>subt1) {
                                flen=next;
                                fs=subt2;
                            } else {
                                flen=displ[osptr];
                                fs=subt1;
                            }
                            next=0;
                            continue;
                        }
                    } else {
                        next=accum[4][next];
                        continue;
                    }
                }
                break;
            }
            /*
             * End of Aloop
             */

            vn2=zzb[vn2];
        }
    }

 free_and_exit:
    CKFREE(displ);
    CKFREE(slopes);
    CKFREE(diag_index);

    return;
}
/** * @brief Parse command line parameters. Will exit if help/usage etc * are called or or call Log(&rLog, LOG_FATAL, ) if an error was detected. * * @param[out] user_opts * User parameter struct, with defaults already set. * @param[in] argc * mains argc * @param[in] argv * mains argv * */ void ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv) { /* argtable command line parsing: * see * http://argtable.sourceforge.net/doc/argtable2-intro.html * * basic structure is: arg_xxxN: * xxx can be int, lit, db1, str, rex, file or date * If N = 0, arguments may appear zero-or-once; N = 1 means * exactly once, N = n means up to maxcount times * * * @note: changes here, might also affect main.cpp:ConvertOldCmdLine() * */ struct arg_rem *rem_seq_input = arg_rem(NULL, "\nSequence Input:"); struct arg_file *opt_seqin = arg_file0("i", "in,infile", "{<file>,-}", "Multiple sequence input file (- for stdin)"); struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>", /*min*/ 0, /*max*/ 128, "HMM input files"); struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign", "Dealign input sequences"); struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1", "<file>", "Pre-aligned multiple sequence file (aligned columns will be kept fix)"); struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2", "<file>", "Pre-aligned multiple sequence file (aligned columns will be kept fix)"); struct arg_str *opt_seqtype = arg_str0("t", "seqtype", "{Protein, RNA, DNA}", "Force a sequence type (default: auto)"); /* struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein", "Set sequence type to protein even if Clustal guessed nucleic acid"); */ struct arg_str *opt_infmt = arg_str0(NULL, "infmt", "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}", "Forced sequence input file format (default: auto)"); struct arg_rem *rem_guidetree = arg_rem(NULL, "\nClustering:"); struct arg_str *opt_pairdist = arg_str0("p", "pairdist", "{ktuple}", "Pairwise alignment 
distance measure"); struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in", "<file>", "Pairwise distance matrix input file (skips distance computation)"); struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out", "<file>", "Pairwise distance matrix output file"); struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in", "<file>", "Guide tree input file (skips distance computation and guide-tree clustering step)"); struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out", "<file>", "Guide tree output file"); /* AW: mbed is default since at least R253 struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed", "Fast, Mbed-like clustering for guide-tree calculation"); struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter", "Use Mbed-like clustering also during iteration"); */ /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */ struct arg_lit *opt_full = arg_lit0(NULL, "full", "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)"); struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter", "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)"); struct arg_str *opt_clustering = arg_str0("c", "clustering", "{UPGMA}", "Clustering method for guide tree"); struct arg_rem *rem_aln_output = arg_rem(NULL, "\nAlignment Output:"); struct arg_file *opt_outfile = arg_file0("o", "out,outfile", "{file,-}", "Multiple sequence alignment output file (default: stdout)"); struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt", "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}", "MSA output file format (default: fasta)"); struct arg_rem *rem_iteration = arg_rem(NULL, "\nIteration:"); struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter", /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */ "<n>", "Number of (combined guide-tree/HMM) iterations"); struct 
arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations", "<n>", "Maximum number guidetree iterations"); struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations", "<n>", "Maximum number of HMM iterations"); struct arg_rem *rem_limits = arg_rem(NULL, "\nLimits (will exit early, if exceeded):"); struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>", "Maximum allowed number of sequences"); struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>", "Maximum allowed sequence length"); struct arg_rem *rem_misc = arg_rem(NULL, "\nMiscellaneous:"); struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto", "Set options automatically (might overwrite some of your options)"); struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>", "Number of processors to use"); struct arg_file *opt_logfile = arg_file0("l", "log", "<file>", "Log all non-essential output to this file"); struct arg_lit *opt_help = arg_lit0("h", "help", "Print this help and exit"); struct arg_lit *opt_version = arg_lit0(NULL, "version", "Print version information and exit"); struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version", "Print long version information and exit"); struct arg_lit *opt_verbose = arg_litn("v", "verbose", 0, 3, "Verbose output (increases if given multiple times)"); struct arg_lit *opt_force = arg_lit0(NULL, "force", "Force file overwriting"); struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */ NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/); struct arg_end *opt_end = arg_end(10); /* maximum number of errors * to store */ void *argtable[] = {rem_seq_input, opt_seqin, opt_hmm_in, opt_dealign, opt_profile1, opt_profile2, opt_seqtype, /* opt_force_protein, */ opt_infmt, rem_guidetree, #if 0 /* no other options then default available or not implemented */ opt_pairdist, #endif opt_distmat_in, opt_distmat_out, opt_guidetree_in, 
opt_guidetree_out, opt_full, /* FS, r250 -> */ opt_full_iter, /* FS, r250 -> */ #if 0 /* no other options then default available */ opt_clustering, #endif rem_aln_output, opt_outfile, opt_outfmt, rem_iteration, opt_num_iterations, opt_max_guidetree_iterations, opt_max_hmm_iterations, rem_limits, opt_max_seq, opt_max_seqlen, rem_misc, opt_autooptions, opt_threads, opt_logfile, opt_help, opt_verbose, opt_version, opt_long_version, opt_force, opt_macram, /* FS, r240 -> r241 */ opt_end}; int nerrors; /* Verify the argtable[] entries were allocated sucessfully */ if (arg_nullcheck(argtable)) { Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)"); } /* Parse the command line as defined by argtable[] */ nerrors = arg_parse(argc, argv, argtable); /* Special case: '--help' takes precedence over error reporting */ if (opt_help->count > 0) { printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME); printf("\n"); printf("If you like Clustal-Omega please cite:\n%s\n", CITATION); printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n"); /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */ printf("\n"); printf("Check http://www.clustal.org for more information and updates.\n"); printf("\n"); printf("Usage: %s", basename(argv[0])); arg_print_syntax(stdout,argtable, "\n"); printf("\n"); printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n", basename(argv[0])); printf("See below for a list of all options.\n"); arg_print_glossary(stdout, argtable, " %-25s %s\n"); arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0])); exit(EXIT_SUCCESS); } /* Special case: '--version' takes precedence over error reporting */ if (opt_version->count > 0) { printf("%s\n", PACKAGE_VERSION); arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); exit(EXIT_SUCCESS); } /* Special case: '--long-version' takes precedence over error reporting */ if (opt_long_version->count > 0) { 
char zcLongVersion[1024]; PrintLongVersion(zcLongVersion, sizeof(zcLongVersion)); printf("%s\n", zcLongVersion); arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); exit(EXIT_SUCCESS); } /* If the parser returned any errors then display them and exit */ if (nerrors > 0) { /* Display the error details contained in the arg_end struct.*/ arg_print_errors(stdout, opt_end, PACKAGE); fprintf(stderr, "For more information try: %s --help\n", argv[0]); arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); exit(EXIT_FAILURE); } /* ------------------------------------------------------------ * * Command line successfully parsed. Now transfer values to * user_opts. While doing so, make sure that given input files * exist and given output files are writable do not exist, or if * they do, should be overwritten. * * No logic checks here! They are done in a different function * * ------------------------------------------------------------*/ /* not part of user_opts because it declared in src/util.h */ if (0 == opt_verbose->count) { rLog.iLogLevelEnabled = LOG_WARN; } else if (1 == opt_verbose->count) { rLog.iLogLevelEnabled = LOG_INFO; } else if (2 == opt_verbose->count) { rLog.iLogLevelEnabled = LOG_VERBOSE; } else if (3 == opt_verbose->count) { rLog.iLogLevelEnabled = LOG_DEBUG; } user_opts->aln_opts.bAutoOptions = opt_autooptions->count; user_opts->bDealignInputSeqs = opt_dealign->count; /* NOTE: full distance matrix used to be default - there was --mbed flag but no --full flag after r250 decided that mBed should be default - now need --full flag to turn off mBed. wanted to retain mBed Boolean, so simply added --full flag. 
if both flags set (erroneously) want --mbed to overwrite --full, therefore do --full 1st, the --mbed, FS, r250 */ if (opt_full->count){ user_opts->aln_opts.bUseMbed = FALSE; } if (opt_full_iter->count){ user_opts->aln_opts.bUseMbedForIteration = FALSE; } user_opts->bForceFileOverwrite = opt_force->count; /* log-file */ if (opt_logfile->count > 0) { user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]); /* warn if already exists or not writable */ if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) { Log(&rLog, LOG_FATAL, "%s '%s'. %s", "Cowardly refusing to overwrite already existing file", user_opts->pcLogFile, "Use --force to force overwriting."); } if (! FileIsWritable(user_opts->pcLogFile)) { Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.", user_opts->pcLogFile); } } /* normal sequence input (no profile) */ if (opt_seqin->count > 0) { user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]); } /* Input limitations */ /* maximum number of sequences */ if (opt_max_seq->count > 0) { user_opts->iMaxNumSeq = opt_max_seq->ival[0]; } /* maximum sequence length */ if (opt_max_seqlen->count > 0) { user_opts->iMaxSeqLen = opt_max_seqlen->ival[0]; } /* Output format */ if (opt_infmt->count > 0) { /* avoid gcc warning about discarded qualifier */ char *tmp = (char *)opt_infmt->sval[0]; user_opts->iSeqInFormat = String2SeqfileFormat(tmp); } else { user_opts->iSeqInFormat = SQFILE_UNKNOWN; } /* Sequence type */ if (opt_seqtype->count > 0) { if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) { user_opts->iSeqType = SEQTYPE_PROTEIN; } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) { user_opts->iSeqType = SEQTYPE_RNA; } else if (STR_NC_EQ(opt_seqtype->sval[0], "dna")) { user_opts->iSeqType = SEQTYPE_DNA; } else { Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]); } } /* if (opt_force_protein->count > 0) { user_opts->iSeqType = SEQTYPE_PROTEIN; } */ /* Profile input */ if (opt_profile1->count > 0) { 
user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]); if (! FileExists(user_opts->pcProfile1Infile)) { Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile); } } if (opt_profile2->count > 0) { user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]); if (! FileExists(user_opts->pcProfile2Infile)) { Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile); } } /* HMM input */ user_opts->aln_opts.iHMMInputFiles = 0; user_opts->aln_opts.ppcHMMInput = NULL; if (opt_hmm_in->count>0) { int iAux; user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count; user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC( user_opts->aln_opts.iHMMInputFiles * sizeof(char*)); for (iAux=0; iAux<opt_hmm_in->count; iAux++) { user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]); if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) { Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]); } } } /* Pair distance method */ if (opt_pairdist->count > 0) { if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) { user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE; } else { Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]); } } /* Distance matrix input */ if (opt_distmat_in->count > 0) { user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]); if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) { Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile); } } /* Distance matrix output */ if (opt_distmat_out->count > 0) { user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]); /* warn if already exists or not writable */ if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) { Log(&rLog, LOG_FATAL, "%s '%s'. 
%s", "Cowardly refusing to overwrite already existing file", user_opts->aln_opts.pcDistmatOutfile, "Use --force to force overwriting."); } if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) { Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.", user_opts->aln_opts.pcDistmatOutfile); } } /* Clustering * */ if (opt_clustering->count > 0) { if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) { user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA; } else { Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]); } } /* Guidetree input */ if (opt_guidetree_in->count > 0) { user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]); if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) { Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile); } } /* Guidetree output */ if (opt_guidetree_out->count > 0) { user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]); /* warn if already exists or not writable */ if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) { Log(&rLog, LOG_FATAL, "%s '%s'. %s", "Cowardly refusing to overwrite already existing file", user_opts->aln_opts.pcGuidetreeOutfile, "Use --force to force overwriting."); } if (! 
FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) { Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.", user_opts->aln_opts.pcGuidetreeOutfile); } } /* max guidetree iterations */ if (opt_max_guidetree_iterations->count > 0) { user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0]; } /* max guidetree iterations */ if (opt_max_hmm_iterations->count > 0) { user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0]; } /* number of iterations */ if (opt_num_iterations->count > 0) { if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) { Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment."); user_opts->aln_opts.bIterationsAuto = TRUE; } else { int iAux; user_opts->aln_opts.bIterationsAuto = FALSE; for (iAux=0; iAux<(int)strlen(opt_num_iterations->sval[0]); iAux++) { if (! isdigit(opt_num_iterations->sval[0][iAux])) { Log(&rLog, LOG_FATAL, "Couldn't iteration parameter: %s", opt_num_iterations->sval[0]); } } user_opts->aln_opts.iNumIterations = atoi(opt_num_iterations->sval[0]); } } /* Alignment output */ if (opt_outfile->count > 0) { user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]); /* warn if already exists or not writable */ if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) { Log(&rLog, LOG_FATAL, "%s '%s'. %s", "Cowardly refusing to overwrite already existing file", user_opts->pcAlnOutfile, "Use --force to force overwriting."); } if (! 
FileIsWritable(user_opts->pcAlnOutfile)) { Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.", user_opts->pcAlnOutfile); } } /* Output format */ if (opt_outfmt->count > 0) { /* avoid gcc warning about discarded qualifier */ char *tmp = (char *)opt_outfmt->sval[0]; user_opts->iAlnOutFormat = String2SeqfileFormat(tmp); if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) { Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]); } } /* Number of threads */ #ifdef HAVE_OPENMP if (opt_threads->count > 0) { if (opt_threads->ival[0] <= 0) { Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.", opt_threads->ival[0]); } user_opts->iThreads = opt_threads->ival[0]; } #else if (opt_threads->count > 0) { if (opt_threads->ival[0] > 1) { Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.", opt_threads->ival[0], PACKAGE_NAME); } } #endif /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm) */ if (opt_macram->count > 0) { /* FS, r240 -> r241 */ user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0]; } arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); UserOptsLogicCheck(user_opts); return; }