Exemplo n.º 1
0
void DNASequenceGenerator::evaluateBaseContent(const MultipleSequenceAlignment& ma, QMap<char, qreal>& result) {
    QList< QMap<char, qreal> > rowsContents;
    foreach(const MultipleSequenceAlignmentRow& row, ma->getMsaRows()) {
        QMap<char, qreal> rowContent;
        evaluate(row->getData(), rowContent);
        rowsContents.append(rowContent);
    }

    QListIterator< QMap<char, qreal> > listIter(rowsContents);
    while (listIter.hasNext()) {
        const QMap<char, qreal>& cm = listIter.next();
        QMapIterator<char, qreal> mapIter(cm);
        while (mapIter.hasNext()) {
            mapIter.next();
            char ch = mapIter.key();
            qreal freq = mapIter.value();
            if (!result.keys().contains(ch)) {
                result.insertMulti(ch, freq);
            } else {
                result[ch] += freq;
            }
        }
    }

    int rowsNum = ma->getNumRows();
    QMutableMapIterator<char, qreal> i(result);
    while (i.hasNext()) {
        i.next();
        i.value() /= rowsNum;
    }
}
Exemplo n.º 2
0
void convertMAlignment2SecVect(SeqVect& sv, const MultipleSequenceAlignment& ma, bool fixAlpha) {
    sv.Clear();

    MuscleContext *ctx = getMuscleContext();
    ctx->fillUidsVectors(ma->getNumRows());

    unsigned i=0;
    unsigned seq_count = 0;
    foreach(const MultipleSequenceAlignmentRow& row, ma->getMsaRows()) {
        Seq *ptrSeq = new Seq();
        QByteArray name =  row->getName().toLocal8Bit();
        ptrSeq->FromString(row->getCore().constData(), name.constData());
        //stripping gaps, original Seq::StripGaps fails on MSVC9
        Seq::iterator newEnd = std::remove(ptrSeq->begin(), ptrSeq->end(), U2Msa::GAP_CHAR);
        ptrSeq->erase(newEnd, ptrSeq->end());
        if (ptrSeq->Length()!=0) {
            ctx->tmp_uIds[seq_count] = ctx->input_uIds[i];
            sv.push_back(ptrSeq);
            seq_count++; 
        }
        i++;
    }
    if (fixAlpha) {
        sv.FixAlpha();
    }
}
Exemplo n.º 3
0
MSADistanceMatrix::MSADistanceMatrix(const MultipleSequenceAlignment& ma, bool _excludeGaps, bool _usePercents)
: usePercents(_usePercents), excludeGaps(_excludeGaps), alignmentLength(ma->getLength()) {
    int nSeq = ma->getNumRows();
    table.reserve(nSeq);
    for (int i = 0; i < nSeq; i++) {
        table.append(QVarLengthArray<int>(i + 1));
        memset(table[i].data(), 0, (i + 1) * sizeof(int));
        seqsUngappedLenghts.append(ma->getMsaRow(i)->getUngappedLength());
    }
}
Exemplo n.º 4
0
void convertMAlignment2MSA(MSA& muscleMSA, const MultipleSequenceAlignment& ma, bool fixAlpha) {    
    MuscleContext *ctx = getMuscleContext();
    ctx->fillUidsVectors(ma->getNumRows());
    for (int i=0, n = ma->getNumRows(); i<n; i++) {
        const MultipleSequenceAlignmentRow row = ma->getMsaRow(i);
        
        int coreLen = row->getCoreLength();
        int maLen = ma->getLength();
        char* seq  = new char[maLen + 1];
        memcpy(seq, row->getCore().constData(), coreLen);
        memset(seq + coreLen, '-', maLen - coreLen + 1);
        seq[maLen] = 0;

        char* name = new char[row->getName().length() + 1];
        memcpy(name, row->getName().toLocal8Bit().constData(), row->getName().length());
        name[row->getName().length()] = '\0';
        
        muscleMSA.AppendSeq(seq, maLen, name);
        ctx->tmp_uIds[i] = ctx->input_uIds[i];
    }
    if (fixAlpha) {
        muscleMSA.FixAlpha();
    }
}
Exemplo n.º 5
0
MuscleTask::MuscleTask(const MultipleSequenceAlignment &ma, const MuscleTaskSettings& _config)
    : Task(tr("MUSCLE alignment"), TaskFlags_FOSCOE | TaskFlag_MinimizeSubtaskErrorText),
      config(_config),
      inputMA(ma->getExplicitCopy())
{
    GCOUNTER( cvar, tvar, "MuscleTask" );
    config.nThreads = (config.nThreads == 0 ? AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount() : config.nThreads);
    SAFE_POINT_EXT(config.nThreads > 0,
        setError("Incorrect number of max parallel subtasks"), );
    setMaxParallelSubtasks(config.nThreads);

    algoLog.info(tr("MUSCLE alignment started"));

    ctx = new MuscleContext(config.nThreads);
    ctx->params.g_bStable = config.stableMode;
    ctx->params.g_uMaxIters = config.maxIterations;
    ctx->params.g_ulMaxSecs = config.maxSecs;
    parallelSubTask = NULL;

    //todo: make more precise estimation, use config.op mode
    int aliLen = ma->getLength();
    int nSeq = ma->getNumRows();
    int memUseMB = qint64(aliLen) * qint64(nSeq) * 200 / (1024 * 1024); //200x per char in alignment
    TaskResourceUsage tru(RESOURCE_MEMORY, memUseMB);

    QString inputAlName = inputMA->getName();
    resultMA->setName(inputAlName);
    resultSubMA->setName(inputAlName);

    inputSubMA = inputMA->getExplicitCopy();
    if (config.alignRegion && config.regionToAlign.length != inputMA->getLength()) {
        SAFE_POINT_EXT(config.regionToAlign.length > 0,
            setError(tr("Incorrect region to align")), );
        inputSubMA = inputMA->mid(config.regionToAlign.startPos, config.regionToAlign.length);
        CHECK_EXT(inputSubMA != MultipleSequenceAlignment(), setError(tr("Stopping MUSCLE task, because of error in MultipleSequenceAlignment::mid function")), );
    }
Exemplo n.º 6
0
void KalignAdapter::alignUnsafe(const MultipleSequenceAlignment& ma, MultipleSequenceAlignment& res, TaskStateInfo& ti) {
    ti.progress = 0;
    int* tree = 0;
    quint32 a, b, c;

    struct alignment* aln = 0;
    struct parameters* param = 0;
    struct aln_tree_node* tree2 = 0;

    param = (parameters*)malloc(sizeof(struct parameters));

    param =  interface(param,0,0);

    kalign_context *ctx = get_kalign_context();
    unsigned int &numseq = ctx->numseq;
    unsigned int &numprofiles = ctx->numprofiles;

    if (ma->getNumRows() < 2){
        if (!numseq){
            k_printf("No sequences found.\n\n");
        } else {
            k_printf("Only one sequence found.\n\n");
        }
        free_param(param);
        throw KalignException("Can't align less then 2 sequences");
    }

    if(ctx->gpo != -1) {
        param->gpo = ctx->gpo;
    }
    if(ctx->gpe != -1) {
        param->gpe = ctx->gpe;
    }
    if(ctx->tgpe != -1) {
        param->tgpe = ctx->tgpe;
    }
    if(ctx->secret != -1) {
        param->secret = ctx->secret;
    }

    /************************************************************************/
    /* Convert MA to aln                                                    */
    /************************************************************************/
    k_printf("Prepare data");
    numseq = ma->getNumRows();
    numprofiles = (numseq << 1) - 1;
    aln = aln_alloc(aln);
    for(quint32 i = 0 ; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        aln->sl[i] = row->getUngappedLength();
        aln->lsn[i] = row->getName().length();
    }

    for (quint32 i = 0; i < numseq;i++) {
        try {
            aln->s[i] = (int*) malloc(sizeof(int)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->s[i]);
            aln->seq[i] = (char*)malloc(sizeof(char)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->seq[i]);
            aln->sn[i] = (char*)malloc(sizeof(char)*(aln->lsn[i]+1));
            checkAllocatedMemory(aln->sn[i]);
        } catch (...) {
            cleanupMemory(NULL, numseq, NULL, aln, param);
            throw;
        }
    }

    int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
    for(quint32 i = 0; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        qstrncpy(aln->sn[i], row->getName().toLatin1(), row->getName().length() + 1); //+1 to include '\0'
        QString gapless = QString(row->getCore()).remove('-');
        qstrncpy(aln->seq[i], gapless.toLatin1(), gapless.length() + 1);	//+1 to include '\0'
        for (quint32 j = 0; j < aln->sl[i]; j++) {
            if (isalpha((int)aln->seq[i][j])){
                aln->s[i][j] = aacode[toupper(aln->seq[i][j])-65];
            } else {
                aln->s[i][j] = -1;
            }
        }
        aln->s[i][aln->sl[i]] = 0;
        aln->seq[i][aln->sl[i]] = 0;
        aln->sn[i][aln->lsn[i]] = 0;
    }

    /*for(int i=0;i<numseq;i++) {
        for(int j=0;j<aln->sl[i];j++)
            printf("%d  ", aln->s[i][j]);
    }*/

    //aln_dump(aln);

    //aln = detect_and_read_sequences(aln,param);

    if(param->ntree > (int)numseq){
        param->ntree = (int)numseq;
    }

    //DETECT DNA
    if(param->dna == -1){
        for (quint32 i = 0; i < numseq;i++){
            param->dna = byg_detect(aln->s[i],aln->sl[i]);
            if(param->dna){
                break;
            }
        }
    }
    //param->dna = 0;
    //k_printf("DNA:%d\n",param->dna);
    //exit(0);

    if(param->dna == 1){
        //brief sanity check...
        for (quint32 i = 0; i < numseq;i++){
            if(aln->sl[i] < 6){
                //k_printf("Dna/Rna alignments are only supported for sequences longer than 6.");
                free(param);
                free_aln(aln);
                throw KalignException("Dna/Rna alignments are only supported for sequences longer than 6.");
            }
        }
        aln =  make_dna(aln);
    }

    //int j;

    //fast distance calculation;
    float** submatrix = 0;
    submatrix = read_matrix(submatrix,param); // sets gap penalties as well.....

    //if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
    //	profile_alignment_main(aln,param,submatrix);
    //}

    float** dm = 0;
    if(param->ntree > 1){
        //if(byg_start(param->distance,"pairclustalPAIRCLUSTAL") != -1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,1);
        //	}else{
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,0);
        //	}
        //}else if(byg_start("wu",param->alignment_type) != -1){
        //	dm =  protein_wu_distance2(aln,dm,param);
        //	//	param->feature_type = "wumanber";
        if(param->dna == 1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm =  dna_distance(aln,dm,param,1);
        //	}else{
                dm =  dna_distance(aln,dm,param,0);
        //	}
        }else{
            //if(byg_start(param->tree,"njNJ") != -1){
            //	dm =  protein_wu_distance(aln,dm,param,1);
            //}else{
            try {
                dm =  protein_wu_distance(aln,dm,param,0);
            } catch (const KalignException &) {
                cleanupMemory(submatrix, numseq, dm, aln, param);
                throw;
            }
            //}
        }
        if(check_task_canceled(ctx)) {
            cleanupMemory(submatrix, numseq, dm, aln, param);
            throwCancellingException();
        }
        /*int j;
        for (int i = 0; i< numseq;i++){
        for (j = 0; j< numseq;j++){
        k_printf("%f	",dm[i][j]);
        }
        k_printf("\n");
        }*/

        //if(byg_start(param->tree,"njNJ") != -1){
        //	tree2 = real_nj(dm,param->ntree);
        //}else{
            tree2 = real_upgma(dm,param->ntree);
        //}
        //if(param->print_tree){
        //	print_tree(tree2,aln,param->print_tree);
        //}
    }

    tree = (int*) malloc(sizeof(int)*(numseq*3+1));
    for (quint32 i = 1; i < (numseq*3)+1;i++){
        tree[i] = 0;
    }
    tree[0] = 1;

    if(param->ntree < 2){
        tree[0] = 0;
        tree[1] = 1;

        c = numseq;
        tree[2] = c;
        a = 2;
        for (quint32 i = 3; i < (numseq-1)*3;i+=3){
            tree[i] = c;
            tree[i+1] = a;
            c++;
            tree[i+2] = c;
            a++;
        }
    }else if(param->ntree > 2){
        ntreeify(tree2,param->ntree);
    }else{
        tree = readtree(tree2,tree);
        for (quint32 i = 0; i < (numseq*3);i++){
            tree[i] = tree[i+1];
        }
        free(tree2->links);
        free(tree2->internal_lables);
        free(tree2);
    }

    //get matrices...
    //struct feature_matrix* fm = 0;

    //struct ntree_data* ntree_data = 0;

    int** map = 0;
    //if(param->ntree > 2){
    //	ntree_data = (struct ntree_data*)malloc(sizeof(struct ntree_data));
    //	ntree_data->realtree = tree2;
    //	ntree_data->aln = aln;
    //	ntree_data->profile = 0;
    //	ntree_data->map = 0;
    //	ntree_data->ntree = param->ntree;
    //	ntree_data->submatrix = submatrix;
    //	ntree_data->tree = tree;

    //	ntree_data = ntree_alignment(ntree_data);
    //	map = ntree_data->map;
    //	tree = ntree_data->tree;
    //	for (int i = 0; i < (numseq*3);i++){
    //		tree[i] = tree[i+1];
    //	}
    //	free(ntree_data);
    //}else if (param->feature_type){
    //	fm = get_feature_matrix(fm,aln,param);
    //	if(!fm){
    //		for (int i = 32;i--;){
    //			free(submatrix[i]);
    //		}
    //		free(submatrix);
    //		free_param(param);
    //		free(map);
    //		free(tree);
    //		throw KalignException("getting feature matrix error");
    //	}

    //	map = feature_hirschberg_alignment(aln,tree,submatrix,map,fm);
    //	//exit(0);
    //	//map =  feature_alignment(aln,tree,submatrix, map,fm);

    //}else if (byg_start("pairwise",param->alignment_type) != -1){
    //	if(param->dna == 1){
    //		map = dna_alignment_against_a(aln,tree,submatrix, map,param->gap_inc);
    //	}else{
    //		map = hirschberg_alignment_against_a(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    //	}
    //	//map =  default_alignment(aln,tree,submatrix, map);
    //}else if (byg_start("fast",param->alignment_type) != -1){
    //	map =  default_alignment(aln,tree,submatrix, map);
    if(param->dna == 1){
            map =  dna_alignment(aln,tree,submatrix, map,param->gap_inc);

    //	/*}else if (byg_start("test",param->alignment_type) != -1){
    //	map =  test_alignment(aln,tree,submatrix, map,param->internal_gap_weight,param->smooth_window,param->gap_inc);
    //	}else if (param->aa){
    //	map =  aa_alignment(aln,tree,submatrix, map,param->aa);
    //	}else if (param->alter_gaps){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if (byg_start("altergaps",param->alignment_type) != -1){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if(byg_start("simple",param->alignment_type) != -1){
    //	map =  simple_hirschberg_alignment(aln,tree,submatrix, map);*/
    //}else if(byg_start("advanced",param->alignment_type) != -1){
    //	map =  advanced_hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc,param->internal_gap_weight);
    }else{
        map =  hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    }
    if (map == NULL) {
        throw KalignException("Failed to build alignment.");
    }
    if(check_task_canceled(ctx)) {
        free_param(param);
        free_aln(aln);
        free(map);
        free(tree);
        throwCancellingException();
    }

    //clear up sequence array to be reused as gap array....
    int *p = 0;
    for (quint32 i = 0; i < numseq;i++){
        p = aln->s[i];
        for (a = 0; a < aln->sl[i];a++){
            p[a] = 0;
        }
    }
    //clear up

    for (quint32 i = 0; i < (numseq-1)*3;i +=3){
        a = tree[i];
        b = tree[i+1];
        aln = make_seq(aln,a,b,map[tree[i+2]]);
    }

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%s	%d\n",aln->sn[i],aln->nsip[i]);
    //}


    for (quint32 i = 0; i < numseq;i++){
        aln->nsip[i] = 0;
    }

    aln =  sort_sequences(aln,tree,param->sort);

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%d	%d	%d\n",i,aln->nsip[i],aln->sip[i][0]);
    //}

    /************************************************************************/
    /* Convert aln to MA                                                    */
    /************************************************************************/
    res->setAlphabet(ma->getAlphabet());
    for (quint32 i = 0; i < numseq;i++){
        int f = aln->nsip[i];
        QString seq;
        for(quint32 j=0;j<aln->sl[f];j++) {
            seq += QString(aln->s[f][j],'-') + aln->seq[f][j];
        }
        seq += QString(aln->s[f][aln->sl[f]],'-');
        res->addRow(QString(aln->sn[f]), seq.toLatin1());
    }

    //output(aln,param);
    /*	if(!param->format){
    fasta_output(aln,param->outfile);
    }else{
    if (byg_start("msf",param->format) != -1){
    msf_output(aln,param->outfile);
    }else if (byg_start("clustal",param->format) != -1){
    clustal_output(aln,param->outfile);
    }else if (byg_start("macsim",param->format) != -1){
    macsim_output(aln,param->outfile,param->infile[0]);
    }
    }
    */
    free_param(param);
    free_aln(aln);
    free(map);
    free(tree);
    //KalignContext* ctx = getKalignContext();
}