Esempio n. 1
0
void convertMSA2MAlignment(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& res) {
    assert(res->isEmpty());
    MuscleContext *ctx = getMuscleContext();
    res->setAlphabet(al);
    ctx->output_uIds.clear();

    for(int i=0, n = msa.GetSeqCount(); i < n; i++) {
        QString name = msa.GetSeqName(i);
        QByteArray seq;
        seq.reserve(msa.GetColCount());
        for (int j = 0, m = msa.GetColCount(); j < m ; j++) {
            char c = msa.GetChar(i, j);
            seq.append(c);
        }
        ctx->output_uIds.append(ctx->tmp_uIds[msa.GetSeqId(i)]);
        res->addRow(name, seq);
    }
}
Esempio n. 2
0
void KalignAdapter::alignUnsafe(const MultipleSequenceAlignment& ma, MultipleSequenceAlignment& res, TaskStateInfo& ti) {
    ti.progress = 0;
    int* tree = 0;
    quint32 a, b, c;

    struct alignment* aln = 0;
    struct parameters* param = 0;
    struct aln_tree_node* tree2 = 0;

    param = (parameters*)malloc(sizeof(struct parameters));

    param =  interface(param,0,0);

    kalign_context *ctx = get_kalign_context();
    unsigned int &numseq = ctx->numseq;
    unsigned int &numprofiles = ctx->numprofiles;

    if (ma->getNumRows() < 2){
        if (!numseq){
            k_printf("No sequences found.\n\n");
        } else {
            k_printf("Only one sequence found.\n\n");
        }
        free_param(param);
        throw KalignException("Can't align less then 2 sequences");
    }

    if(ctx->gpo != -1) {
        param->gpo = ctx->gpo;
    }
    if(ctx->gpe != -1) {
        param->gpe = ctx->gpe;
    }
    if(ctx->tgpe != -1) {
        param->tgpe = ctx->tgpe;
    }
    if(ctx->secret != -1) {
        param->secret = ctx->secret;
    }

    /************************************************************************/
    /* Convert MA to aln                                                    */
    /************************************************************************/
    k_printf("Prepare data");
    numseq = ma->getNumRows();
    numprofiles = (numseq << 1) - 1;
    aln = aln_alloc(aln);
    for(quint32 i = 0 ; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        aln->sl[i] = row->getUngappedLength();
        aln->lsn[i] = row->getName().length();
    }

    for (quint32 i = 0; i < numseq;i++) {
        try {
            aln->s[i] = (int*) malloc(sizeof(int)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->s[i]);
            aln->seq[i] = (char*)malloc(sizeof(char)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->seq[i]);
            aln->sn[i] = (char*)malloc(sizeof(char)*(aln->lsn[i]+1));
            checkAllocatedMemory(aln->sn[i]);
        } catch (...) {
            cleanupMemory(NULL, numseq, NULL, aln, param);
            throw;
        }
    }

    int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
    for(quint32 i = 0; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        qstrncpy(aln->sn[i], row->getName().toLatin1(), row->getName().length() + 1); //+1 to include '\0'
        QString gapless = QString(row->getCore()).remove('-');
        qstrncpy(aln->seq[i], gapless.toLatin1(), gapless.length() + 1);	//+1 to include '\0'
        for (quint32 j = 0; j < aln->sl[i]; j++) {
            if (isalpha((int)aln->seq[i][j])){
                aln->s[i][j] = aacode[toupper(aln->seq[i][j])-65];
            } else {
                aln->s[i][j] = -1;
            }
        }
        aln->s[i][aln->sl[i]] = 0;
        aln->seq[i][aln->sl[i]] = 0;
        aln->sn[i][aln->lsn[i]] = 0;
    }

    /*for(int i=0;i<numseq;i++) {
        for(int j=0;j<aln->sl[i];j++)
            printf("%d  ", aln->s[i][j]);
    }*/

    //aln_dump(aln);

    //aln = detect_and_read_sequences(aln,param);

    if(param->ntree > (int)numseq){
        param->ntree = (int)numseq;
    }

    //DETECT DNA
    if(param->dna == -1){
        for (quint32 i = 0; i < numseq;i++){
            param->dna = byg_detect(aln->s[i],aln->sl[i]);
            if(param->dna){
                break;
            }
        }
    }
    //param->dna = 0;
    //k_printf("DNA:%d\n",param->dna);
    //exit(0);

    if(param->dna == 1){
        //brief sanity check...
        for (quint32 i = 0; i < numseq;i++){
            if(aln->sl[i] < 6){
                //k_printf("Dna/Rna alignments are only supported for sequences longer than 6.");
                free(param);
                free_aln(aln);
                throw KalignException("Dna/Rna alignments are only supported for sequences longer than 6.");
            }
        }
        aln =  make_dna(aln);
    }

    //int j;

    //fast distance calculation;
    float** submatrix = 0;
    submatrix = read_matrix(submatrix,param); // sets gap penalties as well.....

    //if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
    //	profile_alignment_main(aln,param,submatrix);
    //}

    float** dm = 0;
    if(param->ntree > 1){
        //if(byg_start(param->distance,"pairclustalPAIRCLUSTAL") != -1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,1);
        //	}else{
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,0);
        //	}
        //}else if(byg_start("wu",param->alignment_type) != -1){
        //	dm =  protein_wu_distance2(aln,dm,param);
        //	//	param->feature_type = "wumanber";
        if(param->dna == 1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm =  dna_distance(aln,dm,param,1);
        //	}else{
                dm =  dna_distance(aln,dm,param,0);
        //	}
        }else{
            //if(byg_start(param->tree,"njNJ") != -1){
            //	dm =  protein_wu_distance(aln,dm,param,1);
            //}else{
            try {
                dm =  protein_wu_distance(aln,dm,param,0);
            } catch (const KalignException &) {
                cleanupMemory(submatrix, numseq, dm, aln, param);
                throw;
            }
            //}
        }
        if(check_task_canceled(ctx)) {
            cleanupMemory(submatrix, numseq, dm, aln, param);
            throwCancellingException();
        }
        /*int j;
        for (int i = 0; i< numseq;i++){
        for (j = 0; j< numseq;j++){
        k_printf("%f	",dm[i][j]);
        }
        k_printf("\n");
        }*/

        //if(byg_start(param->tree,"njNJ") != -1){
        //	tree2 = real_nj(dm,param->ntree);
        //}else{
            tree2 = real_upgma(dm,param->ntree);
        //}
        //if(param->print_tree){
        //	print_tree(tree2,aln,param->print_tree);
        //}
    }

    tree = (int*) malloc(sizeof(int)*(numseq*3+1));
    for (quint32 i = 1; i < (numseq*3)+1;i++){
        tree[i] = 0;
    }
    tree[0] = 1;

    if(param->ntree < 2){
        tree[0] = 0;
        tree[1] = 1;

        c = numseq;
        tree[2] = c;
        a = 2;
        for (quint32 i = 3; i < (numseq-1)*3;i+=3){
            tree[i] = c;
            tree[i+1] = a;
            c++;
            tree[i+2] = c;
            a++;
        }
    }else if(param->ntree > 2){
        ntreeify(tree2,param->ntree);
    }else{
        tree = readtree(tree2,tree);
        for (quint32 i = 0; i < (numseq*3);i++){
            tree[i] = tree[i+1];
        }
        free(tree2->links);
        free(tree2->internal_lables);
        free(tree2);
    }

    //get matrices...
    //struct feature_matrix* fm = 0;

    //struct ntree_data* ntree_data = 0;

    int** map = 0;
    //if(param->ntree > 2){
    //	ntree_data = (struct ntree_data*)malloc(sizeof(struct ntree_data));
    //	ntree_data->realtree = tree2;
    //	ntree_data->aln = aln;
    //	ntree_data->profile = 0;
    //	ntree_data->map = 0;
    //	ntree_data->ntree = param->ntree;
    //	ntree_data->submatrix = submatrix;
    //	ntree_data->tree = tree;

    //	ntree_data = ntree_alignment(ntree_data);
    //	map = ntree_data->map;
    //	tree = ntree_data->tree;
    //	for (int i = 0; i < (numseq*3);i++){
    //		tree[i] = tree[i+1];
    //	}
    //	free(ntree_data);
    //}else if (param->feature_type){
    //	fm = get_feature_matrix(fm,aln,param);
    //	if(!fm){
    //		for (int i = 32;i--;){
    //			free(submatrix[i]);
    //		}
    //		free(submatrix);
    //		free_param(param);
    //		free(map);
    //		free(tree);
    //		throw KalignException("getting feature matrix error");
    //	}

    //	map = feature_hirschberg_alignment(aln,tree,submatrix,map,fm);
    //	//exit(0);
    //	//map =  feature_alignment(aln,tree,submatrix, map,fm);

    //}else if (byg_start("pairwise",param->alignment_type) != -1){
    //	if(param->dna == 1){
    //		map = dna_alignment_against_a(aln,tree,submatrix, map,param->gap_inc);
    //	}else{
    //		map = hirschberg_alignment_against_a(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    //	}
    //	//map =  default_alignment(aln,tree,submatrix, map);
    //}else if (byg_start("fast",param->alignment_type) != -1){
    //	map =  default_alignment(aln,tree,submatrix, map);
    if(param->dna == 1){
            map =  dna_alignment(aln,tree,submatrix, map,param->gap_inc);

    //	/*}else if (byg_start("test",param->alignment_type) != -1){
    //	map =  test_alignment(aln,tree,submatrix, map,param->internal_gap_weight,param->smooth_window,param->gap_inc);
    //	}else if (param->aa){
    //	map =  aa_alignment(aln,tree,submatrix, map,param->aa);
    //	}else if (param->alter_gaps){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if (byg_start("altergaps",param->alignment_type) != -1){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if(byg_start("simple",param->alignment_type) != -1){
    //	map =  simple_hirschberg_alignment(aln,tree,submatrix, map);*/
    //}else if(byg_start("advanced",param->alignment_type) != -1){
    //	map =  advanced_hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc,param->internal_gap_weight);
    }else{
        map =  hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    }
    if (map == NULL) {
        throw KalignException("Failed to build alignment.");
    }
    if(check_task_canceled(ctx)) {
        free_param(param);
        free_aln(aln);
        free(map);
        free(tree);
        throwCancellingException();
    }

    //clear up sequence array to be reused as gap array....
    int *p = 0;
    for (quint32 i = 0; i < numseq;i++){
        p = aln->s[i];
        for (a = 0; a < aln->sl[i];a++){
            p[a] = 0;
        }
    }
    //clear up

    for (quint32 i = 0; i < (numseq-1)*3;i +=3){
        a = tree[i];
        b = tree[i+1];
        aln = make_seq(aln,a,b,map[tree[i+2]]);
    }

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%s	%d\n",aln->sn[i],aln->nsip[i]);
    //}


    for (quint32 i = 0; i < numseq;i++){
        aln->nsip[i] = 0;
    }

    aln =  sort_sequences(aln,tree,param->sort);

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%d	%d	%d\n",i,aln->nsip[i],aln->sip[i][0]);
    //}

    /************************************************************************/
    /* Convert aln to MA                                                    */
    /************************************************************************/
    res->setAlphabet(ma->getAlphabet());
    for (quint32 i = 0; i < numseq;i++){
        int f = aln->nsip[i];
        QString seq;
        for(quint32 j=0;j<aln->sl[f];j++) {
            seq += QString(aln->s[f][j],'-') + aln->seq[f][j];
        }
        seq += QString(aln->s[f][aln->sl[f]],'-');
        res->addRow(QString(aln->sn[f]), seq.toLatin1());
    }

    //output(aln,param);
    /*	if(!param->format){
    fasta_output(aln,param->outfile);
    }else{
    if (byg_start("msf",param->format) != -1){
    msf_output(aln,param->outfile);
    }else if (byg_start("clustal",param->format) != -1){
    clustal_output(aln,param->outfile);
    }else if (byg_start("macsim",param->format) != -1){
    macsim_output(aln,param->outfile,param->infile[0]);
    }
    }
    */
    free_param(param);
    free_aln(aln);
    free(map);
    free(tree);
    //KalignContext* ctx = getKalignContext();
}