Пример #1
0
cell *
prepare_dictionary(int *argcp, char *(*argvp[]))
{
    u_char *origin;
    u_char *here;
    u_char *xlimit;
    int dict_size;
    u_char *extension;

    char *dictionary_file = "";

    // Allocate space for the Forth dictionary and read its initial contents
    origin = aln_alloc(MAXDICT, variables);

    xlimit = &origin[MAXDICT];
    if(*argcp < 2
       || (extension = strrchr((*argvp)[1],'.')) == NULL
       || strcmp(extension, ".dic") != 0 ) {
	dictionary_file = is_readable("app.dic") ? "app.dic" : DEFAULT_EXE;
    } else {
        dictionary_file = (*argvp)[1];
        *argcp -= 1;
        *argvp += 1;
    }

    dict_size = read_dictionary(dictionary_file, origin, variables);
    here = &origin[dict_size];

    init_compiler(origin, xlimit, 0xfffe, here, xlimit, variables);
    return variables;
}
Пример #2
0
int main(int argc, char **argv)
{
    extern void init_dictionary(cell *up);
    u_char *origin;

    if (argc != 3) {
        fprintf(stderr, "usage: meta input-file-name output-file-name\n");
        exit(1);
    }
    infile = argv[1];
    outfile = argv[2];

    origin = aln_alloc(MAXDICT, variables);
    *(token_t *)origin = 0;
    init_compiler(origin, origin+MAXDICT, 0xfffe, origin, origin+MAXDICT, variables);
    init_dictionary(variables);
    return 0;
}
Пример #3
0
struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param)
{
	
	int feature = 0;
	char **input = 0;
	unsigned short int* input_type = 0;
	unsigned short int* input_numseq = 0;
	
	int num_input = 0;
	int i = 0;
	int j = 0;
	int c = 0;
	int a,b;
	int free_read = 1;
	unsigned int numseq = get_kalign_context()->numseq;
	while(free_read == 1 || param->infile[i]){
		num_input++;
		i++;
		free_read = 0;
	}
	numseq = 0;

	
	input = malloc(sizeof(char*) * num_input);
	input_type = malloc(sizeof(unsigned short int) * num_input);
	input_numseq = malloc(sizeof(unsigned short int) * num_input);
	
	for (i = 0; i < num_input;i++){
		input[i] = 0;
		input_type[i] = 0;
		input_numseq[i] = 0;
	}

	free_read = 0;
	
	if(param->quiet){
		c = 1;
	}else{
		c = 0;
	}
	
	
	for (i = c; i < num_input;i++){
		if(!param->infile[i]){
			k_printf("reading from STDIN: ");
		}else{
			k_printf("reading from %s: ",param->infile[i]);
		}
		input[i] = get_input_into_string(input[i],param->infile[i]);
		if(input[i]){
			free_read++;
			if (byg_start("<macsim>",input[i]) != -1){
				input_numseq[i] = count_sequences_macsim(input[i]);
				feature = 1;
				input_type[i] = 1;
			}else if (byg_start("<uniprot",input[i]) != -1){
				input_numseq[i] = count_sequences_uniprot(input[i]);
				input_type[i] = 2;
			}else if(byg_start("This SWISS-PROT",input[i]) != -1){
				input_numseq[i] = count_sequences_swissprot(input[i]);
				input_type[i] = 3;
			}else if (byg_start("This Swiss-Prot",input[i]) != -1){
				input_numseq[i] = count_sequences_swissprot(input[i]);
				input_type[i] = 3;
			}else if (byg_start("CLUSTAL W",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("PileUp",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("MSF:",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("STOCKHOLM",input[i]) != -1){
				input_numseq[i] = count_sequences_stockholm(input[i]);
				input_type[i] = 5;
			}else{
				input_numseq[i]  = count_sequences_fasta(input[i]);
				input_type[i] = 0;
			}
			k_printf("found %d sequences\n",input_numseq[i]);
			
			if(input_numseq[i] < 1){
				free(input[i]);
				input[i] = 0;
			}else{
				numseq += input_numseq[i];
			}
		}else{
			k_printf("found no sequences.\n");
			if(!param->outfile && i){
				param->outfile = param->infile[i];
				k_printf("-> output file, in ");
				//try to set format.... 
				if(!param->format){
					if (byg_start("msf",param->outfile) != -1){
						param->format = "msf";
					}else if (byg_start("clustal",param->outfile) != -1){
						param->format = "clustal";
					}else if (byg_start("aln",param->outfile) != -1){
						param->format = "clustal";
					}else if (byg_start("macsim",param->outfile) != -1){
						param->format = "macsim";
					}else{
						param->format = "fasta";
					}
					if(param->reformat){
						k_printf("unaligned fasta format\n");
					}else if(param->format){
						k_printf("%s format\n",param->format);
					}else{
						k_printf("fasta format\n");
					}
				}
			}
			k_printf("\n");
		}
	}

	
	if(numseq < 2){
		k_printf("%s\n", usage);
		if(!numseq){
		k_printf("\nWARNING: No sequences found.\n\n");
		}else{
		k_printf("\nWARNING: Only one sequence found.\n\n");
		}
		for (i = 0; i < num_input;i++){
			free(input[i]);
		}
		free(input_numseq);
		free(input_type);
		free(input);
		free_param(param);
		exit(0);
	}

	if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
		if( free_read  < 2){
			k_printf("\nWARNING: You are trying to perform a profile - profile alignment but ony one input file was detected.\n\n");
			param->alignment_type = "default";
		}
	}

	
	if (param->feature_type && !feature){
		for (i = 0; i < num_input;i++){
			free(input[i]);
		}
		free(input_numseq);
		free(input_type);
		free(input);
		free_param(param);
		throwKalignException(k_printf("\nWARNING: You are trying to perform a feature alignment but the input format(s) do not contain feature information.\n"));
	}
	
	get_kalign_context()->numprofiles = (numseq << 1) - 1;
	aln = aln_alloc(aln);
	//numseq = 0;
	if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
		j = 0;
		for (i = 0; i < num_input;i++){
			
			if(input[i]){
					
				switch(input_type[i]){
					case 0:
						aln = read_alignment(aln,input[i]);
						break;
					case 1:
						aln = read_alignment_macsim_xml(aln,input[i]);
						break;
					case 2:
						aln = read_alignment_uniprot_xml(aln,input[i]);
						break;
					case 3:

						aln = read_alignment_from_swissprot(aln, input[i]);
						break;
					case 4:
						aln = read_alignment_clustal(aln,input[i]);
						break;
					case 5:
						aln = read_alignment_stockholm(aln,input[i]);
						break;
					
					default:
						aln = read_alignment(aln,input[i]);
						break;
				}
				input[i] = 0;
				//create partial profile....
				aln->nsip[numseq+j] = input_numseq[i];
				aln->sip[numseq+j] = malloc(sizeof(int)*aln->nsip[numseq+j]);
				
				//k_printf("%d	%d\n",numseq+j,aln->sl[numseq+j]);
				j++;
			}
		}
		num_input = j;
		c = 0;
		for (i = 0;i < num_input;i++){
		//	
			for ( j = 0; j < aln->nsip[numseq+i];j++){
				aln->sip[numseq+i][j] = c;
				c++;
		//		k_printf("%d ",aln->sip[numseq+i][j]);
			}
			aln->sl[numseq+i] = aln->sl[aln->sip[numseq+i][0]];
		//	k_printf("PROFILE:%d	contains: %d long:%d\n",i+numseq,aln->nsip[numseq+i],aln->sl[numseq+i]);
	//		k_printf("\n");
		}
		
		//sanity check -are all input 
		
		for (i = 0;i < num_input;i++){
			for ( j = 0; j < aln->nsip[numseq+i]-1;j++){
				a = aln->sip[numseq+i][j];
				a = aln->sl[a];
				for (c =  j+1; j < aln->nsip[numseq+i];j++){
					b = aln->sip[numseq+i][c];
					b = aln->sl[b];
					if(a != b){
						
						for (i = 0; i < num_input;i++){
							free(input[i]);
						}
						free(input_numseq);
						free(input_type);
						free(input);
						free_aln(aln);
						free_param(param);
						throwKalignException(k_printf("Unaligned sequences in input %s.\n",param->infile[i]));
					}
				}
				
			}

		}
		
		//exit(0);
		
		/*for (i = 0; i < numseq;i++){
			k_printf("len%d:%d\n",i,aln->sl[i]);	
			for ( j =0 ; j < aln->sl[i];j++){
				//if(aln->s[i][j]> 23 || aln->s[i][j] < 0){
				//	 aln->s[i][j] = -1;
				//}
				k_printf("%d ",aln->s[i][j]);
			}
		//	k_printf("\n");
		}
		exit(0);*/
	}else{
		for (i = 0; i < num_input;i++){
			if(input[i]){
				switch(input_type[i]){
					case 0:
						aln = read_sequences(aln,input[i]);
						break;
					case 1:
						aln = read_sequences_macsim_xml(aln,input[i]);
						break;
					case 2:
						aln = read_sequences_uniprot_xml(aln,input[i]);
						break;
					case 3:
						aln = read_sequences_from_swissprot(aln, input[i]);
						break;
					case 4:
						aln = read_sequences_clustal(aln,input[i]);
						break;
					case 5:
						aln = read_sequences_stockholm(aln,input[i]);
						break;
					
					default:
						aln = read_sequences(aln,input[i]);
						break;
				}
				/*if (byg_start("<macsim>",input[i]) != -1){
					aln = read_sequences_macsim_xml(aln,input[i]);
				}else if (byg_start("<uniprot",input[i]) != -1){
					aln = read_sequences_uniprot_xml(aln,input[i]);
				}else if(byg_start("This SWISS-PROT entry is copyright.",input[i]) != -1){
					aln = read_sequences_from_swissprot(aln, input[i]);
				}else if (byg_start("This Swiss-Prot entry is copyright.",input[i]) != -1){
					aln = read_sequences_from_swissprot(aln, input[i]);
				}else if (byg_start("CLUSTAL W",input[i]) != -1){
					aln = read_sequences_clustal(aln,input[i]);
				}else if (byg_start("PileUp",input[i]) != -1){
					aln = read_sequences_clustal(aln,input[i]);
				}else if (byg_start("MSF:",input[i]) != -1){
					aln = read_sequences_clustal(aln,input[i]);
				}else if (byg_start("STOCKHOLM",input[i]) != -1){
					aln = read_sequences_stockholm(aln,input[i]);
				}else{
					aln = read_sequences(aln,input[i]);
				}*/
				input[i] = 0;
			}
		}
	}
	if(numseq < 2){
		free_param(param);
		throwKalignException(k_printf("\nNo sequences could be read.\n"));
	}
	if(!param->format && param->outfile){
			if (byg_start("msf",param->outfile) != -1){
				param->format = "msf";
			}else if (byg_start("clustal",param->outfile) != -1){
				param->format = "clustal";
			}else if (byg_start("aln",param->outfile) != -1){
				param->format = "clustal";
			}else if (byg_start("macsim",param->outfile) != -1){
				param->format = "macsim";
			}
			k_printf("Output file: %s, in %s format.\n",param->outfile,param->format);
	}
	
	
	free(input);
	free(input_type);
	free(input_numseq);
	return aln;
}
Пример #4
0
void KalignAdapter::alignUnsafe(const MultipleSequenceAlignment& ma, MultipleSequenceAlignment& res, TaskStateInfo& ti) {
    ti.progress = 0;
    int* tree = 0;
    quint32 a, b, c;

    struct alignment* aln = 0;
    struct parameters* param = 0;
    struct aln_tree_node* tree2 = 0;

    param = (parameters*)malloc(sizeof(struct parameters));

    param =  interface(param,0,0);

    kalign_context *ctx = get_kalign_context();
    unsigned int &numseq = ctx->numseq;
    unsigned int &numprofiles = ctx->numprofiles;

    if (ma->getNumRows() < 2){
        if (!numseq){
            k_printf("No sequences found.\n\n");
        } else {
            k_printf("Only one sequence found.\n\n");
        }
        free_param(param);
        throw KalignException("Can't align less then 2 sequences");
    }

    if(ctx->gpo != -1) {
        param->gpo = ctx->gpo;
    }
    if(ctx->gpe != -1) {
        param->gpe = ctx->gpe;
    }
    if(ctx->tgpe != -1) {
        param->tgpe = ctx->tgpe;
    }
    if(ctx->secret != -1) {
        param->secret = ctx->secret;
    }

    /************************************************************************/
    /* Convert MA to aln                                                    */
    /************************************************************************/
    k_printf("Prepare data");
    numseq = ma->getNumRows();
    numprofiles = (numseq << 1) - 1;
    aln = aln_alloc(aln);
    for(quint32 i = 0 ; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        aln->sl[i] = row->getUngappedLength();
        aln->lsn[i] = row->getName().length();
    }

    for (quint32 i = 0; i < numseq;i++) {
        try {
            aln->s[i] = (int*) malloc(sizeof(int)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->s[i]);
            aln->seq[i] = (char*)malloc(sizeof(char)*(aln->sl[i]+1));
            checkAllocatedMemory(aln->seq[i]);
            aln->sn[i] = (char*)malloc(sizeof(char)*(aln->lsn[i]+1));
            checkAllocatedMemory(aln->sn[i]);
        } catch (...) {
            cleanupMemory(NULL, numseq, NULL, aln, param);
            throw;
        }
    }

    int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
    for(quint32 i = 0; i < numseq; i++) {
        const MultipleSequenceAlignmentRow row= ma->getMsaRow(i);
        qstrncpy(aln->sn[i], row->getName().toLatin1(), row->getName().length() + 1); //+1 to include '\0'
        QString gapless = QString(row->getCore()).remove('-');
        qstrncpy(aln->seq[i], gapless.toLatin1(), gapless.length() + 1);	//+1 to include '\0'
        for (quint32 j = 0; j < aln->sl[i]; j++) {
            if (isalpha((int)aln->seq[i][j])){
                aln->s[i][j] = aacode[toupper(aln->seq[i][j])-65];
            } else {
                aln->s[i][j] = -1;
            }
        }
        aln->s[i][aln->sl[i]] = 0;
        aln->seq[i][aln->sl[i]] = 0;
        aln->sn[i][aln->lsn[i]] = 0;
    }

    /*for(int i=0;i<numseq;i++) {
        for(int j=0;j<aln->sl[i];j++)
            printf("%d  ", aln->s[i][j]);
    }*/

    //aln_dump(aln);

    //aln = detect_and_read_sequences(aln,param);

    if(param->ntree > (int)numseq){
        param->ntree = (int)numseq;
    }

    //DETECT DNA
    if(param->dna == -1){
        for (quint32 i = 0; i < numseq;i++){
            param->dna = byg_detect(aln->s[i],aln->sl[i]);
            if(param->dna){
                break;
            }
        }
    }
    //param->dna = 0;
    //k_printf("DNA:%d\n",param->dna);
    //exit(0);

    if(param->dna == 1){
        //brief sanity check...
        for (quint32 i = 0; i < numseq;i++){
            if(aln->sl[i] < 6){
                //k_printf("Dna/Rna alignments are only supported for sequences longer than 6.");
                free(param);
                free_aln(aln);
                throw KalignException("Dna/Rna alignments are only supported for sequences longer than 6.");
            }
        }
        aln =  make_dna(aln);
    }

    //int j;

    //fast distance calculation;
    float** submatrix = 0;
    submatrix = read_matrix(submatrix,param); // sets gap penalties as well.....

    //if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
    //	profile_alignment_main(aln,param,submatrix);
    //}

    float** dm = 0;
    if(param->ntree > 1){
        //if(byg_start(param->distance,"pairclustalPAIRCLUSTAL") != -1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,1);
        //	}else{
        //		dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,0);
        //	}
        //}else if(byg_start("wu",param->alignment_type) != -1){
        //	dm =  protein_wu_distance2(aln,dm,param);
        //	//	param->feature_type = "wumanber";
        if(param->dna == 1){
        //	if(byg_start(param->tree,"njNJ") != -1){
        //		dm =  dna_distance(aln,dm,param,1);
        //	}else{
                dm =  dna_distance(aln,dm,param,0);
        //	}
        }else{
            //if(byg_start(param->tree,"njNJ") != -1){
            //	dm =  protein_wu_distance(aln,dm,param,1);
            //}else{
            try {
                dm =  protein_wu_distance(aln,dm,param,0);
            } catch (const KalignException &) {
                cleanupMemory(submatrix, numseq, dm, aln, param);
                throw;
            }
            //}
        }
        if(check_task_canceled(ctx)) {
            cleanupMemory(submatrix, numseq, dm, aln, param);
            throwCancellingException();
        }
        /*int j;
        for (int i = 0; i< numseq;i++){
        for (j = 0; j< numseq;j++){
        k_printf("%f	",dm[i][j]);
        }
        k_printf("\n");
        }*/

        //if(byg_start(param->tree,"njNJ") != -1){
        //	tree2 = real_nj(dm,param->ntree);
        //}else{
            tree2 = real_upgma(dm,param->ntree);
        //}
        //if(param->print_tree){
        //	print_tree(tree2,aln,param->print_tree);
        //}
    }

    tree = (int*) malloc(sizeof(int)*(numseq*3+1));
    for (quint32 i = 1; i < (numseq*3)+1;i++){
        tree[i] = 0;
    }
    tree[0] = 1;

    if(param->ntree < 2){
        tree[0] = 0;
        tree[1] = 1;

        c = numseq;
        tree[2] = c;
        a = 2;
        for (quint32 i = 3; i < (numseq-1)*3;i+=3){
            tree[i] = c;
            tree[i+1] = a;
            c++;
            tree[i+2] = c;
            a++;
        }
    }else if(param->ntree > 2){
        ntreeify(tree2,param->ntree);
    }else{
        tree = readtree(tree2,tree);
        for (quint32 i = 0; i < (numseq*3);i++){
            tree[i] = tree[i+1];
        }
        free(tree2->links);
        free(tree2->internal_lables);
        free(tree2);
    }

    //get matrices...
    //struct feature_matrix* fm = 0;

    //struct ntree_data* ntree_data = 0;

    int** map = 0;
    //if(param->ntree > 2){
    //	ntree_data = (struct ntree_data*)malloc(sizeof(struct ntree_data));
    //	ntree_data->realtree = tree2;
    //	ntree_data->aln = aln;
    //	ntree_data->profile = 0;
    //	ntree_data->map = 0;
    //	ntree_data->ntree = param->ntree;
    //	ntree_data->submatrix = submatrix;
    //	ntree_data->tree = tree;

    //	ntree_data = ntree_alignment(ntree_data);
    //	map = ntree_data->map;
    //	tree = ntree_data->tree;
    //	for (int i = 0; i < (numseq*3);i++){
    //		tree[i] = tree[i+1];
    //	}
    //	free(ntree_data);
    //}else if (param->feature_type){
    //	fm = get_feature_matrix(fm,aln,param);
    //	if(!fm){
    //		for (int i = 32;i--;){
    //			free(submatrix[i]);
    //		}
    //		free(submatrix);
    //		free_param(param);
    //		free(map);
    //		free(tree);
    //		throw KalignException("getting feature matrix error");
    //	}

    //	map = feature_hirschberg_alignment(aln,tree,submatrix,map,fm);
    //	//exit(0);
    //	//map =  feature_alignment(aln,tree,submatrix, map,fm);

    //}else if (byg_start("pairwise",param->alignment_type) != -1){
    //	if(param->dna == 1){
    //		map = dna_alignment_against_a(aln,tree,submatrix, map,param->gap_inc);
    //	}else{
    //		map = hirschberg_alignment_against_a(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    //	}
    //	//map =  default_alignment(aln,tree,submatrix, map);
    //}else if (byg_start("fast",param->alignment_type) != -1){
    //	map =  default_alignment(aln,tree,submatrix, map);
    if(param->dna == 1){
            map =  dna_alignment(aln,tree,submatrix, map,param->gap_inc);

    //	/*}else if (byg_start("test",param->alignment_type) != -1){
    //	map =  test_alignment(aln,tree,submatrix, map,param->internal_gap_weight,param->smooth_window,param->gap_inc);
    //	}else if (param->aa){
    //	map =  aa_alignment(aln,tree,submatrix, map,param->aa);
    //	}else if (param->alter_gaps){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if (byg_start("altergaps",param->alignment_type) != -1){
    //	map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight);
    //	}else if(byg_start("simple",param->alignment_type) != -1){
    //	map =  simple_hirschberg_alignment(aln,tree,submatrix, map);*/
    //}else if(byg_start("advanced",param->alignment_type) != -1){
    //	map =  advanced_hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc,param->internal_gap_weight);
    }else{
        map =  hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc);
    }
    if (map == NULL) {
        throw KalignException("Failed to build alignment.");
    }
    if(check_task_canceled(ctx)) {
        free_param(param);
        free_aln(aln);
        free(map);
        free(tree);
        throwCancellingException();
    }

    //clear up sequence array to be reused as gap array....
    int *p = 0;
    for (quint32 i = 0; i < numseq;i++){
        p = aln->s[i];
        for (a = 0; a < aln->sl[i];a++){
            p[a] = 0;
        }
    }
    //clear up

    for (quint32 i = 0; i < (numseq-1)*3;i +=3){
        a = tree[i];
        b = tree[i+1];
        aln = make_seq(aln,a,b,map[tree[i+2]]);
    }

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%s	%d\n",aln->sn[i],aln->nsip[i]);
    //}


    for (quint32 i = 0; i < numseq;i++){
        aln->nsip[i] = 0;
    }

    aln =  sort_sequences(aln,tree,param->sort);

    //for (int i = 0; i < numseq;i++){
    //	k_printf("%d	%d	%d\n",i,aln->nsip[i],aln->sip[i][0]);
    //}

    /************************************************************************/
    /* Convert aln to MA                                                    */
    /************************************************************************/
    res->setAlphabet(ma->getAlphabet());
    for (quint32 i = 0; i < numseq;i++){
        int f = aln->nsip[i];
        QString seq;
        for(quint32 j=0;j<aln->sl[f];j++) {
            seq += QString(aln->s[f][j],'-') + aln->seq[f][j];
        }
        seq += QString(aln->s[f][aln->sl[f]],'-');
        res->addRow(QString(aln->sn[f]), seq.toLatin1());
    }

    //output(aln,param);
    /*	if(!param->format){
    fasta_output(aln,param->outfile);
    }else{
    if (byg_start("msf",param->format) != -1){
    msf_output(aln,param->outfile);
    }else if (byg_start("clustal",param->format) != -1){
    clustal_output(aln,param->outfile);
    }else if (byg_start("macsim",param->format) != -1){
    macsim_output(aln,param->outfile,param->infile[0]);
    }
    }
    */
    free_param(param);
    free_aln(aln);
    free(map);
    free(tree);
    //KalignContext* ctx = getKalignContext();
}