예제 #1
0
/*
 * Score every sequence against one reference sequence and hand the result
 * to quickSort().
 *
 * The reference is the first sequence i for which
 * byg_start(sort, aln->sn[i]) does not return -1; if there is none,
 * sequence 0 is used.  The reference gets the score 1000 in
 * aln->sip[ref][0].  Every other sequence gets
 * (1000 * identical characters) / (compared positions), or 0 when no
 * position could be compared.  aln->nsip[] is then reset to the identity
 * order before quickSort(aln, numseq) is called.
 *
 * NOTE(review): aln->s[x][k] + 1 is accumulated as a running coordinate, so
 * aln->s presumably holds per-residue gap counts at this point - confirm
 * against the caller.
 *
 * Returns aln.
 */
struct alignment* sort_in_relation (struct alignment* aln, const char* sort)
{
    int ref = -1;
    int seq, col, ref_col;
    int matches, shared;
    int pos_seq, pos_ref;

    /* Pick the reference: first name that matches, otherwise sequence 0. */
    for (seq = 0; seq < numseq && ref == -1; seq++) {
        if (byg_start(sort, aln->sn[seq]) != -1) {
            ref = seq;
        }
    }
    if (ref == -1) {
        ref = 0;
    }
    aln->sip[ref][0] = 1000;

    for (seq = 0; seq < numseq; seq++) {
        if (seq == ref) {
            continue;
        }

        pos_seq = 0;
        pos_ref = 0;
        ref_col = 0;
        matches = 0;
        shared = 0;

        for (col = 0; col < aln->sl[seq]; col++) {
            pos_seq += aln->s[seq][col] + 1;

            /* Advance the reference until it reaches or passes pos_seq. */
            while (pos_seq > pos_ref) {
                pos_ref += aln->s[ref][ref_col] + 1;
                ref_col++;
            }

            if (pos_seq != pos_ref) {
                continue;
            }

            /* Both sequences have a character at this coordinate. */
            if ((int) aln->seq[seq][col] == (int) aln->seq[ref][ref_col - 1]) {
                matches += 1000;
            }
            shared += 1;
        }

        aln->sip[seq][0] = shared ? matches / shared : 0;
    }

    for (seq = 0; seq < numseq; seq++) {
        aln->nsip[seq] = seq;
    }
    quickSort(aln, numseq);

    return aln;
}
예제 #2
0
/*
 * Count the sequence lines of a Stockholm-formatted buffer.
 *
 * The buffer is walked line by line (the text before the first newline is
 * never examined).  Scanning stops at the first line that starts with "//".
 * A line is counted unless byg_end("#", line) returns 1, i.e. unless it
 * starts with '#'.
 *
 * Returns the number of counted lines (0 when there are none).
 */
int count_sequences_stockholm(char* string)
{
	char *line = string;
	int count = 0;
	int step;

	for (step = byg_end("\n", line); step != -1; step = byg_end("\n", line)) {
		line += step;

		if (byg_start("//", line) == 0) {
			break;
		}
		if (byg_end("#", line) != 1) {
			count++;
		}
	}

	return count;
}
예제 #3
0
/*
 * Fill aln->nsip[] with the output order of the sequences.
 *
 * The order is selected by the text in `sort`:
 *   - NULL or containing "input": identity order (0, 1, 2, ...);
 *   - containing "tree": the entries tree[i] and tree[i+1] of each triple
 *     that are smaller than numseq, in the order they appear;
 *   - containing "gaps": indices ordered by ascending current value of
 *     aln->nsip[] (selection sort; `tree` is used as scratch space and
 *     aln->nsip[] is overwritten while sorting);
 *   - anything else: delegated to sort_in_relation(aln, sort).
 *
 * Returns the (possibly replaced) alignment pointer.
 */
struct alignment* sort_sequences (struct alignment* aln, int* tree, const char* sort)
{
    enum { BY_INPUT = 0, BY_TREE = 1, BY_GAPS = 2, BY_RELATION = 3 };

    int mode = BY_INPUT;
    int idx, scan, out;
    int smallest, pick;

    if (sort) {
        if (byg_start ("input", sort) != -1) {
            mode = BY_INPUT;
        } else if (byg_start ("tree", sort) != -1) {
            mode = BY_TREE;
        } else if (byg_start ("gaps", sort) != -1) {
            mode = BY_GAPS;
        } else {
            mode = BY_RELATION;
        }
    }

    switch (mode) {
    case BY_TREE:
        out = 0;
        for (idx = 0; idx < (numseq - 1) * 3; idx += 3) {
            if (tree[idx] < numseq) {
                aln->nsip[out] = tree[idx];
                out++;
            }
            if (tree[idx + 1] < numseq) {
                aln->nsip[out] = tree[idx + 1];
                out++;
            }
        }
        break;

    case BY_GAPS:
        /* Repeatedly pull out the index holding the smallest value. */
        for (idx = 0; idx < numseq; idx++) {
            smallest = 1000000;
            pick = -1;
            for (scan = 0; scan < numseq; scan++) {
                if (aln->nsip[scan] < smallest) {
                    smallest = aln->nsip[scan];
                    pick = scan;
                }
            }
            tree[idx] = pick;
            aln->nsip[pick] = 1000000;   /* mark as consumed */
        }
        for (idx = 0; idx < numseq; idx++) {
            aln->nsip[idx] = tree[idx];
        }
        break;

    case BY_RELATION:
        aln = sort_in_relation (aln, sort);
        break;

    case BY_INPUT:
    default:
        for (idx = 0; idx < numseq; idx++) {
            aln->nsip[idx] = idx;
        }
        break;
    }

    return aln;
}
예제 #4
0
/*
 * Read the entries of a UniProt XML buffer into `aln`.
 *
 * New sequences are appended after the last slot whose aln->sl[] is
 * non-zero.  For every "<entry" the text between "<name>" and "</name>"
 * becomes the sequence name (aln->sn / aln->lsn), and the letters found
 * before "</sequence>" become the sequence (aln->seq), its numeric codes
 * (aln->s) and its length (aln->sl).  Non-letters are skipped.
 *
 * `string` is freed before returning; the caller must not use it again.
 * Returns aln.
 */
struct alignment* read_sequences_uniprot_xml(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'. */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};

	char *p1 = string;
	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;
	int code = 0;
	unsigned char ch = 0;

	/* Skip the slots that already hold a sequence. */
	while(aln->sl[c]){
		c++;
	}

	while((i = byg_end("<entry",p1)) != -1){
		p1 += i;	/* p1 is just behind "<entry" */
		i = byg_end("<name>",p1);
		p1 += i;	/* p1 is at the first character of the name */
		j = byg_start("</name>",p1);
		aln->lsn[c] = j;
		aln->sn[c] = malloc(sizeof(char)*(j+1));
		for (i = 0;i < j;i++){
			aln->sn[c][i] = p1[i];
		}
		aln->sn[c][j] = 0;

		/*
		 * Move behind the last "<sequence ...>" opening tag.
		 * NOTE(review): the search is not limited to the current entry, so
		 * with several entries in one buffer it can run into later entries
		 * - confirm against real multi-entry input.
		 */
		while((i = byg_end("<sequence",p1)) != -1){
			p1 += i;
			i = byg_end(">",p1);
			p1 += i;
		}

		j = byg_start("</sequence>",p1);

		aln->s[c] = malloc(sizeof(int)*(j+1));
		aln->seq[c] = malloc(sizeof(char)*(j+1));
		n = 0;
		for (i = 0;i < j;i++){
			/* <ctype.h> functions are only defined for unsigned char values. */
			ch = (unsigned char)p1[i];
			if(!isalpha(ch)){
				continue;
			}
			code = toupper(ch) - 'A';
			if(code < 0 || code >= 26){
				continue;	/* locale-specific letter without a residue code */
			}
			aln->s[c][n] = aacode[code];
			aln->seq[c][n] = p1[i];
			n++;
		}
		aln->s[c][n] = 0;
		aln->seq[c][n] = 0;
		aln->sl[c] = n;
		c++;
	}
	free(string);
	return aln;
}
예제 #5
0
/*
 * Parse the "<fitem>" elements found in `p` and append one struct feature
 * per item to the list `ft`.
 *
 * Parsing stops when no further "<fitem>" exists or when the next one lies
 * behind the next "</seq-info>".  For each item the text of <ftype> and
 * <fnote> is copied into freshly allocated strings, <fstart> and <fstop>
 * are converted with atoi(), and `color` is set to -1.
 *
 * A closing tag that cannot be found yields an empty string / the value 0
 * instead of a write in front of the buffer, and numeric fields longer than
 * the conversion buffer are truncated instead of overflowing it.
 *
 * Returns the head of the list (`ft` itself unless it was NULL).
 */
struct feature* read_ft(struct feature* ft,char* p)
{
	int i,j;
	struct feature *n = 0;
	struct feature *tail = 0;
	char tmp[10];
	char* p1 = p;

	/* Locate the current end of the list once; it is kept up to date below. */
	tail = ft;
	if(tail != 0){
		while(tail->next != 0){
			tail = tail->next;
		}
	}

	while((j = byg_end("<fitem>",p1)) != -1){
		i = byg_end("</seq-info>",p1);

		if(j > i){
			break;	/* the next item is outside the current <seq-info> */
		}

		n = malloc(sizeof(struct feature));
		n->next = 0;
		n->color = -1;

		p1 += j;	/* p1 is just behind "<fitem>" */
		i = byg_end("<ftype>",p1);
		p1 += i;	/* p1 is at the first character of the type */
		j = byg_start("</ftype>",p1);
		if(j < 0){
			j = 0;
		}

		n->type = malloc(sizeof *n->type * (j + 1));
		for (i = 0; i < j;i++){
			n->type[i] = p1[i];
		}
		n->type[j] = 0;

		i = byg_end("<fstart>",p1);
		p1 += i;
		j = byg_start("</fstart>",p1);
		if(j < 0){
			j = 0;
		}
		if(j > (int)sizeof(tmp) - 1){
			j = (int)sizeof(tmp) - 1;	/* never write past tmp[] */
		}
		for (i = 0; i < j;i++){
			tmp[i] = p1[i];
		}
		tmp[j] = 0;
		n->start = atoi(tmp);

		i = byg_end("<fstop>",p1);
		p1 += i;
		j = byg_start("</fstop>",p1);
		if(j < 0){
			j = 0;
		}
		if(j > (int)sizeof(tmp) - 1){
			j = (int)sizeof(tmp) - 1;
		}
		for (i = 0; i < j;i++){
			tmp[i] = p1[i];
		}
		tmp[j] = 0;
		n->end = atoi(tmp);

		i = byg_end("<fnote>",p1);
		p1 += i;
		j = byg_start("</fnote>",p1);
		if(j < 0){
			j = 0;
		}
		n->note = malloc(sizeof *n->note * (j + 1));
		for (i = 0; i < j;i++){
			n->note[i] = p1[i];
		}
		n->note[j] = 0;

		/* Append behind the current tail. */
		if(tail != 0){
			tail->next = n;
		}else{
			ft = n;
		}
		tail = n;
		n = 0;
	}
	return ft;
}
예제 #6
0
/*
 * Read the "<sequence" elements of a MACSIM XML buffer into `aln`.
 *
 * First, every "<g> ... <r>" prefix and "</r> ... </g>" suffix is
 * overwritten with blanks in place, which leaves only the text between
 * <r> and </r> of each group.  Then, for every "<sequence" element, the
 * parts that start before its "</sequence>" are read:
 *   - <seq-name>: copied into aln->sn / aln->lsn;
 *   - <ftable>:   parsed by read_ft() into aln->ft;
 *   - <seq-data>: every character above 32 is stored in aln->seq, with its
 *                 residue code (or -1 for non-letters) in aln->s and the
 *                 count in aln->sl.
 * New sequences are appended after the last slot whose aln->sl[] is
 * non-zero.  A part whose opening tag is not found at all is skipped.
 *
 * `string` is modified and then freed; the caller must not use it again.
 * Returns aln.
 */
struct alignment* read_alignment_macsim_xml(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'. */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};

	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;
	int max = 0;
	int code = 0;
	char *p = string;

	if(byg_count("<g>",p)){
		while((i = byg_start("<g>",p)) != -1){
			p += i;
			j = byg_end("<r>",p);
			for(i = 0; i < j;i++){
				p[i] = ' ';
			}
			i = byg_start("</r>",p);
			if(i == -1){
				/* No closing </r> left: stop instead of stepping backwards. */
				break;
			}
			p += i;

			j = byg_end("</g>",p);
			for(i = 0; i < j;i++){
				p[i] = ' ';
			}
		}
	}
	p = string;

	/* Skip the slots that already hold a sequence. */
	c = 0;
	while(aln->sl[c]){
		c++;
	}

	while((i = byg_end("<sequence",p)) != -1){
		p += i;	/* p is just behind "<sequence" */
		max = byg_end("</sequence>",p);

		/* -1 means "not found" and must not pass the "inside this element" test. */
		i = byg_end("<seq-name>",p);
		if(i != -1 && i < max){
			p += i;	/* p is at the first character of the name */
			j = byg_start("</seq-name>",p);

			aln->lsn[c] = j;
			aln->sn[c] = malloc(sizeof(char)*(j+1));
			for (i = 0;i < j;i++){
				aln->sn[c][i] = p[i];
			}
			aln->sn[c][j] = 0;
		}

		i = byg_end("<ftable>",p);
		if(i != -1 && i < max){
			aln->ft[c] = read_ft(aln->ft[c],p);
		}

		i = byg_end("<seq-data>",p);
		if(i != -1 && i < max){
			p += i;
			j = byg_start("</seq-data>",p);
			aln->s[c] = malloc(sizeof(int)*(j+1));
			aln->seq[c] = malloc(sizeof(char)*(j+1));
			n = 0;
			for (i = 0;i < j;i++){
				if((int)p[i] > 32){
					if(isalpha((unsigned char)p[i])){
						code = toupper((unsigned char)p[i]) - 'A';
						/* Guard the table against letters outside A-Z. */
						aln->s[c][n] = (code >= 0 && code < 26) ? aacode[code] : -1;
					}else{
						aln->s[c][n] = -1;
					}
					aln->seq[c][n] = p[i];
					n++;
				}
			}
			aln->s[c][n] = 0;
			aln->seq[c][n] = 0;
			aln->sl[c] = n;
		}

		c++;
	}
	free(string);
	return aln;
}
예제 #7
0
/*
 * Read the records of a SWISS-PROT flat-file buffer into `aln`.
 *
 * For every "ID   " line the text up to the next blank becomes the sequence
 * name.  The sequence itself is taken from the line after "SQ   " up to the
 * terminating "//": every character above 32 is kept in aln->seq, with its
 * residue code (or -1 for non-letters) in aln->s.  Each kept character is
 * also echoed through k_printf().  New sequences are appended after the
 * last slot whose aln->sl[] is non-zero.
 *
 * `string` is freed before returning; the caller must not use it again.
 * Returns aln.
 */
struct alignment* read_alignment_from_swissprot(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'. */
	int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *cur = string;
	int slot = 0;
	int skip, len, k, kept;

	/* Skip the slots that already hold a sequence. */
	while (aln->sl[slot]) {
		slot++;
	}

	k_printf("found sequence:\n");
	for (skip = byg_end("ID   ", cur); skip != -1; skip = byg_end("ID   ", cur)) {
		/* Name: from behind "ID   " up to the next blank. */
		cur += skip;
		len = byg_start(" ", cur);
		aln->lsn[slot] = len;
		aln->sn[slot] = malloc(sizeof(char) * (len + 1));
		for (k = 0; k < len; k++) {
			aln->sn[slot][k] = cur[k];
		}
		aln->sn[slot][len] = 0;
		cur += len;

		/* Sequence data starts on the line after the "SQ   " header. */
		len = byg_end("SQ   ", cur);
		cur += len;
		len = byg_end("\n", cur);
		cur += len;
		len = byg_start("//", cur);
		k_printf("found sequence:\n");

		aln->s[slot] = malloc(sizeof(int) * (len + 1));
		aln->seq[slot] = malloc(sizeof(char) * (len + 1));
		kept = 0;
		for (k = 0; k < len; k++) {
			if ((int)cur[k] <= 32) {
				continue;	/* blanks and control characters */
			}
			if (isalpha((int)cur[k])) {
				aln->s[slot][kept] = aacode[toupper(cur[k]) - 65];
			} else {
				aln->s[slot][kept] = -1;
			}
			k_printf("%c", cur[k]);
			aln->seq[slot][kept] = cur[k];
			kept++;
		}

		k_printf("\n\n");
		aln->s[slot][kept] = 0;
		aln->seq[slot][kept] = 0;
		aln->sl[slot] = kept;
		slot++;
	}

	free(string);
	return aln;
}
예제 #8
0
/*
 * Load all inputs named in param->infile[], detect their format, and read
 * the sequences into a freshly allocated alignment.
 *
 * Steps:
 *   1. Count the inputs.  Slot 0 is always counted (a NULL name there is
 *      reported as STDIN); further slots are counted up to the first NULL.
 *   2. Load each input and detect its format from marker strings.  The
 *      codes stored in input_type[] are: 0 fasta (fallback), 1 macsim,
 *      2 uniprot, 3 swissprot, 4 clustal/pileup/msf, 5 stockholm.
 *      An input that could not be loaded may become the output file.
 *   3. With fewer than two sequences in total: print usage and exit(0).
 *   4. Allocate the alignment and run the matching reader per input.  In
 *      profile mode every input additionally becomes one profile behind the
 *      sequences, and all members of a profile must have equal length.
 *   5. Derive the output format from the output file name if it is unset.
 *
 * The readers free the input buffers they are given.
 * Returns the alignment; error paths do not return (exit() or
 * throwKalignException()).
 */
struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param)
{
	int feature = 0;
	char **input = 0;
	unsigned short int* input_type = 0;
	unsigned short int* input_numseq = 0;

	int num_input = 0;
	int total_inputs = 0;
	int i = 0;
	int j = 0;
	int c = 0;
	int a,b;
	int src = 0;
	int seen = 0;
	int free_read = 0;
	const char* bad_infile = 0;
	unsigned int numseq = get_kalign_context()->numseq;

	/* Slot 0 counts unconditionally, the rest up to the first NULL name. */
	do{
		num_input++;
		i++;
	}while(param->infile[i]);
	total_inputs = num_input;
	numseq = 0;

	input = malloc(sizeof(char*) * num_input);
	input_type = malloc(sizeof(unsigned short int) * num_input);
	input_numseq = malloc(sizeof(unsigned short int) * num_input);

	for (i = 0; i < num_input;i++){
		input[i] = 0;
		input_type[i] = 0;
		input_numseq[i] = 0;
	}

	free_read = 0;

	/* When param->quiet is set, slot 0 is not read. */
	if(param->quiet){
		c = 1;
	}else{
		c = 0;
	}

	for (i = c; i < num_input;i++){
		if(!param->infile[i]){
			k_printf("reading from STDIN: ");
		}else{
			k_printf("reading from %s: ",param->infile[i]);
		}
		input[i] = get_input_into_string(input[i],param->infile[i]);
		if(input[i]){
			free_read++;
			if (byg_start("<macsim>",input[i]) != -1){
				input_numseq[i] = count_sequences_macsim(input[i]);
				feature = 1;
				input_type[i] = 1;
			}else if (byg_start("<uniprot",input[i]) != -1){
				input_numseq[i] = count_sequences_uniprot(input[i]);
				input_type[i] = 2;
			}else if(byg_start("This SWISS-PROT",input[i]) != -1){
				input_numseq[i] = count_sequences_swissprot(input[i]);
				input_type[i] = 3;
			}else if (byg_start("This Swiss-Prot",input[i]) != -1){
				input_numseq[i] = count_sequences_swissprot(input[i]);
				input_type[i] = 3;
			}else if (byg_start("CLUSTAL W",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("PileUp",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("MSF:",input[i]) != -1){
				input_numseq[i] = count_sequences_clustalw(input[i]);
				input_type[i] = 4;
			}else if (byg_start("STOCKHOLM",input[i]) != -1){
				input_numseq[i] = count_sequences_stockholm(input[i]);
				input_type[i] = 5;
			}else{
				input_numseq[i]  = count_sequences_fasta(input[i]);
				input_type[i] = 0;
			}
			k_printf("found %d sequences\n",input_numseq[i]);

			if(input_numseq[i] < 1){
				/* Nothing usable in this input: drop the buffer. */
				free(input[i]);
				input[i] = 0;
			}else{
				numseq += input_numseq[i];
			}
		}else{
			k_printf("found no sequences.\n");
			/* An unreadable file (other than slot 0) is taken as the output file. */
			if(!param->outfile && i){
				param->outfile = param->infile[i];
				k_printf("-> output file, in ");
				/* Guess the format from the file name. */
				if(!param->format){
					if (byg_start("msf",param->outfile) != -1){
						param->format = "msf";
					}else if (byg_start("clustal",param->outfile) != -1){
						param->format = "clustal";
					}else if (byg_start("aln",param->outfile) != -1){
						param->format = "clustal";
					}else if (byg_start("macsim",param->outfile) != -1){
						param->format = "macsim";
					}else{
						param->format = "fasta";
					}
					if(param->reformat){
						k_printf("unaligned fasta format\n");
					}else if(param->format){
						k_printf("%s format\n",param->format);
					}else{
						k_printf("fasta format\n");
					}
				}
			}
			k_printf("\n");
		}
	}

	if(numseq < 2){
		k_printf("%s\n", usage);
		if(!numseq){
			k_printf("\nWARNING: No sequences found.\n\n");
		}else{
			k_printf("\nWARNING: Only one sequence found.\n\n");
		}
		for (i = 0; i < num_input;i++){
			free(input[i]);
		}
		free(input_numseq);
		free(input_type);
		free(input);
		free_param(param);
		exit(0);
	}

	if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
		if( free_read  < 2){
			k_printf("\nWARNING: You are trying to perform a profile - profile alignment but ony one input file was detected.\n\n");
			param->alignment_type = "default";
		}
	}

	if (param->feature_type && !feature){
		for (i = 0; i < num_input;i++){
			free(input[i]);
		}
		free(input_numseq);
		free(input_type);
		free(input);
		free_param(param);
		throwKalignException(k_printf("\nWARNING: You are trying to perform a feature alignment but the input format(s) do not contain feature information.\n"));
	}

	/*
	 * NOTE(review): only numprofiles is written to the context here; the
	 * local numseq total is never stored back - confirm that aln_alloc()
	 * obtains the sequence count by other means.
	 */
	get_kalign_context()->numprofiles = (numseq << 1) - 1;
	aln = aln_alloc(aln);

	if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){
		/* Profile mode: each usable input becomes profile numseq + j. */
		j = 0;
		for (i = 0; i < num_input;i++){
			if(input[i]){
				switch(input_type[i]){
					case 1:
						aln = read_alignment_macsim_xml(aln,input[i]);
						break;
					case 2:
						aln = read_alignment_uniprot_xml(aln,input[i]);
						break;
					case 3:
						aln = read_alignment_from_swissprot(aln, input[i]);
						break;
					case 4:
						aln = read_alignment_clustal(aln,input[i]);
						break;
					case 5:
						aln = read_alignment_stockholm(aln,input[i]);
						break;
					case 0:
					default:
						aln = read_alignment(aln,input[i]);
						break;
				}
				input[i] = 0;	/* the reader released the buffer */
				/* Reserve the member list of this profile. */
				aln->nsip[numseq+j] = input_numseq[i];
				aln->sip[numseq+j] = malloc(sizeof(int)*aln->nsip[numseq+j]);
				j++;
			}
		}
		num_input = j;	/* from here on: number of profiles */

		/* Sequences were stored in input order, so members are consecutive. */
		c = 0;
		for (i = 0;i < num_input;i++){
			for ( j = 0; j < aln->nsip[numseq+i];j++){
				aln->sip[numseq+i][j] = c;
				c++;
			}
			aln->sl[numseq+i] = aln->sl[aln->sip[numseq+i][0]];
		}

		/*
		 * Sanity check: all members of a profile must have the same length.
		 * Comparing every member with the first one is sufficient.
		 */
		for (i = 0;i < num_input;i++){
			a = aln->sl[aln->sip[numseq+i][0]];
			for ( j = 1; j < aln->nsip[numseq+i];j++){
				b = aln->sl[aln->sip[numseq+i][j]];
				if(a != b){
					/*
					 * Map profile i back to the input slot it came from
					 * (inputs without sequences did not get a profile) and
					 * fetch the name before param is released.
					 * NOTE(review): assumes free_param() does not release
					 * the name strings themselves - confirm.
					 */
					seen = -1;
					for (src = 0; src < total_inputs;src++){
						if(input_numseq[src] > 0){
							seen++;
							if(seen == i){
								bad_infile = param->infile[src];
								break;
							}
						}
					}
					for (src = 0; src < total_inputs;src++){
						free(input[src]);
					}
					free(input_numseq);
					free(input_type);
					free(input);
					free_aln(aln);
					free_param(param);
					throwKalignException(k_printf("Unaligned sequences in input %s.\n",bad_infile ? bad_infile : "STDIN"));
				}
			}
		}
	}else{
		for (i = 0; i < num_input;i++){
			if(input[i]){
				switch(input_type[i]){
					case 1:
						aln = read_sequences_macsim_xml(aln,input[i]);
						break;
					case 2:
						aln = read_sequences_uniprot_xml(aln,input[i]);
						break;
					case 3:
						aln = read_sequences_from_swissprot(aln, input[i]);
						break;
					case 4:
						aln = read_sequences_clustal(aln,input[i]);
						break;
					case 5:
						aln = read_sequences_stockholm(aln,input[i]);
						break;
					case 0:
					default:
						aln = read_sequences(aln,input[i]);
						break;
				}
				input[i] = 0;	/* the reader released the buffer */
			}
		}
	}
	if(numseq < 2){
		free(input);
		free(input_type);
		free(input_numseq);
		free_param(param);
		throwKalignException(k_printf("\nNo sequences could be read.\n"));
	}
	if(!param->format && param->outfile){
			if (byg_start("msf",param->outfile) != -1){
				param->format = "msf";
			}else if (byg_start("clustal",param->outfile) != -1){
				param->format = "clustal";
			}else if (byg_start("aln",param->outfile) != -1){
				param->format = "clustal";
			}else if (byg_start("macsim",param->outfile) != -1){
				param->format = "macsim";
			}
			/* format may still be unset here; never hand NULL to %s. */
			k_printf("Output file: %s, in %s format.\n",param->outfile,param->format ? param->format : "fasta");
	}

	free(input);
	free(input_type);
	free(input_numseq);
	return aln;
}
예제 #9
0
/*
 * Read the sequence lines of a Stockholm-formatted buffer into `aln`.
 *
 * The buffer is walked line by line (the text before the first newline is
 * never examined) until a line starting with "//".  Lines starting with
 * '#' are ignored.  On every other line the text up to the first blank is
 * the sequence name; from the rest of the line every character above 32 is
 * kept in aln->seq, with its residue code (or -1 for non-letters) in
 * aln->s.  New sequences are appended after the last slot whose aln->sl[]
 * is non-zero.
 *
 * `string` is freed before returning; the caller must not use it again.
 * Returns aln.
 */
struct alignment* read_alignment_stockholm(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'. */
	int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *line = string;
	int slot = 0;
	int adv, len, k, kept;

	/* Skip the slots that already hold a sequence. */
	while (aln->sl[slot]) {
		slot++;
	}

	for (adv = byg_end("\n", line); adv != -1; adv = byg_end("\n", line)) {
		line += adv;

		if (byg_start("//", line) == 0) {
			break;		/* end-of-alignment marker */
		}
		if (byg_end("#", line) == 1) {
			continue;	/* markup line */
		}

		/* Name: everything up to the first blank. */
		len = byg_start(" ", line);
		aln->lsn[slot] = len;
		aln->sn[slot] = malloc(sizeof(char) * (len + 1));
		for (k = 0; k < len; k++) {
			aln->sn[slot][k] = line[k];
		}
		aln->sn[slot][len] = 0;

		/* Sequence: the remainder of the line. */
		line += len;
		len = byg_start("\n", line);

		aln->s[slot] = malloc(sizeof(int) * (len + 1));
		aln->seq[slot] = malloc(sizeof(char) * (len + 1));
		kept = 0;
		for (k = 0; k < len; k++) {
			if ((int)line[k] <= 32) {
				continue;	/* blanks and control characters */
			}
			if (isalpha((int)line[k])) {
				aln->s[slot][kept] = aacode[toupper(line[k]) - 65];
			} else {
				aln->s[slot][kept] = -1;
			}
			aln->seq[slot][kept] = line[k];
			kept++;
		}
		aln->s[slot][kept] = 0;
		aln->seq[slot][kept] = 0;
		aln->sl[slot] = kept;
		slot++;
	}

	free(string);
	return aln;
}
void profile_alignment_main(struct alignment* aln,struct parameters* param,float** submatrix)
{
	float** dm = 0;
	int* tree = 0;
	struct aln_tree_node* tree2 = 0;
	int i,j;
	int tmp_numseq;
	int tmp_numprofiles;
	
	local_numseq = 0;
	local_numprofiles = 0;
	
	//determine number of profiles that were inputted....
	
	while(aln->sl[local_numseq+numseq]){
		local_numseq++;
	}
	
	local_numprofiles = (local_numseq << 1) - 1;
	//fprintf(stderr,"%d	%d\n",local_numseq,local_numprofiles);
	
	for (i = 0;i < numseq;i++){
	//	fprintf(stderr,"%d	%d	%d\n",i,aln->s[i][0],aln->s[i][1]);
		aln->s[i] = assign_gap_codes(aln->s[i],aln->sl[i]); 
	}
	
	if(param->dna == 1){
		if(byg_start(param->tree,"njNJ") != -1){
			dm =  dna_profile_distance(aln,dm,param,1);
		}else{
			dm =  dna_profile_distance(aln,dm,param,0);
		}
	}else{
		if(byg_start(param->tree,"njNJ") != -1){
			dm =  protein_profile_wu_distance(aln,dm,param,1);
		}else{
			dm =  protein_profile_wu_distance(aln,dm,param,0);
		}
	}
	/*for ( i=0; i < local_numseq;i++){
		for (j = 0;j < local_numseq;j++){
			fprintf(stderr,"%f ",dm[i][j]);
		}
		fprintf(stderr,"\n");
	}*/
	
	tmp_numseq = numseq;
	tmp_numprofiles = numprofiles;
	
	numseq = local_numseq;
 	numprofiles = local_numprofiles;
	
	if(byg_start(param->tree,"njNJ") != -1){
		tree2 = real_nj(dm,param->ntree);
	}else{
		tree2 = real_upgma(dm,param->ntree);
	}
	
	
	
	//WAs here need too add tree2 -> treee..... 
	
	
	tree = malloc(sizeof(int)*(numseq*3+1));
	for ( i = 1; i < (numseq*3)+1;i++){
		tree[i] = 0;
	}
	tree[0] = 1;
	tree = readtree(tree2,tree);
	for (i = 0; i < (numseq*3);i++){
		tree[i] = tree[i+1]+ tmp_numseq;
	}
	//exit(0);
	
	numseq = tmp_numseq;
	numprofiles = tmp_numprofiles;
	
	int** map = 0;
	
	map =  hirschberg_profile_alignment(aln,tree,submatrix, map);
	//clear up sequence array to be reused as gap array....
	int *p = 0;
	for (i = 0; i < numseq;i++){
		p = aln->s[i];
		for (j = 0; j < aln->sl[i];j++){
			p[j] = 0;
		}
	}
	//clear up
	int a,b,c;
	for (i = 0; i < (local_numseq-1)*3;i +=3){
		a = tree[i];
		b = tree[i+1];
		c =  tree[i+2];
		aln = make_seq(aln,a,b,map[c]);
	}

	for (i = 0; i < numseq;i++){
		aln->nsip[i] = 0;
	}
	aln =  sort_sequences(aln,tree,param->sort);

	//for (i = 0; i < numseq;i++){
	//	fprintf(stderr,"%d	%d	%d\n",i,aln->nsip[i],aln->sip[i][0]);
	//}
	
	
	output(aln,param);
	
	
	free(tree2->links);
	free(tree2->internal_lables);
	free(tree2);
	

	free(map);
	free(tree);
	exit(0);
}