Exemple #1
0
/*
 * Count candidate sequence lines in a Stockholm-style text buffer.
 *
 * The first line of `string` is never inspected: scanning starts after the
 * first newline. From there every line start is counted unless
 * byg_end("#", line) reports 1 for it. Scanning stops at the first line for
 * which byg_start("//", line) reports 0, or when no further newline is found.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere. From their use here
 * they appear to return the offset of the start of / just past the first
 * match, and -1 when there is no match - confirm against their definition.
 *
 * Returns the number of counted lines (0 when none).
 */
int count_sequences_stockholm(char* string)
{
	char *cursor = string;
	int total = 0;
	int step;

	for (step = byg_end("\n", cursor); step != -1; step = byg_end("\n", cursor)) {
		cursor += step;
		if (byg_start("//", cursor) == 0) {
			break;
		}
		if (byg_end("#", cursor) != 1) {
			total++;
		}
	}
	return total;
}
Exemple #2
0
/*
 * Return the largest number of consecutive sequence rows found in any block
 * of a Clustal-style text buffer.
 *
 * The first line of `string` is skipped. A following line is treated as a
 * sequence row when its newline offset is greater than 2, the first space
 * reported by byg_end(" ", line) comes before that newline, and that result
 * is not 1. Any other line closes the current block.
 *
 * NOTE(review): byg_end is defined elsewhere; it is assumed to return the
 * offset just past the first match and -1 when nothing matches - confirm.
 * NOTE(review): a block that runs to the end of the buffer without a closing
 * non-sequence line is not folded into the result. That behaviour is kept
 * unchanged here; confirm whether it is intended.
 */
int count_sequences_clustalw(char* string)
{
	char *p1 = string;
	int step;
	int name_end;
	int line_end;
	int rows = 0;      /* rows seen in the block currently being scanned */
	int max_rows = 0;  /* largest completed block so far */

	while ((step = byg_end("\n", p1)) != -1) {
		p1 += step;
		name_end = byg_end(" ", p1);
		line_end = byg_end("\n", p1);
		if (line_end > 2 && line_end > name_end && name_end != 1) {
			rows++;
		} else if (rows) {
			if (rows > max_rows) {
				max_rows = rows;
			}
			rows = 0;
		}
	}
	return max_rows;
}
Exemple #3
0
/*
 * Parse UniProt-XML style entries from `string` and append them to `aln`.
 *
 * For every "<entry" the text of the first following <name> element becomes
 * the sequence name (aln->sn / aln->lsn). The residues are taken from the
 * LAST "<sequence ...>" element that starts before the entry's "</entry>";
 * only alphabetic characters are stored (aln->seq raw, aln->s encoded via
 * aacode, aln->sl length).
 *
 * New sequences are written starting at the first index whose aln->sl is 0,
 * so `aln` and its arrays must already be allocated, large enough, and have
 * unused lengths set to 0 by the caller.
 *
 * Ownership: `string` is released with free() before returning.
 * Parsing stops at the first entry whose <name> or </sequence> tag is missing.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere; they are assumed to
 * return the offset of the start of / just past the first match, or -1.
 * NOTE(review): allocation results are not checked, as in the rest of this
 * file, because no error-reporting convention is visible here.
 */
struct alignment* read_sequences_uniprot_xml(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'; 'J' has no code (-1). */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *p1 = string;
	char *name = 0;
	char *entry_end = 0;
	int name_len = 0;
	int found = 0;
	int code = 0;
	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;

	/* First unused slot: sequences already present have a non-zero length. */
	while(aln->sl[c]){
		c++;
	}

	while((i = byg_end("<entry",p1)) != -1){
		p1 += i;

		i = byg_end("<name>",p1);
		if(i == -1){
			break;
		}
		p1 += i;
		name = p1;
		name_len = byg_start("</name>",p1);
		if(name_len == -1){
			break;
		}

		/* An entry may contain several <sequence .../> elements; the one
		 * carrying the residues is the last. Keep the search inside this
		 * entry so that the next entry's sequence is never picked up. */
		i = byg_start("</entry>",p1);
		entry_end = (i == -1) ? 0 : p1 + i;
		found = 0;
		while((i = byg_end("<sequence",p1)) != -1){
			if(entry_end && p1 + i > entry_end){
				break;
			}
			p1 += i;
			i = byg_end(">",p1);
			if(i == -1){
				break;
			}
			p1 += i;
			found = 1;
		}
		if(!found){
			/* no sequence element in this entry: skip it */
			continue;
		}

		j = byg_start("</sequence>",p1);
		if(j == -1){
			break;
		}

		aln->lsn[c] = name_len;
		aln->sn[c] = malloc(sizeof(char)*(name_len+1));
		for(i = 0;i < name_len;i++){
			aln->sn[c][i] = name[i];
		}
		aln->sn[c][name_len] = 0;

		aln->s[c] = malloc(sizeof(int)*(j+1));
		aln->seq[c] = malloc(sizeof(char)*(j+1));
		n = 0;
		for(i = 0;i < j;i++){
			/* ctype functions need an unsigned char value */
			if(!isalpha((unsigned char)p1[i])){
				continue;
			}
			code = toupper((unsigned char)p1[i]) - 'A';
			if(code < 0 || code > 25){
				/* locale-specific letter outside A-Z: no table entry */
				continue;
			}
			aln->s[c][n] = aacode[code];
			aln->seq[c][n] = p1[i];
			n++;
		}
		aln->s[c][n] = 0;
		aln->seq[c][n] = 0;
		aln->sl[c] = n;
		c++;
	}
	free(string);
	return aln;
}
Exemple #4
0
/*
 * Parse <fitem> feature records from `p` and append them to the list `ft`.
 *
 * Items are read in order until no "<fitem>" remains or the next "<fitem>"
 * lies beyond the next "</seq-info>" (if "</seq-info>" is absent nothing is
 * read). For each item the <ftype> and <fnote> texts are copied into newly
 * allocated strings, <fstart>/<fstop> are converted with atoi(), and color
 * is set to -1.
 *
 * Returns the (possibly new) list head. An item with a missing tag, or a
 * failed allocation, ends parsing; items appended before it are kept.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere; they are assumed to
 * return the offset of the start of / just past the first match, or -1.
 */
struct feature* read_ft(struct feature* ft,char* p)
{
	struct feature *node = 0;
	struct feature *tail = ft;
	char *p1 = p;
	int i;
	int j;

	/* Locate the current tail once instead of re-walking the list per item. */
	if(tail){
		while(tail->next != 0){
			tail = tail->next;
		}
	}

	while((j = byg_end("<fitem>",p1)) != -1){
		i = byg_end("</seq-info>",p1);
		if(j > i){
			break;
		}
		p1 += j; /* just past "<fitem>" */

		node = malloc(sizeof(struct feature));
		if(!node){
			break;
		}
		node->next = 0;
		node->color = -1;
		node->type = 0;
		node->note = 0;

		/* feature type */
		i = byg_end("<ftype>",p1);
		if(i == -1){
			goto malformed;
		}
		p1 += i;
		j = byg_start("</ftype>",p1);
		if(j == -1){
			goto malformed;
		}
		node->type = malloc(sizeof(char)*(j+1));
		if(!node->type){
			goto malformed;
		}
		for(i = 0;i < j;i++){
			node->type[i] = p1[i];
		}
		node->type[j] = 0;

		/* start / stop: atoi stops at the '<' of the closing tag, so the
		 * number is parsed in place and no fixed-size scratch buffer is
		 * needed. */
		i = byg_end("<fstart>",p1);
		if(i == -1){
			goto malformed;
		}
		p1 += i;
		node->start = atoi(p1);

		i = byg_end("<fstop>",p1);
		if(i == -1){
			goto malformed;
		}
		p1 += i;
		node->end = atoi(p1);

		/* free-text note */
		i = byg_end("<fnote>",p1);
		if(i == -1){
			goto malformed;
		}
		p1 += i;
		j = byg_start("</fnote>",p1);
		if(j == -1){
			goto malformed;
		}
		node->note = malloc(sizeof(char)*(j+1));
		if(!node->note){
			goto malformed;
		}
		for(i = 0;i < j;i++){
			node->note[i] = p1[i];
		}
		node->note[j] = 0;

		if(tail){
			tail->next = node;
		}else{
			ft = node;
		}
		tail = node;
		node = 0;
	}
	return ft;

malformed:
	/* Discard the half-built item; everything appended so far stays valid. */
	free(node->type);
	free(node->note);
	free(node);
	return ft;
}
Exemple #5
0
/*
 * Parse MACSIM-XML style "<sequence ...>" records from `string` into `aln`.
 *
 * Before parsing, every "<g>...<r>" prefix and "</r>...</g>" suffix in the
 * buffer is overwritten with spaces, so only the text between <r> and </r>
 * remains (the buffer is modified in place).
 *
 * For each "<sequence" record, looking only at tags that end before the
 * record's "</sequence>":
 *   - <seq-name> text is copied to aln->sn / aln->lsn,
 *   - if an <ftable> is present, read_ft() extends aln->ft[c],
 *   - <seq-data> characters with code > 32 are stored in aln->seq, encoded
 *     into aln->s (letters via aacode, everything else -1), and counted in
 *     aln->sl.
 * The slot index advances for every record, including records in which some
 * or all of these parts are missing.
 *
 * New records are written starting at the first index whose aln->sl is 0, so
 * `aln` and its arrays must already be allocated, large enough, and have
 * unused lengths set to 0 by the caller.
 *
 * Ownership: `string` is released with free() before returning.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere; they are assumed to
 * return the offset of the start of / just past the first match, or -1.
 */
struct alignment* read_alignment_macsim_xml(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'; 'J' has no code (-1). */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *p = string;
	char *seq_end = 0;
	int code = 0;
	int max = 0;
	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;

	/* Strip <g>..<r> and </r>..</g> wrappers. Stop on the first incomplete
	 * wrapper instead of stepping backwards through the buffer. */
	while((i = byg_start("<g>",p)) != -1){
		p += i;
		j = byg_end("<r>",p);
		if(j == -1){
			break;
		}
		for(i = 0;i < j;i++){
			p[i] = ' ';
		}
		i = byg_start("</r>",p);
		if(i == -1){
			break;
		}
		p += i;
		j = byg_end("</g>",p);
		if(j == -1){
			break;
		}
		for(i = 0;i < j;i++){
			p[i] = ' ';
		}
	}
	p = string;

	/* First unused slot: sequences already present have a non-zero length. */
	while(aln->sl[c]){
		c++;
	}

	while((i = byg_end("<sequence",p)) != -1){
		p += i;
		max = byg_end("</sequence>",p);
		if(max != -1){
			/* Absolute end of this record; stays valid while p advances. */
			seq_end = p + max;

			i = byg_end("<seq-name>",p);
			if(i != -1 && p + i < seq_end){
				j = byg_start("</seq-name>",p + i);
				if(j != -1){
					p += i;
					aln->lsn[c] = j;
					aln->sn[c] = malloc(sizeof(char)*(j+1));
					for(i = 0;i < j;i++){
						aln->sn[c][i] = p[i];
					}
					aln->sn[c][j] = 0;
				}
			}

			i = byg_end("<ftable>",p);
			if(i != -1 && p + i < seq_end){
				aln->ft[c] = read_ft(aln->ft[c],p);
			}

			i = byg_end("<seq-data>",p);
			if(i != -1 && p + i < seq_end){
				j = byg_start("</seq-data>",p + i);
				if(j != -1){
					p += i;
					aln->s[c] = malloc(sizeof(int)*(j+1));
					aln->seq[c] = malloc(sizeof(char)*(j+1));
					n = 0;
					for(i = 0;i < j;i++){
						if((int)p[i] <= 32){
							continue; /* whitespace / control */
						}
						code = -1;
						if(isalpha((unsigned char)p[i])){
							code = toupper((unsigned char)p[i]) - 'A';
							code = (code >= 0 && code < 26) ? aacode[code] : -1;
						}
						aln->s[c][n] = code;
						aln->seq[c][n] = p[i];
						n++;
					}
					aln->s[c][n] = 0;
					aln->seq[c][n] = 0;
					aln->sl[c] = n;
				}
			}
		}
		c++;
	}
	free(string);
	return aln;
}
Exemple #6
0
/*
 * Parse SwissProt flat-file style records from `string` into `aln`.
 *
 * For every "ID   " line the text up to the next space becomes the sequence
 * name. The residues are read from the line after the following "SQ   " line
 * up to the next "//"; characters with code > 32 are stored in aln->seq,
 * encoded into aln->s (letters via aacode, everything else -1) and counted
 * in aln->sl. Each stored character is also echoed through k_printf().
 *
 * New records are written starting at the first index whose aln->sl is 0, so
 * `aln` and its arrays must already be allocated, large enough, and have
 * unused lengths set to 0 by the caller.
 *
 * Ownership: `string` is released with free() before returning.
 * Parsing stops at the first record with a missing name separator, "SQ   "
 * line, newline or "//" terminator.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere; they are assumed to
 * return the offset of the start of / just past the first match, or -1.
 * NOTE(review): "found sequence:" is printed once before the loop as well as
 * once per record; this looks like leftover diagnostics but is preserved.
 */
struct alignment* read_alignment_from_swissprot(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'; 'J' has no code (-1). */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *p = string;
	char *name = 0;
	int name_len = 0;
	int code = 0;
	int i = 0;
	int j = 0;
	int c = 0;
	int n = 0;

	/* First unused slot: sequences already present have a non-zero length. */
	while(aln->sl[c]){
		c++;
	}

	k_printf("found sequence:\n");
	while((i = byg_end("ID   ",p)) != -1){
		p += i;

		/* Locate every delimiter before allocating anything. */
		name = p;
		name_len = byg_start(" ",p);
		if(name_len == -1){
			break;
		}
		p += name_len;
		j = byg_end("SQ   ",p);
		if(j == -1){
			break;
		}
		p += j;
		j = byg_end("\n",p); /* residues start on the line after "SQ" */
		if(j == -1){
			break;
		}
		p += j;
		j = byg_start("//",p);
		if(j == -1){
			break;
		}

		aln->lsn[c] = name_len;
		aln->sn[c] = malloc(sizeof(char)*(name_len+1));
		for(i = 0;i < name_len;i++){
			aln->sn[c][i] = name[i];
		}
		aln->sn[c][name_len] = 0;

		k_printf("found sequence:\n");
		aln->s[c] = malloc(sizeof(int)*(j+1));
		aln->seq[c] = malloc(sizeof(char)*(j+1));
		n = 0;
		for(i = 0;i < j;i++){
			if((int)p[i] <= 32){
				continue; /* whitespace / control */
			}
			code = -1;
			if(isalpha((unsigned char)p[i])){
				code = toupper((unsigned char)p[i]) - 'A';
				code = (code >= 0 && code < 26) ? aacode[code] : -1;
			}
			aln->s[c][n] = code;
			k_printf("%c",p[i]);
			aln->seq[c][n] = p[i];
			n++;
		}

		k_printf("\n\n");
		aln->s[c][n] = 0;
		aln->seq[c][n] = 0;
		aln->sl[c] = n;
		c++;
	}
	free(string);
	return aln;
}
Exemple #7
0
/*
 * Parse a Clustal-style alignment from `string` into `aln`.
 *
 * The first line of the buffer is skipped. A following line is a sequence
 * row when its newline offset n is > 2 and byg_end(" ", line) reports a
 * space offset j with 1 < j < n; any other line ends the current block.
 *
 * Pass 1 finds the largest number of rows in any block (local_numseq) and
 * sums, over the first row of every block, the characters with code > 32
 * that follow the name (len). Pass 2 stores, for the k-th row of every
 * block, the name (first time only, while aln->lsn is 0) and appends the
 * row's characters to sequence start+k: raw in aln->seq, encoded in aln->s
 * (letters via aacode, everything else -1), length in aln->sl.
 *
 * New sequences are written starting at the first index whose aln->sl is 0,
 * so `aln` and its arrays must already be allocated, large enough, and have
 * unused aln->sl / aln->lsn entries set to 0 by the caller.
 *
 * Ownership: `string` is released with free() before returning.
 *
 * NOTE(review): byg_end is defined elsewhere; it is assumed to return the
 * offset just past the first match, or -1 when nothing matches.
 * NOTE(review): lines without any later space (j == -1) are treated as
 * non-sequence lines here; count_sequences_clustalw in this file may still
 * count them.
 */
struct alignment* read_alignment_clustal(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'; 'J' has no code (-1). */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *p1 = string;
	int local_numseq = 0;
	int start = 0;
	int code = 0;
	int len = 0;
	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;

	/* Pass 1: rows per block and total residues per sequence. */
	while((i = byg_end("\n",p1)) != -1){
		p1 += i;
		j = byg_end(" ",p1);
		n = byg_end("\n",p1);
		if(n > 2 && n > j && j > 1){
			if(c == 0){
				for(i = j;p1[i] != '\n';i++){
					if((int)p1[i] > 32){
						len++;
					}
				}
			}
			c++;
		}else if(c){
			if(c > local_numseq){
				local_numseq = c;
			}
			c = 0;
		}
	}
	/* A final block that runs to the end of the buffer has no closing line;
	 * count it too so pass 2 never writes to a row that was not allocated. */
	if(c > local_numseq){
		local_numseq = c;
	}

	/* First unused slot: sequences already present have a non-zero length. */
	while(aln->sl[start]){
		start++;
	}

	for(i = start;i < local_numseq+start;i++){
		aln->s[i] = malloc(sizeof(int)*(len+1));
		aln->seq[i] = malloc(sizeof(char)*(len+1));
	}

	/* Pass 2: copy names and append residues block by block. */
	p1 = string;
	c = start;
	while((i = byg_end("\n",p1)) != -1){
		p1 += i;
		j = byg_end(" ",p1);
		n = byg_end("\n",p1);
		if(n > 2 && n > j && j > 1){
			if(aln->lsn[c] == 0){
				aln->lsn[c] = j;
				aln->sn[c] = malloc(sizeof(char)*(j+1));
				for(i = 0;i < j;i++){
					aln->sn[c][i] = p1[i];
				}
				aln->sn[c][j] = 0;
			}
			for(i = j;i < n;i++){
				if((int)p1[i] <= 32){
					continue; /* whitespace / control */
				}
				/* len was measured on the first row of each block only;
				 * drop anything a longer row would write past the buffer. */
				if(aln->sl[c] >= len){
					break;
				}
				code = -1;
				if(isalpha((unsigned char)p1[i])){
					code = toupper((unsigned char)p1[i]) - 'A';
					code = (code >= 0 && code < 26) ? aacode[code] : -1;
				}
				aln->s[c][aln->sl[c]] = code;
				aln->seq[c][aln->sl[c]] = p1[i];
				aln->sl[c]++;
			}
			c++;
		}else{
			/* block boundary: the next row belongs to the first sequence */
			c = start;
		}
	}
	for(i = start;i < local_numseq+start;i++){
		aln->s[i][aln->sl[i]] = 0;
		aln->seq[i][aln->sl[i]] = 0;
	}
	free(string);
	return aln;
}
Exemple #8
0
/*
 * Parse a Stockholm-style alignment from `string` into `aln`.
 *
 * The first line of the buffer is skipped. Reading stops at the first line
 * for which byg_start("//", line) reports 0. Lines for which
 * byg_end("#", line) reports 1 are ignored. Every other line that contains
 * a space before its newline is split at that first space: the text before
 * it becomes the name (aln->sn / aln->lsn); the characters after it with
 * code > 32 are stored in aln->seq, encoded into aln->s (letters via aacode,
 * everything else -1) and counted in aln->sl. Lines with no space before the
 * newline (blank lines, for instance) or with no terminating newline are
 * skipped.
 *
 * New sequences are written starting at the first index whose aln->sl is 0,
 * so `aln` and its arrays must already be allocated, large enough, and have
 * unused lengths set to 0 by the caller.
 *
 * Ownership: `string` is released with free() before returning.
 *
 * NOTE(review): byg_start/byg_end are defined elsewhere; they are assumed to
 * return the offset of the start of / just past the first match, or -1.
 * NOTE(review): count_sequences_stockholm in this file does not skip lines
 * without a separator, so it can report more lines than are stored here.
 */
struct alignment* read_alignment_stockholm(struct alignment* aln,char* string)
{
	/* Residue code for each letter 'A'..'Z'; 'J' has no code (-1). */
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	char *p1 = string;
	int code = 0;
	int eol = 0;
	int c = 0;
	int n = 0;
	int i = 0;
	int j = 0;

	/* First unused slot: sequences already present have a non-zero length. */
	while(aln->sl[c]){
		c++;
	}

	while((i = byg_end("\n",p1)) != -1){
		p1 += i;
		if(!(byg_start("//",p1))){
			break;
		}
		if(byg_end("#",p1) == 1){
			continue; /* markup / comment line */
		}

		/* The name/sequence separator must sit on this line; otherwise the
		 * search would run into the following lines. */
		eol = byg_start("\n",p1);
		j = byg_start(" ",p1);
		if(eol == -1 || j == -1 || j > eol){
			continue;
		}

		aln->lsn[c] = j;
		aln->sn[c] = malloc(sizeof(char)*(j+1));
		for(i = 0;i < j;i++){
			aln->sn[c][i] = p1[i];
		}
		aln->sn[c][j] = 0;

		p1 += j;
		j = eol - j; /* characters left on this line, from the separator on */

		aln->s[c] = malloc(sizeof(int)*(j+1));
		aln->seq[c] = malloc(sizeof(char)*(j+1));
		n = 0;
		for(i = 0;i < j;i++){
			if((int)p1[i] <= 32){
				continue; /* whitespace / control */
			}
			code = -1;
			if(isalpha((unsigned char)p1[i])){
				code = toupper((unsigned char)p1[i]) - 'A';
				code = (code >= 0 && code < 26) ? aacode[code] : -1;
			}
			aln->s[c][n] = code;
			aln->seq[c][n] = p1[i];
			n++;
		}
		aln->s[c][n] = 0;
		aln->seq[c][n] = 0;
		aln->sl[c] = n;
		c++;
	}

	free(string);
	return aln;
}
Exemple #9
0
/*
 * Length of the whitespace-delimited token that starts at line[from].
 * Scanning never goes past index `end` (exclusive).
 */
static int sam_token_len(const char *line, int from, ssize_t end)
{
	int k = from;

	while(k < end && !isspace((unsigned char)line[k])){
		k++;
	}
	return k - from;
}

/*
 * Read up to param->num_query alignment records from a SAM text stream.
 *
 * `ri` is first passed through clear_read_info(). Lines starting with '@'
 * are skipped. For every other line the whitespace-separated columns are
 * stored in ri[c]:
 *   1 QNAME -> name            2 FLAG  -> strand (FLAG & 0x10)
 *   5 MAPQ  -> mapq            6 CIGAR -> cigar
 *  10 SEQ   -> seq (through nuc_code), labels (all 0), len
 *  11 QUAL  -> qual
 * Columns 3, 4, 7, 8 and 9 are ignored and column scanning stops after
 * column 11. The optional tags "NM:i:" (-> errors, -1 when absent) and
 * "MD:Z:" (-> md) are then looked up anywhere in the line. All stored
 * strings are NUL-terminated.
 *
 * Returns the number of records stored; reading stops early once
 * param->num_query records have been filled.
 *
 * NOTE(review): MMALLOC/MFREE, clear_read_info, byg_end and nuc_code are
 * defined elsewhere. byg_end is treated as "found" only when it returns a
 * positive offset, which holds whether it signals "not found" with 0 or -1.
 * NOTE(review): nuc_code is assumed to have an entry for every unsigned
 * char value - confirm.
 */
int read_sam_chunk(struct read_info** ri,struct parameters* param,FILE* file)
{
	int column = 0;
	int i,j,g,tmp;
	int c = 0;
	char *line = NULL;
	size_t len = 0;
	ssize_t read;

	ri = clear_read_info(ri, param->num_query);

	while ((read = getline(&line, &len, file)) != -1) {
		if(line[0] == '@'){
			continue; /* header line */
		}

		column = 1; //<QNAME>
		tmp = sam_token_len(line, 0, read);
		MMALLOC(ri[c]->name,sizeof(unsigned char)* (tmp + 1));
		for(j = 0;j < tmp;j++){
			ri[c]->name[j] = line[j];
		}
		ri[c]->name[tmp] = 0;

		for(i = 0; i < read;i++){
			if(line[i] == '\n'){
				break;
			}
			if(!isspace((unsigned char)line[i])){
				continue;
			}
			/* line[i] is a separator: the next column starts at i+1 */
			column++;
			switch(column){
				case 2: // <FLAG>
					tmp = atoi(line+i+1);
					/* index by record (c), not by character position */
					ri[c]->strand = (tmp & 0x10);
					//WARNING - read should be reverse complemented if mapped to negative strand before tagdusting...
					break;
				case 3: // <RNAME>
				case 4: // <POS>
					break;
				case 5: //  <MAPQ>
					ri[c]->mapq =  atof(line +i +1);
					break;
				case 6: //  <CIGAR>
					tmp = sam_token_len(line, i+1, read);
					ri[c]->cigar = malloc(sizeof(unsigned char)* (tmp + 1));
					if(ri[c]->cigar){
						for(g = 0;g < tmp;g++){
							ri[c]->cigar[g] = line[i+1+g];
						}
						ri[c]->cigar[tmp] = 0;
					}
					break;
				case 7: //  <MRNM>
				case 8: //  <MPOS>
				case 9: //  <ISIZE>
					break;
				case 10: // <SEQ>
					tmp = sam_token_len(line, i+1, read);
					MMALLOC(ri[c]->seq,sizeof(unsigned char)* (tmp + 1));
					MMALLOC(ri[c]->labels,sizeof(unsigned char)* (tmp + 1));
					for(g = 0;g < tmp;g++){
						ri[c]->seq[g] = nuc_code[(unsigned char)line[i+1+g]];
						ri[c]->labels[g] = 0;
					}
					ri[c]->seq[tmp] = 0;
					ri[c]->labels[tmp] = 0;
					ri[c]->len = tmp;
					break;
				case 11: // <QUAL>
					tmp = sam_token_len(line, i+1, read);
					MMALLOC(ri[c]->qual,sizeof(unsigned char)* (tmp + 1));
					for(g = 0;g < tmp;g++){
						ri[c]->qual[g] = line[i+1+g];
					}
					ri[c]->qual[tmp] = 0;
					break;
				default:
					/* past the mandatory columns: end the column scan */
					i = (int) read;
					break;
			}
		}

		tmp = byg_end("NM:i:", line  );
		if(tmp > 0){
			ri[c]->errors = atoi(line+tmp);
		}else{
			ri[c]->errors = -1;
		}

		tmp = byg_end("MD:Z:", line  );
		if(tmp > 0){
			g = sam_token_len(line, tmp, read);
			ri[c]->md = malloc(sizeof(unsigned char)* (g + 1));
			if(ri[c]->md){
				for(j = 0;j < g;j++){
					ri[c]->md[j] = line[tmp+j];
				}
				ri[c]->md[g] = 0;
			}
		}

		c++;
		if(c == param->num_query){
			MFREE(line);
			return c;
		}
	}
	MFREE(line);
	return c;
}