Ejemplo n.º 1
0
Archivo: anal.c Proyecto: jkeuffer/pari
/* Filter the input string, then evaluate it with readseq.
 * (Per the original note, the filtering step removes blanks and comments.)
 *
 * s: NUL-terminated input text; it is only passed to gp_filter.
 * Returns whatever readseq yields for the filtered text. The temporary
 * buffer returned by gp_filter is released with pari_free before returning. */
GEN
gp_read_str(const char *s)
{
  char *filtered;
  GEN result;

  filtered = gp_filter(s);
  result = readseq(filtered);
  /* the filtered copy is no longer needed once it has been evaluated */
  pari_free(filtered);
  return result;
}
Ejemplo n.º 2
0
/*
 * Read a multiple alignment from `alfile` into a freshly allocated `align`.
 *
 * The column count comes from cntlets(alfile). `algn` and each of the
 * CNTS_LEN rows of `cnts` are allocated with that many entries and
 * zero-filled. Sequences are then read with readseq() until it returns 0.
 *
 * Returns the new structure, or NULL if any allocation fails (everything
 * allocated up to that point is released first). The caller owns the result.
 *
 * The malloc/calloc casts are kept on purpose so the code also builds when
 * the file is compiled as C++.
 */
align* readMultial(FILE* alfile) {
  int letcnt = cntlets(alfile), i, j;
  /* Allocate at least one element so that a NULL return always means
     "out of memory", even when letcnt is 0. */
  size_t slots = (letcnt > 0) ? (size_t)letcnt : 1;
  align* res = (align*)malloc (sizeof(align));

  if (res == NULL)
    return NULL;

  /* Clear the row pointers first so the failure path can free them blindly. */
  for (j=0; j<CNTS_LEN; j++)
    res->cnts[j] = NULL;

  /* calloc zero-fills, replacing the explicit initialisation loops. */
  res->algn = (int*) calloc (slots, sizeof(int));
  if (res->algn == NULL)
    goto fail;

  for (j=0; j<CNTS_LEN; j++) {
    res->cnts[j] = (char*) calloc (slots, sizeof(char));
    if (res->cnts[j] == NULL)
      goto fail;
  }

  /* readseq is called once per sequence index; the call that returns 0
     still increments i, hence the i-1 below. */
  i = 0;
  while (readseq(alfile, res, i++, letcnt))
    ;

  res->numseq = i-1;
  res->algnlen = letcnt;
  return res;

fail:
  for (j=0; j<CNTS_LEN; j++)
    free(res->cnts[j]);
  free(res->algn);
  free(res);
  return NULL;
}
/*
 * Merge two gap-free fragment alignments that overlap on the reference into
 * one alignment, then emit the result.
 *
 * Steps, in order:
 *   1. Return immediately if either input alignment reports gaps.
 *   2. Order the two alignments by ref_start (copies are stored in
 *      pointer_first / pointer_second) and return if they do not overlap in
 *      the expected "first starts earlier, second ends later" way.
 *   3. Build a linked list of `consensus` nodes over refindex[0].ref, lay
 *      both reads over it, and dump the list to fp_detail three times. The
 *      list is not used by the later steps.
 *   4. Build the merged alignment/quality vectors: the part only covered by
 *      the first alignment, the overlapping part (higher quality wins, ties
 *      go to the second alignment), then the rest of the second alignment.
 *   5. Fill a fragment_alignment with the merged data, call sam_format(),
 *      and write the resulting fields tab-separated to fp_sam.
 *   6. Count final_result[5] in umap; only the first occurrence of a key is
 *      also written to fp_fastq and passed to print_alignment().
 *
 * Parameters:
 *   refindex          only element 0 is used (its `ref` sequence).
 *   sam_output_name   forwarded to sam_format().
 *   alignment_first,
 *   alignment_second  the two alignments to merge; they are read, not modified.
 *   fp_sam            receives one tab-separated record per call that gets that far.
 *   fp_detail         receives diagnostic output.
 *   fp_fastq          receives fields 0, 5 and 9 of the record for new umap keys.
 *   umap              occurrence counter keyed by final_result[5].
 *
 * NOTE(review): every `consensus` node allocated with `new` below is never
 * deleted in this function, and head_node is a local, so the list leaks on
 * each call that reaches step 3.
 */
void merge_both_alignments(vector<reference_index>& refindex, string sam_output_name, fragment_alignment& alignment_first, 
				fragment_alignment& alignment_second, ofstream& fp_sam, ofstream& fp_detail, 
				ofstream& fp_fastq, unordered_map<string, int>& umap)
{
	// Merging is only attempted for alignments without gaps.
	if(alignment_first.gaps != 0 || alignment_second.gaps != 0)
	{
		if(DEBUG == 99)
			fp_detail << "\n****************************************GAPS FOUND****************************************" << endl << endl;
		return;
	}

	// Despite the names these are copies of the inputs, ordered by ref_start.
	fragment_alignment pointer_first;
	fragment_alignment pointer_second;

	// first_start_index  : ref_start of the earlier alignment
	// first_last_index   : ref_start of the later alignment (where the overlap begins)
	// second_start_index : ref_end of the earlier alignment (where the overlap ends)
	// second_last_index  : ref_end of the later alignment
	// NOTE(review): first_start_index is assigned but never read in live code.
	int first_start_index, second_start_index;
	int first_last_index, second_last_index;

	if(alignment_first.ref_start < alignment_second.ref_start)
	{
		first_start_index = alignment_first.ref_start;
		first_last_index = alignment_second.ref_start;
		second_start_index = alignment_first.ref_end;
		second_last_index = alignment_second.ref_end;
		pointer_first = alignment_first;
		pointer_second = alignment_second;	
	}
	else
	{
		first_start_index = alignment_second.ref_start;
		first_last_index = alignment_first.ref_start;
		second_start_index = alignment_second.ref_end;
		second_last_index = alignment_first.ref_end;
		pointer_first = alignment_second;
		pointer_second = alignment_first;	
	}

	// The later alignment starts after the earlier one ends: nothing to merge.
	if(first_last_index > second_start_index)
		return;
	// The later alignment ends before the earlier one ends: not handled here.
	if(second_last_index < second_start_index)
		return;
	/*
	cout << "\nFind Problems in Merging" << endl;
	cout << "First start index = " << first_start_index << endl;
	cout << "First last index = " << first_last_index << endl;
	cout << "Second start index = " << second_start_index << endl;
	cout << "Second last index = " << second_last_index << endl;
	*/

	//linking reference base
	// One node per character of refindex[0].ref, chained in reference order.
	// ref_start / read_start are reassigned further down before they are read.
	
	int ref_start = 0, read_start = 0;
	consensus *head_node = new consensus;

	head_node->ref_ind = 0;
	head_node->refch = refindex[0].ref.at(0);

	// read_ind == -1 and readch == '\0' mark "no read base placed here yet".
	head_node->read_ind = -1;
	head_node->readch = '\0';

	head_node->quality = 0;

	head_node->next = NULL;
	head_node->up = NULL;
	head_node->down = NULL;

	consensus *current_node = head_node;

	for(int i = 1; i < refindex[0].ref.length(); i++)
	{
		consensus *next_node = new consensus;
		
		next_node->ref_ind = i;	
		next_node->refch = refindex[0].ref.at(i);
		
		next_node->read_ind = -1;
		next_node->readch = '\0';

		next_node->quality = 0;

		next_node->next = NULL;
		next_node->up = NULL;
		next_node->down = NULL;

		// Both `next` and `up` are pointed at the successor here.
		current_node->next = next_node;
		current_node->up = next_node;

		current_node = current_node->next;
	}

	// Unconditional diagnostic dump of the reference characters.
	fp_detail << endl << "Testing Reference Linked List" << endl;
	current_node = head_node;
	while(current_node != NULL)
	{
		fp_detail << "" << current_node->refch;
		current_node = current_node->next;
	}
	fp_detail << endl << "Reference Linked List Printed" << endl << endl;

	//first read alignment
	// Walks pointer_first.alignment from the head of the list with the
	// reference counter starting at -1, i.e. column 0 is laid over node 0.
	// NOTE(review): pointer_first.ref_start is not used as an offset here - confirm intent.
	// NOTE(review): current_node is dereferenced without a NULL check; if the
	// walk moves past the last node this would dereference NULL.

	consensus *previous_node = NULL;
	int current_ref_ind = -1;
	int current_read_ind = -1;
	current_node = head_node;

	for(int i = 0; i < pointer_first.alignment.size(); i++)
	{
		// '-' in either member of the pair does not advance that counter.
		if(pointer_first.alignment[i].first != '-')
			current_ref_ind += 1;
		if(pointer_first.alignment[i].second != '-')
			current_read_ind += 1;

		if(current_ref_ind == current_node->ref_ind)
		{
			// Column lines up with the current reference node: record the read base there.
			current_node->read_ind = current_read_ind;
			current_node->readch = '-';

			if(pointer_first.alignment[i].second != '-')
			{
				current_node->readch = pointer_first.alignment[i].second;
				current_node->quality = pointer_first.quality.at(i);
			}

			previous_node = current_node;
			current_node = current_node->next;
		}
		else
		{
			// Column has no matching reference node: splice a new node
			// (refch '-') in front of current_node.
			consensus *next_node = new consensus;

			next_node->ref_ind = current_ref_ind;
			next_node->refch = '-';

			next_node->read_ind = current_read_ind;
			next_node->readch = pointer_first.alignment[i].second;
			// NOTE(review): this writes current_node->quality, and next_node->quality
			// is never set - looks like it should be next_node; confirm.
			current_node->quality = pointer_first.quality.at(i);

			next_node->next = NULL;
			next_node->up = NULL;
			next_node->down = NULL;

			next_node->next = current_node;
			// NOTE(review): when previous_node is NULL the new node is placed before
			// head_node but head_node is not updated, so the node is unreachable.
			if(previous_node != NULL)
				previous_node->next = next_node;

			previous_node = next_node;
		}
	}


	// Unconditional diagnostic dump of the read characters after the first pass.
	fp_detail << endl << "Testing Read_One Linked List" << endl;
	current_node = head_node;
	while(current_node != NULL)
	{
		fp_detail << "" << current_node->readch;
		current_node = current_node->next;
	}
	fp_detail << endl << "Read_One Linked List Printed" << endl << endl;



	//second read alignment
	// Skip forward to the first node whose ref_ind is >= first_last_index,
	// then lay pointer_second over the list from there.

	current_node = head_node;
	while(current_node->ref_ind < first_last_index)
	{
		previous_node = current_node;
		current_node = current_node->next;
	}

	current_ref_ind = first_last_index - 1;
	current_read_ind = -1;

	for(int i = 0; i < pointer_second.alignment.size(); i++)
	{
		if(pointer_second.alignment[i].first != '-')
			current_ref_ind += 1;
		if(pointer_second.alignment[i].second != '-')
			current_read_ind += 1;

		if(current_ref_ind == current_node->ref_ind)
		{
			// Overwrite the node only when the second read has strictly higher quality.
			if(current_node->quality < pointer_second.quality.at(i))
			{
				current_node->read_ind = current_read_ind;
				current_node->readch = pointer_second.alignment[i].second;
				current_node->quality = pointer_second.quality.at(i);
			}	
			else
			{
				// Node untouched by the first read: mark it with '-'.
				if(current_node->readch == '\0')
				{
					current_node->read_ind = current_read_ind;
					current_node->readch = '-';
				}
			}

			previous_node = current_node;
			current_node = current_node->next;
		}
		else
		{
			consensus *next_node = new consensus;

			next_node->ref_ind = current_ref_ind;
			next_node->refch = '-';

			next_node->read_ind = current_read_ind;
			next_node->readch = pointer_second.alignment[i].second;
			// NOTE(review): same pattern as in the first pass - quality goes to
			// current_node, not to the node being inserted.
			current_node->quality = pointer_second.quality.at(i);

			next_node->next = NULL;
			next_node->up = NULL;
			next_node->down = NULL;

			next_node->next = current_node;
			// NOTE(review): the first pass links via previous_node->next; here `up`
			// is used, so the dump loop below (which follows `next`) will not visit
			// this node. previous_node is also not NULL-checked here. Confirm intent.
			previous_node->up = next_node;

			previous_node = next_node;
		}
	}


	// Unconditional diagnostic dump of the read characters after the second pass.
	fp_detail << endl << "Testing Read_Two Linked List" << endl;
	current_node = head_node;
	while(current_node != NULL)
	{
		fp_detail << "" << current_node->readch;
		current_node = current_node->next;
	}
	fp_detail << endl << "Read_Two Linked List Printed" << endl << endl;


	/*
	consensus *head_node = new consensus;
	ref_start = head_node->ref_ind = pointer_first.ref_start;
	head_node->refch = pointer_first.alignment[0].first;

	read_start = head_node->read_ind = pointer_first.read_start;
	head_node->readch = pointer_first.alignment[0].second;

	head_node->next = NULL;
	head_node->up = NULL;
	head_node->down = NULL;

	consensus *current_node = head_node;
	
	for(int i = 1; i < pointer_first.alignment.size(); i++)
	{
		consensus *next_node = new consensus;
		if(pointer_first.alignment[i].first != '-')
			ref_start += 1;
		if(pointer_first.alignment[i].second != '-')
			read_start += 1;

		next_node->ref_ind = ref_start;
		next_node->refch = pointer_first.alignment[0].first;

		next_node->read_ind = read_start;
		next_node->readch = pointer_first.alignment[0].second;

		next_node->next = NULL;
		next_node->up = NULL;
		next_node->down = NULL;

		current_node->next = next_node;
		current_node = current_node->next;
	
	}
	*/
	
	// From here on the merged result is built from the alignment vectors
	// directly; the linked list above is not consulted.
	vector<pair<char, char> > alignment;
	vector<char> quality;

	int read_index_first, read_index_second;
	int ref_index_first, ref_index_second;
	int i, k, l, x;

	int read_end, ref_end;
	int match = 0, gaps = 0, mismatch = 0;

	read_index_first = pointer_first.read_start;
	ref_index_first = pointer_first.ref_start;

	read_index_second = pointer_second.read_start;
	ref_index_second = pointer_second.ref_start;

	// The merged alignment starts where the earlier alignment starts.
	read_start = read_index_first;
	ref_start = ref_index_first;

	// Part 1: columns covered only by the first alignment (reference positions
	// before first_last_index). Read and reference indices advance on every
	// column; the '-' checks are commented out, which matches the gap-free
	// precondition enforced at the top.
	for(i = 0; ref_index_first < first_last_index && i < pointer_first.alignment.size(); i++)
	{
		alignment.push_back(pointer_first.alignment[i]);
		//if(pointer_first.alignment[i].second != '-')
		{
			quality.push_back(pointer_first.quality.at(read_index_first));
			read_index_first += 1;
		}	
		//if(pointer_first.alignment[i].first != '-')
		{
			ref_index_first += 1;
		}	

	}

	//print_vector_alignment(alignment);

	// Part 2: the overlap. k continues in the first alignment, l starts at 0 in
	// the second. For each column the pair with the strictly higher quality is
	// kept; on a tie the second alignment's pair is used.
	for(k = i, l = 0; ref_index_first < second_start_index && k < pointer_first.alignment.size(); k++, l++)
	{
		
		// Both alignments must be at the same reference position and agree on the reference base.
		assert(ref_index_first == ref_index_second);
		assert(pointer_first.alignment[k].first == pointer_second.alignment[l].first);		

		{
			if(pointer_first.quality.at(read_index_first) > 
				pointer_second.quality.at(read_index_second))
			{
				alignment.push_back(pointer_first.alignment[k]);
				quality.push_back(pointer_first.quality.at(read_index_first));
			}
			else
			{
				alignment.push_back(pointer_second.alignment[l]);
				quality.push_back(pointer_second.quality.at(read_index_second));
			}
		}

		ref_index_first += 1;
		ref_index_second += 1;

		read_index_first += 1;
		read_index_second += 1;
	}

	//cout << endl << endl;
	//print_vector_alignment(alignment);

	// Part 3: remaining columns of the second alignment, starting at l (the
	// first column of the second alignment not consumed by the overlap).
	for(x = l; ref_index_second < second_last_index && x < pointer_second.alignment.size(); x++)
	{
		alignment.push_back(pointer_second.alignment[x]);
		//if(pointer_first.alignment[i].second != '-')
		{
			quality.push_back(pointer_second.quality.at(read_index_second));
			read_index_second += 1;
		}	
		//if(pointer_first.alignment[i].first != '-')
		{
			ref_index_second += 1;
		}	
	}

	read_end = read_index_second;
	ref_end = ref_index_second;
	
	string alignment_quality(quality.begin(), quality.end());
	fragment_alignment final_alignment_info;
	vector<string> final_result;
	vector<char> read;

	// Copy the merged columns into the result, tally match/mismatch/gap
	// columns, and collect the read-side characters.
	for(int k = 0; k < alignment.size(); k++)
	{
		final_alignment_info.alignment.push_back(alignment[k]);
		if(alignment[k].first == alignment[k].second && alignment[k].first != '-')
			match += 1;
		if(alignment[k].first != alignment[k].second && alignment[k].first != '-')
			mismatch += 1;
		if(alignment[k].first == '-' || alignment[k].second == '-')
			gaps += 1;

		read.push_back(alignment[k].second);
	}

	//cout << endl << endl;
	// Local string holding the merged read sequence.
	string readseq(read.begin(), read.end());
	//print_vector_alignment(alignment);
				
	final_alignment_info.total_len = alignment.size();
        final_alignment_info.identity_match = match;
        final_alignment_info.ref_start = ref_start;
        final_alignment_info.read_start = read_start;
        final_alignment_info.ref_end = ref_end;//total_ref_ind
        final_alignment_info.read_end = read_end;//total_read_ind
        final_alignment_info.ref_ind = 0;
        final_alignment_info.read_dir = 1;
        final_alignment_info.gaps = gaps;
        final_alignment_info.mismatches = mismatch;
	final_alignment_info.quality = alignment_quality;

	// sam_format fills final_result; its fields are written tab-separated on one line.
	// NOTE(review): final_result[0], [5] and [9] are indexed without a size
	// check - presumably sam_format always produces at least 10 fields; verify.
	sam_format(final_alignment_info, refindex, readseq, sam_output_name, final_result, fp_detail);
	fp_sam << final_result[0];
	for(int k = 1; k < final_result.size(); k++)
        {
		fp_sam << "\t" << final_result[k];
                //cout << i << ": " << output[k] << endl;
	}		
	fp_sam << endl;

	/*
	fp_fastq << "> " << final_result[0] << endl;
	fp_fastq << final_result[9] << endl;
	fp_fastq << "+" << endl;
	fp_fastq << final_result[10] << endl;
	*/
	
	// Count occurrences of field 5 (labelled CIGAR in the commented-out line
	// below). A key that was already present is only counted: the function
	// returns before anything is written to fp_fastq.
	if(umap.find(final_result[5]) == umap.end())
	{
		umap[final_result[5]] = 1;
	}
	else
	{
		umap[final_result[5]] += 1;
		return;
	}
	
	//fp_fastq << ">" << final_result[0] << "|CIGAR=" << final_result[5] << endl;
	//fp_fastq << final_result[9] << endl;
	// First occurrence of this key: emit fields 0, 5 and 9, one per line.
	fp_fastq << final_result[0] << endl;
	fp_fastq << final_result[5] << endl;
	fp_fastq << final_result[9] << endl;

	print_alignment(final_alignment_info.alignment, refindex[0].ref, readseq, final_alignment_info.ref_start,
              	        0, 1, final_alignment_info, true, fp_detail);
	if(DEBUG == 99)
	{
		fp_detail << "reference = " << refindex[0].ref.substr(final_alignment_info.ref_start, 80) << endl;
		fp_detail << "for_read  = " << readseq.substr(0, 80) << endl << endl;
	}	
	// One quality value was pushed per merged column, so the lengths must agree.
	assert(alignment_quality.length() == readseq.length());

	return;
}
Ejemplo n.º 4
0
int main (int argc, char *argv[]) {

	char	seq1[ MAXSEQ ];                  // input seq1
	char	seq2[ MAXSEQ ];                  // input seq2
	char	bts1[ MAXSEQ ];                  // back trace seq1
	char	bts2[ MAXSEQ ];                  // back trace seq2
	char	aln[ MAXSEQ ];                   // the one line alignment string
	int		swas;                            // smith-waterman alginment score
	int		c;                               // options
	char	infile1[ MAXFNL ];               // input file1
	char	infile2[ MAXFNL ];               // input file2
	int		simple;                          // output controls

	// options
	while ( ( c = getopt( argc, argv, "ha:b:i:j:s" )  ) != EOF )
		
		switch (c) {
			case 'h':
				usage();
				break;
			case 'a':
				strcpy(seq1, optarg);
				break;
			case 'b':
				strcpy(seq2, optarg);
				break;
			case 'i':
				strcpy(infile1, optarg);
				break;
			case 'j':
				strcpy(infile2, optarg);
				break;
			case 's':
				simple = 1;
				break;

		}

	if ( argc == 1 ) {
	
		usage();

	}

	// check if sequences were passed by file
	if ( ( infile1[0] != '\0' ) && ( infile2[0] != '\0' ) ) {

		readseq( infile1, seq1 );
		readseq( infile2, seq2 );

	}

	// check if sequences are ready
	if ( ( seq1[0] == '\0' ) || ( seq2[0] == '\0' ) ) {

		printf("Error: insufficent input sequences found!\n");
		usage();

	}

	// covert sequences to UPPER case
	upper( seq1 );
	upper( seq2 );

	// do smith-waterman alignment
	swas = swalign( seq1, seq2, bts1, bts2, aln );

	// output alignment
	if ( simple == 1 ) {

		printf("%d\n", swas);
		puts(bts1);
		puts(aln);
		puts(bts2);

	} else { 

		printf("Smith-Waterman Alignment:\n\n");
		printf("Score:\t%d\n\n", swas);
		print_align( bts1, bts2, aln );
	
	}

}