/* Evaluate the sequence in s after filtering it: gp_filter() yields a copy
 * with blanks and comments removed, readseq() is run on that copy, and the
 * copy is released with pari_free() before the result is handed back. */
GEN
gp_read_str(const char *s)
{
  char *filtered = gp_filter(s);
  GEN result = readseq(filtered);

  pari_free(filtered);
  return result;
}
align* readMultial(FILE* alfile) { int letcnt = cntlets(alfile), i, j; align* res = (align*)malloc (sizeof(align)); res->algn = (int*) malloc (sizeof(int)* letcnt); for (j=0; j<CNTS_LEN; j++) res->cnts[j] = (char*) malloc (sizeof(char)* letcnt); for (i=0; i<letcnt; i++) { res->algn[i] = 0; for (j=0; j<CNTS_LEN; j++) res->cnts[j][i] = 0; } i = 0; while (readseq(alfile, res, i++, letcnt)) ; res->numseq = i-1; res->algnlen = letcnt; return res; }
void merge_both_alignments(vector<reference_index>& refindex, string sam_output_name, fragment_alignment& alignment_first, fragment_alignment& alignment_second, ofstream& fp_sam, ofstream& fp_detail, ofstream& fp_fastq, unordered_map<string, int>& umap) { if(alignment_first.gaps != 0 || alignment_second.gaps != 0) { if(DEBUG == 99) fp_detail << "\n****************************************GAPS FOUND****************************************" << endl << endl; return; } fragment_alignment pointer_first; fragment_alignment pointer_second; int first_start_index, second_start_index; int first_last_index, second_last_index; if(alignment_first.ref_start < alignment_second.ref_start) { first_start_index = alignment_first.ref_start; first_last_index = alignment_second.ref_start; second_start_index = alignment_first.ref_end; second_last_index = alignment_second.ref_end; pointer_first = alignment_first; pointer_second = alignment_second; } else { first_start_index = alignment_second.ref_start; first_last_index = alignment_first.ref_start; second_start_index = alignment_second.ref_end; second_last_index = alignment_first.ref_end; pointer_first = alignment_second; pointer_second = alignment_first; } if(first_last_index > second_start_index) return; if(second_last_index < second_start_index) return; /* cout << "\nFind Problems in Merging" << endl; cout << "First start index = " << first_start_index << endl; cout << "First last index = " << first_last_index << endl; cout << "Second start index = " << second_start_index << endl; cout << "Second last index = " << second_last_index << endl; */ //linking reference base int ref_start = 0, read_start = 0; consensus *head_node = new consensus; head_node->ref_ind = 0; head_node->refch = refindex[0].ref.at(0); head_node->read_ind = -1; head_node->readch = '\0'; head_node->quality = 0; head_node->next = NULL; head_node->up = NULL; head_node->down = NULL; consensus *current_node = head_node; for(int i = 1; i < refindex[0].ref.length(); i++) { 
consensus *next_node = new consensus; next_node->ref_ind = i; next_node->refch = refindex[0].ref.at(i); next_node->read_ind = -1; next_node->readch = '\0'; next_node->quality = 0; next_node->next = NULL; next_node->up = NULL; next_node->down = NULL; current_node->next = next_node; current_node->up = next_node; current_node = current_node->next; } fp_detail << endl << "Testing Reference Linked List" << endl; current_node = head_node; while(current_node != NULL) { fp_detail << "" << current_node->refch; current_node = current_node->next; } fp_detail << endl << "Reference Linked List Printed" << endl << endl; //first read alignment consensus *previous_node = NULL; int current_ref_ind = -1; int current_read_ind = -1; current_node = head_node; for(int i = 0; i < pointer_first.alignment.size(); i++) { if(pointer_first.alignment[i].first != '-') current_ref_ind += 1; if(pointer_first.alignment[i].second != '-') current_read_ind += 1; if(current_ref_ind == current_node->ref_ind) { current_node->read_ind = current_read_ind; current_node->readch = '-'; if(pointer_first.alignment[i].second != '-') { current_node->readch = pointer_first.alignment[i].second; current_node->quality = pointer_first.quality.at(i); } previous_node = current_node; current_node = current_node->next; } else { consensus *next_node = new consensus; next_node->ref_ind = current_ref_ind; next_node->refch = '-'; next_node->read_ind = current_read_ind; next_node->readch = pointer_first.alignment[i].second; current_node->quality = pointer_first.quality.at(i); next_node->next = NULL; next_node->up = NULL; next_node->down = NULL; next_node->next = current_node; if(previous_node != NULL) previous_node->next = next_node; previous_node = next_node; } } fp_detail << endl << "Testing Read_One Linked List" << endl; current_node = head_node; while(current_node != NULL) { fp_detail << "" << current_node->readch; current_node = current_node->next; } fp_detail << endl << "Read_One Linked List Printed" << endl << endl; 
//second read alignment current_node = head_node; while(current_node->ref_ind < first_last_index) { previous_node = current_node; current_node = current_node->next; } current_ref_ind = first_last_index - 1; current_read_ind = -1; for(int i = 0; i < pointer_second.alignment.size(); i++) { if(pointer_second.alignment[i].first != '-') current_ref_ind += 1; if(pointer_second.alignment[i].second != '-') current_read_ind += 1; if(current_ref_ind == current_node->ref_ind) { if(current_node->quality < pointer_second.quality.at(i)) { current_node->read_ind = current_read_ind; current_node->readch = pointer_second.alignment[i].second; current_node->quality = pointer_second.quality.at(i); } else { if(current_node->readch == '\0') { current_node->read_ind = current_read_ind; current_node->readch = '-'; } } previous_node = current_node; current_node = current_node->next; } else { consensus *next_node = new consensus; next_node->ref_ind = current_ref_ind; next_node->refch = '-'; next_node->read_ind = current_read_ind; next_node->readch = pointer_second.alignment[i].second; current_node->quality = pointer_second.quality.at(i); next_node->next = NULL; next_node->up = NULL; next_node->down = NULL; next_node->next = current_node; previous_node->up = next_node; previous_node = next_node; } } fp_detail << endl << "Testing Read_Two Linked List" << endl; current_node = head_node; while(current_node != NULL) { fp_detail << "" << current_node->readch; current_node = current_node->next; } fp_detail << endl << "Read_Two Linked List Printed" << endl << endl; /* consensus *head_node = new consensus; ref_start = head_node->ref_ind = pointer_first.ref_start; head_node->refch = pointer_first.alignment[0].first; read_start = head_node->read_ind = pointer_first.read_start; head_node->readch = pointer_first.alignment[0].second; head_node->next = NULL; head_node->up = NULL; head_node->down = NULL; consensus *current_node = head_node; for(int i = 1; i < pointer_first.alignment.size(); i++) { 
consensus *next_node = new consensus; if(pointer_first.alignment[i].first != '-') ref_start += 1; if(pointer_first.alignment[i].second != '-') read_start += 1; next_node->ref_ind = ref_start; next_node->refch = pointer_first.alignment[0].first; next_node->read_ind = read_start; next_node->readch = pointer_first.alignment[0].second; next_node->next = NULL; next_node->up = NULL; next_node->down = NULL; current_node->next = next_node; current_node = current_node->next; } */ vector<pair<char, char> > alignment; vector<char> quality; int read_index_first, read_index_second; int ref_index_first, ref_index_second; int i, k, l, x; int read_end, ref_end; int match = 0, gaps = 0, mismatch = 0; read_index_first = pointer_first.read_start; ref_index_first = pointer_first.ref_start; read_index_second = pointer_second.read_start; ref_index_second = pointer_second.ref_start; read_start = read_index_first; ref_start = ref_index_first; for(i = 0; ref_index_first < first_last_index && i < pointer_first.alignment.size(); i++) { alignment.push_back(pointer_first.alignment[i]); //if(pointer_first.alignment[i].second != '-') { quality.push_back(pointer_first.quality.at(read_index_first)); read_index_first += 1; } //if(pointer_first.alignment[i].first != '-') { ref_index_first += 1; } } //print_vector_alignment(alignment); for(k = i, l = 0; ref_index_first < second_start_index && k < pointer_first.alignment.size(); k++, l++) { assert(ref_index_first == ref_index_second); assert(pointer_first.alignment[k].first == pointer_second.alignment[l].first); { if(pointer_first.quality.at(read_index_first) > pointer_second.quality.at(read_index_second)) { alignment.push_back(pointer_first.alignment[k]); quality.push_back(pointer_first.quality.at(read_index_first)); } else { alignment.push_back(pointer_second.alignment[l]); quality.push_back(pointer_second.quality.at(read_index_second)); } } ref_index_first += 1; ref_index_second += 1; read_index_first += 1; read_index_second += 1; } //cout << endl 
<< endl; //print_vector_alignment(alignment); for(x = l; ref_index_second < second_last_index && x < pointer_second.alignment.size(); x++) { alignment.push_back(pointer_second.alignment[x]); //if(pointer_first.alignment[i].second != '-') { quality.push_back(pointer_second.quality.at(read_index_second)); read_index_second += 1; } //if(pointer_first.alignment[i].first != '-') { ref_index_second += 1; } } read_end = read_index_second; ref_end = ref_index_second; string alignment_quality(quality.begin(), quality.end()); fragment_alignment final_alignment_info; vector<string> final_result; vector<char> read; for(int k = 0; k < alignment.size(); k++) { final_alignment_info.alignment.push_back(alignment[k]); if(alignment[k].first == alignment[k].second && alignment[k].first != '-') match += 1; if(alignment[k].first != alignment[k].second && alignment[k].first != '-') mismatch += 1; if(alignment[k].first == '-' || alignment[k].second == '-') gaps += 1; read.push_back(alignment[k].second); } //cout << endl << endl; string readseq(read.begin(), read.end()); //print_vector_alignment(alignment); final_alignment_info.total_len = alignment.size(); final_alignment_info.identity_match = match; final_alignment_info.ref_start = ref_start; final_alignment_info.read_start = read_start; final_alignment_info.ref_end = ref_end;//total_ref_ind final_alignment_info.read_end = read_end;//total_read_ind final_alignment_info.ref_ind = 0; final_alignment_info.read_dir = 1; final_alignment_info.gaps = gaps; final_alignment_info.mismatches = mismatch; final_alignment_info.quality = alignment_quality; sam_format(final_alignment_info, refindex, readseq, sam_output_name, final_result, fp_detail); fp_sam << final_result[0]; for(int k = 1; k < final_result.size(); k++) { fp_sam << "\t" << final_result[k]; //cout << i << ": " << output[k] << endl; } fp_sam << endl; /* fp_fastq << "> " << final_result[0] << endl; fp_fastq << final_result[9] << endl; fp_fastq << "+" << endl; fp_fastq << final_result[10] 
<< endl; */ if(umap.find(final_result[5]) == umap.end()) { umap[final_result[5]] = 1; } else { umap[final_result[5]] += 1; return; } //fp_fastq << ">" << final_result[0] << "|CIGAR=" << final_result[5] << endl; //fp_fastq << final_result[9] << endl; fp_fastq << final_result[0] << endl; fp_fastq << final_result[5] << endl; fp_fastq << final_result[9] << endl; print_alignment(final_alignment_info.alignment, refindex[0].ref, readseq, final_alignment_info.ref_start, 0, 1, final_alignment_info, true, fp_detail); if(DEBUG == 99) { fp_detail << "reference = " << refindex[0].ref.substr(final_alignment_info.ref_start, 80) << endl; fp_detail << "for_read = " << readseq.substr(0, 80) << endl << endl; } assert(alignment_quality.length() == readseq.length()); return; }
int main (int argc, char *argv[]) { char seq1[ MAXSEQ ]; // input seq1 char seq2[ MAXSEQ ]; // input seq2 char bts1[ MAXSEQ ]; // back trace seq1 char bts2[ MAXSEQ ]; // back trace seq2 char aln[ MAXSEQ ]; // the one line alignment string int swas; // smith-waterman alginment score int c; // options char infile1[ MAXFNL ]; // input file1 char infile2[ MAXFNL ]; // input file2 int simple; // output controls // options while ( ( c = getopt( argc, argv, "ha:b:i:j:s" ) ) != EOF ) switch (c) { case 'h': usage(); break; case 'a': strcpy(seq1, optarg); break; case 'b': strcpy(seq2, optarg); break; case 'i': strcpy(infile1, optarg); break; case 'j': strcpy(infile2, optarg); break; case 's': simple = 1; break; } if ( argc == 1 ) { usage(); } // check if sequences were passed by file if ( ( infile1[0] != '\0' ) && ( infile2[0] != '\0' ) ) { readseq( infile1, seq1 ); readseq( infile2, seq2 ); } // check if sequences are ready if ( ( seq1[0] == '\0' ) || ( seq2[0] == '\0' ) ) { printf("Error: insufficent input sequences found!\n"); usage(); } // covert sequences to UPPER case upper( seq1 ); upper( seq2 ); // do smith-waterman alignment swas = swalign( seq1, seq2, bts1, bts2, aln ); // output alignment if ( simple == 1 ) { printf("%d\n", swas); puts(bts1); puts(aln); puts(bts2); } else { printf("Smith-Waterman Alignment:\n\n"); printf("Score:\t%d\n\n", swas); print_align( bts1, bts2, aln ); } }