/*
 * Reconstructs a longest common subsequence of a[0..na) and b[0..nb) by
 * divide and conquer and returns it as a heap string the caller must free.
 *
 * The shorter input is split in half; a forward score row (left half vs. b)
 * and a backward score row (reversed right half vs. reversed b) are combined
 * to find the position in b where the two halves can be solved independently.
 *
 * NOTE(review): alloc_str, substr, reverse, cat and lcs_len are defined
 * elsewhere. This code assumes lcs_len() returns the LCS length and leaves
 * the per-prefix lengths for b in its output array, that substr/reverse/cat
 * return freshly allocated strings, and that alloc_str(1) yields an empty
 * string -- confirm against those helpers.
 */
char* find_lcs(const char *a, int na, const char *b, int nb)
{
	/* Work with `a` as the shorter of the two inputs. */
	if (na > nb) {
		const char *other_str = a;
		int other_len = na;

		a = b;
		b = other_str;
		na = nb;
		nb = other_len;
	}

	/* Trivial sizes: nothing to match, or a single character to look up. */
	if (na == 0)
		return alloc_str(1);
	if (na == 1) {
		for (int j = 0; j < nb; j++)
			if (b[j] == a[0])
				return substr(a, 0, 1);
		return alloc_str(1);
	}

	/*
	 * Shared scratch rows. They are only read before the recursive calls
	 * below, so it is fine that deeper levels overwrite them.
	 */
	static int fwd_row[MAXN];
	static int bwd_row[MAXN];

	int total = lcs_len(a, na, b, nb, fwd_row);
	if (total == 0)
		return alloc_str(1);

	int left_len = na / 2;
	int right_len = na - left_len;

	char *left_a = substr(a, 0, left_len);
	char *right_a = substr(a, left_len, right_len);
	char *rev_b = reverse(b, nb);
	char *rev_right_a = reverse(right_a, right_len);

	lcs_len(left_a, left_len, b, nb, fwd_row);
	lcs_len(rev_right_a, right_len, rev_b, nb, bwd_row);

	/* First cut point of b at which both halves together reach `total`. */
	int split = -1;
	for (int k = 0; k <= nb && split < 0; k++)
		if (fwd_row[k] + bwd_row[nb - k] == total)
			split = k;

	char *left_b = substr(b, 0, split);
	char *right_b = substr(b, split, nb - split);

	char *left_lcs = find_lcs(left_a, left_len, left_b, split);
	char *right_lcs = find_lcs(right_a, right_len, right_b, nb - split);
	char *joined = cat(left_lcs, right_lcs);

	free(left_a);
	free(right_a);
	free(rev_right_a);
	free(left_b);
	free(right_b);
	free(rev_b);
	free(left_lcs);
	free(right_lcs);

	return joined;
}
/*
 * Diffs the range [line1, line1 + count1) of the first file against
 * [line2, line2 + count2) of the second, writing 1 into env->xdf1.rchg /
 * env->xdf2.rchg for every line it decides differs.
 *
 * Returns 0 on success, -1 when the range is too large (LINE_END(1) >=
 * MAX_PTR) or find_lcs() reports an error, or whatever
 * fall_back_to_classic_diff() returns when find_lcs() asks for the fallback.
 *
 * The part before the common region is handled by a real recursive call;
 * the part after it is handled by updating the range and looping, which
 * replaces the tail call.
 */
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	struct region lcs;
	int lcs_found;
	int result;

	for (;;) {
		/* Both sides exhausted: nothing left to compare. */
		if (count1 <= 0 && count2 <= 0)
			return 0;

		if (LINE_END(1) >= MAX_PTR)
			return -1;

		/* One side empty: every line on the other side is flagged. */
		if (!count1) {
			for (; count2--; line2++)
				env->xdf2.rchg[line2 - 1] = 1;
			return 0;
		}
		if (!count2) {
			for (; count1--; line1++)
				env->xdf1.rchg[line1 - 1] = 1;
			return 0;
		}

		memset(&lcs, 0, sizeof(lcs));
		lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
		if (lcs_found < 0)
			return -1;
		if (lcs_found)
			return fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);

		/* No common region was recorded: flag both ranges entirely. */
		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
			for (; count1--; line1++)
				env->xdf1.rchg[line1 - 1] = 1;
			for (; count2--; line2++)
				env->xdf2.rchg[line2 - 1] = 1;
			return 0;
		}

		/* Lines in front of the common region. */
		result = histogram_diff(xpp, env,
					line1, lcs.begin1 - line1,
					line2, lcs.begin2 - line2);
		if (result)
			return result;

		/*
		 * Lines behind the common region. Each count must be derived
		 * from LINE_END() before the matching line variable is moved.
		 */
		count1 = LINE_END(1) - lcs.end1;
		line1 = lcs.end1 + 1;
		count2 = LINE_END(2) - lcs.end2;
		line2 = lcs.end2 + 1;
	}
}
//--------------------------------------------------------------------------------------- bool ScoreComparer::are_equal(ImoScore* pA, ImoScore* pB) { //top level method: compares two scores and returns true if both are equal. //In case of differences the LCD (longest Common Subsequence) and the //SES (Shortest Edit Script) are computed. initialize(); m_pScoreA = pA; m_pScoreB = pB; m_N = encode(pA, &m_A); m_M = encode(pB, &m_B); if (encodings_are_equal()) return true; //both scores are equal //scores are different. Find LCS and SES allocate_v_matrix(); find_lcs(); if (m_D > 0) { find_optimal_path(); compute_differences(); } else { //nothing in common between both scores set_full_differences(); } return false; }
/*
 * Demo driver: fills two LEN-sized buffers with fixed sample strings, prints
 * them, runs find_lcs() on them and prints the reported length and text.
 *
 * Returns 0 on success, 1 if the work buffers cannot be allocated.
 */
int main()
{
	/* calloc zero-fills, so no separate clearing pass is needed. */
	char *buffer1 = calloc((size_t)(LEN) + 1, sizeof *buffer1);
	char *buffer2 = calloc((size_t)(LEN) + 1, sizeof *buffer2);

	if (buffer1 == NULL || buffer2 == NULL) {
		fprintf(stderr, "out of memory\n");
		free(buffer1);
		free(buffer2);
		return 1;
	}

	/* Bounded formatting: a small LEN truncates instead of overflowing. */
	snprintf(buffer1, (size_t)(LEN) + 1, "%s\n", "Hi,hello world");
	snprintf(buffer2, (size_t)(LEN) + 1, "%s\n", "he world he");

	printf("%s\n", buffer1);
	printf("%s\n", buffer2);

	LCS lcs = find_lcs(buffer1, strlen(buffer1), buffer2, strlen(buffer2));

	/*
	 * Copy the match into a NUL-terminated scratch buffer for printing,
	 * clamping the length so an unexpectedly long (or empty/invalid)
	 * result cannot write past the end of `buffer`.
	 */
	char buffer[1024] = {0};
	if (lcs.start != NULL && lcs.len > 0) {
		size_t shown = (size_t)lcs.len;

		if (shown > sizeof buffer - 1)
			shown = sizeof buffer - 1;
		memcpy(buffer, lcs.start, shown);
	}
	printf("len:%d %s\n", lcs.len, buffer);

	free(buffer1);
	free(buffer2);
	return 0;
}
int main(){ int count1, count2; char* arr1; char* arr2; int i; //get the inputs printf("Enter count1<tab>count2 : \n"); scanf("%d\t%d", &count1, &count2); arr1 = (char*)malloc(sizeof(char)*count1); arr2 = (char*)malloc(sizeof(char)*count2); printf("Enter string1 : \n"); scanf("%s", arr1); printf("Enter string2 : \n"); scanf("%s", arr2); //form lcs object lcs* l = initialize_lcs(count1, count2, arr1, arr2); //find the LCS int res = find_lcs(l); printf("RESULT : %d\n", res); getchar(); getchar(); return 0; }
/*
 * Driver: for every pair of whitespace-separated words on stdin, prints the
 * value returned by lcs_len() on one line and the string returned by
 * find_lcs() on the next. Stops at the first incomplete pair.
 */
int main()
{
	static char first[MAXN], second[MAXN];
	int row[MAXN];	/* scratch array handed to lcs_len() */

	for (;;) {
		if (scanf("%s %s", first, second) != 2)
			break;

		int first_len = strlen(first);
		int second_len = strlen(second);

		int length = lcs_len(first, first_len, second, second_len, row);
		char *subseq = find_lcs(first, first_len, second, second_len);

		printf("%d\n", length);
		printf("%s\n", subseq);

		/* find_lcs() hands back a heap string owned by the caller. */
		free(subseq);
	}

	return 0;
}
/*
 * Diffs the range [line1, line1 + count1) of the first file against
 * [line2, line2 + count2) of the second, writing 1 into env->xdf1.rchg /
 * env->xdf2.rchg for every line it decides differs.
 *
 * A histindex is allocated for this call, handed to find_lcs() (and to
 * fall_back_to_classic_diff() when find_lcs() returns non-zero), and
 * released on every exit path that comes after the allocations begin.
 * The ranges before and after the region found by find_lcs() are processed
 * by two recursive calls.
 *
 * Returns 0 on success and -1 when the range is too large or an allocation
 * fails; otherwise the result of the fallback / recursive calls.
 */
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	struct histindex index;
	struct region lcs;
	int nbytes;
	int result = -1;

	/* Both sides exhausted: nothing left to compare. */
	if (count1 <= 0 && count2 <= 0)
		return 0;

	if (LINE_END(1) >= MAX_PTR)
		return -1;

	/* One side empty: every line on the other side is flagged. */
	if (!count1) {
		for (; count2--; line2++)
			env->xdf2.rchg[line2 - 1] = 1;
		return 0;
	}
	if (!count2) {
		for (; count1--; line1++)
			env->xdf1.rchg[line1 - 1] = 1;
		return 0;
	}

	memset(&index, 0, sizeof(index));

	index.env = env;
	index.xpp = xpp;

	/* Make the cleanup path safe even if we bail out before allocating. */
	index.records = NULL;
	index.line_map = NULL;
	/* in case of early xdl_cha_free() */
	index.rcha.head = NULL;

	/* Record table: 2^table_bits pointer slots, zero-filled. */
	index.table_bits = xdl_hashbits(count1);
	index.records_size = 1 << index.table_bits;
	nbytes = index.records_size;
	nbytes *= sizeof(struct record *);
	index.records = (struct record **) xdl_malloc(nbytes);
	if (!index.records)
		goto cleanup;
	memset(index.records, 0, nbytes);

	/* One record pointer per line of the first range. */
	index.line_map_size = count1;
	nbytes = index.line_map_size;
	nbytes *= sizeof(struct record *);
	index.line_map = (struct record **) xdl_malloc(nbytes);
	if (!index.line_map)
		goto cleanup;
	memset(index.line_map, 0, nbytes);

	/* One unsigned int per line of the first range. */
	nbytes = index.line_map_size;
	nbytes *= sizeof(unsigned int);
	index.next_ptrs = (unsigned int *) xdl_malloc(nbytes);
	if (!index.next_ptrs)
		goto cleanup;
	memset(index.next_ptrs, 0, nbytes);

	/* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
	if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0)
		goto cleanup;

	index.ptr_shift = line1;
	index.max_chain_length = 64;

	memset(&lcs, 0, sizeof(lcs));
	if (find_lcs(&index, &lcs, line1, count1, line2, count2)) {
		result = fall_back_to_classic_diff(&index, line1, count1, line2, count2);
	} else if (lcs.begin1 == 0 && lcs.begin2 == 0) {
		/* No common region was recorded: flag both ranges entirely. */
		for (; count1--; line1++)
			env->xdf1.rchg[line1 - 1] = 1;
		for (; count2--; line2++)
			env->xdf2.rchg[line2 - 1] = 1;
		result = 0;
	} else {
		/* Lines in front of the common region ... */
		result = histogram_diff(xpp, env,
					line1, lcs.begin1 - line1,
					line2, lcs.begin2 - line2);
		/* ... and, if that went fine, the lines behind it. */
		if (!result)
			result = histogram_diff(xpp, env,
						lcs.end1 + 1, LINE_END(1) - lcs.end1,
						lcs.end2 + 1, LINE_END(2) - lcs.end2);
	}

cleanup:
	xdl_free(index.records);
	xdl_free(index.line_map);
	xdl_free(index.next_ptrs);
	xdl_cha_free(&index.rcha);

	return result;
}
void referenceless_alignment(string& readseq_first, string& readseq_second, string& quality_first, string& quality_second, string& readsequence, string& readquality, cell **matrix, ofstream& fp_detail) { int rind = -1, qind = -1, len; int refindex = -1, queryindex = -1, max_len; assert(readseq_first.length() == quality_first.length()); assert(readseq_second.length() == quality_second.length()); find_lcs(readseq_first, readseq_second, refindex, queryindex, max_len); if(refindex == -1 || queryindex == -1) return; string query = reverse_complement(readseq_second); find_lcs(readseq_first, query, rind, qind, len); if(rind == -1 || qind == -1) return; if(max_len < len) { max_len = len; refindex = rind; queryindex = qind; reverse_str(quality_second); } else { query = readseq_second; } assert(query.length() == quality_second.length()); fp_detail << "Reference Index = " << refindex << endl; fp_detail << "Query Index = " << queryindex << endl; fp_detail << "Substring = " << query.substr(queryindex, max_len) << endl; fp_detail << "Longest Common Substring Length: " << max_len << endl << endl; int subtract = min(refindex, queryindex); int addition = min(readseq_first.length() - refindex, query.length() - queryindex); //fp_detail << "subtract from refindex = " << subtract << endl; //fp_detail << "addition to refindex = " << addition << endl; int ref_start_index = refindex - subtract; int ref_end_index = refindex + addition; string reference = readseq_first.substr(ref_start_index, ref_end_index - ref_start_index); int read_start_index = queryindex - subtract; int read_end_index = queryindex + addition; string read = query.substr(read_start_index, read_end_index - read_start_index); fp_detail << "Ref:: " << reference << endl; fp_detail << "Read: " << read << endl << endl; /////////////////////////////////////////////////////////////////////////////////// string merged_string = ""; string merged_quality = ""; for(int i = ref_start_index, k = read_start_index; i < ref_end_index && 
k < read_end_index; i++, k++) { if(quality_first.at(i) > quality_second.at(k)) { merged_string += readseq_first.at(i); merged_quality += quality_first.at(i); } else { merged_string += query.at(k); merged_quality += quality_second.at(k); } } assert(merged_string.length() == merged_quality.length()); //fp_detail << "Merged:: " << merged_string << endl; //fp_detail << "Quality: " << merged_quality << endl << endl; string str_first_part = "", str_second_part = ""; string quality_first_part = "", quality_second_part = ""; if(ref_start_index >= read_start_index) //if(read_start_index == 0) { str_first_part = readseq_first.substr(0, ref_start_index); quality_first_part = quality_first.substr(0, ref_start_index); str_second_part = query.substr(read_end_index, query.length() - read_end_index); quality_second_part = quality_second.substr(read_end_index, query.length() - read_end_index); } else //if(ref_start_index == 0) { str_first_part = query.substr(0, read_start_index); quality_first_part = quality_second.substr(0, read_start_index); str_second_part = readseq_first.substr(ref_end_index, readseq_first.length() - ref_end_index); quality_second_part = quality_first.substr(ref_end_index, readseq_first.length() - ref_end_index); } /* fp_detail << "First string:: " << str_first_part << endl; fp_detail << "First quality: " << quality_first_part << endl << endl; fp_detail << "Second string:: " << str_second_part << endl; fp_detail << "Second quality: " << quality_second_part << endl << endl; */ fp_detail << "Total length (Only substitution) = " << (str_first_part.length() + merged_string.length() + str_second_part.length()) << endl << endl; string string_alignment = str_first_part + merged_string + str_second_part; string quality = quality_first_part + merged_quality + quality_second_part; fp_detail << "Alignment: " << string_alignment << endl; fp_detail << "Quality::: " << quality << endl << endl; assert(str_first_part.length() == quality_first_part.length()); 
assert(str_second_part.length() == quality_second_part.length()); assert(string_alignment.length() == quality.length()); //***************************************************************************************// int ref_iterator = ref_start_index; int read_iterator = read_start_index; int str1_end = 0, str2_end = 0; vector<pair<char, char> > alignment; find_kband_similarity(reference, read, alignment, str1_end, str2_end, matrix); merged_string = ""; merged_quality = ""; for(int i = alignment.size() - 1; i >= 0; i--) { if(alignment[i].first != '-' && alignment[i].second != '-') { if(quality_first.at(ref_iterator) > quality_second.at(read_iterator)) { merged_string += alignment[i].first; merged_quality += quality_first.at(ref_iterator); } else { merged_string += alignment[i].second; merged_quality += quality_second.at(read_iterator); } ref_iterator += 1; read_iterator += 1; } else if(alignment[i].first != alignment[i].second && alignment[i].second == '-') { merged_string += alignment[i].first; merged_quality += quality_first.at(ref_iterator); ref_iterator += 1; } else if(alignment[i].first != alignment[i].second && alignment[i].first == '-') { merged_string += alignment[i].second; merged_quality += quality_second.at(read_iterator); read_iterator += 1; } else { assert(false); } } fp_detail << "Total length (Including indels) = " << (str_first_part.length() + merged_string.length() + str_second_part.length()) << endl << endl; string_alignment = str_first_part + merged_string + str_second_part; quality = quality_first_part + merged_quality + quality_second_part; fp_detail << "Alignment: " << string_alignment << endl; fp_detail << "Quality::: " << quality << endl << endl; readsequence = string_alignment; readquality = quality; assert(readsequence.length() == readquality.length()); return; }