Esempio n. 1
0
/*
 * Build a longest common subsequence of a[0..na) and b[0..nb) by divide and
 * conquer: a is cut in half, a matching cut point in b is derived from two
 * arrays filled by lcs_len(), and both halves are solved recursively and
 * concatenated.
 *
 * The recursive results are released with free() below, so the returned
 * string is expected to be freed the same way by the caller.
 * alloc_str(1) is returned wherever the answer is empty -- presumably an
 * empty, NUL-terminated string; confirm against its definition.
 */
char* find_lcs(const char *a, int na, const char *b, int nb) {
	/* Make a the shorter (or equal-length) input. */
	if (na > nb) {
		const char *tmp_s = a;
		int tmp_n = na;
		a = b;
		b = tmp_s;
		na = nb;
		nb = tmp_n;
	}

	if (na == 0)
		return alloc_str(1);

	if (na == 1) {
		/* One character: it is the answer exactly when b contains it. */
		int pos = 0;
		while (pos < nb && b[pos] != a[0])
			pos++;
		if (pos < nb)
			return substr(a, 0, 1);
		return alloc_str(1);
	}

	/*
	 * Scratch arrays shared by every recursion level. They are only read
	 * before the recursive calls further down, so sharing them is safe.
	 */
	static int fwd[MAXN];
	static int bwd[MAXN];

	int total = lcs_len(a, na, b, nb, fwd);
	if (total == 0)
		return alloc_str(1);

	int left_n = na / 2;
	int right_n = na - left_n;

	char *left_a = substr(a, 0, left_n);
	char *right_a = substr(a, left_n, right_n);
	char *rev_b = reverse(b, nb);
	char *rev_right_a = reverse(right_a, right_n);

	/* fwd: left half of a against b; bwd: reversed right half against reversed b. */
	lcs_len(left_a, left_n, b, nb, fwd);
	lcs_len(rev_right_a, right_n, rev_b, nb, bwd);

	/* First cut point of b whose two partial values add up to the total. */
	int split = -1;
	for (int k = 0; split < 0 && k <= nb; k++) {
		if (fwd[k] + bwd[nb - k] == total)
			split = k;
	}

	char *left_b = substr(b, 0, split);
	char *right_b = substr(b, split, nb - split);
	char *left_res = find_lcs(left_a, left_n, left_b, split);
	char *right_res = find_lcs(right_a, right_n, right_b, nb - split);
	char *joined = cat(left_res, right_res);

	free(left_a);
	free(right_a);
	free(rev_right_a);
	free(left_b);
	free(right_b);
	free(rev_b);
	free(left_res);
	free(right_res);
	return joined;
}
Esempio n. 2
0
/*
 * Diff the region (line1, count1) of file 1 against (line2, count2) of
 * file 2, setting rchg[] entries to 1 for the lines it flags.
 *
 * Returns 0 on success, -1 on failure, or whatever
 * fall_back_to_classic_diff() / the nested call returns.
 *
 * The region in front of the common block reported by find_lcs() is handled
 * by a real recursive call; the region behind it is handled by looping with
 * updated line/count values instead of a second recursive call.
 *
 * Note: LINE_END(n) refers to the lineN/countN parameters, so their names
 * and the order in which they are updated matter.
 */
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	struct region lcs;
	int lcs_found;
	int result;

	for (;;) {
		if (count1 <= 0 && count2 <= 0)
			return 0;

		if (LINE_END(1) >= MAX_PTR)
			return -1;

		/* One side is empty: every line on the other side gets flagged. */
		if (!count1) {
			while (count2--) {
				env->xdf2.rchg[line2 - 1] = 1;
				line2++;
			}
			return 0;
		}
		if (!count2) {
			while (count1--) {
				env->xdf1.rchg[line1 - 1] = 1;
				line1++;
			}
			return 0;
		}

		memset(&lcs, 0, sizeof(lcs));
		lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
		if (lcs_found < 0)
			return -1;
		if (lcs_found)
			return fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);

		/* lcs left zeroed: flag both regions completely. */
		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
			while (count1--) {
				env->xdf1.rchg[line1 - 1] = 1;
				line1++;
			}
			while (count2--) {
				env->xdf2.rchg[line2 - 1] = 1;
				line2++;
			}
			return 0;
		}

		/* Part in front of the common block. */
		result = histogram_diff(xpp, env,
					line1, lcs.begin1 - line1,
					line2, lcs.begin2 - line2);
		if (result)
			return result;

		/*
		 * Part behind the common block: equivalent to
		 *   histogram_diff(xpp, env,
		 *                  lcs.end1 + 1, LINE_END(1) - lcs.end1,
		 *                  lcs.end2 + 1, LINE_END(2) - lcs.end2);
		 * but done by iterating. Each count must be computed before its
		 * line is moved, because LINE_END() reads both.
		 */
		count1 = LINE_END(1) - lcs.end1;
		line1 = lcs.end1 + 1;
		count2 = LINE_END(2) - lcs.end2;
		line2 = lcs.end2 + 1;
	}
}
Esempio n. 3
0
//---------------------------------------------------------------------------------------
bool ScoreComparer::are_equal(ImoScore* pA, ImoScore* pB)
{
    //Top level entry point. Returns true when both scores encode identically.
    //Otherwise the LCS (Longest Common Subsequence) and the SES (Shortest
    //Edit Script) are computed before returning false.

    initialize();

    m_pScoreA = pA;
    m_pScoreB = pB;

    m_N = encode(pA, &m_A);
    m_M = encode(pB, &m_B);

    const bool fSame = encodings_are_equal();
    if (!fSame)
    {
        //encodings differ: find LCS and SES
        allocate_v_matrix();
        find_lcs();

        if (m_D <= 0)
        {
            //both scores have nothing in common
            set_full_differences();
        }
        else
        {
            find_optimal_path();
            compute_differences();
        }
    }

    return fSame;
}
Esempio n. 4
0
/*
 * Demo driver: fills two heap buffers with fixed sample strings, prints
 * them, runs find_lcs() on them and prints the reported length and text.
 *
 * Returns 0 on success, 1 if a buffer could not be allocated.
 */
int main()
{
	/* calloc zero-fills, so the buffers start out as empty strings. */
	char *buffer1 = calloc((size_t)LEN + 1, sizeof(char));
	char *buffer2 = calloc((size_t)LEN + 1, sizeof(char));

	if (buffer1 == NULL || buffer2 == NULL) {
		fprintf(stderr, "out of memory\n");
		free(buffer1);
		free(buffer2);
		return 1;
	}

	/* Bounded formatting: never writes more than LEN + 1 bytes. */
	snprintf(buffer1, (size_t)LEN + 1, "%s\n", "Hi,hello world");
	snprintf(buffer2, (size_t)LEN + 1, "%s\n", "he world he");

	printf("%s\n",buffer1);
	printf("%s\n",buffer2);

	LCS lcs = find_lcs(buffer1,strlen(buffer1),buffer2,strlen(buffer2));

	/*
	 * Copy the result into a local, NUL-terminated buffer for printing.
	 * The length is clamped so an oversized (or non-positive) lcs.len can
	 * neither overflow the buffer nor leave it unterminated.
	 */
	char buffer[1024] = {0};
	size_t copy_len = lcs.len > 0 ? (size_t)lcs.len : 0;
	if (copy_len > sizeof buffer - 1)
		copy_len = sizeof buffer - 1;
	if (copy_len > 0)
		memcpy(buffer, lcs.start, copy_len);

	printf("len:%d %s\n",lcs.len,buffer);

	free(buffer1);
	free(buffer2);
	return 0;
}
Esempio n. 5
0
/*
 * Read one whitespace-delimited word of at most max_chars characters from
 * stdin into dst, which must have room for max_chars + 1 bytes.
 * Returns 1 if a word was read, 0 otherwise.
 */
static int read_bounded_word(char *dst, int max_chars)
{
    char fmt[32];

    /* Build "%<max_chars>s" so scanf cannot write past the buffer. */
    snprintf(fmt, sizeof fmt, "%%%ds", max_chars);
    return scanf(fmt, dst) == 1;
}

/*
 * Interactive driver: asks for two lengths and two strings, builds an lcs
 * object from them and prints the value returned by find_lcs().
 *
 * Returns 0 on success, 1 on invalid input or allocation failure.
 */
int main(){
    int count1, count2;
    char* arr1;
    char* arr2;

    //get the inputs
    printf("Enter count1<tab>count2 : \n");
    if (scanf("%d\t%d", &count1, &count2) != 2 || count1 <= 0 || count2 <= 0) {
        fprintf(stderr, "Invalid counts\n");
        return 1;
    }

    //one extra byte for the terminating NUL; zero-filled so that a string
    //shorter than the announced count leaves no uninitialized bytes behind
    arr1 = (char*)calloc((size_t)count1 + 1, sizeof(char));
    arr2 = (char*)calloc((size_t)count2 + 1, sizeof(char));
    if (arr1 == NULL || arr2 == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(arr1);
        free(arr2);
        return 1;
    }

    printf("Enter string1 : \n");
    if (!read_bounded_word(arr1, count1)) {
        fprintf(stderr, "Invalid string1\n");
        free(arr1);
        free(arr2);
        return 1;
    }

    printf("Enter string2 : \n");
    if (!read_bounded_word(arr2, count2)) {
        fprintf(stderr, "Invalid string2\n");
        free(arr1);
        free(arr2);
        return 1;
    }

    //form lcs object
    //NOTE(review): ownership of arr1/arr2/l after this point is not visible
    //here, so they are deliberately not freed before exit -- confirm.
    lcs* l = initialize_lcs(count1, count2, arr1, arr2);

    //find the LCS
    int res = find_lcs(l);

    printf("RESULT : %d\n", res);

    //keep the console open until the user presses a key
    getchar();
    getchar();
    return 0;
}
Esempio n. 6
0
/*
 * Driver: for every pair of words on stdin, prints the value returned by
 * lcs_len() followed by the string returned by find_lcs().
 *
 * Returns 0 at end of input, 1 if find_lcs() yields no string.
 */
int main() {
	static char A[MAXN], B[MAXN];
	/* static like A and B, so a large MAXN does not land on the stack */
	static int dp[MAXN];
	char fmt[64];

	/*
	 * Limit each %s to MAXN - 1 characters so over-long input cannot
	 * overflow A or B (the remaining byte holds the terminating NUL).
	 */
	snprintf(fmt, sizeof fmt, "%%%ds %%%ds", (int)MAXN - 1, (int)MAXN - 1);

	while (scanf(fmt, A, B) == 2) {
		int na = (int)strlen(A);
		int nb = (int)strlen(B);
		int len = lcs_len(A, na, B, nb, dp);
		char *str = find_lcs(A, na, B, nb);
		if (str == NULL) {
			fprintf(stderr, "find_lcs failed\n");
			return 1;
		}
		printf("%d\n", len);
		printf("%s\n", str);
		free(str);
	}
	return 0;
}
Esempio n. 7
0
/*
 * Diff the region (line1, count1) of file 1 against (line2, count2) of
 * file 2, setting rchg[] entries to 1 for the lines it flags.
 *
 * A histindex is built for the region, find_lcs() is asked for a common
 * block, and the parts in front of and behind that block are handled by
 * recursive calls. A non-zero find_lcs() result hands the region over to
 * fall_back_to_classic_diff().
 *
 * Returns 0 on success, -1 on allocation failure or when LINE_END(1)
 * reaches MAX_PTR, otherwise the result of the fallback / nested calls.
 *
 * Note: LINE_END(n) refers to the lineN/countN parameters.
 */
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	struct histindex index;
	struct region lcs;
	int sz;
	int result = -1;

	if (count1 <= 0 && count2 <= 0)
		return 0;

	if (LINE_END(1) >= MAX_PTR)
		return -1;

	/* One side is empty: every line on the other side gets flagged. */
	if (!count1) {
		while (count2--) {
			env->xdf2.rchg[line2 - 1] = 1;
			line2++;
		}
		return 0;
	}
	if (!count2) {
		while (count1--) {
			env->xdf1.rchg[line1 - 1] = 1;
			line1++;
		}
		return 0;
	}

	memset(&index, 0, sizeof(index));

	index.env = env;
	index.xpp = xpp;

	/* Keep everything the cleanup path releases in a defined state. */
	index.records = NULL;
	index.line_map = NULL;
	/* in case of early xdl_cha_free() */
	index.rcha.head = NULL;

	/* Table with 2^table_bits slots. */
	index.table_bits = xdl_hashbits(count1);
	index.records_size = 1 << index.table_bits;
	sz = index.records_size;
	sz *= sizeof(struct record *);
	index.records = (struct record **) xdl_malloc(sz);
	if (!index.records)
		goto cleanup;
	memset(index.records, 0, sz);

	/* One line_map slot per line of the file-1 region. */
	index.line_map_size = count1;
	sz = index.line_map_size;
	sz *= sizeof(struct record *);
	index.line_map = (struct record **) xdl_malloc(sz);
	if (!index.line_map)
		goto cleanup;
	memset(index.line_map, 0, sz);

	/* next_ptrs has the same number of entries as line_map. */
	sz = index.line_map_size;
	sz *= sizeof(unsigned int);
	index.next_ptrs = (unsigned int *) xdl_malloc(sz);
	if (!index.next_ptrs)
		goto cleanup;
	memset(index.next_ptrs, 0, sz);

	/* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
	if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0)
		goto cleanup;

	index.ptr_shift = line1;
	index.max_chain_length = 64;

	memset(&lcs, 0, sizeof(lcs));
	if (find_lcs(&index, &lcs, line1, count1, line2, count2)) {
		result = fall_back_to_classic_diff(&index, line1, count1, line2, count2);
		goto cleanup;
	}

	/* lcs left zeroed: flag both regions completely. */
	if (lcs.begin1 == 0 && lcs.begin2 == 0) {
		while (count1--) {
			env->xdf1.rchg[line1 - 1] = 1;
			line1++;
		}
		while (count2--) {
			env->xdf2.rchg[line2 - 1] = 1;
			line2++;
		}
		result = 0;
		goto cleanup;
	}

	/* Part in front of the common block, then the part behind it. */
	result = histogram_diff(xpp, env,
				line1, lcs.begin1 - line1,
				line2, lcs.begin2 - line2);
	if (!result)
		result = histogram_diff(xpp, env,
					lcs.end1 + 1, LINE_END(1) - lcs.end1,
					lcs.end2 + 1, LINE_END(2) - lcs.end2);

cleanup:
	xdl_free(index.records);
	xdl_free(index.line_map);
	xdl_free(index.next_ptrs);
	xdl_cha_free(&index.rcha);

	return result;
}
// Merges two reads (readseq_first / readseq_second) and their per-base quality
// strings into one sequence/quality pair, without using a reference.
//
// Outline:
//   1. find_lcs() is run on the first read against the second read as given
//      and against reverse_complement() of the second read; the longer match
//      decides which orientation ("query") is used.
//   2. The match is extended left and right as far as both strings reach,
//      giving an overlapping window in each read.
//   3. The overlap is merged twice: first position-by-position (substitutions
//      only, logged), then along the gapped alignment returned by
//      find_kband_similarity() (the result that is actually returned).
//   4. The non-overlapping flanks are attached on both sides.
//
// Outputs: readsequence / readquality receive the merged sequence and quality.
// They are left untouched when the function returns early.
// Side effects: details are written to fp_detail; quality_second may be passed
// to reverse_str() (see below), which affects the caller's string because the
// parameter is a non-const reference.
// `matrix` is only forwarded to find_kband_similarity().
void referenceless_alignment(string& readseq_first, string& readseq_second, string& quality_first, string& quality_second, 
				string& readsequence, string& readquality, cell **matrix, ofstream& fp_detail)
{
	// NOTE(review): len and max_len are not initialized here; they are only
	// meaningful if find_lcs() assigns them through its reference arguments.
	int rind = -1, qind = -1, len;
	int refindex = -1, queryindex = -1, max_len;

	assert(readseq_first.length() == quality_first.length());
	assert(readseq_second.length() == quality_second.length());

	// First attempt: second read as given. Indices still at -1 afterwards are
	// treated as "nothing found".
	find_lcs(readseq_first, readseq_second, refindex, queryindex,  max_len);	
	if(refindex == -1 || queryindex == -1)
		return;


	// Second attempt: reverse complement of the second read.
	// NOTE(review): a miss here returns even though the first attempt
	// succeeded -- confirm this is intended.
	string query = reverse_complement(readseq_second);
	find_lcs(readseq_first, query, rind, qind, len);
	if(rind == -1 || qind == -1)
		return;

	// Keep the orientation with the strictly longer match. When the reverse
	// complement wins, quality_second goes through reverse_str() -- presumably
	// reversed in place so it lines up with `query`; verify.
	if(max_len < len)
	{
		max_len = len;
		refindex = rind;
		queryindex = qind;
		reverse_str(quality_second);
	}
	else
	{
		query = readseq_second;
	}

	assert(query.length() == quality_second.length());

	fp_detail << "Reference Index = " << refindex << endl;
	fp_detail << "Query Index = " << queryindex << endl;
	fp_detail << "Substring = " << query.substr(queryindex, max_len) << endl;
	fp_detail << "Longest Common Substring Length: " << max_len << endl << endl;

	// Extend the match into an overlap window: `subtract` positions to the
	// left (limited by the smaller start index) and `addition` positions
	// counted from the match start to the right (limited by the shorter tail).
	int subtract = min(refindex, queryindex);
	int addition = min(readseq_first.length() - refindex, query.length() - queryindex);
	
	//fp_detail << "subtract from refindex = " << subtract << endl;
	//fp_detail << "addition to refindex = " << addition << endl;

	int ref_start_index = refindex - subtract;
	int ref_end_index = refindex + addition;
	string reference = readseq_first.substr(ref_start_index, ref_end_index - ref_start_index);

	int read_start_index = queryindex - subtract;
	int read_end_index = queryindex + addition;
	string read = query.substr(read_start_index, read_end_index - read_start_index);

	fp_detail << "Ref:: " << reference << endl;
	fp_detail << "Read: " << read << endl << endl;

	///////////////////////////////////////////////////////////////////////////////////
	// Pass 1 (substitutions only): walk both windows in lockstep and take the
	// base and quality from the first read only when its quality character is
	// strictly greater; otherwise take them from the query.
	string merged_string = "";
	string merged_quality = "";
	for(int i = ref_start_index, k = read_start_index; i < ref_end_index && k < read_end_index; i++, k++)
	{
		if(quality_first.at(i) > quality_second.at(k))
		{
			merged_string += readseq_first.at(i);
			merged_quality += quality_first.at(i);
		}
		else
		{
			merged_string += query.at(k);
			merged_quality += quality_second.at(k); 
		}
	}

	assert(merged_string.length() == merged_quality.length());

	//fp_detail << "Merged:: " << merged_string << endl;
	//fp_detail << "Quality: " << merged_quality << endl << endl;

	// Flanks outside the overlap. Because `subtract` is the smaller of the two
	// start indices, at least one window starts at 0; the prefix comes from the
	// string whose window starts later and the suffix from the other one.
	string str_first_part = "", str_second_part = "";
	string quality_first_part = "", quality_second_part = "";

	if(ref_start_index >= read_start_index)
	//if(read_start_index == 0)
	{
		str_first_part = readseq_first.substr(0, ref_start_index);
		quality_first_part = quality_first.substr(0, ref_start_index);
		
		str_second_part = query.substr(read_end_index, query.length() - read_end_index);
		quality_second_part = quality_second.substr(read_end_index, query.length() - read_end_index);
	}
	else
	//if(ref_start_index == 0)
	{
		str_first_part = query.substr(0, read_start_index);
		quality_first_part = quality_second.substr(0, read_start_index);

		str_second_part = readseq_first.substr(ref_end_index, readseq_first.length() - ref_end_index);
		quality_second_part = quality_first.substr(ref_end_index, readseq_first.length() - ref_end_index);
	}
	/*
	fp_detail << "First string:: " << str_first_part << endl;
	fp_detail << "First quality: " << quality_first_part << endl << endl;

	fp_detail << "Second string:: " << str_second_part << endl;
	fp_detail << "Second quality: " << quality_second_part << endl << endl;
	*/
	fp_detail << "Total length (Only substitution) = " << (str_first_part.length() + merged_string.length() + str_second_part.length()) << endl << endl;

	string string_alignment = str_first_part + merged_string + str_second_part;
	string quality = quality_first_part + merged_quality + quality_second_part;
	fp_detail << "Alignment: " << string_alignment << endl;
	fp_detail << "Quality::: " << quality << endl << endl;

	assert(str_first_part.length() == quality_first_part.length());
	assert(str_second_part.length() == quality_second_part.length());
	assert(string_alignment.length() == quality.length());

	//***************************************************************************************//
		
	// Pass 2 (with indels): redo the merge of the overlap along the alignment
	// produced by find_kband_similarity(). The iterators track the current
	// position in quality_first / quality_second.
	int ref_iterator = ref_start_index;
	int read_iterator = read_start_index;
	int str1_end = 0, str2_end = 0;
	vector<pair<char, char> > alignment;

	find_kband_similarity(reference, read, alignment, str1_end, str2_end, matrix);

	merged_string = "";
	merged_quality = "";

	// The alignment is consumed from its last element to its first --
	// presumably because it was produced in traceback order; confirm.
	// '-' in either half of a pair marks a gap on that side.
	for(int i = alignment.size() - 1; i >= 0; i--)
	{
		if(alignment[i].first != '-' && alignment[i].second != '-')
		{
			// Both sides have a base: same quality rule as in pass 1.
			if(quality_first.at(ref_iterator) > quality_second.at(read_iterator))
			{
				merged_string += alignment[i].first;
				merged_quality += quality_first.at(ref_iterator);
			}
			else
			{
				merged_string += alignment[i].second;
				merged_quality += quality_second.at(read_iterator);
			}
			ref_iterator += 1;
			read_iterator += 1;
		}
		else if(alignment[i].first != alignment[i].second && alignment[i].second == '-')
		{
			// Gap in the read: keep the base from the first read.
			merged_string += alignment[i].first;
			merged_quality += quality_first.at(ref_iterator);
			ref_iterator += 1;
		}
		else if(alignment[i].first != alignment[i].second && alignment[i].first == '-')
		{
			// Gap in the first read: keep the base from the query.
			merged_string += alignment[i].second;
			merged_quality += quality_second.at(read_iterator);
			read_iterator += 1;
		}
		else
		{
			// Only reachable when both sides are '-'.
			assert(false);
		}
	}

	fp_detail << "Total length (Including indels) = " << (str_first_part.length() + merged_string.length() + str_second_part.length()) << endl << endl;

	string_alignment = str_first_part + merged_string + str_second_part;
	quality = quality_first_part + merged_quality + quality_second_part;
	fp_detail << "Alignment: " << string_alignment << endl;
	fp_detail << "Quality::: " << quality << endl << endl;

	// Hand the gapped-merge result back to the caller.
	readsequence = string_alignment;
	readquality = quality;
	
	assert(readsequence.length() == readquality.length());

	return;
}