bool test_r_str_ichr(void) {
	char* test = "rrrrrradare2";
	char* out = r_str_ichr (test, 'r');
	mu_assert_streq (out, "adare2",
			"string after the first non-r character in rrrrrradare2");
	mu_end;
}
Beispiel #2
0
R_API int r_diff_buffers_static(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	int i, len;
	int hit = 0;
	la = R_ABS(la);
	lb = R_ABS(lb);
	if (la != lb) {
	 	len = R_MIN(la, lb);
		fprintf(stderr,
			"Buffer truncated to %d bytes (%d not compared)\n",
			len, R_ABS(lb-la));
	} else len = la;
	for(i = 0; i<len; i++) {
		if (a[i]!=b[i]) {
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
					.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
				};
				d->callback (d, d->user, &o);
				hit = 0;
			}
		}
	}
	if (hit>0) {
		struct r_diff_op_t o = {
			.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
			.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
		};
		d->callback (d, d->user, &o);
		hit = 0;
	}
	return 0;
}

// XXX: temporary files are
R_API int r_diff_buffers_radiff(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	char *ptr, *str, buf[64], oop = 0;
	int ret, atl, btl, hit;
	ut8 at[128], bt[128];
	ut64 ooa, oob;
	FILE *fd;

	hit = atl = btl = 0;
	ooa = oob = 0LL;
	oop = -1;

	r_file_dump (".a", a, la, 0);
	r_file_dump (".b", b, lb, 0);
	r_sys_cmd ("radiff -d .a .b | rsc uncolor > .d");
	fd = fopen (".d", "r");
	if (!fd) return 0;

	while (!feof (fd)) {
		ut64 oa, ob; // offset
		int ba, bb = 0; // byte
		char op; // operation

		oa = ob = 0LL;
		if (!fgets (buf, 63, fd))
			break;
		if (feof (fd))
			break;
		str = buf;

		ptr = strchr (buf, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &oa);

		str = r_str_ichr (ptr+1, ' ');
		if (*str!='|'&&*str!='>'&&*str!='<') {
			ptr = strchr (str, ' ');
			if (!ptr) continue;
			*ptr='\0';
			sscanf (str, "%02x", &ba);
		} else ba = 0;

		str = r_str_ichr (ptr+1, ' ');
		ptr = strchr (str, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "%c", &op);

		str = r_str_ichr (ptr+1, ' ');
		if (str[0]!='0' || str[1]!='x') {
			ptr = strchr(str, ' ');
			if (!ptr) continue;
			*ptr = '\0';
			sscanf (str, "%02x", &bb);
		}

		str = ptr+1;
		ptr = strchr (str, '\n');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &ob);

		if (oop == op || oop==-1) {
			if (hit == 0) {
				ooa = oa;
				oob = ob;
			}
			at[atl] = ba;
			bt[btl] = bb;
			switch (op) {
			case '|':
				atl++;
				btl++;
				break;
			case '>':
				btl++;
				break;
			case '<':
				atl++;
				break;
			}
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = ooa, .a_buf = at, .a_len = atl,
					.b_off = oob, .b_buf = bt, .b_len = btl
				};
				ret = d->callback(d, d->user, &o);
				if (!ret)
					break;
				atl = btl = 0;
				hit = 0;
			}
		}
		oop = op;
	}
	if (hit > 0) {
		struct r_diff_op_t o = {
			.a_off = ooa, .a_buf = at, .a_len = atl,
			.b_off = oob, .b_buf = bt, .b_len = btl
		};
		if (!d->callback (d, d->user, &o)) {
			fclose (fd);
			return 0;
		}
		atl = btl = 0;
		hit = 0;
	}
	fclose (fd);
	unlink (".a");
	unlink (".b");
	unlink (".d");
	return 0;
}

R_API int r_diff_buffers(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb) {
	if (d->delta) {
		return r_diff_buffers_delta (d, a, la, b, lb);
	}
	return r_diff_buffers_static (d, a, la, b, lb);
}

/* TODO: Move into r_util maybe? */
R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb, ut32 *distance, double *similarity) {
	const bool verbose = d? d->verbose: false;
	/*
	More memory efficient version on Levenshtein Distance from:
	https://en.wikipedia.org/wiki/Levenshtein_distance
	http://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm
	ObM..

	8/July/2016 - More time efficient Levenshtein Distance. Now runs in about O(N*sum(MDistance)) instead of O(NM)
	In real world testing the speedups for similar files are immense. Processing of
	radiff2 -sV routerA/firmware_extract/bin/httpd routerB/firmware_extract/bin/httpd
	reduced from 28 hours to about 13 minutes.
	*/
	int i, j;
	const ut8 *aBufPtr;
	const ut8 *bBufPtr;
	ut32 aLen;
	ut32 bLen;

	// temp pointer will be used to switch v0 and v1 after processing the inner loop.
	int *temp;
	int *v0, *v1;

	// We need these variables outside the context of the loops as we need to
	// survive multiple loop iterations.
	// start and stop are used in our inner loop
	// colMin tells us the current 'best' edit distance.
	// extendStop & extendStart are used when we get 'double up' edge conditions
	// that require us to keep some more data.
	int start = 0;
	int stop = 0;
	int smallest;
	int colMin = 0;
	int extendStop = 0;
	int extendStart = 0;

	//we could move cost into the 'i' loop.
	int cost = 0;

	// loops can get very big, this can be removed, but it's currently in there for debugging
	// and optimisation testing.
	ut64 loops = 0;

	// We need the longest file to be 'A' because our optimisation tries to stop and start
	// around the diagonal.
	//  AAAAAAA
	// B*
	// B *
	// B  *____
	// if we have them the other way around and we terminate on the diagonal, we won't have
	// inspected all the bytes of file B..
	//  AAAA
	// B*
	// B *
	// B  *
	// B   *
	// B   ?

	if (la < lb) {
		aBufPtr = b;
		bBufPtr = a;
		aLen = lb;
		bLen = la;
	} else {
		aBufPtr = a;
		bBufPtr = b;
		aLen = la;
		bLen = lb;
	}
	stop = bLen;
	// Preliminary tests

	//Do we have both files a & b, and are they at least one byte?
	if (!aBufPtr || !bBufPtr || aLen < 1 || bLen < 1) {
		return false;
	}

	//IF the files are the same size and are identical, then we have matching files
	if (aLen == bLen && !memcmp (aBufPtr, bBufPtr, aLen)) {
		if (distance) {
			*distance = 0;
		}
		if (similarity) {
			*similarity = 1.0;
		}
		return true;
	}
	// Only calloc if we have to do some processing

	// calloc v0 & v1 and check they initialised
	v0 = (int*) calloc ((bLen + 3), sizeof (int));
	if (!v0) {
		eprintf ("Error: cannot allocate %i bytes.", bLen + 3);
		return false;
	}

	v1 = (int*) calloc ((bLen + 3), sizeof (int));
	if (!v1) {
		eprintf ("Error: cannot allocate %i bytes", 2 * (bLen + 3));
		free (v0);
		return false;
	}

	// initialise v0 and v1.
	// With optimisiation we only strictly we only need to initialise v0[0..2]=0..2 & v1[0] = 1;
	for (i = 0; i < bLen + 1 ; i++) {
		v0[i] = i;
		v1[i] = i + 1;
	}

	// Outer loop = the length of the longest input file.
	for (i = 0; i < aLen; i++) {

		// We're going to stop the inner loop at:
		// bLen (so we don't run off the end of our array)
		// or 'two below the diagonal' PLUS any extension we need for 'double up' edge values
		// (see extendStop for logic)
		stop = R_MIN ((i + extendStop + 2), bLen);

		// We need a value in the result column (v1[start]).
		// If you look at the loop below, we need it because we look at v1[j] as one of the
		// potential shortest edit distances.
		// In all cases where the edit distance can't 'reach',
		// the value of v1[start] simply increments.
		if (start > bLen) {
			break;
		} 
		v1[start] = v0[start] + 1;

		// need to have a bigger number in colMin than we'll ever encounter in the inner loop
		colMin = aLen;

		// Inner loop does all the work:
		for (j = start; j <= stop; j++) {
			loops++;

			// The main levenshtein comparison:
			cost = (aBufPtr[i] == bBufPtr[j]) ? 0 : 1;
			smallest = R_MIN ((v1[j] + 1), (v0[j + 1] + 1));
			smallest = R_MIN (smallest, (v0[j] + cost));

			// populate the next two entries in v1.
			// only really required if this is the last loop.
			if (j + 2 > bLen + 3) {
				break;
			}
			v1[j + 1] = smallest;
			v1[j + 2] = smallest + 1;

			// If we have seen a smaller number, it's the new column Minimum
			colMin = R_MIN ((colMin), (smallest));

		}

		// We're going to start at i+1 next iteration
		// The column minimum is the current edit distance
		// This distance is the minimum 'search width' from the optimal 'i' diagonal
		// The extendStart picks up an edge case where we have a match on the first iteration
		// We update extendStart after we've set start for the next iteration.
		start = i + 1 - colMin - extendStart;

		// If the last processed entry is a match, AND
		// the current byte in 'a' and the previous processed entry in 'b' aren't a match
		// then we need to extend our search below the optimal 'i' diagonal. because we'll
		// have a vertical double up condition in our last two values of the results column.
		// j-2 is used because j++ increments prior to loop exit in the processing loop above.
		if (!cost && aBufPtr[i] != bBufPtr[j - 2]) {
			extendStop ++;
		}

		// If new start would be a match then we have a horizontal 'double up'
		// which means we need to keep an extra row of data
		// so don't increment the start counter this time, BUT keep
		// extendStart up our sleeves for next iteration.
		if (i + 1 < aLen && start < bLen && aBufPtr[i + 1] == bBufPtr[start]) {
			start --;
			extendStart ++;
		}
		//Switch v0 and v1 pointers via temp pointer
		temp = v0;
		v0 = v1;
		v1 = temp;

		//Print a processing update every 10K of outer loop
		if (verbose && i % 10000==0) {
			eprintf ("\rProcessing %d of %d\r", i, aLen);
		}
	}
	//Clean up output on loop exit (purely aesthetic)
	if (verbose) {
		eprintf ("\rProcessing %d of %d (loops=%llu)\n", i, aLen,loops);
	}
	if (distance) {
		// the final distance is the last byte we processed in the inner loop.
		// v0 is used instead of v1 because we switched the pointers before exiting the outer loop
		*distance = v0[stop];
		if (similarity) {
			double diff = (double) (*distance) / (double) (R_MAX (aLen, bLen));
			*similarity = (double)1 - diff;
		}
	}
	free (v0);
	free (v1);
	return true;
}
Beispiel #3
0
R_API int r_diff_buffers_static(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	int i, len;
	int hit = 0;
	la = R_ABS(la);
	lb = R_ABS(lb);
	if (la != lb) {
	 	len = R_MIN(la, lb);
		fprintf(stderr,
			"Buffer truncated to %d bytes (%d not compared)\n",
			len, R_ABS(lb-la));
	} else len = la;
	for(i = 0; i<len; i++) {
		if (a[i]!=b[i]) {
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
					.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
				};
				d->callback (d, d->user, &o);
				hit = 0;
			}
		}
	}
	if (hit>0) {
		struct r_diff_op_t o = {
			.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
			.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
		};
		d->callback (d, d->user, &o);
		hit = 0;
	}
	return 0;
}

// XXX: temporary files are
R_API int r_diff_buffers_radiff(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	char *ptr, *str, buf[64], oop = 0;
	int ret, atl, btl, hit;
	ut8 at[128], bt[128];
	ut64 ooa, oob;
	FILE *fd;

	hit = atl = btl = 0;
	ooa = oob = 0LL;
	oop = -1;

	r_file_dump (".a", a, la, 0);
	r_file_dump (".b", b, lb, 0);
	r_sys_cmd ("radiff -d .a .b | rsc uncolor > .d");
	fd = fopen (".d", "r");

	while (!feof (fd)) {
		ut64 oa, ob; // offset
		int ba, bb = 0; // byte
		char op; // operation

		oa = ob = 0LL;
		if (!fgets (buf, 63, fd))
			break;
		if (feof (fd))
			break;
		str = buf;

		ptr = strchr (buf, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &oa);

		str = r_str_ichr (ptr+1, ' ');
		if (*str!='|'&&*str!='>'&&*str!='<') {
			ptr = strchr (str, ' ');
			if (!ptr) continue;
			*ptr='\0';
			sscanf (str, "%02x", &ba);
		} else ba = 0;

		str = r_str_ichr (ptr+1, ' ');
		ptr = strchr (str, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "%c", &op);

		str = r_str_ichr (ptr+1, ' ');
		if (str[0]!='0' || str[1]!='x') {
			ptr = strchr(str, ' ');
			if (!ptr) continue;
			*ptr = '\0';
			sscanf (str, "%02x", &bb);
		}

		str = ptr+1;
		ptr = strchr (str, '\n');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &ob);

		if (oop == op || oop==-1) {
			if (hit == 0) {
				ooa = oa;
				oob = ob;
			}
			at[atl] = ba;
			bt[btl] = bb;
			switch (op) {
			case '|':
				atl++;
				btl++;
				break;
			case '>':
				btl++;
				break;
			case '<':
				atl++;
				break;
			}
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = ooa, .a_buf = at, .a_len = atl,
					.b_off = oob, .b_buf = bt, .b_len = btl
				};
				ret = d->callback(d, d->user, &o);
				if (!ret)
					break;
				atl = btl = 0;
				hit = 0;
			}
		}
		oop = op;
	}
	if (hit>0) {
		struct r_diff_op_t o = {
			.a_off = ooa, .a_buf = at, .a_len = atl,
			.b_off = oob, .b_buf = bt, .b_len = btl
		};
		if (!d->callback (d, d->user, &o)) {
			fclose (fd);
			return 0;
		}
		atl = btl = 0;
		hit = 0;
	}
	fclose (fd);
	unlink (".a");
	unlink (".b");
	unlink (".d");
	return 0;
}

R_API int r_diff_buffers(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb) {
	if (d->delta)
		return r_diff_buffers_delta (d, a, la, b, lb);
	return r_diff_buffers_static (d, a, la, b, lb);
}

/* TODO: Move into r_util maybe? */
R_API int r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb,
		ut32 *distance, double *similarity) {
	int i, j, cost, tmin, **m;

	if (!a || !b || la < 1 || lb < 1)
		return R_FALSE;

	if ((m = malloc ((la+1) * sizeof(int*))) == NULL)
		return R_FALSE;
	for(i = 0; i <= la; i++)
		if ((m[i] = malloc ((lb+1) * sizeof(int))) == NULL) {
			while (i--)
				free (m[i]);
			free (m);
			return R_FALSE;
		}

	for (i = 0; i <= la; i++)
		m[i][0] = i;
	for (j = 0; j <= lb; j++)
		m[0][j] = j;

	for (i = 1; i <= la; i++) {
		for (j = 1; j <= lb; j++) {
			if (a[i-1] == b[j-1])
				cost = 0;
			else cost = 1;
			tmin = R_MIN (m[i-1][j] + 1, m[i][j-1] + 1);
			m[i][j] = R_MIN (tmin, m[i-1][j-1] + cost);
		}
	}

	if (distance != NULL)
		*distance = m[la][lb];
	if (similarity != NULL)
		*similarity = (double)1 - (double)(m[la][lb])/(double)(R_MAX(la, lb));

	for(i = 0; i <= la; i++)
		free (m[i]);
	free (m);

	return R_TRUE;
}
Beispiel #4
0
R_API int r_diff_buffers_static(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	int i, len;
	int hit = 0;
	la = R_ABS(la);
	lb = R_ABS(lb);
	if (la != lb) {
	 	len = R_MIN(la, lb);
		fprintf(stderr,
			"Buffer truncated to %d bytes (%d not compared)\n",
			len, R_ABS(lb-la));
	} else len = la;
	for(i = 0; i<len; i++) {
		if (a[i]!=b[i]) {
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
					.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
				};
				d->callback (d, d->user, &o);
				hit = 0;
			}
		}
	}
	if (hit>0) {
		struct r_diff_op_t o = {
			.a_off = d->off_a+i-hit, .a_buf = a+i-hit, .a_len = hit,
			.b_off = d->off_b+i-hit, .b_buf = b+i-hit, .b_len = hit
		};
		d->callback (d, d->user, &o);
		hit = 0;
	}
	return 0;
}

// XXX: temporary files are
R_API int r_diff_buffers_radiff(RDiff *d, const ut8 *a, int la, const ut8 *b, int lb) {
	char *ptr, *str, buf[64], oop = 0;
	int ret, atl, btl, hit;
	ut8 at[128], bt[128];
	ut64 ooa, oob;
	FILE *fd;

	hit = atl = btl = 0;
	ooa = oob = 0LL;
	oop = -1;

	r_file_dump (".a", a, la, 0);
	r_file_dump (".b", b, lb, 0);
	r_sys_cmd ("radiff -d .a .b | rsc uncolor > .d");
	fd = fopen (".d", "r");
	if (!fd) return 0;

	while (!feof (fd)) {
		ut64 oa, ob; // offset
		int ba, bb = 0; // byte
		char op; // operation

		oa = ob = 0LL;
		if (!fgets (buf, 63, fd))
			break;
		if (feof (fd))
			break;
		str = buf;

		ptr = strchr (buf, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &oa);

		str = r_str_ichr (ptr+1, ' ');
		if (*str!='|'&&*str!='>'&&*str!='<') {
			ptr = strchr (str, ' ');
			if (!ptr) continue;
			*ptr='\0';
			sscanf (str, "%02x", &ba);
		} else ba = 0;

		str = r_str_ichr (ptr+1, ' ');
		ptr = strchr (str, ' ');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "%c", &op);

		str = r_str_ichr (ptr+1, ' ');
		if (str[0]!='0' || str[1]!='x') {
			ptr = strchr(str, ' ');
			if (!ptr) continue;
			*ptr = '\0';
			sscanf (str, "%02x", &bb);
		}

		str = ptr+1;
		ptr = strchr (str, '\n');
		if (!ptr) continue;
		*ptr='\0';
		sscanf (str, "0x%08"PFMT64x"", &ob);

		if (oop == op || oop==-1) {
			if (hit == 0) {
				ooa = oa;
				oob = ob;
			}
			at[atl] = ba;
			bt[btl] = bb;
			switch (op) {
			case '|':
				atl++;
				btl++;
				break;
			case '>':
				btl++;
				break;
			case '<':
				atl++;
				break;
			}
			hit++;
		} else {
			if (hit>0) {
				struct r_diff_op_t o = {
					.a_off = ooa, .a_buf = at, .a_len = atl,
					.b_off = oob, .b_buf = bt, .b_len = btl
				};
				ret = d->callback(d, d->user, &o);
				if (!ret)
					break;
				atl = btl = 0;
				hit = 0;
			}
		}
		oop = op;
	}
	if (hit > 0) {
		struct r_diff_op_t o = {
			.a_off = ooa, .a_buf = at, .a_len = atl,
			.b_off = oob, .b_buf = bt, .b_len = btl
		};
		if (!d->callback (d, d->user, &o)) {
			fclose (fd);
			return 0;
		}
		atl = btl = 0;
		hit = 0;
	}
	fclose (fd);
	unlink (".a");
	unlink (".b");
	unlink (".d");
	return 0;
}

R_API int r_diff_buffers(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb) {
	if (d->delta) {
		return r_diff_buffers_delta (d, a, la, b, lb);
	}
	return r_diff_buffers_static (d, a, la, b, lb);
}

/* TODO: Move into r_util maybe? */
R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b, ut32 lb, ut32 *distance, double *similarity) {
	const bool verbose = d? d->verbose: false;
	/* 
	More memory efficient version on Levenshtein Distance from:
	https://en.wikipedia.org/wiki/Levenshtein_distance
	http://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm
	ObM..
	*/
	int i, j;
	/* TODO: ensure those pointers are allocated */
	int *v0 = (int*) calloc ((lb + 1), sizeof (int));
	int *v1 = (int*) calloc ((lb + 1), sizeof (int));	
	
	if (!a || !b || la < 1 || lb < 1) {
		free (v0);
		free (v1);
		return false;
	}

	if (la == lb && !memcmp (a, b, la)) {
		if (distance) {
			*distance = 0;
		}
		if (similarity) {
			*similarity = 1.0;
		}
		free (v0);
		free (v1);
		return true;
	}

	for (i = 0; i < lb + 1 ; i++) {
		v0[i] = i;
	}

	for (i = 0; i < la; i++) {
		v1[0] = i + 1;

		for (j = 0; j < lb; j++) {
			int cost = (a[i] == b[j]) ? 0 : 1;
			int smallest = R_MIN ((v1[j] + 1), (v0[j + 1] + 1));
			smallest = R_MIN (smallest, (v0[j] + cost));
			v1[j + 1] = smallest;
		}

		for (j = 0; j < lb + 1; j++) {
			v0[j] = v1[j];
		}
		if (verbose && (i % 10000 == 0))
			eprintf ("Processing %d of %d\r", i, la - 1);
	}
	if (verbose) {
		eprintf ("\rProcessing %d of %d\n", i, la - 1);
	}
	
	if (distance) {
		*distance = v1[lb];
		if (similarity) {
			double diff = (double) (*distance) / (double) (R_MAX (la, lb));
			*similarity = (double)1 - diff;
		}
	}
	free (v0);
	free (v1);
	return true;
}