Example #1
0
static u64 siphash(const u8 key[16], const unsigned char *m, const u64 n)
{
	u64 v0, v1, v2, v3;
	u64 k0, k1;
	u64 mi, mask, len;
	size_t i, k;

	k0 = *((u64*)(key + 0));
	k1 = *((u64*)(key + 8));

	v0 = k0 ^ 0x736f6d6570736575ULL;
	v1 = k1 ^ 0x646f72616e646f6dULL;
	v2 = k0 ^ 0x6c7967656e657261ULL;
	v3 = k1 ^ 0x7465646279746573ULL;

#define HALF_ROUND(a,b,c,d,s,t) \
	do \
	{ \
		a += b;  c += d; \
		b = rotl64(b, s); d = rotl64(d, t); \
		b ^= a;  d ^= c; \
	} while(0)

#define COMPRESS(v0,v1,v2,v3) \
	do \
	{ \
		HALF_ROUND(v0,v1,v2,v3,13,16); \
		v0 = rotl64(v0,32); \
		HALF_ROUND(v2,v1,v0,v3,17,21); \
		v2 = rotl64(v2, 32); \
	} while(0)

	for(i = 0; i < (n-n%8); i += 8)
	{
		mi = *((u64*)(m + i));
		v3 ^= mi;
		for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v0,v1,v2,v3);
		v0 ^= mi;
	}

	mi = *((u64*)(m + i));
	len = (n&0xff) << 56;
	mask = n%8 == 0 ? 0 : 0xffffffffffffffffULL >> (8*(8-n%8));
	mi = (mi&mask) ^ len;

	v3 ^= mi;
	for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v0,v1,v2,v3);
	v0 ^= mi;
	
	v2 ^= 0xff;
	for(k = 0; k < SIPHASH_FINALROUNDS; ++k) COMPRESS(v0,v1,v2,v3);

#undef COMPRESS
#undef HALF_ROUND
	return (v0 ^ v1) ^ (v2 ^ v3);
}
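The routine above leans on a few definitions from its surrounding file that are not shown: the u8/u64 typedefs, a rotl64 helper, and the SIPHASH_ROUNDS / SIPHASH_FINALROUNDS constants. A minimal sketch of those pieces, assuming the usual SipHash-2-4 parameters (only the names come from the code above; the rest is illustrative):

#include <stdint.h>

typedef uint8_t  u8;
typedef uint64_t u64;

/* SipHash-2-4: two compression rounds per message block, four finalization rounds. */
#define SIPHASH_ROUNDS      2
#define SIPHASH_FINALROUNDS 4

/* Rotate a 64-bit value left by s bits (s is always in 1..63 here). */
static inline u64 rotl64(u64 x, int s)
{
	return (x << s) | (x >> (64 - s));
}

With those in place, siphash(key, msg, len) hashes len bytes of msg under the 128-bit key. Note that the key and message are read through u64 casts, so this variant assumes a little-endian target that tolerates unaligned 64-bit loads.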
void LengauerTarjan::COMPRESS(uint32_t vertexV){
    if(ancestor[ancestor[vertexV]] != 0){
        COMPRESS(ancestor[vertexV]);
        if(semi[label[ancestor[vertexV]]] < semi[label[vertexV]]){
            label[vertexV] = label[ancestor[vertexV]];
        }
        ancestor[vertexV] = ancestor[ancestor[vertexV]];
    }
}
uint32_t LengauerTarjan::EVAL(uint32_t vertexV){
    if(ancestor[vertexV] == 0){
        return label[vertexV];
    }
    COMPRESS(vertexV);
    if(semi[label[ancestor[vertexV]]] >= semi[label[vertexV]]){
        return label[vertexV];
    }
    return label[ancestor[vertexV]];
}
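COMPRESS and EVAL above are the path-compression half of the Lengauer-Tarjan dominator-tree algorithm: ancestor[] is the link/eval forest (0 meaning "no ancestor"), semi[] holds semidominator DFS numbers, and label[v] caches the vertex with the smallest semi[] seen on the compressed path from v. A sketch of that state and of the LINK step they pair with, written in plain C with an illustrative bound (the class members above are assumed to mean the same thing):

#include <stdint.h>

#define MAX_VERTICES 1024                    /* illustrative bound */

static uint32_t ancestor[MAX_VERTICES];      /* forest parent; 0 = not linked yet */
static uint32_t label[MAX_VERTICES];         /* minimum-semi vertex on the path   */
static uint32_t semi[MAX_VERTICES];          /* semidominator DFS number          */

/* In the simple (non-balanced) variant, LINK just hangs w under v; all the
 * real work happens lazily in COMPRESS/EVAL when the path is next walked. */
static void LINK(uint32_t v, uint32_t w)
{
	ancestor[w] = v;
}

One practical caveat: the recursive COMPRESS can recurse once per vertex on a path-shaped forest, so production implementations often rewrite it iteratively with an explicit stack.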
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret;
	u32 store_offset;
	size_t clen;
	struct zobj_header *zheader;
	struct page *page, *page_store;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;

	page = bvec->bv_page;
	src = zram->compress_buffer;

	if (is_partial_io(bvec)) {
		/*
		 * This is a partial I/O. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!uncmem) {
			pr_info("Error allocating temp memory!\n");
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_read_before_write(zram, uncmem, index);
		if (ret) {
			kfree(uncmem);
			goto out;
		}
	}

	/*
	 * System overwrites unused sectors. Free memory associated
	 * with this sector now.
	 */
	if (zram->table[index].page ||
	    zram_test_flag(zram, index, ZRAM_ZERO))
		zram_free_page(zram, index);

	user_mem = kmap_atomic(page, KM_USER0);

	if (is_partial_io(bvec))
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
	else
		uncmem = user_mem;

	if (page_zero_filled(uncmem)) {
		kunmap_atomic(user_mem, KM_USER0);
		if (is_partial_io(bvec))
			kfree(uncmem);
		zram_stat_inc(&zram->stats.pages_zero);
		zram_set_flag(zram, index, ZRAM_ZERO);
		ret = 0;
		goto out;
	}

	ret = COMPRESS(uncmem, PAGE_SIZE, src, &clen,
		       zram->compress_workmem);

	kunmap_atomic(user_mem, KM_USER0);
	if (is_partial_io(bvec))
		kfree(uncmem);

	if (unlikely(ret != 0)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}

	/*
	 * Page is incompressible. Store it as-is (uncompressed)
	 * since we do not want to return too many disk write
	 * errors, which would have the side effect of hanging the system.
	 */
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
		if (unlikely(!page_store)) {
			pr_info("Error allocating memory for "
				"incompressible page: %u\n", index);
			ret = -ENOMEM;
			goto out;
		}

		store_offset = 0;
		zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
		zram_stat_inc(&zram->stats.pages_expand);
		zram->table[index].page = page_store;
		src = kmap_atomic(page, KM_USER0);
		goto memstore;
	}

	if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader),
		      &zram->table[index].page, &store_offset,
		      GFP_NOIO | __GFP_HIGHMEM)) {
		pr_info("Error allocating memory for compressed "
			"page: %u, size=%zu\n", index, clen);
		ret = -ENOMEM;
		goto out;
	}

memstore:
	zram->table[index].offset = store_offset;

	cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
		zram->table[index].offset;

#if 0
	/* Back-reference needed for memory defragmentation */
	if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) {
		zheader = (struct zobj_header *)cmem;
		zheader->table_idx = index;
		cmem += sizeof(*zheader);
	}
#endif

	memcpy(cmem, src, clen);

	kunmap_atomic(cmem, KM_USER1);
	if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)))
		kunmap_atomic(src, KM_USER0);

	/* Update stats */
	zram_stat64_add(zram, &zram->stats.compr_size, clen);
	zram_stat_inc(&zram->stats.pages_stored);
	if (clen <= PAGE_SIZE / 2)
		zram_stat_inc(&zram->stats.good_compress);

	return 0;

out:
	if (ret)
		zram_stat64_inc(zram, &zram->stats.failed_writes);
	return ret;
}
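zram_bvec_write() relies on a couple of small helpers from the same driver that are not reproduced here, notably is_partial_io() and page_zero_filled(). A hedged sketch of their likely shape, inferred from how they are used above rather than copied from any particular kernel tree:

/* A partial write covers less than a whole page, which is why the code
 * above has to read-modify-write through a temporary buffer. */
static inline int is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/* Pages that are entirely zero are never stored; only ZRAM_ZERO is set. */
static int page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page = ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++)
		if (page[pos])
			return 0;

	return 1;
}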
Example #5
0
//------------------------------EVAL-------------------------------------------
Tarjan *Tarjan::EVAL() {
  if( !_ancestor ) return _label;
  COMPRESS();
  return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
}
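This is the pointer-based flavour of the same EVAL: _ancestor links the node into the link/eval forest (NULL playing the role of the 0 sentinel in the array-based version earlier), _semi is the semidominator number, and _label caches the minimum-semi node on the compressed path. A minimal sketch, in C, of just the fields the method touches (anything beyond these three is a guess and omitted):

struct Tarjan {
	unsigned       _semi;      /* semidominator DFS number                */
	struct Tarjan *_ancestor;  /* parent in the link/eval forest, or NULL */
	struct Tarjan *_label;     /* minimum-semi node seen on the path      */
};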
Example #6
0
static void
md2_compress(MD2Context *cx)
{
	int j;
	unsigned char P;
	P = cx->checksum[MD2_CHECKSUM_SIZE-1];
	/* Compute the running checksum, and set the tmp variables to be 
	 * CV[i] XOR input[i] 
	 */
#define CKSUMFN(n) \
	P = cx->checksum[n] ^ MD2S[cx->X[MD2_INPUT+n] ^ P]; \
	cx->checksum[n] = P; \
	cx->X[MD2_TMPVARS+n] = cx->X[n] ^ cx->X[MD2_INPUT+n];
	CKSUMFN(0);
	CKSUMFN(1);
	CKSUMFN(2);
	CKSUMFN(3);
	CKSUMFN(4);
	CKSUMFN(5);
	CKSUMFN(6);
	CKSUMFN(7);
	CKSUMFN(8);
	CKSUMFN(9);
	CKSUMFN(10);
	CKSUMFN(11);
	CKSUMFN(12);
	CKSUMFN(13);
	CKSUMFN(14);
	CKSUMFN(15);
	/* The compression function. */
#define COMPRESS(n) \
	P = cx->X[n] ^ MD2S[P]; \
	cx->X[n] = P;
	P = 0x00;
	for (j=0; j<18; j++) {
		COMPRESS(0);
		COMPRESS(1);
		COMPRESS(2);
		COMPRESS(3);
		COMPRESS(4);
		COMPRESS(5);
		COMPRESS(6);
		COMPRESS(7);
		COMPRESS(8);
		COMPRESS(9);
		COMPRESS(10);
		COMPRESS(11);
		COMPRESS(12);
		COMPRESS(13);
		COMPRESS(14);
		COMPRESS(15);
		COMPRESS(16);
		COMPRESS(17);
		COMPRESS(18);
		COMPRESS(19);
		COMPRESS(20);
		COMPRESS(21);
		COMPRESS(22);
		COMPRESS(23);
		COMPRESS(24);
		COMPRESS(25);
		COMPRESS(26);
		COMPRESS(27);
		COMPRESS(28);
		COMPRESS(29);
		COMPRESS(30);
		COMPRESS(31);
		COMPRESS(32);
		COMPRESS(33);
		COMPRESS(34);
		COMPRESS(35);
		COMPRESS(36);
		COMPRESS(37);
		COMPRESS(38);
		COMPRESS(39);
		COMPRESS(40);
		COMPRESS(41);
		COMPRESS(42);
		COMPRESS(43);
		COMPRESS(44);
		COMPRESS(45);
		COMPRESS(46);
		COMPRESS(47);
		P = (P + j) % 256;
	}
	cx->unusedBuffer = MD2_BUFSIZE;
}
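md2_compress() indexes a 48-byte X buffer split into three 16-byte regions (chaining variable, current input block, temporaries) and a 256-entry substitution table MD2S derived from the digits of pi (RFC 1319). A hedged sketch of the context type and constants the function assumes; the exact struct layout is an assumption based on how the fields are used above:

#define MD2_CHECKSUM_SIZE 16   /* running checksum, one byte per block byte      */
#define MD2_BUFSIZE       16   /* MD2 processes the input in 16-byte blocks      */
#define MD2_X_SIZE        48   /* CV (16) + current input block (16) + tmp (16)  */
#define MD2_INPUT         16   /* offset of the input block within X             */
#define MD2_TMPVARS       32   /* offset of the CV ^ input scratch area within X */

typedef struct MD2ContextStr {
	unsigned char checksum[MD2_CHECKSUM_SIZE];
	unsigned char X[MD2_X_SIZE];
	unsigned char unusedBuffer;   /* bytes still free in the input block */
} MD2Context;

/* Fixed 256-byte S-box from RFC 1319, Appendix A; contents omitted here. */
extern const unsigned char MD2S[256];

Per RFC 1319, X[16..31] holds the current 16-byte block and X[32..47] holds block XOR CV before the 18-round mixing above runs over all 48 bytes of X.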
Example #7
0
u64 hashable_siphash24_sse2(u64 ik0, u64 ik1, const u8 *m, size_t n)
{
	__m128i v0, v1, v2, v3;
	__m128i k0, k1;
	__m128i mi, mask, len;
	size_t i, k;
	union { u64 gpr; __m128i xmm; } hash;
	const u8 *p;

	/* We used to use the _mm_set_epi32 intrinsic to initialize
	   SSE2 registers. This compiles to a movdqa instruction,
	   which requires 16-byte alignment. On 32-bit Windows, it
	   looks like ghc's runtime linker doesn't align ".rdata"
	   sections as requested, so we got segfaults for our trouble.

	   Now we use an intrinsic that cares less about alignment
	   (_mm_loadu_si128, aka movdqu) instead, and all seems
	   happy. */

	static const u32 iv[6][4] = {
		{ 0x70736575, 0x736f6d65, 0, 0 },
		{ 0x6e646f6d, 0x646f7261, 0, 0 },
		{ 0x6e657261, 0x6c796765, 0, 0 },
		{ 0x79746573, 0x74656462, 0, 0 },
		{ -1, -1, 0, 0 },
		{ 255, 0, 0, 0 },
	};

	k0 = _mm_loadl_epi64((__m128i*)(&ik0));
	k1 = _mm_loadl_epi64((__m128i*)(&ik1));

	v0 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[0]));
	v1 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[1]));
	v2 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[2]));
	v3 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[3]));

#define HALF_ROUND(a,b,c,d,s,t) \
	do \
	{ \
		a = _mm_add_epi64(a, b);  c = _mm_add_epi64(c, d); \
		b = _mm_roti_epi64(b, s); d = _mm_roti_epi64(d, t); \
		b = _mm_xor_si128(b, a);  d = _mm_xor_si128(d, c); \
	} while(0)

#define COMPRESS(v0,v1,v2,v3) \
	do \
	{ \
		HALF_ROUND(v0,v1,v2,v3,13,16); \
		v0 = _mm_shufflelo_epi16(v0, _MM_SHUFFLE(1,0,3,2)); \
		HALF_ROUND(v2,v1,v0,v3,17,21); \
		v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(1,0,3,2)); \
	} while(0)

	for(i = 0; i < (n-n%8); i += 8)
	{
		mi = _mm_loadl_epi64((__m128i*)(m + i));
		v3 = _mm_xor_si128(v3, mi);
		if (SIPHASH_ROUNDS == 2) {
			COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
		} else {
			for (k = 0; k < SIPHASH_ROUNDS; ++k)
				COMPRESS(v0,v1,v2,v3);
		}
		v0 = _mm_xor_si128(v0, mi);
	}

	p = m + n;

	/* We must be careful to not trigger a segfault by reading an
	   unmapped page. So where is the end of our input? */

	if (n % 8 == 0)
		/* No tail bytes are left over: the mask below is zero, and
		   m + i may already point one past the end of the input,
		   possibly at an unmapped page, so do not read at all. */
		mi = _mm_setzero_si128();
	else if ((uintptr_t)(m + i) + 7 <= ((uintptr_t)(p - 1) | 4095))
		/* The 8-byte read ends no later than the page holding the
		   last input byte: safe to read past the end, as we'll
		   mask out any bits we shouldn't have looked at below. */
		mi = _mm_loadl_epi64((__m128i*)(m + i));
	else
		/* The read would spill into the page after the last input
		   byte, which may be unmapped.  Re-read from the preceding
		   8-byte-aligned address (still inside the mapped page),
		   then shift the result right so the re-read bytes vanish. */
		mi = _mm_srli_epi64(_mm_loadl_epi64((__m128i*)((uintptr_t)(m + i) & ~(uintptr_t)7)),
				    8 * ((uintptr_t)(m + i) % 8));

	len = _mm_set_epi32(0, 0, (n&0xff) << 24, 0);
	mask = _mm_srli_epi64(_mm_loadu_si128((__m128i*) &iv[4]), 8*(8-n%8));
	mi = _mm_xor_si128(_mm_and_si128(mi, mask), len);

	v3 = _mm_xor_si128(v3, mi);
	if (SIPHASH_ROUNDS == 2) {
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
	} else {
		for (k = 0; k < SIPHASH_ROUNDS; ++k)
			COMPRESS(v0,v1,v2,v3);
	}
	v0 = _mm_xor_si128(v0, mi);

	v2 = _mm_xor_si128(v2, _mm_loadu_si128((__m128i*) &iv[5]));
	if (SIPHASH_FINALROUNDS == 4) {
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
	} else {
		for (k = 0; k < SIPHASH_FINALROUNDS; ++k)
			COMPRESS(v0,v1,v2,v3);
	}

	v0 = _mm_xor_si128(_mm_xor_si128(v0, v1), _mm_xor_si128(v2, v3));
	hash.xmm = v0;

#undef COMPRESS
#undef HALF_ROUND
	//return _mm_extract_epi32(v0, 0) | (((u64)_mm_extract_epi32(v0, 1)) << 32);
	return hash.gpr;
}
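One thing worth flagging for all three SSE2 variants in this file: _mm_roti_epi64 is not a baseline SSE2 intrinsic (a rotate of that name exists only for AMD's XOP extension), so these functions assume a macro or wrapper defined elsewhere in the translation unit. A sketch of a portable SSE2 emulation that matches the scalar rotl64 used in Example #1 (the #ifndef guard is illustrative):

#include <emmintrin.h>

/* Rotate each 64-bit lane left by the compile-time constant c. */
#ifndef _mm_roti_epi64
#define _mm_roti_epi64(x, c) \
	_mm_or_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64 - (c)))
#endif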
Example #8
0
u64 hashable_siphash24_sse2(u64 ik0, u64 ik1, const u8 *m, size_t n)
{
	__m128i v0, v1, v2, v3;
	__m128i k0, k1;
	__m128i mi, mask, len;
	size_t i, k;
	union { u64 gpr; __m128i xmm; } hash;

	/* We used to use the _mm_set_epi32 intrinsic to initialize
	   SSE2 registers. This compiles to a movdqa instruction,
	   which requires 16-byte alignment. On 32-bit Windows, it
	   looks like ghc's runtime linker doesn't align ".rdata"
	   sections as requested, so we got segfaults for our trouble.

	   Now we use an intrinsic that cares less about alignment
	   (_mm_loadu_si128, aka movdqu) instead, and all seems
	   happy. */

	static const u32 iv[6][4] = {
		{ 0x70736575, 0x736f6d65, 0, 0 },
		{ 0x6e646f6d, 0x646f7261, 0, 0 },
		{ 0x6e657261, 0x6c796765, 0, 0 },
		{ 0x79746573, 0x74656462, 0, 0 },
		{ -1, -1, 0, 0 },
		{ 255, 0, 0, 0 },
	};

	k0 = _mm_loadl_epi64((__m128i*)(&ik0));
	k1 = _mm_loadl_epi64((__m128i*)(&ik1));

	v0 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[0]));
	v1 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[1]));
	v2 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[2]));
	v3 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[3]));

#define HALF_ROUND(a,b,c,d,s,t) \
	do \
	{ \
		a = _mm_add_epi64(a, b);  c = _mm_add_epi64(c, d); \
		b = _mm_roti_epi64(b, s); d = _mm_roti_epi64(d, t); \
		b = _mm_xor_si128(b, a);  d = _mm_xor_si128(d, c); \
	} while(0)

#define COMPRESS(v0,v1,v2,v3) \
	do \
	{ \
		HALF_ROUND(v0,v1,v2,v3,13,16); \
		v0 = _mm_shufflelo_epi16(v0, _MM_SHUFFLE(1,0,3,2)); \
		HALF_ROUND(v2,v1,v0,v3,17,21); \
		v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(1,0,3,2)); \
	} while(0)

	for(i = 0; i < (n-n%8); i += 8)
	{
		mi = _mm_loadl_epi64((__m128i*)(m + i));
		v3 = _mm_xor_si128(v3, mi);
		if (SIPHASH_ROUNDS == 2) {
			COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
		} else {
			for (k = 0; k < SIPHASH_ROUNDS; ++k)
				COMPRESS(v0,v1,v2,v3);
		}
		v0 = _mm_xor_si128(v0, mi);
	}

	mi = _mm_loadl_epi64((__m128i*)(m + i));
	len = _mm_set_epi32(0, 0, (n&0xff) << 24, 0);
	mask = _mm_srli_epi64(_mm_loadu_si128((__m128i*) &iv[4]), 8*(8-n%8));
	mi = _mm_xor_si128(_mm_and_si128(mi, mask), len);

	v3 = _mm_xor_si128(v3, mi);
	if (SIPHASH_ROUNDS == 2) {
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
	} else {	
		for (k = 0; k < SIPHASH_ROUNDS; ++k)
			COMPRESS(v0,v1,v2,v3);
	}
	v0 = _mm_xor_si128(v0, mi);

	v2 = _mm_xor_si128(v2, _mm_loadu_si128((__m128i*) &iv[5]));
	if (SIPHASH_FINALROUNDS == 4) {
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
		COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3);
	} else {
		for (k = 0; k < SIPHASH_FINALROUNDS; ++k)
			COMPRESS(v0,v1,v2,v3);
	}

	v0 = _mm_xor_si128(_mm_xor_si128(v0, v1), _mm_xor_si128(v2, v3));
	hash.xmm = v0;

#undef COMPRESS
#undef HALF_ROUND
	//return _mm_extract_epi32(v0, 0) | (((u64)_mm_extract_epi32(v0, 1)) << 32);
	return hash.gpr;
}
Example #9
0
static u64 siphash(const u8 key[16], const unsigned char *m, const u64 n)
{
	__m128i v0, v1, v2, v3;
	__m128i k0, k1;
	__m128i mi, mask, len;
	size_t i, k;
	union { u64 gpr; __m128i xmm; } hash;

	k0 = _mm_loadl_epi64((__m128i*)(key + 0));
	k1 = _mm_loadl_epi64((__m128i*)(key + 8));

	v0 = _mm_xor_si128(k0, _mm_set_epi32(0, 0, 0x736f6d65, 0x70736575));
	v1 = _mm_xor_si128(k1, _mm_set_epi32(0, 0, 0x646f7261, 0x6e646f6d));
	v2 = _mm_xor_si128(k0, _mm_set_epi32(0, 0, 0x6c796765, 0x6e657261));
	v3 = _mm_xor_si128(k1, _mm_set_epi32(0, 0, 0x74656462, 0x79746573));

#define HALF_ROUND(a,b,c,d,s,t) \
	do \
	{ \
		a = _mm_add_epi64(a, b);  c = _mm_add_epi64(c, d); \
		b = _mm_roti_epi64(b, s); d = _mm_roti_epi64(d, t); \
		b = _mm_xor_si128(b, a);  d = _mm_xor_si128(d, c); \
	} while(0)

#define COMPRESS(v0,v1,v2,v3) \
	do \
	{ \
		HALF_ROUND(v0,v1,v2,v3,13,16); \
		v0 = _mm_shufflelo_epi16(v0, _MM_SHUFFLE(1,0,3,2)); \
		HALF_ROUND(v2,v1,v0,v3,17,21); \
		v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(1,0,3,2)); \
	} while(0)

	for(i = 0; i < (n-n%8); i += 8)
	{
		mi = _mm_loadl_epi64((__m128i*)(m + i));
		v3 = _mm_xor_si128(v3, mi);
		for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v0,v1,v2,v3);
		v0 = _mm_xor_si128(v0, mi);
	}

	mi = _mm_loadl_epi64((__m128i*)(m + i)); 
	len = _mm_set_epi32(0, 0, (n&0xff) << 24, 0);
	mask = _mm_srli_epi64(_mm_set_epi32(0, 0, 0xffffffff, 0xffffffff), 8*(8-n%8));
	mi = _mm_xor_si128(_mm_and_si128(mi, mask), len);

	v3 = _mm_xor_si128(v3, mi);
	for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v0,v1,v2,v3);
	v0 = _mm_xor_si128(v0, mi);
	
	v2 = _mm_xor_si128(v2, _mm_set_epi32(0, 0, 0, 0xff));
	for(k = 0; k < SIPHASH_FINALROUNDS; ++k) COMPRESS(v0,v1,v2,v3);

	v0 = _mm_xor_si128(_mm_xor_si128(v0, v1), _mm_xor_si128(v2, v3));
	hash.xmm = v0;

#undef COMPRESS
#undef HALF_ROUND
	//return _mm_extract_epi32(v0, 0) | (((u64)_mm_extract_epi32(v0, 1)) << 32);
	return hash.gpr;
}
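A final note on the SSE2 compression rounds: where the scalar code does rotl64(v0, 32), the vector versions use _mm_shufflelo_epi16(v, _MM_SHUFFLE(1,0,3,2)), which reorders the four low 16-bit words as [w2 w3 w0 w1] and therefore swaps the two 32-bit halves of the low 64-bit lane (the high lane is unused here). A small self-contained check of that equivalence, illustrative only:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t x = 0x0123456789abcdefULL;
	uint64_t r, expect;
	__m128i v = _mm_loadl_epi64((const __m128i *)&x);

	/* [w0 w1 w2 w3] -> [w2 w3 w0 w1]: a 32-bit rotation of the low lane. */
	v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(1, 0, 3, 2));
	_mm_storel_epi64((__m128i *)&r, v);

	expect = (x << 32) | (x >> 32);   /* rotl64(x, 32) */
	printf("%016llx %016llx %s\n",
	       (unsigned long long)r, (unsigned long long)expect,
	       r == expect ? "match" : "MISMATCH");
	return 0;
}

Compiled with SSE2 enabled (the default on x86-64), the two printed values should match.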