int main(int argc, char **argv) { int *a, *b, *c, *d, *e, *f, *g; a = (int *)malloc(sizeof(int)); b = (int *)malloc(sizeof(int)); c = (int *)malloc(sizeof(int)); d = (int *)malloc(sizeof(int)); e = (int *)malloc(sizeof(int)); f = (int *)malloc(sizeof(int)); g = (int *)malloc(sizeof(int)); *a = 6; *b = 1; *c = 99; *d = -17; *e = 22; *f = 9; *g = 6; struct Vector *iv = create_vector(2, free); vector_push_back(iv, a); vector_push_back(iv, b); vector_push_back(iv, c); vector_push_back(iv, d); vector_push_front(iv, e); vector_push_front(iv, f); vector_insert(iv, g, 5); vector_foreach(iv, print); printf("\nCount: %d, Capacity: %d--------------\n", vector_count(iv), vector_capacity(iv)); printf("%d\n", *f); vector_remove(iv, f, compare, TRUE); vector_foreach(iv, print); printf("\nCount: %d, Capacity: %d--------------\n", vector_count(iv), vector_capacity(iv)); vector_sort(iv, compare); vector_foreach(iv, print); printf("\nCount: %d, Capacity: %d--------------\n", vector_count(iv), vector_capacity(iv)); printf("\nBsearch: %d, Lower: %d, Upper: %d\n", vector_bsearch(iv, a, compare), vector_lower(iv, a, compare), vector_upper(iv, a, compare)); vector_shuffle(iv); vector_foreach(iv, print); printf("\nCount: %d, Capacity: %d--------------\n", vector_count(iv), vector_capacity(iv)); destroy_vector(iv, TRUE); }
/*
 * round256 -- full compression function of a BLAKE-style 256-bit hash,
 * run over four independent 32-bit lanes per v32 vector in parallel.
 *
 * NOTE(review): the 16/12/8/7 rotation pattern and the 10-entry Sigma
 * message schedule match the BLAKE-256 G function -- confirm against the
 * algorithm specification before relying on this description.
 *
 *   MM    : four v32 vectors holding the message block(s); interleaved
 *           into lane order below before the rounds run.
 *   H     : two v32 vectors of chaining state, updated in place at the
 *           end via the feed-forward H[0]^=A^C, H[1]^=B^D.
 *   count : 64-bit block counter, XORed into the initial D row.  On
 *           platforms without a 64-bit type it arrives split into
 *           count_low / count_high.
 *
 * Two code paths:
 *  - MMX_HACK: spills the 16 message words into MMX registers (and, on
 *    x86-64, r8-r15) via inline asm so the SSE registers stay free for
 *    the round state; the unrolled pre-permuted round code comes from
 *    the generated header "perm256.h".  _mm_empty() at the end leaves
 *    MMX mode so the x87 FPU is usable again.
 *  - generic: PERM(i) recomputes the Sigma[i%10] shuffle of the message
 *    vectors each round.
 *
 * The print_v32()/println() calls are debug tracing left enabled in
 * this build.
 */
void round256(v32* MM, v32* H,
#ifdef HAS_64
              u64 count
#else
              u32 count_low, u32 count_high
#endif
              ) {
  v32 A, B, C, D;
#ifdef MMX_HACK
  v32 M0 = v32_lswap(MM[0]), M1 = v32_lswap(MM[1]),
      M2 = v32_lswap(MM[2]), M3 = v32_lswap(MM[3]);
#ifdef __INTEL_COMPILER
  register __m64 mx0 __asm("%mm0"), mx1 __asm("%mm1"), mx2 __asm("%mm2"),
                 mx3 __asm("%mm3"), mx4 __asm("%mm4"), mx5 __asm("%mm5"),
                 mx6 __asm("%mm6"), mx7;
  u32 mx8, mx9, mx10, mx11, mx12, mx13, mx14, mx15;
  /* icc provides the SSE<->MMX/GPR move intrinsics directly */
#define sse_to_mmx _mm_movepi64_pi64
#define mmx_to_sse _mm_movpi64_epi64
#define sse_to_u32 _mm_cvtsi128_si32
#define u32_to_sse _mm_cvtsi32_si128
#else
  register volatile __m64 mx0 __asm("%mm0"), mx1 __asm("%mm1"),
                          mx2 __asm("%mm2"), mx3 __asm("%mm3"),
                          mx4 __asm("%mm4"), mx5 __asm("%mm5"),
                          mx6 __asm("%mm6"), mx7 __asm("%mm7");
#ifdef __x86_64__
  register volatile u32 mx8 __asm("%r8"), mx9 __asm("%r9"),
                        mx10 __asm("%r10"), mx11 __asm("%r11"),
                        mx12 __asm("%r12"), mx13 __asm("%r13"),
                        mx14 __asm("%r14"), mx15 __asm("%r15");
#else
  u32 mx8, mx9, mx10, mx11, mx12, mx13, mx14, mx15;
#endif
  /* gcc/clang: hand-written asm for the cross-register-file moves
     (GNU statement-expression extension) */
#define sse_to_mmx(x) ({ \
  __m64 t__; \
  __asm("movdq2q\t%1, %0": "=y"(t__): "x"(x)); \
  t__; \
  })
#define mmx_to_sse(x) ({ \
  v32 t__; \
  __asm("movq2dq\t%1, %0": "=x"(t__): "y"(x)); \
  t__; \
  })
#define sse_to_u32(x) ({ \
  u32 t__; \
  __asm("movd\t%1, %0": "=r"(t__): "x"(x)); \
  t__; \
  })
#define u32_to_sse(x) ({ \
  v32 t__; \
  __asm("movd\t%1, %0": "=x"(t__): "r"(x)); \
  t__; \
  })
#endif
  /* Peel the four words of each message vector into the spill registers,
     rotating the vector by one lane between extractions. */
  mx0 = sse_to_mmx(M0); M0 = v32_shufrot(M0,1);
  mx1 = sse_to_mmx(M0); M0 = v32_shufrot(M0,1);
  mx2 = sse_to_mmx(M0); M0 = v32_shufrot(M0,1);
  mx3 = sse_to_mmx(M0);
  mx4 = sse_to_mmx(M1); M1 = v32_shufrot(M1,1);
  mx5 = sse_to_mmx(M1); M1 = v32_shufrot(M1,1);
  mx6 = sse_to_mmx(M1); M1 = v32_shufrot(M1,1);
  mx7 = sse_to_mmx(M1);
  mx8 = sse_to_u32(M2); M2 = v32_shufrot(M2,1);
  mx9 = sse_to_u32(M2); M2 = v32_shufrot(M2,1);
  mx10 = sse_to_u32(M2); M2 = v32_shufrot(M2,1);
  mx11 = sse_to_u32(M2);
  mx12 = sse_to_u32(M3); M3 = v32_shufrot(M3,1);
  mx13 = sse_to_u32(M3); M3 = v32_shufrot(M3,1);
  mx14 = sse_to_u32(M3); M3 = v32_shufrot(M3,1);
  mx15 = sse_to_u32(M3);
  /* MX(i) reloads message word i back into an SSE register. */
#define MX(i) MX_##i
#define MX_0 mmx_to_sse(mx0)
#define MX_1 mmx_to_sse(mx1)
#define MX_2 mmx_to_sse(mx2)
#define MX_3 mmx_to_sse(mx3)
#define MX_4 mmx_to_sse(mx4)
#define MX_5 mmx_to_sse(mx5)
#define MX_6 mmx_to_sse(mx6)
#define MX_7 mmx_to_sse(mx7)
#define MX_8 u32_to_sse(mx8)
#define MX_9 u32_to_sse(mx9)
#define MX_10 u32_to_sse(mx10)
#define MX_11 u32_to_sse(mx11)
#define MX_12 u32_to_sse(mx12)
#define MX_13 u32_to_sse(mx13)
#define MX_14 u32_to_sse(mx14)
#define MX_15 u32_to_sse(mx15)
  /* Generated, fully-unrolled round code with baked-in permutations. */
#include "perm256.h"
#else
  v32 M0 = MM[0], M1 = MM[2], M2 = MM[1], M3 = MM[3]; // revlex order
  /* PERM(i): apply message permutation Sigma[i%10] to the four message
     vectors, then merge the shuffled bytes back into word order. */
#define PERM(i) do { \
  m0 = V832(vector_shuffle(V328(M0), Sigma[i%10].v8)); \
  m2 = V832(vector_shuffle(V328(M1), Sigma[i%10].v8)); \
  m1 = V832(vector_shuffle(V328(M2), Sigma[i%10].v8)); \
  m3 = V832(vector_shuffle(V328(M3), Sigma[i%10].v8)); \
  v8_merge_inplace_32(m0,m2); \
  v8_merge_inplace_32(m1,m3); \
  v16_merge_inplace(m0,m1); \
  v16_merge_inplace(m2,m3); \
  } while(0)
#endif
  v32 m0, m1, m2, m3;
  /* debug tracing: dump the raw message block */
  print_v32(MM[0]);
  print_v32(MM[1]);
  print_v32(MM[2]);
  print_v32(MM[3]);
  println();
  println();
#ifdef SIMD_BIG_ENDIAN
  /* This permutation interleaves bytes */
  static const union cv8 interleave_bytes = {{ 0, 4, 8, 12,
                                               1, 5, 9, 13,
                                               2, 6, 10, 14,
                                               3, 7, 11, 15 }};
#elif defined(SIMD_LITTLE_ENDIAN)
  /* This permutation swaps endianness, and interleaves bytes */
  static const union cv8 interleave_bytes = {{ 3, 7, 11, 15,
                                               2, 6, 10, 14,
                                               1, 5, 9, 13,
                                               0, 4, 8, 12 }};
#else
#error "Unkown endianness! Unable to compile."
#endif
  /* byte shuffles implementing per-word rotations by 16 and by 8 bits */
  static const union cv8 rot16 = {{ 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 }};
  static const union cv8 rot8 = {{ 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 }};
  /* message-word permutation schedule: 10 distinct permutations, with the
     first four repeated so rounds 10..13 can index the table directly */
  static const union cv8 Sigma[] = {
    {{ 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}},
    {{ 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}},
    {{ 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}},
    {{ 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}},
    {{ 9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}},
    {{ 2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}},
    {{ 12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}},
    {{ 13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}},
    {{ 6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}},
    {{ 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}},
    {{ 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}},
    {{ 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}},
    {{ 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}},
    {{ 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}},
  };
  /* initialization constants for the C and D rows */
  static const union u32 Const[] = {
    {{0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344}},
    {{0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89}},
  };
  /* per-round constants XORed into the permuted message words,
     four entries per round (indexed SConst[4*(i%10)+k], k=0..3) */
  static const union u32 SConst[] = {
    {{0x85a308d3, 0x03707344, 0x299f31d0, 0xec4e6c89}},
    {{0x243f6a88, 0x13198a2e, 0xa4093822, 0x082efa98}},
    {{0x38d01377, 0x34e90c6c, 0xc97c50dd, 0xb5470917}},
    {{0x452821e6, 0xbe5466cf, 0xc0ac29b7, 0x3f84d5b5}},
    {{0xbe5466cf, 0x452821e6, 0xb5470917, 0x082efa98}},
    {{0x3f84d5b5, 0xa4093822, 0x38d01377, 0xc97c50dd}},
    {{0xc0ac29b7, 0x13198a2e, 0xec4e6c89, 0x03707344}},
    {{0x85a308d3, 0x243f6a88, 0x34e90c6c, 0x299f31d0}},
    {{0x452821e6, 0x243f6a88, 0x13198a2e, 0xc97c50dd}},
    {{0x34e90c6c, 0xc0ac29b7, 0x299f31d0, 0xb5470917}},
    {{0x3f84d5b5, 0x082efa98, 0x85a308d3, 0xa4093822}},
    {{0xbe5466cf, 0x03707344, 0xec4e6c89, 0x38d01377}},
    {{0x38d01377, 0x85a308d3, 0xc0ac29b7, 0x3f84d5b5}},
    {{0xec4e6c89, 0x03707344, 0xc97c50dd, 0x34e90c6c}},
    {{0x082efa98, 0xbe5466cf, 0x243f6a88, 0x452821e6}},
    {{0x13198a2e, 0x299f31d0, 0xa4093822, 0xb5470917}},
    {{0x243f6a88, 0xec4e6c89, 0xa4093822, 0xb5470917}},
    {{0x38d01377, 0x299f31d0, 0x13198a2e, 0xbe5466cf}},
    {{0x85a308d3, 0xc0ac29b7, 0x452821e6, 0xc97c50dd}},
    {{0x3f84d5b5, 0x34e90c6c, 0x082efa98, 0x03707344}},
    {{0xc0ac29b7, 0xbe5466cf, 0x34e90c6c, 0x03707344}},
    {{0x13198a2e, 0x082efa98, 0x243f6a88, 0x452821e6}},
    {{0xc97c50dd, 0x299f31d0, 0x3f84d5b5, 0x38d01377}},
    {{0xa4093822, 0xec4e6c89, 0xb5470917, 0x85a308d3}},
    {{0x299f31d0, 0xb5470917, 0xc97c50dd, 0xbe5466cf}},
    {{0xc0ac29b7, 0x85a308d3, 0x3f84d5b5, 0xa4093822}},
    {{0xec4e6c89, 0x03707344, 0x13198a2e, 0x34e90c6c}},
    {{0x243f6a88, 0x082efa98, 0x38d01377, 0x452821e6}},
    {{0x34e90c6c, 0x3f84d5b5, 0x85a308d3, 0x38d01377}},
    {{0xc97c50dd, 0xec4e6c89, 0xc0ac29b7, 0x03707344}},
    {{0x243f6a88, 0xa4093822, 0x082efa98, 0xbe5466cf}},
    {{0x299f31d0, 0xb5470917, 0x452821e6, 0x13198a2e}},
    {{0xb5470917, 0x38d01377, 0x03707344, 0x452821e6}},
    {{0x082efa98, 0x3f84d5b5, 0x34e90c6c, 0x243f6a88}},
    {{0x13198a2e, 0xec4e6c89, 0xa4093822, 0x299f31d0}},
    {{0xc0ac29b7, 0xc97c50dd, 0x85a308d3, 0xbe5466cf}},
    {{0x13198a2e, 0xa4093822, 0x082efa98, 0x299f31d0}},
    {{0xbe5466cf, 0x452821e6, 0xec4e6c89, 0x85a308d3}},
    {{0x34e90c6c, 0x3f84d5b5, 0xc0ac29b7, 0x243f6a88}},
    {{0xb5470917, 0x38d01377, 0x03707344, 0xc97c50dd}},
  };
  /* broadcast the block counter: low half into lanes 0-1, high into 2-3 */
  union u32 cnt;
#ifdef HAS_64
  cnt.u[0] = cnt.u[1] = count & 0xffffffff;
  cnt.u[2] = cnt.u[3] = count >> 32;
#else
  cnt.u[0] = cnt.u[1] = count_low;
  cnt.u[2] = cnt.u[3] = count_high;
#endif
  // print_v32(cnt.v);
  /* initial state: chaining value in A/B, constants in C, counter-masked
     constant in D */
  A = H[0];
  B = H[1];
  C = Const[0].v;
  D = v32_xor(Const[1].v, cnt.v);
  /* interleave the message bytes into lane order (and byte-swap on LE) */
  M0 = V832(vector_shuffle(V328(M0), interleave_bytes.v8));
  M1 = V832(vector_shuffle(V328(M1), interleave_bytes.v8));
  M2 = V832(vector_shuffle(V328(M2), interleave_bytes.v8));
  M3 = V832(vector_shuffle(V328(M3), interleave_bytes.v8));
  v32_interleave_inplace(M0,M2);
  v32_interleave_inplace(M1,M3);
  v64_interleave_inplace(M0,M1);
  v64_interleave_inplace(M2,M3);
  /* One round: two double-quarter-round passes with shufrot
     re-diagonalization between them; 32-bit rotations by 16 and 8 are
     done as byte shuffles, 12 and 7 via v32_rotate(·, 32-n).
     The print_v32/println calls are debug tracing. */
#define ROUND(i) do { \
  PERM(i); \
  println(); \
  print_v32(A); \
  print_v32(B); \
  print_v32(C); \
  print_v32(D); \
  println(); \
  print_v32(m0); \
  print_v32(m1); \
  print_v32(m2); \
  print_v32(m3); \
  println(); \
  \
  m0= v32_xor(m0,SConst[4*(i%10)+0].v); \
  A = v32_add(v32_add(A,m0),B); \
  print_v32(A); \
  D = v32_xor(A,D); \
  D = V832(vector_shuffle(V328(D), rot16.v8)); \
  print_v32(D); \
  C = v32_add(C, D); \
  print_v32(C); \
  B = v32_xor(B,C); \
  B = v32_rotate(B, 32-12); \
  print_v32(B); \
  m1= v32_xor(m1,SConst[4*(i%10)+1].v); \
  A = v32_add(v32_add(A,m1),B); \
  print_v32(A); \
  D = v32_xor(D, A); \
  D = V832(vector_shuffle(V328(D), rot8.v8)); \
  print_v32(D); \
  C = v32_add(C, D); \
  print_v32(C); \
  B = v32_xor(B,C); \
  B = v32_rotate(B, 32-7); \
  print_v32(B); \
  \
  println(); \
  print_v32(A); \
  print_v32(B); \
  print_v32(C); \
  print_v32(D); \
  println(); \
  \
  /* rotate rows onto the diagonals */ \
  D = v32_shufrot (D, 3); \
  C = v32_shufrot (C, 2); \
  B = v32_shufrot (B, 1); \
  \
  m2= v32_xor(m2,SConst[4*(i%10)+2].v); \
  A = v32_add(v32_add(A,m2),B); \
  D = v32_xor(A,D); \
  D = V832(vector_shuffle(V328(D), rot16.v8)); \
  C = v32_add(C, D); \
  B = v32_xor(B,C); \
  B = v32_rotate(B, 32-12); \
  m3= v32_xor(m3,SConst[4*(i%10)+3].v); \
  A = v32_add(v32_add(A,m3),B); \
  D = v32_xor(D, A); \
  D = V832(vector_shuffle(V328(D), rot8.v8)); \
  C = v32_add(C, D); \
  B = v32_xor(B,C); \
  B = v32_rotate(B, 32-7); \
  \
  /* rotate the diagonals back to rows */ \
  D = v32_shufrot (D, 1); \
  C = v32_shufrot (C, 2); \
  B = v32_shufrot (B, 3); \
  } while(0)
#if 1
  ROUND( 0);
  ROUND( 1);
  ROUND( 2);
  ROUND( 3);
  ROUND( 4);
  ROUND( 5);
  ROUND( 6);
  ROUND( 7);
  ROUND( 8);
  ROUND( 9);
  ROUND(10);
  ROUND(11);
  ROUND(12);
  ROUND(13);
#else
  for (int i=0; i<14; i++)
    ROUND(i);
#endif
  /* feed-forward: fold the final state back into the chaining value */
  H[0] = v32_xor(H[0], v32_xor(A,C));
  H[1] = v32_xor(H[1], v32_xor(B,D));
#undef ROUND
#undef PERM
#ifdef MMX_HACK
  _mm_empty(); /* leave MMX mode so the x87 FPU is usable again */
#endif
}
int main(int argc, char **argv) { vector *v; string *s; string **tmp; string *sup; int i; v = vector_init(stcmp); sup = string_init_cstring("6"); while(--argc) { s = string_init_cstring(argv[argc]); vector_append(v, s); } printf("size: %d\n", v->len); printf("---\n"); /* Print. */ for(i = 0; i < v->len; ++i) string_println(vector_get(v, i)); printf("---\n"); /* Sort and print again. */ vector_sort(v); for(i = 0; i < v->len; ++i) string_println(vector_get(v, i)); printf("---\n"); /* Search for sup. */ if(vector_index(v, sup) != -1) string_print(sup), printf(" found.\n"); else string_print(sup), printf(" not found.\n"); printf("---\n"); /* Using bsearch. */ tmp = vector_search(v, sup); if(tmp) { string_print(*tmp); printf(" found using bsearch.\n"); } else { string_print(sup); printf(" bsearch failed.\n"); } printf("---\n"); /* Shuffle and print again. */ vector_shuffle(v); for(i = 0; i < v->len; ++i) string_println(vector_get(v, i)); printf("---\n"); for(i = 0; i < v->len; ++i) string_free(vector_get(v, i)); string_free(sup); vector_free(v); return 0; }