static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
{
    unsigned int* In = (unsigned int*)pin;
    unsigned int* Pad = (unsigned int*)pad;
    unsigned int* hPre = (unsigned int*)pre;
    unsigned int* hInit = (unsigned int*)init;
    unsigned int /* i, j, */ k;

    /* vectors used in calculation */
    vector unsigned int w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
    vector unsigned int T1;
    vector unsigned int a, b, c, d, e, f, g, h;
    vector unsigned int nonce, preNonce;

    /* nonce offset for vector */
    vector unsigned int offset = (vector unsigned int)(0, 1, 2, 3);

    preNonce = vec_add((vector unsigned int)(In[3],In[3],In[3],In[3]), offset);

   for(k = 0; k<NPAR; k+=4)
   {
        w0 = (vector unsigned int)(In[0],In[0],In[0],In[0]);
        w1 = (vector unsigned int)(In[1],In[1],In[1],In[1]);
        w2 = (vector unsigned int)(In[2],In[2],In[2],In[2]);
        //w3 = (vector unsigned int)(In[3],In[3],In[3],In[3]); nonce will be later hacked into the hash
        w4 = (vector unsigned int)(In[4],In[4],In[4],In[4]);
        w5 = (vector unsigned int)(In[5],In[5],In[5],In[5]);
        w6 = (vector unsigned int)(In[6],In[6],In[6],In[6]);
        w7 = (vector unsigned int)(In[7],In[7],In[7],In[7]);
        w8 = (vector unsigned int)(In[8],In[8],In[8],In[8]);
        w9 = (vector unsigned int)(In[9],In[9],In[9],In[9]);
        w10 = (vector unsigned int)(In[10],In[10],In[10],In[10]);
        w11 = (vector unsigned int)(In[11],In[11],In[11],In[11]);
        w12 = (vector unsigned int)(In[12],In[12],In[12],In[12]);
        w13 = (vector unsigned int)(In[13],In[13],In[13],In[13]);
        w14 = (vector unsigned int)(In[14],In[14],In[14],In[14]);
        w15 = (vector unsigned int)(In[15],In[15],In[15],In[15]);

        /* hack nonce into lowest byte of w3 */
	nonce = vec_add(preNonce, (vector unsigned int)(k,k,k,k));

        w3 = nonce;
        //printf ("W3: %08vlx\n", w3);

        a = (vector unsigned int)(hPre[0],hPre[0],hPre[0],hPre[0]);
        b = (vector unsigned int)(hPre[1],hPre[1],hPre[1],hPre[1]);
        c = (vector unsigned int)(hPre[2],hPre[2],hPre[2],hPre[2]);
        d = (vector unsigned int)(hPre[3],hPre[3],hPre[3],hPre[3]);
        e = (vector unsigned int)(hPre[4],hPre[4],hPre[4],hPre[4]);
        f = (vector unsigned int)(hPre[5],hPre[5],hPre[5],hPre[5]);
        g = (vector unsigned int)(hPre[6],hPre[6],hPre[6],hPre[6]);
        h = (vector unsigned int)(hPre[7],hPre[7],hPre[7],hPre[7]);

        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

#define store_load(x, i, dest) \
        T1 = (vector unsigned int)((hPre)[i],(hPre)[i],(hPre)[i],(hPre)[i]); \
        dest = vec_add(T1, x);

        store_load(a, 0, w0);
        store_load(b, 1, w1);
        store_load(c, 2, w2);
        store_load(d, 3, w3);
        store_load(e, 4, w4);
        store_load(f, 5, w5);
        store_load(g, 6, w6);
        store_load(h, 7, w7);

        /* end of first SHA256 round */

        w8 = (vector unsigned int)(Pad[8],Pad[8],Pad[8],Pad[8]);
        w9 = (vector unsigned int)(Pad[9],Pad[9],Pad[9],Pad[9]);
        w10 = (vector unsigned int)(Pad[10],Pad[10],Pad[10],Pad[10]);
        w11 = (vector unsigned int)(Pad[11],Pad[11],Pad[11],Pad[11]);
        w12 = (vector unsigned int)(Pad[12],Pad[12],Pad[12],Pad[12]);
        w13 = (vector unsigned int)(Pad[13],Pad[13],Pad[13],Pad[13]);
        w14 = (vector unsigned int)(Pad[14],Pad[14],Pad[14],Pad[14]);
        w15 = (vector unsigned int)(Pad[15],Pad[15],Pad[15],Pad[15]);

        a = (vector unsigned int)(hInit[0],hInit[0],hInit[0],hInit[0]);
        b = (vector unsigned int)(hInit[1],hInit[1],hInit[1],hInit[1]);
        c = (vector unsigned int)(hInit[2],hInit[2],hInit[2],hInit[2]);
        d = (vector unsigned int)(hInit[3],hInit[3],hInit[3],hInit[3]);
        e = (vector unsigned int)(hInit[4],hInit[4],hInit[4],hInit[4]);
        f = (vector unsigned int)(hInit[5],hInit[5],hInit[5],hInit[5]);
        g = (vector unsigned int)(hInit[6],hInit[6],hInit[6],hInit[6]);
        h = (vector unsigned int)(hInit[7],hInit[7],hInit[7],hInit[7]);

        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);

	/* Skip last 3-rounds; not necessary for H==0 */
/*#if 0
        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
#endif*/

        /* store resulsts directly in thash */
#define store_2(x,i)  \
        w0 = (vector unsigned int)(hInit[i],hInit[i],hInit[i],hInit[i]); \
        vec_st(vec_add(w0, x), 0 ,&thash[i][k]);

        store_2(a, 0);
        store_2(b, 1);
        store_2(c, 2);
        store_2(d, 3);
        store_2(e, 4);
        store_2(f, 5);
        store_2(g, 6);
        store_2(h, 7);

        vec_st(nonce, 0 ,&thash[8][k]);
        /* writing the results into the array is time intensive */
        /* -> try if it´s faster to compare the results with the target inside this function */
    }

}
Esempio n. 2
0
static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint32_t T1, T2;

#if	defined(__sparc)
	static const uint32_t sha256_consts[] = {
		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
		SHA256_CONST_63
	};
#endif	/* __sparc */

	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}