// Test entry point. Registers faultHandler and tlb_miss_handler through the
// control registers, writes FLAG_MMU_EN | FLAG_SUPERVISOR_EN to CR_FLAGS, and
// then issues a scatter store in which one lane carries a bad address. Per the
// comments below, the store is expected to fault and hand control to
// faultHandler, so the code after it should never run.
int main(void)
{
    // Per-lane store addresses: every entry is &foo except the one at index 6,
    // which is 0x17 (an odd address).
    veci16_t pointers = { &foo, &foo, &foo, &foo, &foo, &foo, 0x17,  &foo, &foo, &foo, &foo, &foo, &foo, &foo, &foo, &foo };

    // Install the handlers before setting the flags, so both are already in
    // place when the flag write takes effect.
    __builtin_nyuzi_write_control_reg(CR_FAULT_HANDLER, (unsigned int) faultHandler);
    __builtin_nyuzi_write_control_reg(CR_TLB_MISS_HANDLER, (unsigned int) tlb_miss_handler);
    __builtin_nyuzi_write_control_reg(CR_FLAGS, FLAG_MMU_EN | FLAG_SUPERVISOR_EN);

    // Calling printf once here ensures the libc functions are mapped into the
    // TLB, so the fault handler does not trigger additional TLB misses. This
    // does not affect the test result; it only makes debugging cleaner.
    printf("Starting test %d\n", 12);

    // This will cause an alignment fault on the lane holding 0x17 (index 6 of
    // 'pointers') and jump to 'faultHandler'. A scatter store is used rather
    // than a normal scalar store to ensure the subcycle counter is saved
    // correctly.
    __builtin_nyuzi_scatter_storei(pointers, __builtin_nyuzi_makevectori(0));

    // Only reached if the fault was not taken; presumably the trailing harness
    // directive makes the test fail if this text appears in the output.
    printf("should_not_be_here\n"); // CHECKN: should_not_be_here

    return 0;
}
// Example #2
// 0
// Runs 16 hashes in parallel, one per vector lane.
//
// pointers    - per-lane address of the next input word. Each block gathers
//               16 words, stepping every address by 4 after each gather.
// totalBlocks - number of 16-word blocks to consume for every lane.
// outHashes   - per-lane destination address. The eight final state words are
//               scattered to byte offsets 0, 4, ..., 28 from it.
//
// No padding or length field is appended to the input.
// NOTE(review): SIG0/SIG1 are applied both in the message schedule and in the
// compression rounds; FIPS 180-4 uses different functions for the two places.
// Confirm whether bit-exact SHA-256 output is intended.
void sha2Hash(vecu16_t pointers, int totalBlocks, vecu16_t outHashes)
{
	// Starting values of the eight state words.
	static const unsigned int initialState[8] = {
		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
	};

	// Running hash state: one vector (all lanes) per state word.
	vecu16_t digest[8];
	for (int k = 0; k < 8; k++)
		digest[k] = __builtin_nyuzi_makevectori(initialState[k]);

	for (int block = 0; block < totalBlocks; block++)
	{
		vecu16_t schedule[64];

		// The first 16 schedule entries are gathered straight from memory.
		for (int t = 0; t < 16; t++)
		{
			schedule[t] = __builtin_nyuzi_gather_loadi(pointers);
			pointers += __builtin_nyuzi_makevectori(4);
		}

		// The remaining 48 entries are derived from earlier ones.
		for (int t = 16; t < 64; t++)
		{
			schedule[t] = SIG1(schedule[t - 2]) + schedule[t - 7]
				+ SIG0(schedule[t - 15]) + schedule[t - 16];
		}

		// Working variables a..h live in work[0]..work[7].
		vecu16_t work[8];
		for (int k = 0; k < 8; k++)
			work[k] = digest[k];

		for (int round = 0; round < 64; round++)
		{
			vecu16_t temp1 = work[7] + SIG1(work[4]) + CH(work[4], work[5], work[6])
				+ __builtin_nyuzi_makevectori(K[round]) + schedule[round];
			vecu16_t temp2 = SIG0(work[0]) + MA(work[0], work[1], work[2]);

			// Shift every working variable down one slot (h = g, ..., b = a).
			for (int k = 7; k > 0; k--)
				work[k] = work[k - 1];

			// After the shift work[4] holds the old 'd', so this is e = d + temp1.
			work[4] += temp1;
			work[0] = temp1 + temp2;
		}

		// Fold this block's result into the running state.
		for (int k = 0; k < 8; k++)
			digest[k] += work[k];
	}

	// Write the eight state words to consecutive 4-byte slots per lane.
	vecu16_t dest = outHashes;
	for (int k = 0; k < 8; k++)
	{
		__builtin_nyuzi_scatter_storei(dest, digest[k]);
		dest += __builtin_nyuzi_makevectori(4);
	}
}