Ejemplo n.º 1
0
/*
 * Runs the LMem read kernel and then streams the "toCpu" output into dataOut.
 *
 * dataOut             host buffer that receives sizeBytes bytes from "toCpu"
 * size                tick count for the read kernel; also used (times 8) to
 *                     derive the burst count -- presumably a count of 64-bit
 *                     words, TODO confirm against the caller
 * sizeBytes           number of bytes queued on the "toCpu" output stream
 * burstLengthInBytes  burst size used to derive totalBursts / wordsPerBurst
 * engine, maxfile     SLiC engine and maxfile handles to run against
 */
void readDataFromLMem(uint64_t *dataOut, int size, int sizeBytes, int burstLengthInBytes, max_engine_t *engine, max_file_t *maxfile)
{
	const char *const writeKernel = "KernelLMem_Write_CommandAndDataStream";
	const char *const readKernel = "KernelLMem_Read_CommandAndDataStream";

	max_actions_t *readActions = max_actions_init(maxfile, NULL);

	/* Both kernels receive the same burst geometry. */
	const int burstCount = size * 8 / burstLengthInBytes;
	const int burstWords = burstLengthInBytes / 8;

	/* The write kernel is configured but given zero ticks. */
	max_set_ticks(readActions, writeKernel, 0);
	max_set_uint64t(readActions, writeKernel, "totalBursts", burstCount);
	max_set_uint64t(readActions, writeKernel, "wordsPerBurst", burstWords);

	/* The read kernel runs for `size` ticks. */
	max_set_ticks(readActions, readKernel, size);
	max_set_uint64t(readActions, readKernel, "totalBursts", burstCount);
	max_set_uint64t(readActions, readKernel, "wordsPerBurst", burstWords);
	max_run(engine, readActions);

	/* The engine is reset between the two runs. */
	max_reset_engine(engine);

	/* Same action set, now with the host output queued, is run a second time. */
	max_queue_output(readActions, "toCpu", dataOut, sizeBytes);
	max_run(engine, readActions);
	max_actions_free(readActions);
}
Ejemplo n.º 2
0
/**
 * Streams the q data set from the DFE into dfeQ and scatters it back into
 * the per-partition arrays of the domain.
 *
 * Only the "qRead" LMem stream and the "qCPUOut" host output are active; all
 * other LMem streams and all three kernels are explicitly ignored for this
 * action. After the run, cell i of dfeQ (4 values) is copied to
 * (*domain).q[part][ind*4 .. ind*4+3], where (part, ind) is the pair stored
 * at readlocs[2*i], readlocs[2*i+1].
 */
void AirfoilDFEInterface::runOutputAction () {

	max_actions_t * act =  max_actions_init(maxfile, NULL);

	// qDatSize is used both as the host transfer size and the LMem span
	// (presumably in bytes -- TODO confirm where qDatSize is computed).
	max_queue_output(act, "qCPUOut", dfeQ, qDatSize);
	max_lmem_linear(act, "qRead", memAddresses[q], qDatSize);

	// Everything not involved in the read-back is switched off.
	max_ignore_lmem(act, "setupWrite");
	max_ignore_lmem(act, "updateQ");
	max_ignore_lmem(act, "updateQold");
	max_ignore_lmem(act, "updateSaveQold");
	max_ignore_lmem(act, "adtQ");
	max_ignore_lmem(act, "adtDxRead");
	max_ignore_lmem(act, "resReadOnly");
	max_ignore_kernel(act, "AirfoilDFEResKernel");
	max_ignore_kernel(act, "AirfoilDFEAdtKernel");
	max_ignore_kernel(act, "AirfoilDFEUpdateKernel");
	max_run(engine, act);
	max_actions_free(act);

	// Scatter the linear DFE ordering back to (partition, index) ordering.
	// The former thispart/thisind counters were never read for anything but
	// their own update and indexed (*domain).ncell without a bound, so they
	// have been dropped.
	for (int i = 0; i < (*domain).ncellcomputedfe; i++){

		const int cellpart = readlocs[2*i];
		const int cellind =  readlocs[2*i+1];

		for (int j = 0; j < 4; j ++) (*domain).q[cellpart][cellind*4+j] = dfeQ[i*4+j];
	}
}
/*
 * Host harness for the CNN_FW_MaxPool_V0_DP_L0_0 maxfile.
 *
 * Lays out four consecutive LMem regions (z2 | sel | z | a), uploads z2,
 * runs the "default" action, then reads sel, z and a back to the host.
 * Returns 0 on success and 1 if a host buffer, the maxfile or the engine
 * could not be obtained.
 */
int main(void)
{
	const int no = 50;
	const int k = 2;
	const int row = 24;
	const int col = 24;
	const int batch_size = 384;

	/* Region offsets/sizes; each region starts where the previous one ends. */
	int z2_offset = 0;
	int z2_size = no*row*col*batch_size*sizeof(real);
	int sel_offset = z2_offset+z2_size;
	int sel_size = no*row*col*batch_size/8;
	int z_offset = sel_offset+sel_size;
	int z_size = no*row*col/k/k*batch_size*sizeof(real);
	int a_offset = z_offset+z_size;
	int a_size = no*row*col/k/k*batch_size*sizeof(real);

	int status = 1;
	max_file_t *maxfile = NULL;
	max_engine_t *engine = NULL;
	max_actions_t *act = NULL;

	/* NOTE(review): z2 is uploaded without being initialised on the host. */
	real* z2 = (real*)malloc(z2_size);
	uchar* sel = (uchar*)malloc(sel_size);
	real* z = (real*)malloc(z_size);
	real* a = (real*)malloc(a_size);
	if (!z2 || !sel || !z || !a) {
		printf("Failed to allocate host buffers\n");
		goto cleanup;
	}

	maxfile = CNN_FW_MaxPool_V0_DP_L0_0_init();
	if (!maxfile) {
		printf("Failed to init MAX file\n");
		goto cleanup;
	}

	engine = max_load(maxfile, "*");
	if (!engine) {
		printf("Failed to open Max device\n");
		goto cleanup;
	}

	/* Each action set is released right after its run so none is leaked. */
	printf("Writing to LMem.\n");
	act = max_actions_init(maxfile, "writeLMem");
	max_set_param_uint64t(act, "offset", z2_offset);
	max_set_param_uint64t(act, "size", z2_size);
	max_queue_input(act, "cpu_to_lmem_at_cpu", z2, z2_size);
	max_run(engine, act);
	max_actions_free(act);

	printf("Running on DFE.\n");
	act = max_actions_init(maxfile, "default");
	max_set_param_uint64t(act, "no", no);
	max_set_param_uint64t(act, "z2_offset", z2_offset);
	max_set_param_uint64t(act, "sel_offset", sel_offset);
	max_set_param_uint64t(act, "z_offset", z_offset);
	max_set_param_uint64t(act, "a_offset", a_offset);
	max_run(engine, act);
	max_actions_free(act);

	printf("Reading from LMemBytes.\n");
	act = max_actions_init(maxfile, "readLMemBytes");
	max_set_param_uint64t(act, "offset", sel_offset);
	max_set_param_uint64t(act, "size", sel_size);
	max_queue_output(act, "lmem_to_cpu_at_cpu", sel, sel_size);
	max_run(engine, act);
	max_actions_free(act);

	printf("Reading from LMem.\n");
	act = max_actions_init(maxfile, "readLMem");
	max_set_param_uint64t(act, "offset", z_offset);
	max_set_param_uint64t(act, "size", z_size);
	max_queue_output(act, "lmem_to_cpu_at_cpu", z, z_size);
	max_run(engine, act);
	max_actions_free(act);

	printf("Reading from LMem.\n");
	act = max_actions_init(maxfile, "readLMem");
	max_set_param_uint64t(act, "offset", a_offset);
	max_set_param_uint64t(act, "size", a_size);
	max_queue_output(act, "lmem_to_cpu_at_cpu", a, a_size);
	max_run(engine, act);
	max_actions_free(act);

	max_unload(engine);
	printf("Done.\n");
	status = 0;

cleanup:
	/* free(NULL) is a no-op, so this is safe on every failure path. */
	free(z2);
	free(sel);
	free(z);
	free(a);

	return status;
}
Ejemplo n.º 4
0
/**
 * Runs the main action to compute a predictor or corrector step.
 *
 * The call has three phases:
 *   1. Gather: for every edge of domains 1..ndomain-1 whose schedule entry
 *      reads[7*schedind+3] is 1, pack the cell's 4 q values and its adt value
 *      into cpu_res_qpadt (5 doubles per entry).
 *   2. Run: configure the Adt, Res and Update kernels with their LMem streams
 *      and run them in a single action.
 *   3. Scatter: walk the same schedule again and add the 4 residual values
 *      returned in dfe_res_res to (*domain).res for each flagged cell.
 *
 * @param k    step selector: doSaveQold is set when k == 1, and the
 *             "updateSaveQold" LMem stream is ignored when k == 0
 * @param cfl  scalar passed to AirfoilDFEAdtKernel
 * @param gam  scalar passed to AirfoilDFEAdtKernel
 * @param gm1  scalar passed to AirfoilDFEAdtKernel and AirfoilDFEResKernel
 * @param eps  scalar passed to AirfoilDFEResKernel
 * @param rms  accumulator; the 16 values of the "rmsOut" stream are added to
 *             *rms (it is not reset here)
 */
void AirfoilDFEInterface::runMainAction(int k, double cfl, double gam, double gm1, double eps, double *rms) {

	// Phase 1: gather q and adt for the cells the schedule marks as CPU-fed.
	int cpuresind = 0;
	int schedind = 0;
	for (int d = 1; d < (*domain).ndomain; d++){

		for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){

			int thispart = d;
			int thisind = res_edge_iter;

			// Each edge references two cells.
			for (int i = 0; i < 2; i++){
				int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i];
				int thiscellind = (*domain).ecellind[thispart][thisind*2+i];
				if (reads[7*schedind+3] == 1){
					for (int j = 0; j < 4; j++) {
						cpu_res_qpadt[cpuresind*5+j] = (*domain).q[thiscellpart][4*thiscellind+j];
					}
					cpu_res_qpadt[cpuresind*5+4] = (*domain).adt[thiscellpart][thiscellind];
					cpuresind ++;
				}
				schedind ++ ;
			}
		}
	}

	// Phase 2: configure and run all three kernels in one action.
	max_actions_t * act =  max_actions_init(maxfile, NULL);

	max_set_ticks(act, "AirfoilDFEAdtKernel", (*domain).ncellcomputedfe);
	max_set_uint64t(act, "AirfoilDFEAdtKernel", "numTicks", (*domain).ncellcomputedfe);
	max_set_double(act, "AirfoilDFEAdtKernel", "cfl", cfl);
	max_set_double(act, "AirfoilDFEAdtKernel", "gam", gam);
	max_set_double(act, "AirfoilDFEAdtKernel", "gm1", gm1);
	max_lmem_linear(act, "adtQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]);
	max_lmem_linear(act, "adtDxRead", memAddresses[adtDx], adtDxDatSize);

	max_set_ticks(act, "AirfoilDFEResKernel", resFlushTicks);
	max_set_double(act, "AirfoilDFEResKernel", "gm1", gm1);
	max_set_double(act, "AirfoilDFEResKernel", "eps", eps);
	max_set_uint64t(act, "AirfoilDFEResKernel", "nTicks", resFlushTicks);
	max_queue_input(act, "cpu_qpadt_to_res", cpu_res_qpadt, cpuQpadtSize);
	max_lmem_linear(act, "resReadOnly", memAddresses[resReadOnly], resReadOnlyDatSize);
	max_queue_output(act,"cpu_res_from_res", dfe_res_res, passtorescount*sizeof(double)*4);

	// Scratch buffer for the 16 partial rms values; released after the sum below.
	double * rmsOut = (double *) malloc(16*sizeof(double));
	max_set_ticks(act,"AirfoilDFEUpdateKernel", (*domain).ncellcomputedfe);
	max_set_uint64t(act, "AirfoilDFEUpdateKernel", "numCells", (*domain).ncellcomputedfe);
	max_set_uint64t(act, "AirfoilDFEUpdateKernel", "doSaveQold", k==1);
	max_lmem_linear(act, "updateQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]);
	max_lmem_linear(act, "updateQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]);
	max_queue_output(act,"rmsOut", rmsOut, 16*sizeof(double));
	if (k == 0) {
		max_ignore_lmem(act, "updateSaveQold");
	} else {
		max_lmem_linear(act, "updateSaveQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]);
	}

	// Streams that belong to the setup / read-back actions only.
	max_ignore_lmem(act, "setupWrite");
	max_ignore_lmem(act, "qRead");

	max_run(engine, act);
	max_actions_free(act);


	for (int i = 0; i < 16; i++) (*rms) += rmsOut[i];
	// Previously leaked on every call.
	free(rmsOut);

	// Phase 3: scatter the residuals, walking the schedule in the same order
	// as the gather so cpuresind lines up with the packed entries.
	cpuresind = 0;
	schedind = 0;
	for (int d = 1; d < (*domain).ndomain; d++){

		for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){

			int thispart = d;
			int thisind = res_edge_iter;

			for (int i = 0; i < 2; i++){
				int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i];
				int thiscellind = (*domain).ecellind[thispart][thisind*2+i];
				if (reads[7*schedind+3] == 1){
					for (int j = 0; j < 4; j++) {
						(*domain).res[thiscellpart][4*thiscellind+j] += dfe_res_res[cpuresind*4+j];
					}
					cpuresind ++;
				}
				schedind ++ ;
			}
		}
	}
}
Ejemplo n.º 5
0
/*
 * Host test for the "configWord" low-latency stream and the "read_fifo"
 * output stream.
 *
 * Sends one config word (wordCount = MAX_DEPTH, base = configBase), waits for
 * a key press on stdin, reads MAX_DEPTH entries from "read_fifo" and checks
 * that the first 64-bit word of entry i equals configBase + i and that the
 * remaining words of the entry are zero.
 *
 * Returns 0 on success, 1 on a data mismatch and -1 when a resource could not
 * be obtained (the engine-open failure path calls exit(-1)).
 */
int main(int argc, char *argv[])
{
	(void) argc;
	(void) argv;
	max_file_t *maxfile = INIT_NAME();
	if(!maxfile) {
		printf("Failed to init MAX file\n");
		return -1;
	}

	max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true);

	const char *device_name = "*";
	printf("Opening device: %s\n", device_name);

	max_engine_t *engine = max_load(maxfile, device_name);
	if(!engine) {
		printf("Failed to open Max device\n");
		exit(-1);
	}

	max_reset_engine(engine);

	/*
	 * An empty action is run first: without it, no debug outputs are
	 * generated.
	 */
	max_actions_t *action = max_actions_init(maxfile, NULL);
	max_run(engine, action);
	max_actions_free(action);


	srand(time(NULL));
	single_entry_t *outputData = calloc(MAX_DEPTH, sizeof(single_entry_t));
	if (!outputData) {
		printf("Failed to allocate output buffer\n");
		return -1;
	}

	/* Page-aligned (4096) backing store for the 512-slot config word stream. */
	void *configWordBuffer = NULL;
	if (posix_memalign(&configWordBuffer, 4096, 512 * sizeof(configWord_t)) != 0) {
		printf("Failed to allocate config word buffer\n");
		free(outputData);
		return -1;
	}
	max_llstream_t *configWordStream = max_llstream_setup(engine, "configWord", 512, sizeof(configWord_t), configWordBuffer);

	uint64_t configBase = 0;
	printf("Sending config word...\n");
	void *configWordSlot;
	/* Poll until exactly one write slot has been acquired. */
	while (max_llstream_write_acquire(configWordStream, 1, &configWordSlot) != 1) usleep(10);
	configWord_t *configWord = configWordSlot;
	configWord->wordCount = MAX_DEPTH;
	configWord->base = configBase;
	max_llstream_write(configWordStream, 1);

	/* Blocks until there is input on stdin before starting the read-back. */
	getchar();



	printf("Streaming 'read_fifo'...\n"); fflush(stdout);
	action = max_actions_init(maxfile, NULL);
	max_queue_output(action, "read_fifo", outputData, sizeof(single_entry_t) * MAX_DEPTH);
	max_disable_reset(action);
	max_disable_validation(action);
	max_enable_partial_memory(action);
	max_run(engine, action);
	max_actions_free(action);

	printf("Comparing...\n"); fflush(stdout);
	uint8_t fail = 0;
	const size_t quadsPerEntry = sizeof(single_entry_t) / sizeof(uint64_t);
	for (size_t entryIx=0; entryIx < MAX_DEPTH; entryIx++) {
		uint64_t *output = (uint64_t *)outputData[entryIx].data;

		/* Arguments are cast so they match the %zu / %llx conversions exactly. */
		uint64_t expected = (configBase + entryIx);
		if (expected != output[0]) {
			fail = 1;
			printf("[Entry: %zu, Quad: %zu] Mismatch: input 0x%llx, output 0x%llx\n", entryIx, (size_t)0, (unsigned long long)expected, (unsigned long long)output[0]);
		}
		/* Once any mismatch has been seen, the remaining quads are not checked. */
		for (size_t q = 1; !fail && q < quadsPerEntry; q++) {
			if (0 != output[q]) {
				fail = 1;
				printf("[Entry: %zu, Quad: %zu] Mismatch: input 0x%llx, output 0x%llx\n", entryIx, q, 0ULL, (unsigned long long)output[q]);
			}
		}
	}

	printf("%s\n", fail ? "FAILED!" : "Success");

	/*
	 * NOTE(review): configWordBuffer is still registered with the llstream and
	 * the engine is still loaded, so only the plain host buffer is released
	 * here; confirm the proper stream/engine teardown calls before freeing more.
	 */
	free(outputData);
	return fail;
}
void my_process(int data_x_offset,const cateType* data_y,int mb_idx,real learning_rate){
    {
        int t = K_fw_l0_conv;
        load_engine(t);
        printf("Running on DFE: fw_l0_conv");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", 1);
        max_set_param_uint64t(act, "no", NKERS[0]);
        max_queue_input(act, "b", layer0_b, layer0_b_size);
        max_queue_input(act, "w", layer0_w, layer0_w_size);
        max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size);
        max_set_param_uint64t(act, "z_offset", layer0_z2_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_fw_l0_maxpool;
        load_engine(t);
        printf("Running on DFE: fw_l0_maxpool");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "no", NKERS[0]);
        max_set_param_uint64t(act, "z2_offset", layer0_z2_offset);
        max_set_param_uint64t(act, "sel_offset", layer0_sel_offset);
        max_set_param_uint64t(act, "z_offset", layer0_z_offset);
        max_set_param_uint64t(act, "a_offset", layer0_a_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_fw_l1_conv;
        load_engine(t);
        printf("Running on DFE: fw_l1_conv");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", NKERS[0]);
        max_set_param_uint64t(act, "no", NKERS[1]);
        max_queue_input(act, "b", layer1_b, layer1_b_size);
        max_queue_input(act, "w", layer1_w, layer1_w_size);
        max_set_param_uint64t(act, "x_offset", layer1_x_offset);
        max_set_param_uint64t(act, "z_offset", layer1_z2_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_fw_l1_maxpool;
        load_engine(t);
        printf("Running on DFE: fw_l1_maxpool");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "no", NKERS[1]);
        max_set_param_uint64t(act, "z2_offset", layer1_z2_offset);
        max_set_param_uint64t(act, "sel_offset", layer1_sel_offset);
        max_set_param_uint64t(act, "z_offset", layer1_z_offset);
        max_set_param_uint64t(act, "a_offset", layer1_a_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_fw_l3_softmax;
        load_engine(t);
        printf("Running on DFE: fw_l3_softmax");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", NKERS[2]);
        max_set_param_uint64t(act, "x_offset", layer3_x_offset);
        max_queue_input(act, "w", layer3_w, layer3_w_size);
        max_queue_input(act, "b", layer3_b, layer3_b_size);
        max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset);
        max_queue_output(act, "pred", layer3_pred, layer3_pred_size);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    //TODO: learning rate<0 exit
    {
        int t = K_bp_l3_softmax;
        load_engine(t);
        printf("Running on DFE: bp_l3_softmax");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", NKERS[2]);
        max_set_param_uint64t(act, "x_offset", layer3_x_offset);
        max_queue_input(act, "w", layer3_w, layer3_w_size);
        max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset);
        max_queue_input(act, "std", data_y+mb_idx*layer3_pred_size, layer3_pred_size);
        max_queue_output(act, "w_grad", layer3_w_grad, layer3_w_grad_size);
        max_queue_output(act, "b_grad", layer3_b_grad, layer3_b_grad_size);
        max_set_param_uint64t(act, "x_grad_offset", layer3_x_grad_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_bp_l1_maxpool;
        load_engine(t);
        printf("Running on DFE: bp_l1_maxpool");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "no", NKERS[1]);
        max_set_param_uint64t(act, "a_grad_offset", layer1_a_grad_offset);
        max_set_param_uint64t(act, "z_offset", layer1_z_offset);
        max_set_param_uint64t(act, "sel_offset", layer1_sel_offset);
        max_set_param_uint64t(act, "z2_grad_offset", layer1_z2_grad_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_bp_l1_conv;
        load_engine(t);
        printf("Running on DFE: bp_l1_conv");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", NKERS[0]);
        max_set_param_uint64t(act, "no", NKERS[1]);
        max_set_param_uint64t(act, "z_grad_offset", layer1_z2_grad_offset);
        max_set_param_uint64t(act, "x_offset", layer1_x_offset);
        max_set_param_uint64t(act, "x_grad_offset", layer1_x_grad_offset);
        max_queue_input(act, "w", layer1_w, layer1_w_size);
        max_queue_output(act, "w_grad", layer1_w_grad, layer1_w_grad_size);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_bp_l0_maxpool;
        load_engine(t);
        printf("Running on DFE: bp_l0_maxpool");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "no", NKERS[0]);
        max_set_param_uint64t(act, "a_grad_offset", layer0_a_grad_offset);
        max_set_param_uint64t(act, "z_offset", layer0_z_offset);
        max_set_param_uint64t(act, "sel_offset", layer0_sel_offset);
        max_set_param_uint64t(act, "z2_grad_offset", layer0_z2_grad_offset);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
    {
        int t = K_bp_l0_conv;
        load_engine(t);
        printf("Running on DFE: bp_l0_conv");
        mark_timer(false,1);
        max_actions_t* act = max_actions_init(max_files[t], "default");
        max_set_param_uint64t(act, "ni", 1);
        max_set_param_uint64t(act, "no", NKERS[0]);
        max_set_param_uint64t(act, "z_grad_offset", layer0_z2_grad_offset);
        max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size);
        max_set_param_uint64t(act, "x_grad_offset", layer0_x_grad_offset);
        max_queue_input(act, "w", layer0_w, layer0_w_size);
        max_queue_output(act, "w_grad", layer0_w_grad, layer0_w_grad_size);
        max_run(max_engines[t], act);
        max_actions_free(act);
        mark_timer(true,1);
    }
}