int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip cpu_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr cpu_ip; inet_aton(argv[2], &cpu_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int port = 5007; max_file_t *maxfile = Tracker_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); char regName[32]; for (int i=0; i < 1024; i++) { sprintf(regName, "filter_%d", i); if (i == 150) { max_set_uint64t(actions, "filteringKernel", regName, 0xCC /* a value to match... */); } else { max_set_uint64t(actions, "filteringKernel", regName, 0x4D1B /* or any value you want */); } } max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); /* * This executable both creates a normal Linux UDP socket as well as a DFE UDP Socket. * We then exchange data between the two. */ // DFE Socket max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); max_udp_bind(dfe_socket, port); max_udp_connect(dfe_socket, &cpu_ip, port); // Linux Socket int cpu_socket = create_cpu_udp_socket(&cpu_ip, &dfe_ip, port); printf("Sending test frame...\n"); sendTestFrame(cpu_socket); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; uint8_t received_data[512]; while (numMessageRx < NUM_MESSAGES_EXPECTED) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { printf("CPU: Got output frame - size %zd - NumMsg = %zd!\n", fsz, numMessageRx); // Frame size would be rounded up to the next 8 bytes. memcpy(received_data, f, fsz); numMessageRx++; max_framed_stream_discard(toCpu, 1); } else usleep(10); } max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
int main(int argc, char *argv[]) { max_file_t *maxfile = Gap_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *action = max_actions_init(maxfile, NULL); max_run(engine, action); size_t bufferSize = 4096 * 4096; void *inBuffer = NULL; void *outBuffer = NULL; if (posix_memalign(&inBuffer, 4096, bufferSize)) { err(1, "Couldn't allocation input buffer"); } if (posix_memalign(&outBuffer, 4096, bufferSize)) { err(1, "Couldn't allocation output buffer"); } max_framed_stream_t *inFrame = max_framed_stream_setup(engine, "src", inBuffer, bufferSize, 2048-16); max_framed_stream_t *outFrame = max_framed_stream_setup(engine, "dst", outBuffer, bufferSize, -1); // Now, stream in some frames and see what happens. for (size_t i=0 ; i < 8; i++) { void *f; while (max_framed_stream_write_acquire(inFrame, 1, &f) != 1) usleep(10); uint8_t *inputData = f; /* * Request a gap every other packet */ inputData[20] = i % 2 == 1 ? 'G' : 'N'; size_t frameSize = 60; printf("Sending frame %zd\n", i); max_framed_stream_write(inFrame, 1, &frameSize); void *oFrame; size_t oFrameSize; while (max_framed_stream_read(outFrame, 1, &oFrame, &oFrameSize) != 1) usleep(10); printf("Got frame %zd - %zd bytes (Expecting %zd)\n", i, oFrameSize, frameSize); dump(oFrame, oFrameSize); max_framed_stream_discard(outFrame, 1); } max_unload(engine); max_file_free(maxfile); printf("Done.\n"); return 0; }
/** * Runs the main action to compute a predictor or corrector step */ void AirfoilDFEInterface::runMainAction(int k, double cfl, double gam, double gm1, double eps, double *rms) { int cpuresind = 0; int schedind = 0; for (int d = 1; d < (*domain).ndomain; d++){ for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){ int thispart = d; int thisind = res_edge_iter; for (int i = 0; i < 2; i++){ int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i]; int thiscellind = (*domain).ecellind[thispart][thisind*2+i]; if (reads[7*schedind+3] == 1){ for (int j = 0; j < 4; j++) { cpu_res_qpadt[cpuresind*5+j] = (*domain).q[thiscellpart][4*thiscellind+j]; } cpu_res_qpadt[cpuresind*5+4] = (*domain).adt[thiscellpart][thiscellind]; cpuresind ++; } schedind ++ ; } } } max_actions_t * act = max_actions_init(maxfile, NULL); max_set_ticks(act, "AirfoilDFEAdtKernel", (*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEAdtKernel", "numTicks", (*domain).ncellcomputedfe); max_set_double(act, "AirfoilDFEAdtKernel", "cfl", cfl); max_set_double(act, "AirfoilDFEAdtKernel", "gam", gam); max_set_double(act, "AirfoilDFEAdtKernel", "gm1", gm1); max_lmem_linear(act, "adtQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]); max_lmem_linear(act, "adtDxRead", memAddresses[adtDx], adtDxDatSize); max_set_ticks(act, "AirfoilDFEResKernel", resFlushTicks); max_set_double(act, "AirfoilDFEResKernel", "gm1", gm1); max_set_double(act, "AirfoilDFEResKernel", "eps", eps); max_set_uint64t(act, "AirfoilDFEResKernel", "nTicks", resFlushTicks); max_queue_input(act, "cpu_qpadt_to_res", cpu_res_qpadt, cpuQpadtSize); max_lmem_linear(act, "resReadOnly", memAddresses[resReadOnly], resReadOnlyDatSize); max_queue_output(act,"cpu_res_from_res", dfe_res_res, passtorescount*sizeof(double)*4); double * rmsOut = (double *) malloc(16*sizeof(double)); max_set_ticks(act,"AirfoilDFEUpdateKernel", (*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEUpdateKernel", "numCells", (*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEUpdateKernel", "doSaveQold", k==1); max_lmem_linear(act, "updateQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]); max_lmem_linear(act, "updateQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]); max_queue_output(act,"rmsOut", rmsOut, 16*sizeof(double)); if (k == 0) { max_ignore_lmem(act, "updateSaveQold"); } else { max_lmem_linear(act, "updateSaveQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]); } max_ignore_lmem(act, "setupWrite"); max_ignore_lmem(act, "qRead"); max_run(engine, act); max_actions_free(act); for (int i = 0; i < 16; i++) (*rms) += rmsOut[i]; cpuresind = 0; schedind = 0; for (int d = 1; d < (*domain).ndomain; d++){ for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){ int thispart = d; int thisind = res_edge_iter; for (int i = 0; i < 2; i++){ int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i]; int thiscellind = (*domain).ecellind[thispart][thisind*2+i]; if (reads[7*schedind+3] == 1){ for (int j = 0; j < 4; j++) { (*domain).res[thiscellpart][4*thiscellind+j] += dfe_res_res[cpuresind*4+j]; } cpuresind ++; } schedind ++ ; } } } }
/** * The initial action to set up arrays in lmem for main compute */ void AirfoilDFEInterface::runSetupAction () { max_actions_t * act; act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeAdtDX, adtDxDatSize); max_lmem_linear(act, "setupWrite", memAddresses[adtDx], adtDxDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeQ, qDatSize); max_lmem_linear(act, "setupWrite", memAddresses[q], qDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_lmem(act, "adtQ"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeQ, qDatSize); max_lmem_linear(act, "setupWrite", memAddresses[qold], qDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeResReadOnly, resReadOnlyDatSize); max_lmem_linear(act, "setupWrite", memAddresses[resReadOnly], resReadOnlyDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); }
int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip remote_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr remote_ip; inet_aton(argv[2], &remote_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int in_port = 2000; const int out_port = 2000; // struct in_addr mcastaddr; // inet_aton("224.0.0.1", &mcastaddr); max_file_t *maxfile = SignExtWithPatternMatching_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); // max_ip_multicast_join_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); // max_udp_bind_ip(dfe_socket, &mcastaddr, in_port); max_udp_bind(dfe_socket, in_port); max_udp_connect(dfe_socket, &remote_ip, out_port); printf("Listening on %s in_port %d\n", argv[1], in_port); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; while (1) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { numMessageRx++; printf("CPU: Got output frame %zd - size %zd bytes\n", numMessageRx, fsz); uint64_t *w = f; for (size_t i=0; i < 3; i++) { printf("Frame [%zd] Word[%zd]: 0x%lx\n", numMessageRx, i, w[i]); } max_framed_stream_discard(toCpu, 1); } else usleep(10); } // max_ip_multicast_leave_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
int main(int argc, char *argv[]) { (void) argc; (void) argv; max_file_t *maxfile = INIT_NAME(); if(!maxfile) { printf("Failed to init MAX file\n"); return -1; } max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); const char *device_name = "*"; printf("Opening device: %s\n", device_name); max_engine_t *engine = max_load(maxfile, device_name); if(!engine) { printf("Failed to open Max device\n"); exit(-1); } max_reset_engine(engine); /* * SLiC is so shit, that if we don't run an empty action, no debug outputs will be generated. */ max_actions_t *action = max_actions_init(maxfile, NULL); max_run(engine, action); max_actions_free(action); srand(time(NULL)); single_entry_t *outputData = calloc(MAX_DEPTH, sizeof(single_entry_t)); void *configWordBuffer = NULL; posix_memalign(&configWordBuffer, 4096, 512 * sizeof(configWord_t)); max_llstream_t *configWordStream = max_llstream_setup(engine, "configWord", 512, sizeof(configWord_t), configWordBuffer); uint64_t configBase = 0; printf("Sending config word...\n"); void *configWordSlot; while (max_llstream_write_acquire(configWordStream, 1, &configWordSlot) != 1) usleep(10); configWord_t *configWord = configWordSlot; configWord->wordCount = MAX_DEPTH; configWord->base = configBase; max_llstream_write(configWordStream, 1); getchar(); printf("Streaming 'read_fifo'...\n"); fflush(stdout); action = max_actions_init(maxfile, NULL); max_queue_output(action, "read_fifo", outputData, sizeof(single_entry_t) * MAX_DEPTH); max_disable_reset(action); max_disable_validation(action); max_enable_partial_memory(action); max_run(engine, action); max_actions_free(action); printf("Comparing...\n"); fflush(stdout); uint8_t fail = 0; for (size_t entryIx=0; entryIx < MAX_DEPTH; entryIx++) { uint64_t *output = (uint64_t *)outputData[entryIx].data; size_t quadsPerEntry = sizeof(single_entry_t) / sizeof(uint64_t); uint64_t expected = (configBase + entryIx); if (expected != output[0]) { fail = 1; printf("[Entry: %zd, Quad: %zd] Mismatch: input 0x%lx, output 0x%lx\n", entryIx, 0L, expected, output[0]); } for (size_t q = 1; !fail && q < quadsPerEntry; q++) { if (0 != output[q]) { fail = 1; printf("[Entry: %zd, Quad: %zd] Mismatch: input 0x%lx, output 0x%lx\n", entryIx, q, 0L, output[q]); } } } printf("%s\n", fail ? "FAILED!" : "Success"); return fail; }
void my_process(int data_x_offset,const cateType* data_y,int mb_idx,real learning_rate){ { int t = K_fw_l0_conv; load_engine(t); printf("Running on DFE: fw_l0_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", 1); max_set_param_uint64t(act, "no", NKERS[0]); max_queue_input(act, "b", layer0_b, layer0_b_size); max_queue_input(act, "w", layer0_w, layer0_w_size); max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size); max_set_param_uint64t(act, "z_offset", layer0_z2_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l0_maxpool; load_engine(t); printf("Running on DFE: fw_l0_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "z2_offset", layer0_z2_offset); max_set_param_uint64t(act, "sel_offset", layer0_sel_offset); max_set_param_uint64t(act, "z_offset", layer0_z_offset); max_set_param_uint64t(act, "a_offset", layer0_a_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l1_conv; load_engine(t); printf("Running on DFE: fw_l1_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[0]); max_set_param_uint64t(act, "no", NKERS[1]); max_queue_input(act, "b", layer1_b, layer1_b_size); max_queue_input(act, "w", layer1_w, layer1_w_size); max_set_param_uint64t(act, "x_offset", layer1_x_offset); max_set_param_uint64t(act, "z_offset", layer1_z2_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l1_maxpool; load_engine(t); printf("Running on DFE: fw_l1_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "z2_offset", layer1_z2_offset); max_set_param_uint64t(act, "sel_offset", layer1_sel_offset); max_set_param_uint64t(act, "z_offset", layer1_z_offset); max_set_param_uint64t(act, "a_offset", layer1_a_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l3_softmax; load_engine(t); printf("Running on DFE: fw_l3_softmax"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[2]); max_set_param_uint64t(act, "x_offset", layer3_x_offset); max_queue_input(act, "w", layer3_w, layer3_w_size); max_queue_input(act, "b", layer3_b, layer3_b_size); max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset); max_queue_output(act, "pred", layer3_pred, layer3_pred_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } //TODO: learning rate<0 exit { int t = K_bp_l3_softmax; load_engine(t); printf("Running on DFE: bp_l3_softmax"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[2]); max_set_param_uint64t(act, "x_offset", layer3_x_offset); max_queue_input(act, "w", layer3_w, layer3_w_size); max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset); max_queue_input(act, "std", data_y+mb_idx*layer3_pred_size, layer3_pred_size); max_queue_output(act, "w_grad", layer3_w_grad, layer3_w_grad_size); max_queue_output(act, "b_grad", layer3_b_grad, layer3_b_grad_size); max_set_param_uint64t(act, "x_grad_offset", layer3_x_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l1_maxpool; load_engine(t); printf("Running on DFE: bp_l1_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "a_grad_offset", layer1_a_grad_offset); max_set_param_uint64t(act, "z_offset", layer1_z_offset); max_set_param_uint64t(act, "sel_offset", layer1_sel_offset); max_set_param_uint64t(act, "z2_grad_offset", layer1_z2_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l1_conv; load_engine(t); printf("Running on DFE: bp_l1_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[0]); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "z_grad_offset", layer1_z2_grad_offset); max_set_param_uint64t(act, "x_offset", layer1_x_offset); max_set_param_uint64t(act, "x_grad_offset", layer1_x_grad_offset); max_queue_input(act, "w", layer1_w, layer1_w_size); max_queue_output(act, "w_grad", layer1_w_grad, layer1_w_grad_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l0_maxpool; load_engine(t); printf("Running on DFE: bp_l0_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "a_grad_offset", layer0_a_grad_offset); max_set_param_uint64t(act, "z_offset", layer0_z_offset); max_set_param_uint64t(act, "sel_offset", layer0_sel_offset); max_set_param_uint64t(act, "z2_grad_offset", layer0_z2_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l0_conv; load_engine(t); printf("Running on DFE: bp_l0_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", 1); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "z_grad_offset", layer0_z2_grad_offset); max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size); max_set_param_uint64t(act, "x_grad_offset", layer0_x_grad_offset); max_queue_input(act, "w", layer0_w, layer0_w_size); max_queue_output(act, "w_grad", layer0_w_grad, layer0_w_grad_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } }