void load_maxfiles(){ fprintf(stdout,"Init maxfiles\n"); max_files[K_fw_l0_conv] = CNN_FW_Conv_V0_DP_L0_0_init(); max_files[K_fw_l0_maxpool] = CNN_FW_MaxPool_V0_DP_L0_0_init(); max_files[K_fw_l1_conv] = CNN_FW_Conv_V0_DP_L1_0_init(); max_files[K_fw_l1_maxpool] = CNN_FW_MaxPool_V0_DP_L1_0_init(); max_files[K_fw_l2_mlp] = NULL; max_files[K_fw_l3_softmax] = CNN_FW_Softmax_V0_DP_L3_0_init(); max_files[K_bp_l3_softmax] = CNN_BP_Softmax_V0_DP_L3_0_init(); max_files[K_bp_l2_mlp] = NULL; max_files[K_bp_l1_maxpool] = CNN_BP_MaxPool_V0_DP_L1_0_init(); max_files[K_bp_l1_conv] = CNN_BP_Conv_V0_DP_L1_0_init(); max_files[K_bp_l0_maxpool] = CNN_BP_MaxPool_V0_DP_L0_0_init(); max_files[K_bp_l0_conv] = CNN_BP_Conv_V0_DP_L0_0_init(); for (int i=0;i<K_TOTAL;++i){ max_engines[i] = NULL; } cur_engine = -1; int t = K_fw_l0_conv; load_engine(t); { fprintf(stdout,"Writing to LMem : train_set_x\n"); max_actions_t* act; act = max_actions_init(max_files[t], "writeLMem"); max_set_param_uint64t(act, "offset", train_set_x_offset); max_set_param_uint64t(act, "size", train_set_x_size); max_queue_input(act, "cpu_to_lmem_at_cpu", train_set_x, train_set_x_size); max_run(max_engines[t], act); max_actions_free(act); } { fprintf(stdout,"Writing to LMem : valid_set_x\n"); max_actions_t* act; act = max_actions_init(max_files[t], "writeLMem"); max_set_param_uint64t(act, "offset", valid_set_x_offset); max_set_param_uint64t(act, "size", valid_set_x_size); max_queue_input(act, "cpu_to_lmem_at_cpu", valid_set_x, valid_set_x_size); max_run(max_engines[t], act); max_actions_free(act); } { fprintf(stdout,"Writing to LMem : test_set_x\n"); max_actions_t* act; act = max_actions_init(max_files[t], "writeLMem"); max_set_param_uint64t(act, "offset", test_set_x_offset); max_set_param_uint64t(act, "size", test_set_x_size); max_queue_input(act, "cpu_to_lmem_at_cpu", test_set_x, test_set_x_size); max_run(max_engines[t], act); max_actions_free(act); } }
/*
 * Streams `sizeBytes` bytes from `dataIn` to the DFE through the "fromCpu"
 * stream with the LMem write kernel enabled.
 *
 * dataIn             host buffer queued on the "fromCpu" stream
 * size               tick count of the write kernel; also used (times 8)
 *                    to derive the "totalBursts" scalar
 * sizeBytes          number of bytes queued from dataIn
 * burstLengthInBytes divisor used to derive the burst scalars
 * engine, maxfile    engine to run on and the maxfile describing it
 *
 * The action set is run once with only the kernel settings, the engine is
 * reset, and the same action set (now with the input queued and the
 * "toLmem" interrupt enabled) is run a second time.
 */
void writeDataToLMem(uint64_t *dataIn, int size, int sizeBytes, int burstLengthInBytes, max_engine_t *engine, max_file_t *maxfile)
{
    const char *writeKernel = "KernelLMem_Write_CommandAndDataStream";
    const char *readKernel = "KernelLMem_Read_CommandAndDataStream";

    printf("size=%d, sizeBytes=%d, burstLengthInBytes=%d\n", size, sizeBytes, burstLengthInBytes);

    printf("Performing max_actions_init()\n");
    max_actions_t *actions = max_actions_init(maxfile, NULL);
    printf("Done\n");

    /* Write kernel runs for `size` ticks. */
    max_set_ticks(actions, writeKernel, size);
    const int burstCount = size * 8 / burstLengthInBytes;
    const int wordsInBurst = burstLengthInBytes / 8;
    max_set_uint64t(actions, writeKernel, "totalBursts", burstCount);
    max_set_uint64t(actions, writeKernel, "wordsPerBurst", wordsInBurst);

    /* Read kernel is given zero ticks but the same scalar values. */
    max_set_ticks(actions, readKernel, 0);
    max_set_uint64t(actions, readKernel, "totalBursts", burstCount);
    max_set_uint64t(actions, readKernel, "wordsPerBurst", wordsInBurst);

    max_run(engine, actions);
    max_reset_engine(engine);

    max_queue_input(actions, "fromCpu", dataIn, sizeBytes);
    max_lmem_set_interrupt_on(actions, "toLmem");

    printf("Performing max_run()\n");
    max_run(engine, actions);
    printf("Done\n");

    max_actions_free(actions);
}
/*
 * Streams `sizeBytes` bytes from the DFE into `dataOut` through the "toCpu"
 * stream with the LMem read kernel enabled.
 *
 * dataOut            host buffer queued on the "toCpu" stream
 * size               tick count of the read kernel; also used (times 8)
 *                    to derive the "totalBursts" scalar
 * sizeBytes          number of bytes queued into dataOut
 * burstLengthInBytes divisor used to derive the burst scalars
 * engine, maxfile    engine to run on and the maxfile describing it
 *
 * The action set is run once with only the kernel settings, the engine is
 * reset, and the same action set (now with the output queued) is run again.
 */
void readDataFromLMem(uint64_t *dataOut, int size, int sizeBytes, int burstLengthInBytes, max_engine_t *engine, max_file_t *maxfile)
{
    const char *writeKernel = "KernelLMem_Write_CommandAndDataStream";
    const char *readKernel = "KernelLMem_Read_CommandAndDataStream";

    max_actions_t *actions = max_actions_init(maxfile, NULL);

    /* Write kernel is given zero ticks. */
    max_set_ticks(actions, writeKernel, 0);
    const int burstCount = size * 8 / burstLengthInBytes;
    const int wordsInBurst = burstLengthInBytes / 8;
    max_set_uint64t(actions, writeKernel, "totalBursts", burstCount);
    max_set_uint64t(actions, writeKernel, "wordsPerBurst", wordsInBurst);

    /* Read kernel runs for `size` ticks. */
    max_set_ticks(actions, readKernel, size);
    max_set_uint64t(actions, readKernel, "totalBursts", burstCount);
    max_set_uint64t(actions, readKernel, "wordsPerBurst", wordsInBurst);

    max_run(engine, actions);
    max_reset_engine(engine);

    max_queue_output(actions, "toCpu", dataOut, sizeBytes);
    max_run(engine, actions);

    max_actions_free(actions);
}
void AirfoilDFEInterface::runOutputAction () { max_actions_t * act; act = max_actions_init(maxfile, NULL); max_queue_output(act, "qCPUOut", dfeQ, qDatSize); max_lmem_linear(act, "qRead", memAddresses[q], qDatSize); max_ignore_lmem(act, "setupWrite"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); int thispart = 1; int thisind = 0; for (int i = 0; i < (*domain).ncellcomputedfe; i++){ int cellpart = readlocs[2*i]; int cellind = readlocs[2*i+1]; for (int j = 0; j < 4; j ++) (*domain).q[cellpart][cellind*4+j] = dfeQ[i*4+j]; thisind++; if (thisind == (*domain).ncell[thispart]){ thispart++; thisind = 0; } } }
int main(int argc, char *argv[]) { if (argc != 3) { printf("Usage: %s <dfe_ip> <netmask>\n", argv[0]); return 1; } uint16_t Nsockets = 1; const int port = 80; struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr netmask; inet_aton(argv[2], &netmask); // initialization files for crcIndex table, generated by init_code char fileCrcIndex1[] = "./results/romCrcIndex1_init.html"; char fileCrcIndex2[] = "./results/romCrcIndex2_init.html"; // LMEM initialization file location, generated by init_code char fileLmem[] = "./results/lmem_generated_file.html"; uint64_t *arrCrc1; uint64_t *arrCrc2; long Lcrc; FILE *fpCrc1 = fopen(fileCrcIndex1, "rb"); FILE *fpCrc2 = fopen(fileCrcIndex2, "rb"); FILE *fpLmem = fopen(fileLmem, "rb"); if (!(fpCrc1 && fpCrc2 && fpLmem)) { printf("Error with file\n"); exit(0); } // obtain file size fseek(fpCrc1, 0, SEEK_END); Lcrc = ftell(fpCrc1); rewind(fpCrc1); fillRomCrcIndex(fpCrc1, &arrCrc1, Lcrc); fillRomCrcIndex(fpCrc2, &arrCrc2, Lcrc); printf("Preparing for init() and max_load()\n"); max_file_t *maxfile = httpserver_init(); max_engine_t * engine = max_load(maxfile, "*"); printf("Done\n"); max_actions_t *actions = max_actions_init(maxfile, NULL); int romDepthCrc = Lcrc / 8; for (uint32_t i = 0; i < romDepthCrc; i++) { max_set_mem_uint64t(actions, "CrcIndexTable", "romCrcIndex1", i, arrCrc1[i]); max_set_mem_uint64t(actions, "CrcIndexTable", "romCrcIndex2", i, arrCrc2[i]); } max_run(engine, actions); max_actions_free(actions); long L; size_t result; uint64_t* arrLmem; // obtain file size fseek(fpLmem, 0, SEEK_END); L = ftell(fpLmem); rewind(fpLmem); double diff = ceil(L / 8.0) - L / 8.0; // NULL character padding if (diff != 0) { L = (int) ceil(L / 8.0) * 8; } // allocate memory to contain the whole file size_t Nelem = sizeof(uint64_t) * (L / 8); arrLmem = (uint64_t*) malloc(Nelem); result = fread(arrLmem, 1, L, fpLmem); int romDepth = L / 8; int burstLengthInBytes = max_get_burst_size(maxfile, "cmd_tolmem"); inline int max(int a, 
int b) { return a > b ? a : b; } ; const int size = romDepth; int sizeBytes = size * sizeof(uint64_t); uint64_t *inData; printf("Writing to DFE memory.\n"); inData = arrLmem; writeDataToLMem(inData, size, sizeBytes, burstLengthInBytes, engine, maxfile); printf("Done\n"); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_BOT_10G_PORT1, &dfe_ip, &netmask); //all sockets MUST be created before first call to max_tcp_connect or max_tcp_listen max_tcp_socket_t *(dfe_socket[Nsockets]); uint16_t socketNumber[Nsockets]; for (int i = 0; i < Nsockets; i++) { //dfe_socket[i] = max_tcp_create_socket(engine, "tcp_ISCA_QSFP_BOT_10G_PORT1"); dfe_socket[i] = max_tcp_create_socket_with_number(engine, "tcp_ISCA_QSFP_BOT_10G_PORT1", i); socketNumber[i] = max_tcp_get_socket_number(dfe_socket[i]); printf("Socket %d was assigned socket number %u\n", i, socketNumber[i]); } for (int i = 0; i < Nsockets; i++) { max_tcp_listen(dfe_socket[i], port + i); max_tcp_await_state(dfe_socket[i], MAX_TCP_STATE_LISTEN, NULL); } printf("CPU code: Total %u socket(s), listening on the port(s) %u-%u\n\n", Nsockets, port, port + Nsockets - 1); void *read_ptr; uint8_t *read_buffer; max_llstream_t *read_llstream; uint64_t *byteNumber; printf("CPU code: Setting up 'toCpuByteNumber' stream.\n"); int Nslots_byteNumber = 512; size_t tCBN_buffer_size = Nslots_byteNumber * 16; posix_memalign((void *) &read_buffer, 4096, tCBN_buffer_size); read_llstream = max_llstream_setup(engine, "toCpuFileSizeBytes", Nslots_byteNumber, 16, read_buffer); uint8_t *read_buffer_socket; max_llstream_t *read_llstream_socket; printf("CPU code: Setting up 'toCpuSocketNumber' stream.\n"); int Nslots_socketNumber = 512; size_t tCSB_buffer_size = Nslots_socketNumber * 16; posix_memalign((void *) &read_buffer_socket, 4096, tCSB_buffer_size); read_llstream_socket = max_llstream_setup(engine, "toCpuSocketNumber", Nslots_socketNumber, 16, read_buffer_socket); void *read_ptr_socket_slot; uint16_t ti = 10; while(ti > 0) { printf("CPU code: time=%u, 
waiting file size and socket numbers stream data to be sent to CPU\n", ti); usleep(1000*1000*1); ti--; } //while(1); uint64_t num_rx_bytes; uint64_t num_tx_bytes; uint8_t session_id; while (1) { //part 1: first wait to receive LengthBytes number printf("CPU code: PART 1 - waiting to receive LengthBytes number\n"); int FoundByteNumber = 0; ti=0; while (FoundByteNumber != 1) //first wait to receive LengthBytes number { usleep(1000*1000*1); for (int i = 0; i < Nsockets; i++) { max_tcp_get_num_bytes_received(dfe_socket[i], &num_rx_bytes); max_tcp_get_num_bytes_transmitted(dfe_socket[i], &num_tx_bytes, &session_id); printf("CPU code: waiting, time=%u, port=%u, socket=%i, max_tcp_get_num_bytes_received=%llu, max_tcp_get_num_bytes_transmitted=%llu\n", ti, port + i, i, (long long unsigned int) num_rx_bytes, (long long unsigned int) num_tx_bytes); } ti++; uint8_t ii = max_llstream_read(read_llstream, 1, &read_ptr); if (ii) { byteNumber = (uint64_t*) read_ptr; printf("CPU code: number of slots found to contain new data=%u, fileSizeBytes=%u\n", ii, (unsigned int) *byteNumber); max_llstream_read_discard(read_llstream, 1); FoundByteNumber = 1; } } //part 2: receive total number of data transfered printf("CPU code: PART 2 - receive socket number\n"); while (max_llstream_read(read_llstream_socket, 1, &read_ptr_socket_slot) == 0) ; uint16_t socket_returned = (uint16_t) *((uint16_t*) read_ptr_socket_slot); //event->socketID; unsigned int fileBytes = (unsigned int) *byteNumber; printf("CPU code: fileBytes=%u, socket_returned=%u\n", fileBytes, socket_returned); ti = 0; while (1) { { for (int i = 0; i < Nsockets; i++) { max_tcp_get_num_bytes_received(dfe_socket[i], &num_rx_bytes); max_tcp_get_num_bytes_transmitted(dfe_socket[i], &num_tx_bytes, &session_id); printf("CPU code: time=%i, port=%u, socket=%i, max_tcp_get_num_bytes_received=%llu, max_tcp_get_num_bytes_transmitted=%llu\n", ti, port + i, i, (long long unsigned int) num_rx_bytes, (long long unsigned int) num_tx_bytes); } ti++; 
printf("\n"); max_tcp_get_num_bytes_transmitted(dfe_socket[socket_returned], &num_tx_bytes, &session_id); printf("CPU code: fileSizeBytes=%u, socketReturned=%u, num_tx_bytes=%llu\n", fileBytes, socket_returned, (long long unsigned int) num_tx_bytes); } //usleep(1000*100); //printf("CPU code: While LOOP, socket_returned=%u, fileBytes=%u, num_tx_bytes(max_tcp_get_num_bytes_transmitted)=%llu\n", socket_returned, fileBytes, (long long unsigned int) num_tx_bytes); if (num_tx_bytes == fileBytes) { //usleep(1000*1000*3); printf("CPU code: MATCH num_tx_bytes==fileBytes, socket_returned=%u, fileBytes=%u, num_tx_bytes(max_tcp_get_num_bytes_transmitted)=%llu\n", socket_returned, fileBytes, (long long unsigned int) num_tx_bytes); printf("CPU code: Closing socket=%u\n", socket_returned); max_tcp_close(dfe_socket[socket_returned]); //max_tcp_close_mode_t close_mode=MAX_TCP_CLOSE_ABORT_RESET; //max_tcp_close_advanced(dfe_socket[socket_returned],close_mode); printf("CPU code: Waiting for MAX_TCP_STATE_CLOSED\n"); max_tcp_await_state(dfe_socket[socket_returned], MAX_TCP_STATE_CLOSED, NULL); printf("CPU code: Set LISTEN state\n"); max_tcp_listen(dfe_socket[socket_returned], port); printf("CPU code: Waiting for MAX_TCP_STATE_LISTEN\n"); max_tcp_await_state(dfe_socket[socket_returned], MAX_TCP_STATE_LISTEN, NULL); printf("CPU code: Again opened socket=%u\n", socket_returned); printf("\nCPU code: State of rx/tx after socket closing\n"); break; } usleep(1000*1000*1); } } for (int i = 0; i < Nsockets; i++) { max_tcp_close(dfe_socket[i]); printf("max_tcp_close(dfe_socket[i])"); } max_unload(engine); printf("max_unload(engine)"); max_file_free(maxfile); printf("max_file_free(maxfile)"); printf("The end\n"); return 0; }
int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip cpu_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr cpu_ip; inet_aton(argv[2], &cpu_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int port = 5007; max_file_t *maxfile = Tracker_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); char regName[32]; for (int i=0; i < 1024; i++) { sprintf(regName, "filter_%d", i); if (i == 150) { max_set_uint64t(actions, "filteringKernel", regName, 0xCC /* a value to match... */); } else { max_set_uint64t(actions, "filteringKernel", regName, 0x4D1B /* or any value you want */); } } max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); /* * This executable both creates a normal Linux UDP socket as well as a DFE UDP Socket. * We then exchange data between the two. */ // DFE Socket max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); max_udp_bind(dfe_socket, port); max_udp_connect(dfe_socket, &cpu_ip, port); // Linux Socket int cpu_socket = create_cpu_udp_socket(&cpu_ip, &dfe_ip, port); printf("Sending test frame...\n"); sendTestFrame(cpu_socket); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; uint8_t received_data[512]; while (numMessageRx < NUM_MESSAGES_EXPECTED) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { printf("CPU: Got output frame - size %zd - NumMsg = %zd!\n", fsz, numMessageRx); // Frame size would be rounded up to the next 8 bytes. 
memcpy(received_data, f, fsz); numMessageRx++; max_framed_stream_discard(toCpu, 1); } else usleep(10); } max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
/** * Runs the main action to compute a predictor or corrector step */ void AirfoilDFEInterface::runMainAction(int k, double cfl, double gam, double gm1, double eps, double *rms) { int cpuresind = 0; int schedind = 0; for (int d = 1; d < (*domain).ndomain; d++){ for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){ int thispart = d; int thisind = res_edge_iter; for (int i = 0; i < 2; i++){ int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i]; int thiscellind = (*domain).ecellind[thispart][thisind*2+i]; if (reads[7*schedind+3] == 1){ for (int j = 0; j < 4; j++) { cpu_res_qpadt[cpuresind*5+j] = (*domain).q[thiscellpart][4*thiscellind+j]; } cpu_res_qpadt[cpuresind*5+4] = (*domain).adt[thiscellpart][thiscellind]; cpuresind ++; } schedind ++ ; } } } max_actions_t * act = max_actions_init(maxfile, NULL); max_set_ticks(act, "AirfoilDFEAdtKernel", (*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEAdtKernel", "numTicks", (*domain).ncellcomputedfe); max_set_double(act, "AirfoilDFEAdtKernel", "cfl", cfl); max_set_double(act, "AirfoilDFEAdtKernel", "gam", gam); max_set_double(act, "AirfoilDFEAdtKernel", "gm1", gm1); max_lmem_linear(act, "adtQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]); max_lmem_linear(act, "adtDxRead", memAddresses[adtDx], adtDxDatSize); max_set_ticks(act, "AirfoilDFEResKernel", resFlushTicks); max_set_double(act, "AirfoilDFEResKernel", "gm1", gm1); max_set_double(act, "AirfoilDFEResKernel", "eps", eps); max_set_uint64t(act, "AirfoilDFEResKernel", "nTicks", resFlushTicks); max_queue_input(act, "cpu_qpadt_to_res", cpu_res_qpadt, cpuQpadtSize); max_lmem_linear(act, "resReadOnly", memAddresses[resReadOnly], resReadOnlyDatSize); max_queue_output(act,"cpu_res_from_res", dfe_res_res, passtorescount*sizeof(double)*4); double * rmsOut = (double *) malloc(16*sizeof(double)); max_set_ticks(act,"AirfoilDFEUpdateKernel", (*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEUpdateKernel", "numCells", 
(*domain).ncellcomputedfe); max_set_uint64t(act, "AirfoilDFEUpdateKernel", "doSaveQold", k==1); max_lmem_linear(act, "updateQ", memAddresses[q], memAddresses[q+1] - memAddresses[q]); max_lmem_linear(act, "updateQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]); max_queue_output(act,"rmsOut", rmsOut, 16*sizeof(double)); if (k == 0) { max_ignore_lmem(act, "updateSaveQold"); } else { max_lmem_linear(act, "updateSaveQold", memAddresses[qold], memAddresses[qold+1] - memAddresses[qold]); } max_ignore_lmem(act, "setupWrite"); max_ignore_lmem(act, "qRead"); max_run(engine, act); max_actions_free(act); for (int i = 0; i < 16; i++) (*rms) += rmsOut[i]; cpuresind = 0; schedind = 0; for (int d = 1; d < (*domain).ndomain; d++){ for (int res_edge_iter = 0; res_edge_iter < (*domain).nedge[d]; res_edge_iter++){ int thispart = d; int thisind = res_edge_iter; for (int i = 0; i < 2; i++){ int thiscellpart = (*domain).ecellpart[thispart][thisind*2+i]; int thiscellind = (*domain).ecellind[thispart][thisind*2+i]; if (reads[7*schedind+3] == 1){ for (int j = 0; j < 4; j++) { (*domain).res[thiscellpart][4*thiscellind+j] += dfe_res_res[cpuresind*4+j]; } cpuresind ++; } schedind ++ ; } } } }
/** * The initial action to set up arrays in lmem for main compute */ void AirfoilDFEInterface::runSetupAction () { max_actions_t * act; act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeAdtDX, adtDxDatSize); max_lmem_linear(act, "setupWrite", memAddresses[adtDx], adtDxDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeQ, qDatSize); max_lmem_linear(act, "setupWrite", memAddresses[q], qDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_lmem(act, "adtQ"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeQ, qDatSize); max_lmem_linear(act, "setupWrite", memAddresses[qold], qDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); act = max_actions_init(maxfile, NULL); max_queue_input(act, "setupCPU", dfeResReadOnly, 
resReadOnlyDatSize); max_lmem_linear(act, "setupWrite", memAddresses[resReadOnly], resReadOnlyDatSize); max_ignore_lmem(act, "adtDxRead"); max_ignore_lmem(act, "resReadOnly"); max_ignore_lmem(act, "qRead"); max_ignore_lmem(act, "updateQ"); max_ignore_lmem(act, "updateQold"); max_ignore_lmem(act, "adtQ"); max_ignore_lmem(act, "updateSaveQold"); max_ignore_kernel(act, "AirfoilDFEResKernel"); max_ignore_kernel(act, "AirfoilDFEAdtKernel"); max_ignore_kernel(act, "AirfoilDFEUpdateKernel"); max_run(engine, act); max_actions_free(act); }
int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip remote_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr remote_ip; inet_aton(argv[2], &remote_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int in_port = 2000; const int out_port = 2000; // struct in_addr mcastaddr; // inet_aton("224.0.0.1", &mcastaddr); max_file_t *maxfile = SignExtWithPatternMatching_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); // max_ip_multicast_join_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); // max_udp_bind_ip(dfe_socket, &mcastaddr, in_port); max_udp_bind(dfe_socket, in_port); max_udp_connect(dfe_socket, &remote_ip, out_port); printf("Listening on %s in_port %d\n", argv[1], in_port); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; while (1) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { numMessageRx++; printf("CPU: Got output frame %zd - size %zd bytes\n", numMessageRx, fsz); uint64_t *w = f; for (size_t i=0; i < 3; i++) { printf("Frame [%zd] Word[%zd]: 0x%lx\n", numMessageRx, i, w[i]); } max_framed_stream_discard(toCpu, 1); } else usleep(10); } // max_ip_multicast_leave_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
int main(int argc, char *argv[]) { (void) argc; (void) argv; max_file_t *maxfile = INIT_NAME(); if(!maxfile) { printf("Failed to init MAX file\n"); return -1; } max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); const char *device_name = "*"; printf("Opening device: %s\n", device_name); max_engine_t *engine = max_load(maxfile, device_name); if(!engine) { printf("Failed to open Max device\n"); exit(-1); } max_reset_engine(engine); /* * SLiC is so shit, that if we don't run an empty action, no debug outputs will be generated. */ max_actions_t *action = max_actions_init(maxfile, NULL); max_run(engine, action); max_actions_free(action); srand(time(NULL)); single_entry_t *outputData = calloc(MAX_DEPTH, sizeof(single_entry_t)); void *configWordBuffer = NULL; posix_memalign(&configWordBuffer, 4096, 512 * sizeof(configWord_t)); max_llstream_t *configWordStream = max_llstream_setup(engine, "configWord", 512, sizeof(configWord_t), configWordBuffer); uint64_t configBase = 0; printf("Sending config word...\n"); void *configWordSlot; while (max_llstream_write_acquire(configWordStream, 1, &configWordSlot) != 1) usleep(10); configWord_t *configWord = configWordSlot; configWord->wordCount = MAX_DEPTH; configWord->base = configBase; max_llstream_write(configWordStream, 1); getchar(); printf("Streaming 'read_fifo'...\n"); fflush(stdout); action = max_actions_init(maxfile, NULL); max_queue_output(action, "read_fifo", outputData, sizeof(single_entry_t) * MAX_DEPTH); max_disable_reset(action); max_disable_validation(action); max_enable_partial_memory(action); max_run(engine, action); max_actions_free(action); printf("Comparing...\n"); fflush(stdout); uint8_t fail = 0; for (size_t entryIx=0; entryIx < MAX_DEPTH; entryIx++) { uint64_t *output = (uint64_t *)outputData[entryIx].data; size_t quadsPerEntry = sizeof(single_entry_t) / sizeof(uint64_t); uint64_t expected = (configBase + entryIx); if (expected != output[0]) { fail = 1; printf("[Entry: %zd, Quad: %zd] Mismatch: input 0x%lx, 
output 0x%lx\n", entryIx, 0L, expected, output[0]); } for (size_t q = 1; !fail && q < quadsPerEntry; q++) { if (0 != output[q]) { fail = 1; printf("[Entry: %zd, Quad: %zd] Mismatch: input 0x%lx, output 0x%lx\n", entryIx, q, 0L, output[q]); } } } printf("%s\n", fail ? "FAILED!" : "Success"); return fail; }
void my_process(int data_x_offset,const cateType* data_y,int mb_idx,real learning_rate){ { int t = K_fw_l0_conv; load_engine(t); printf("Running on DFE: fw_l0_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", 1); max_set_param_uint64t(act, "no", NKERS[0]); max_queue_input(act, "b", layer0_b, layer0_b_size); max_queue_input(act, "w", layer0_w, layer0_w_size); max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size); max_set_param_uint64t(act, "z_offset", layer0_z2_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l0_maxpool; load_engine(t); printf("Running on DFE: fw_l0_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "z2_offset", layer0_z2_offset); max_set_param_uint64t(act, "sel_offset", layer0_sel_offset); max_set_param_uint64t(act, "z_offset", layer0_z_offset); max_set_param_uint64t(act, "a_offset", layer0_a_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l1_conv; load_engine(t); printf("Running on DFE: fw_l1_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[0]); max_set_param_uint64t(act, "no", NKERS[1]); max_queue_input(act, "b", layer1_b, layer1_b_size); max_queue_input(act, "w", layer1_w, layer1_w_size); max_set_param_uint64t(act, "x_offset", layer1_x_offset); max_set_param_uint64t(act, "z_offset", layer1_z2_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l1_maxpool; load_engine(t); printf("Running on DFE: fw_l1_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "z2_offset", layer1_z2_offset); 
max_set_param_uint64t(act, "sel_offset", layer1_sel_offset); max_set_param_uint64t(act, "z_offset", layer1_z_offset); max_set_param_uint64t(act, "a_offset", layer1_a_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_fw_l3_softmax; load_engine(t); printf("Running on DFE: fw_l3_softmax"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[2]); max_set_param_uint64t(act, "x_offset", layer3_x_offset); max_queue_input(act, "w", layer3_w, layer3_w_size); max_queue_input(act, "b", layer3_b, layer3_b_size); max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset); max_queue_output(act, "pred", layer3_pred, layer3_pred_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } //TODO: learning rate<0 exit { int t = K_bp_l3_softmax; load_engine(t); printf("Running on DFE: bp_l3_softmax"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[2]); max_set_param_uint64t(act, "x_offset", layer3_x_offset); max_queue_input(act, "w", layer3_w, layer3_w_size); max_set_param_uint64t(act, "softmax_offset", layer3_sm_offset); max_queue_input(act, "std", data_y+mb_idx*layer3_pred_size, layer3_pred_size); max_queue_output(act, "w_grad", layer3_w_grad, layer3_w_grad_size); max_queue_output(act, "b_grad", layer3_b_grad, layer3_b_grad_size); max_set_param_uint64t(act, "x_grad_offset", layer3_x_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l1_maxpool; load_engine(t); printf("Running on DFE: bp_l1_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "a_grad_offset", layer1_a_grad_offset); max_set_param_uint64t(act, "z_offset", layer1_z_offset); max_set_param_uint64t(act, "sel_offset", layer1_sel_offset); 
max_set_param_uint64t(act, "z2_grad_offset", layer1_z2_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l1_conv; load_engine(t); printf("Running on DFE: bp_l1_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", NKERS[0]); max_set_param_uint64t(act, "no", NKERS[1]); max_set_param_uint64t(act, "z_grad_offset", layer1_z2_grad_offset); max_set_param_uint64t(act, "x_offset", layer1_x_offset); max_set_param_uint64t(act, "x_grad_offset", layer1_x_grad_offset); max_queue_input(act, "w", layer1_w, layer1_w_size); max_queue_output(act, "w_grad", layer1_w_grad, layer1_w_grad_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l0_maxpool; load_engine(t); printf("Running on DFE: bp_l0_maxpool"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "a_grad_offset", layer0_a_grad_offset); max_set_param_uint64t(act, "z_offset", layer0_z_offset); max_set_param_uint64t(act, "sel_offset", layer0_sel_offset); max_set_param_uint64t(act, "z2_grad_offset", layer0_z2_grad_offset); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } { int t = K_bp_l0_conv; load_engine(t); printf("Running on DFE: bp_l0_conv"); mark_timer(false,1); max_actions_t* act = max_actions_init(max_files[t], "default"); max_set_param_uint64t(act, "ni", 1); max_set_param_uint64t(act, "no", NKERS[0]); max_set_param_uint64t(act, "z_grad_offset", layer0_z2_grad_offset); max_set_param_uint64t(act, "x_offset", data_x_offset+mb_idx*layer0_x_size); max_set_param_uint64t(act, "x_grad_offset", layer0_x_grad_offset); max_queue_input(act, "w", layer0_w, layer0_w_size); max_queue_output(act, "w_grad", layer0_w_grad, layer0_w_grad_size); max_run(max_engines[t], act); max_actions_free(act); mark_timer(true,1); } }