/*
 * Unload the currently active engine (if any) and free every loaded maxfile.
 *
 * cur_engine is reset to -1 once its engine has been unloaded. Each non-NULL
 * entry of max_files is freed and then cleared, so the table never holds a
 * dangling pointer and a repeated call does not free the same maxfile twice.
 */
void unload_maxfiles()
{
    if (cur_engine != -1) {
        max_unload(max_engines[cur_engine]);
        cur_engine = -1;
    }

    for (int i = 0; i < K_TOTAL; ++i) {
        if (max_files[i]) {
            max_file_free(max_files[i]);
            /* Clear the slot so a later call (or lookup) cannot reuse it. */
            max_files[i] = NULL;
        }
    }
}
int main(int argc, char *argv[]) { if (argc < 4) { printf("Syntax: %s <TOP local IP> <BOT local IP> <forward IP>\n", argv[0]); return 1; } struct in_addr top_ip; struct in_addr bot_ip; struct in_addr fwd_ip; struct in_addr netmask; inet_aton(argv[1], &top_ip); inet_aton(argv[2], &bot_ip); inet_aton(argv[3], &fwd_ip); inet_aton("255.255.255.0", &netmask); uint16_t port = 7653; printf("EthFwd: TOP IP '%s', BOT IP '%s', Forward IP '%s', port %u\n", argv[1], argv[2], argv[3], port); max_file_t *maxfile = EthFwd_init(); max_engine_t * engine = max_load(maxfile, "*"); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &top_ip, &netmask); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_BOT_10G_PORT1, &bot_ip, &netmask); struct ether_addr local_mac2, remote_mac2; max_arp_lookup_entry(engine, MAX_NET_CONNECTION_QSFP_BOT_10G_PORT1, &fwd_ip, &remote_mac2); max_eth_get_default_mac_address(engine, MAX_NET_CONNECTION_QSFP_BOT_10G_PORT1, &local_mac2); uint64_t localMac = 0, forwardMac = 0; memcpy(&localMac, &local_mac2, 6); memcpy(&forwardMac, &remote_mac2, 6); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *action = max_actions_init(maxfile, NULL); max_set_uint64t(action, "fwdKernel", "localIp", bot_ip.s_addr); max_set_uint64t(action, "fwdKernel", "forwardIp", fwd_ip.s_addr); max_set_uint64t(action, "fwdKernel", "localMac", localMac); max_set_uint64t(action, "fwdKernel", "forwardMac", forwardMac); max_set_uint64t(action, "fwdKernel", "port", port); max_run(engine, action); printf("JDFE Running.\n"); getchar(); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); return 0; }
int main(int argc, char *argv[]) { if (argc != 3) { printf("Usage: %s <dfe_ip> <netmask>\n", argv[0]); return 1; } uint16_t Nsockets = 1; const int port = 80; struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr netmask; inet_aton(argv[2], &netmask); // initialization files for crcIndex table, generated by init_code char fileCrcIndex1[] = "./results/romCrcIndex1_init.html"; char fileCrcIndex2[] = "./results/romCrcIndex2_init.html"; // LMEM initialization file location, generated by init_code char fileLmem[] = "./results/lmem_generated_file.html"; uint64_t *arrCrc1; uint64_t *arrCrc2; long Lcrc; FILE *fpCrc1 = fopen(fileCrcIndex1, "rb"); FILE *fpCrc2 = fopen(fileCrcIndex2, "rb"); FILE *fpLmem = fopen(fileLmem, "rb"); if (!(fpCrc1 && fpCrc2 && fpLmem)) { printf("Error with file\n"); exit(0); } // obtain file size fseek(fpCrc1, 0, SEEK_END); Lcrc = ftell(fpCrc1); rewind(fpCrc1); fillRomCrcIndex(fpCrc1, &arrCrc1, Lcrc); fillRomCrcIndex(fpCrc2, &arrCrc2, Lcrc); printf("Preparing for init() and max_load()\n"); max_file_t *maxfile = httpserver_init(); max_engine_t * engine = max_load(maxfile, "*"); printf("Done\n"); max_actions_t *actions = max_actions_init(maxfile, NULL); int romDepthCrc = Lcrc / 8; for (uint32_t i = 0; i < romDepthCrc; i++) { max_set_mem_uint64t(actions, "CrcIndexTable", "romCrcIndex1", i, arrCrc1[i]); max_set_mem_uint64t(actions, "CrcIndexTable", "romCrcIndex2", i, arrCrc2[i]); } max_run(engine, actions); max_actions_free(actions); long L; size_t result; uint64_t* arrLmem; // obtain file size fseek(fpLmem, 0, SEEK_END); L = ftell(fpLmem); rewind(fpLmem); double diff = ceil(L / 8.0) - L / 8.0; // NULL character padding if (diff != 0) { L = (int) ceil(L / 8.0) * 8; } // allocate memory to contain the whole file size_t Nelem = sizeof(uint64_t) * (L / 8); arrLmem = (uint64_t*) malloc(Nelem); result = fread(arrLmem, 1, L, fpLmem); int romDepth = L / 8; int burstLengthInBytes = max_get_burst_size(maxfile, "cmd_tolmem"); inline int max(int a, 
int b) { return a > b ? a : b; } ; const int size = romDepth; int sizeBytes = size * sizeof(uint64_t); uint64_t *inData; printf("Writing to DFE memory.\n"); inData = arrLmem; writeDataToLMem(inData, size, sizeBytes, burstLengthInBytes, engine, maxfile); printf("Done\n"); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_BOT_10G_PORT1, &dfe_ip, &netmask); //all sockets MUST be created before first call to max_tcp_connect or max_tcp_listen max_tcp_socket_t *(dfe_socket[Nsockets]); uint16_t socketNumber[Nsockets]; for (int i = 0; i < Nsockets; i++) { //dfe_socket[i] = max_tcp_create_socket(engine, "tcp_ISCA_QSFP_BOT_10G_PORT1"); dfe_socket[i] = max_tcp_create_socket_with_number(engine, "tcp_ISCA_QSFP_BOT_10G_PORT1", i); socketNumber[i] = max_tcp_get_socket_number(dfe_socket[i]); printf("Socket %d was assigned socket number %u\n", i, socketNumber[i]); } for (int i = 0; i < Nsockets; i++) { max_tcp_listen(dfe_socket[i], port + i); max_tcp_await_state(dfe_socket[i], MAX_TCP_STATE_LISTEN, NULL); } printf("CPU code: Total %u socket(s), listening on the port(s) %u-%u\n\n", Nsockets, port, port + Nsockets - 1); void *read_ptr; uint8_t *read_buffer; max_llstream_t *read_llstream; uint64_t *byteNumber; printf("CPU code: Setting up 'toCpuByteNumber' stream.\n"); int Nslots_byteNumber = 512; size_t tCBN_buffer_size = Nslots_byteNumber * 16; posix_memalign((void *) &read_buffer, 4096, tCBN_buffer_size); read_llstream = max_llstream_setup(engine, "toCpuFileSizeBytes", Nslots_byteNumber, 16, read_buffer); uint8_t *read_buffer_socket; max_llstream_t *read_llstream_socket; printf("CPU code: Setting up 'toCpuSocketNumber' stream.\n"); int Nslots_socketNumber = 512; size_t tCSB_buffer_size = Nslots_socketNumber * 16; posix_memalign((void *) &read_buffer_socket, 4096, tCSB_buffer_size); read_llstream_socket = max_llstream_setup(engine, "toCpuSocketNumber", Nslots_socketNumber, 16, read_buffer_socket); void *read_ptr_socket_slot; uint16_t ti = 10; while(ti > 0) { printf("CPU code: time=%u, 
waiting file size and socket numbers stream data to be sent to CPU\n", ti); usleep(1000*1000*1); ti--; } //while(1); uint64_t num_rx_bytes; uint64_t num_tx_bytes; uint8_t session_id; while (1) { //part 1: first wait to receive LengthBytes number printf("CPU code: PART 1 - waiting to receive LengthBytes number\n"); int FoundByteNumber = 0; ti=0; while (FoundByteNumber != 1) //first wait to receive LengthBytes number { usleep(1000*1000*1); for (int i = 0; i < Nsockets; i++) { max_tcp_get_num_bytes_received(dfe_socket[i], &num_rx_bytes); max_tcp_get_num_bytes_transmitted(dfe_socket[i], &num_tx_bytes, &session_id); printf("CPU code: waiting, time=%u, port=%u, socket=%i, max_tcp_get_num_bytes_received=%llu, max_tcp_get_num_bytes_transmitted=%llu\n", ti, port + i, i, (long long unsigned int) num_rx_bytes, (long long unsigned int) num_tx_bytes); } ti++; uint8_t ii = max_llstream_read(read_llstream, 1, &read_ptr); if (ii) { byteNumber = (uint64_t*) read_ptr; printf("CPU code: number of slots found to contain new data=%u, fileSizeBytes=%u\n", ii, (unsigned int) *byteNumber); max_llstream_read_discard(read_llstream, 1); FoundByteNumber = 1; } } //part 2: receive total number of data transfered printf("CPU code: PART 2 - receive socket number\n"); while (max_llstream_read(read_llstream_socket, 1, &read_ptr_socket_slot) == 0) ; uint16_t socket_returned = (uint16_t) *((uint16_t*) read_ptr_socket_slot); //event->socketID; unsigned int fileBytes = (unsigned int) *byteNumber; printf("CPU code: fileBytes=%u, socket_returned=%u\n", fileBytes, socket_returned); ti = 0; while (1) { { for (int i = 0; i < Nsockets; i++) { max_tcp_get_num_bytes_received(dfe_socket[i], &num_rx_bytes); max_tcp_get_num_bytes_transmitted(dfe_socket[i], &num_tx_bytes, &session_id); printf("CPU code: time=%i, port=%u, socket=%i, max_tcp_get_num_bytes_received=%llu, max_tcp_get_num_bytes_transmitted=%llu\n", ti, port + i, i, (long long unsigned int) num_rx_bytes, (long long unsigned int) num_tx_bytes); } ti++; 
printf("\n"); max_tcp_get_num_bytes_transmitted(dfe_socket[socket_returned], &num_tx_bytes, &session_id); printf("CPU code: fileSizeBytes=%u, socketReturned=%u, num_tx_bytes=%llu\n", fileBytes, socket_returned, (long long unsigned int) num_tx_bytes); } //usleep(1000*100); //printf("CPU code: While LOOP, socket_returned=%u, fileBytes=%u, num_tx_bytes(max_tcp_get_num_bytes_transmitted)=%llu\n", socket_returned, fileBytes, (long long unsigned int) num_tx_bytes); if (num_tx_bytes == fileBytes) { //usleep(1000*1000*3); printf("CPU code: MATCH num_tx_bytes==fileBytes, socket_returned=%u, fileBytes=%u, num_tx_bytes(max_tcp_get_num_bytes_transmitted)=%llu\n", socket_returned, fileBytes, (long long unsigned int) num_tx_bytes); printf("CPU code: Closing socket=%u\n", socket_returned); max_tcp_close(dfe_socket[socket_returned]); //max_tcp_close_mode_t close_mode=MAX_TCP_CLOSE_ABORT_RESET; //max_tcp_close_advanced(dfe_socket[socket_returned],close_mode); printf("CPU code: Waiting for MAX_TCP_STATE_CLOSED\n"); max_tcp_await_state(dfe_socket[socket_returned], MAX_TCP_STATE_CLOSED, NULL); printf("CPU code: Set LISTEN state\n"); max_tcp_listen(dfe_socket[socket_returned], port); printf("CPU code: Waiting for MAX_TCP_STATE_LISTEN\n"); max_tcp_await_state(dfe_socket[socket_returned], MAX_TCP_STATE_LISTEN, NULL); printf("CPU code: Again opened socket=%u\n", socket_returned); printf("\nCPU code: State of rx/tx after socket closing\n"); break; } usleep(1000*1000*1); } } for (int i = 0; i < Nsockets; i++) { max_tcp_close(dfe_socket[i]); printf("max_tcp_close(dfe_socket[i])"); } max_unload(engine); printf("max_unload(engine)"); max_file_free(maxfile); printf("max_file_free(maxfile)"); printf("The end\n"); return 0; }
int main(int argc, char *argv[]) { if(argc != 4) { printf("Usage: %s <dfe_ip> <cpu_ip> <netmask>\n", argv[0]); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr cpu_ip; inet_aton(argv[2], &cpu_ip); struct in_addr netmask; inet_aton(argv[3], &netmask); const int port = 5008; /* Create DFE Socket, then listen */ max_file_t *maxfile = FieldAccumulatorTCP_init(); max_engine_t *engine = max_load(maxfile, "*"); max_ip_config(engine, MAX_NET_CONNECTION_CH2_SFP1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udp_ch2_sfp1"); max_udp_bind(dfe_socket, port); max_udp_connect(dfe_socket, &cpu_ip, port); int cpu_socket = create_cpu_udp_socket(&cpu_ip, &dfe_ip, port); FILE *stream = fopen("source_data1.csv", "r"); char line[BUFFERSIZE]; // char *to_be_free = line; /* Ignore Header File */ fgets(line, BUFFERSIZE, stream); printf(line); while (fgets(line, BUFFERSIZE, stream)) { struct input_data data; parse(line,&data); printf("\n Instrument id = %d \n level = %d \n side = %d \n Quantity = %d \n Price = %d",data.instrument_id,data.level,data.side,data.quantity,data.price); calculateDeltas(cpu_socket, &data); } // /* Set Value A */ // data.instrument_id = 0; // data.level = 0; // data.side = 0; // data.quantity = 5; // data.price = 10; // calculateDeltas(cpu_socket, &data); // // /* Set B*/ // data.instrument_id = 1; // data.level = 0; // data.side = 1; // data.quantity = 3; // data.price = 4; // calculateDeltas(cpu_socket, &data); // // /* Hold */ // data.instrument_id = 1; // data.level = 0; // data.side = 1; // data.quantity = 5; // data.price = 6; // calculateDeltas(cpu_socket, &data); // // /* Set AB */ // data.instrument_id = 2; // data.level = 0; // data.side = 1; // data.quantity = 7; // data.price = 8; // calculateDeltas(cpu_socket, &data); max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); return 0; }
int main(int argc, char *argv[]) { max_file_t *maxfile = Gap_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *action = max_actions_init(maxfile, NULL); max_run(engine, action); size_t bufferSize = 4096 * 4096; void *inBuffer = NULL; void *outBuffer = NULL; if (posix_memalign(&inBuffer, 4096, bufferSize)) { err(1, "Couldn't allocation input buffer"); } if (posix_memalign(&outBuffer, 4096, bufferSize)) { err(1, "Couldn't allocation output buffer"); } max_framed_stream_t *inFrame = max_framed_stream_setup(engine, "src", inBuffer, bufferSize, 2048-16); max_framed_stream_t *outFrame = max_framed_stream_setup(engine, "dst", outBuffer, bufferSize, -1); // Now, stream in some frames and see what happens. for (size_t i=0 ; i < 8; i++) { void *f; while (max_framed_stream_write_acquire(inFrame, 1, &f) != 1) usleep(10); uint8_t *inputData = f; /* * Request a gap every other packet */ inputData[20] = i % 2 == 1 ? 'G' : 'N'; size_t frameSize = 60; printf("Sending frame %zd\n", i); max_framed_stream_write(inFrame, 1, &frameSize); void *oFrame; size_t oFrameSize; while (max_framed_stream_read(outFrame, 1, &oFrame, &oFrameSize) != 1) usleep(10); printf("Got frame %zd - %zd bytes (Expecting %zd)\n", i, oFrameSize, frameSize); dump(oFrame, oFrameSize); max_framed_stream_discard(outFrame, 1); } max_unload(engine); max_file_free(maxfile); printf("Done.\n"); return 0; }
int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip cpu_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr cpu_ip; inet_aton(argv[2], &cpu_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int port = 5007; max_file_t *maxfile = Tracker_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); char regName[32]; for (int i=0; i < 1024; i++) { sprintf(regName, "filter_%d", i); if (i == 150) { max_set_uint64t(actions, "filteringKernel", regName, 0xCC /* a value to match... */); } else { max_set_uint64t(actions, "filteringKernel", regName, 0x4D1B /* or any value you want */); } } max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); /* * This executable both creates a normal Linux UDP socket as well as a DFE UDP Socket. * We then exchange data between the two. */ // DFE Socket max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); max_udp_bind(dfe_socket, port); max_udp_connect(dfe_socket, &cpu_ip, port); // Linux Socket int cpu_socket = create_cpu_udp_socket(&cpu_ip, &dfe_ip, port); printf("Sending test frame...\n"); sendTestFrame(cpu_socket); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; uint8_t received_data[512]; while (numMessageRx < NUM_MESSAGES_EXPECTED) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { printf("CPU: Got output frame - size %zd - NumMsg = %zd!\n", fsz, numMessageRx); // Frame size would be rounded up to the next 8 bytes. 
memcpy(received_data, f, fsz); numMessageRx++; max_framed_stream_discard(toCpu, 1); } else usleep(10); } max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
int main(int argc, char *argv[]) { if(argc < 3) { printf("Usage: $0 dfe_ip remote_ip\n"); return 1; } struct in_addr dfe_ip; inet_aton(argv[1], &dfe_ip); struct in_addr remote_ip; inet_aton(argv[2], &remote_ip); struct in_addr netmask; inet_aton("255.255.255.0", &netmask); const int in_port = 2000; const int out_port = 2000; // struct in_addr mcastaddr; // inet_aton("224.0.0.1", &mcastaddr); max_file_t *maxfile = SignExtWithPatternMatching_init(); max_engine_t * engine = max_load(maxfile, "*"); max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true); max_actions_t *actions = max_actions_init(maxfile, NULL); max_run(engine, actions); max_actions_free(actions); void *buffer; size_t bufferSize = 4096 * 512; posix_memalign(&buffer, 4096, bufferSize); max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1); max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask); max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1"); // max_ip_multicast_join_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); // max_udp_bind_ip(dfe_socket, &mcastaddr, in_port); max_udp_bind(dfe_socket, in_port); max_udp_connect(dfe_socket, &remote_ip, out_port); printf("Listening on %s in_port %d\n", argv[1], in_port); printf("Waiting for kernel response...\n"); fflush(stdout); void *f; size_t fsz; size_t numMessageRx = 0; while (1) { if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) { numMessageRx++; printf("CPU: Got output frame %zd - size %zd bytes\n", numMessageRx, fsz); uint64_t *w = f; for (size_t i=0; i < 3; i++) { printf("Frame [%zd] Word[%zd]: 0x%lx\n", numMessageRx, i, w[i]); } max_framed_stream_discard(toCpu, 1); } else usleep(10); } // max_ip_multicast_leave_group(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &mcastaddr); max_udp_close(dfe_socket); max_unload(engine); max_file_free(maxfile); printf("Done.\n"); fflush(stdout); return 0; }
int main(int argc, char** argv) { max_file_t *max_file = jacobi_init(); size_t dim = 64; // this should be a scalar input in the bitstream size_t MAX_ITER = 20; size_t C = max_get_constant_uint64t(max_file, "C"); size_t blks = 100; size_t total_equations = blks*C; clock_t engine_start = 0; clock_t engine_end = 0; double engine_total_time = 0.0; size_t max_dim = max_get_constant_uint64t(max_file, "maxDimLen"); if(argc == 1) { fprintf(stderr, "====>Info:Runing Jacobi with default parameter values:[Dimension = %ld, Iteration = %ld, blocks = %ld(%ld*%ld equations)], for details, see the README.txt\n", dim, MAX_ITER, blks, blks, C); } char *opt_str = "hd:b:i:"; int opt = 0; int input_dim = dim; int input_iter = MAX_ITER; int input_blks = blks; while( (opt = getopt(argc, argv, opt_str)) != -1) { switch(opt) { case 'd': input_dim = atoi(optarg); break; case 'b': input_blks = atoi(optarg); break; case 'i': input_iter = atoi(optarg); break; case 'h': usage(); return 1; default: fprintf(stderr, "====>Error: Inputs contain invalid command line paramter(s)!\n"); usage(); return 1; } } max_file_free(max_file); if(input_dim <= 0 || input_dim > max_dim || input_dim % 2 != 0) { fprintf(stderr, "\n====>Error: Input dimension length is invalid, for details, see the usage below:\n"); usage(); return 1; } else { dim = (size_t)input_dim; } if(input_blks <= 0) { fprintf(stderr, "\n====>Error: Input block number is invalid, should bigger than zero.\n"); usage(); return 1; } else { blks = (size_t)input_blks; } if(input_iter <= 1) { fprintf(stderr, "\n====>Error: Input iteration number is invalid, should bigger than 1.\n"); usage(); return 1; } else { MAX_ITER = (size_t)input_iter; } total_equations = blks * C; double *A = malloc(dim*dim*sizeof(double)); double *A_trans = malloc(dim*dim*sizeof(double)); double *b = malloc(total_equations*dim*sizeof(double)); double *b_trans = malloc(total_equations*dim*sizeof(double)); double *diagA = malloc(dim*sizeof(double)); double *reverse_diagA = 
malloc(dim*sizeof(double)); double *x_init = malloc(C*dim*sizeof(double)); double *x_trans_init = malloc(C*dim*sizeof(double)); double *result = malloc(total_equations * dim * sizeof(double)); double *reorder_result = malloc(total_equations * dim *sizeof(double)); double *solutions = malloc(total_equations * dim *sizeof(double)); double *error = malloc(total_equations*sizeof(double)); double *error_bak = malloc(total_equations*sizeof(double)); int *is_solution_valid = malloc(total_equations*sizeof(int)); int *recacu_error_index = malloc(total_equations*sizeof(int)); double *expected_error = malloc(total_equations*sizeof(double)); double *x_base = malloc(total_equations * dim * sizeof(double)); double *x_all_init = malloc(total_equations * dim *sizeof(double)); double *x_all_trans_init = malloc(total_equations * dim *sizeof(double)); memset(A, 0 , sizeof(double)*dim*dim); memset(A_trans, 0 , sizeof(double)*dim*dim); memset(b, 0 , sizeof(double)*dim*total_equations); memset(b_trans, 0 , sizeof(double)*dim*total_equations); memset(diagA, 0 , sizeof(double)*dim); memset(reverse_diagA, 0 , sizeof(double)*dim); memset(x_init, 0 , sizeof(double) *C*dim); memset(result, 0 , sizeof(double)*dim*total_equations); memset(reorder_result, 0 , sizeof(double)*dim*total_equations); memset(error, 0 , sizeof(double)*total_equations); memset(expected_error, 0 , sizeof(double)*total_equations); memset(x_base, 0 , sizeof(double)*dim*total_equations); memset(x_all_init, 0 , sizeof(double)*dim*total_equations); memset(x_all_trans_init, 0 , sizeof(double)*dim*total_equations); memset(is_solution_valid,0 , sizeof(int)*total_equations); for(int i = 0; i < total_equations; i ++) { recacu_error_index[i] = -1; expected_error[i] = 1000; error_bak[i] = 1000; for(int j = 0; j < dim; j ++) { solutions[i*dim + j] = 1000; } } /** * Generating random value for b and A */ srand(time(NULL)); for(int i = 0; i < dim; ++i) { double sum = 0; for(int j = 0; j < dim; ++j) { if(i != j) { A[i*dim+j] = 
2.0*rand()/(double)RAND_MAX - 1 ; // random number between -1 and 1 sum += fabs(A[i*dim+j]) ; } } A[i * dim + i] = 1 + sum; diagA[i] = 1.0/A[i * dim + i]; reverse_diagA[i] = A[i * dim + i]; } double A_original[dim * dim]; for(int i = 0; i < C*blks; i ++) { for(int j = 0; j < dim; j ++) { b[i * dim + j] = 2.0*rand()/(double)RAND_MAX - 1; } } for(int i = 0; i < dim; i ++) { for(int j = 0; j < dim; j ++) { A_original[i * dim + j] = A[i * dim + j]; if(i != j) { A[i * dim + j] = A[i*dim + j] * diagA[i]; } } } /** * Reorder the input A and b */ engine_start = clock(); for(int i = 0; i < dim; i ++) { for(int j = 0; j < dim; j ++) { A_trans[i * dim + j] = A[j * dim + i]; } } int count = 0; for(int yy = 0; yy < total_equations; yy += C) { for(int i = 0; i < dim; i ++) { for(int j = yy; j <yy + C; j ++) { b_trans[count] = b[j * dim + i]*diagA[i]; count ++; } } } for(int k = 0; k < blks; k ++) { for ( int i = 0; i < C ; i ++ ) { for ( int j = 0; j < dim; j ++ ) { x_init[i * dim + j] = 0; x_trans_init[j*C + i] = x_init[i * dim + j]; } } memcpy(x_all_trans_init + k * C * dim , x_trans_init , sizeof(double)*C*dim); memcpy(x_all_init + k * C * dim , x_init , sizeof(double)*C*dim); } jacobi( dim, total_equations, MAX_ITER, A_trans , dim * dim * sizeof(double) , b_trans , total_equations * dim * sizeof(double) , reverse_diagA , dim * sizeof(double) , x_all_trans_init , total_equations * dim * sizeof(double) , error , total_equations * sizeof(double) , result , total_equations * dim * sizeof(double) ); for(int yy = 0; yy<total_equations; yy += C) { for(int i = 0; i < C; i ++) { for(int j = 0; j < dim; j ++) { reorder_result[yy *dim + i*dim + j] = result[yy * dim + i + j * C]; } } } /*Check Error to decide whether we need to restream into kernel again*/ int recacu_cnt = 0; int new_recacu_cnt = 0; int actual_recacu_cnt = 0; int new_actual_recacu_cnt = 0; double *x_latest_init = malloc(total_equations * dim * sizeof(double)) ; double *x_latest_trans_init = malloc(total_equations * dim 
* sizeof(double)) ; double *recacu_b = malloc(total_equations * dim * sizeof(double)) ; double *recacu_trans_b = malloc(total_equations * dim * sizeof(double)) ; memset(x_latest_init , 0 , total_equations * dim * sizeof(double)) ; memset(x_latest_trans_init , 0 , total_equations * dim * sizeof(double)) ; memset(recacu_b , 0 , total_equations * dim * sizeof(double)) ; memset(recacu_trans_b , 0 , total_equations * dim * sizeof(double)) ; int idx = 0; for(int i = 0; i < total_equations; i ++) { if(error[i] > CUR_EPS) { memcpy(x_latest_init + idx*dim, reorder_result + i*dim, dim*sizeof(double)); memcpy(recacu_b + idx*dim, b + i*dim, dim*sizeof(double)); recacu_error_index[idx] = i; recacu_cnt ++ ; actual_recacu_cnt ++ ; idx ++; } else { error_bak[i] = error[i]; memcpy(solutions + i*dim, reorder_result + i*dim, dim*sizeof(double)); } } while( recacu_cnt % C ) { recacu_cnt ++; } /** * if recaculate count not zero, we start to restream data into kernel again */ int times = 1; while( recacu_cnt != 0 ) { /*Reorder Latest solutions init value */ times ++; memset(x_latest_trans_init, 0, recacu_cnt*dim*sizeof(double)); count = 0; for(int yy = 0; yy < recacu_cnt; yy += C) { for(int i = 0; i < dim; i ++) { for(int j = yy; j < yy + C; j ++) { x_latest_trans_init[count] = x_latest_init[j * dim + i]; count ++; } } } /*Reorder latest b*/ memset(recacu_trans_b, 0, total_equations*dim*sizeof(double)); count = 0; for(int yy = 0; yy < recacu_cnt; yy += C) { for(int i = 0; i < dim; i ++) { for(int j = yy; j < yy + C; j ++) { recacu_trans_b[count] = recacu_b[j * dim + i]*diagA[i]; count ++; } } } memset(error , 0 , recacu_cnt * sizeof(double ) ) ; memset(result , 0 , recacu_cnt * dim * sizeof(double ) ) ; jacobi( dim, recacu_cnt, MAX_ITER, A_trans , dim * dim * sizeof(double) , recacu_trans_b , recacu_cnt * dim * sizeof(double) , reverse_diagA , dim * sizeof(double) , x_latest_trans_init , recacu_cnt * dim * sizeof(double) , error , recacu_cnt * sizeof(double) , result , recacu_cnt * dim 
* sizeof(double) ); for(int yy = 0; yy < recacu_cnt; yy += C) { for(int i = 0; i < C; i ++) { for(int j = 0; j < dim; j ++) { reorder_result[yy *dim + i*dim + j] = result[yy * dim + i + j * C]; } } } new_recacu_cnt = 0; new_actual_recacu_cnt = 0; int idx2 = 0; for(int i = 0; i < actual_recacu_cnt; i ++) { if(error[i] > CUR_EPS) { memcpy(x_latest_init + new_recacu_cnt*dim, reorder_result + i*dim, dim*sizeof(double)); memcpy(recacu_b + new_recacu_cnt*dim, recacu_b + i*dim, dim*sizeof(double)); recacu_error_index[idx2] = recacu_error_index[i]; new_recacu_cnt ++; new_actual_recacu_cnt ++; idx2 ++; } else { error_bak[ recacu_error_index[i]] = error[i]; memcpy(solutions + recacu_error_index[i] *dim, reorder_result + i*dim, dim*sizeof(double)); } } /* padding to multipy of C */ while( new_recacu_cnt % C ) { new_recacu_cnt ++; } /* update the current recaculating solution numbers */ recacu_cnt = new_recacu_cnt; actual_recacu_cnt = new_actual_recacu_cnt; }//loop while engine_end = clock(); engine_total_time = (double)(engine_end - engine_start) / CLOCKS_PER_SEC; fprintf(stderr, "=========>Kernel Complete, Stream Times: %d\n", times); clock_t cpu_start = clock(); jacobi_opt(A_original, x_base, b, dim, C, total_equations, x_all_init , expected_error); clock_t cpu_end = clock(); double cpu_total_time = (double)(cpu_end - cpu_start) / CLOCKS_PER_SEC; /* Compare the result with the standard result */ int cnt = 0; int index = 0; for(int i = 0; i < total_equations; i ++) { for(int j = 0; j < dim; j ++) { double diff = solutions[i * dim + j] - x_base[i*dim + j]; if(fabs(diff) > EPS) { fprintf(stderr, "error: atual=%.10f, expect=%.10f, err=%.10e\n", solutions[i * dim + j], x_base[i*dim + j], diff); cnt ++; index ++; } } } if(cnt == 0) { max_print_result(dim, total_equations, MAX_ITER, engine_total_time, cpu_total_time); fprintf(stderr, "==========>All Test Passed\n\n"); } else { fprintf(stderr, "!!!Test Failed:%d\n\n", cnt); } free ( A ) ; free ( A_trans ) ; free ( b ) ; free ( 
b_trans ) ; free ( diagA ) ; free ( reverse_diagA ) ; free ( x_init ) ; free ( error ) ; free ( error_bak ) ; free ( recacu_error_index ) ; free ( expected_error ) ; free ( result ) ; free ( reorder_result ) ; free ( solutions ) ; free ( x_base ) ; free ( x_all_init ) ; free ( x_all_trans_init ) ; free ( x_latest_init ) ; free ( x_latest_trans_init ) ; free ( recacu_b ) ; int status = (cnt == 0) ? 0:1; return status; }