/**
 * Average of the accumulated time over the number of recorded samples.
 *
 * @return accTime() divided by m_Count, or 0.0 when m_Count is zero so that
 *         no division by zero takes place.
 */
double avgTime() const
{
    if (m_Count == 0) {
        return 0.0;
    }
    return accTime() / m_Count;
}
int main(int argc, char** argv) { int err; // error code returned from api calls int* a = NULL; // input pointer int* results = NULL; // output pointer unsigned int correct; // number of correct results returned size_t global[2]; // global domain size for our calculation size_t local[2]; // local domain size for our calculation cl_platform_id platform_id; // platform id cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel kernel; // compute kernel char cl_platform_vendor[1001]; char cl_platform_name[1001]; cl_mem input_a; // device memory used for the input array //cl_mem input_b; // device memory used for the input array cl_mem output; // device memory used for the output array int inc; double t_start, t_end; if (argc != 2) { printf("%s <inputfile>\n", argv[0]); return EXIT_FAILURE; } // Connect to first platform // err = clGetPlatformIDs(1,&platform_id,NULL); if (err != CL_SUCCESS) { printf("Error: Failed to find an OpenCL platform!\n"); printf("Test failed\n"); return EXIT_FAILURE; } err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL); if (err != CL_SUCCESS) { printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n"); printf("Test failed\n"); return EXIT_FAILURE; } printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor); err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL); if (err != CL_SUCCESS) { printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n"); printf("Test failed\n"); return EXIT_FAILURE; } printf("CL_PLATFORM_NAME %s\n",cl_platform_name); // Connect to a compute device // int fpga = 0; #if defined (FPGA_DEVICE) fpga = 1; #endif err = clGetDeviceIDs(platform_id, fpga ? 
CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to create a device group!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // Create a compute context // context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("Error: Failed to create a compute context!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // Create a command commands // commands = clCreateCommandQueue(context, device_id, 0, &err); if (!commands) { printf("Error: Failed to create a command commands!\n"); printf("Error: code %i\n",err); printf("Test failed\n"); return EXIT_FAILURE; } int status; // Create Program Objects // // Load binary from disk unsigned char *kernelbinary; char *xclbin=argv[1]; printf("loading %s\n", xclbin); int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary); if (n_i < 0) { printf("failed to load kernel from xclbin: %s\n", xclbin); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to load kernel from xclbin: %s\n", xclbin); } size_t n = n_i; // Create the compute program from offline program = clCreateProgramWithBinary(context, 1, &device_id, &n, (const unsigned char **) &kernelbinary, &status, &err); if ((!program) || (err!=CL_SUCCESS)) { printf("Error: Failed to create compute program from binary %d!\n", err); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to create compute program from binary %d!\n", err); } // Build the program executable // err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to build program executable!\n"); } // Create the compute kernel in the program we wish to run // kernel = 
clCreateKernel(program, "mmult", &err); if (!kernel || err != CL_SUCCESS) { printf("Error: Failed to create compute kernel!\n"); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to create compute kernel!\n"); } // Create the input and output arrays in device memory for our calculation // input_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * DATA_SIZE, NULL, NULL); output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * RESULT_SIZE, NULL, NULL); if (!input_a || !output) { printf("Error: Failed to allocate device memory!\n"); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to allocate device memory!\n"); } // set up socket printf("\n************* Welcome to UCLA FPGA agent! **********\n"); struct sockaddr_in stSockAddr; int SocketFD = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if(-1 == SocketFD) { perror("can not create socket"); exit(EXIT_FAILURE); } memset(&stSockAddr, 0, sizeof(stSockAddr)); stSockAddr.sin_family = AF_INET; stSockAddr.sin_port = htons(7000); stSockAddr.sin_addr.s_addr = htonl(INADDR_ANY); if(-1 == bind(SocketFD,(struct sockaddr *)&stSockAddr, sizeof(stSockAddr))) { perror("error bind failed"); close(SocketFD); exit(EXIT_FAILURE); } if(-1 == listen(SocketFD, 10)) { perror("error listen failed"); close(SocketFD); exit(EXIT_FAILURE); } int taskNum = -1; // polling setting timespec deadline; deadline.tv_sec = 0; deadline.tv_nsec = 100; // Get the start time timespec timer = tic( ); timespec socListenTime = diff(timer, timer); timespec socSendTime = diff(timer, timer); timespec socRecvTime = diff(timer, timer); timespec exeTime = diff(timer, timer); bool broadcastFlag = false; int packet_buf[PACKET_SIZE]; int time_buf[TIME_BUF_SIZE]; while (true) { //printf("\n************* Got a new task! 
*************\n"); timer = tic(); int ConnectFD = accept(SocketFD, NULL, NULL); if (!broadcastFlag) { broadcastFlag = true; timer = tic(); } // For profiling only //struct timeval tv; //gettimeofday(&tv, NULL); //double time_in_mill = (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000 ; // convert tv_sec & tv_usec to millisecond //printf("Receive time (ms): %lf\n", time_in_mill); accTime (&socListenTime, &timer); if(0 > ConnectFD) { perror("error accept failed"); close(SocketFD); exit(EXIT_FAILURE); } read(ConnectFD, &packet_buf, PACKET_SIZE * sizeof(int)); // send FPGA stats back to java application if(packet_buf[0] == -1) { // for profiling use collect_timer_stats(ConnectFD, &socListenTime, &socSendTime, &socRecvTime, &exeTime, &timer); broadcastFlag = false; continue; } char* shm_addr; int shmid = -1; int data_size = -1; // data sent to FPGA (unit: int) shmid = packet_buf[0]; data_size = packet_buf[1]; printf("Shmid: %d, Data size (# of int): %d\n", shmid, data_size); // shared memory if((shm_addr = (char *) shmat(shmid, NULL, 0)) == (char *) -1) { perror("Server: shmat failed."); exit(1); } //else //printf("Server: attach shared memory: %p\n", shm_addr); int done = 0; while(done == 0) { done = (int) *((int*)shm_addr); clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL); } //printf("Copy data to the array in the host\n"); a = (int *)(shm_addr + FLAG_NUM * sizeof(int)); results = (int *)(shm_addr + FLAG_NUM * sizeof(int)); accTime (&socSendTime, &timer); taskNum = a[2]; for (int i=0; i<taskNum; i++) { int tmp = *(a+8+i*8+7); assert(tmp >=0 && tmp < TOTAL_TASK_NUMS); } printf("Task Num: %d\n", taskNum); //printf("\nparameter recieved --- \n"); //Write our data set into the input array in device memory //printf("Write data from host to FPGA\n"); err = clEnqueueWriteBuffer(commands, input_a, CL_TRUE, 0, sizeof(int) * data_size, a, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to source array a!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // 
Set the arguments to our compute kernel // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); err |= clSetKernelArg(kernel, 2, sizeof(int), &taskNum); if (err != CL_SUCCESS) { printf("Error: Failed to set kernel arguments! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device // //printf("Enqueue Task\n"); err = clEnqueueTask(commands, kernel, 0, NULL, NULL); if (err) { printf("Error: Failed to execute kernel! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } // Read back the results from the device to verify the output // cl_event readevent; //printf("Enqueue read buffer\n"); err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(int) * FPGA_RET_PARAM_NUM * taskNum, results, 0, NULL, &readevent ); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } //printf("Wait for FPGA results\n"); clWaitForEvents(1, &readevent); accTime(&exeTime, &timer); // Get the execution time //toc(&timer); // put data back to shared memory //printf("Put data back to the shared memory\n"); *((int*)(shm_addr + sizeof(int))) = DONE; //printf("\n************* Task finished! 
*************\n"); if (-1 == shutdown(ConnectFD, SHUT_RDWR)) { perror("can not shutdown socket"); close(ConnectFD); close(SocketFD); exit(EXIT_FAILURE); } close(ConnectFD); //printf("done\n"); // free the shared memory shmdt(shm_addr); //shmctl(shmid, IPC_RMID, 0); accTime(&socRecvTime, &timer); printf("**********timing begin**********\n"); printTimeSpec(socListenTime); printTimeSpec(socSendTime); printTimeSpec(socRecvTime); printTimeSpec(exeTime); printf("**********timing end**********\n\n"); } close(SocketFD); // Shutdown and cleanup // clReleaseMemObject(input_a); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); return EXIT_SUCCESS; }